From: Alex Zhuravlev Date: Tue, 7 Aug 2012 09:56:48 +0000 (+0400) Subject: LU-1711 mount: obd_mount to start osd X-Git-Tag: 2.3.51~117 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=35920b759ed78441db0cd9de6ac8ec66da862f22 LU-1711 mount: obd_mount to start osd ... instead of mounting underlying ldiskfs directly. so, now OSD can be any depending on osd= mount option. Signed-off-by: Alex Zhuravlev Change-Id: Iab1ca6ca9f84ba6c708e05cd85c4de0a51d34de6 Reviewed-on: http://review.whamcloud.com/3650 Tested-by: Hudson Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Jinshan Xiong --- diff --git a/lustre/include/lustre_disk.h b/lustre/include/lustre_disk.h index fbc945a..f91b21c 100644 --- a/lustre/include/lustre_disk.h +++ b/lustre/include/lustre_disk.h @@ -490,9 +490,13 @@ struct lustre_sb_info { struct obd_device *lsi_mgc; /* mgc obd */ struct lustre_mount_data *lsi_lmd; /* mount command info */ struct ll_sb_info *lsi_llsbi; /* add'l client sbi info */ + struct dt_device *lsi_dt_dev; /* dt device to access disk fs*/ struct vfsmount *lsi_srv_mnt; /* the one server mount */ cfs_atomic_t lsi_mounts; /* references to the srv_mnt */ char lsi_svname[MTI_NAME_MAXLEN]; + char lsi_osd_obdname[64]; + char lsi_osd_uuid[64]; + struct obd_export *lsi_osd_exp; char lsi_osd_type[16]; char lsi_fstype[16]; struct backing_dev_info lsi_bdi; /* each client mountpoint needs diff --git a/lustre/include/obd.h b/lustre/include/obd.h index bc1883d..6490a04 100644 --- a/lustre/include/obd.h +++ b/lustre/include/obd.h @@ -836,7 +836,7 @@ struct niobuf_local { #define LUSTRE_CMM_NAME "cmm" #define LUSTRE_MDD_NAME "mdd" -#define LUSTRE_OSD_NAME "osd-ldiskfs" +#define LUSTRE_OSD_LDISKFS_NAME "osd-ldiskfs" #define LUSTRE_OSD_ZFS_NAME "osd-zfs" #define LUSTRE_VVP_NAME "vvp" #define LUSTRE_LMV_NAME "lmv" diff --git a/lustre/mdd/mdd_device.c b/lustre/mdd/mdd_device.c index 34b3091..f61df7c 100644 --- a/lustre/mdd/mdd_device.c +++ b/lustre/mdd/mdd_device.c @@ -94,7 +94,6 @@ static struct lu_device *mdd_device_fini(const struct lu_env *env, struct lu_device *d) { struct mdd_device *mdd = lu2mdd_dev(d); - struct lu_device *next = &mdd->mdd_child->dd_lu_dev; int rc; rc = mdd_procfs_fini(mdd); @@ -102,7 +101,7 @@ static struct lu_device *mdd_device_fini(const struct lu_env *env, CERROR("proc fini error %d \n", rc); return ERR_PTR(rc); } - return next; + return NULL; } static void mdd_changelog_fini(const struct lu_env *env, @@ -1281,13 +1280,12 @@ static struct lu_device *mdd_device_free(const struct lu_env *env, struct lu_device *lu) { struct mdd_device *m = lu2mdd_dev(lu); - struct lu_device *next = &m->mdd_child->dd_lu_dev; ENTRY; LASSERT(cfs_atomic_read(&lu->ld_ref) == 0); md_device_fini(&m->mdd_md_dev); OBD_FREE_PTR(m); - RETURN(next); + RETURN(NULL); } static struct obd_ops mdd_obd_device_ops = { diff --git a/lustre/mds/handler.c b/lustre/mds/handler.c index 71ea401..182100e 100644 --- a/lustre/mds/handler.c +++ b/lustre/mds/handler.c @@ -312,7 +312,6 @@ static int mds_cmd_setup(struct obd_device *obd, struct lustre_cfg *lcfg) * OSD did mount already, so put mount back */ cfs_atomic_dec(&lsi->lsi_mounts); - mntput(mnt); cfs_init_rwsem(&mds->mds_notify_lock); obd->obd_fsops = fsfilt_get_ops(lsi->lsi_fstype); diff --git a/lustre/mdt/mdt_handler.c b/lustre/mdt/mdt_handler.c index d2e5312..c7e1b68 100644 --- a/lustre/mdt/mdt_handler.c +++ b/lustre/mdt/mdt_handler.c @@ -4360,6 +4360,8 @@ static void mdt_stack_fini(const struct lu_env *env, lu_stack_fini(env, top); m->mdt_child = NULL; m->mdt_bottom = NULL; + + obd_disconnect(m->mdt_bottom_exp); } static struct lu_device *mdt_layer_setup(struct lu_env *env, @@ -4423,6 +4425,41 @@ out: return ERR_PTR(rc); } +static int mdt_connect_to_next(const struct lu_env *env, struct mdt_device *m, + const char *next, struct obd_export **exp) +{ + struct obd_connect_data *data = NULL; + struct obd_device *obd; + int rc; + ENTRY; + + OBD_ALLOC_PTR(data); + if (data == NULL) + GOTO(out, rc = -ENOMEM); + + obd = class_name2obd(next); + if (obd == NULL) { + CERROR("%s: can't locate next device: %s\n", + m->mdt_md_dev.md_lu_dev.ld_obd->obd_name, next); + GOTO(out, rc = -ENOTCONN); + } + + data->ocd_connect_flags = OBD_CONNECT_VERSION; + data->ocd_version = LUSTRE_VERSION_CODE; + + rc = obd_connect(NULL, exp, obd, &obd->obd_uuid, data, NULL); + if (rc) { + CERROR("%s: cannot connect to next dev %s (%d)\n", + m->mdt_md_dev.md_lu_dev.ld_obd->obd_name, next, rc); + GOTO(out, rc); + } + +out: + if (data) + OBD_FREE_PTR(data); + RETURN(rc); +} + static int mdt_stack_init(struct lu_env *env, struct mdt_device *m, struct lustre_cfg *cfg, @@ -4432,16 +4469,27 @@ static int mdt_stack_init(struct lu_env *env, struct lu_device *tmp; struct md_device *md; struct lu_device *child_lu_dev; + char *osdname; int rc; ENTRY; - /* init the stack */ - tmp = mdt_layer_setup(env, LUSTRE_OSD_NAME, d, cfg); - if (IS_ERR(tmp)) { - RETURN(PTR_ERR(tmp)); - } - m->mdt_bottom = lu2dt_dev(tmp); - d = tmp; + /* find bottom osd */ + OBD_ALLOC(osdname, MTI_NAME_MAXLEN); + if (osdname == NULL) + RETURN(-ENOMEM); + + snprintf(osdname, MTI_NAME_MAXLEN, "%s-osd", lustre_cfg_string(cfg, 0)); + rc = mdt_connect_to_next(env, m, osdname, &m->mdt_bottom_exp); + OBD_FREE(osdname, MTI_NAME_MAXLEN); + if (rc) + RETURN(rc); + + tmp = m->mdt_bottom_exp->exp_obd->obd_lu_dev; + LASSERT(tmp); + m->mdt_bottom = lu2dt_dev(tmp); + tmp->ld_site = d->ld_site; + d = tmp; + tmp = mdt_layer_setup(env, LUSTRE_MDD_NAME, d, cfg); if (IS_ERR(tmp)) { GOTO(out, rc = PTR_ERR(tmp)); @@ -4598,6 +4646,8 @@ static void mdt_fini(const struct lu_env *env, struct mdt_device *m) } LASSERT(cfs_atomic_read(&d->ld_ref) == 0); + server_put_mount(mdt2obd_dev(m)->obd_name, NULL); + EXIT; } @@ -4670,7 +4720,7 @@ static int mdt_init0(const struct lu_env *env, struct mdt_device *m, m->mdt_som_conf = 0; m->mdt_opts.mo_cos = MDT_COS_DEFAULT; - lmi = server_get_mount_2(dev); + lmi = server_get_mount(dev); if (lmi == NULL) { CERROR("Cannot get mount info for %s!\n", dev); RETURN(-EFAULT); @@ -4799,9 +4849,6 @@ static int mdt_init0(const struct lu_env *env, struct mdt_device *m, GOTO(err_llog_cleanup, rc); #endif - server_put_mount_2(dev, lmi->lmi_mnt); - lmi = NULL; - rc = next->md_ops->mdo_iocontrol(env, next, OBD_IOC_GET_MNTOPT, 0, &mntopts); if (rc) @@ -4898,8 +4945,8 @@ err_lu_site: err_free_site: OBD_FREE_PTR(mite); err_lmi: - if (lmi) - server_put_mount_2(dev, lmi->lmi_mnt); + if (lmi) + server_put_mount(dev, lmi->lmi_mnt); return (rc); } diff --git a/lustre/mdt/mdt_internal.h b/lustre/mdt/mdt_internal.h index 52ba781..e15524d 100644 --- a/lustre/mdt/mdt_internal.h +++ b/lustre/mdt/mdt_internal.h @@ -111,8 +111,10 @@ struct mdt_device { /* ptlrpc handle for MDS->client connections (for lock ASTs). */ struct ptlrpc_client *mdt_ldlm_client; /* underlying device */ + struct obd_export *mdt_child_exp; struct md_device *mdt_child; struct dt_device *mdt_bottom; + struct obd_export *mdt_bottom_exp; /** target device */ struct lu_target mdt_lut; /* diff --git a/lustre/obdclass/genops.c b/lustre/obdclass/genops.c index 8a141ab..3d6a145 100644 --- a/lustre/obdclass/genops.c +++ b/lustre/obdclass/genops.c @@ -112,6 +112,7 @@ struct obd_type *class_search_type(const char *name) cfs_spin_unlock(&obd_types_lock); return NULL; } +EXPORT_SYMBOL(class_search_type); struct obd_type *class_get_type(const char *name) { diff --git a/lustre/obdclass/obd_mount.c b/lustre/obdclass/obd_mount.c index f844734..368256a 100644 --- a/lustre/obdclass/obd_mount.c +++ b/lustre/obdclass/obd_mount.c @@ -172,8 +172,6 @@ struct lustre_mount_info *server_get_mount(const char *name) } lsi = s2lsi(lmi->lmi_sb); - if (lmi->lmi_mnt) - mntget(lmi->lmi_mnt); cfs_atomic_inc(&lsi->lsi_mounts); CDEBUG(D_MOUNT, "get_mnt %p from %s, refs=%d, vfscount=%d\n", @@ -204,22 +202,6 @@ struct lustre_mount_info *server_get_mount_2(const char *name) } EXPORT_SYMBOL(server_get_mount_2); -static void unlock_mntput(struct vfsmount *mnt) -{ -#ifdef HAVE_KERNEL_LOCKED - /* for kernel < 2.6.37 */ - if (kernel_locked()) { - unlock_kernel(); - mntput(mnt); - lock_kernel(); - } else { - mntput(mnt); - } -#else - mntput(mnt); -#endif -} - static int lustre_put_lsi(struct super_block *sb); /* to be called from obd_cleanup methods */ @@ -230,12 +212,6 @@ int server_put_mount(const char *name, struct vfsmount *mnt) int count = 0; ENTRY; - /* This might be the last one, can't deref after this */ - if (mnt) { - count = mnt_get_count(mnt) - 1; - unlock_mntput(mnt); - } - cfs_mutex_lock(&lustre_mount_info_lock); lmi = server_find_mount(name); cfs_mutex_unlock(&lustre_mount_info_lock); @@ -244,7 +220,6 @@ int server_put_mount(const char *name, struct vfsmount *mnt) RETURN(-ENOENT); } lsi = s2lsi(lmi->lmi_sb); - LASSERT(lmi->lmi_mnt == mnt); CDEBUG(D_MOUNT, "put_mnt %p from %s, refs=%d, vfscount=%d\n", lmi->lmi_mnt, name, cfs_atomic_read(&lsi->lsi_mounts), count); @@ -398,22 +373,22 @@ EXPORT_SYMBOL(do_lcfg); * obd type-specific methods. */ static int lustre_start_simple(char *obdname, char *type, char *uuid, - char *s1, char *s2) + char *s1, char *s2, char *s3, char *s4) { - int rc; - CDEBUG(D_MOUNT, "Starting obd %s (typ=%s)\n", obdname, type); + int rc; + CDEBUG(D_MOUNT, "Starting obd %s (typ=%s)\n", obdname, type); - rc = do_lcfg(obdname, 0, LCFG_ATTACH, type, uuid, 0, 0); - if (rc) { - CERROR("%s attach error %d\n", obdname, rc); - return(rc); - } - rc = do_lcfg(obdname, 0, LCFG_SETUP, s1, s2, 0, 0); - if (rc) { - CERROR("%s setup error %d\n", obdname, rc); - do_lcfg(obdname, 0, LCFG_DETACH, 0, 0, 0, 0); - } - return rc; + rc = do_lcfg(obdname, 0, LCFG_ATTACH, type, uuid, 0, 0); + if (rc) { + CERROR("%s attach error %d\n", obdname, rc); + return(rc); + } + rc = do_lcfg(obdname, 0, LCFG_SETUP, s1, s2, s3, s4); + if (rc) { + CERROR("%s setup error %d\n", obdname, rc); + do_lcfg(obdname, 0, LCFG_DETACH, 0, 0, 0, 0); + } + return rc; } /* Set up a MGS to serve startup logs */ @@ -442,7 +417,7 @@ static int server_start_mgs(struct super_block *sb) if (!rc) { rc = lustre_start_simple(LUSTRE_MGS_OBDNAME, LUSTRE_MGS_NAME, - LUSTRE_MGS_OBDNAME, 0, 0); + LUSTRE_MGS_OBDNAME, 0, 0, 0, 0); /* Do NOT call server_deregister_mount() here. This leads to * inability cleanup cleanly and free lsi and other stuff when * mgs calls server_put_mount() in error handling case. -umka */ @@ -658,7 +633,7 @@ static int lustre_start_mgc(struct super_block *sb) /* Start the MGC */ rc = lustre_start_simple(mgcname, LUSTRE_MGC_NAME, (char *)uuid->uuid, LUSTRE_MGS_OBDNAME, - niduuid); + niduuid, 0, 0); OBD_FREE_PTR(uuid); if (rc) GOTO(out_free, rc); @@ -1201,7 +1176,7 @@ static int server_start_targets(struct super_block *sb, struct vfsmount *mnt) rc = lustre_start_simple(LUSTRE_OSS_OBDNAME, LUSTRE_OSS_NAME, LUSTRE_OSS_OBDNAME"_uuid", - 0, 0); + 0, 0, 0, 0); if (rc) { cfs_mutex_unlock(&server_start_lock); CERROR("failed to start OSS: %d\n", rc); @@ -1360,6 +1335,11 @@ static int lustre_put_lsi(struct super_block *sb) CDEBUG(D_MOUNT, "put %p %d\n", sb, cfs_atomic_read(&lsi->lsi_mounts)); if (cfs_atomic_dec_and_test(&lsi->lsi_mounts)) { + if (IS_SERVER(lsi) && lsi->lsi_osd_exp) { + obd_disconnect(lsi->lsi_osd_exp); + /* wait till OSD is gone */ + obd_zombie_barrier(); + } lustre_free_lsi(sb); RETURN(1); } @@ -1394,7 +1374,7 @@ static int lsi_prepare(struct lustre_sb_info *lsi) strcpy(lsi->lsi_osd_type, lsi->lsi_lmd->lmd_osd_type); } else { - strcpy(lsi->lsi_osd_type, LUSTRE_OSD_NAME); + strcpy(lsi->lsi_osd_type, LUSTRE_OSD_LDISKFS_NAME); } /* XXX: a temp. solution for components using fsfilt @@ -1439,151 +1419,12 @@ static int lsi_prepare(struct lustre_sb_info *lsi) /*************** server mount ******************/ -/** Kernel mount using mount options in MOUNT_DATA_FILE. - * Since this file lives on the disk, we pre-mount using a common - * type, read the file, then re-mount using the type specified in the - * file. - */ -static struct vfsmount *server_kernel_mount(struct super_block *sb) -{ - struct lustre_sb_info *lsi = s2lsi(sb); - struct lustre_mount_data *lmd = lsi->lsi_lmd; - struct vfsmount *mnt; - struct file_system_type *type; - char *options = NULL; - unsigned long page, s_flags; - struct page *__page; - int len; - int rc; - ENTRY; - - rc = lsi_prepare(lsi); - if (rc) - RETURN(ERR_PTR(rc)); - - if (strcmp(lmd->lmd_osd_type, "osd-ldiskfs") == 0) { - /* with ldiskfs we're still mounting in the kernel space */ - OBD_FREE(lmd->lmd_osd_type, - strlen(lmd->lmd_osd_type) + 1); - lmd->lmd_osd_type = NULL; - } else { - /* non-ldiskfs backends (zfs) do mounting internally */ - RETURN(NULL); - } - - /* In the past, we have always used flags = 0. - Note ext3/ldiskfs can't be mounted ro. */ - s_flags = sb->s_flags; - - /* allocate memory for options */ - OBD_PAGE_ALLOC(__page, CFS_ALLOC_STD); - if (!__page) - GOTO(out_free, rc = -ENOMEM); - page = (unsigned long)cfs_page_address(__page); - options = (char *)page; - memset(options, 0, CFS_PAGE_SIZE); - - /* Glom up mount options */ - memset(options, 0, CFS_PAGE_SIZE); - strncpy(options, lsi->lsi_lmd->lmd_opts, CFS_PAGE_SIZE - 2); - - len = CFS_PAGE_SIZE - strlen(options) - 2; - if (*options != 0) - strcat(options, ","); - strncat(options, "no_mbcache", len); - - /* Add in any mount-line options */ - if (lmd->lmd_opts && (*(lmd->lmd_opts) != 0)) { - len = CFS_PAGE_SIZE - strlen(options) - 2; - strcat(options, ","); - strncat(options, lmd->lmd_opts, len); - } - - /* Special permanent mount flags */ - if (IS_OST(lsi)) - s_flags |= MS_NOATIME | MS_NODIRATIME; - - CDEBUG(D_MOUNT, "kern_mount: %s %s %s\n", - lsi->lsi_osd_type, lmd->lmd_dev, options); - type = get_fs_type(lsi->lsi_fstype); - if (!type) { - CERROR("get_fs_type failed\n"); - GOTO(out_free, rc = -ENODEV); - } - mnt = vfs_kern_mount(type, s_flags, lmd->lmd_dev, (void *)options); - cfs_module_put(type->owner); - if (IS_ERR(mnt)) { - rc = PTR_ERR(mnt); - CERROR("vfs_kern_mount failed: rc = %d\n", rc); - GOTO(out_free, rc); - } - - if (lmd->lmd_flags & LMD_FLG_ABORT_RECOV) - simple_truncate(mnt->mnt_sb->s_root, mnt, LAST_RCVD, - LR_CLIENT_START); - - OBD_PAGE_FREE(__page); - CDEBUG(D_SUPER, "%s: mnt = %p\n", lmd->lmd_dev, mnt); - RETURN(mnt); - -out_free: - if (__page) - OBD_PAGE_FREE(__page); - RETURN(ERR_PTR(rc)); -} - -/** Wait here forever until the mount refcount is 0 before completing umount, - * else we risk dereferencing a null pointer. - * LNET may take e.g. 165s before killing zombies. - */ -static void server_wait_finished(struct vfsmount *mnt) -{ - cfs_waitq_t waitq; - int rc, waited = 0; - cfs_sigset_t blocked; - - if (mnt == NULL) { - cfs_waitq_init(&waitq); - cfs_waitq_wait_event_interruptible_timeout(waitq, 0, - cfs_time_seconds(3), rc); - return; - } - - LASSERT(mnt); - cfs_waitq_init(&waitq); - - while (mnt_get_count(mnt) > 1) { - if (waited && (waited % 30 == 0)) - LCONSOLE_WARN("Mount still busy with %d refs after " - "%d secs.\n", - mnt_get_count(mnt), - waited); - /* Cannot use l_event_wait() for an interruptible sleep. */ - waited += 3; - blocked = cfs_block_sigsinv(sigmask(SIGKILL)); - cfs_waitq_wait_event_interruptible_timeout( - waitq, - (mnt_get_count(mnt) == 1), - cfs_time_seconds(3), - rc); - cfs_restore_sigs(blocked); - if (rc < 0) { - LCONSOLE_EMERG("Danger: interrupted umount %s with " - "%d refs!\n", mnt_get_devname(mnt), - mnt_get_count(mnt)); - break; - } - - } -} - /** Start the shutdown of servers at umount. */ static void server_put_super(struct super_block *sb) { struct lustre_sb_info *lsi = s2lsi(sb); struct obd_device *obd; - struct vfsmount *mnt = lsi->lsi_srv_mnt; char *tmpname, *extraname = NULL; int tmpname_sz; int lsiflags = lsi->lsi_flags; @@ -1641,13 +1482,10 @@ static void server_put_super(struct super_block *sb) /* Clean the mgc and sb */ lustre_common_put_super(sb); - /* Wait for the targets to really clean up - can't exit (and let the - sb get destroyed) while the mount is still in use */ - server_wait_finished(mnt); - - /* drop the One True Mount */ - if (mnt) - unlock_mntput(mnt); + /* wait till all in-progress cleanups are done + * specifically we're interested in ofd cleanup + * as it pins OSS */ + obd_zombie_barrier(); /* Stop the servers (MDS, OSS) if no longer needed. We must wait until the target is really gone so that our type refcount check @@ -1772,6 +1610,58 @@ static int server_fill_super_common(struct super_block *sb) RETURN(0); } +static int osd_start(struct lustre_sb_info *lsi, unsigned long mflags) +{ + struct lustre_mount_data *lmd = lsi->lsi_lmd; + struct obd_device *obd; + struct dt_device_param p; + char flagstr[16]; + int rc; + ENTRY; + + CDEBUG(D_MOUNT, + "Attempting to start %s, type=%s, lsifl=%x, mountfl=%lx\n", + lsi->lsi_svname, lsi->lsi_osd_type, lsi->lsi_flags, mflags); + + sprintf(lsi->lsi_osd_obdname, "%s-osd", lsi->lsi_svname); + strcpy(lsi->lsi_osd_uuid, lsi->lsi_osd_obdname); + strcat(lsi->lsi_osd_uuid, "_UUID"); + sprintf(flagstr, "%lu:%lu", mflags, (unsigned long) lmd->lmd_flags); + + obd = class_name2obd(lsi->lsi_osd_obdname); + if (obd == NULL) { + rc = lustre_start_simple(lsi->lsi_osd_obdname, + lsi->lsi_osd_type, + lsi->lsi_osd_uuid, lmd->lmd_dev, + flagstr, lsi->lsi_lmd->lmd_opts, + lsi->lsi_svname); + if (rc) + GOTO(out, rc); + obd = class_name2obd(lsi->lsi_osd_obdname); + LASSERT(obd); + } + + rc = obd_connect(NULL, &lsi->lsi_osd_exp, obd, &obd->obd_uuid, NULL, NULL); + if (rc) { + obd->obd_force = 1; + class_manual_cleanup(obd); + lsi->lsi_dt_dev = NULL; + } + + /* XXX: to keep support old components relying on lsi_srv_mnt + * we get this info from OSD just started */ + LASSERT(obd->obd_lu_dev); + lsi->lsi_dt_dev = lu2dt_dev(obd->obd_lu_dev); + LASSERT(lsi->lsi_dt_dev); + + dt_conf_get(NULL, lsi->lsi_dt_dev, &p); + + lsi->lsi_srv_mnt = p.ddp_mnt; + +out: + RETURN(rc); +} + /** Fill in the superblock info for a Lustre server. * Mount the device with the correct options. * Read the on-disk config file. @@ -1780,20 +1670,21 @@ static int server_fill_super_common(struct super_block *sb) static int server_fill_super(struct super_block *sb) { struct lustre_sb_info *lsi = s2lsi(sb); - struct vfsmount *mnt; int rc; ENTRY; - /* the One True Mount */ - mnt = server_kernel_mount(sb); - if (IS_ERR(mnt)) { - rc = PTR_ERR(mnt); - CERROR("Unable to mount device %s: %d\n", - lsi->lsi_lmd->lmd_dev, rc); + rc = lsi_prepare(lsi); + if (rc) + RETURN(rc); + + /* Start low level OSD */ + rc = osd_start(lsi, sb->s_flags); + if (rc) { + CERROR("Unable to start osd on %s: %d\n", + lsi->lsi_lmd->lmd_dev, rc); lustre_put_lsi(sb); RETURN(rc); - } - lsi->lsi_srv_mnt = mnt; + } CDEBUG(D_MOUNT, "Found service %s on device %s\n", lsi->lsi_svname, lsi->lsi_lmd->lmd_dev); @@ -1804,7 +1695,6 @@ static int server_fill_super(struct super_block *sb) " the disk journal.\n", lsi->lsi_svname); lustre_put_lsi(sb); - unlock_mntput(mnt); RETURN(-EALREADY); } @@ -1823,7 +1713,7 @@ static int server_fill_super(struct super_block *sb) /* Set up all obd devices for service */ if (!(lsi->lsi_lmd->lmd_flags & LMD_FLG_NOSVC) && (IS_OST(lsi) || IS_MDT(lsi))) { - rc = server_start_targets(sb, mnt); + rc = server_start_targets(sb, lsi->lsi_srv_mnt); if (rc < 0) { CERROR("Unable to start targets: %d\n", rc); GOTO(out_mnt, rc); diff --git a/lustre/ofd/ofd_dev.c b/lustre/ofd/ofd_dev.c index ac87ce5..b06c04b 100644 --- a/lustre/ofd/ofd_dev.c +++ b/lustre/ofd/ofd_dev.c @@ -61,46 +61,74 @@ static struct lu_kmem_descr ofd_caches[] = { } }; +static int ofd_connect_to_next(const struct lu_env *env, struct ofd_device *m, + const char *next, struct obd_export **exp) +{ + struct obd_connect_data *data = NULL; + struct obd_device *obd; + int rc; + ENTRY; + + OBD_ALLOC_PTR(data); + if (data == NULL) + GOTO(out, rc = -ENOMEM); + + obd = class_name2obd(next); + if (obd == NULL) { + CERROR("%s: can't locate next device: %s\n", + m->ofd_dt_dev.dd_lu_dev.ld_obd->obd_name, next); + GOTO(out, rc = -ENOTCONN); + } + + data->ocd_connect_flags = OBD_CONNECT_VERSION; + data->ocd_version = LUSTRE_VERSION_CODE; + + rc = obd_connect(NULL, exp, obd, &obd->obd_uuid, data, NULL); + if (rc) { + CERROR("%s: cannot connect to next dev %s: rc = %d\n", + m->ofd_dt_dev.dd_lu_dev.ld_obd->obd_name, next, rc); + GOTO(out, rc); + } + +out: + if (data) + OBD_FREE_PTR(data); + RETURN(rc); +} + static int ofd_stack_init(const struct lu_env *env, struct ofd_device *m, struct lustre_cfg *cfg) { struct lu_device *ofd_lu = &m->ofd_dt_dev.dd_lu_dev; const char *dev = lustre_cfg_string(cfg, 0); - struct obd_type *type; - struct lu_device_type *ldt; struct lu_device *d; struct ofd_thread_info *info = ofd_info(env); struct lustre_mount_info *lmi; int rc; + char *osdname; ENTRY; - lmi = server_get_mount_2(dev); + lmi = server_get_mount(dev); if (lmi == NULL) { CERROR("Cannot get mount info for %s!\n", dev); RETURN(-ENODEV); } - type = class_get_type(s2lsi(lmi->lmi_sb)->lsi_osd_type); - if (!type) { - CERROR("Unknown type: '%s'\n", - s2lsi(lmi->lmi_sb)->lsi_osd_type); - RETURN(-ENODEV); - } + /* find bottom osd */ + OBD_ALLOC(osdname, MTI_NAME_MAXLEN); + if (osdname == NULL) + RETURN(-ENOMEM); - ldt = type->typ_lu; - if (ldt == NULL) { - CERROR("type: '%s'\n", s2lsi(lmi->lmi_sb)->lsi_osd_type); - GOTO(out_type, rc = -EINVAL); - } + snprintf(osdname, MTI_NAME_MAXLEN, "%s-osd", dev); + rc = ofd_connect_to_next(env, m, osdname, &m->ofd_osd_exp); + OBD_FREE(osdname, MTI_NAME_MAXLEN); + if (rc) + RETURN(rc); - ldt->ldt_obd_type = type; - d = ldt->ldt_ops->ldto_device_alloc(env, ldt, cfg); - if (IS_ERR(d)) { - CERROR("Cannot allocate device: '%s'\n", - s2lsi(lmi->lmi_sb)->lsi_osd_type); - GOTO(out_type, rc = -ENODEV); - } + d = m->ofd_osd_exp->exp_obd->obd_lu_dev; + LASSERT(d); + m->ofd_osd = lu2dt_dev(d); LASSERT(ofd_lu->ld_site); d->ld_site = ofd_lu->ld_site; @@ -108,39 +136,6 @@ static int ofd_stack_init(const struct lu_env *env, snprintf(info->fti_u.name, sizeof(info->fti_u.name), "%s-osd", lustre_cfg_string(cfg, 0)); - type->typ_refcnt++; - - rc = lu_env_refill((struct lu_env *)env); - if (rc != 0) { - CERROR("Failure to refill session: '%d'\n", rc); - GOTO(out_free, rc); - } - - rc = ldt->ldt_ops->ldto_device_init(env, d, dev, NULL); - if (rc) { - CERROR("can't init device '%s', rc = %d\n", - s2lsi(lmi->lmi_sb)->lsi_osd_type, rc); - GOTO(out_free, rc); - } - lu_device_get(d); - lu_ref_add(&d->ld_reference, "lu-stack", &lu_site_init); - - m->ofd_osd = lu2dt_dev(d); - - /* process setup config */ - rc = d->ld_ops->ldo_process_config(env, d, cfg); - if (rc) - GOTO(out_fini, rc); - - RETURN(rc); - -out_fini: - ldt->ldt_ops->ldto_device_fini(env, d); -out_free: - type->typ_refcnt--; - ldt->ldt_ops->ldto_device_free(env, d); -out_type: - class_put_type(type); RETURN(rc); } @@ -173,7 +168,10 @@ static void ofd_stack_fini(const struct lu_env *env, struct ofd_device *m, top->ld_ops->ldo_process_config(env, top, lcfg); lustre_cfg_free(lcfg); - lu_stack_fini(env, &m->ofd_osd->dd_lu_dev); + lu_site_purge(env, top->ld_site, ~0); + + LASSERT(m->ofd_osd_exp); + obd_disconnect(m->ofd_osd_exp); m->ofd_osd = NULL; EXIT; @@ -601,10 +599,11 @@ static void ofd_fini(const struct lu_env *env, struct ofd_device *m) d->ld_obd->obd_namespace = m->ofd_namespace = NULL; } - ofd_stack_fini(env, m, m->ofd_site.ls_top_dev); + ofd_stack_fini(env, m, &m->ofd_dt_dev.dd_lu_dev); lu_site_fini(&m->ofd_site); ofd_procfs_fini(m); LASSERT(cfs_atomic_read(&d->ld_ref) == 0); + server_put_mount(obd->obd_name, NULL); EXIT; } diff --git a/lustre/ofd/ofd_internal.h b/lustre/ofd/ofd_internal.h index b5563cf..c94a7a2 100644 --- a/lustre/ofd/ofd_internal.h +++ b/lustre/ofd/ofd_internal.h @@ -75,6 +75,7 @@ enum { struct ofd_device { struct dt_device ofd_dt_dev; struct dt_device *ofd_osd; + struct obd_export *ofd_osd_exp; struct dt_device_param ofd_dt_conf; /* DLM name-space for meta-data locks maintained by this server */ struct ldlm_namespace *ofd_namespace; diff --git a/lustre/osd-ldiskfs/osd_handler.c b/lustre/osd-ldiskfs/osd_handler.c index b50d865..f1c0506 100644 --- a/lustre/osd-ldiskfs/osd_handler.c +++ b/lustre/osd-ldiskfs/osd_handler.c @@ -976,7 +976,8 @@ static int osd_object_print(const struct lu_env *env, void *cookie, d = o->oo_dir->od_container.ic_descr; else d = NULL; - return (*p)(env, cookie, LUSTRE_OSD_NAME"-object@%p(i:%p:%lu/%u)[%s]", + return (*p)(env, cookie, + LUSTRE_OSD_LDISKFS_NAME"-object@%p(i:%p:%lu/%u)[%s]", o, o->oo_inode, o->oo_inode ? o->oo_inode->i_ino : 0UL, o->oo_inode ? o->oo_inode->i_generation : 0, @@ -994,7 +995,7 @@ int osd_statfs(const struct lu_env *env, struct dt_device *d, struct kstatfs *ksfs; int result = 0; - if (unlikely(osd->od_mount == NULL)) + if (unlikely(osd->od_mnt == NULL)) return -EINPROGRESS; /* osd_lproc.c call this without env, allocate ksfs for that case */ @@ -1038,6 +1039,7 @@ static void osd_conf_get(const struct lu_env *env, /* * XXX should be taken from not-yet-existing fs abstraction layer. */ + param->ddp_mnt = osd_dt_dev(dev)->od_mnt; param->ddp_max_name_len = LDISKFS_NAME_LEN; param->ddp_max_nlink = LDISKFS_LINK_MAX; param->ddp_block_shift = sb->s_blocksize_bits; @@ -1074,7 +1076,7 @@ static struct lu_buf *osd_buf_get(const struct lu_env *env, void *area, ssize_t */ static int osd_sync(const struct lu_env *env, struct dt_device *d) { - CDEBUG(D_HA, "syncing OSD %s\n", LUSTRE_OSD_NAME); + CDEBUG(D_HA, "syncing OSD %s\n", LUSTRE_OSD_LDISKFS_NAME); return ldiskfs_force_commit(osd_sb(osd_dt_dev(d))); } @@ -1096,7 +1098,7 @@ static int osd_commit_async(const struct lu_env *env, struct super_block *s = osd_sb(osd_dt_dev(d)); ENTRY; - CDEBUG(D_HA, "async commit OSD %s\n", LUSTRE_OSD_NAME); + CDEBUG(D_HA, "async commit OSD %s\n", LUSTRE_OSD_LDISKFS_NAME); RETURN(s->s_op->sync_fs(s, 0)); } @@ -1110,7 +1112,7 @@ static int osd_ro(const struct lu_env *env, struct dt_device *d) int rc; ENTRY; - CERROR("*** setting device %s read-only ***\n", LUSTRE_OSD_NAME); + CERROR("*** setting %s read-only ***\n", osd_dt_dev(d)->od_svname); rc = __lvfs_set_rdonly(sb->s_bdev, LDISKFS_SB(sb)->journal_bdev); RETURN(rc); @@ -4331,49 +4333,92 @@ static int osd_shutdown(const struct lu_env *env, struct osd_device *o) static int osd_mount(const struct lu_env *env, struct osd_device *o, struct lustre_cfg *cfg) { - struct lustre_mount_info *lmi; - const char *dev = lustre_cfg_string(cfg, 0); - struct lustre_sb_info *lsi; - int rc = 0; - + const char *name = lustre_cfg_string(cfg, 0); + const char *dev = lustre_cfg_string(cfg, 1); + const char *opts; + unsigned long page, s_flags, lmd_flags = 0; + struct page *__page; + struct file_system_type *type; + char *options = NULL; + char *str; + int rc = 0; ENTRY; + if (o->od_mnt != NULL) + RETURN(0); + o->od_fsops = fsfilt_get_ops(mt_str(LDD_MT_LDISKFS)); if (o->od_fsops == NULL) { CERROR("Can't find fsfilt_ldiskfs\n"); RETURN(-ENOTSUPP); } - if (o->od_mount != NULL) { - CERROR("Already mounted (%s)\n", dev); - RETURN(-EEXIST); - } + OBD_PAGE_ALLOC(__page, CFS_ALLOC_STD); + if (__page == NULL) + RETURN(-ENOMEM); + + str = lustre_cfg_string(cfg, 2); + s_flags = simple_strtoul(str, NULL, 0); + str = strstr(str, ":"); + if (str) + lmd_flags = simple_strtoul(str + 1, NULL, 0); + opts = lustre_cfg_string(cfg, 3); + page = (unsigned long)cfs_page_address(__page); + options = (char *)page; + *options = '\0'; + if (opts == NULL) + strcat(options, "user_xattr,acl"); + else + strcat(options, opts); - /* get mount */ - lmi = server_get_mount(dev); - if (lmi == NULL) { - CERROR("Cannot get mount info for %s!\n", dev); - RETURN(-EFAULT); - } + /* Glom up mount options */ + if (*options != '\0') + strcat(options, ","); + strlcat(options, "no_mbcache", CFS_PAGE_SIZE); - LASSERT(lmi != NULL); - /* save lustre_mount_info in dt_device */ - o->od_mount = lmi; - o->od_mnt = lmi->lmi_mnt; + type = get_fs_type("ldiskfs"); + if (!type) { + CERROR("%s: cannot find ldiskfs module\n", name); + GOTO(out, rc = -ENODEV); + } - lsi = s2lsi(lmi->lmi_sb); + o->od_mnt = vfs_kern_mount(type, s_flags, dev, options); + cfs_module_put(type->owner); - if (lsi->lsi_flags & LDD_F_IAM_DIR) { + if (IS_ERR(o->od_mnt)) { + rc = PTR_ERR(o->od_mnt); + CERROR("%s: can't mount %s: %d\n", name, dev, rc); + o->od_mnt = NULL; + GOTO(out, rc); + } + + if (lvfs_check_rdonly(o->od_mnt->mnt_sb->s_bdev)) { + CERROR("%s: underlying device %s is marked as read-only. " + "Setup failed\n", name, dev); + mntput(o->od_mnt); + o->od_mnt = NULL; + GOTO(out, rc = -EROFS); + } + + if (!LDISKFS_HAS_COMPAT_FEATURE(o->od_mnt->mnt_sb, + LDISKFS_FEATURE_COMPAT_HAS_JOURNAL)) { + CERROR("%s: device %s is mounted w/o journal\n", name, dev); + mntput(o->od_mnt); + o->od_mnt = NULL; + GOTO(out, rc = -EINVAL); + } + + if (lmd_flags & LMD_FLG_IAM) { o->od_iop_mode = 0; - LCONSOLE_WARN("%s: OSD: IAM mode enabled\n", dev); + LCONSOLE_WARN("%s: OSD: IAM mode enabled\n", name); } else o->od_iop_mode = 1; + if (lmd_flags & LMD_FLG_NOSCRUB) + o->od_scrub.os_no_scrub = 1; - if (lsi->lsi_flags & LDD_F_SV_TYPE_OST) { - rc = osd_compat_init(o); - if (rc) - CERROR("%s: can't initialize compats: %d\n", dev, rc); - } +out: + if (__page) + OBD_PAGE_FREE(__page); RETURN(rc); } @@ -4384,6 +4429,8 @@ static struct lu_device *osd_device_fini(const struct lu_env *env, int rc; ENTRY; + rc = osd_shutdown(env, osd_dev(d)); + osd_compat_fini(osd_dev(d)); shrink_dcache_sb(osd_sb(osd_dev(d))); @@ -4395,46 +4442,104 @@ static struct lu_device *osd_device_fini(const struct lu_env *env, RETURN (ERR_PTR(rc)); } - if (osd_dev(d)->od_mount) - server_put_mount(osd_dev(d)->od_mount->lmi_name, - osd_dev(d)->od_mount->lmi_mnt); - osd_dev(d)->od_mount = NULL; + if (osd_dev(d)->od_mnt) { + mntput(osd_dev(d)->od_mnt); + osd_dev(d)->od_mnt = NULL; + } RETURN(NULL); } +static int osd_device_init0(const struct lu_env *env, + struct osd_device *o, + struct lustre_cfg *cfg) +{ + struct lu_device *l = osd2lu_dev(o); + struct osd_thread_info *info; + int rc; + + /* if the module was re-loaded, env can loose its keys */ + rc = lu_env_refill((struct lu_env *) env); + if (rc) + GOTO(out, rc); + info = osd_oti_get(env); + LASSERT(info); + + l->ld_ops = &osd_lu_ops; + o->od_dt_dev.dd_ops = &osd_dt_ops; + + cfs_spin_lock_init(&o->od_osfs_lock); + cfs_mutex_init(&o->od_otable_mutex); + o->od_osfs_age = cfs_time_shift_64(-1000); + + o->od_capa_hash = init_capa_hash(); + if (o->od_capa_hash == NULL) + GOTO(out, rc = -ENOMEM); + + o->od_read_cache = 1; + o->od_writethrough_cache = 1; + + rc = osd_mount(env, o, cfg); + if (rc) + GOTO(out_capa, rc); + + /* setup scrub, including OI files initialization */ + rc = osd_scrub_setup(env, o); + if (rc < 0) + GOTO(out_mnt, rc); + + strncpy(o->od_svname, lustre_cfg_string(cfg, 4), + sizeof(o->od_svname) - 1); + + if (strstr(o->od_svname, "-OST")) { + rc = osd_compat_init(o); + if (rc != 0) + GOTO(out_mnt, rc); + } + + rc = osd_procfs_init(o, o->od_svname); + if (rc != 0) { + CERROR("%s: can't initialize procfs: rc = %d\n", + o->od_svname, rc); + GOTO(out_compat, rc); + } + + RETURN(0); +out_compat: + osd_compat_fini(o); +out_mnt: + osd_oi_fini(info, o); + osd_shutdown(env, o); + mntput(o->od_mnt); + o->od_mnt = NULL; +out_capa: + cleanup_capa_hash(o->od_capa_hash); +out: + RETURN(rc); +} + static struct lu_device *osd_device_alloc(const struct lu_env *env, struct lu_device_type *t, struct lustre_cfg *cfg) { - struct lu_device *l; - struct osd_device *o; - - OBD_ALLOC_PTR(o); - if (o != NULL) { - int result; - - result = dt_device_init(&o->od_dt_dev, t); - if (result == 0) { - l = osd2lu_dev(o); - l->ld_ops = &osd_lu_ops; - o->od_dt_dev.dd_ops = &osd_dt_ops; - cfs_spin_lock_init(&o->od_osfs_lock); - cfs_mutex_init(&o->od_otable_mutex); - o->od_osfs_age = cfs_time_shift_64(-1000); - o->od_capa_hash = init_capa_hash(); - if (o->od_capa_hash == NULL) { - dt_device_fini(&o->od_dt_dev); - l = ERR_PTR(-ENOMEM); - } - } else - l = ERR_PTR(result); + struct osd_device *o; + int rc; - if (IS_ERR(l)) - OBD_FREE_PTR(o); - } else - l = ERR_PTR(-ENOMEM); - return l; + OBD_ALLOC_PTR(o); + if (o == NULL) + return ERR_PTR(-ENOMEM); + + rc = dt_device_init(&o->od_dt_dev, t); + if (rc == 0) { + rc = osd_device_init0(env, o, cfg); + if (rc) + dt_device_fini(&o->od_dt_dev); + } + + if (unlikely(rc != 0)) + OBD_FREE_PTR(o); + + return rc == 0 ? osd2lu_dev(o) : ERR_PTR(rc); } static struct lu_device *osd_device_free(const struct lu_env *env, @@ -4462,7 +4567,7 @@ static int osd_process_config(const struct lu_env *env, break; case LCFG_CLEANUP: lu_dev_del_linkage(d->ld_site, d); - err = osd_shutdown(env, o); + err = 0; break; default: err = -ENOSYS; @@ -4477,6 +4582,58 @@ static int osd_recovery_complete(const struct lu_env *env, RETURN(0); } +/* + * we use exports to track all osd users + */ +static int osd_obd_connect(const struct lu_env *env, struct obd_export **exp, + struct obd_device *obd, struct obd_uuid *cluuid, + struct obd_connect_data *data, void *localdata) +{ + struct osd_device *osd = osd_dev(obd->obd_lu_dev); + struct lustre_handle conn; + int rc; + ENTRY; + + CDEBUG(D_CONFIG, "connect #%d\n", osd->od_connects); + + rc = class_connect(&conn, obd, cluuid); + if (rc) + RETURN(rc); + + *exp = class_conn2export(&conn); + + cfs_spin_lock(&osd->od_osfs_lock); + osd->od_connects++; + cfs_spin_unlock(&osd->od_osfs_lock); + + RETURN(0); +} + +/* + * once last export (we don't count self-export) disappeared + * osd can be released + */ +static int osd_obd_disconnect(struct obd_export *exp) +{ + struct obd_device *obd = exp->exp_obd; + struct osd_device *osd = osd_dev(obd->obd_lu_dev); + int rc, release = 0; + ENTRY; + + /* Only disconnect the underlying layers on the final disconnect. */ + cfs_spin_lock(&osd->od_osfs_lock); + osd->od_connects--; + if (osd->od_connects == 0) + release = 1; + cfs_spin_unlock(&osd->od_osfs_lock); + + rc = class_disconnect(exp); /* bz 9811 */ + + if (rc == 0 && release) + class_manual_cleanup(obd); + RETURN(rc); +} + static int osd_prepare(const struct lu_env *env, struct lu_device *pdev, struct lu_device *dev) { @@ -4484,11 +4641,6 @@ static int osd_prepare(const struct lu_env *env, struct lu_device *pdev, int result; ENTRY; - /* 1. setup scrub, including OI files initialization */ - result = osd_scrub_setup(env, osd); - if (result < 0) - RETURN(result); - /* 2. setup quota slave instance */ osd->od_quota_slave = qsd_init(env, osd->od_svname, &osd->od_dt_dev, osd->od_proc_entry); @@ -4553,9 +4705,9 @@ static const struct lu_device_type_operations osd_device_type_ops = { .ldto_device_fini = osd_device_fini }; -static struct lu_device_type osd_device_type = { +struct lu_device_type osd_device_type = { .ldt_tags = LU_DEVICE_DT, - .ldt_name = LUSTRE_OSD_NAME, + .ldt_name = LUSTRE_OSD_LDISKFS_NAME, .ldt_ops = &osd_device_type_ops, .ldt_ctx_tags = LCT_LOCAL, }; @@ -4564,7 +4716,9 @@ static struct lu_device_type osd_device_type = { * lprocfs legacy support. */ static struct obd_ops osd_obd_device_ops = { - .o_owner = THIS_MODULE + .o_owner = THIS_MODULE, + .o_connect = osd_obd_connect, + .o_disconnect = osd_obd_disconnect }; static int __init osd_mod_init(void) @@ -4574,16 +4728,16 @@ static int __init osd_mod_init(void) osd_oi_mod_init(); lprocfs_osd_init_vars(&lvars); return class_register_type(&osd_obd_device_ops, NULL, lvars.module_vars, - LUSTRE_OSD_NAME, &osd_device_type); + LUSTRE_OSD_LDISKFS_NAME, &osd_device_type); } static void __exit osd_mod_exit(void) { - class_unregister_type(LUSTRE_OSD_NAME); + class_unregister_type(LUSTRE_OSD_LDISKFS_NAME); } MODULE_AUTHOR("Sun Microsystems, Inc. "); -MODULE_DESCRIPTION("Lustre Object Storage Device ("LUSTRE_OSD_NAME")"); +MODULE_DESCRIPTION("Lustre Object Storage Device ("LUSTRE_OSD_LDISKFS_NAME")"); MODULE_LICENSE("GPL"); cfs_module(osd, "0.1.0", osd_mod_init, osd_mod_exit); diff --git a/lustre/osd-ldiskfs/osd_internal.h b/lustre/osd-ldiskfs/osd_internal.h index 3c5406b..939d539 100644 --- a/lustre/osd-ldiskfs/osd_internal.h +++ b/lustre/osd-ldiskfs/osd_internal.h @@ -255,7 +255,6 @@ struct osd_device { /* super-class */ struct dt_device od_dt_dev; /* information about underlying file system */ - struct lustre_mount_info *od_mount; struct vfsmount *od_mnt; /* object index */ struct osd_oi **od_oi_table; @@ -287,6 +286,7 @@ struct osd_device { __u32 od_iop_mode; struct fsfilt_operations *od_fsops; + int od_connects; /* * mapping for legacy OST objids @@ -751,7 +751,7 @@ static inline struct osd_device *osd_obj2dev(const struct osd_object *o) static inline struct super_block *osd_sb(const struct osd_device *dev) { - return dev->od_mount->lmi_mnt->mnt_sb; + return dev->od_mnt->mnt_sb; } static inline int osd_object_is_root(const struct osd_object *obj) diff --git a/lustre/osd-ldiskfs/osd_lproc.c b/lustre/osd-ldiskfs/osd_lproc.c index d05bac6..6cb96a7 100644 --- a/lustre/osd-ldiskfs/osd_lproc.c +++ b/lustre/osd-ldiskfs/osd_lproc.c @@ -250,12 +250,13 @@ static const char *osd_counter_names[] = { int osd_procfs_init(struct osd_device *osd, const char *name) { struct lprocfs_static_vars lvars; - struct lu_device *ld = &osd->od_dt_dev.dd_lu_dev; struct obd_type *type; int rc; ENTRY; - type = ld->ld_type->ldt_obd_type; + /* at the moment there is no linkage between lu_type + * and obd_type, so we lookup obd_type this way */ + type = class_search_type(LUSTRE_OSD_LDISKFS_NAME); LASSERT(name != NULL); LASSERT(type != NULL); @@ -325,14 +326,14 @@ static int lprocfs_osd_rd_mntdev(char *page, char **start, off_t off, int count, struct osd_device *osd = osd_dt_dev(data); LASSERT(osd != NULL); - if (unlikely(osd->od_mount == NULL)) + if (unlikely(osd->od_mnt == NULL)) return -EINPROGRESS; - LASSERT(mnt_get_devname(osd->od_mount->lmi_mnt)); + LASSERT(mnt_get_devname(osd->od_mnt)); *eof = 1; return snprintf(page, count, "%s\n", - mnt_get_devname(osd->od_mount->lmi_mnt)); + mnt_get_devname(osd->od_mnt)); } #ifdef HAVE_LDISKFS_PDO @@ -366,7 +367,7 @@ static int lprocfs_osd_rd_auto_scrub(char *page, char **start, off_t off, struct osd_device *dev = data; LASSERT(dev != NULL); - if (unlikely(dev->od_mount == NULL)) + if (unlikely(dev->od_mnt == NULL)) return -EINPROGRESS; *eof = 1; @@ -380,7 +381,7 @@ static int lprocfs_osd_wr_auto_scrub(struct file *file, const char *buffer, int val, rc; LASSERT(dev != NULL); - if (unlikely(dev->od_mount == NULL)) + if (unlikely(dev->od_mnt == NULL)) return -EINPROGRESS; rc = lprocfs_write_helper(buffer, count, &val); @@ -397,7 +398,7 @@ static int lprocfs_osd_rd_oi_scrub(char *page, char **start, off_t off, struct osd_device *dev = data; LASSERT(dev != NULL); - if (unlikely(dev->od_mount == NULL)) + if (unlikely(dev->od_mnt == NULL)) return -EINPROGRESS; *eof = 1; diff --git a/lustre/osd-ldiskfs/osd_scrub.c b/lustre/osd-ldiskfs/osd_scrub.c index 65157f4..98827d7 100644 --- a/lustre/osd-ldiskfs/osd_scrub.c +++ b/lustre/osd-ldiskfs/osd_scrub.c @@ -1042,8 +1042,6 @@ int osd_scrub_setup(const struct lu_env *env, struct osd_device *dev) cfs_init_rwsem(&scrub->os_rwsem); cfs_spin_lock_init(&scrub->os_lock); CFS_INIT_LIST_HEAD(&scrub->os_inconsistent_items); - if (get_mount_flags(dev->od_mount->lmi_sb) & LMD_FLG_NOSCRUB) - scrub->os_no_scrub = 1; push_ctxt(&saved, ctxt, NULL); filp = filp_open(osd_scrub_name, O_RDWR | O_CREAT, 0644); diff --git a/lustre/osd-zfs/osd_handler.c b/lustre/osd-zfs/osd_handler.c index e4f97cc..fedf1e9 100644 --- a/lustre/osd-zfs/osd_handler.c +++ b/lustre/osd-zfs/osd_handler.c @@ -340,6 +340,8 @@ static void osd_conf_get(const struct lu_env *env, param->ddp_inodespace = OSD_DNODE_EST_COUNT; /* per-fragment overhead to be used by the client code */ param->ddp_grant_frag = udmu_blk_insert_cost(); + + param->ddp_mnt = NULL; } /* @@ -483,30 +485,20 @@ static int osd_shutdown(const struct lu_env *env, struct osd_device *o) static int osd_mount(const struct lu_env *env, struct osd_device *o, struct lustre_cfg *cfg) { - char *dev = lustre_cfg_string(cfg, 0); - struct lustre_mount_info *lmi; - struct lustre_sb_info *lsi; - dmu_buf_t *rootdb; - int rc; + char *dev = lustre_cfg_string(cfg, 1); + dmu_buf_t *rootdb; + int rc; ENTRY; if (o->od_objset.os != NULL) RETURN(0); - lmi = server_get_mount(dev); - if (lmi == NULL) { - CERROR("Unknown mount point: '%s'\n", dev); - RETURN(-ENODEV); - } - - lsi = s2lsi(lmi->lmi_sb); - dev = lsi->lsi_lmd->lmd_dev; - if (strlen(dev) >= sizeof(o->od_mntdev)) RETURN(-E2BIG); strcpy(o->od_mntdev, dev); - strcpy(o->od_svname, lsi->lsi_svname); + strncpy(o->od_svname, lustre_cfg_string(cfg, 4), + sizeof(o->od_svname) - 1); rc = -udmu_objset_open(o->od_mntdev, &o->od_objset); if (rc) { @@ -587,28 +579,36 @@ out: RETURN(rc); } +static struct lu_device *osd_device_fini(const struct lu_env *env, + struct lu_device *dev); + static struct lu_device *osd_device_alloc(const struct lu_env *env, - struct lu_device_type *t, + struct lu_device_type *type, struct lustre_cfg *cfg) { - struct osd_device *o; - int rc; + struct osd_device *dev; + int rc; - OBD_ALLOC_PTR(o); - if (o == NULL) + OBD_ALLOC_PTR(dev); + if (dev == NULL) return ERR_PTR(-ENOMEM); - rc = dt_device_init(&o->od_dt_dev, t); + rc = dt_device_init(&dev->od_dt_dev, type); if (rc == 0) { - rc = osd_device_init0(env, o, cfg); + rc = osd_device_init0(env, dev, cfg); + if (rc == 0) { + rc = osd_mount(env, dev, cfg); + if (rc) + osd_device_fini(env, osd2lu_dev(dev)); + } if (rc) - dt_device_fini(&o->od_dt_dev); + dt_device_fini(&dev->od_dt_dev); } if (unlikely(rc != 0)) - OBD_FREE_PTR(o); + OBD_FREE_PTR(dev); - return rc == 0 ? osd2lu_dev(o) : ERR_PTR(rc); + return rc == 0 ? osd2lu_dev(dev) : ERR_PTR(rc); } static struct lu_device *osd_device_free(const struct lu_env *env, @@ -631,9 +631,8 @@ static struct lu_device *osd_device_free(const struct lu_env *env, static struct lu_device *osd_device_fini(const struct lu_env *env, struct lu_device *d) { - struct osd_device *o = osd_dev(d); - struct lustre_mount_info *lmi; - int rc; + struct osd_device *o = osd_dev(d); + int rc; ENTRY; @@ -655,10 +654,6 @@ static struct lu_device *osd_device_fini(const struct lu_env *env, if (o->od_objset.os) osd_umount(env, o); - lmi = server_get_mount_2(o->od_svname); - LASSERT(lmi); - server_put_mount(lmi->lmi_name, lmi->lmi_mnt); - RETURN(NULL); } @@ -699,6 +694,58 @@ static int osd_recovery_complete(const struct lu_env *env, struct lu_device *d) RETURN(0); } +/* + * we use exports to track all osd users + */ +static int osd_obd_connect(const struct lu_env *env, struct obd_export **exp, + struct obd_device *obd, struct obd_uuid *cluuid, + struct obd_connect_data *data, void *localdata) +{ + struct osd_device *osd = osd_dev(obd->obd_lu_dev); + struct lustre_handle conn; + int rc; + ENTRY; + + CDEBUG(D_CONFIG, "connect #%d\n", osd->od_connects); + + rc = class_connect(&conn, obd, cluuid); + if (rc) + RETURN(rc); + + *exp = class_conn2export(&conn); + + cfs_spin_lock(&osd->od_objset.lock); + osd->od_connects++; + cfs_spin_unlock(&osd->od_objset.lock); + + RETURN(0); +} + +/* + * once last export (we don't count self-export) disappeared + * osd can be released + */ +static int osd_obd_disconnect(struct obd_export *exp) +{ + struct obd_device *obd = exp->exp_obd; + struct osd_device *osd = osd_dev(obd->obd_lu_dev); + int rc, release = 0; + ENTRY; + + /* Only disconnect the underlying layers on the final disconnect. */ + cfs_spin_lock(&osd->od_objset.lock); + osd->od_connects--; + if (osd->od_connects == 0) + release = 1; + cfs_spin_unlock(&osd->od_objset.lock); + + rc = class_disconnect(exp); /* bz 9811 */ + + if (rc == 0 && release) + class_manual_cleanup(obd); + RETURN(rc); +} + static int osd_prepare(const struct lu_env *env, struct lu_device *pdev, struct lu_device *dev) { @@ -756,6 +803,8 @@ static struct lu_device_type osd_device_type = { static struct obd_ops osd_obd_device_ops = { .o_owner = THIS_MODULE, + .o_connect = osd_obd_connect, + .o_disconnect = osd_obd_disconnect }; int __init osd_init(void) diff --git a/lustre/osd-zfs/osd_lproc.c b/lustre/osd-zfs/osd_lproc.c index c050acb..6ce8a3b 100644 --- a/lustre/osd-zfs/osd_lproc.c +++ b/lustre/osd-zfs/osd_lproc.c @@ -44,6 +44,7 @@ #define DEBUG_SUBSYSTEM S_CLASS #include +#include #include #include @@ -190,14 +191,17 @@ struct lprocfs_vars lprocfs_osd_module_vars[] = { int osd_procfs_init(struct osd_device *osd, const char *name) { - struct lu_device *ld = &osd->od_dt_dev.dd_lu_dev; - struct obd_type *type = ld->ld_type->ldt_obd_type; - int rc; + struct obd_type *type; + int rc; ENTRY; if (osd->od_proc_entry) RETURN(0); + /* at the moment there is no linkage between lu_type + * and obd_type, so we lookup obd_type this way */ + type = class_search_type(LUSTRE_OSD_ZFS_NAME); + LASSERT(name != NULL); LASSERT(type != NULL);