From: Alex Zhuravlev Date: Sun, 17 Jun 2012 07:33:25 +0000 (+0400) Subject: LU-1581 utils: min zfs support X-Git-Tag: 2.2.90~9 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=bae418cb53f30c14d2e07201360df267f3652746 LU-1581 utils: min zfs support set of osd_*() instance to support zfs backend Signed-off-by: Alex Zhuravlev Change-Id: I6733d6ad139c288d26e1a5f2617b07c9fdc6ed0f Reviewed-on: http://review.whamcloud.com/3227 Reviewed-by: Andreas Dilger Tested-by: Hudson Reviewed-by: Li Wei --- diff --git a/lustre/utils/Makefile.am b/lustre/utils/Makefile.am index c44012b..1ce9307 100644 --- a/lustre/utils/Makefile.am +++ b/lustre/utils/Makefile.am @@ -104,11 +104,24 @@ mkfs_lustre_SOURCES += mount_utils_ldiskfs.c mkfs_lustre_CPPFLAGS = -UTUNEFS $(AM_CPPFLAGS) mkfs_lustre_LDADD := libiam.a $(LIBPTLCTL) mkfs_lustre_DEPENDENCIES := $(LIBPTLCTL) libiam.a +if ZFS_ENABLED +mkfs_lustre_SOURCES += mount_utils_zfs.c +mkfs_lustre_CPPFLAGS += -DHAVE_IOCTL_IN_UNISTD_H +mkfs_lustre_CPPFLAGS += -I /usr/include/libspl -I /usr/include/libzfs +mkfs_lustre_LDFLAGS = -pthread -rdynamic -ldl +endif -tunefs_lustre_SOURCES = $(mkfs_lustre_SOURCES) +tunefs_lustre_SOURCES = mkfs_lustre.c mount_utils.c mount_utils.h +tunefs_lustre_SOURCES += mount_utils_ldiskfs.c tunefs_lustre_CPPFLAGS = -DTUNEFS $(AM_CPPFLAGS) tunefs_lustre_LDADD := $(mkfs_lustre_LDADD) tunefs_lustre_DEPENDENCIES := $(mkfs_lustre_DEPENDENCIES) +if ZFS_ENABLED +tunefs_lustre_SOURCES += mount_utils_zfs.c +tunefs_lustre_CPPFLAGS += -DHAVE_IOCTL_IN_UNISTD_H +tunefs_lustre_CPPFLAGS += -I /usr/include/libspl -I /usr/include/libzfs +tunefs_lustre_LDFLAGS = -pthread -rdynamic -ldl +endif l_getidentity_SOURCES = l_getidentity.c l_getidentity_LDADD := $(LIBPTLCTL) diff --git a/lustre/utils/mount_utils.h b/lustre/utils/mount_utils.h index e50e787..8fb8494 100644 --- a/lustre/utils/mount_utils.h +++ b/lustre/utils/mount_utils.h @@ -38,6 +38,7 @@ #define _MOUNT_UTILS_H_ #include +#include extern char *progname; extern int verbose; diff --git a/lustre/utils/mount_utils_zfs.c b/lustre/utils/mount_utils_zfs.c new file mode 100644 index 0000000..d824306 --- /dev/null +++ b/lustre/utils/mount_utils_zfs.c @@ -0,0 +1,675 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License version 2 for more details. A copy is + * included in the COPYING file that accompanied this code. + + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * GPL HEADER END + */ +/* + * Copyright (c) 2011, 2012 Whamcloud, Inc. + * Use is subject to license terms. + * + */ +/* + * Author: Brian Behlendorf + */ + +#include +#include +#include +#include + +#include "mount_utils.h" + +/* Persistent mount data is stored in these user attributes */ +#define LDD_VERSION_PROP "lustre:version" +#define LDD_FLAGS_PROP "lustre:flags" +#define LDD_INDEX_PROP "lustre:index" +#define LDD_FSNAME_PROP "lustre:fsname" +#define LDD_SVNAME_PROP "lustre:svname" +#define LDD_UUID_PROP "lustre:uuid" +#define LDD_USERDATA_PROP "lustre:userdata" +#define LDD_MOUNTOPTS_PROP "lustre:mountopts" +#define LDD_MGSNODE_PROP "lustre:mgsnode" +#define LDD_FAILNODE_PROP "lustre:failnode" +#define LDD_FAILMODE_PROP "lustre:failmode" +#define LDD_IDENTITY_UPCALL_PROP "lustre:identity_upcall" + +/* indicate if the ZFS OSD has been successfully setup */ +static int osd_zfs_setup = 0; + +static libzfs_handle_t *g_zfs; + +/* dynamic linking handles for libzfs & libnvpair */ +static void *handle_libzfs; +static void *handle_nvpair; + +/* symbol table looked up with dlsym */ +struct zfs_symbols { + libzfs_handle_t *(*libzfs_init)(void); + void (*libzfs_fini)(libzfs_handle_t *); + int (*libzfs_load_module)(char *); + zfs_handle_t* (*zfs_open)(libzfs_handle_t *, const char *, int); + int (*zfs_destroy)(zfs_handle_t *, boolean_t); + void (*zfs_close)(zfs_handle_t *); + int (*zfs_prop_set)(zfs_handle_t*, const char*, const char*); + nvlist_t* (*zfs_get_user_props) (zfs_handle_t *); + int (*zfs_name_valid)(const char *, zfs_type_t); + zpool_handle_t* (*zpool_open)(libzfs_handle_t *, const char *); + void (*zpool_close)(zpool_handle_t *zhp); + int (*nvlist_lookup_string)(nvlist_t*, const char*, char**); + int (*nvlist_lookup_nvlist)(nvlist_t *, const char *, nvlist_t **); +}; + +static struct zfs_symbols sym; +void zfs_fini(void); + +#define DLSYM(handle, func) \ + do { \ + sym.func = (typeof(sym.func))dlsym(handle, #func); \ + } while(0) + +/* populate the symbol table after a successful call to dlopen() */ +static int zfs_populate_symbols(void) +{ + char *error; + + dlerror(); /* Clear any existing error */ + + DLSYM(handle_libzfs, libzfs_init); +#define libzfs_init (*sym.libzfs_init) + DLSYM(handle_libzfs, libzfs_fini); +#define libzfs_fini (*sym.libzfs_fini) + DLSYM(handle_libzfs, libzfs_load_module); +#define libzfs_load_module (*sym.libzfs_load_module) + DLSYM(handle_libzfs, zfs_open); +#define zfs_open (*sym.zfs_open) + DLSYM(handle_libzfs, zfs_destroy); +#define zfs_destroy (*sym.zfs_destroy) + DLSYM(handle_libzfs, zfs_close); +#define zfs_close (*sym.zfs_close) + DLSYM(handle_libzfs, zfs_prop_set); +#define zfs_prop_set (*sym.zfs_prop_set) + DLSYM(handle_libzfs, zfs_get_user_props); +#define zfs_get_user_props (*sym.zfs_get_user_props) + DLSYM(handle_libzfs, zfs_name_valid); +#define zfs_name_valid (*sym.zfs_name_valid) + DLSYM(handle_libzfs, zpool_open); +#define zpool_open (*sym.zpool_open) + DLSYM(handle_libzfs, zpool_close); +#define zpool_close (*sym.zpool_close) + DLSYM(handle_nvpair, nvlist_lookup_string); +#define nvlist_lookup_string (*sym.nvlist_lookup_string) + DLSYM(handle_nvpair, nvlist_lookup_nvlist); +#define nvlist_lookup_nvlist (*sym.nvlist_lookup_nvlist) + + error = dlerror(); + if (error != NULL) { + fatal(); + fprintf(stderr, "%s\n", error); + return EINVAL; + } + return 0; +} + +static int zfs_set_prop_int(zfs_handle_t *zhp, char *prop, __u32 val) +{ + char str[64]; + int ret; + + (void) snprintf(str, sizeof (str), "%lu", (unsigned long)val); + vprint(" %s=%s\n", prop, str); + ret = zfs_prop_set(zhp, prop, str); + + return ret; +} + +/* + * Write the zfs property string, note that properties with a NULL or + * zero-length value will not be written and 0 returned. + */ +static int zfs_set_prop_str(zfs_handle_t *zhp, char *prop, char *val) +{ + int ret = 0; + + if (val && strlen(val) > 0) { + vprint(" %s=%s\n", prop, val); + ret = zfs_prop_set(zhp, prop, val); + } + + return ret; +} + +static int zfs_set_prop_param(zfs_handle_t *zhp, struct lustre_disk_data *ldd, + char *param, char *prop) +{ + char *str; + int ret = 0; + + if (get_param(ldd->ldd_params, param, &str) == 0) { + vprint(" %s=%s\n", prop, str); + ret = zfs_prop_set(zhp, prop, str); + free(str); + } + + return ret; +} + +static int osd_check_zfs_setup(void) +{ + if (osd_zfs_setup == 0) { + /* setup failed */ + fatal(); + fprintf(stderr, "Failed to initialize ZFS library. Are the ZFS " + "packages and modules correctly installed?\n"); + } + return osd_zfs_setup == 1; +} + +/* Write the server config as properties associated with the dataset */ +int zfs_write_ldd(struct mkfs_opts *mop) +{ + struct lustre_disk_data *ldd = &mop->mo_ldd; + char *ds = mop->mo_device; + zfs_handle_t *zhp; + int ret = EINVAL; + + if (osd_check_zfs_setup() == 0) + return EINVAL; + + zhp = zfs_open(g_zfs, ds, ZFS_TYPE_FILESYSTEM); + if (zhp == NULL) { + fprintf(stderr, "Failed to open zfs dataset %s\n", ds); + goto out; + } + + vprint("Writing %s properties\n", ds); + + ret = zfs_set_prop_int(zhp, LDD_VERSION_PROP, ldd->ldd_config_ver); + if (ret) + goto out_close; + + ret = zfs_set_prop_int(zhp, LDD_FLAGS_PROP, ldd->ldd_flags); + if (ret) + goto out_close; + + ret = zfs_set_prop_int(zhp, LDD_INDEX_PROP, ldd->ldd_svindex); + if (ret) + goto out_close; + + ret = zfs_set_prop_str(zhp, LDD_FSNAME_PROP, ldd->ldd_fsname); + if (ret) + goto out_close; + + ret = zfs_set_prop_str(zhp, LDD_SVNAME_PROP, ldd->ldd_svname); + if (ret) + goto out_close; + + ret = zfs_set_prop_str(zhp, LDD_UUID_PROP, (char *)ldd->ldd_uuid); + if (ret) + goto out_close; + + ret = zfs_set_prop_str(zhp, LDD_USERDATA_PROP, ldd->ldd_userdata); + if (ret) + goto out_close; + + ret = zfs_set_prop_str(zhp, LDD_MOUNTOPTS_PROP, ldd->ldd_mount_opts); + if (ret) + goto out_close; + + ret = zfs_set_prop_param(zhp, ldd, PARAM_MGSNODE, LDD_MGSNODE_PROP); + if (ret) + goto out_close; + + ret = zfs_set_prop_param(zhp, ldd, PARAM_FAILNODE, LDD_FAILNODE_PROP); + if (ret) + goto out_close; + + ret = zfs_set_prop_param(zhp, ldd, PARAM_FAILMODE, LDD_FAILMODE_PROP); + if (ret) + goto out_close; + + ret = zfs_set_prop_param(zhp, ldd, PARAM_MDT PARAM_ID_UPCALL, + LDD_IDENTITY_UPCALL_PROP); + if (ret) + goto out_close; + +out_close: + zfs_close(zhp); +out: + return ret; +} + +static int zfs_get_prop_int(zfs_handle_t *zhp, char *prop, __u32 *val) +{ + nvlist_t *propval; + char *propstr; + int ret; + + ret = nvlist_lookup_nvlist(zfs_get_user_props(zhp), prop, &propval); + if (ret) + return ret; + + ret = nvlist_lookup_string(propval, ZPROP_VALUE, &propstr); + if (ret) + return ret; + + errno = 0; + *val = strtoul(propstr, NULL, 10); + if (errno) + return errno; + + return ret; +} + +static int zfs_get_prop_str(zfs_handle_t *zhp, char *prop, char *val) +{ + nvlist_t *propval; + char *propstr; + int ret; + + ret = nvlist_lookup_nvlist(zfs_get_user_props(zhp), prop, &propval); + if (ret) + return ret; + + ret = nvlist_lookup_string(propval, ZPROP_VALUE, &propstr); + if (ret) + return ret; + + (void) strcpy(val, propstr); + + return ret; +} + +static int zfs_get_prop_param(zfs_handle_t *zhp, struct lustre_disk_data *ldd, + char *param, char *prop) +{ + nvlist_t *propval; + char *propstr; + int ret; + + ret = nvlist_lookup_nvlist(zfs_get_user_props(zhp), prop, &propval); + if (ret) + return ret; + + ret = nvlist_lookup_string(propval, ZPROP_VALUE, &propstr); + if (ret) + return ret; + + ret = add_param(ldd->ldd_params, param, propstr); + + return ret; +} + +/* + * Read the server config as properties associated with the dataset. + * Missing entries as not treated error and are simply skipped. + */ +int zfs_read_ldd(char *ds, struct lustre_disk_data *ldd) +{ + zfs_handle_t *zhp; + int ret = EINVAL; + + if (osd_check_zfs_setup() == 0) + return EINVAL; + + zhp = zfs_open(g_zfs, ds, ZFS_TYPE_FILESYSTEM); + if (zhp == NULL) + goto out; + + ret = zfs_get_prop_int(zhp, LDD_VERSION_PROP, &ldd->ldd_config_ver); + if (ret && (ret != ENOENT)) + goto out_close; + + ret = zfs_get_prop_int(zhp, LDD_FLAGS_PROP, &ldd->ldd_flags); + if (ret && (ret != ENOENT)) + goto out_close; + + ret = zfs_get_prop_int(zhp, LDD_INDEX_PROP, &ldd->ldd_svindex); + if (ret && (ret != ENOENT)) + goto out_close; + + ret = zfs_get_prop_str(zhp, LDD_FSNAME_PROP, ldd->ldd_fsname); + if (ret && (ret != ENOENT)) + goto out_close; + + ret = zfs_get_prop_str(zhp, LDD_SVNAME_PROP, ldd->ldd_svname); + if (ret && (ret != ENOENT)) + goto out_close; + + ret = zfs_get_prop_str(zhp, LDD_UUID_PROP, (char *)ldd->ldd_uuid); + if (ret && (ret != ENOENT)) + goto out_close; + + ret = zfs_get_prop_str(zhp, LDD_USERDATA_PROP, ldd->ldd_userdata); + if (ret && (ret != ENOENT)) + goto out_close; + + ret = zfs_get_prop_str(zhp, LDD_MOUNTOPTS_PROP, ldd->ldd_mount_opts); + if (ret && (ret != ENOENT)) + goto out_close; + + ret = zfs_get_prop_param(zhp, ldd, PARAM_MGSNODE, LDD_MGSNODE_PROP); + if (ret && (ret != ENOENT)) + goto out_close; + + ret = zfs_get_prop_param(zhp, ldd, PARAM_FAILNODE, LDD_FAILNODE_PROP); + if (ret && (ret != ENOENT)) + goto out_close; + + ret = zfs_get_prop_param(zhp, ldd, PARAM_FAILMODE, LDD_FAILMODE_PROP); + if (ret && (ret != ENOENT)) + goto out_close; + + ret = zfs_get_prop_param(zhp, ldd, PARAM_MDT PARAM_ID_UPCALL, + LDD_IDENTITY_UPCALL_PROP); + if (ret && (ret != ENOENT)) + goto out_close; + + ldd->ldd_mount_type = LDD_MT_ZFS; + ret = 0; +out_close: + zfs_close(zhp); +out: + return ret; +} + +int zfs_is_lustre(char *ds, unsigned *mount_type) +{ + struct lustre_disk_data tmp_ldd; + int ret; + + if (osd_zfs_setup == 0) + return 0; + + ret = zfs_read_ldd(ds, &tmp_ldd); + if ((ret == 0) && (tmp_ldd.ldd_config_ver > 0) && + (strlen(tmp_ldd.ldd_fsname) > 0) && + (strlen(tmp_ldd.ldd_svname) > 0)) { + *mount_type = tmp_ldd.ldd_mount_type; + return 1; + } + + return 0; +} + +static char *zfs_mkfs_opts(struct mkfs_opts *mop, char *str, int len) +{ + memset(str, 0, len); + + if (strlen(mop->mo_mkfsopts) != 0) + snprintf(str, len, " -o %s", mop->mo_mkfsopts); + + return str; +} + +static int zfs_create_vdev(struct mkfs_opts *mop, char *vdev) +{ + int ret = 0; + + /* Silently ignore reserved vdev names */ + if ((strncmp(vdev, "disk", 4) == 0) || + (strncmp(vdev, "file", 4) == 0) || + (strncmp(vdev, "mirror", 6) == 0) || + (strncmp(vdev, "raidz", 5) == 0) || + (strncmp(vdev, "spare", 5) == 0) || + (strncmp(vdev, "log", 3) == 0) || + (strncmp(vdev, "cache", 5) == 0)) + return ret; + + /* + * Verify a file exists at the provided absolute path. If it doesn't + * and mo_device_sz is set attempt to create a file vdev to be used. + * Relative paths will be passed directly to 'zpool create' which + * will check multiple multiple locations under /dev/. + */ + if (vdev[0] == '/') { + ret = access(vdev, F_OK); + if (ret == 0) + return ret; + + ret = errno; + if (ret != ENOENT) { + fatal(); + fprintf(stderr, "Unable to access required vdev " + "for pool %s (%d)\n", vdev, ret); + return ret; + } + + if (mop->mo_device_sz == 0) { + fatal(); + fprintf(stderr, "Unable to create vdev due to " + "missing --device-size=#N(KB) parameter\n"); + return EINVAL; + } + + ret = file_create(vdev, mop->mo_device_sz); + if (ret) { + fatal(); + fprintf(stderr, "Unable to create vdev %s (%d)\n", + vdev, ret); + return ret; + } + } + + return ret; +} + +int zfs_make_lustre(struct mkfs_opts *mop) +{ + zfs_handle_t *zhp; + zpool_handle_t *php; + char *pool = NULL; + char *mkfs_cmd = NULL; + char *mkfs_tmp = NULL; + char *ds = mop->mo_device; + int pool_exists = 0, ret; + + if (osd_check_zfs_setup() == 0) + return EINVAL; + + /* no automatic index with zfs backend */ + if (mop->mo_ldd.ldd_flags & LDD_F_NEED_INDEX) { + fatal(); + fprintf(stderr, "The target index must be specified with " + "--index\n"); + return EINVAL; + } + + pool = strdup(ds); + if (pool == NULL) + return ENOMEM; + + mkfs_cmd = malloc(PATH_MAX); + if (mkfs_cmd == NULL) { + ret = ENOMEM; + goto out; + } + + mkfs_tmp = malloc(PATH_MAX); + if (mkfs_tmp == NULL) { + ret = ENOMEM; + goto out; + } + + /* Due to zfs_name_valid() check the '/' must exist */ + strchr(pool, '/')[0] = '\0'; + + /* If --reformat was given attempt to destroy the previous dataset */ + if ((mop->mo_flags & MO_FORCEFORMAT) && + ((zhp = zfs_open(g_zfs, ds, ZFS_TYPE_FILESYSTEM)) != NULL)) { + + ret = zfs_destroy(zhp, 0); + if (ret) { + zfs_close(zhp); + fprintf(stderr, "Failed destroy zfs dataset %s (%d)\n", + ds, ret); + goto out; + } + + zfs_close(zhp); + } + + /* + * Create the zpool if the vdevs have been specified and the pool + * does not already exists. The pool creation itself will be done + * with the zpool command rather than the zpool_create() library call + * so the existing zpool error handling can be leveraged. + */ + php = zpool_open(g_zfs, pool); + if (php) { + pool_exists = 1; + zpool_close(php); + } + + if ((mop->mo_pool_vdevs != NULL) && (pool_exists == 0)) { + + memset(mkfs_cmd, 0, PATH_MAX); + snprintf(mkfs_cmd, PATH_MAX, + "zpool create -f -O canmount=off %s", pool); + + /* Append the vdev config and create file vdevs as required */ + while (*mop->mo_pool_vdevs != NULL) { + strscat(mkfs_cmd, " ", PATH_MAX); + strscat(mkfs_cmd, *mop->mo_pool_vdevs, PATH_MAX); + + ret = zfs_create_vdev(mop, *mop->mo_pool_vdevs); + if (ret) + goto out; + + mop->mo_pool_vdevs++; + } + + vprint("mkfs_cmd = %s\n", mkfs_cmd); + ret = run_command(mkfs_cmd, PATH_MAX); + if (ret) { + fatal(); + fprintf(stderr, "Unable to create pool %s (%d)\n", + pool, ret); + goto out; + } + } + + /* + * Create the ZFS filesystem with any required mkfs options: + * - canmount=off is set to prevent zfs automounting + * - version=4 is set because SA are not yet handled by the osd + */ + memset(mkfs_cmd, 0, PATH_MAX); + snprintf(mkfs_cmd, PATH_MAX, + "zfs create -o canmount=off -o xattr=sa%s %s", + zfs_mkfs_opts(mop, mkfs_tmp, PATH_MAX), + ds); + + vprint("mkfs_cmd = %s\n", mkfs_cmd); + ret = run_command(mkfs_cmd, PATH_MAX); + if (ret) { + fatal(); + fprintf(stderr, "Unable to create filesystem %s (%d)\n", + ds, ret); + goto out; + } + +out: + if (pool != NULL) + free(pool); + + if (mkfs_cmd != NULL) + free(mkfs_cmd); + + if (mkfs_tmp != NULL) + free(mkfs_tmp); + + return ret; +} + +int zfs_prepare_lustre(struct mkfs_opts *mop, + char *default_mountopts, int default_len, + char *always_mountopts, int always_len) +{ + int ret; + + if (osd_check_zfs_setup() == 0) + return EINVAL; + + ret = zfs_name_valid(mop->mo_device, ZFS_TYPE_FILESYSTEM); + if (!ret) { + fatal(); + fprintf(stderr, "Invalid filesystem name %s\n", mop->mo_device); + return EINVAL; + } + + return 0; +} + +int zfs_init(void) +{ + int ret = 0; + + /* If the ZFS libs are not installed, don't print an error to avoid + * spamming ldiskfs users. An error message will still be printed if + * someone tries to do some real work involving a ZFS backend */ + + handle_libzfs = dlopen("libzfs.so", RTLD_LAZY); + if (handle_libzfs == NULL) + return EINVAL; + + handle_nvpair = dlopen("libnvpair.so", RTLD_LAZY); + if (handle_nvpair == NULL) { + ret = EINVAL; + goto out; + } + + ret = zfs_populate_symbols(); + if (ret) + goto out; + + if (libzfs_load_module("zfs") != 0) { + /* The ZFS modules are not installed */ + ret = EINVAL; + goto out; + } + + g_zfs = libzfs_init(); + if (g_zfs == NULL) { + fprintf(stderr, "Failed to initialize ZFS library\n"); + ret = EINVAL; + } +out: + osd_zfs_setup = 1; + if (ret) + zfs_fini(); + return ret; +} + +void zfs_fini(void) +{ + if (g_zfs) { + libzfs_fini(g_zfs); + g_zfs = NULL; + } + if (handle_nvpair) { + dlclose(handle_nvpair); + handle_nvpair = NULL; + } + if (handle_libzfs) { + dlclose(handle_libzfs); + handle_libzfs = NULL; + } + + osd_zfs_setup = 0; +}