4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License version 2 for more details. A copy is
14 * included in the COPYING file that accompanied this code.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 * Copyright (c) 2012, 2016, Intel Corporation.
24 * Use is subject to license terms.
27 * Author: Brian Behlendorf <behlendorf1@llnl.gov>
29 #include "mount_utils.h"
35 #define HOSTID_PATH "/etc/hostid"
37 /* Persistent mount data is stored in these user attributes */
38 #define LDD_PREFIX "lustre:"
39 #define LDD_VERSION_PROP LDD_PREFIX "version"
40 #define LDD_FLAGS_PROP LDD_PREFIX "flags"
41 #define LDD_INDEX_PROP LDD_PREFIX "index"
42 #define LDD_FSNAME_PROP LDD_PREFIX "fsname"
43 #define LDD_SVNAME_PROP LDD_PREFIX "svname"
44 #define LDD_UUID_PROP LDD_PREFIX "uuid"
45 #define LDD_USERDATA_PROP LDD_PREFIX "userdata"
46 #define LDD_MOUNTOPTS_PROP LDD_PREFIX "mountopts"
48 /* This structure is used to help bridge the gap between the ZFS
49 * properties Lustre uses and their corresponding internal LDD fields.
50 * It is meant to be used internally by the mount utility only. */
51 struct zfs_ldd_prop_bridge {
52 /* Contains the publicly visible name for the property
53 * (i.e. what is shown when running "zfs get") */
55 /* Contains the offset into the lustre_disk_data structure where
56 * the value of this property is or will be stored. (i.e. the
57 * property is read from and written to this offset within ldd) */
59 /* Function pointer responsible for reading in the @prop
60 * property from @zhp and storing it in @ldd_field */
61 int (*zlpb_get_prop_fn)(zfs_handle_t *zhp, char *prop, void *ldd_field);
62 /* Function pointer responsible for writing the value of @ldd_field
63 * into the @prop dataset property in @zhp */
64 int (*zlpb_set_prop_fn)(zfs_handle_t *zhp, char *prop, void *ldd_field);
67 /* Forward declarations needed to initialize the ldd prop bridge list */
68 static int zfs_get_prop_int(zfs_handle_t *, char *, void *);
69 static int zfs_set_prop_int(zfs_handle_t *, char *, void *);
70 static int zfs_get_prop_str(zfs_handle_t *, char *, void *);
71 static int zfs_set_prop_str(zfs_handle_t *, char *, void *);
73 /* Helper for initializing the entries in the special_ldd_prop_params list.
74 * - @name: stored directly in the zlpb_prop_name field
75 * (e.g. lustre:fsname, lustre:version, etc.)
76 * - @field: the field in the lustre_disk_data which directly maps to
77 * the @name property. (e.g. ldd_fsname, ldd_config_ver, etc.)
78 * - @type: The type of @field. Only "int" and "str" are supported.
80 #define ZLB_INIT(name, field, type) \
82 .zlpb_prop_name = name, \
83 .zlpb_ldd_offset = offsetof(struct lustre_disk_data, field), \
84 .zlpb_get_prop_fn = zfs_get_prop_ ## type, \
85 .zlpb_set_prop_fn = zfs_set_prop_ ## type \
88 /* These ldd properties are special because they all have their own
89 * individual fields in the lustre_disk_data structure, as opposed to
90 * being globbed into the ldd_params field. As such, these need special
91 * handling when reading/writing the ldd structure to/from persistent
93 struct zfs_ldd_prop_bridge special_ldd_prop_params[] = {
94 ZLB_INIT(LDD_VERSION_PROP, ldd_config_ver, int),
95 ZLB_INIT(LDD_FLAGS_PROP, ldd_flags, int),
96 ZLB_INIT(LDD_INDEX_PROP, ldd_svindex, int),
97 ZLB_INIT(LDD_FSNAME_PROP, ldd_fsname, str),
98 ZLB_INIT(LDD_SVNAME_PROP, ldd_svname, str),
99 ZLB_INIT(LDD_UUID_PROP, ldd_uuid, str),
100 ZLB_INIT(LDD_USERDATA_PROP, ldd_userdata, str),
101 ZLB_INIT(LDD_MOUNTOPTS_PROP, ldd_mount_opts, str),
105 /* indicate if the ZFS OSD has been successfully setup */
106 static int osd_zfs_setup = 0;
108 static libzfs_handle_t *g_zfs;
112 static int zfs_set_prop_int(zfs_handle_t *zhp, char *prop, void *val)
117 (void) snprintf(str, sizeof (str), "%i", *(int *)val);
118 vprint(" %s=%s\n", prop, str);
119 ret = zfs_prop_set(zhp, prop, str);
125 * Write the zfs property string, note that properties with a NULL or
126 * zero-length value will not be written and 0 returned.
128 static int zfs_set_prop_str(zfs_handle_t *zhp, char *prop, void *val)
132 if (val && strlen(val) > 0) {
133 vprint(" %s=%s\n", prop, (char *)val);
134 ret = zfs_prop_set(zhp, prop, (char *)val);
141 * Remove a property from zfs property dataset
143 static int zfs_remove_prop(zfs_handle_t *zhp, nvlist_t *nvl, char *propname)
145 nvlist_remove_all(nvl, propname);
146 /* XXX: please replace zfs_prop_inherit() if there is a better function
147 * to call zfs_ioctl() to update data on-disk.
149 return zfs_prop_inherit(zhp, propname, false);
152 static int zfs_erase_prop(zfs_handle_t *zhp, char *param)
155 char propname[ZFS_MAXPROPLEN];
156 int len = strlen(param) + strlen(LDD_PREFIX);
158 if (len > ZFS_MAXPROPLEN) {
159 fprintf(stderr, "%s: zfs prop to erase is too long-\n%s\n",
164 nvl = zfs_get_user_props(zhp);
168 snprintf(propname, len + 1, "%s%s", LDD_PREFIX, param);
169 return zfs_remove_prop(zhp, nvl, propname);
172 static int zfs_erase_allprops(zfs_handle_t *zhp)
175 nvpair_t *curr = NULL;
177 nvl = zfs_get_user_props(zhp);
181 curr = nvlist_next_nvpair(nvl, curr);
183 nvpair_t *next = nvlist_next_nvpair(nvl, curr);
185 zfs_remove_prop(zhp, nvl, nvpair_name(curr));
193 * Map '<key>=<value> ...' pairs in the passed string to dataset properties
194 * of the form 'lustre:<key>=<value>'. "<key>=" means to remove this key
197 static int zfs_set_prop_params(zfs_handle_t *zhp, char *params)
199 char *params_dup, *token, *key, *value;
200 char *save_token = NULL;
201 char propname[ZFS_MAXPROPLEN];
204 params_dup = strdup(params);
205 if (params_dup == NULL)
208 token = strtok_r(params_dup, " ", &save_token);
210 key = strtok(token, "=");
214 value = strtok(NULL, "=");
216 /* remove this prop when its value is null */
217 ret = zfs_erase_prop(zhp, key);
221 snprintf(propname, strlen(LDD_PREFIX) + strlen(key) + 1,
222 "%s%s", LDD_PREFIX, key);
223 vprint(" %s=%s\n", propname, value);
225 ret = zfs_prop_set(zhp, propname, value);
230 token = strtok_r(NULL, " ", &save_token);
238 static int zfs_check_hostid(struct mkfs_opts *mop)
241 unsigned long hostid;
244 if (strstr(mop->mo_ldd.ldd_params, PARAM_FAILNODE) == NULL)
247 f = fopen("/sys/module/spl/parameters/spl_hostid", "r");
250 fprintf(stderr, "Failed to open spl_hostid: %s\n",
254 rc = fscanf(f, "%li", &hostid);
258 fprintf(stderr, "Failed to read spl_hostid: %d\n", rc);
265 f = fopen(HOSTID_PATH, "r");
269 rc = fread(&hostid, sizeof(uint32_t), 1, f);
273 fprintf(stderr, "Failed to read "HOSTID_PATH": %d\n",
280 if (mop->mo_flags & MO_NOHOSTID_CHECK) {
281 fprintf(stderr, "WARNING: spl_hostid not set. ZFS has "
282 "no zpool import protection\n");
285 fprintf(stderr, "spl_hostid not set. See %s(8)",
294 static int osd_check_zfs_setup(void)
296 if (osd_zfs_setup == 0) {
299 fprintf(stderr, "Failed to initialize ZFS library. Are the ZFS "
300 "packages and modules correctly installed?\n");
302 return osd_zfs_setup == 1;
305 /* Write the server config as properties associated with the dataset */
306 int zfs_write_ldd(struct mkfs_opts *mop)
308 struct lustre_disk_data *ldd = &mop->mo_ldd;
309 char *ds = mop->mo_device;
311 struct zfs_ldd_prop_bridge *bridge;
314 if (osd_check_zfs_setup() == 0)
317 zhp = zfs_open(g_zfs, ds, ZFS_TYPE_FILESYSTEM);
319 fprintf(stderr, "Failed to open zfs dataset %s\n", ds);
323 ret = zfs_check_hostid(mop);
327 vprint("Writing %s properties\n", ds);
329 if (mop->mo_flags & MO_ERASE_ALL)
330 ret = zfs_erase_allprops(zhp);
331 ret = zfs_set_prop_params(zhp, ldd->ldd_params);
333 for (i = 0; special_ldd_prop_params[i].zlpb_prop_name != NULL; i++) {
334 bridge = &special_ldd_prop_params[i];
335 ret = bridge->zlpb_set_prop_fn(zhp, bridge->zlpb_prop_name,
336 (void *)ldd + bridge->zlpb_ldd_offset);
347 /* Mark a property to be removed by the form of "key=" */
348 int zfs_erase_ldd(struct mkfs_opts *mop, char *param)
350 char key[ZFS_MAXPROPLEN] = "";
352 if (strlen(LDD_PREFIX) + strlen(param) > ZFS_MAXPROPLEN) {
353 fprintf(stderr, "%s: zfs prop to erase is too long-\n%s\n",
357 snprintf(key, strlen(param) + 2, "%s=", param);
358 return add_param(mop->mo_ldd.ldd_params, key, "");
361 static int zfs_get_prop_int(zfs_handle_t *zhp, char *prop, void *val)
367 ret = nvlist_lookup_nvlist(zfs_get_user_props(zhp), prop, &propval);
371 ret = nvlist_lookup_string(propval, ZPROP_VALUE, &propstr);
376 *(__u32 *)val = strtoul(propstr, NULL, 10);
383 static int zfs_get_prop_str(zfs_handle_t *zhp, char *prop, void *val)
389 ret = nvlist_lookup_nvlist(zfs_get_user_props(zhp), prop, &propval);
393 ret = nvlist_lookup_string(propval, ZPROP_VALUE, &propstr);
397 (void) strcpy(val, propstr);
402 static int zfs_is_special_ldd_prop_param(char *name)
406 for (i = 0; special_ldd_prop_params[i].zlpb_prop_name != NULL; i++)
407 if (!strcmp(name, special_ldd_prop_params[i].zlpb_prop_name))
413 static int zfs_get_prop_params(zfs_handle_t *zhp, char *param)
417 char key[ZFS_MAXPROPLEN] = "";
418 char value[PARAM_MAX] = "";
421 props = zfs_get_user_props(zhp);
426 while (nvp = nvlist_next_nvpair(props, nvp), nvp) {
427 ret = zfs_get_prop_str(zhp, nvpair_name(nvp), value);
431 if (strncmp(nvpair_name(nvp), LDD_PREFIX, strlen(LDD_PREFIX)))
434 if (zfs_is_special_ldd_prop_param(nvpair_name(nvp)))
437 sprintf(key, "%s=", nvpair_name(nvp) + strlen(LDD_PREFIX));
438 ret = add_param(param, key, value);
447 * Read the server config as properties associated with the dataset.
448 * Missing entries as not treated error and are simply skipped.
450 int zfs_read_ldd(char *ds, struct lustre_disk_data *ldd)
453 struct zfs_ldd_prop_bridge *bridge;
456 if (osd_check_zfs_setup() == 0)
459 zhp = zfs_open(g_zfs, ds, ZFS_TYPE_FILESYSTEM);
461 zhp = zfs_open(g_zfs, ds, ZFS_TYPE_SNAPSHOT);
466 for (i = 0; special_ldd_prop_params[i].zlpb_prop_name != NULL; i++) {
467 bridge = &special_ldd_prop_params[i];
468 ret = bridge->zlpb_get_prop_fn(zhp, bridge->zlpb_prop_name,
469 (void *)ldd + bridge->zlpb_ldd_offset);
470 if (ret && (ret != ENOENT))
474 ret = zfs_get_prop_params(zhp, ldd->ldd_params);
475 if (ret && (ret != ENOENT))
478 ldd->ldd_mount_type = LDD_MT_ZFS;
486 /* Print ldd params */
487 void zfs_print_ldd_params(struct mkfs_opts *mop)
489 char *from = mop->mo_ldd.ldd_params;
493 vprint("Parameters:");
495 /* skip those keys to be removed in the form of "key=" */
496 to = strstr(from, "= ");
498 /* "key=" may be in the end */
499 if (*(from + strlen(from) - 1) == '=')
500 to = from + strlen(from) - 1;
502 /* find " " inward */
505 len = strlen(from) - strlen(to);
506 while ((*(from + len) != ' ') && len)
510 /* no space in the end */
511 vprint("%*.*s", len, len, from);
513 /* If there is no "key=" or "key=" is in the end, stop. */
514 if (!to || strlen(to) == 1)
522 int zfs_is_lustre(char *ds, unsigned *mount_type)
524 struct lustre_disk_data tmp_ldd;
527 if (osd_zfs_setup == 0)
530 ret = zfs_read_ldd(ds, &tmp_ldd);
531 if ((ret == 0) && (tmp_ldd.ldd_config_ver > 0) &&
532 (strlen(tmp_ldd.ldd_svname) > 0)) {
533 *mount_type = tmp_ldd.ldd_mount_type;
540 static char *zfs_mkfs_opts(struct mkfs_opts *mop, char *str, int len)
544 if (strlen(mop->mo_mkfsopts) != 0)
545 snprintf(str, len, " -o %s", mop->mo_mkfsopts);
550 static int zfs_create_vdev(struct mkfs_opts *mop, char *vdev)
554 /* Silently ignore reserved vdev names */
555 if ((strncmp(vdev, "disk", 4) == 0) ||
556 (strncmp(vdev, "file", 4) == 0) ||
557 (strncmp(vdev, "mirror", 6) == 0) ||
558 (strncmp(vdev, "raidz", 5) == 0) ||
559 (strncmp(vdev, "spare", 5) == 0) ||
560 (strncmp(vdev, "log", 3) == 0) ||
561 (strncmp(vdev, "cache", 5) == 0))
565 * Verify a file exists at the provided absolute path. If it doesn't
566 * and mo_device_kb is set attempt to create a file vdev to be used.
567 * Relative paths will be passed directly to 'zpool create' which
568 * will check multiple locations under /dev/.
570 if (vdev[0] == '/') {
571 ret = access(vdev, F_OK);
578 fprintf(stderr, "Unable to access required vdev "
579 "for pool %s (%d)\n", vdev, ret);
583 if (mop->mo_device_kb == 0) {
585 fprintf(stderr, "Unable to create vdev due to "
586 "missing --device-size=#N(KB) parameter\n");
590 ret = file_create(vdev, mop->mo_device_kb);
593 fprintf(stderr, "Unable to create vdev %s (%d)\n",
602 int zfs_make_lustre(struct mkfs_opts *mop)
607 char *mkfs_cmd = NULL;
608 char *mkfs_tmp = NULL;
609 char *ds = mop->mo_device;
610 int pool_exists = 0, ret;
612 if (osd_check_zfs_setup() == 0)
615 /* no automatic index with zfs backend */
616 if (mop->mo_ldd.ldd_flags & LDD_F_NEED_INDEX) {
618 fprintf(stderr, "The target index must be specified with "
623 ret = zfs_check_hostid(mop);
631 mkfs_cmd = malloc(PATH_MAX);
632 if (mkfs_cmd == NULL) {
637 mkfs_tmp = malloc(PATH_MAX);
638 if (mkfs_tmp == NULL) {
643 /* Due to zfs_prepare_lustre() check the '/' must exist */
644 strchr(pool, '/')[0] = '\0';
646 /* If --reformat was given attempt to destroy the previous dataset */
647 if ((mop->mo_flags & MO_FORCEFORMAT) &&
648 ((zhp = zfs_open(g_zfs, ds, ZFS_TYPE_FILESYSTEM)) != NULL)) {
650 ret = zfs_destroy(zhp, 0);
653 fprintf(stderr, "Failed destroy zfs dataset %s (%d)\n",
662 * Create the zpool if the vdevs have been specified and the pool
663 * does not already exist. The pool creation itself will be done
664 * with the zpool command rather than the zpool_create() library call
665 * so the existing zpool error handling can be leveraged.
667 php = zpool_open(g_zfs, pool);
670 zpool_set_prop(php, "canmount", "off");
674 if ((mop->mo_pool_vdevs != NULL) && (pool_exists == 0)) {
676 memset(mkfs_cmd, 0, PATH_MAX);
677 snprintf(mkfs_cmd, PATH_MAX,
678 "zpool create -f -O canmount=off %s", pool);
680 /* Append the vdev config and create file vdevs as required */
681 while (*mop->mo_pool_vdevs != NULL) {
682 strscat(mkfs_cmd, " ", PATH_MAX);
683 strscat(mkfs_cmd, *mop->mo_pool_vdevs, PATH_MAX);
685 ret = zfs_create_vdev(mop, *mop->mo_pool_vdevs);
689 mop->mo_pool_vdevs++;
692 vprint("mkfs_cmd = %s\n", mkfs_cmd);
693 ret = run_command(mkfs_cmd, PATH_MAX);
696 fprintf(stderr, "Unable to create pool %s (%d)\n",
703 * Set Options on ZPOOL
705 * ALL - canmount=off (set above)
706 * 0.7.0 - multihost=on
707 * 0.7.0 - feature@userobj_accounting=enabled
709 php = zpool_open(g_zfs, pool);
711 zpool_set_prop(php, "multihost", "on");
712 zpool_set_prop(php, "feature@userobj_accounting", "enabled");
718 * Create the ZFS filesystem with any required mkfs options:
719 * - canmount=off is set to prevent zfs automounting
721 memset(mkfs_cmd, 0, PATH_MAX);
722 snprintf(mkfs_cmd, PATH_MAX,
723 "zfs create -o canmount=off %s %s",
724 zfs_mkfs_opts(mop, mkfs_tmp, PATH_MAX), ds);
726 vprint("mkfs_cmd = %s\n", mkfs_cmd);
727 ret = run_command(mkfs_cmd, PATH_MAX);
730 fprintf(stderr, "Unable to create filesystem %s (%d)\n",
736 * Attempt to set dataset properties to reasonable defaults
737 * to optimize performance, unless the values were specified
738 * at the mkfs command line. Some ZFS pools or ZFS versions
739 * do not support these properties. We can safely ignore the
740 * errors and continue in those cases.
742 * zfs 0.6.1 - system attribute based xattrs
743 * zfs 0.6.5 - large block support
744 * zfs 0.7.0 - large dnode support
746 * Check if zhp is NULL as a defensive measure. Any dataset
747 * validation errors that would cause zfs_open() to fail
748 * should have been caught earlier.
750 zhp = zfs_open(g_zfs, ds, ZFS_TYPE_FILESYSTEM);
752 /* zfs 0.6.1 - system attribute based xattrs */
753 if (!strstr(mop->mo_mkfsopts, "xattr="))
754 zfs_set_prop_str(zhp, "xattr", "sa");
756 /* zfs 0.7.0 - large dnode support */
757 if (!strstr(mop->mo_mkfsopts, "dnodesize=") &&
758 !strstr(mop->mo_mkfsopts, "dnsize="))
759 zfs_set_prop_str(zhp, "dnodesize", "auto");
761 if (IS_OST(&mop->mo_ldd)) {
762 /* zfs 0.6.5 - large block support */
763 if (!strstr(mop->mo_mkfsopts, "recordsize=") &&
764 !strstr(mop->mo_mkfsopts, "recsize="))
765 zfs_set_prop_str(zhp, "recordsize", "1M");
775 if (mkfs_cmd != NULL)
778 if (mkfs_tmp != NULL)
/*
 * Quota enabling is not implemented for the ZFS backend.
 *
 * @mop is unused.  Always prints a diagnostic and returns ENOSYS.
 */
int zfs_enable_quota(struct mkfs_opts *mop)
{
	fprintf(stderr, "this option is not valid for zfs\n");
	return ENOSYS;
}
790 int zfs_prepare_lustre(struct mkfs_opts *mop,
791 char *wanted_mountopts, size_t len)
793 if (osd_check_zfs_setup() == 0)
796 if (zfs_name_valid(mop->mo_device, ZFS_TYPE_FILESYSTEM) == 0) {
798 fprintf(stderr, "Invalid filesystem name %s\n", mop->mo_device);
802 if (strchr(mop->mo_device, '/') == NULL) {
804 fprintf(stderr, "Missing pool in filesystem name %s\n",
/*
 * No tuning is performed for ZFS; only backend availability is checked.
 * @dev and @mop are not used.
 *
 * Returns 0 when the ZFS backend is set up and EINVAL otherwise.
 */
int zfs_tune_lustre(char *dev, struct mount_opts *mop)
{
	return osd_check_zfs_setup() ? 0 : EINVAL;
}
820 int zfs_label_lustre(struct mount_opts *mop)
825 if (osd_check_zfs_setup() == 0)
828 zhp = zfs_open(g_zfs, mop->mo_source, ZFS_TYPE_FILESYSTEM);
832 ret = zfs_set_prop_str(zhp, LDD_SVNAME_PROP, mop->mo_ldd.ldd_svname);
838 int zfs_rename_fsname(struct mkfs_opts *mop, const char *oldname)
840 struct mount_opts opts;
841 char mntpt[] = "/tmp/mntXXXXXX";
845 /* Change the filesystem label. */
846 opts.mo_ldd = mop->mo_ldd;
847 opts.mo_source = mop->mo_device;
848 ret = zfs_label_lustre(&opts);
852 fprintf(stderr, "Can't change filesystem label: %s\n",
857 /* Mount this device temporarily in order to write these files */
858 if (mkdtemp(mntpt) == NULL) {
861 fprintf(stderr, "Can't create temp mount point %s: %s\n",
862 mntpt, strerror(ret));
866 cmd_buf = malloc(PATH_MAX);
872 memset(cmd_buf, 0, PATH_MAX);
873 snprintf(cmd_buf, PATH_MAX - 1, "zfs set mountpoint=%s %s && "
874 "zfs set canmount=on %s && zfs mount %s",
875 mntpt, mop->mo_device, mop->mo_device, mop->mo_device);
876 ret = run_command(cmd_buf, PATH_MAX);
880 fprintf(stderr, "Unable to mount %s (%s)\n",
881 mop->mo_device, strerror(ret));
883 fprintf(stderr, "Is the %s module available?\n",
884 MT_STR(&mop->mo_ldd));
888 ret = lustre_rename_fsname(mop, mntpt, oldname);
889 memset(cmd_buf, 0, PATH_MAX);
890 snprintf(cmd_buf, PATH_MAX - 1, "zfs umount %s && "
891 "zfs set canmount=off %s && zfs set mountpoint=none %s",
892 mop->mo_device, mop->mo_device, mop->mo_device);
893 run_command(cmd_buf, PATH_MAX);
906 g_zfs = libzfs_init();
909 /* Try to load zfs.ko and retry libzfs_init() */
911 ret = system("/sbin/modprobe -q zfs");
914 g_zfs = libzfs_init();
923 fprintf(stderr, "Failed to initialize ZFS library: %d\n", ret);
938 struct module_backfs_ops zfs_ops = {
941 .read_ldd = zfs_read_ldd,
942 .write_ldd = zfs_write_ldd,
943 .erase_ldd = zfs_erase_ldd,
944 .print_ldd_params = zfs_print_ldd_params,
945 .is_lustre = zfs_is_lustre,
946 .make_lustre = zfs_make_lustre,
947 .prepare_lustre = zfs_prepare_lustre,
948 .tune_lustre = zfs_tune_lustre,
949 .label_lustre = zfs_label_lustre,
950 .enable_quota = zfs_enable_quota,
951 .rename_fsname = zfs_rename_fsname,
953 #endif /* PLUGIN_DIR */