for the new layout.
.RS 1.2i
.TP
+.B crush
+CRUSH hash algorithm. This is a consistent hash
+algorithm, so only a minimal number of sub files need
+to be relocated when the directory is restriped.
+.TP
.B fnv_1a_64
Fowler-Noll-Vo (FNV-1a) hash algorithm. This provides
reasonably uniform, but not cryptographically strong,
as the master/starting MDT for the directory. If multiple
.I MDT_INDEX
values are given, then the stripes will be allocated on the specified
-MDT indices. If index -1 (default) is used, it will randomly pick
-.I COUNT
-less full MDTs.
+MDT indices. If index -1 (default) is used, it will prefer to select
+.B COUNT
+MDTs proportional to the free space and inodes on each.
.TP
.BR \-H ", " \-\-mdt-hash =\fIHASH_TYPE\fR
Use
for the striped directory.
.RS 1.2i
.TP
+.B crush
+CRUSH hash algorithm. This is a consistent hash
+algorithm, so only a minimal number of sub files need
+to be relocated when the directory is restriped.
+.TP
.B fnv_1a_64
Fowler-Noll-Vo (FNV-1a) hash algorithm. This provides
reasonably uniform, but not cryptographically strong,
#include <uapi/linux/lustre/lustre_idl.h>
#include <lu_ref.h>
#include <linux/percpu_counter.h>
+#include <linux/ctype.h>
struct seq_file;
struct proc_dir_entry;
return name_is_dot_or_dotdot(lname->ln_name, lname->ln_namelen);
}
+/**
+ * Check whether \a name looks like a temporary file created with a
+ * mktemp()-style random suffix, i.e. "<target>.<suffix>" or, when
+ * \a dot_prefix is set, ".<target>.<suffix>".
+ *
+ * \param[in] name		file name, at least \a namelen bytes long
+ * \param[in] namelen		length of \a name in bytes
+ * \param[in] dot_prefix	require \a name to start with '.'
+ * \param[in] suffixlen	number of characters expected after the last '.'
+ *
+ * \retval true		\a name matches the temporary file pattern
+ * \retval false	\a name is too short, lacks the '.' separators, or its
+ *			suffix does not look randomly generated
+ */
+static inline bool lu_name_is_temp_file(const char *name, int namelen,
+					bool dot_prefix, int suffixlen)
+{
+	int lower = 0;
+	int upper = 0;
+	int digit = 0;
+	int len = suffixlen;
+
+	/* validate the length before touching any byte of the name */
+	if (namelen < dot_prefix + suffixlen + 2)
+		return false;
+
+	if (dot_prefix && name[0] != '.')
+		return false;
+
+	if (name[namelen - suffixlen - 1] != '.')
+		return false;
+
+	while (len) {
+		/* ctype predicates take an unsigned char value and only
+		 * promise zero/non-zero, so normalize each result to 0 or 1
+		 * before counting.
+		 */
+		unsigned char c = name[namelen - len];
+
+		lower += !!islower(c);
+		upper += !!isupper(c);
+		digit += !!isdigit(c);
+		len--;
+	}
+	/* mktemp() filename suffixes will have a mix of upper- and lower-case
+	 * letters and/or numbers, not all numbers, or all upper or lower-case.
+	 * A suffix is rejected when all but at most two of its characters are
+	 * digits, or when it is entirely upper-case or entirely lower-case.
+	 * About 0.07% of randomly-generated names will slip through,
+	 * but this avoids 99.93% of cross-MDT renames for those files.
+	 */
+	if (digit >= suffixlen - 2 || upper == suffixlen || lower == suffixlen)
+		return false;
+
+	return true;
+}
+
+/**
+ * Check whether \a name ends in a well-known backup suffix: a trailing '~'
+ * that is not directly preceded by '.', or a case-insensitive ".bak", ".sav"
+ * or ".orig" extension that is preceded by at least one other character.
+ *
+ * \param[in]  name		file name, at least \a namelen bytes long
+ * \param[in]  namelen		length of \a name in bytes
+ * \param[out] suffixlen	if not NULL, set on a match to the length of
+ *				the suffix (1, 4 or 5); untouched otherwise
+ *
+ * \retval true		\a name carries a backup suffix
+ * \retval false	otherwise
+ */
+static inline bool lu_name_is_backup_file(const char *name, int namelen,
+					  int *suffixlen)
+{
+	int matched = 0;
+
+	if (namelen > 1 &&
+	    name[namelen - 1] == '~' && name[namelen - 2] != '.') {
+		matched = 1;
+	} else if (namelen > 4 && name[namelen - 4] == '.' &&
+		   (strncasecmp(name + namelen - 3, "bak", 3) == 0 ||
+		    strncasecmp(name + namelen - 3, "sav", 3) == 0)) {
+		matched = 4;
+	} else if (namelen > 5 && name[namelen - 5] == '.' &&
+		   strncasecmp(name + namelen - 4, "orig", 4) == 0) {
+		matched = 5;
+	}
+
+	if (matched == 0)
+		return false;
+
+	if (suffixlen != NULL)
+		*suffixlen = matched;
+
+	return true;
+}
+
static inline bool lu_name_is_valid_len(const char *name, size_t name_len)
{
return name != NULL &&
lsm->lsm_md_stripe_count - lsm->lsm_md_migrate_offset <= 1)
return false;
+ if (lsm->lsm_md_hash_type & LMV_HASH_FLAG_BAD_TYPE)
+ return true;
+
return !lmv_is_known_hash_type(lsm->lsm_md_hash_type);
}
return do_div(hash, count);
}
-static inline int lmv_name_to_stripe_index(__u32 lmv_hash_type,
+/*
+ * Robert Jenkins' function for mixing 32-bit values
+ * http://burtleburtle.net/bob/hash/evahash.html
+ * a, b = random bits, c = input and output
+ *
+ * Mixing inputs to generate an evenly distributed hash.
+ *
+ * NOTE: all three arguments are assigned to and are expanded many times
+ * without parentheses, so callers must pass plain variables, never
+ * expressions or anything with side effects. The values of a and b are
+ * overwritten as well as c.
+ */
+#define crush_hashmix(a, b, c) \
+do { \
+ a = a - b; a = a - c; a = a ^ (c >> 13); \
+ b = b - c; b = b - a; b = b ^ (a << 8); \
+ c = c - a; c = c - b; c = c ^ (b >> 13); \
+ a = a - b; a = a - c; a = a ^ (c >> 12); \
+ b = b - c; b = b - a; b = b ^ (a << 16); \
+ c = c - a; c = c - b; c = c ^ (b >> 5); \
+ a = a - b; a = a - c; a = a ^ (c >> 3); \
+ b = b - c; b = b - a; b = b ^ (a << 10); \
+ c = c - a; c = c - b; c = c ^ (b >> 15); \
+} while (0)
+
+#define crush_hash_seed 1315423911
+
+/*
+ * Hash two 32-bit values into one: seed the result with
+ * crush_hash_seed ^ a ^ b, then run three crush_hashmix() rounds over it.
+ * The round order and the two padding constants determine the result and
+ * must not be changed.
+ */
+static inline __u32 crush_hash(__u32 a, __u32 b)
+{
+	__u32 pad_x = 231232;
+	__u32 pad_y = 1232;
+	__u32 mixed = crush_hash_seed ^ a ^ b;
+
+	/* each round also rewrites its first two arguments */
+	crush_hashmix(a, b, mixed);
+	crush_hashmix(pad_x, a, mixed);
+	crush_hashmix(b, pad_y, mixed);
+
+	return mixed;
+}
+
+/*
+ * Map \a name to a stripe index in [0, \a count) with a CRUSH-style
+ * "highest straw wins" selection.
+ *
+ * See https://github.com/ceph/ceph/blob/master/src/crush/hash.c and
+ * https://www.ssrc.ucsc.edu/Papers/weil-sc06.pdf for details of the CRUSH
+ * algorithm.
+ */
+static inline unsigned int
+lmv_hash_crush(unsigned int count, const char *name, int namelen)
+{
+	unsigned long long best_straw = 0;
+	unsigned int winner = 0;
+	unsigned int stripe;
+	unsigned int pg_id;
+	int suffixlen;
+
+	/* Hash temp and backup files by the name of their target, so that
+	 * they land on the same MDT as the target.
+	 * Temporary file naming rules:
+	 * 1. rsync: .<target>.XXXXXX	(strip leading '.' and 7-char tail)
+	 * 2. dstripe: <target>.XXXXXXXX	(strip 9-char tail)
+	 */
+	if (lu_name_is_temp_file(name, namelen, true, 6)) {
+		name++;
+		namelen -= 8;
+	} else if (lu_name_is_temp_file(name, namelen, false, 8)) {
+		namelen -= 9;
+	} else if (lu_name_is_backup_file(name, namelen, &suffixlen)) {
+		LASSERT(suffixlen < namelen);
+		namelen -= suffixlen;
+	}
+
+	pg_id = lmv_hash_fnv1a(LMV_CRUSH_PG_COUNT, name, namelen);
+
+	/* Distribute PGs among all stripes pseudo-randomly, so they are
+	 * almost evenly distributed, and when the stripe count changes only
+	 * (delta / total) sub files need to be moved, where 'delta' is the
+	 * number of added or removed stripes and 'total' is the stripe count
+	 * before the change for removal, or after the change for addition.
+	 */
+	for (stripe = 0; stripe < count; stripe++) {
+		unsigned long long straw = crush_hash(pg_id, stripe);
+
+		/* strict comparison: on a tie the lowest index is kept */
+		if (straw <= best_straw)
+			continue;
+
+		best_straw = straw;
+		winner = stripe;
+	}
+	LASSERT(winner < count);
+
+	return winner;
+}
+
+static inline int lmv_name_to_stripe_index(__u32 hash_type,
unsigned int stripe_count,
const char *name, int namelen)
{
- int idx;
+ unsigned int idx;
LASSERT(namelen > 0);
+ LASSERT(stripe_count > 0);
- if (stripe_count <= 1)
+ if (stripe_count == 1)
return 0;
- switch (lmv_hash_type & LMV_HASH_TYPE_MASK) {
+ switch (hash_type & LMV_HASH_TYPE_MASK) {
case LMV_HASH_TYPE_ALL_CHARS:
idx = lmv_hash_all_chars(stripe_count, name, namelen);
break;
case LMV_HASH_TYPE_FNV_1A_64:
idx = lmv_hash_fnv1a(stripe_count, name, namelen);
break;
- default:
- idx = -EBADFD;
+ case LMV_HASH_TYPE_CRUSH:
+ idx = lmv_hash_crush(stripe_count, name, namelen);
break;
+ default:
+ return -EBADFD;
}
CDEBUG(D_INFO, "name %.*s hash_type %#x idx %d/%u\n", namelen, name,
- lmv_hash_type, idx, stripe_count);
+ hash_type, idx, stripe_count);
return idx;
}
-static inline bool lmv_magic_supported(__u32 lum_magic)
+static inline bool lmv_user_magic_supported(__u32 lum_magic)
{
return lum_magic == LMV_USER_MAGIC ||
lum_magic == LMV_USER_MAGIC_SPECIFIC ||
lum_magic == LMV_MAGIC_FOREIGN;
}
+/**
+ * Sanity check an on-disk (little-endian) LMV EA header.
+ *
+ * All fields of \a lmv are stored little-endian, so every field is converted
+ * to CPU byte order before it is examined, including the hash type.
+ *
+ * \param[in] lmv	on-disk LMV EA to verify
+ *
+ * \retval true		magic is LMV_MAGIC_V1, stripe count is non-zero and
+ *			the hash type is a known one
+ * \retval false	otherwise
+ */
+static inline bool lmv_is_sane(const struct lmv_mds_md_v1 *lmv)
+{
+	if (le32_to_cpu(lmv->lmv_magic) != LMV_MAGIC_V1)
+		return false;
+
+	if (le32_to_cpu(lmv->lmv_stripe_count) == 0)
+		return false;
+
+	/* lmv_hash_type is little-endian on disk like the other fields;
+	 * passing it unconverted makes the check fail on big-endian hosts.
+	 */
+	if (!lmv_is_known_hash_type(le32_to_cpu(lmv->lmv_hash_type)))
+		return false;
+
+	return true;
+}
+
#endif
OBD_CONNECT2_SELINUX_POLICY | \
OBD_CONNECT2_LSOM | \
OBD_CONNECT2_ASYNC_DISCARD | \
- OBD_CONNECT2_PCC)
+ OBD_CONNECT2_PCC | \
+ OBD_CONNECT2_CRUSH)
#define OST_CONNECT_SUPPORTED (OBD_CONNECT_SRVLOCK | OBD_CONNECT_GRANT | \
OBD_CONNECT_REQPORTAL | OBD_CONNECT_VERSION | \
return hash;
}
+/* CRUSH placement group count */
+#define LMV_CRUSH_PG_COUNT 4096
+
union lmv_mds_md {
__u32 lmv_magic;
struct lmv_mds_md_v1 lmv_md_v1;
LMV_HASH_TYPE_UNKNOWN = 0, /* 0 is reserved for testing purpose */
LMV_HASH_TYPE_ALL_CHARS = 1,
LMV_HASH_TYPE_FNV_1A_64 = 2,
+ LMV_HASH_TYPE_CRUSH = 3,
LMV_HASH_TYPE_MAX,
};
#define LMV_HASH_NAME_ALL_CHARS "all_char"
#define LMV_HASH_NAME_FNV_1A_64 "fnv_1a_64"
-
-/* not real hash type, but exposed to user as "space" hash type */
-#define LMV_HASH_NAME_SPACE "space"
+#define LMV_HASH_NAME_CRUSH "crush"
/* Right now only the lower part(0-16bits) of lmv_hash_type is being used,
* and the higher part will be the flag to indicate the status of object,
static inline bool lmv_is_known_hash_type(__u32 type)
{
return (type & LMV_HASH_TYPE_MASK) == LMV_HASH_TYPE_FNV_1A_64 ||
- (type & LMV_HASH_TYPE_MASK) == LMV_HASH_TYPE_ALL_CHARS;
+ (type & LMV_HASH_TYPE_MASK) == LMV_HASH_TYPE_ALL_CHARS ||
+ (type & LMV_HASH_TYPE_MASK) == LMV_HASH_TYPE_CRUSH;
}
/* The striped directory has ever lost its master LMV EA, then LFSCK
* re-generated it. This flag is used to indicate such case. It is an
* on-disk flag. */
-#define LMV_HASH_FLAG_LOST_LMV 0x10000000
+#define LMV_HASH_FLAG_LOST_LMV 0x10000000
-#define LMV_HASH_FLAG_BAD_TYPE 0x20000000
-#define LMV_HASH_FLAG_MIGRATION 0x80000000
+#define LMV_HASH_FLAG_BAD_TYPE 0x20000000
+#define LMV_HASH_FLAG_MIGRATION 0x80000000
extern char *mdt_hash_name[LMV_HASH_TYPE_MAX];
int err;
ENTRY;
- if (unlikely(!lmv_magic_supported(lump->lum_magic)))
+ if (unlikely(!lmv_user_magic_supported(lump->lum_magic)))
RETURN(-EINVAL);
if (lump->lum_magic != LMV_MAGIC_FOREIGN) {
!OBD_FAIL_CHECK(OBD_FAIL_LLITE_NO_CHECK_DEAD))
RETURN(-ENOENT);
- if (unlikely(!lmv_magic_supported(cpu_to_le32(lump->lum_magic))))
+ if (!(exp_connect_flags2(sbi->ll_md_exp) & OBD_CONNECT2_CRUSH)) {
+ if ((lump->lum_hash_type & LMV_HASH_TYPE_MASK) ==
+ LMV_HASH_TYPE_CRUSH) {
+ /* if server doesn't support 'crush' hash type,
+ * switch to fnv_1a_64.
+ */
+ lump->lum_hash_type &= ~LMV_HASH_TYPE_MASK;
+ lump->lum_hash_type |= LMV_HASH_TYPE_FNV_1A_64;
+ } else if ((lump->lum_hash_type & LMV_HASH_TYPE_MASK) ==
+ LMV_HASH_TYPE_UNKNOWN) {
+ /* from 2.14 MDT will choose default hash type if client
+ * doesn't set a valid one, while old server doesn't
+ * handle it.
+ */
+ lump->lum_hash_type &= ~LMV_HASH_TYPE_MASK;
+ lump->lum_hash_type |= LMV_HASH_TYPE_DEFAULT;
+ }
+ }
+
+ if (unlikely(!lmv_user_magic_supported(cpu_to_le32(lump->lum_magic))))
lustre_swab_lmv_user_md(lump);
if (!IS_POSIXACL(parent) || !exp_connect_umask(ll_i2mdexp(parent)))
lum = (struct lmv_user_md *)data->ioc_inlbuf2;
lumlen = data->ioc_inllen2;
- if (!lmv_magic_supported(lum->lum_magic)) {
+ if (!lmv_user_magic_supported(lum->lum_magic)) {
CERROR("%s: wrong lum magic %x : rc = %d\n", filename,
lum->lum_magic, -EINVAL);
GOTO(lmv_out_free, rc = -EINVAL);
OBD_CONNECT2_INC_XID |
OBD_CONNECT2_LSOM |
OBD_CONNECT2_ASYNC_DISCARD |
- OBD_CONNECT2_PCC;
+ OBD_CONNECT2_PCC |
+ OBD_CONNECT2_CRUSH;
#ifdef HAVE_LRU_RESIZE_SUPPORT
if (sbi->ll_flags & LL_SBI_LRU_RESIZE)
/* The on-disk LMV EA only contains header, but the
* returned LMV EA size should contain the space for
* the FIDs of all shards of the striped directory. */
- if (le32_to_cpu(lmv1->lmv_magic) == LMV_MAGIC_V1)
+ if (lmv_is_sane(lmv1))
rc = lmv_mds_md_size(
le32_to_cpu(lmv1->lmv_stripe_count),
- LMV_MAGIC_V1);
+ le32_to_cpu(lmv1->lmv_magic));
} else {
lfm = buf->lb_buf;
if (le32_to_cpu(lfm->lfm_magic) == LMV_MAGIC_FOREIGN)
RETURN(0);
}
- if (le32_to_cpu(lmv1->lmv_magic) != LMV_MAGIC_V1)
+ if (!lmv_is_sane(lmv1))
RETURN(-EINVAL);
- if (le32_to_cpu(lmv1->lmv_stripe_count) < 1)
- RETURN(0);
-
LASSERT(lo->ldo_stripe == NULL);
OBD_ALLOC(stripe, sizeof(stripe[0]) *
(le32_to_cpu(lmv1->lmv_stripe_count)));
ENTRY;
- if (le32_to_cpu(lmv->lmv_magic) != LMV_MAGIC_V1)
- RETURN(-EINVAL);
-
- if (stripe_count == 0)
+ if (!lmv_is_sane(lmv))
RETURN(-EINVAL);
dof->dof_type = DFT_DIR;
lc->ldo_dir_stripe_count = 0;
}
+ if (lc->ldo_dir_hash_type == LMV_HASH_TYPE_UNKNOWN)
+ lc->ldo_dir_hash_type =
+ d->lod_mdt_descs.ltd_lmv_desc.ld_pattern;
+
CDEBUG(D_INFO, "final dir stripe [%hu %d %u]\n",
lc->ldo_dir_stripe_count,
(int)lc->ldo_dir_stripe_offset, lc->ldo_dir_hash_type);
}
LUSTRE_RW_ATTR(lmv_failout);
+char *mdt_hash_name[] = { "none",
+ LMV_HASH_NAME_ALL_CHARS,
+ LMV_HASH_NAME_FNV_1A_64,
+ LMV_HASH_NAME_CRUSH,
+};
+
+/*
+ * sysfs "mdt_hash" read handler: print the name of the hash type currently
+ * stored in this LOD device's LMV descriptor pattern, followed by a newline.
+ */
+static ssize_t mdt_hash_show(struct kobject *kobj, struct attribute *attr,
+			     char *buf)
+{
+	struct dt_device *dt = container_of(kobj, struct dt_device, dd_kobj);
+	struct lod_device *lod = dt2lod_dev(dt);
+	const char *hash_name;
+
+	hash_name = mdt_hash_name[lod->lod_mdt_descs.ltd_lmv_desc.ld_pattern];
+
+	return snprintf(buf, PAGE_SIZE, "%s\n", hash_name);
+}
+
+/*
+ * sysfs "mdt_hash" write handler: take the first word of \a buffer (up to a
+ * space or newline), look it up among the real hash type names and store the
+ * matching type in this LOD device's LMV descriptor pattern.
+ *
+ * Returns \a count on success, -ENOMEM if the input cannot be duplicated and
+ * -EINVAL if the name matches no hash type ("none" is not accepted because
+ * the search starts at LMV_HASH_TYPE_ALL_CHARS).
+ */
+static ssize_t mdt_hash_store(struct kobject *kobj, struct attribute *attr,
+			      const char *buffer, size_t count)
+{
+	struct dt_device *dt = container_of(kobj, struct dt_device, dd_kobj);
+	struct lod_device *lod = dt2lod_dev(dt);
+	ssize_t rc = -EINVAL;
+	char *copy;
+	int type;
+
+	copy = kstrndup(buffer, count, GFP_KERNEL);
+	if (copy == NULL)
+		return -ENOMEM;
+
+	/* cut the input at the first newline or space */
+	copy[strcspn(copy, "\n ")] = '\0';
+
+	for (type = LMV_HASH_TYPE_ALL_CHARS; type < LMV_HASH_TYPE_MAX; type++) {
+		if (strcmp(copy, mdt_hash_name[type]) != 0)
+			continue;
+
+		lod->lod_mdt_descs.ltd_lmv_desc.ld_pattern = type;
+		rc = count;
+		break;
+	}
+
+	kfree(copy);
+
+	return rc;
+}
+LUSTRE_RW_ATTR(mdt_hash);
+
static struct lprocfs_vars lprocfs_lod_obd_vars[] = {
{ NULL }
};
&lustre_attr_mdt_qos_maxage.attr,
&lustre_attr_mdt_qos_prio_free.attr,
&lustre_attr_mdt_qos_threshold_rr.attr,
+ &lustre_attr_mdt_hash.attr,
NULL,
};
spec->u.sp_ea.eadata != NULL && spec->u.sp_ea.eadatalen > 0) {
const struct lmv_user_md *lum = spec->u.sp_ea.eadata;
- if (!lmv_magic_supported(le32_to_cpu(lum->lum_magic)) &&
+ if (!lmv_user_magic_supported(le32_to_cpu(lum->lum_magic)) &&
le32_to_cpu(lum->lum_magic) != LMV_USER_MAGIC_V0) {
rc = -EINVAL;
CERROR("%s: invalid lmv_user_md: magic = %x, "
memset(lmv, 0, sizeof(*lmv));
lmv->lmv_magic = cpu_to_le32(LMV_MAGIC_V1);
lmv->lmv_stripe_count = cpu_to_le32(1);
+ lmv->lmv_hash_type = cpu_to_le32(LMV_HASH_TYPE_DEFAULT);
fid_le_to_cpu(&lmv->lmv_stripe_fids[0],
mdd_object_fid(sobj));
sbuf->lb_buf = lmv;
lum_stripe_count ||
lmv->lmv_master_mdt_index !=
lmu->lum_stripe_offset ||
- (lmv_hash_type != 0 &&
+ (lmu->lum_hash_type &&
lmv_hash_type != lmu->lum_hash_type)) {
CERROR("%s: \'"DNAME"\' migration was "
"interrupted, run \'lfs migrate "
RETURN(-EPERM);
}
+ if ((!(exp_connect_flags2(exp) & OBD_CONNECT2_CRUSH)) &&
+ (le32_to_cpu(lum->lum_hash_type) & LMV_HASH_TYPE_MASK) ==
+ LMV_HASH_TYPE_CRUSH)
+ RETURN(-EPROTO);
+
if (!md_capable(uc, CFS_CAP_SYS_ADMIN) &&
uc->uc_gid != mdt->mdt_enable_remote_dir_gid &&
mdt->mdt_enable_remote_dir_gid != -1)
LASSERT(ma->ma_valid & MA_LMV);
LASSERT(lmv);
- if (le32_to_cpu(lmv->lmv_magic) != LMV_MAGIC_V1)
+ if (!lmv_is_sane(lmv))
RETURN(-EINVAL);
- if (le32_to_cpu(lmv->lmv_stripe_count) < 1)
- RETURN(0);
-
for (i = 0; i < le32_to_cpu(lmv->lmv_stripe_count); i++) {
fid_le_to_cpu(fid, &lmv->lmv_stripe_fids[i]);
GOTO(unlock_obj, rc = -EINVAL);
}
- if (lum_stripe_count > 1 &&
+ if (lum_stripe_count > 1 && lmu->lum_hash_type &&
(lmv->lmv_hash_type & cpu_to_le32(LMV_HASH_TYPE_MASK)) !=
lmu->lum_hash_type) {
CERROR("%s: "DFID" migrate mdt hash mismatch %u != %u\n",
struct mgs_direntry *de;
char *key;
int rc, key_sz;
- size_t suffix_len = sizeof(".bak") - 1;
INIT_LIST_HEAD(log_list);
goto next;
}
- /* filter out ".bak" files */
- if (key_sz >= suffix_len &&
- !memcmp(".bak", key + key_sz - suffix_len, suffix_len)) {
+ /* filter out backup files */
+ if (lu_name_is_backup_file(key, key_sz, NULL)) {
CDEBUG(D_MGS, "Skipping backup file %.*s\n",
key_sz, key);
goto next;
/* Default threshold for rr (roughly 17%) */
ltd->ltd_qos.lq_threshold_rr = 43;
ltd->ltd_is_mdt = is_mdt;
+ if (is_mdt)
+ ltd->ltd_lmv_desc.ld_pattern = LMV_HASH_TYPE_DEFAULT;
lu_qos_rr_init(&ltd->ltd_qos.lq_rr);
}
lmv = (struct lmv_mds_md_v1 *)ma->ma_lmm;
- if (le32_to_cpu(lmv->lmv_magic) != LMV_MAGIC_V1) {
+ if (!lmv_is_sane(lmv)) {
rc = -EINVAL;
CERROR("Invalid mds md magic %x "DFID": rc = %d\n",
le32_to_cpu(lmv->lmv_magic), PFID(lu_object_fid(obj)),
if (rc == -ENOENT || rc == -EEXIST || rc == -ENODATA)
GOTO(out_nvbuf, rc = 0);
- if (rc || le32_to_cpu(lmv->lmv_magic) != LMV_MAGIC_V1)
+ if (rc)
GOTO(out_nvbuf, rc);
+ if (le32_to_cpu(lmv->lmv_magic) != LMV_MAGIC_V1)
+ GOTO(out_nvbuf, rc = -EINVAL);
+
zap_cursor_init_serialized(zc, osd->od_os, oid, 0);
rc = -zap_cursor_retrieve(zc, za);
if (rc == -ENOENT) {
BUILD_BUG_ON(LMV_HASH_FLAG_LOST_LMV != 0x10000000);
BUILD_BUG_ON(LMV_HASH_FLAG_BAD_TYPE != 0x20000000);
BUILD_BUG_ON(LMV_HASH_FLAG_MIGRATION != 0x80000000);
+ BUILD_BUG_ON(LMV_CRUSH_PG_COUNT != 4096);
/* Checks for struct obd_statfs */
LASSERTF((int)sizeof(struct obd_statfs) == 144, "found %lld\n",
#define OBD_FAIL_LFSCK_BAD_NAME_HASH 0x1628
$LCTL set_param fail_loc=0x1628 fail_val=1
- createmany -d $DIR/$tdir/striped_dir/d $MDSCOUNT ||
+ createmany -d $DIR/$tdir/striped_dir/d $((MDSCOUNT * 5)) ||
error "(2) Fail to create file under striped directory"
$LCTL set_param fail_loc=0 fail_val=0
local repaired=$(do_facet mds2 $LCTL get_param -n \
mdd.$(facet_svc mds2).lfsck_namespace |
awk '/^name_hash_repaired/ { print $2 }')
+ echo "repaired $repaired name entries with bad hash"
[ $repaired -ge 1 ] ||
error "(5) Fail to repair bad name hash: $repaired"
umount_client $MOUNT || error "(6) umount failed"
mount_client $MOUNT || error "(7) mount failed"
- for ((i = 0; i < $MDSCOUNT; i++)); do
+ for ((i = 0; i < $((MDSCOUNT * 5)); i++)); do
stat $DIR/$tdir/striped_dir/d$i ||
error "(8) Fail to stat d$i after LFSCK"
rmdir $DIR/$tdir/striped_dir/d$i ||
}
run_test 33g "nonroot user create already existing root created file"
+test_33h() {
+ [ $MDSCOUNT -lt 2 ] && skip_env "needs >= 2 MDTs"
+ [ $MDS1_VERSION -lt $(version_code 2.13.50) ] &&
+ skip "Need MDS version at least 2.13.50"
+
+ test_mkdir -c $MDSCOUNT -H crush $DIR/$tdir ||
+ error "mkdir $tdir failed"
+ touch $DIR/$tdir/$tfile || error "touch $tfile failed"
+
+ local index=$($LFS getstripe -m $DIR/$tdir/$tfile)
+ local index2
+
+ for fname in $DIR/$tdir/$tfile.bak \
+ $DIR/$tdir/$tfile.SAV \
+ $DIR/$tdir/$tfile.orig \
+ $DIR/$tdir/$tfile~; do
+ touch $fname || error "touch $fname failed"
+ index2=$($LFS getstripe -m $fname)
+ [ $index -eq $index2 ] ||
+ error "$fname MDT index mismatch $index != $index2"
+ done
+
+ local failed=0
+ for i in {1..50}; do
+ for fname in $(mktemp -u $DIR/$tdir/.$tfile.XXXXXX) \
+ $(mktemp $DIR/$tdir/$tfile.XXXXXXXX); do
+ touch $fname || error "touch $fname failed"
+ index2=$($LFS getstripe -m $fname)
+ if [[ $index != $index2 ]]; then
+ failed=$((failed + 1))
+ echo "$fname MDT index mismatch $index != $index2"
+ fi
+ done
+ done
+ echo "$failed MDT index mismatches"
+ (( failed < 4 )) || error "MDT index mismatch $failed times"
+
+}
+run_test 33h "temp file is located on the same MDT as target"
+
TEST_34_SIZE=${TEST_34_SIZE:-2000000000000}
test_34a() {
rm -f $DIR/f34
local i
# generate some changelog records to accumulate on each MDT
- test_mkdir -c $MDSCOUNT $DIR/$tdir || error "mkdir $tdir failed"
+ test_mkdir -c $MDSCOUNT -H fnv_1a_64 $DIR/$tdir ||
+ error "mkdir $tdir failed"
createmany -m $DIR/$tdir/$tfile $((MDSCOUNT * 2)) ||
error "create $DIR/$tdir/$tfile failed"
local i
# generate some changelog records to accumulate on each MDT
- test_mkdir -c $MDSCOUNT $DIR/$tdir || error "test_mkdir $tdir failed"
+ test_mkdir -c $MDSCOUNT -H fnv_1a_64 $DIR/$tdir ||
+ error "test_mkdir $tdir failed"
createmany -m $DIR/$tdir/$tfile $((MDSCOUNT * 2)) ||
error "create $DIR/$tdir/$tfile failed"
changelog_register || error "first changelog_register failed"
# generate some changelog records to accumulate on each MDT
- test_mkdir -c $MDSCOUNT $DIR/$tdir || error "mkdir $tdir failed"
+ test_mkdir -c $MDSCOUNT -H fnv_1a_64 $DIR/$tdir ||
+ error "mkdir $tdir failed"
createmany -m $DIR/$tdir/$tfile $((MDSCOUNT * 2)) ||
error "create $DIR/$tdir/$tfile failed"
chown nobody $DIR/$tdir/d
touch $DIR/$tdir/d/file
- $LFS mv -M1 $DIR/$tdir/d || error "lfs mv failed"
+ $LFS mv -m1 $DIR/$tdir/d || error "lfs mv failed"
}
run_test 316 "lfs mv"
# $LFS quotaoff -ug $MOUNT > /dev/null 2>&1
fi
fi
+
+ do_nodes $(comma_list $(mdts_nodes)) \
+ "$LCTL set_param lod.*.mdt_hash=crush"
return 0
}
test_mkdir() {
local path
local p_option
+ local hash_type
+ local hash_name=("all_char" "fnv_1a_64" "crush")
local dirstripe_count=${DIRSTRIPE_COUNT:-"2"}
local dirstripe_index=${DIRSTRIPE_INDEX:-$((base % $MDSCOUNT))}
local OPTIND=1
- while getopts "c:i:p" opt; do
+ while getopts "c:H:i:p" opt; do
case $opt in
c) dirstripe_count=$OPTARG;;
+ H) hash_type=$OPTARG;;
i) dirstripe_index=$OPTARG;;
p) p_option="-p";;
- \?) error "only support -i -c -p";;
+ \?) error "only support -c -H -i -p";;
esac
done
mdt_index=$dirstripe_index
fi
+ # randomly choose hash type
+ [ -z "$hash_type" ] &&
+ hash_type=${hash_name[$((RANDOM % ${#hash_name[@]}))]}
+
if (($MDS1_VERSION >= $(version_code 2.8.0))); then
if [ $dirstripe_count -eq -1 ]; then
dirstripe_count=$((RANDOM % MDSCOUNT + 1))
dirstripe_count=1
fi
- echo "striped dir -i$mdt_index -c$dirstripe_count $path"
- $LFS mkdir -i$mdt_index -c$dirstripe_count $path ||
- error "mkdir -i $mdt_index -c$dirstripe_count $path failed"
+ echo "striped dir -i$mdt_index -c$dirstripe_count -H $hash_type $path"
+ $LFS mkdir -i$mdt_index -c$dirstripe_count -H $hash_type $path ||
+ error "mkdir -i $mdt_index -c$dirstripe_count -H $hash_type $path failed"
fi
}
"\tstripe_count: stripe count of the striped directory\n" \
"\tmdt_index: MDT index of first stripe\n" \
"\tmdt_hash: hash type of the striped directory. mdt types:\n" \
- " fnv_1a_64 FNV-1a hash algorithm (default)\n" \
+ " crush CRUSH hash algorithm (default)\n" \
+ " fnv_1a_64 FNV-1a hash algorithm\n" \
" all_char sum of characters % MDT_COUNT (not recommended)\n" \
- " space create subdirectories with balanced space usage\n" \
"\tdefault_stripe: set default dirstripe of the directory\n" \
"\tmode: the file access permission of the directory (octal)\n" \
"To create dir with a foreign (free format) layout :\n" \
" it's the MDT index of first stripe\n"
"\tmdt_count: number of MDTs to stripe a directory over\n"
"\tmdt_hash: hash type of the striped directory. mdt types:\n"
- " fnv_1a_64 FNV-1a hash algorithm (default)\n"
+ " crush CRUSH hash algorithm (default)\n"
+ " fnv_1a_64 FNV-1a hash algorithm\n"
" all_char sum of characters % MDT_COUNT\n"
"\n"
"migrate file objects from one OST "
if (lsa.lsa_pattern != LLAPI_LAYOUT_RAID0)
lmu->lum_hash_type = lsa.lsa_pattern;
else
- lmu->lum_hash_type = LMV_HASH_TYPE_DEFAULT;
+ lmu->lum_hash_type = LMV_HASH_TYPE_UNKNOWN;
if (lsa.lsa_pool_name) {
strncpy(lmu->lum_pool_name, lsa.lsa_pool_name,
sizeof(lmu->lum_pool_name) - 1);
if (lsa.lsa_pattern != LLAPI_LAYOUT_RAID0)
param->lsp_stripe_pattern = lsa.lsa_pattern;
else
- param->lsp_stripe_pattern = LMV_HASH_TYPE_DEFAULT;
+ param->lsp_stripe_pattern = LMV_HASH_TYPE_UNKNOWN;
param->lsp_pool = lsa.lsa_pool_name;
param->lsp_is_specific = false;
if (lsa.lsa_nr_tgts > 1) {
return CMD_HELP;
}
+ lmu.lum_hash_type = LMV_HASH_TYPE_UNKNOWN;
/* initialize migrate mdt parameters */
param.fp_lmv_md = &lmu;
rc = llapi_migrate_mdt(argv[optind], &param);
if (rc != 0)
fprintf(stderr, "%s mv: cannot migrate '%s' to MDT%04x: %s\n",
- progname, argv[optind], param.fp_mdt_index,
+ progname, argv[optind], lmu.lum_stripe_offset,
strerror(-rc));
return rc;
}
char *mdt_hash_name[] = { "none",
LMV_HASH_NAME_ALL_CHARS,
LMV_HASH_NAME_FNV_1A_64,
+ LMV_HASH_NAME_CRUSH,
};
struct lustre_foreign_type lu_foreign_types[] = {
{
int64_t _hash = hash & LMV_HASH_TYPE_MASK;
- return _hash > LMV_HASH_TYPE_UNKNOWN && _hash < LMV_HASH_TYPE_MAX;
+ return _hash >= LMV_HASH_TYPE_UNKNOWN && _hash < LMV_HASH_TYPE_MAX;
}
/*
CHECK_CDEFINE(LMV_HASH_FLAG_LOST_LMV);
CHECK_CDEFINE(LMV_HASH_FLAG_BAD_TYPE);
CHECK_CDEFINE(LMV_HASH_FLAG_MIGRATION);
+ CHECK_CDEFINE(LMV_CRUSH_PG_COUNT);
}
static void
BUILD_BUG_ON(LMV_HASH_FLAG_LOST_LMV != 0x10000000);
BUILD_BUG_ON(LMV_HASH_FLAG_BAD_TYPE != 0x20000000);
BUILD_BUG_ON(LMV_HASH_FLAG_MIGRATION != 0x80000000);
+ BUILD_BUG_ON(LMV_CRUSH_PG_COUNT != 4096);
/* Checks for struct obd_statfs */
LASSERTF((int)sizeof(struct obd_statfs) == 144, "found %lld\n",