#define lmv_tgt_desc lu_tgt_desc
+/* One QoS mkdir exclude prefix; hashed by name and also kept on a list. */
+struct qos_exclude_prefix {
+ /* linkage on lmv_obd::lmv_qos_exclude_list */
+ struct list_head qep_list;
+ /* linkage in lmv_obd::lmv_qos_exclude_hash */
+ struct rhash_head qep_hash;
+ /* NUL-terminated prefix string, used as the hash key */
+ char qep_name[NAME_MAX + 1];
+};
+
struct lmv_obd {
struct lu_client_fld lmv_fld;
spinlock_t lmv_lock;
void *lmv_cache;
__u32 lmv_qos_rr_index; /* next round-robin MDT idx */
+ /* QoS mkdir exclude prefixes (struct qos_exclude_prefix), keyed by name */
+ struct rhashtable lmv_qos_exclude_hash;
+ /* the same prefixes in insertion order, walked when listing them */
+ struct list_head lmv_qos_exclude_list;
};
#define lmv_mdt_count lmv_mdt_descs.ltd_lmv_desc.ld_tgt_count
struct md_op_data *op_data);
int lmv_old_layout_lookup(struct lmv_obd *lmv, struct md_op_data *op_data);
+extern const struct rhashtable_params qos_exclude_hash_params;
+void qos_exclude_prefix_free(void *vprefix, void *data);
+
/* lproc_lmv.c */
int lmv_tunables_init(struct obd_device *obd);
#endif
RETURN(rc);
}
+/*
+ * Hash callback for the QoS exclude prefix table.
+ *
+ * \param[in] data	NUL-terminated name to hash
+ * \param[in] len	unused, the name is hashed up to its terminator
+ * \param[in] seed	table seed, handed to cfs_hashlen_string() as a pointer
+ *
+ * \retval		hash value extracted with hashlen_hash()
+ */
+static u32 qos_exclude_hashfh(const void *data, u32 len, u32 seed)
+{
+	void *seed_ptr = (void *)(unsigned long)seed;
+	const char *str = data;
+
+	return hashlen_hash(cfs_hashlen_string(seed_ptr, str));
+}
+
+/*
+ * Compare callback for the QoS exclude prefix table.
+ *
+ * \param[in] arg	lookup argument, arg->key is the NUL-terminated name
+ * \param[in] obj	candidate struct qos_exclude_prefix
+ *
+ * \retval 0		the name equals the candidate's qep_name
+ * \retval non-zero	the strings differ (strcmp() result)
+ */
+static int qos_exclude_cmpfn(struct rhashtable_compare_arg *arg,
+			     const void *obj)
+{
+	const struct qos_exclude_prefix *entry = obj;
+
+	return strcmp(arg->key, entry->qep_name);
+}
+
+/*
+ * rhashtable parameters for the QoS exclude prefix table. Objects are
+ * struct qos_exclude_prefix, keyed by the qep_name string; hashing and
+ * comparison go through the custom callbacks below.
+ */
+const struct rhashtable_params qos_exclude_hash_params = {
+ /*
+  * Placeholder only: keys are NUL-terminated strings of variable length,
+  * and neither the hash nor the compare callback uses the length.
+  */
+ .key_len = 1, /* actually variable */
+ .key_offset = offsetof(struct qos_exclude_prefix, qep_name),
+ .head_offset = offsetof(struct qos_exclude_prefix, qep_hash),
+ .hashfn = qos_exclude_hashfh,
+ .obj_cmpfn = qos_exclude_cmpfn,
+ .automatic_shrinking = true,
+};
+
+/*
+ * Release one exclude prefix: unlink it from the list it is on and free it.
+ * The signature matches the per-object callback passed to
+ * rhashtable_free_and_destroy().
+ *
+ * \param[in] vprefix	struct qos_exclude_prefix to release
+ * \param[in] data	unused
+ */
+void qos_exclude_prefix_free(void *vprefix, void *data)
+{
+	struct qos_exclude_prefix *entry = vprefix;
+
+	list_del(&entry->qep_list);
+	kfree(entry);
+}
+
static int lmv_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
{
struct lmv_obd *lmv = &obd->u.lmv;
- struct lmv_desc *desc;
+ struct lmv_desc *desc;
struct lnet_processid lnet_id;
+ struct qos_exclude_prefix *prefix;
int i = 0;
int rc;
lmv->max_easize = 0;
spin_lock_init(&lmv->lmv_lock);
+ INIT_LIST_HEAD(&lmv->lmv_qos_exclude_list);
/*
* initialize rr_index to lower 32bit of netid, so that client
CWARN("%s: error initialize target table: rc = %d\n",
obd->obd_name, rc);
- RETURN(rc);
+ rc = rhashtable_init(&lmv->lmv_qos_exclude_hash,
+ &qos_exclude_hash_params);
+ if (rc) {
+ CERROR("%s: qos exclude hash initalize failed: %d\n",
+ obd->obd_name, rc);
+ RETURN(rc);
+ }
+
+ prefix = kmalloc(sizeof(*prefix), __GFP_ZERO);
+ if (!prefix)
+ GOTO(out, rc = -ENOMEM);
+ /* Apache Spark creates a _temporary directory for staging files */
+ strcpy(prefix->qep_name, "_temporary");
+ rc = rhashtable_insert_fast(&lmv->lmv_qos_exclude_hash,
+ &prefix->qep_hash, qos_exclude_hash_params);
+ if (rc) {
+ kfree(prefix);
+ GOTO(out, rc);
+ }
+
+ list_add_tail(&prefix->qep_list, &lmv->lmv_qos_exclude_list);
+ GOTO(out, rc);
+out:
+ if (rc)
+ rhashtable_destroy(&lmv->lmv_qos_exclude_hash);
+ return rc;
}
static int lmv_cleanup(struct obd_device *obd)
ENTRY;
+ rhashtable_free_and_destroy(&lmv->lmv_qos_exclude_hash,
+ qos_exclude_prefix_free, NULL);
fld_client_fini(&lmv->lmv_fld);
fld_client_debugfs_fini(&lmv->lmv_fld);
return tgt;
}
+/*
+ * Check whether the name being created matches a QoS exclude prefix.
+ *
+ * The part of the name before its last '.' (or the whole name when it has
+ * no '.') is looked up in lmv_qos_exclude_hash, so that e.g. both
+ * "_temporary" and "_temporary.XXXXXX" match the "_temporary" prefix.
+ *
+ * \param[in] lmv	LMV device holding the exclude table
+ * \param[in] op_data	operation data, op_name/op_namelen describe the name
+ *
+ * \retval true		the name matches an exclude prefix
+ * \retval false	no match, or the name is encrypted, longer than
+ *			NAME_MAX, or starts with its only '.'
+ */
+static bool lmv_qos_exclude(struct lmv_obd *lmv, struct md_op_data *op_data)
+{
+	const char *key = op_data->op_name;
+	char stem[NAME_MAX + 1];
+	const char *dot;
+	size_t stemlen;
+
+	/* skip encrypted files */
+	if (op_data->op_file_encctx)
+		return false;
+
+	/* name length may not be validated yet */
+	if (op_data->op_namelen > NAME_MAX)
+		return false;
+
+	dot = strrchr(key, '.');
+	if (dot) {
+		stemlen = dot - key;
+		if (stemlen == 0)
+			return false;
+		/* the stem has no NUL before 'dot', a plain copy suffices */
+		memcpy(stem, key, stemlen);
+		stem[stemlen] = '\0';
+		key = stem;
+	}
+
+	return rhashtable_lookup_fast(&lmv->lmv_qos_exclude_hash, key,
+				      qos_exclude_hash_params) != NULL;
+}
+
static int lmv_create(struct obd_export *exp, struct md_op_data *op_data,
const void *data, size_t datalen, umode_t mode, uid_t uid,
gid_t gid, kernel_cap_t cap_effective, __u64 rdev,
RETURN(-ENODEV);
if (unlikely(tgt->ltd_statfs.os_state & OS_STATFS_NOCREATE))
GOTO(new_tgt, -EAGAIN);
- } else if (lmv_op_default_qos_mkdir(op_data) ||
+ } else if ((lmv_op_default_qos_mkdir(op_data) &&
+ !lmv_qos_exclude(lmv, op_data)) ||
tgt->ltd_statfs.os_state & OS_STATFS_NOCREATE) {
new_tgt:
tgt = lmv_locate_tgt_by_space(lmv, op_data, tgt);
LUSTRE_RW_ATTR(qos_threshold_rr);
#ifdef CONFIG_PROC_FS
+/*
+ * Directories whose names match an exclude prefix are created on the same
+ * MDT as their parent directory. Prefixes are written in the same form as
+ * the shell PATH variable, with ':' separating the prefixes. For
+ * convenience, a leading '+' adds prefixes and a leading '-' removes them.
+ *
+ * Reading the file prints the current prefixes, one per line.
+ */
+static int qos_exclude_prefixes_seq_show(struct seq_file *m, void *v)
+{
+ struct obd_device *obd = m->private;
+ struct lmv_obd *lmv = &obd->u.lmv;
+ struct qos_exclude_prefix *prefix;
+
+restart:
+ /* the list is walked under lmv_lock, which the write handler also takes */
+ spin_lock(&lmv->lmv_lock);
+ list_for_each_entry(prefix, &lmv->lmv_qos_exclude_list, qep_list) {
+ seq_printf(m, "%s\n", prefix->qep_name);
+ if (seq_has_overflowed(m)) {
+ /*
+  * Output did not fit: drop the lock, replace the seq buffer
+  * with one twice the size, and print the list from the start.
+  */
+ spin_unlock(&lmv->lmv_lock);
+ kvfree(m->buf);
+ m->count = 0;
+ m->buf = kvmalloc(m->size <<= 1, GFP_KERNEL_ACCOUNT);
+ if (!m->buf)
+ return -ENOMEM;
+ goto restart;
+ }
+ }
+ spin_unlock(&lmv->lmv_lock);
+
+ return 0;
+}
+
+/*
+ * Update the QoS exclude prefixes from a ':'-separated list.
+ *
+ * Each entry may start with an operator, which stays in effect for the
+ * entries that follow it:
+ *   '+'   add the prefix (adding an existing prefix is a no-op)
+ *   '-'   remove the prefix if present
+ *   none  replace: the first such entry of a write drops all current
+ *         prefixes, then it and the following entries are added
+ *
+ * Entries processed before an error remain applied.
+ *
+ * \param[in] file	proc file, its seq_file private data is the obd device
+ * \param[in] buffer	user buffer holding the prefix list
+ * \param[in] count	number of bytes in \a buffer
+ * \param[in] off	unused
+ *
+ * \retval count	on success
+ * \retval -ENOMEM	allocation failure
+ * \retval -EFAULT	\a buffer could not be copied from user space
+ * \retval -ERANGE	a prefix is longer than NAME_MAX
+ * \retval negative	other error returned by the hash table insert
+ */
+static ssize_t qos_exclude_prefixes_seq_write(struct file *file,
+					      const char __user *buffer,
+					      size_t count, loff_t *off)
+{
+	struct obd_device *obd;
+	struct lmv_obd *lmv;
+	char *buf;
+	char op = 0;
+	char *p;
+	char *name;
+	char namebuf[NAME_MAX + 1];
+	struct qos_exclude_prefix *prefix;
+	struct qos_exclude_prefix *tmp;
+	int len;
+	bool pruned = false;
+	ssize_t result = count;
+	int rc;
+
+	/* one extra char to ensure buf ends with '\0' */
+	OBD_ALLOC(buf, count + 1);
+	if (!buf)
+		return -ENOMEM;
+	if (copy_from_user(buf, buffer, count)) {
+		result = -EFAULT;
+		goto out;
+	}
+
+	obd = ((struct seq_file *)file->private_data)->private;
+	lmv = &obd->u.lmv;
+	p = buf;
+	while (p) {
+		/* skip empty entries */
+		while (*p == ':')
+			p++;
+		if (*p == '\0')
+			break;
+		/* an operator applies until the next one is seen */
+		if (*p == '+' || *p == '-')
+			op = *p++;
+
+		name = p;
+		p = strchr(name, ':');
+		if (p)
+			len = p - name;
+		else
+			len = strlen(name);
+		if (!len)
+			break;
+		if (len > NAME_MAX) {
+			CERROR("%s: %s length exceeds NAME_MAX\n",
+			       obd->obd_name, name);
+			result = -ERANGE;
+			goto out;
+		}
+
+		/*
+		 * NOTE(review): prefixes are freed right after removal while
+		 * lmv_qos_exclude() looks them up without taking lmv_lock;
+		 * confirm whether an RCU grace period is needed before
+		 * kfree().
+		 */
+		switch (op) {
+		default:
+			/* no operator: replace the whole set, prune once */
+			if (!pruned) {
+				spin_lock(&lmv->lmv_lock);
+				list_for_each_entry_safe(prefix, tmp,
+						&lmv->lmv_qos_exclude_list,
+						qep_list) {
+					list_del(&prefix->qep_list);
+					rhashtable_remove_fast(
+						&lmv->lmv_qos_exclude_hash,
+						&prefix->qep_hash,
+						qos_exclude_hash_params);
+					kfree(prefix);
+				}
+				spin_unlock(&lmv->lmv_lock);
+				pruned = true;
+			}
+			fallthrough;
+		case '+':
+			/* zeroed allocation keeps qep_name NUL-terminated */
+			prefix = kzalloc(sizeof(*prefix), GFP_KERNEL);
+			if (!prefix) {
+				result = -ENOMEM;
+				goto out;
+			}
+			memcpy(prefix->qep_name, name, len);
+			rc = rhashtable_lookup_insert_fast(
+						&lmv->lmv_qos_exclude_hash,
+						&prefix->qep_hash,
+						qos_exclude_hash_params);
+			if (!rc) {
+				spin_lock(&lmv->lmv_lock);
+				list_add_tail(&prefix->qep_list,
+					      &lmv->lmv_qos_exclude_list);
+				spin_unlock(&lmv->lmv_lock);
+			} else {
+				kfree(prefix);
+				/* an already present prefix is not an error */
+				if (rc != -EEXIST) {
+					result = rc;
+					goto out;
+				}
+			}
+			break;
+		case '-':
+			memcpy(namebuf, name, len);
+			namebuf[len] = '\0';
+			/* the _fast variant takes the RCU read lock itself */
+			prefix = rhashtable_lookup_fast(
+						&lmv->lmv_qos_exclude_hash,
+						namebuf,
+						qos_exclude_hash_params);
+			if (prefix) {
+				spin_lock(&lmv->lmv_lock);
+				list_del(&prefix->qep_list);
+				spin_unlock(&lmv->lmv_lock);
+				rhashtable_remove_fast(
+						&lmv->lmv_qos_exclude_hash,
+						&prefix->qep_hash,
+						qos_exclude_hash_params);
+				kfree(prefix);
+			}
+			break;
+		}
+	}
+
+out:
+	OBD_FREE(buf, count + 1);
+	return result;
+}
+LPROC_SEQ_FOPS(qos_exclude_prefixes);
+
static void *lmv_tgt_seq_start(struct seq_file *p, loff_t *pos)
{
struct obd_device *obd = p->private;
.proc_lseek = seq_lseek,
.proc_release = seq_release,
};
+
+/*
+ * Per-device LMV proc files, each entry pairing a file name with its
+ * file operations; the table ends with an empty entry.
+ */
+struct lprocfs_vars lprocfs_lmv_obd_vars[] = {
+ { .name = "qos_exclude_prefixes",
+ .fops = &qos_exclude_prefixes_fops },
+ { .name = "target_obd",
+ .fops = &lmv_proc_target_fops },
+ { NULL }
+};
#endif /* CONFIG_PROC_FS */
static struct attribute *lmv_attrs[] = {
int rc;
obd->obd_ktype.default_groups = KOBJ_ATTR_GROUPS(lmv);
+#ifdef CONFIG_PROC_FS
+ obd->obd_vars = lprocfs_lmv_obd_vars;
+#endif
rc = lprocfs_obd_setup(obd, true);
if (rc)
goto out_failed;
lprocfs_obd_cleanup(obd);
goto out_failed;
}
-
- rc = lprocfs_seq_create(obd->obd_proc_entry, "target_obd",
- 0444, &lmv_proc_target_fops, obd);
- if (rc) {
- lprocfs_free_md_stats(obd);
- lprocfs_obd_cleanup(obd);
- CWARN("%s: error adding LMV target_obd file: rc = %d\n",
- obd->obd_name, rc);
- rc = 0;
- }
#endif /* CONFIG_PROC_FS */
out_failed:
return rc;
}
run_test 413j "set default LMV by setxattr"
+test_413k() {
+ (( $MDS1_VERSION >= $(version_code 2.15.60) )) ||
+ skip "Need server version at least 2.15.60"
+
+ local index1
+ local index2
+ local old=$($LCTL get_param -n lmv.*.qos_exclude_prefixes)
+ local count=$($LCTL get_param -n lmv.*.qos_exclude_prefixes | wc -l)
+ local prefixes="abc:123:foo bar"
+
+ # add prefixes
+ stack_trap "$LCTL set_param lmv.*.qos_exclude_prefixes=\"$old\""
+ $LCTL set_param lmv.*.qos_exclude_prefixes="+$prefixes"
+
+ mkdir $DIR/$tdir || error "mkdir $tdir failed"
+ index1=$($LFS getstripe -m $DIR/$tdir)
+ for dname in _temporary _temporary.XXXXXX abc 123 "foo bar"; do
+ mkdir "$DIR/$tdir/$dname" || error "mkdir $dname failed"
+ index2=$($LFS getstripe -m "$DIR/$tdir/$dname")
+ ((index1 == index2)) ||
+ error "$tdir on MDT$index1, $dname on MDT$index2"
+ done
+
+ # remove prefixes
+ $LCTL set_param lmv.*.qos_exclude_prefixes="-$prefixes"
+
+ # total prefixes length > PAGE_SIZE can be printed correctly
+ for c in {a..z}; do
+ prefixes=$(str_repeat $c 255)
+ $LCTL set_param lmv.*.qos_exclude_prefixes="+$prefixes" >/dev/null
+ done
+ local count2=$($LCTL get_param -n lmv.*.qos_exclude_prefixes | wc -l)
+ ((count2 == count + 26)) ||
+ error "prefixes count $count2 != $((count + 26))"
+}
+run_test 413k "QoS mkdir exclude prefixes"
+
test_413z() {
local pids=""
local subdir