RETURN(rc);
}
-static struct lu_tgt_desc *lmv_locate_tgt_qos(struct lmv_obd *lmv, __u32 *mdt,
+static struct lu_tgt_desc *lmv_locate_tgt_qos(struct lmv_obd *lmv, __u32 mdt,
unsigned short dir_depth)
{
struct lu_tgt_desc *tgt, *cur = NULL;
tgt->ltd_qos.ltq_usable = 1;
lu_tgt_qos_weight_calc(tgt);
- if (tgt->ltd_index == *mdt)
+ if (tgt->ltd_index == mdt)
cur = tgt;
total_avail += tgt->ltd_qos.ltq_avail;
total_weight += tgt->ltd_qos.ltq_weight;
(total_usable * 256 * (1 + dir_depth / 4));
if (cur && cur->ltd_qos.ltq_avail >= rand) {
tgt = cur;
- GOTO(unlock, rc = 0);
+ GOTO(unlock, tgt);
}
rand = lu_prandom_u64_max(total_weight);
if (cur_weight < rand)
continue;
- *mdt = tgt->ltd_index;
ltd_qos_update(&lmv->lmv_mdt_descs, tgt, &total_weight);
- GOTO(unlock, rc = 0);
+ GOTO(unlock, tgt);
}
/* no proper target found */
return tgt;
}
-static struct lu_tgt_desc *lmv_locate_tgt_rr(struct lmv_obd *lmv, __u32 *mdt)
+static struct lu_tgt_desc *lmv_locate_tgt_rr(struct lmv_obd *lmv)
{
struct lu_tgt_desc *tgt;
int i;
if (!tgt || !tgt->ltd_exp || !tgt->ltd_active)
continue;
- *mdt = tgt->ltd_index;
- lmv->lmv_qos_rr_index = (*mdt + 1) %
+ lmv->lmv_qos_rr_index = (tgt->ltd_index + 1) %
lmv->lmv_mdt_descs.ltd_tgts_size;
spin_unlock(&lmv->lmv_lock);
RETURN(ERR_PTR(-ENODEV));
}
+/* locate MDT which is less full (avoid the most full MDT) */
+static struct lu_tgt_desc *lmv_locate_tgt_lf(struct lmv_obd *lmv)
+{
+ struct lu_tgt_desc *min = NULL;
+ struct lu_tgt_desc *tgt;
+ __u64 avail = 0;
+ __u64 rand;
+
+ ENTRY;
+
+ if (!ltd_qos_is_usable(&lmv->lmv_mdt_descs))
+ RETURN(ERR_PTR(-EAGAIN));
+
+ down_write(&lmv->lmv_qos.lq_rw_sem);
+
+ if (!ltd_qos_is_usable(&lmv->lmv_mdt_descs))
+ GOTO(unlock, tgt = ERR_PTR(-EAGAIN));
+
+ lmv_foreach_tgt(lmv, tgt) {
+ if (!tgt->ltd_exp || !tgt->ltd_active) {
+ tgt->ltd_qos.ltq_usable = 0;
+ continue;
+ }
+
+ tgt->ltd_qos.ltq_usable = 1;
+ lu_tgt_qos_weight_calc(tgt);
+ avail += tgt->ltd_qos.ltq_avail;
+ if (!min || min->ltd_qos.ltq_avail > tgt->ltd_qos.ltq_avail)
+ min = tgt;
+ }
+
+ /* avoid the most full MDT */
+ if (min)
+ avail -= min->ltd_qos.ltq_avail;
+
+ rand = lu_prandom_u64_max(avail);
+ avail = 0;
+ lmv_foreach_connected_tgt(lmv, tgt) {
+ if (!tgt->ltd_qos.ltq_usable)
+ continue;
+
+ if (tgt == min)
+ continue;
+
+ avail += tgt->ltd_qos.ltq_avail;
+ if (avail < rand)
+ continue;
+
+ GOTO(unlock, tgt);
+ }
+
+ /* no proper target found */
+ GOTO(unlock, tgt = ERR_PTR(-EAGAIN));
+unlock:
+ up_write(&lmv->lmv_qos.lq_rw_sem);
+
+ RETURN(tgt);
+}
+
/* locate MDT by file name, for striped directory, the file name hash decides
* which stripe its dirent is stored.
*/
} else if (lmv_op_qos_mkdir(op_data)) {
struct lmv_tgt_desc *tmp = tgt;
- tgt = lmv_locate_tgt_qos(lmv, &op_data->op_mds,
+ tgt = lmv_locate_tgt_qos(lmv, op_data->op_mds,
op_data->op_dir_depth);
if (tgt == ERR_PTR(-EAGAIN)) {
if (ltd_qos_is_balanced(&lmv->lmv_mdt_descs) &&
*/
tgt = tmp;
else
- tgt = lmv_locate_tgt_rr(lmv, &op_data->op_mds);
+ tgt = lmv_locate_tgt_rr(lmv);
}
if (IS_ERR(tgt))
RETURN(PTR_ERR(tgt));
+ op_data->op_mds = tgt->ltd_index;
/*
* only update statfs after QoS mkdir, this means the cached
* statfs may be stale, and current mkdir may not follow QoS
RETURN(rc);
}
+/* migrate the top directory */
+static inline bool lmv_op_topdir_migrate(const struct md_op_data *op_data)
+{
+ if (!S_ISDIR(op_data->op_mode))
+ return false;
+
+ if (lmv_dir_layout_changing(op_data->op_mea1))
+ return false;
+
+ return true;
+}
+
+/* migrate top dir to specific MDTs */
+static inline bool lmv_topdir_specific_migrate(const struct md_op_data *op_data)
+{
+ const struct lmv_user_md *lum = op_data->op_data;
+
+ if (!lmv_op_topdir_migrate(op_data))
+ return false;
+
+ return le32_to_cpu(lum->lum_stripe_offset) != LMV_OFFSET_DEFAULT;
+}
+
+/* migrate top dir in QoS mode if user issued "lfs migrate -m -1..." */
+static inline bool lmv_topdir_qos_migrate(const struct md_op_data *op_data)
+{
+ const struct lmv_user_md *lum = op_data->op_data;
+
+ if (!lmv_op_topdir_migrate(op_data))
+ return false;
+
+ return le32_to_cpu(lum->lum_stripe_offset) == LMV_OFFSET_DEFAULT;
+}
+
+static inline bool lmv_subdir_specific_migrate(const struct md_op_data *op_data)
+{
+ const struct lmv_user_md *lum = op_data->op_data;
+
+ if (!S_ISDIR(op_data->op_mode))
+ return false;
+
+ if (!lmv_dir_layout_changing(op_data->op_mea1))
+ return false;
+
+ return le32_to_cpu(lum->lum_stripe_offset) != LMV_OFFSET_DEFAULT;
+}
+
static int lmv_migrate(struct obd_export *exp, struct md_op_data *op_data,
const char *name, size_t namelen,
struct ptlrpc_request **request)
if (IS_ERR(child_tgt))
RETURN(PTR_ERR(child_tgt));
- /* for directory, migrate to MDT specified by lum_stripe_offset;
- * otherwise migrate to the target stripe of parent, but parent
- * directory may have finished migration (normally current file too),
- * allocate FID on MDT lum_stripe_offset, and server will check
- * whether file was migrated already.
- */
- if (S_ISDIR(op_data->op_mode) || !tp_tgt) {
+ if (lmv_topdir_specific_migrate(op_data)) {
struct lmv_user_md *lum = op_data->op_data;
op_data->op_mds = le32_to_cpu(lum->lum_stripe_offset);
- } else {
+ } else if (lmv_topdir_qos_migrate(op_data)) {
+ tgt = lmv_locate_tgt_lf(lmv);
+ if (tgt == ERR_PTR(-EAGAIN))
+ tgt = lmv_locate_tgt_rr(lmv);
+ if (IS_ERR(tgt))
+ RETURN(PTR_ERR(tgt));
+
+ op_data->op_mds = tgt->ltd_index;
+ } else if (lmv_subdir_specific_migrate(op_data)) {
+ struct lmv_user_md *lum = op_data->op_data;
+ __u32 i;
+
+ LASSERT(tp_tgt);
+ if (le32_to_cpu(lum->lum_magic) == LMV_USER_MAGIC_SPECIFIC) {
+ /* adjust MDTs in lum, since subdir is located on where
+ * its parent stripe is, not the first specified MDT.
+ */
+ for (i = 0; i < le32_to_cpu(lum->lum_stripe_count);
+ i++) {
+ if (le32_to_cpu(lum->lum_objects[i].lum_mds) ==
+ tp_tgt->ltd_index)
+ break;
+ }
+
+ if (i == le32_to_cpu(lum->lum_stripe_count))
+ RETURN(-ENODEV);
+
+ lum->lum_objects[i].lum_mds =
+ lum->lum_objects[0].lum_mds;
+ lum->lum_objects[0].lum_mds =
+ cpu_to_le32(tp_tgt->ltd_index);
+ }
+ /* NB, the above adjusts subdir migration for command like
+ * "lfs migrate -m 0,1,2 ...", but for migration like
+ * "lfs migrate -m 0 -c 2 ...", the top dir is migrated to MDT0
+ * and MDT1, however its subdir may be migrated to MDT1 and MDT2
+ */
+
+ lum->lum_stripe_offset = cpu_to_le32(tp_tgt->ltd_index);
+ op_data->op_mds = tp_tgt->ltd_index;
+ } else if (tp_tgt) {
op_data->op_mds = tp_tgt->ltd_index;
+ } else {
+ op_data->op_mds = sp_tgt->ltd_index;
}
+
rc = lmv_fid_alloc(NULL, exp, &target_fid, op_data);
if (rc)
RETURN(rc);
error "migrate remote dir error"
echo "Finish migration, then checking.."
- for file in $(find $migrate_dir); do
+ for file in $(find $migrate_dir -maxdepth 1); do
mdt_index=$($LFS getstripe -m $file)
if [ $mdt_index -lt $new_index ] ||
[ $mdt_index -gt $((new_index + new_count - 1)) ]; then
}
run_test 230t "migrate directory with project ID set"
+test_230u()
+{
+ (( MDSCOUNT > 3 )) || skip_env "needs >= 4 MDTs"
+ (( MDS1_VERSION >= $(version_code 2.14.53) )) ||
+ skip "Need MDS version at least 2.14.53"
+
+ local count
+
+ mkdir_on_mdt0 $DIR/$tdir || error "mkdir $tdir failed"
+ mkdir $DIR/$tdir/sub{0..99} || error "mkdir sub failed"
+ $LFS migrate -m -1 $DIR/$tdir/sub{0..99} || error "migrate sub failed"
+ for i in $(seq 0 $((MDSCOUNT - 1))); do
+ count=$($LFS getstripe -m $DIR/$tdir/sub* | grep -c ^$i)
+ echo "$count dirs migrated to MDT$i"
+ done
+ count=$($LFS getstripe -m $DIR/$tdir/sub* | sort -u | wc -l)
+ (( count >= MDSCOUNT - 1 )) || error "dirs migrated to $count MDTs"
+}
+run_test 230u "migrate directory by QOS"
+
+test_230v()
+{
+ (( MDSCOUNT > 3 )) || skip_env "needs >= 4 MDTs"
+ (( MDS1_VERSION >= $(version_code 2.14.53) )) ||
+ skip "Need MDS version at least 2.14.53"
+
+ local count
+
+ mkdir $DIR/$tdir || error "mkdir $tdir failed"
+ mkdir $DIR/$tdir/sub{0..99} || error "mkdir sub failed"
+ $LFS migrate -m 0,2,1 $DIR/$tdir || error "migrate $tdir failed"
+ for i in $(seq 0 $((MDSCOUNT - 1))); do
+ count=$($LFS getstripe -m $DIR/$tdir/sub* | grep -c ^$i)
+ echo "$count subdirs migrated to MDT$i"
+ (( i == 3 )) && (( count > 0 )) &&
+ error "subdir shouldn't be migrated to MDT3"
+ done
+ count=$($LFS getstripe -m $DIR/$tdir/sub* | sort -u | wc -l)
+ (( count == 3 )) || error "dirs migrated to $count MDTs"
+}
+run_test 230v "subdir migrated to the MDT where its parent is located"
+
test_231a()
{
# For simplicity this test assumes that max_pages_per_rpc