From e49a446fd41b46e6909b87074d859ca74a22d0ee Mon Sep 17 00:00:00 2001
From: Alex Zhuravlev
Date: Thu, 22 Apr 2021 15:33:24 +0300
Subject: [PATCH] EX-2797 lpurge: initial support for DoM

lpurge should be able to scan MDT device, recognize objects
with DoM component and remove a replica with DoM component
if another in-sync replica exists.

Test-Parameters: testlist=hot-pools
Signed-off-by: Alex Zhuravlev
Change-Id: If12e0448ab07527d86832d942a63b4a0189ad7a0
Reviewed-on: https://review.whamcloud.com/43405
Reviewed-by: John L. Hammond
Tested-by: John L. Hammond
---
Review note: lpurge_check_mdt_object() now accepts an in-sync component
only if it belongs to a mirror other than the DoM one. Hunk offsets and
the diffstat were adjusted accordingly; the post-image blob id on the
"index" line was not recomputed.

 lipe/src/lpurge.c         | 148 ++++++++++++++++++++++++++++++++++++----------
 lustre/tests/hot-pools.sh |  90 ++++++++++++++++++++++++++++++-
 2 files changed, 209 insertions(+), 29 deletions(-)

diff --git a/lipe/src/lpurge.c b/lipe/src/lpurge.c
index ca4493f..73c0d0e 100644
--- a/lipe/src/lpurge.c
+++ b/lipe/src/lpurge.c
@@ -157,6 +157,7 @@ char *ostname;
 char *ostprefix;
 char ost_mntdev[PATH_MAX];
 char *ost_mntpt;
+bool is_mdt = false;
 int lustre_fd = -1;
 int open_by_fid_fd = -1;
 unsigned long oldest;
@@ -312,6 +313,8 @@ static void lpurge_find_device(char *devname)
 	while (i && ostprefix[i] != '/')
 		i--;
 	ostprefix[i] = 0;
+	if (strstr(ostprefix, "-MDT"))
+		is_mdt = true;
 	globfree(&paths);
 }
 
@@ -474,6 +477,8 @@ static int lipe_scan_llite(struct lipe_instance *instance, int num_threads)
 	policy.lp_attr_bits = LIPE_OBJECT_ATTR_ATTR |
 			      LIPE_OBJECT_ATTR_LMAEA |
 			      LIPE_OBJECT_ATTR_FILTER_FID;
+	if (is_mdt)
+		policy.lp_attr_bits |= LIPE_OBJECT_ATTR_LOVEA;
 
 	rc = lipe_scan(instance, &policy, &result, NULL, NULL, num_threads,
 		       "test", NULL, true, false, &ldd_err);
@@ -541,6 +546,116 @@ static void lpurge_reclaim_slot(unsigned int index)
 	lpurge_hist[index].ls_age = lpurge_hist[index + 1].ls_age;
 	pthread_mutex_unlock(&lpurge_hist[index].ls_mutex);
 }
+static inline struct lov_user_md *
+lov_comp_entry(struct lov_comp_md_v1 *comp_v1, int ent_idx)
+{
+	return (struct lov_user_md *)((char *)comp_v1 +
+			comp_v1->lcm_entries[ent_idx].lcme_offset);
+}
+
+/*
+ * Decide whether an MDT object is a candidate for purging.
+ *
+ * The object qualifies only if its composite layout holds an instantiated,
+ * non-stale DoM component and an instantiated, non-stale component that
+ * belongs to a different mirror, so that the DoM mirror is never the only
+ * place where an up-to-date component was seen.
+ *
+ * On success the parent FID and the DoM mirror id are stored in
+ * attrs->loa_filter_fid and 1 is returned.  Otherwise the object is
+ * accounted in the "nomirror" counters of @ls and 0 is returned.
+ */
+static int lpurge_check_mdt_object(struct lpurge_slot *ls,
+				   struct lipe_object_attrs *attrs)
+{
+	struct lov_user_md_v3 *v3;
+	struct lov_comp_md_v1 *comp_v1;
+	struct lov_comp_md_entry_v1 *entry;
+	unsigned src_comp_id = -1;
+	unsigned tgt_comp_id = -1;
+	unsigned mirror_id;
+	int i;
+
+	if ((attrs->loa_attr_bits & LIPE_OBJECT_ATTR_LOVEA) == 0)
+		goto out_nomirror;
+
+	comp_v1 = (struct lov_comp_md_v1 *)attrs->loa_lum;
+	if (comp_v1->lcm_magic != LOV_USER_MAGIC_COMP_V1 ||
+	    comp_v1->lcm_mirror_count == 0)
+		goto out_nomirror;
+
+	for (i = 0; i < comp_v1->lcm_entry_count; i++) {
+		entry = &comp_v1->lcm_entries[i];
+
+		/* only instantiated and up-to-date components count */
+		if ((entry->lcme_flags & LCME_FL_INIT) == 0 ||
+		    (entry->lcme_flags & LCME_FL_STALE) != 0)
+			continue;
+
+		v3 = (struct lov_user_md_v3 *)lov_comp_entry(comp_v1, i);
+		mirror_id = mirror_id_of(entry->lcme_id);
+
+		if (v3->lmm_pattern == LOV_PATTERN_MDT) {
+			src_comp_id = mirror_id;
+		} else if (tgt_comp_id == -1 || tgt_comp_id == src_comp_id) {
+			/* keep the first candidate found outside of the
+			 * DoM mirror; one inside it is only a placeholder
+			 */
+			tgt_comp_id = mirror_id;
+		}
+	}
+
+	/* the in-sync copy must live in a mirror other than the DoM one:
+	 * a non-DoM component of the DoM mirror itself is not a replica
+	 */
+	if (src_comp_id != -1 && tgt_comp_id != -1 &&
+	    src_comp_id != tgt_comp_id) {
+		attrs->loa_filter_fid.ff_parent = attrs->loa_fid;
+		attrs->loa_filter_fid.ff_layout.ol_comp_id =
+			src_comp_id << MIRROR_ID_SHIFT;
+		attrs->loa_filter_fid_size = sizeof(struct filter_fid);
+		return 1;
+	}
+
+out_nomirror:
+	ls->ls_nomirror_objs++;
+	ls->ls_nomirror_space += attrs->loa_blocks >> 10;
+	return 0;
+}
+
+static int lpurge_check_ost_object(struct lpurge_slot *ls,
+				   struct lipe_object_attrs *attrs)
+{
+	if ((attrs->loa_attr_bits & LIPE_OBJECT_ATTR_FILTER_FID) == 0) {
+		ls->ls_nopfid_objs++;
+		ls->ls_nopfid_space += attrs->loa_blocks >> 10;
+		return 0;
+	}
+
+	/* to avoid N OSTs to 1 MDT scalability issue we only consider
+	 * objects which store 1st stripe
+	 */
+	if (attrs->loa_filter_fid.ff_parent.f_ver != 0) {
+		ls->ls_notfirst_objs++;
+		ls->ls_notfirst_space += attrs->loa_blocks >> 10;
+		return 0;
+	}
+
+	/* if the object has got ost_layout structure which encodes
+	 * whether the object has a mirror, then we can skip objects
+	 * with mirror_id=0 (no mirror)
+	 */
+	if (attrs->loa_filter_fid_size >= sizeof(struct filter_fid)) {
+		if (mirror_id_of(attrs->loa_filter_fid.ff_layout.ol_comp_id)
+		    == 0) {
+			ls->ls_nomirror_objs++;
+			ls->ls_nomirror_space += attrs->loa_blocks >> 10;
+			return 0;
+		}
+	}
+
+	return 1;
+}
 
 /*
  * so this is a callback for object scanner
@@ -568,7 +683,7 @@ int lpurge_lipe_callback(struct lipe_instance *instance,
 	struct lpurge_slot *ls = NULL;
 	struct lpurge_object *lo = NULL;
 	time_t age, last_used;
-	int index;
+	int index, rc;
 
 	/* no attr, ignore */
 	if ((attrs->loa_attr_bits & LIPE_OBJECT_ATTR_ATTR) == 0)
@@ -604,33 +719,12 @@ int lpurge_lipe_callback(struct lipe_instance *instance,
 
 	pthread_mutex_lock(&ls->ls_mutex);
 
-	if ((attrs->loa_attr_bits & LIPE_OBJECT_ATTR_FILTER_FID) == 0) {
-		ls->ls_nopfid_objs++;
-		ls->ls_nopfid_space += attrs->loa_blocks >> 10;
-		goto out_ls_mutex;
-	}
-
-	/* to avoid N OSTs to 1 MDT scalability issue we only consider
-	 * objects which store 1st stripe
-	 */
-	if (attrs->loa_filter_fid.ff_parent.f_ver != 0) {
-		ls->ls_notfirst_objs++;
-		ls->ls_notfirst_space += attrs->loa_blocks >> 10;
+	if (is_mdt)
+		rc = lpurge_check_mdt_object(ls, attrs);
+	else
+		rc = lpurge_check_ost_object(ls, attrs);
+	if (!rc)
 		goto out_ls_mutex;
-	}
-
-	/* if the object has got ost_layout structure which encodes
-	 * whether the object has a mirror, then we can skip objects
-	 * with mirror_id=0 (no mirror)
-	 */
-	if (attrs->loa_filter_fid_size >= sizeof(struct filter_fid)) {
-		if (mirror_id_of(attrs->loa_filter_fid.ff_layout.ol_comp_id)
-		    == 0) {
-			ls->ls_nomirror_objs++;
-			ls->ls_nomirror_space += attrs->loa_blocks >> 10;
-			goto out_ls_mutex;
-		}
-	}
 
 	llapi_printf(LLAPI_MSG_DEBUG,
 		     "found under "DFID": size %ld block %ld age %ld slot %d\n",
diff --git a/lustre/tests/hot-pools.sh b/lustre/tests/hot-pools.sh
index 348abdb..1664046 100644
--- a/lustre/tests/hot-pools.sh
+++ b/lustre/tests/hot-pools.sh
@@ -149,6 +149,18 @@ init_lpurge_vars() {
 	done
 
 	for i in $(seq $MDSCOUNT); do
+		ost=$(facet_svc mds$i)
+		LPURGE_DEV+=("$ost")
+		LPURGE_DEV_FACET+=("mds$i")
+		LPURGE_CFG+=("/etc/lpurge/lpurge-$ost.conf")
+		LPURGE_SRVFILE+=("/etc/systemd/system/lpurge-$ost.service")
+		LPURGE_DUMPFILE+=("/var/run/lpurge-$ost.stats")
+		LPURGE_FIDS_DUMPFILE+=("/var/run/lpurge-$ost.fids")
+		LPURGE_PIDFILE+=("/var/run/lpurge-$ost.pid")
+		LPURGE_START_TIME+=(0)
+	done
+
+	for i in $(seq $MDSCOUNT); do
 		facet=mds$i
 		LPURGE_MDS+=("$((i - 1)):$(facet_active_host $facet):$LPURGE_MOUNT")
 	done
@@ -1566,86 +1578,100 @@ run_test 54 "lpurge: start with all parameters"
 test_55() {
 	init_hot_pools_env
 
+	do_nodes $(comma_list $(all_server_nodes)) "lsof -t $MOUNT"
+
 	LPURGE_DEV= LPURGE_MOUNT= LPURGE_MDS= LPURGE_POOL= LPURGE_INTV= \
 		start_one_lpurge_cmd
 	! check_one_lpurge_is_started || {
 		stop_one_lpurge_cmd
 		error "start lpurge with no parameters should fail"
 	}
+	do_nodes $(comma_list $(all_server_nodes)) "lsof -t $MOUNT"
 
 	LPURGE_DEV= LPURGE_MDS= LPURGE_POOL= LPURGE_INTV= start_one_lpurge_cmd
 	! check_one_lpurge_is_started || {
 		stop_one_lpurge_cmd
 		error "start lpurge with only '-M' option should fail"
 	}
+	do_nodes $(comma_list $(all_server_nodes)) "lsof -t $MOUNT"
 
 	LPURGE_MOUNT= LPURGE_MDS= LPURGE_POOL= LPURGE_INTV= start_one_lpurge_cmd
 	! check_one_lpurge_is_started || {
 		stop_one_lpurge_cmd
 		error "start lpurge with only '-D' option should fail"
 	}
+	do_nodes $(comma_list $(all_server_nodes)) "lsof -t $MOUNT"
 
 	LPURGE_MOUNT=foo start_one_lpurge_cmd
 	! check_one_lpurge_is_started || {
 		stop_one_lpurge_cmd
 		error "start lpurge with bad '-M' option should fail"
 	}
+	do_nodes $(comma_list $(all_server_nodes)) "lsof -t $MOUNT"
 
 	LPURGE_DEV=foo start_one_lpurge_cmd
 	! check_one_lpurge_is_started || {
 		stop_one_lpurge_cmd
 		error "start lpurge with bad '-D' option should fail"
 	}
+	do_nodes $(comma_list $(all_server_nodes)) "lsof -t $MOUNT"
 
 	LPURGE_INTV="-1" start_one_lpurge_cmd
 	! check_one_lpurge_is_started || {
 		stop_one_lpurge_cmd
 		error "start lpurge with bad '-i' option should fail"
 	}
+	do_nodes $(comma_list $(all_server_nodes)) "lsof -t $MOUNT"
 
 	LPURGE_SCAN_RATE="-1" start_one_lpurge_cmd
 	! check_one_lpurge_is_started || {
 		stop_one_lpurge_cmd
 		error "start lpurge with bad '-R' option should fail"
 	}
+	do_nodes $(comma_list $(all_server_nodes)) "lsof -t $MOUNT"
 
 	LPURGE_SLOT_SIZE="-1" start_one_lpurge_cmd
 	! check_one_lpurge_is_started || {
 		stop_one_lpurge_cmd
 		error "start lpurge with bad '-S' option should fail"
 	}
+	do_nodes $(comma_list $(all_server_nodes)) "lsof -t $MOUNT"
 
 	LPURGE_MAX_JOBS="-1" start_one_lpurge_cmd
 	! check_one_lpurge_is_started || {
 		stop_one_lpurge_cmd
 		error "start lpurge with bad '-j' option should fail"
 	}
+	do_nodes $(comma_list $(all_server_nodes)) "lsof -t $MOUNT"
 
 	LPURGE_SCAN_THREADS="-1" start_one_lpurge_cmd
 	! check_one_lpurge_is_started || {
 		stop_one_lpurge_cmd
 		error "start lpurge with bad '-t' option should fail"
 	}
+	do_nodes $(comma_list $(all_server_nodes)) "lsof -t $MOUNT"
 
 	LPURGE_FREEHI="-1" start_one_lpurge_cmd
 	! check_one_lpurge_is_started || {
 		stop_one_lpurge_cmd
 		error "start lpurge with bad '-h' option should fail"
 	}
+	do_nodes $(comma_list $(all_server_nodes)) "lsof -t $MOUNT"
 
 	LPURGE_FREELO="-1" start_one_lpurge_cmd
 	! check_one_lpurge_is_started || {
 		stop_one_lpurge_cmd
 		error "start lpurge with bad '-l' option should fail"
 	}
+	do_nodes $(comma_list $(all_server_nodes)) "lsof -t $MOUNT"
 
-	zconf_umount_clients $(comma_list $(all_osts_nodes)) $MOUNT
+	zconf_umount_clients $(comma_list $(all_server_nodes)) $MOUNT
 	start_lpurge_cmd
 	! check_lpurge_is_started || {
 		stop_lpurge_cmd
 		error "start lpurge with no client mounted on OSS should fail"
 	}
-	zconf_mount_clients $(comma_list $(all_osts_nodes)) $MOUNT ||
+	zconf_mount_clients $(comma_list $(all_server_nodes)) $MOUNT ||
 		error "failed to mount Lustre clients on OSS nodes"
 }
 run_test 55 "lpurge: start with bad command line options"
@@ -1750,6 +1776,66 @@ test_56() {
 }
 run_test 56 "lamigo and lpurge: replicate and purge"
 
+test_57() {
+	local saved_max_kb
+	local files=10
+	local avail
+
+	init_hot_pools_env
+
+	stack_trap "rm -rf $DIR/$tdir $DIR/$tfile"
+
+	$LFS mkdir -c 1 -i 0 $DIR/$tdir || error "can't mkdir"
+
+	# allow large DoM component
+	saved_max_kb=$(do_facet $SINGLEMDS $LCTL \
+		get_param -n lod.*.dom_stripesize_max_kb | tail -1)
+	do_nodes $(comma_list $(all_mdts_nodes)) \
+		$LCTL set_param lod.*.dom_stripesize_max_kb=$((1024*100))
+	stack_trap "do_nodes $(comma_list $(all_mdts_nodes)) \
+		$LCTL set_param lod.*.dom_stripesize_max_kb=$saved_max_kb"
+
+	avail=$($LFS df | grep MDT0000 | awk '{print $4}')
+	(( avail=avail/2 )) # going to fill a half of MDT
+	(( towrite = avail / files ))
+	(( towrite < 5*1024 )) && skip "not enough space on $SINGLEMDS"
+	echo "!!! gonna write $towrite"
+
+	# create file with DoM component and replicate it
+	$LFS setstripe -E $(((towrite/1024)*1024))k -L mdt -E -1 -c1 $DIR/$tdir ||
+		error "can't setstripe"
+	for ((i=0; i < $files; i++)); do
+		# XXX: replace with fallocate
+		dd if=/dev/zero of=$DIR/$tdir/f$i bs=1k \
+			count=$(((towrite/1024+1)*1024)) || error "can't dd"
+		$LFS mirror extend -N -p $LAMIGO_TGT $DIR/$tdir/f$i ||
+			error "can't create mirror"
+		$LFS getstripe $DIR/$tdir/f$i | grep pattern.*mdt ||
+			error "no DoM component on $DIR/$tdir/f$i"
+	done
+	cancel_lru_locks osc
+	cancel_lru_locks mdc
+
+	$LFS df
+	local before=$($LFS getstripe $DIR/$tdir/f*|egrep "mirror_count:.*2" | wc -l)
+	[[ $before == $files ]] || error "unexpected $before"
+
+	LPURGE_FREELO=5 LPURGE_FREEHI=90 start_lpurge_service
+	check_lpurge_is_started || error "failed to start lpurge"
+	stack_trap stop_lpurge_service
+
+	for ((i=0; i < 10; i++)); do
+		local after=$($LFS getstripe $DIR/$tdir/f*|egrep "mirror_count:.*2" | wc -l)
+		[[ $after != $before ]] && break
+		echo checking...
+		sleep 2
+	done
+	after=$($LFS getstripe $DIR/$tdir/f*|egrep "mirror_count:.*2" | wc -l)
+	[[ $after != $before ]] ||
+		error "no mirror was removed: $before == $after"
+}
+run_test 57 "lpurge to handle DoM component"
+
 test_58() {
 	local tf=$DIR/$tfile
 
-- 
1.8.3.1