Whamcloud - gitweb
EX-2797 lpurge: initial support for DoM
authorAlex Zhuravlev <bzzz@whamcloud.com>
Thu, 22 Apr 2021 12:33:24 +0000 (15:33 +0300)
committerJohn L. Hammond <jhammond@whamcloud.com>
Wed, 4 Aug 2021 13:21:35 +0000 (08:21 -0500)
lpurge should be able to scan an MDT device, recognize objects with a DoM
component, and remove the replica holding the DoM component if another
in-sync replica exists.

Test-Parameters: testlist=hot-pools
Signed-off-by: Alex Zhuravlev <bzzz@whamcloud.com>
Change-Id: If12e0448ab07527d86832d942a63b4a0189ad7a0
Reviewed-on: https://review.whamcloud.com/43405
Reviewed-by: John L. Hammond <jhammond@whamcloud.com>
Tested-by: John L. Hammond <jhammond@whamcloud.com>
lipe/src/lpurge.c
lustre/tests/hot-pools.sh

index ca4493f..73c0d0e 100644 (file)
@@ -157,6 +157,7 @@ char *ostname;
 char *ostprefix;
 char ost_mntdev[PATH_MAX];
 char *ost_mntpt;
+bool is_mdt = false;
 int lustre_fd = -1;
 int open_by_fid_fd = -1;
 unsigned long oldest;
@@ -312,6 +313,8 @@ static void lpurge_find_device(char *devname)
        while (i && ostprefix[i] != '/')
                i--;
        ostprefix[i] = 0;
+       if (strstr(ostprefix, "-MDT"))
+               is_mdt = true;
 
        globfree(&paths);
 }
@@ -474,6 +477,8 @@ static int lipe_scan_llite(struct lipe_instance *instance, int num_threads)
        policy.lp_attr_bits = LIPE_OBJECT_ATTR_ATTR |
                                LIPE_OBJECT_ATTR_LMAEA |
                                LIPE_OBJECT_ATTR_FILTER_FID;
+       if (is_mdt)
+               policy.lp_attr_bits |= LIPE_OBJECT_ATTR_LOVEA;
        rc = lipe_scan(instance, &policy, &result, NULL,
                       NULL, num_threads, "test", NULL,
                       true, false, &ldd_err);
@@ -541,6 +546,101 @@ static void lpurge_reclaim_slot(unsigned int index)
        lpurge_hist[index].ls_age = lpurge_hist[index + 1].ls_age;
        pthread_mutex_unlock(&lpurge_hist[index].ls_mutex);
 }
+/* Return the layout blob of component @ent_idx of composite layout
+ * @comp_v1.  The entry's lcme_offset is applied as a byte offset from
+ * the start of @comp_v1; no bounds checking is done here.
+ */
+static inline struct lov_user_md *
+lov_comp_entry(struct lov_comp_md_v1 *comp_v1, int ent_idx)
+{
+       char *base = (char *)comp_v1;
+
+       return (struct lov_user_md *)
+               (base + comp_v1->lcm_entries[ent_idx].lcme_offset);
+}
+
+/*
+ * Check whether an MDT object is a candidate for purging its DoM replica.
+ *
+ * The object qualifies when its LOV EA is a composite layout with a
+ * non-zero lcm_mirror_count that holds
+ *  - an initialized, non-stale component with the LOV_PATTERN_MDT
+ *    pattern (the replica to be purged), and
+ *  - an initialized, non-stale component with any other pattern that
+ *    belongs to a DIFFERENT mirror (the replica that stays).
+ *
+ * Requiring a different mirror matters: the mirror holding the DoM
+ * component may also hold regular components, and those must not be
+ * mistaken for another replica, otherwise the only valid copy of the
+ * data would be reported as purgeable.
+ *
+ * On a match the filter-fid fields of @attrs are filled in (parent FID
+ * and a component id built from the DoM mirror's id) and 1 is returned.
+ * Otherwise the object is accounted in the "nomirror" counters of @ls
+ * and 0 is returned.
+ *
+ * The counters of @ls are updated without taking any lock here;
+ * lpurge_lipe_callback() calls this with ls->ls_mutex held.
+ */
+static int lpurge_check_mdt_object(struct lpurge_slot *ls,
+                                  struct lipe_object_attrs *attrs)
+{
+       struct lov_user_md_v3 *v3;
+       struct lov_comp_md_v1 *comp_v1;
+       struct lov_comp_md_entry_v1 *entry;
+       unsigned src_mirror_id = 0;
+       bool has_src = false;
+       bool has_tgt = false;
+       int i;
+
+       if ((attrs->loa_attr_bits & LIPE_OBJECT_ATTR_LOVEA) == 0)
+               goto out_nomirror;
+
+       comp_v1 = (struct lov_comp_md_v1 *)attrs->loa_lum;
+       if (comp_v1->lcm_magic != LOV_USER_MAGIC_COMP_V1 ||
+           comp_v1->lcm_mirror_count == 0)
+               goto out_nomirror;
+
+       /* pass 1: find the mirror holding an in-sync DoM component;
+        * if several mirrors have one, the last one found is used
+        */
+       for (i = 0; i < comp_v1->lcm_entry_count; i++) {
+               entry = &comp_v1->lcm_entries[i];
+
+               if ((entry->lcme_flags & LCME_FL_INIT) == 0 ||
+                   (entry->lcme_flags & LCME_FL_STALE) != 0)
+                       continue;
+
+               v3 = (struct lov_user_md_v3 *)lov_comp_entry(comp_v1, i);
+               if (v3->lmm_pattern == LOV_PATTERN_MDT) {
+                       src_mirror_id = mirror_id_of(entry->lcme_id);
+                       has_src = true;
+               }
+       }
+       if (!has_src)
+               goto out_nomirror;
+
+       /* pass 2: look for an in-sync non-DoM component which belongs
+        * to another mirror than the one selected above
+        */
+       for (i = 0; i < comp_v1->lcm_entry_count && !has_tgt; i++) {
+               entry = &comp_v1->lcm_entries[i];
+
+               if ((entry->lcme_flags & LCME_FL_INIT) == 0 ||
+                   (entry->lcme_flags & LCME_FL_STALE) != 0)
+                       continue;
+
+               if (mirror_id_of(entry->lcme_id) == src_mirror_id)
+                       continue;
+
+               v3 = (struct lov_user_md_v3 *)lov_comp_entry(comp_v1, i);
+               if (v3->lmm_pattern != LOV_PATTERN_MDT)
+                       has_tgt = true;
+       }
+       if (!has_tgt)
+               goto out_nomirror;
+
+       /* there is MDT component and in-sync replica in another mirror */
+       attrs->loa_filter_fid.ff_parent = attrs->loa_fid;
+       attrs->loa_filter_fid.ff_layout.ol_comp_id =
+               src_mirror_id << MIRROR_ID_SHIFT;
+       attrs->loa_filter_fid_size = sizeof(struct filter_fid);
+       return 1;
+
+out_nomirror:
+       ls->ls_nomirror_objs++;
+       ls->ls_nomirror_space += attrs->loa_blocks >> 10;
+       return 0;
+}
+
+/*
+ * Filter for objects found on an OST.
+ *
+ * Returns 1 when the object passed all the checks below, or 0 when it
+ * was skipped; a skipped object is accounted (object count and
+ * loa_blocks >> 10) in the matching pair of counters of @ls.
+ */
+static int lpurge_check_ost_object(struct lpurge_slot *ls,
+                                  struct lipe_object_attrs *attrs)
+{
+       /* no filter fid attribute was collected for this object */
+       if (!(attrs->loa_attr_bits & LIPE_OBJECT_ATTR_FILTER_FID)) {
+               ls->ls_nopfid_space += attrs->loa_blocks >> 10;
+               ls->ls_nopfid_objs++;
+               return 0;
+       }
+
+       /* only objects storing the 1st stripe are considered, this
+        * avoids the N OSTs to 1 MDT scalability issue
+        */
+       if (attrs->loa_filter_fid.ff_parent.f_ver != 0) {
+               ls->ls_notfirst_space += attrs->loa_blocks >> 10;
+               ls->ls_notfirst_objs++;
+               return 0;
+       }
+
+       /* a filter fid large enough to carry the ost_layout structure
+        * tells whether the object has a mirror: mirror_id=0 means no
+        * mirror, so such objects are skipped
+        */
+       if (attrs->loa_filter_fid_size >= sizeof(struct filter_fid) &&
+           !mirror_id_of(attrs->loa_filter_fid.ff_layout.ol_comp_id)) {
+               ls->ls_nomirror_space += attrs->loa_blocks >> 10;
+               ls->ls_nomirror_objs++;
+               return 0;
+       }
+
+       return 1;
+}
 
 /*
  * so this is a callback for object scanner
@@ -568,7 +668,7 @@ int lpurge_lipe_callback(struct lipe_instance *instance,
        struct lpurge_slot *ls = NULL;
        struct lpurge_object *lo = NULL;
        time_t age, last_used;
-       int index;
+       int index, rc;
 
        /* no attr, ignore */
        if ((attrs->loa_attr_bits & LIPE_OBJECT_ATTR_ATTR) == 0)
@@ -604,33 +704,12 @@ int lpurge_lipe_callback(struct lipe_instance *instance,
 
        pthread_mutex_lock(&ls->ls_mutex);
 
-       if ((attrs->loa_attr_bits & LIPE_OBJECT_ATTR_FILTER_FID) == 0) {
-               ls->ls_nopfid_objs++;
-               ls->ls_nopfid_space += attrs->loa_blocks >> 10;
-               goto out_ls_mutex;
-       }
-
-       /* to avoid N OSTs to 1 MDT scalability issue we only consider
-        * objects which store 1st stripe
-        */
-       if (attrs->loa_filter_fid.ff_parent.f_ver != 0) {
-               ls->ls_notfirst_objs++;
-               ls->ls_notfirst_space += attrs->loa_blocks >> 10;
+       if (is_mdt)
+               rc = lpurge_check_mdt_object(ls, attrs);
+       else
+               rc = lpurge_check_ost_object(ls, attrs);
+       if (!rc)
                goto out_ls_mutex;
-       }
-
-       /* if the object has got ost_layout structure which encodes
-        * whether the object has a mirror, then we can skip objects
-        * with mirror_id=0 (no mirror)
-        */
-       if (attrs->loa_filter_fid_size >= sizeof(struct filter_fid)) {
-               if (mirror_id_of(attrs->loa_filter_fid.ff_layout.ol_comp_id)
-                   == 0) {
-                       ls->ls_nomirror_objs++;
-                       ls->ls_nomirror_space += attrs->loa_blocks >> 10;
-                       goto out_ls_mutex;
-               }
-       }
 
        llapi_printf(LLAPI_MSG_DEBUG,
                     "found under "DFID": size %ld block %ld age %ld slot %d\n",
index 348abdb..1664046 100644 (file)
@@ -149,6 +149,18 @@ init_lpurge_vars() {
        done
 
        for i in $(seq $MDSCOUNT); do
+               ost=$(facet_svc mds$i)
+               LPURGE_DEV+=("$ost")
+               LPURGE_DEV_FACET+=("mds$i")
+               LPURGE_CFG+=("/etc/lpurge/lpurge-$ost.conf")
+               LPURGE_SRVFILE+=("/etc/systemd/system/lpurge-$ost.service")
+               LPURGE_DUMPFILE+=("/var/run/lpurge-$ost.stats")
+               LPURGE_FIDS_DUMPFILE+=("/var/run/lpurge-$ost.fids")
+               LPURGE_PIDFILE+=("/var/run/lpurge-$ost.pid")
+               LPURGE_START_TIME+=(0)
+       done
+
+       for i in $(seq $MDSCOUNT); do
                facet=mds$i
                LPURGE_MDS+=("$((i - 1)):$(facet_active_host $facet):$LPURGE_MOUNT")
        done
@@ -1566,86 +1578,100 @@ run_test 54 "lpurge: start with all parameters"
 test_55() {
        init_hot_pools_env
 
+       do_nodes $(comma_list $(all_server_nodes)) "lsof -t $MOUNT"
+
        LPURGE_DEV= LPURGE_MOUNT= LPURGE_MDS= LPURGE_POOL= LPURGE_INTV= \
        start_one_lpurge_cmd
        ! check_one_lpurge_is_started || {
                stop_one_lpurge_cmd
                error "start lpurge with no parameters should fail"
        }
+       do_nodes $(comma_list $(all_server_nodes)) "lsof -t $MOUNT"
 
        LPURGE_DEV= LPURGE_MDS= LPURGE_POOL= LPURGE_INTV= start_one_lpurge_cmd
        ! check_one_lpurge_is_started || {
                stop_one_lpurge_cmd
                error "start lpurge with only '-M' option should fail"
        }
+       do_nodes $(comma_list $(all_server_nodes)) "lsof -t $MOUNT"
 
        LPURGE_MOUNT= LPURGE_MDS= LPURGE_POOL= LPURGE_INTV= start_one_lpurge_cmd
        ! check_one_lpurge_is_started || {
                stop_one_lpurge_cmd
                error "start lpurge with only '-D' option should fail"
        }
+       do_nodes $(comma_list $(all_server_nodes)) "lsof -t $MOUNT"
 
        LPURGE_MOUNT=foo start_one_lpurge_cmd
        ! check_one_lpurge_is_started || {
                stop_one_lpurge_cmd
                error "start lpurge with bad '-M' option should fail"
        }
+       do_nodes $(comma_list $(all_server_nodes)) "lsof -t $MOUNT"
 
        LPURGE_DEV=foo start_one_lpurge_cmd
        ! check_one_lpurge_is_started || {
                stop_one_lpurge_cmd
                error "start lpurge with bad '-D' option should fail"
        }
+       do_nodes $(comma_list $(all_server_nodes)) "lsof -t $MOUNT"
 
        LPURGE_INTV="-1" start_one_lpurge_cmd
        ! check_one_lpurge_is_started || {
                stop_one_lpurge_cmd
                error "start lpurge with bad '-i' option should fail"
        }
+       do_nodes $(comma_list $(all_server_nodes)) "lsof -t $MOUNT"
 
        LPURGE_SCAN_RATE="-1" start_one_lpurge_cmd
        ! check_one_lpurge_is_started || {
                stop_one_lpurge_cmd
                error "start lpurge with bad '-R' option should fail"
        }
+       do_nodes $(comma_list $(all_server_nodes)) "lsof -t $MOUNT"
 
        LPURGE_SLOT_SIZE="-1" start_one_lpurge_cmd
        ! check_one_lpurge_is_started || {
                stop_one_lpurge_cmd
                error "start lpurge with bad '-S' option should fail"
        }
+       do_nodes $(comma_list $(all_server_nodes)) "lsof -t $MOUNT"
 
        LPURGE_MAX_JOBS="-1" start_one_lpurge_cmd
        ! check_one_lpurge_is_started || {
                stop_one_lpurge_cmd
                error "start lpurge with bad '-j' option should fail"
        }
+       do_nodes $(comma_list $(all_server_nodes)) "lsof -t $MOUNT"
 
        LPURGE_SCAN_THREADS="-1" start_one_lpurge_cmd
        ! check_one_lpurge_is_started || {
                stop_one_lpurge_cmd
                error "start lpurge with bad '-t' option should fail"
        }
+       do_nodes $(comma_list $(all_server_nodes)) "lsof -t $MOUNT"
 
        LPURGE_FREEHI="-1" start_one_lpurge_cmd
        ! check_one_lpurge_is_started || {
                stop_one_lpurge_cmd
                error "start lpurge with bad '-h' option should fail"
        }
+       do_nodes $(comma_list $(all_server_nodes)) "lsof -t $MOUNT"
 
        LPURGE_FREELO="-1" start_one_lpurge_cmd
        ! check_one_lpurge_is_started || {
                stop_one_lpurge_cmd
                error "start lpurge with bad '-l' option should fail"
        }
+       do_nodes $(comma_list $(all_server_nodes)) "lsof -t $MOUNT"
 
-       zconf_umount_clients $(comma_list $(all_osts_nodes)) $MOUNT
+       zconf_umount_clients $(comma_list $(all_server_nodes)) $MOUNT
        start_lpurge_cmd
        ! check_lpurge_is_started || {
                stop_lpurge_cmd
                error "start lpurge with no client mounted on OSS should fail"
        }
-       zconf_mount_clients $(comma_list $(all_osts_nodes)) $MOUNT ||
+       zconf_mount_clients $(comma_list $(all_server_nodes)) $MOUNT ||
                error "failed to mount Lustre clients on OSS nodes"
 }
 run_test 55 "lpurge: start with bad command line options"
@@ -1750,6 +1776,66 @@ test_56() {
 }
 run_test 56 "lamigo and lpurge: replicate and purge"
 
+test_57() {
+       local saved_max_kb
+       local files=10
+       local avail
+
+       init_hot_pools_env
+
+       stack_trap "rm -rf $DIR/$tdir $DIR/$tfile"
+
+       $LFS mkdir -c 1 -i 0 $DIR/$tdir || error "can't mkdir"
+
+       # allow large DoM component
+       saved_max_kb=$(do_facet $SINGLEMDS $LCTL \
+               get_param -n lod.*.dom_stripesize_max_kb | tail -1)
+       do_nodes $(comma_list $(all_mdts_nodes)) \
+               $LCTL set_param lod.*.dom_stripesize_max_kb=$((1024*100))
+       stack_trap "do_nodes $(comma_list $(all_mdts_nodes)) \
+               $LCTL set_param lod.*.dom_stripesize_max_kb=$saved_max_kb"
+
+       avail=$($LFS df | grep MDT0000 | awk '{print $4}')
+       (( avail=avail/2 ))     # going to fill a half of MDT
+       (( towrite = avail / files ))
+       (( towrite < 5*1024 )) && skip "not enough space on $SINGLEMDS"
+       echo "!!! gonna write $towrite"
+
+       # create file with DoM component and replicate it
+       $LFS setstripe -E $(((towrite/1024)*1024))k -L mdt -E -1 -c1 $DIR/$tdir ||
+               error "can't setstripe"
+       for ((i=0; i < $files; i++)); do
+               # XXX: replace with fallocate
+               dd if=/dev/zero of=$DIR/$tdir/f$i bs=1k \
+                       count=$(((towrite/1024+1)*1024)) || error "can't dd"
+               $LFS mirror extend -N -p $LAMIGO_TGT $DIR/$tdir/f$i ||
+                       error "can't create mirror"
+               $LFS getstripe $DIR/$tdir/f$i | grep pattern.*mdt ||
+                       error "no DoM component on $DIR/$tdir/f$i"
+       done
+       cancel_lru_locks osc
+       cancel_lru_locks mdc
+
+       $LFS df
+       local before=$($LFS getstripe $DIR/$tdir/f*|egrep "mirror_count:.*2" | wc -l)
+       [[ $before == $files ]] || error "unexpected $before"
+
+       LPURGE_FREELO=5 LPURGE_FREEHI=90 start_lpurge_service
+       check_lpurge_is_started || error "failed to start lpurge"
+       stack_trap stop_lpurge_service
+
+       for ((i=0; i < 10; i++)); do
+               local after=$($LFS getstripe $DIR/$tdir/f*|egrep "mirror_count:.*2" | wc -l)
+               [[ $after != $before ]] && break
+               echo checking...
+               sleep 2
+       done
+       after=$($LFS getstripe $DIR/$tdir/f*|egrep "mirror_count:.*2" | wc -l)
+       [[ $after != $before ]] ||
+               error "no mirror was removed: $before == $after"
+}
+run_test 57 "lpurge to handle DoM component"
+
 test_58() {
        local tf=$DIR/$tfile