From: John L. Hammond Date: Fri, 24 Sep 2021 17:59:31 +0000 (-0700) Subject: EX-2067 lpurge: check layout before opening X-Git-Url: https://git.whamcloud.com/gitweb?a=commitdiff_plain;h=ebecdabd60da021eb68f74d99dab909858c590c1;p=fs%2Flustre-release.git EX-2067 lpurge: check layout before opening In lpurge_mirror_delete(), fetch the layout by getxattr() and check if we would be deleting the last non-stale mirror before we open the file. This is to avoid breaking the lease held by a concurrent mirror resync. Lustre-change: https://review.whamcloud.com/45033 Lustre-commit: 912766d05efd95886597a46dcbaaf10237e56ebf Test-Parameters: trivial testlist=hot-pools Signed-off-by: John L. Hammond Signed-off-by: Jian Yu Change-Id: I8b1addd14b290faa537fcc15514cae8b18c802f2 Reviewed-by: Alex Zhuravlev Reviewed-on: https://review.whamcloud.com/45111 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger --- diff --git a/lipe/src/lpurge.c b/lipe/src/lpurge.c index 91b670f..4423b38 100644 --- a/lipe/src/lpurge.c +++ b/lipe/src/lpurge.c @@ -30,34 +30,35 @@ * - take OST load into account * */ -#include #include -#include -#include -#include -#include +#include +#include #include +#include #include -#include +#include #include -#include #include -#include -#include -#include +#include +#include #include -#include +#include +#include #include -#include -#include -#include -#include +#include +#include +#include #include +#include #include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include #include "lipe_object_attrs.h" #include "lipe_version.h" #include "list.h" @@ -869,6 +870,7 @@ lpurge_mirror_delete(const struct lu_fid *fid, unsigned int mirror_id) { char fid_buf[FID_LEN + 1]; char vname_buf[PATH_MAX]; + char lov_xattr_buf[XATTR_SIZE_MAX]; struct ll_ioc_lease *lil = NULL; struct llapi_layout *layout = NULL; int mdt_index = -1; @@ -876,6 +878,36 @@ lpurge_mirror_delete(const struct lu_fid *fid, unsigned int
mirror_id) int vfd = -1; int rc; + /* Before we open the file and break any leases, fetch the + * layout and check to see if we would be trying to delete the + * last non stale mirror. To avoid races, we'll check again + * after we open the file. */ + + memset(lov_xattr_buf, 0, sizeof(lov_xattr_buf)); + snprintf(lov_xattr_buf, sizeof(lov_xattr_buf), DFID, PFID(fid)); + + rc = ioctl(open_by_fid_fd, IOC_MDC_GETFILESTRIPE, lov_xattr_buf); + if (rc < 0) { + rc = -errno; + llapi_printf(LLAPI_MSG_DEBUG, "cannot IOC_MDC_GETFILESTRIPE "DFID", rc = %d\n", + PFID(fid), rc); + goto out; + } + + layout = llapi_layout_get_by_xattr(lov_xattr_buf, sizeof(lov_xattr_buf), 0); + if (layout == NULL) { + rc = -errno; + goto out; + } + + if (last_non_stale_mirror(mirror_id, layout)) { + rc = -EUCLEAN; + goto out; + } + + llapi_layout_free(layout); + layout = NULL; + /* Inline replacement for * lfs mirror split -d --mirror-id mirror_id $MOUNTPOINT/.lustre/fid/FID */ diff --git a/lustre/tests/hot-pools.sh b/lustre/tests/hot-pools.sh old mode 100644 new mode 100755 index 1e323cb..3d831b9 --- a/lustre/tests/hot-pools.sh +++ b/lustre/tests/hot-pools.sh @@ -1899,6 +1899,97 @@ test_58() { } run_test 58 "replicaste DoM files" +test_59() { + local td=$DIR/$tdir + local tf=$td/$tfile + local free_MB + local size_MB + local freehi=99 + local freelo=96 + local ids + local cmd + local pid + + init_hot_pools_env + + # start lamigo + start_lamigo_service + check_lamigo_is_started || error "failed to start lamigo" + stack_trap stop_lamigo_service + + # start lpurge + LPURGE_FREELO=$freelo LPURGE_FREEHI=$freehi start_lpurge_service + check_lpurge_is_started || error "failed to start lpurge" + stack_trap stop_lpurge_service + + $LFS df -h + free_MB=$(($(lfs_df -p $LAMIGO_SRC $DIR | + awk '/summary/{print $4}') / 1024)) + size_MB=$((free_MB * (100 - freelo + 1) / 100)) + + # create a regular file in source pool + mkdir $td || error "mkdir $td failed" + cmd="$LFS setstripe -c -1 -p $LAMIGO_SRC $td" + echo 
$cmd + $cmd || error "'$cmd' failed" + + cmd="$MULTIOP $tf oO_CREAT:O_RDWR:eRE+eU" + echo $cmd + $cmd || error "'$cmd' failed" + + cancel_lru_locks osc + cancel_lru_locks mdc + sleep $((LAMIGO_AGE * 2)) + + # verify the file replicated in target pool + $LFS getstripe $tf + verify_one_lamigo_param 0 replicated 1 + verify_file_mirror $tf 2 + + # fill in data + yes "${size_MB}M file"| + dd bs=1M count=$size_MB iflag=fullblock of=$tf || + error "failed to write to $tf" + + cmd="$MULTIOP $tf oO_RDWR:eR_E-eUc" + echo $cmd + $cmd & + pid=$! + + echo "Before purging:" + $LFS df -h + $LFS getstripe $tf + + sleep $((LPURGE_INTV * 2)) + + echo "After trying to purge:" + $LFS df -h + $LFS getstripe $tf + + # since the mirror in source pool is the last non-stale mirror, + # it cannot be purged because 'lfs mirror split' cannot get WRITE lease. + ids=($($LFS getstripe $tf | awk '/lcme_id/{print $2}' | tr '\n' ' ')) + verify_comp_attr lcme_flags $tf ${ids[0]} init,prefer + verify_comp_attr lcme_flags $tf ${ids[1]} init,stale + + # release the lease lock + kill -USR1 $pid && wait $pid || error "$MULTIOP failed" + + wait_file_resync $tf + + sleep $((LPURGE_INTV * 2)) + + # verify the file purged from source pool + echo "After purging:" + wait_file_mirror $tf 1 900 + $LFS df -h + $LFS getstripe $tf + verify_mirror_count $tf 1 + ids=($($LFS getstripe $tf | awk '/lcme_id/{print $2}' | tr '\n' ' ')) + verify_comp_attr pool $tf ${ids[${#ids[@]}-1]} $LAMIGO_TGT +} +run_test 59 "lpurge: check layout before opening" + complete $SECONDS check_and_cleanup_lustre exit_status