From b5850d8fdf66888753efa67b24a6b6a2471a15f8 Mon Sep 17 00:00:00 2001 From: Qian Yingjin Date: Fri, 12 Jan 2024 04:03:15 -0500 Subject: [PATCH] EX-8971 pcc: add lctl pcc abort command to abort attaches This patch adds a new PCC command "lctl pcc abort [--wait|-w] [--detach|-d] $LUSTRE_MNTPT $PCCROOT". --wait|-w: wait until all in-flight attaches are aborted. --detach|-d: detach the PCC copies when scanning the PCC backend. It can be used to abort in-progress attaches for a given PCC backend. It does not remove the PCC backend from a client. Add sanity-pcc/test_109 to verify it. Change-Id: Ib7152f7418aa1beb840919e98bf8de53c99b5c54 Signed-off-by: Qian Yingjin Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/53656 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger --- lustre/doc/lctl-pcc-abort.8 | 31 +++++++++ lustre/doc/lctl-pcc.8 | 6 +- lustre/doc/llapi_pcc_del.3 | 12 +++- lustre/doc/llapi_pcc_detach_fid_fd.3 | 23 +++++-- lustre/include/uapi/linux/lustre/lustre_user.h | 28 +++++--- lustre/llite/pcc.c | 3 +- lustre/tests/sanity-pcc.sh | 88 ++++++++++++++++++++++++++ lustre/utils/lctl.c | 3 + lustre/utils/liblustreapi_pcc.c | 48 +++++++++----- lustre/utils/obd.c | 69 ++++++++++++++++++++ lustre/utils/obdctl.h | 1 + 11 files changed, 275 insertions(+), 37 deletions(-) create mode 100644 lustre/doc/lctl-pcc-abort.8 diff --git a/lustre/doc/lctl-pcc-abort.8 b/lustre/doc/lctl-pcc-abort.8 new file mode 100644 index 0000000..da609cb --- /dev/null +++ b/lustre/doc/lctl-pcc-abort.8 @@ -0,0 +1,31 @@ +.TH lctl-pcc-abort 8 2024-01-15 "Lustre" "configuration Utilities" +.SH NAME +lctl pcc abort \- abort in-progress attaches on a given PCC backend on a client. 
+.SH SYNOPSIS +.B lctl pcc abort [\fB--wait\fR|\fB-w\fR] [\fB--detach\fR|\fB-d\fR] \fIMNTPATH\fR [\fIPCCPATH\fR] +.SH DESCRIPTION +.TP +.B lctl pcc abort [\fB--wait\fR|\fB-w\fR] [\fB--detach\fR|\fB-d\fR] \fIMNTPATH\fR [\fIPCCPATH\fR] +Abort in-progress attaches on a given PCC backend specified by path +.IR PCCPATH +on a Lustre client referenced by the mount point of +.IR MNTPATH . +If +.IR PCCPATH +is not specified, it will abort in-progress attaches on all PCC backends on this +client. +.SH OPTIONS +.TP +.B --wait | -w +Wait until all in-progress attaches have been aborted. The PCC backend itself +is not removed from the client. +.TP +.B --detach | -d +Also detach the PCC copies on the given PCC backend while aborting its +in-progress attaches. +.TP +.SH SEE ALSO +.BR lctl-pcc (8), +.BR lfs (1), +.BR lfs-hsm (1), +.BR lfs-pcc (1) diff --git a/lustre/doc/lctl-pcc.8 b/lustre/doc/lctl-pcc.8 index 82d163a..1c806a3 100644 --- a/lustre/doc/lctl-pcc.8 +++ b/lustre/doc/lctl-pcc.8 @@ -9,7 +9,6 @@ lctl pcc commands used to interact with PCC features. .B lctl pcc clear [\fB--keep\fR|\fB-k\fR] [\fB--wait\fR|\fB-w\fR] [\fB--abort\fR|\fB-a\fR] <\fImntpath\fR> .br .B lctl pcc list <\fImntpath\fR> -.SH DESCRIPTION .TP .B lctl pcc add \fR<\fImntpath\fR> <\fIpccpath\fR> [\fB--param\fR|\fB-p\fR <\fIparam\fR>] Add a PCC backend specified by HSM root path @@ -66,10 +65,15 @@ still running at this client. .B --wait | -w The caller must wait all in-progress attaches finished when remove a PCC backend from a client. +.TP .B --abort | -a The caller tries to abort all in-progress attaches when remove a PCC backend from a client. .TP +.B --detach | -d +This option is used by the PCC abort command. It indicates that PCC copies are +also detached when aborting in-progress attaches on a given PCC backend. 
+.TP .SH SEE ALSO .BR lfs (1), .BR lfs-hsm (1), diff --git a/lustre/doc/llapi_pcc_del.3 b/lustre/doc/llapi_pcc_del.3 index c0a1411..e20b517 100644 --- a/lustre/doc/llapi_pcc_del.3 +++ b/lustre/doc/llapi_pcc_del.3 @@ -22,9 +22,11 @@ FIDs. The input parameter .IR flags currently has three flag values .B PCC_CLEANUP_FL_KEEP_DATA, -.B PCC_CLEANUP_FL_WAIT +.B PCC_CLEANUP_FL_WAIT, +.B PCC_CLEANUP_FL_ABORT, +.B PCC_CLEANUP_FL_NOT_DEL, and -.B PCC_CLEANUP_FL_ABORT. +.B PCC_CLEANUP_FL_NOT_DETACH. .B PCC_CLEANUP_FL_KEEP_DATA means it removes the PCC backend from the Lustre client, but retains @@ -36,6 +38,12 @@ means the caller must wait for the in-progress attaches finished when remove the PCC backend from the client. .B PCC_CLEANUP_FL_ABORT means the caller tries to abort all in-progress attaches on the PCC backend. +.B PCC_CLEANUP_FL_NOT_DEL +means that it does not remove the PCC backend from a client. i.e. just abort +in-progress attaches for a given PCC backend. +.B PCC_CLEANUP_FL_NOT_DETACH +means that it does not detach PCC copies from the PCC backend during abort. 
+ .SH RETURN VALUES .PP .B llapi_pcc_del() diff --git a/lustre/doc/llapi_pcc_detach_fid_fd.3 b/lustre/doc/llapi_pcc_detach_fid_fd.3 index 9b4fa2f..e3d69b9 100644 --- a/lustre/doc/llapi_pcc_detach_fid_fd.3 +++ b/lustre/doc/llapi_pcc_detach_fid_fd.3 @@ -44,13 +44,15 @@ data structure, which contains the following values: .nf .LP PCC_DETACH_FL_NONE = 0x0, - PCC_DETACH_FL_UNCACHE = 0x01, - PCC_DETACH_FL_KNOWN_READWRITE = 0x02, - PCC_DETACH_FL_KNOWN_READONLY = 0x04, - PCC_DETACH_FL_CACHE_REMOVED = 0x08, - PCC_DETACH_FL_ATTACHING = 0x10, - PCC_DETACH_FL_ATTACHING_WAIT = 0x20, - PCC_DETACH_FL_ATTACH_ABORT = 0x40, + PCC_DETACH_FL_UNCACHE = 0x0001, + PCC_DETACH_FL_KNOWN_READWRITE = 0x0002, + PCC_DETACH_FL_KNOWN_READONLY = 0x0004, + PCC_DETACH_FL_CACHE_REMOVED = 0x0008, + PCC_DETACH_FL_ATTACHING = 0x0010, + PCC_DETACH_FL_ATTACHING_WAIT = 0x0020, + PCC_DETACH_FL_ATTACH_ABORT = 0x0040, + PCC_DETACH_FL_KEEP_DATA = 0x0080, + PCC_DETACH_FL_NOT_DETACH = 0x0100, .fi .TP PCC_DETACH_FL_NONE @@ -77,6 +79,13 @@ indicates to wait the in-progress attach finished. .TP PCC_DETACH_FL_ATTACH_ABORT indicates to abort the in-progress attach during detach. +.TP +PCC_DETACH_FL_KEEP_DATA +indicates to keep the valid cached PCC copies on the PCC backend. +.TP +PCC_DETACH_FL_NOT_DETACH +indicates not to detach PCC copies from the PCC backend. 
+ .SH RETURN VALUES .LP .BR llapi_pcc_detach_fid_fd() , diff --git a/lustre/include/uapi/linux/lustre/lustre_user.h b/lustre/include/uapi/linux/lustre/lustre_user.h index 4e4badc..98de08d 100644 --- a/lustre/include/uapi/linux/lustre/lustre_user.h +++ b/lustre/include/uapi/linux/lustre/lustre_user.h @@ -2877,19 +2877,23 @@ enum lu_pcc_detach_flags { /* Detach only, keep the PCC copy */ PCC_DETACH_FL_NONE = 0x0, /* Remove the cached file after detach */ - PCC_DETACH_FL_UNCACHE = 0x01, + PCC_DETACH_FL_UNCACHE = 0x0001, /* Known the file was once used as PCC-RW */ - PCC_DETACH_FL_KNOWN_READWRITE = 0x02, + PCC_DETACH_FL_KNOWN_READWRITE = 0x0002, /* Known the file was once used as PCC-RO */ - PCC_DETACH_FL_KNOWN_READONLY = 0x04, + PCC_DETACH_FL_KNOWN_READONLY = 0x0004, /* Indicate PCC cached copy is removed */ - PCC_DETACH_FL_CACHE_REMOVED = 0x08, + PCC_DETACH_FL_CACHE_REMOVED = 0x0008, /* Indicate the file is being attached */ - PCC_DETACH_FL_ATTACHING = 0x10, + PCC_DETACH_FL_ATTACHING = 0x0010, /* Indicate to wait the attach process finished */ - PCC_DETACH_FL_ATTACHING_WAIT = 0x20, + PCC_DETACH_FL_ATTACHING_WAIT = 0x0020, /* Indicate to abort the in-progress attach during detach */ - PCC_DETACH_FL_ATTACH_ABORT = 0x40, + PCC_DETACH_FL_ATTACH_ABORT = 0x0040, + /* Keep the valid cached PCC copies. */ + PCC_DETACH_FL_KEEP_DATA = 0x0080, + /* Not detach PCC copies from PCC backend */ + PCC_DETACH_FL_NOT_DETACH = 0x0100, }; struct lu_pcc_detach_fid { @@ -2927,11 +2931,15 @@ struct lu_pcc_state { enum lu_pcc_cleanup_flags { PCC_CLEANUP_FL_NONE = 0x0, /* Remove the PCC backend but retain the data on the cache */ - PCC_CLEANUP_FL_KEEP_DATA = 0x1, + PCC_CLEANUP_FL_KEEP_DATA = 0x01, /* Wait in-progress attaches finished when remove the PCC backend */ - PCC_CLEANUP_FL_WAIT = 0x2, + PCC_CLEANUP_FL_WAIT = 0x02, /* Abort in-progress attaches when remove the PCC backend */ - PCC_CLEANUP_FL_ABORT = 0x4, + PCC_CLEANUP_FL_ABORT = 0x04, + /* Not remove the PCC backend, i.e. 
only abort in-progress attaches */ + PCC_CLEANUP_FL_NOT_DEL = 0x08, + /* Not detach PCC copies */ + PCC_CLEANUP_FL_NOT_DETACH = 0x10, }; enum lu_project_type { diff --git a/lustre/llite/pcc.c b/lustre/llite/pcc.c index a9aaa11..5d56093 100644 --- a/lustre/llite/pcc.c +++ b/lustre/llite/pcc.c @@ -4554,7 +4554,8 @@ repeat: GOTO(out_unlock, rc = 0); } - if (!pcci || !pcc_inode_has_layout(pcci)) + if (!pcci || !pcc_inode_has_layout(pcci) || + *flags & PCC_DETACH_FL_NOT_DETACH) GOTO(out_unlock, rc = 0); LASSERT(atomic_read(&pcci->pcci_refcount) > 0); diff --git a/lustre/tests/sanity-pcc.sh b/lustre/tests/sanity-pcc.sh index 6fd28fe..6e4217d 100755 --- a/lustre/tests/sanity-pcc.sh +++ b/lustre/tests/sanity-pcc.sh @@ -5111,6 +5111,94 @@ test_108b() { } run_test 108b "Test for in-progress attach abort (DIO)" +test_109() { + local loopfile="$TMP/$tfile" + local mntpt="/mnt/pcc.$tdir" + local hsm_root="$mntpt/$tdir" + local file1=$DIR/$tfile.1 + local file2=$DIR/$tfile.2 + local cnt=10 + + $LCTL get_param -n mdc.*.connect_flags | grep -q pcc_ro || + skip "Server does not support PCC-RO" + + setup_loopdev client $loopfile $mntpt 600 + mkdir -p $hsm_root || error "mkdir $hsm_root failed" + setup_pcc_mapping client \ + "projid={100}\ roid=$HSM_ARCHIVE_NUMBER\ pccro=1" + + local dio_attach_iosize + + # disable attach via direct I/O (DIO) + dio_attach_iosize=$($LCTL get_param -n llite.*.pcc_dio_attach_iosize_mb | head -n 1) + stack_trap "$LCTL set_param llite.*.pcc_dio_attach_iosize_mb=$dio_attach_iosize" + $LCTL set_param llite.*.pcc_dio_attach_iosize_mb=0 + + stack_trap "rm -f $file1 $file2" + echo "small_file_data" > $file1 || error "failed to write $file1" + dd if=/dev/zero of=$file2 bs=1M count=$cnt || + error "failed to write $file2" + + $LFS pcc attach $file1 || error "failed to attach $file1" + #define OBD_FAIL_LLITE_PCC_COPYDATA_PAUSE 0x141a + $LCTL set_param fail_loc=0x8000141a fail_val=5 + $LFS pcc attach $file2 & + sleep 2 + $LCTL pcc abort -v --wait $MOUNT $hsm_root 
+ check_lpcc_state $file1 "readonly" client + check_lpcc_state $file2 "none" client + $LCTL pcc abort --detach $MOUNT $hsm_root + check_lpcc_state $file1 "none" client + $LFS pcc state $file1 | grep -q 'flags: valid' || + error "$file1 should be valid cached on PCC" + check_lpcc_state $file2 "none" client + + $LFS pcc attach $file1 || error "failed to attach $file1" + #define OBD_FAIL_LLITE_PCC_COPYDATA_PAUSE 0x141a + $LCTL set_param fail_loc=0x8000141a fail_val=5 + $LFS pcc attach $file2 & + sleep 2 + $LCTL pcc abort -v --wait --detach $MOUNT $hsm_root + check_lpcc_state $file1 "none" client + $LFS pcc state $file1 | grep -q 'flags: valid' || + error "$file1 should be valid cached on PCC" + check_lpcc_state $file2 "none" client + + $LFS pcc attach $file1 || error "failed to attach $file1" + #define OBD_FAIL_LLITE_PCC_COPYDATA_PAUSE 0x141a + $LCTL set_param fail_loc=0x8000141a fail_val=5 + $LFS pcc attach $file2 & + sleep 2 + $LCTL pcc abort -v --wait $MOUNT + check_lpcc_state $file1 "readonly" client + check_lpcc_state $file2 "none" client + $LCTL pcc abort --detach $MOUNT + check_lpcc_state $file1 "none" client + $LFS pcc state $file1 | grep -q 'flags: valid' || + error "$file1 should be valid cached on PCC" + + $LFS pcc attach $file1 || error "failed to attach $file1" + #define OBD_FAIL_LLITE_PCC_COPYDATA_PAUSE 0x141a + $LCTL set_param fail_loc=0x8000141a fail_val=5 + $LFS pcc attach $file2 & + sleep 2 + $LCTL pcc abort -v --wait --detach $MOUNT + check_lpcc_state $file1 "none" client + $LFS pcc state $file1 | grep -q 'flags: valid' || + error "$file1 should be valid cached on PCC" + check_lpcc_state $file2 "none" client + + $LFS pcc attach $file1 || error "failed to attach $file1" + #define OBD_FAIL_LLITE_PCC_COPYDATA_PAUSE 0x141a + $LCTL set_param fail_loc=0x8000141a fail_val=5 + $LFS pcc attach $file2 & + sleep 2 + $LCTL pcc clear -v -k --abort --wait $MOUNT + check_lpcc_state $file1 "none" client + check_lpcc_state $file2 "none" client +} +run_test 109 
"Attach abort with data retain when clear a PCC backend" + wait_lpcc_purge_scan_end() { local pidfile=$1 local statsfile=$2 diff --git a/lustre/utils/lctl.c b/lustre/utils/lctl.c index 01c50b6..e8830db 100644 --- a/lustre/utils/lctl.c +++ b/lustre/utils/lctl.c @@ -77,6 +77,9 @@ command_t pccdev_cmdlist[] = { { .pc_name = "list", .pc_func = jt_pcc_list, .pc_help = "List all PCC backends on a client.\n" "usage: lctl pcc list \n" }, + { .pc_name = "abort", .pc_func = jt_pcc_abort, + .pc_help = "Abort in-progress attaches for specified PCC backend.\n" + "usage: lctl pcc abort <mntpath> [pccpath]\n" }, { .pc_name = "--list-commands", .pc_func = jt_pcc_list_commands, .pc_help = "list commands supported by lctl pcc"}, { .pc_name = "help", .pc_func = Parser_help, .pc_help = "help" }, diff --git a/lustre/utils/liblustreapi_pcc.c b/lustre/utils/liblustreapi_pcc.c index 52cadba..7be16ae 100644 --- a/lustre/utils/liblustreapi_pcc.c +++ b/lustre/utils/liblustreapi_pcc.c @@ -573,7 +573,9 @@ static int llapi_pcc_scan_detach(const char *pname, const char *fname, int rc; /* It is the saved lov file when archive on HSM backend. */ - detach.pccd_flags = hsc->hsc_flags | PCC_DETACH_FL_UNCACHE; + detach.pccd_flags = hsc->hsc_flags & PCC_DETACH_FL_KEEP_DATA ? 
+ hsc->hsc_flags : (hsc->hsc_flags | + PCC_DETACH_FL_UNCACHE); lov_file = endswith(fname, ".lov"); if (lov_file) { size_t len; @@ -623,6 +625,9 @@ static int llapi_pcc_scan_detach(const char *pname, const char *fname, llapi_printf(LLAPI_MSG_DEBUG, "'%s' is being aborted, will remove it later\n", fidname); + } else if (detach.pccd_flags & PCC_DETACH_FL_KEEP_DATA) { + llapi_printf(LLAPI_MSG_DEBUG, "'%s' is retained in PCC\n", + fidname); } else { snprintf(fullname, sizeof(fullname), "%s/%s", pname, fidname); llapi_printf(LLAPI_MSG_DEBUG, @@ -662,24 +667,34 @@ static int llapi_pcc_del_internal(const char *mntpath, const char *pccpath, int rc; - if ((flags & (PCC_CLEANUP_FL_WAIT | PCC_CLEANUP_FL_ABORT)) == - PCC_CLEANUP_FL_WAIT) { - snprintf(cmd, sizeof(cmd), "%s %s", - PCC_CMDNAME_DEL_WAIT, pccpath); - hsc.hsc_flags = PCC_DETACH_FL_ATTACHING_WAIT; - } else { - snprintf(cmd, sizeof(cmd), "%s %s", PCC_CMDNAME_DEL, pccpath); + if (!(flags & PCC_CLEANUP_FL_NOT_DEL)) { + if ((flags & (PCC_CLEANUP_FL_WAIT | PCC_CLEANUP_FL_ABORT)) == + PCC_CLEANUP_FL_WAIT) { + snprintf(cmd, sizeof(cmd), "%s %s", + PCC_CMDNAME_DEL_WAIT, pccpath); + hsc.hsc_flags = PCC_DETACH_FL_ATTACHING_WAIT; + } else { + snprintf(cmd, sizeof(cmd), "%s %s", + PCC_CMDNAME_DEL, pccpath); + } + + rc = llapi_pccdev_set(mntpath, cmd); + if (rc < 0) { + llapi_error(LLAPI_MSG_ERROR, rc, + "failed to run '%s' on %s", cmd, mntpath); + return rc; + } } - rc = llapi_pccdev_set(mntpath, cmd); - if (rc < 0) { - llapi_error(LLAPI_MSG_ERROR, rc, - "failed to run '%s' on %s", cmd, mntpath); - return rc; + if (flags & PCC_CLEANUP_FL_KEEP_DATA) { + if (!(flags & PCC_CLEANUP_FL_ABORT)) + return 0; + + hsc.hsc_flags |= PCC_DETACH_FL_KEEP_DATA; } - if (flags & PCC_CLEANUP_FL_KEEP_DATA) - return 0; + if (flags & PCC_CLEANUP_FL_NOT_DETACH) + hsc.hsc_flags |= PCC_DETACH_FL_NOT_DETACH; hsc.hsc_mntfd = open(mntpath, O_RDONLY); if (hsc.hsc_mntfd < 0) { @@ -707,7 +722,8 @@ static int llapi_pcc_del_internal(const char *mntpath, const 
char *pccpath, (PCC_CLEANUP_FL_WAIT | PCC_CLEANUP_FL_ABORT)) { llapi_printf(LLAPI_MSG_DEBUG, "Scan PCC to wait attaches finished\n"); - hsc.hsc_flags = PCC_DETACH_FL_ATTACHING_WAIT; + hsc.hsc_flags &= ~PCC_DETACH_FL_ATTACH_ABORT; + hsc.hsc_flags |= PCC_DETACH_FL_ATTACHING_WAIT; rc = hsm_scan_process(&hsc); } } diff --git a/lustre/utils/obd.c b/lustre/utils/obd.c index 0f60916..103a3b0 100644 --- a/lustre/utils/obd.c +++ b/lustre/utils/obd.c @@ -6346,3 +6346,72 @@ int jt_pcc_list(int argc, char **argv) return rc; } + +int jt_pcc_abort(int argc, char **argv) +{ + static struct option long_opts[] = { + { .val = 'v', .name = "verbose", .has_arg = no_argument }, + { .val = 'w', .name = "wait", .has_arg = no_argument }, + { .val = 'd', .name = "detach", .has_arg = no_argument }, + { .name = NULL } }; + char fsname[MAX_OBD_NAME + 1]; + const char *mntpath; + const char *pccpath = NULL; + __u32 flags = PCC_CLEANUP_FL_ABORT | PCC_CLEANUP_FL_KEEP_DATA | + PCC_CLEANUP_FL_NOT_DETACH | PCC_CLEANUP_FL_NOT_DEL; + int verbose = LLAPI_MSG_INFO; + int rc; + int c; + + while ((c = getopt_long(argc, argv, "dvw", long_opts, NULL)) != -1) { + switch (c) { + case 'd': + flags &= ~PCC_CLEANUP_FL_NOT_DETACH; + break; + case 'w': + flags |= PCC_CLEANUP_FL_WAIT; + break; + case 'v': + verbose++; + break; + case '?': + return CMD_HELP; + default: + fprintf(stderr, "%s: option '%s' unrecognized\n", + argv[0], argv[optind - 1]); + return CMD_HELP; + } + } + if (optind + 1 == argc) { + mntpath = argv[optind]; + } else if (optind + 2 == argc) { + mntpath = argv[optind++]; + pccpath = argv[optind]; + } else { + fprintf(stderr, "%s: must specify mount path and optional PCC path\n", + jt_cmdname(argv[0])); + return CMD_HELP; + } + + rc = llapi_search_fsname(mntpath, fsname); + if (rc < 0) { + fprintf(stderr, + "%s: cannot find a Lustre filesystem mounted at '%s'\n", + jt_cmdname(argv[0]), mntpath); + return rc; + } + + /* Set llapi message level */ + llapi_msg_set_level(verbose); + + if (pccpath) + rc = 
llapi_pcc_del(mntpath, pccpath, flags); + else + rc = llapi_pcc_clear(mntpath, flags); + if (rc < 0) + fprintf(stderr, "%s: failed to abort '%s' on '%s': %s\n", + jt_cmdname(argv[0]), pccpath ? : "all PCC backends", + mntpath, strerror(-rc)); + + return rc; +} diff --git a/lustre/utils/obdctl.h b/lustre/utils/obdctl.h index d14f4a4..e20d6ea 100644 --- a/lustre/utils/obdctl.h +++ b/lustre/utils/obdctl.h @@ -192,6 +192,7 @@ int jt_pcc_add(int argc, char **argv); int jt_pcc_del(int argc, char **argv); int jt_pcc_clear(int argc, char **argv); int jt_pcc_list(int argc, char **argv); +int jt_pcc_abort(int argc, char **argv); #ifdef HAVE_SERVER_SUPPORT /* lustre_lfsck.c */ -- 1.8.3.1