Whamcloud - gitweb
EX-8236 pcc: abort data copy when clear PCC backend
authorQian Yingjin <qian@ddn.com>
Fri, 8 Dec 2023 09:15:17 +0000 (04:15 -0500)
committerAndreas Dilger <adilger@whamcloud.com>
Tue, 12 Dec 2023 05:39:33 +0000 (05:39 +0000)
This patch adds an option "--abort" for "lctl pcc del|clear"
command tools.
With this option, the user will first set the ATTACH_ABORTING flag on
all in-progress attaching files, and then wait for them to abort
the attach when removing a PCC backend from a client.

Add sanity-pcc/test_108 to verify it.

Change-Id: I4e2f3ec8866e9af45f4524a9f45ee418ef4cb5be
Signed-off-by: Qian Yingjin <qian@ddn.com>
Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/53373
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
lustre/doc/lctl-pcc.8
lustre/doc/llapi_pcc_del.3
lustre/include/obd_support.h
lustre/include/uapi/linux/lustre/lustre_user.h
lustre/llite/pcc.c
lustre/tests/sanity-pcc.sh
lustre/utils/liblustreapi_pcc.c
lustre/utils/obd.c

index b2102f6..82d163a 100644 (file)
@@ -4,9 +4,9 @@ lctl pcc commands used to interact with PCC features.
 .SH SYNOPSIS
 .B lctl pcc add \fR<\fImntpath\fR> <\fIpccpath\fR> [\fB--param\fR|\fB-p\fR <\fIparam\fR>]
 .br
-.B lctl pcc del [\fB--keep\fR|\fB-k\fR] [\fB--wait\fR|\fB-w\fR] <\fImntpath\fR> <\fIpccpath\fR>
+.B lctl pcc del [\fB--keep\fR|\fB-k\fR] [\fB--wait\fR|\fB-w\fR] [\fB--abort\fR|\fB-a\fR] <\fImntpath\fR> <\fIpccpath\fR>
 .br
-.B lctl pcc clear [\fB--keep\fR|\fB-k\fR] [\fB--wait\fR|\fB-w\fR] <\fImntpath\fR>
+.B lctl pcc clear [\fB--keep\fR|\fB-k\fR] [\fB--wait\fR|\fB-w\fR] [\fB--abort\fR|\fB-a\fR] <\fImntpath\fR>
 .br
 .B lctl pcc list <\fImntpath\fR>
 .SH DESCRIPTION
@@ -66,6 +66,9 @@ still running at this client.
 .B --wait | -w
 The caller must wait all in-progress attaches finished when remove a PCC
 backend from a client.
+.B --abort | -a
+The caller tries to abort all in-progress attaches when removing a PCC backend
+from a client.
 .TP
 .SH SEE ALSO
 .BR lfs (1),
index f52e274..c0a1411 100644 (file)
@@ -20,10 +20,11 @@ By default, when remove a PCC backend from a client, the action is to scan the
 PCC backend fs, uncache (detach and remove) all scanned PCC copies from PCC by
 FIDs. The input parameter
 .IR flags
-currently has two flag values
-.B PCC_CLEANUP_FL_KEEP_DATA
+currently has three flag values
+.B PCC_CLEANUP_FL_KEEP_DATA,
+.B PCC_CLEANUP_FL_WAIT
 and
-.B PCC_CLEANUP_FL_WAIT.
+.B PCC_CLEANUP_FL_ABORT.
 
 .B PCC_CLEANUP_FL_KEEP_DATA
 means it removes the PCC backend from the Lustre client, but retains
@@ -33,6 +34,8 @@ this client.
 .B PCC_CLEANUP_FL_WAIT
 means the caller must wait for the in-progress attaches finished when remove
 the PCC backend from the client.
+.B PCC_CLEANUP_FL_ABORT
+means the caller tries to abort all in-progress attaches on the PCC backend.
 .SH RETURN VALUES
 .PP
 .B llapi_pcc_del()
index c47272e..afe57cd 100644 (file)
@@ -627,6 +627,7 @@ extern char obd_jobid_var[];
 #define OBD_FAIL_LLITE_RACE_MOUNT                  0x1417
 #define OBD_FAIL_LLITE_PAGE_ALLOC                  0x1418
 #define OBD_FAIL_LLITE_OPEN_DELAY                  0x1419
+#define OBD_FAIL_LLITE_PCC_COPYDATA_PAUSE          0x141a
 #define OBD_FAIL_LLITE_XATTR_PAUSE                 0x1420
 #define OBD_FAIL_LLITE_PAGE_INVALIDATE_PAUSE       0x1421
 #define OBD_FAIL_LLITE_READPAGE_PAUSE              0x1422
index 0970f7b..9a0caeb 100644 (file)
@@ -2926,6 +2926,8 @@ enum lu_pcc_cleanup_flags {
        PCC_CLEANUP_FL_KEEP_DATA        = 0x1,
        /* Wait in-progress attaches finished when remove the PCC backend */
        PCC_CLEANUP_FL_WAIT             = 0x2,
+       /* Abort in-progress attaches when removing the PCC backend */
+       PCC_CLEANUP_FL_ABORT            = 0x4,
 };
 
 enum lu_project_type {
index 3a7cd62..a9aaa11 100644 (file)
@@ -1920,6 +1920,7 @@ static inline void pcc_readonly_attach_fini(struct inode *inode)
 {
        pcc_inode_lock(inode);
        ll_i2info(inode)->lli_pcc_state &= ~PCC_STATE_FL_ATTACHING;
+       ll_i2info(inode)->lli_pcc_state &= ~PCC_STATE_FL_ATTACH_ABORTING;
        pcc_inode_unlock(inode);
 }
 
@@ -3957,6 +3958,7 @@ static ssize_t pcc_copy_data_dio(struct pcc_super *super, struct file *lu_file,
                       ll_i2sbi(file_inode(lu_file))->ll_fsname, fidstring,
                       pcc_filepath, super->pccs_lu_pathname, iosize,
                       numthreads, current->pid, rc, rc);
+               rc = rc > 0 ? -EIO : rc;
        }
        RETURN(rc);
 }
@@ -3965,6 +3967,7 @@ static ssize_t pcc_copy_data(struct pcc_super *super, struct file *lu_file,
                             struct file *pcc_file, char *pcc_pathname,
                             __u64 size, bool atomic_open_locked, bool use_dio)
 {
+       CFS_FAIL_TIMEOUT(OBD_FAIL_LLITE_PCC_COPYDATA_PAUSE, cfs_fail_val);
        if (use_dio) {
 #ifndef HAVE_INODE_RWSEM
                int rc;
@@ -4229,6 +4232,7 @@ out_put:
        }
 out_unlock:
        lli->lli_pcc_state &= ~PCC_STATE_FL_ATTACHING;
+       lli->lli_pcc_state &= ~PCC_STATE_FL_ATTACH_ABORTING;
        pcc_inode_unlock(inode);
        revert_creds(old_cred);
        RETURN(rc);
@@ -4539,12 +4543,11 @@ repeat:
                                lli->lli_pcc_state |=
                                        PCC_STATE_FL_ATTACH_ABORTING;
                        pcc_inode_unlock(inode);
+                       if (!(*flags & PCC_DETACH_FL_ATTACHING_WAIT))
+                               RETURN(0);
                        while (lli->lli_pcc_state & PCC_STATE_FL_ATTACHING)
                                msleep(125);
                        pcc_inode_lock(inode);
-                       if (*flags & PCC_DETACH_FL_ATTACH_ABORT)
-                               lli->lli_pcc_state &=
-                                       ~PCC_STATE_FL_ATTACH_ABORTING;
                        GOTO(repeat, rc);
                }
                *flags |= PCC_DETACH_FL_ATTACHING;
index d84910e..42f98b3 100755 (executable)
@@ -5024,6 +5024,85 @@ test_107() {
 }
 run_test 107 "Wait for PCC atatch finished on PCC detach command"
 
+test_108_base() {
+       local loopfile="$TMP/$tfile"
+       local mntpt="/mnt/pcc.$tdir"
+       local hsm_root="$mntpt/$tdir"
+       local file=$DIR/$tfile
+       local cnt=100
+
+       $LCTL get_param -n mdc.*.connect_flags | grep -q pcc_ro ||
+               skip "Server does not support PCC-RO"
+
+       setup_loopdev client $loopfile $mntpt 600
+       mkdir -p $hsm_root || error "mkdir $hsm_root failed"
+       setup_pcc_mapping client \
+               "projid={100}\ roid=$HSM_ARCHIVE_NUMBER\ pccro=1"
+
+       dd if=/dev/zero of=$file bs=1M count=$cnt ||
+               error "failed to write $file"
+
+       local pid
+
+       #define OBD_FAIL_LLITE_PCC_COPYDATA_PAUSE       0x141a
+       $LCTL set_param fail_loc=0x8000141a fail_val=5
+       $LFS pcc attach $file &
+       pid=$!
+       sleep 1
+       $LFS pcc state $file
+       $LFS pcc detach --abort $file ||
+               error "failed to abort attch for $file"
+       wait $pid && error "attach $file should fail"
+       check_lpcc_state $file "none" client
+
+       #define OBD_FAIL_LLITE_PCC_COPYDATA_PAUSE       0x141a
+       $LCTL set_param fail_loc=0x8000141a fail_val=5
+       $LFS pcc attach $file &
+       sleep 1
+       $LFS pcc state $file
+       $LFS pcc detach --wait --abort $file ||
+               error "failed to abort attch for $file"
+       check_lpcc_state $file "none" client
+
+       #define OBD_FAIL_LLITE_PCC_COPYDATA_PAUSE       0x141a
+       $LCTL set_param fail_loc=0x8000141a fail_val=5
+       $LFS pcc attach $file &
+       sleep 1
+       $LFS pcc state $file
+       $LCTL pcc clear -v --wait --abort $MOUNT
+}
+
+test_108a() {
+       local dio_attach_iosize
+
+       # disable attach via direct I/O (DIO)
+       dio_attach_iosize=$($LCTL get_param -n llite.*.pcc_dio_attach_iosize_mb | head -n 1)
+       stack_trap "$LCTL set_param llite.*.pcc_dio_attach_iosize_mb=$dio_attach_iosize"
+       $LCTL set_param llite.*.pcc_dio_attach_iosize_mb=0
+       test_108_base
+}
+run_test 108a "Test for in-progress attach abort (BIO)"
+
+test_108b() {
+       local dio_attach_threshold
+       local dio_attach_iosize
+       local dio_attach_threads
+
+       dio_attach_threshold=$($LCTL get_param -n llite.*.pcc_dio_attach_threshold | head -n 1)
+       stack_trap "$LCTL set_param llite.*.pcc_dio_attach_threshold=$dio_attach_threshold"
+       dio_attach_iosize=$($LCTL get_param -n llite.*.pcc_dio_attach_iosize_mb | head -n 1)
+       stack_trap "$LCTL set_param llite.*.pcc_dio_attach_iosize_mb=$dio_attach_iosize"
+       dio_attach_threads=$($LCTL get_param -n llite.*.pcc_dio_attach_threads_per_file | head -n 1)
+       stack_trap "$LCTL set_param llite.*.pcc_dio_attach_threads_per_file=$dio_attach_threads"
+
+       # enable DIO attach
+       $LCTL set_param llite.*.pcc_dio_attach_threshold=1048576
+       $LCTL set_param llite.*.pcc_dio_attach_iosize_mb=1M
+       $LCTL set_param llite.*.pcc_dio_attach_threads_per_file=1
+       test_108_base
+}
+run_test 108b "Test for in-progress attach abort (DIO)"
+
 wait_lpcc_purge_scan_end()  {
        local pidfile=$1
        local statsfile=$2
index 4d5d66e..52cadba 100644 (file)
@@ -618,7 +618,11 @@ static int llapi_pcc_scan_detach(const char *pname, const char *fname,
                             fidname);
        } else if (detach.pccd_flags & PCC_DETACH_FL_ATTACHING) {
                llapi_printf(LLAPI_MSG_DEBUG,
-                            "'%s' is being attached, skip it", fidname);
+                            "'%s' is being attached, skip it\n", fidname);
+       } else if (detach.pccd_flags & PCC_DETACH_FL_ATTACH_ABORT) {
+               llapi_printf(LLAPI_MSG_DEBUG,
+                            "'%s' is being aborted, will remove it later\n",
+                            fidname);
        } else {
                snprintf(fullname, sizeof(fullname), "%s/%s", pname, fidname);
                llapi_printf(LLAPI_MSG_DEBUG,
@@ -657,7 +661,9 @@ static int llapi_pcc_del_internal(const char *mntpath, const char *pccpath,
        char cmd[PATH_MAX];
        int rc;
 
-       if (flags & PCC_CLEANUP_FL_WAIT) {
+
+       if ((flags & (PCC_CLEANUP_FL_WAIT | PCC_CLEANUP_FL_ABORT)) ==
+           PCC_CLEANUP_FL_WAIT) {
                snprintf(cmd, sizeof(cmd), "%s %s",
                         PCC_CMDNAME_DEL_WAIT, pccpath);
                hsc.hsc_flags = PCC_DETACH_FL_ATTACHING_WAIT;
@@ -683,7 +689,28 @@ static int llapi_pcc_del_internal(const char *mntpath, const char *pccpath,
                return rc;
        }
 
+       if (flags & PCC_CLEANUP_FL_ABORT) {
+               hsc.hsc_flags |= PCC_DETACH_FL_ATTACH_ABORT;
+               llapi_printf(LLAPI_MSG_DEBUG,
+                            "Scan PCC, try to abort in-progress attaches\n");
+       }
+
        rc = hsm_scan_process(&hsc);
+       if (rc == 0) {
+               /*
+                * The first scan sets the PCC_STATE_FL_ATTACH_ABORTING flag
+                * on all in-progress attaches;
+                * the second scan waits for the in-progress attaches to
+                * finish one by one.
+                */
+               if ((flags & (PCC_CLEANUP_FL_WAIT | PCC_CLEANUP_FL_ABORT)) ==
+                   (PCC_CLEANUP_FL_WAIT | PCC_CLEANUP_FL_ABORT)) {
+                       llapi_printf(LLAPI_MSG_DEBUG,
+                                    "Scan PCC to wait attaches finished\n");
+                       hsc.hsc_flags = PCC_DETACH_FL_ATTACHING_WAIT;
+                       rc = hsm_scan_process(&hsc);
+               }
+       }
        close(hsc.hsc_mntfd);
 
        return rc;
index dbc0b71..0f60916 100644 (file)
@@ -6203,6 +6203,7 @@ int jt_pcc_del(int argc, char **argv)
        { .val = 'k',   .name = "keep-data",    .has_arg = no_argument },
        { .val = 'v',   .name = "verbose",      .has_arg = no_argument },
        { .val = 'w',   .name = "wait",         .has_arg = no_argument },
+       { .val = 'a',   .name = "abort",        .has_arg = no_argument },
        { .name = NULL } };
        char fsname[MAX_OBD_NAME + 1];
        const char *mntpath;
@@ -6220,6 +6221,9 @@ int jt_pcc_del(int argc, char **argv)
                case 'w':
                        flags |= PCC_CLEANUP_FL_WAIT;
                        break;
+               case 'a':
+                       flags |= PCC_CLEANUP_FL_ABORT;
+                       break;
                case 'v':
                        verbose++;
                        break;
@@ -6264,6 +6268,7 @@ int jt_pcc_clear(int argc, char **argv)
        { .val = 'k',   .name = "keep-data",    .has_arg = no_argument },
        { .val = 'v',   .name = "verbose",      .has_arg = no_argument },
        { .val = 'w',   .name = "wait",         .has_arg = no_argument },
+       { .val = 'a',   .name = "abort",        .has_arg = no_argument },
        { .name = NULL } };
        char fsname[MAX_OBD_NAME + 1];
        const char *mntpath;
@@ -6280,6 +6285,9 @@ int jt_pcc_clear(int argc, char **argv)
                case 'w':
                        flags |= PCC_CLEANUP_FL_WAIT;
                        break;
+               case 'a':
+                       flags |= PCC_CLEANUP_FL_ABORT;
+                       break;
                case 'v':
                        verbose++;
                        break;