Whamcloud - gitweb
EX-8236 pcc: abort data copy via ll_fid_path_copy
author: Qian Yingjin <qian@ddn.com>
Fri, 10 Nov 2023 09:23:46 +0000 (04:23 -0500)
committer: Andreas Dilger <adilger@whamcloud.com>
Wed, 22 Nov 2023 21:07:44 +0000 (21:07 +0000)
For data copying via ll_fid_path_copy in direct I/O mode in user
space, the client calls llapi_pcc_state_get_fd() to obtain the file
PCC state. If it is marked with PCC_STATE_FL_ATTACH_ABORTING, the
data copy process ll_fid_path_copy exits immediately.
To reduce the overhead of these checks, we do not check on every
data copy iteration; instead, we check once every fixed number of
I/Os (32 by default). For an I/O size of 32MiB, this means checking
once per second at 1GiB/s. As a result, there may be some time lag
before the copy tool finally quits.

Change-Id: I20631e5481a7e97d7a1ed0729bcd269ef6248a2c
Signed-off-by: Qian Yingjin <qian@ddn.com>
Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/53073
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
lustre/llite/file.c
lustre/utils/ll_fid_path_copy.c

index a1846dc..d48bd58 100644 (file)
@@ -4748,7 +4748,6 @@ out_detach_free:
                OBD_FREE_PTR(detach);
                RETURN(rc);
        }
-#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 18, 53, 0)
        case LL_IOC_PCC_STATE: {
                struct lu_pcc_state __user *ustate =
                        (struct lu_pcc_state __user *)arg;
@@ -4772,7 +4771,6 @@ out_state:
                OBD_FREE_PTR(state);
                RETURN(rc);
        }
-#endif
 #ifdef HAVE_LUSTRE_CRYPTO
        case LL_IOC_SET_ENCRYPTION_POLICY:
                if (!ll_sbi_has_encrypt(ll_i2sbi(inode)))
index 9e868f8..7b2dd83 100644 (file)
@@ -29,6 +29,8 @@ void usage(void)
 "ll_fid_path_copy");
 }
 
+static bool abort_copy;
+
 struct chunk_data {
        ssize_t* copied_total;
        int extra_open_flags;
@@ -39,6 +41,34 @@ struct chunk_data {
        int dst_fd;
 };
 
+#define PERIODIC_CHECK_IOCOUNT 32
+
+static bool check_abort_copy(int fd, int *iocount)
+{
+       if (abort_copy)
+               return true;
+
+       (*iocount)++;
+       if (*iocount >= PERIODIC_CHECK_IOCOUNT) {
+               struct lu_pcc_state state;
+               int rc;
+
+               *iocount = 0;
+               rc = llapi_pcc_state_get_fd(fd, &state);
+               if (rc) {
+                       fprintf(stderr,
+                               "%s: failed to get PCC state: rc = %d\n",
+                               program_invocation_short_name, rc);
+                       return false;
+               }
+
+               if (state.pccs_flags & PCC_STATE_FL_ATTACH_ABORTING)
+                       abort_copy = true;
+       }
+
+       return abort_copy;
+}
+
 void *copy_data_threaded(void *arg)
 {
        struct chunk_data *chunk = arg;
@@ -53,6 +83,7 @@ void *copy_data_threaded(void *arg)
        void* buf = NULL;
        ssize_t rc = 0;
        long int thread = syscall(__NR_gettid);
+       int iocount = 0;
 
        rc = posix_memalign(&buf, page_size, iosize);
        if (rc) {
@@ -112,6 +143,9 @@ void *copy_data_threaded(void *arg)
                offset += wsz;
                if (offset == end_offset)
                        break;
+
+               if (check_abort_copy(src_fd, &iocount))
+                       break;
        }
 
        free(buf);