From b87e99adf44b80308f341cbf143374034f627ab9 Mon Sep 17 00:00:00 2001 From: Jinshan Xiong Date: Fri, 6 Sep 2013 16:26:25 -0700 Subject: [PATCH] LU-3817 llite: Truncate to restore file Truncating up is safe, so it won't trigger a restore. Copy optimization for truncating down: only copy the part below the truncate length. If a file is truncated to zero, it will usually be followed by a write, so I chose to restore the file and set the correct stripe information. Signed-off-by: Jinshan Xiong Change-Id: I5e64893bdeaeb7323566154e3469f3a0ce7797d4 Reviewed-on: http://review.whamcloud.com/7505 Tested-by: Hudson Tested-by: Maloo Reviewed-by: jacques-Charles Lafoucriere Reviewed-by: Henri Doreau Reviewed-by: Aurelien Degremont Reviewed-by: Oleg Drokin --- lustre/llite/file.c | 5 ++- lustre/llite/llite_internal.h | 2 +- lustre/llite/llite_lib.c | 96 ++++++++++++++++++++--------------------- lustre/llite/vvp_io.c | 7 +-- lustre/mdt/mdt_coordinator.c | 6 ++- lustre/mdt/mdt_hsm_cdt_client.c | 29 +++++++------ lustre/tests/sanity-hsm.sh | 69 ++++++++++++----------------- lustre/tests/sanity.sh | 15 +------ lustre/utils/lhsmtool_posix.c | 9 ++-- 9 files changed, 108 insertions(+), 130 deletions(-) diff --git a/lustre/llite/file.c b/lustre/llite/file.c index 75ce1d8..ef524f7 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -3851,7 +3851,7 @@ again: /** * This function send a restore request to the MDT */ -int ll_layout_restore(struct inode *inode) +int ll_layout_restore(struct inode *inode, loff_t offset, __u64 length) { struct hsm_user_request *hur; int len, rc; @@ -3868,7 +3868,8 @@ int ll_layout_restore(struct inode *inode) hur->hur_request.hr_flags = 0; memcpy(&hur->hur_user_item[0].hui_fid, &ll_i2info(inode)->lli_fid, sizeof(hur->hur_user_item[0].hui_fid)); - hur->hur_user_item[0].hui_extent.length = -1; + hur->hur_user_item[0].hui_extent.offset = offset; + hur->hur_user_item[0].hui_extent.length = length; hur->hur_request.hr_itemcount = 1; rc = obd_iocontrol(LL_IOC_HSM_REQUEST, 
cl_i2sbi(inode)->ll_md_exp, len, hur, NULL); diff --git a/lustre/llite/llite_internal.h b/lustre/llite/llite_internal.h index d8f64b1..4ee8ed3 100644 --- a/lustre/llite/llite_internal.h +++ b/lustre/llite/llite_internal.h @@ -1651,7 +1651,7 @@ enum { int ll_layout_conf(struct inode *inode, const struct cl_object_conf *conf); int ll_layout_refresh(struct inode *inode, __u32 *gen); -int ll_layout_restore(struct inode *inode); +int ll_layout_restore(struct inode *inode, loff_t start, __u64 length); int ll_xattr_init(void); void ll_xattr_fini(void); diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c index 1a9d1b4..6347213 100644 --- a/lustre/llite/llite_lib.c +++ b/lustre/llite/llite_lib.c @@ -1461,14 +1461,6 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import) LTIME_S(attr->ia_mtime), LTIME_S(attr->ia_ctime), cfs_time_current_sec()); - /* If we are changing file size, file content is modified, flag it. */ - if (attr->ia_valid & ATTR_SIZE) { - attr->ia_valid |= MDS_OPEN_OWNEROVERRIDE; - spin_lock(&lli->lli_lock); - lli->lli_flags |= LLIF_DATA_MODIFIED; - spin_unlock(&lli->lli_lock); - } - /* We always do an MDS RPC, even if we're only changing the size; * only the MDS knows whether truncate() should fail with -ETXTBUSY */ @@ -1483,13 +1475,6 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import) down_write(&lli->lli_trunc_sem); } - memcpy(&op_data->op_attr, attr, sizeof(*attr)); - - /* Open epoch for truncate. 
*/ - if (exp_connect_som(ll_i2mdexp(inode)) && - (attr->ia_valid & (ATTR_SIZE | ATTR_MTIME | ATTR_MTIME_SET))) - op_data->op_flags = MF_EPOCH_OPEN; - /* truncate on a released file must failed with -ENODATA, * so size must not be set on MDS for released file * but other attributes must be set @@ -1503,27 +1488,38 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import) if (lsm && lsm->lsm_pattern & LOV_PATTERN_F_RELEASED) file_is_released = true; ccc_inode_lsm_put(inode, lsm); - } - /* if not in HSM import mode, clear size attr for released file - * we clear the attribute send to MDT in op_data, not the original - * received from caller in attr which is used later to - * decide return code */ - if (file_is_released && (attr->ia_valid & ATTR_SIZE) && !hsm_import) - op_data->op_attr.ia_valid &= ~ATTR_SIZE; + if (!hsm_import && attr->ia_valid & ATTR_SIZE) { + if (file_is_released) { + rc = ll_layout_restore(inode, 0, attr->ia_size); + if (rc < 0) + GOTO(out, rc); - rc = ll_md_setattr(dentry, op_data, &mod); - if (rc) - GOTO(out, rc); + file_is_released = false; + ll_layout_refresh(inode, &gen); + } - /* truncate failed (only when non HSM import), others succeed */ - if (file_is_released) { - if ((attr->ia_valid & ATTR_SIZE) && !hsm_import) - GOTO(out, rc = -ENODATA); - else - GOTO(out, rc = 0); + /* If we are changing file size, file content is + * modified, flag it. */ + attr->ia_valid |= MDS_OPEN_OWNEROVERRIDE; + spin_lock(&lli->lli_lock); + lli->lli_flags |= LLIF_DATA_MODIFIED; + spin_unlock(&lli->lli_lock); + op_data->op_bias |= MDS_DATA_MODIFIED; + } } + memcpy(&op_data->op_attr, attr, sizeof(*attr)); + + /* Open epoch for truncate. 
*/ + if (exp_connect_som(ll_i2mdexp(inode)) && !hsm_import && + (attr->ia_valid & (ATTR_SIZE | ATTR_MTIME | ATTR_MTIME_SET))) + op_data->op_flags = MF_EPOCH_OPEN; + + rc = ll_md_setattr(dentry, op_data, &mod); + if (rc) + GOTO(out, rc); + /* RPC to MDT is sent, cancel data modification flag */ if (rc == 0 && (op_data->op_bias & MDS_DATA_MODIFIED)) { spin_lock(&lli->lli_lock); @@ -1531,31 +1527,31 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import) spin_unlock(&lli->lli_lock); } - ll_ioepoch_open(lli, op_data->op_ioepoch); - if (!S_ISREG(inode->i_mode)) - GOTO(out, rc = 0); + ll_ioepoch_open(lli, op_data->op_ioepoch); + if (!S_ISREG(inode->i_mode) || file_is_released) + GOTO(out, rc = 0); if (attr->ia_valid & (ATTR_SIZE | ATTR_ATIME | ATTR_ATIME_SET | ATTR_MTIME | ATTR_MTIME_SET)) { - /* For truncate and utimes sending attributes to OSTs, setting - * mtime/atime to the past will be performed under PW [0:EOF] - * extent lock (new_size:EOF for truncate). It may seem - * excessive to send mtime/atime updates to OSTs when not - * setting times to past, but it is necessary due to possible - * time de-synchronization between MDT inode and OST objects */ - rc = ll_setattr_ost(inode, attr); + /* For truncate and utimes sending attributes to OSTs, setting + * mtime/atime to the past will be performed under PW [0:EOF] + * extent lock (new_size:EOF for truncate). 
It may seem + * excessive to send mtime/atime updates to OSTs when not + * setting times to past, but it is necessary due to possible + * time de-synchronization between MDT inode and OST objects */ + rc = ll_setattr_ost(inode, attr); } - EXIT; + EXIT; out: - if (op_data) { - if (op_data->op_ioepoch) { - rc1 = ll_setattr_done_writing(inode, op_data, mod); - if (!rc) - rc = rc1; - } - ll_finish_md_op_data(op_data); - } + if (op_data) { + if (op_data->op_ioepoch) { + rc1 = ll_setattr_done_writing(inode, op_data, mod); + if (!rc) + rc = rc1; + } + ll_finish_md_op_data(op_data); + } if (!S_ISDIR(inode->i_mode)) { up_write(&lli->lli_trunc_sem); mutex_lock(&inode->i_mutex); diff --git a/lustre/llite/vvp_io.c b/lustre/llite/vvp_io.c index b644bd8..b10de06 100644 --- a/lustre/llite/vvp_io.c +++ b/lustre/llite/vvp_io.c @@ -121,8 +121,9 @@ static void vvp_io_fini(const struct lu_env *env, const struct cl_io_slice *ios) struct cl_io *io = ios->cis_io; struct cl_object *obj = io->ci_obj; struct ccc_io *cio = cl2ccc_io(env, ios); + struct inode *inode = ccc_object_inode(obj); - CLOBINVRNT(env, obj, ccc_object_invariant(obj)); + CLOBINVRNT(env, obj, ccc_object_invariant(obj)); CDEBUG(D_VFSTRACE, DFID" ignore/verify layout %d/%d, layout version %d " "restore needed %d\n", @@ -136,7 +137,7 @@ static void vvp_io_fini(const struct lu_env *env, const struct cl_io_slice *ios) /* file was detected release, we need to restore it * before finishing the io */ - rc = ll_layout_restore(ccc_object_inode(obj)); + rc = ll_layout_restore(inode, 0, OBD_OBJECT_EOF); /* if restore registration failed, no restart, * we will return -ENODATA */ /* The layout will change after restore, so we need to @@ -161,7 +162,7 @@ static void vvp_io_fini(const struct lu_env *env, const struct cl_io_slice *ios) __u32 gen = 0; /* check layout version */ - ll_layout_refresh(ccc_object_inode(obj), &gen); + ll_layout_refresh(inode, &gen); io->ci_need_restart = cio->cui_layout_gen != gen; if (io->ci_need_restart) { 
CDEBUG(D_VFSTRACE, diff --git a/lustre/mdt/mdt_coordinator.c b/lustre/mdt/mdt_coordinator.c index f98ec0e..fa92f75 100644 --- a/lustre/mdt/mdt_coordinator.c +++ b/lustre/mdt/mdt_coordinator.c @@ -725,6 +725,10 @@ static int hsm_restore_cb(const struct lu_env *env, larr = (struct llog_agent_req_rec *)hdr; hai = &larr->arr_hai; + if (hai->hai_cookie > cdt->cdt_last_cookie) + /* update the cookie to avoid collision */ + cdt->cdt_last_cookie = hai->hai_cookie + 1; + if (hai->hai_action != HSMA_RESTORE || agent_req_in_final_state(larr->arr_status)) RETURN(0); @@ -741,7 +745,7 @@ static int hsm_restore_cb(const struct lu_env *env, crh->extent.end = hai->hai_extent.offset + hai->hai_extent.length; */ crh->crh_extent.start = 0; - crh->crh_extent.end = OBD_OBJECT_EOF; + crh->crh_extent.end = hai->hai_extent.length; /* get the layout lock */ mdt_lock_reg_init(&crh->crh_lh, LCK_EX); child = mdt_object_find_lock(mti, &crh->crh_fid, &crh->crh_lh, diff --git a/lustre/mdt/mdt_hsm_cdt_client.c b/lustre/mdt/mdt_hsm_cdt_client.c index 8047c87..285818b 100644 --- a/lustre/mdt/mdt_hsm_cdt_client.c +++ b/lustre/mdt/mdt_hsm_cdt_client.c @@ -368,38 +368,39 @@ int mdt_hsm_add_actions(struct mdt_thread_info *mti, /* if restore, take an exclusive lock on layout */ if (hai->hai_action == HSMA_RESTORE) { - struct cdt_restore_handle *crh; - struct mdt_object *child; + struct cdt_restore_handle *crh; + + /* in V1 only whole file is supported. */ + if (hai->hai_extent.offset != 0) + GOTO(out, rc = -EPROTO); OBD_SLAB_ALLOC_PTR(crh, mdt_hsm_cdt_kmem); if (crh == NULL) GOTO(out, rc = -ENOMEM); crh->crh_fid = hai->hai_fid; - /* in V1 only whole file is supported - crh->extent.start = hai->hai_extent.offset; - crh->extent.end = hai->hai_extent.offset + - hai->hai_extent.length; - */ + /* in V1 only whole file is supported. However the + * restore may be due to truncate. 
*/ crh->crh_extent.start = 0; - crh->crh_extent.end = OBD_OBJECT_EOF; + crh->crh_extent.end = hai->hai_extent.length; mdt_lock_reg_init(&crh->crh_lh, LCK_EX); - child = mdt_object_find_lock(mti, &crh->crh_fid, - &crh->crh_lh, - MDS_INODELOCK_LAYOUT); - if (IS_ERR(child)) { - rc = PTR_ERR(child); + obj = mdt_object_find_lock(mti, &crh->crh_fid, + &crh->crh_lh, + MDS_INODELOCK_LAYOUT); + if (IS_ERR(obj)) { + rc = PTR_ERR(obj); CERROR("%s: cannot take layout lock for " DFID": rc = %d\n", mdt_obd_name(mdt), PFID(&crh->crh_fid), rc); OBD_SLAB_FREE_PTR(crh, mdt_hsm_cdt_kmem); GOTO(out, rc); } + /* we choose to not keep a keep a reference * on the object during the restore time which can be * very long */ - mdt_object_put(mti->mti_env, child); + mdt_object_put(mti->mti_env, obj); mutex_lock(&cdt->cdt_restore_lock); list_add_tail(&crh->crh_list, &cdt->cdt_restore_hdl); diff --git a/lustre/tests/sanity-hsm.sh b/lustre/tests/sanity-hsm.sh index 971a38a..527d3f0 100644 --- a/lustre/tests/sanity-hsm.sh +++ b/lustre/tests/sanity-hsm.sh @@ -15,7 +15,7 @@ ONLY=${ONLY:-"$*"} # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT! 
# skip test cases failed before landing - Jinshan -ALWAYS_EXCEPT="$SANITY_HSM_EXCEPT 12a 12b 12n 13 30a 31a 34 35 36 58 59" +ALWAYS_EXCEPT="$SANITY_HSM_EXCEPT 12a 12b 12n 13 30a 31a 34 35 36" ALWAYS_EXCEPT="$ALWAYS_EXCEPT 110a 200 201 221 222a 223a 223b 225" LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)} @@ -2127,13 +2127,15 @@ test_57() { } run_test 57 "Archive a file with dirty cache on another node" -test_58() { - # test needs a running copytool - copytool_setup +truncate_released_file() { + local src_file=$1 + local trunc_to=$2 - mkdir -p $DIR/$tdir + local sz=$(stat -c %s $src_file) local f=$DIR/$tdir/$tfile - local fid=$(make_small $f) + local fid=$(copy_file $1 $f) + local ref=$f-ref + cp $f $f-ref $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f || error "could not archive file" @@ -2141,63 +2143,46 @@ test_58() { $LFS hsm_release $f || error "could not release file" - $TRUNCATE $f 0 || error "truncate failed" + $TRUNCATE $f $trunc_to || error "truncate failed" sync - local sz=$(stat -c %s $f) - [[ $sz == 0 ]] || error "size after truncate is $sz != 0" + local sz1=$(stat -c %s $f) + [[ $sz1 == $trunc_to ]] || + error "size after trunc: $sz1 expect $trunc_to, original $sz" $LFS hsm_state $f - check_hsm_flags $f "0x0000000b" local state=$(get_request_state $fid RESTORE) - [[ "$state" == "" ]] || - error "truncate 0 trigs a restore, state = $state" + [[ "$state" == "SUCCEED" ]] || + error "truncate $sz does not trig restore, state = $state" - copytool_cleanup + $TRUNCATE $ref $trunc_to + cmp $ref $f || error "file data wrong after truncate" + + rm -f $f $f-ref } -run_test 58 "Truncate 0 on a released file must not trigger restore" -test_59() { +test_58() { # test needs a running copytool copytool_setup mkdir -p $DIR/$tdir - local f=$DIR/$tdir/$tfile - local fid=$(copy_file /etc/passwd $f) - local ref=$f-ref - cp $f $ref - local sz=$(stat -c %s $ref) - sz=$((sz / 2)) - $TRUNCATE $ref $sz - $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f || - error 
"could not archive file" - wait_request_state $fid ARCHIVE SUCCEED - - $LFS hsm_release $f || error "could not release file" + local sz=$(stat -c %s /etc/passwd) - $TRUNCATE $f $sz || error "truncate failed" - sync + echo "truncate up from $sz to $((sz*2))" + truncate_released_file /etc/passwd $((sz*2)) - local sz1=$(stat -c %s $f) - [[ $sz1 == $sz ]] || error "size after truncate is $sz1 != $sz" + echo "truncate down from $sz to $((sz/2))" + truncate_released_file /etc/passwd $((sz/2)) - $LFS hsm_state $f - - check_hsm_flags $f "0x0000000b" - - local state=$(get_request_state $fid RESTORE) - [[ "$state" == "SUCCEED" ]] || - error "truncate $sz does not trig a successfull restore,"\ - " state = $state" - - cmp $ref $f || error "file data wrong after truncate" + echo "truncate to 0" + truncate_released_file /etc/passwd 0 copytool_cleanup } -run_test 59 "Truncate != 0 on a released file" +run_test 58 "Truncate a released file will trigger restore" test_90() { file_count=57 diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 72d2d87..b4918ed 100644 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -11442,26 +11442,13 @@ test_229() { # LU-2482, LU-3448 [ $stripe_count -eq 2 ] || error "stripe count not 2 ($stripe_count)" stat $DIR/$tfile || error "failed to stat released file" - $TRUNCATE $DIR/$tfile 200000 && - error "truncate of released file should fail" - - # Ensure that nothing happened anyway. 
- $CHECKSTAT -s 0 $DIR/$tfile || - error "released file size should not change" - - # Stripe count should be no change after truncate - stripe_count=$($GETSTRIPE -c $DIR/$tfile) || error "getstripe failed" - [ $stripe_count -eq 2 ] || error "after trunc: ($stripe_count)" - chown $RUNAS_ID $DIR/$tfile || error "chown $RUNAS_ID $DIR/$tfile failed" chgrp $RUNAS_ID $DIR/$tfile || error "chgrp $RUNAS_ID $DIR/$tfile failed" - touch $DIR/$tfile || - error "touch $DIR/$tfile failed" - + touch $DIR/$tfile || error "touch $DIR/$tfile failed" rm $DIR/$tfile || error "failed to remove released file" } run_test 229 "getstripe/stat/rm/attr changes work on released files" diff --git a/lustre/utils/lhsmtool_posix.c b/lustre/utils/lhsmtool_posix.c index c25ed12..60077d8 100644 --- a/lustre/utils/lhsmtool_posix.c +++ b/lustre/utils/lhsmtool_posix.c @@ -88,7 +88,7 @@ struct options { struct options opt = { .o_copy_attrs = 1, .o_shadow_tree = 1, - .o_verbose = LLAPI_MSG_WARN, + .o_verbose = LLAPI_MSG_INFO, .o_copy_xattrs = 1, .o_report_int = REPORT_INTERVAL_DEFAULT, .o_chunk_size = ONE_MB, @@ -583,8 +583,9 @@ static int ct_copy_data(struct hsm_copyaction_private *hcp, const char *src, errno = 0; /* Don't read beyond a given extent */ - rlen = (hai->hai_extent.length == -1LL) ? - src_st.st_size : hai->hai_extent.length; + rlen = min(hai->hai_extent.length, src_st.st_size); + + CT_DEBUG("Going to copy "LPU64" bytes %s -> %s\n", rlen, src, dst); while (wpos < rlen) { int chunk = (rlen - wpos > opt.o_chunk_size) ? @@ -1748,6 +1749,8 @@ static int ct_run(void) } } + setbuf(stdout, NULL); + rc = llapi_hsm_copytool_register(&ctdata, opt.o_mnt, 0, opt.o_archive_cnt, opt.o_archive_id); if (rc < 0) { -- 1.8.3.1