From c0fa6f7a10d1162f8d49d40ab4f5aba80e72157e Mon Sep 17 00:00:00 2001 From: Lai Siyao Date: Fri, 19 Nov 2021 14:50:34 -0500 Subject: [PATCH] LU-9206 llite: access striped directory with missing stripe This patch allows accessing a striped directory with missing stripes: * lmv_revalidate_slaves() skips the error if a stripe returns -ESHUTDOWN. * add ll_dir_flush(), which returns any error found while reading stripe dir pages, so 'ls' can list dirents on the other stripes and still return an error at the end. Add sanity 33i, update 60g because now ls may fail. Signed-off-by: Lai Siyao Change-Id: I16efd34e02b9855756cc93556e9e52550178f203 Reviewed-on: https://review.whamcloud.com/45631 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Yingjin Qian Reviewed-by: Oleg Drokin --- lustre/include/obd.h | 8 ++++--- lustre/include/obd_class.h | 7 +++--- lustre/llite/dir.c | 55 +++++++++++++++++++++++++++++-------------- lustre/llite/llite_internal.h | 10 +++++--- lustre/llite/llite_nfs.c | 5 ++-- lustre/llite/statahead.c | 6 ++--- lustre/lmv/lmv_intent.c | 4 ++-- lustre/lmv/lmv_obd.c | 20 +++++++++------- lustre/mdc/mdc_request.c | 7 +++--- lustre/tests/sanity.sh | 30 +++++++++++++++++++++-- 10 files changed, 102 insertions(+), 50 deletions(-) diff --git a/lustre/include/obd.h b/lustre/include/obd.h index 971b9fc..d9c014e 100644 --- a/lustre/include/obd.h +++ b/lustre/include/obd.h @@ -959,10 +959,12 @@ struct md_op_data { __u32 op_archive_id; }; -struct md_callback { - int (*md_blocking_ast)(struct ldlm_lock *lock, +struct md_readdir_info { + int (*mr_blocking_ast)(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, void *data, int flag); + /* if striped directory is partially read, the result is stored here */ + int mr_partial_readdir_rc; }; struct md_enqueue_info; @@ -1162,7 +1164,7 @@ struct md_ops { struct ptlrpc_request **); int (*m_read_page)(struct obd_export *, struct md_op_data *, - struct md_callback *cb_op, __u64 hash_offset, + struct md_readdir_info 
*mrinfo, __u64 hash_offset, struct page **ppage); int (*m_unlink)(struct obd_export *, struct md_op_data *, diff --git a/lustre/include/obd_class.h b/lustre/include/obd_class.h index ee7ea9d..bbb571b 100644 --- a/lustre/include/obd_class.h +++ b/lustre/include/obd_class.h @@ -1538,9 +1538,8 @@ static inline int md_file_resync(struct obd_export *exp, static inline int md_read_page(struct obd_export *exp, struct md_op_data *op_data, - struct md_callback *cb_op, - __u64 hash_offset, - struct page **ppage) + struct md_readdir_info *mrinfo, + __u64 hash_offset, struct page **ppage) { int rc; @@ -1551,7 +1550,7 @@ static inline int md_read_page(struct obd_export *exp, lprocfs_counter_incr(exp->exp_obd->obd_md_stats, LPROC_MD_READ_PAGE); - return MDP(exp->exp_obd, read_page)(exp, op_data, cb_op, hash_offset, + return MDP(exp->exp_obd, read_page)(exp, op_data, mrinfo, hash_offset, ppage); } diff --git a/lustre/llite/dir.c b/lustre/llite/dir.c index 4006417..f335e2c 100644 --- a/lustre/llite/dir.c +++ b/lustre/llite/dir.c @@ -143,17 +143,20 @@ * */ struct page *ll_get_dir_page(struct inode *dir, struct md_op_data *op_data, - __u64 offset) + __u64 offset, int *partial_readdir_rc) { - struct md_callback cb_op; - struct page *page; - int rc; + struct md_readdir_info mrinfo = { + .mr_blocking_ast = ll_md_blocking_ast }; + struct page *page; + int rc; - cb_op.md_blocking_ast = ll_md_blocking_ast; - rc = md_read_page(ll_i2mdexp(dir), op_data, &cb_op, offset, &page); + rc = md_read_page(ll_i2mdexp(dir), op_data, &mrinfo, offset, &page); if (rc != 0) return ERR_PTR(rc); + if (partial_readdir_rc && mrinfo.mr_partial_readdir_rc) + *partial_readdir_rc = mrinfo.mr_partial_readdir_rc; + return page; } @@ -180,11 +183,11 @@ void ll_release_page(struct inode *inode, struct page *page, #ifdef HAVE_DIR_CONTEXT int ll_dir_read(struct inode *inode, __u64 *ppos, struct md_op_data *op_data, - struct dir_context *ctx) + struct dir_context *ctx, int *partial_readdir_rc) { #else int 
ll_dir_read(struct inode *inode, __u64 *ppos, struct md_op_data *op_data, - void *cookie, filldir_t filldir) + void *cookie, filldir_t filldir, int *partial_readdir_rc) { #endif struct ll_sb_info *sbi = ll_i2sbi(inode); @@ -203,7 +206,7 @@ int ll_dir_read(struct inode *inode, __u64 *ppos, struct md_op_data *op_data, RETURN(rc); } - page = ll_get_dir_page(inode, op_data, pos); + page = ll_get_dir_page(inode, op_data, pos, partial_readdir_rc); while (rc == 0 && !done) { struct lu_dirpage *dp; @@ -299,7 +302,8 @@ int ll_dir_read(struct inode *inode, __u64 *ppos, struct md_op_data *op_data, le32_to_cpu(dp->ldp_flags) & LDF_COLLIDE); next = pos; - page = ll_get_dir_page(inode, op_data, pos); + page = ll_get_dir_page(inode, op_data, pos, + partial_readdir_rc); } } #ifdef HAVE_DIR_CONTEXT @@ -325,14 +329,15 @@ static int ll_readdir(struct file *filp, void *cookie, filldir_t filldir) struct md_op_data *op_data; struct lu_fid pfid = { 0 }; ktime_t kstart = ktime_get(); + /* result of possible partial readdir */ + int partial_readdir_rc = 0; __u64 pos; int rc; + ENTRY; - if (lfd != NULL) - pos = lfd->lfd_pos; - else - pos = 0; + LASSERT(lfd != NULL); + pos = lfd->lfd_pos; CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p) pos/size%lu/%llu 32bit_api %d\n", @@ -391,13 +396,15 @@ static int ll_readdir(struct file *filp, void *cookie, filldir_t filldir) #ifdef HAVE_DIR_CONTEXT ctx->pos = pos; - rc = ll_dir_read(inode, &pos, op_data, ctx); + rc = ll_dir_read(inode, &pos, op_data, ctx, &partial_readdir_rc); pos = ctx->pos; #else - rc = ll_dir_read(inode, &pos, op_data, cookie, filldir); + rc = ll_dir_read(inode, &pos, op_data, cookie, filldir, + &partial_readdir_rc); #endif - if (lfd != NULL) - lfd->lfd_pos = pos; + lfd->lfd_pos = pos; + if (!lfd->fd_partial_readdir_rc) + lfd->fd_partial_readdir_rc = partial_readdir_rc; if (pos == MDS_DIR_END_OFF) { if (api32) @@ -2329,6 +2336,17 @@ static int ll_dir_release(struct inode *inode, struct file *file) RETURN(ll_file_release(inode, file)); } 
+/* notify error if partially read striped directory */ +static int ll_dir_flush(struct file *file, fl_owner_t id) +{ + struct ll_file_data *lfd = file->private_data; + int rc = lfd->fd_partial_readdir_rc; + + lfd->fd_partial_readdir_rc = 0; + + return rc; +} + const struct file_operations ll_dir_operations = { .llseek = ll_dir_seek, .open = ll_dir_open, @@ -2341,4 +2359,5 @@ const struct file_operations ll_dir_operations = { #endif .unlocked_ioctl = ll_dir_ioctl, .fsync = ll_fsync, + .flush = ll_dir_flush, }; diff --git a/lustre/llite/llite_internal.h b/lustre/llite/llite_internal.h index d066f98..6639f4c 100644 --- a/lustre/llite/llite_internal.h +++ b/lustre/llite/llite_internal.h @@ -911,6 +911,10 @@ struct ll_file_data { * layout version for verification to OST objects */ __u32 fd_layout_version; struct pcc_file fd_pcc_file; + /* striped directory may read partially if some stripe inaccessible, + * -errno is saved here, and will return to user in close(). + */ + int fd_partial_readdir_rc; }; void llite_tunables_unregister(void); @@ -1035,15 +1039,15 @@ extern const struct file_operations ll_dir_operations; extern const struct inode_operations ll_dir_inode_operations; #ifdef HAVE_DIR_CONTEXT int ll_dir_read(struct inode *inode, __u64 *pos, struct md_op_data *op_data, - struct dir_context *ctx); + struct dir_context *ctx, int *partial_readdir_rc); #else int ll_dir_read(struct inode *inode, __u64 *pos, struct md_op_data *op_data, - void *cookie, filldir_t filldir); + void *cookie, filldir_t filldir, int *partial_readdir_rc); #endif int ll_get_mdt_idx(struct inode *inode); int ll_get_mdt_idx_by_fid(struct ll_sb_info *sbi, const struct lu_fid *fid); struct page *ll_get_dir_page(struct inode *dir, struct md_op_data *op_data, - __u64 offset); + __u64 offset, int *partial_readdir_rc); void ll_release_page(struct inode *inode, struct page *page, bool remove); int quotactl_ioctl(struct super_block *sb, struct if_quotactl *qctl); diff --git a/lustre/llite/llite_nfs.c 
b/lustre/llite/llite_nfs.c index ae15e1f..644aac2 100644 --- a/lustre/llite/llite_nfs.c +++ b/lustre/llite/llite_nfs.c @@ -266,9 +266,10 @@ static int ll_get_name(struct dentry *dentry, char *name, inode_lock(dir); #ifdef HAVE_DIR_CONTEXT - rc = ll_dir_read(dir, &pos, op_data, &lgd.ctx); + rc = ll_dir_read(dir, &pos, op_data, &lgd.ctx, NULL); #else - rc = ll_dir_read(dir, &pos, op_data, &lgd, ll_nfs_get_name_filldir); + rc = ll_dir_read(dir, &pos, op_data, &lgd, ll_nfs_get_name_filldir, + NULL); #endif inode_unlock(dir); ll_finish_md_op_data(op_data); diff --git a/lustre/llite/statahead.c b/lustre/llite/statahead.c index 1154516..5922daa 100644 --- a/lustre/llite/statahead.c +++ b/lustre/llite/statahead.c @@ -1067,7 +1067,7 @@ static int ll_statahead_thread(void *arg) } sai->sai_in_readpage = 1; - page = ll_get_dir_page(dir, op_data, pos); + page = ll_get_dir_page(dir, op_data, pos, NULL); ll_unlock_md_op_lsm(op_data); sai->sai_in_readpage = 0; if (IS_ERR(page)) { @@ -1354,7 +1354,7 @@ static int is_first_dirent(struct inode *dir, struct dentry *dentry) *FIXME choose the start offset of the readdir */ - page = ll_get_dir_page(dir, op_data, 0); + page = ll_get_dir_page(dir, op_data, 0, NULL); while (1) { struct lu_dirpage *dp; @@ -1456,7 +1456,7 @@ static int is_first_dirent(struct inode *dir, struct dentry *dentry) */ ll_release_page(dir, page, le32_to_cpu(dp->ldp_flags) & LDF_COLLIDE); - page = ll_get_dir_page(dir, op_data, pos); + page = ll_get_dir_page(dir, op_data, pos, NULL); } } EXIT; diff --git a/lustre/lmv/lmv_intent.c b/lustre/lmv/lmv_intent.c index ef04c5b..97f1d9f 100644 --- a/lustre/lmv/lmv_intent.c +++ b/lustre/lmv/lmv_intent.c @@ -222,8 +222,8 @@ int lmv_revalidate_slaves(struct obd_export *exp, rc = md_intent_lock(tgt->ltd_exp, op_data, &it, &req, cb_blocking, extra_lock_flags); - if (rc == -ENOENT) { - /* skip stripe is not exists */ + if (rc == -ENOENT || rc == -ESHUTDOWN) { + /* skip stripe that doesn't exist or is inaccessible */ rc = 0; 
continue; } diff --git a/lustre/lmv/lmv_obd.c b/lustre/lmv/lmv_obd.c index 1a1e699..e4b82bb 100644 --- a/lustre/lmv/lmv_obd.c +++ b/lustre/lmv/lmv_obd.c @@ -2623,7 +2623,7 @@ struct stripe_dirent { struct lmv_dir_ctxt { struct lmv_obd *ldc_lmv; struct md_op_data *ldc_op_data; - struct md_callback *ldc_cb_op; + struct md_readdir_info *ldc_mrinfo; __u64 ldc_hash; int ldc_count; struct stripe_dirent ldc_stripes[0]; @@ -2725,7 +2725,7 @@ static struct lu_dirent *stripe_dirent_load(struct lmv_dir_ctxt *ctxt, op_data->op_fid2 = oinfo->lmo_fid; op_data->op_data = oinfo->lmo_root; - rc = md_read_page(tgt->ltd_exp, op_data, ctxt->ldc_cb_op, hash, + rc = md_read_page(tgt->ltd_exp, op_data, ctxt->ldc_mrinfo, hash, &stripe->sd_page); op_data->op_fid1 = fid; @@ -2746,6 +2746,7 @@ static struct lu_dirent *stripe_dirent_load(struct lmv_dir_ctxt *ctxt, LASSERT(!ent); /* treat error as eof, so dir can be partially accessed */ stripe->sd_eof = true; + ctxt->ldc_mrinfo->mr_partial_readdir_rc = rc; LCONSOLE_WARN("dir "DFID" stripe %d readdir failed: %d, " "directory is partially accessed!\n", PFID(&ctxt->ldc_op_data->op_fid1), stripe_index, @@ -2847,7 +2848,8 @@ static struct lu_dirent *lmv_dirent_next(struct lmv_dir_ctxt *ctxt) * * \param[in] exp obd export refer to LMV * \param[in] op_data hold those MD parameters of read_entry - * \param[in] cb_op ldlm callback being used in enqueue in mdc_read_entry + * \param[in] mrinfo ldlm callback being used in enqueue in mdc_read_entry, + * and partial readdir result will be stored in it. * \param[in] offset starting hash offset * \param[out] ppage the page holding the entry. 
Note: because the entry * will be accessed in upper layer, so we need hold the @@ -2859,8 +2861,8 @@ static struct lu_dirent *lmv_dirent_next(struct lmv_dir_ctxt *ctxt) */ static int lmv_striped_read_page(struct obd_export *exp, struct md_op_data *op_data, - struct md_callback *cb_op, - __u64 offset, struct page **ppage) + struct md_readdir_info *mrinfo, __u64 offset, + struct page **ppage) { struct page *page = NULL; struct lu_dirpage *dp; @@ -2898,7 +2900,7 @@ static int lmv_striped_read_page(struct obd_export *exp, GOTO(free_page, rc = -ENOMEM); ctxt->ldc_lmv = &exp->exp_obd->u.lmv; ctxt->ldc_op_data = op_data; - ctxt->ldc_cb_op = cb_op; + ctxt->ldc_mrinfo = mrinfo; ctxt->ldc_hash = offset; ctxt->ldc_count = stripe_count; @@ -2971,7 +2973,7 @@ free_page: } static int lmv_read_page(struct obd_export *exp, struct md_op_data *op_data, - struct md_callback *cb_op, __u64 offset, + struct md_readdir_info *mrinfo, __u64 offset, struct page **ppage) { struct obd_device *obd = exp->exp_obd; @@ -2985,7 +2987,7 @@ static int lmv_read_page(struct obd_export *exp, struct md_op_data *op_data, RETURN(-ENODATA); if (unlikely(lmv_dir_striped(op_data->op_mea1))) { - rc = lmv_striped_read_page(exp, op_data, cb_op, offset, ppage); + rc = lmv_striped_read_page(exp, op_data, mrinfo, offset, ppage); RETURN(rc); } @@ -2993,7 +2995,7 @@ static int lmv_read_page(struct obd_export *exp, struct md_op_data *op_data, if (IS_ERR(tgt)) RETURN(PTR_ERR(tgt)); - rc = md_read_page(tgt->ltd_exp, op_data, cb_op, offset, ppage); + rc = md_read_page(tgt->ltd_exp, op_data, mrinfo, offset, ppage); RETURN(rc); } diff --git a/lustre/mdc/mdc_request.c b/lustre/mdc/mdc_request.c index 7eab90d..af9cd83 100644 --- a/lustre/mdc/mdc_request.c +++ b/lustre/mdc/mdc_request.c @@ -1289,7 +1289,6 @@ struct readpage_param { __u64 rp_off; int rp_hash64; struct obd_export *rp_exp; - struct md_callback *rp_cb; }; /** @@ -1405,7 +1404,7 @@ static int mdc_read_page_remote(void *data, struct page *page0) * \param[in] exp 
MDC export * \param[in] op_data client MD stack parameters, transfering parameters * between different layers on client MD stack. - * \param[in] cb_op callback required for ldlm lock enqueue during + * \param[in] mrinfo callback required for ldlm lock enqueue during * read page * \param[in] hash_offset the hash offset of the page to be read * \param[in] ppage the page to be read @@ -1414,7 +1413,7 @@ static int mdc_read_page_remote(void *data, struct page *page0) * errno(<0) get the page failed */ static int mdc_read_page(struct obd_export *exp, struct md_op_data *op_data, - struct md_callback *cb_op, __u64 hash_offset, + struct md_readdir_info *mrinfo, __u64 hash_offset, struct page **ppage) { struct lookup_intent it = { .it_op = IT_READDIR }; @@ -1437,7 +1436,7 @@ static int mdc_read_page(struct obd_export *exp, struct md_op_data *op_data, mapping = dir->i_mapping; rc = mdc_intent_lock(exp, op_data, &it, &enq_req, - cb_op->md_blocking_ast, 0); + mrinfo->mr_blocking_ast, 0); if (enq_req != NULL) ptlrpc_req_finished(enq_req); diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index e788b63..febf499 100755 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -4263,6 +4263,33 @@ test_33h() { } run_test 33h "temp file is located on the same MDT as target" +test_33i() +{ + (( MDSCOUNT < 2 )) && skip "needs >= 2 MDTs" + + local FNAME=$(str_repeat 'f' 250) + + test_mkdir -i 0 -c $MDSCOUNT $DIR/$tdir || error "mkdir $tdir failed" + createmany -o $DIR/$tdir/$FNAME 1000 || error "createmany failed" + + local count + local total + + count=$($LFS getstripe -m $DIR/$tdir/* | grep -cw 1) + + local MDC=$(lctl dl | awk '/MDT0001-mdc-[^M]/ { print $4 }') + + lctl --device %$MDC deactivate + stack_trap "lctl --device %$MDC activate" + ls $DIR/$tdir > /dev/null && error "ls should return an error" + total=$(\ls -l $DIR/$tdir | wc -l) + # "ls -l" will list total in the first line + total=$((total - 1)) + (( total + count == 1000 )) || + error "ls list $total files, 
$count files on MDT1" +} +run_test 33i "striped directory can be accessed when one MDT is down" + TEST_34_SIZE=${TEST_34_SIZE:-2000000000000} test_34a() { rm -f $DIR/f34 @@ -8467,7 +8494,7 @@ test_60g() { awk '/^status/ { print \\\$2 }'" "completed" done - ls -R $DIR/$tdir || error "ls failed" + ls -R $DIR/$tdir rm -rf $DIR/$tdir || error "rmdir failed" } run_test 60g "transaction abort won't cause MDT hung" @@ -23690,7 +23717,6 @@ test_300s() { } run_test 300s "test lfs mkdir -c without -i" - prepare_remote_file() { mkdir $DIR/$tdir/src_dir || error "create remote source failed" -- 1.8.3.1