From 2a600912ae17da551a7365dc0521a8df571b9e8f Mon Sep 17 00:00:00 2001 From: Qian Yingjin Date: Wed, 11 Aug 2021 17:32:06 +0800 Subject: [PATCH] EX-3636 pcc: reset file mapping for the file once mmapped For a file once mmapped and cached on PCC, a new open will set the mapping for the file handle of the PCC copy (@file->f_mapping) to the one of the Lustre file handle. When the file is detached from PCC due to manual detach or layout lock shrinking, the normal I/O (read/write) will auto-attach the file into PCC again during I/O as the layout version is unchanged. However, it still needs to reset the file mapping (@pcc_file->f_mapping) to the mapping of the PCC copy. Otherwise it will cause a panic as follows: [ 935.516823] RIP: 0010:_raw_read_lock+0xa/0x20 [ 935.517077] ll_cl_find+0x19/0x60 [lustre] [ 935.517098] ll_readpage+0x51/0x820 [lustre] [ 935.517110] read_pages+0x122/0x190 [ 935.517119] __do_page_cache_readahead+0x1c1/0x1e0 [ 935.517131] ondemand_readahead+0x1f9/0x2c0 [ 935.517142] pagecache_get_page+0x30/0x2c0 [ 935.517165] generic_file_buffered_read+0x556/0xa00 [ 935.517189] pcc_try_auto_attach+0x3ac/0x400 [lustre] [ 935.517552] pcc_io_init+0x146/0x560 [lustre] [ 935.517906] pcc_file_read_iter+0x24d/0x2b0 [lustre] [ 935.518259] ll_file_read_iter+0x74/0x2e0 [lustre] [ 935.518604] new_sync_read+0x121/0x170 [ 935.518937] vfs_read+0x8a/0x140 This patch adds sanity-pcc test_98 to verify it. I/O for a file that was opened before it was attached into PCC, or that was opened while in ATTACHING state, will fall back to Lustre OSTs. For a later mmap() on the file, the mmap() I/O also needs to fall back to Lustre OSTs and cannot read directly from the local valid cached PCC copy until all fallback file handles are closed, as the mapping of the PCC copy is replaced with the one of the Lustre file when the file is mmapped. Add sanity-pcc test_97 to verify it. We also forbid auto-attaching a file which is still in mmapped I/O. This patch disables "mmap_conv" by default.
Test-Parameters: testlist=sanity-pcc,sanity-pcc,sanity-pcc Signed-off-by: Qian Yingjin Change-Id: I11195b0bdb6fb1d0d68d0b0cd02a0af8ee1fc297 Reviewed-on: https://review.whamcloud.com/44592 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger --- lustre/llite/llite_internal.h | 12 ++++- lustre/llite/llite_lib.c | 1 + lustre/llite/pcc.c | 86 +++++++++++++++++++++++++----- lustre/llite/pcc.h | 2 + lustre/tests/sanity-pcc.sh | 118 ++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 205 insertions(+), 14 deletions(-) diff --git a/lustre/llite/llite_internal.h b/lustre/llite/llite_internal.h index 72a56fb..2ae186d 100644 --- a/lustre/llite/llite_internal.h +++ b/lustre/llite/llite_internal.h @@ -221,7 +221,17 @@ struct ll_inode_info { struct mutex lli_pcc_lock; enum lu_pcc_state_flags lli_pcc_state; atomic_t lli_pcc_mapcnt; - + /* + * I/O for a file previously opened before attach into + * PCC or once opened while in ATTACHING state will + * fall back to Lustre OSTs. + * For a later mmap() on the file, the mmap I/O also + * needs to fall back and cannot read from PCC directly + * until all fallback file handles are closed as we + * replace the mapping of the PCC copy with the one of + * the Lustre file when the file is mmapped. + */ + atomic_t lli_pcc_mapneg; /* * @lli_pcc_generation saves the gobal PCC generation * when the file was successfully attached into PCC. 
diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c index 2ae6c1e..8794e7b 100644 --- a/lustre/llite/llite_lib.c +++ b/lustre/llite/llite_lib.c @@ -1061,6 +1061,7 @@ void ll_lli_init(struct ll_inode_info *lli) lli->lli_pcc_dsflags = PCC_DATASET_INVALID; lli->lli_pcc_generation = 0; atomic_set(&lli->lli_pcc_mapcnt, 0); + atomic_set(&lli->lli_pcc_mapneg, 0); mutex_init(&lli->lli_group_mutex); lli->lli_group_users = 0; lli->lli_group_gid = 0; diff --git a/lustre/llite/pcc.c b/lustre/llite/pcc.c index cdfd551..47501c8 100644 --- a/lustre/llite/pcc.c +++ b/lustre/llite/pcc.c @@ -728,7 +728,6 @@ pcc_parse_value_pairs(struct pcc_cmd *cmd, char *buffer) cmd->u.pccc_add.pccc_hsmtool_type = HSMTOOL_UNKNOWN; /* Enable these features by default */ cmd->u.pccc_add.pccc_flags |= PCC_DATASET_AUTO_ATTACH | - PCC_DATASET_MMAP_CONV | PCC_DATASET_PROJ_QUOTA; break; case PCC_DEL_DATASET: @@ -1924,6 +1923,13 @@ static int pcc_try_auto_attach(struct inode *inode, bool *cached, if (list_empty(&super->pccs_datasets)) RETURN(0); + if (lli->lli_pcc_state & PCC_STATE_FL_ATTACHING) + RETURN(0); + + /* Forbid to auto attach the file once mmapped into PCC. */ + if (atomic_read(&lli->lli_pcc_mapcnt) > 0) + RETURN(0); + /* * The file layout lock was cancelled. And this open does not * obtain valid layout lock from MDT (i.e. 
the file is being @@ -1938,6 +1944,9 @@ static int pcc_try_auto_attach(struct inode *inode, bool *cached, pcc_inode_lock(inode); if (rc) RETURN(rc); + + if (atomic_read(&lli->lli_pcc_mapcnt) > 0) + RETURN(0); } rc = pcc_get_layout_info(inode, &clt); @@ -2140,8 +2149,21 @@ static bool pcc_io_tolerate(struct pcc_inode *pcci, return true; } +static inline void +pcc_file_mapping_reset(struct inode *inode, struct file *file) +{ + if (file) { + struct pcc_inode *pcci = ll_i2pcci(inode); + struct file *pcc_file = ll_file2pccf(file)->pccf_file; + struct inode *pcc_inode = pcci->pcci_path.dentry->d_inode; + + if (pcc_inode->i_mapping == &pcc_inode->i_data) + pcc_file->f_mapping = pcc_inode->i_mapping; + } +} + static void pcc_io_init(struct inode *inode, enum pcc_io_type iot, - bool *cached) + struct file *file, bool *cached) { struct pcc_inode *pcci; @@ -2161,13 +2183,14 @@ static void pcc_io_init(struct inode *inode, enum pcc_io_type iot, } else { *cached = false; /* - * FIXME: Forbid auto PCC attach if the file has still been - * mmapped in PCC. + * Forbid to auto PCC attach if the file has still been + * mapped in PCC. 
*/ if (pcc_may_auto_attach(inode, iot)) { (void) pcc_try_auto_attach(inode, cached, iot); if (*cached) { pcci = ll_i2pcci(inode); + pcc_file_mapping_reset(inode, file); LASSERT(atomic_read(&pcci->pcci_refcount) > 0); atomic_inc(&pcci->pcci_active_ios); } @@ -2188,6 +2211,22 @@ static void pcc_io_fini(struct inode *inode, enum pcc_io_type iot, wake_up_all(&pcci->pcci_waitq); } +static inline void +pcc_file_fallback_set(struct ll_inode_info *lli, struct pcc_file *pccf) +{ + atomic_inc(&lli->lli_pcc_mapneg); + pccf->pccf_fallback = 1; +} + +static inline void +pcc_file_fallback_reset(struct ll_inode_info *lli, struct pcc_file *pccf) +{ + if (pccf->pccf_fallback) { + pccf->pccf_fallback = 0; + atomic_dec(&lli->lli_pcc_mapneg); + } +} + int pcc_file_open(struct inode *inode, struct file *file) { struct pcc_inode *pcci; @@ -2210,8 +2249,10 @@ int pcc_file_open(struct inode *inode, struct file *file) pcc_inode_lock(inode); pcci = ll_i2pcci(inode); - if (lli->lli_pcc_state & PCC_STATE_FL_ATTACHING) + if (lli->lli_pcc_state & PCC_STATE_FL_ATTACHING) { + pcc_file_fallback_set(lli, pccf); GOTO(out_unlock, rc = 0); + } if (!pcci || !pcc_inode_has_layout(pcci)) { if (pcc_may_auto_attach(inode, PIT_OPEN)) @@ -2220,8 +2261,13 @@ int pcc_file_open(struct inode *inode, struct file *file) if (rc == 0 && !cached && inode_owner_or_capable(inode)) rc = pcc_try_readonly_open_attach(inode, file, &cached); - if (rc < 0 || !cached) + if (rc < 0) + GOTO(out_unlock, rc); + + if (!cached) { + pcc_file_fallback_set(lli, pccf); GOTO(out_unlock, rc); + } pcci = ll_i2pcci(inode); } @@ -2262,6 +2308,8 @@ void pcc_file_release(struct inode *inode, struct file *file) pccf = &fd->fd_pcc_file; pcc_inode_lock(inode); + pcc_file_fallback_reset(ll_i2info(inode), pccf); + if (pccf->pccf_file == NULL) goto out; @@ -2330,7 +2378,7 @@ ssize_t pcc_file_read_iter(struct kiocb *iocb, RETURN(0); } - pcc_io_init(inode, PIT_READ, cached); + pcc_io_init(inode, PIT_READ, file, cached); if (!*cached) RETURN(0); @@ 
-2400,7 +2448,7 @@ ssize_t pcc_file_write_iter(struct kiocb *iocb, RETURN(0); } - pcc_io_init(inode, PIT_WRITE, cached); + pcc_io_init(inode, PIT_WRITE, file, cached); if (!*cached) RETURN(0); @@ -2436,7 +2484,7 @@ int pcc_inode_setattr(struct inode *inode, struct iattr *attr, RETURN(0); } - pcc_io_init(inode, PIT_SETATTR, cached); + pcc_io_init(inode, PIT_SETATTR, NULL, cached); if (!*cached) RETURN(0); @@ -2473,7 +2521,7 @@ int pcc_inode_getattr(struct inode *inode, u32 request_mask, RETURN(0); } - pcc_io_init(inode, PIT_GETATTR, cached); + pcc_io_init(inode, PIT_GETATTR, NULL, cached); if (!*cached) RETURN(0); @@ -2534,7 +2582,7 @@ ssize_t pcc_file_splice_read(struct file *in_file, loff_t *ppos, RETURN(default_file_splice_read(in_file, ppos, pipe, count, flags)); - pcc_io_init(inode, PIT_SPLICE_READ, &cached); + pcc_io_init(inode, PIT_SPLICE_READ, in_file, &cached); if (!cached) RETURN(default_file_splice_read(in_file, ppos, pipe, count, flags)); @@ -2577,7 +2625,7 @@ int pcc_fsync(struct file *file, loff_t start, loff_t end, RETURN(0); } - pcc_io_init(inode, PIT_FSYNC, cached); + pcc_io_init(inode, PIT_FSYNC, file, cached); if (!*cached) RETURN(0); @@ -2761,7 +2809,8 @@ static int pcc_mmap_mapping_set(struct inode *inode, struct inode *pcc_inode) int pcc_file_mmap(struct file *file, struct vm_area_struct *vma, bool *cached) { - struct file *pcc_file = ll_file2pccf(file)->pccf_file; + struct pcc_file *pccf = ll_file2pccf(file); + struct file *pcc_file = pccf->pccf_file; struct inode *inode = file_inode(file); struct pcc_inode *pcci; int rc = 0; @@ -2776,9 +2825,20 @@ int pcc_file_mmap(struct file *file, struct vm_area_struct *vma, pcc_inode_lock(inode); pcci = ll_i2pcci(inode); if (pcci && pcc_inode_has_layout(pcci)) { + struct ll_inode_info *lli = ll_i2info(inode); struct inode *pcc_inode = file_inode(pcc_file); struct pcc_vma *pccv; + if (pccf->pccf_fallback) { + LASSERT(atomic_read(&lli->lli_pcc_mapneg) > 0); + GOTO(out, rc); + } + + if 
(atomic_read(&lli->lli_pcc_mapneg) > 0) { + pcc_file_fallback_set(lli, pccf); + GOTO(out, rc); + } + LASSERT(atomic_read(&pcci->pcci_refcount) > 1); *cached = true; diff --git a/lustre/llite/pcc.h b/lustre/llite/pcc.h index 33f0707..85eec3d 100644 --- a/lustre/llite/pcc.h +++ b/lustre/llite/pcc.h @@ -201,6 +201,8 @@ struct pcc_file { struct file *pccf_file; /* Whether readonly or readwrite PCC */ enum lu_pcc_type pccf_type; + /* I/O especially mmap() I/O must fallback to Lustre OSTs. */ + __u32 pccf_fallback:1; }; struct pcc_vma { diff --git a/lustre/tests/sanity-pcc.sh b/lustre/tests/sanity-pcc.sh index aedada2..4507b2f 100644 --- a/lustre/tests/sanity-pcc.sh +++ b/lustre/tests/sanity-pcc.sh @@ -3346,6 +3346,124 @@ test_43() { } run_test 43 "Auto attach at open() should add capacity owner check" +test_97() { + local loopfile="$TMP/$tfile" + local mntpt="/mnt/pcc.$tdir" + local hsm_root="$mntpt/$tdir" + local file=$DIR/$tfile + + setup_loopdev $SINGLEAGT $loopfile $mntpt 60 + do_facet $SINGLEAGT mkdir $hsm_root || error "mkdir $hsm_root failed" + setup_pcc_mapping $SINGLEAGT \ + "projid={0}\ roid=$HSM_ARCHIVE_NUMBER\ ropcc=1\ mmap_conv=0" + do_facet $SINGLEAGT $LCTL pcc list $MOUNT + do_facet $SINGLEAGT $LCTL set_param llite.*.pcc_async_threshold=1G + + local mpid1 + local mpid2 + local lpid + + do_facet $SINGLEAGT dd if=/dev/zero of=$file bs=1M count=50 || + error "Write $file failed" + + ( + while [ ! -e $DIR/sanity-pcc.97.lck ]; do + echo "T1. $MMAP_CAT $file ..." + do_facet $SINGLEAGT $MMAP_CAT $file > /dev/null || + error "$MMAP_CAT $file failed" + sleep 0.$((RANDOM % 4 + 1)) + done + )& + mpid1=$! + + ( + while [ ! -e $DIR/sanity-pcc.97.lck ]; do + echo "T2. $MMAP_CAT $file ..." + do_facet $SINGLEAGT $MMAP_CAT $file > /dev/null || + error "$MMAP_CAT $file failed" + sleep 0.$((RANDOM % 4 + 1)) + done + )& + mpid2=$! + + ( + while [ ! 
-e $DIR/sanity-pcc.97.lck ]; do + do_facet $SINGLEAGT $LCTL set_param -n ldlm.namespaces.*mdc*.lru_size=clear || + error "cancel_lru_locks mdc failed" + sleep 0.1 + done + )& + lpid=$! + + sleep 120 + touch $DIR/sanity-pcc.97.lck + wait $mpid1 || error "$?: mmap1 failed" + wait $mpid2 || error "$?: mmap2 failed" + wait $lpid || error "$?: cancel locks failed" + + do_facet $SINGLEAGT $LFS pcc detach $file + rm -f $DIR/sanity-pcc.97.lck +} +run_test 97 "two mmap I/O and layout lock cancel" + +test_98() { + local loopfile="$TMP/$tfile" + local mntpt="/mnt/pcc.$tdir" + local hsm_root="$mntpt/$tdir" + local file=$DIR/$tfile + + setup_loopdev $SINGLEAGT $loopfile $mntpt 60 + do_facet $SINGLEAGT mkdir $hsm_root || error "mkdir $hsm_root failed" + setup_pcc_mapping $SINGLEAGT \ + "projid={0}\ roid=$HSM_ARCHIVE_NUMBER\ ropcc=1\ mmap_conv=0" + do_facet $SINGLEAGT $LCTL pcc list $MOUNT + do_facet $SINGLEAGT $LCTL set_param llite.*.pcc_async_threshold=0 + + local rpid + local mpid + local lpid + + do_facet $SINGLEAGT dd if=/dev/zero of=$file bs=1M count=50 || + error "Write $file failed" + + ( + while [ ! -e $DIR/sanity-pcc.98.lck ]; do + do_facet $SINGLEAGT dd if=$file of=/dev/null bs=1M count=50 || + error "Read $file failed" + sleep 0.$((RANDOM % 4 + 1)) + done + )& + rpid=$! + + ( + while [ ! -e $DIR/sanity-pcc.98.lck ]; do + do_facet $SINGLEAGT $MMAP_CAT $file > /dev/null || + error "$MMAP_CAT $file failed" + sleep 0.$((RANDOM % 4 + 1)) + done + )& + mpid=$! + + ( + while [ ! -e $DIR/sanity-pcc.98.lck ]; do + do_facet $SINGLEAGT $LCTL set_param -n ldlm.namespaces.*mdc*.lru_size=clear || + error "cancel_lru_locks mdc failed" + sleep 0.1 + done + )& + lpid=$! 
+ + sleep 60 + touch $DIR/sanity-pcc.98.lck + wait $rpid || error "$?: read failed" + wait $mpid || error "$?: mmap failed" + wait $lpid || error "$?: cancel locks failed" + + do_facet $SINGLEAGT $LFS pcc detach $file + rm -f $DIR/sanity-pcc.98.lck +} +run_test 98 "racer between auto attach and mmap I/O" + test_99() { local loopfile="$TMP/$tfile" local mntpt="/mnt/pcc.$tdir" -- 1.8.3.1