X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Fllite%2Fpcc.c;h=9f176b5ea92fa825d0167d27158fa916962014ae;hb=88bccc4fa4dd7310560f588c730eefedf423c515;hp=d8b582fce4e3d2ce8058c410708e40c059a0f573;hpb=7a810496c2ce970294af7ad5939d43490c1817d7;p=fs%2Flustre-release.git diff --git a/lustre/llite/pcc.c b/lustre/llite/pcc.c index d8b582f..9f176b5 100644 --- a/lustre/llite/pcc.c +++ b/lustre/llite/pcc.c @@ -124,6 +124,7 @@ int pcc_super_init(struct pcc_super *super) cap_lower(cred->cap_effective, CAP_SYS_RESOURCE); init_rwsem(&super->pccs_rw_sem); INIT_LIST_HEAD(&super->pccs_datasets); + super->pccs_generation = 1; return 0; } @@ -213,13 +214,13 @@ pcc_fname_list_add(struct cfs_lstr *id, struct list_head *fname_list) { struct pcc_match_fname *fname; - OBD_ALLOC(fname, sizeof(struct pcc_match_fname)); + OBD_ALLOC_PTR(fname); if (fname == NULL) return -ENOMEM; OBD_ALLOC(fname->pmf_name, id->ls_len + 1); if (fname->pmf_name == NULL) { - OBD_FREE(fname, sizeof(struct pcc_match_fname)); + OBD_FREE_PTR(fname); return -ENOMEM; } @@ -312,7 +313,7 @@ pcc_expression_parse(struct cfs_lstr *src, struct list_head *cond_list) struct cfs_lstr field; int rc = 0; - OBD_ALLOC(expr, sizeof(struct pcc_expression)); + OBD_ALLOC_PTR(expr); if (expr == NULL) return -ENOMEM; @@ -370,7 +371,7 @@ pcc_conjunction_parse(struct cfs_lstr *src, struct list_head *cond_list) struct cfs_lstr expr; int rc = 0; - OBD_ALLOC(conjunction, sizeof(struct pcc_conjunction)); + OBD_ALLOC_PTR(conjunction); if (conjunction == NULL) return -ENOMEM; @@ -540,6 +541,12 @@ pcc_parse_value_pairs(struct pcc_cmd *cmd, char *buffer) */ if ((cmd->u.pccc_add.pccc_flags & PCC_DATASET_PCC_ALL) == 0) cmd->u.pccc_add.pccc_flags |= PCC_DATASET_PCC_ALL; + + /* For RW-PCC, the value of @rwid must be non zero. 
*/ + if (cmd->u.pccc_add.pccc_flags & PCC_DATASET_RWPCC && + cmd->u.pccc_add.pccc_rwid == 0) + return -EINVAL; + break; case PCC_DEL_DATASET: case PCC_CLEAR_ALL: @@ -826,6 +833,7 @@ pcc_dataset_del(struct pcc_super *super, char *pathname) if (strcmp(dataset->pccd_pathname, pathname) == 0) { list_del_init(&dataset->pccd_linkage); pcc_dataset_put(dataset); + super->pccs_generation++; rc = 0; break; } @@ -866,6 +874,7 @@ static void pcc_remove_datasets(struct pcc_super *super) list_del(&dataset->pccd_linkage); pcc_dataset_put(dataset); } + super->pccs_generation++; up_write(&super->pccs_rw_sem); } @@ -1048,18 +1057,18 @@ void pcc_inode_free(struct inode *inode) * reduce overhead: * (fid->f_oid >> 16 & oxFFFF)/FID */ -#define MAX_PCC_DATABASE_PATH (6 * 5 + FID_NOBRACE_LEN + 1) +#define PCC_DATASET_MAX_PATH (6 * 5 + FID_NOBRACE_LEN + 1) static int pcc_fid2dataset_path(char *buf, int sz, struct lu_fid *fid) { - return snprintf(buf, sz, "%04x/%04x/%04x/%04x/%04x/%04x/" - DFID_NOBRACE, - (fid)->f_oid & 0xFFFF, - (fid)->f_oid >> 16 & 0xFFFF, - (unsigned int)((fid)->f_seq & 0xFFFF), - (unsigned int)((fid)->f_seq >> 16 & 0xFFFF), - (unsigned int)((fid)->f_seq >> 32 & 0xFFFF), - (unsigned int)((fid)->f_seq >> 48 & 0xFFFF), - PFID(fid)); + return scnprintf(buf, sz, "%04x/%04x/%04x/%04x/%04x/%04x/" + DFID_NOBRACE, + (fid)->f_oid & 0xFFFF, + (fid)->f_oid >> 16 & 0xFFFF, + (unsigned int)((fid)->f_seq & 0xFFFF), + (unsigned int)((fid)->f_seq >> 16 & 0xFFFF), + (unsigned int)((fid)->f_seq >> 32 & 0xFFFF), + (unsigned int)((fid)->f_seq >> 48 & 0xFFFF), + PFID(fid)); } static inline const struct cred *pcc_super_cred(struct super_block *sb) @@ -1073,9 +1082,15 @@ void pcc_file_init(struct pcc_file *pccf) pccf->pccf_type = LU_PCC_NONE; } -static inline bool pcc_auto_attach_enabled(struct pcc_dataset *dataset) +static inline bool pcc_auto_attach_enabled(enum pcc_dataset_flags flags, + enum pcc_io_type iot) { - return dataset->pccd_flags & PCC_DATASET_AUTO_ATTACH; + if (iot == PIT_OPEN) + 
return flags & PCC_DATASET_OPEN_ATTACH; + if (iot == PIT_GETATTR) + return flags & PCC_DATASET_STAT_ATTACH; + else + return flags & PCC_DATASET_AUTO_ATTACH; } static const char pcc_xattr_layout[] = XATTR_USER_PREFIX "PCC.layout"; @@ -1088,19 +1103,12 @@ static int pcc_layout_xattr_set(struct pcc_inode *pcci, __u32 gen) ENTRY; - if (!(lli->lli_pcc_state & PCC_STATE_FL_AUTO_ATTACH)) + if (!(lli->lli_pcc_dsflags & PCC_DATASET_AUTO_ATTACH)) RETURN(0); -#ifndef HAVE_VFS_SETXATTR - if (!pcc_dentry->d_inode->i_op->setxattr) - RETURN(-ENOTSUPP); + rc = ll_vfs_setxattr(pcc_dentry, pcc_dentry->d_inode, pcc_xattr_layout, + &gen, sizeof(gen), 0); - rc = pcc_dentry->d_inode->i_op->setxattr(pcc_dentry, pcc_xattr_layout, - &gen, sizeof(gen), 0); -#else - rc = __vfs_setxattr(pcc_dentry, pcc_dentry->d_inode, pcc_xattr_layout, - &gen, sizeof(gen), 0); -#endif RETURN(rc); } @@ -1121,27 +1129,12 @@ static int pcc_get_layout_info(struct inode *inode, struct cl_layout *clt) RETURN(PTR_ERR(env)); rc = cl_object_layout_get(env, lli->lli_clob, clt); - if (rc) + if (rc < 0) CDEBUG(D_INODE, "Cannot get layout for "DFID"\n", PFID(ll_inode2fid(inode))); cl_env_put(env, &refcheck); - RETURN(rc); -} - -static int pcc_fid2dataset_fullpath(char *buf, int sz, struct lu_fid *fid, - struct pcc_dataset *dataset) -{ - return snprintf(buf, sz, "%s/%04x/%04x/%04x/%04x/%04x/%04x/" - DFID_NOBRACE, - dataset->pccd_pathname, - (fid)->f_oid & 0xFFFF, - (fid)->f_oid >> 16 & 0xFFFF, - (unsigned int)((fid)->f_seq & 0xFFFF), - (unsigned int)((fid)->f_seq >> 16 & 0xFFFF), - (unsigned int)((fid)->f_seq >> 32 & 0xFFFF), - (unsigned int)((fid)->f_seq >> 48 & 0xFFFF), - PFID(fid)); + RETURN(rc < 0 ? 
rc : 0); } /* Must be called with pcci->pcci_lock held */ @@ -1150,21 +1143,33 @@ static void pcc_inode_attach_init(struct pcc_dataset *dataset, struct dentry *dentry, enum lu_pcc_type type) { - struct ll_inode_info *lli = pcci->pcci_lli; - pcci->pcci_path.mnt = mntget(dataset->pccd_path.mnt); pcci->pcci_path.dentry = dentry; LASSERT(atomic_read(&pcci->pcci_refcount) == 0); atomic_set(&pcci->pcci_refcount, 1); pcci->pcci_type = type; pcci->pcci_attr_valid = false; +} + +static inline void pcc_inode_dsflags_set(struct ll_inode_info *lli, + struct pcc_dataset *dataset) +{ + lli->lli_pcc_generation = ll_info2pccs(lli)->pccs_generation; + lli->lli_pcc_dsflags = dataset->pccd_flags; +} - if (dataset->pccd_flags & PCC_DATASET_OPEN_ATTACH) - lli->lli_pcc_state |= PCC_STATE_FL_OPEN_ATTACH; - if (dataset->pccd_flags & PCC_DATASET_IO_ATTACH) - lli->lli_pcc_state |= PCC_STATE_FL_IO_ATTACH; - if (dataset->pccd_flags & PCC_DATASET_STAT_ATTACH) - lli->lli_pcc_state |= PCC_STATE_FL_STAT_ATTACH; +static void pcc_inode_attach_set(struct pcc_super *super, + struct pcc_dataset *dataset, + struct ll_inode_info *lli, + struct pcc_inode *pcci, + struct dentry *dentry, + enum lu_pcc_type type) +{ + pcc_inode_init(pcci, lli); + pcc_inode_attach_init(dataset, pcci, dentry, type); + down_read(&super->pccs_rw_sem); + pcc_inode_dsflags_set(lli, dataset); + up_read(&super->pccs_rw_sem); } static inline void pcc_layout_gen_set(struct pcc_inode *pcci, @@ -1178,6 +1183,72 @@ static inline bool pcc_inode_has_layout(struct pcc_inode *pcci) return pcci->pcci_layout_gen != CL_LAYOUT_GEN_NONE; } +static struct dentry *pcc_lookup(struct dentry *base, char *pathname) +{ + char *ptr = NULL, *component; + struct dentry *parent; + struct dentry *child = ERR_PTR(-ENOENT); + + ptr = pathname; + + /* move past any initial '/' to the start of the first path component*/ + while (*ptr == '/') + ptr++; + + /* store the start of the first path component */ + component = ptr; + + parent = dget(base); + while (ptr) 
{ + /* find the start of the next component - if we don't find it, + * the current component is the last component + */ + ptr = strchr(ptr, '/'); + /* put a NUL char in place of the '/' before the next component + * so we can treat this component as a string; note the full + * path string is NUL terminated so this is not needed for the + * last component + */ + if (ptr) + *ptr = '\0'; + + /* look up the current component */ + inode_lock(parent->d_inode); + child = lookup_one_len(component, parent, strlen(component)); + inode_unlock(parent->d_inode); + + /* repair the path string: put '/' back in place of the NUL */ + if (ptr) + *ptr = '/'; + + dput(parent); + + if (IS_ERR_OR_NULL(child)) + break; + + /* we may find a cached negative dentry */ + if (!d_is_positive(child)) { + dput(child); + child = NULL; + break; + } + + /* descend into the next level of the path */ + parent = child; + + /* move the pointer past the '/' to the next component */ + if (ptr) + ptr++; + component = ptr; + } + + /* NULL child means we didn't find anything */ + if (!child) + child = ERR_PTR(-ENOENT); + + return child; +} + static int pcc_try_dataset_attach(struct inode *inode, __u32 gen, enum lu_pcc_type type, struct pcc_dataset *dataset, @@ -1186,9 +1257,8 @@ static int pcc_try_dataset_attach(struct inode *inode, __u32 gen, struct ll_inode_info *lli = ll_i2info(inode); struct pcc_inode *pcci = lli->lli_pcc_inode; const struct cred *old_cred; - struct dentry *pcc_dentry; - struct path path; - char *pathname; + struct dentry *pcc_dentry = NULL; + char pathname[PCC_DATASET_MAX_PATH]; __u32 pcc_gen; int rc; @@ -1198,34 +1268,25 @@ static int pcc_try_dataset_attach(struct inode *inode, __u32 gen, !(dataset->pccd_flags & PCC_DATASET_RWPCC)) RETURN(0); - OBD_ALLOC(pathname, PATH_MAX); - if (pathname == NULL) - RETURN(-ENOMEM); - - pcc_fid2dataset_fullpath(pathname, PATH_MAX, &lli->lli_fid, dataset); + rc = pcc_fid2dataset_path(pathname, PCC_DATASET_MAX_PATH, + &lli->lli_fid); old_cred = 
override_creds(pcc_super_cred(inode->i_sb)); - rc = kern_path(pathname, LOOKUP_FOLLOW, &path); - if (rc) + pcc_dentry = pcc_lookup(dataset->pccd_path.dentry, pathname); + if (IS_ERR(pcc_dentry)) { + rc = PTR_ERR(pcc_dentry); + CDEBUG(D_CACHE, "%s: path lookup error on "DFID":%s: rc = %d\n", + ll_i2sbi(inode)->ll_fsname, PFID(&lli->lli_fid), + pathname, rc); /* ignore this error */ GOTO(out, rc = 0); + } - pcc_dentry = path.dentry; -#ifndef HAVE_VFS_SETXATTR - if (!pcc_dentry->d_inode->i_op->getxattr) - /* ignore this error */ - GOTO(out_put_path, rc = 0); - - rc = pcc_dentry->d_inode->i_op->getxattr(pcc_dentry, pcc_xattr_layout, - &pcc_gen, sizeof(pcc_gen)); -#else - rc = __vfs_getxattr(pcc_dentry, pcc_dentry->d_inode, pcc_xattr_layout, - &pcc_gen, sizeof(pcc_gen)); -#endif - + rc = ll_vfs_getxattr(pcc_dentry, pcc_dentry->d_inode, pcc_xattr_layout, + &pcc_gen, sizeof(pcc_gen)); if (rc < 0) /* ignore this error */ - GOTO(out_put_path, rc = 0); + GOTO(out_put_pcc_dentry, rc = 0); rc = 0; /* The file is still valid cached in PCC, attach it immediately. 
*/ @@ -1235,7 +1296,7 @@ static int pcc_try_dataset_attach(struct inode *inode, __u32 gen, if (!pcci) { OBD_SLAB_ALLOC_PTR_GFP(pcci, pcc_inode_slab, GFP_NOFS); if (pcci == NULL) - GOTO(out_put_path, rc = -ENOMEM); + GOTO(out_put_pcc_dentry, rc = -ENOMEM); pcc_inode_init(pcci, lli); dget(pcc_dentry); @@ -1253,22 +1314,24 @@ static int pcc_try_dataset_attach(struct inode *inode, __u32 gen, pcc_inode_get(pcci); pcci->pcci_type = type; } + pcc_inode_dsflags_set(lli, dataset); pcc_layout_gen_set(pcci, gen); *cached = true; } -out_put_path: - path_put(&path); +out_put_pcc_dentry: + dput(pcc_dentry); out: revert_creds(old_cred); - OBD_FREE(pathname, PATH_MAX); RETURN(rc); } -static int pcc_try_datasets_attach(struct inode *inode, __u32 gen, - enum lu_pcc_type type, bool *cached) +static int pcc_try_datasets_attach(struct inode *inode, enum pcc_io_type iot, + __u32 gen, enum lu_pcc_type type, + bool *cached) { - struct pcc_dataset *dataset, *tmp; struct pcc_super *super = &ll_i2sbi(inode)->ll_pcc_super; + struct ll_inode_info *lli = ll_i2info(inode); + struct pcc_dataset *dataset = NULL, *tmp; int rc = 0; ENTRY; @@ -1276,18 +1339,68 @@ static int pcc_try_datasets_attach(struct inode *inode, __u32 gen, down_read(&super->pccs_rw_sem); list_for_each_entry_safe(dataset, tmp, &super->pccs_datasets, pccd_linkage) { - if (!pcc_auto_attach_enabled(dataset)) - continue; + if (!pcc_auto_attach_enabled(dataset->pccd_flags, iot)) + break; + rc = pcc_try_dataset_attach(inode, gen, type, dataset, cached); if (rc < 0 || (!rc && *cached)) break; } + + /* + * Update the saved dataset flags for the inode accordingly if the + * attach failed. + */ + if (!rc && !*cached) { + /* + * Currently the auto attach strategy for a PCC backend is + * unchangeable once it has been added into the PCC datasets on + * a client, as support for changing the auto attach strategy is + * not implemented yet. 
+ */ + /* + * If an attach from one PCC backend was tried: + * @lli_pcc_generation > 0: + * 1) The file was once attached into PCC, but the + * corresponding PCC backend has since been removed from the client; + * 2) The layout generation was changed and the data has been + * restored; + * 3) The corresponding PCC copy does not exist on PCC + * @lli_pcc_generation == 0: + * The file was never attached into PCC but is in an HSM released + * state, or was once attached into PCC but the inode was later + * evicted from icache. + * Set the saved dataset flags to PCC_DATASET_NONE. The file will + * then be skipped as a candidate for auto attach until + * it is attached into PCC again. + * + * If the file was never attached into PCC, or was once attached but + * its inode was evicted from icache (lli_pcc_generation == 0), + * or the corresponding dataset was removed from the client, + * set the saved dataset flags to PCC_DATASET_NONE. + * + * TODO: If the file was once attached into PCC but auto attach is + * not tried due to a change of the configuration parameters + * for this dataset (i.e. a change from auto attach enabled to + * auto attach disabled for this dataset), update the saved + * dataset flags with the found one. + */ + lli->lli_pcc_dsflags = PCC_DATASET_NONE; + } up_read(&super->pccs_rw_sem); RETURN(rc); } -static int pcc_try_auto_attach(struct inode *inode, bool *cached, bool is_open) +/* + * TODO: For RW-PCC, it is desirable to store HSM info as a layout (LU-10606). + * Thus the client can get the archive ID from the layout directly. When trying + * to automatically attach a file that is in HSM released state (according to + * LOV_PATTERN_F_RELEASED in the layout), it can determine whether the file is + * validly cached on PCC more precisely according to the @rwid (archive ID) in + * the PCC dataset and the archive ID in HSM attrs. 
+ */ +static int pcc_try_auto_attach(struct inode *inode, bool *cached, + enum pcc_io_type iot) { struct pcc_super *super = &ll_i2sbi(inode)->ll_pcc_super; struct cl_layout clt = { @@ -1311,7 +1424,7 @@ static int pcc_try_auto_attach(struct inode *inode, bool *cached, bool is_open) * obtain valid layout lock from MDT (i.e. the file is being * HSM restoring). */ - if (is_open) { + if (iot == PIT_OPEN) { if (ll_layout_version_get(lli) == CL_LAYOUT_GEN_NONE) RETURN(0); } else { @@ -1324,28 +1437,62 @@ static int pcc_try_auto_attach(struct inode *inode, bool *cached, bool is_open) if (rc) RETURN(rc); - if (!is_open && gen != clt.cl_layout_gen) { + if (iot != PIT_OPEN && gen != clt.cl_layout_gen) { CDEBUG(D_CACHE, DFID" layout changed from %d to %d.\n", PFID(ll_inode2fid(inode)), gen, clt.cl_layout_gen); RETURN(-EINVAL); } if (clt.cl_is_released) - rc = pcc_try_datasets_attach(inode, clt.cl_layout_gen, + rc = pcc_try_datasets_attach(inode, iot, clt.cl_layout_gen, LU_PCC_READWRITE, cached); RETURN(rc); } +static inline bool pcc_may_auto_attach(struct inode *inode, + enum pcc_io_type iot) +{ + struct ll_inode_info *lli = ll_i2info(inode); + struct pcc_super *super = ll_i2pccs(inode); + + /* The file is known not to be in any PCC backend. */ + if (lli->lli_pcc_dsflags & PCC_DATASET_NONE) + return false; + + /* + * lli_pcc_generation == 0 means that the file was never attached into + * PCC, or may have once been attached into PCC but was detached when + * the inode was evicted from icache (e.g. "echo 3 > /proc/sys/vm/drop_caches" + * or icache shrinking due to memory pressure), which causes the + * file to detach from PCC when the inode is released from icache. + * In either case, we still try to attach. + */ + /* lli_pcc_generation == 0, or the PCC setting was changed, + * or there is no PCC setup on the client and the try will return + * immediately in pcc_try_auto_attach(). 
+ */ + if (super->pccs_generation != lli->lli_pcc_generation) + return true; + + /* The cached setting @lli_pcc_dsflags is valid */ + if (iot == PIT_OPEN) + return lli->lli_pcc_dsflags & PCC_DATASET_OPEN_ATTACH; + + if (iot == PIT_GETATTR) + return lli->lli_pcc_dsflags & PCC_DATASET_STAT_ATTACH; + + return lli->lli_pcc_dsflags & PCC_DATASET_IO_ATTACH; +} + int pcc_file_open(struct inode *inode, struct file *file) { struct pcc_inode *pcci; struct ll_inode_info *lli = ll_i2info(inode); - struct ll_file_data *fd = LUSTRE_FPRIVATE(file); + struct ll_file_data *fd = file->private_data; struct pcc_file *pccf = &fd->fd_pcc_file; struct file *pcc_file; struct path *path; - struct qstr *dname; bool cached = false; int rc = 0; @@ -1354,6 +1501,9 @@ int pcc_file_open(struct inode *inode, struct file *file) if (!S_ISREG(inode->i_mode)) RETURN(0); + if (IS_ENCRYPTED(inode)) + RETURN(0); + pcc_inode_lock(inode); pcci = ll_i2pcci(inode); @@ -1361,8 +1511,8 @@ int pcc_file_open(struct inode *inode, struct file *file) GOTO(out_unlock, rc = 0); if (!pcci || !pcc_inode_has_layout(pcci)) { - if (lli->lli_pcc_state & PCC_STATE_FL_OPEN_ATTACH) - rc = pcc_try_auto_attach(inode, &cached, true); + if (pcc_may_auto_attach(inode, PIT_OPEN)) + rc = pcc_try_auto_attach(inode, &cached, PIT_OPEN); if (rc < 0 || !cached) GOTO(out_unlock, rc); @@ -1375,17 +1525,10 @@ int pcc_file_open(struct inode *inode, struct file *file) WARN_ON(pccf->pccf_file); path = &pcci->pcci_path; - dname = &path->dentry->d_name; - CDEBUG(D_CACHE, "opening pcc file '%.*s'\n", dname->len, - dname->name); + CDEBUG(D_CACHE, "opening pcc file '%pd'\n", path->dentry); -#ifdef HAVE_DENTRY_OPEN_USE_PATH pcc_file = dentry_open(path, file->f_flags, pcc_super_cred(inode->i_sb)); -#else - pcc_file = dentry_open(path->dentry, path->mnt, file->f_flags, - pcc_super_cred(inode->i_sb)); -#endif if (IS_ERR_OR_NULL(pcc_file)) { rc = pcc_file == NULL ? 
-EINVAL : PTR_ERR(pcc_file); pcc_inode_put(pcci); @@ -1402,10 +1545,9 @@ out_unlock: void pcc_file_release(struct inode *inode, struct file *file) { struct pcc_inode *pcci; - struct ll_file_data *fd = LUSTRE_FPRIVATE(file); + struct ll_file_data *fd = file->private_data; struct pcc_file *pccf; struct path *path; - struct qstr *dname; ENTRY; @@ -1420,9 +1562,7 @@ void pcc_file_release(struct inode *inode, struct file *file) pcci = ll_i2pcci(inode); LASSERT(pcci); path = &pcci->pcci_path; - dname = &path->dentry->d_name; - CDEBUG(D_CACHE, "releasing pcc file \"%.*s\"\n", dname->len, - dname->name); + CDEBUG(D_CACHE, "releasing pcc file \"%pd\"\n", path->dentry); pcc_inode_put(pcci); fput(pccf->pccf_file); pccf->pccf_file = NULL; @@ -1433,7 +1573,6 @@ out: static void pcc_io_init(struct inode *inode, enum pcc_io_type iot, bool *cached) { - struct ll_inode_info *lli = ll_i2info(inode); struct pcc_inode *pcci; pcc_inode_lock(inode); @@ -1444,11 +1583,8 @@ static void pcc_io_init(struct inode *inode, enum pcc_io_type iot, bool *cached) *cached = true; } else { *cached = false; - if ((lli->lli_pcc_state & PCC_STATE_FL_IO_ATTACH && - iot != PIT_GETATTR) || - (iot == PIT_GETATTR && - lli->lli_pcc_state & PCC_STATE_FL_STAT_ATTACH)) { - (void) pcc_try_auto_attach(inode, cached, false); + if (pcc_may_auto_attach(inode, iot)) { + (void) pcc_try_auto_attach(inode, cached, iot); if (*cached) { pcci = ll_i2pcci(inode); LASSERT(atomic_read(&pcci->pcci_refcount) > 0); @@ -1465,7 +1601,7 @@ static void pcc_io_fini(struct inode *inode) LASSERT(pcci && atomic_read(&pcci->pcci_active_ios) > 0); if (atomic_dec_and_test(&pcci->pcci_active_ios)) - wake_up_all(&pcci->pcci_waitq); + wake_up(&pcci->pcci_waitq); } @@ -1508,7 +1644,7 @@ ssize_t pcc_file_read_iter(struct kiocb *iocb, struct iov_iter *iter, bool *cached) { struct file *file = iocb->ki_filp; - struct ll_file_data *fd = LUSTRE_FPRIVATE(file); + struct ll_file_data *fd = file->private_data; struct pcc_file *pccf = &fd->fd_pcc_file; 
struct inode *inode = file_inode(file); ssize_t result; @@ -1575,7 +1711,7 @@ ssize_t pcc_file_write_iter(struct kiocb *iocb, struct iov_iter *iter, bool *cached) { struct file *file = iocb->ki_filp; - struct ll_file_data *fd = LUSTRE_FPRIVATE(file); + struct ll_file_data *fd = file->private_data; struct pcc_file *pccf = &fd->fd_pcc_file; struct inode *inode = file_inode(file); ssize_t result; @@ -1639,7 +1775,12 @@ int pcc_inode_setattr(struct inode *inode, struct iattr *attr, pcc_dentry = pcci->pcci_path.dentry; inode_lock(pcc_dentry->d_inode); old_cred = override_creds(pcc_super_cred(inode->i_sb)); +#ifdef HAVE_USER_NAMESPACE_ARG + rc = pcc_dentry->d_inode->i_op->setattr(&init_user_ns, pcc_dentry, + &attr2); +#else rc = pcc_dentry->d_inode->i_op->setattr(pcc_dentry, &attr2); +#endif revert_creds(old_cred); inode_unlock(pcc_dentry->d_inode); @@ -1647,7 +1788,8 @@ int pcc_inode_setattr(struct inode *inode, struct iattr *attr, RETURN(rc); } -int pcc_inode_getattr(struct inode *inode, bool *cached) +int pcc_inode_getattr(struct inode *inode, u32 request_mask, + unsigned int flags, bool *cached) { struct ll_inode_info *lli = ll_i2info(inode); const struct cred *old_cred; @@ -1669,17 +1811,17 @@ int pcc_inode_getattr(struct inode *inode, bool *cached) RETURN(0); old_cred = override_creds(pcc_super_cred(inode->i_sb)); - rc = ll_vfs_getattr(&ll_i2pcci(inode)->pcci_path, &stat); + rc = ll_vfs_getattr(&ll_i2pcci(inode)->pcci_path, &stat, request_mask, + flags); revert_creds(old_cred); if (rc) GOTO(out, rc); ll_inode_size_lock(inode); - if (inode->i_atime.tv_sec < lli->lli_atime || - lli->lli_update_atime) { + if (test_and_clear_bit(LLIF_UPDATE_ATIME, &lli->lli_flags) || + inode->i_atime.tv_sec < lli->lli_atime) inode->i_atime.tv_sec = lli->lli_atime; - lli->lli_update_atime = 0; - } + inode->i_mtime.tv_sec = lli->lli_mtime; inode->i_ctime.tv_sec = lli->lli_ctime; @@ -1709,42 +1851,40 @@ out: RETURN(rc); } +#ifdef HAVE_DEFAULT_FILE_SPLICE_READ_EXPORT ssize_t 
pcc_file_splice_read(struct file *in_file, loff_t *ppos, struct pipe_inode_info *pipe, - size_t count, unsigned int flags, - bool *cached) + size_t count, unsigned int flags) { struct inode *inode = file_inode(in_file); - struct ll_file_data *fd = LUSTRE_FPRIVATE(in_file); + struct ll_file_data *fd = in_file->private_data; struct file *pcc_file = fd->fd_pcc_file.pccf_file; + bool cached = false; ssize_t result; ENTRY; - *cached = false; if (!pcc_file) - RETURN(0); - - if (!file_inode(pcc_file)->i_fop->splice_read) - RETURN(-ENOTSUPP); + RETURN(default_file_splice_read(in_file, ppos, pipe, + count, flags)); - pcc_io_init(inode, PIT_SPLICE_READ, cached); - if (!*cached) - RETURN(0); + pcc_io_init(inode, PIT_SPLICE_READ, &cached); + if (!cached) + RETURN(default_file_splice_read(in_file, ppos, pipe, + count, flags)); - result = file_inode(pcc_file)->i_fop->splice_read(pcc_file, - ppos, pipe, count, - flags); + result = default_file_splice_read(pcc_file, ppos, pipe, count, flags); pcc_io_fini(inode); RETURN(result); } +#endif /* HAVE_DEFAULT_FILE_SPLICE_READ_EXPORT */ int pcc_fsync(struct file *file, loff_t start, loff_t end, int datasync, bool *cached) { struct inode *inode = file_inode(file); - struct ll_file_data *fd = LUSTRE_FPRIVATE(file); + struct ll_file_data *fd = file->private_data; struct file *pcc_file = fd->fd_pcc_file.pccf_file; int rc; @@ -1770,7 +1910,7 @@ int pcc_file_mmap(struct file *file, struct vm_area_struct *vma, bool *cached) { struct inode *inode = file_inode(file); - struct ll_file_data *fd = LUSTRE_FPRIVATE(file); + struct ll_file_data *fd = file->private_data; struct file *pcc_file = fd->fd_pcc_file.pccf_file; struct pcc_inode *pcci; int rc = 0; @@ -1805,7 +1945,7 @@ void pcc_vm_open(struct vm_area_struct *vma) struct pcc_inode *pcci; struct file *file = vma->vm_file; struct inode *inode = file_inode(file); - struct ll_file_data *fd = LUSTRE_FPRIVATE(file); + struct ll_file_data *fd = file->private_data; struct file *pcc_file = 
fd->fd_pcc_file.pccf_file; struct vm_operations_struct *pcc_vm_ops = vma->vm_private_data; @@ -1829,7 +1969,7 @@ void pcc_vm_close(struct vm_area_struct *vma) { struct file *file = vma->vm_file; struct inode *inode = file_inode(file); - struct ll_file_data *fd = LUSTRE_FPRIVATE(file); + struct ll_file_data *fd = file->private_data; struct file *pcc_file = fd->fd_pcc_file.pccf_file; struct vm_operations_struct *pcc_vm_ops = vma->vm_private_data; @@ -1854,7 +1994,7 @@ int pcc_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, struct mm_struct *mm = vma->vm_mm; struct file *file = vma->vm_file; struct inode *inode = file_inode(file); - struct ll_file_data *fd = LUSTRE_FPRIVATE(file); + struct ll_file_data *fd = file->private_data; struct file *pcc_file = fd->fd_pcc_file.pccf_file; struct vm_operations_struct *pcc_vm_ops = vma->vm_private_data; int rc; @@ -1872,7 +2012,7 @@ int pcc_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, "%s: PCC backend fs not support ->page_mkwrite()\n", ll_i2sbi(inode)->ll_fsname); pcc_ioctl_detach(inode, PCC_DETACH_OPT_UNCACHE); - up_read(&mm->mmap_sem); + mmap_read_unlock(mm); *cached = true; RETURN(VM_FAULT_RETRY | VM_FAULT_NOPAGE); } @@ -1899,7 +2039,7 @@ int pcc_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, */ if (page->mapping == pcc_file->f_mapping) { *cached = true; - up_read(&mm->mmap_sem); + mmap_read_unlock(mm); RETURN(VM_FAULT_RETRY | VM_FAULT_NOPAGE); } @@ -1913,7 +2053,7 @@ int pcc_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, if (OBD_FAIL_CHECK(OBD_FAIL_LLITE_PCC_DETACH_MKWRITE)) { pcc_io_fini(inode); pcc_ioctl_detach(inode, PCC_DETACH_OPT_UNCACHE); - up_read(&mm->mmap_sem); + mmap_read_unlock(mm); RETURN(VM_FAULT_RETRY | VM_FAULT_NOPAGE); } @@ -1934,7 +2074,7 @@ int pcc_fault(struct vm_area_struct *vma, struct vm_fault *vmf, { struct file *file = vma->vm_file; struct inode *inode = file_inode(file); - struct ll_file_data *fd = LUSTRE_FPRIVATE(file); + struct 
ll_file_data *fd = file->private_data; struct file *pcc_file = fd->fd_pcc_file.pccf_file; struct vm_operations_struct *pcc_vm_ops = vma->vm_private_data; int rc; @@ -1962,23 +2102,17 @@ int pcc_fault(struct vm_area_struct *vma, struct vm_fault *vmf, RETURN(rc); } -static void pcc_layout_wait(struct pcc_inode *pcci) -{ - struct l_wait_info lwi = { 0 }; - - while (atomic_read(&pcci->pcci_active_ios) > 0) { - CDEBUG(D_CACHE, "Waiting for IO completion: %d\n", - atomic_read(&pcci->pcci_active_ios)); - l_wait_event(pcci->pcci_waitq, - atomic_read(&pcci->pcci_active_ios) == 0, &lwi); - } -} - static void __pcc_layout_invalidate(struct pcc_inode *pcci) { pcci->pcci_type = LU_PCC_NONE; pcc_layout_gen_set(pcci, CL_LAYOUT_GEN_NONE); - pcc_layout_wait(pcci); + if (atomic_read(&pcci->pcci_active_ios) == 0) + return; + + CDEBUG(D_CACHE, "Waiting for IO completion: %d\n", + atomic_read(&pcci->pcci_active_ios)); + wait_event_idle(pcci->pcci_waitq, + atomic_read(&pcci->pcci_active_ios) == 0); } void pcc_layout_invalidate(struct inode *inode) @@ -2007,11 +2141,11 @@ static int pcc_inode_remove(struct inode *inode, struct dentry *pcc_dentry) { int rc; - rc = ll_vfs_unlink(pcc_dentry->d_parent->d_inode, pcc_dentry); + rc = vfs_unlink(&init_user_ns, + pcc_dentry->d_parent->d_inode, pcc_dentry); if (rc) - CWARN("%s: failed to unlink PCC file %.*s, rc = %d\n", - ll_i2sbi(inode)->ll_fsname, pcc_dentry->d_name.len, - pcc_dentry->d_name.name, rc); + CWARN("%s: failed to unlink PCC file %pd, rc = %d\n", + ll_i2sbi(inode)->ll_fsname, pcc_dentry, rc); return rc; } @@ -2032,7 +2166,7 @@ pcc_mkdir(struct dentry *base, const char *name, umode_t mode) if (d_is_positive(dentry)) goto out; - rc = vfs_mkdir(dir, dentry, mode); + rc = vfs_mkdir(&init_user_ns, dir, dentry, mode); if (rc) { dput(dentry); dentry = ERR_PTR(rc); @@ -2088,7 +2222,7 @@ pcc_create(struct dentry *base, const char *name, umode_t mode) if (d_is_positive(dentry)) goto out; - rc = vfs_create(dir, dentry, mode, 
LL_VFS_CREATE_FALSE); + rc = vfs_create(&init_user_ns, dir, dentry, mode, false); if (rc) { dput(dentry); dentry = ERR_PTR(rc); @@ -2108,11 +2242,11 @@ static int __pcc_inode_create(struct pcc_dataset *dataset, struct dentry *child; int rc = 0; - OBD_ALLOC(path, MAX_PCC_DATABASE_PATH); + OBD_ALLOC(path, PCC_DATASET_MAX_PATH); if (path == NULL) return -ENOMEM; - pcc_fid2dataset_path(path, MAX_PCC_DATABASE_PATH, fid); + pcc_fid2dataset_path(path, PCC_DATASET_MAX_PATH, fid); base = pcc_mkdir_p(dataset->pccd_path.dentry, path, 0); if (IS_ERR(base)) { @@ -2120,7 +2254,7 @@ static int __pcc_inode_create(struct pcc_dataset *dataset, GOTO(out, rc); } - snprintf(path, MAX_PCC_DATABASE_PATH, DFID_NOBRACE, PFID(fid)); + snprintf(path, PCC_DATASET_MAX_PATH, DFID_NOBRACE, PFID(fid)); child = pcc_create(base, path, 0); if (IS_ERR(child)) { rc = PTR_ERR(child); @@ -2131,7 +2265,7 @@ static int __pcc_inode_create(struct pcc_dataset *dataset, out_base: dput(base); out: - OBD_FREE(path, MAX_PCC_DATABASE_PATH); + OBD_FREE(path, PCC_DATASET_MAX_PATH); return rc; } @@ -2154,7 +2288,7 @@ int pcc_inode_reset_iattr(struct dentry *dentry, unsigned int valid, attr.ia_size = size; inode_lock(inode); - rc = notify_change(dentry, &attr, NULL); + rc = notify_change(&init_user_ns, dentry, &attr, NULL); inode_unlock(inode); RETURN(rc); @@ -2175,6 +2309,7 @@ int pcc_inode_create(struct super_block *sb, struct pcc_dataset *dataset, int pcc_inode_create_fini(struct inode *inode, struct pcc_create_attach *pca) { struct dentry *pcc_dentry = pca->pca_dentry; + struct pcc_super *super = ll_i2pccs(inode); const struct cred *old_cred; struct pcc_inode *pcci; int rc; @@ -2189,7 +2324,7 @@ int pcc_inode_create_fini(struct inode *inode, struct pcc_create_attach *pca) LASSERT(pcc_dentry); - old_cred = override_creds(pcc_super_cred(inode->i_sb)); + old_cred = override_creds(super->pccs_cred); pcc_inode_lock(inode); LASSERT(ll_i2pcci(inode) == NULL); OBD_SLAB_ALLOC_PTR_GFP(pcci, pcc_inode_slab, GFP_NOFS); @@ 
-2201,9 +2336,8 @@ int pcc_inode_create_fini(struct inode *inode, struct pcc_create_attach *pca) if (rc) GOTO(out_put, rc); - pcc_inode_init(pcci, ll_i2info(inode)); - pcc_inode_attach_init(pca->pca_dataset, pcci, pcc_dentry, - LU_PCC_READWRITE); + pcc_inode_attach_set(super, pca->pca_dataset, ll_i2info(inode), + pcci, pcc_dentry, LU_PCC_READWRITE); rc = pcc_layout_xattr_set(pcci, 0); if (rc) { @@ -2242,12 +2376,12 @@ void pcc_create_attach_cleanup(struct super_block *sb, int rc; old_cred = override_creds(pcc_super_cred(sb)); - rc = ll_vfs_unlink(pca->pca_dentry->d_parent->d_inode, - pca->pca_dentry); + rc = vfs_unlink(&init_user_ns, + pca->pca_dentry->d_parent->d_inode, + pca->pca_dentry); if (rc) - CWARN("failed to unlink PCC file %.*s, rc = %d\n", - pca->pca_dentry->d_name.len, - pca->pca_dentry->d_name.name, rc); + CWARN("%s: failed to unlink PCC file %pd: rc = %d\n", + ll_s2sbi(sb)->ll_fsname, pca->pca_dentry, rc); /* ignore the unlink failure */ revert_creds(old_cred); dput(pca->pca_dentry); @@ -2336,6 +2470,7 @@ int pcc_readwrite_attach(struct file *file, struct inode *inode, { struct pcc_dataset *dataset; struct ll_inode_info *lli = ll_i2info(inode); + struct pcc_super *super = ll_i2pccs(inode); struct pcc_inode *pcci; const struct cred *old_cred; struct dentry *dentry; @@ -2355,19 +2490,14 @@ int pcc_readwrite_attach(struct file *file, struct inode *inode, if (dataset == NULL) RETURN(-ENOENT); - old_cred = override_creds(pcc_super_cred(inode->i_sb)); + old_cred = override_creds(super->pccs_cred); rc = __pcc_inode_create(dataset, &lli->lli_fid, &dentry); if (rc) GOTO(out_dataset_put, rc); path.mnt = dataset->pccd_path.mnt; path.dentry = dentry; -#ifdef HAVE_DENTRY_OPEN_USE_PATH pcc_filp = dentry_open(&path, O_WRONLY | O_LARGEFILE, current_cred()); -#else - pcc_filp = dentry_open(path.dentry, path.mnt, O_WRONLY | O_LARGEFILE, - current_cred()); -#endif if (IS_ERR_OR_NULL(pcc_filp)) { rc = pcc_filp == NULL ? 
-EINVAL : PTR_ERR(pcc_filp); GOTO(out_dentry, rc); @@ -2402,8 +2532,8 @@ int pcc_readwrite_attach(struct file *file, struct inode *inode, if (pcci == NULL) GOTO(out_unlock, rc = -ENOMEM); - pcc_inode_init(pcci, lli); - pcc_inode_attach_init(dataset, pcci, dentry, LU_PCC_READWRITE); + pcc_inode_attach_set(super, dataset, lli, pcci, + dentry, LU_PCC_READWRITE); out_unlock: pcc_inode_unlock(inode); out_fput: @@ -2535,8 +2665,15 @@ int pcc_ioctl_detach(struct inode *inode, __u32 opt) LASSERT(atomic_read(&pcci->pcci_refcount) > 0); if (pcci->pcci_type == LU_PCC_READWRITE) { - if (opt == PCC_DETACH_OPT_UNCACHE) + if (opt == PCC_DETACH_OPT_UNCACHE) { hsm_remove = true; + /* + * The file will be removed from PCC, set the flags + * with PCC_DATASET_NONE even the later removal of the + * PCC copy fails. + */ + lli->lli_pcc_dsflags = PCC_DATASET_NONE; + } __pcc_layout_invalidate(pcci); pcc_inode_put(pcci); @@ -2563,7 +2700,7 @@ int pcc_ioctl_state(struct file *file, struct inode *inode, char *buf; char *path; int buf_len = sizeof(state->pccs_path); - struct ll_file_data *fd = LUSTRE_FPRIVATE(file); + struct ll_file_data *fd = file->private_data; struct pcc_file *pccf = &fd->fd_pcc_file; struct pcc_inode *pcci;