X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Fllite%2Fdir.c;h=5db622e1164d818ef026ec7e821d9999fc1bb44c;hb=bb60caa1c6e7c14c201916dc0423442d10c86a27;hp=a69a1de0b13d35669eada38117ee4d8f21ce5240;hpb=3973cf8dc955c773a5f9da13216252644aa3949f;p=fs%2Flustre-release.git diff --git a/lustre/llite/dir.c b/lustre/llite/dir.c index a69a1de..5db622e 100644 --- a/lustre/llite/dir.c +++ b/lustre/llite/dir.c @@ -27,7 +27,6 @@ */ /* * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. * * lustre/llite/dir.c * @@ -103,16 +102,18 @@ * returned page, page hash collision has to be handled. Pages in the * hash chain, except first one, are termed "overflow pages". * - * Solution to index uniqueness problem is to not cache overflow - * pages. Instead, when page hash collision is detected, all overflow pages - * from emerging chain are immediately requested from the server and placed in - * a special data structure (struct ll_dir_chain). This data structure is used - * by ll_readdir() to process entries from overflow pages. When readdir - * invocation finishes, overflow pages are discarded. If page hash collision - * chain weren't completely processed, next call to readdir will again detect - * page hash collision, again read overflow pages in, process next portion of - * entries and again discard the pages. This is not as wasteful as it looks, - * because, given reasonable hash, page hash collisions are extremely rare. + * Proposed (unimplemented) solution to index uniqueness problem is to + * not cache overflow pages. Instead, when page hash collision is + * detected, all overflow pages from emerging chain should be + * immediately requested from the server and placed in a special data + * structure. This data structure can be used by ll_readdir() to + * process entries from overflow pages. When readdir invocation + * finishes, overflow pages are discarded. 
If page hash collision chain + * weren't completely processed, next call to readdir will again detect + * page hash collision, again read overflow pages in, process next + * portion of entries and again discard the pages. This is not as + * wasteful as it looks, because, given reasonable hash, page hash + * collisions are extremely rare. * * 1. directory positioning * @@ -141,7 +142,7 @@ * */ struct page *ll_get_dir_page(struct inode *dir, struct md_op_data *op_data, - __u64 offset, struct ll_dir_chain *chain) + __u64 offset) { struct md_callback cb_op; struct page *page; @@ -190,14 +191,11 @@ int ll_dir_read(struct inode *inode, __u64 *ppos, struct md_op_data *op_data, bool is_api32 = ll_need_32bit_api(sbi); bool is_hash64 = sbi->ll_flags & LL_SBI_64BIT_HASH; struct page *page; - struct ll_dir_chain chain; bool done = false; int rc = 0; ENTRY; - ll_dir_chain_init(&chain); - - page = ll_get_dir_page(inode, op_data, pos, &chain); + page = ll_get_dir_page(inode, op_data, pos); while (rc == 0 && !done) { struct lu_dirpage *dp; @@ -234,7 +232,7 @@ int ll_dir_read(struct inode *inode, __u64 *ppos, struct md_op_data *op_data, lhash = hash; fid_le_to_cpu(&fid, &ent->lde_fid); ino = cl_fid_build_ino(&fid, is_api32); - type = IFTODT(lu_dirent_type_get(ent)); + type = S_DT(lu_dirent_type_get(ent)); /* For ll_nfs_get_name_filldir(), it will try to access * 'ent' through 'lde_name', so the parameter 'name' * for 'filldir()' must be part of the 'ent'. */ @@ -271,8 +269,7 @@ int ll_dir_read(struct inode *inode, __u64 *ppos, struct md_op_data *op_data, le32_to_cpu(dp->ldp_flags) & LDF_COLLIDE); next = pos; - page = ll_get_dir_page(inode, op_data, pos, - &chain); + page = ll_get_dir_page(inode, op_data, pos); } } #ifdef HAVE_DIR_CONTEXT @@ -280,7 +277,6 @@ int ll_dir_read(struct inode *inode, __u64 *ppos, struct md_op_data *op_data, #else *ppos = pos; #endif - ll_dir_chain_fini(&chain); RETURN(rc); } @@ -402,7 +398,8 @@ out: * <0 if the creation is failed. 
*/ static int ll_dir_setdirstripe(struct dentry *dparent, struct lmv_user_md *lump, - size_t len, const char *dirname, umode_t mode) + size_t len, const char *dirname, umode_t mode, + bool createonly) { struct inode *parent = dparent->d_inode; struct ptlrpc_request *request = NULL; @@ -447,23 +444,17 @@ static int ll_dir_setdirstripe(struct dentry *dparent, struct lmv_user_md *lump, !OBD_FAIL_CHECK(OBD_FAIL_LLITE_NO_CHECK_DEAD)) RETURN(-ENOENT); + /* MDS < 2.14 doesn't support 'crush' hash type, and cannot handle + * unknown hash if client doesn't set a valid one. switch to fnv_1a_64. + */ if (!(exp_connect_flags2(sbi->ll_md_exp) & OBD_CONNECT2_CRUSH)) { - if ((lump->lum_hash_type & LMV_HASH_TYPE_MASK) == - LMV_HASH_TYPE_CRUSH) { - /* if server doesn't support 'crush' hash type, - * switch to fnv_1a_64. - */ - lump->lum_hash_type &= ~LMV_HASH_TYPE_MASK; - lump->lum_hash_type |= LMV_HASH_TYPE_FNV_1A_64; - } else if ((lump->lum_hash_type & LMV_HASH_TYPE_MASK) == - LMV_HASH_TYPE_UNKNOWN) { - /* from 2.14 MDT will choose default hash type if client - * doesn't set a valid one, while old server doesn't - * handle it. 
- */ - lump->lum_hash_type &= ~LMV_HASH_TYPE_MASK; - lump->lum_hash_type |= LMV_HASH_TYPE_DEFAULT; - } + enum lmv_hash_type type = lump->lum_hash_type & + LMV_HASH_TYPE_MASK; + + if (type == LMV_HASH_TYPE_CRUSH || + type == LMV_HASH_TYPE_UNKNOWN) + lump->lum_hash_type = (lump->lum_hash_type ^ type) | + LMV_HASH_TYPE_FNV_1A_64; } if (unlikely(!lmv_user_magic_supported(cpu_to_le32(lump->lum_magic)))) @@ -478,8 +469,9 @@ static int ll_dir_setdirstripe(struct dentry *dparent, struct lmv_user_md *lump, if (IS_ERR(op_data)) RETURN(PTR_ERR(op_data)); - if (IS_ENCRYPTED(parent) || - unlikely(llcrypt_dummy_context_enabled(parent))) { + if (ll_sbi_has_encrypt(sbi) && + (IS_ENCRYPTED(parent) || + unlikely(llcrypt_dummy_context_enabled(parent)))) { err = llcrypt_get_encryption_info(parent); if (err) GOTO(out_op_data, err); @@ -500,7 +492,16 @@ static int ll_dir_setdirstripe(struct dentry *dparent, struct lmv_user_md *lump, GOTO(out_op_data, err); } + if (encrypt) { + err = llcrypt_inherit_context(parent, NULL, op_data, false); + if (err) + GOTO(out_op_data, err); + } + op_data->op_cli_flags |= CLI_SET_MEA; + if (createonly) + op_data->op_bias |= MDS_SETSTRIPE_CREATE; + err = md_create(sbi->ll_md_exp, op_data, lump, len, mode, from_kuid(&init_user_ns, current_fsuid()), from_kgid(&init_user_ns, current_fsgid()), @@ -517,11 +518,13 @@ static int ll_dir_setdirstripe(struct dentry *dparent, struct lmv_user_md *lump, dentry.d_inode = inode; if (sbi->ll_flags & LL_SBI_FILE_SECCTX) { - inode_lock(inode); + /* no need to protect selinux_inode_setsecurity() by + * inode_lock. 
Taking it would lead to a client deadlock + * LU-13617 + */ err = security_inode_notifysecctx(inode, op_data->op_file_secctx, op_data->op_file_secctx_size); - inode_unlock(inode); } else { err = ll_inode_init_security(&dentry, inode, parent); } @@ -529,14 +532,14 @@ static int ll_dir_setdirstripe(struct dentry *dparent, struct lmv_user_md *lump, GOTO(out_inode, err); if (encrypt) { - err = llcrypt_inherit_context(parent, inode, NULL, false); + err = ll_set_encflags(inode, op_data->op_file_encctx, + op_data->op_file_encctx_size, false); if (err) GOTO(out_inode, err); } out_inode: - if (inode != NULL) - iput(inode); + iput(inode); out_request: ptlrpc_req_finished(request); out_op_data: @@ -740,10 +743,13 @@ int ll_dir_getstripe_default(struct inode *inode, void **plmm, int *plmm_size, rc = ll_dir_get_default_layout(inode, (void **)&lmm, &lmm_size, &req, valid, 0); if (rc == -ENODATA && !fid_is_root(ll_inode2fid(inode)) && - !(valid & (OBD_MD_MEA|OBD_MD_DEFAULT_MEA)) && root_request != NULL) - rc = ll_dir_get_default_layout(inode, (void **)&lmm, &lmm_size, - &root_req, valid, - GET_DEFAULT_LAYOUT_ROOT); + !(valid & (OBD_MD_MEA|OBD_MD_DEFAULT_MEA)) && root_request != NULL){ + int rc2 = ll_dir_get_default_layout(inode, (void **)&lmm, + &lmm_size, &root_req, valid, + GET_DEFAULT_LAYOUT_ROOT); + if (rc2 == 0) + rc = 0; + } *plmm = lmm; *plmm_size = lmm_size; @@ -1113,14 +1119,15 @@ static int check_owner(int type, int id) return 0; } -static int quotactl_ioctl(struct ll_sb_info *sbi, struct if_quotactl *qctl) +int quotactl_ioctl(struct ll_sb_info *sbi, struct if_quotactl *qctl) { - int cmd = qctl->qc_cmd; - int type = qctl->qc_type; - int id = qctl->qc_id; - int valid = qctl->qc_valid; - int rc = 0; - ENTRY; + int cmd = qctl->qc_cmd; + int type = qctl->qc_type; + int id = qctl->qc_id; + int valid = qctl->qc_valid; + int rc = 0; + + ENTRY; switch (cmd) { case Q_SETQUOTA: @@ -1128,14 +1135,16 @@ static int quotactl_ioctl(struct ll_sb_info *sbi, struct if_quotactl *qctl) 
case LUSTRE_Q_SETDEFAULT: case LUSTRE_Q_SETQUOTAPOOL: case LUSTRE_Q_SETINFOPOOL: - if (!cfs_capable(CFS_CAP_SYS_ADMIN)) + case LUSTRE_Q_SETDEFAULT_POOL: + if (!capable(CAP_SYS_ADMIN)) RETURN(-EPERM); break; case Q_GETQUOTA: case LUSTRE_Q_GETDEFAULT: case LUSTRE_Q_GETQUOTAPOOL: + case LUSTRE_Q_GETDEFAULT_POOL: if (check_owner(type, id) && - (!cfs_capable(CFS_CAP_SYS_ADMIN))) + (!capable(CAP_SYS_ADMIN))) RETURN(-EPERM); break; case Q_GETINFO: @@ -1203,13 +1212,21 @@ static int quotactl_ioctl(struct ll_sb_info *sbi, struct if_quotactl *qctl) if ((cmd == Q_GETQUOTA || cmd == LUSTRE_Q_GETQUOTAPOOL) && !(oqctl->qc_dqblk.dqb_valid & QIF_SPACE) && !oqctl->qc_dqblk.dqb_curspace) { - struct obd_quotactl *oqctl_tmp; - - OBD_ALLOC_PTR(oqctl_tmp); - if (oqctl_tmp == NULL) - GOTO(out, rc = -ENOMEM); - - oqctl_tmp->qc_cmd = Q_GETOQUOTA; + struct obd_quotactl *oqctl_tmp; + int qctl_len = sizeof(*oqctl_tmp) + LOV_MAXPOOLNAME + 1; + + OBD_ALLOC(oqctl_tmp, qctl_len); + if (oqctl_tmp == NULL) + GOTO(out, rc = -ENOMEM); + + if (cmd == LUSTRE_Q_GETQUOTAPOOL) { + oqctl_tmp->qc_cmd = LUSTRE_Q_GETQUOTAPOOL; + memcpy(oqctl_tmp->qc_poolname, + qctl->qc_poolname, + LOV_MAXPOOLNAME + 1); + } else { + oqctl_tmp->qc_cmd = Q_GETOQUOTA; + } oqctl_tmp->qc_id = oqctl->qc_id; oqctl_tmp->qc_type = oqctl->qc_type; @@ -1222,21 +1239,22 @@ static int quotactl_ioctl(struct ll_sb_info *sbi, struct if_quotactl *qctl) oqctl->qc_dqblk.dqb_valid |= QIF_SPACE; } - /* collect space & inode usage from MDTs */ - oqctl_tmp->qc_dqblk.dqb_curspace = 0; - oqctl_tmp->qc_dqblk.dqb_curinodes = 0; - rc = obd_quotactl(sbi->ll_md_exp, oqctl_tmp); - if (!rc || rc == -EREMOTEIO) { - oqctl->qc_dqblk.dqb_curspace += - oqctl_tmp->qc_dqblk.dqb_curspace; - oqctl->qc_dqblk.dqb_curinodes = - oqctl_tmp->qc_dqblk.dqb_curinodes; - oqctl->qc_dqblk.dqb_valid |= QIF_INODES; - } else { - oqctl->qc_dqblk.dqb_valid &= ~QIF_SPACE; - } + /* collect space & inode usage from MDTs */ + oqctl_tmp->qc_cmd = Q_GETOQUOTA; + 
oqctl_tmp->qc_dqblk.dqb_curspace = 0; + oqctl_tmp->qc_dqblk.dqb_curinodes = 0; + rc = obd_quotactl(sbi->ll_md_exp, oqctl_tmp); + if (!rc || rc == -EREMOTEIO) { + oqctl->qc_dqblk.dqb_curspace += + oqctl_tmp->qc_dqblk.dqb_curspace; + oqctl->qc_dqblk.dqb_curinodes = + oqctl_tmp->qc_dqblk.dqb_curinodes; + oqctl->qc_dqblk.dqb_valid |= QIF_INODES; + } else { + oqctl->qc_dqblk.dqb_valid &= ~QIF_SPACE; + } - OBD_FREE_PTR(oqctl_tmp); + OBD_FREE(oqctl_tmp, qctl_len); } out: QCTL_COPY(qctl, oqctl); @@ -1255,7 +1273,7 @@ int ll_rmfid(struct file *file, void __user *arg) int i, rc, *rcs = NULL; ENTRY; - if (!cfs_capable(CFS_CAP_DAC_READ_SEARCH) && + if (!capable(CAP_DAC_READ_SEARCH) && !(ll_i2sbi(file_inode(file))->ll_flags & LL_SBI_USER_FID2PATH)) RETURN(-EPERM); /* Only need to get the buflen */ @@ -1327,7 +1345,7 @@ static long ll_dir_ioctl(struct file *file, unsigned int cmd, unsigned long arg) struct dentry *dentry = file_dentry(file); struct inode *inode = file_inode(file); struct ll_sb_info *sbi = ll_i2sbi(inode); - struct obd_ioctl_data *data; + struct obd_ioctl_data *data = NULL; int rc = 0; ENTRY; @@ -1365,14 +1383,12 @@ static long ll_dir_ioctl(struct file *file, unsigned int cmd, unsigned long arg) return 0; } case IOC_MDC_LOOKUP: { - int namelen, len = 0; - char *buf = NULL; + int namelen, len = 0; char *filename; - rc = obd_ioctl_getdata(&buf, &len, (void __user *)arg); + rc = obd_ioctl_getdata(&data, &len, (void __user *)arg); if (rc != 0) RETURN(rc); - data = (void *)buf; filename = data->ioc_inlbuf1; namelen = strlen(filename); @@ -1388,24 +1404,23 @@ static long ll_dir_ioctl(struct file *file, unsigned int cmd, unsigned long arg) GOTO(out_free, rc); } out_free: - OBD_FREE_LARGE(buf, len); - return rc; - } + OBD_FREE_LARGE(data, len); + return rc; + } case LL_IOC_LMV_SETSTRIPE: { struct lmv_user_md *lum; - char *buf = NULL; - char *filename; - int namelen = 0; - int lumlen = 0; - umode_t mode; - int len; - int rc; + char *filename; + int namelen = 0; + int 
lumlen = 0; + umode_t mode; + bool createonly = false; + int len; + int rc; - rc = obd_ioctl_getdata(&buf, &len, (void __user *)arg); + rc = obd_ioctl_getdata(&data, &len, (void __user *)arg); if (rc) RETURN(rc); - data = (void *)buf; if (data->ioc_inlbuf1 == NULL || data->ioc_inlbuf2 == NULL || data->ioc_inllen1 == 0 || data->ioc_inllen2 == 0) GOTO(lmv_out_free, rc = -EINVAL); @@ -1442,9 +1457,11 @@ out_free: } mode = data->ioc_type; - rc = ll_dir_setdirstripe(dentry, lum, lumlen, filename, mode); + createonly = data->ioc_obdo1.o_flags & OBD_FL_OBDMDEXISTS; + rc = ll_dir_setdirstripe(dentry, lum, lumlen, filename, mode, + createonly); lmv_out_free: - OBD_FREE_LARGE(buf, len); + OBD_FREE_LARGE(data, len); RETURN(rc); } @@ -1484,7 +1501,7 @@ lmv_out_free: if (copy_from_user(&lumv1, lumv1p, sizeof(lumv1))) RETURN(-EFAULT); - if (inode->i_sb->s_root == file_dentry(file)) + if (is_root_inode(inode)) set_default = 1; switch (lumv1.lmm_magic) { @@ -1639,6 +1656,16 @@ finish_req: return rc; } + case LL_IOC_UNLOCK_FOREIGN: + /* if not a foreign symlink do nothing */ + if (ll_foreign_is_removable(dentry, true)) { + CDEBUG(D_INFO, + "prevent rmdir of non-foreign dir ("DFID")\n", + PFID(ll_inode2fid(inode))); + RETURN(-EOPNOTSUPP); + } + RETURN(0); + case LL_IOC_REMOVE_ENTRY: { char *filename = NULL; int namelen = 0; @@ -1674,10 +1701,10 @@ out_rmdir: RETURN(ll_obd_statfs(inode, (void __user *)arg)); case LL_IOC_LOV_GETSTRIPE: case LL_IOC_LOV_GETSTRIPE_NEW: - case LL_IOC_MDC_GETINFO: - case LL_IOC_MDC_GETINFO_OLD: - case IOC_MDC_GETFILEINFO: - case IOC_MDC_GETFILEINFO_OLD: + case LL_IOC_MDC_GETINFO_V1: + case LL_IOC_MDC_GETINFO_V2: + case IOC_MDC_GETFILEINFO_V1: + case IOC_MDC_GETFILEINFO_V2: case IOC_MDC_GETFILESTRIPE: { struct ptlrpc_request *request = NULL; struct ptlrpc_request *root_request = NULL; @@ -1692,8 +1719,8 @@ out_rmdir: struct lu_fid __user *fidp = NULL; int lmmsize; - if (cmd == IOC_MDC_GETFILEINFO_OLD || - cmd == IOC_MDC_GETFILEINFO || + if (cmd == 
IOC_MDC_GETFILEINFO_V1 || + cmd == IOC_MDC_GETFILEINFO_V2 || cmd == IOC_MDC_GETFILESTRIPE) { filename = ll_getname((const char __user *)arg); if (IS_ERR(filename)) @@ -1715,10 +1742,10 @@ out_rmdir: GOTO(out_req, rc); } - if (rc == -ENODATA && (cmd == IOC_MDC_GETFILEINFO || - cmd == LL_IOC_MDC_GETINFO || - cmd == IOC_MDC_GETFILEINFO_OLD || - cmd == LL_IOC_MDC_GETINFO_OLD)) { + if (rc == -ENODATA && (cmd == IOC_MDC_GETFILEINFO_V1 || + cmd == LL_IOC_MDC_GETINFO_V1 || + cmd == IOC_MDC_GETFILEINFO_V2 || + cmd == LL_IOC_MDC_GETINFO_V2)) { lmmsize = 0; rc = 0; } @@ -1730,8 +1757,8 @@ out_rmdir: cmd == LL_IOC_LOV_GETSTRIPE || cmd == LL_IOC_LOV_GETSTRIPE_NEW) { lump = (struct lov_user_md __user *)arg; - } else if (cmd == IOC_MDC_GETFILEINFO_OLD || - cmd == LL_IOC_MDC_GETINFO_OLD){ + } else if (cmd == IOC_MDC_GETFILEINFO_V1 || + cmd == LL_IOC_MDC_GETINFO_V1){ struct lov_user_mds_data_v1 __user *lmdp; lmdp = (struct lov_user_mds_data_v1 __user *)arg; @@ -1759,8 +1786,8 @@ out_rmdir: rc = -EOVERFLOW; } - if (cmd == IOC_MDC_GETFILEINFO_OLD || - cmd == LL_IOC_MDC_GETINFO_OLD) { + if (cmd == IOC_MDC_GETFILEINFO_V1 || + cmd == LL_IOC_MDC_GETINFO_V1) { lstat_t st = { 0 }; st.st_dev = inode->i_sb->s_dev; @@ -1781,8 +1808,8 @@ out_rmdir: if (copy_to_user(statp, &st, sizeof(st))) GOTO(out_req, rc = -EFAULT); - } else if (cmd == IOC_MDC_GETFILEINFO || - cmd == LL_IOC_MDC_GETINFO) { + } else if (cmd == IOC_MDC_GETFILEINFO_V2 || + cmd == LL_IOC_MDC_GETINFO_V2) { lstatx_t stx = { 0 }; __u64 valid = body->mbo_valid; @@ -1816,7 +1843,7 @@ out_rmdir: * However, this whould be better decided by the MDS * instead of the client. 
*/ - if (cmd == LL_IOC_MDC_GETINFO && + if (cmd == LL_IOC_MDC_GETINFO_V2 && ll_i2info(inode)->lli_lsm_md != NULL) valid &= ~(OBD_MD_FLSIZE | OBD_MD_FLBLOCKS); @@ -1877,8 +1904,11 @@ out_quotactl: OBD_FREE(qctl, qctl_len); RETURN(rc); } - case OBD_IOC_GETDTNAME: - case OBD_IOC_GETMDNAME: + case OBD_IOC_GETNAME_OLD: + /* fall through */ + case OBD_IOC_GETDTNAME: + /* fall through */ + case OBD_IOC_GETMDNAME: RETURN(ll_get_obd_name(inode, cmd, arg)); case LL_IOC_FLUSHCTX: RETURN(ll_flush_ctx(inode)); @@ -2015,7 +2045,7 @@ out_hur: RETURN(rc); } case LL_IOC_HSM_CT_START: - if (!cfs_capable(CFS_CAP_SYS_ADMIN)) + if (!capable(CAP_SYS_ADMIN)) RETURN(-EPERM); rc = copy_and_ct_start(cmd, sbi->ll_md_exp, @@ -2062,17 +2092,15 @@ out_hur: } case LL_IOC_MIGRATE: { struct lmv_user_md *lum; - char *buf = NULL; int len; char *filename; int namelen = 0; int rc; - rc = obd_ioctl_getdata(&buf, &len, (void __user *)arg); + rc = obd_ioctl_getdata(&data, &len, (void __user *)arg); if (rc) RETURN(rc); - data = (struct obd_ioctl_data *)buf; if (data->ioc_inlbuf1 == NULL || data->ioc_inlbuf2 == NULL || data->ioc_inllen1 == 0 || data->ioc_inllen2 == 0) GOTO(migrate_free, rc = -EINVAL); @@ -2096,7 +2124,7 @@ out_hur: rc = ll_migrate(inode, file, lum, filename); migrate_free: - OBD_FREE_LARGE(buf, len); + OBD_FREE_LARGE(data, len); RETURN(rc); }