X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Fllite%2Fdir.c;h=fdfab6afa1593bd1ba9d0a3ca2da8aaa7c5cb6d1;hp=ef46098542a669f35a313816a5ba14a071600130;hb=9e5cb57addbb5d7bc1596096821ad8dcac7a939b;hpb=8a11cb6282cfbdc8617b809344e6a11223e86a38 diff --git a/lustre/llite/dir.c b/lustre/llite/dir.c index ef46098..fdfab6a 100644 --- a/lustre/llite/dir.c +++ b/lustre/llite/dir.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -27,7 +23,7 @@ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. * - * Copyright (c) 2011, 2014, Intel Corporation. + * Copyright (c) 2011, 2017, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -42,24 +38,24 @@ #include #include #include +#include #include -#ifdef HAVE_UIDGID_HEADER -# include -#endif -#include +#include +#include #include // for wait_on_buffer #include #define DEBUG_SUBSYSTEM S_LLITE -#include #include #include -#include +#include #include #include #include #include +#include + #include "llite_internal.h" /* @@ -138,7 +134,7 @@ * a header lu_dirpage which describes the start/end hash, and whether this * page is empty (contains no dir entry) or hash collide with next page. * After client receives reply, several pages will be integrated into dir page - * in PAGE_CACHE_SIZE (if PAGE_CACHE_SIZE greater than LU_PAGE_SIZE), and the + * in PAGE_SIZE (if PAGE_SIZE greater than LU_PAGE_SIZE), and the * lu_dirpage for this integrated page will be adjusted. See * mdc_adjust_dirpages(). 
* @@ -165,8 +161,7 @@ void ll_release_page(struct inode *inode, struct page *page, /* Always remove the page for striped dir, because the page is * built from temporarily in LMV layer */ - if (inode != NULL && S_ISDIR(inode->i_mode) && - ll_i2info(inode)->lli_lsm_md != NULL) { + if (inode && ll_dir_striped(inode)) { __free_page(page); return; } @@ -174,10 +169,10 @@ void ll_release_page(struct inode *inode, struct page *page, if (remove) { lock_page(page); if (likely(page->mapping != NULL)) - truncate_complete_page(page->mapping, page); + delete_from_page_cache(page); unlock_page(page); } - page_cache_release(page); + put_page(page); } /** @@ -248,18 +243,11 @@ int ll_dir_read(struct inode *inode, __u64 *ppos, struct md_op_data *op_data, __u64 ino; hash = le64_to_cpu(ent->lde_hash); - if (hash < pos) - /* - * Skip until we find target hash - * value. - */ + if (hash < pos) /* Skip until we find target hash */ continue; namelen = le16_to_cpu(ent->lde_namelen); - if (namelen == 0) - /* - * Skip dummy record. - */ + if (namelen == 0) /* Skip dummy record */ continue; if (is_api32 && is_hash64) @@ -269,10 +257,9 @@ int ll_dir_read(struct inode *inode, __u64 *ppos, struct md_op_data *op_data, fid_le_to_cpu(&fid, &ent->lde_fid); ino = cl_fid_build_ino(&fid, is_api32); type = ll_dirent_type_get(ent); - /* For 'll_nfs_get_name_filldir()', it will try - * to access the 'ent' through its 'lde_name', - * so the parameter 'name' for 'filldir()' must - * be part of the 'ent'. */ + /* For ll_nfs_get_name_filldir(), it will try to access + * 'ent' through 'lde_name', so the parameter 'name' + * for 'filldir()' must be part of the 'ent'. 
*/ #ifdef HAVE_DIR_CONTEXT ctx->pos = lhash; done = !dir_emit(ctx, ent->lde_name, namelen, ino, @@ -325,14 +312,16 @@ static int ll_iterate(struct file *filp, struct dir_context *ctx) static int ll_readdir(struct file *filp, void *cookie, filldir_t filldir) #endif { - struct inode *inode = filp->f_path.dentry->d_inode; - struct ll_file_data *lfd = LUSTRE_FPRIVATE(filp); - struct ll_sb_info *sbi = ll_i2sbi(inode); - int hash64 = sbi->ll_flags & LL_SBI_64BIT_HASH; - int api32 = ll_need_32bit_api(sbi); - struct md_op_data *op_data; - __u64 pos; - int rc; + struct inode *inode = file_inode(filp); + struct ll_file_data *lfd = filp->private_data; + struct ll_sb_info *sbi = ll_i2sbi(inode); + int hash64 = sbi->ll_flags & LL_SBI_64BIT_HASH; + int api32 = ll_need_32bit_api(sbi); + struct md_op_data *op_data; + struct lu_fid pfid = { 0 }; + ktime_t kstart = ktime_get(); + __u64 pos; + int rc; ENTRY; if (lfd != NULL) @@ -350,35 +339,44 @@ static int ll_readdir(struct file *filp, void *cookie, filldir_t filldir) */ GOTO(out, rc = 0); - op_data = ll_prep_md_op_data(NULL, inode, inode, NULL, 0, 0, - LUSTRE_OPC_ANY, inode); - if (IS_ERR(op_data)) - GOTO(out, rc = PTR_ERR(op_data)); - - if (unlikely(op_data->op_mea1 != NULL)) { - /* This is only needed for striped dir to fill .., - * see lmv_read_entry */ - if (filp->f_path.dentry->d_parent != NULL && - filp->f_path.dentry->d_parent->d_inode != NULL) { - __u64 ibits = MDS_INODELOCK_UPDATE; + if (unlikely(ll_dir_striped(inode))) { + /* + * This is only needed for striped dir to fill .., + * see lmv_read_page() + */ + if (file_dentry(filp)->d_parent != NULL && + file_dentry(filp)->d_parent->d_inode != NULL) { + __u64 ibits = MDS_INODELOCK_LOOKUP; struct inode *parent = - filp->f_path.dentry->d_parent->d_inode; + file_dentry(filp)->d_parent->d_inode; if (ll_have_md_lock(parent, &ibits, LCK_MINMODE)) - op_data->op_fid3 = *ll_inode2fid(parent); + pfid = *ll_inode2fid(parent); } /* If it can not find in cache, do lookup .. 
on the master * object */ - if (fid_is_zero(&op_data->op_fid3)) { - rc = ll_dir_get_parent_fid(inode, &op_data->op_fid3); - if (rc != 0) { - ll_finish_md_op_data(op_data); + if (fid_is_zero(&pfid)) { + rc = ll_dir_get_parent_fid(inode, &pfid); + if (rc != 0) RETURN(rc); - } } } - op_data->op_max_pages = sbi->ll_md_brw_pages; + + op_data = ll_prep_md_op_data(NULL, inode, inode, NULL, 0, 0, + LUSTRE_OPC_ANY, inode); + if (IS_ERR(op_data)) + GOTO(out, rc = PTR_ERR(op_data)); + + /* foreign dirs are browsed out of Lustre */ + if (unlikely(op_data->op_mea1 != NULL && + op_data->op_mea1->lsm_md_magic == LMV_MAGIC_FOREIGN)) { + ll_finish_md_op_data(op_data); + RETURN(-ENODATA); + } + + op_data->op_fid3 = pfid; + #ifdef HAVE_DIR_CONTEXT ctx->pos = pos; rc = ll_dir_read(inode, &pos, op_data, ctx); @@ -404,15 +402,16 @@ static int ll_readdir(struct file *filp, void *cookie, filldir_t filldir) filp->f_pos = pos; #endif ll_finish_md_op_data(op_data); - filp->f_version = inode->i_version; out: if (!rc) - ll_stats_ops_tally(sbi, LPROC_LL_READDIR, 1); + ll_stats_ops_tally(sbi, LPROC_LL_READDIR, + ktime_us_delta(ktime_get(), kstart)); RETURN(rc); } +#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 13, 53, 0) static int ll_send_mgc_param(struct obd_export *mgc, char *string) { struct mgs_send_param *msp; @@ -431,42 +430,65 @@ static int ll_send_mgc_param(struct obd_export *mgc, char *string) return rc; } +#endif /** * Create striped directory with specified stripe(@lump) * - * param[in]parent the parent of the directory. - * param[in]lump the specified stripes. - * param[in]dirname the name of the directory. - * param[in]mode the specified mode of the directory. + * \param[in] dparent the parent of the directory. + * \param[in] lump the specified stripes. + * \param[in] dirname the name of the directory. + * \param[in] mode the specified mode of the directory. * - * retval =0 if striped directory is being created successfully. 
+ * \retval =0 if striped directory is being created successfully. * <0 if the creation is failed. */ -static int ll_dir_setdirstripe(struct inode *parent, struct lmv_user_md *lump, - const char *dirname, umode_t mode) +static int ll_dir_setdirstripe(struct dentry *dparent, struct lmv_user_md *lump, + size_t len, const char *dirname, umode_t mode) { + struct inode *parent = dparent->d_inode; struct ptlrpc_request *request = NULL; struct md_op_data *op_data; struct ll_sb_info *sbi = ll_i2sbi(parent); struct inode *inode = NULL; - struct dentry dentry; + struct dentry dentry = { + .d_parent = dparent, + .d_name = { + .name = dirname, + .len = strlen(dirname), + .hash = ll_full_name_hash(dparent, dirname, + strlen(dirname)), + }, + }; int err; ENTRY; - if (unlikely(lump->lum_magic != LMV_USER_MAGIC)) + if (unlikely(!lmv_magic_supported(lump->lum_magic))) RETURN(-EINVAL); - CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p) name %s" - "stripe_offset %d, stripe_count: %u\n", - PFID(ll_inode2fid(parent)), parent, dirname, - (int)lump->lum_stripe_offset, lump->lum_stripe_count); + if (lump->lum_magic != LMV_MAGIC_FOREIGN) { + CDEBUG(D_VFSTRACE, + "VFS Op:inode="DFID"(%p) name %s stripe_offset %d, stripe_count: %u\n", + PFID(ll_inode2fid(parent)), parent, dirname, + (int)lump->lum_stripe_offset, lump->lum_stripe_count); + } else { + struct lmv_foreign_md *lfm = (struct lmv_foreign_md *)lump; + + CDEBUG(D_VFSTRACE, + "VFS Op:inode="DFID"(%p) name %s foreign, length %u, value '%.*s'\n", + PFID(ll_inode2fid(parent)), parent, dirname, + lfm->lfm_length, lfm->lfm_length, lfm->lfm_value); + } if (lump->lum_stripe_count > 1 && !(exp_connect_flags(sbi->ll_md_exp) & OBD_CONNECT_DIR_STRIPE)) RETURN(-EINVAL); - if (lump->lum_magic != cpu_to_le32(LMV_USER_MAGIC)) + if (IS_DEADDIR(parent) && + !OBD_FAIL_CHECK(OBD_FAIL_LLITE_NO_CHECK_DEAD)) + RETURN(-ENOENT); + + if (unlikely(!lmv_magic_supported(cpu_to_le32(lump->lum_magic)))) lustre_swab_lmv_user_md(lump); if (!IS_POSIXACL(parent) || 
!exp_connect_umask(ll_i2mdexp(parent))) @@ -476,103 +498,144 @@ static int ll_dir_setdirstripe(struct inode *parent, struct lmv_user_md *lump, strlen(dirname), mode, LUSTRE_OPC_MKDIR, lump); if (IS_ERR(op_data)) - GOTO(err_exit, err = PTR_ERR(op_data)); + RETURN(PTR_ERR(op_data)); + + if (sbi->ll_flags & LL_SBI_FILE_SECCTX) { + /* selinux_dentry_init_security() uses dentry->d_parent and name + * to determine the security context for the file. So our fake + * dentry should be real enough for this purpose. */ + err = ll_dentry_init_security(&dentry, mode, &dentry.d_name, + &op_data->op_file_secctx_name, + &op_data->op_file_secctx, + &op_data->op_file_secctx_size); + if (err < 0) + GOTO(out_op_data, err); + } op_data->op_cli_flags |= CLI_SET_MEA; - err = md_create(sbi->ll_md_exp, op_data, lump, sizeof(*lump), mode, + err = md_create(sbi->ll_md_exp, op_data, lump, len, mode, from_kuid(&init_user_ns, current_fsuid()), from_kgid(&init_user_ns, current_fsgid()), cfs_curproc_cap_pack(), 0, &request); - ll_finish_md_op_data(op_data); if (err) - GOTO(err_exit, err); + GOTO(out_request, err); + + CFS_FAIL_TIMEOUT(OBD_FAIL_LLITE_SETDIRSTRIPE_PAUSE, cfs_fail_val); err = ll_prep_inode(&inode, request, parent->i_sb, NULL); if (err) - GOTO(err_exit, err); + GOTO(out_inode, err); - memset(&dentry, 0, sizeof(dentry)); dentry.d_inode = inode; - err = ll_init_security(&dentry, inode, parent); - iput(inode); + if (sbi->ll_flags & LL_SBI_FILE_SECCTX) { + inode_lock(inode); + err = security_inode_notifysecctx(inode, + op_data->op_file_secctx, + op_data->op_file_secctx_size); + inode_unlock(inode); + } else { + err = ll_inode_init_security(&dentry, inode, parent); + } if (err) - GOTO(err_exit, err); + GOTO(out_inode, err); -err_exit: +out_inode: + if (inode != NULL) + iput(inode); +out_request: ptlrpc_req_finished(request); +out_op_data: + ll_finish_md_op_data(op_data); + return err; } int ll_dir_setstripe(struct inode *inode, struct lov_user_md *lump, int set_default) { - struct 
ll_sb_info *sbi = ll_i2sbi(inode); - struct md_op_data *op_data; - struct ptlrpc_request *req = NULL; - int rc = 0; - struct lustre_sb_info *lsi = s2lsi(inode->i_sb); - struct obd_device *mgc = lsi->lsi_mgc; - int lum_size; + struct ll_sb_info *sbi = ll_i2sbi(inode); + struct md_op_data *op_data; + struct ptlrpc_request *req = NULL; + int rc = 0; +#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 13, 53, 0) + struct lustre_sb_info *lsi = s2lsi(inode->i_sb); + struct obd_device *mgc = lsi->lsi_mgc; +#endif + int lum_size; ENTRY; - if (lump != NULL) { - /* - * This is coming from userspace, so should be in - * local endian. But the MDS would like it in little - * endian, so we swab it before we send it. - */ - switch (lump->lmm_magic) { - case LOV_USER_MAGIC_V1: { - if (lump->lmm_magic != cpu_to_le32(LOV_USER_MAGIC_V1)) - lustre_swab_lov_user_md_v1(lump); - lum_size = sizeof(struct lov_user_md_v1); - break; - } - case LOV_USER_MAGIC_V3: { - if (lump->lmm_magic != cpu_to_le32(LOV_USER_MAGIC_V3)) - lustre_swab_lov_user_md_v3( - (struct lov_user_md_v3 *)lump); - lum_size = sizeof(struct lov_user_md_v3); - break; - } - case LMV_USER_MAGIC: { + if (lump != NULL) { + switch (lump->lmm_magic) { + case LOV_USER_MAGIC_V1: + lum_size = sizeof(struct lov_user_md_v1); + break; + case LOV_USER_MAGIC_V3: + lum_size = sizeof(struct lov_user_md_v3); + break; + case LOV_USER_MAGIC_COMP_V1: + lum_size = ((struct lov_comp_md_v1 *)lump)->lcm_size; + break; + case LMV_USER_MAGIC: if (lump->lmm_magic != cpu_to_le32(LMV_USER_MAGIC)) lustre_swab_lmv_user_md( (struct lmv_user_md *)lump); lum_size = sizeof(struct lmv_user_md); break; + case LOV_USER_MAGIC_SPECIFIC: { + struct lov_user_md_v3 *v3 = + (struct lov_user_md_v3 *)lump; + if (v3->lmm_stripe_count > LOV_MAX_STRIPE_COUNT) + RETURN(-EINVAL); + lum_size = lov_user_md_size(v3->lmm_stripe_count, + LOV_USER_MAGIC_SPECIFIC); + break; } - default: { - CDEBUG(D_IOCTL, "bad userland LOV MAGIC:" - " %#08x != %#08x nor %#08x\n", - lump->lmm_magic, 
LOV_USER_MAGIC_V1, - LOV_USER_MAGIC_V3); - RETURN(-EINVAL); - } - } - } else { - lum_size = sizeof(struct lov_user_md_v1); - } + default: + CDEBUG(D_IOCTL, "bad userland LOV MAGIC:" + " %#08x != %#08x nor %#08x\n", + lump->lmm_magic, LOV_USER_MAGIC_V1, + LOV_USER_MAGIC_V3); + RETURN(-EINVAL); + } + + /* + * This is coming from userspace, so should be in + * local endian. But the MDS would like it in little + * endian, so we swab it before we send it. + */ + if ((__swab32(lump->lmm_magic) & le32_to_cpu(LOV_MAGIC_MASK)) == + le32_to_cpu(LOV_MAGIC_MAGIC)) + lustre_swab_lov_user_md(lump, 0); + } else { + lum_size = sizeof(struct lov_user_md_v1); + } - op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0, - LUSTRE_OPC_ANY, NULL); - if (IS_ERR(op_data)) - RETURN(PTR_ERR(op_data)); + op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0, + LUSTRE_OPC_ANY, NULL); + if (IS_ERR(op_data)) + RETURN(PTR_ERR(op_data)); - /* swabbing is done in lov_setstripe() on server side */ + /* swabbing is done in lov_setstripe() on server side */ rc = md_setattr(sbi->ll_md_exp, op_data, lump, lum_size, &req); - ll_finish_md_op_data(op_data); - ptlrpc_req_finished(req); - if (rc) { - if (rc != -EPERM && rc != -EACCES) - CERROR("mdc_setattr fails: rc = %d\n", rc); - } + ll_finish_md_op_data(op_data); + ptlrpc_req_finished(req); + if (rc) + RETURN(rc); - /* In the following we use the fact that LOV_USER_MAGIC_V1 and - LOV_USER_MAGIC_V3 have the same initial fields so we do not - need the make the distiction between the 2 versions */ - if (set_default && mgc->u.cli.cl_mgc_mgsexp) { +#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 13, 53, 0) + /* + * 2.9 server has stored filesystem default stripe in ROOT xattr, + * and it's stored into system config for backward compatibility. 
+ * + * In the following we use the fact that LOV_USER_MAGIC_V1 and + * LOV_USER_MAGIC_V3 have the same initial fields so we do not + * need the make the distiction between the 2 versions + */ + if (set_default && mgc->u.cli.cl_mgc_mgsexp && + (lump == NULL || + le32_to_cpu(lump->lmm_magic) == LOV_USER_MAGIC_V1 || + le32_to_cpu(lump->lmm_magic) == LOV_USER_MAGIC_V3)) { char *param = NULL; char *buf; @@ -582,47 +645,42 @@ int ll_dir_setstripe(struct inode *inode, struct lov_user_md *lump, buf = param; /* Get fsname and assume devname to be -MDT0000. */ - ll_get_fsname(inode->i_sb, buf, MTI_NAME_MAXLEN); - strcat(buf, "-MDT0000.lov"); + snprintf(buf, MGS_PARAM_MAXLEN, "%s-MDT0000.lov", + sbi->ll_fsname); buf += strlen(buf); /* Set root stripesize */ - sprintf(buf, ".stripesize=%u", - lump ? le32_to_cpu(lump->lmm_stripe_size) : 0); + snprintf(buf, MGS_PARAM_MAXLEN, ".stripesize=%u", + lump ? le32_to_cpu(lump->lmm_stripe_size) : 0); rc = ll_send_mgc_param(mgc->u.cli.cl_mgc_mgsexp, param); if (rc) GOTO(end, rc); /* Set root stripecount */ - sprintf(buf, ".stripecount=%hd", - lump ? le16_to_cpu(lump->lmm_stripe_count) : 0); + snprintf(buf, MGS_PARAM_MAXLEN, ".stripecount=%hd", + lump ? le16_to_cpu(lump->lmm_stripe_count) : 0); rc = ll_send_mgc_param(mgc->u.cli.cl_mgc_mgsexp, param); if (rc) GOTO(end, rc); /* Set root stripeoffset */ - sprintf(buf, ".stripeoffset=%hd", - lump ? le16_to_cpu(lump->lmm_stripe_offset) : - (typeof(lump->lmm_stripe_offset))(-1)); + snprintf(buf, MGS_PARAM_MAXLEN, ".stripeoffset=%hd", + lump ? 
le16_to_cpu(lump->lmm_stripe_offset) : + (typeof(lump->lmm_stripe_offset))(-1)); rc = ll_send_mgc_param(mgc->u.cli.cl_mgc_mgsexp, param); end: if (param != NULL) OBD_FREE(param, MGS_PARAM_MAXLEN); } +#endif RETURN(rc); } -/** - * This function will be used to get default LOV/LMV/Default LMV - * @valid will be used to indicate which stripe it will retrieve - * OBD_MD_MEA LMV stripe EA - * OBD_MD_DEFAULT_MEA Default LMV stripe EA - * otherwise Default LOV EA. - * Each time, it can only retrieve 1 stripe EA - **/ -int ll_dir_getstripe(struct inode *inode, void **plmm, int *plmm_size, - struct ptlrpc_request **request, u64 valid) +static int ll_dir_get_default_layout(struct inode *inode, void **plmm, + int *plmm_size, + struct ptlrpc_request **request, u64 valid, + enum get_default_layout_type type) { struct ll_sb_info *sbi = ll_i2sbi(inode); struct mdt_body *body; @@ -630,6 +688,7 @@ int ll_dir_getstripe(struct inode *inode, void **plmm, int *plmm_size, struct ptlrpc_request *req = NULL; int rc, lmm_size; struct md_op_data *op_data; + struct lu_fid fid; ENTRY; rc = ll_get_default_mdsize(sbi, &lmm_size); @@ -643,11 +702,19 @@ int ll_dir_getstripe(struct inode *inode, void **plmm, int *plmm_size, RETURN(PTR_ERR(op_data)); op_data->op_valid = valid | OBD_MD_FLEASIZE | OBD_MD_FLDIREA; + + if (type == GET_DEFAULT_LAYOUT_ROOT) { + lu_root_fid(&op_data->op_fid1); + fid = op_data->op_fid1; + } else { + fid = *ll_inode2fid(inode); + } + rc = md_getattr(sbi->ll_md_exp, op_data, &req); ll_finish_md_op_data(op_data); if (rc < 0) { - CDEBUG(D_INFO, "md_getattr failed on inode " - DFID": rc %d\n", PFID(ll_inode2fid(inode)), rc); + CDEBUG(D_INFO, "md_getattr failed on inode "DFID": rc %d\n", + PFID(&fid), rc); GOTO(out, rc); } @@ -673,12 +740,11 @@ int ll_dir_getstripe(struct inode *inode, void **plmm, int *plmm_size, /* We don't swab objects for directories */ switch (le32_to_cpu(lmm->lmm_magic)) { case LOV_MAGIC_V1: - if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) - 
lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm); - break; case LOV_MAGIC_V3: + case LOV_MAGIC_COMP_V1: + case LOV_USER_MAGIC_SPECIFIC: if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) - lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm); + lustre_swab_lov_user_md((struct lov_user_md *)lmm, 0); break; case LMV_MAGIC_V1: if (LMV_MAGIC != cpu_to_le32(LMV_MAGIC)) @@ -688,6 +754,17 @@ int ll_dir_getstripe(struct inode *inode, void **plmm, int *plmm_size, if (LMV_USER_MAGIC != cpu_to_le32(LMV_USER_MAGIC)) lustre_swab_lmv_user_md((struct lmv_user_md *)lmm); break; + case LMV_MAGIC_FOREIGN: { + struct lmv_foreign_md *lfm = (struct lmv_foreign_md *)lmm; + + if (LMV_MAGIC_FOREIGN != cpu_to_le32(LMV_MAGIC_FOREIGN)) { + __swab32s(&lfm->lfm_magic); + __swab32s(&lfm->lfm_length); + __swab32s(&lfm->lfm_type); + __swab32s(&lfm->lfm_flags); + } + break; + } default: CERROR("unknown magic: %lX\n", (unsigned long)lmm->lmm_magic); rc = -EPROTO; @@ -699,6 +776,72 @@ out: return rc; } +/** + * This function will be used to get default LOV/LMV/Default LMV + * @valid will be used to indicate which stripe it will retrieve. + * If the directory does not have its own default layout, then the + * function will request the default layout from root FID. + * OBD_MD_MEA LMV stripe EA + * OBD_MD_DEFAULT_MEA Default LMV stripe EA + * otherwise Default LOV EA. 
+ * Each time, it can only retrieve 1 stripe EA + **/ +int ll_dir_getstripe_default(struct inode *inode, void **plmm, int *plmm_size, + struct ptlrpc_request **request, + struct ptlrpc_request **root_request, + u64 valid) +{ + struct ptlrpc_request *req = NULL; + struct ptlrpc_request *root_req = NULL; + struct lov_mds_md *lmm = NULL; + int lmm_size = 0; + int rc = 0; + ENTRY; + + rc = ll_dir_get_default_layout(inode, (void **)&lmm, &lmm_size, + &req, valid, 0); + if (rc == -ENODATA && !fid_is_root(ll_inode2fid(inode)) && + !(valid & (OBD_MD_MEA|OBD_MD_DEFAULT_MEA)) && root_request != NULL) + rc = ll_dir_get_default_layout(inode, (void **)&lmm, &lmm_size, + &root_req, valid, + GET_DEFAULT_LAYOUT_ROOT); + + *plmm = lmm; + *plmm_size = lmm_size; + *request = req; + if (root_request != NULL) + *root_request = root_req; + + RETURN(rc); +} + +/** + * This function will be used to get default LOV/LMV/Default LMV + * @valid will be used to indicate which stripe it will retrieve + * OBD_MD_MEA LMV stripe EA + * OBD_MD_DEFAULT_MEA Default LMV stripe EA + * otherwise Default LOV EA. + * Each time, it can only retrieve 1 stripe EA + **/ +int ll_dir_getstripe(struct inode *inode, void **plmm, int *plmm_size, + struct ptlrpc_request **request, u64 valid) +{ + struct ptlrpc_request *req = NULL; + struct lov_mds_md *lmm = NULL; + int lmm_size = 0; + int rc = 0; + ENTRY; + + rc = ll_dir_get_default_layout(inode, (void **)&lmm, &lmm_size, + &req, valid, 0); + + *plmm = lmm; + *plmm_size = lmm_size; + *request = req; + + RETURN(rc); +} + int ll_get_mdt_idx_by_fid(struct ll_sb_info *sbi, const struct lu_fid *fid) { struct md_op_data *op_data; @@ -778,7 +921,7 @@ static int ll_ioc_copy_start(struct super_block *sb, struct hsm_copy *copy) if (rc != 0) { CDEBUG(D_HSM, "Could not read file data version of " DFID" (rc = %d). 
Archive request (" - LPX64") could not be done.\n", + "%#llx) could not be done.\n", PFID(©->hc_hai.hai_fid), rc, copy->hc_hai.hai_cookie); hpk.hpk_flags |= HP_FLAG_RETRY; @@ -880,7 +1023,7 @@ static int ll_ioc_copy_end(struct super_block *sb, struct hsm_copy *copy) (copy->hc_data_version != data_version)) { CDEBUG(D_HSM, "File data version mismatched. " "File content was changed during archiving. " - DFID", start:"LPX64" current:"LPX64"\n", + DFID", start:%#llx current:%#llx\n", PFID(©->hc_hai.hai_fid), copy->hc_data_version, data_version); /* File was changed, send error to cdt. Do not ask for @@ -889,10 +1032,11 @@ static int ll_ioc_copy_end(struct super_block *sb, struct hsm_copy *copy) * The policy engine will ask for a new archive later * when the file will not be modified for some tunable * time */ - /* we do not notify caller */ hpk.hpk_flags &= ~HP_FLAG_RETRY; + rc = -EBUSY; /* hpk_errval must be >= 0 */ - hpk.hpk_errval = EBUSY; + hpk.hpk_errval = -rc; + GOTO(progress, rc); } } @@ -906,28 +1050,130 @@ progress: } -static int copy_and_ioctl(int cmd, struct obd_export *exp, - const void __user *data, size_t size) +static int copy_and_ct_start(int cmd, struct obd_export *exp, + const struct lustre_kernelcomm __user *data) { - void *copy; + struct lustre_kernelcomm *lk; + struct lustre_kernelcomm *tmp; + size_t size = sizeof(*lk); + size_t new_size; + int i; int rc; - OBD_ALLOC(copy, size); - if (copy == NULL) + /* copy data from userspace to get numbers of archive_id */ + OBD_ALLOC(lk, size); + if (lk == NULL) return -ENOMEM; - if (copy_from_user(copy, data, size)) { - rc = -EFAULT; - goto out; + if (copy_from_user(lk, data, size)) + GOTO(out_lk, rc = -EFAULT); + + if (lk->lk_flags & LK_FLG_STOP) + goto do_ioctl; + + if (!(lk->lk_flags & LK_FLG_DATANR)) { + __u32 archive_mask = lk->lk_data_count; + int count; + + /* old hsm agent to old MDS */ + if (!exp_connect_archive_id_array(exp)) + goto do_ioctl; + + /* old hsm agent to new MDS */ + lk->lk_flags |= 
LK_FLG_DATANR; + + if (archive_mask == 0) + goto do_ioctl; + + count = hweight32(archive_mask); + new_size = offsetof(struct lustre_kernelcomm, lk_data[count]); + OBD_ALLOC(tmp, new_size); + if (tmp == NULL) + GOTO(out_lk, rc = -ENOMEM); + + memcpy(tmp, lk, size); + tmp->lk_data_count = count; + OBD_FREE(lk, size); + lk = tmp; + size = new_size; + + count = 0; + for (i = 0; i < sizeof(archive_mask) * 8; i++) { + if ((1 << i) & archive_mask) { + lk->lk_data[count] = i + 1; + count++; + } + } + goto do_ioctl; } - rc = obd_iocontrol(cmd, exp, size, copy, NULL); -out: - OBD_FREE(copy, size); + /* new hsm agent to new mds */ + if (lk->lk_data_count > 0) { + new_size = offsetof(struct lustre_kernelcomm, + lk_data[lk->lk_data_count]); + OBD_ALLOC(tmp, new_size); + if (tmp == NULL) + GOTO(out_lk, rc = -ENOMEM); + OBD_FREE(lk, size); + lk = tmp; + size = new_size; + + if (copy_from_user(lk, data, size)) + GOTO(out_lk, rc = -EFAULT); + } + + /* new hsm agent to old MDS */ + if (!exp_connect_archive_id_array(exp)) { + __u32 archives = 0; + + if (lk->lk_data_count > LL_HSM_ORIGIN_MAX_ARCHIVE) + GOTO(out_lk, rc = -EINVAL); + + for (i = 0; i < lk->lk_data_count; i++) { + if (lk->lk_data[i] > LL_HSM_ORIGIN_MAX_ARCHIVE) { + rc = -EINVAL; + CERROR("%s: archive id %d requested but only " + "[0 - %zu] supported: rc = %d\n", + exp->exp_obd->obd_name, lk->lk_data[i], + LL_HSM_ORIGIN_MAX_ARCHIVE, rc); + GOTO(out_lk, rc); + } + + if (lk->lk_data[i] == 0) { + archives = 0; + break; + } + + archives |= (1 << (lk->lk_data[i] - 1)); + } + lk->lk_flags &= ~LK_FLG_DATANR; + lk->lk_data_count = archives; + } +do_ioctl: + rc = obd_iocontrol(cmd, exp, size, lk, NULL); +out_lk: + OBD_FREE(lk, size); return rc; } +static int check_owner(int type, int id) +{ + switch (type) { + case USRQUOTA: + if (!uid_eq(current_euid(), make_kuid(&init_user_ns, id))) + return -EPERM; + break; + case GRPQUOTA: + if (!in_egroup_p(make_kgid(&init_user_ns, id))) + return -EPERM; + break; + case PRJQUOTA: + break; + } 
+ return 0; +} + static int quotactl_ioctl(struct ll_sb_info *sbi, struct if_quotactl *qctl) { int cmd = qctl->qc_cmd; @@ -937,33 +1183,27 @@ static int quotactl_ioctl(struct ll_sb_info *sbi, struct if_quotactl *qctl) int rc = 0; ENTRY; - switch (cmd) { - case Q_SETQUOTA: - case Q_SETINFO: - if (!cfs_capable(CFS_CAP_SYS_ADMIN) || - sbi->ll_flags & LL_SBI_RMT_CLIENT) - RETURN(-EPERM); - break; + switch (cmd) { + case Q_SETQUOTA: + case Q_SETINFO: + case LUSTRE_Q_SETDEFAULT: + if (!cfs_capable(CFS_CAP_SYS_ADMIN)) + RETURN(-EPERM); + break; case Q_GETQUOTA: - if (((type == USRQUOTA && - !uid_eq(current_euid(), make_kuid(&init_user_ns, id))) || - (type == GRPQUOTA && - !in_egroup_p(make_kgid(&init_user_ns, id)))) && - (!cfs_capable(CFS_CAP_SYS_ADMIN) || - sbi->ll_flags & LL_SBI_RMT_CLIENT)) + case LUSTRE_Q_GETDEFAULT: + if (check_owner(type, id) && + (!cfs_capable(CFS_CAP_SYS_ADMIN))) RETURN(-EPERM); - break; - case Q_GETINFO: - break; - default: - CERROR("unsupported quotactl op: %#x\n", cmd); - RETURN(-ENOTTY); - } + break; + case Q_GETINFO: + break; + default: + CERROR("unsupported quotactl op: %#x\n", cmd); + RETURN(-ENOTSUPP); + } if (valid != QC_GENERAL) { - if (sbi->ll_flags & LL_SBI_RMT_CLIENT) - RETURN(-EOPNOTSUPP); - if (cmd == Q_GETINFO) qctl->qc_cmd = Q_GETOINFO; else if (cmd == Q_GETQUOTA) @@ -1058,73 +1298,126 @@ out: RETURN(rc); } -static char * -ll_getname(const char __user *filename) +int ll_rmfid(struct file *file, void __user *arg) +{ + const struct fid_array __user *ufa = arg; + struct fid_array *lfa = NULL; + size_t size; + unsigned nr; + int i, rc, *rcs = NULL; + ENTRY; + + if (!cfs_capable(CFS_CAP_DAC_READ_SEARCH) && + !(ll_i2sbi(file_inode(file))->ll_flags & LL_SBI_USER_FID2PATH)) + RETURN(-EPERM); + /* Only need to get the buflen */ + if (get_user(nr, &ufa->fa_nr)) + RETURN(-EFAULT); + /* DoS protection */ + if (nr > OBD_MAX_FIDS_IN_ARRAY) + RETURN(-E2BIG); + + size = offsetof(struct fid_array, fa_fids[nr]); + OBD_ALLOC(lfa, size); + if (!lfa) 
+ RETURN(-ENOMEM); + OBD_ALLOC(rcs, sizeof(int) * nr); + if (!rcs) + GOTO(free_lfa, rc = -ENOMEM); + + if (copy_from_user(lfa, arg, size)) + GOTO(free_rcs, rc = -EFAULT); + + /* Call mdc_iocontrol */ + rc = md_rmfid(ll_i2mdexp(file_inode(file)), lfa, rcs, NULL); + if (!rc) { + for (i = 0; i < nr; i++) + if (rcs[i]) + lfa->fa_fids[i].f_ver = rcs[i]; + if (copy_to_user(arg, lfa, size)) + rc = -EFAULT; + } + +free_rcs: + OBD_FREE(rcs, sizeof(int) * nr); +free_lfa: + OBD_FREE(lfa, size); + + RETURN(rc); +} + +/* This function tries to get a single name component, + * to send to the server. No actual path traversal involved, + * so we limit to NAME_MAX */ +static char *ll_getname(const char __user *filename) { int ret = 0, len; - char *tmp = __getname(); + char *tmp; + + OBD_ALLOC(tmp, NAME_MAX + 1); if (!tmp) return ERR_PTR(-ENOMEM); - len = strncpy_from_user(tmp, filename, PATH_MAX); - if (len == 0) + len = strncpy_from_user(tmp, filename, NAME_MAX + 1); + if (len < 0) ret = -ENOENT; - else if (len > PATH_MAX) + else if (len > NAME_MAX) ret = -ENAMETOOLONG; if (ret) { - __putname(tmp); + OBD_FREE(tmp, NAME_MAX + 1); tmp = ERR_PTR(ret); } return tmp; } -#define ll_putname(filename) __putname(filename) +#define ll_putname(filename) OBD_FREE(filename, NAME_MAX + 1); static long ll_dir_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - struct inode *inode = file->f_path.dentry->d_inode; - struct ll_sb_info *sbi = ll_i2sbi(inode); - struct obd_ioctl_data *data; - int rc = 0; - ENTRY; + struct dentry *dentry = file_dentry(file); + struct inode *inode = file_inode(file); + struct ll_sb_info *sbi = ll_i2sbi(inode); + struct obd_ioctl_data *data; + int rc = 0; + ENTRY; CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), cmd=%#x\n", PFID(ll_inode2fid(inode)), inode, cmd); - /* asm-ppc{,64} declares TCGETS, et. al. 
as type 't' not 'T' */ - if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */ - return -ENOTTY; - - ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1); - switch(cmd) { - case FSFILT_IOC_GETFLAGS: - case FSFILT_IOC_SETFLAGS: - RETURN(ll_iocontrol(inode, file, cmd, arg)); - case FSFILT_IOC_GETVERSION_OLD: - case FSFILT_IOC_GETVERSION: + /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */ + if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */ + return -ENOTTY; + + ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1); + switch (cmd) { + case FS_IOC_GETFLAGS: + case FS_IOC_SETFLAGS: + RETURN(ll_iocontrol(inode, file, cmd, arg)); + case FSFILT_IOC_GETVERSION: + case FS_IOC_GETVERSION: RETURN(put_user(inode->i_generation, (int __user *)arg)); - /* We need to special case any other ioctls we want to handle, - * to send them to the MDS/OST as appropriate and to properly - * network encode the arg field. - case FSFILT_IOC_SETVERSION_OLD: - case FSFILT_IOC_SETVERSION: - */ - case LL_IOC_GET_MDTIDX: { - int mdtidx; - - mdtidx = ll_get_mdt_idx(inode); - if (mdtidx < 0) - RETURN(mdtidx); + /* We need to special case any other ioctls we want to handle, + * to send them to the MDS/OST as appropriate and to properly + * network encode the arg field. 
*/ + case FS_IOC_SETVERSION: + RETURN(-ENOTSUPP); + + case LL_IOC_GET_MDTIDX: { + int mdtidx; + + mdtidx = ll_get_mdt_idx(inode); + if (mdtidx < 0) + RETURN(mdtidx); if (put_user((int)mdtidx, (int __user *)arg)) - RETURN(-EFAULT); + RETURN(-EFAULT); - return 0; - } - case IOC_MDC_LOOKUP: { - int namelen, len = 0; + return 0; + } + case IOC_MDC_LOOKUP: { + int namelen, len = 0; char *buf = NULL; char *filename; @@ -1140,15 +1433,14 @@ static long ll_dir_ioctl(struct file *file, unsigned int cmd, unsigned long arg) GOTO(out_free, rc = -EINVAL); } - rc = ll_get_fid_by_name(inode, filename, namelen, NULL); + rc = ll_get_fid_by_name(inode, filename, namelen, NULL, NULL); if (rc < 0) { CERROR("%s: lookup %.*s failed: rc = %d\n", - ll_get_fsname(inode->i_sb, NULL, 0), namelen, - filename, rc); + sbi->ll_fsname, namelen, filename, rc); GOTO(out_free, rc); } out_free: - obd_ioctl_freedata(buf, len); + OBD_FREE_LARGE(buf, len); return rc; } case LL_IOC_LMV_SETSTRIPE: { @@ -1180,8 +1472,22 @@ out_free: lum = (struct lmv_user_md *)data->ioc_inlbuf2; lumlen = data->ioc_inllen2; - if (lum->lum_magic != LMV_USER_MAGIC || - lumlen != sizeof(*lum)) { + if (!lmv_magic_supported(lum->lum_magic)) { + CERROR("%s: wrong lum magic %x : rc = %d\n", filename, + lum->lum_magic, -EINVAL); + GOTO(lmv_out_free, rc = -EINVAL); + } + + if ((lum->lum_magic == LMV_USER_MAGIC || + lum->lum_magic == LMV_USER_MAGIC_SPECIFIC) && + lumlen < sizeof(*lum)) { + CERROR("%s: wrong lum size %d for magic %x : rc = %d\n", + filename, lumlen, lum->lum_magic, -EINVAL); + GOTO(lmv_out_free, rc = -EINVAL); + } + + if (lum->lum_magic == LMV_MAGIC_FOREIGN && + lumlen < sizeof(struct lmv_foreign_md)) { CERROR("%s: wrong lum magic %x or size %d: rc = %d\n", filename, lum->lum_magic, lumlen, -EFAULT); GOTO(lmv_out_free, rc = -EINVAL); @@ -1192,9 +1498,9 @@ out_free: #else mode = data->ioc_type; #endif - rc = ll_dir_setdirstripe(inode, lum, filename, mode); + rc = ll_dir_setdirstripe(dentry, lum, lumlen, filename, 
mode); lmv_out_free: - obd_ioctl_freedata(buf, len); + OBD_FREE_LARGE(buf, len); RETURN(rc); } @@ -1214,41 +1520,61 @@ lmv_out_free: RETURN(rc); } - case LL_IOC_LOV_SETSTRIPE: { - struct lov_user_md_v3 lumv3; - struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3; + case LL_IOC_LOV_SETSTRIPE_NEW: + case LL_IOC_LOV_SETSTRIPE: { + struct lov_user_md_v3 *lumv3 = NULL; + struct lov_user_md_v1 lumv1; + struct lov_user_md_v1 *lumv1_ptr = &lumv1; struct lov_user_md_v1 __user *lumv1p = (struct lov_user_md_v1 __user *)arg; struct lov_user_md_v3 __user *lumv3p = (struct lov_user_md_v3 __user *)arg; + int lum_size = 0; - int set_default = 0; + int set_default = 0; - LASSERT(sizeof(lumv3) == sizeof(*lumv3p)); - LASSERT(sizeof(lumv3.lmm_objects[0]) == - sizeof(lumv3p->lmm_objects[0])); - /* first try with v1 which is smaller than v3 */ - if (copy_from_user(lumv1, lumv1p, sizeof(*lumv1))) - RETURN(-EFAULT); - - if ((lumv1->lmm_magic == LOV_USER_MAGIC_V3) ) { - if (copy_from_user(&lumv3, lumv3p, sizeof(lumv3))) - RETURN(-EFAULT); - } - - if (inode->i_sb->s_root == file->f_path.dentry) - set_default = 1; + BUILD_BUG_ON(sizeof(struct lov_user_md_v3) <= + sizeof(struct lov_comp_md_v1)); + BUILD_BUG_ON(sizeof(*lumv3) != sizeof(*lumv3p)); + /* first try with v1 which is smaller than v3 */ + if (copy_from_user(&lumv1, lumv1p, sizeof(lumv1))) + RETURN(-EFAULT); - /* in v1 and v3 cases lumv1 points to data */ - rc = ll_dir_setstripe(inode, lumv1, set_default); + if (inode->i_sb->s_root == file_dentry(file)) + set_default = 1; + + switch (lumv1.lmm_magic) { + case LOV_USER_MAGIC_V3: + case LOV_USER_MAGIC_SPECIFIC: + lum_size = ll_lov_user_md_size(&lumv1); + if (lum_size < 0) + RETURN(lum_size); + OBD_ALLOC(lumv3, lum_size); + if (!lumv3) + RETURN(-ENOMEM); + if (copy_from_user(lumv3, lumv3p, lum_size)) + GOTO(out, rc = -EFAULT); + lumv1_ptr = (struct lov_user_md_v1 *)lumv3; + break; + case LOV_USER_MAGIC_V1: + break; + default: + GOTO(out, rc = -ENOTSUPP); + } - RETURN(rc); - } 
+ /* in v1 and v3 cases lumv1 points to data */ + rc = ll_dir_setstripe(inode, lumv1_ptr, set_default); +out: + if (lumv3) + OBD_FREE(lumv3, lum_size); + RETURN(rc); + } case LL_IOC_LMV_GETSTRIPE: { struct lmv_user_md __user *ulmv = (struct lmv_user_md __user *)arg; struct lmv_user_md lum; struct ptlrpc_request *request = NULL; + struct ptlrpc_request *root_request = NULL; union lmv_mds_md *lmm = NULL; int lmmsize; u64 valid = 0; @@ -1256,12 +1582,14 @@ lmv_out_free: int mdt_index; int lum_size; int stripe_count; + int max_stripe_count; int i; int rc; if (copy_from_user(&lum, ulmv, sizeof(*ulmv))) RETURN(-EFAULT); + max_stripe_count = lum.lum_stripe_count; /* lum_magic will indicate which stripe the ioctl will like * to get, LMV_MAGIC_V1 is for normal LMV stripe, LMV_USER_MAGIC * is for default LMV stripe */ @@ -1272,16 +1600,13 @@ lmv_out_free: else RETURN(-EINVAL); - rc = ll_dir_getstripe(inode, (void **)&lmm, &lmmsize, &request, - valid); + rc = ll_dir_getstripe_default(inode, (void **)&lmm, &lmmsize, + &request, &root_request, valid); if (rc != 0) GOTO(finish_req, rc); /* Get default LMV EA */ if (lum.lum_magic == LMV_USER_MAGIC) { - if (rc != 0) - GOTO(finish_req, rc); - if (lmmsize > sizeof(*ulmv)) GOTO(finish_req, rc = -EINVAL); @@ -1291,8 +1616,47 @@ lmv_out_free: GOTO(finish_req, rc); } - stripe_count = lmv_mds_md_stripe_count_get(lmm); - lum_size = lmv_user_md_size(stripe_count, LMV_MAGIC_V1); + /* if foreign LMV case, fake stripes number */ + if (lmm->lmv_magic == LMV_MAGIC_FOREIGN) { + struct lmv_foreign_md *lfm; + + lfm = (struct lmv_foreign_md *)lmm; + if (lfm->lfm_length < XATTR_SIZE_MAX - + offsetof(typeof(*lfm), lfm_value)) { + __u32 size = lfm->lfm_length + + offsetof(typeof(*lfm), lfm_value); + + stripe_count = lmv_foreign_to_md_stripes(size); + } else { + CERROR("invalid %d foreign size returned\n", + lfm->lfm_length); + return -EINVAL; + } + } else { + stripe_count = lmv_mds_md_stripe_count_get(lmm); + } + if (max_stripe_count < stripe_count) { 
+ lum.lum_stripe_count = stripe_count; + if (copy_to_user(ulmv, &lum, sizeof(lum))) + GOTO(finish_req, rc = -EFAULT); + GOTO(finish_req, rc = -E2BIG); + } + + /* enough room on user side and foreign case */ + if (lmm->lmv_magic == LMV_MAGIC_FOREIGN) { + struct lmv_foreign_md *lfm; + __u32 size; + + lfm = (struct lmv_foreign_md *)lmm; + size = lfm->lfm_length + + offsetof(struct lmv_foreign_md, lfm_value); + if (copy_to_user(ulmv, lfm, size)) + GOTO(finish_req, rc = -EFAULT); + GOTO(finish_req, rc); + } + + lum_size = lmv_user_md_size(stripe_count, + LMV_USER_MAGIC_SPECIFIC); OBD_ALLOC(tmp, lum_size); if (tmp == NULL) GOTO(finish_req, rc = -ENOMEM); @@ -1304,16 +1668,20 @@ lmv_out_free: tmp->lum_magic = LMV_MAGIC_V1; tmp->lum_stripe_count = 0; tmp->lum_stripe_offset = mdt_index; + tmp->lum_hash_type = lmv_mds_md_hash_type_get(lmm); for (i = 0; i < stripe_count; i++) { struct lu_fid fid; fid_le_to_cpu(&fid, &lmm->lmv_md_v1.lmv_stripe_fids[i]); - mdt_index = ll_get_mdt_idx_by_fid(sbi, &fid); - if (mdt_index < 0) - GOTO(out_tmp, rc = mdt_index); + if (fid_is_sane(&fid)) { + mdt_index = ll_get_mdt_idx_by_fid(sbi, &fid); + if (mdt_index < 0) + GOTO(out_tmp, rc = mdt_index); + + tmp->lum_objects[i].lum_mds = mdt_index; + tmp->lum_objects[i].lum_fid = fid; + } - tmp->lum_objects[i].lum_mds = mdt_index; - tmp->lum_objects[i].lum_fid = fid; tmp->lum_stripe_count++; } @@ -1323,6 +1691,7 @@ out_tmp: OBD_FREE(tmp, lum_size); finish_req: ptlrpc_req_finished(request); + ptlrpc_req_finished(root_request); return rc; } @@ -1353,67 +1722,102 @@ out_rmdir: ll_putname(filename); RETURN(rc); } + case LL_IOC_RMFID: + RETURN(ll_rmfid(file, (void __user *)arg)); case LL_IOC_LOV_SWAP_LAYOUTS: RETURN(-EPERM); case IOC_OBD_STATFS: RETURN(ll_obd_statfs(inode, (void __user *)arg)); - case LL_IOC_LOV_GETSTRIPE: - case LL_IOC_MDC_GETINFO: - case IOC_MDC_GETFILEINFO: - case IOC_MDC_GETFILESTRIPE: { - struct ptlrpc_request *request = NULL; + case LL_IOC_LOV_GETSTRIPE: + case 
LL_IOC_LOV_GETSTRIPE_NEW: + case LL_IOC_MDC_GETINFO: + case LL_IOC_MDC_GETINFO_OLD: + case IOC_MDC_GETFILEINFO: + case IOC_MDC_GETFILEINFO_OLD: + case IOC_MDC_GETFILESTRIPE: { + struct ptlrpc_request *request = NULL; + struct ptlrpc_request *root_request = NULL; struct lov_user_md __user *lump; - struct lov_mds_md *lmm = NULL; - struct mdt_body *body; - char *filename = NULL; - int lmmsize; - - if (cmd == IOC_MDC_GETFILEINFO || - cmd == IOC_MDC_GETFILESTRIPE) { + struct lov_mds_md *lmm = NULL; + struct mdt_body *body; + char *filename = NULL; + lstat_t __user *statp = NULL; + lstatx_t __user *stxp = NULL; + __u64 __user *flagsp = NULL; + __u32 __user *lmmsizep = NULL; + struct lu_fid __user *fidp = NULL; + int lmmsize; + + if (cmd == IOC_MDC_GETFILEINFO_OLD || + cmd == IOC_MDC_GETFILEINFO || + cmd == IOC_MDC_GETFILESTRIPE) { filename = ll_getname((const char __user *)arg); - if (IS_ERR(filename)) - RETURN(PTR_ERR(filename)); + if (IS_ERR(filename)) + RETURN(PTR_ERR(filename)); - rc = ll_lov_getstripe_ea_info(inode, filename, &lmm, - &lmmsize, &request); + rc = ll_lov_getstripe_ea_info(inode, filename, &lmm, + &lmmsize, &request); } else { - rc = ll_dir_getstripe(inode, (void **)&lmm, &lmmsize, - &request, 0); + rc = ll_dir_getstripe_default(inode, (void **)&lmm, + &lmmsize, &request, + &root_request, 0); } - if (request) { - body = req_capsule_server_get(&request->rq_pill, - &RMF_MDT_BODY); - LASSERT(body != NULL); - } else { - GOTO(out_req, rc); - } + if (request) { + body = req_capsule_server_get(&request->rq_pill, + &RMF_MDT_BODY); + LASSERT(body != NULL); + } else { + GOTO(out_req, rc); + } - if (rc < 0) { - if (rc == -ENODATA && (cmd == IOC_MDC_GETFILEINFO || - cmd == LL_IOC_MDC_GETINFO)) - GOTO(skip_lmm, rc = 0); - else - GOTO(out_req, rc); - } + if (rc == -ENODATA && (cmd == IOC_MDC_GETFILEINFO || + cmd == LL_IOC_MDC_GETINFO || + cmd == IOC_MDC_GETFILEINFO_OLD || + cmd == LL_IOC_MDC_GETINFO_OLD)) { + lmmsize = 0; + rc = 0; + } + + if (rc < 0) + GOTO(out_req, 
rc); - if (cmd == IOC_MDC_GETFILESTRIPE || - cmd == LL_IOC_LOV_GETSTRIPE) { + if (cmd == IOC_MDC_GETFILESTRIPE || + cmd == LL_IOC_LOV_GETSTRIPE || + cmd == LL_IOC_LOV_GETSTRIPE_NEW) { lump = (struct lov_user_md __user *)arg; - } else { + } else if (cmd == IOC_MDC_GETFILEINFO_OLD || + cmd == LL_IOC_MDC_GETINFO_OLD){ + struct lov_user_mds_data_v1 __user *lmdp; + + lmdp = (struct lov_user_mds_data_v1 __user *)arg; + statp = &lmdp->lmd_st; + lump = &lmdp->lmd_lmm; + } else { struct lov_user_mds_data __user *lmdp; + lmdp = (struct lov_user_mds_data __user *)arg; - lump = &lmdp->lmd_lmm; - } - if (copy_to_user(lump, lmm, lmmsize)) { + fidp = &lmdp->lmd_fid; + stxp = &lmdp->lmd_stx; + flagsp = &lmdp->lmd_flags; + lmmsizep = &lmdp->lmd_lmmsize; + lump = &lmdp->lmd_lmm; + } + + if (lmmsize == 0) { + /* If the file has no striping then zero out *lump so + * that the caller isn't confused by garbage. */ + if (clear_user(lump, sizeof(*lump))) + GOTO(out_req, rc = -EFAULT); + } else if (copy_to_user(lump, lmm, lmmsize)) { if (copy_to_user(lump, lmm, sizeof(*lump))) - GOTO(out_req, rc = -EFAULT); - rc = -EOVERFLOW; - } - skip_lmm: - if (cmd == IOC_MDC_GETFILEINFO || cmd == LL_IOC_MDC_GETINFO) { - struct lov_user_mds_data __user *lmdp; - lstat_t st = { 0 }; + GOTO(out_req, rc = -EFAULT); + rc = -EOVERFLOW; + } + + if (cmd == IOC_MDC_GETFILEINFO_OLD || + cmd == LL_IOC_MDC_GETINFO_OLD) { + lstat_t st = { 0 }; st.st_dev = inode->i_sb->s_dev; st.st_mode = body->mbo_mode; @@ -1422,25 +1826,84 @@ out_rmdir: st.st_gid = body->mbo_gid; st.st_rdev = body->mbo_rdev; st.st_size = body->mbo_size; - st.st_blksize = PAGE_CACHE_SIZE; + st.st_blksize = PAGE_SIZE; st.st_blocks = body->mbo_blocks; st.st_atime = body->mbo_atime; st.st_mtime = body->mbo_mtime; st.st_ctime = body->mbo_ctime; - st.st_ino = inode->i_ino; + st.st_ino = cl_fid_build_ino(&body->mbo_fid1, + sbi->ll_flags & + LL_SBI_32BIT_API); + + if (copy_to_user(statp, &st, sizeof(st))) + GOTO(out_req, rc = -EFAULT); + } else if (cmd == 
IOC_MDC_GETFILEINFO || + cmd == LL_IOC_MDC_GETINFO) { + lstatx_t stx = { 0 }; + __u64 valid = body->mbo_valid; + + stx.stx_blksize = PAGE_SIZE; + stx.stx_nlink = body->mbo_nlink; + stx.stx_uid = body->mbo_uid; + stx.stx_gid = body->mbo_gid; + stx.stx_mode = body->mbo_mode; + stx.stx_ino = cl_fid_build_ino(&body->mbo_fid1, + sbi->ll_flags & + LL_SBI_32BIT_API); + stx.stx_size = body->mbo_size; + stx.stx_blocks = body->mbo_blocks; + stx.stx_atime.tv_sec = body->mbo_atime; + stx.stx_ctime.tv_sec = body->mbo_ctime; + stx.stx_mtime.tv_sec = body->mbo_mtime; + stx.stx_rdev_major = MAJOR(body->mbo_rdev); + stx.stx_rdev_minor = MINOR(body->mbo_rdev); + stx.stx_dev_major = MAJOR(inode->i_sb->s_dev); + stx.stx_dev_minor = MINOR(inode->i_sb->s_dev); + stx.stx_mask |= STATX_BASIC_STATS; - lmdp = (struct lov_user_mds_data __user *)arg; - if (copy_to_user(&lmdp->lmd_st, &st, sizeof(st))) - GOTO(out_req, rc = -EFAULT); - } + /* + * For a striped directory, the size and blocks returned + * from MDT are not correct. + * The size and blocks are aggregated by client across + * all stripes. + * Thus for a striped directory, do not return the valid + * FLSIZE and FLBLOCKS flags to the caller. + * However, this would be better decided by the MDS + * instead of the client.
+ */ + if (cmd == LL_IOC_MDC_GETINFO && + ll_i2info(inode)->lli_lsm_md != NULL) + valid &= ~(OBD_MD_FLSIZE | OBD_MD_FLBLOCKS); - EXIT; - out_req: - ptlrpc_req_finished(request); - if (filename) - ll_putname(filename); - return rc; - } + if (flagsp && copy_to_user(flagsp, &valid, + sizeof(*flagsp))) + GOTO(out_req, rc = -EFAULT); + + if (fidp && copy_to_user(fidp, &body->mbo_fid1, + sizeof(*fidp))) + GOTO(out_req, rc = -EFAULT); + + if (!(valid & OBD_MD_FLSIZE)) + stx.stx_mask &= ~STATX_SIZE; + if (!(valid & OBD_MD_FLBLOCKS)) + stx.stx_mask &= ~STATX_BLOCKS; + + if (stxp && copy_to_user(stxp, &stx, sizeof(stx))) + GOTO(out_req, rc = -EFAULT); + + if (lmmsizep && copy_to_user(lmmsizep, &lmmsize, + sizeof(*lmmsizep))) + GOTO(out_req, rc = -EFAULT); + } + + EXIT; +out_req: + ptlrpc_req_finished(request); + ptlrpc_req_finished(root_request); + if (filename) + ll_putname(filename); + return rc; + } case OBD_IOC_QUOTACTL: { struct if_quotactl *qctl; @@ -1466,21 +1929,6 @@ out_rmdir: RETURN(ll_get_obd_name(inode, cmd, arg)); case LL_IOC_FLUSHCTX: RETURN(ll_flush_ctx(inode)); -#ifdef CONFIG_FS_POSIX_ACL - case LL_IOC_RMTACL: { - if (sbi->ll_flags & LL_SBI_RMT_CLIENT && - inode == inode->i_sb->s_root->d_inode) { - struct ll_file_data *fd = LUSTRE_FPRIVATE(file); - - LASSERT(fd != NULL); - rc = rct_add(&sbi->ll_rct, current_pid(), arg); - if (!rc) - fd->fd_flags |= LL_FILE_RMTACL; - RETURN(rc); - } else - RETURN(0); - } -#endif case LL_IOC_GETOBDCOUNT: { u32 count, vallen; struct obd_export *exp; @@ -1512,11 +1960,6 @@ out_rmdir: RETURN(obd_iocontrol(cmd, sbi->ll_md_exp, 0, NULL, (void __user *)arg)); } - case OBD_IOC_CHANGELOG_SEND: - case OBD_IOC_CHANGELOG_CLEAR: - rc = copy_and_ioctl(cmd, sbi->ll_md_exp, (void __user *)arg, - sizeof(struct ioc_changelog)); - RETURN(rc); case OBD_IOC_FID2PATH: RETURN(ll_fid2path(inode, (void __user *)arg)); case LL_IOC_GETPARENT: @@ -1567,10 +2010,8 @@ out_rmdir: RETURN(-ENOMEM); /* Copy the whole struct */ - if (copy_from_user(hur, (void 
__user *)arg, totalsize)) { - OBD_FREE_LARGE(hur, totalsize); - RETURN(-EFAULT); - } + if (copy_from_user(hur, (void __user *)arg, totalsize)) + GOTO(out_hur, rc = -EFAULT); if (hur->hur_request.hr_action == HUA_RELEASE) { const struct lu_fid *fid; @@ -1595,6 +2036,7 @@ out_rmdir: hur, NULL); } +out_hur: OBD_FREE_LARGE(hur, totalsize); RETURN(rc); @@ -1623,8 +2065,8 @@ out_rmdir: if (!cfs_capable(CFS_CAP_SYS_ADMIN)) RETURN(-EPERM); - rc = copy_and_ioctl(cmd, sbi->ll_md_exp, (void __user *)arg, - sizeof(struct lustre_kernelcomm)); + rc = copy_and_ct_start(cmd, sbi->ll_md_exp, + (struct lustre_kernelcomm __user *)arg); RETURN(rc); case LL_IOC_HSM_COPY_START: { @@ -1666,15 +2108,15 @@ out_rmdir: RETURN(rc); } case LL_IOC_MIGRATE: { - char *buf = NULL; - const char *filename; - int namelen = 0; - int len; - int rc; - int mdtidx; + struct lmv_user_md *lum; + char *buf = NULL; + int len; + char *filename; + int namelen = 0; + int rc; rc = obd_ioctl_getdata(&buf, &len, (void __user *)arg); - if (rc < 0) + if (rc) RETURN(rc); data = (struct obd_ioctl_data *)buf; @@ -1684,20 +2126,68 @@ out_rmdir: filename = data->ioc_inlbuf1; namelen = data->ioc_inllen1; - /* \0 is packed at the end of filename */ - if (namelen < 1 || namelen != strlen(filename) + 1) - GOTO(migrate_free, rc = -EINVAL); - if (data->ioc_inllen2 != sizeof(mdtidx)) + if (namelen < 1 || namelen != strlen(filename) + 1) { + CDEBUG(D_INFO, "IOC_MDC_LOOKUP missing filename\n"); GOTO(migrate_free, rc = -EINVAL); - mdtidx = *(int *)data->ioc_inlbuf2; + } + + lum = (struct lmv_user_md *)data->ioc_inlbuf2; + if (lum->lum_magic != LMV_USER_MAGIC && + lum->lum_magic != LMV_USER_MAGIC_SPECIFIC) { + rc = -EINVAL; + CERROR("%s: wrong lum magic %x: rc = %d\n", + filename, lum->lum_magic, rc); + GOTO(migrate_free, rc); + } - rc = ll_migrate(inode, file, mdtidx, filename, namelen - 1); + rc = ll_migrate(inode, file, lum, filename); migrate_free: - obd_ioctl_freedata(buf, len); + OBD_FREE_LARGE(buf, len); RETURN(rc); } + case 
LL_IOC_FSGETXATTR: + RETURN(ll_ioctl_fsgetxattr(inode, cmd, arg)); + case LL_IOC_FSSETXATTR: + RETURN(ll_ioctl_fssetxattr(inode, cmd, arg)); + case LL_IOC_PCC_DETACH_BY_FID: { + struct lu_pcc_detach_fid *detach; + struct lu_fid *fid; + struct inode *inode2; + unsigned long ino; + + OBD_ALLOC_PTR(detach); + if (detach == NULL) + RETURN(-ENOMEM); + + if (copy_from_user(detach, + (const struct lu_pcc_detach_fid __user *)arg, + sizeof(*detach))) + GOTO(out_detach, rc = -EFAULT); + + fid = &detach->pccd_fid; + ino = cl_fid_build_ino(fid, ll_need_32bit_api(sbi)); + inode2 = ilookup5(inode->i_sb, ino, ll_test_inode_by_fid, fid); + if (inode2 == NULL) + /* Target inode is not in inode cache, and PCC file + * has already been released, return immediately. + */ + GOTO(out_detach, rc = 0); + + if (!S_ISREG(inode2->i_mode)) + GOTO(out_iput, rc = -EINVAL); + + if (!inode_owner_or_capable(inode2)) + GOTO(out_iput, rc = -EPERM); + + rc = pcc_ioctl_detach(inode2, detach->pccd_opt); +out_iput: + iput(inode2); +out_detach: + OBD_FREE_PTR(detach); + RETURN(rc); + } default: RETURN(obd_iocontrol(cmd, sbi->ll_dt_exp, 0, NULL, (void __user *)arg)); @@ -1706,53 +2196,53 @@ migrate_free: static loff_t ll_dir_seek(struct file *file, loff_t offset, int origin) { - struct inode *inode = file->f_mapping->host; - struct ll_file_data *fd = LUSTRE_FPRIVATE(file); - struct ll_sb_info *sbi = ll_i2sbi(inode); - int api32 = ll_need_32bit_api(sbi); - loff_t ret = -EINVAL; - ENTRY; + struct inode *inode = file->f_mapping->host; + struct ll_file_data *fd = file->private_data; + struct ll_sb_info *sbi = ll_i2sbi(inode); + int api32 = ll_need_32bit_api(sbi); + loff_t ret = -EINVAL; + ENTRY; - mutex_lock(&inode->i_mutex); - switch (origin) { - case SEEK_SET: - break; - case SEEK_CUR: - offset += file->f_pos; - break; - case SEEK_END: - if (offset > 0) - GOTO(out, ret); - if (api32) - offset += LL_DIR_END_OFF_32BIT; - else - offset += LL_DIR_END_OFF; - break; - default: - GOTO(out, ret); - } + inode_lock(inode);
+ switch (origin) { + case SEEK_SET: + break; + case SEEK_CUR: + offset += file->f_pos; + break; + case SEEK_END: + if (offset > 0) + GOTO(out, ret); + if (api32) + offset += LL_DIR_END_OFF_32BIT; + else + offset += LL_DIR_END_OFF; + break; + default: + GOTO(out, ret); + } - if (offset >= 0 && - ((api32 && offset <= LL_DIR_END_OFF_32BIT) || - (!api32 && offset <= LL_DIR_END_OFF))) { - if (offset != file->f_pos) { - if ((api32 && offset == LL_DIR_END_OFF_32BIT) || - (!api32 && offset == LL_DIR_END_OFF)) + if (offset >= 0 && + ((api32 && offset <= LL_DIR_END_OFF_32BIT) || + (!api32 && offset <= LL_DIR_END_OFF))) { + if (offset != file->f_pos) { + if ((api32 && offset == LL_DIR_END_OFF_32BIT) || + (!api32 && offset == LL_DIR_END_OFF)) fd->lfd_pos = MDS_DIR_END_OFF; - else if (api32 && sbi->ll_flags & LL_SBI_64BIT_HASH) + else if (api32 && sbi->ll_flags & LL_SBI_64BIT_HASH) fd->lfd_pos = offset << 32; - else + else fd->lfd_pos = offset; - file->f_pos = offset; - file->f_version = 0; - } - ret = offset; - } - GOTO(out, ret); + file->f_pos = offset; + file->f_version = 0; + } + ret = offset; + } + GOTO(out, ret); out: - mutex_unlock(&inode->i_mutex); - return ret; + inode_unlock(inode); + return ret; } static int ll_dir_open(struct inode *inode, struct file *file) @@ -1773,7 +2263,7 @@ const struct file_operations ll_dir_operations = { .release = ll_dir_release, .read = generic_read_dir, #ifdef HAVE_DIR_CONTEXT - .iterate = ll_iterate, + .iterate_shared = ll_iterate, #else .readdir = ll_readdir, #endif