Whamcloud - gitweb
LU-15850 llite: pass dmv inherit depth instead of dir depth
[fs/lustre-release.git] / lustre / llite / namei.c
index 9e314f1..fc6b20d 100644 (file)
 #include <lustre_dlm.h>
 #include "llite_internal.h"
 
+/*
+ * The handlers below are written once, with a leading namespace argument.
+ * When HAVE_USER_NAMESPACE_ARG is not defined, these function-like macros
+ * drop that first argument (ns) from both the definitions and the calls of
+ * ll_create_nd(), ll_mkdir(), ll_mknod(), ll_rename() and ll_symlink().
+ * A macro is not re-expanded inside its own expansion, so each name on the
+ * right-hand side refers to the real function.
+ * ll_rename() has two forms: HAVE_IOPS_RENAME_WITH_FLAGS selects whether the
+ * trailing flags argument is kept.
+ */
+#ifndef HAVE_USER_NAMESPACE_ARG
+#define ll_create_nd(ns, dir, de, mode, ex)    ll_create_nd(dir, de, mode, ex)
+#define ll_mkdir(ns, dir, dch, mode)           ll_mkdir(dir, dch, mode)
+#define ll_mknod(ns, dir, dch, mode, rd)       ll_mknod(dir, dch, mode, rd)
+#ifdef HAVE_IOPS_RENAME_WITH_FLAGS
+#define ll_rename(ns, src, sdc, tgt, tdc, fl)  ll_rename(src, sdc, tgt, tdc, fl)
+#else
+#define ll_rename(ns, src, sdc, tgt, tdc)      ll_rename(src, sdc, tgt, tdc)
+#endif /* HAVE_IOPS_RENAME_WITH_FLAGS */
+#define ll_symlink(ns, dir, dch, old)          ll_symlink(dir, dch, old)
+#endif /* !HAVE_USER_NAMESPACE_ARG */
+
 static int ll_create_it(struct inode *dir, struct dentry *dentry,
                        struct lookup_intent *it,
                        void *secctx, __u32 secctxlen, bool encrypt,
-                       void *encctx, __u32 encctxlen);
+                       void *encctx, __u32 encctxlen, unsigned int open_flags);
 
 /* called from iget5_locked->find_inode() under inode_lock spinlock */
 static int ll_test_inode(struct inode *inode, void *opaque)
@@ -257,7 +269,7 @@ static void ll_lock_cancel_bits(struct ldlm_lock *lock, __u64 to_cancel)
        }
 
        if (bits & MDS_INODELOCK_XATTR) {
-               ll_xattr_cache_destroy(inode);
+               ll_xattr_cache_empty(inode);
                bits &= ~MDS_INODELOCK_XATTR;
        }
 
@@ -371,8 +383,9 @@ static void ll_lock_cancel_bits(struct ldlm_lock *lock, __u64 to_cancel)
                }
        }
 
+       /* at umount s_root becomes NULL */
        if ((bits & (MDS_INODELOCK_LOOKUP | MDS_INODELOCK_PERM)) &&
-           !is_root_inode(inode))
+           inode->i_sb->s_root && !is_root_inode(inode))
                ll_prune_aliases(inode);
 
        if (bits & (MDS_INODELOCK_LOOKUP | MDS_INODELOCK_PERM))
@@ -411,13 +424,13 @@ int ll_md_need_convert(struct ldlm_lock *lock)
        switch (lock->l_req_mode) {
        case LCK_PR:
                mode = LCK_PR;
-               /* fallthrough */
+               fallthrough;
        case LCK_PW:
                mode |= LCK_CR;
                break;
        case LCK_CW:
                mode = LCK_CW;
-               /* fallthrough */
+               fallthrough;
        case LCK_CR:
                mode |= LCK_CR;
                break;
@@ -676,7 +689,7 @@ static int ll_lookup_it_finish(struct ptlrpc_request *request,
                                       "server returned encryption ctx for "DFID"\n",
                                       PFID(ll_inode2fid(inode)));
                                rc = ll_xattr_cache_insert(inode,
-                                              LL_XATTR_NAME_ENCRYPTION_CONTEXT,
+                                                          xattr_for_enc(inode),
                                                           encctx, encctxlen);
                                if (rc)
                                        CWARN("%s: cannot set enc ctx for "DFID": rc = %d\n",
@@ -771,14 +784,17 @@ static int ll_lookup_it_finish(struct ptlrpc_request *request,
                struct lookup_intent parent_it = {
                                        .it_op = IT_GETATTR,
                                        .it_lock_handle = 0 };
-               struct lu_fid   fid = ll_i2info(parent)->lli_fid;
+               struct ll_inode_info *lli = ll_i2info(parent);
+               struct lu_fid fid = lli->lli_fid;
 
                /* If it is striped directory, get the real stripe parent */
                if (unlikely(ll_dir_striped(parent))) {
+                       down_read(&lli->lli_lsm_sem);
                        rc = md_get_fid_from_lsm(ll_i2mdexp(parent),
-                                                ll_i2info(parent)->lli_lsm_md,
+                                                lli->lli_lsm_md,
                                                 (*de)->d_name.name,
                                                 (*de)->d_name.len, &fid);
+                       up_read(&lli->lli_lsm_sem);
                        if (rc != 0)
                                GOTO(out, rc);
                }
@@ -863,7 +879,7 @@ static struct dentry *ll_lookup_it(struct inode *parent, struct dentry *dentry,
        rc = ll_setup_filename(parent, &dentry->d_name, 1, &fname, &fid);
        if ((!rc || rc == -ENOENT) && fname.is_ciphertext_name) {
                spin_lock(&dentry->d_lock);
-               dentry->d_flags |= DCACHE_ENCRYPTED_NAME;
+               dentry->d_flags |= DCACHE_NOKEY_NAME;
                spin_unlock(&dentry->d_lock);
        }
        if (rc == -ENOENT)
@@ -889,7 +905,7 @@ static struct dentry *ll_lookup_it(struct inode *parent, struct dentry *dentry,
                it->it_create_mode &= ~current_umask();
 
        if (it->it_op & IT_CREAT &&
-           ll_i2sbi(parent)->ll_flags & LL_SBI_FILE_SECCTX) {
+           test_bit(LL_SBI_FILE_SECCTX, ll_i2sbi(parent)->ll_flags)) {
                rc = ll_dentry_init_security(dentry, it->it_create_mode,
                                             &dentry->d_name,
                                             &op_data->op_file_secctx_name,
@@ -908,37 +924,20 @@ static struct dentry *ll_lookup_it(struct inode *parent, struct dentry *dentry,
                        *secctxlen = 0;
        }
        if (it->it_op & IT_CREAT && encrypt) {
-               /* Volatile file name may look like:
-                * <parent>/LUSTRE_VOLATILE_HDR:<mdt_index>:<random>:fd=<fd>
-                * where fd is opened descriptor of reference file.
-                */
                if (unlikely(filename_is_volatile(dentry->d_name.name,
                                                  dentry->d_name.len, NULL))) {
+                       /* get encryption context from reference file */
                        int ctx_size = LLCRYPT_ENC_CTX_SIZE;
                        struct lustre_sb_info *lsi;
                        struct file *ref_file;
                        struct inode *ref_inode;
-                       char *p, *q, *fd_str;
                        void *ctx;
-                       int fd;
-
-                       p = strnstr(dentry->d_name.name, ":fd=",
-                                   dentry->d_name.len);
-                       if (!p || strlen(p + 4) == 0)
-                               GOTO(out, retval = ERR_PTR(-EINVAL));
 
-                       q = strchrnul(p + 4, ':');
-                       fd_str = kstrndup(p + 4, q - p - 4, GFP_NOFS);
-                       if (!fd_str)
-                               GOTO(out, retval = ERR_PTR(-ENOMEM));
-                       rc = kstrtouint(fd_str, 10, &fd);
-                       kfree(fd_str);
+                       rc = volatile_ref_file(dentry->d_name.name,
+                                              dentry->d_name.len,
+                                              &ref_file);
                        if (rc)
-                               GOTO(inherit, rc = -EINVAL);
-
-                       ref_file = fget(fd);
-                       if (!ref_file)
-                               GOTO(inherit, rc = -EINVAL);
+                               GOTO(out, retval = ERR_PTR(rc));
 
                        ref_inode = file_inode(ref_file);
                        if (!ref_inode) {
@@ -956,6 +955,9 @@ getctx:
 #ifdef CONFIG_LL_ENCRYPTION
                        rc = lsi->lsi_cop->get_context(ref_inode,
                                                       ctx, ctx_size);
+#elif defined(HAVE_LUSTRE_CRYPTO)
+                       rc = ref_inode->i_sb->s_cop->get_context(ref_inode,
+                                                                ctx, ctx_size);
 #else
                        rc = -ENODATA;
 #endif
@@ -984,7 +986,6 @@ getctx:
                                       op_data->op_file_encctx_size);
                                OBD_FREE(ctx, ctx_size);
                        }
-
                } else {
 inherit:
                        rc = llcrypt_inherit_context(parent, NULL, op_data,
@@ -1136,7 +1137,8 @@ static struct dentry *ll_lookup_nd(struct inode *parent, struct dentry *dentry,
         * to proceed with lookup. LU-4185
         */
        if ((flags & LOOKUP_CREATE) && !(flags & LOOKUP_OPEN) &&
-           (inode_permission(parent, MAY_WRITE | MAY_EXEC) == 0))
+           (inode_permission(&init_user_ns,
+                             parent, MAY_WRITE | MAY_EXEC) == 0))
                return NULL;
 
        if (flags & (LOOKUP_PARENT|LOOKUP_OPEN|LOOKUP_CREATE))
@@ -1252,7 +1254,14 @@ static int ll_atomic_open(struct inode *dir, struct dentry *dentry,
                if (rc)
                        GOTO(out_release, rc);
                if (open_flags & O_CREAT) {
-                       if (!llcrypt_has_encryption_key(dir))
+                       /* For migration or mirroring without enc key, we still
+                        * need to be able to create a volatile file.
+                        */
+                       if (!llcrypt_has_encryption_key(dir) &&
+                           (!filename_is_volatile(dentry->d_name.name,
+                                                  dentry->d_name.len, NULL) ||
+                           (open_flags & O_FILE_ENC) != O_FILE_ENC ||
+                           !(open_flags & O_DIRECT)))
                                GOTO(out_release, rc = -ENOKEY);
                        encrypt = true;
                }
@@ -1283,7 +1292,8 @@ static int ll_atomic_open(struct inode *dir, struct dentry *dentry,
                if (it_disposition(it, DISP_OPEN_CREATE)) {
                        /* Dentry instantiated in ll_create_it. */
                        rc = ll_create_it(dir, dentry, it, secctx, secctxlen,
-                                         encrypt, encctx, encctxlen);
+                                         encrypt, encctx, encctxlen,
+                                         open_flags);
                        ll_security_release_secctx(secctx, secctxlen);
                        llcrypt_free_ctx(encctx, encctxlen);
                        if (rc) {
@@ -1409,7 +1419,7 @@ static struct inode *ll_create_node(struct inode *dir, struct lookup_intent *it)
 static int ll_create_it(struct inode *dir, struct dentry *dentry,
                        struct lookup_intent *it,
                        void *secctx, __u32 secctxlen, bool encrypt,
-                       void *encctx, __u32 encctxlen)
+                       void *encctx, __u32 encctxlen, unsigned int open_flags)
 {
        struct inode *inode;
        __u64 bits = 0;
@@ -1427,8 +1437,8 @@ static int ll_create_it(struct inode *dir, struct dentry *dentry,
        if (IS_ERR(inode))
                RETURN(PTR_ERR(inode));
 
-       if ((ll_i2sbi(inode)->ll_flags & LL_SBI_FILE_SECCTX) &&
-           secctx != NULL) {
+       if (test_bit(LL_SBI_FILE_SECCTX, ll_i2sbi(inode)->ll_flags) &&
+           secctx) {
                /* must be done before d_instantiate, because it calls
                 * security_d_instantiate, which means a getxattr if security
                 * context is not set yet */
@@ -1444,12 +1454,23 @@ static int ll_create_it(struct inode *dir, struct dentry *dentry,
        d_instantiate(dentry, inode);
 
        if (encrypt) {
-               rc = ll_set_encflags(inode, encctx, encctxlen, true);
+               bool preload = true;
+
+               /* For migration or mirroring without enc key, we
+                * create a volatile file without enc context.
+                */
+               if (!llcrypt_has_encryption_key(dir) &&
+                   filename_is_volatile(dentry->d_name.name,
+                                        dentry->d_name.len, NULL) &&
+                   (open_flags & O_FILE_ENC) == O_FILE_ENC &&
+                   open_flags & O_DIRECT)
+                       preload = false;
+               rc = ll_set_encflags(inode, encctx, encctxlen, preload);
                if (rc)
                        RETURN(rc);
        }
 
-       if (!(ll_i2sbi(inode)->ll_flags & LL_SBI_FILE_SECCTX)) {
+       if (!test_bit(LL_SBI_FILE_SECCTX, ll_i2sbi(inode)->ll_flags)) {
                rc = ll_inode_init_security(dentry, inode, dir);
                if (rc)
                        RETURN(rc);
@@ -1494,7 +1515,7 @@ static void ll_qos_mkdir_prep(struct md_op_data *op_data, struct inode *dir)
        struct ll_inode_info *lli = ll_i2info(dir);
        struct lmv_stripe_md *lsm;
 
-       op_data->op_dir_depth = lli->lli_depth;
+       op_data->op_dir_depth = lli->lli_inherit_depth ?: lli->lli_dir_depth;
 
        /* parent directory is striped */
        if (unlikely(lli->lli_lsm_md))
@@ -1523,11 +1544,11 @@ static void ll_qos_mkdir_prep(struct md_op_data *op_data, struct inode *dir)
 
        if (lsm->lsm_md_max_inherit != LMV_INHERIT_NONE &&
            (lsm->lsm_md_max_inherit == LMV_INHERIT_UNLIMITED ||
-            lsm->lsm_md_max_inherit >= lli->lli_depth)) {
+            lsm->lsm_md_max_inherit >= lli->lli_dir_depth)) {
                op_data->op_flags |= MF_QOS_MKDIR;
                if (lsm->lsm_md_max_inherit_rr != LMV_INHERIT_RR_NONE &&
                    (lsm->lsm_md_max_inherit_rr == LMV_INHERIT_RR_UNLIMITED ||
-                    lsm->lsm_md_max_inherit_rr >= lli->lli_depth))
+                    lsm->lsm_md_max_inherit_rr >= lli->lli_dir_depth))
                        op_data->op_flags |= MF_RR_MKDIR;
                CDEBUG(D_INODE, DFID" requests qos mkdir %#x\n",
                       PFID(&lli->lli_fid), op_data->op_flags);
@@ -1565,7 +1586,7 @@ again:
        if (S_ISDIR(mode))
                ll_qos_mkdir_prep(op_data, dir);
 
-       if (sbi->ll_flags & LL_SBI_FILE_SECCTX) {
+       if (test_bit(LL_SBI_FILE_SECCTX, sbi->ll_flags)) {
                err = ll_dentry_init_security(dchild, mode, &dchild->d_name,
                                              &op_data->op_file_secctx_name,
                                              &op_data->op_file_secctx,
@@ -1577,7 +1598,8 @@ again:
        if (ll_sbi_has_encrypt(sbi) &&
            ((IS_ENCRYPTED(dir) &&
            (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) ||
-           (unlikely(llcrypt_dummy_context_enabled(dir)) && S_ISDIR(mode)))) {
+            (unlikely(ll_sb_has_test_dummy_encryption(dir->i_sb)) &&
+             S_ISDIR(mode)))) {
                err = llcrypt_get_encryption_info(dir);
                if (err)
                        GOTO(err_exit, err);
@@ -1606,6 +1628,7 @@ again:
 #ifdef IOP_XATTR
                        fakeinode->i_opflags |= IOP_XATTR;
 #endif
+                       ll_lli_init(ll_i2info(fakeinode));
                        err = ll_set_encflags(fakeinode,
                                              op_data->op_file_encctx,
                                              op_data->op_file_encctx_size,
@@ -1628,7 +1651,7 @@ again:
                        from_kuid(&init_user_ns, current_fsuid()),
                        from_kgid(&init_user_ns, current_fsgid()),
                        current_cap(), rdev, &request);
-#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 14, 58, 0)
+#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 15, 58, 0)
        /*
         * server < 2.12.58 doesn't pack default LMV in intent_getattr reply,
         * fetch default LMV here.
@@ -1641,11 +1664,11 @@ again:
 
                ptlrpc_req_finished(request);
                request = NULL;
+               ll_finish_md_op_data(op_data);
+               op_data = NULL;
 
                err2 = ll_dir_getstripe(dir, (void **)&lum, &lumsize, &request,
                                        OBD_MD_DEFAULT_MEA);
-               ll_finish_md_op_data(op_data);
-               op_data = NULL;
                if (err2 == 0) {
                        struct lustre_md md = { NULL };
 
@@ -1705,7 +1728,7 @@ again:
        if (err)
                GOTO(err_exit, err);
 
-       if (sbi->ll_flags & LL_SBI_FILE_SECCTX) {
+       if (test_bit(LL_SBI_FILE_SECCTX, sbi->ll_flags)) {
                /* must be done before d_instantiate, because it calls
                 * security_d_instantiate, which means a getxattr if security
                 * context is not set yet */
@@ -1744,7 +1767,7 @@ again:
                }
        }
 
-       if (!(sbi->ll_flags & LL_SBI_FILE_SECCTX)) {
+       if (!test_bit(LL_SBI_FILE_SECCTX, sbi->ll_flags)) {
                err = ll_inode_init_security(dchild, inode, dir);
                if (err)
                        GOTO(err_exit, err);
@@ -1761,8 +1784,8 @@ err_exit:
        RETURN(err);
 }
 
-static int ll_mknod(struct inode *dir, struct dentry *dchild, umode_t mode,
-                   dev_t rdev)
+static int ll_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+                   struct dentry *dchild, umode_t mode, dev_t rdev)
 {
        ktime_t kstart = ktime_get();
        int err;
@@ -1777,7 +1800,7 @@ static int ll_mknod(struct inode *dir, struct dentry *dchild, umode_t mode,
        switch (mode & S_IFMT) {
        case 0:
                mode |= S_IFREG;
-               /* fallthrough */
+               fallthrough;
        case S_IFREG:
        case S_IFCHR:
        case S_IFBLK:
@@ -1803,7 +1826,8 @@ static int ll_mknod(struct inode *dir, struct dentry *dchild, umode_t mode,
 /*
  * Plain create. Intent create is handled in atomic_open.
  */
-static int ll_create_nd(struct inode *dir, struct dentry *dentry,
+static int ll_create_nd(struct user_namespace *mnt_userns,
+                       struct inode *dir, struct dentry *dentry,
                        umode_t mode, bool want_excl)
 {
        ktime_t kstart = ktime_get();
@@ -1817,7 +1841,7 @@ static int ll_create_nd(struct inode *dir, struct dentry *dentry,
 
        /* Using mknod(2) to create a regular file is designed to not recognize
         * volatile file name, so we use ll_mknod() here. */
-       rc = ll_mknod(dir, dentry, mode, 0);
+       rc = ll_mknod(mnt_userns, dir, dentry, mode, 0);
 
        CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, unhashed %d\n",
               dentry, d_unhashed(dentry));
@@ -1829,8 +1853,8 @@ static int ll_create_nd(struct inode *dir, struct dentry *dentry,
        return rc;
 }
 
-static int ll_symlink(struct inode *dir, struct dentry *dchild,
-                     const char *oldpath)
+static int ll_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+                     struct dentry *dchild, const char *oldpath)
 {
        ktime_t kstart = ktime_get();
        int len = strlen(oldpath);
@@ -1899,7 +1923,8 @@ out:
        RETURN(err);
 }
 
-static int ll_mkdir(struct inode *dir, struct dentry *dchild, umode_t mode)
+static int ll_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+                   struct dentry *dchild, umode_t mode)
 {
        ktime_t kstart = ktime_get();
        int err;
@@ -1966,8 +1991,11 @@ static int ll_rmdir(struct inode *dir, struct dentry *dchild)
                 * immediately.
                 */
                body = req_capsule_server_get(&request->rq_pill, &RMF_MDT_BODY);
-               if (body->mbo_valid & OBD_MD_FLNLINK)
+               if (body->mbo_valid & OBD_MD_FLNLINK) {
+                       spin_lock(&dchild->d_inode->i_lock);
                        set_nlink(dchild->d_inode, body->mbo_nlink);
+                       spin_unlock(&dchild->d_inode->i_lock);
+               }
        }
 
        ptlrpc_req_finished(request);
@@ -2054,8 +2082,11 @@ static int ll_unlink(struct inode *dir, struct dentry *dchild)
         * the link count so the inode can be freed immediately.
         */
        body = req_capsule_server_get(&request->rq_pill, &RMF_MDT_BODY);
-       if (body->mbo_valid & OBD_MD_FLNLINK)
+       if (body->mbo_valid & OBD_MD_FLNLINK) {
+               spin_lock(&dchild->d_inode->i_lock);
                set_nlink(dchild->d_inode, body->mbo_nlink);
+               spin_unlock(&dchild->d_inode->i_lock);
+       }
 
        ll_update_times(request, dir);
 
@@ -2067,9 +2098,10 @@ out:
        RETURN(rc);
 }
 
-static int ll_rename(struct inode *src, struct dentry *src_dchild,
+static int ll_rename(struct user_namespace *mnt_userns,
+                    struct inode *src, struct dentry *src_dchild,
                     struct inode *tgt, struct dentry *tgt_dchild
-#ifdef HAVE_IOPS_RENAME_WITH_FLAGS
+#if defined(HAVE_USER_NAMESPACE_ARG) || defined(HAVE_IOPS_RENAME_WITH_FLAGS)
                     , unsigned int flags
 #endif
                     )
@@ -2083,7 +2115,7 @@ static int ll_rename(struct inode *src, struct dentry *src_dchild,
        int err;
        ENTRY;
 
-#ifdef HAVE_IOPS_RENAME_WITH_FLAGS
+#if defined(HAVE_USER_NAMESPACE_ARG) || defined(HAVE_IOPS_RENAME_WITH_FLAGS)
        if (flags)
                return -EINVAL;
 #endif
@@ -2096,7 +2128,7 @@ static int ll_rename(struct inode *src, struct dentry *src_dchild,
        if (unlikely(d_mountpoint(src_dchild) || d_mountpoint(tgt_dchild)))
                RETURN(-EBUSY);
 
-#ifdef HAVE_IOPS_RENAME_WITH_FLAGS
+#if defined(HAVE_USER_NAMESPACE_ARG) || defined(HAVE_IOPS_RENAME_WITH_FLAGS)
        err = llcrypt_prepare_rename(src, src_dchild, tgt, tgt_dchild, flags);
 #else
        err = llcrypt_prepare_rename(src, src_dchild, tgt, tgt_dchild, 0);
@@ -2120,6 +2152,15 @@ static int ll_rename(struct inode *src, struct dentry *src_dchild,
        if (IS_ERR(op_data))
                RETURN(PTR_ERR(op_data));
 
+       /* If the client is using a subdir mount and does a rename to what it
+        * sees as /.fscrypt, interpret it as the .fscrypt dir at fs root.
+        */
+       if (unlikely(is_root_inode(tgt) && !fid_is_root(ll_inode2fid(tgt)) &&
+                    tgt_dchild->d_name.len == strlen(dot_fscrypt_name) &&
+                    strncmp(tgt_dchild->d_name.name, dot_fscrypt_name,
+                            tgt_dchild->d_name.len) == 0))
+               lu_root_fid(&op_data->op_fid2);
+
        if (src_dchild->d_inode)
                op_data->op_fid3 = *ll_inode2fid(src_dchild->d_inode);