Whamcloud - gitweb
LU-15216 lmv: improve MDT QOS space balance
[fs/lustre-release.git] / lustre / llite / namei.c
index 3d17ade..80b8460 100644 (file)
@@ -49,7 +49,7 @@
 static int ll_create_it(struct inode *dir, struct dentry *dentry,
                        struct lookup_intent *it,
                        void *secctx, __u32 secctxlen, bool encrypt,
-                       void *encctx, __u32 encctxlen);
+                       void *encctx, __u32 encctxlen, unsigned int open_flags);
 
 /* called from iget5_locked->find_inode() under inode_lock spinlock */
 static int ll_test_inode(struct inode *inode, void *opaque)
@@ -171,7 +171,8 @@ restart:
                                continue;
 
                        spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED);
-                       __d_lustre_invalidate(child);
+                       if (lld_is_init(child))
+                               ll_d2d(child)->lld_invalid = 1;
                        if (!ll_d_count(child)) {
                                dget_dlock(child);
                                __d_drop(child);
@@ -256,7 +257,7 @@ static void ll_lock_cancel_bits(struct ldlm_lock *lock, __u64 to_cancel)
        }
 
        if (bits & MDS_INODELOCK_XATTR) {
-               ll_xattr_cache_destroy(inode);
+               ll_xattr_cache_empty(inode);
                bits &= ~MDS_INODELOCK_XATTR;
        }
 
@@ -592,16 +593,12 @@ static struct dentry *ll_find_alias(struct inode *inode, struct dentry *dentry)
 struct dentry *ll_splice_alias(struct inode *inode, struct dentry *de)
 {
        struct dentry *new;
-       int rc;
 
        if (inode) {
                new = ll_find_alias(inode, de);
                if (new) {
-                       rc = ll_d_init(new);
-                       if (rc < 0) {
-                               dput(new);
-                               return ERR_PTR(rc);
-                       }
+                       if (!ll_d_setup(new, true))
+                               return ERR_PTR(-ENOMEM);
                        d_move(new, de);
                        iput(inode);
                        CDEBUG(D_DENTRY,
@@ -610,9 +607,8 @@ struct dentry *ll_splice_alias(struct inode *inode, struct dentry *de)
                        return new;
                }
        }
-       rc = ll_d_init(de);
-       if (rc < 0)
-               return ERR_PTR(rc);
+       if (!ll_d_setup(de, false))
+               return ERR_PTR(-ENOMEM);
        d_add(de, inode);
 
        /* this needs only to be done for foreign symlink dirs as
@@ -826,7 +822,8 @@ static struct dentry *ll_lookup_it(struct inode *parent, struct dentry *dentry,
        __u32 opc;
        int rc;
        char secctx_name[XATTR_NAME_MAX + 1];
-
+       struct llcrypt_name fname;
+       struct lu_fid fid;
        ENTRY;
 
        if (dentry->d_name.len > ll_i2sbi(parent)->ll_namelen)
@@ -854,19 +851,45 @@ static struct dentry *ll_lookup_it(struct inode *parent, struct dentry *dentry,
        if (it->it_op & IT_CREAT)
                opc = LUSTRE_OPC_CREATE;
        else
-               opc = LUSTRE_OPC_ANY;
+               opc = LUSTRE_OPC_LOOKUP;
+
+       /* Here we should be calling llcrypt_prepare_lookup(). But it installs a
+        * custom ->d_revalidate() method, so we lose ll_d_ops.
+        * To work around this, call ll_setup_filename() and do the rest
+        * manually. Also make a copy of llcrypt_d_revalidate() (unfortunately
+        * not an exported function) and call it from ll_revalidate_dentry(),
+        * to ensure we do not cache stale dentries after a key has been added.
+        */
+       rc = ll_setup_filename(parent, &dentry->d_name, 1, &fname, &fid);
+       if ((!rc || rc == -ENOENT) && fname.is_ciphertext_name) {
+               spin_lock(&dentry->d_lock);
+               dentry->d_flags |= DCACHE_ENCRYPTED_NAME;
+               spin_unlock(&dentry->d_lock);
+       }
+       if (rc == -ENOENT)
+               RETURN(NULL);
+       if (rc)
+               RETURN(ERR_PTR(rc));
 
-       op_data = ll_prep_md_op_data(NULL, parent, NULL, dentry->d_name.name,
-                                    dentry->d_name.len, 0, opc, NULL);
-       if (IS_ERR(op_data))
-               GOTO(out, retval = ERR_CAST(op_data));
+       op_data = ll_prep_md_op_data(NULL, parent, NULL, fname.disk_name.name,
+                                    fname.disk_name.len, 0, opc, NULL);
+       if (IS_ERR(op_data)) {
+               llcrypt_free_filename(&fname);
+               RETURN(ERR_CAST(op_data));
+       }
+       if (!fid_is_zero(&fid)) {
+               op_data->op_fid2 = fid;
+               op_data->op_bias = MDS_FID_OP;
+               if (it->it_op & IT_OPEN)
+                       it->it_flags |= MDS_OPEN_BY_FID;
+       }
 
        /* enforce umask if acl disabled or MDS doesn't support umask */
        if (!IS_POSIXACL(parent) || !exp_connect_umask(ll_i2mdexp(parent)))
                it->it_create_mode &= ~current_umask();
 
        if (it->it_op & IT_CREAT &&
-           ll_i2sbi(parent)->ll_flags & LL_SBI_FILE_SECCTX) {
+           test_bit(LL_SBI_FILE_SECCTX, ll_i2sbi(parent)->ll_flags)) {
                rc = ll_dentry_init_security(dentry, it->it_create_mode,
                                             &dentry->d_name,
                                             &op_data->op_file_secctx_name,
@@ -885,37 +908,20 @@ static struct dentry *ll_lookup_it(struct inode *parent, struct dentry *dentry,
                        *secctxlen = 0;
        }
        if (it->it_op & IT_CREAT && encrypt) {
-               /* Volatile file name may look like:
-                * <parent>/LUSTRE_VOLATILE_HDR:<mdt_index>:<random>:fd=<fd>
-                * where fd is opened descriptor of reference file.
-                */
                if (unlikely(filename_is_volatile(dentry->d_name.name,
                                                  dentry->d_name.len, NULL))) {
+                       /* get encryption context from reference file */
                        int ctx_size = LLCRYPT_ENC_CTX_SIZE;
                        struct lustre_sb_info *lsi;
                        struct file *ref_file;
                        struct inode *ref_inode;
-                       char *p, *q, *fd_str;
                        void *ctx;
-                       int fd;
-
-                       p = strnstr(dentry->d_name.name, ":fd=",
-                                   dentry->d_name.len);
-                       if (!p || strlen(p + 4) == 0)
-                               GOTO(out, retval = ERR_PTR(-EINVAL));
 
-                       q = strchrnul(p + 4, ':');
-                       fd_str = kstrndup(p + 4, q - p - 4, GFP_NOFS);
-                       if (!fd_str)
-                               GOTO(out, retval = ERR_PTR(-ENOMEM));
-                       rc = kstrtouint(fd_str, 10, &fd);
-                       kfree(fd_str);
+                       rc = volatile_ref_file(dentry->d_name.name,
+                                              dentry->d_name.len,
+                                              &ref_file);
                        if (rc)
-                               GOTO(inherit, rc = -EINVAL);
-
-                       ref_file = fget(fd);
-                       if (!ref_file)
-                               GOTO(inherit, rc = -EINVAL);
+                               GOTO(out, retval = ERR_PTR(rc));
 
                        ref_inode = file_inode(ref_file);
                        if (!ref_inode) {
@@ -961,7 +967,6 @@ getctx:
                                       op_data->op_file_encctx_size);
                                OBD_FREE(ctx, ctx_size);
                        }
-
                } else {
 inherit:
                        rc = llcrypt_inherit_context(parent, NULL, op_data,
@@ -1087,6 +1092,7 @@ out:
                        op_data->op_file_encctx = NULL;
                        op_data->op_file_encctx_size = 0;
                }
+               llcrypt_free_filename(&fname);
                ll_finish_md_op_data(op_data);
        }
 
@@ -1228,7 +1234,14 @@ static int ll_atomic_open(struct inode *dir, struct dentry *dentry,
                if (rc)
                        GOTO(out_release, rc);
                if (open_flags & O_CREAT) {
-                       if (!llcrypt_has_encryption_key(dir))
+                       /* For migration or mirroring without enc key, we still
+                        * need to be able to create a volatile file.
+                        */
+                       if (!llcrypt_has_encryption_key(dir) &&
+                           (!filename_is_volatile(dentry->d_name.name,
+                                                  dentry->d_name.len, NULL) ||
+                           (open_flags & O_FILE_ENC) != O_FILE_ENC ||
+                           !(open_flags & O_DIRECT)))
                                GOTO(out_release, rc = -ENOKEY);
                        encrypt = true;
                }
@@ -1259,7 +1272,8 @@ static int ll_atomic_open(struct inode *dir, struct dentry *dentry,
                if (it_disposition(it, DISP_OPEN_CREATE)) {
                        /* Dentry instantiated in ll_create_it. */
                        rc = ll_create_it(dir, dentry, it, secctx, secctxlen,
-                                         encrypt, encctx, encctxlen);
+                                         encrypt, encctx, encctxlen,
+                                         open_flags);
                        ll_security_release_secctx(secctx, secctxlen);
                        llcrypt_free_ctx(encctx, encctxlen);
                        if (rc) {
@@ -1385,7 +1399,7 @@ static struct inode *ll_create_node(struct inode *dir, struct lookup_intent *it)
 static int ll_create_it(struct inode *dir, struct dentry *dentry,
                        struct lookup_intent *it,
                        void *secctx, __u32 secctxlen, bool encrypt,
-                       void *encctx, __u32 encctxlen)
+                       void *encctx, __u32 encctxlen, unsigned int open_flags)
 {
        struct inode *inode;
        __u64 bits = 0;
@@ -1403,8 +1417,8 @@ static int ll_create_it(struct inode *dir, struct dentry *dentry,
        if (IS_ERR(inode))
                RETURN(PTR_ERR(inode));
 
-       if ((ll_i2sbi(inode)->ll_flags & LL_SBI_FILE_SECCTX) &&
-           secctx != NULL) {
+       if (test_bit(LL_SBI_FILE_SECCTX, ll_i2sbi(inode)->ll_flags) &&
+           secctx) {
                /* must be done before d_instantiate, because it calls
                 * security_d_instantiate, which means a getxattr if security
                 * context is not set yet */
@@ -1420,12 +1434,23 @@ static int ll_create_it(struct inode *dir, struct dentry *dentry,
        d_instantiate(dentry, inode);
 
        if (encrypt) {
-               rc = ll_set_encflags(inode, encctx, encctxlen, true);
+               bool preload = true;
+
+               /* For migration or mirroring without enc key, we
+                * create a volatile file without enc context.
+                */
+               if (!llcrypt_has_encryption_key(dir) &&
+                   filename_is_volatile(dentry->d_name.name,
+                                        dentry->d_name.len, NULL) &&
+                   (open_flags & O_FILE_ENC) == O_FILE_ENC &&
+                   open_flags & O_DIRECT)
+                       preload = false;
+               rc = ll_set_encflags(inode, encctx, encctxlen, preload);
                if (rc)
                        RETURN(rc);
        }
 
-       if (!(ll_i2sbi(inode)->ll_flags & LL_SBI_FILE_SECCTX)) {
+       if (!test_bit(LL_SBI_FILE_SECCTX, ll_i2sbi(inode)->ll_flags)) {
                rc = ll_inode_init_security(dentry, inode, dir);
                if (rc)
                        RETURN(rc);
@@ -1470,7 +1495,7 @@ static void ll_qos_mkdir_prep(struct md_op_data *op_data, struct inode *dir)
        struct ll_inode_info *lli = ll_i2info(dir);
        struct lmv_stripe_md *lsm;
 
-       op_data->op_dir_depth = lli->lli_depth;
+       op_data->op_dir_depth = lli->lli_dir_depth;
 
        /* parent directory is striped */
        if (unlikely(lli->lli_lsm_md))
@@ -1499,11 +1524,11 @@ static void ll_qos_mkdir_prep(struct md_op_data *op_data, struct inode *dir)
 
        if (lsm->lsm_md_max_inherit != LMV_INHERIT_NONE &&
            (lsm->lsm_md_max_inherit == LMV_INHERIT_UNLIMITED ||
-            lsm->lsm_md_max_inherit >= lli->lli_depth)) {
+            lsm->lsm_md_max_inherit >= lli->lli_dir_depth)) {
                op_data->op_flags |= MF_QOS_MKDIR;
                if (lsm->lsm_md_max_inherit_rr != LMV_INHERIT_RR_NONE &&
                    (lsm->lsm_md_max_inherit_rr == LMV_INHERIT_RR_UNLIMITED ||
-                    lsm->lsm_md_max_inherit_rr >= lli->lli_depth))
+                    lsm->lsm_md_max_inherit_rr >= lli->lli_dir_depth))
                        op_data->op_flags |= MF_RR_MKDIR;
                CDEBUG(D_INODE, DFID" requests qos mkdir %#x\n",
                       PFID(&lli->lli_fid), op_data->op_flags);
@@ -1513,20 +1538,24 @@ unlock:
 }
 
 static int ll_new_node(struct inode *dir, struct dentry *dchild,
-                      const char *tgt, umode_t mode, int rdev, __u32 opc)
+                      const char *tgt, umode_t mode, __u64 rdev, __u32 opc)
 {
        struct qstr *name = &dchild->d_name;
        struct ptlrpc_request *request = NULL;
        struct md_op_data *op_data = NULL;
        struct inode *inode = NULL;
        struct ll_sb_info *sbi = ll_i2sbi(dir);
-       int tgt_len = 0;
+       struct llcrypt_str *disk_link = NULL;
        bool encrypt = false;
        int err;
 
        ENTRY;
-       if (unlikely(tgt != NULL))
-               tgt_len = strlen(tgt) + 1;
+       if (unlikely(tgt != NULL)) {
+               disk_link = (struct llcrypt_str *)rdev;
+               rdev = 0;
+               if (!disk_link)
+                       RETURN(-EINVAL);
+       }
 
 again:
        op_data = ll_prep_md_op_data(NULL, dir, NULL, name->name,
@@ -1537,7 +1566,7 @@ again:
        if (S_ISDIR(mode))
                ll_qos_mkdir_prep(op_data, dir);
 
-       if (sbi->ll_flags & LL_SBI_FILE_SECCTX) {
+       if (test_bit(LL_SBI_FILE_SECCTX, sbi->ll_flags)) {
                err = ll_dentry_init_security(dchild, mode, &dchild->d_name,
                                              &op_data->op_file_secctx_name,
                                              &op_data->op_file_secctx,
@@ -1562,9 +1591,42 @@ again:
                err = llcrypt_inherit_context(dir, NULL, op_data, false);
                if (err)
                        GOTO(err_exit, err);
+
+               if (S_ISLNK(mode)) {
+                       /* llcrypt needs inode to encrypt target name, so create
+                        * a fake inode and associate encryption context got
+                        * from llcrypt_inherit_context.
+                        */
+                       struct inode *fakeinode =
+                               dchild->d_sb->s_op->alloc_inode(dchild->d_sb);
+
+                       if (!fakeinode)
+                               GOTO(err_exit, err = -ENOMEM);
+                       fakeinode->i_sb = dchild->d_sb;
+                       fakeinode->i_mode |= S_IFLNK;
+#ifdef IOP_XATTR
+                       fakeinode->i_opflags |= IOP_XATTR;
+#endif
+                       ll_lli_init(ll_i2info(fakeinode));
+                       err = ll_set_encflags(fakeinode,
+                                             op_data->op_file_encctx,
+                                             op_data->op_file_encctx_size,
+                                             true);
+                       if (!err)
+                               err = __llcrypt_encrypt_symlink(fakeinode, tgt,
+                                                               strlen(tgt),
+                                                               disk_link);
+
+                       ll_xattr_cache_destroy(fakeinode);
+                       llcrypt_put_encryption_info(fakeinode);
+                       dchild->d_sb->s_op->destroy_inode(fakeinode);
+                       if (err)
+                               GOTO(err_exit, err);
+               }
        }
 
-       err = md_create(sbi->ll_md_exp, op_data, tgt, tgt_len, mode,
+       err = md_create(sbi->ll_md_exp, op_data, tgt ? disk_link->name : NULL,
+                       tgt ? disk_link->len : 0, mode,
                        from_kuid(&init_user_ns, current_fsuid()),
                        from_kgid(&init_user_ns, current_fsgid()),
                        current_cap(), rdev, &request);
@@ -1645,7 +1707,7 @@ again:
        if (err)
                GOTO(err_exit, err);
 
-       if (sbi->ll_flags & LL_SBI_FILE_SECCTX) {
+       if (test_bit(LL_SBI_FILE_SECCTX, sbi->ll_flags)) {
                /* must be done before d_instantiate, because it calls
                 * security_d_instantiate, which means a getxattr if security
                 * context is not set yet */
@@ -1667,9 +1729,24 @@ again:
                                      op_data->op_file_encctx_size, true);
                if (err)
                        GOTO(err_exit, err);
+
+               if (S_ISLNK(mode)) {
+                       struct ll_inode_info *lli = ll_i2info(inode);
+
+                       /* Cache the plaintext symlink target
+                        * for later use by get_link()
+                        */
+                       OBD_ALLOC(lli->lli_symlink_name, strlen(tgt) + 1);
+                       /* do not return an error if we cannot
+                        * cache the symlink locally
+                        */
+                       if (lli->lli_symlink_name)
+                               memcpy(lli->lli_symlink_name,
+                                      tgt, strlen(tgt) + 1);
+               }
        }
 
-       if (!(sbi->ll_flags & LL_SBI_FILE_SECCTX)) {
+       if (!test_bit(LL_SBI_FILE_SECCTX, sbi->ll_flags)) {
                err = ll_inode_init_security(dchild, inode, dir);
                if (err)
                        GOTO(err_exit, err);
@@ -1758,14 +1835,24 @@ static int ll_symlink(struct inode *dir, struct dentry *dchild,
                      const char *oldpath)
 {
        ktime_t kstart = ktime_get();
+       int len = strlen(oldpath);
+       struct llcrypt_str disk_link;
        int err;
        ENTRY;
 
        CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p), target=%.*s\n",
               dchild, PFID(ll_inode2fid(dir)), dir, 3000, oldpath);
 
-       err = ll_new_node(dir, dchild, oldpath, S_IFLNK | S_IRWXUGO, 0,
-                         LUSTRE_OPC_SYMLINK);
+       err = llcrypt_prepare_symlink(dir, oldpath, len, dir->i_sb->s_blocksize,
+                                     &disk_link);
+       if (err)
+               RETURN(err);
+
+       err = ll_new_node(dir, dchild, oldpath, S_IFLNK | S_IRWXUGO,
+                         (__u64)&disk_link, LUSTRE_OPC_SYMLINK);
+
+       if (disk_link.name != (unsigned char *)oldpath)
+               kfree(disk_link.name);
 
        if (!err)
                ll_stats_ops_tally(ll_i2sbi(dir), LPROC_LL_SYMLINK,
@@ -1864,7 +1951,8 @@ static int ll_rmdir(struct inode *dir, struct dentry *dchild)
        if (dchild->d_inode != NULL)
                op_data->op_fid3 = *ll_inode2fid(dchild->d_inode);
 
-       op_data->op_fid2 = op_data->op_fid3;
+       if (fid_is_zero(&op_data->op_fid2))
+               op_data->op_fid2 = op_data->op_fid3;
        rc = md_unlink(ll_i2sbi(dir)->ll_md_exp, op_data, &request);
        ll_finish_md_op_data(op_data);
        if (!rc) {
@@ -1880,8 +1968,11 @@ static int ll_rmdir(struct inode *dir, struct dentry *dchild)
                 * immediately.
                 */
                body = req_capsule_server_get(&request->rq_pill, &RMF_MDT_BODY);
-               if (body->mbo_valid & OBD_MD_FLNLINK)
+               if (body->mbo_valid & OBD_MD_FLNLINK) {
+                       spin_lock(&dchild->d_inode->i_lock);
                        set_nlink(dchild->d_inode, body->mbo_nlink);
+                       spin_unlock(&dchild->d_inode->i_lock);
+               }
        }
 
        ptlrpc_req_finished(request);
@@ -1956,7 +2047,8 @@ static int ll_unlink(struct inode *dir, struct dentry *dchild)
            ll_i2info(dchild->d_inode)->lli_clob &&
            dirty_cnt(dchild->d_inode))
                op_data->op_cli_flags |= CLI_DIRTY_DATA;
-       op_data->op_fid2 = op_data->op_fid3;
+       if (fid_is_zero(&op_data->op_fid2))
+               op_data->op_fid2 = op_data->op_fid3;
        rc = md_unlink(ll_i2sbi(dir)->ll_md_exp, op_data, &request);
        ll_finish_md_op_data(op_data);
        if (rc)
@@ -1967,8 +2059,11 @@ static int ll_unlink(struct inode *dir, struct dentry *dchild)
         * the link count so the inode can be freed immediately.
         */
        body = req_capsule_server_get(&request->rq_pill, &RMF_MDT_BODY);
-       if (body->mbo_valid & OBD_MD_FLNLINK)
+       if (body->mbo_valid & OBD_MD_FLNLINK) {
+               spin_lock(&dchild->d_inode->i_lock);
                set_nlink(dchild->d_inode, body->mbo_nlink);
+               spin_unlock(&dchild->d_inode->i_lock);
+       }
 
        ll_update_times(request, dir);
 
@@ -1987,13 +2082,12 @@ static int ll_rename(struct inode *src, struct dentry *src_dchild,
 #endif
                     )
 {
-       struct qstr *src_name = &src_dchild->d_name;
-       struct qstr *tgt_name = &tgt_dchild->d_name;
        struct ptlrpc_request *request = NULL;
        struct ll_sb_info *sbi = ll_i2sbi(src);
        struct md_op_data *op_data;
        ktime_t kstart = ktime_get();
        umode_t mode = 0;
+       struct llcrypt_name foldname, fnewname;
        int err;
        ENTRY;
 
@@ -2040,9 +2134,20 @@ static int ll_rename(struct inode *src, struct dentry *src_dchild,
        if (tgt_dchild->d_inode)
                op_data->op_fid4 = *ll_inode2fid(tgt_dchild->d_inode);
 
+       err = ll_setup_filename(src, &src_dchild->d_name, 1, &foldname, NULL);
+       if (err)
+               RETURN(err);
+       err = ll_setup_filename(tgt, &tgt_dchild->d_name, 1, &fnewname, NULL);
+       if (err) {
+               llcrypt_free_filename(&foldname);
+               RETURN(err);
+       }
        err = md_rename(sbi->ll_md_exp, op_data,
-                       src_name->name, src_name->len,
-                       tgt_name->name, tgt_name->len, &request);
+                       foldname.disk_name.name, foldname.disk_name.len,
+                       fnewname.disk_name.name, fnewname.disk_name.len,
+                       &request);
+       llcrypt_free_filename(&foldname);
+       llcrypt_free_filename(&fnewname);
        ll_finish_md_op_data(op_data);
        if (!err) {
                ll_update_times(request, src);