Whamcloud - gitweb
LU-14651 llite: extend inode methods with user namespace arg
[fs/lustre-release.git] / lustre / llite / llite_lib.c
index 87ccf1b..ccdcc8c 100644 (file)
@@ -40,6 +40,7 @@
 #include <linux/random.h>
 #include <linux/statfs.h>
 #include <linux/time.h>
+#include <linux/file.h>
 #include <linux/types.h>
 #include <libcfs/linux/linux-uuid.h>
 #include <linux/version.h>
@@ -92,7 +93,6 @@ static struct ll_sb_info *ll_init_sbi(void)
        unsigned long lru_page_max;
        struct sysinfo si;
        int rc;
-       int i;
 
        ENTRY;
 
@@ -171,13 +171,6 @@ static struct ll_sb_info *ll_init_sbi(void)
 #endif
        set_bit(LL_SBI_LAZYSTATFS, sbi->ll_flags);
 
-        for (i = 0; i <= LL_PROCESS_HIST_MAX; i++) {
-               spin_lock_init(&sbi->ll_rw_extents_info.pp_extents[i].
-                              pp_r_hist.oh_lock);
-               spin_lock_init(&sbi->ll_rw_extents_info.pp_extents[i].
-                              pp_w_hist.oh_lock);
-        }
-
        /* metadata statahead is enabled by default */
        sbi->ll_sa_running_max = LL_SA_RUNNING_DEF;
        sbi->ll_sa_max = LL_SA_RPC_DEF;
@@ -261,6 +254,7 @@ static void ll_free_sbi(struct super_block *sb)
                                sizeof(struct ll_foreign_symlink_upcall_item));
                        sbi->ll_foreign_symlink_upcall_items = NULL;
                }
+               ll_free_rw_stats_info(sbi);
                pcc_super_fini(&sbi->ll_pcc_super);
                OBD_FREE(sbi, sizeof(*sbi));
        }
@@ -280,6 +274,8 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt)
        u64 valid;
        int size, err, checksum;
        bool api32;
+       void *encctx;
+       int encctxlen;
 
        ENTRY;
        sbi->ll_md_obd = class_name2obd(md);
@@ -458,6 +454,7 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt)
        sb->s_maxbytes = MAX_LFS_FILESIZE;
        sbi->ll_namelen = osfs->os_namelen;
        sbi->ll_mnt.mnt = current->fs->root.mnt;
+       sbi->ll_mnt_ns = current->nsproxy->mnt_ns;
 
        if (test_bit(LL_SBI_USER_XATTR, sbi->ll_flags) &&
            !(data->ocd_connect_flags & OBD_CONNECT_XATTR)) {
@@ -647,7 +644,8 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt)
 
        /* make root inode
         * XXX: move this to after cbd setup? */
-       valid = OBD_MD_FLGETATTR | OBD_MD_FLBLOCKS | OBD_MD_FLMODEASIZE;
+       valid = OBD_MD_FLGETATTR | OBD_MD_FLBLOCKS | OBD_MD_FLMODEASIZE |
+               OBD_MD_ENCCTX;
        if (test_bit(LL_SBI_ACL, sbi->ll_flags))
                valid |= OBD_MD_FLACL;
 
@@ -661,6 +659,13 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt)
 
        err = md_getattr(sbi->ll_md_exp, op_data, &request);
 
+       /* We need enc ctx info, so reset it in op_data to
+        * prevent it from being freed.
+        */
+       encctx = op_data->op_file_encctx;
+       encctxlen = op_data->op_file_encctx_size;
+       op_data->op_file_encctx = NULL;
+       op_data->op_file_encctx_size = 0;
        OBD_FREE_PTR(op_data);
        if (err) {
                CERROR("%s: md_getattr failed for root: rc = %d\n",
@@ -680,7 +685,6 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt)
        api32 = test_bit(LL_SBI_32BIT_API, sbi->ll_flags);
        root = ll_iget(sb, cl_fid_build_ino(&sbi->ll_root_fid, api32), &lmd);
        md_free_lustre_md(sbi->ll_md_exp, &lmd);
-       ptlrpc_req_finished(request);
 
        if (IS_ERR(root)) {
                lmd_clear_acl(&lmd);
@@ -688,9 +692,22 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt)
                root = NULL;
                CERROR("%s: bad ll_iget() for root: rc = %d\n",
                       sbi->ll_fsname, err);
+               ptlrpc_req_finished(request);
                GOTO(out_root, err);
        }
 
+       if (encctxlen) {
+               CDEBUG(D_SEC,
+                      "server returned encryption ctx for root inode "DFID"\n",
+                      PFID(&sbi->ll_root_fid));
+               err = ll_set_encflags(root, encctx, encctxlen, true);
+               if (err)
+                       CWARN("%s: cannot set enc ctx for "DFID": rc = %d\n",
+                             sbi->ll_fsname,
+                             PFID(&sbi->ll_root_fid), err);
+       }
+       ptlrpc_req_finished(request);
+
        checksum = test_bit(LL_SBI_CHECKSUM, sbi->ll_flags);
        if (sbi->ll_checksum_set) {
                err = obd_set_info_async(NULL, sbi->ll_dt_exp,
@@ -1285,6 +1302,9 @@ int ll_fill_super(struct super_block *sb)
        if (err)
                GOTO(out_free_cfg, err);
 
+       /* disable kernel readahead */
+       sb->s_bdi->ra_pages = 0;
+
        /* Call ll_debugfs_register_super() before lustre_process_log()
         * so that "llite.*.*" params can be processed correctly.
         */
@@ -1843,7 +1863,8 @@ static int ll_md_setattr(struct dentry *dentry, struct md_op_data *op_data)
                            !S_ISDIR(inode->i_mode)) {
                                ia_valid = op_data->op_attr.ia_valid;
                                op_data->op_attr.ia_valid &= ~TIMES_SET_FLAGS;
-                               rc = simple_setattr(dentry, &op_data->op_attr);
+                               rc = simple_setattr(&init_user_ns, dentry,
+                                                   &op_data->op_attr);
                                op_data->op_attr.ia_valid = ia_valid;
                        }
                } else if (rc != -EPERM && rc != -EACCES && rc != -ETXTBSY) {
@@ -1865,7 +1886,7 @@ static int ll_md_setattr(struct dentry *dentry, struct md_op_data *op_data)
        op_data->op_attr.ia_valid &= ~(TIMES_SET_FLAGS | ATTR_SIZE);
        if (S_ISREG(inode->i_mode))
                inode_lock(inode);
-       rc = simple_setattr(dentry, &op_data->op_attr);
+       rc = simple_setattr(&init_user_ns, dentry, &op_data->op_attr);
        if (S_ISREG(inode->i_mode))
                inode_unlock(inode);
        op_data->op_attr.ia_valid = ia_valid;
@@ -2032,6 +2053,44 @@ putenv:
        RETURN(rc);
 }
 
+/**
+ * Get reference file from volatile file name.
+ * Volatile file name may look like:
+ * <parent>/LUSTRE_VOLATILE_HDR:<mdt_index>:<random>:fd=<fd>
+ * where fd is an open descriptor of the reference file.
+ *
+ * \param[in] volatile_name    volatile file name
+ * \param[in] volatile_len     volatile file name length
+ * \param[out] ref_file        reference file; caller must fput() it
+ *
+ * \retval 0           on success
+ * \retval negative    errno on failure (-EINVAL or -ENOMEM)
+ */
+int volatile_ref_file(const char *volatile_name, int volatile_len,
+                     struct file **ref_file)
+{
+       char *p, *q, *fd_str;
+       unsigned int fd; /* type expected by kstrtouint() and fget() */
+       int rc;
+
+       p = strnstr(volatile_name, ":fd=", volatile_len);
+       if (!p || p[4] == '\0')
+               return -EINVAL;
+
+       /* fd digits end at the next ':' or at the end of the name */
+       q = strchrnul(p + 4, ':');
+       fd_str = kstrndup(p + 4, q - p - 4, GFP_NOFS);
+       if (!fd_str)
+               return -ENOMEM;
+       rc = kstrtouint(fd_str, 10, &fd);
+       kfree(fd_str);
+       if (rc)
+               return -EINVAL;
+
+       *ref_file = fget(fd);
+       return *ref_file ? 0 : -EINVAL;
+}
+
 /* If this inode has objects allocated to it (lsm != NULL), then the OST
  * object(s) determine the file size and mtime.  Otherwise, the MDS will
  * keep these values until such a time that objects are allocated for it.
@@ -2195,6 +2254,55 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr,
                                        if (rc)
                                                GOTO(out, rc);
                                }
+                               /* If encrypted volatile file without the key,
+                                * we need to fetch size from reference file,
+                                * and set it on OST objects. This happens when
+                                * migrating or extending an encrypted file
+                                * without the key.
+                                */
+                               if (filename_is_volatile(dentry->d_name.name,
+                                                        dentry->d_name.len,
+                                                        NULL) &&
+                                   llcrypt_require_key(inode) == -ENOKEY) {
+                                       struct file *ref_file;
+                                       struct inode *ref_inode;
+                                       struct ll_inode_info *ref_lli;
+                                       struct cl_object *ref_obj;
+                                       struct cl_attr ref_attr = { 0 };
+                                       struct lu_env *env;
+                                       __u16 refcheck;
+
+                                       rc = volatile_ref_file(
+                                               dentry->d_name.name,
+                                               dentry->d_name.len,
+                                               &ref_file);
+                                       if (rc)
+                                               GOTO(out, rc);
+
+                                       ref_inode = file_inode(ref_file);
+                                       if (!ref_inode) {
+                                               fput(ref_file);
+                                               GOTO(out, rc = -EINVAL);
+                                       }
+
+                                       env = cl_env_get(&refcheck);
+                                       if (IS_ERR(env))
+                                               GOTO(out, rc = PTR_ERR(env));
+
+                                       ref_lli = ll_i2info(ref_inode);
+                                       ref_obj = ref_lli->lli_clob;
+                                       cl_object_attr_lock(ref_obj);
+                                       rc = cl_object_attr_get(env, ref_obj,
+                                                               &ref_attr);
+                                       cl_object_attr_unlock(ref_obj);
+                                       cl_env_put(env, &refcheck);
+                                       fput(ref_file);
+                                       if (rc)
+                                               GOTO(out, rc);
+
+                                       attr->ia_valid |= ATTR_SIZE;
+                                       attr->ia_size = ref_attr.cat_size;
+                               }
                        }
                        rc = cl_setattr_ost(lli->lli_clob, attr, xvalid, flags);
                }
@@ -2253,10 +2361,11 @@ out:
                                        LPROC_LL_TRUNC : LPROC_LL_SETATTR,
                                   ktime_us_delta(ktime_get(), kstart));
 
-       return rc;
+       RETURN(rc);
 }
 
-int ll_setattr(struct dentry *de, struct iattr *attr)
+int ll_setattr(struct user_namespace *mnt_userns, struct dentry *de,
+              struct iattr *attr)
 {
        int mode = de->d_inode->i_mode;
        enum op_xvalid xvalid = 0;
@@ -2559,7 +2668,15 @@ int ll_update_inode(struct inode *inode, struct lustre_md *md)
 
        LASSERT(fid_seq(&lli->lli_fid) != 0);
 
-       lli->lli_attr_valid = body->mbo_valid;
+       /* In case of encrypted file without the key, please do not lose
+        * clear text size stored into lli_lazysize in ll_merge_attr(),
+        * we will need it in ll_prepare_close().
+        */
+       if (lli->lli_attr_valid & OBD_MD_FLLAZYSIZE && lli->lli_lazysize &&
+           llcrypt_require_key(inode) == -ENOKEY)
+               lli->lli_attr_valid = body->mbo_valid | OBD_MD_FLLAZYSIZE;
+       else
+               lli->lli_attr_valid = body->mbo_valid;
        if (body->mbo_valid & OBD_MD_FLSIZE) {
                i_size_write(inode, body->mbo_size);
 
@@ -2602,8 +2719,9 @@ void ll_update_dir_depth(struct inode *dir, struct inode *inode)
                return;
 
        lli = ll_i2info(inode);
-       lli->lli_depth = ll_i2info(dir)->lli_depth + 1;
-       CDEBUG(D_INODE, DFID" depth %hu\n", PFID(&lli->lli_fid), lli->lli_depth);
+       lli->lli_dir_depth = ll_i2info(dir)->lli_dir_depth + 1;
+       CDEBUG(D_INODE, DFID" depth %hu\n",
+              PFID(&lli->lli_fid), lli->lli_dir_depth);
 }
 
 void ll_truncate_inode_pages_final(struct inode *inode)
@@ -3132,7 +3250,9 @@ struct md_op_data *ll_prep_md_op_data(struct md_op_data *op_data,
                if (namelen != 0)
                        return ERR_PTR(-EINVAL);
        } else {
-               if (namelen > ll_i2sbi(i1)->ll_namelen)
+               if ((!IS_ENCRYPTED(i1) ||
+                    (opc != LUSTRE_OPC_LOOKUP && opc != LUSTRE_OPC_CREATE)) &&
+                   namelen > ll_i2sbi(i1)->ll_namelen)
                        return ERR_PTR(-ENAMETOOLONG);
 
                /* "/" is not valid name, but it's allowed */
@@ -3180,9 +3300,11 @@ struct md_op_data *ll_prep_md_op_data(struct md_op_data *op_data,
        if (ll_need_32bit_api(ll_i2sbi(i1)))
                op_data->op_cli_flags |= CLI_API32;
 
-       if (opc == LUSTRE_OPC_LOOKUP || opc == LUSTRE_OPC_CREATE) {
+       if ((i2 && is_root_inode(i2)) ||
+           opc == LUSTRE_OPC_LOOKUP || opc == LUSTRE_OPC_CREATE) {
                /* In case of lookup, ll_setup_filename() has already been
                 * called in ll_lookup_it(), so just take provided name.
+                * Also take provided name if we are dealing with root inode.
                 */
                fname.disk_name.name = (unsigned char *)name;
                fname.disk_name.len = namelen;
@@ -3219,11 +3341,10 @@ struct md_op_data *ll_prep_md_op_data(struct md_op_data *op_data,
                        op_data->op_flags |= MF_OPNAME_KMALLOCED;
        }
 
-       /* In fact LUSTRE_OPC_LOOKUP, LUSTRE_OPC_OPEN, LUSTRE_OPC_MIGR
+       /* In fact LUSTRE_OPC_LOOKUP, LUSTRE_OPC_OPEN
         * are LUSTRE_OPC_ANY
         */
-       if (opc == LUSTRE_OPC_LOOKUP || opc == LUSTRE_OPC_OPEN ||
-           opc == LUSTRE_OPC_MIGR)
+       if (opc == LUSTRE_OPC_LOOKUP || opc == LUSTRE_OPC_OPEN)
                op_data->op_code = LUSTRE_OPC_ANY;
        else
                op_data->op_code = opc;