LU-11548 llite: increase readahead default values

[fs/lustre-release.git] / lustre / llite / llite_lib.c
diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c

index bdf1b0a..2547302 100644 (file)
--- a/lustre/llite/llite_lib.c
+++ b/lustre/llite/llite_lib.c
@@ -36,6 +36,7 @@
  
  #define DEBUG_SUBSYSTEM S_LLITE
  
+#include <linux/cpu.h>
  #include <linux/module.h>
  #include <linux/random.h>
  #include <linux/statfs.h>
@@ -46,11 +47,13 @@
  #include <linux/mm.h>
  #include <linux/user_namespace.h>
  #include <linux/delay.h>
-#ifdef HAVE_UIDGID_HEADER
-# include <linux/uidgid.h>
-#endif
+#include <linux/uidgid.h>
  #include <linux/security.h>
+#include <linux/fs_struct.h>
  
+#ifndef HAVE_CPUS_READ_LOCK
+#include <libcfs/linux/linux-cpu.h>
+#endif
  #include <uapi/linux/lustre/lustre_ioctl.h>
  #ifdef HAVE_UAPI_LINUX_MOUNT_H
  #include <uapi/linux/mount.h>
@@ -72,6 +75,17 @@ struct kmem_cache *ll_file_data_slab;
  #define log2(n) ffz(~(n))
  #endif
  
+/**
+ * Default maximum number of in-flight async readahead requests.
+ * To avoid massive over-subscription, half of the cores visible to
+ * Lustre are used. With only one visible core the result is 0, which
+ * disables async readahead.
+ */
+static inline unsigned int ll_get_ra_async_max_active(void)
+{
+       return cfs_cpt_weight(cfs_cpt_tab, CFS_CPT_ANY) >> 1;
+}
+
  static struct ll_sb_info *ll_init_sbi(void)
  {
         struct ll_sb_info *sbi = NULL;
@@ -102,24 +116,28 @@ static struct ll_sb_info *ll_init_sbi(void)
          pages = si.totalram - si.totalhigh;
         lru_page_max = pages / 2;
  
-       sbi->ll_ra_info.ra_async_max_active = 0;
+       sbi->ll_ra_info.ra_async_max_active = ll_get_ra_async_max_active();
         sbi->ll_ra_info.ll_readahead_wq =
-               alloc_workqueue("ll-readahead-wq", WQ_UNBOUND,
-                               sbi->ll_ra_info.ra_async_max_active);
-       if (!sbi->ll_ra_info.ll_readahead_wq)
-               GOTO(out_pcc, rc = -ENOMEM);
+               cfs_cpt_bind_workqueue("ll-readahead-wq", cfs_cpt_tab,
+                                      0, CFS_CPT_ANY,
+                                      sbi->ll_ra_info.ra_async_max_active);
+       if (IS_ERR(sbi->ll_ra_info.ll_readahead_wq))
+               GOTO(out_pcc, rc = PTR_ERR(sbi->ll_ra_info.ll_readahead_wq));
  
         /* initialize ll_cache data */
         sbi->ll_cache = cl_cache_init(lru_page_max);
         if (sbi->ll_cache == NULL)
                 GOTO(out_destroy_ra, rc = -ENOMEM);
  
-       sbi->ll_ra_info.ra_max_pages_per_file = min(pages / 32,
-                                          SBI_DEFAULT_READAHEAD_MAX);
+       sbi->ll_ra_info.ra_max_pages =
+               min(pages / 32, SBI_DEFAULT_READ_AHEAD_MAX);
+       sbi->ll_ra_info.ra_max_pages_per_file =
+               min(sbi->ll_ra_info.ra_max_pages / 4,
+                   SBI_DEFAULT_READ_AHEAD_PER_FILE_MAX);
         sbi->ll_ra_info.ra_async_pages_per_file_threshold =
                                 sbi->ll_ra_info.ra_max_pages_per_file;
-       sbi->ll_ra_info.ra_max_pages = sbi->ll_ra_info.ra_max_pages_per_file;
         sbi->ll_ra_info.ra_max_read_ahead_whole_pages = -1;
+       atomic_set(&sbi->ll_ra_info.ra_async_inflight, 0);
  
          sbi->ll_flags |= LL_SBI_VERBOSE;
  #ifdef ENABLE_CHECKSUM
@@ -151,6 +169,7 @@ static struct ll_sb_info *ll_init_sbi(void)
         sbi->ll_flags |= LL_SBI_AGL_ENABLED;
         sbi->ll_flags |= LL_SBI_FAST_READ;
         sbi->ll_flags |= LL_SBI_TINY_WRITE;
+       ll_sbi_set_encrypt(sbi, true);
  
         /* root squash */
         sbi->ll_squash.rsi_uid = 0;
@@ -258,16 +277,15 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt)
                                    OBD_CONNECT2_INC_XID |
                                    OBD_CONNECT2_LSOM |
                                    OBD_CONNECT2_ASYNC_DISCARD |
-                                  OBD_CONNECT2_PCC;
+                                  OBD_CONNECT2_PCC |
+                                  OBD_CONNECT2_CRUSH |
+                                  OBD_CONNECT2_GETATTR_PFID;
  
  #ifdef HAVE_LRU_RESIZE_SUPPORT
          if (sbi->ll_flags & LL_SBI_LRU_RESIZE)
                  data->ocd_connect_flags |= OBD_CONNECT_LRU_RESIZE;
  #endif
-#ifdef CONFIG_LUSTRE_FS_POSIX_ACL
-       data->ocd_connect_flags |= OBD_CONNECT_ACL | OBD_CONNECT_UMASK |
-                                  OBD_CONNECT_LARGE_ACL;
-#endif
+       data->ocd_connect_flags |= OBD_CONNECT_ACL_FLAGS;
  
         data->ocd_cksum_types = obd_cksum_types_supported_client();
  
@@ -303,6 +321,8 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt)
                 data->ocd_connect_flags &= ~OBD_CONNECT_PINGLESS;
  
         obd_connect_set_secctx(data);
+       if (ll_sbi_has_encrypt(sbi))
+               obd_connect_set_enc(data);
  
  #if defined(CONFIG_SECURITY)
         data->ocd_connect_flags2 |= OBD_CONNECT2_SELINUX_POLICY;
@@ -412,6 +432,14 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt)
         if (obd_connect_has_secctx(data))
                 sbi->ll_flags |= LL_SBI_FILE_SECCTX;
  
+       if (ll_sbi_has_encrypt(sbi) && !obd_connect_has_enc(data)) {
+               if (ll_sbi_has_test_dummy_encryption(sbi))
+                       LCONSOLE_WARN("%s: server %s does not support encryption feature, encryption deactivated.\n",
+                                     sbi->ll_fsname,
+                                     sbi->ll_md_exp->exp_obd->obd_name);
+               ll_sbi_set_encrypt(sbi, false);
+       }
+
         if (data->ocd_ibits_known & MDS_INODELOCK_XATTR) {
                 if (!(data->ocd_connect_flags & OBD_CONNECT_MAX_EASIZE)) {
                         LCONSOLE_INFO("%s: disabling xattr cache due to "
@@ -448,23 +476,6 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt)
                                   OBD_CONNECT_PINGLESS | OBD_CONNECT_LFSCK |
                                   OBD_CONNECT_BULK_MBITS | OBD_CONNECT_SHORTIO |
                                   OBD_CONNECT_FLAGS2 | OBD_CONNECT_GRANT_SHRINK;
-
-/* The client currently advertises support for OBD_CONNECT_LOCKAHEAD_OLD so it
- * can interoperate with an older version of lockahead which was released prior
- * to landing in master. This support will be dropped when 2.13 development
- * starts.  At the point, we should not just drop the connect flag (below), we
- * should also remove the support in the code.
- *
- * Removing it means a few things:
- * 1. Remove this section here
- * 2. Remove CEF_NONBLOCK in ll_file_lockahead()
- * 3. Remove function exp_connect_lockahead_old
- * 4. Remove LDLM_FL_LOCKAHEAD_OLD_RESERVED in lustre_dlm_flags.h
- * */
-#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 12, 50, 0)
-       data->ocd_connect_flags |= OBD_CONNECT_LOCKAHEAD_OLD;
-#endif
-
         data->ocd_connect_flags2 = OBD_CONNECT2_LOCKAHEAD |
                                    OBD_CONNECT2_INC_XID;
  
@@ -490,6 +501,9 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt)
         if (sbi->ll_flags & LL_SBI_ALWAYS_PING)
                 data->ocd_connect_flags &= ~OBD_CONNECT_PINGLESS;
  
+       if (ll_sbi_has_encrypt(sbi))
+               obd_connect_set_enc(data);
+
         CDEBUG(D_RPCTRACE, "ocd_connect_flags: %#llx ocd_version: %d "
                "ocd_grant: %d\n", data->ocd_connect_flags,
                data->ocd_version, data->ocd_grant);
@@ -513,12 +527,22 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt)
                 GOTO(out_md, err);
         }
  
+       if (ll_sbi_has_encrypt(sbi) &&
+           !obd_connect_has_enc(&sbi->ll_dt_obd->u.lov.lov_ocd)) {
+               if (ll_sbi_has_test_dummy_encryption(sbi))
+                       LCONSOLE_WARN("%s: server %s does not support encryption feature, encryption deactivated.\n",
+                                     sbi->ll_fsname, dt);
+               ll_sbi_set_encrypt(sbi, false);
+       } else if (ll_sbi_has_test_dummy_encryption(sbi)) {
+               LCONSOLE_WARN("Test dummy encryption mode enabled\n");
+       }
+
         sbi->ll_dt_exp->exp_connect_data = *data;
  
         /* Don't change value if it was specified in the config log */
         if (sbi->ll_ra_info.ra_max_read_ahead_whole_pages == -1) {
                 sbi->ll_ra_info.ra_max_read_ahead_whole_pages =
-                       max_t(unsigned long, SBI_DEFAULT_READAHEAD_WHOLE_MAX,
+                       max_t(unsigned long, SBI_DEFAULT_READ_AHEAD_WHOLE_MAX,
                               (data->ocd_brw_size >> PAGE_SHIFT));
                 if (sbi->ll_ra_info.ra_max_read_ahead_whole_pages >
                     sbi->ll_ra_info.ra_max_pages_per_file)
@@ -560,6 +584,9 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt)
  #if THREAD_SIZE >= 8192 /*b=17630*/
         sb->s_export_op = &lustre_export_operations;
  #endif
+#ifdef HAVE_LUSTRE_CRYPTO
+       llcrypt_set_ops(sb, &lustre_cryptops);
+#endif
  
         /* make root inode
          * XXX: move this to after cbd setup? */
@@ -600,12 +627,7 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt)
         ptlrpc_req_finished(request);
  
         if (IS_ERR(root)) {
-#ifdef CONFIG_LUSTRE_FS_POSIX_ACL
-               if (lmd.posix_acl) {
-                       posix_acl_release(lmd.posix_acl);
-                       lmd.posix_acl = NULL;
-               }
-#endif
+               lmd_clear_acl(&lmd);
                 err = IS_ERR(root) ? PTR_ERR(root) : -EBADF;
                 root = NULL;
                 CERROR("%s: bad ll_iget() for root: rc = %d\n",
@@ -809,10 +831,9 @@ void ll_kill_super(struct super_block *sb)
                 sb->s_dev = sbi->ll_sdev_orig;
  
                 /* wait running statahead threads to quit */
-               while (atomic_read(&sbi->ll_sa_running) > 0) {
-                       set_current_state(TASK_UNINTERRUPTIBLE);
-                       schedule_timeout(cfs_time_seconds(1) >> 3);
-               }
+               while (atomic_read(&sbi->ll_sa_running) > 0)
+                       schedule_timeout_uninterruptible(
+                               cfs_time_seconds(1) >> 3);
         }
  
         EXIT;
@@ -946,6 +967,25 @@ static int ll_options(char *options, struct ll_sb_info *sbi)
                         *flags |= tmp;
                         goto next;
                 }
+               tmp = ll_set_opt("test_dummy_encryption", s1,
+                                LL_SBI_TEST_DUMMY_ENCRYPTION);
+               if (tmp) {
+#ifdef HAVE_LUSTRE_CRYPTO
+                       *flags |= tmp;
+#else
+                       LCONSOLE_WARN("Test dummy encryption mount option ignored: encryption not supported\n");
+#endif
+                       goto next;
+               }
+               tmp = ll_set_opt("noencrypt", s1, LL_SBI_ENCRYPT);
+               if (tmp) {
+#ifdef HAVE_LUSTRE_CRYPTO
+                       *flags &= ~tmp;
+#else
+                       LCONSOLE_WARN("noencrypt mount option ignored: encryption not supported\n");
+#endif
+                       goto next;
+               }
                  LCONSOLE_ERROR_MSG(0x152, "Unknown option '%s', won't mount.\n",
                                     s1);
                  RETURN(-EINVAL);
@@ -993,8 +1033,9 @@ void ll_lli_init(struct ll_inode_info *lli)
                 init_rwsem(&lli->lli_lsm_sem);
         } else {
                 mutex_init(&lli->lli_size_mutex);
+               mutex_init(&lli->lli_setattr_mutex);
                 lli->lli_symlink_name = NULL;
-               init_rwsem(&lli->lli_trunc_sem);
+               ll_trunc_sem_init(&lli->lli_trunc_sem);
                 range_lock_tree_init(&lli->lli_write_tree);
                 init_rwsem(&lli->lli_glimpse_sem);
                 lli->lli_glimpse_time = ktime_set(0, 0);
@@ -1062,7 +1103,7 @@ int ll_fill_super(struct super_block *sb)
         char    *profilenm = get_profile_name(sb);
         struct config_llog_instance *cfg;
         /* %p for void* in printf needs 16+2 characters: 0xffffffffffffffff */
-       const int instlen = 16 + 2;
+       const int instlen = LUSTRE_MAXINSTANCE + 2;
         unsigned long cfg_instance = ll_get_cfg_instance(sb);
         char name[MAX_STRING_SIZE];
         int md_len = 0;
@@ -1077,8 +1118,6 @@ int ll_fill_super(struct super_block *sb)
         CDEBUG(D_VFSTRACE, "VFS Op: cfg_instance %s-%016lx (sb %p)\n",
                profilenm, cfg_instance, sb);
  
-       try_module_get(THIS_MODULE);
-
         OBD_ALLOC_PTR(cfg);
         if (cfg == NULL)
                 GOTO(out_free_cfg, err = -ENOMEM);
@@ -1097,7 +1136,7 @@ int ll_fill_super(struct super_block *sb)
  
         /* UUID handling */
         generate_random_uuid(uuid.b);
-       snprintf(sbi->ll_sb_uuid.uuid, UUID_SIZE, "%pU", uuid.b);
+       snprintf(sbi->ll_sb_uuid.uuid, sizeof(sbi->ll_sb_uuid), "%pU", uuid.b);
  
         CDEBUG(D_CONFIG, "llite sb uuid: %s\n", sbi->ll_sb_uuid.uuid);
  
@@ -1236,14 +1275,13 @@ void ll_put_super(struct super_block *sb)
  
         /* Wait for unstable pages to be committed to stable storage */
         if (force == 0) {
-               struct l_wait_info lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL);
-               rc = l_wait_event(sbi->ll_cache->ccc_unstable_waitq,
-                                 atomic_long_read(&sbi->ll_cache->ccc_unstable_nr) == 0,
-                                 &lwi);
+               rc = l_wait_event_abortable(
+                       sbi->ll_cache->ccc_unstable_waitq,
+                       atomic_long_read(&sbi->ll_cache->ccc_unstable_nr) == 0);
         }
  
         ccc_count = atomic_long_read(&sbi->ll_cache->ccc_unstable_nr);
-       if (force == 0 && rc != -EINTR)
+       if (force == 0 && rc != -ERESTARTSYS)
                 LASSERTF(ccc_count == 0, "count: %li\n", ccc_count);
  
         /* We need to set force before the lov_disconnect in
@@ -1479,6 +1517,7 @@ static int ll_update_lsm_md(struct inode *inode, struct lustre_md *md)
  {
         struct ll_inode_info *lli = ll_i2info(inode);
         struct lmv_stripe_md *lsm = md->lmv;
+       struct cl_attr  *attr;
         int rc = 0;
  
         ENTRY;
@@ -1502,69 +1541,63 @@ static int ll_update_lsm_md(struct inode *inode, struct lustre_md *md)
          * normally dir layout doesn't change, only take read lock to check
          * that to avoid blocking other MD operations.
          */
-       if (lli->lli_lsm_md)
-               down_read(&lli->lli_lsm_sem);
-       else
-               down_write(&lli->lli_lsm_sem);
+       down_read(&lli->lli_lsm_sem);
  
-       /*
-        * if dir layout mismatch, check whether version is increased, which
-        * means layout is changed, this happens in dir migration and lfsck.
+       /* a concurrent lookup already initialized lsm, and it is unchanged */
+       if (lli->lli_lsm_md && lsm_md_eq(lli->lli_lsm_md, lsm))
+               GOTO(unlock, rc = 0);
+
+       /* if dir layout doesn't match, check whether version is increased,
+        * which means layout is changed, this happens in dir split/merge and
+        * lfsck.
          *
          * foreign LMV should not change.
          */
-       if (lli->lli_lsm_md && !lsm_md_eq(lli->lli_lsm_md, lsm)) {
-               if (lmv_dir_striped(lli->lli_lsm_md) &&
-                   lsm->lsm_md_layout_version <=
-                   lli->lli_lsm_md->lsm_md_layout_version) {
-                       CERROR("%s: "DFID" dir layout mismatch:\n",
-                              ll_i2sbi(inode)->ll_fsname,
-                              PFID(&lli->lli_fid));
-                       lsm_md_dump(D_ERROR, lli->lli_lsm_md);
-                       lsm_md_dump(D_ERROR, lsm);
-                       GOTO(unlock, rc = -EINVAL);
-               }
+       if (lli->lli_lsm_md && lmv_dir_striped(lli->lli_lsm_md) &&
+           lsm->lsm_md_layout_version <=
+           lli->lli_lsm_md->lsm_md_layout_version) {
+               CERROR("%s: "DFID" dir layout mismatch:\n",
+                      ll_i2sbi(inode)->ll_fsname, PFID(&lli->lli_fid));
+               lsm_md_dump(D_ERROR, lli->lli_lsm_md);
+               lsm_md_dump(D_ERROR, lsm);
+               GOTO(unlock, rc = -EINVAL);
+       }
  
-               /* layout changed, switch to write lock */
-               up_read(&lli->lli_lsm_sem);
-               down_write(&lli->lli_lsm_sem);
-               ll_dir_clear_lsm_md(inode);
+       up_read(&lli->lli_lsm_sem);
+       down_write(&lli->lli_lsm_sem);
+       /* clear existing lsm */
+       if (lli->lli_lsm_md) {
+               lmv_free_memmd(lli->lli_lsm_md);
+               lli->lli_lsm_md = NULL;
         }
  
-       /* set directory layout */
-       if (!lli->lli_lsm_md) {
-               struct cl_attr  *attr;
+       rc = ll_init_lsm_md(inode, md);
+       up_write(&lli->lli_lsm_sem);
  
-               rc = ll_init_lsm_md(inode, md);
-               up_write(&lli->lli_lsm_sem);
-               if (rc != 0)
-                       RETURN(rc);
+       if (rc)
+               RETURN(rc);
  
-               /* set md->lmv to NULL, so the following free lustre_md
-                * will not free this lsm */
-               md->lmv = NULL;
+       /* set md->lmv to NULL, so the following free lustre_md will not free
+        * this lsm.
+        */
+       md->lmv = NULL;
  
-               /*
-                * md_merge_attr() may take long, since lsm is already set,
-                * switch to read lock.
-                */
-               down_read(&lli->lli_lsm_sem);
+       /* md_merge_attr() may take long, since lsm is already set, switch to
+        * read lock.
+        */
+       down_read(&lli->lli_lsm_sem);
  
-               if (!lmv_dir_striped(lli->lli_lsm_md))
-                       GOTO(unlock, rc);
+       if (!lmv_dir_striped(lli->lli_lsm_md))
+               GOTO(unlock, rc = 0);
  
-               OBD_ALLOC_PTR(attr);
-               if (attr == NULL)
-                       GOTO(unlock, rc = -ENOMEM);
-
-               /* validate the lsm */
-               rc = md_merge_attr(ll_i2mdexp(inode), lsm, attr,
-                                  ll_md_blocking_ast);
-               if (rc != 0) {
-                       OBD_FREE_PTR(attr);
-                       GOTO(unlock, rc);
-               }
+       OBD_ALLOC_PTR(attr);
+       if (!attr)
+               GOTO(unlock, rc = -ENOMEM);
  
+       /* validate the lsm */
+       rc = md_merge_attr(ll_i2mdexp(inode), &lli->lli_fid, lli->lli_lsm_md,
+                          attr, ll_md_blocking_ast);
+       if (!rc) {
                 if (md->body->mbo_valid & OBD_MD_FLNLINK)
                         md->body->mbo_nlink = attr->cat_nlink;
                 if (md->body->mbo_valid & OBD_MD_FLSIZE)
@@ -1575,13 +1608,14 @@ static int ll_update_lsm_md(struct inode *inode, struct lustre_md *md)
                         md->body->mbo_ctime = attr->cat_ctime;
                 if (md->body->mbo_valid & OBD_MD_FLMTIME)
                         md->body->mbo_mtime = attr->cat_mtime;
-
-               OBD_FREE_PTR(attr);
         }
+
+       OBD_FREE_PTR(attr);
+       GOTO(unlock, rc);
  unlock:
         up_read(&lli->lli_lsm_sem);
  
-       RETURN(rc);
+       return rc;
  }
  
  void ll_clear_inode(struct inode *inode)
@@ -1624,13 +1658,8 @@ void ll_clear_inode(struct inode *inode)
  
         ll_xattr_cache_destroy(inode);
  
-#ifdef CONFIG_LUSTRE_FS_POSIX_ACL
         forget_all_cached_acls(inode);
-       if (lli->lli_posix_acl) {
-               posix_acl_release(lli->lli_posix_acl);
-               lli->lli_posix_acl = NULL;
-       }
-#endif
+       lli_clear_acl(lli);
         lli->lli_inode_magic = LLI_INODE_DEAD;
  
         if (S_ISDIR(inode->i_mode))
@@ -1644,6 +1673,8 @@ void ll_clear_inode(struct inode *inode)
          */
         cl_inode_fini(inode);
  
+       llcrypt_put_encryption_info(inode);
+
         EXIT;
  }
  
@@ -1705,6 +1736,162 @@ static int ll_md_setattr(struct dentry *dentry, struct md_op_data *op_data)
         RETURN(rc);
  }
  
+/**
+ * Zero portion of page that is part of @inode.
+ * This implies, if necessary:
+ * - taking cl_lock on range corresponding to concerned page
+ * - grabbing vm page and associating cl_page
+ * - proceeding to clio read
+ * - zeroing range in page
+ * - proceeding to cl_page flush
+ * - releasing cl_lock
+ *
+ * \param[in] inode    inode whose page is zeroed
+ * \param[in] index    index of the page in the inode's mapping
+ * \param[in] offset   byte offset in page to start zeroing from
+ * \param[in] len      number of bytes to zero
+ *
+ * \retval 0           on success
+ * \retval negative    errno on failure
+ */
+int ll_io_zero_page(struct inode *inode, pgoff_t index, pgoff_t offset,
+                   unsigned len)
+{
+       struct ll_inode_info *lli = ll_i2info(inode);
+       struct cl_object *clob = lli->lli_clob;
+       __u16 refcheck;
+       struct lu_env *env = NULL;
+       struct cl_io *io = NULL;
+       struct cl_page *clpage = NULL;
+       struct page *vmpage = NULL;
+       /* widen before shifting: byte offset may not fit in 32 bits */
+       loff_t from = (loff_t)index << PAGE_SHIFT;
+       struct cl_lock *lock = NULL;
+       struct cl_lock_descr *descr = NULL;
+       struct cl_2queue *queue = NULL;
+       struct cl_sync_io *anchor = NULL;
+       bool holdinglock = false;
+       bool lockedbymyself = true;
+       int rc;
+
+       ENTRY;
+
+       env = cl_env_get(&refcheck);
+       if (IS_ERR(env))
+               RETURN(PTR_ERR(env));
+
+       io = vvp_env_thread_io(env);
+       io->ci_obj = clob;
+       rc = cl_io_rw_init(env, io, CIT_WRITE, from, PAGE_SIZE);
+       if (rc)
+               GOTO(putenv, rc);
+
+       lock = vvp_env_lock(env);
+       descr = &lock->cll_descr;
+       descr->cld_obj   = io->ci_obj;
+       descr->cld_start = cl_index(io->ci_obj, from);
+       descr->cld_end   = cl_index(io->ci_obj, from + PAGE_SIZE - 1);
+       descr->cld_mode  = CLM_WRITE;
+       descr->cld_enq_flags = CEF_MUST | CEF_NONBLOCK;
+
+       /* request lock for page */
+       rc = cl_lock_request(env, io, lock);
+       /* -ECANCELED indicates a matching lock with a different extent
+        * was already present, and -EEXIST indicates a matching lock
+        * on exactly the same extent was already present.
+        * In both cases it means we are covered.
+        */
+       if (rc == -ECANCELED || rc == -EEXIST)
+               rc = 0;
+       else if (rc < 0)
+               GOTO(iofini, rc);
+       else
+               holdinglock = true;
+
+       /* grab page */
+       vmpage = grab_cache_page_nowait(inode->i_mapping, index);
+       if (vmpage == NULL)
+               GOTO(rellock, rc = -EOPNOTSUPP);
+
+       if (!PageDirty(vmpage)) {
+               /* associate cl_page */
+               clpage = cl_page_find(env, clob, vmpage->index,
+                                     vmpage, CPT_CACHEABLE);
+               if (IS_ERR(clpage))
+                       GOTO(pagefini, rc = PTR_ERR(clpage));
+
+               cl_page_assume(env, io, clpage);
+       }
+
+       if (!PageUptodate(vmpage) && !PageDirty(vmpage) &&
+           !PageWriteback(vmpage)) {
+               /* read page */
+               /* set PagePrivate2 to detect special case of empty page
+                * in osc_brw_fini_request()
+                */
+               SetPagePrivate2(vmpage);
+               rc = ll_io_read_page(env, io, clpage, NULL);
+               if (!PagePrivate2(vmpage))
+                       /* PagePrivate2 was cleared in osc_brw_fini_request()
+                        * meaning we read an empty page. In this case, in order
+                        * to avoid allocating unnecessary block in truncated
+                        * file, we must not zero and write as below. Subsequent
+                        * server-side truncate will handle things correctly.
+                        */
+                       GOTO(clpfini, rc = 0);
+               ClearPagePrivate2(vmpage);
+               if (rc)
+                       GOTO(clpfini, rc);
+               lockedbymyself = trylock_page(vmpage);
+               cl_page_assume(env, io, clpage);
+       }
+
+       /* zero range in page */
+       zero_user(vmpage, offset, len);
+
+       if (holdinglock && clpage) {
+               /* explicitly write newly modified page */
+               queue = &io->ci_queue;
+               cl_2queue_init(queue);
+               anchor = &vvp_env_info(env)->vti_anchor;
+               cl_sync_io_init(anchor, 1);
+               clpage->cp_sync_io = anchor;
+               cl_2queue_add(queue, clpage);
+               rc = cl_io_submit_rw(env, io, CRT_WRITE, queue);
+               if (rc)
+                       GOTO(queuefini1, rc);
+               rc = cl_sync_io_wait(env, anchor, 0);
+               if (rc)
+                       GOTO(queuefini2, rc);
+               cl_page_assume(env, io, clpage);
+
+queuefini2:
+               cl_2queue_discard(env, io, queue);
+queuefini1:
+               cl_2queue_disown(env, io, queue);
+               cl_2queue_fini(env, queue);
+       }
+
+clpfini:
+       if (clpage)
+               cl_page_put(env, clpage);
+pagefini:
+       if (lockedbymyself) {
+               unlock_page(vmpage);
+               put_page(vmpage);
+       }
+rellock:
+       if (holdinglock)
+               cl_lock_release(env, lock);
+iofini:
+       cl_io_fini(env, io);
+putenv:
+       if (env)
+               cl_env_put(env, &refcheck);
+
+       RETURN(rc);
+}
+
  /* If this inode has objects allocated to it (lsm != NULL), then the OST
   * object(s) determine the file size and mtime.  Otherwise, the MDS will
   * keep these values until such a time that objects are allocated for it.
@@ -1785,11 +1972,8 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr,
                        (s64)attr->ia_mtime.tv_sec, (s64)attr->ia_ctime.tv_sec,
                        ktime_get_real_seconds());
  
-       if (S_ISREG(inode->i_mode)) {
-               if (attr->ia_valid & ATTR_SIZE)
-                       inode_dio_write_done(inode);
+       if (S_ISREG(inode->i_mode))
                 inode_unlock(inode);
-       }
  
         /* We always do an MDS RPC, even if we're only changing the size;
          * only the MDS knows whether truncate() should fail with -ETXTBUSY */
@@ -1808,7 +1992,7 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr,
         }
  
         if (attr->ia_valid & ATTR_FILE) {
-               struct ll_file_data *fd = LUSTRE_FPRIVATE(attr->ia_file);
+               struct ll_file_data *fd = attr->ia_file->private_data;
  
                 if (fd->fd_lease_och)
                         op_data->op_bias |= MDS_TRUNC_KEEP_LEASE;
@@ -1839,6 +2023,8 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr,
                                 GOTO(out, rc);
                         }
                 } else {
+                       unsigned int flags = 0;
+
                         /* For truncate and utimes sending attributes to OSTs,
                          * setting mtime/atime to the past will be performed
                          * under PW [0:EOF] extent lock (new_size:EOF for
@@ -1847,7 +2033,30 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr,
                          * it is necessary due to possible time
                          * de-synchronization between MDT inode and OST objects
                          */
-                       rc = cl_setattr_ost(lli->lli_clob, attr, xvalid, 0);
+                       if (S_ISREG(inode->i_mode) && IS_ENCRYPTED(inode) &&
+                           attr->ia_valid & ATTR_SIZE) {
+                               xvalid |= OP_XVALID_FLAGS;
+                               flags = LUSTRE_ENCRYPT_FL;
+                               /* Call to ll_io_zero_page is not necessary if
+                                * truncating on PAGE_SIZE boundary, because
+                                * whole pages will be wiped.
+                                * In case of Direct IO, all we need is to set
+                                * new size.
+                                */
+                               if (attr->ia_size & ~PAGE_MASK &&
+                                   !(attr->ia_valid & ATTR_FILE &&
+                                     attr->ia_file->f_flags & O_DIRECT)) {
+                                       pgoff_t offset =
+                                               attr->ia_size & (PAGE_SIZE - 1);
+
+                                       rc = ll_io_zero_page(inode,
+                                                   attr->ia_size >> PAGE_SHIFT,
+                                                   offset, PAGE_SIZE - offset);
+                                       if (rc)
+                                               GOTO(out, rc);
+                               }
+                       }
+                       rc = cl_setattr_ost(lli->lli_clob, attr, xvalid, flags);
                 }
         }
  
@@ -1911,6 +2120,11 @@ int ll_setattr(struct dentry *de, struct iattr *attr)
  {
         int mode = de->d_inode->i_mode;
         enum op_xvalid xvalid = 0;
+       int rc;
+
+       rc = llcrypt_prepare_setattr(de, attr);
+       if (rc)
+               return rc;
  
         if ((attr->ia_valid & (ATTR_CTIME|ATTR_SIZE|ATTR_MODE)) ==
                               (ATTR_CTIME|ATTR_SIZE|ATTR_MODE))
@@ -1960,7 +2174,7 @@ int ll_statfs_internal(struct ll_sb_info *sbi, struct obd_statfs *osfs,
         CDEBUG(D_SUPER, "MDC blocks %llu/%llu objects %llu/%llu\n",
               osfs->os_bavail, osfs->os_blocks, osfs->os_ffree, osfs->os_files);
  
-       if (osfs->os_state & OS_STATE_SUM)
+       if (osfs->os_state & OS_STATFS_SUM)
                 GOTO(out, rc);
  
         rc = obd_statfs(NULL, sbi->ll_dt_exp, &obd_osfs, max_age, flags);
@@ -2054,6 +2268,8 @@ void ll_inode_size_unlock(struct inode *inode)
  
  void ll_update_inode_flags(struct inode *inode, int ext_flags)
  {
+       /* do not clear encryption flag */
+       ext_flags |= ll_inode_to_ext_flags(inode->i_flags) & LUSTRE_ENCRYPT_FL;
         inode->i_flags = ll_ext_to_inode_flags(ext_flags);
         if (ext_flags & LUSTRE_PROJINHERIT_FL)
                 ll_file_set_flag(ll_i2info(inode), LLIF_PROJECT_INHERIT);
@@ -2080,15 +2296,9 @@ int ll_update_inode(struct inode *inode, struct lustre_md *md)
                         return rc;
         }
  
-#ifdef CONFIG_LUSTRE_FS_POSIX_ACL
-       if (body->mbo_valid & OBD_MD_FLACL) {
-               spin_lock(&lli->lli_lock);
-               if (lli->lli_posix_acl)
-                       posix_acl_release(lli->lli_posix_acl);
-               lli->lli_posix_acl = md->posix_acl;
-               spin_unlock(&lli->lli_lock);
-       }
-#endif
+       if (body->mbo_valid & OBD_MD_FLACL)
+               lli_replace_acl(lli, md);
+
         inode->i_ino = cl_fid_build_ino(&body->mbo_fid1,
                                         sbi->ll_flags & LL_SBI_32BIT_API);
         inode->i_generation = cl_fid_build_gen(&body->mbo_fid1);
@@ -2116,6 +2326,9 @@ int ll_update_inode(struct inode *inode, struct lustre_md *md)
                 lli->lli_ctime = body->mbo_ctime;
         }
  
+       if (body->mbo_valid & OBD_MD_FLBTIME)
+               lli->lli_btime = body->mbo_btime;
+
         /* Clear i_flags to remove S_NOSEC before permissions are updated */
         if (body->mbo_valid & OBD_MD_FLFLAGS)
                 ll_update_inode_flags(inode, body->mbo_flags);
@@ -2128,12 +2341,6 @@ int ll_update_inode(struct inode *inode, struct lustre_md *md)
                                 (body->mbo_mode & S_IFMT);
  
         LASSERT(inode->i_mode != 0);
-       if (S_ISREG(inode->i_mode))
-               inode->i_blkbits = min(PTLRPC_MAX_BRW_BITS + 1,
-                                      LL_MAX_BLKSIZE_BITS);
-       else
-               inode->i_blkbits = inode->i_sb->s_blocksize_bits;
-
         if (body->mbo_valid & OBD_MD_FLUID)
                 inode->i_uid = make_kuid(&init_user_ns, body->mbo_uid);
         if (body->mbo_valid & OBD_MD_FLGID)
@@ -2160,6 +2367,7 @@ int ll_update_inode(struct inode *inode, struct lustre_md *md)
  
         LASSERT(fid_seq(&lli->lli_fid) != 0);
  
+       lli->lli_attr_valid = body->mbo_valid;
         if (body->mbo_valid & OBD_MD_FLSIZE) {
                 i_size_write(inode, body->mbo_size);
  
@@ -2169,6 +2377,11 @@ int ll_update_inode(struct inode *inode, struct lustre_md *md)
  
                 if (body->mbo_valid & OBD_MD_FLBLOCKS)
                         inode->i_blocks = body->mbo_blocks;
+       } else {
+               if (body->mbo_valid & OBD_MD_FLLAZYSIZE)
+                       lli->lli_lazysize = body->mbo_size;
+               if (body->mbo_valid & OBD_MD_FLLAZYBLOCKS)
+                       lli->lli_lazyblocks = body->mbo_blocks;
         }
  
         if (body->mbo_valid & OBD_MD_TSTATE) {
@@ -2270,9 +2483,9 @@ void ll_delete_inode(struct inode *inode)
          */
         nrpages = mapping->nrpages;
         if (nrpages) {
-               xa_lock_irqsave(&mapping->i_pages, flags);
+               ll_xa_lock_irqsave(&mapping->i_pages, flags);
                 nrpages = mapping->nrpages;
-               xa_unlock_irqrestore(&mapping->i_pages, flags);
+               ll_xa_unlock_irqrestore(&mapping->i_pages, flags);
         } /* Workaround end */
  
         LASSERTF(nrpages == 0, "%s: inode="DFID"(%p) nrpages=%lu, "
@@ -2568,12 +2781,7 @@ int ll_prep_inode(struct inode **inode, struct ptlrpc_request *req,
                                              sbi->ll_flags & LL_SBI_32BIT_API),
                                  &md);
                 if (IS_ERR(*inode)) {
-#ifdef CONFIG_LUSTRE_FS_POSIX_ACL
-                        if (md.posix_acl) {
-                                posix_acl_release(md.posix_acl);
-                                md.posix_acl = NULL;
-                        }
-#endif
+                        lmd_clear_acl(&md);
                          rc = IS_ERR(*inode) ? PTR_ERR(*inode) : -ENOMEM;
                          *inode = NULL;
                          CERROR("new_inode -fatal: rc %d\n", rc);
@@ -2618,8 +2826,10 @@ out:
         /* cleanup will be done if necessary */
         md_free_lustre_md(sbi->ll_md_exp, &md);
  
-       if (rc != 0 && it != NULL && it->it_op & IT_OPEN)
+       if (rc != 0 && it != NULL && it->it_op & IT_OPEN) {
+               ll_intent_drop_lock(it);
                 ll_open_cleanup(sb != NULL ? sb : (*inode)->i_sb, req);
+       }
  
         return rc;
  }
@@ -2674,12 +2884,12 @@ out_statfs:
  void ll_unlock_md_op_lsm(struct md_op_data *op_data)
  {
         if (op_data->op_mea2_sem) {
-               up_read(op_data->op_mea2_sem);
+               up_read_non_owner(op_data->op_mea2_sem);
                 op_data->op_mea2_sem = NULL;
         }
  
         if (op_data->op_mea1_sem) {
-               up_read(op_data->op_mea1_sem);
+               up_read_non_owner(op_data->op_mea1_sem);
                 op_data->op_mea1_sem = NULL;
         }
  }
@@ -2716,7 +2926,7 @@ struct md_op_data *ll_prep_md_op_data(struct md_op_data *op_data,
         op_data->op_code = opc;
  
         if (S_ISDIR(i1->i_mode)) {
-               down_read(&ll_i2info(i1)->lli_lsm_sem);
+               down_read_non_owner(&ll_i2info(i1)->lli_lsm_sem);
                 op_data->op_mea1_sem = &ll_i2info(i1)->lli_lsm_sem;
                 op_data->op_mea1 = ll_i2info(i1)->lli_lsm_md;
                 op_data->op_default_mea1 = ll_i2info(i1)->lli_default_lsm_md;
@@ -2726,7 +2936,10 @@ struct md_op_data *ll_prep_md_op_data(struct md_op_data *op_data,
                 op_data->op_fid2 = *ll_inode2fid(i2);
                 if (S_ISDIR(i2->i_mode)) {
                         if (i2 != i1) {
-                               down_read(&ll_i2info(i2)->lli_lsm_sem);
+                               /* i2 is typically a child of i1, and MUST be
+                                * further from the root to avoid deadlocks.
+                                */
+                               down_read_non_owner(&ll_i2info(i2)->lli_lsm_sem);
                                 op_data->op_mea2_sem =
                                                 &ll_i2info(i2)->lli_lsm_sem;
                         }
@@ -2764,24 +2977,16 @@ void ll_finish_md_op_data(struct md_op_data *op_data)
         ll_unlock_md_op_lsm(op_data);
         security_release_secctx(op_data->op_file_secctx,
                                 op_data->op_file_secctx_size);
-        OBD_FREE_PTR(op_data);
+       llcrypt_free_ctx(op_data->op_file_encctx, op_data->op_file_encctx_size);
+       OBD_FREE_PTR(op_data);
  }
  
-#ifdef HAVE_SUPEROPS_USE_DENTRY
  int ll_show_options(struct seq_file *seq, struct dentry *dentry)
-#else
-int ll_show_options(struct seq_file *seq, struct vfsmount *vfs)
-#endif
  {
         struct ll_sb_info *sbi;
  
-#ifdef HAVE_SUPEROPS_USE_DENTRY
-       LASSERT((seq != NULL) && (dentry != NULL));
+       LASSERT(seq && dentry);
         sbi = ll_s2sbi(dentry->d_sb);
-#else
-       LASSERT((seq != NULL) && (vfs != NULL));
-       sbi = ll_s2sbi(vfs->mnt_sb);
-#endif
  
         if (sbi->ll_flags & LL_SBI_NOLCK)
                 seq_puts(seq, ",nolock");
@@ -2809,6 +3014,14 @@ int ll_show_options(struct seq_file *seq, struct vfsmount *vfs)
         if (sbi->ll_flags & LL_SBI_ALWAYS_PING)
                 seq_puts(seq, ",always_ping");
  
+       if (ll_sbi_has_test_dummy_encryption(sbi))
+               seq_puts(seq, ",test_dummy_encryption");
+
+       if (ll_sbi_has_encrypt(sbi))
+               seq_puts(seq, ",encrypt");
+       else
+               seq_puts(seq, ",noencrypt");
+
         RETURN(0);
  }
  
@@ -2928,7 +3141,7 @@ void ll_compute_rootsquash_state(struct ll_sb_info *sbi)
                 matched = false;
                 i = 0;
                 while (LNetGetId(i++, &id) != -ENOENT) {
-                       if (LNET_NETTYP(LNET_NIDNET(id.nid)) == LOLND)
+                       if (id.nid == LNET_NID_LO_0)
                                 continue;
                         if (cfs_match_nid(id.nid, &squash->rsi_nosquash_nids)) {
                                 matched = true;