Whamcloud - gitweb
LU-7422 llite: don't panic when fid is insane
[fs/lustre-release.git] / lustre / llite / llite_lib.c
index 30e16fc..cc1bab4 100644 (file)
@@ -27,7 +27,7 @@
  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  * Use is subject to license terms.
  *
- * Copyright (c) 2011, 2013, Intel Corporation.
+ * Copyright (c) 2011, 2015, Intel Corporation.
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
 #define DEBUG_SUBSYSTEM S_LLITE
 
 #include <linux/module.h>
+#include <linux/statfs.h>
+#include <linux/time.h>
 #include <linux/types.h>
 #include <linux/version.h>
 #include <linux/mm.h>
+#include <linux/user_namespace.h>
+#ifdef HAVE_UIDGID_HEADER
+# include <linux/uidgid.h>
+#endif
 
 #include <lustre_ioctl.h>
-#include <lustre_lite.h>
 #include <lustre_ha.h>
 #include <lustre_dlm.h>
 #include <lprocfs_status.h>
@@ -59,9 +64,6 @@
 
 struct kmem_cache *ll_file_data_slab;
 
-static struct list_head ll_super_blocks = LIST_HEAD_INIT(ll_super_blocks);
-static DEFINE_SPINLOCK(ll_sb_lock);
-
 #ifndef log2
 #define log2(n) ffz(~(n))
 #endif
@@ -91,14 +93,11 @@ static struct ll_sb_info *ll_init_sbi(void)
        lru_page_max = pages / 2;
 
        /* initialize ll_cache data */
-       atomic_set(&sbi->ll_cache.ccc_users, 0);
-       sbi->ll_cache.ccc_lru_max = lru_page_max;
-       atomic_set(&sbi->ll_cache.ccc_lru_left, lru_page_max);
-       spin_lock_init(&sbi->ll_cache.ccc_lru_lock);
-       INIT_LIST_HEAD(&sbi->ll_cache.ccc_lru);
-
-       atomic_set(&sbi->ll_cache.ccc_unstable_nr, 0);
-       init_waitqueue_head(&sbi->ll_cache.ccc_unstable_waitq);
+       sbi->ll_cache = cl_cache_init(lru_page_max);
+       if (sbi->ll_cache == NULL) {
+               OBD_FREE(sbi, sizeof(*sbi));
+               RETURN(NULL);
+       }
 
        sbi->ll_ra_info.ra_max_pages_per_file = min(pages / 32,
                                           SBI_DEFAULT_READAHEAD_MAX);
@@ -112,10 +111,6 @@ static struct ll_sb_info *ll_init_sbi(void)
         class_uuid_unparse(uuid, &sbi->ll_sb_uuid);
         CDEBUG(D_CONFIG, "generated uuid: %s\n", sbi->ll_sb_uuid.uuid);
 
-       spin_lock(&ll_sb_lock);
-       list_add_tail(&sbi->ll_list, &ll_super_blocks);
-       spin_unlock(&ll_sb_lock);
-
         sbi->ll_flags |= LL_SBI_VERBOSE;
 #ifdef ENABLE_CHECKSUM
         sbi->ll_flags |= LL_SBI_CHECKSUM;
@@ -136,6 +131,7 @@ static struct ll_sb_info *ll_init_sbi(void)
        sbi->ll_sa_max = LL_SA_RPC_DEF;
        atomic_set(&sbi->ll_sa_total, 0);
        atomic_set(&sbi->ll_sa_wrong, 0);
+       atomic_set(&sbi->ll_sa_running, 0);
        atomic_set(&sbi->ll_agl_total, 0);
        sbi->ll_flags |= LL_SBI_AGL_ENABLED;
 
@@ -154,11 +150,12 @@ static void ll_free_sbi(struct super_block *sb)
        ENTRY;
 
        if (sbi != NULL) {
-               spin_lock(&ll_sb_lock);
-               list_del(&sbi->ll_list);
-               spin_unlock(&ll_sb_lock);
                if (!list_empty(&sbi->ll_squash.rsi_nosquash_nids))
                        cfs_free_nidlist(&sbi->ll_squash.rsi_nosquash_nids);
+               if (sbi->ll_cache != NULL) {
+                       cl_cache_decref(sbi->ll_cache);
+                       sbi->ll_cache = NULL;
+               }
                OBD_FREE(sbi, sizeof(*sbi));
        }
        EXIT;
@@ -167,17 +164,16 @@ static void ll_free_sbi(struct super_block *sb)
 static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
                                     struct vfsmount *mnt)
 {
-        struct inode *root = 0;
+       struct inode *root = NULL;
         struct ll_sb_info *sbi = ll_s2sbi(sb);
         struct obd_device *obd;
-        struct obd_capa *oc = NULL;
         struct obd_statfs *osfs = NULL;
         struct ptlrpc_request *request = NULL;
         struct obd_connect_data *data = NULL;
         struct obd_uuid *uuid;
         struct md_op_data *op_data;
         struct lustre_md lmd;
-        obd_valid valid;
+       u64 valid;
         int size, err, checksum;
         ENTRY;
 
@@ -211,10 +207,11 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
                                  OBD_CONNECT_LAYOUTLOCK | OBD_CONNECT_PINGLESS |
                                  OBD_CONNECT_MAX_EASIZE |
                                  OBD_CONNECT_FLOCK_DEAD |
-                                 OBD_CONNECT_DISP_STRIPE;
-
-        if (sbi->ll_flags & LL_SBI_SOM_PREVIEW)
-                data->ocd_connect_flags |= OBD_CONNECT_SOM;
+                                 OBD_CONNECT_DISP_STRIPE | OBD_CONNECT_LFSCK |
+                                 OBD_CONNECT_OPEN_BY_FID |
+                                 OBD_CONNECT_DIR_STRIPE |
+                                 OBD_CONNECT_BULK_MBITS |
+                                 OBD_CONNECT_SUBTREE;
 
 #ifdef HAVE_LRU_RESIZE_SUPPORT
         if (sbi->ll_flags & LL_SBI_LRU_RESIZE)
@@ -237,14 +234,6 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
         if (sbi->ll_flags & LL_SBI_USER_XATTR)
                 data->ocd_connect_flags |= OBD_CONNECT_XATTR;
 
-#ifdef HAVE_MS_FLOCK_LOCK
-        /* force vfs to use lustre handler for flock() calls - bug 10743 */
-        sb->s_flags |= MS_FLOCK_LOCK;
-#endif
-#ifdef MS_HAS_NEW_AOPS
-        sb->s_flags |= MS_HAS_NEW_AOPS;
-#endif
-
         if (sbi->ll_flags & LL_SBI_FLOCK)
                 sbi->ll_fop = &ll_file_operations_flock;
         else if (sbi->ll_flags & LL_SBI_LOCALFLOCK)
@@ -257,6 +246,10 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
         if (sbi->ll_flags & LL_SBI_RMT_CLIENT)
                 data->ocd_connect_flags |= OBD_CONNECT_RMT_CLIENT_FORCE;
 
+       /* always ping even if server suppress_pings */
+       if (sbi->ll_flags & LL_SBI_ALWAYS_PING)
+               data->ocd_connect_flags &= ~OBD_CONNECT_PINGLESS;
+
        data->ocd_brw_size = MD_MAX_BRW_SIZE;
 
         err = obd_connect(NULL, &sbi->ll_md_exp, obd, &sbi->ll_sb_uuid, data, NULL);
@@ -316,7 +309,7 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
 
        size = sizeof(*data);
        err = obd_get_info(NULL, sbi->ll_md_exp, sizeof(KEY_CONN_DATA),
-                          KEY_CONN_DATA,  &size, data, NULL);
+                          KEY_CONN_DATA,  &size, data);
        if (err) {
                CERROR("%s: Get connect data failed: rc = %d\n",
                       sbi->ll_md_exp->exp_obd->obd_name, err);
@@ -329,7 +322,6 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
         sb->s_magic = LL_SUPER_MAGIC;
         sb->s_maxbytes = MAX_LFS_FILESIZE;
         sbi->ll_namelen = osfs->os_namelen;
-        sbi->ll_max_rw_chunk = LL_DEFAULT_MAX_RW_CHUNK;
 
         if ((sbi->ll_flags & LL_SBI_USER_XATTR) &&
             !(data->ocd_connect_flags & OBD_CONNECT_XATTR)) {
@@ -364,16 +356,6 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
                 }
         }
 
-        if (data->ocd_connect_flags & OBD_CONNECT_MDS_CAPA) {
-                LCONSOLE_INFO("client enabled MDS capability!\n");
-                sbi->ll_flags |= LL_SBI_MDS_CAPA;
-        }
-
-        if (data->ocd_connect_flags & OBD_CONNECT_OSS_CAPA) {
-                LCONSOLE_INFO("client enabled OSS capability!\n");
-                sbi->ll_flags |= LL_SBI_OSS_CAPA;
-        }
-
         if (data->ocd_connect_flags & OBD_CONNECT_64BITHASH)
                 sbi->ll_flags |= LL_SBI_64BIT_HASH;
 
@@ -401,6 +383,10 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
                GOTO(out_md_fid, err = -ENODEV);
        }
 
+       /* pass client page size via ocd_grant_blkbits, the server should report
+        * back its backend blocksize for grant calculation purpose */
+       data->ocd_grant_blkbits = PAGE_SHIFT;
+
         data->ocd_connect_flags = OBD_CONNECT_GRANT     | OBD_CONNECT_VERSION  |
                                  OBD_CONNECT_REQPORTAL | OBD_CONNECT_BRW_SIZE |
                                   OBD_CONNECT_CANCELSET | OBD_CONNECT_FID      |
@@ -411,10 +397,12 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
                                   OBD_CONNECT_MAXBYTES |
                                  OBD_CONNECT_EINPROGRESS |
                                  OBD_CONNECT_JOBSTATS | OBD_CONNECT_LVB_TYPE |
-                                 OBD_CONNECT_LAYOUTLOCK | OBD_CONNECT_PINGLESS;
+                                 OBD_CONNECT_LAYOUTLOCK |
+                                 OBD_CONNECT_PINGLESS | OBD_CONNECT_LFSCK |
+                                 OBD_CONNECT_BULK_MBITS;
 
-        if (sbi->ll_flags & LL_SBI_SOM_PREVIEW)
-                data->ocd_connect_flags |= OBD_CONNECT_SOM;
+       if (!OBD_FAIL_CHECK(OBD_FAIL_OSC_CONNECT_GRANT_PARAM))
+               data->ocd_connect_flags |= OBD_CONNECT_GRANT_PARAM;
 
         if (!OBD_FAIL_CHECK(OBD_FAIL_OSC_CONNECT_CKSUM)) {
                 /* OBD_CONNECT_CKSUM should always be set, even if checksums are
@@ -435,6 +423,10 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
         if (sbi->ll_flags & LL_SBI_RMT_CLIENT)
                 data->ocd_connect_flags |= OBD_CONNECT_RMT_CLIENT_FORCE;
 
+       /* always ping even if server suppress_pings */
+       if (sbi->ll_flags & LL_SBI_ALWAYS_PING)
+               data->ocd_connect_flags &= ~OBD_CONNECT_PINGLESS;
+
         CDEBUG(D_RPCTRACE, "ocd_connect_flags: "LPX64" ocd_version: %d "
                "ocd_grant: %d\n", data->ocd_connect_flags,
                data->ocd_version, data->ocd_grant);
@@ -475,7 +467,8 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
        mutex_unlock(&sbi->ll_lco.lco_lock);
 
        fid_zero(&sbi->ll_root_fid);
-       err = md_getstatus(sbi->ll_md_exp, &sbi->ll_root_fid, &oc);
+       err = md_get_root(sbi->ll_md_exp, get_mount_fileset(sb),
+                          &sbi->ll_root_fid);
        if (err) {
                CERROR("cannot mds_connect: rc = %d\n", err);
                GOTO(out_lock_cn_cb, err);
@@ -495,8 +488,7 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
 
        /* make root inode
         * XXX: move this to after cbd setup? */
-       valid = OBD_MD_FLGETATTR | OBD_MD_FLBLOCKS | OBD_MD_FLMDSCAPA |
-               OBD_MD_FLMODEASIZE;
+       valid = OBD_MD_FLGETATTR | OBD_MD_FLBLOCKS | OBD_MD_FLMODEASIZE;
        if (sbi->ll_flags & LL_SBI_RMT_CLIENT)
                valid |= OBD_MD_FLRMTPERM;
        else if (sbi->ll_flags & LL_SBI_ACL)
@@ -508,12 +500,10 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
 
        op_data->op_fid1 = sbi->ll_root_fid;
        op_data->op_mode = 0;
-       op_data->op_capa1 = oc;
        op_data->op_valid = valid;
 
        err = md_getattr(sbi->ll_md_exp, op_data, &request);
-       if (oc)
-               capa_put(oc);
+
        OBD_FREE_PTR(op_data);
        if (err) {
                CERROR("%s: md_getattr failed for root: rc = %d\n",
@@ -537,8 +527,6 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
         ptlrpc_req_finished(request);
 
        if (IS_ERR(root)) {
-               if (lmd.lsm)
-                       obd_free_memmd(sbi->ll_dt_exp, &lmd.lsm);
 #ifdef CONFIG_FS_POSIX_ACL
                 if (lmd.posix_acl) {
                         posix_acl_release(lmd.posix_acl);
@@ -551,12 +539,6 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
                 GOTO(out_root, err);
         }
 
-        err = ll_close_thread_start(&sbi->ll_lcq);
-        if (err) {
-                CERROR("cannot start close thread: rc %d\n", err);
-                GOTO(out_root, err);
-        }
-
 #ifdef CONFIG_FS_POSIX_ACL
         if (sbi->ll_flags & LL_SBI_RMT_CLIENT) {
                 rct_init(&sbi->ll_rct);
@@ -571,8 +553,8 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
         cl_sb_init(sb);
 
        err = obd_set_info_async(NULL, sbi->ll_dt_exp, sizeof(KEY_CACHE_SET),
-                                KEY_CACHE_SET, sizeof(sbi->ll_cache),
-                                &sbi->ll_cache, NULL);
+                                KEY_CACHE_SET, sizeof(*sbi->ll_cache),
+                                sbi->ll_cache, NULL);
 
        sb->s_root = d_make_root(root);
        if (sb->s_root == NULL) {
@@ -618,8 +600,6 @@ out_lock_cn_cb:
 out_dt:
         obd_disconnect(sbi->ll_dt_exp);
         sbi->ll_dt_exp = NULL;
-       /* Make sure all OScs are gone, since cl_cache is accessing sbi. */
-       obd_zombie_barrier();
 out_md_fid:
        obd_fid_fini(sbi->ll_md_exp->exp_obd);
 out_md:
@@ -637,51 +617,70 @@ int ll_get_max_mdsize(struct ll_sb_info *sbi, int *lmmsize)
 {
        int size, rc;
 
-       *lmmsize = obd_size_diskmd(sbi->ll_dt_exp, NULL);
+       size = sizeof(*lmmsize);
+       rc = obd_get_info(NULL, sbi->ll_dt_exp, sizeof(KEY_MAX_EASIZE),
+                         KEY_MAX_EASIZE, &size, lmmsize);
+       if (rc != 0) {
+               CERROR("%s: cannot get max LOV EA size: rc = %d\n",
+                      sbi->ll_dt_exp->exp_obd->obd_name, rc);
+               RETURN(rc);
+       }
+
        size = sizeof(int);
        rc = obd_get_info(NULL, sbi->ll_md_exp, sizeof(KEY_MAX_EASIZE),
-                         KEY_MAX_EASIZE, &size, lmmsize, NULL);
+                         KEY_MAX_EASIZE, &size, lmmsize);
        if (rc)
                CERROR("Get max mdsize error rc %d\n", rc);
 
        RETURN(rc);
 }
 
+/**
+ * Get the value of the default_easize parameter.
+ *
+ * \see client_obd::cl_default_mds_easize
+ *
+ * \param[in] sbi      superblock info for this filesystem
+ * \param[out] lmmsize pointer to storage location for value
+ *
+ * \retval 0           on success
+ * \retval negative    negated errno on failure
+ */
 int ll_get_default_mdsize(struct ll_sb_info *sbi, int *lmmsize)
 {
        int size, rc;
 
        size = sizeof(int);
        rc = obd_get_info(NULL, sbi->ll_md_exp, sizeof(KEY_DEFAULT_EASIZE),
-                        KEY_DEFAULT_EASIZE, &size, lmmsize, NULL);
+                        KEY_DEFAULT_EASIZE, &size, lmmsize);
        if (rc)
                CERROR("Get default mdsize error rc %d\n", rc);
 
        RETURN(rc);
 }
 
-int ll_get_max_cookiesize(struct ll_sb_info *sbi, int *lmmsize)
+/**
+ * Set the default_easize parameter to the given value.
+ *
+ * \see client_obd::cl_default_mds_easize
+ *
+ * \param[in] sbi      superblock info for this filesystem
+ * \param[in] lmmsize  the size to set
+ *
+ * \retval 0           on success
+ * \retval negative    negated errno on failure
+ */
+int ll_set_default_mdsize(struct ll_sb_info *sbi, int lmmsize)
 {
-       int size, rc;
-
-       size = sizeof(int);
-       rc = obd_get_info(NULL, sbi->ll_md_exp, sizeof(KEY_MAX_COOKIESIZE),
-                         KEY_MAX_COOKIESIZE, &size, lmmsize, NULL);
-       if (rc)
-               CERROR("Get max cookiesize error rc %d\n", rc);
+       int rc;
 
-       RETURN(rc);
-}
+       if (lmmsize < sizeof(struct lov_mds_md) ||
+           lmmsize > OBD_MAX_DEFAULT_EA_SIZE)
+               return -EINVAL;
 
-int ll_get_default_cookiesize(struct ll_sb_info *sbi, int *lmmsize)
-{
-       int size, rc;
-
-       size = sizeof(int);
-       rc = obd_get_info(NULL, sbi->ll_md_exp, sizeof(KEY_DEFAULT_COOKIESIZE),
-                         KEY_DEFAULT_COOKIESIZE, &size, lmmsize, NULL);
-       if (rc)
-               CERROR("Get default cookiesize error rc %d\n", rc);
+       rc = obd_set_info_async(NULL, sbi->ll_md_exp,
+                               sizeof(KEY_DEFAULT_EASIZE), KEY_DEFAULT_EASIZE,
+                               sizeof(int), &lmmsize, NULL);
 
        RETURN(rc);
 }
@@ -717,7 +716,7 @@ void lustre_dump_dentry(struct dentry *dentry, int recur)
                " flags=0x%x, fsdata=%p, %d subdirs\n", dentry,
                dentry->d_name.len, dentry->d_name.name,
                dentry->d_parent->d_name.len, dentry->d_parent->d_name.name,
-              dentry->d_parent, dentry->d_inode, d_count(dentry),
+              dentry->d_parent, dentry->d_inode, ll_d_count(dentry),
                dentry->d_flags, dentry->d_fsdata, subdirs);
         if (dentry->d_inode != NULL)
                 ll_dump_inode(dentry->d_inode);
@@ -726,7 +725,7 @@ void lustre_dump_dentry(struct dentry *dentry, int recur)
                 return;
 
        list_for_each(tmp, &dentry->d_subdirs) {
-               struct dentry *d = list_entry(tmp, struct dentry, d_u.d_child);
+               struct dentry *d = list_entry(tmp, struct dentry, d_child);
                lustre_dump_dentry(d, recur - 1);
        }
 }
@@ -743,8 +742,6 @@ static void client_common_put_super(struct super_block *sb)
         }
 #endif
 
-        ll_close_thread_shutdown(sbi->ll_lcq);
-
         cl_sb_fini(sb);
 
        list_del(&sbi->ll_conn_chain);
@@ -752,9 +749,6 @@ static void client_common_put_super(struct super_block *sb)
        obd_fid_fini(sbi->ll_dt_exp->exp_obd);
         obd_disconnect(sbi->ll_dt_exp);
         sbi->ll_dt_exp = NULL;
-       /* wait till all OSCs are gone, since cl_cache is accessing sbi.
-        * see LU-2543. */
-       obd_zombie_barrier();
 
         lprocfs_unregister_mountpoint(sbi);
 
@@ -767,22 +761,28 @@ static void client_common_put_super(struct super_block *sb)
 
 void ll_kill_super(struct super_block *sb)
 {
-        struct ll_sb_info *sbi;
-
-        ENTRY;
+       struct ll_sb_info *sbi;
+       ENTRY;
 
         /* not init sb ?*/
-        if (!(sb->s_flags & MS_ACTIVE))
-                return;
+       if (!(sb->s_flags & MS_ACTIVE))
+               return;
 
-        sbi = ll_s2sbi(sb);
-        /* we need restore s_dev from changed for clustred NFS before put_super
-         * because new kernels have cached s_dev and change sb->s_dev in
-         * put_super not affected real removing devices */
+       sbi = ll_s2sbi(sb);
+       /* we need to restore s_dev, which was changed for clustered NFS,
+        * before put_super, because new kernels cache s_dev and changing
+        * sb->s_dev in put_super does not affect real device removal */
        if (sbi) {
                sb->s_dev = sbi->ll_sdev_orig;
                sbi->ll_umounting = 1;
+
+               /* wait for running statahead threads to quit */
+               while (atomic_read(&sbi->ll_sa_running) > 0) {
+                       set_current_state(TASK_UNINTERRUPTIBLE);
+                       schedule_timeout(msecs_to_jiffies(MSEC_PER_SEC >> 3));
+               }
        }
+
        EXIT;
 }
 
@@ -838,30 +838,23 @@ static int ll_options(char *options, int *flags)
                         *flags &= ~tmp;
                         goto next;
                 }
-#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 6, 51, 0)
-               tmp = ll_set_opt("acl", s1, LL_SBI_ACL);
-               if (tmp) {
-                       /* Ignore deprecated mount option.  The client will
-                        * always try to mount with ACL support, whether this
-                        * is used depends on whether server supports it. */
-                       LCONSOLE_ERROR_MSG(0x152, "Ignoring deprecated "
-                                                 "mount option 'acl'.\n");
+               tmp = ll_set_opt("context", s1, 1);
+               if (tmp)
                        goto next;
-               }
-               tmp = ll_set_opt("noacl", s1, LL_SBI_ACL);
+               tmp = ll_set_opt("fscontext", s1, 1);
+               if (tmp)
+                       goto next;
+               tmp = ll_set_opt("defcontext", s1, 1);
+               if (tmp)
+                       goto next;
+               tmp = ll_set_opt("rootcontext", s1, 1);
+               if (tmp)
+                       goto next;
+               tmp = ll_set_opt("remote_client", s1, LL_SBI_RMT_CLIENT);
                if (tmp) {
-                       LCONSOLE_ERROR_MSG(0x152, "Ignoring deprecated "
-                                                 "mount option 'noacl'.\n");
+                       *flags |= tmp;
                        goto next;
                }
-#else
-#warning "{no}acl options have been deprecated since 1.8, please remove them"
-#endif
-                tmp = ll_set_opt("remote_client", s1, LL_SBI_RMT_CLIENT);
-                if (tmp) {
-                        *flags |= tmp;
-                        goto next;
-                }
                tmp = ll_set_opt("user_fid2path", s1, LL_SBI_USER_FID2PATH);
                if (tmp) {
                        *flags |= tmp;
@@ -903,11 +896,6 @@ static int ll_options(char *options, int *flags)
                         *flags &= ~tmp;
                         goto next;
                 }
-                tmp = ll_set_opt("som_preview", s1, LL_SBI_SOM_PREVIEW);
-                if (tmp) {
-                        *flags |= tmp;
-                        goto next;
-                }
                 tmp = ll_set_opt("32bitapi", s1, LL_SBI_32BIT_API);
                 if (tmp) {
                         *flags |= tmp;
@@ -923,6 +911,11 @@ static int ll_options(char *options, int *flags)
                         *flags &= ~tmp;
                         goto next;
                 }
+               tmp = ll_set_opt("always_ping", s1, LL_SBI_ALWAYS_PING);
+               if (tmp) {
+                       *flags |= tmp;
+                       goto next;
+               }
                 LCONSOLE_ERROR_MSG(0x152, "Unknown option '%s', won't mount.\n",
                                    s1);
                 RETURN(-EINVAL);
@@ -941,20 +934,14 @@ void ll_lli_init(struct ll_inode_info *lli)
 {
        lli->lli_inode_magic = LLI_INODE_MAGIC;
        lli->lli_flags = 0;
-       lli->lli_ioepoch = 0;
-       lli->lli_maxbytes = MAX_LFS_FILESIZE;
        spin_lock_init(&lli->lli_lock);
        lli->lli_posix_acl = NULL;
        lli->lli_remote_perms = NULL;
        mutex_init(&lli->lli_rmtperm_mutex);
        /* Do not set lli_fid, it has been initialized already. */
        fid_zero(&lli->lli_pfid);
-       INIT_LIST_HEAD(&lli->lli_close_list);
-       INIT_LIST_HEAD(&lli->lli_oss_capas);
        atomic_set(&lli->lli_open_count, 0);
-       lli->lli_mds_capa = NULL;
        lli->lli_rmtperm_time = 0;
-       lli->lli_pending_och = NULL;
        lli->lli_mds_read_och = NULL;
         lli->lli_mds_write_och = NULL;
         lli->lli_mds_exec_och = NULL;
@@ -963,9 +950,8 @@ void ll_lli_init(struct ll_inode_info *lli)
         lli->lli_open_fd_exec_count = 0;
        mutex_init(&lli->lli_och_mutex);
        spin_lock_init(&lli->lli_agl_lock);
-       lli->lli_has_smd = false;
        spin_lock_init(&lli->lli_layout_lock);
-       ll_layout_version_set(lli, LL_LAYOUT_GEN_NONE);
+       ll_layout_version_set(lli, CL_LAYOUT_GEN_NONE);
        lli->lli_clob = NULL;
 
        init_rwsem(&lli->lli_xattrs_list_rwsem);
@@ -978,11 +964,13 @@ void ll_lli_init(struct ll_inode_info *lli)
                lli->lli_sai = NULL;
                spin_lock_init(&lli->lli_sa_lock);
                lli->lli_opendir_pid = 0;
+               lli->lli_sa_enabled = 0;
+               lli->lli_def_stripe_offset = -1;
        } else {
                mutex_init(&lli->lli_size_mutex);
                lli->lli_symlink_name = NULL;
                init_rwsem(&lli->lli_trunc_sem);
-               mutex_init(&lli->lli_write_mutex);
+               range_lock_tree_init(&lli->lli_write_tree);
                init_rwsem(&lli->lli_glimpse_sem);
                lli->lli_glimpse_time = 0;
                INIT_LIST_HEAD(&lli->lli_agl_list);
@@ -1038,7 +1026,11 @@ int ll_fill_super(struct super_block *sb, struct vfsmount *mnt)
        if (err)
                GOTO(out_free, err);
        lsi->lsi_flags |= LSI_BDI_INITIALIZED;
+#ifdef HAVE_BDI_CAP_MAP_COPY
        lsi->lsi_bdi.capabilities = BDI_CAP_MAP_COPY;
+#else
+       lsi->lsi_bdi.capabilities = 0;
+#endif
        err = ll_bdi_register(&lsi->lsi_bdi);
        if (err)
                GOTO(out_free, err);
@@ -1083,19 +1075,25 @@ int ll_fill_super(struct super_block *sb, struct vfsmount *mnt)
 
         /* connections, registrations, sb setup */
         err = client_common_fill_super(sb, md, dt, mnt);
+       if (err < 0)
+               GOTO(out_free, err);
+
+       sbi->ll_client_common_fill_super_succeeded = 1;
 
 out_free:
-        if (md)
-                OBD_FREE(md, strlen(lprof->lp_md) + instlen + 2);
-        if (dt)
-                OBD_FREE(dt, strlen(lprof->lp_dt) + instlen + 2);
-        if (err)
-                ll_put_super(sb);
-        else if (sbi->ll_flags & LL_SBI_VERBOSE)
-                LCONSOLE_WARN("Mounted %s\n", profilenm);
+       if (md)
+               OBD_FREE(md, strlen(lprof->lp_md) + instlen + 2);
+       if (dt)
+               OBD_FREE(dt, strlen(lprof->lp_dt) + instlen + 2);
+       if (lprof != NULL)
+               class_put_profile(lprof);
+       if (err)
+               ll_put_super(sb);
+       else if (sbi->ll_flags & LL_SBI_VERBOSE)
+               LCONSOLE_WARN("Mounted %s\n", profilenm);
 
-        OBD_FREE_PTR(cfg);
-        RETURN(err);
+       OBD_FREE_PTR(cfg);
+       RETURN(err);
 } /* ll_fill_super */
 
 void ll_put_super(struct super_block *sb)
@@ -1105,13 +1103,12 @@ void ll_put_super(struct super_block *sb)
         struct lustre_sb_info *lsi = s2lsi(sb);
         struct ll_sb_info *sbi = ll_s2sbi(sb);
         char *profilenm = get_profile_name(sb);
-       int ccc_count, next, force = 1, rc = 0;
+       long ccc_count;
+       int next, force = 1, rc = 0;
         ENTRY;
 
         CDEBUG(D_VFSTRACE, "VFS Op: sb %p - %s\n", sb, profilenm);
 
-        ll_print_capa_stat(sbi);
-
         cfg.cfg_instance = sb;
         lustre_end_log(sb, profilenm, &cfg);
 
@@ -1127,14 +1124,14 @@ void ll_put_super(struct super_block *sb)
        /* Wait for unstable pages to be committed to stable storage */
        if (force == 0) {
                struct l_wait_info lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL);
-               rc = l_wait_event(sbi->ll_cache.ccc_unstable_waitq,
-                       atomic_read(&sbi->ll_cache.ccc_unstable_nr) == 0,
+               rc = l_wait_event(sbi->ll_cache->ccc_unstable_waitq,
+                       atomic_long_read(&sbi->ll_cache->ccc_unstable_nr) == 0,
                        &lwi);
        }
 
-       ccc_count = atomic_read(&sbi->ll_cache.ccc_unstable_nr);
+       ccc_count = atomic_long_read(&sbi->ll_cache->ccc_unstable_nr);
        if (force == 0 && rc != -EINTR)
-               LASSERTF(ccc_count == 0, "count: %i\n", ccc_count);
+               LASSERTF(ccc_count == 0, "count: %li\n", ccc_count);
 
 
         /* We need to set force before the lov_disconnect in
@@ -1147,10 +1144,10 @@ void ll_put_super(struct super_block *sb)
                 }
         }
 
-        if (sbi->ll_lcq) {
-                /* Only if client_common_fill_super succeeded */
-                client_common_put_super(sb);
-        }
+       if (sbi->ll_client_common_fill_super_succeeded) {
+               /* Only if client_common_fill_super succeeded */
+               client_common_put_super(sb);
+       }
 
         next = 0;
         while ((obd = class_devices_in_group(&sbi->ll_sb_uuid, &next)) !=NULL) {
@@ -1205,7 +1202,7 @@ struct inode *ll_inode_from_resource_lock(struct ldlm_lock *lock)
        return inode;
 }
 
-static void ll_dir_clear_lsm_md(struct inode *inode)
+void ll_dir_clear_lsm_md(struct inode *inode)
 {
        struct ll_inode_info *lli = ll_i2info(inode);
 
@@ -1240,7 +1237,7 @@ static struct inode *ll_iget_anon_dir(struct super_block *sb,
                struct lmv_stripe_md *lsm = md->lmv;
 
                inode->i_mode = (inode->i_mode & ~S_IFMT) |
-                               (body->mode & S_IFMT);
+                               (body->mbo_mode & S_IFMT);
                LASSERTF(S_ISDIR(inode->i_mode), "Not slave inode "DFID"\n",
                         PFID(fid));
 
@@ -1249,9 +1246,11 @@ static struct inode *ll_iget_anon_dir(struct super_block *sb,
                LTIME_S(inode->i_ctime) = 0;
                inode->i_rdev = 0;
 
+#ifdef HAVE_BACKING_DEV_INFO
                /* initializing backing dev info. */
                inode->i_mapping->backing_dev_info =
                                                &s2lsi(inode->i_sb)->lsi_bdi;
+#endif
                inode->i_op = &ll_dir_inode_operations;
                inode->i_fop = &ll_dir_operations;
                lli->lli_fid = *fid;
@@ -1259,7 +1258,7 @@ static struct inode *ll_iget_anon_dir(struct super_block *sb,
 
                LASSERT(lsm != NULL);
                /* master object FID */
-               lli->lli_pfid = body->fid1;
+               lli->lli_pfid = body->mbo_fid1;
                CDEBUG(D_INODE, "lli %p slave "DFID" master "DFID"\n",
                       lli, PFID(fid), PFID(&lli->lli_pfid));
                unlock_new_inode(inode);
@@ -1301,10 +1300,7 @@ static int ll_init_lsm_md(struct inode *inode, struct lustre_md *md)
                }
        }
 
-       /* Here is where the lsm is being initialized(fill lmo_info) after
-        * client retrieve MD stripe information from MDT. */
-       return md_update_lsm_md(ll_i2mdexp(inode), lsm, md->body,
-                               ll_md_blocking_ast);
+       return 0;
 }
 
 static inline int lli_lsm_md_eq(const struct lmv_stripe_md *lsm_md1,
@@ -1354,15 +1350,42 @@ static int ll_update_lsm_md(struct inode *inode, struct lustre_md *md)
 
        /* set the directory layout */
        if (lli->lli_lsm_md == NULL) {
+               struct cl_attr  *attr;
 
                rc = ll_init_lsm_md(inode, md);
                if (rc != 0)
                        RETURN(rc);
 
-               lli->lli_lsm_md = lsm;
-               /* set lsm_md to NULL, so the following free lustre_md
+               /* set md->lmv to NULL, so the following free lustre_md
                 * will not free this lsm */
                md->lmv = NULL;
+               lli->lli_lsm_md = lsm;
+
+               OBD_ALLOC_PTR(attr);
+               if (attr == NULL)
+                       RETURN(-ENOMEM);
+
+               /* validate the lsm */
+               rc = md_merge_attr(ll_i2mdexp(inode), lsm, attr,
+                                  ll_md_blocking_ast);
+               if (rc != 0) {
+                       OBD_FREE_PTR(attr);
+                       RETURN(rc);
+               }
+
+               if (md->body->mbo_valid & OBD_MD_FLNLINK)
+                       md->body->mbo_nlink = attr->cat_nlink;
+               if (md->body->mbo_valid & OBD_MD_FLSIZE)
+                       md->body->mbo_size = attr->cat_size;
+               if (md->body->mbo_valid & OBD_MD_FLATIME)
+                       md->body->mbo_atime = attr->cat_atime;
+               if (md->body->mbo_valid & OBD_MD_FLCTIME)
+                       md->body->mbo_ctime = attr->cat_ctime;
+               if (md->body->mbo_valid & OBD_MD_FLMTIME)
+                       md->body->mbo_mtime = attr->cat_mtime;
+
+               OBD_FREE_PTR(attr);
+
                CDEBUG(D_INODE, "Set lsm %p magic %x to "DFID"\n", lsm,
                       lsm->lsm_md_magic, PFID(ll_inode2fid(inode)));
                RETURN(0);
@@ -1373,12 +1396,11 @@ static int ll_update_lsm_md(struct inode *inode, struct lustre_md *md)
                struct lmv_stripe_md    *old_lsm = lli->lli_lsm_md;
                int                     idx;
 
-               CERROR("%s: lmv layout mismatch "DFID"(%p)/"DFID"(%p)"
+               CERROR("%s: inode "DFID"(%p)'s lmv layout mismatch (%p)/(%p)"
                       "magic:0x%x/0x%x stripe count: %d/%d master_mdt: %d/%d"
                       "hash_type:0x%x/0x%x layout: 0x%x/0x%x pool:%s/%s\n",
-                      ll_get_fsname(inode->i_sb, NULL, 0),
-                      PFID(&lsm->lsm_md_master_fid), lsm,
-                      PFID(&old_lsm->lsm_md_master_fid), old_lsm,
+                      ll_get_fsname(inode->i_sb, NULL, 0), PFID(&lli->lli_fid),
+                      inode, lsm, old_lsm,
                       lsm->lsm_md_magic, old_lsm->lsm_md_magic,
                       lsm->lsm_md_stripe_count,
                       old_lsm->lsm_md_stripe_count,
@@ -1405,10 +1427,7 @@ static int ll_update_lsm_md(struct inode *inode, struct lustre_md *md)
                RETURN(-EIO);
        }
 
-       rc = md_update_lsm_md(ll_i2mdexp(inode), ll_i2info(inode)->lli_lsm_md,
-                             md->body, ll_md_blocking_ast);
-
-       RETURN(rc);
+       RETURN(0);
 }
 
 void ll_clear_inode(struct inode *inode)
@@ -1427,9 +1446,6 @@ void ll_clear_inode(struct inode *inode)
                 LASSERT(lli->lli_opendir_pid == 0);
         }
 
-       spin_lock(&lli->lli_lock);
-        ll_i2info(inode)->lli_flags &= ~LLIF_MDS_SIZE_LOCK;
-       spin_unlock(&lli->lli_lock);
        md_null_inode(sbi->ll_md_exp, ll_inode2fid(inode));
 
         LASSERT(!lli->lli_open_fd_write_count);
@@ -1468,7 +1484,6 @@ void ll_clear_inode(struct inode *inode)
 #endif
        lli->lli_inode_magic = LLI_INODE_DEAD;
 
-       ll_clear_inode_capas(inode);
        if (S_ISDIR(inode->i_mode))
                ll_dir_clear_lsm_md(inode);
        else if (S_ISREG(inode->i_mode) && !is_bad_inode(inode))
@@ -1479,13 +1494,11 @@ void ll_clear_inode(struct inode *inode)
         * cl_object still uses inode lsm.
         */
        cl_inode_fini(inode);
-       lli->lli_has_smd = false;
 
        EXIT;
 }
 
-int ll_md_setattr(struct dentry *dentry, struct md_op_data *op_data,
-                  struct md_open_data **mod)
+static int ll_md_setattr(struct dentry *dentry, struct md_op_data *op_data)
 {
         struct lustre_md md;
         struct inode *inode = dentry->d_inode;
@@ -1499,8 +1512,7 @@ int ll_md_setattr(struct dentry *dentry, struct md_op_data *op_data,
         if (IS_ERR(op_data))
                 RETURN(PTR_ERR(op_data));
 
-        rc = md_setattr(sbi->ll_md_exp, op_data, NULL, 0, NULL, 0,
-                        &request, mod);
+       rc = md_setattr(sbi->ll_md_exp, op_data, NULL, 0, &request);
        if (rc) {
                ptlrpc_req_finished(request);
                if (rc == -ENOENT) {
@@ -1531,72 +1543,19 @@ int ll_md_setattr(struct dentry *dentry, struct md_op_data *op_data,
        /* inode size will be in ll_setattr_ost, can't do it now since dirty
         * cache is not cleared yet. */
        op_data->op_attr.ia_valid &= ~(TIMES_SET_FLAGS | ATTR_SIZE);
+       if (S_ISREG(inode->i_mode))
+               mutex_lock(&inode->i_mutex);
        rc = simple_setattr(dentry, &op_data->op_attr);
+       if (S_ISREG(inode->i_mode))
+               mutex_unlock(&inode->i_mutex);
        op_data->op_attr.ia_valid = ia_valid;
 
-        /* Extract epoch data if obtained. */
-        op_data->op_handle = md.body->handle;
-        op_data->op_ioepoch = md.body->ioepoch;
-
        rc = ll_update_inode(inode, &md);
        ptlrpc_req_finished(request);
 
        RETURN(rc);
 }
 
-/* Close IO epoch and send Size-on-MDS attribute update. */
-static int ll_setattr_done_writing(struct inode *inode,
-                                   struct md_op_data *op_data,
-                                   struct md_open_data *mod)
-{
-        struct ll_inode_info *lli = ll_i2info(inode);
-        int rc = 0;
-        ENTRY;
-
-        LASSERT(op_data != NULL);
-        if (!S_ISREG(inode->i_mode))
-                RETURN(0);
-
-        CDEBUG(D_INODE, "Epoch "LPU64" closed on "DFID" for truncate\n",
-               op_data->op_ioepoch, PFID(&lli->lli_fid));
-
-        op_data->op_flags = MF_EPOCH_CLOSE;
-        ll_done_writing_attr(inode, op_data);
-        ll_pack_inode2opdata(inode, op_data, NULL);
-
-        rc = md_done_writing(ll_i2sbi(inode)->ll_md_exp, op_data, mod);
-        if (rc == -EAGAIN) {
-                /* MDS has instructed us to obtain Size-on-MDS attribute
-                 * from OSTs and send setattr to back to MDS. */
-                rc = ll_som_update(inode, op_data);
-        } else if (rc) {
-               CERROR("%s: inode "DFID" mdc truncate failed: rc = %d\n",
-                      ll_i2sbi(inode)->ll_md_exp->exp_obd->obd_name,
-                      PFID(ll_inode2fid(inode)), rc);
-        }
-        RETURN(rc);
-}
-
-static int ll_setattr_ost(struct inode *inode, struct iattr *attr)
-{
-        struct obd_capa *capa;
-        int rc;
-
-        if (attr->ia_valid & ATTR_SIZE)
-                capa = ll_osscapa_get(inode, CAPA_OPC_OSS_TRUNC);
-        else
-                capa = ll_mdscapa_get(inode);
-
-        rc = cl_setattr_ost(inode, attr, capa);
-
-        if (attr->ia_valid & ATTR_SIZE)
-                ll_truncate_free_capa(capa);
-        else
-                capa_put(capa);
-
-        return rc;
-}
-
 /* If this inode has objects allocated to it (lsm != NULL), then the OST
  * object(s) determine the file size and mtime.  Otherwise, the MDS will
  * keep these values until such a time that objects are allocated for it.
@@ -1617,9 +1576,7 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import)
         struct inode *inode = dentry->d_inode;
         struct ll_inode_info *lli = ll_i2info(inode);
         struct md_op_data *op_data = NULL;
-        struct md_open_data *mod = NULL;
-       bool file_is_released = false;
-       int rc = 0, rc1 = 0;
+       int rc = 0;
        ENTRY;
 
        CDEBUG(D_VFSTRACE, "%s: setattr inode "DFID"(%p) from %llu to %llu, "
@@ -1655,18 +1612,19 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import)
        }
 
         /* We mark all of the fields "set" so MDS/OST does not re-set them */
-        if (attr->ia_valid & ATTR_CTIME) {
-                attr->ia_ctime = CFS_CURRENT_TIME;
+       if (!(attr->ia_valid & ATTR_CTIME_SET) &&
+           (attr->ia_valid & ATTR_CTIME)) {
+               attr->ia_ctime = CURRENT_TIME;
                 attr->ia_valid |= ATTR_CTIME_SET;
         }
        if (!(attr->ia_valid & ATTR_ATIME_SET) &&
            (attr->ia_valid & ATTR_ATIME)) {
-                attr->ia_atime = CFS_CURRENT_TIME;
+               attr->ia_atime = CURRENT_TIME;
                 attr->ia_valid |= ATTR_ATIME_SET;
         }
        if (!(attr->ia_valid & ATTR_MTIME_SET) &&
            (attr->ia_valid & ATTR_MTIME)) {
-                attr->ia_mtime = CFS_CURRENT_TIME;
+               attr->ia_mtime = CURRENT_TIME;
                 attr->ia_valid |= ATTR_MTIME_SET;
         }
 
@@ -1675,101 +1633,86 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import)
                        LTIME_S(attr->ia_mtime), LTIME_S(attr->ia_ctime),
                        cfs_time_current_sec());
 
-        /* We always do an MDS RPC, even if we're only changing the size;
-         * only the MDS knows whether truncate() should fail with -ETXTBUSY */
-
-        OBD_ALLOC_PTR(op_data);
-        if (op_data == NULL)
-                RETURN(-ENOMEM);
-
-       if (!S_ISDIR(inode->i_mode)) {
+       if (S_ISREG(inode->i_mode)) {
                if (attr->ia_valid & ATTR_SIZE)
                        inode_dio_write_done(inode);
                mutex_unlock(&inode->i_mutex);
        }
 
-       /* truncate on a released file must failed with -ENODATA,
-        * so size must not be set on MDS for released file
-        * but other attributes must be set
-        */
-       if (S_ISREG(inode->i_mode)) {
-               struct lov_stripe_md *lsm;
-               __u32 gen;
-
-               ll_layout_refresh(inode, &gen);
-               lsm = ccc_inode_lsm_get(inode);
-               if (lsm && lsm->lsm_pattern & LOV_PATTERN_F_RELEASED)
-                       file_is_released = true;
-               ccc_inode_lsm_put(inode, lsm);
-
-               if (!hsm_import && attr->ia_valid & ATTR_SIZE) {
-                       if (file_is_released) {
-                               rc = ll_layout_restore(inode, 0, attr->ia_size);
-                               if (rc < 0)
-                                       GOTO(out, rc);
-
-                               file_is_released = false;
-                               ll_layout_refresh(inode, &gen);
-                       }
+       /* We always do an MDS RPC, even if we're only changing the size;
+        * only the MDS knows whether truncate() should fail with -ETXTBUSY */
 
-                       /* If we are changing file size, file content is
-                        * modified, flag it. */
-                       attr->ia_valid |= MDS_OPEN_OWNEROVERRIDE;
-                       spin_lock(&lli->lli_lock);
-                       lli->lli_flags |= LLIF_DATA_MODIFIED;
-                       spin_unlock(&lli->lli_lock);
-                       op_data->op_bias |= MDS_DATA_MODIFIED;
-               }
-       }
+       OBD_ALLOC_PTR(op_data);
+       if (op_data == NULL)
+               GOTO(out, rc = -ENOMEM);
 
-       memcpy(&op_data->op_attr, attr, sizeof(*attr));
+       if (!hsm_import && attr->ia_valid & ATTR_SIZE) {
+               /* If we are changing file size, file content is
+                * modified, flag it. */
+               attr->ia_valid |= MDS_OPEN_OWNEROVERRIDE;
+               op_data->op_bias |= MDS_DATA_MODIFIED;
+               ll_file_clear_flag(lli, LLIF_DATA_MODIFIED);
+       }
 
-       /* Open epoch for truncate. */
-       if (exp_connect_som(ll_i2mdexp(inode)) && !hsm_import &&
-           (attr->ia_valid & (ATTR_SIZE | ATTR_MTIME | ATTR_MTIME_SET)))
-               op_data->op_flags = MF_EPOCH_OPEN;
+       op_data->op_attr = *attr;
 
-       rc = ll_md_setattr(dentry, op_data, &mod);
+       rc = ll_md_setattr(dentry, op_data);
        if (rc)
                GOTO(out, rc);
 
-       /* RPC to MDT is sent, cancel data modification flag */
-       if (rc == 0 && (op_data->op_bias & MDS_DATA_MODIFIED)) {
-               spin_lock(&lli->lli_lock);
-               lli->lli_flags &= ~LLIF_DATA_MODIFIED;
-               spin_unlock(&lli->lli_lock);
-       }
-
-       ll_ioepoch_open(lli, op_data->op_ioepoch);
-       if (!S_ISREG(inode->i_mode) || file_is_released)
+       if (!S_ISREG(inode->i_mode) || hsm_import)
                GOTO(out, rc = 0);
 
        if (attr->ia_valid & (ATTR_SIZE |
                              ATTR_ATIME | ATTR_ATIME_SET |
-                             ATTR_MTIME | ATTR_MTIME_SET)) {
+                             ATTR_MTIME | ATTR_MTIME_SET |
+                             ATTR_CTIME | ATTR_CTIME_SET)) {
                /* For truncate and utimes sending attributes to OSTs, setting
                 * mtime/atime to the past will be performed under PW [0:EOF]
                 * extent lock (new_size:EOF for truncate).  It may seem
                 * excessive to send mtime/atime updates to OSTs when not
                 * setting times to past, but it is necessary due to possible
                 * time de-synchronization between MDT inode and OST objects */
-               if (attr->ia_valid & ATTR_SIZE)
-                       down_write(&lli->lli_trunc_sem);
-               rc = ll_setattr_ost(inode, attr);
-               if (attr->ia_valid & ATTR_SIZE)
-                       up_write(&lli->lli_trunc_sem);
+               rc = cl_setattr_ost(lli->lli_clob, attr, 0);
+       }
+
+       /* If the file was restored, it needs to set dirty flag.
+        *
+        * We've already sent MDS_DATA_MODIFIED flag in
+        * ll_md_setattr() for truncate. However, the MDT refuses to
+        * set the HS_DIRTY flag on released files, so we have to set
+        * it again if the file has been restored. Please check how
+        * LLIF_DATA_MODIFIED is set in vvp_io_setattr_fini().
+        *
+        * Please notice that if the file is not released, the previous
+        * MDS_DATA_MODIFIED has taken effect and usually
+        * LLIF_DATA_MODIFIED is not set(see vvp_io_setattr_fini()).
+        * This way we can save an RPC for common open + trunc
+        * operation. */
+       if (ll_file_test_and_clear_flag(lli, LLIF_DATA_MODIFIED)) {
+               struct hsm_state_set hss = {
+                       .hss_valid = HSS_SETMASK,
+                       .hss_setmask = HS_DIRTY,
+               };
+               int rc2;
+
+               rc2 = ll_hsm_state_set(inode, &hss);
+               /* truncate and write can happen at the same time, so that
+                * the file can be set modified even though the file is not
+                * restored from released state, and ll_hsm_state_set() is
+                * not applicable for the file, and rc2 < 0 is normal in this
+                * case. */
+               if (rc2 < 0)
+                       CDEBUG(D_INFO, DFID "HSM set dirty failed: rc2 = %d\n",
+                              PFID(ll_inode2fid(inode)), rc2);
        }
+
        EXIT;
 out:
-       if (op_data) {
-               if (op_data->op_ioepoch) {
-                       rc1 = ll_setattr_done_writing(inode, op_data, mod);
-                       if (!rc)
-                               rc = rc1;
-               }
+       if (op_data != NULL)
                ll_finish_md_op_data(op_data);
-       }
-       if (!S_ISDIR(inode->i_mode)) {
+
+       if (S_ISREG(inode->i_mode)) {
                mutex_lock(&inode->i_mutex);
                if ((attr->ia_valid & ATTR_SIZE) && !hsm_import)
                        inode_dio_wait(inode);
@@ -1923,19 +1866,10 @@ int ll_update_inode(struct inode *inode, struct lustre_md *md)
 {
        struct ll_inode_info *lli = ll_i2info(inode);
        struct mdt_body *body = md->body;
-       struct lov_stripe_md *lsm = md->lsm;
        struct ll_sb_info *sbi = ll_i2sbi(inode);
 
-       LASSERT ((lsm != NULL) == ((body->valid & OBD_MD_FLEASIZE) != 0));
-       if (lsm != NULL) {
-               if (!lli->lli_has_smd &&
-                   !(sbi->ll_flags & LL_SBI_LAYOUT_LOCK))
-                       cl_file_inode_init(inode, md);
-
-               lli->lli_maxbytes = lsm->lsm_maxbytes;
-               if (lli->lli_maxbytes > MAX_LFS_FILESIZE)
-                       lli->lli_maxbytes = MAX_LFS_FILESIZE;
-       }
+       if (body->mbo_valid & OBD_MD_FLEASIZE)
+               cl_file_inode_init(inode, md);
 
        if (S_ISDIR(inode->i_mode)) {
                int     rc;
@@ -1946,11 +1880,11 @@ int ll_update_inode(struct inode *inode, struct lustre_md *md)
        }
 
        if (sbi->ll_flags & LL_SBI_RMT_CLIENT) {
-                if (body->valid & OBD_MD_FLRMTPERM)
-                        ll_update_remote_perm(inode, md->remote_perm);
-        }
+               if (body->mbo_valid & OBD_MD_FLRMTPERM)
+                       ll_update_remote_perm(inode, md->remote_perm);
+       }
 #ifdef CONFIG_FS_POSIX_ACL
-       else if (body->valid & OBD_MD_FLACL) {
+       else if (body->mbo_valid & OBD_MD_FLACL) {
                spin_lock(&lli->lli_lock);
                if (lli->lli_posix_acl)
                        posix_acl_release(lli->lli_posix_acl);
@@ -1958,126 +1892,93 @@ int ll_update_inode(struct inode *inode, struct lustre_md *md)
                spin_unlock(&lli->lli_lock);
        }
 #endif
-       inode->i_ino = cl_fid_build_ino(&body->fid1,
+       inode->i_ino = cl_fid_build_ino(&body->mbo_fid1,
                                        sbi->ll_flags & LL_SBI_32BIT_API);
-       inode->i_generation = cl_fid_build_gen(&body->fid1);
+       inode->i_generation = cl_fid_build_gen(&body->mbo_fid1);
 
-        if (body->valid & OBD_MD_FLATIME) {
-                if (body->atime > LTIME_S(inode->i_atime))
-                        LTIME_S(inode->i_atime) = body->atime;
-                lli->lli_lvb.lvb_atime = body->atime;
-        }
-        if (body->valid & OBD_MD_FLMTIME) {
-                if (body->mtime > LTIME_S(inode->i_mtime)) {
-                        CDEBUG(D_INODE, "setting ino %lu mtime from %lu "
-                               "to "LPU64"\n", inode->i_ino,
-                               LTIME_S(inode->i_mtime), body->mtime);
-                        LTIME_S(inode->i_mtime) = body->mtime;
-                }
-                lli->lli_lvb.lvb_mtime = body->mtime;
-        }
-        if (body->valid & OBD_MD_FLCTIME) {
-                if (body->ctime > LTIME_S(inode->i_ctime))
-                        LTIME_S(inode->i_ctime) = body->ctime;
-                lli->lli_lvb.lvb_ctime = body->ctime;
-        }
-        if (body->valid & OBD_MD_FLMODE)
-                inode->i_mode = (inode->i_mode & S_IFMT)|(body->mode & ~S_IFMT);
-        if (body->valid & OBD_MD_FLTYPE)
-                inode->i_mode = (inode->i_mode & ~S_IFMT)|(body->mode & S_IFMT);
-        LASSERT(inode->i_mode != 0);
-        if (S_ISREG(inode->i_mode)) {
-                inode->i_blkbits = min(PTLRPC_MAX_BRW_BITS + 1, LL_MAX_BLKSIZE_BITS);
-        } else {
-                inode->i_blkbits = inode->i_sb->s_blocksize_bits;
-        }
-       if (body->valid & OBD_MD_FLUID)
-               inode->i_uid = make_kuid(&init_user_ns, body->uid);
-       if (body->valid & OBD_MD_FLGID)
-               inode->i_gid = make_kgid(&init_user_ns, body->gid);
-       if (body->valid & OBD_MD_FLFLAGS)
-               inode->i_flags = ll_ext_to_inode_flags(body->flags);
-       if (body->valid & OBD_MD_FLNLINK)
-               set_nlink(inode, body->nlink);
-       if (body->valid & OBD_MD_FLRDEV)
-               inode->i_rdev = old_decode_dev(body->rdev);
-
-       if (body->valid & OBD_MD_FLID) {
+       if (body->mbo_valid & OBD_MD_FLATIME) {
+               if (body->mbo_atime > LTIME_S(inode->i_atime))
+                       LTIME_S(inode->i_atime) = body->mbo_atime;
+               lli->lli_atime = body->mbo_atime;
+       }
+
+       if (body->mbo_valid & OBD_MD_FLMTIME) {
+               if (body->mbo_mtime > LTIME_S(inode->i_mtime)) {
+                       CDEBUG(D_INODE, "setting ino %lu mtime from %lu "
+                              "to "LPU64"\n", inode->i_ino,
+                              LTIME_S(inode->i_mtime), body->mbo_mtime);
+                       LTIME_S(inode->i_mtime) = body->mbo_mtime;
+               }
+               lli->lli_mtime = body->mbo_mtime;
+       }
+
+       if (body->mbo_valid & OBD_MD_FLCTIME) {
+               if (body->mbo_ctime > LTIME_S(inode->i_ctime))
+                       LTIME_S(inode->i_ctime) = body->mbo_ctime;
+               lli->lli_ctime = body->mbo_ctime;
+       }
+
+       if (body->mbo_valid & OBD_MD_FLMODE)
+               inode->i_mode = (inode->i_mode & S_IFMT) |
+                               (body->mbo_mode & ~S_IFMT);
+
+       if (body->mbo_valid & OBD_MD_FLTYPE)
+               inode->i_mode = (inode->i_mode & ~S_IFMT) |
+                               (body->mbo_mode & S_IFMT);
+
+       LASSERT(inode->i_mode != 0);
+       if (S_ISREG(inode->i_mode))
+               inode->i_blkbits = min(PTLRPC_MAX_BRW_BITS + 1,
+                                      LL_MAX_BLKSIZE_BITS);
+       else
+               inode->i_blkbits = inode->i_sb->s_blocksize_bits;
+
+       if (body->mbo_valid & OBD_MD_FLUID)
+               inode->i_uid = make_kuid(&init_user_ns, body->mbo_uid);
+       if (body->mbo_valid & OBD_MD_FLGID)
+               inode->i_gid = make_kgid(&init_user_ns, body->mbo_gid);
+       if (body->mbo_valid & OBD_MD_FLFLAGS)
+               inode->i_flags = ll_ext_to_inode_flags(body->mbo_flags);
+       if (body->mbo_valid & OBD_MD_FLNLINK)
+               set_nlink(inode, body->mbo_nlink);
+       if (body->mbo_valid & OBD_MD_FLRDEV)
+               inode->i_rdev = old_decode_dev(body->mbo_rdev);
+
+       if (body->mbo_valid & OBD_MD_FLID) {
                /* FID shouldn't be changed! */
                if (fid_is_sane(&lli->lli_fid)) {
-                       LASSERTF(lu_fid_eq(&lli->lli_fid, &body->fid1),
+                       LASSERTF(lu_fid_eq(&lli->lli_fid, &body->mbo_fid1),
                                 "Trying to change FID "DFID
                                 " to the "DFID", inode "DFID"(%p)\n",
-                                PFID(&lli->lli_fid), PFID(&body->fid1),
+                                PFID(&lli->lli_fid), PFID(&body->mbo_fid1),
                                 PFID(ll_inode2fid(inode)), inode);
                } else {
-                       lli->lli_fid = body->fid1;
+                       lli->lli_fid = body->mbo_fid1;
                }
        }
 
-        LASSERT(fid_seq(&lli->lli_fid) != 0);
-
-        if (body->valid & OBD_MD_FLSIZE) {
-                if (exp_connect_som(ll_i2mdexp(inode)) &&
-                   S_ISREG(inode->i_mode)) {
-                        struct lustre_handle lockh;
-                        ldlm_mode_t mode;
-
-                        /* As it is possible a blocking ast has been processed
-                         * by this time, we need to check there is an UPDATE
-                         * lock on the client and set LLIF_MDS_SIZE_LOCK holding
-                         * it. */
-                        mode = ll_take_md_lock(inode, MDS_INODELOCK_UPDATE,
-                                              &lockh, LDLM_FL_CBPENDING,
-                                              LCK_CR | LCK_CW |
-                                              LCK_PR | LCK_PW);
-                        if (mode) {
-                                if (lli->lli_flags & (LLIF_DONE_WRITING |
-                                                      LLIF_EPOCH_PENDING |
-                                                      LLIF_SOM_DIRTY)) {
-                                       CERROR("%s: inode "DFID" flags %u still"
-                                              " has size authority! do not "
-                                              "trust the size from MDS\n",
-                                              sbi->ll_md_exp->exp_obd->obd_name,
-                                              PFID(ll_inode2fid(inode)),
-                                              lli->lli_flags);
-                                } else {
-                                        /* Use old size assignment to avoid
-                                         * deadlock bz14138 & bz14326 */
-                                        i_size_write(inode, body->size);
-                                       spin_lock(&lli->lli_lock);
-                                        lli->lli_flags |= LLIF_MDS_SIZE_LOCK;
-                                       spin_unlock(&lli->lli_lock);
-                                }
-                                ldlm_lock_decref(&lockh, mode);
-                        }
-                } else {
-                        /* Use old size assignment to avoid
-                         * deadlock bz14138 & bz14326 */
-                        i_size_write(inode, body->size);
-
-                       CDEBUG(D_VFSTRACE,
-                              "inode="DFID", updating i_size %llu\n",
-                              PFID(ll_inode2fid(inode)),
-                              (unsigned long long)body->size);
-                }
+       LASSERT(fid_seq(&lli->lli_fid) != 0);
 
-                if (body->valid & OBD_MD_FLBLOCKS)
-                        inode->i_blocks = body->blocks;
-        }
+       if (body->mbo_valid & OBD_MD_FLSIZE) {
+               i_size_write(inode, body->mbo_size);
 
-        if (body->valid & OBD_MD_FLMDSCAPA) {
-                LASSERT(md->mds_capa);
-                ll_add_capa(inode, md->mds_capa);
-        }
-        if (body->valid & OBD_MD_FLOSSCAPA) {
-                LASSERT(md->oss_capa);
-                ll_add_capa(inode, md->oss_capa);
-        }
+               CDEBUG(D_VFSTRACE, "inode="DFID", updating i_size %llu\n",
+                      PFID(ll_inode2fid(inode)),
+                      (unsigned long long)body->mbo_size);
 
-       if (body->valid & OBD_MD_TSTATE) {
-               if (body->t_state & MS_RESTORE)
-                       lli->lli_flags |= LLIF_FILE_RESTORING;
+               if (body->mbo_valid & OBD_MD_FLBLOCKS)
+                       inode->i_blocks = body->mbo_blocks;
+       }
+
+       if (body->mbo_valid & OBD_MD_TSTATE) {
+               /* Set LLIF_FILE_RESTORING while a restore is ongoing and
+                * clear it once the restore is done, so that glimpsing of
+                * the updated attributes starts again.
+                */
+               if (body->mbo_t_state & MS_RESTORE)
+                       ll_file_set_flag(lli, LLIF_FILE_RESTORING);
+               else
+                       ll_file_clear_flag(lli, LLIF_FILE_RESTORING);
        }
 
        return 0;
@@ -2093,8 +1994,6 @@ int ll_read_inode2(struct inode *inode, void *opaque)
         CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
                PFID(&lli->lli_fid), inode);
 
-       LASSERT(!lli->lli_has_smd);
-
         /* Core attributes from the MDS first.  This is a new inode, and
          * the VFS doesn't zero times in the core inode so we have to do
          * it ourselves.  They will be overwritten by either MDS or OST
@@ -2109,10 +2008,10 @@ int ll_read_inode2(struct inode *inode, void *opaque)
 
         /* OIDEBUG(inode); */
 
-        /* initializing backing dev info. */
-        inode->i_mapping->backing_dev_info = &s2lsi(inode->i_sb)->lsi_bdi;
-
-
+#ifdef HAVE_BACKING_DEV_INFO
+       /* initializing backing dev info. */
+       inode->i_mapping->backing_dev_info = &s2lsi(inode->i_sb)->lsi_bdi;
+#endif
         if (S_ISREG(inode->i_mode)) {
                 struct ll_sb_info *sbi = ll_i2sbi(inode);
                 inode->i_op = &ll_file_inode_operations;
@@ -2140,28 +2039,19 @@ int ll_read_inode2(struct inode *inode, void *opaque)
 
 void ll_delete_inode(struct inode *inode)
 {
-       struct cl_inode_info *lli = cl_i2info(inode);
+       struct ll_inode_info *lli = ll_i2info(inode);
        ENTRY;
 
        if (S_ISREG(inode->i_mode) && lli->lli_clob != NULL)
-               /* discard all dirty pages before truncating them, required by
-                * osc_extent implementation at LU-1030. */
-               cl_sync_file_range(inode, 0, OBD_OBJECT_EOF,
-                                  CL_FSYNC_DISCARD, 1);
-
-        truncate_inode_pages(&inode->i_data, 0);
-
-        /* Workaround for LU-118 */
-        if (inode->i_data.nrpages) {
-               spin_lock_irq(&inode->i_data.tree_lock);
-               spin_unlock_irq(&inode->i_data.tree_lock);
-                LASSERTF(inode->i_data.nrpages == 0,
-                        "inode="DFID"(%p) nrpages=%lu, see "
-                        "http://jira.whamcloud.com/browse/LU-118\n",
-                        PFID(ll_inode2fid(inode)), inode,
-                         inode->i_data.nrpages);
-        }
-        /* Workaround end */
+               /* This is the last chance to write out dirty pages;
+                * otherwise we may lose data during umount */
+               cl_sync_file_range(inode, 0, OBD_OBJECT_EOF, CL_FSYNC_LOCAL, 1);
+
+       truncate_inode_pages_final(&inode->i_data);
+
+       LASSERTF(inode->i_data.nrpages == 0, "inode="DFID"(%p) nrpages=%lu, "
+                "see https://jira.hpdd.intel.com/browse/LU-118\n",
+                PFID(ll_inode2fid(inode)), inode, inode->i_data.nrpages);
 
 #ifdef HAVE_SBOPS_EVICT_INODE
        ll_clear_inode(inode);
@@ -2202,29 +2092,28 @@ int ll_iocontrol(struct inode *inode, struct file *file,
 
                 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
 
-                flags = body->flags;
+               flags = body->mbo_flags;
 
                 ptlrpc_req_finished(req);
 
-                RETURN(put_user(flags, (int *)arg));
+               RETURN(put_user(flags, (int __user *)arg));
         }
         case FSFILT_IOC_SETFLAGS: {
-               struct lov_stripe_md *lsm;
-                struct obd_info oinfo = { { { 0 } } };
-                struct md_op_data *op_data;
+               struct iattr *attr;
+               struct md_op_data *op_data;
+               struct cl_object *obj;
 
-                if (get_user(flags, (int *)arg))
-                        RETURN(-EFAULT);
+               if (get_user(flags, (int __user *)arg))
+                       RETURN(-EFAULT);
 
                 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
                                              LUSTRE_OPC_ANY, NULL);
                 if (IS_ERR(op_data))
                         RETURN(PTR_ERR(op_data));
 
-                ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags = flags;
+               op_data->op_attr_flags = flags;
                 op_data->op_attr.ia_valid |= ATTR_ATTR_FLAG;
-                rc = md_setattr(sbi->ll_md_exp, op_data,
-                                NULL, 0, NULL, 0, &req, NULL);
+               rc = md_setattr(sbi->ll_md_exp, op_data, NULL, 0, &req);
                 ll_finish_md_op_data(op_data);
                 ptlrpc_req_finished(req);
                if (rc)
@@ -2232,32 +2121,18 @@ int ll_iocontrol(struct inode *inode, struct file *file,
 
                inode->i_flags = ll_ext_to_inode_flags(flags);
 
-               lsm = ccc_inode_lsm_get(inode);
-               if (!lsm_has_objects(lsm)) {
-                       ccc_inode_lsm_put(inode, lsm);
+               obj = ll_i2info(inode)->lli_clob;
+               if (obj == NULL)
                        RETURN(0);
-               }
 
-               OBDO_ALLOC(oinfo.oi_oa);
-               if (!oinfo.oi_oa) {
-                       ccc_inode_lsm_put(inode, lsm);
+               OBD_ALLOC_PTR(attr);
+               if (attr == NULL)
                        RETURN(-ENOMEM);
-               }
-               oinfo.oi_md = lsm;
-               oinfo.oi_oa->o_oi = lsm->lsm_oi;
-                oinfo.oi_oa->o_flags = flags;
-                oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS |
-                                       OBD_MD_FLGROUP;
-                oinfo.oi_capa = ll_mdscapa_get(inode);
-                obdo_set_parent_fid(oinfo.oi_oa, &ll_i2info(inode)->lli_fid);
-                rc = obd_setattr_rqset(sbi->ll_dt_exp, &oinfo, NULL);
-                capa_put(oinfo.oi_capa);
-                OBDO_FREE(oinfo.oi_oa);
-               ccc_inode_lsm_put(inode, lsm);
-
-               if (rc && rc != -EPERM && rc != -EACCES)
-                       CERROR("osc_setattr_async fails: rc = %d\n", rc);
 
+               attr->ia_valid = ATTR_ATTR_FLAG;
+               rc = cl_setattr_ost(obj, attr, flags);
+
+               OBD_FREE_PTR(attr);
                RETURN(rc);
         }
         default:
@@ -2363,20 +2238,63 @@ int ll_remount_fs(struct super_block *sb, int *flags, char *data)
         return 0;
 }
 
+/**
+ * Clean up the open handle that is cached on the MDT side.
+ *
+ * The client-side open handling thread may hit an error after the MDT
+ * has granted the open.  The client must then send a close RPC to the
+ * MDT; otherwise the open handle leaks there until the client unmounts
+ * or is evicted, and, if the file is unlinked meanwhile, the reference
+ * it holds blocks later threads that locate the object via FID.
+ *
+ * Best effort: if op_data cannot be allocated only a warning is logged
+ * and no close RPC is sent; the md_close() result is not checked.
+ *
+ * \param[in] sb       super block for this file-system
+ * \param[in] open_req pointer to the original open request
+ */
+void ll_open_cleanup(struct super_block *sb, struct ptlrpc_request *open_req)
+{
+       struct mdt_body                 *body;
+       struct md_op_data               *op_data;
+       struct ptlrpc_request           *close_req = NULL;
+       struct obd_export               *exp       = ll_s2sbi(sb)->ll_md_exp;
+       ENTRY;
+
+       body = req_capsule_server_get(&open_req->rq_pill, &RMF_MDT_BODY);
+       OBD_ALLOC_PTR(op_data);
+       if (op_data == NULL) {
+               CWARN("%s: cannot allocate op_data to release open handle for "
+                     DFID"\n",
+                     ll_get_fsname(sb, NULL, 0), PFID(&body->mbo_fid1));
+
+               RETURN_EXIT;
+       }
+
+       op_data->op_fid1 = body->mbo_fid1;
+       op_data->op_handle = body->mbo_handle;
+       op_data->op_mod_time = cfs_time_current_sec();
+       md_close(exp, op_data, NULL, &close_req);
+       ptlrpc_req_finished(close_req);
+       ll_finish_md_op_data(op_data);
+
+       EXIT;
+}
+
 int ll_prep_inode(struct inode **inode, struct ptlrpc_request *req,
                  struct super_block *sb, struct lookup_intent *it)
 {
        struct ll_sb_info *sbi = NULL;
-       struct lustre_md md = { 0 };
+       struct lustre_md md = { NULL };
        int rc;
        ENTRY;
 
-        LASSERT(*inode || sb);
-        sbi = sb ? ll_s2sbi(sb) : ll_i2sbi(*inode);
-        rc = md_get_lustre_md(sbi->ll_md_exp, req, sbi->ll_dt_exp,
-                              sbi->ll_md_exp, &md);
-        if (rc)
-                RETURN(rc);
+       LASSERT(*inode || sb);
+       sbi = sb ? ll_s2sbi(sb) : ll_i2sbi(*inode);
+       rc = md_get_lustre_md(sbi->ll_md_exp, req, sbi->ll_dt_exp,
+                             sbi->ll_md_exp, &md);
+       if (rc != 0)
+               GOTO(cleanup, rc);
 
        if (*inode) {
                rc = ll_update_inode(*inode, &md);
@@ -2385,13 +2303,18 @@ int ll_prep_inode(struct inode **inode, struct ptlrpc_request *req,
        } else {
                LASSERT(sb != NULL);
 
-                /*
-                 * At this point server returns to client's same fid as client
-                 * generated for creating. So using ->fid1 is okay here.
-                 */
-                LASSERT(fid_is_sane(&md.body->fid1));
+               /*
+                * At this point the server returns the same FID that the
+                * client generated for the create, so ->mbo_fid1 is okay here.
+                */
+               if (!fid_is_sane(&md.body->mbo_fid1)) {
+                       CERROR("%s: Fid is insane "DFID"\n",
+                               ll_get_fsname(sb, NULL, 0),
+                               PFID(&md.body->mbo_fid1));
+                       GOTO(out, rc = -EINVAL);
+               }
 
-               *inode = ll_iget(sb, cl_fid_build_ino(&md.body->fid1,
+               *inode = ll_iget(sb, cl_fid_build_ino(&md.body->mbo_fid1,
                                             sbi->ll_flags & LL_SBI_32BIT_API),
                                 &md);
                if (IS_ERR(*inode)) {
@@ -2416,11 +2339,11 @@ int ll_prep_inode(struct inode **inode, struct ptlrpc_request *req,
         * 2. layout was changed by another client
         * 3. proc2: refresh layout and layout lock granted
         * 4. proc1: to apply a stale layout */
-       if (it != NULL && it->d.lustre.it_lock_mode != 0) {
+       if (it != NULL && it->it_lock_mode != 0) {
                struct lustre_handle lockh;
                struct ldlm_lock *lock;
 
-               lockh.cookie = it->d.lustre.it_lock_handle;
+               lockh.cookie = it->it_lock_handle;
                lock = ldlm_handle2lock(&lockh);
                LASSERT(lock != NULL);
                if (ldlm_has_layout(lock)) {
@@ -2430,27 +2353,31 @@ int ll_prep_inode(struct inode **inode, struct ptlrpc_request *req,
                        conf.coc_opc = OBJECT_CONF_SET;
                        conf.coc_inode = *inode;
                        conf.coc_lock = lock;
-                       conf.u.coc_md = &md;
+                       conf.u.coc_layout = md.layout;
                        (void)ll_layout_conf(*inode, &conf);
                }
                LDLM_LOCK_PUT(lock);
        }
 
+       GOTO(out, rc = 0);
+
 out:
-       if (md.lsm != NULL)
-               obd_free_memmd(sbi->ll_dt_exp, &md.lsm);
        md_free_lustre_md(sbi->ll_md_exp, &md);
-       RETURN(rc);
+
+cleanup:
+       if (rc != 0 && it != NULL && it->it_op & IT_OPEN)
+               ll_open_cleanup(sb != NULL ? sb : (*inode)->i_sb, req);
+
+       return rc;
 }
 
-int ll_obd_statfs(struct inode *inode, void *arg)
+int ll_obd_statfs(struct inode *inode, void __user *arg)
 {
         struct ll_sb_info *sbi = NULL;
         struct obd_export *exp;
         char *buf = NULL;
         struct obd_ioctl_data *data = NULL;
         __u32 type;
-       __u32 flags;
         int len = 0, rc;
 
         if (!inode || !(sbi = ll_i2sbi(inode)))
@@ -2479,8 +2406,7 @@ int ll_obd_statfs(struct inode *inode, void *arg)
         else
                 GOTO(out_statfs, rc = -ENODEV);
 
-       flags = (type & LL_STATFS_NODELAY) ? OBD_STATFS_NODELAY : 0;
-       rc = obd_iocontrol(IOC_OBD_STATFS, exp, len, buf, &flags);
+       rc = obd_iocontrol(IOC_OBD_STATFS, exp, len, buf, NULL);
         if (rc)
                 GOTO(out_statfs, rc);
 out_statfs:
@@ -2508,20 +2434,20 @@ int ll_process_config(struct lustre_cfg *lcfg)
 
        /* Note we have not called client_common_fill_super yet, so
           proc fns must be able to handle that! */
-       rc = class_process_proc_seq_param(PARAM_LLITE, lprocfs_llite_obd_vars,
-                                         lcfg, sb);
+       rc = class_process_proc_param(PARAM_LLITE, lprocfs_llite_obd_vars,
+                                     lcfg, sb);
        if (rc > 0)
                rc = 0;
        return rc;
 }
 
-/* this function prepares md_op_data hint for passing ot down to MD stack. */
-struct md_op_data * ll_prep_md_op_data(struct md_op_data *op_data,
-                                       struct inode *i1, struct inode *i2,
-                                       const char *name, int namelen,
-                                       int mode, __u32 opc, void *data)
+/* this function prepares md_op_data hint for passing it down to MD stack. */
+struct md_op_data *ll_prep_md_op_data(struct md_op_data *op_data,
+                                     struct inode *i1, struct inode *i2,
+                                     const char *name, size_t namelen,
+                                     __u32 mode, __u32 opc, void *data)
 {
-        LASSERT(i1 != NULL);
+       LASSERT(i1 != NULL);
 
        if (name == NULL) {
                /* Do not reuse namelen for something else. */
@@ -2535,26 +2461,28 @@ struct md_op_data * ll_prep_md_op_data(struct md_op_data *op_data,
                        return ERR_PTR(-EINVAL);
        }
 
-        if (op_data == NULL)
-                OBD_ALLOC_PTR(op_data);
+       if (op_data == NULL)
+               OBD_ALLOC_PTR(op_data);
 
-        if (op_data == NULL)
-                return ERR_PTR(-ENOMEM);
+       if (op_data == NULL)
+               return ERR_PTR(-ENOMEM);
 
        ll_i2gids(op_data->op_suppgids, i1, i2);
        op_data->op_fid1 = *ll_inode2fid(i1);
-       op_data->op_capa1 = ll_mdscapa_get(i1);
-       if (S_ISDIR(i1->i_mode))
+       op_data->op_default_stripe_offset = -1;
+       if (S_ISDIR(i1->i_mode)) {
                op_data->op_mea1 = ll_i2info(i1)->lli_lsm_md;
+               if (opc == LUSTRE_OPC_MKDIR)
+                       op_data->op_default_stripe_offset =
+                                  ll_i2info(i1)->lli_def_stripe_offset;
+       }
 
        if (i2) {
                op_data->op_fid2 = *ll_inode2fid(i2);
-               op_data->op_capa2 = ll_mdscapa_get(i2);
                if (S_ISDIR(i2->i_mode))
                        op_data->op_mea2 = ll_i2info(i2)->lli_lsm_md;
        } else {
                fid_zero(&op_data->op_fid2);
-               op_data->op_capa2 = NULL;
        }
 
        if (ll_i2sbi(i1)->ll_flags & LL_SBI_64BIT_HASH)
@@ -2570,39 +2498,19 @@ struct md_op_data * ll_prep_md_op_data(struct md_op_data *op_data,
        op_data->op_fsuid = from_kuid(&init_user_ns, current_fsuid());
        op_data->op_fsgid = from_kgid(&init_user_ns, current_fsgid());
        op_data->op_cap = cfs_curproc_cap_pack();
-       op_data->op_bias = 0;
-       op_data->op_cli_flags = 0;
        if ((opc == LUSTRE_OPC_CREATE) && (name != NULL) &&
-            filename_is_volatile(name, namelen, NULL))
+            filename_is_volatile(name, namelen, &op_data->op_mds)) {
                op_data->op_bias |= MDS_CREATE_VOLATILE;
-       op_data->op_mds = 0;
-       op_data->op_data = data;
-
-        /* If the file is being opened after mknod() (normally due to NFS)
-         * try to use the default stripe data from parent directory for
-         * allocating OST objects.  Try to pass the parent FID to MDS. */
-        if (opc == LUSTRE_OPC_CREATE && i1 == i2 && S_ISREG(i2->i_mode) &&
-           !ll_i2info(i2)->lli_has_smd) {
-               struct ll_inode_info *lli = ll_i2info(i2);
-
-               spin_lock(&lli->lli_lock);
-               if (likely(!lli->lli_has_smd && !fid_is_zero(&lli->lli_pfid)))
-                       op_data->op_fid1 = lli->lli_pfid;
-               spin_unlock(&lli->lli_lock);
-               /** We ignore parent's capability temporary. */
+       } else {
+               op_data->op_mds = 0;
        }
-
-       /* When called by ll_setattr_raw, file is i1. */
-       if (LLIF_DATA_MODIFIED & ll_i2info(i1)->lli_flags)
-               op_data->op_bias |= MDS_DATA_MODIFIED;
+       op_data->op_data = data;
 
        return op_data;
 }
 
 void ll_finish_md_op_data(struct md_op_data *op_data)
 {
-        capa_put(op_data->op_capa1);
-        capa_put(op_data->op_capa2);
         OBD_FREE_PTR(op_data);
 }
 
@@ -2640,6 +2548,9 @@ int ll_show_options(struct seq_file *seq, struct vfsmount *vfs)
        if (sbi->ll_flags & LL_SBI_USER_FID2PATH)
                seq_puts(seq, ",user_fid2path");
 
+       if (sbi->ll_flags & LL_SBI_ALWAYS_PING)
+               seq_puts(seq, ",always_ping");
+
         RETURN(0);
 }
 
@@ -2662,7 +2573,7 @@ int ll_get_obd_name(struct inode *inode, unsigned int cmd, unsigned long arg)
         if (!obd)
                 RETURN(-ENOENT);
 
-       if (copy_to_user((void *)arg, obd->obd_name,
+       if (copy_to_user((void __user *)arg, obd->obd_name,
                         strlen(obd->obd_name) + 1))
                RETURN(-EFAULT);
 
@@ -2720,7 +2631,7 @@ void ll_dirty_page_discard_warn(struct page *page, int ioret)
 {
        char *buf, *path = NULL;
        struct dentry *dentry = NULL;
-       struct ccc_object *obj = cl_inode2ccc(page->mapping->host);
+       struct inode *inode = page->mapping->host;
 
        /* this can be called inside spin lock so use GFP_ATOMIC. */
        buf = (char *)__get_free_page(GFP_ATOMIC);
@@ -2734,7 +2645,7 @@ void ll_dirty_page_discard_warn(struct page *page, int ioret)
               "%s: dirty page discard: %s/fid: "DFID"/%s may get corrupted "
               "(rc %d)\n", ll_get_fsname(page->mapping->host->i_sb, NULL, 0),
               s2lsi(page->mapping->host->i_sb)->lsi_lmd->lmd_dev,
-              PFID(&obj->cob_header.coh_lu.loh_fid),
+              PFID(ll_inode2fid(inode)),
               (path && !IS_ERR(path)) ? path : "", ioret);
 
        if (dentry != NULL)
@@ -2744,6 +2655,32 @@ void ll_dirty_page_discard_warn(struct page *page, int ioret)
                free_page((unsigned long)buf);
 }
 
+ssize_t ll_copy_user_md(const struct lov_user_md __user *md,
+                       struct lov_user_md **kbuf)
+{ /* Copy a user lov_user_md into a new buffer at *kbuf; returns its size. */
+       struct lov_user_md      lum;
+       ssize_t                 lum_size;
+       ENTRY;
+       /* Read sizeof(lum) bytes first so the total size can be computed. */
+       if (copy_from_user(&lum, md, sizeof(lum)))
+               RETURN(-EFAULT);
+
+       lum_size = ll_lov_user_md_size(&lum);
+       if (lum_size < 0)
+               RETURN(lum_size);
+
+       OBD_ALLOC(*kbuf, lum_size);
+       if (*kbuf == NULL)
+               RETURN(-ENOMEM);
+       /* NOTE(review): md is read a second time here - confirm callers cope. */
+       if (copy_from_user(*kbuf, md, lum_size) != 0) {
+               OBD_FREE(*kbuf, lum_size);
+               RETURN(-EFAULT);
+       }
+       /* *kbuf is not freed on success; presumably the caller frees it. */
+       RETURN(lum_size);
+}
+
 /*
  * Compute llite root squash state after a change of root squash
  * configuration setting or add/remove of a lnet nid
@@ -2780,4 +2717,120 @@ void ll_compute_rootsquash_state(struct ll_sb_info *sbi)
        up_write(&squash->rsi_sem);
 }
 
+/**
+ * Parse linkea content to extract information about a given hardlink
+ *
+ * \param[in]   ldata      - Initialized linkea data
+ * \param[in]   linkno     - Zero-based index of the link entry to return
+ * \param[out]  parent_fid - The entry's parent FID
+ * \param[out]  ln         - Entry name destination buffer
+ *
+ * \retval 0 on success
+ * \retval -ENODATA if there is no entry linkno, or the linkea_init() error
+ */
+static int ll_linkea_decode(struct linkea_data *ldata, unsigned int linkno,
+                           struct lu_fid *parent_fid, struct lu_name *ln)
+{
+       unsigned int    idx;
+       int             rc;
+       ENTRY;
 
+       rc = linkea_init(ldata);
+       if (rc < 0)
+               RETURN(rc);
+
+       if (linkno >= ldata->ld_leh->leh_reccount)
+               /* beyond last link */
+               RETURN(-ENODATA);
+       /* Walk entries in order; each unpack overwrites ln and parent_fid. */
+       linkea_first_entry(ldata);
+       for (idx = 0; ldata->ld_lee != NULL; idx++) {
+               linkea_entry_unpack(ldata->ld_lee, &ldata->ld_reclen, ln,
+                                   parent_fid);
+               if (idx == linkno)
+                       break;
+
+               linkea_next_entry(ldata);
+       }
+       /* Entries ran out (ld_lee == NULL) before index linkno was reached. */
+       if (idx < linkno)
+               RETURN(-ENODATA);
+
+       RETURN(0);
+}
+
+/**
+ * Get parent FID and name of an identified link. Operation is performed for
+ * a given link number, letting the caller iterate over linkno to list one or
+ * all links of an entry.
+ *
+ * \param[in]     file - File descriptor against which to perform the operation
+ * \param[in,out] arg  - User-filled structure containing the linkno to operate
+ *                       on and the available size. Filled with the parent FID
+ *                       and name on success (may be partly written on -EFAULT)
+ *
+ * \retval - 0 on success
+ * \retval - Negative errno on failure (-EPERM, -EINVAL, -EOVERFLOW, ...)
+ */
+int ll_getparent(struct file *file, struct getparent __user *arg)
+{
+       struct dentry           *dentry = file->f_path.dentry;
+       struct inode            *inode = dentry->d_inode;
+       struct linkea_data      *ldata;
+       struct lu_buf            buf = LU_BUF_NULL;
+       struct lu_name           ln;
+       struct lu_fid            parent_fid;
+       __u32                    linkno;
+       __u32                    name_size;
+       int                      rc;
+
+       ENTRY;
+       /* Needs CFS_CAP_DAC_READ_SEARCH unless LL_SBI_USER_FID2PATH is set. */
+       if (!cfs_capable(CFS_CAP_DAC_READ_SEARCH) &&
+           !(ll_i2sbi(inode)->ll_flags & LL_SBI_USER_FID2PATH))
+               RETURN(-EPERM);
+
+       if (get_user(name_size, &arg->gp_name_size))
+               RETURN(-EFAULT);
+
+       if (get_user(linkno, &arg->gp_linkno))
+               RETURN(-EFAULT);
+       /* Refuse a caller-declared name size larger than PATH_MAX. */
+       if (name_size > PATH_MAX)
+               RETURN(-EINVAL);
+
+       OBD_ALLOC(ldata, sizeof(*ldata));
+       if (ldata == NULL)
+               RETURN(-ENOMEM);
+
+       rc = linkea_data_new(ldata, &buf);
+       if (rc < 0)
+               GOTO(ldata_free, rc);
+       /* Load the XATTR_NAME_LINK xattr into buf, then decode entry linkno. */
+       rc = ll_getxattr(dentry, XATTR_NAME_LINK, buf.lb_buf, buf.lb_len);
+       if (rc < 0)
+               GOTO(lb_free, rc);
+
+       rc = ll_linkea_decode(ldata, linkno, &parent_fid, &ln);
+       if (rc < 0)
+               GOTO(lb_free, rc);
+       /* Need room for ln_namelen bytes plus the '\0' stored below. */
+       if (ln.ln_namelen >= name_size)
+               GOTO(lb_free, rc = -EOVERFLOW);
+
+       if (copy_to_user(&arg->gp_fid, &parent_fid, sizeof(arg->gp_fid)))
+               GOTO(lb_free, rc = -EFAULT);
+
+       if (copy_to_user(&arg->gp_name, ln.ln_name, ln.ln_namelen))
+               GOTO(lb_free, rc = -EFAULT);
+
+       if (put_user('\0', arg->gp_name + ln.ln_namelen))
+               GOTO(lb_free, rc = -EFAULT);
+       /* Falling through: rc is still 0 from ll_linkea_decode(). */
+lb_free:
+       lu_buf_free(&buf);
+ldata_free:
+       OBD_FREE(ldata, sizeof(*ldata));
+
+       RETURN(rc);
+}