X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Fmdc%2Fmdc_request.c;h=aca18074651b33b66a77ca5121ef359995dd192b;hp=aa8e39d4481010b4425ac7cfd76855b9e9d3c19d;hb=0d8c5ccc4ecfe7c0d10a0a4f92fd291320a97190;hpb=4322e0f99c87bc0412d315d0674d70cc1ffc0bb4

diff --git a/lustre/mdc/mdc_request.c b/lustre/mdc/mdc_request.c
index aa8e39d..aca1807 100644
--- a/lustre/mdc/mdc_request.c
+++ b/lustre/mdc/mdc_request.c
@@ -47,11 +47,15 @@
 #endif
 #include
+#include
 #include
+#include
 #include
 #include
 #include
 #include
+#include
+#include

 #include "mdc_internal.h"

@@ -92,15 +96,15 @@ static inline int mdc_queue_wait(struct ptlrpc_request *req)
 	struct client_obd *cli = &req->rq_import->imp_obd->u.cli;
 	int rc;

-	/* mdc_enter_request() ensures that this client has no more
+	/* obd_get_request_slot() ensures that this client has no more
 	 * than cl_max_rpcs_in_flight RPCs simultaneously in flight
 	 * against an MDT. */
-	rc = mdc_enter_request(cli);
+	rc = obd_get_request_slot(cli);
 	if (rc != 0)
 		return rc;

 	rc = ptlrpc_queue_wait(req);
-	mdc_exit_request(cli);
+	obd_put_request_slot(cli);

 	return rc;
 }
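
The mdc_queue_wait() hunk above swaps the MDC-private throttle for the generic
obd_get_request_slot()/obd_put_request_slot() pair. As a rough sketch of the
semantics the comment describes (at most cl_max_rpcs_in_flight RPCs in flight
per client, with excess callers sleeping interruptibly), a minimal slot gate
could look like the following. The rpc_slots structure and the slot_get()/
slot_put() names are invented for illustration and are not part of this patch;
the real accounting lives inside struct client_obd.

	/* Illustrative only: a counting gate with the same acquire/release
	 * discipline as obd_get_request_slot()/obd_put_request_slot(). */
	struct rpc_slots {
		spinlock_t		rs_lock;
		unsigned int		rs_in_flight;	/* RPCs currently sent */
		unsigned int		rs_max;		/* cl_max_rpcs_in_flight */
		wait_queue_head_t	rs_waitq;
	};

	static int slot_get(struct rpc_slots *rs)
	{
		int rc;

		spin_lock(&rs->rs_lock);
		while (rs->rs_in_flight >= rs->rs_max) {
			spin_unlock(&rs->rs_lock);
			/* Sleep until slot_put() wakes us; returns
			 * -ERESTARTSYS if interrupted by a signal.  The
			 * condition is rechecked under the lock below. */
			rc = wait_event_interruptible(rs->rs_waitq,
					rs->rs_in_flight < rs->rs_max);
			if (rc != 0)
				return rc;
			spin_lock(&rs->rs_lock);
		}
		rs->rs_in_flight++;		/* slot taken */
		spin_unlock(&rs->rs_lock);
		return 0;
	}

	static void slot_put(struct rpc_slots *rs)
	{
		spin_lock(&rs->rs_lock);
		LASSERT(rs->rs_in_flight > 0);
		rs->rs_in_flight--;
		spin_unlock(&rs->rs_lock);
		wake_up(&rs->rs_waitq);		/* let one waiter retry */
	}

Note that the slot must be released on every path, including errors, or the
client would permanently lose one unit of RPC concurrency.
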
" + "rc < sizeof(*md->lmv) (%d < %d)\n", + rc, (int)sizeof(*md->lmv)); + GOTO(out, rc = -EPROTO); + } + } } rc = 0; @@ -876,7 +879,7 @@ int mdc_close(struct obd_export *exp, struct md_op_data *op_data, req_fmt = &RQF_MDS_RELEASE_CLOSE; /* allocate a FID for volatile file */ - rc = mdc_fid_alloc(exp, &op_data->op_fid2, op_data); + rc = mdc_fid_alloc(NULL, exp, &op_data->op_fid2, op_data); if (rc < 0) { CERROR("%s: "DFID" failed to allocate FID: %d\n", obd->obd_name, PFID(&op_data->op_fid1), rc); @@ -924,10 +927,10 @@ int mdc_close(struct obd_export *exp, struct md_op_data *op_data, mdc_close_pack(req, op_data); - req_capsule_set_size(&req->rq_pill, &RMF_MDT_MD, RCL_SERVER, - obd->u.cli.cl_max_mds_easize); - req_capsule_set_size(&req->rq_pill, &RMF_LOGCOOKIES, RCL_SERVER, - obd->u.cli.cl_max_mds_cookiesize); + req_capsule_set_size(&req->rq_pill, &RMF_MDT_MD, RCL_SERVER, + obd->u.cli.cl_default_mds_easize); + req_capsule_set_size(&req->rq_pill, &RMF_LOGCOOKIES, RCL_SERVER, + obd->u.cli.cl_default_mds_cookiesize); ptlrpc_request_set_replen(req); @@ -1095,8 +1098,10 @@ out: EXPORT_SYMBOL(mdc_sendpage); #endif -int mdc_readpage(struct obd_export *exp, struct md_op_data *op_data, - struct page **pages, struct ptlrpc_request **request) +static int mdc_getpage(struct obd_export *exp, const struct lu_fid *fid, + __u64 offset, struct obd_capa *oc, + struct page **pages, int npages, + struct ptlrpc_request **request) { struct ptlrpc_request *req; struct ptlrpc_bulk_desc *desc; @@ -1111,73 +1116,694 @@ int mdc_readpage(struct obd_export *exp, struct md_op_data *op_data, init_waitqueue_head(&waitq); restart_bulk: - req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_MDS_READPAGE); - if (req == NULL) - RETURN(-ENOMEM); + req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_MDS_READPAGE); + if (req == NULL) + RETURN(-ENOMEM); - mdc_set_capa_size(req, &RMF_CAPA1, op_data->op_capa1); + mdc_set_capa_size(req, &RMF_CAPA1, oc); - rc = ptlrpc_request_pack(req, LUSTRE_MDS_VERSION, MDS_READPAGE); - if (rc) { - ptlrpc_request_free(req); - RETURN(rc); - } + rc = ptlrpc_request_pack(req, LUSTRE_MDS_VERSION, MDS_READPAGE); + if (rc) { + ptlrpc_request_free(req); + RETURN(rc); + } - req->rq_request_portal = MDS_READPAGE_PORTAL; - ptlrpc_at_set_req_timeout(req); + req->rq_request_portal = MDS_READPAGE_PORTAL; + ptlrpc_at_set_req_timeout(req); - desc = ptlrpc_prep_bulk_imp(req, op_data->op_npages, 1, BULK_PUT_SINK, + desc = ptlrpc_prep_bulk_imp(req, npages, 1, BULK_PUT_SINK, MDS_BULK_PORTAL); - if (desc == NULL) { - ptlrpc_request_free(req); - RETURN(-ENOMEM); - } + if (desc == NULL) { + ptlrpc_request_free(req); + RETURN(-ENOMEM); + } - /* NB req now owns desc and will free it when it gets freed */ - for (i = 0; i < op_data->op_npages; i++) + /* NB req now owns desc and will free it when it gets freed */ + for (i = 0; i < npages; i++) ptlrpc_prep_bulk_page_pin(desc, pages[i], 0, PAGE_CACHE_SIZE); - mdc_readdir_pack(req, op_data->op_offset, - PAGE_CACHE_SIZE * op_data->op_npages, - &op_data->op_fid1, op_data->op_capa1); + mdc_readdir_pack(req, offset, PAGE_CACHE_SIZE * npages, fid, oc); - ptlrpc_request_set_replen(req); - rc = ptlrpc_queue_wait(req); - if (rc) { - ptlrpc_req_finished(req); - if (rc != -ETIMEDOUT) - RETURN(rc); + ptlrpc_request_set_replen(req); + rc = ptlrpc_queue_wait(req); + if (rc) { + ptlrpc_req_finished(req); + if (rc != -ETIMEDOUT) + RETURN(rc); - resends++; - if (!client_should_resend(resends, &exp->exp_obd->u.cli)) { - CERROR("too many resend retries, returning error\n"); - RETURN(-EIO); - } 
@@ -570,25 +573,25 @@ int mdc_get_lustre_md(struct obd_export *exp, struct ptlrpc_request *req,
 			       "but eadatasize 0\n");
 			RETURN(-EPROTO);
 		}
-		if (md->body->valid & OBD_MD_MEA) {
-			lmvsize = md->body->eadatasize;
-			lmv = req_capsule_server_sized_get(pill, &RMF_MDT_MD,
-							   lmvsize);
-			if (!lmv)
-				GOTO(out, rc = -EPROTO);
-
-			rc = obd_unpackmd(md_exp, (void *)&md->mea, lmv,
-					  lmvsize);
-			if (rc < 0)
-				GOTO(out, rc);
-
-			if (rc < sizeof(*md->mea)) {
-				CDEBUG(D_INFO, "size too small: "
-				       "rc < sizeof(*md->mea) (%d < %d)\n",
-				       rc, (int)sizeof(*md->mea));
-				GOTO(out, rc = -EPROTO);
-			}
-		}
+		if (md->body->valid & OBD_MD_MEA) {
+			lmvsize = md->body->eadatasize;
+			lmv = req_capsule_server_sized_get(pill, &RMF_MDT_MD,
+							   lmvsize);
+			if (!lmv)
+				GOTO(out, rc = -EPROTO);
+
+			rc = obd_unpackmd(md_exp, (void *)&md->lmv, lmv,
+					  lmvsize);
+			if (rc < 0)
+				GOTO(out, rc);
+
+			if (rc < sizeof(*md->lmv)) {
+				CDEBUG(D_INFO, "size too small: "
+				       "rc < sizeof(*md->lmv) (%d < %d)\n",
+				       rc, (int)sizeof(*md->lmv));
+				GOTO(out, rc = -EPROTO);
+			}
+		}
 	}
 	rc = 0;

@@ -876,7 +879,7 @@ int mdc_close(struct obd_export *exp, struct md_op_data *op_data,
 		req_fmt = &RQF_MDS_RELEASE_CLOSE;

 		/* allocate a FID for volatile file */
-		rc = mdc_fid_alloc(exp, &op_data->op_fid2, op_data);
+		rc = mdc_fid_alloc(NULL, exp, &op_data->op_fid2, op_data);
 		if (rc < 0) {
 			CERROR("%s: "DFID" failed to allocate FID: %d\n",
 			       obd->obd_name, PFID(&op_data->op_fid1), rc);
@@ -924,10 +927,10 @@ int mdc_close(struct obd_export *exp, struct md_op_data *op_data,

 	mdc_close_pack(req, op_data);

-	req_capsule_set_size(&req->rq_pill, &RMF_MDT_MD, RCL_SERVER,
-			     obd->u.cli.cl_max_mds_easize);
-	req_capsule_set_size(&req->rq_pill, &RMF_LOGCOOKIES, RCL_SERVER,
-			     obd->u.cli.cl_max_mds_cookiesize);
+	req_capsule_set_size(&req->rq_pill, &RMF_MDT_MD, RCL_SERVER,
+			     obd->u.cli.cl_default_mds_easize);
+	req_capsule_set_size(&req->rq_pill, &RMF_LOGCOOKIES, RCL_SERVER,
+			     obd->u.cli.cl_default_mds_cookiesize);

 	ptlrpc_request_set_replen(req);

@@ -1095,8 +1098,10 @@ out:
 EXPORT_SYMBOL(mdc_sendpage);
 #endif

-int mdc_readpage(struct obd_export *exp, struct md_op_data *op_data,
-		 struct page **pages, struct ptlrpc_request **request)
+static int mdc_getpage(struct obd_export *exp, const struct lu_fid *fid,
+		       __u64 offset, struct obd_capa *oc,
+		       struct page **pages, int npages,
+		       struct ptlrpc_request **request)
 {
 	struct ptlrpc_request   *req;
 	struct ptlrpc_bulk_desc *desc;
@@ -1111,73 +1116,694 @@ int mdc_readpage(struct obd_export *exp, struct md_op_data *op_data,
 	init_waitqueue_head(&waitq);

 restart_bulk:
-	req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_MDS_READPAGE);
-	if (req == NULL)
-		RETURN(-ENOMEM);
+	req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_MDS_READPAGE);
+	if (req == NULL)
+		RETURN(-ENOMEM);

-	mdc_set_capa_size(req, &RMF_CAPA1, op_data->op_capa1);
+	mdc_set_capa_size(req, &RMF_CAPA1, oc);

-	rc = ptlrpc_request_pack(req, LUSTRE_MDS_VERSION, MDS_READPAGE);
-	if (rc) {
-		ptlrpc_request_free(req);
-		RETURN(rc);
-	}
+	rc = ptlrpc_request_pack(req, LUSTRE_MDS_VERSION, MDS_READPAGE);
+	if (rc) {
+		ptlrpc_request_free(req);
+		RETURN(rc);
+	}

-	req->rq_request_portal = MDS_READPAGE_PORTAL;
-	ptlrpc_at_set_req_timeout(req);
+	req->rq_request_portal = MDS_READPAGE_PORTAL;
+	ptlrpc_at_set_req_timeout(req);

-	desc = ptlrpc_prep_bulk_imp(req, op_data->op_npages, 1, BULK_PUT_SINK,
+	desc = ptlrpc_prep_bulk_imp(req, npages, 1, BULK_PUT_SINK,
 				    MDS_BULK_PORTAL);
-	if (desc == NULL) {
-		ptlrpc_request_free(req);
-		RETURN(-ENOMEM);
-	}
+	if (desc == NULL) {
+		ptlrpc_request_free(req);
+		RETURN(-ENOMEM);
+	}

-	/* NB req now owns desc and will free it when it gets freed */
-	for (i = 0; i < op_data->op_npages; i++)
+	/* NB req now owns desc and will free it when it gets freed */
+	for (i = 0; i < npages; i++)
 		ptlrpc_prep_bulk_page_pin(desc, pages[i], 0, PAGE_CACHE_SIZE);

-	mdc_readdir_pack(req, op_data->op_offset,
-			 PAGE_CACHE_SIZE * op_data->op_npages,
-			 &op_data->op_fid1, op_data->op_capa1);
+	mdc_readdir_pack(req, offset, PAGE_CACHE_SIZE * npages, fid, oc);

-	ptlrpc_request_set_replen(req);
-	rc = ptlrpc_queue_wait(req);
-	if (rc) {
-		ptlrpc_req_finished(req);
-		if (rc != -ETIMEDOUT)
-			RETURN(rc);
+	ptlrpc_request_set_replen(req);
+	rc = ptlrpc_queue_wait(req);
+	if (rc) {
+		ptlrpc_req_finished(req);
+		if (rc != -ETIMEDOUT)
+			RETURN(rc);

-		resends++;
-		if (!client_should_resend(resends, &exp->exp_obd->u.cli)) {
-			CERROR("too many resend retries, returning error\n");
-			RETURN(-EIO);
-		}
-		lwi = LWI_TIMEOUT_INTR(cfs_time_seconds(resends), NULL, NULL, NULL);
-		l_wait_event(waitq, 0, &lwi);
+		resends++;
+		if (!client_should_resend(resends, &exp->exp_obd->u.cli)) {
+			CERROR("%s: too many resend retries: rc = %d\n",
+			       exp->exp_obd->obd_name, -EIO);
+			RETURN(-EIO);
+		}
+		lwi = LWI_TIMEOUT_INTR(cfs_time_seconds(resends), NULL, NULL,
+				       NULL);
+		l_wait_event(waitq, 0, &lwi);

-		goto restart_bulk;
-	}
+		goto restart_bulk;
+	}

-	rc = sptlrpc_cli_unwrap_bulk_read(req, req->rq_bulk,
-					  req->rq_bulk->bd_nob_transferred);
-	if (rc < 0) {
-		ptlrpc_req_finished(req);
-		RETURN(rc);
-	}
+	rc = sptlrpc_cli_unwrap_bulk_read(req, req->rq_bulk,
+					  req->rq_bulk->bd_nob_transferred);
+	if (rc < 0) {
+		ptlrpc_req_finished(req);
+		RETURN(rc);
+	}

-	if (req->rq_bulk->bd_nob_transferred & ~LU_PAGE_MASK) {
-		CERROR("Unexpected # bytes transferred: %d (%ld expected)\n",
-		       req->rq_bulk->bd_nob_transferred,
-		       PAGE_CACHE_SIZE * op_data->op_npages);
-		ptlrpc_req_finished(req);
-		RETURN(-EPROTO);
-	}
+	if (req->rq_bulk->bd_nob_transferred & ~LU_PAGE_MASK) {
+		CERROR("%s: unexpected bytes transferred: %d (%ld expected)\n",
+		       exp->exp_obd->obd_name, req->rq_bulk->bd_nob_transferred,
+		       PAGE_CACHE_SIZE * npages);
+		ptlrpc_req_finished(req);
+		RETURN(-EPROTO);
+	}

-	*request = req;
-	RETURN(0);
+	*request = req;
+	RETURN(0);
+}
+
+#ifdef __KERNEL__
+static void mdc_release_page(struct page *page, int remove)
+{
+	if (remove) {
+		lock_page(page);
+		if (likely(page->mapping != NULL))
+			truncate_complete_page(page->mapping, page);
+		unlock_page(page);
+	}
+	page_cache_release(page);
+}
+
+static struct page *mdc_page_locate(struct address_space *mapping, __u64 *hash,
+				    __u64 *start, __u64 *end, int hash64)
+{
+	/*
+	 * Complement of hash is used as an index so that
+	 * radix_tree_gang_lookup() can be used to find a page with starting
+	 * hash _smaller_ than one we are looking for.
+	 */
+	unsigned long offset = hash_x_index(*hash, hash64);
+	struct page *page;
+	int found;
+
+	spin_lock_irq(&mapping->tree_lock);
+	found = radix_tree_gang_lookup(&mapping->page_tree,
+				       (void **)&page, offset, 1);
+	if (found > 0) {
+		struct lu_dirpage *dp;
+
+		page_cache_get(page);
+		spin_unlock_irq(&mapping->tree_lock);
+		/*
+		 * In contrast to find_lock_page() we are sure that directory
+		 * page cannot be truncated (while DLM lock is held) and,
+		 * hence, can avoid restart.
+		 *
+		 * In fact, page cannot be locked here at all, because
+		 * mdc_read_page_remote does synchronous io.
+		 */
+		wait_on_page_locked(page);
+		if (PageUptodate(page)) {
+			dp = kmap(page);
+			if (BITS_PER_LONG == 32 && hash64) {
+				*start = le64_to_cpu(dp->ldp_hash_start) >> 32;
+				*end   = le64_to_cpu(dp->ldp_hash_end) >> 32;
+				*hash  = *hash >> 32;
+			} else {
+				*start = le64_to_cpu(dp->ldp_hash_start);
+				*end   = le64_to_cpu(dp->ldp_hash_end);
+			}
+			if (unlikely(*start == 1 && *hash == 0))
+				*hash = *start;
+			else
+				LASSERTF(*start <= *hash, "start = "LPX64
+					 ", end = "LPX64", hash = "LPX64"\n",
+					 *start, *end, *hash);
+			CDEBUG(D_VFSTRACE, "offset %lx ["LPX64" "LPX64"], "
+			       "hash "LPX64"\n", offset, *start, *end, *hash);
+			if (*hash > *end) {
+				kunmap(page);
+				mdc_release_page(page, 0);
+				page = NULL;
+			} else if (*end != *start && *hash == *end) {
+				/*
+				 * upon hash collision, remove this page,
+				 * otherwise put page reference, and
+				 * ll_get_dir_page() will issue RPC to fetch
+				 * the page we want.
+				 */
+				kunmap(page);
+				mdc_release_page(page,
+				    le32_to_cpu(dp->ldp_flags) & LDF_COLLIDE);
+				page = NULL;
+			}
+		} else {
+			page_cache_release(page);
+			page = ERR_PTR(-EIO);
+		}
+	} else {
+		spin_unlock_irq(&mapping->tree_lock);
+		page = NULL;
+	}
+	return page;
+}
+
+/*
+ * Adjust a set of pages, each page containing an array of lu_dirpages,
+ * so that each page can be used as a single logical lu_dirpage.
+ *
+ * A lu_dirpage is laid out as follows, where s = ldp_hash_start,
+ * e = ldp_hash_end, f = ldp_flags, p = padding, and each "ent" is a
+ * struct lu_dirent.  It has size up to LU_PAGE_SIZE. The ldp_hash_end
+ * value is used as a cookie to request the next lu_dirpage in a
+ * directory listing that spans multiple pages (two in this example):
+ *   ________
+ *  |        |
+ * .|--------v-------   -----.
+ * |s|e|f|p|ent|ent| ... |ent|
+ * '--|--------------   -----'   Each CFS_PAGE contains a single
+ *    '------.                   lu_dirpage.
+ * .---------v-------   -----.
+ * |s|e|f|p|ent| 0 | ... | 0 |
+ * '-----------------   -----'
+ *
+ * However, on hosts where the native VM page size (PAGE_CACHE_SIZE) is
+ * larger than LU_PAGE_SIZE, a single host page may contain multiple
+ * lu_dirpages. After reading the lu_dirpages from the MDS, the
+ * ldp_hash_end of the first lu_dirpage refers to the one immediately
+ * after it in the same CFS_PAGE (arrows simplified for brevity, but
+ * in general e0==s1, e1==s2, etc.):
+ *
+ * .--------------------   -----.
+ * |s0|e0|f0|p|ent|ent| ... |ent|
+ * |---v----------------   -----|
+ * |s1|e1|f1|p|ent|ent| ... |ent|
+ * |---v----------------   -----|  Here, each CFS_PAGE contains
+ *             ...                 multiple lu_dirpages.
+ * |---v----------------   -----|
+ * |s'|e'|f'|p|ent|ent| ... |ent|
+ * '---|----------------   -----'
+ *     v
+ * .----------------------------.
+ * |        next CFS_PAGE       |
+ *
+ * This structure is transformed into a single logical lu_dirpage as follows:
+ *
+ * - Replace e0 with e' so the request for the next lu_dirpage gets the page
+ *   labeled 'next CFS_PAGE'.
+ *
+ * - Copy the LDF_COLLIDE flag from f' to f0 to correctly reflect whether
+ *   a hash collision with the next page exists.
+ *
+ * - Adjust the lde_reclen of the ending entry of each lu_dirpage to span
+ *   to the first entry of the next lu_dirpage.
+ */
+#if PAGE_CACHE_SIZE > LU_PAGE_SIZE
+static void mdc_adjust_dirpages(struct page **pages, int cfs_pgs, int lu_pgs)
+{
+	int i;
+
+	for (i = 0; i < cfs_pgs; i++) {
+		struct lu_dirpage *dp = kmap(pages[i]);
+		struct lu_dirpage *first = dp;
+		struct lu_dirent *end_dirent = NULL;
+		struct lu_dirent *ent;
+		__u64 hash_end = le64_to_cpu(dp->ldp_hash_end);
+		__u32 flags = le32_to_cpu(dp->ldp_flags);
+
+		while (--lu_pgs > 0) {
+			ent = lu_dirent_start(dp);
+			for (end_dirent = ent; ent != NULL;
+			     end_dirent = ent, ent = lu_dirent_next(ent));
+
+			/* Advance dp to next lu_dirpage. */
+			dp = (struct lu_dirpage *)((char *)dp + LU_PAGE_SIZE);
+
+			/* Check if we've reached the end of the CFS_PAGE. */
+			if (!((unsigned long)dp & ~CFS_PAGE_MASK))
+				break;
+
+			/* Save the hash and flags of this lu_dirpage. */
+			hash_end = le64_to_cpu(dp->ldp_hash_end);
+			flags = le32_to_cpu(dp->ldp_flags);
+
+			/* Check if lu_dirpage contains no entries. */
+			if (end_dirent == NULL)
+				break;
+
+			/* Enlarge the end entry lde_reclen from 0 to
+			 * first entry of next lu_dirpage. */
+			LASSERT(le16_to_cpu(end_dirent->lde_reclen) == 0);
+			end_dirent->lde_reclen =
+				cpu_to_le16((char *)(dp->ldp_entries) -
+					    (char *)end_dirent);
+		}
+
+		first->ldp_hash_end = hash_end;
+		first->ldp_flags &= ~cpu_to_le32(LDF_COLLIDE);
+		first->ldp_flags |= flags & cpu_to_le32(LDF_COLLIDE);
+
+		kunmap(pages[i]);
+	}
+	LASSERTF(lu_pgs == 0, "left = %d", lu_pgs);
+}
+#else
+#define mdc_adjust_dirpages(pages, cfs_pgs, lu_pgs) do {} while (0)
+#endif	/* PAGE_CACHE_SIZE > LU_PAGE_SIZE */
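
The layout comment above defines the iteration contract that
mdc_adjust_dirpages() depends on: entries are chained by lde_reclen, and a
reclen of 0 terminates a lu_dirpage. A small sketch of walking one kmapped
lu_dirpage with the same lu_dirent_start()/lu_dirent_next() helpers; the
dump_dirpage() function itself is illustrative and not part of the patch:

	/* Walk every real entry of one lu_dirpage and print its name and
	 * hash.  Dummy entries (lde_namelen == 0) pad hash collisions and
	 * are skipped, just as mdc_read_entry() skips them below. */
	static void dump_dirpage(struct lu_dirpage *dp)
	{
		struct lu_dirent *ent;

		CDEBUG(D_INFO, "dirpage ["LPX64" "LPX64"] flags %#x\n",
		       le64_to_cpu(dp->ldp_hash_start),
		       le64_to_cpu(dp->ldp_hash_end),
		       le32_to_cpu(dp->ldp_flags));

		/* lu_dirent_next() follows lde_reclen; after
		 * mdc_adjust_dirpages() has run, this walk also crosses the
		 * LU_PAGE_SIZE boundaries inside a large CFS_PAGE. */
		for (ent = lu_dirent_start(dp); ent != NULL;
		     ent = lu_dirent_next(ent)) {
			if (le16_to_cpu(ent->lde_namelen) == 0)
				continue;	/* dummy entry */

			CDEBUG(D_INFO, "%.*s: hash "LPX64"\n",
			       le16_to_cpu(ent->lde_namelen), ent->lde_name,
			       le64_to_cpu(ent->lde_hash));
		}
	}
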
+
+/* parameters for readdir page */
+struct readpage_param {
+	struct md_op_data	*rp_mod;
+	__u64			 rp_off;
+	int			 rp_hash64;
+	struct obd_export	*rp_exp;
+	struct md_callback	*rp_cb;
+};
+
+/**
+ * Read pages from server.
+ *
+ * Each page in an MDS_READPAGE RPC is packed in LU_PAGE_SIZE units, and each
+ * starts with a lu_dirpage header that describes the start/end hash and
+ * whether the page is empty (contains no dir entry) or whether its hash
+ * collides with the next page.  After the client receives the reply, several
+ * lu_dirpages are integrated into one dir page of CFS_PAGE_SIZE (if
+ * CFS_PAGE_SIZE is greater than LU_PAGE_SIZE), and the lu_dirpage of this
+ * integrated page is adjusted accordingly.
+ **/
+static int mdc_read_page_remote(void *data, struct page *page0)
+{
+	struct readpage_param	*rp = data;
+	struct page		**page_pool;
+	struct page		*page;
+	struct lu_dirpage	*dp;
+	int			rd_pgs = 0; /* number of pages actually read */
+	int			npages;
+	struct md_op_data	*op_data = rp->rp_mod;
+	struct ptlrpc_request	*req;
+	int			max_pages = op_data->op_max_pages;
+	struct inode		*inode;
+	struct lu_fid		*fid;
+	int			i;
+	int			rc;
+	ENTRY;
+
+	LASSERT(max_pages > 0 && max_pages <= PTLRPC_MAX_BRW_PAGES);
+	if (op_data->op_mea1 != NULL) {
+		__u32 index = op_data->op_stripe_offset;
+
+		inode = op_data->op_mea1->lsm_md_oinfo[index].lmo_root;
+		fid = &op_data->op_mea1->lsm_md_oinfo[index].lmo_fid;
+	} else {
+		inode = op_data->op_data;
+		fid = &op_data->op_fid1;
+	}
+	LASSERT(inode != NULL);
+
+	OBD_ALLOC(page_pool, sizeof(page_pool[0]) * max_pages);
+	if (page_pool != NULL) {
+		page_pool[0] = page0;
+	} else {
+		page_pool = &page0;
+		max_pages = 1;
+	}
+
+	for (npages = 1; npages < max_pages; npages++) {
+		page = page_cache_alloc_cold(inode->i_mapping);
+		if (page == NULL)
+			break;
+		page_pool[npages] = page;
+	}
+
+	rc = mdc_getpage(rp->rp_exp, fid, rp->rp_off, op_data->op_capa1,
+			 page_pool, npages, &req);
+	if (rc == 0) {
+		int lu_pgs;
+
+		rd_pgs = (req->rq_bulk->bd_nob_transferred +
+			  PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+		lu_pgs = req->rq_bulk->bd_nob_transferred >> LU_PAGE_SHIFT;
+		LASSERT(!(req->rq_bulk->bd_nob_transferred & ~LU_PAGE_MASK));
+
+		CDEBUG(D_INODE, "read %d(%d)/%d pages\n", rd_pgs, lu_pgs,
+		       op_data->op_npages);
+
+		mdc_adjust_dirpages(page_pool, rd_pgs, lu_pgs);
+
+		SetPageUptodate(page0);
+	}
+
+	unlock_page(page0);
+	ptlrpc_req_finished(req);
+	CDEBUG(D_CACHE, "read %d/%d pages\n", rd_pgs, npages);
+	for (i = 1; i < npages; i++) {
+		unsigned long	offset;
+		__u64		hash;
+		int		ret;
+
+		page = page_pool[i];
+
+		if (rc < 0 || i >= rd_pgs) {
+			page_cache_release(page);
+			continue;
+		}
+
+		SetPageUptodate(page);
+
+		dp = kmap(page);
+		hash = le64_to_cpu(dp->ldp_hash_start);
+		kunmap(page);
+
+		offset = hash_x_index(hash, rp->rp_hash64);
+
+		prefetchw(&page->flags);
+		ret = add_to_page_cache_lru(page, inode->i_mapping, offset,
+					    GFP_KERNEL);
+		if (ret == 0)
+			unlock_page(page);
+		else
+			CDEBUG(D_VFSTRACE, "page %lu add to page cache "
+			       "failed: rc = %d\n", offset, ret);
+		page_cache_release(page);
+	}
+
+	if (page_pool != &page0)
+		OBD_FREE(page_pool, sizeof(page_pool[0]) * max_pages);
+
+	RETURN(rc);
+}
+
+/**
+ * Read a directory page from the cache first; if it cannot be found there,
+ * read it from the server and add it to the cache.
+ */
+static int mdc_read_page(struct obd_export *exp, struct md_op_data *op_data,
+			 struct md_callback *cb_op, struct page **ppage)
+{
+	struct lookup_intent	it = { .it_op = IT_READDIR };
+	struct page		*page;
+	struct inode		*dir = op_data->op_data;
+	struct address_space	*mapping;
+	struct lu_dirpage	*dp;
+	__u64			start = 0;
+	__u64			end = 0;
+	struct lustre_handle	lockh;
+	struct ptlrpc_request	*enq_req = NULL;
+	struct readpage_param	rp_param;
+	int rc;
+
+	ENTRY;
+
+	*ppage = NULL;
+
+	LASSERT(dir != NULL);
+	mapping = dir->i_mapping;
+
+	rc = mdc_intent_lock(exp, op_data, &it, &enq_req,
+			     cb_op->md_blocking_ast, 0);
+	if (enq_req != NULL)
+		ptlrpc_req_finished(enq_req);
+
+	if (rc < 0) {
+		CERROR("%s: "DFID" lock enqueue fails: rc = %d\n",
+		       exp->exp_obd->obd_name, PFID(&op_data->op_fid1), rc);
+		RETURN(rc);
+	}
+
+	rc = 0;
+	mdc_set_lock_data(exp, &it.d.lustre.it_lock_handle, dir, NULL);
+
+	rp_param.rp_off = op_data->op_hash_offset;
+	rp_param.rp_hash64 = op_data->op_cli_flags & CLI_HASH64;
+	page = mdc_page_locate(mapping, &rp_param.rp_off, &start, &end,
+			       rp_param.rp_hash64);
+	if (IS_ERR(page)) {
+		CERROR("%s: dir page locate: "DFID" at "LPU64": rc %ld\n",
+		       exp->exp_obd->obd_name, PFID(&op_data->op_fid1),
+		       rp_param.rp_off, PTR_ERR(page));
+		GOTO(out_unlock, rc = PTR_ERR(page));
+	} else if (page != NULL) {
+		/*
+		 * XXX nikita: not entirely correct handling of a corner case:
+		 * suppose hash chain of entries with hash value HASH crosses
+		 * border between pages P0 and P1. First both P0 and P1 are
+		 * cached, seekdir() is called for some entry from the P0 part
+		 * of the chain. Later P0 goes out of cache. telldir(HASH)
+		 * happens and finds P1, as it starts with matching hash
+		 * value. Remaining entries from P0 part of the chain are
+		 * skipped. (Is that really a bug?)
+		 *
+		 * Possible solutions: 0. don't cache P1 in such a case,
+		 * handle it as an "overflow" page. 1. invalidate all pages
+		 * at once. 2. use HASH|1 as an index for P1.
+		 */
+		GOTO(hash_collision, page);
+	}
+
+	rp_param.rp_exp = exp;
+	rp_param.rp_mod = op_data;
+	page = read_cache_page(mapping,
+			       hash_x_index(rp_param.rp_off,
+					    rp_param.rp_hash64),
+			       mdc_read_page_remote, &rp_param);
+	if (IS_ERR(page)) {
+		CERROR("%s: read cache page: "DFID" at "LPU64": rc %ld\n",
+		       exp->exp_obd->obd_name, PFID(&op_data->op_fid1),
+		       rp_param.rp_off, PTR_ERR(page));
+		GOTO(out_unlock, rc = PTR_ERR(page));
+	}
+
+	wait_on_page_locked(page);
+	(void)kmap(page);
+	if (!PageUptodate(page)) {
+		CERROR("%s: page not updated: "DFID" at "LPU64": rc %d\n",
+		       exp->exp_obd->obd_name, PFID(&op_data->op_fid1),
+		       rp_param.rp_off, -5);
+		goto fail;
+	}
+	if (!PageChecked(page))
+		SetPageChecked(page);
+	if (PageError(page)) {
+		CERROR("%s: page error: "DFID" at "LPU64": rc %d\n",
+		       exp->exp_obd->obd_name, PFID(&op_data->op_fid1),
+		       rp_param.rp_off, -5);
+		goto fail;
+	}
+
+hash_collision:
+	dp = page_address(page);
+	if (BITS_PER_LONG == 32 && rp_param.rp_hash64) {
+		start = le64_to_cpu(dp->ldp_hash_start) >> 32;
+		end   = le64_to_cpu(dp->ldp_hash_end) >> 32;
+		rp_param.rp_off = op_data->op_hash_offset >> 32;
+	} else {
+		start = le64_to_cpu(dp->ldp_hash_start);
+		end   = le64_to_cpu(dp->ldp_hash_end);
+		rp_param.rp_off = op_data->op_hash_offset;
+	}
+	if (end == start) {
+		LASSERT(start == rp_param.rp_off);
+		CWARN("Page-wide hash collision: %#lx\n", (unsigned long)end);
+#if BITS_PER_LONG == 32
+		CWARN("Real page-wide hash collision at ["LPU64" "LPU64"] "
+		      "with hash "LPU64"\n", le64_to_cpu(dp->ldp_hash_start),
+		      le64_to_cpu(dp->ldp_hash_end),
+		      op_data->op_hash_offset);
+#endif
+
+		/*
+		 * Fetch whole overflow chain...
+		 *
+		 * XXX not yet.
+		 */
+		goto fail;
+	}
+	*ppage = page;
+out_unlock:
+	lockh.cookie = it.d.lustre.it_lock_handle;
+	ldlm_lock_decref(&lockh, it.d.lustre.it_lock_mode);
+	it.d.lustre.it_lock_handle = 0;
+	return rc;
+fail:
+	kunmap(page);
+	mdc_release_page(page, 1);
+	rc = -EIO;
+	goto out_unlock;
+}
+
+/**
+ * Read one directory entry from the cache.
+ */
+int mdc_read_entry(struct obd_export *exp, struct md_op_data *op_data,
+		   struct md_callback *cb_op, struct lu_dirent **entp,
+		   struct page **ppage)
+{
+	struct page		*page = NULL;
+	struct lu_dirpage	*dp;
+	struct lu_dirent	*ent;
+	int			rc = 0;
+	__u32			same_hash_count;
+	__u64			hash_offset = op_data->op_hash_offset;
+	ENTRY;
+
+	CDEBUG(D_INFO, DFID " offset = "LPU64", flags %#x\n",
+	       PFID(&op_data->op_fid1), op_data->op_hash_offset,
+	       op_data->op_cli_flags);
+
+	*ppage = NULL;
+	*entp = NULL;
+
+	if (op_data->op_hash_offset == MDS_DIR_END_OFF)
+		RETURN(0);
+
+	rc = mdc_read_page(exp, op_data, cb_op, &page);
+	if (rc != 0)
+		RETURN(rc);
+
+	/* same_hash_count is the number of entries with this
+	 * hash value that have already been read */
+	same_hash_count = op_data->op_same_hash_offset + 1;
+	dp = page_address(page);
+	for (ent = lu_dirent_start(dp); ent != NULL;
+	     ent = lu_dirent_next(ent)) {
+		/* Skip dummy entry */
+		if (le16_to_cpu(ent->lde_namelen) == 0)
+			continue;
+
+		if (le64_to_cpu(ent->lde_hash) < op_data->op_hash_offset)
+			continue;
+
+		if (unlikely(le64_to_cpu(ent->lde_hash) ==
+			     op_data->op_hash_offset)) {
+			/* If this is not a lookup for the next entry (which
+			 * usually comes from ll_dir_entry_start), return
+			 * this entry. */
+			if (!(op_data->op_cli_flags & CLI_NEXT_ENTRY))
+				break;
+
+			/* Keep reading until all of the entries already
+			 * read with this hash have been skipped. */
+			if (same_hash_count > 0) {
+				same_hash_count--;
+				continue;
+			}
+		}
+		break;
+	}
+
+	/* If the entry cannot be found in the current page, try the
+	 * next page. */
+	if (ent == NULL) {
+		if (le64_to_cpu(dp->ldp_hash_end) == MDS_DIR_END_OFF) {
+			op_data->op_same_hash_offset = 0;
+			mdc_release_page(page,
+				le32_to_cpu(dp->ldp_flags) & LDF_COLLIDE);
+			RETURN(0);
+		}
+
+		op_data->op_hash_offset = le64_to_cpu(dp->ldp_hash_end);
+		mdc_release_page(page,
+				 le32_to_cpu(dp->ldp_flags) & LDF_COLLIDE);
+		rc = mdc_read_page(exp, op_data, cb_op, &page);
+		if (rc != 0)
+			RETURN(rc);
+
+		if (page != NULL) {
+			dp = page_address(page);
+			ent = lu_dirent_start(dp);
+		}
+	}
+
+	/* If the next hash is the same as the current hash, increase
+	 * op_same_hash_offset to resolve the same-hash conflict */
+	if (ent != NULL && op_data->op_cli_flags & CLI_NEXT_ENTRY) {
+		if (unlikely(le64_to_cpu(ent->lde_hash) == hash_offset))
+			op_data->op_same_hash_offset++;
+		else
+			op_data->op_same_hash_offset = 0;
+	}
+
+	*ppage = page;
+	*entp = ent;
+	RETURN(rc);
+}
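
Taken together, mdc_read_page() and mdc_read_entry() give callers a simple
cursor interface over the hashed directory. Below is a sketch of a
readdir-style consumer, assuming only the contract visible above: *entp ==
NULL signals end-of-directory, and a returned page is kmapped (in
mdc_read_page()) and referenced, so it must be unmapped and released after
the entry is consumed. The walk_dir() function is hypothetical, and op_data
is assumed to be otherwise initialized (FID, intent data, and so on):

	static int walk_dir(struct obd_export *exp, struct md_op_data *op_data,
			    struct md_callback *cb_op)
	{
		struct lu_dirent *ent;
		struct page *page;
		int rc;

		op_data->op_hash_offset = 0;	/* start of the hash space */
		op_data->op_same_hash_offset = 0;

		while (1) {
			rc = mdc_read_entry(exp, op_data, cb_op, &ent, &page);
			if (rc != 0 || ent == NULL) {
				if (page != NULL) {	/* defensive: drop a
							 * page returned
							 * without an entry */
					kunmap(page);
					mdc_release_page(page, 0);
				}
				break;			/* error or end */
			}

			/* ... consume ent->lde_name / ent->lde_fid here ... */

			/* Ask for the entry after this one next time. */
			op_data->op_hash_offset = le64_to_cpu(ent->lde_hash);
			op_data->op_cli_flags |= CLI_NEXT_ENTRY;

			kunmap(page);		/* kmap from mdc_read_page() */
			mdc_release_page(page, 0);	/* keep page cached */
		}
		return rc;
	}
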
+
+#else /* __KERNEL__ */
+
+static struct page
+*mdc_read_page_remote(struct obd_export *exp, const struct lmv_oinfo *lmo,
+		      const __u64 hash, struct obd_capa *oc)
+{
+	struct ptlrpc_request *req = NULL;
+	struct page *page;
+	int rc;
+
+	OBD_PAGE_ALLOC(page, 0);
+	if (page == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	rc = mdc_getpage(exp, &lmo->lmo_fid, hash, oc, &page, 1, &req);
+	if (req != NULL)
+		ptlrpc_req_finished(req);
+
+	if (unlikely(rc)) {
+		OBD_PAGE_FREE(page);
+		return ERR_PTR(rc);
+	}
+	return page;
+}
+
+static int mdc_read_page(struct obd_export *exp, struct md_op_data *op_data,
+			 struct md_callback *cb_op,
+			 struct page **ppage)
+{
+	struct page *page;
+	struct lmv_oinfo *lmo;
+	int rc = 0;
+
+	/* No local cache for liblustre, always read entry remotely */
+	lmo = &op_data->op_mea1->lsm_md_oinfo[op_data->op_stripe_offset];
+	page = mdc_read_page_remote(exp, lmo, op_data->op_hash_offset,
+				    op_data->op_capa1);
+	if (IS_ERR(page))
+		return PTR_ERR(page);
+
+	*ppage = page;
+
+	return rc;
+}
+
+int mdc_read_entry(struct obd_export *exp, struct md_op_data *op_data,
+		   struct md_callback *cb_op, struct lu_dirent **entp,
+		   struct page **ppage)
+{
+	struct page		*page = NULL;
+	struct lu_dirpage	*dp;
+	struct lu_dirent	*ent;
+	int			rc;
+	ENTRY;
+
+	rc = mdc_read_page(exp, op_data, cb_op, &page);
+	if (rc != 0)
+		RETURN(rc);
+
+	dp = page_address(page);
+	if (le64_to_cpu(dp->ldp_hash_end) < op_data->op_hash_offset)
+		GOTO(out, *entp = NULL);
+
+	for (ent = lu_dirent_start(dp); ent != NULL;
+	     ent = lu_dirent_next(ent))
+		if (le64_to_cpu(ent->lde_hash) >= op_data->op_hash_offset)
+			break;
+	*entp = ent;
+out:
+	OBD_PAGE_FREE(page);
+	RETURN(rc);
+}
+
+#endif
+
 static int mdc_statfs(const struct lu_env *env,
 		      struct obd_export *exp, struct obd_statfs *osfs,
 		      __u64 max_age, __u32 flags)
@@ -1677,8 +2303,9 @@ out:
 static int mdc_ioc_changelog_send(struct obd_device *obd,
 				  struct ioc_changelog *icc)
 {
-	struct changelog_show *cs;
-	int rc;
+	struct changelog_show	*cs;
+	struct task_struct	*task;
+	int			 rc;

 	/* Freed in mdc_changelog_send_thread */
 	OBD_ALLOC_PTR(cs);
@@ -1695,16 +2322,20 @@ static int mdc_ioc_changelog_send(struct obd_device *obd,
 	 * New thread because we should return to user app before
 	 * writing into our pipe
 	 */
-	rc = PTR_ERR(kthread_run(mdc_changelog_send_thread, cs,
-				 "mdc_clg_send_thread"));
-	if (!IS_ERR_VALUE(rc)) {
-		CDEBUG(D_CHANGELOG, "start changelog thread\n");
-		return 0;
+	task = kthread_run(mdc_changelog_send_thread, cs,
+			   "mdc_clg_send_thread");
+	if (IS_ERR(task)) {
+		rc = PTR_ERR(task);
+		CERROR("%s: cannot start changelog thread: rc = %d\n",
+		       obd->obd_name, rc);
+		OBD_FREE_PTR(cs);
+	} else {
+		rc = 0;
+		CDEBUG(D_CHANGELOG, "%s: started changelog thread\n",
+		       obd->obd_name);
 	}
-	CERROR("Failed to start changelog thread: %d\n", rc);
-	OBD_FREE_PTR(cs);
-	return rc;
+	return rc;
 }

 static int mdc_ioc_hsm_ct_start(struct obd_export *exp,
@@ -1797,7 +2428,7 @@ static int mdc_quotactl(struct obd_device *unused, struct obd_export *exp,
 static int mdc_ioc_swap_layouts(struct obd_export *exp,
 				struct md_op_data *op_data)
 {
-	CFS_LIST_HEAD(cancels);
+	struct list_head cancels = LIST_HEAD_INIT(cancels);
 	struct ptlrpc_request	*req;
 	int			 rc, count;
 	struct mdc_swap_layouts *msl, *payload;
@@ -1812,9 +2443,11 @@ static int mdc_ioc_swap_layouts(struct obd_export *exp,
 	 * with the request RPC to avoid extra RPC round trips
 	 */
 	count = mdc_resource_get_unused(exp, &op_data->op_fid1, &cancels,
-					LCK_CR, MDS_INODELOCK_LAYOUT);
+					LCK_EX, MDS_INODELOCK_LAYOUT |
+					MDS_INODELOCK_XATTR);
 	count += mdc_resource_get_unused(exp, &op_data->op_fid2, &cancels,
-					 LCK_CR, MDS_INODELOCK_LAYOUT);
+					 LCK_EX, MDS_INODELOCK_LAYOUT |
+					 MDS_INODELOCK_XATTR);

 	req = ptlrpc_request_alloc(class_exp2cliimp(exp),
 				   &RQF_MDS_SWAP_LAYOUTS);
@@ -1857,7 +2490,6 @@ static int mdc_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
 	struct obd_device *obd = exp->exp_obd;
 	struct obd_ioctl_data *data = karg;
 	struct obd_import *imp = obd->u.cli.cl_import;
-	struct llog_ctxt *ctxt;
 	int rc;
 	ENTRY;

@@ -1910,22 +2542,6 @@ static int mdc_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
 	case IOC_OSC_SET_ACTIVE:
 		rc = ptlrpc_set_import_active(imp, data->ioc_offset);
 		GOTO(out, rc);
-	case OBD_IOC_PARSE: {
-		ctxt = llog_get_context(exp->exp_obd, LLOG_CONFIG_REPL_CTXT);
-		rc = class_config_parse_llog(NULL, ctxt, data->ioc_inlbuf1,
-					     NULL);
-		llog_ctxt_put(ctxt);
-		GOTO(out, rc);
-	}
-#ifdef __KERNEL__
-	case OBD_IOC_LLOG_INFO:
-	case OBD_IOC_LLOG_PRINT: {
-		ctxt = llog_get_context(obd, LLOG_CONFIG_REPL_CTXT);
-		rc = llog_ioctl(NULL, ctxt, cmd, data);
-		llog_ctxt_put(ctxt);
-		GOTO(out, rc);
-	}
-#endif
 	case OBD_IOC_POLL_QUOTACHECK:
 		rc = mdc_quota_poll_check(exp, (struct if_quotacheck *)karg);
 		GOTO(out, rc);
@@ -2237,22 +2853,50 @@ int mdc_set_info_async(const struct lu_env *env,
 }

 int mdc_get_info(const struct lu_env *env, struct obd_export *exp,
-                 __u32 keylen, void *key, __u32 *vallen, void *val,
-                 struct lov_stripe_md *lsm)
+		 __u32 keylen, void *key, __u32 *vallen, void *val,
+		 struct lov_stripe_md *lsm)
 {
-        int rc = -EINVAL;
+	int rc = -EINVAL;

-        if (KEY_IS(KEY_MAX_EASIZE)) {
-                int mdsize, *max_easize;
+	if (KEY_IS(KEY_MAX_EASIZE)) {
+		int mdsize, *max_easize;

-                if (*vallen != sizeof(int))
-                        RETURN(-EINVAL);
-                mdsize = *(int*)val;
-                if (mdsize > exp->exp_obd->u.cli.cl_max_mds_easize)
-                        exp->exp_obd->u.cli.cl_max_mds_easize = mdsize;
-                max_easize = val;
-                *max_easize = exp->exp_obd->u.cli.cl_max_mds_easize;
-                RETURN(0);
+		if (*vallen != sizeof(int))
+			RETURN(-EINVAL);
+		mdsize = *(int *)val;
+		if (mdsize > exp->exp_obd->u.cli.cl_max_mds_easize)
+			exp->exp_obd->u.cli.cl_max_mds_easize = mdsize;
+		max_easize = val;
+		*max_easize = exp->exp_obd->u.cli.cl_max_mds_easize;
+		RETURN(0);
+	} else if (KEY_IS(KEY_DEFAULT_EASIZE)) {
+		int *default_easize;
+
+		if (*vallen != sizeof(int))
+			RETURN(-EINVAL);
+		default_easize = val;
+		*default_easize = exp->exp_obd->u.cli.cl_default_mds_easize;
+		RETURN(0);
+	} else if (KEY_IS(KEY_MAX_COOKIESIZE)) {
+		int mdsize, *max_cookiesize;
+
+		if (*vallen != sizeof(int))
+			RETURN(-EINVAL);
+		mdsize = *(int *)val;
+		if (mdsize > exp->exp_obd->u.cli.cl_max_mds_cookiesize)
+			exp->exp_obd->u.cli.cl_max_mds_cookiesize = mdsize;
+		max_cookiesize = val;
+		*max_cookiesize = exp->exp_obd->u.cli.cl_max_mds_cookiesize;
+		RETURN(0);
+	} else if (KEY_IS(KEY_DEFAULT_COOKIESIZE)) {
+		int *default_cookiesize;
+
+		if (*vallen != sizeof(int))
+			RETURN(-EINVAL);
+		default_cookiesize = val;
+		*default_cookiesize =
+			exp->exp_obd->u.cli.cl_default_mds_cookiesize;
+		RETURN(0);
 	} else if (KEY_IS(KEY_CONN_DATA)) {
 		struct obd_import *imp = class_exp2cliimp(exp);
 		struct obd_connect_data *data = val;
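
The new KEY_DEFAULT_EASIZE/KEY_DEFAULT_COOKIESIZE branches follow the usual
obd_get_info() convention: the caller passes a buffer of sizeof(int), and
*vallen must match or the call fails with -EINVAL. A sketch of a caller-side
query (get_default_easize() is a hypothetical wrapper, not part of the patch):

	static int get_default_easize(const struct lu_env *env,
				      struct obd_export *exp, int *easize)
	{
		__u32 vallen = sizeof(*easize);

		/* Returns -EINVAL if the buffer length does not match. */
		return obd_get_info(env, exp, sizeof(KEY_DEFAULT_EASIZE),
				    KEY_DEFAULT_EASIZE, &vallen, easize,
				    NULL);
	}
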
@@ -2444,13 +3088,13 @@ static int mdc_import_event(struct obd_device *obd, struct obd_import *imp,
 	RETURN(rc);
 }

-int mdc_fid_alloc(struct obd_export *exp, struct lu_fid *fid,
-		  struct md_op_data *op_data)
+int mdc_fid_alloc(const struct lu_env *env, struct obd_export *exp,
+		  struct lu_fid *fid, struct md_op_data *op_data)
 {
-	struct client_obd *cli = &exp->exp_obd->u.cli;
-	struct lu_client_seq *seq = cli->cl_seq;
-	ENTRY;
-	RETURN(seq_client_alloc_fid(NULL, seq, fid));
+	struct client_obd *cli = &exp->exp_obd->u.cli;
+	struct lu_client_seq *seq = cli->cl_seq;
+	ENTRY;
+	RETURN(seq_client_alloc_fid(env, seq, fid));
 }

 struct obd_uuid *mdc_get_uuid(struct obd_export *exp) {
@@ -2463,18 +3107,18 @@ struct obd_uuid *mdc_get_uuid(struct obd_export *exp) {
  * recovery; a nonzero value will be returned if the lock can be canceled,
  * or zero if it cannot */
-static int mdc_cancel_for_recovery(struct ldlm_lock *lock)
+static int mdc_cancel_weight(struct ldlm_lock *lock)
 {
-	if (lock->l_resource->lr_type != LDLM_IBITS)
-		RETURN(0);
+	if (lock->l_resource->lr_type != LDLM_IBITS)
+		RETURN(0);

-	/* FIXME: if we ever get into a situation where there are too many
-	 * opened files with open locks on a single node, then we really
-	 * should replay these open locks to reget it */
-	if (lock->l_policy_data.l_inodebits.bits & MDS_INODELOCK_OPEN)
-		RETURN(0);
+	/* FIXME: if we ever get into a situation where there are too many
+	 * opened files with open locks on a single node, then we really
+	 * should replay these open locks to reget it */
+	if (lock->l_policy_data.l_inodebits.bits & MDS_INODELOCK_OPEN)
+		RETURN(0);

-	RETURN(1);
+	RETURN(1);
 }

 static int mdc_resource_inode_free(struct ldlm_resource *res)
@@ -2486,13 +3130,12 @@ static int mdc_resource_inode_free(struct ldlm_resource *res)
 }

 struct ldlm_valblock_ops inode_lvbo = {
-	lvbo_free: mdc_resource_inode_free
+	.lvbo_free = mdc_resource_inode_free
 };

 static int mdc_setup(struct obd_device *obd, struct lustre_cfg *cfg)
 {
 	struct client_obd *cli = &obd->u.cli;
-	struct lprocfs_static_vars lvars = { 0 };
 	int rc;
 	ENTRY;

@@ -2513,13 +3156,15 @@ static int mdc_setup(struct obd_device *obd, struct lustre_cfg *cfg)
 	rc = client_obd_setup(obd, cfg);
 	if (rc)
 		GOTO(err_close_lock, rc);
-	lprocfs_mdc_init_vars(&lvars);
-	lprocfs_obd_setup(obd, lvars.obd_vars);
+#ifdef LPROCFS
+	obd->obd_vars = lprocfs_mdc_obd_vars;
+	lprocfs_seq_obd_setup(obd);
 	lprocfs_alloc_md_stats(obd, 0);
-	sptlrpc_lprocfs_cliobd_attach(obd);
-	ptlrpc_lprocfs_register_obd(obd);
+#endif
+	sptlrpc_lprocfs_cliobd_attach(obd);
+	ptlrpc_lprocfs_register_obd(obd);

-	ns_register_cancel(obd->obd_namespace, mdc_cancel_for_recovery);
+	ns_register_cancel(obd->obd_namespace, mdc_cancel_weight);

 	obd->obd_namespace->ns_lvbo = &inode_lvbo;

@@ -2541,26 +3186,33 @@ err_rpc_lock:
 }

 /* Initialize the default and maximum LOV EA and cookie sizes.  This allows
- * us to make MDS RPCs with large enough reply buffers to hold the
- * maximum-sized (= maximum striped) EA and cookie without having to
- * calculate this (via a call into the LOV + OSCs) each time we make an RPC. */
+ * us to make MDS RPCs with large enough reply buffers to hold a default
+ * sized EA and cookie without having to calculate this (via a call into the
+ * LOV + OSCs) each time we make an RPC.  The maximum size is also tracked
+ * but not used to avoid wastefully vmalloc()'ing large reply buffers when
+ * a large number of stripes is possible.  If a larger reply buffer is
+ * required it will be reallocated in the ptlrpc layer due to overflow.
+ */
 static int mdc_init_ea_size(struct obd_export *exp, int easize,
-			    int def_easize, int cookiesize)
+			    int def_easize, int cookiesize,
+			    int def_cookiesize)
 {
-	struct obd_device *obd = exp->exp_obd;
-	struct client_obd *cli = &obd->u.cli;
-	ENTRY;
+	struct obd_device *obd = exp->exp_obd;
+	struct client_obd *cli = &obd->u.cli;
+	ENTRY;

-	if (cli->cl_max_mds_easize < easize)
-		cli->cl_max_mds_easize = easize;
+	if (cli->cl_max_mds_easize < easize)
+		cli->cl_max_mds_easize = easize;

-	if (cli->cl_default_mds_easize < def_easize)
-		cli->cl_default_mds_easize = def_easize;
+	if (cli->cl_default_mds_easize < def_easize)
+		cli->cl_default_mds_easize = def_easize;

-	if (cli->cl_max_mds_cookiesize < cookiesize)
-		cli->cl_max_mds_cookiesize = cookiesize;
+	if (cli->cl_max_mds_cookiesize < cookiesize)
+		cli->cl_max_mds_cookiesize = cookiesize;

-	RETURN(0);
+	if (cli->cl_default_mds_cookiesize < def_cookiesize)
+		cli->cl_default_mds_cookiesize = def_cookiesize;
+
+	RETURN(0);
 }

 static int mdc_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage)
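
Note that all four comparisons in mdc_init_ea_size() only ever grow the
cached values, so repeated calls from different targets converge on the
largest size seen. For example (the values are arbitrary, and call sites
would normally reach this function through the m_init_ea_size method rather
than directly):

	mdc_init_ea_size(exp, 1024, 512, 256, 128); /* max ea 1024, def 512 */
	mdc_init_ea_size(exp,  768, 640, 256, 128); /* max ea stays 1024,
						     * def ea grows to 640  */
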
@@ -2640,19 +3292,9 @@ static int mdc_llog_finish(struct obd_device *obd, int count)
 static int mdc_process_config(struct obd_device *obd, obd_count len, void *buf)
 {
 	struct lustre_cfg *lcfg = buf;
-	struct lprocfs_static_vars lvars = { 0 };
-	int rc = 0;
-
-	lprocfs_mdc_init_vars(&lvars);
-	switch (lcfg->lcfg_command) {
-	default:
-		rc = class_process_proc_param(PARAM_MDC, lvars.obd_vars,
-					      lcfg, obd);
-		if (rc > 0)
-			rc = 0;
-		break;
-	}
-	return(rc);
+	int rc = class_process_proc_seq_param(PARAM_MDC, obd->obd_vars,
+					      lcfg, obd);
+	return (rc > 0 ? 0 : rc);
 }

@@ -2794,8 +3436,8 @@ struct md_ops mdc_md_ops = {
 	.m_setattr          = mdc_setattr,
 	.m_setxattr         = mdc_setxattr,
 	.m_getxattr         = mdc_getxattr,
-	.m_fsync            = mdc_fsync,
-	.m_readpage         = mdc_readpage,
+	.m_fsync            = mdc_fsync,
+	.m_read_entry       = mdc_read_entry,
 	.m_unlink           = mdc_unlink,
 	.m_cancel_unused    = mdc_cancel_unused,
 	.m_init_ea_size     = mdc_init_ea_size,
@@ -2814,13 +3456,11 @@ struct md_ops mdc_md_ops = {

 int __init mdc_init(void)
 {
-	int rc;
-	struct lprocfs_static_vars lvars = { 0 };
-	lprocfs_mdc_init_vars(&lvars);
-
-	rc = class_register_type(&mdc_obd_ops, &mdc_md_ops, lvars.module_vars,
-				 LUSTRE_MDC_NAME, NULL);
-	RETURN(rc);
+	return class_register_type(&mdc_obd_ops, &mdc_md_ops, true, NULL,
+#ifndef HAVE_ONLY_PROCFS_SEQ
+				   NULL,
+#endif
+				   LUSTRE_MDC_NAME, NULL);
 }

 #ifdef __KERNEL__