Whamcloud - gitweb
branch: HEAD
[fs/lustre-release.git] / lustre / ptlrpc / sec.c
index 59fedf7..beaf09d 100644 (file)
@@ -66,7 +66,7 @@
  * policy registers                            *
  ***********************************************/
 
-static rwlock_t policy_lock = RW_LOCK_UNLOCKED;
+static rwlock_t policy_lock;
 static struct ptlrpc_sec_policy *policies[SPTLRPC_POLICY_MAX] = {
         NULL,
 };
@@ -118,12 +118,13 @@ int sptlrpc_unregister_policy(struct ptlrpc_sec_policy *policy)
 EXPORT_SYMBOL(sptlrpc_unregister_policy);
 
 static
-struct ptlrpc_sec_policy * sptlrpc_rpcflavor2policy(__u16 flavor)
+struct ptlrpc_sec_policy * sptlrpc_wireflavor2policy(__u32 flavor)
 {
         static DECLARE_MUTEX(load_mutex);
         static atomic_t           loaded = ATOMIC_INIT(0);
         struct ptlrpc_sec_policy *policy;
-        __u16                     number = RPC_FLVR_POLICY(flavor), flag = 0;
+        __u16                     number = SPTLRPC_FLVR_POLICY(flavor);
+        __u16                     flag = 0;
 
         if (number >= SPTLRPC_POLICY_MAX)
                 return NULL;
@@ -157,7 +158,7 @@ struct ptlrpc_sec_policy * sptlrpc_rpcflavor2policy(__u16 flavor)
         return policy;
 }
 
-__u16 sptlrpc_name2rpcflavor(const char *name)
+__u32 sptlrpc_name2flavor_base(const char *name)
 {
         if (!strcmp(name, "null"))
                 return SPTLRPC_FLVR_NULL;
@@ -174,51 +175,86 @@ __u16 sptlrpc_name2rpcflavor(const char *name)
 
         return SPTLRPC_FLVR_INVALID;
 }
-EXPORT_SYMBOL(sptlrpc_name2rpcflavor);
+EXPORT_SYMBOL(sptlrpc_name2flavor_base);
 
-const char *sptlrpc_rpcflavor2name(__u16 flavor)
+const char *sptlrpc_flavor2name_base(__u32 flvr)
 {
-        switch (flavor) {
-        case SPTLRPC_FLVR_NULL:
+        __u32   base = SPTLRPC_FLVR_BASE(flvr);
+
+        if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_NULL))
                 return "null";
-        case SPTLRPC_FLVR_PLAIN:
+        else if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_PLAIN))
                 return "plain";
-        case SPTLRPC_FLVR_KRB5N:
+        else if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_KRB5N))
                 return "krb5n";
-        case SPTLRPC_FLVR_KRB5A:
+        else if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_KRB5A))
                 return "krb5a";
-        case SPTLRPC_FLVR_KRB5I:
+        else if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_KRB5I))
                 return "krb5i";
-        case SPTLRPC_FLVR_KRB5P:
+        else if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_KRB5P))
                 return "krb5p";
-        default:
-                CERROR("invalid rpc flavor 0x%x(p%u,s%u,v%u)\n", flavor,
-                       RPC_FLVR_POLICY(flavor), RPC_FLVR_MECH(flavor),
-                       RPC_FLVR_SVC(flavor));
-        }
-        return "unknown";
+
+        CERROR("invalid wire flavor 0x%x\n", flvr);
+        return "invalid";
 }
-EXPORT_SYMBOL(sptlrpc_rpcflavor2name);
+EXPORT_SYMBOL(sptlrpc_flavor2name_base);
 
-int sptlrpc_flavor2name(struct sptlrpc_flavor *sf, char *buf, int bufsize)
+char *sptlrpc_flavor2name_bulk(struct sptlrpc_flavor *sf,
+                               char *buf, int bufsize)
 {
-        char           *bulk;
-
-        if (sf->sf_bulk_ciph != BULK_CIPH_ALG_NULL)
-                bulk = "bulkp";
-        else if (sf->sf_bulk_hash != BULK_HASH_ALG_NULL)
-                bulk = "bulki";
+        if (SPTLRPC_FLVR_POLICY(sf->sf_rpc) == SPTLRPC_POLICY_PLAIN)
+                snprintf(buf, bufsize, "hash:%s",
+                         sptlrpc_get_hash_name(sf->u_bulk.hash.hash_alg));
         else
-                bulk = "bulkn";
+                snprintf(buf, bufsize, "%s",
+                         sptlrpc_flavor2name_base(sf->sf_rpc));
 
-        snprintf(buf, bufsize, "%s-%s:%s/%s",
-                 sptlrpc_rpcflavor2name(sf->sf_rpc), bulk,
-                 sptlrpc_get_hash_name(sf->sf_bulk_hash),
-                 sptlrpc_get_ciph_name(sf->sf_bulk_ciph));
-        return 0;
+        buf[bufsize - 1] = '\0';
+        return buf;
+}
+EXPORT_SYMBOL(sptlrpc_flavor2name_bulk);
+
+/**
+ * Format the full name of flavor @sf into @buf: the base RPC flavor name,
+ * followed by "-<bulk spec>" when the policy is plain.
+ *
+ * @bufsize is the size of @buf and must be positive; output that does not
+ * fit is truncated, and the result is always NUL-terminated. Returns @buf.
+ */
+char *sptlrpc_flavor2name(struct sptlrpc_flavor *sf, char *buf, int bufsize)
+{
+        snprintf(buf, bufsize, "%s", sptlrpc_flavor2name_base(sf->sf_rpc));
+
+        /*
+         * currently we don't support customized bulk specification for
+         * flavors other than plain
+         */
+        if (SPTLRPC_FLVR_POLICY(sf->sf_rpc) == SPTLRPC_POLICY_PLAIN) {
+                char bspec[16];
+
+                bspec[0] = '-';
+                sptlrpc_flavor2name_bulk(sf, &bspec[1], sizeof(bspec) - 1);
+                /*
+                 * the size argument of strncat() limits the bytes appended,
+                 * not the destination size, so only pass what is left
+                 * after the base name and the terminating NUL.
+                 */
+                strncat(buf, bspec, bufsize - strlen(buf) - 1);
+        }
+
+        buf[bufsize - 1] = '\0';
+        return buf;
 }
 EXPORT_SYMBOL(sptlrpc_flavor2name);
 
+/**
+ * Render the PTLRPC_SEC_FL_* bits set in @flags into @buf as a list of
+ * names, each followed by a comma ("-," when none of the known bits is set).
+ *
+ * @bufsize is the size of @buf and must be positive; output that does not
+ * fit is truncated, and the result is always NUL-terminated. Returns @buf.
+ */
+char *sptlrpc_secflags2str(__u32 flags, char *buf, int bufsize)
+{
+        buf[0] = '\0';
+
+        /*
+         * strncat() takes the maximum number of bytes to append, not the
+         * size of the destination, so pass only the space still free
+         * (minus the terminating NUL) on every call.
+         */
+        if (flags & PTLRPC_SEC_FL_REVERSE)
+                strncat(buf, "reverse,", bufsize - strlen(buf) - 1);
+        if (flags & PTLRPC_SEC_FL_ROOTONLY)
+                strncat(buf, "rootonly,", bufsize - strlen(buf) - 1);
+        if (flags & PTLRPC_SEC_FL_UDESC)
+                strncat(buf, "udesc,", bufsize - strlen(buf) - 1);
+        if (flags & PTLRPC_SEC_FL_BULK)
+                strncat(buf, "bulk,", bufsize - strlen(buf) - 1);
+        if (buf[0] == '\0')
+                strncat(buf, "-,", bufsize - strlen(buf) - 1);
+
+        buf[bufsize - 1] = '\0';
+        return buf;
+}
+EXPORT_SYMBOL(sptlrpc_secflags2str);
+
 /**************************************************
  * client context APIs                            *
  **************************************************/
@@ -289,7 +325,7 @@ void sptlrpc_cli_ctx_wakeup(struct ptlrpc_cli_ctx *ctx)
         spin_lock(&ctx->cc_lock);
         list_for_each_entry_safe(req, next, &ctx->cc_req_list, rq_ctx_chain) {
                 list_del_init(&req->rq_ctx_chain);
-                ptlrpc_wake_client_req(req);
+                ptlrpc_client_wake_req(req);
         }
         spin_unlock(&ctx->cc_lock);
 }
@@ -305,7 +341,7 @@ int sptlrpc_cli_ctx_display(struct ptlrpc_cli_ctx *ctx, char *buf, int bufsize)
         return ctx->cc_ops->display(ctx, buf, bufsize);
 }
 
-static int sptlrpc_import_sec_check_expire(struct obd_import *imp)
+static int import_sec_check_expire(struct obd_import *imp)
 {
         int     adapt = 0;
 
@@ -324,34 +360,47 @@ static int sptlrpc_import_sec_check_expire(struct obd_import *imp)
         return sptlrpc_import_sec_adapt(imp, NULL, 0);
 }
 
-int sptlrpc_req_get_ctx(struct ptlrpc_request *req)
+/*
+ * Run the pending expiry check on @imp (when imp_sec_expire is set) and
+ * take a reference on its current sec.
+ *
+ * On success returns 0 with the referenced sec stored in *@sec; the caller
+ * drops it with sptlrpc_sec_put(). On failure returns the error from the
+ * expiry check, or -EACCES if the import has no sec or the sec is dying;
+ * no reference is left held in that case.
+ */
+static int import_sec_validate_get(struct obd_import *imp,
+                                   struct ptlrpc_sec **sec)
 {
-        struct obd_import *imp = req->rq_import;
-        struct ptlrpc_sec *sec;
-        int                rc;
-        ENTRY;
-
-        LASSERT(!req->rq_cli_ctx);
-        LASSERT(imp);
+        int     rc;
 
         if (unlikely(imp->imp_sec_expire)) {
-                rc = sptlrpc_import_sec_check_expire(imp);
+                rc = import_sec_check_expire(imp);
                 if (rc)
-                        RETURN(rc);
+                        return rc;
         }
 
-        sec = sptlrpc_import_sec_ref(imp);
-        if (sec == NULL) {
-                CERROR("import %p (%s) with no ptlrpc_sec\n",
+        *sec = sptlrpc_import_sec_ref(imp);
+        if (*sec == NULL) {
+                CERROR("import %p (%s) with no sec\n",
                        imp, ptlrpc_import_state_name(imp->imp_state));
-                RETURN(-EACCES);
+                return -EACCES;
         }
 
-        if (unlikely(sec->ps_dying)) {
+        if (unlikely((*sec)->ps_dying)) {
-                CERROR("attempt to use dying sec %p\n", sec);
+                /* @sec is a double pointer here: log the sec, not the slot */
+                CERROR("attempt to use dying sec %p\n", *sec);
+                sptlrpc_sec_put(*sec);
                 return -EACCES;
         }
 
+        return 0;
+}
+
+int sptlrpc_req_get_ctx(struct ptlrpc_request *req)
+{
+        struct obd_import *imp = req->rq_import;
+        struct ptlrpc_sec *sec;
+        int                rc;
+        ENTRY;
+
+        LASSERT(!req->rq_cli_ctx);
+        LASSERT(imp);
+
+        rc = import_sec_validate_get(imp, &sec);
+        if (rc)
+                RETURN(rc);
+
         req->rq_cli_ctx = get_my_ctx(sec);
 
         sptlrpc_sec_put(sec);
@@ -447,9 +496,13 @@ int sptlrpc_req_ctx_switch(struct ptlrpc_request *req,
         return rc;
 }
 
-/*
+/**
+ * if current context has died, or if we resend after flavor switched,
+ * call this func to switch context. if no switch is needed, request
+ * will end up with the same context.
+ *
  * request must have a context. in any case of failure, restore the
- * restore the old one. a request must have a ctx.
+ * old one - a request must have a context.
  */
 int sptlrpc_req_replace_dead_ctx(struct ptlrpc_request *req)
 {
@@ -459,7 +512,6 @@ int sptlrpc_req_replace_dead_ctx(struct ptlrpc_request *req)
         ENTRY;
 
         LASSERT(oldctx);
-        LASSERT(test_bit(PTLRPC_CTX_DEAD_BIT, &oldctx->cc_flags));
 
         sptlrpc_cli_ctx_get(oldctx);
         sptlrpc_req_put_ctx(req, 0);
@@ -477,13 +529,16 @@ int sptlrpc_req_replace_dead_ctx(struct ptlrpc_request *req)
         LASSERT(newctx);
 
         if (unlikely(newctx == oldctx)) {
-                /*
-                 * still get the old ctx, usually means system busy
-                 */
-                CWARN("ctx (%p, fl %lx) doesn't switch, relax a little bit\n",
-                      newctx, newctx->cc_flags);
-
-                cfs_schedule_timeout(CFS_TASK_INTERRUPTIBLE, HZ);
+                if (test_bit(PTLRPC_CTX_DEAD_BIT, &oldctx->cc_flags)) {
+                        /*
+                         * still get the old ctx, usually means system busy
+                         */
+                        CWARN("ctx (%p, fl %lx) doesn't switch, "
+                              "relax a little bit\n",
+                              newctx, newctx->cc_flags);
+
+                        cfs_schedule_timeout(CFS_TASK_INTERRUPTIBLE, HZ);
+                }
         } else {
                 rc = sptlrpc_req_ctx_switch(req, oldctx, newctx);
                 if (rc) {
@@ -518,7 +573,7 @@ int ctx_refresh_timeout(void *data)
         /* conn_cnt is needed in expire_one_request */
         lustre_msg_set_conn_cnt(req->rq_reqmsg, req->rq_import->imp_conn_cnt);
 
-        rc = ptlrpc_expire_one_request(req);
+        rc = ptlrpc_expire_one_request(req, 1);
         /* if we started recovery, we should mark this ctx dead; otherwise
          * in case of lgssd died nobody would retire this ctx, following
          * connecting will still find the same ctx thus cause deadlock.
@@ -564,23 +619,35 @@ void req_off_ctx_list(struct ptlrpc_request *req, struct ptlrpc_cli_ctx *ctx)
 int sptlrpc_req_refresh_ctx(struct ptlrpc_request *req, long timeout)
 {
         struct ptlrpc_cli_ctx  *ctx = req->rq_cli_ctx;
+        struct ptlrpc_sec      *sec;
         struct l_wait_info      lwi;
         int                     rc;
         ENTRY;
 
         LASSERT(ctx);
 
+        if (req->rq_ctx_init || req->rq_ctx_fini)
+                RETURN(0);
+
         /*
          * during the process a request's context might change type even
          * (e.g. from gss ctx to plain ctx), so each loop we need to re-check
          * everything
          */
 again:
-        /* skip special ctxs */
-        if (cli_ctx_is_eternal(ctx) || req->rq_ctx_init || req->rq_ctx_fini)
+        rc = import_sec_validate_get(req->rq_import, &sec);
+        if (rc)
+                RETURN(rc);
+
+        if (sec->ps_flvr.sf_rpc != req->rq_flvr.sf_rpc)
+                sptlrpc_req_replace_dead_ctx(req);
+
+        sptlrpc_sec_put(sec);
+
+        if (cli_ctx_is_eternal(ctx))
                 RETURN(0);
 
-        if (test_bit(PTLRPC_CTX_NEW_BIT, &ctx->cc_flags)) {
+        if (unlikely(test_bit(PTLRPC_CTX_NEW_BIT, &ctx->cc_flags))) {
                 LASSERT(ctx->cc_ops->refresh);
                 ctx->cc_ops->refresh(ctx);
         }
@@ -630,6 +697,15 @@ again:
         }
 
         if (unlikely(test_bit(PTLRPC_CTX_DEAD_BIT, &ctx->cc_flags))) {
+                /*
+                 * don't switch ctx if import was deactivated
+                 */
+                if (req->rq_import->imp_deactive) {
+                        req_off_ctx_list(req, ctx);
+                        req->rq_err = 1;
+                        RETURN(-EINTR);
+                }
+
                 rc = sptlrpc_req_replace_dead_ctx(req);
                 if (rc) {
                         LASSERT(ctx == req->rq_cli_ctx);
@@ -659,9 +735,8 @@ again:
                 list_add(&req->rq_ctx_chain, &ctx->cc_req_list);
         spin_unlock(&ctx->cc_lock);
 
-        if (timeout < 0) {
+        if (timeout < 0)
                 RETURN(-EWOULDBLOCK);
-        }
 
         /* Clear any flags that may be present from previous sends */
         LASSERT(req->rq_receiving_reply == 0);
@@ -682,7 +757,7 @@ again:
          * - timedout, and we don't want recover from the failure;
          * - timedout, and waked up upon recovery finished;
          * - someone else mark this ctx dead by force;
-         * - someone invalidate the req and call wake_client_req(),
+         * - someone invalidate the req and call ptlrpc_client_wake_req(),
          *   e.g. ptlrpc_abort_inflight();
          */
         if (!cli_ctx_is_refreshed(ctx)) {
@@ -713,9 +788,11 @@ void sptlrpc_req_set_flavor(struct ptlrpc_request *req, int opcode)
         /* special security flags accoding to opcode */
         switch (opcode) {
         case OST_READ:
+        case MDS_READPAGE:
                 req->rq_bulk_read = 1;
                 break;
         case OST_WRITE:
+        case MDS_WRITEPAGE:
                 req->rq_bulk_write = 1;
                 break;
         case SEC_CTX_INIT:
@@ -744,9 +821,9 @@ void sptlrpc_req_set_flavor(struct ptlrpc_request *req, int opcode)
         /* force SVC_NULL for context initiation rpc, SVC_INTG for context
          * destruction rpc */
         if (unlikely(req->rq_ctx_init))
-                rpc_flvr_set_svc(&req->rq_flvr.sf_rpc, SPTLRPC_SVC_NULL);
+                flvr_set_svc(&req->rq_flvr.sf_rpc, SPTLRPC_SVC_NULL);
         else if (unlikely(req->rq_ctx_fini))
-                rpc_flvr_set_svc(&req->rq_flvr.sf_rpc, SPTLRPC_SVC_INTG);
+                flvr_set_svc(&req->rq_flvr.sf_rpc, SPTLRPC_SVC_INTG);
 
         /* user descriptor flag, null security can't do it anyway */
         if ((sec->ps_flvr.sf_flags & PTLRPC_SEC_FL_UDESC) &&
@@ -755,14 +832,13 @@ void sptlrpc_req_set_flavor(struct ptlrpc_request *req, int opcode)
 
         /* bulk security flag */
         if ((req->rq_bulk_read || req->rq_bulk_write) &&
-            (req->rq_flvr.sf_bulk_ciph != BULK_CIPH_ALG_NULL ||
-             req->rq_flvr.sf_bulk_hash != BULK_HASH_ALG_NULL))
+            sptlrpc_flavor_has_bulk(&req->rq_flvr))
                 req->rq_pack_bulk = 1;
 }
 
 void sptlrpc_request_out_callback(struct ptlrpc_request *req)
 {
-        if (RPC_FLVR_SVC(req->rq_flvr.sf_rpc) != SPTLRPC_SVC_PRIV)
+        if (SPTLRPC_FLVR_SVC(req->rq_flvr.sf_rpc) != SPTLRPC_SVC_PRIV)
                 return;
 
         LASSERT(req->rq_clrbuf);
@@ -794,7 +870,7 @@ int sptlrpc_import_check_ctx(struct obd_import *imp)
         sptlrpc_sec_put(sec);
 
         if (!ctx)
-                RETURN(1);
+                RETURN(-ENOMEM);
 
         if (cli_ctx_is_eternal(ctx) ||
             ctx->cc_ops->validate(ctx) == 0) {
@@ -802,6 +878,11 @@ int sptlrpc_import_check_ctx(struct obd_import *imp)
                 RETURN(0);
         }
 
+        if (cli_ctx_is_error(ctx)) {
+                sptlrpc_cli_ctx_put(ctx, 1);
+                RETURN(-EACCES);
+        }
+
         OBD_ALLOC_PTR(req);
         if (!req)
                 RETURN(-ENOMEM);
@@ -811,6 +892,7 @@ int sptlrpc_import_check_ctx(struct obd_import *imp)
         CFS_INIT_LIST_HEAD(&req->rq_ctx_chain);
         cfs_waitq_init(&req->rq_reply_waitq);
         req->rq_import = imp;
+        req->rq_flvr = sec->ps_flvr;
         req->rq_cli_ctx = ctx;
 
         rc = sptlrpc_req_refresh_ctx(req, 0);
@@ -840,7 +922,7 @@ int sptlrpc_cli_wrap_request(struct ptlrpc_request *req)
                         RETURN(rc);
         }
 
-        switch (RPC_FLVR_SVC(req->rq_flvr.sf_rpc)) {
+        switch (SPTLRPC_FLVR_SVC(req->rq_flvr.sf_rpc)) {
         case SPTLRPC_SVC_NULL:
         case SPTLRPC_SVC_AUTH:
         case SPTLRPC_SVC_INTG:
@@ -868,7 +950,7 @@ static int do_cli_unwrap_reply(struct ptlrpc_request *req)
 {
         struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
         int                    rc;
-        __u16                  rpc_flvr;
+        __u32                  flvr;
         ENTRY;
 
         LASSERT(ctx);
@@ -884,26 +966,26 @@ static int do_cli_unwrap_reply(struct ptlrpc_request *req)
         }
 
         /* v2 message, check request/reply policy match */
-        rpc_flvr = WIRE_FLVR_RPC(req->rq_repdata->lm_secflvr);
+        flvr = WIRE_FLVR(req->rq_repdata->lm_secflvr);
 
         if (req->rq_repdata->lm_magic == LUSTRE_MSG_MAGIC_V2_SWABBED)
-                __swab16s(&rpc_flvr);
+                __swab32s(&flvr);
 
-        if (RPC_FLVR_POLICY(rpc_flvr) !=
-            RPC_FLVR_POLICY(req->rq_flvr.sf_rpc)) {
+        if (SPTLRPC_FLVR_POLICY(flvr) !=
+            SPTLRPC_FLVR_POLICY(req->rq_flvr.sf_rpc)) {
                 CERROR("request policy was %u while reply with %u\n",
-                       RPC_FLVR_POLICY(req->rq_flvr.sf_rpc),
-                       RPC_FLVR_POLICY(rpc_flvr));
+                       SPTLRPC_FLVR_POLICY(req->rq_flvr.sf_rpc),
+                       SPTLRPC_FLVR_POLICY(flvr));
                 RETURN(-EPROTO);
         }
 
         /* do nothing if it's null policy; otherwise unpack the
          * wrapper message */
-        if (RPC_FLVR_POLICY(rpc_flvr) != SPTLRPC_POLICY_NULL &&
+        if (SPTLRPC_FLVR_POLICY(flvr) != SPTLRPC_POLICY_NULL &&
             lustre_unpack_msg(req->rq_repdata, req->rq_repdata_len))
                 RETURN(-EPROTO);
 
-        switch (RPC_FLVR_SVC(req->rq_flvr.sf_rpc)) {
+        switch (SPTLRPC_FLVR_SVC(req->rq_flvr.sf_rpc)) {
         case SPTLRPC_SVC_NULL:
         case SPTLRPC_SVC_AUTH:
         case SPTLRPC_SVC_INTG:
@@ -933,7 +1015,8 @@ int sptlrpc_cli_unwrap_reply(struct ptlrpc_request *req)
         LASSERT(req->rq_repmsg == NULL);
         LASSERT(req->rq_reply_off + req->rq_nob_received <= req->rq_repbuf_len);
 
-        if (req->rq_reply_off == 0) {
+        if (req->rq_reply_off == 0 &&
+            (lustre_msghdr_get_flags(req->rq_reqmsg) & MSGHDR_AT_SUPPORT)) {
                 CERROR("real reply with offset 0\n");
                 return -EPROTO;
         }
@@ -950,114 +1033,108 @@ int sptlrpc_cli_unwrap_reply(struct ptlrpc_request *req)
         return do_cli_unwrap_reply(req);
 }
 
-/*
+/**
  * Upon called, the receive buffer might be still posted, so the reply data
  * might be changed at any time, no matter we're holding rq_lock or not. we
  * expect the rq_reply_off be 0, rq_nob_received is the early reply size.
  *
- * we allocate a separate buffer to hold early reply data, pointed by
- * rq_repdata, rq_repdata_len is the early reply size, and round up to power2
- * is the actual buffer size.
- *
- * caller _must_ call sptlrpc_cli_finish_early_reply() after this, before
- * process another early reply or real reply, to restore ptlrpc_request
- * to normal status.
+ * we allocate separate ptlrpc_request and reply buffer for early reply
+ * processing, return 0 and @req_ret is a duplicated ptlrpc_request. caller
+ * must call sptlrpc_cli_finish_early_reply() on the returned request to
+ * release it. if anything goes wrong @req_ret will not be set.
  */
-int sptlrpc_cli_unwrap_early_reply(struct ptlrpc_request *req)
+int sptlrpc_cli_unwrap_early_reply(struct ptlrpc_request *req,
+                                   struct ptlrpc_request **req_ret)
 {
-        struct lustre_msg      *early_buf;
+        struct ptlrpc_request  *early_req;
+        char                   *early_buf;
         int                     early_bufsz, early_size;
         int                     rc;
         ENTRY;
 
-        LASSERT(req->rq_repbuf);
-        LASSERT(req->rq_repdata == NULL);
-        LASSERT(req->rq_repmsg == NULL);
+        OBD_ALLOC_PTR(early_req);
+        if (early_req == NULL)
+                RETURN(-ENOMEM);
 
         early_size = req->rq_nob_received;
-        if (early_size < sizeof(struct lustre_msg)) {
-                CERROR("early reply length %d too small\n", early_size);
-                RETURN(-EPROTO);
-        }
-
         early_bufsz = size_roundup_power2(early_size);
         OBD_ALLOC(early_buf, early_bufsz);
         if (early_buf == NULL)
-                RETURN(-ENOMEM);
+                GOTO(err_req, rc = -ENOMEM);
 
-        /* copy data out, do it inside spinlock */
+        /* sanity checks and copy data out, do it inside spinlock */
         spin_lock(&req->rq_lock);
 
         if (req->rq_replied) {
                 spin_unlock(&req->rq_lock);
-                GOTO(err_free, rc = -EALREADY);
+                GOTO(err_buf, rc = -EALREADY);
         }
 
+        LASSERT(req->rq_repbuf);
+        LASSERT(req->rq_repdata == NULL);
+        LASSERT(req->rq_repmsg == NULL);
+
         if (req->rq_reply_off != 0) {
                 CERROR("early reply with offset %u\n", req->rq_reply_off);
-                GOTO(err_free, rc = -EPROTO);
+                spin_unlock(&req->rq_lock);
+                GOTO(err_buf, rc = -EPROTO);
         }
 
         if (req->rq_nob_received != early_size) {
                 /* even another early arrived the size should be the same */
-                CWARN("data size has changed from %u to %u\n",
-                      early_size, req->rq_nob_received);
+                CERROR("data size has changed from %u to %u\n",
+                       early_size, req->rq_nob_received);
                 spin_unlock(&req->rq_lock);
-                GOTO(err_free, rc = -EINVAL);
+                GOTO(err_buf, rc = -EINVAL);
         }
 
         if (req->rq_nob_received < sizeof(struct lustre_msg)) {
                 CERROR("early reply length %d too small\n",
                        req->rq_nob_received);
                 spin_unlock(&req->rq_lock);
-                GOTO(err_free, rc = -EALREADY);
+                GOTO(err_buf, rc = -EALREADY);
         }
 
         memcpy(early_buf, req->rq_repbuf, early_size);
         spin_unlock(&req->rq_lock);
 
-        req->rq_repdata = early_buf;
-        req->rq_repdata_len = early_size;
-
-        rc = do_cli_unwrap_reply(req);
-
-        /* treate resend as an error case. in fact server should never ask
-         * resend via early reply. */
-        if (req->rq_resend) {
-                req->rq_resend = 0;
-                rc = -EPROTO;
-        }
+        early_req->rq_cli_ctx = sptlrpc_cli_ctx_get(req->rq_cli_ctx);
+        early_req->rq_flvr = req->rq_flvr;
+        early_req->rq_repbuf = early_buf;
+        early_req->rq_repbuf_len = early_bufsz;
+        early_req->rq_repdata = (struct lustre_msg *) early_buf;
+        early_req->rq_repdata_len = early_size;
+        early_req->rq_early = 1;
 
+        rc = do_cli_unwrap_reply(early_req);
         if (rc) {
-                LASSERT(req->rq_repmsg == NULL);
-                req->rq_repdata = NULL;
-                req->rq_repdata_len = 0;
-                GOTO(err_free, rc);
+                DEBUG_REQ(D_ADAPTTO, early_req,
+                          "error %d unwrap early reply", rc);
+                GOTO(err_ctx, rc);
         }
 
-        LASSERT(req->rq_repmsg);
+        LASSERT(early_req->rq_repmsg);
+        *req_ret = early_req;
         RETURN(0);
 
-err_free:
+err_ctx:
+        sptlrpc_cli_ctx_put(early_req->rq_cli_ctx, 1);
+err_buf:
         OBD_FREE(early_buf, early_bufsz);
+err_req:
+        OBD_FREE_PTR(early_req);
         RETURN(rc);
 }
 
-int sptlrpc_cli_finish_early_reply(struct ptlrpc_request *req)
+void sptlrpc_cli_finish_early_reply(struct ptlrpc_request *early_req)
 {
-        int     early_bufsz;
-
-        LASSERT(req->rq_repdata);
-        LASSERT(req->rq_repdata_len);
-        LASSERT(req->rq_repmsg);
-
-        early_bufsz = size_roundup_power2(req->rq_repdata_len);
-        OBD_FREE(req->rq_repdata, early_bufsz);
+        LASSERT(early_req->rq_repbuf);
+        LASSERT(early_req->rq_repdata);
+        LASSERT(early_req->rq_repmsg);
 
-        req->rq_repdata = NULL;
-        req->rq_repdata_len = 0;
-        req->rq_repmsg = NULL;
-        return 0;
+        sptlrpc_cli_ctx_put(early_req->rq_cli_ctx, 1);
+        OBD_FREE(early_req->rq_repbuf, early_req->rq_repbuf_len);
+        OBD_FREE_PTR(early_req);
 }
 
 /**************************************************
@@ -1148,7 +1225,7 @@ void sptlrpc_sec_put(struct ptlrpc_sec *sec)
 EXPORT_SYMBOL(sptlrpc_sec_put);
 
 /*
- * it's policy module responsible for taking refrence of import
+ * the policy module is responsible for taking a reference on the import
  */
 static
 struct ptlrpc_sec * sptlrpc_sec_create(struct obd_import *imp,
@@ -1158,6 +1235,7 @@ struct ptlrpc_sec * sptlrpc_sec_create(struct obd_import *imp,
 {
         struct ptlrpc_sec_policy *policy;
         struct ptlrpc_sec        *sec;
+        char                      str[32];
         ENTRY;
 
         if (svc_ctx) {
@@ -1166,7 +1244,7 @@ struct ptlrpc_sec * sptlrpc_sec_create(struct obd_import *imp,
                 CDEBUG(D_SEC, "%s %s: reverse sec using flavor %s\n",
                        imp->imp_obd->obd_type->typ_name,
                        imp->imp_obd->obd_name,
-                       sptlrpc_rpcflavor2name(sf->sf_rpc));
+                       sptlrpc_flavor2name(sf, str, sizeof(str)));
 
                 policy = sptlrpc_policy_get(svc_ctx->sc_policy);
                 sf->sf_flags |= PTLRPC_SEC_FL_REVERSE | PTLRPC_SEC_FL_ROOTONLY;
@@ -1176,9 +1254,9 @@ struct ptlrpc_sec * sptlrpc_sec_create(struct obd_import *imp,
                 CDEBUG(D_SEC, "%s %s: select security flavor %s\n",
                        imp->imp_obd->obd_type->typ_name,
                        imp->imp_obd->obd_name,
-                       sptlrpc_rpcflavor2name(sf->sf_rpc));
+                       sptlrpc_flavor2name(sf, str, sizeof(str)));
 
-                policy = sptlrpc_rpcflavor2policy(sf->sf_rpc);
+                policy = sptlrpc_wireflavor2policy(sf->sf_rpc);
                 if (!policy) {
                         CERROR("invalid flavor 0x%x\n", sf->sf_rpc);
                         RETURN(NULL);
@@ -1232,118 +1310,111 @@ static void sptlrpc_import_sec_install(struct obd_import *imp,
         }
 }
 
+/*
+ * Return non-zero when the two flavors are byte-for-byte identical,
+ * zero otherwise.
+ * NOTE(review): a raw memory compare also covers any padding or unused
+ * union bytes in struct sptlrpc_flavor - confirm they are always zeroed.
+ */
+static inline int flavor_equal(struct sptlrpc_flavor *sf1,
+                               struct sptlrpc_flavor *sf2)
+{
+        return !memcmp(sf1, sf2, sizeof(*sf1));
+}
+
+/* Overwrite @dst with a copy of the whole flavor @src. */
+static inline void flavor_copy(struct sptlrpc_flavor *dst,
+                               struct sptlrpc_flavor *src)
+{
+        *dst = *src;
+}
+
 static void sptlrpc_import_sec_adapt_inplace(struct obd_import *imp,
                                              struct ptlrpc_sec *sec,
                                              struct sptlrpc_flavor *sf)
 {
-        if (sf->sf_bulk_ciph != sec->ps_flvr.sf_bulk_ciph ||
-            sf->sf_bulk_hash != sec->ps_flvr.sf_bulk_hash) {
-                CWARN("imp %p (%s->%s): changing bulk flavor %s/%s -> %s/%s\n",
-                      imp, imp->imp_obd->obd_name,
-                      obd_uuid2str(&imp->imp_connection->c_remote_uuid),
-                      sptlrpc_get_ciph_name(sec->ps_flvr.sf_bulk_ciph),
-                      sptlrpc_get_hash_name(sec->ps_flvr.sf_bulk_hash),
-                      sptlrpc_get_ciph_name(sf->sf_bulk_ciph),
-                      sptlrpc_get_hash_name(sf->sf_bulk_hash));
-
-                spin_lock(&sec->ps_lock);
-                sec->ps_flvr.sf_bulk_ciph = sf->sf_bulk_ciph;
-                sec->ps_flvr.sf_bulk_hash = sf->sf_bulk_hash;
-                spin_unlock(&sec->ps_lock);
-        }
+        char    str1[32], str2[32];
 
-        if (!equi(sf->sf_flags & PTLRPC_SEC_FL_UDESC,
-                  sec->ps_flvr.sf_flags & PTLRPC_SEC_FL_UDESC)) {
-                CWARN("imp %p (%s->%s): %s shipping user descriptor\n",
-                      imp, imp->imp_obd->obd_name,
-                      obd_uuid2str(&imp->imp_connection->c_remote_uuid),
-                      (sf->sf_flags & PTLRPC_SEC_FL_UDESC) ? "start" : "stop");
+        if (sec->ps_flvr.sf_flags != sf->sf_flags)
+                CWARN("changing sec flags: %s -> %s\n",
+                      sptlrpc_secflags2str(sec->ps_flvr.sf_flags,
+                                           str1, sizeof(str1)),
+                      sptlrpc_secflags2str(sf->sf_flags,
+                                           str2, sizeof(str2)));
 
-                spin_lock(&sec->ps_lock);
-                sec->ps_flvr.sf_flags &= ~PTLRPC_SEC_FL_UDESC;
-                sec->ps_flvr.sf_flags |= sf->sf_flags & PTLRPC_SEC_FL_UDESC;
-                spin_unlock(&sec->ps_lock);
-        }
+        spin_lock(&sec->ps_lock);
+        flavor_copy(&sec->ps_flvr, sf);
+        spin_unlock(&sec->ps_lock);
 }
 
 /*
- * for normal import, @svc_ctx should be NULL and @rpc_flavor is ignored;
- * for reverse import, @svc_ctx and @rpc_flavor is from incoming request.
+ * for normal import, @svc_ctx should be NULL and @flvr is ignored;
+ * for reverse import, @svc_ctx and @flvr are from the incoming request.
  */
 int sptlrpc_import_sec_adapt(struct obd_import *imp,
                              struct ptlrpc_svc_ctx *svc_ctx,
-                             __u16 rpc_flavor)
+                             struct sptlrpc_flavor *flvr)
 {
         struct ptlrpc_connection   *conn;
         struct sptlrpc_flavor       sf;
         struct ptlrpc_sec          *sec, *newsec;
         enum lustre_sec_part        sp;
-        int                         rc;
+        char                        str[24];
+        int                         rc = 0;
+        ENTRY;
+
+        might_sleep();
 
         if (imp == NULL)
-                return 0;
+                RETURN(0);
 
         conn = imp->imp_connection;
 
         if (svc_ctx == NULL) {
-                /* normal import, determine flavor from rule set */
-                sptlrpc_rule_set_choose(&imp->imp_obd->u.cli.cl_sptlrpc_rset,
-                                        LUSTRE_SP_ANY, conn->c_self, &sf);
-
-                sp = imp->imp_obd->u.cli.cl_sec_part;
+                struct client_obd *cliobd = &imp->imp_obd->u.cli;
+                /*
+                 * normal import, determine flavor from rule set, except
+                 * for mgc the flavor is predetermined.
+                 */
+                if (cliobd->cl_sp_me == LUSTRE_SP_MGC)
+                        sf = cliobd->cl_flvr_mgc;
+                else 
+                        sptlrpc_conf_choose_flavor(cliobd->cl_sp_me,
+                                                   cliobd->cl_sp_to,
+                                                   &cliobd->cl_target_uuid,
+                                                   conn->c_self, &sf);
+
+                sp = imp->imp_obd->u.cli.cl_sp_me;
         } else {
                 /* reverse import, determine flavor from incoming reqeust */
-                sf.sf_rpc = rpc_flavor;
-                sf.sf_bulk_ciph = BULK_CIPH_ALG_NULL;
-                sf.sf_bulk_hash = BULK_HASH_ALG_NULL;
-                sf.sf_flags = PTLRPC_SEC_FL_REVERSE | PTLRPC_SEC_FL_ROOTONLY;
+                sf = *flvr;
+
+                if (sf.sf_rpc != SPTLRPC_FLVR_NULL)
+                        sf.sf_flags = PTLRPC_SEC_FL_REVERSE |
+                                      PTLRPC_SEC_FL_ROOTONLY;
 
                 sp = sptlrpc_target_sec_part(imp->imp_obd);
         }
 
         sec = sptlrpc_import_sec_ref(imp);
         if (sec) {
-                if (svc_ctx == NULL) {
-                        /* normal import, only check rpc flavor, if just bulk
-                         * flavor or flags changed, we can handle it on the fly
-                         * without switching sec. */
-                        if (sf.sf_rpc == sec->ps_flvr.sf_rpc) {
-                                sptlrpc_import_sec_adapt_inplace(imp, sec, &sf);
-
-                                rc = 0;
-                                goto out;
-                        }
-                } else {
-                        /* reverse import, do not compare bulk flavor */
-                        if (sf.sf_rpc == sec->ps_flvr.sf_rpc) {
-                                rc = 0;
-                                goto out;
-                        }
-                }
+                char    str2[24];
 
-                CWARN("%simport %p (%s%s%s): changing flavor "
-                      "(%s, %s/%s) -> (%s, %s/%s)\n",
-                      svc_ctx ? "reverse " : "",
-                      imp, imp->imp_obd->obd_name,
-                      svc_ctx == NULL ? "->" : "<-",
+                if (flavor_equal(&sf, &sec->ps_flvr))
+                        GOTO(out, rc);
+
+                CWARN("import %s->%s: changing flavor %s -> %s\n",
+                      imp->imp_obd->obd_name,
                       obd_uuid2str(&conn->c_remote_uuid),
-                      sptlrpc_rpcflavor2name(sec->ps_flvr.sf_rpc),
-                      sptlrpc_get_hash_name(sec->ps_flvr.sf_bulk_hash),
-                      sptlrpc_get_ciph_name(sec->ps_flvr.sf_bulk_ciph),
-                      sptlrpc_rpcflavor2name(sf.sf_rpc),
-                      sptlrpc_get_hash_name(sf.sf_bulk_hash),
-                      sptlrpc_get_ciph_name(sf.sf_bulk_ciph));
+                      sptlrpc_flavor2name(&sec->ps_flvr, str, sizeof(str)),
+                      sptlrpc_flavor2name(&sf, str2, sizeof(str2)));
+
+                if (SPTLRPC_FLVR_POLICY(sf.sf_rpc) ==
+                    SPTLRPC_FLVR_POLICY(sec->ps_flvr.sf_rpc) &&
+                    SPTLRPC_FLVR_MECH(sf.sf_rpc) ==
+                    SPTLRPC_FLVR_MECH(sec->ps_flvr.sf_rpc)) {
+                        sptlrpc_import_sec_adapt_inplace(imp, sec, &sf);
+                        GOTO(out, rc);
+                }
         } else {
-                CWARN("%simport %p (%s%s%s) netid %x: "
-                      "select initial flavor (%s, %s/%s)\n",
-                      svc_ctx == NULL ? "" : "reverse ",
-                      imp, imp->imp_obd->obd_name,
-                      svc_ctx == NULL ? "->" : "<-",
+                CWARN("import %s->%s netid %x: select flavor %s\n",
+                      imp->imp_obd->obd_name,
                       obd_uuid2str(&conn->c_remote_uuid),
                       LNET_NIDNET(conn->c_self),
-                      sptlrpc_rpcflavor2name(sf.sf_rpc),
-                      sptlrpc_get_hash_name(sf.sf_bulk_hash),
-                      sptlrpc_get_ciph_name(sf.sf_bulk_ciph));
+                      sptlrpc_flavor2name(&sf, str, sizeof(str)));
         }
 
         mutex_down(&imp->imp_sec_mutex);
@@ -1351,19 +1422,17 @@ int sptlrpc_import_sec_adapt(struct obd_import *imp,
         newsec = sptlrpc_sec_create(imp, svc_ctx, &sf, sp);
         if (newsec) {
                 sptlrpc_import_sec_install(imp, newsec);
-                rc = 0;
         } else {
-                CERROR("%simport %p (%s): failed to create new sec\n",
-                       svc_ctx == NULL ? "" : "reverse ",
-                       imp, obd_uuid2str(&conn->c_remote_uuid));
+                CERROR("import %s->%s: failed to create new sec\n",
+                       imp->imp_obd->obd_name,
+                       obd_uuid2str(&conn->c_remote_uuid));
                 rc = -EPERM;
         }
 
         mutex_up(&imp->imp_sec_mutex);
-
 out:
         sptlrpc_sec_put(sec);
-        return 0;
+        RETURN(rc);
 }
 
 void sptlrpc_import_sec_put(struct obd_import *imp)
@@ -1392,12 +1461,6 @@ static void import_flush_ctx_common(struct obd_import *imp,
         sptlrpc_sec_put(sec);
 }
 
-void sptlrpc_import_inval_all_ctx(struct obd_import *imp)
-{
-        /* use grace == 0 */
-        import_flush_ctx_common(imp, -1, 0, 1);
-}
-
 void sptlrpc_import_flush_root_ctx(struct obd_import *imp)
 {
         /* it's important to use grace mode, see explain in
@@ -1604,12 +1667,13 @@ static int flavor_allowed(struct sptlrpc_flavor *exp,
 {
         struct sptlrpc_flavor *flvr = &req->rq_flvr;
 
-        if (exp->sf_rpc == flvr->sf_rpc)
+        if (exp->sf_rpc == SPTLRPC_FLVR_ANY || exp->sf_rpc == flvr->sf_rpc)
                 return 1;
 
         if ((req->rq_ctx_init || req->rq_ctx_fini) &&
-            RPC_FLVR_POLICY(exp->sf_rpc) == RPC_FLVR_POLICY(flvr->sf_rpc) &&
-            RPC_FLVR_MECH(exp->sf_rpc) == RPC_FLVR_MECH(flvr->sf_rpc))
+            SPTLRPC_FLVR_POLICY(exp->sf_rpc) ==
+            SPTLRPC_FLVR_POLICY(flvr->sf_rpc) &&
+            SPTLRPC_FLVR_MECH(exp->sf_rpc) == SPTLRPC_FLVR_MECH(flvr->sf_rpc))
                 return 1;
 
         return 0;
@@ -1674,7 +1738,7 @@ int sptlrpc_target_export_check(struct obd_export *exp,
                 spin_unlock(&exp->exp_lock);
 
                 return sptlrpc_import_sec_adapt(exp->exp_imp_reverse,
-                                                req->rq_svc_ctx, flavor.sf_rpc);
+                                                req->rq_svc_ctx, &flavor);
         }
 
         /* if it equals to the current flavor, we accept it, but need to
@@ -1708,7 +1772,7 @@ int sptlrpc_target_export_check(struct obd_export *exp,
 
                         return sptlrpc_import_sec_adapt(exp->exp_imp_reverse,
                                                         req->rq_svc_ctx,
-                                                        flavor.sf_rpc);
+                                                        &flavor);
                 } else {
                         CDEBUG(D_SEC, "exp %p (%x|%x|%x): is current flavor, "
                                "install rvs ctx\n", exp, exp->exp_flvr.sf_rpc,
@@ -1775,11 +1839,23 @@ int sptlrpc_target_export_check(struct obd_export *exp,
 
         spin_unlock(&exp->exp_lock);
 
-        CWARN("req %p: (%u|%u|%u|%u|%u) with unauthorized flavor %x\n",
+        CWARN("exp %p(%s): req %p (%u|%u|%u|%u|%u) with "
+              "unauthorized flavor %x, expect %x|%x(%+ld)|%x(%+ld)\n",
+              exp, exp->exp_obd->obd_name,
               req, req->rq_auth_gss, req->rq_ctx_init, req->rq_ctx_fini,
-              req->rq_auth_usr_root, req->rq_auth_usr_mdt, req->rq_flvr.sf_rpc);
+              req->rq_auth_usr_root, req->rq_auth_usr_mdt, req->rq_flvr.sf_rpc,
+              exp->exp_flvr.sf_rpc,
+              exp->exp_flvr_old[0].sf_rpc,
+              exp->exp_flvr_expire[0] ?
+              (unsigned long) (exp->exp_flvr_expire[0] -
+                               cfs_time_current_sec()) : 0,
+              exp->exp_flvr_old[1].sf_rpc,
+              exp->exp_flvr_expire[1] ?
+              (unsigned long) (exp->exp_flvr_expire[1] -
+                               cfs_time_current_sec()) : 0);
         return -EACCES;
 }
+EXPORT_SYMBOL(sptlrpc_target_export_check);
 
 void sptlrpc_target_update_exp_flavor(struct obd_device *obd,
                                       struct sptlrpc_rule_set *rset)
@@ -1799,15 +1875,16 @@ void sptlrpc_target_update_exp_flavor(struct obd_device *obd,
                  * (exp_flvr_changed == 1), this will override the
                  * previous one. */
                 spin_lock(&exp->exp_lock);
-                sptlrpc_rule_set_choose(rset, exp->exp_sp_peer,
-                                        exp->exp_connection->c_peer.nid,
-                                        &new_flvr);
+                sptlrpc_target_choose_flavor(rset, exp->exp_sp_peer,
+                                             exp->exp_connection->c_peer.nid,
+                                             &new_flvr);
                 if (exp->exp_flvr_changed ||
-                    memcmp(&new_flvr, &exp->exp_flvr, sizeof(new_flvr))) {
+                    !flavor_equal(&new_flvr, &exp->exp_flvr)) {
                         exp->exp_flvr_old[1] = new_flvr;
                         exp->exp_flvr_expire[1] = 0;
                         exp->exp_flvr_changed = 1;
                         exp->exp_flvr_adapt = 1;
+
                         CDEBUG(D_SEC, "exp %p (%s): updated flavor %x->%x\n",
                                exp, sptlrpc_part2name(exp->exp_sp_peer),
                                exp->exp_flvr.sf_rpc,
@@ -1829,6 +1906,7 @@ static int sptlrpc_svc_check_from(struct ptlrpc_request *req, int svc_rc)
         case LUSTRE_SP_CLI:
         case LUSTRE_SP_MDT:
         case LUSTRE_SP_OST:
+        case LUSTRE_SP_MGC:
         case LUSTRE_SP_MGS:
         case LUSTRE_SP_ANY:
                 break;
@@ -1866,13 +1944,14 @@ static int sptlrpc_svc_check_from(struct ptlrpc_request *req, int svc_rc)
 int sptlrpc_svc_unwrap_request(struct ptlrpc_request *req)
 {
         struct ptlrpc_sec_policy *policy;
-        struct lustre_msg *msg = req->rq_reqbuf;
-        int rc;
+        struct lustre_msg        *msg = req->rq_reqbuf;
+        int                       rc;
         ENTRY;
 
         LASSERT(msg);
         LASSERT(req->rq_reqmsg == NULL);
         LASSERT(req->rq_repmsg == NULL);
+        LASSERT(req->rq_svc_ctx == NULL);
 
         req->rq_sp_from = LUSTRE_SP_ANY;
         req->rq_auth_uid = INVALID_UID;
@@ -1884,19 +1963,28 @@ int sptlrpc_svc_unwrap_request(struct ptlrpc_request *req)
         }
 
         /*
-         * v2 message.
+         * only expect v2 message.
          */
-        if (msg->lm_magic == LUSTRE_MSG_MAGIC_V2)
-                req->rq_flvr.sf_rpc = WIRE_FLVR_RPC(msg->lm_secflvr);
-        else
-                req->rq_flvr.sf_rpc = WIRE_FLVR_RPC(__swab32(msg->lm_secflvr));
+        switch (msg->lm_magic) {
+        case LUSTRE_MSG_MAGIC_V2:
+                req->rq_flvr.sf_rpc = WIRE_FLVR(msg->lm_secflvr);
+                break;
+        case LUSTRE_MSG_MAGIC_V2_SWABBED:
+                req->rq_flvr.sf_rpc = WIRE_FLVR(__swab32(msg->lm_secflvr));
+                break;
+        default:
+                CERROR("invalid magic %x\n", msg->lm_magic);
+                RETURN(SECSVC_DROP);
+        }
 
         /* unpack the wrapper message if the policy is not null */
-        if ((RPC_FLVR_POLICY(req->rq_flvr.sf_rpc) != SPTLRPC_POLICY_NULL) &&
-             lustre_unpack_msg(msg, req->rq_reqdata_len))
+        if (SPTLRPC_FLVR_POLICY(req->rq_flvr.sf_rpc) != SPTLRPC_POLICY_NULL &&
+            lustre_unpack_msg(msg, req->rq_reqdata_len)) {
+                CERROR("invalid wrapper msg format\n");
                 RETURN(SECSVC_DROP);
+        }
 
-        policy = sptlrpc_rpcflavor2policy(req->rq_flvr.sf_rpc);
+        policy = sptlrpc_wireflavor2policy(req->rq_flvr.sf_rpc);
         if (!policy) {
                 CERROR("unsupported rpc flavor %x\n", req->rq_flvr.sf_rpc);
                 RETURN(SECSVC_DROP);
@@ -1906,22 +1994,11 @@ int sptlrpc_svc_unwrap_request(struct ptlrpc_request *req)
         rc = policy->sp_sops->accept(req);
 
         LASSERT(req->rq_reqmsg || rc != SECSVC_OK);
+        LASSERT(req->rq_svc_ctx || rc == SECSVC_DROP);
         sptlrpc_policy_put(policy);
 
         /* sanity check for the request source */
         rc = sptlrpc_svc_check_from(req, rc);
-
-        /* FIXME move to proper place */
-        if (rc == SECSVC_OK) {
-                __u32 opc = lustre_msg_get_opc(req->rq_reqmsg);
-
-                if (opc == OST_WRITE)
-                        req->rq_bulk_write = 1;
-                else if (opc == OST_READ)
-                        req->rq_bulk_read = 1;
-        }
-
-        LASSERT(req->rq_svc_ctx || rc == SECSVC_DROP);
         RETURN(rc);
 }
 
@@ -2046,11 +2123,11 @@ int sptlrpc_cli_wrap_bulk(struct ptlrpc_request *req,
 {
         struct ptlrpc_cli_ctx *ctx;
 
+        LASSERT(req->rq_bulk_read || req->rq_bulk_write);
+
         if (!req->rq_pack_bulk)
                 return 0;
 
-        LASSERT(req->rq_bulk_read || req->rq_bulk_write);
-
         ctx = req->rq_cli_ctx;
         if (ctx->cc_ops->wrap_bulk)
                 return ctx->cc_ops->wrap_bulk(ctx, req, desc);
@@ -2058,79 +2135,61 @@ int sptlrpc_cli_wrap_bulk(struct ptlrpc_request *req,
 }
 EXPORT_SYMBOL(sptlrpc_cli_wrap_bulk);
 
-static
-void pga_to_bulk_desc(int nob, obd_count pg_count, struct brw_page **pga,
-                      struct ptlrpc_bulk_desc *desc)
-{
-        int i;
-
-        LASSERT(pga);
-        LASSERT(*pga);
-
-        for (i = 0; i < pg_count && nob > 0; i++) {
-#ifdef __KERNEL__
-                desc->bd_iov[i].kiov_page = pga[i]->pg;
-                desc->bd_iov[i].kiov_len = pga[i]->count > nob ?
-                                           nob : pga[i]->count;
-                desc->bd_iov[i].kiov_offset = pga[i]->off & ~CFS_PAGE_MASK;
-#else
-                /* FIXME currently liblustre doesn't support bulk encryption.
-                 * if we do, check again following may not be right. */
-                LASSERTF(0, "Bulk encryption not implemented for liblustre\n");
-                desc->bd_iov[i].iov_base = pga[i]->pg->addr;
-                desc->bd_iov[i].iov_len = pga[i]->count > nob ?
-                                           nob : pga[i]->count;
-#endif
-
-                desc->bd_iov_count++;
-                nob -= pga[i]->count;
-        }
-}
-
+/* return desc->bd_nob_transferred (the nob of plain text actually received),
+ * or the negative error code from the ctx unwrap_bulk op. @nob is unused.
+ */
 int sptlrpc_cli_unwrap_bulk_read(struct ptlrpc_request *req,
-                                 int nob, obd_count pg_count,
-                                 struct brw_page **pga)
+                                 struct ptlrpc_bulk_desc *desc,
+                                 int nob)
 {
-        struct ptlrpc_bulk_desc *desc;
-        struct ptlrpc_cli_ctx *ctx;
-        int rc = 0;
-
-        if (!req->rq_pack_bulk)
-                return 0;
+        struct ptlrpc_cli_ctx  *ctx;
+        int                     rc;
 
         LASSERT(req->rq_bulk_read && !req->rq_bulk_write);
 
-        OBD_ALLOC(desc, offsetof(struct ptlrpc_bulk_desc, bd_iov[pg_count]));
-        if (desc == NULL) {
-                CERROR("out of memory, can't verify bulk read data\n");
-                return -ENOMEM;
-        }
-
-        pga_to_bulk_desc(nob, pg_count, pga, desc);
+        if (!req->rq_pack_bulk)
+                return desc->bd_nob_transferred;
 
         ctx = req->rq_cli_ctx;
-        if (ctx->cc_ops->unwrap_bulk)
+        if (ctx->cc_ops->unwrap_bulk) {
                 rc = ctx->cc_ops->unwrap_bulk(ctx, req, desc);
-
-        OBD_FREE(desc, offsetof(struct ptlrpc_bulk_desc, bd_iov[pg_count]));
-
-        return rc;
+                if (rc < 0)
+                        return rc;
+        }
+        return desc->bd_nob_transferred;
 }
 EXPORT_SYMBOL(sptlrpc_cli_unwrap_bulk_read);
 
+/* return 0 for success (also when bulk is not packed); otherwise the negative
+ * rc from the ctx unwrap_bulk op, or -EPROTO if nob != nob_transferred.
+ */
 int sptlrpc_cli_unwrap_bulk_write(struct ptlrpc_request *req,
                                   struct ptlrpc_bulk_desc *desc)
 {
-        struct ptlrpc_cli_ctx *ctx;
+        struct ptlrpc_cli_ctx  *ctx;
+        int                     rc;
+
+        LASSERT(!req->rq_bulk_read && req->rq_bulk_write);
 
         if (!req->rq_pack_bulk)
                 return 0;
 
-        LASSERT(!req->rq_bulk_read && req->rq_bulk_write);
-
         ctx = req->rq_cli_ctx;
-        if (ctx->cc_ops->unwrap_bulk)
-                return ctx->cc_ops->unwrap_bulk(ctx, req, desc);
+        if (ctx->cc_ops->unwrap_bulk) {
+                rc = ctx->cc_ops->unwrap_bulk(ctx, req, desc);
+                if (rc < 0)
+                        return rc;
+        }
+
+        /*
+         * if everything is going right, nob should equal nob_transferred.
+         * in case of privacy mode, nob_transferred needs to be adjusted.
+         */
+        if (desc->bd_nob != desc->bd_nob_transferred) {
+                CERROR("nob %d doesn't match transferred nob %d\n",
+                       desc->bd_nob, desc->bd_nob_transferred);
+                return -EPROTO;
+        }
 
         return 0;
 }
@@ -2141,11 +2200,11 @@ int sptlrpc_svc_wrap_bulk(struct ptlrpc_request *req,
 {
         struct ptlrpc_svc_ctx *ctx;
 
+        LASSERT(req->rq_bulk_read);
+
         if (!req->rq_pack_bulk)
                 return 0;
 
-        LASSERT(req->rq_bulk_read || req->rq_bulk_write);
-
         ctx = req->rq_svc_ctx;
         if (ctx->sc_policy->sp_sops->wrap_bulk)
                 return ctx->sc_policy->sp_sops->wrap_bulk(req, desc);
@@ -2158,20 +2217,50 @@ int sptlrpc_svc_unwrap_bulk(struct ptlrpc_request *req,
                             struct ptlrpc_bulk_desc *desc)
 {
         struct ptlrpc_svc_ctx *ctx;
+        int                    rc;
+
+        LASSERT(req->rq_bulk_write);
+
+        if (desc->bd_nob_transferred != desc->bd_nob &&
+            SPTLRPC_FLVR_BULK_SVC(req->rq_flvr.sf_rpc) !=
+            SPTLRPC_BULK_SVC_PRIV) {
+                DEBUG_REQ(D_ERROR, req, "truncated bulk GET %d(%d)",
+                          desc->bd_nob_transferred, desc->bd_nob);
+                return -ETIMEDOUT;
+        }
 
         if (!req->rq_pack_bulk)
                 return 0;
 
-        LASSERT(req->rq_bulk_read || req->rq_bulk_write);
-
         ctx = req->rq_svc_ctx;
-        if (ctx->sc_policy->sp_sops->unwrap_bulk);
-                return ctx->sc_policy->sp_sops->unwrap_bulk(req, desc);
+        if (ctx->sc_policy->sp_sops->unwrap_bulk) {
+                rc = ctx->sc_policy->sp_sops->unwrap_bulk(req, desc);
+                if (rc)
+                        CERROR("error unwrap bulk: %d\n", rc);
+        }
 
+        /* return 0 to allow reply be sent */
         return 0;
 }
 EXPORT_SYMBOL(sptlrpc_svc_unwrap_bulk);
 
+int sptlrpc_svc_prep_bulk(struct ptlrpc_request *req,
+                          struct ptlrpc_bulk_desc *desc)
+{
+        struct ptlrpc_svc_ctx *ctx;
+
+        LASSERT(req->rq_bulk_write);    /* only valid for bulk write reqs */
+
+        if (!req->rq_pack_bulk)         /* rq_pack_bulk unset: nothing to do */
+                return 0;
+
+        ctx = req->rq_svc_ctx;
+        if (ctx->sc_policy->sp_sops->prep_bulk)  /* the op is optional */
+                return ctx->sc_policy->sp_sops->prep_bulk(req, desc);
+
+        return 0;       /* policy provides no prep_bulk op */
+}
+EXPORT_SYMBOL(sptlrpc_svc_prep_bulk);
 
 /****************************************
  * user descriptor helpers              *
@@ -2272,39 +2361,25 @@ const char * sec2target_str(struct ptlrpc_sec *sec)
 }
 EXPORT_SYMBOL(sec2target_str);
 
+/* return true (1) if the bulk data is protected, i.e. the bulk service of
+ * the flavor is SPTLRPC_BULK_SVC_INTG or SPTLRPC_BULK_SVC_PRIV; 0 otherwise.
+ */
+int sptlrpc_flavor_has_bulk(struct sptlrpc_flavor *flvr)
+{
+        switch (SPTLRPC_FLVR_BULK_SVC(flvr->sf_rpc)) {
+        case SPTLRPC_BULK_SVC_INTG:
+        case SPTLRPC_BULK_SVC_PRIV:
+                return 1;
+        default:
+                return 0;
+        }
+}
+EXPORT_SYMBOL(sptlrpc_flavor_has_bulk);
+
 /****************************************
  * crypto API helper/alloc blkciper     *
  ****************************************/
 
-#ifdef __KERNEL__
-#ifndef HAVE_ASYNC_BLOCK_CIPHER
-struct ll_crypto_cipher *ll_crypto_alloc_blkcipher(const char * algname,
-                                                   u32 type, u32 mask)
-{
-        char        buf[CRYPTO_MAX_ALG_NAME + 1];
-        const char *pan = algname;
-        u32         flag = 0; 
-
-        if (strncmp("cbc(", algname, 4) == 0)
-                flag |= CRYPTO_TFM_MODE_CBC;
-        else if (strncmp("ecb(", algname, 4) == 0)
-                flag |= CRYPTO_TFM_MODE_ECB;
-        if (flag) {
-                char *vp = strnchr(algname, CRYPTO_MAX_ALG_NAME, ')');
-                if (vp) {
-                        memcpy(buf, algname + 4, vp - algname - 4);
-                        buf[vp - algname - 4] = '\0';
-                        pan = buf;
-                } else {
-                        flag = 0;
-                }
-        }
-        return crypto_alloc_tfm(pan, flag);
-}
-EXPORT_SYMBOL(ll_crypto_alloc_blkcipher);
-#endif
-#endif
-
 /****************************************
  * initialize/finalize                  *
  ****************************************/
@@ -2313,14 +2388,20 @@ int __init sptlrpc_init(void)
 {
         int rc;
 
-        rc = sptlrpc_gc_start_thread();
+        rwlock_init(&policy_lock);
+
+        rc = sptlrpc_gc_init();
         if (rc)
                 goto out;
 
-        rc = sptlrpc_enc_pool_init();
+        rc = sptlrpc_conf_init();
         if (rc)
                 goto out_gc;
 
+        rc = sptlrpc_enc_pool_init();
+        if (rc)
+                goto out_conf;
+
         rc = sptlrpc_null_init();
         if (rc)
                 goto out_pool;
@@ -2341,8 +2422,10 @@ out_null:
         sptlrpc_null_fini();
 out_pool:
         sptlrpc_enc_pool_fini();
+out_conf:
+        sptlrpc_conf_fini();
 out_gc:
-        sptlrpc_gc_stop_thread();
+        sptlrpc_gc_fini();
 out:
         return rc;
 }
@@ -2353,5 +2436,6 @@ void __exit sptlrpc_fini(void)
         sptlrpc_plain_fini();
         sptlrpc_null_fini();
         sptlrpc_enc_pool_fini();
-        sptlrpc_gc_stop_thread();
+        sptlrpc_conf_fini();
+        sptlrpc_gc_fini();
 }