Whamcloud - gitweb
LU-9795 gss: properly handle mgssec
[fs/lustre-release.git] / lustre / ptlrpc / gss / sec_gss.c
index 528ea17..e1a53a0 100644 (file)
@@ -1,9 +1,9 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
+/*
  * Modifications for Lustre
  *
- * Copyright  2008 Sun Microsystems, Inc. All rights reserved
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ *
+ * Copyright (c) 2011, 2015, Intel Corporation.
  *
  * Author: Eric Mei <ericm@clusterfs.com>
  */
  *
  */
 
-#ifndef EXPORT_SYMTAB
-# define EXPORT_SYMTAB
-#endif
 #define DEBUG_SUBSYSTEM S_SEC
-#ifdef __KERNEL__
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/dcache.h>
 #include <linux/fs.h>
-#include <linux/random.h>
 #include <linux/mutex.h>
 #include <asm/atomic.h>
-#else
-#include <liblustre.h>
-#endif
 
 #include <obd.h>
 #include <obd_class.h>
 #include <obd_support.h>
 #include <obd_cksum.h>
-#include <lustre/lustre_idl.h>
 #include <lustre_net.h>
 #include <lustre_import.h>
 #include <lustre_sec.h>
@@ -77,6 +68,7 @@
 #include "gss_api.h"
 
 #include <linux/crypto.h>
+#include <linux/crc32.h>
 
 /*
  * early reply have fixed size, respectively in privacy and integrity mode.
@@ -111,43 +103,26 @@ void gss_header_swabber(struct gss_header *ghdr)
         __swab32s(&ghdr->gh_handle.len);
 }
 
-struct gss_header *gss_swab_header(struct lustre_msg *msg, int segment)
+struct gss_header *gss_swab_header(struct lustre_msg *msg, int segment,
+                                   int swabbed)
 {
         struct gss_header *ghdr;
 
-        ghdr = lustre_swab_buf(msg, segment, sizeof(*ghdr),
-                               gss_header_swabber);
-
-        if (ghdr &&
-            sizeof(*ghdr) + ghdr->gh_handle.len > msg->lm_buflens[segment]) {
-                CERROR("gss header require length %u, now %u received\n",
-                       (unsigned int) sizeof(*ghdr) + ghdr->gh_handle.len,
-                       msg->lm_buflens[segment]);
+        ghdr = lustre_msg_buf(msg, segment, sizeof(*ghdr));
+        if (ghdr == NULL)
                 return NULL;
-        }
-
-        return ghdr;
-}
-
-static
-void gss_netobj_swabber(netobj_t *obj)
-{
-        __swab32s(&obj->len);
-}
 
-netobj_t *gss_swab_netobj(struct lustre_msg *msg, int segment)
-{
-        netobj_t  *obj;
+        if (swabbed)
+                gss_header_swabber(ghdr);
 
-        obj = lustre_swab_buf(msg, segment, sizeof(*obj), gss_netobj_swabber);
-        if (obj && sizeof(*obj) + obj->len > msg->lm_buflens[segment]) {
-                CERROR("netobj require length %u but only %u received\n",
-                       (unsigned int) sizeof(*obj) + obj->len,
+        if (sizeof(*ghdr) + ghdr->gh_handle.len > msg->lm_buflens[segment]) {
+                CERROR("gss header has length %d, now %u received\n",
+                       (int) sizeof(*ghdr) + ghdr->gh_handle.len,
                        msg->lm_buflens[segment]);
                 return NULL;
         }
 
-        return obj;
+        return ghdr;
 }
 
 /*
@@ -182,7 +157,7 @@ static int gss_sign_msg(struct lustre_msg *msg,
                         rawobj_t *handle)
 {
         struct gss_header      *ghdr;
-        rawobj_t                text[3], mic;
+        rawobj_t                text[4], mic;
         int                     textcnt, max_textcnt, mic_idx;
         __u32                   major;
 
@@ -223,7 +198,7 @@ static int gss_sign_msg(struct lustre_msg *msg,
         mic.len = msg->lm_buflens[mic_idx];
         mic.data = lustre_msg_buf(msg, mic_idx, 0);
 
-        major = lgss_get_mic(mechctx, textcnt, text, &mic);
+        major = lgss_get_mic(mechctx, textcnt, text, 0, NULL, &mic);
         if (major != GSS_S_COMPLETE) {
                 CERROR("fail to generate MIC: %08x\n", major);
                 return -EPERM;
@@ -241,7 +216,7 @@ __u32 gss_verify_msg(struct lustre_msg *msg,
                      struct gss_ctx *mechctx,
                      __u32 svc)
 {
-        rawobj_t        text[3], mic;
+        rawobj_t        text[4], mic;
         int             textcnt, max_textcnt;
         int             mic_idx;
         __u32           major;
@@ -262,7 +237,7 @@ __u32 gss_verify_msg(struct lustre_msg *msg,
         mic.len = msg->lm_buflens[mic_idx];
         mic.data = lustre_msg_buf(msg, mic_idx, 0);
 
-        major = lgss_verify_mic(mechctx, textcnt, text, &mic);
+        major = lgss_verify_mic(mechctx, textcnt, text, 0, NULL, &mic);
         if (major != GSS_S_COMPLETE)
                 CERROR("mic verify error: %08x\n", major);
 
@@ -291,7 +266,7 @@ __u32 gss_unseal_msg(struct gss_ctx *mechctx,
         /* allocate a temporary clear text buffer, same sized as token,
          * we assume the final clear text size <= token size */
         clear_buflen = lustre_msg_buflen(msgbuf, 1);
-        OBD_ALLOC(clear_buf, clear_buflen);
+        OBD_ALLOC_LARGE(clear_buf, clear_buflen);
         if (!clear_buf)
                 RETURN(GSS_S_FAILURE);
 
@@ -317,7 +292,7 @@ __u32 gss_unseal_msg(struct gss_ctx *mechctx,
 
         major = GSS_S_COMPLETE;
 out_free:
-        OBD_FREE(clear_buf, clear_buflen);
+        OBD_FREE_LARGE(clear_buf, clear_buflen);
         RETURN(major);
 }
 
@@ -327,22 +302,23 @@ out_free:
 
 int cli_ctx_expire(struct ptlrpc_cli_ctx *ctx)
 {
-        LASSERT(atomic_read(&ctx->cc_refcount));
+       LASSERT(atomic_read(&ctx->cc_refcount));
 
-        if (!test_and_set_bit(PTLRPC_CTX_DEAD_BIT, &ctx->cc_flags)) {
-                if (!ctx->cc_early_expire)
-                        clear_bit(PTLRPC_CTX_UPTODATE_BIT, &ctx->cc_flags);
+       if (!test_and_set_bit(PTLRPC_CTX_DEAD_BIT, &ctx->cc_flags)) {
+               if (!ctx->cc_early_expire)
+                       clear_bit(PTLRPC_CTX_UPTODATE_BIT, &ctx->cc_flags);
 
-                CWARN("ctx %p(%u->%s) get expired: %lu(%+lds)\n",
-                      ctx, ctx->cc_vcred.vc_uid, sec2target_str(ctx->cc_sec),
-                      ctx->cc_expire,
-                      ctx->cc_expire == 0 ? 0 :
-                      cfs_time_sub(ctx->cc_expire, cfs_time_current_sec()));
+               CWARN("ctx %p(%u->%s) get expired: %lld(%+llds)\n",
+                     ctx, ctx->cc_vcred.vc_uid, sec2target_str(ctx->cc_sec),
+                     ctx->cc_expire,
+                     ctx->cc_expire == 0 ? 0 :
+                     ctx->cc_expire - ktime_get_real_seconds());
 
-                return 1;
-        }
+               sptlrpc_cli_ctx_wakeup(ctx);
+               return 1;
+       }
 
-        return 0;
+       return 0;
 }
 
 /*
@@ -359,7 +335,7 @@ int cli_ctx_check_death(struct ptlrpc_cli_ctx *ctx)
                 return 0;
 
         /* check real expiration */
-        if (cfs_time_after(ctx->cc_expire, cfs_time_current_sec()))
+       if (ctx->cc_expire > ktime_get_real_seconds())
                 return 0;
 
         cli_ctx_expire(ctx);
@@ -368,8 +344,8 @@ int cli_ctx_check_death(struct ptlrpc_cli_ctx *ctx)
 
 void gss_cli_ctx_uptodate(struct gss_cli_ctx *gctx)
 {
-        struct ptlrpc_cli_ctx  *ctx = &gctx->gc_base;
-        unsigned long           ctx_expiry;
+       struct ptlrpc_cli_ctx *ctx = &gctx->gc_base;
+       time64_t ctx_expiry;
 
         if (lgss_inquire_context(gctx->gc_mechctx, &ctx_expiry)) {
                 CERROR("ctx %p(%u): unable to inquire, expire it now\n",
@@ -383,26 +359,30 @@ void gss_cli_ctx_uptodate(struct gss_cli_ctx *gctx)
         /* At this point this ctx might have been marked as dead by
          * someone else, in which case nobody will make further use
          * of it. we don't care, and mark it UPTODATE will help
-         * destroying server side context when it be destroied. */
-        set_bit(PTLRPC_CTX_UPTODATE_BIT, &ctx->cc_flags);
-
-        if (sec_is_reverse(ctx->cc_sec)) {
-                CWARN("server installed reverse ctx %p idx "LPX64", "
-                      "expiry %lu(%+lds)\n", ctx,
-                      gss_handle_to_u64(&gctx->gc_handle),
-                      ctx->cc_expire, ctx->cc_expire - cfs_time_current_sec());
+         * destroying server side context when it be destroyed. */
+       set_bit(PTLRPC_CTX_UPTODATE_BIT, &ctx->cc_flags);
+
+       if (sec_is_reverse(ctx->cc_sec)) {
+               CWARN("server installed reverse ctx %p idx %#llx, "
+                     "expiry %lld(%+llds)\n", ctx,
+                     gss_handle_to_u64(&gctx->gc_handle),
+                     ctx->cc_expire,
+                     ctx->cc_expire - ktime_get_real_seconds());
         } else {
-                CWARN("client refreshed ctx %p idx "LPX64" (%u->%s), "
-                      "expiry %lu(%+lds)\n", ctx,
-                      gss_handle_to_u64(&gctx->gc_handle),
-                      ctx->cc_vcred.vc_uid, sec2target_str(ctx->cc_sec),
-                      ctx->cc_expire, ctx->cc_expire - cfs_time_current_sec());
+               CWARN("client refreshed ctx %p idx %#llx (%u->%s), "
+                     "expiry %lld(%+llds)\n", ctx,
+                     gss_handle_to_u64(&gctx->gc_handle),
+                     ctx->cc_vcred.vc_uid, sec2target_str(ctx->cc_sec),
+                     ctx->cc_expire,
+                     ctx->cc_expire - ktime_get_real_seconds());
 
-                /* install reverse svc ctx for root context */
-                if (ctx->cc_vcred.vc_uid == 0)
-                        gss_sec_install_rctx(ctx->cc_sec->ps_import,
-                                             ctx->cc_sec, ctx);
-        }
+               /* install reverse svc ctx for root context */
+               if (ctx->cc_vcred.vc_uid == 0)
+                       gss_sec_install_rctx(ctx->cc_sec->ps_import,
+                                            ctx->cc_sec, ctx);
+       }
+
+        sptlrpc_cli_ctx_wakeup(ctx);
 }
 
 static void gss_cli_ctx_finalize(struct gss_cli_ctx *gctx)
@@ -426,43 +406,42 @@ static void gss_cli_ctx_finalize(struct gss_cli_ctx *gctx)
         rawobj_free(&gctx->gc_handle);
 }
 
-/*
+/**
  * Based on sequence number algorithm as specified in RFC 2203.
  *
- * modified for our own problem: arriving request has valid sequence number,
+ * Modified for our own problem: arriving request has valid sequence number,
  * but unwrapping request might cost a long time, after that its sequence
  * are not valid anymore (fall behind the window). It rarely happen, mostly
  * under extreme load.
  *
- * note we should not check sequence before verify the integrity of incoming
+ * Note we should not check sequence before verifying the integrity of incoming
  * request, because just one attacking request with high sequence number might
- * cause all following request be dropped.
+ * cause all following requests be dropped.
  *
- * so here we use a multi-phase approach: prepare 2 sequence windows,
+ * So here we use a multi-phase approach: prepare 2 sequence windows,
  * "main window" for normal sequence and "back window" for fall behind sequence.
  * and 3-phase checking mechanism:
- *  0 - before integrity verification, perform a initial sequence checking in
- *      main window, which only try and don't actually set any bits. if the
- *      sequence is high above the window or fit in the window and the bit
+ *  0 - before integrity verification, perform an initial sequence checking in
+ *      main window, which only tries and doesn't actually set any bits. if the
+ *      sequence is high above the window or fits in the window and the bit
  *      is 0, then accept and proceed to integrity verification. otherwise
  *      reject this sequence.
  *  1 - after integrity verification, check in main window again. if this
- *      sequence is high above the window or fit in the window and the bit
- *      is 0, then set the bit and accept; if it fit in the window but bit
- *      already set, then reject; if it fall behind the window, then proceed
+ *      sequence is high above the window or fits in the window and the bit
+ *      is 0, then set the bit and accept; if it fits in the window but bit
+ *      already set, then reject; if it falls behind the window, then proceed
  *      to phase 2.
- *  2 - check in back window. if it is high above the window or fit in the
+ *  2 - check in back window. if it is high above the window or fits in the
  *      window and the bit is 0, then set the bit and accept. otherwise reject.
  *
- * return value:
- *   1: looks like a replay
- *   0: is ok
- *  -1: is a replay
+ * \return      1:     looks like a replay
+ * \return      0:     is ok
+ * \return     -1:     is a replay
  *
- * note phase 0 is necessary, because otherwise replay attacking request of
+ * Note phase 0 is necessary, because otherwise replay attacking request of
  * sequence which between the 2 windows can't be detected.
  *
- * this mechanism can't totally solve the problem, but could help much less
+ * This mechanism can't totally solve the problem, but could help reduce the
  * number of valid requests be dropped.
  */
 static
@@ -504,7 +483,7 @@ int gss_do_check_seq(unsigned long *window, __u32 win_size, __u32 *max_seq,
                  */
                 switch (phase) {
                 case 0:
-                        if (test_bit(seq_num % win_size, window))
+                       if (test_bit(seq_num % win_size, window))
                                 goto replay;
                         break;
                 case 1:
@@ -534,9 +513,9 @@ replay:
  */
 int gss_check_seq_num(struct gss_svc_seq_data *ssd, __u32 seq_num, int set)
 {
-        int rc = 0;
+       int rc = 0;
 
-        spin_lock(&ssd->ssd_lock);
+       spin_lock(&ssd->ssd_lock);
 
         if (set == 0) {
                 /*
@@ -570,8 +549,8 @@ int gss_check_seq_num(struct gss_svc_seq_data *ssd, __u32 seq_num, int set)
                         gss_stat_oos_record_svc(2, 0);
         }
 exit:
-        spin_unlock(&ssd->ssd_lock);
-        return rc;
+       spin_unlock(&ssd->ssd_lock);
+       return rc;
 }
 
 /***************************************
@@ -584,6 +563,33 @@ static inline int gss_cli_payload(struct ptlrpc_cli_ctx *ctx,
         return gss_mech_payload(NULL, msgsize, privacy);
 }
 
+static int gss_cli_bulk_payload(struct ptlrpc_cli_ctx *ctx,
+                                struct sptlrpc_flavor *flvr,
+                                int reply, int read)
+{
+        int     payload = sizeof(struct ptlrpc_bulk_sec_desc);
+
+        LASSERT(SPTLRPC_FLVR_BULK_TYPE(flvr->sf_rpc) == SPTLRPC_BULK_DEFAULT);
+
+        if ((!reply && !read) || (reply && read)) {
+                switch (SPTLRPC_FLVR_BULK_SVC(flvr->sf_rpc)) {
+                case SPTLRPC_BULK_SVC_NULL:
+                        break;
+                case SPTLRPC_BULK_SVC_INTG:
+                        payload += gss_cli_payload(ctx, 0, 0);
+                        break;
+                case SPTLRPC_BULK_SVC_PRIV:
+                        payload += gss_cli_payload(ctx, 0, 1);
+                        break;
+                case SPTLRPC_BULK_SVC_AUTH:
+                default:
+                        LBUG();
+                }
+        }
+
+        return payload;
+}
+
 int gss_cli_ctx_match(struct ptlrpc_cli_ctx *ctx, struct vfs_cred *vcred)
 {
         return (ctx->cc_vcred.vc_uid == vcred->vc_uid);
@@ -591,24 +597,22 @@ int gss_cli_ctx_match(struct ptlrpc_cli_ctx *ctx, struct vfs_cred *vcred)
 
 void gss_cli_ctx_flags2str(unsigned long flags, char *buf, int bufsize)
 {
-        buf[0] = '\0';
+       buf[0] = '\0';
 
-        if (flags & PTLRPC_CTX_NEW)
-                strncat(buf, "new,", bufsize);
-        if (flags & PTLRPC_CTX_UPTODATE)
-                strncat(buf, "uptodate,", bufsize);
-        if (flags & PTLRPC_CTX_DEAD)
-                strncat(buf, "dead,", bufsize);
-        if (flags & PTLRPC_CTX_ERROR)
-                strncat(buf, "error,", bufsize);
-        if (flags & PTLRPC_CTX_CACHED)
-                strncat(buf, "cached,", bufsize);
-        if (flags & PTLRPC_CTX_ETERNAL)
-                strncat(buf, "eternal,", bufsize);
-        if (buf[0] == '\0')
-                strncat(buf, "-,", bufsize);
-
-        buf[strlen(buf) - 1] = '\0';
+       if (flags & PTLRPC_CTX_NEW)
+               strlcat(buf, "new,", bufsize);
+       if (flags & PTLRPC_CTX_UPTODATE)
+               strlcat(buf, "uptodate,", bufsize);
+       if (flags & PTLRPC_CTX_DEAD)
+               strlcat(buf, "dead,", bufsize);
+       if (flags & PTLRPC_CTX_ERROR)
+               strlcat(buf, "error,", bufsize);
+       if (flags & PTLRPC_CTX_CACHED)
+               strlcat(buf, "cached,", bufsize);
+       if (flags & PTLRPC_CTX_ETERNAL)
+               strlcat(buf, "eternal,", bufsize);
+       if (buf[0] == '\0')
+               strlcat(buf, "-,", bufsize);
 }
 
 int gss_cli_ctx_sign(struct ptlrpc_cli_ctx *ctx,
@@ -627,40 +631,40 @@ int gss_cli_ctx_sign(struct ptlrpc_cli_ctx *ctx,
         if (req->rq_ctx_init)
                 RETURN(0);
 
-        svc = RPC_FLVR_SVC(req->rq_flvr.sf_rpc);
+        svc = SPTLRPC_FLVR_SVC(req->rq_flvr.sf_rpc);
         if (req->rq_pack_bulk)
                 flags |= LUSTRE_GSS_PACK_BULK;
         if (req->rq_pack_udesc)
                 flags |= LUSTRE_GSS_PACK_USER;
 
 redo:
-        seq = atomic_inc_return(&gctx->gc_seq);
-
-        rc = gss_sign_msg(req->rq_reqbuf, gctx->gc_mechctx,
-                          ctx->cc_sec->ps_part,
-                          flags, gctx->gc_proc, seq, svc,
-                          &gctx->gc_handle);
-        if (rc < 0)
-                RETURN(rc);
-
-        /* gss_sign_msg() msg might take long time to finish, in which period
-         * more rpcs could be wrapped up and sent out. if we found too many
-         * of them we should repack this rpc, because sent it too late might
-         * lead to the sequence number fall behind the window on server and
-         * be dropped. also applies to gss_cli_ctx_seal().
-         *
-         * Note: null mode dosen't check sequence number. */
-        if (svc != SPTLRPC_SVC_NULL &&
-            atomic_read(&gctx->gc_seq) - seq > GSS_SEQ_REPACK_THRESHOLD) {
-                int behind = atomic_read(&gctx->gc_seq) - seq;
-
-                gss_stat_oos_record_cli(behind);
-                CWARN("req %p: %u behind, retry signing\n", req, behind);
-                goto redo;
-        }
-
-        req->rq_reqdata_len = rc;
-        RETURN(0);
+       seq = atomic_inc_return(&gctx->gc_seq);
+
+       rc = gss_sign_msg(req->rq_reqbuf, gctx->gc_mechctx,
+                         ctx->cc_sec->ps_part,
+                         flags, gctx->gc_proc, seq, svc,
+                         &gctx->gc_handle);
+       if (rc < 0)
+               RETURN(rc);
+
+       /* gss_sign_msg() msg might take long time to finish, in which period
+        * more rpcs could be wrapped up and sent out. if we found too many
+        * of them we should repack this rpc, because sent it too late might
+        * lead to the sequence number fall behind the window on server and
+        * be dropped. also applies to gss_cli_ctx_seal().
+        *
+        * Note: null mode doesn't check sequence number. */
+       if (svc != SPTLRPC_SVC_NULL &&
+           atomic_read(&gctx->gc_seq) - seq > GSS_SEQ_REPACK_THRESHOLD) {
+               int behind = atomic_read(&gctx->gc_seq) - seq;
+
+               gss_stat_oos_record_cli(behind);
+               CWARN("req %p: %u behind, retry signing\n", req, behind);
+               goto redo;
+       }
+
+       req->rq_reqdata_len = rc;
+       RETURN(0);
 }
 
 static
@@ -675,7 +679,7 @@ int gss_cli_ctx_handle_err_notify(struct ptlrpc_cli_ctx *ctx,
 
         errhdr = (struct gss_err_header *) ghdr;
 
-        CWARN("req x"LPU64"/t"LPU64", ctx %p idx "LPX64"(%u->%s): "
+       CWARN("req x%llu/t%llu, ctx %p idx %#llx(%u->%s): "
               "%sserver respond (%08x/%08x)\n",
               req->rq_xid, req->rq_transno, ctx,
               gss_handle_to_u64(&ctx2gctx(ctx)->gc_handle),
@@ -735,7 +739,7 @@ int gss_cli_ctx_verify(struct ptlrpc_cli_ctx *ctx,
         struct gss_header      *ghdr, *reqhdr;
         struct lustre_msg      *msg = req->rq_repdata;
         __u32                   major;
-        int                     pack_bulk, early = 0, rc = 0;
+        int                     pack_bulk, swabbed, rc = 0;
         ENTRY;
 
         LASSERT(req->rq_cli_ctx == ctx);
@@ -743,13 +747,9 @@ int gss_cli_ctx_verify(struct ptlrpc_cli_ctx *ctx,
 
         gctx = container_of(ctx, struct gss_cli_ctx, gc_base);
 
-        if ((char *) msg < req->rq_repbuf ||
-            (char *) msg >= req->rq_repbuf + req->rq_repbuf_len)
-                early = 1;
-
         /* special case for context negotiation, rq_repmsg/rq_replen actually
          * are not used currently. but early reply always be treated normally */
-        if (req->rq_ctx_init && !early) {
+        if (req->rq_ctx_init && !req->rq_early) {
                 req->rq_repmsg = lustre_msg_buf(msg, 1, 0);
                 req->rq_replen = msg->lm_buflens[1];
                 RETURN(0);
@@ -760,7 +760,9 @@ int gss_cli_ctx_verify(struct ptlrpc_cli_ctx *ctx,
                 RETURN(-EPROTO);
         }
 
-        ghdr = gss_swab_header(msg, 0);
+        swabbed = ptlrpc_rep_need_swab(req);
+
+        ghdr = gss_swab_header(msg, 0, swabbed);
         if (ghdr == NULL) {
                 CERROR("can't decode gss header\n");
                 RETURN(-EPROTO);
@@ -780,7 +782,7 @@ int gss_cli_ctx_verify(struct ptlrpc_cli_ctx *ctx,
         case PTLRPC_GSS_PROC_DATA:
                 pack_bulk = ghdr->gh_flags & LUSTRE_GSS_PACK_BULK;
 
-                if (!early && !equi(req->rq_pack_bulk == 1, pack_bulk)) {
+                if (!req->rq_early && !equi(req->rq_pack_bulk == 1, pack_bulk)){
                         CERROR("%s bulk flag in reply\n",
                                req->rq_pack_bulk ? "missing" : "unexpected");
                         RETURN(-EPROTO);
@@ -798,14 +800,16 @@ int gss_cli_ctx_verify(struct ptlrpc_cli_ctx *ctx,
                         RETURN(-EPROTO);
                 }
 
-                if (lustre_msg_swabbed(msg))
+                if (swabbed)
                         gss_header_swabber(ghdr);
 
                 major = gss_verify_msg(msg, gctx->gc_mechctx, reqhdr->gh_svc);
-                if (major != GSS_S_COMPLETE)
+                if (major != GSS_S_COMPLETE) {
+                        CERROR("failed to verify reply: %x\n", major);
                         RETURN(-EPERM);
+                }
 
-                if (early && reqhdr->gh_svc == SPTLRPC_SVC_NULL) {
+                if (req->rq_early && reqhdr->gh_svc == SPTLRPC_SVC_NULL) {
                         __u32 cksum;
 
                         cksum = crc32_le(!(__u32) 0,
@@ -826,7 +830,7 @@ int gss_cli_ctx_verify(struct ptlrpc_cli_ctx *ctx,
                                 RETURN(-EPROTO);
                         }
 
-                        rc = bulk_sec_desc_unpack(msg, 2);
+                        rc = bulk_sec_desc_unpack(msg, 2, swabbed);
                         if (rc) {
                                 CERROR("unpack bulk desc: %d\n", rc);
                                 RETURN(rc);
@@ -837,7 +841,7 @@ int gss_cli_ctx_verify(struct ptlrpc_cli_ctx *ctx,
                 req->rq_replen = msg->lm_buflens[1];
                 break;
         case PTLRPC_GSS_PROC_ERR:
-                if (early) {
+                if (req->rq_early) {
                         CERROR("server return error with early reply\n");
                         rc = -EPROTO;
                 } else {
@@ -884,7 +888,7 @@ int gss_cli_ctx_seal(struct ptlrpc_cli_ctx *ctx,
                 LASSERT(req->rq_reqbuf != req->rq_clrbuf);
                 LASSERT(req->rq_reqbuf_len >= wiresize);
         } else {
-                OBD_ALLOC(req->rq_reqbuf, wiresize);
+                OBD_ALLOC_LARGE(req->rq_reqbuf, wiresize);
                 if (!req->rq_reqbuf)
                         RETURN(-ENOMEM);
                 req->rq_reqbuf_len = wiresize;
@@ -908,7 +912,7 @@ int gss_cli_ctx_seal(struct ptlrpc_cli_ctx *ctx,
                 ghdr->gh_flags |= LUSTRE_GSS_PACK_USER;
 
 redo:
-        ghdr->gh_seq = atomic_inc_return(&gctx->gc_seq);
+       ghdr->gh_seq = atomic_inc_return(&gctx->gc_seq);
 
         /* buffer objects */
         hdrobj.len = PTLRPC_GSS_HEADER_SIZE;
@@ -926,29 +930,29 @@ redo:
         }
         LASSERT(token.len <= buflens[1]);
 
-        /* see explain in gss_cli_ctx_sign() */
-        if (unlikely(atomic_read(&gctx->gc_seq) - ghdr->gh_seq >
-                     GSS_SEQ_REPACK_THRESHOLD)) {
-                int behind = atomic_read(&gctx->gc_seq) - ghdr->gh_seq;
+       /* see explain in gss_cli_ctx_sign() */
+       if (unlikely(atomic_read(&gctx->gc_seq) - ghdr->gh_seq >
+                    GSS_SEQ_REPACK_THRESHOLD)) {
+               int behind = atomic_read(&gctx->gc_seq) - ghdr->gh_seq;
 
-                gss_stat_oos_record_cli(behind);
-                CWARN("req %p: %u behind, retry sealing\n", req, behind);
+               gss_stat_oos_record_cli(behind);
+               CWARN("req %p: %u behind, retry sealing\n", req, behind);
 
-                ghdr->gh_seq = atomic_inc_return(&gctx->gc_seq);
-                goto redo;
-        }
+               ghdr->gh_seq = atomic_inc_return(&gctx->gc_seq);
+               goto redo;
+       }
 
-        /* now set the final wire data length */
-        req->rq_reqdata_len = lustre_shrink_msg(req->rq_reqbuf, 1, token.len,0);
-        RETURN(0);
+       /* now set the final wire data length */
+       req->rq_reqdata_len = lustre_shrink_msg(req->rq_reqbuf, 1, token.len,0);
+       RETURN(0);
 
 err_free:
-        if (!req->rq_pool) {
-                OBD_FREE(req->rq_reqbuf, req->rq_reqbuf_len);
-                req->rq_reqbuf = NULL;
-                req->rq_reqbuf_len = 0;
-        }
-        RETURN(rc);
+       if (!req->rq_pool) {
+               OBD_FREE_LARGE(req->rq_reqbuf, req->rq_reqbuf_len);
+               req->rq_reqbuf = NULL;
+               req->rq_reqbuf_len = 0;
+       }
+       RETURN(rc);
 }
 
 int gss_cli_ctx_unseal(struct ptlrpc_cli_ctx *ctx,
@@ -957,7 +961,7 @@ int gss_cli_ctx_unseal(struct ptlrpc_cli_ctx *ctx,
         struct gss_cli_ctx      *gctx;
         struct gss_header       *ghdr;
         struct lustre_msg       *msg = req->rq_repdata;
-        int                      msglen, pack_bulk, early = 0, rc;
+        int                      msglen, pack_bulk, swabbed, rc;
         __u32                    major;
         ENTRY;
 
@@ -966,12 +970,9 @@ int gss_cli_ctx_unseal(struct ptlrpc_cli_ctx *ctx,
         LASSERT(msg);
 
         gctx = container_of(ctx, struct gss_cli_ctx, gc_base);
+        swabbed = ptlrpc_rep_need_swab(req);
 
-        if ((char *) msg < req->rq_repbuf ||
-            (char *) msg >= req->rq_repbuf + req->rq_repbuf_len)
-                early = 1;
-
-        ghdr = gss_swab_header(msg, 0);
+        ghdr = gss_swab_header(msg, 0, swabbed);
         if (ghdr == NULL) {
                 CERROR("can't decode gss header\n");
                 RETURN(-EPROTO);
@@ -988,13 +989,13 @@ int gss_cli_ctx_unseal(struct ptlrpc_cli_ctx *ctx,
         case PTLRPC_GSS_PROC_DATA:
                 pack_bulk = ghdr->gh_flags & LUSTRE_GSS_PACK_BULK;
 
-                if (!early && !equi(req->rq_pack_bulk == 1, pack_bulk)) {
+                if (!req->rq_early && !equi(req->rq_pack_bulk == 1, pack_bulk)){
                         CERROR("%s bulk flag in reply\n",
                                req->rq_pack_bulk ? "missing" : "unexpected");
                         RETURN(-EPROTO);
                 }
 
-                if (lustre_msg_swabbed(msg))
+                if (swabbed)
                         gss_header_swabber(ghdr);
 
                 /* use rq_repdata_len as buffer size, which assume unseal
@@ -1004,11 +1005,13 @@ int gss_cli_ctx_unseal(struct ptlrpc_cli_ctx *ctx,
                 major = gss_unseal_msg(gctx->gc_mechctx, msg,
                                        &msglen, req->rq_repdata_len);
                 if (major != GSS_S_COMPLETE) {
+                        CERROR("failed to unwrap reply: %x\n", major);
                         rc = -EPERM;
                         break;
                 }
 
-                if (lustre_unpack_msg(msg, msglen)) {
+                swabbed = __lustre_unpack_msg(msg, msglen);
+                if (swabbed < 0) {
                         CERROR("Failed to unpack after decryption\n");
                         RETURN(-EPROTO);
                 }
@@ -1026,7 +1029,8 @@ int gss_cli_ctx_unseal(struct ptlrpc_cli_ctx *ctx,
                         }
 
                         /* bulk checksum is the last segment */
-                        if (bulk_sec_desc_unpack(msg, msg->lm_bufcount-1))
+                        if (bulk_sec_desc_unpack(msg, msg->lm_bufcount - 1,
+                                                 swabbed))
                                 RETURN(-EPROTO);
                 }
 
@@ -1036,7 +1040,12 @@ int gss_cli_ctx_unseal(struct ptlrpc_cli_ctx *ctx,
                 rc = 0;
                 break;
         case PTLRPC_GSS_PROC_ERR:
-                rc = gss_cli_ctx_handle_err_notify(ctx, req, ghdr);
+                if (req->rq_early) {
+                        CERROR("server return error with early reply\n");
+                        rc = -EPROTO;
+                } else {
+                        rc = gss_cli_ctx_handle_err_notify(ctx, req, ghdr);
+                }
                 break;
         default:
                 CERROR("unexpected proc %d\n", ghdr->gh_proc);
@@ -1070,28 +1079,29 @@ int gss_sec_create_common(struct gss_sec *gsec,
         struct ptlrpc_sec   *sec;
 
         LASSERT(imp);
-        LASSERT(RPC_FLVR_POLICY(sf->sf_rpc) == SPTLRPC_POLICY_GSS);
+        LASSERT(SPTLRPC_FLVR_POLICY(sf->sf_rpc) == SPTLRPC_POLICY_GSS);
 
-        gsec->gs_mech = lgss_subflavor_to_mech(RPC_FLVR_SUB(sf->sf_rpc));
+        gsec->gs_mech = lgss_subflavor_to_mech(
+                                SPTLRPC_FLVR_BASE_SUB(sf->sf_rpc));
         if (!gsec->gs_mech) {
                 CERROR("gss backend 0x%x not found\n",
-                       RPC_FLVR_SUB(sf->sf_rpc));
+                       SPTLRPC_FLVR_BASE_SUB(sf->sf_rpc));
                 return -EOPNOTSUPP;
         }
 
-        spin_lock_init(&gsec->gs_lock);
+       spin_lock_init(&gsec->gs_lock);
         gsec->gs_rvs_hdl = 0ULL;
 
-        /* initialize upper ptlrpc_sec */
-        sec = &gsec->gs_base;
-        sec->ps_policy = policy;
-        atomic_set(&sec->ps_refcount, 0);
-        atomic_set(&sec->ps_nctx, 0);
-        sec->ps_id = sptlrpc_get_next_secid();
-        sec->ps_flvr = *sf;
-        sec->ps_import = class_import_get(imp);
-        sec->ps_lock = SPIN_LOCK_UNLOCKED;
-        CFS_INIT_LIST_HEAD(&sec->ps_gc_list);
+       /* initialize upper ptlrpc_sec */
+       sec = &gsec->gs_base;
+       sec->ps_policy = policy;
+       atomic_set(&sec->ps_refcount, 0);
+       atomic_set(&sec->ps_nctx, 0);
+       sec->ps_id = sptlrpc_get_next_secid();
+       sec->ps_flvr = *sf;
+       sec->ps_import = class_import_get(imp);
+       spin_lock_init(&sec->ps_lock);
+       INIT_LIST_HEAD(&sec->ps_gc_list);
 
         if (!svcctx) {
                 sec->ps_gc_interval = GSS_GC_INTERVAL;
@@ -1102,8 +1112,7 @@ int gss_sec_create_common(struct gss_sec *gsec,
                 sec->ps_gc_interval = 0;
         }
 
-        if (sec->ps_flvr.sf_bulk_ciph != BULK_CIPH_ALG_NULL &&
-            sec->ps_flvr.sf_flags & PTLRPC_SEC_FL_BULK)
+        if (SPTLRPC_FLVR_BULK_SVC(sec->ps_flvr.sf_rpc) == SPTLRPC_BULK_SVC_PRIV)
                 sptlrpc_enc_pool_add_user();
 
         CDEBUG(D_SEC, "create %s%s@%p\n", (svcctx ? "reverse " : ""),
@@ -1113,30 +1122,29 @@ int gss_sec_create_common(struct gss_sec *gsec,
 
 void gss_sec_destroy_common(struct gss_sec *gsec)
 {
-        struct ptlrpc_sec      *sec = &gsec->gs_base;
-        ENTRY;
+       struct ptlrpc_sec       *sec = &gsec->gs_base;
+       ENTRY;
 
-        LASSERT(sec->ps_import);
-        LASSERT(atomic_read(&sec->ps_refcount) == 0);
-        LASSERT(atomic_read(&sec->ps_nctx) == 0);
+       LASSERT(sec->ps_import);
+       LASSERT(atomic_read(&sec->ps_refcount) == 0);
+       LASSERT(atomic_read(&sec->ps_nctx) == 0);
 
-        if (gsec->gs_mech) {
-                lgss_mech_put(gsec->gs_mech);
-                gsec->gs_mech = NULL;
-        }
+       if (gsec->gs_mech) {
+               lgss_mech_put(gsec->gs_mech);
+               gsec->gs_mech = NULL;
+       }
 
-        class_import_put(sec->ps_import);
+       class_import_put(sec->ps_import);
 
-        if (sec->ps_flvr.sf_bulk_ciph != BULK_CIPH_ALG_NULL &&
-            sec->ps_flvr.sf_flags & PTLRPC_SEC_FL_BULK)
-                sptlrpc_enc_pool_del_user();
+       if (SPTLRPC_FLVR_BULK_SVC(sec->ps_flvr.sf_rpc) == SPTLRPC_BULK_SVC_PRIV)
+               sptlrpc_enc_pool_del_user();
 
-        EXIT;
+       EXIT;
 }
 
 void gss_sec_kill(struct ptlrpc_sec *sec)
 {
-        sec->ps_dying = 1;
+       sec->ps_dying = 1;
 }
 
 int gss_cli_ctx_init_common(struct ptlrpc_sec *sec,
@@ -1144,31 +1152,31 @@ int gss_cli_ctx_init_common(struct ptlrpc_sec *sec,
                             struct ptlrpc_ctx_ops *ctxops,
                             struct vfs_cred *vcred)
 {
-        struct gss_cli_ctx    *gctx = ctx2gctx(ctx);
-
-        gctx->gc_win = 0;
-        atomic_set(&gctx->gc_seq, 0);
-
-        CFS_INIT_HLIST_NODE(&ctx->cc_cache);
-        atomic_set(&ctx->cc_refcount, 0);
-        ctx->cc_sec = sec;
-        ctx->cc_ops = ctxops;
-        ctx->cc_expire = 0;
-        ctx->cc_flags = PTLRPC_CTX_NEW;
-        ctx->cc_vcred = *vcred;
-        spin_lock_init(&ctx->cc_lock);
-        CFS_INIT_LIST_HEAD(&ctx->cc_req_list);
-        CFS_INIT_LIST_HEAD(&ctx->cc_gc_chain);
-
-        /* take a ref on belonging sec, balanced in ctx destroying */
-        atomic_inc(&sec->ps_refcount);
-        /* statistic only */
-        atomic_inc(&sec->ps_nctx);
-
-        CDEBUG(D_SEC, "%s@%p: create ctx %p(%u->%s)\n",
-               sec->ps_policy->sp_name, ctx->cc_sec,
-               ctx, ctx->cc_vcred.vc_uid, sec2target_str(ctx->cc_sec));
-        return 0;
+       struct gss_cli_ctx      *gctx = ctx2gctx(ctx);
+
+       gctx->gc_win = 0;
+       atomic_set(&gctx->gc_seq, 0);
+
+       INIT_HLIST_NODE(&ctx->cc_cache);
+       atomic_set(&ctx->cc_refcount, 0);
+       ctx->cc_sec = sec;
+       ctx->cc_ops = ctxops;
+       ctx->cc_expire = 0;
+       ctx->cc_flags = PTLRPC_CTX_NEW;
+       ctx->cc_vcred = *vcred;
+       spin_lock_init(&ctx->cc_lock);
+       INIT_LIST_HEAD(&ctx->cc_req_list);
+       INIT_LIST_HEAD(&ctx->cc_gc_chain);
+
+       /* take a ref on belonging sec, balanced in ctx destroying */
+       atomic_inc(&sec->ps_refcount);
+       /* statistic only */
+       atomic_inc(&sec->ps_nctx);
+
+       CDEBUG(D_SEC, "%s@%p: create ctx %p(%u->%s)\n",
+              sec->ps_policy->sp_name, ctx->cc_sec,
+              ctx, ctx->cc_vcred.vc_uid, sec2target_str(ctx->cc_sec));
+       return 0;
 }
 
 /*
@@ -1179,35 +1187,44 @@ int gss_cli_ctx_init_common(struct ptlrpc_sec *sec,
 int gss_cli_ctx_fini_common(struct ptlrpc_sec *sec,
                             struct ptlrpc_cli_ctx *ctx)
 {
-        struct gss_cli_ctx *gctx = ctx2gctx(ctx);
+       struct gss_cli_ctx *gctx = ctx2gctx(ctx);
 
-        LASSERT(atomic_read(&sec->ps_nctx) > 0);
-        LASSERT(atomic_read(&ctx->cc_refcount) == 0);
-        LASSERT(ctx->cc_sec == sec);
+       LASSERT(atomic_read(&sec->ps_nctx) > 0);
+       LASSERT(atomic_read(&ctx->cc_refcount) == 0);
+       LASSERT(ctx->cc_sec == sec);
 
-        if (gctx->gc_mechctx) {
-                /* the final context fini rpc will use this ctx too, and it's
-                 * asynchronous which finished by request_out_callback(). so
-                 * we add refcount, whoever drop finally drop the refcount to
-                 * 0 should responsible for the rest of destroy. */
-                atomic_inc(&ctx->cc_refcount);
+       /*
+        * remove UPTODATE flag of reverse ctx thus we won't send fini rpc,
+        * this is to avoid potential problems of client side reverse svc ctx
+        * be mis-destroyed in various recovery senarios. anyway client can
+        * manage its reverse ctx well by associating it with its buddy ctx.
+        */
+       if (sec_is_reverse(sec))
+               ctx->cc_flags &= ~PTLRPC_CTX_UPTODATE;
 
-                gss_do_ctx_fini_rpc(gctx);
-                gss_cli_ctx_finalize(gctx);
+       if (gctx->gc_mechctx) {
+               /* the final context fini rpc will use this ctx too, and it's
+                * asynchronous which finished by request_out_callback(). so
+                * we add refcount, whoever drop finally drop the refcount to
+                * 0 should responsible for the rest of destroy. */
+               atomic_inc(&ctx->cc_refcount);
 
-                if (!atomic_dec_and_test(&ctx->cc_refcount))
-                        return 1;
-        }
+               gss_do_ctx_fini_rpc(gctx);
+               gss_cli_ctx_finalize(gctx);
 
-        if (sec_is_reverse(sec))
-                CWARN("reverse sec %p: destroy ctx %p\n",
-                      ctx->cc_sec, ctx);
-        else
-                CWARN("%s@%p: destroy ctx %p(%u->%s)\n",
-                      sec->ps_policy->sp_name, ctx->cc_sec,
-                      ctx, ctx->cc_vcred.vc_uid, sec2target_str(ctx->cc_sec));
+               if (!atomic_dec_and_test(&ctx->cc_refcount))
+                       return 1;
+       }
 
-        return 0;
+       if (sec_is_reverse(sec))
+               CWARN("reverse sec %p: destroy ctx %p\n",
+                     ctx->cc_sec, ctx);
+       else
+               CWARN("%s@%p: destroy ctx %p(%u->%s)\n",
+                     sec->ps_policy->sp_name, ctx->cc_sec,
+                     ctx, ctx->cc_vcred.vc_uid, sec2target_str(ctx->cc_sec));
+
+       return 0;
 }
 
 static
@@ -1250,9 +1267,9 @@ int gss_alloc_reqbuf_intg(struct ptlrpc_sec *sec,
         }
 
         if (req->rq_pack_bulk) {
-                buflens[bufcnt] = bulk_sec_desc_size(
-                                                req->rq_flvr.sf_bulk_hash, 1,
-                                                req->rq_bulk_read);
+                buflens[bufcnt] = gss_cli_bulk_payload(req->rq_cli_ctx,
+                                                       &req->rq_flvr,
+                                                       0, req->rq_bulk_read);
                 if (svc == SPTLRPC_SVC_INTG)
                         txtsize += buflens[bufcnt];
                 bufcnt++;
@@ -1268,7 +1285,7 @@ int gss_alloc_reqbuf_intg(struct ptlrpc_sec *sec,
         if (!req->rq_reqbuf) {
                 bufsize = size_roundup_power2(bufsize);
 
-                OBD_ALLOC(req->rq_reqbuf, bufsize);
+                OBD_ALLOC_LARGE(req->rq_reqbuf, bufsize);
                 if (!req->rq_reqbuf)
                         RETURN(-ENOMEM);
 
@@ -1316,9 +1333,9 @@ int gss_alloc_reqbuf_priv(struct ptlrpc_sec *sec,
         if (req->rq_pack_udesc)
                 ibuflens[ibufcnt++] = sptlrpc_current_user_desc_size();
         if (req->rq_pack_bulk)
-                ibuflens[ibufcnt++] = bulk_sec_desc_size(
-                                                req->rq_flvr.sf_bulk_hash, 1,
-                                                req->rq_bulk_read);
+                ibuflens[ibufcnt++] = gss_cli_bulk_payload(req->rq_cli_ctx,
+                                                           &req->rq_flvr, 0,
+                                                           req->rq_bulk_read);
 
         clearsize = lustre_msg_size_v2(ibufcnt, ibuflens);
         /* to allow append padding during encryption */
@@ -1355,7 +1372,7 @@ int gss_alloc_reqbuf_priv(struct ptlrpc_sec *sec,
         if (!req->rq_clrbuf) {
                 clearsize = size_roundup_power2(clearsize);
 
-                OBD_ALLOC(req->rq_clrbuf, clearsize);
+                OBD_ALLOC_LARGE(req->rq_clrbuf, clearsize);
                 if (!req->rq_clrbuf)
                         RETURN(-ENOMEM);
         }
@@ -1378,7 +1395,7 @@ int gss_alloc_reqbuf(struct ptlrpc_sec *sec,
                      struct ptlrpc_request *req,
                      int msgsize)
 {
-        int     svc = RPC_FLVR_SVC(req->rq_flvr.sf_rpc);
+        int     svc = SPTLRPC_FLVR_SVC(req->rq_flvr.sf_rpc);
 
         LASSERT(!req->rq_pack_bulk ||
                 (req->rq_bulk_read || req->rq_bulk_write));
@@ -1403,7 +1420,7 @@ void gss_free_reqbuf(struct ptlrpc_sec *sec,
         ENTRY;
 
         LASSERT(!req->rq_pool || req->rq_reqbuf);
-        privacy = RPC_FLVR_SVC(req->rq_flvr.sf_rpc) == SPTLRPC_SVC_PRIV;
+        privacy = SPTLRPC_FLVR_SVC(req->rq_flvr.sf_rpc) == SPTLRPC_SVC_PRIV;
 
         if (!req->rq_clrbuf)
                 goto release_reqbuf;
@@ -1412,13 +1429,12 @@ void gss_free_reqbuf(struct ptlrpc_sec *sec,
         LASSERT(privacy);
         LASSERT(req->rq_clrbuf_len);
 
-        if (req->rq_pool &&
-            req->rq_clrbuf >= req->rq_reqbuf &&
-            (char *) req->rq_clrbuf <
+        if (req->rq_pool == NULL ||
+            req->rq_clrbuf < req->rq_reqbuf ||
+            (char *) req->rq_clrbuf >=
             (char *) req->rq_reqbuf + req->rq_reqbuf_len)
-                goto release_reqbuf;
+                OBD_FREE_LARGE(req->rq_clrbuf, req->rq_clrbuf_len);
 
-        OBD_FREE(req->rq_clrbuf, req->rq_clrbuf_len);
         req->rq_clrbuf = NULL;
         req->rq_clrbuf_len = 0;
 
@@ -1426,13 +1442,11 @@ release_reqbuf:
         if (!req->rq_pool && req->rq_reqbuf) {
                 LASSERT(req->rq_reqbuf_len);
 
-                OBD_FREE(req->rq_reqbuf, req->rq_reqbuf_len);
+                OBD_FREE_LARGE(req->rq_reqbuf, req->rq_reqbuf_len);
                 req->rq_reqbuf = NULL;
                 req->rq_reqbuf_len = 0;
         }
 
-        req->rq_reqmsg = NULL;
-
         EXIT;
 }
 
@@ -1440,7 +1454,7 @@ static int do_alloc_repbuf(struct ptlrpc_request *req, int bufsize)
 {
         bufsize = size_roundup_power2(bufsize);
 
-        OBD_ALLOC(req->rq_repbuf, bufsize);
+        OBD_ALLOC_LARGE(req->rq_repbuf, bufsize);
         if (!req->rq_repbuf)
                 return -ENOMEM;
 
@@ -1480,9 +1494,9 @@ int gss_alloc_repbuf_intg(struct ptlrpc_sec *sec,
                 txtsize += buflens[1];
 
         if (req->rq_pack_bulk) {
-                buflens[bufcnt] = bulk_sec_desc_size(
-                                                req->rq_flvr.sf_bulk_hash, 0,
-                                                req->rq_bulk_read);
+                buflens[bufcnt] = gss_cli_bulk_payload(req->rq_cli_ctx,
+                                                       &req->rq_flvr,
+                                                       1, req->rq_bulk_read);
                 if (svc == SPTLRPC_SVC_INTG)
                         txtsize += buflens[bufcnt];
                 bufcnt++;
@@ -1516,9 +1530,9 @@ int gss_alloc_repbuf_priv(struct ptlrpc_sec *sec,
         buflens[0] = msgsize;
 
         if (req->rq_pack_bulk)
-                buflens[bufcnt++] = bulk_sec_desc_size(
-                                                req->rq_flvr.sf_bulk_hash, 0,
-                                                req->rq_bulk_read);
+                buflens[bufcnt++] = gss_cli_bulk_payload(req->rq_cli_ctx,
+                                                         &req->rq_flvr,
+                                                         1, req->rq_bulk_read);
         txtsize = lustre_msg_size_v2(bufcnt, buflens);
         txtsize += GSS_MAX_CIPHER_BLOCK;
 
@@ -1538,7 +1552,7 @@ int gss_alloc_repbuf(struct ptlrpc_sec *sec,
                      struct ptlrpc_request *req,
                      int msgsize)
 {
-        int     svc = RPC_FLVR_SVC(req->rq_flvr.sf_rpc);
+        int     svc = SPTLRPC_FLVR_SVC(req->rq_flvr.sf_rpc);
         ENTRY;
 
         LASSERT(!req->rq_pack_bulk ||
@@ -1560,11 +1574,11 @@ int gss_alloc_repbuf(struct ptlrpc_sec *sec,
 void gss_free_repbuf(struct ptlrpc_sec *sec,
                      struct ptlrpc_request *req)
 {
-        OBD_FREE(req->rq_repbuf, req->rq_repbuf_len);
+        OBD_FREE_LARGE(req->rq_repbuf, req->rq_repbuf_len);
         req->rq_repbuf = NULL;
         req->rq_repbuf_len = 0;
-
-        req->rq_repmsg = NULL;
+        req->rq_repdata = NULL;
+        req->rq_repdata_len = 0;
 }
 
 static int get_enlarged_msgsize(struct lustre_msg *msg,
@@ -1656,16 +1670,28 @@ int gss_enlarge_reqbuf_intg(struct ptlrpc_sec *sec,
         if (req->rq_reqbuf_len < newbuf_size) {
                 newbuf_size = size_roundup_power2(newbuf_size);
 
-                OBD_ALLOC(newbuf, newbuf_size);
+                OBD_ALLOC_LARGE(newbuf, newbuf_size);
                 if (newbuf == NULL)
                         RETURN(-ENOMEM);
 
+               /* Must lock this, so that otherwise unprotected change of
+                * rq_reqmsg is not racing with parallel processing of
+                * imp_replay_list traversing threads. See LU-3333
+                * This is a bandaid at best, we really need to deal with this
+                * in request enlarging code before unpacking that's already
+                * there */
+               if (req->rq_import)
+                       spin_lock(&req->rq_import->imp_lock);
+
                 memcpy(newbuf, req->rq_reqbuf, req->rq_reqbuf_len);
 
-                OBD_FREE(req->rq_reqbuf, req->rq_reqbuf_len);
+                OBD_FREE_LARGE(req->rq_reqbuf, req->rq_reqbuf_len);
                 req->rq_reqbuf = newbuf;
                 req->rq_reqbuf_len = newbuf_size;
                 req->rq_reqmsg = lustre_msg_buf(req->rq_reqbuf, 1, 0);
+
+               if (req->rq_import)
+                       spin_unlock(&req->rq_import->imp_lock);
         }
 
         /* do enlargement, from wrapper to embedded, from end to begin */
@@ -1726,6 +1752,8 @@ int gss_enlarge_reqbuf_priv(struct ptlrpc_sec *sec,
                 if (newclrbuf_size + newcipbuf_size <= req->rq_reqbuf_len) {
                         void *src, *dst;
 
+                       if (req->rq_import)
+                               spin_lock(&req->rq_import->imp_lock);
                         /* move clear text backward. */
                         src = req->rq_clrbuf;
                         dst = (char *) req->rq_reqbuf + newcipbuf_size;
@@ -1735,6 +1763,9 @@ int gss_enlarge_reqbuf_priv(struct ptlrpc_sec *sec,
                         req->rq_clrbuf = (struct lustre_msg *) dst;
                         req->rq_clrbuf_len = newclrbuf_size;
                         req->rq_reqmsg = lustre_msg_buf(req->rq_clrbuf, 0, 0);
+
+                       if (req->rq_import)
+                               spin_unlock(&req->rq_import->imp_lock);
                 } else {
                         /* sadly we have to split out the clear buffer */
                         LASSERT(req->rq_reqbuf_len >= newcipbuf_size);
@@ -1745,22 +1776,34 @@ int gss_enlarge_reqbuf_priv(struct ptlrpc_sec *sec,
         if (req->rq_clrbuf_len < newclrbuf_size) {
                 newclrbuf_size = size_roundup_power2(newclrbuf_size);
 
-                OBD_ALLOC(newclrbuf, newclrbuf_size);
+                OBD_ALLOC_LARGE(newclrbuf, newclrbuf_size);
                 if (newclrbuf == NULL)
                         RETURN(-ENOMEM);
 
+               /* Must lock this, so that otherwise unprotected change of
+                * rq_reqmsg is not racing with parallel processing of
+                * imp_replay_list traversing threads. See LU-3333
+                * This is a bandaid at best, we really need to deal with this
+                * in request enlarging code before unpacking that's already
+                * there */
+               if (req->rq_import)
+                       spin_lock(&req->rq_import->imp_lock);
+
                 memcpy(newclrbuf, req->rq_clrbuf, req->rq_clrbuf_len);
 
                 if (req->rq_reqbuf == NULL ||
                     req->rq_clrbuf < req->rq_reqbuf ||
                     (char *) req->rq_clrbuf >=
                     (char *) req->rq_reqbuf + req->rq_reqbuf_len) {
-                        OBD_FREE(req->rq_clrbuf, req->rq_clrbuf_len);
+                        OBD_FREE_LARGE(req->rq_clrbuf, req->rq_clrbuf_len);
                 }
 
                 req->rq_clrbuf = newclrbuf;
                 req->rq_clrbuf_len = newclrbuf_size;
                 req->rq_reqmsg = lustre_msg_buf(req->rq_clrbuf, 0, 0);
+
+               if (req->rq_import)
+                       spin_unlock(&req->rq_import->imp_lock);
         }
 
         _sptlrpc_enlarge_msg_inplace(req->rq_clrbuf, 0, newmsg_size);
@@ -1774,7 +1817,7 @@ int gss_enlarge_reqbuf(struct ptlrpc_sec *sec,
                        struct ptlrpc_request *req,
                        int segment, int newsize)
 {
-        int     svc = RPC_FLVR_SVC(req->rq_flvr.sf_rpc);
+        int     svc = SPTLRPC_FLVR_SVC(req->rq_flvr.sf_rpc);
 
         LASSERT(!req->rq_ctx_init && !req->rq_ctx_fini);
 
@@ -1831,17 +1874,17 @@ void gss_svc_reqctx_free(struct gss_svc_reqctx *grctx)
 static inline
 void gss_svc_reqctx_addref(struct gss_svc_reqctx *grctx)
 {
-        LASSERT(atomic_read(&grctx->src_base.sc_refcount) > 0);
-        atomic_inc(&grctx->src_base.sc_refcount);
+       LASSERT(atomic_read(&grctx->src_base.sc_refcount) > 0);
+       atomic_inc(&grctx->src_base.sc_refcount);
 }
 
 static inline
 void gss_svc_reqctx_decref(struct gss_svc_reqctx *grctx)
 {
-        LASSERT(atomic_read(&grctx->src_base.sc_refcount) > 0);
+       LASSERT(atomic_read(&grctx->src_base.sc_refcount) > 0);
 
-        if (atomic_dec_and_test(&grctx->src_base.sc_refcount))
-                gss_svc_reqctx_free(grctx);
+       if (atomic_dec_and_test(&grctx->src_base.sc_refcount))
+               gss_svc_reqctx_free(grctx);
 }
 
 static
@@ -1856,7 +1899,7 @@ int gss_svc_sign(struct ptlrpc_request *req,
 
         LASSERT(rs->rs_msg == lustre_msg_buf(rs->rs_repbuf, 1, 0));
 
-        /* embedded lustre_msg might have been shrinked */
+        /* embedded lustre_msg might have been shrunk */
         if (req->rq_replen != rs->rs_repbuf->lm_buflens[1])
                 lustre_shrink_msg(rs->rs_repbuf, 1, req->rq_replen, 1);
 
@@ -1872,7 +1915,10 @@ int gss_svc_sign(struct ptlrpc_request *req,
         rs->rs_repdata_len = rc;
 
         if (likely(req->rq_packed_final)) {
-                req->rq_reply_off = gss_at_reply_off_integ;
+                if (lustre_msghdr_get_flags(req->rq_reqmsg) & MSGHDR_AT_SUPPORT)
+                        req->rq_reply_off = gss_at_reply_off_integ;
+                else
+                        req->rq_reply_off = 0;
         } else {
                 if (svc == SPTLRPC_SVC_NULL)
                         rs->rs_repbuf->lm_cksum = crc32_le(!(__u32) 0,
@@ -1935,7 +1981,7 @@ int gss_svc_handle_init(struct ptlrpc_request *req,
         rawobj_t                   uuid_obj, rvs_hdl, in_token;
         __u32                      lustre_svc;
         __u32                     *secdata, seclen;
-        int                        rc;
+        int                        swabbed, rc;
         ENTRY;
 
         CDEBUG(D_SEC, "processing gss init(%d) request from %s\n", gw->gw_proc,
@@ -1959,6 +2005,8 @@ int gss_svc_handle_init(struct ptlrpc_request *req,
                 RETURN(SECSVC_DROP);
         }
 
+        swabbed = ptlrpc_req_need_swab(req);
+
         /* ctx initiate payload is in last segment */
         secdata = lustre_msg_buf(reqbuf, reqbuf->lm_bufcount - 1, 0);
         seclen = reqbuf->lm_buflens[reqbuf->lm_bufcount - 1];
@@ -2006,21 +2054,24 @@ int gss_svc_handle_init(struct ptlrpc_request *req,
         if (rc != SECSVC_OK)
                 RETURN(rc);
 
-        if (grctx->src_ctx->gsc_usr_mds || grctx->src_ctx->gsc_usr_root)
-                CWARN("create svc ctx %p: user from %s authenticated as %s\n",
-                      grctx->src_ctx, libcfs_nid2str(req->rq_peer.nid),
-                      grctx->src_ctx->gsc_usr_mds ? "mds" : "root");
-        else
-                CWARN("create svc ctx %p: accept user %u from %s\n",
-                      grctx->src_ctx, grctx->src_ctx->gsc_uid,
-                      libcfs_nid2str(req->rq_peer.nid));
+       if (grctx->src_ctx->gsc_usr_mds || grctx->src_ctx->gsc_usr_oss ||
+           grctx->src_ctx->gsc_usr_root)
+               CWARN("create svc ctx %p: user from %s authenticated as %s\n",
+                     grctx->src_ctx, libcfs_nid2str(req->rq_peer.nid),
+                     grctx->src_ctx->gsc_usr_root ? "root" :
+                     (grctx->src_ctx->gsc_usr_mds ? "mds" :
+                      (grctx->src_ctx->gsc_usr_oss ? "oss" : "null")));
+       else
+               CWARN("create svc ctx %p: accept user %u from %s\n",
+                     grctx->src_ctx, grctx->src_ctx->gsc_uid,
+                     libcfs_nid2str(req->rq_peer.nid));
 
         if (gw->gw_flags & LUSTRE_GSS_PACK_USER) {
                 if (reqbuf->lm_bufcount < 4) {
                         CERROR("missing user descriptor\n");
                         RETURN(SECSVC_DROP);
                 }
-                if (sptlrpc_unpack_user_desc(reqbuf, 2)) {
+                if (sptlrpc_unpack_user_desc(reqbuf, 2, swabbed)) {
                         CERROR("Mal-formed user descriptor\n");
                         RETURN(SECSVC_DROP);
                 }
@@ -2047,6 +2098,7 @@ int gss_svc_verify_request(struct ptlrpc_request *req,
         struct gss_svc_ctx *gctx = grctx->src_ctx;
         struct lustre_msg  *msg = req->rq_reqbuf;
         int                 offset = 2;
+        int                 swabbed;
         ENTRY;
 
         *major = GSS_S_COMPLETE;
@@ -2066,8 +2118,10 @@ int gss_svc_verify_request(struct ptlrpc_request *req,
         }
 
         *major = gss_verify_msg(msg, gctx->gsc_mechctx, gw->gw_svc);
-        if (*major != GSS_S_COMPLETE)
+        if (*major != GSS_S_COMPLETE) {
+                CERROR("failed to verify request: %x\n", *major);
                 RETURN(-EACCES);
+        }
 
         if (gctx->gsc_reverse == 0 &&
             gss_check_seq_num(&gctx->gsc_seqdata, gw->gw_seq, 1)) {
@@ -2077,6 +2131,8 @@ int gss_svc_verify_request(struct ptlrpc_request *req,
         }
 
 verified:
+        swabbed = ptlrpc_req_need_swab(req);
+
         /* user descriptor */
         if (gw->gw_flags & LUSTRE_GSS_PACK_USER) {
                 if (msg->lm_bufcount < (offset + 1)) {
@@ -2084,7 +2140,7 @@ verified:
                         RETURN(-EINVAL);
                 }
 
-                if (sptlrpc_unpack_user_desc(msg, offset)) {
+                if (sptlrpc_unpack_user_desc(msg, offset, swabbed)) {
                         CERROR("Mal-formed user descriptor\n");
                         RETURN(-EINVAL);
                 }
@@ -2094,14 +2150,14 @@ verified:
                 offset++;
         }
 
-        /* check bulk cksum data */
+        /* check bulk_sec_desc data */
         if (gw->gw_flags & LUSTRE_GSS_PACK_BULK) {
                 if (msg->lm_bufcount < (offset + 1)) {
-                        CERROR("no bulk checksum included\n");
+                        CERROR("missing bulk sec descriptor\n");
                         RETURN(-EINVAL);
                 }
 
-                if (bulk_sec_desc_unpack(msg, offset))
+                if (bulk_sec_desc_unpack(msg, offset, swabbed))
                         RETURN(-EINVAL);
 
                 req->rq_pack_bulk = 1;
@@ -2122,7 +2178,7 @@ int gss_svc_unseal_request(struct ptlrpc_request *req,
 {
         struct gss_svc_ctx *gctx = grctx->src_ctx;
         struct lustre_msg  *msg = req->rq_reqbuf;
-        int                 msglen, offset = 1;
+        int                 swabbed, msglen, offset = 1;
         ENTRY;
 
         if (gss_check_seq_num(&gctx->gsc_seqdata, gw->gw_seq, 0)) {
@@ -2133,8 +2189,10 @@ int gss_svc_unseal_request(struct ptlrpc_request *req,
 
         *major = gss_unseal_msg(gctx->gsc_mechctx, msg,
                                &msglen, req->rq_reqdata_len);
-        if (*major != GSS_S_COMPLETE)
+        if (*major != GSS_S_COMPLETE) {
+                CERROR("failed to unwrap request: %x\n", *major);
                 RETURN(-EACCES);
+        }
 
         if (gss_check_seq_num(&gctx->gsc_seqdata, gw->gw_seq, 1)) {
                 CERROR("phase 1+: discard replayed req: seq %u\n", gw->gw_seq);
@@ -2142,7 +2200,8 @@ int gss_svc_unseal_request(struct ptlrpc_request *req,
                 RETURN(-EACCES);
         }
 
-        if (lustre_unpack_msg(msg, msglen)) {
+        swabbed = __lustre_unpack_msg(msg, msglen);
+        if (swabbed < 0) {
                 CERROR("Failed to unpack after decryption\n");
                 RETURN(-EINVAL);
         }
@@ -2159,7 +2218,7 @@ int gss_svc_unseal_request(struct ptlrpc_request *req,
                         RETURN(-EINVAL);
                 }
 
-                if (sptlrpc_unpack_user_desc(msg, offset)) {
+                if (sptlrpc_unpack_user_desc(msg, offset, swabbed)) {
                         CERROR("Mal-formed user descriptor\n");
                         RETURN(-EINVAL);
                 }
@@ -2175,7 +2234,7 @@ int gss_svc_unseal_request(struct ptlrpc_request *req,
                         RETURN(-EINVAL);
                 }
 
-                if (bulk_sec_desc_unpack(msg, offset))
+                if (bulk_sec_desc_unpack(msg, offset, swabbed))
                         RETURN(-EINVAL);
 
                 req->rq_pack_bulk = 1;
@@ -2220,8 +2279,8 @@ int gss_svc_handle_data(struct ptlrpc_request *req,
         if (rc == 0)
                 RETURN(SECSVC_OK);
 
-        CERROR("svc %u failed: major 0x%08x: req xid "LPU64" ctx %p idx "
-               LPX64"(%u->%s)\n", gw->gw_svc, major, req->rq_xid,
+       CERROR("svc %u failed: major 0x%08x: req xid %llu ctx %p idx "
+              "%#llx(%u->%s)\n", gw->gw_svc, major, req->rq_xid,
                grctx->src_ctx, gss_handle_to_u64(&gw->gw_handle),
                grctx->src_ctx->gsc_uid, libcfs_nid2str(req->rq_peer.nid));
 error:
@@ -2259,7 +2318,7 @@ int gss_svc_handle_destroy(struct ptlrpc_request *req,
         if (gss_svc_verify_request(req, grctx, gw, &major))
                 RETURN(SECSVC_DROP);
 
-        CWARN("destroy svc ctx %p idx "LPX64" (%u->%s)\n",
+       CWARN("destroy svc ctx %p idx %#llx (%u->%s)\n",
               grctx->src_ctx, gss_handle_to_u64(&gw->gw_handle),
               grctx->src_ctx->gsc_uid, libcfs_nid2str(req->rq_peer.nid));
 
@@ -2270,7 +2329,8 @@ int gss_svc_handle_destroy(struct ptlrpc_request *req,
                         CERROR("missing user descriptor, ignore it\n");
                         RETURN(SECSVC_OK);
                 }
-                if (sptlrpc_unpack_user_desc(req->rq_reqbuf, 2)) {
+                if (sptlrpc_unpack_user_desc(req->rq_reqbuf, 2,
+                                             ptlrpc_req_need_swab(req))) {
                         CERROR("Mal-formed user descriptor, ignore it\n");
                         RETURN(SECSVC_OK);
                 }
@@ -2287,7 +2347,7 @@ int gss_svc_accept(struct ptlrpc_sec_policy *policy, struct ptlrpc_request *req)
         struct gss_header      *ghdr;
         struct gss_svc_reqctx  *grctx;
         struct gss_wire_ctx    *gw;
-        int                     rc;
+        int                     swabbed, rc;
         ENTRY;
 
         LASSERT(req->rq_reqbuf);
@@ -2298,7 +2358,9 @@ int gss_svc_accept(struct ptlrpc_sec_policy *policy, struct ptlrpc_request *req)
                 RETURN(SECSVC_DROP);
         }
 
-        ghdr = gss_swab_header(req->rq_reqbuf, 0);
+        swabbed = ptlrpc_req_need_swab(req);
+
+        ghdr = gss_swab_header(req->rq_reqbuf, 0, swabbed);
         if (ghdr == NULL) {
                 CERROR("can't decode gss header\n");
                 RETURN(SECSVC_DROP);
@@ -2318,10 +2380,10 @@ int gss_svc_accept(struct ptlrpc_sec_policy *policy, struct ptlrpc_request *req)
         if (!grctx)
                 RETURN(SECSVC_DROP);
 
-        grctx->src_base.sc_policy = sptlrpc_policy_get(policy);
-        atomic_set(&grctx->src_base.sc_refcount, 1);
-        req->rq_svc_ctx = &grctx->src_base;
-        gw = &grctx->src_wirectx;
+       grctx->src_base.sc_policy = sptlrpc_policy_get(policy);
+       atomic_set(&grctx->src_base.sc_refcount, 1);
+       req->rq_svc_ctx = &grctx->src_base;
+       gw = &grctx->src_wirectx;
 
         /* save wire context */
         gw->gw_flags = ghdr->gh_flags;
@@ -2331,7 +2393,7 @@ int gss_svc_accept(struct ptlrpc_sec_policy *policy, struct ptlrpc_request *req)
         rawobj_from_netobj(&gw->gw_handle, &ghdr->gh_handle);
 
         /* keep original wire header which subject to checksum verification */
-        if (lustre_msg_swabbed(req->rq_reqbuf))
+        if (swabbed)
                 gss_header_swabber(ghdr);
 
         switch(ghdr->gh_proc) {
@@ -2356,8 +2418,8 @@ int gss_svc_accept(struct ptlrpc_sec_policy *policy, struct ptlrpc_request *req)
                 LASSERT (grctx->src_ctx);
 
                 req->rq_auth_gss = 1;
-                req->rq_auth_remote = grctx->src_ctx->gsc_remote;
                 req->rq_auth_usr_mdt = grctx->src_ctx->gsc_usr_mds;
+                req->rq_auth_usr_ost = grctx->src_ctx->gsc_usr_oss;
                 req->rq_auth_usr_root = grctx->src_ctx->gsc_usr_root;
                 req->rq_auth_uid = grctx->src_ctx->gsc_uid;
                 req->rq_auth_mapped_uid = grctx->src_ctx->gsc_mapped_uid;
@@ -2405,6 +2467,31 @@ int gss_svc_payload(struct gss_svc_reqctx *grctx, int early,
         return gss_mech_payload(NULL, msgsize, privacy);
 }
 
+static int gss_svc_bulk_payload(struct gss_svc_ctx *gctx,
+                                struct sptlrpc_flavor *flvr,
+                                int read)
+{
+        int     payload = sizeof(struct ptlrpc_bulk_sec_desc);
+
+        if (read) {
+                switch (SPTLRPC_FLVR_BULK_SVC(flvr->sf_rpc)) {
+                case SPTLRPC_BULK_SVC_NULL:
+                        break;
+                case SPTLRPC_BULK_SVC_INTG:
+                        payload += gss_mech_payload(NULL, 0, 0);
+                        break;
+                case SPTLRPC_BULK_SVC_PRIV:
+                        payload += gss_mech_payload(NULL, 0, 1);
+                        break;
+                case SPTLRPC_BULK_SVC_AUTH:
+                default:
+                        LBUG();
+                }
+        }
+
+        return payload;
+}
+
 int gss_svc_alloc_rs(struct ptlrpc_request *req, int msglen)
 {
         struct gss_svc_reqctx       *grctx;
@@ -2422,7 +2509,7 @@ int gss_svc_alloc_rs(struct ptlrpc_request *req, int msglen)
                 RETURN(-EPROTO);
         }
 
-        svc = RPC_FLVR_SVC(req->rq_flvr.sf_rpc);
+        svc = SPTLRPC_FLVR_SVC(req->rq_flvr.sf_rpc);
         early = (req->rq_packed_final == 0);
 
         grctx = gss_svc_ctx2reqctx(req->rq_svc_ctx);
@@ -2440,9 +2527,10 @@ int gss_svc_alloc_rs(struct ptlrpc_request *req, int msglen)
                         LASSERT(grctx->src_reqbsd);
 
                         bsd_off = ibufcnt;
-                        ibuflens[ibufcnt++] = bulk_sec_desc_size(
-                                                grctx->src_reqbsd->bsd_hash_alg,
-                                                0, req->rq_bulk_read);
+                        ibuflens[ibufcnt++] = gss_svc_bulk_payload(
+                                                        grctx->src_ctx,
+                                                        &req->rq_flvr,
+                                                        req->rq_bulk_read);
                 }
 
                 txtsize = lustre_msg_size_v2(ibufcnt, ibuflens);
@@ -2465,9 +2553,10 @@ int gss_svc_alloc_rs(struct ptlrpc_request *req, int msglen)
                         LASSERT(grctx->src_reqbsd);
 
                         bsd_off = bufcnt;
-                        buflens[bufcnt] = bulk_sec_desc_size(
-                                                grctx->src_reqbsd->bsd_hash_alg,
-                                                0, req->rq_bulk_read);
+                        buflens[bufcnt] = gss_svc_bulk_payload(
+                                                        grctx->src_ctx,
+                                                        &req->rq_flvr,
+                                                        req->rq_bulk_read);
                         if (svc == SPTLRPC_SVC_INTG)
                                 txtsize += buflens[bufcnt];
                         bufcnt++;
@@ -2488,7 +2577,7 @@ int gss_svc_alloc_rs(struct ptlrpc_request *req, int msglen)
                 /* pre-allocated */
                 LASSERT(rs->rs_size >= rs_size);
         } else {
-                OBD_ALLOC(rs, rs_size);
+                OBD_ALLOC_LARGE(rs, rs_size);
                 if (rs == NULL)
                         RETURN(-ENOMEM);
 
@@ -2537,7 +2626,7 @@ static int gss_svc_seal(struct ptlrpc_request *req,
         ENTRY;
 
         /* get clear data length. note embedded lustre_msg might
-         * have been shrinked */
+         * have been shrunk */
         if (req->rq_replen != lustre_msg_buflen(rs->rs_repbuf, 0))
                 msglen = lustre_shrink_msg(rs->rs_repbuf, 0, req->rq_replen, 1);
         else 
@@ -2560,7 +2649,7 @@ static int gss_svc_seal(struct ptlrpc_request *req,
 
         /* allocate temporary cipher buffer */
         token_buflen = gss_mech_payload(gctx->gsc_mechctx, msglen, 1);
-        OBD_ALLOC(token_buf, token_buflen);
+        OBD_ALLOC_LARGE(token_buf, token_buflen);
         if (token_buf == NULL)
                 RETURN(-ENOMEM);
 
@@ -2604,7 +2693,8 @@ static int gss_svc_seal(struct ptlrpc_request *req,
         memcpy(lustre_msg_buf(rs->rs_repbuf, 1, 0), token.data, token.len);
 
         /* reply offset */
-        if (likely(req->rq_packed_final))
+        if (req->rq_packed_final &&
+            (lustre_msghdr_get_flags(req->rq_reqmsg) & MSGHDR_AT_SUPPORT))
                 req->rq_reply_off = gss_at_reply_off_priv;
         else
                 req->rq_reply_off = 0;
@@ -2616,7 +2706,7 @@ static int gss_svc_seal(struct ptlrpc_request *req,
 
         rc = 0;
 out_free:
-        OBD_FREE(token_buf, token_buflen);
+        OBD_FREE_LARGE(token_buf, token_buflen);
         RETURN(rc);
 }
 
@@ -2677,13 +2767,13 @@ void gss_svc_free_rs(struct ptlrpc_reply_state *rs)
         rs->rs_svc_ctx = NULL;
 
         if (!rs->rs_prealloc)
-                OBD_FREE(rs, rs->rs_size);
+                OBD_FREE_LARGE(rs, rs->rs_size);
 }
 
 void gss_svc_free_ctx(struct ptlrpc_svc_ctx *ctx)
 {
-        LASSERT(atomic_read(&ctx->sc_refcount) == 0);
-        gss_svc_reqctx_free(gss_svc_ctx2reqctx(ctx));
+       LASSERT(atomic_read(&ctx->sc_refcount) == 0);
+       gss_svc_reqctx_free(gss_svc_ctx2reqctx(ctx));
 }
 
 int gss_copy_rvc_cli_ctx(struct ptlrpc_cli_ctx *cli_ctx,
@@ -2699,21 +2789,21 @@ int gss_copy_rvc_cli_ctx(struct ptlrpc_cli_ctx *cli_ctx,
         cli_gctx->gc_proc = PTLRPC_GSS_PROC_DATA;
         cli_gctx->gc_win = GSS_SEQ_WIN;
 
-        /* The problem is the reverse ctx might get lost in some recovery
-         * situations, and the same svc_ctx will be used to re-create it.
-         * if there's callback be sentout before that, new reverse ctx start
-         * with sequence 0 will lead to future callback rpc be treated as
-         * replay.
-         *
-         * each reverse root ctx will record its latest sequence number on its
-         * buddy svcctx before be destroied, so here we continue use it.
-         */
-        atomic_set(&cli_gctx->gc_seq, svc_gctx->gsc_rvs_seq);
-
-        if (gss_svc_upcall_dup_handle(&cli_gctx->gc_svc_handle, svc_gctx)) {
-                CERROR("failed to dup svc handle\n");
-                goto err_out;
-        }
+       /* The problem is the reverse ctx might get lost in some recovery
+        * situations, and the same svc_ctx will be used to re-create it.
+        * if there's callback be sentout before that, new reverse ctx start
+        * with sequence 0 will lead to future callback rpc be treated as
+        * replay.
+        *
+        * each reverse root ctx will record its latest sequence number on its
+        * buddy svcctx before be destroyed, so here we continue use it.
+        */
+       atomic_set(&cli_gctx->gc_seq, svc_gctx->gsc_rvs_seq);
+
+       if (gss_svc_upcall_dup_handle(&cli_gctx->gc_svc_handle, svc_gctx)) {
+               CERROR("failed to dup svc handle\n");
+               goto err_out;
+       }
 
         if (lgss_copy_reverse_context(svc_gctx->gsc_mechctx, &mechctx) !=
             GSS_S_COMPLETE) {
@@ -2757,7 +2847,7 @@ static void gss_init_at_reply_offset(void)
         gss_at_reply_off_priv = lustre_msg_size_v2(3, buflens);
 }
 
-int __init sptlrpc_gss_init(void)
+static int __init sptlrpc_gss_init(void)
 {
         int rc;
 
@@ -2773,57 +2863,63 @@ int __init sptlrpc_gss_init(void)
         if (rc)
                 goto out_cli_upcall;
 
-        rc = init_kerberos_module();
-        if (rc)
-                goto out_svc_upcall;
+       rc = init_null_module();
+       if (rc)
+               goto out_svc_upcall;
 
-        /* register policy after all other stuff be intialized, because it
-         * might be in used immediately after the registration. */
+       rc = init_kerberos_module();
+       if (rc)
+               goto out_null;
 
-        rc = gss_init_keyring();
-        if (rc)
-                goto out_kerberos;
+       rc = init_sk_module();
+       if (rc)
+               goto out_kerberos;
 
-#ifdef HAVE_GSS_PIPEFS
-        rc = gss_init_pipefs();
-        if (rc)
-                goto out_keyring;
-#endif
+       /* register policy after all other stuff be initialized, because it
+        * might be in used immediately after the registration. */
 
-        gss_init_at_reply_offset();
+       rc = gss_init_keyring();
+       if (rc)
+               goto out_sk;
 
-        return 0;
+       rc = gss_init_pipefs();
+       if (rc)
+               goto out_keyring;
 
-#ifdef HAVE_GSS_PIPEFS
-out_keyring:
-        gss_exit_keyring();
-#endif
+       gss_init_at_reply_offset();
 
+       return 0;
+
+out_keyring:
+       gss_exit_keyring();
+out_sk:
+       cleanup_sk_module();
 out_kerberos:
-        cleanup_kerberos_module();
+       cleanup_kerberos_module();
+out_null:
+       cleanup_null_module();
 out_svc_upcall:
-        gss_exit_svc_upcall();
+       gss_exit_svc_upcall();
 out_cli_upcall:
-        gss_exit_cli_upcall();
+       gss_exit_cli_upcall();
 out_lproc:
-        gss_exit_lproc();
-        return rc;
+       gss_exit_lproc();
+       return rc;
 }
 
 static void __exit sptlrpc_gss_exit(void)
 {
         gss_exit_keyring();
-#ifdef HAVE_GSS_PIPEFS
         gss_exit_pipefs();
-#endif
         cleanup_kerberos_module();
         gss_exit_svc_upcall();
         gss_exit_cli_upcall();
         gss_exit_lproc();
 }
 
-MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
-MODULE_DESCRIPTION("GSS security policy for Lustre");
+MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>");
+MODULE_DESCRIPTION("Lustre GSS security policy");
+MODULE_VERSION(LUSTRE_VERSION_STRING);
 MODULE_LICENSE("GPL");
 
 module_init(sptlrpc_gss_init);