Whamcloud - gitweb
branch: HEAD
authorericm <ericm>
Wed, 19 Sep 2007 18:10:10 +0000 (18:10 +0000)
committerericm <ericm>
Wed, 19 Sep 2007 18:10:10 +0000 (18:10 +0000)
land b1_8_keyring (20070919_1155): support linux keyring in lustre gss.

50 files changed:
lustre/autoconf/lustre-core.m4
lustre/include/linux/lustre_lite.h
lustre/include/lustre_sec.h
lustre/kernel_patches/patches/export_symbols-2.6-rhel4.patch
lustre/kernel_patches/patches/export_symbols-2.6.12.patch
lustre/ptlrpc/Makefile.in
lustre/ptlrpc/autoMakefile.am
lustre/ptlrpc/client.c
lustre/ptlrpc/gss/Makefile.in
lustre/ptlrpc/gss/gss_bulk.c
lustre/ptlrpc/gss/gss_cli_upcall.c
lustre/ptlrpc/gss/gss_generic_token.c
lustre/ptlrpc/gss/gss_internal.h
lustre/ptlrpc/gss/gss_keyring.c [new file with mode: 0644]
lustre/ptlrpc/gss/gss_krb5_mech.c
lustre/ptlrpc/gss/gss_mech_switch.c
lustre/ptlrpc/gss/gss_pipefs.c [new file with mode: 0644]
lustre/ptlrpc/gss/gss_rawobj.c
lustre/ptlrpc/gss/gss_svc_upcall.c
lustre/ptlrpc/gss/lproc_gss.c
lustre/ptlrpc/gss/sec_gss.c
lustre/ptlrpc/ptlrpc_internal.h
lustre/ptlrpc/sec.c
lustre/ptlrpc/sec_gc.c [new file with mode: 0644]
lustre/ptlrpc/sec_lproc.c
lustre/ptlrpc/sec_null.c
lustre/ptlrpc/sec_plain.c
lustre/tests/sanity-gss.sh
lustre/tests/test-framework.sh
lustre/utils/gss/Makefile.am
lustre/utils/gss/README
lustre/utils/gss/cacheio.c
lustre/utils/gss/cacheio.h
lustre/utils/gss/context.c
lustre/utils/gss/context_heimdal.c
lustre/utils/gss/context_lucid.c
lustre/utils/gss/context_mit.c
lustre/utils/gss/gssd.c
lustre/utils/gss/gssd_proc.c
lustre/utils/gss/krb5_util.c
lustre/utils/gss/lgss_keyring.c [new file with mode: 0644]
lustre/utils/gss/lgss_krb5_utils.c [new file with mode: 0644]
lustre/utils/gss/lgss_krb5_utils.h [new file with mode: 0644]
lustre/utils/gss/lgss_utils.c [new file with mode: 0644]
lustre/utils/gss/lgss_utils.h [new file with mode: 0644]
lustre/utils/gss/lsupport.c
lustre/utils/gss/nfs-utils-1.0.10-lustre.diff
lustre/utils/gss/nfs-utils-1.0.11-lustre.diff [new file with mode: 0644]
lustre/utils/gss/svcgssd.c
lustre/utils/gss/svcgssd_proc.c

index 3fc77f2..5078e1e 100644 (file)
@@ -636,54 +636,72 @@ LB_LINUX_CONFIG_IM([CRYPTO_SHA1],[],[
 ])
 ])
 
+#
+# LC_CONFIG_GSS_KEYRING (default enabled, if gss is enabled)
+#
+AC_DEFUN([LC_CONFIG_GSS_KEYRING],
+[AC_MSG_CHECKING([whether to enable gss keyring backend])
+ AC_ARG_ENABLE([gss_keyring], 
+              [AC_HELP_STRING([--disable-gss-keyring],
+                               [disable gss keyring backend])],
+              [],[enable_gss_keyring='yes'])
+ AC_MSG_RESULT([$enable_gss_keyring])
+
+ if test x$enable_gss_keyring != xno; then
+       LB_LINUX_CONFIG_IM([KEYS],[],
+                           [AC_MSG_ERROR([GSS keyring backend require that CONFIG_KEYS be enabled in your kernel.])])
+
+       AC_CHECK_LIB([keyutils], [keyctl_search], [],
+                     [AC_MSG_ERROR([libkeyutils is not found, which is required by gss keyring backend])],)
+
+       AC_DEFINE([HAVE_GSS_KEYRING], [1],
+                  [Define this if you enable gss keyring backend])
+ fi
+])
+
 m4_pattern_allow(AC_KERBEROS_V5)
 
 #
-# LC_CONFIG_GSS
+# LC_CONFIG_GSS (default disabled)
 #
 # Build gss and related tools of Lustre. Currently both kernel and user space
 # parts are depend on linux platform.
 #
 AC_DEFUN([LC_CONFIG_GSS],
 [AC_MSG_CHECKING([whether to enable gss/krb5 support])
-AC_ARG_ENABLE([gss], 
-       AC_HELP_STRING([--enable-gss], [enable gss/krb5 support]),
-       [],[enable_gss='no'])
-AC_MSG_RESULT([$enable_gss])
-
-if test x$enable_gss == xyes; then
-       LB_LINUX_CONFIG_IM([SUNRPC],[],[
-               AC_MSG_ERROR([GSS require that CONFIG_SUNRPC is enabled in your kernel.])
-       ])
-       LB_LINUX_CONFIG_IM([CRYPTO_DES],[],[
-               AC_MSG_WARN([DES support is recommended by using GSS.])
-       ])
-       LB_LINUX_CONFIG_IM([CRYPTO_MD5],[],[
-               AC_MSG_WARN([MD5 support is recommended by using GSS.])
-       ])
-       LB_LINUX_CONFIG_IM([CRYPTO_SHA256],[],[
-               AC_MSG_WARN([SHA256 support is recommended by using GSS.])
-       ])
-       LB_LINUX_CONFIG_IM([CRYPTO_SHA512],[],[
-               AC_MSG_WARN([SHA512 support is recommended by using GSS.])
-       ])
-       LB_LINUX_CONFIG_IM([CRYPTO_ARC4],[],[
-               AC_MSG_WARN([ARC4 support is recommended by using GSS.])
-       ])
-       #
-       # AES symbol is uncertain (optimized & depend on arch)
-       #
-
-       AC_CHECK_LIB(gssapi, gss_init_sec_context, [
-               GSSAPI_LIBS="$GSSAPI_LDFLAGS -lgssapi"
-               ], [
-               AC_MSG_ERROR([libgssapi is not found, consider --disable-gss.])
-               ], 
-       )
+ AC_ARG_ENABLE([gss], 
+               [AC_HELP_STRING([--enable-gss], [enable gss/krb5 support])],
+               [],[enable_gss='no'])
+ AC_MSG_RESULT([$enable_gss])
+
+ if test x$enable_gss == xyes; then
+       LC_CONFIG_GSS_KEYRING
+
+        LB_LINUX_CONFIG_IM([CRYPTO_DES],[],
+                           [AC_MSG_WARN([kernel DES support is recommended by using GSS.])])
+        LB_LINUX_CONFIG_IM([CRYPTO_MD5],[],
+                           [AC_MSG_WARN([kernel MD5 support is recommended by using GSS.])])
+       LB_LINUX_CONFIG_IM([CRYPTO_SHA256],[],
+                           [AC_MSG_WARN([kernel SHA256 support is recommended by using GSS.])])
+       LB_LINUX_CONFIG_IM([CRYPTO_SHA512],[],
+                           [AC_MSG_WARN([kernel SHA512 support is recommended by using GSS.])])
+       LB_LINUX_CONFIG_IM([CRYPTO_ARC4],[],
+                           [AC_MSG_WARN([kernel ARC4 support is recommended by using GSS.])])
+
+       dnl FIXME
+       dnl the AES symbol is usually tied to the arch, e.g. CRYPTO_AES_586
+       dnl FIXME
+       LB_LINUX_CONFIG_IM([CRYPTO_AES],[],
+                           [AC_MSG_WARN([kernel AES support is recommended by using GSS.])])
+
+       AC_CHECK_LIB([gssapi], [gss_init_sec_context],
+                     [GSSAPI_LIBS="$GSSAPI_LDFLAGS -lgssapi"],
+                     [AC_MSG_ERROR([libgssapi is not found, which is required by GSS.])],)
 
        AC_SUBST(GSSAPI_LIBS)
+
        AC_KERBEROS_V5
-fi
+ fi
 ])
 
 # LC_FUNC_MS_FLOCK_LOCK
@@ -1478,6 +1496,8 @@ AM_CONDITIONAL(SPLIT, test x$enable_split = xyes)
 AM_CONDITIONAL(BLKID, test x$ac_cv_header_blkid_blkid_h = xyes)
 AM_CONDITIONAL(EXT2FS_DEVEL, test x$ac_cv_header_ext2fs_ext2fs_h = xyes)
 AM_CONDITIONAL(GSS, test x$enable_gss = xyes)
+AM_CONDITIONAL(GSS_KEYRING, test x$enable_gss_keyring = xyes)
+AM_CONDITIONAL(GSS_PIPEFS, test x$enable_gss_pipefs = xyes)
 AM_CONDITIONAL(LIBPTHREAD, test x$enable_libpthread = xyes)
 ])
 
index 41c670e..2ec527e 100644 (file)
@@ -1,6 +1,6 @@
 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
  * vim:expandtab:shiftwidth=8:tabstop=8:
- */
+
 
 #ifndef _LINUX_LL_H
 #define _LINUX_LL_H
index c677a78..8ce5ab3 100644 (file)
@@ -25,6 +25,7 @@
 /*
  * to avoid include
  */
+struct key;
 struct obd_import;
 struct ptlrpc_request;
 struct ptlrpc_reply_state;
@@ -49,6 +50,7 @@ enum sptlrpc_policies {
         SPTLRPC_POLICY_NULL             = 0,
         SPTLRPC_POLICY_PLAIN            = 1,
         SPTLRPC_POLICY_GSS              = 2,
+        SPTLRPC_POLICY_GSS_PIPEFS       = 3,
         SPTLRPC_POLICY_MAX,
 };
 
@@ -210,6 +212,9 @@ struct ptlrpc_ctx_ops {
         int     (*match)       (struct ptlrpc_cli_ctx *ctx,
                                 struct vfs_cred *vcred);
         int     (*refresh)     (struct ptlrpc_cli_ctx *ctx);
+        int     (*validate)    (struct ptlrpc_cli_ctx *ctx);
+        void    (*die)         (struct ptlrpc_cli_ctx *ctx,
+                                int grace);
         int     (*display)     (struct ptlrpc_cli_ctx *ctx,
                                 char *buf, int bufsize);
         /*
@@ -234,19 +239,22 @@ struct ptlrpc_ctx_ops {
                                 struct ptlrpc_bulk_desc *desc);
 };
 
-#define PTLRPC_CTX_UPTODATE_BIT        (0)  /* uptodate */
-#define PTLRPC_CTX_DEAD_BIT            (1)  /* mark expired gracefully */
-#define PTLRPC_CTX_ERROR_BIT           (2)  /* fatal error (refresh, etc.) */
-#define PTLRPC_CTX_HASHED_BIT          (8)  /* in hash table */
+#define PTLRPC_CTX_NEW_BIT             (0)  /* newly created */
+#define PTLRPC_CTX_UPTODATE_BIT        (1)  /* uptodate */
+#define PTLRPC_CTX_DEAD_BIT            (2)  /* mark expired gracefully */
+#define PTLRPC_CTX_ERROR_BIT           (3)  /* fatal error (refresh, etc.) */
+#define PTLRPC_CTX_CACHED_BIT          (8)  /* in ctx cache (hash etc.) */
 #define PTLRPC_CTX_ETERNAL_BIT         (9)  /* always valid */
 
+#define PTLRPC_CTX_NEW                 (1 << PTLRPC_CTX_NEW_BIT)
 #define PTLRPC_CTX_UPTODATE            (1 << PTLRPC_CTX_UPTODATE_BIT)
 #define PTLRPC_CTX_DEAD                (1 << PTLRPC_CTX_DEAD_BIT)
 #define PTLRPC_CTX_ERROR               (1 << PTLRPC_CTX_ERROR_BIT)
-#define PTLRPC_CTX_HASHED              (1 << PTLRPC_CTX_HASHED_BIT)
+#define PTLRPC_CTX_CACHED              (1 << PTLRPC_CTX_CACHED_BIT)
 #define PTLRPC_CTX_ETERNAL             (1 << PTLRPC_CTX_ETERNAL_BIT)
 
-#define PTLRPC_CTX_STATUS_MASK         (PTLRPC_CTX_UPTODATE   |       \
+#define PTLRPC_CTX_STATUS_MASK         (PTLRPC_CTX_NEW_BIT    |       \
+                                        PTLRPC_CTX_UPTODATE   |       \
                                         PTLRPC_CTX_DEAD       |       \
                                         PTLRPC_CTX_ERROR)
 
@@ -271,24 +279,31 @@ struct ptlrpc_sec_cops {
                                                 __u32 flavor,
                                                 unsigned long flags);
         void                    (*destroy_sec) (struct ptlrpc_sec *sec);
+
         /*
-         * search ctx for a certain user, if this function is missing,
-         * a generic function will be invoked by caller. implement this
-         * for any special need.
+         * context
          */
         struct ptlrpc_cli_ctx * (*lookup_ctx)  (struct ptlrpc_sec *sec,
-                                                struct vfs_cred *vcred);
+                                                struct vfs_cred *vcred,
+                                                int create,
+                                                int remove_dead);
+        void                    (*release_ctx) (struct ptlrpc_sec *sec,
+                                                struct ptlrpc_cli_ctx *ctx,
+                                                int sync);
+        int                     (*flush_ctx_cache)
+                                               (struct ptlrpc_sec *sec,
+                                                uid_t uid,
+                                                int grace,
+                                                int force);
+        void                    (*gc_ctx)      (struct ptlrpc_sec *sec);
+
         /*
-         * ptlrpc_cli_ctx constructor/destructor
+         * reverse context
          */
-        struct ptlrpc_cli_ctx * (*create_ctx)  (struct ptlrpc_sec *sec,
-                                                struct vfs_cred *vcred);
-        void                    (*destroy_ctx) (struct ptlrpc_sec *sec,
-                                                struct ptlrpc_cli_ctx *ctx);
-        /* reverse service */
         int                     (*install_rctx)(struct obd_import *imp,
                                                 struct ptlrpc_sec *sec,
                                                 struct ptlrpc_cli_ctx *ctx);
+
         /*
          * request/reply buffer manipulation
          */
@@ -306,6 +321,11 @@ struct ptlrpc_sec_cops {
                                                (struct ptlrpc_sec *sec,
                                                 struct ptlrpc_request *req,
                                                 int segment, int newsize);
+        /*
+         * misc
+         */
+        int                     (*display)     (struct ptlrpc_sec *sec,
+                                                char *buf, int buflen);
 };
 
 struct ptlrpc_sec_sops {
@@ -338,7 +358,8 @@ struct ptlrpc_sec_policy {
 
 #define PTLRPC_SEC_FL_REVERSE           0x0001 /* reverse sec */
 #define PTLRPC_SEC_FL_ROOTONLY          0x0002 /* treat everyone as root */
-#define PTLRPC_SEC_FL_BULK              0x0004 /* intensive bulk i/o expected */
+#define PTLRPC_SEC_FL_PAG               0x0004 /* PAG mode */
+#define PTLRPC_SEC_FL_BULK              0x0008 /* intensive bulk i/o expected */
 
 struct ptlrpc_sec {
         struct ptlrpc_sec_policy       *ps_policy;
@@ -347,9 +368,11 @@ struct ptlrpc_sec {
         unsigned long                   ps_flags;       /* PTLRPC_SEC_FL_XX */
         struct obd_import              *ps_import;      /* owning import */
         spinlock_t                      ps_lock;        /* protect ccache */
-        int                             ps_ccache_size; /* must be 2^n */
-        struct hlist_head              *ps_ccache;      /* ctx cache hash */
         atomic_t                        ps_busy;        /* busy count */
+        /*
+         * garbage collection
+         */
+        struct list_head                ps_gc_list;
         cfs_time_t                      ps_gc_interval; /* in seconds */
         cfs_time_t                      ps_gc_next;     /* in seconds */
 };
@@ -460,15 +483,52 @@ void sptlrpc_policy_put(struct ptlrpc_sec_policy *policy)
 /*
  * client credential
  */
-struct ptlrpc_cli_ctx *sptlrpc_ctx_get(struct ptlrpc_cli_ctx *ctx);
-void sptlrpc_ctx_put(struct ptlrpc_cli_ctx *ctx, int sync);
-void sptlrpc_ctx_expire(struct ptlrpc_cli_ctx *ctx);
-void sptlrpc_ctx_replace(struct ptlrpc_sec *sec, struct ptlrpc_cli_ctx *new);
-void sptlrpc_ctx_wakeup(struct ptlrpc_cli_ctx *ctx);
-int sptlrpc_ctx_display(struct ptlrpc_cli_ctx *ctx, char *buf, int bufsize);
+static inline
+unsigned long cli_ctx_status(struct ptlrpc_cli_ctx *ctx)
+{
+        return (ctx->cc_flags & PTLRPC_CTX_STATUS_MASK);
+}
+
+static inline
+int cli_ctx_is_uptodate(struct ptlrpc_cli_ctx *ctx)
+{
+        return (cli_ctx_status(ctx) == PTLRPC_CTX_UPTODATE);
+}
+
+static inline
+int cli_ctx_is_refreshed(struct ptlrpc_cli_ctx *ctx)
+{
+        return (cli_ctx_status(ctx) != 0);
+}
+
+static inline
+int cli_ctx_is_dead(struct ptlrpc_cli_ctx *ctx)
+{
+        return ((ctx->cc_flags & (PTLRPC_CTX_DEAD | PTLRPC_CTX_ERROR)) != 0);
+}
+
+static inline
+int cli_ctx_is_eternal(struct ptlrpc_cli_ctx *ctx)
+{
+        return ((ctx->cc_flags & PTLRPC_CTX_ETERNAL) != 0);
+}
+
+/*
+ * internal APIs which are only used by policy implementations
+ */
+void sptlrpc_sec_destroy(struct ptlrpc_sec *sec);
 
 /*
- * client wrap/buffers
+ * exported client context api
+ */
+struct ptlrpc_cli_ctx *sptlrpc_cli_ctx_get(struct ptlrpc_cli_ctx *ctx);
+void sptlrpc_cli_ctx_put(struct ptlrpc_cli_ctx *ctx, int sync);
+void sptlrpc_cli_ctx_expire(struct ptlrpc_cli_ctx *ctx);
+void sptlrpc_cli_ctx_wakeup(struct ptlrpc_cli_ctx *ctx);
+int sptlrpc_cli_ctx_display(struct ptlrpc_cli_ctx *ctx, char *buf, int bufsize);
+
+/*
+ * exported client context wrap/buffers
  */
 int sptlrpc_cli_wrap_request(struct ptlrpc_request *req);
 int sptlrpc_cli_unwrap_reply(struct ptlrpc_request *req);
@@ -481,7 +541,7 @@ int sptlrpc_cli_enlarge_reqbuf(struct ptlrpc_request *req,
 void sptlrpc_request_out_callback(struct ptlrpc_request *req);
 
 /*
- * higher interface of import & request
+ * exported higher interface of import & request
  */
 int sptlrpc_import_get_sec(struct obd_import *imp, struct ptlrpc_svc_ctx *svc_ctx,
                            __u32 flavor, unsigned long flags);
@@ -498,6 +558,7 @@ void sptlrpc_req_set_flavor(struct ptlrpc_request *req, int opcode);
 
 int sptlrpc_parse_flavor(enum lustre_part from, enum lustre_part to,
                          char *str, struct sec_flavor_config *conf);
+
 /* misc */
 const char * sec2target_str(struct ptlrpc_sec *sec);
 int sptlrpc_lprocfs_rd(char *page, char **start, off_t off, int count,
index 2a08192..0561e65 100644 (file)
@@ -79,16 +79,3 @@ Index: linux-2.6.9-5.0.3.EL/fs/dcache.c
  
  void d_genocide(struct dentry *root)
  {
-Index: linux-2.6.12-rc6/net/sunrpc/sunrpc_syms.c
-===================================================================
---- linux-2.6.12.orig/net/sunrpc/sunrpc_syms.c 2005-12-14 23:20:39.000000000 -0700
-+++ linux-2.6.12/net/sunrpc/sunrpc_syms.c      2005-12-14 23:21:47.000000000 -0700
-@@ -58,6 +58,8 @@ EXPORT_SYMBOL(rpc_unlink);
- EXPORT_SYMBOL(rpc_wake_up);
- EXPORT_SYMBOL(rpc_queue_upcall);
- EXPORT_SYMBOL(rpc_mkpipe);
-+EXPORT_SYMBOL(rpc_mkdir);
-+EXPORT_SYMBOL(rpc_rmdir);
- /* Client transport */
- EXPORT_SYMBOL(xprt_create_proto);
index 6521703..e21fcf4 100644 (file)
@@ -62,16 +62,3 @@ Index: linux-2.6.12-rc6/fs/dcache.c
  
  void d_genocide(struct dentry *root)
  {
-Index: linux-2.6.12-rc6/net/sunrpc/sunrpc_syms.c
-===================================================================
---- linux-2.6.12.orig/net/sunrpc/sunrpc_syms.c 2005-12-14 23:20:39.000000000 -0700
-+++ linux-2.6.12/net/sunrpc/sunrpc_syms.c      2005-12-14 23:21:47.000000000 -0700
-@@ -58,6 +58,8 @@ EXPORT_SYMBOL(rpc_unlink);
- EXPORT_SYMBOL(rpc_wake_up);
- EXPORT_SYMBOL(rpc_queue_upcall);
- EXPORT_SYMBOL(rpc_mkpipe);
-+EXPORT_SYMBOL(rpc_mkdir);
-+EXPORT_SYMBOL(rpc_rmdir);
- /* Client transport */
- EXPORT_SYMBOL(xprt_create_proto);
index bc45564..d5d8e18 100644 (file)
@@ -13,7 +13,7 @@ ptlrpc_objs := client.o recover.o connection.o niobuf.o pack_generic.o
 ptlrpc_objs += events.o ptlrpc_module.o service.o pinger.o recov_thread.o
 ptlrpc_objs += llog_net.o llog_client.o llog_server.o import.o ptlrpcd.o
 ptlrpc_objs += pers.o lproc_ptlrpc.o wiretest.o layout.o
-ptlrpc_objs += sec.o sec_bulk.o sec_null.o sec_plain.o sec_lproc.o
+ptlrpc_objs += sec.o sec_bulk.o sec_gc.o sec_null.o sec_plain.o sec_lproc.o
 
 ptlrpc-objs := $(ldlm_objs) $(ptlrpc_objs)
 
index c50cbcf..17b6f53 100644 (file)
@@ -18,8 +18,8 @@ LDLM_COMM_SOURCES= $(top_srcdir)/lustre/ldlm/l_lock.c \
 COMMON_SOURCES =  client.c recover.c connection.c niobuf.c pack_generic.c   \
     events.c ptlrpc_module.c service.c pinger.c recov_thread.c llog_net.c   \
     llog_client.c llog_server.c import.c ptlrpcd.c pers.c wiretest.c               \
-    ptlrpc_internal.h layout.c sec.c sec_bulk.c sec_null.c sec_plain.c      \
-    sec_lproc.c lproc_ptlrpc.c $(LDLM_COMM_SOURCES)
+    ptlrpc_internal.h layout.c sec.c sec_bulk.c sec_gc.c sec_null.c         \
+    sec_plain.c sec_lproc.c lproc_ptlrpc.c $(LDLM_COMM_SOURCES)
 
 if LIBLUSTRE
 
index 4fd69e9..6779362 100644 (file)
@@ -352,7 +352,7 @@ ptlrpc_prep_req_pool(struct obd_import *imp, __u32 version, int opcode,
         request->rq_import = class_import_get(imp);
 
         if (unlikely(ctx))
-                request->rq_cli_ctx = sptlrpc_ctx_get(ctx);
+                request->rq_cli_ctx = sptlrpc_cli_ctx_get(ctx);
         else {
                 rc = sptlrpc_req_get_ctx(request);
                 if (rc)
index 3871c65..2b4597a 100644 (file)
@@ -1,8 +1,11 @@
 MODULES := ptlrpc_gss
 
 ptlrpc_gss-objs := sec_gss.o gss_bulk.o gss_cli_upcall.o gss_svc_upcall.o      \
-                   gss_rawobj.o lproc_gss.o gss_generic_token.o                \
-                   gss_mech_switch.o gss_krb5_mech.o
+                   gss_rawobj.o lproc_gss.o                                    \
+                   gss_generic_token.o gss_mech_switch.o gss_krb5_mech.o
+
+@GSS_KEYRING_TRUE@ptlrpc_gss-objs += gss_keyring.o
+@GSS_PIPEFS_TRUE@ptlrpc_gss-objs += gss_pipefs.o
 
 default: all
 
index 77aa401..cede791 100644 (file)
@@ -30,6 +30,7 @@
 #include <linux/dcache.h>
 #include <linux/fs.h>
 #include <linux/random.h>
+#include <linux/mutex.h>
 #include <linux/crypto.h>
 #else
 #include <liblustre.h>
index 3bd7b2f..ac2a903 100644 (file)
 #include <linux/slab.h>
 #include <linux/dcache.h>
 #include <linux/fs.h>
+#include <linux/mutex.h>
 #include <linux/random.h>
-/* for rpc_pipefs */
-struct rpc_clnt;
-#include <linux/sunrpc/rpc_pipe_fs.h>
 #else
 #include <liblustre.h>
 #endif
@@ -53,575 +51,6 @@ struct rpc_clnt;
 #include "gss_internal.h"
 #include "gss_api.h"
 
-#define LUSTRE_PIPE_ROOT        "/lustre"
-#define LUSTRE_PIPE_KRB5        LUSTRE_PIPE_ROOT"/krb5"
-
-struct gss_upcall_msg_data {
-        __u32                           gum_seq;
-        __u32                           gum_uid;
-        __u32                           gum_gid;
-        __u32                           gum_svc;        /* MDS/OSS... */
-        __u64                           gum_nid;        /* peer NID */
-        __u8                            gum_obd[64];    /* client obd name */
-};
-
-struct gss_upcall_msg {
-        struct rpc_pipe_msg             gum_base;
-        atomic_t                        gum_refcount;
-        struct list_head                gum_list;
-        __u32                           gum_mechidx;
-        struct gss_sec                 *gum_gsec;
-        struct gss_cli_ctx             *gum_gctx;
-        struct gss_upcall_msg_data      gum_data;
-};
-
-static atomic_t upcall_seq = ATOMIC_INIT(0);
-
-static inline
-__u32 upcall_get_sequence(void)
-{
-        return (__u32) atomic_inc_return(&upcall_seq);
-}
-
-enum mech_idx_t {
-        MECH_KRB5   = 0,
-        MECH_MAX
-};
-
-static inline
-__u32 mech_name2idx(const char *name)
-{
-        LASSERT(!strcmp(name, "krb5"));
-        return MECH_KRB5;
-}
-
-/* pipefs dentries for each mechanisms */
-static struct dentry *de_pipes[MECH_MAX] = { NULL, };
-/* all upcall messgaes linked here */
-static struct list_head upcall_lists[MECH_MAX];
-/* and protected by this */
-static spinlock_t upcall_locks[MECH_MAX];
-
-static inline
-void upcall_list_lock(int idx)
-{
-        spin_lock(&upcall_locks[idx]);
-}
-
-static inline
-void upcall_list_unlock(int idx)
-{
-        spin_unlock(&upcall_locks[idx]);
-}
-
-static
-void upcall_msg_enlist(struct gss_upcall_msg *msg)
-{
-        __u32 idx = msg->gum_mechidx;
-
-        upcall_list_lock(idx);
-        list_add(&msg->gum_list, &upcall_lists[idx]);
-        upcall_list_unlock(idx);
-}
-
-static
-void upcall_msg_delist(struct gss_upcall_msg *msg)
-{
-        __u32 idx = msg->gum_mechidx;
-
-        upcall_list_lock(idx);
-        list_del_init(&msg->gum_list);
-        upcall_list_unlock(idx);
-}
-
-/**********************************************
- * rpc_pipe upcall helpers                    *
- **********************************************/
-static
-void gss_release_msg(struct gss_upcall_msg *gmsg)
-{
-        ENTRY;
-        LASSERT(atomic_read(&gmsg->gum_refcount) > 0);
-
-        if (!atomic_dec_and_test(&gmsg->gum_refcount)) {
-                EXIT;
-                return;
-        }
-
-        if (gmsg->gum_gctx) {
-                sptlrpc_ctx_wakeup(&gmsg->gum_gctx->gc_base);
-                sptlrpc_ctx_put(&gmsg->gum_gctx->gc_base, 1);
-                gmsg->gum_gctx = NULL;
-        }
-
-        LASSERT(list_empty(&gmsg->gum_list));
-        LASSERT(list_empty(&gmsg->gum_base.list));
-        OBD_FREE_PTR(gmsg);
-        EXIT;
-}
-
-static
-void gss_unhash_msg_nolock(struct gss_upcall_msg *gmsg)
-{
-        __u32 idx = gmsg->gum_mechidx;
-
-        LASSERT(idx < MECH_MAX);
-        LASSERT_SPIN_LOCKED(&upcall_locks[idx]);
-
-        if (list_empty(&gmsg->gum_list))
-                return;
-
-        list_del_init(&gmsg->gum_list);
-        LASSERT(atomic_read(&gmsg->gum_refcount) > 1);
-        atomic_dec(&gmsg->gum_refcount);
-}
-
-static
-void gss_unhash_msg(struct gss_upcall_msg *gmsg)
-{
-        __u32 idx = gmsg->gum_mechidx;
-
-        LASSERT(idx < MECH_MAX);
-        upcall_list_lock(idx);
-        gss_unhash_msg_nolock(gmsg);
-        upcall_list_unlock(idx);
-}
-
-static
-void gss_msg_fail_ctx(struct gss_upcall_msg *gmsg)
-{
-        if (gmsg->gum_gctx) {
-                struct ptlrpc_cli_ctx *ctx = &gmsg->gum_gctx->gc_base;
-
-                LASSERT(atomic_read(&ctx->cc_refcount) > 0);
-                sptlrpc_ctx_expire(ctx);
-                set_bit(PTLRPC_CTX_ERROR_BIT, &ctx->cc_flags);
-        }
-}
-
-static
-struct gss_upcall_msg * gss_find_upcall(__u32 mechidx, __u32 seq)
-{
-        struct gss_upcall_msg *gmsg;
-
-        upcall_list_lock(mechidx);
-        list_for_each_entry(gmsg, &upcall_lists[mechidx], gum_list) {
-                if (gmsg->gum_data.gum_seq != seq)
-                        continue;
-
-                LASSERT(atomic_read(&gmsg->gum_refcount) > 0);
-                LASSERT(gmsg->gum_mechidx == mechidx);
-
-                atomic_inc(&gmsg->gum_refcount);
-                upcall_list_unlock(mechidx);
-                return gmsg;
-        }
-        upcall_list_unlock(mechidx);
-        return NULL;
-}
-
-static
-int simple_get_bytes(char **buf, __u32 *buflen, void *res, __u32 reslen)
-{
-        if (*buflen < reslen) {
-                CERROR("buflen %u < %u\n", *buflen, reslen);
-                return -EINVAL;
-        }
-
-        memcpy(res, *buf, reslen);
-        *buf += reslen;
-        *buflen -= reslen;
-        return 0;
-}
-
-/*******************************************
- * rpc_pipe APIs                           *
- *******************************************/
-static
-ssize_t gss_pipe_upcall(struct file *filp, struct rpc_pipe_msg *msg,
-                        char *dst, size_t buflen)
-{
-        char *data = (char *)msg->data + msg->copied;
-        ssize_t mlen = msg->len;
-        ssize_t left;
-        ENTRY;
-
-        if (mlen > buflen)
-                mlen = buflen;
-        left = copy_to_user(dst, data, mlen);
-        if (left < 0) {
-                msg->errno = left;
-                RETURN(left);
-        }
-        mlen -= left;
-        msg->copied += mlen;
-        msg->errno = 0;
-        RETURN(mlen);
-}
-
-static
-ssize_t gss_pipe_downcall(struct file *filp, const char *src, size_t mlen)
-{
-        struct rpc_inode        *rpci = RPC_I(filp->f_dentry->d_inode);
-        struct gss_upcall_msg   *gss_msg;
-        struct ptlrpc_cli_ctx   *ctx;
-        struct gss_cli_ctx      *gctx = NULL;
-        char                    *buf, *data;
-        int                      datalen;
-        int                      timeout, rc;
-        __u32                    mechidx, seq, gss_err;
-        ENTRY;
-
-        mechidx = (__u32) (long) rpci->private;
-        LASSERT(mechidx < MECH_MAX);
-
-        OBD_ALLOC(buf, mlen);
-        if (!buf)
-                RETURN(-ENOMEM);
-
-        if (copy_from_user(buf, src, mlen)) {
-                CERROR("failed copy user space data\n");
-                GOTO(out_free, rc = -EFAULT);
-        }
-        data = buf;
-        datalen = mlen;
-
-        /* data passed down format:
-         *  - seq
-         *  - timeout
-         *  - gc_win / error
-         *  - wire_ctx (rawobj)
-         *  - mech_ctx (rawobj)
-         */
-        if (simple_get_bytes(&data, &datalen, &seq, sizeof(seq))) {
-                CERROR("fail to get seq\n");
-                GOTO(out_free, rc = -EFAULT);
-        }
-
-        gss_msg = gss_find_upcall(mechidx, seq);
-        if (!gss_msg) {
-                CERROR("upcall %u has aborted earlier\n", seq);
-                GOTO(out_free, rc = -EINVAL);
-        }
-
-        gss_unhash_msg(gss_msg);
-        gctx = gss_msg->gum_gctx;
-        LASSERT(gctx);
-        LASSERT(atomic_read(&gctx->gc_base.cc_refcount) > 0);
-
-        /* timeout is not in use for now */
-        if (simple_get_bytes(&data, &datalen, &timeout, sizeof(timeout)))
-                GOTO(out_msg, rc = -EFAULT);
-
-        /* lgssd signal an error by gc_win == 0 */
-        if (simple_get_bytes(&data, &datalen, &gctx->gc_win,
-                             sizeof(gctx->gc_win)))
-                GOTO(out_msg, rc = -EFAULT);
-
-        if (gctx->gc_win == 0) {
-                /* followed by:
-                 * - rpc error
-                 * - gss error
-                 */
-                if (simple_get_bytes(&data, &datalen, &rc, sizeof(rc)))
-                        GOTO(out_msg, rc = -EFAULT);
-                if (simple_get_bytes(&data, &datalen, &gss_err,sizeof(gss_err)))
-                        GOTO(out_msg, rc = -EFAULT);
-
-                if (rc == 0 && gss_err == GSS_S_COMPLETE) {
-                        CWARN("both rpc & gss error code not set\n");
-                        rc = -EPERM;
-                }
-        } else {
-                rawobj_t tmpobj;
-
-                /* handle */
-                if (rawobj_extract_local(&tmpobj, (__u32 **) &data, &datalen))
-                        GOTO(out_msg, rc = -EFAULT);
-                if (rawobj_dup(&gctx->gc_handle, &tmpobj))
-                        GOTO(out_msg, rc = -ENOMEM);
-
-                /* mechctx */
-                if (rawobj_extract_local(&tmpobj, (__u32 **) &data, &datalen))
-                        GOTO(out_msg, rc = -EFAULT);
-                gss_err = lgss_import_sec_context(&tmpobj,
-                                                  gss_msg->gum_gsec->gs_mech,
-                                                  &gctx->gc_mechctx);
-                rc = 0;
-        }
-
-        if (likely(rc == 0 && gss_err == GSS_S_COMPLETE)) {
-                gss_cli_ctx_uptodate(gctx);
-        } else {
-                ctx = &gctx->gc_base;
-                sptlrpc_ctx_expire(ctx);
-                if (rc != -ERESTART || gss_err != GSS_S_COMPLETE)
-                        set_bit(PTLRPC_CTX_ERROR_BIT, &ctx->cc_flags);
-
-                CERROR("refresh ctx %p(uid %d) failed: %d/0x%08x: %s\n",
-                       ctx, ctx->cc_vcred.vc_uid, rc, gss_err,
-                       test_bit(PTLRPC_CTX_ERROR_BIT, &ctx->cc_flags) ?
-                       "fatal error" : "non-fatal");
-        }
-
-        rc = mlen;
-
-out_msg:
-        gss_release_msg(gss_msg);
-
-out_free:
-        OBD_FREE(buf, mlen);
-        /* FIXME
-         * hack pipefs: always return asked length unless all following
-         * downcalls might be messed up.
-         */
-        rc = mlen;
-        RETURN(rc);
-}
-
-static
-void gss_pipe_destroy_msg(struct rpc_pipe_msg *msg)
-{
-        struct gss_upcall_msg          *gmsg;
-        struct gss_upcall_msg_data     *gumd;
-        static cfs_time_t               ratelimit = 0;
-        ENTRY;
-
-        LASSERT(list_empty(&msg->list));
-
-        /* normally errno is >= 0 */
-        if (msg->errno >= 0) {
-                EXIT;
-                return;
-        }
-
-        gmsg = container_of(msg, struct gss_upcall_msg, gum_base);
-        gumd = &gmsg->gum_data;
-        LASSERT(atomic_read(&gmsg->gum_refcount) > 0);
-
-        CERROR("failed msg %p (seq %u, uid %u, svc %u, nid "LPX64", obd %.*s): "
-               "errno %d\n", msg, gumd->gum_seq, gumd->gum_uid, gumd->gum_svc,
-               gumd->gum_nid, (int) sizeof(gumd->gum_obd),
-               gumd->gum_obd, msg->errno);
-
-        atomic_inc(&gmsg->gum_refcount);
-        gss_unhash_msg(gmsg);
-        if (msg->errno == -ETIMEDOUT || msg->errno == -EPIPE) {
-                cfs_time_t now = cfs_time_current_sec();
-
-                if (cfs_time_after(now, ratelimit)) {
-                        CWARN("upcall timed out, is lgssd running?\n");
-                        ratelimit = now + 15;
-                }
-        }
-        gss_msg_fail_ctx(gmsg);
-        gss_release_msg(gmsg);
-        EXIT;
-}
-
-static
-void gss_pipe_release(struct inode *inode)
-{
-        struct rpc_inode *rpci = RPC_I(inode);
-        __u32             idx;
-        ENTRY;
-
-        idx = (__u32) (long) rpci->private;
-        LASSERT(idx < MECH_MAX);
-
-        upcall_list_lock(idx);
-        while (!list_empty(&upcall_lists[idx])) {
-                struct gss_upcall_msg      *gmsg;
-                struct gss_upcall_msg_data *gumd;
-
-                gmsg = list_entry(upcall_lists[idx].next,
-                                  struct gss_upcall_msg, gum_list);
-                gumd = &gmsg->gum_data;
-                LASSERT(list_empty(&gmsg->gum_base.list));
-
-                CERROR("failing remaining msg %p:seq %u, uid %u, svc %u, "
-                       "nid "LPX64", obd %.*s\n", gmsg,
-                       gumd->gum_seq, gumd->gum_uid, gumd->gum_svc,
-                       gumd->gum_nid, (int) sizeof(gumd->gum_obd),
-                       gumd->gum_obd);
-
-                gmsg->gum_base.errno = -EPIPE;
-                atomic_inc(&gmsg->gum_refcount);
-                gss_unhash_msg_nolock(gmsg);
-
-                gss_msg_fail_ctx(gmsg);
-
-                upcall_list_unlock(idx);
-                gss_release_msg(gmsg);
-                upcall_list_lock(idx);
-        }
-        upcall_list_unlock(idx);
-        EXIT;
-}
-
-static struct rpc_pipe_ops gss_upcall_ops = {
-        .upcall         = gss_pipe_upcall,
-        .downcall       = gss_pipe_downcall,
-        .destroy_msg    = gss_pipe_destroy_msg,
-        .release_pipe   = gss_pipe_release,
-};
-
-
-/*******************************************
- * upcall helper functions                 *
- *******************************************/
-
-static inline
-__u32 import_to_gss_svc(struct obd_import *imp)
-{
-        const char *name = imp->imp_obd->obd_type->typ_name;
-        if (!strcmp(name, LUSTRE_MDC_NAME))
-                return LUSTRE_GSS_TGT_MDS;
-        if (!strcmp(name, LUSTRE_OSC_NAME))
-                return LUSTRE_GSS_TGT_OSS;
-        LBUG();
-        return 0;
-}
-
-int gss_ctx_refresh_pipefs(struct ptlrpc_cli_ctx *ctx)
-{
-        struct obd_import          *imp;
-        struct gss_sec             *gsec;
-        struct gss_upcall_msg      *gmsg;
-        int                         rc = 0;
-        ENTRY;
-
-        might_sleep();
-
-        LASSERT(ctx->cc_sec);
-        LASSERT(ctx->cc_sec->ps_import);
-        LASSERT(ctx->cc_sec->ps_import->imp_obd);
-
-        imp = ctx->cc_sec->ps_import;
-        if (!imp->imp_connection) {
-                CERROR("import has no connection set\n");
-                RETURN(-EINVAL);
-        }
-
-        gsec = container_of(ctx->cc_sec, struct gss_sec, gs_base);
-
-        OBD_ALLOC_PTR(gmsg);
-        if (!gmsg)
-                RETURN(-ENOMEM);
-
-        /* initialize pipefs base msg */
-        INIT_LIST_HEAD(&gmsg->gum_base.list);
-        gmsg->gum_base.data = &gmsg->gum_data;
-        gmsg->gum_base.len = sizeof(gmsg->gum_data);
-        gmsg->gum_base.copied = 0;
-        gmsg->gum_base.errno = 0;
-
-        /* init upcall msg */
-        atomic_set(&gmsg->gum_refcount, 1);
-        gmsg->gum_mechidx = mech_name2idx(gsec->gs_mech->gm_name);
-        gmsg->gum_gsec = gsec;
-        gmsg->gum_gctx = container_of(sptlrpc_ctx_get(ctx),
-                                      struct gss_cli_ctx, gc_base);
-        gmsg->gum_data.gum_seq = upcall_get_sequence();
-        gmsg->gum_data.gum_uid = ctx->cc_vcred.vc_uid;
-        gmsg->gum_data.gum_gid = 0; /* not used for now */
-        gmsg->gum_data.gum_svc = import_to_gss_svc(imp);
-        gmsg->gum_data.gum_nid = imp->imp_connection->c_peer.nid;
-        strncpy(gmsg->gum_data.gum_obd, imp->imp_obd->obd_name,
-                sizeof(gmsg->gum_data.gum_obd));
-
-        /* This only could happen when sysadmin set it dead/expired
-         * using lctl by force.
-         */
-        smp_mb();
-        if (ctx->cc_flags & PTLRPC_CTX_STATUS_MASK) {
-                CWARN("ctx %p(%u->%s) was set flags %lx unexpectedly\n",
-                      ctx, ctx->cc_vcred.vc_uid, sec2target_str(ctx->cc_sec),
-                      ctx->cc_flags);
-
-                LASSERT(!(ctx->cc_flags & PTLRPC_CTX_UPTODATE));
-                ctx->cc_flags |= PTLRPC_CTX_DEAD | PTLRPC_CTX_ERROR;
-
-                rc = -EIO;
-                goto err_free;
-        }
-
-        upcall_msg_enlist(gmsg);
-
-        rc = rpc_queue_upcall(de_pipes[gmsg->gum_mechidx]->d_inode,
-                              &gmsg->gum_base);
-        if (rc) {
-                CERROR("rpc_queue_upcall failed: %d\n", rc);
-
-                upcall_msg_delist(gmsg);
-                goto err_free;
-        }
-
-        RETURN(0);
-err_free:
-        OBD_FREE_PTR(gmsg);
-        RETURN(rc);
-}
-
-int gss_sec_upcall_init(struct gss_sec *gsec)
-{
-        return 0;
-}
-
-void gss_sec_upcall_cleanup(struct gss_sec *gsec)
-{
-}
-
-int gss_init_pipefs(void)
-{
-        struct dentry   *de;
-
-        /* pipe dir */
-        de = rpc_mkdir(LUSTRE_PIPE_ROOT, NULL);
-        if (IS_ERR(de) && PTR_ERR(de) != -EEXIST) {
-                CERROR("Failed to create gss pipe dir: %ld\n", PTR_ERR(de));
-                return PTR_ERR(de);
-        }
-        /* FIXME
-         * hack pipefs: dput will sometimes cause oops during module unload
-         * and lgssd close the pipe fds.
-         */
-        //dput(de);
-
-        /* krb5 mechanism */
-        de = rpc_mkpipe(LUSTRE_PIPE_KRB5, (void *) MECH_KRB5, &gss_upcall_ops,
-                        RPC_PIPE_WAIT_FOR_OPEN);
-        if (!de || IS_ERR(de)) {
-                CERROR("failed to make rpc_pipe %s: %ld\n",
-                       LUSTRE_PIPE_KRB5, PTR_ERR(de));
-                rpc_rmdir(LUSTRE_PIPE_ROOT);
-                return PTR_ERR(de);
-        }
-
-        de_pipes[MECH_KRB5] = de;
-        INIT_LIST_HEAD(&upcall_lists[MECH_KRB5]);
-        upcall_locks[MECH_KRB5] = SPIN_LOCK_UNLOCKED;
-
-        return 0;
-}
-
-void gss_cleanup_pipefs(void)
-{
-        __u32   i;
-
-        for (i = 0; i < MECH_MAX; i++) {
-                LASSERT(list_empty(&upcall_lists[i]));
-                /* FIXME
-                 * hack pipefs, dput pipe dentry here might cause lgssd oops.
-                 */
-                //dput(de_pipes[i]);
-                de_pipes[i] = NULL;
-        }
-
-        rpc_unlink(LUSTRE_PIPE_KRB5);
-        rpc_rmdir(LUSTRE_PIPE_ROOT);
-}
-
 /**********************************************
  * gss context init/fini helper               *
  **********************************************/
@@ -907,6 +336,18 @@ int gss_do_ctx_fini_rpc(struct gss_cli_ctx *gctx)
                 RETURN(0);
         }
 
+        /* FIXME
+         * this could be called while the import is being torn down, thus the
+         * import's spinlock is held. A cleaner solution might be: let a gss
+         * worker thread destroy the ctx; don't wait for the fini rpc reply.
+         */
+        if (imp->imp_invalid) {
+                CWARN("ctx %p(%u): skip because import is invalid\n",
+                      ctx, ctx->cc_vcred.vc_uid);
+                RETURN(0);
+        }
+        RETURN(0); // XXX remove after using gss worker thread
+
         if (test_bit(PTLRPC_CTX_ERROR_BIT, &ctx->cc_flags) ||
             !test_bit(PTLRPC_CTX_UPTODATE_BIT, &ctx->cc_flags)) {
                 CWARN("ctx %p(%u->%s) already dead, don't send destroy rpc\n",
@@ -959,23 +400,11 @@ out_ref:
         RETURN(rc);
 }
 
-int __init gss_init_upcall(void)
+int __init gss_init_cli_upcall(void)
 {
-        int     rc;
-
-        rc = gss_svc_init_upcall();
-        if (rc)
-                return rc;
-
-        rc = gss_init_pipefs();
-        if (rc)
-                gss_svc_exit_upcall();
-
-        return rc;
+        return 0;
 }
 
-void __exit gss_exit_upcall(void)
+void __exit gss_exit_cli_upcall(void)
 {
-        gss_svc_exit_upcall();
-        gss_cleanup_pipefs();
 }
index 6cb4028..be40e4d 100644 (file)
@@ -48,6 +48,7 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/slab.h>
+#include <linux/mutex.h>
 #else
 #include <liblustre.h>
 #endif
index 8fd8014..0202d94 100644 (file)
@@ -44,9 +44,12 @@ int rawobj_serialize(rawobj_t *obj, __u32 **buf, __u32 *buflen);
 int rawobj_extract(rawobj_t *obj, __u32 **buf, __u32 *buflen);
 int rawobj_extract_alloc(rawobj_t *obj, __u32 **buf, __u32 *buflen);
 int rawobj_extract_local(rawobj_t *obj, __u32 **buf, __u32 *buflen);
+int rawobj_extract_local_alloc(rawobj_t *obj, __u32 **buf, __u32 *buflen);
 int rawobj_from_netobj(rawobj_t *rawobj, netobj_t *netobj);
 int rawobj_from_netobj_alloc(rawobj_t *obj, netobj_t *netobj);
 
+int buffer_extract_bytes(const void **buf, __u32 *buflen,
+                         void *res, __u32 reslen);
 
 /*
  * several timeout values. client refresh upcall timeout we using
@@ -61,6 +64,11 @@ int rawobj_from_netobj_alloc(rawobj_t *obj, netobj_t *netobj);
 #define GSS_SECFINI_RPC_TIMEOUT         (__TIMEOUT_DELTA)
 #define GSS_SECSVC_UPCALL_TIMEOUT       (GSS_SECINIT_RPC_TIMEOUT)
 
+/*
+ * default gc interval
+ */
+#define GSS_GC_INTERVAL                 (60 * 60) /* 60 minutes */
+
 static inline
 unsigned long gss_round_ctx_expiry(unsigned long expiry,
                                    unsigned long sec_flags)
@@ -127,6 +135,19 @@ enum ptlrpc_gss_tgt {
         LUSTRE_GSS_TGT_OSS              = 1,
 };
 
+/*
+ * Map an import to a gss target service type according to the obd type name
+ * of the import's obd device: LUSTRE_MDC_NAME -> LUSTRE_GSS_TGT_MDS,
+ * LUSTRE_OSC_NAME -> LUSTRE_GSS_TGT_OSS. Any other type name hits LBUG().
+ */
+static inline
+__u32 import_to_gss_svc(struct obd_import *imp)
+{
+        const char *typ_name = imp->imp_obd->obd_type->typ_name;
+
+        if (strcmp(typ_name, LUSTRE_MDC_NAME) == 0)
+                return LUSTRE_GSS_TGT_MDS;
+
+        if (strcmp(typ_name, LUSTRE_OSC_NAME) == 0)
+                return LUSTRE_GSS_TGT_OSS;
+
+        /* neither mdc nor osc: not expected to happen */
+        LBUG();
+        return 0;
+}
+
 /*
  * following 3 header must have the same size and offset
  */
@@ -182,10 +203,11 @@ struct gss_wire_ctx {
                                          PTLRPC_GSS_MAX_HANDLE_SIZE)
 
 
-#define GSS_SEQ_WIN                     (256)
+#define GSS_SEQ_WIN                     (2048)
 #define GSS_SEQ_WIN_MAIN                GSS_SEQ_WIN
-#define GSS_SEQ_WIN_BACK                (64)
-#define GSS_SEQ_REPACK_THRESHOLD        (GSS_SEQ_WIN_MAIN / 2)
+#define GSS_SEQ_WIN_BACK                (128)
+#define GSS_SEQ_REPACK_THRESHOLD        (GSS_SEQ_WIN_MAIN / 2 + \
+                                         GSS_SEQ_WIN_MAIN / 4)
 
 struct gss_svc_seq_data {
         spinlock_t              ssd_lock;
@@ -235,6 +257,12 @@ struct gss_cli_ctx {
         struct gss_ctx         *gc_mechctx;
 };
 
+struct gss_cli_ctx_keyring {
+        struct gss_cli_ctx      gck_base;       /* embedded generic gss ctx */
+        struct key             *gck_key;        /* bound key; NULL if unbound */
+        struct timer_list      *gck_timer;      /* upcall timer; NULL if none */
+};
+
 struct gss_sec {
         struct ptlrpc_sec       gs_base;
         struct gss_api_mech    *gs_mech;
@@ -242,6 +270,70 @@ struct gss_sec {
         __u64                   gs_rvs_hdl;
 };
 
+struct gss_sec_pipefs {
+        struct gss_sec          gsp_base;
+        int                     gsp_chash_size;  /* must be 2^n; presumably the
+                                                  * bucket count of gsp_chash[]
+                                                  * (TODO confirm) */
+        struct hlist_head       gsp_chash[0];
+};
+
+/*
+ * FIXME cleanup the keyring upcall mutexes
+ */
+#define HAVE_KEYRING_UPCALL_SERIALIZED  1
+
+struct gss_sec_keyring {
+        struct gss_sec          gsk_base;
+        /*
+         * unique sec id, taken from a global counter when the sec is
+         * created; it is also part of the key description.
+         */
+        int                     gsk_id;
+        /*
+         * all contexts of this sec are listed here. access is protected by
+         * the sec spinlock.
+         */
+        struct hlist_head       gsk_clist;
+        /*
+         * points to the current root ctx (only one at a time). access is
+         * protected by the sec spinlock.
+         */
+        struct ptlrpc_cli_ctx  *gsk_root_ctx;
+        /*
+         * serializes upcalls for the root context.
+         */
+        struct mutex            gsk_root_uc_lock;
+
+#ifdef HAVE_KEYRING_UPCALL_SERIALIZED
+        struct mutex            gsk_uc_lock;        /* serialize upcalls */
+#endif
+};
+
+/* convert a generic client ctx into the gss client ctx which embeds it */
+static inline struct gss_cli_ctx *ctx2gctx(struct ptlrpc_cli_ctx *ctx)
+{
+        struct gss_cli_ctx *gctx;
+
+        gctx = container_of(ctx, struct gss_cli_ctx, gc_base);
+        return gctx;
+}
+
+/*
+ * convert a generic client ctx into the keyring gss client ctx: first to the
+ * gss ctx which embeds it, then to the keyring ctx which embeds that one.
+ */
+static inline
+struct gss_cli_ctx_keyring *ctx2gctx_keyring(struct ptlrpc_cli_ctx *ctx)
+{
+        struct gss_cli_ctx *gctx = ctx2gctx(ctx);
+
+        return container_of(gctx, struct gss_cli_ctx_keyring, gck_base);
+}
+
+/* convert a generic sec into the gss sec which embeds it */
+static inline struct gss_sec *sec2gsec(struct ptlrpc_sec *sec)
+{
+        struct gss_sec *gsec;
+
+        gsec = container_of(sec, struct gss_sec, gs_base);
+        return gsec;
+}
+
+/* convert a generic sec into the pipefs gss sec, via its embedded gss sec */
+static inline struct gss_sec_pipefs *sec2gsec_pipefs(struct ptlrpc_sec *sec)
+{
+        struct gss_sec *gsec = sec2gsec(sec);
+
+        return container_of(gsec, struct gss_sec_pipefs, gsp_base);
+}
+
+/* convert a generic sec into the keyring gss sec, via its embedded gss sec */
+static inline struct gss_sec_keyring *sec2gsec_keyring(struct ptlrpc_sec *sec)
+{
+        struct gss_sec *gsec = sec2gsec(sec);
+
+        return container_of(gsec, struct gss_sec_keyring, gsk_base);
+}
+
+
 #define GSS_CTX_INIT_MAX_LEN            (1024)
 
 /*
@@ -259,6 +351,38 @@ struct gss_svc_reqctx *gss_svc_ctx2reqctx(struct ptlrpc_svc_ctx *ctx)
 }
 
 /* sec_gss.c */
+int gss_cli_ctx_match(struct ptlrpc_cli_ctx *ctx, struct vfs_cred *vcred);
+int gss_cli_ctx_display(struct ptlrpc_cli_ctx *ctx, char *buf, int bufsize);
+int gss_cli_ctx_sign(struct ptlrpc_cli_ctx *ctx, struct ptlrpc_request *req);
+int gss_cli_ctx_verify(struct ptlrpc_cli_ctx *ctx, struct ptlrpc_request *req);
+int gss_cli_ctx_seal(struct ptlrpc_cli_ctx *ctx, struct ptlrpc_request *req);
+int gss_cli_ctx_unseal(struct ptlrpc_cli_ctx *ctx, struct ptlrpc_request *req);
+
+int  gss_sec_install_rctx(struct obd_import *imp, struct ptlrpc_sec *sec,
+                          struct ptlrpc_cli_ctx *ctx);
+int  gss_alloc_reqbuf(struct ptlrpc_sec *sec, struct ptlrpc_request *req,
+                      int msgsize);
+void gss_free_reqbuf(struct ptlrpc_sec *sec, struct ptlrpc_request *req);
+int  gss_alloc_repbuf(struct ptlrpc_sec *sec, struct ptlrpc_request *req,
+                      int msgsize);
+void gss_free_repbuf(struct ptlrpc_sec *sec, struct ptlrpc_request *req);
+int  gss_enlarge_reqbuf(struct ptlrpc_sec *sec, struct ptlrpc_request *req,
+                        int segment, int newsize);
+
+int  gss_svc_accept(struct ptlrpc_sec_policy *policy,
+                    struct ptlrpc_request *req);
+void gss_svc_invalidate_ctx(struct ptlrpc_svc_ctx *svc_ctx);
+int  gss_svc_alloc_rs(struct ptlrpc_request *req, int msglen);
+int  gss_svc_authorize(struct ptlrpc_request *req);
+void gss_svc_free_rs(struct ptlrpc_reply_state *rs);
+void gss_svc_free_ctx(struct ptlrpc_svc_ctx *ctx);
+
+int cli_ctx_expire(struct ptlrpc_cli_ctx *ctx);
+int cli_ctx_check_death(struct ptlrpc_cli_ctx *ctx);
+
+int gss_copy_rvc_cli_ctx(struct ptlrpc_cli_ctx *cli_ctx,
+                         struct ptlrpc_svc_ctx *svc_ctx);
+
 struct gss_header *gss_swab_header(struct lustre_msg *msg, int segment);
 netobj_t *gss_swab_netobj(struct lustre_msg *msg, int segment);
 
@@ -266,6 +390,30 @@ void gss_cli_ctx_uptodate(struct gss_cli_ctx *gctx);
 int gss_pack_err_notify(struct ptlrpc_request *req, __u32 major, __u32 minor);
 int gss_check_seq_num(struct gss_svc_seq_data *sd, __u32 seq_num, int set);
 
+int gss_sec_create_common(struct gss_sec *gsec,
+                          struct ptlrpc_sec_policy *policy,
+                          struct obd_import *imp,
+                          struct ptlrpc_svc_ctx *ctx,
+                          __u32 flavor,
+                          unsigned long flags);
+void gss_sec_destroy_common(struct gss_sec *gsec);
+
+int gss_cli_ctx_init_common(struct ptlrpc_sec *sec,
+                            struct ptlrpc_cli_ctx *ctx,
+                            struct ptlrpc_ctx_ops *ctxops,
+                            struct vfs_cred *vcred);
+int gss_cli_ctx_fini_common(struct ptlrpc_sec *sec,
+                            struct ptlrpc_cli_ctx *ctx);
+
+/* gss_keyring.c */
+extern struct ptlrpc_sec_policy gss_policy_keyring;
+int  __init gss_init_keyring(void);
+void __exit gss_exit_keyring(void);
+
+/* gss_pipefs.c */
+int  __init gss_init_pipefs(void);
+void __exit gss_exit_pipefs(void);
+
 /* gss_bulk.c */
 int gss_cli_ctx_wrap_bulk(struct ptlrpc_cli_ctx *ctx,
                           struct ptlrpc_request *req,
@@ -289,14 +437,12 @@ __u32 g_verify_token_header(rawobj_t *mech, int *body_size,
                             unsigned char **buf_in, int toksize);
 
 
-/* gss_upcall.c */
+/* gss_cli_upcall.c */
 int gss_do_ctx_init_rpc(char *buffer, unsigned long count);
 int gss_do_ctx_fini_rpc(struct gss_cli_ctx *gctx);
-int gss_ctx_refresh_pipefs(struct ptlrpc_cli_ctx *ctx);
-int gss_sec_upcall_init(struct gss_sec *gsec);
-void gss_sec_upcall_cleanup(struct gss_sec *gsec);
-int __init gss_init_upcall(void);
-void __exit gss_exit_upcall(void);
+
+int  __init gss_init_cli_upcall(void);
+void __exit gss_exit_cli_upcall(void);
 
 /* gss_svc_upcall.c */
 __u64 gss_get_next_ctx_index(void);
@@ -315,14 +461,15 @@ struct gss_svc_ctx *gss_svc_upcall_get_ctx(struct ptlrpc_request *req,
 void gss_svc_upcall_put_ctx(struct gss_svc_ctx *ctx);
 void gss_svc_upcall_destroy_ctx(struct gss_svc_ctx *ctx);
 
-int  __init gss_svc_init_upcall(void);
-void __exit gss_svc_exit_upcall(void);
+int  __init gss_init_svc_upcall(void);
+void __exit gss_exit_svc_upcall(void);
 
 /* lproc_gss.c */
 void gss_stat_oos_record_cli(int behind);
 void gss_stat_oos_record_svc(int phase, int replay);
-int  gss_init_lproc(void);
-void gss_exit_lproc(void);
+
+int  __init gss_init_lproc(void);
+void __exit gss_exit_lproc(void);
 
 /* gss_krb5_mech.c */
 int __init init_kerberos_module(void);
diff --git a/lustre/ptlrpc/gss/gss_keyring.c b/lustre/ptlrpc/gss/gss_keyring.c
new file mode 100644 (file)
index 0000000..305d4c5
--- /dev/null
@@ -0,0 +1,1388 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2007 Cluster File Systems, Inc.
+ *   Author: Eric Mei <ericm@clusterfs.com>
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef EXPORT_SYMTAB
+# define EXPORT_SYMTAB
+#endif
+#define DEBUG_SUBSYSTEM S_SEC
+#ifdef __KERNEL__
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/dcache.h>
+#include <linux/fs.h>
+#include <linux/random.h>
+#include <linux/crypto.h>
+#include <linux/key.h>
+#include <linux/keyctl.h>
+#include <linux/mutex.h>
+#include <asm/atomic.h>
+#else
+#include <liblustre.h>
+#endif
+
+#include <obd.h>
+#include <obd_class.h>
+#include <obd_support.h>
+#include <lustre/lustre_idl.h>
+#include <lustre_sec.h>
+#include <lustre_net.h>
+#include <lustre_import.h>
+
+#include "gss_err.h"
+#include "gss_internal.h"
+#include "gss_api.h"
+
+static struct ptlrpc_sec_policy gss_policy_keyring;
+static struct ptlrpc_ctx_ops gss_keyring_ctxops;
+static struct key_type gss_key_type;
+
+static int sec_install_rctx_kr(struct ptlrpc_sec *sec,
+                               struct ptlrpc_svc_ctx *svc_ctx);
+
+/*
+ * the timeout only covers the case where the upcall child process dies
+ * abnormally; in all other cases it should eventually update the kernel key.
+ * so we set this timeout value excessively long.
+ */
+#define KEYRING_UPCALL_TIMEOUT  (obd_timeout + obd_timeout)
+
+/****************************************
+ * internal helpers                     *
+ ****************************************/
+
+#define DUMP_PROCESS_KEYRINGS(tsk)                                      \
+{                                                                       \
+        CWARN("DUMP PK: %s[%u,%u/%u](<-%s[%u,%u/%u]): "                 \
+              "a %d, t %d, p %d, s %d, u %d, us %d, df %d\n",           \
+              tsk->comm, tsk->pid, tsk->uid, tsk->fsuid,                \
+              tsk->parent->comm, tsk->parent->pid,                      \
+              tsk->parent->uid, tsk->parent->fsuid,                     \
+              task_aux(tsk)->request_key_auth ?                         \
+              task_aux(tsk)->request_key_auth->serial : 0,              \
+              task_aux(tsk)->thread_keyring ?                           \
+              task_aux(tsk)->thread_keyring->serial : 0,                \
+              tsk->signal->process_keyring ?                            \
+              tsk->signal->process_keyring->serial : 0,                 \
+              tsk->signal->session_keyring ?                            \
+              tsk->signal->session_keyring->serial : 0,                 \
+              tsk->user->uid_keyring ?                                  \
+              tsk->user->uid_keyring->serial : 0,                       \
+              tsk->user->session_keyring ?                              \
+              tsk->user->session_keyring->serial : 0,                   \
+              task_aux(tsk)->jit_keyring                                \
+             );                                                         \
+}
+
+#define DUMP_KEY(key)                                                   \
+{                                                                       \
+        CWARN("DUMP KEY: %p(%d) ref %d u%u/g%u desc %s\n",              \
+              key, key->serial, atomic_read(&key->usage),               \
+              key->uid, key->gid,                                       \
+              key->description ? key->description : "n/a"               \
+             );                                                         \
+}
+
+
+/*
+ * take the per-sec upcall mutex. does nothing unless
+ * HAVE_KEYRING_UPCALL_SERIALIZED is defined.
+ */
+static inline void keyring_upcall_lock(struct gss_sec_keyring *gsec_kr)
+{
+#if defined(HAVE_KEYRING_UPCALL_SERIALIZED)
+        struct mutex *uc_lock = &gsec_kr->gsk_uc_lock;
+
+        mutex_lock(uc_lock);
+#endif
+}
+
+/*
+ * release the per-sec upcall mutex. does nothing unless
+ * HAVE_KEYRING_UPCALL_SERIALIZED is defined.
+ */
+static inline void keyring_upcall_unlock(struct gss_sec_keyring *gsec_kr)
+{
+#if defined(HAVE_KEYRING_UPCALL_SERIALIZED)
+        struct mutex *uc_lock = &gsec_kr->gsk_uc_lock;
+
+        mutex_unlock(uc_lock);
+#endif
+}
+
+/*
+ * mark @key as revoked by setting KEY_FLAG_REVOKED directly in its flags.
+ */
+static inline void key_revoke_locked(struct key *key)
+{
+        unsigned long *flags = &key->flags;
+
+        set_bit(KEY_FLAG_REVOKED, flags);
+}
+
+/*
+ * timer function installed by ctx_start_timer_kr(); @data is the ctx pointer.
+ * expires the ctx, revokes the key currently bound to it (which must exist),
+ * and then calls sptlrpc_cli_ctx_wakeup() on the ctx.
+ */
+static void ctx_upcall_timeout_kr(unsigned long data)
+{
+        struct ptlrpc_cli_ctx      *ctx;
+        struct gss_cli_ctx_keyring *gctx_kr;
+        struct key                 *key;
+
+        ctx = (struct ptlrpc_cli_ctx *) data;
+        gctx_kr = ctx2gctx_keyring(ctx);
+        key = gctx_kr->gck_key;
+
+        CWARN("ctx %p, key %p\n", ctx, key);
+
+        LASSERT(key);
+
+        cli_ctx_expire(ctx);
+        key_revoke_locked(key);
+        sptlrpc_cli_ctx_wakeup(ctx);
+}
+
+/*
+ * arm the upcall timer of @ctx so that ctx_upcall_timeout_kr() fires
+ * @timeout seconds from now. the timer structure must still be attached
+ * to the ctx.
+ */
+static
+void ctx_start_timer_kr(struct ptlrpc_cli_ctx *ctx, long timeout)
+{
+        struct timer_list *timer = ctx2gctx_keyring(ctx)->gck_timer;
+
+        LASSERT(timer);
+
+        CWARN("ctx %p: start timer %lds\n", ctx, timeout);
+
+        /* convert the relative timeout in seconds into absolute jiffies */
+        timeout = timeout * HZ + cfs_time_current();
+
+        init_timer(timer);
+        timer->function = ctx_upcall_timeout_kr;
+        timer->data = (unsigned long ) ctx;
+        timer->expires = timeout;
+
+        add_timer(timer);
+}
+
+/*
+ * stop and free the upcall timer of @ctx, if it still has one. the timer
+ * pointer is detached from the ctx before the timer is deleted and freed.
+ */
+static
+void ctx_clear_timer_kr(struct ptlrpc_cli_ctx *ctx)
+{
+        struct gss_cli_ctx_keyring *gctx_kr = ctx2gctx_keyring(ctx);
+        struct timer_list          *timer;
+
+        CWARN("ctx %p, key %p\n", ctx, gctx_kr->gck_key);
+
+        timer = gctx_kr->gck_timer;
+        if (timer != NULL) {
+                gctx_kr->gck_timer = NULL;
+
+                del_singleshot_timer_sync(timer);
+
+                OBD_FREE_PTR(timer);
+        }
+}
+
+/*
+ * allocate and initialize a keyring ctx (including its timer structure) for
+ * @vcred under @sec. on success the NEW bit is cleared, the expiry is set to
+ * now + KEYRING_UPCALL_TIMEOUT, and an extra reference is taken on behalf of
+ * the caller. returns NULL on any failure.
+ */
+static
+struct ptlrpc_cli_ctx *ctx_create_kr(struct ptlrpc_sec *sec,
+                                     struct vfs_cred *vcred)
+{
+        struct gss_cli_ctx_keyring *gctx_kr;
+        struct ptlrpc_cli_ctx      *ctx;
+
+        OBD_ALLOC_PTR(gctx_kr);
+        if (gctx_kr == NULL)
+                return NULL;
+
+        OBD_ALLOC_PTR(gctx_kr->gck_timer);
+        if (gctx_kr->gck_timer == NULL)
+                goto out_free_ctx;
+        init_timer(gctx_kr->gck_timer);
+
+        ctx = &gctx_kr->gck_base.gc_base;
+
+        if (gss_cli_ctx_init_common(sec, ctx, &gss_keyring_ctxops, vcred))
+                goto out_free_timer;
+
+        ctx->cc_expire = cfs_time_current_sec() + KEYRING_UPCALL_TIMEOUT;
+        clear_bit(PTLRPC_CTX_NEW_BIT, &ctx->cc_flags);
+        atomic_inc(&ctx->cc_refcount); /* for the caller */
+
+        return ctx;
+
+out_free_timer:
+        OBD_FREE_PTR(gctx_kr->gck_timer);
+out_free_ctx:
+        OBD_FREE_PTR(gctx_kr);
+        return NULL;
+}
+
+/*
+ * free a keyring ctx. the ctx must already be unlisted and unbound from its
+ * key. if gss_cli_ctx_fini_common() returns non-zero, the sec is destroyed
+ * as well.
+ */
+static void ctx_destroy_kr(struct ptlrpc_cli_ctx *ctx)
+{
+        struct gss_cli_ctx_keyring *gctx_kr = ctx2gctx_keyring(ctx);
+        struct ptlrpc_sec          *sec = ctx->cc_sec;
+        int                         rc;
+
+        CWARN("destroying ctx %p\n", ctx);
+
+        /* at this time the association with key has been broken. */
+        LASSERT(sec);
+        LASSERT(test_bit(PTLRPC_CTX_CACHED_BIT, &ctx->cc_flags) == 0);
+        LASSERT(gctx_kr->gck_key == NULL);
+
+        ctx_clear_timer_kr(ctx);
+        LASSERT(gctx_kr->gck_timer == NULL);
+
+        rc = gss_cli_ctx_fini_common(sec, ctx);
+
+        OBD_FREE_PTR(gctx_kr);
+
+        if (rc == 0)
+                return;
+
+        CWARN("released the last ctx, proceed to destroy sec %s@%p\n",
+              sec->ps_policy->sp_name, sec);
+        sptlrpc_sec_destroy(sec);
+}
+
+/*
+ * drop one reference of @ctx; destroy the ctx when the last one is gone.
+ */
+static void ctx_put_kr(struct ptlrpc_cli_ctx *ctx)
+{
+        LASSERT(atomic_read(&ctx->cc_refcount) > 0);
+
+        if (!atomic_dec_and_test(&ctx->cc_refcount))
+                return;
+
+        ctx_destroy_kr(ctx);
+}
+
+/*
+ * key <-> ctx association and rules:
+ * - a ctx might not be bound to any key
+ * - key/ctx binding is protected by the key semaphore (if the key is present)
+ * - key and ctx each take a reference on the other
+ * - ctx enlist/unlist is protected by the ctx spinlock
+ * - never enlist a ctx after it has been unlisted
+ * - whoever does the enlist should also do the bind; lock key before enlist:
+ *   - lock key -> lock ctx -> enlist -> unlock ctx -> bind -> unlock key
+ * - whoever does the unlist should also do the unbind:
+ *   - lock key -> lock ctx -> unlist -> unlock ctx -> unbind -> unlock key
+ *   - lock ctx -> unlist -> unlock ctx -> lock key -> unbind -> unlock key
+ */
+
+/* take @lock only when @condition is non-zero */
+static inline void spin_lock_if(spinlock_t *lock, int condition)
+{
+        if (!condition)
+                return;
+
+        spin_lock(lock);
+}
+
+/* release @lock only when @condition is non-zero */
+static inline void spin_unlock_if(spinlock_t *lock, int condition)
+{
+        if (!condition)
+                return;
+
+        spin_unlock(lock);
+}
+
+/*
+ * add @ctx into the ctx list of its sec, taking one reference for the list
+ * and setting the CACHED bit. if @is_root is set, the ctx also becomes the
+ * root ctx of the sec. @locked tells whether the caller already holds the
+ * sec spinlock; if not, it's taken here.
+ */
+static
+void ctx_enlist_kr(struct ptlrpc_cli_ctx *ctx, int is_root, int locked)
+{
+        struct ptlrpc_sec      *sec = ctx->cc_sec;
+        struct gss_sec_keyring *gsec_kr = sec2gsec_keyring(sec);
+        int                     need_lock = !locked;
+
+        LASSERT(!test_bit(PTLRPC_CTX_CACHED_BIT, &ctx->cc_flags));
+        LASSERT(atomic_read(&ctx->cc_refcount) > 0);
+
+        if (need_lock)
+                spin_lock(&sec->ps_lock);
+
+        atomic_inc(&ctx->cc_refcount);
+        set_bit(PTLRPC_CTX_CACHED_BIT, &ctx->cc_flags);
+        hlist_add_head(&ctx->cc_hash, &gsec_kr->gsk_clist);
+        if (is_root)
+                gsec_kr->gsk_root_ctx = ctx;
+
+        if (need_lock)
+                spin_unlock(&sec->ps_lock);
+}
+
+/*
+ * remove @ctx from the ctx list of its sec and drop the reference held by
+ * the list. @locked tells whether the caller already holds the sec spinlock.
+ *
+ * Note that after this is called, the caller should not access the ctx again
+ * because it might have been freed, unless the caller holds at least one
+ * refcount of the ctx.
+ *
+ * return non-zero if we indeed unlisted this ctx, zero if the CACHED bit was
+ * already cleared.
+ */
+static
+int ctx_unlist_kr(struct ptlrpc_cli_ctx *ctx, int locked)
+{
+        struct ptlrpc_sec       *sec = ctx->cc_sec;
+        struct gss_sec_keyring  *gsec_kr = sec2gsec_keyring(sec);
+        int                      need_lock = !locked;
+
+        /*
+         * if the cached bit is already gone, leave the job to whoever
+         * cleared it.
+         */
+        if (test_and_clear_bit(PTLRPC_CTX_CACHED_BIT, &ctx->cc_flags) == 0)
+                return 0;
+
+        CWARN("ctx %p(%d) unlist\n", ctx, atomic_read(&ctx->cc_refcount));
+
+        /*
+         * drop ref inside spin lock to prevent race with other operations
+         */
+        if (need_lock)
+                spin_lock(&sec->ps_lock);
+
+        if (gsec_kr->gsk_root_ctx == ctx)
+                gsec_kr->gsk_root_ctx = NULL;
+        hlist_del_init(&ctx->cc_hash);
+        atomic_dec(&ctx->cc_refcount);
+
+        if (need_lock)
+                spin_unlock(&sec->ps_lock);
+
+        return 1;
+}
+
+/*
+ * bind a key and a ctx together: each side takes a reference on the other
+ * and stores a pointer to it.
+ * caller must hold the write lock of the key, as well as a ref on both the
+ * key and the ctx. neither side may be bound already.
+ */
+static
+void bind_key_ctx(struct key *key, struct ptlrpc_cli_ctx *ctx)
+{
+        struct gss_cli_ctx_keyring *gctx_kr;
+
+        LASSERT(atomic_read(&ctx->cc_refcount) > 0);
+        LASSERT(atomic_read(&key->usage) > 0);
+        LASSERT(ctx2gctx_keyring(ctx)->gck_key == NULL);
+        LASSERT(key->payload.data == NULL);
+
+        /*
+         * at this time the context may or may not be in the list.
+         */
+        key_get(key);
+        atomic_inc(&ctx->cc_refcount);
+
+        gctx_kr = ctx2gctx_keyring(ctx);
+        gctx_kr->gck_key = key;
+        key->payload.data = ctx;
+}
+
+/*
+ * unbind a key and a ctx: revoke the key, clear the pointers in both
+ * directions, stop the ctx timer, then drop the reference each side held on
+ * the other. the ctx must already be unlisted.
+ * caller must hold the write lock, as well as a ref of the key.
+ */
+static
+void unbind_key_ctx(struct key *key, struct ptlrpc_cli_ctx *ctx)
+{
+        struct gss_cli_ctx_keyring *gctx_kr;
+
+        LASSERT(key->payload.data == ctx);
+        LASSERT(test_bit(PTLRPC_CTX_CACHED_BIT, &ctx->cc_flags) == 0);
+
+        /* must revoke the key, or others may treat it as newly created */
+        key_revoke_locked(key);
+
+        gctx_kr = ctx2gctx_keyring(ctx);
+        key->payload.data = NULL;
+        gctx_kr->gck_key = NULL;
+
+        /* once the ctx is split from the key, the timer is meaningless */
+        ctx_clear_timer_kr(ctx);
+
+        ctx_put_kr(ctx);
+        key_put(key);
+}
+
+/*
+ * given a ctx, unbind it from its coupled key, if any. the key is pinned
+ * with an extra reference and write-locked around the unbind.
+ * unbind could only be called once, so we don't worry about the key being
+ * released by someone else.
+ */
+static void unbind_ctx_kr(struct ptlrpc_cli_ctx *ctx)
+{
+        struct key      *key = ctx2gctx_keyring(ctx)->gck_key;
+
+        if (key == NULL)
+                return;
+
+        LASSERT(key->payload.data == ctx);
+
+        key_get(key);
+        down_write(&key->sem);
+        unbind_key_ctx(key, ctx);
+        up_write(&key->sem);
+        key_put(key);
+}
+
+/*
+ * given a key, unbind it from its coupled ctx, if any.
+ * caller must hold the write lock, as well as a ref of the key.
+ */
+static void unbind_key_locked(struct key *key)
+{
+        struct ptlrpc_cli_ctx   *ctx = key->payload.data;
+
+        if (ctx == NULL)
+                return;
+
+        unbind_key_ctx(key, ctx);
+}
+
+/*
+ * unlist a ctx and, if this call did the unlisting, unbind it from its
+ * coupled key.
+ */
+static void kill_ctx_kr(struct ptlrpc_cli_ctx *ctx)
+{
+        if (!ctx_unlist_kr(ctx, 0))
+                return;
+
+        unbind_ctx_kr(ctx);
+}
+
+/*
+ * given a key, unlist the coupled ctx (if any) and, if this call did the
+ * unlisting, unbind the key from it.
+ * caller must hold the write lock, as well as a ref of the key.
+ */
+static void kill_key_locked(struct key *key)
+{
+        struct ptlrpc_cli_ctx *ctx = key->payload.data;
+
+        if (ctx == NULL)
+                return;
+
+        if (ctx_unlist_kr(ctx, 0))
+                unbind_key_locked(key);
+}
+
+/*
+ * unbind and release every ctx on @freelist. each ctx is removed from the
+ * list and pinned with a temporary reference while it's unbound from its
+ * key; the reference is dropped afterwards.
+ * once this is called, nobody else may touch the ctxs in @freelist.
+ */
+static void dispose_ctx_list_kr(struct hlist_head *freelist)
+{
+        struct ptlrpc_cli_ctx  *ctx;
+        struct hlist_node      *node, *tmp;
+
+        hlist_for_each_entry_safe(ctx, node, tmp, freelist, cc_hash) {
+                hlist_del_init(&ctx->cc_hash);
+
+                /* hold the ctx across the unbind */
+                atomic_inc(&ctx->cc_refcount);
+                unbind_ctx_kr(ctx);
+                ctx_put_kr(ctx);
+        }
+}
+
+/*
+ * look up the root context directly in a sec, under the sec spinlock.
+ * returns the root ctx with a reference taken, or NULL if there is none.
+ */
+static
+struct ptlrpc_cli_ctx * sec_lookup_root_ctx_kr(struct ptlrpc_sec *sec)
+{
+        struct gss_sec_keyring  *gsec_kr = sec2gsec_keyring(sec);
+        struct ptlrpc_cli_ctx   *ctx;
+
+        spin_lock(&sec->ps_lock);
+
+        ctx = gsec_kr->gsk_root_ctx;
+        if (ctx != NULL) {
+                LASSERT(atomic_read(&ctx->cc_refcount) > 0);
+                LASSERT(!hlist_empty(&gsec_kr->gsk_clist));
+                /* reference for the caller */
+                atomic_inc(&ctx->cc_refcount);
+        }
+
+        spin_unlock(&sec->ps_lock);
+
+        return ctx;
+}
+
+/*
+ * install @new_ctx as the root ctx of @sec. the current root ctx, if any, is
+ * marked dead and unlisted under the sec spinlock; it's unbound from its key
+ * and released only after the spinlock has been dropped.
+ * if @key is not NULL it's bound with @new_ctx; as required by
+ * bind_key_ctx(), the caller must then hold the write lock of @key.
+ */
+static void sec_replace_root_ctx_kr(struct ptlrpc_sec *sec,
+                                    struct ptlrpc_cli_ctx *new_ctx,
+                                    struct key *key)
+{
+        struct gss_sec_keyring *gsec_kr = sec2gsec_keyring(sec);
+        struct ptlrpc_cli_ctx  *root_ctx;
+        struct hlist_head       freelist = HLIST_HEAD_INIT;
+        ENTRY;
+
+        spin_lock(&sec->ps_lock);
+
+        if (gsec_kr->gsk_root_ctx) {
+                root_ctx = gsec_kr->gsk_root_ctx;
+
+                set_bit(PTLRPC_CTX_DEAD_BIT, &root_ctx->cc_flags);
+
+                /* only dispose the old root ctx if we're the one who
+                 * unlisted it */
+                if (ctx_unlist_kr(root_ctx, 1))
+                        hlist_add_head(&root_ctx->cc_hash, &freelist);
+        }
+
+        /*
+         * at this time, we can't guarantee the gsk_root_ctx is NULL, because
+         * another thread might have cleared the CACHED flag of the root ctx
+         * earlier, and be waiting for the spinlock which is held by us. But
+         * anyway we just install the new root ctx.
+         */
+        ctx_enlist_kr(new_ctx, 1, 1);
+
+        if (key)
+                bind_key_ctx(key, new_ctx);
+
+        spin_unlock(&sec->ps_lock);
+
+        /* unbinding takes the key semaphore, so do it outside the spinlock */
+        dispose_ctx_list_kr(&freelist);
+
+        /* balance the ENTRY above */
+        EXIT;
+}
+
+/*
+ * format the key description "<uid>@<sec id in hex>" into @buf, which is
+ * @bufsize bytes long; the result is always NUL terminated.
+ */
+static void construct_key_desc(void *buf, int bufsize,
+                               struct ptlrpc_sec *sec, uid_t uid)
+{
+        char *desc = buf;
+
+        snprintf(desc, bufsize, "%d@%x", uid, sec2gsec_keyring(sec)->gsk_id);
+        desc[bufsize - 1] = '\0';
+}
+
+/****************************************
+ * sec apis                             *
+ ****************************************/
+
+static atomic_t gss_sec_id_kr = ATOMIC_INIT(0);
+
+static
+struct ptlrpc_sec * gss_sec_create_kr(struct obd_import *imp,
+                                      struct ptlrpc_svc_ctx *ctx,
+                                      __u32 flavor,
+                                      unsigned long flags)
+{
+        struct gss_sec_keyring  *gsec_kr;
+        ENTRY;
+
+        OBD_ALLOC(gsec_kr, sizeof(*gsec_kr));
+        if (gsec_kr == NULL)
+                RETURN(NULL);
+
+        gsec_kr->gsk_id = atomic_inc_return(&gss_sec_id_kr);
+        INIT_HLIST_HEAD(&gsec_kr->gsk_clist);
+        gsec_kr->gsk_root_ctx = NULL;
+        mutex_init(&gsec_kr->gsk_root_uc_lock);
+#ifdef HAVE_KEYRING_UPCALL_SERIALIZED
+        mutex_init(&gsec_kr->gsk_uc_lock);
+#endif
+
+        if (gss_sec_create_common(&gsec_kr->gsk_base, &gss_policy_keyring,
+                                  imp, ctx, flavor, flags))
+                goto err_free;
+
+        if (ctx != NULL) {
+                if (sec_install_rctx_kr(&gsec_kr->gsk_base.gs_base, ctx)) {
+                        gss_sec_destroy_common(&gsec_kr->gsk_base);
+                        goto err_free;
+                }
+        }
+
+        RETURN(&gsec_kr->gsk_base.gs_base);
+
+err_free:
+        OBD_FREE(gsec_kr, sizeof(*gsec_kr));
+        RETURN(NULL);
+}
+
+/*
+ * tear down a keyring based gss sec. the sec must no longer own any ctx:
+ * both the ctx list and the root ctx slot are asserted to be empty.
+ */
+static
+void gss_sec_destroy_kr(struct ptlrpc_sec *sec)
+{
+        struct gss_sec          *base = sec2gsec(sec);
+        struct gss_sec_keyring  *kr = sec2gsec_keyring(sec);
+
+        CWARN("destroy %s@%p\n", sec->ps_policy->sp_name, sec);
+
+        LASSERT(hlist_empty(&kr->gsk_clist));
+        LASSERT(kr->gsk_root_ctx == NULL);
+
+        /* common part first, then the keyring container itself */
+        gss_sec_destroy_common(base);
+        OBD_FREE(kr, sizeof(*kr));
+}
+
+/*
+ * return 1 when @vcred should be served by the root ctx of @sec,
+ * 0 otherwise.
+ */
+static
+int user_is_root(struct ptlrpc_sec *sec, struct vfs_cred *vcred)
+{
+        /* on a root-only sec every caller counts as root */
+        if ((sec->ps_flags & PTLRPC_SEC_FL_ROOTONLY) != 0)
+                return 1;
+
+        /* FIXME
+         * more precisely deal with setuid. maybe add more information
+         * into vfs_cred ??
+         */
+        return vcred->vc_uid == 0 ? 1 : 0;
+}
+
+/*
+ * unlink request key from its ring, which is linked during request_key().
+ * sadly, we have to 'guess' which keyring it's linked to.
+ *
+ * FIXME this code is fragile, it depends on how request_key_link() is
+ * implemented.
+ */
+static void request_key_unlink(struct key *key)
+{
+        struct task_struct *tsk = current;
+        struct key *ring;
+
+        /*
+         * start from the task's jit_keyring setting and take the first
+         * keyring that key_get() returns non-NULL for; the cases below
+         * deliberately fall through to the next candidate otherwise.
+         */
+        switch (task_aux(tsk)->jit_keyring) {
+        case KEY_REQKEY_DEFL_DEFAULT:
+        case KEY_REQKEY_DEFL_THREAD_KEYRING:
+                ring = key_get(task_aux(tsk)->thread_keyring);
+                if (ring)
+                        break;
+                /* fall through */
+        case KEY_REQKEY_DEFL_PROCESS_KEYRING:
+                ring = key_get(tsk->signal->process_keyring);
+                if (ring)
+                        break;
+                /* fall through */
+        case KEY_REQKEY_DEFL_SESSION_KEYRING:
+                /* session_keyring is read under rcu_read_lock() */
+                rcu_read_lock();
+                ring = key_get(rcu_dereference(tsk->signal->session_keyring));
+                rcu_read_unlock();
+                if (ring)
+                        break;
+                /* fall through */
+        case KEY_REQKEY_DEFL_USER_SESSION_KEYRING:
+                ring = key_get(tsk->user->session_keyring);
+                break;
+        case KEY_REQKEY_DEFL_USER_KEYRING:
+                ring = key_get(tsk->user->uid_keyring);
+                break;
+        case KEY_REQKEY_DEFL_GROUP_KEYRING:
+        default:
+                /* group keyring and unknown settings are not handled */
+                LBUG();
+        }
+
+        LASSERT(ring);
+        key_unlink(ring, key);
+        /* drop the reference taken by key_get() above */
+        key_put(ring);
+}
+
+/*
+ * find or create the client ctx for @vcred in @sec; returns the ctx with a
+ * reference taken, or NULL on failure.
+ *
+ * root users are first served from the sec's root ctx slot. otherwise a key
+ * described by construct_key_desc() is obtained through request_key(); a
+ * key which already carries a ctx yields that ctx, while a fresh key gets a
+ * newly created ctx bound to it.
+ *
+ * @create must be non-zero whenever the quick root lookup does not return;
+ * @remove_dead is not used by this function.
+ */
+static
+struct ptlrpc_cli_ctx * gss_sec_lookup_ctx_kr(struct ptlrpc_sec *sec,
+                                              struct vfs_cred *vcred,
+                                              int create, int remove_dead)
+{
+        struct obd_import       *imp = sec->ps_import;
+        struct gss_sec_keyring  *gsec_kr = sec2gsec_keyring(sec);
+        struct ptlrpc_cli_ctx   *ctx = NULL;
+        unsigned int             is_root = 0, create_new = 0;
+        struct key              *key;
+        char                     desc[24];
+        char                    *coinfo;
+        const int                coinfo_size = sizeof(struct obd_uuid) + 64;
+        char                    *co_flags = "";
+        ENTRY;
+
+        LASSERT(imp != NULL);
+
+        is_root = user_is_root(sec, vcred);
+
+        /*
+         * a small optimization for root context
+         */
+        if (is_root) {
+                ctx = sec_lookup_root_ctx_kr(sec);
+                /*
+                 * Only lookup directly for REVERSE sec, which should
+                 * always succeed.
+                 */
+                if (ctx || (sec->ps_flags & PTLRPC_SEC_FL_REVERSE))
+                        RETURN(ctx);
+        }
+
+        LASSERT(create != 0);
+
+        /*
+         * for root context, obtain lock and check again, this time hold
+         * the root upcall lock, make sure nobody else populated new root
+         * context after last check.
+         */
+        if (is_root) {
+                mutex_lock(&gsec_kr->gsk_root_uc_lock);
+
+                ctx = sec_lookup_root_ctx_kr(sec);
+                if (ctx)
+                        goto out;
+
+                /* update reverse handle for root user */
+                sec2gsec(sec)->gs_rvs_hdl = gss_get_next_ctx_index();
+
+                co_flags = "r";
+        }
+
+        construct_key_desc(desc, sizeof(desc), sec, vcred->vc_uid);
+
+        /*
+         * callout info: mech:flags:svc_type:peer_nid:target_uuid
+         */
+        OBD_ALLOC(coinfo, coinfo_size);
+        if (coinfo == NULL)
+                goto out;
+
+        snprintf(coinfo, coinfo_size, "%s:%s:%d:"LPX64":%s",
+                 sec2gsec(sec)->gs_mech->gm_name,
+                 co_flags, import_to_gss_svc(imp),
+                 imp->imp_connection->c_peer.nid, imp->imp_obd->obd_name);
+
+        keyring_upcall_lock(gsec_kr);
+        key = request_key(&gss_key_type, desc, coinfo);
+        keyring_upcall_unlock(gsec_kr);
+
+        /* the callout string is not referenced again below */
+        OBD_FREE(coinfo, coinfo_size);
+
+        if (IS_ERR(key)) {
+                CERROR("failed request key: %ld\n", PTR_ERR(key));
+                goto out;
+        }
+
+        /*
+         * once payload.data was pointed to a ctx, it never changes until
+         * we de-associate them; but parallel request_key() may return
+         * a key with payload.data == NULL at the same time. so we still
+         * need the write lock of key->sem to serialize them.
+         */
+        down_write(&key->sem);
+
+        if (likely(key->payload.data != NULL)) {
+                ctx = key->payload.data;
+
+                LASSERT(atomic_read(&ctx->cc_refcount) >= 1);
+                LASSERT(ctx2gctx_keyring(ctx)->gck_key == key);
+                LASSERT(atomic_read(&key->usage) >= 2);
+
+                /* simply take a ref and return. it's upper layer's
+                 * responsibility to detect & replace dead ctx.
+                 */
+                atomic_inc(&ctx->cc_refcount);
+        } else {
+                /* pre initialization with a cli_ctx. this can't be done in
+                 * key_instantiate() because we don't have enough
+                 * information there.
+                 */
+                ctx = ctx_create_kr(sec, vcred);
+                if (ctx != NULL) {
+                        ctx_enlist_kr(ctx, is_root, 0);
+                        bind_key_ctx(key, ctx);
+
+                        ctx_start_timer_kr(ctx, KEYRING_UPCALL_TIMEOUT);
+
+                        CWARN("installed key %p <-> ctx %p (sec %p)\n",
+                              key, ctx, sec);
+                } else {
+                        /*
+                         * we'd prefer to call key_revoke(), but we would
+                         * rather revoke it within this key->sem locked
+                         * period.
+                         */
+                        key_revoke_locked(key);
+                }
+
+                /* set even when ctx creation failed, so a root key is
+                 * still unlinked from its keyring below */
+                create_new = 1;
+        }
+
+        up_write(&key->sem);
+
+        if (is_root && create_new)
+                request_key_unlink(key);
+
+        /* drop the key reference obtained from request_key() */
+        key_put(key);
+out:
+        if (is_root)
+                mutex_unlock(&gsec_kr->gsk_root_uc_lock);
+        RETURN(ctx);
+}
+
+/*
+ * release_ctx hook: log and destroy @ctx. @sec and @sync are not used.
+ */
+static
+void gss_sec_release_ctx_kr(struct ptlrpc_sec *sec,
+                            struct ptlrpc_cli_ctx *ctx,
+                            int sync)
+{
+        CWARN("ctx %p\n", ctx);
+        ctx_destroy_kr(ctx);
+}
+
+/*
+ * flush context of normal user, we must resort to keyring itself to find out
+ * contexts which belong to me.
+ *
+ * Note here we suppose only to flush _my_ context, the "uid" will
+ * be ignored in the search.
+ */
+/*
+ * look up keys matching the description built from @sec and @uid and
+ * invalidate each one until request_key() fails. @grace and @force are
+ * not used here.
+ */
+static
+void flush_user_ctx_cache_kr(struct ptlrpc_sec *sec,
+                             uid_t uid,
+                             int grace, int force)
+{
+        struct key              *key;
+        char                     desc[24];
+
+        /* nothing to do for reverse or rootonly sec */
+        if (sec->ps_flags & (PTLRPC_SEC_FL_REVERSE | PTLRPC_SEC_FL_ROOTONLY))
+                return;
+
+        construct_key_desc(desc, sizeof(desc), sec, uid);
+
+        /* there should be only one valid key, but we put it in the
+         * loop in case of any weird cases
+         */
+        for (;;) {
+                /* NULL callout info is passed for this lookup */
+                key = request_key(&gss_key_type, desc, NULL);
+                if (IS_ERR(key)) {
+                        CWARN("No more key found for current user\n");
+                        break;
+                }
+
+                /* kill_key_locked() requires the key write lock */
+                down_write(&key->sem);
+
+                CWARN("invalidating key %p - ctx %p\n", key, key->payload.data);
+                kill_key_locked(key);
+
+                /* kill_key_locked() should usually revoke the key, but we
+                 * revoke it again to make sure, e.g. in some cases the key
+                 * may not be well coupled with a context.
+                 */
+                key_revoke_locked(key);
+
+                up_write(&key->sem);
+
+                /* drop the reference obtained from request_key() */
+                key_put(key);
+        }
+}
+
+/*
+ * flush context of root or all, we iterate through the list.
+ */
+/*
+ * walk the ctx list of @sec under ps_lock and retire matching entries.
+ *
+ * @uid:   -1 matches every ctx, otherwise only ctx owned by @uid.
+ * @grace: when zero, the UPTODATE flag is cleared in addition to
+ *         setting DEAD.
+ * @force: when zero, ctx with more than 2 references are skipped.
+ */
+static
+void flush_spec_ctx_cache_kr(struct ptlrpc_sec *sec,
+                             uid_t uid,
+                             int grace, int force)
+{
+        struct gss_sec_keyring *gsec_kr;
+        struct hlist_head       freelist = HLIST_HEAD_INIT;
+        struct hlist_node      *pos, *next;
+        struct ptlrpc_cli_ctx  *ctx;
+        ENTRY;
+
+        gsec_kr = sec2gsec_keyring(sec);
+
+        spin_lock(&sec->ps_lock);
+        hlist_for_each_entry_safe(ctx, pos, next,
+                                  &gsec_kr->gsk_clist, cc_hash) {
+                LASSERT(atomic_read(&ctx->cc_refcount) > 0);
+
+                if (uid != -1 && uid != ctx->cc_vcred.vc_uid)
+                        continue;
+
+                /* at this moment there's at least 2 base reference:
+                 * key association and in-list.
+                 */
+                if (atomic_read(&ctx->cc_refcount) > 2) {
+                        if (!force)
+                                continue;
+                        CWARN("flush busy ctx %p(%u->%s, extra ref %d)\n",
+                              ctx, ctx->cc_vcred.vc_uid,
+                              sec2target_str(ctx->cc_sec),
+                              atomic_read(&ctx->cc_refcount) - 2);
+                }
+
+                set_bit(PTLRPC_CTX_DEAD_BIT, &ctx->cc_flags);
+                if (!grace)
+                        clear_bit(PTLRPC_CTX_UPTODATE_BIT, &ctx->cc_flags);
+
+                /* collect for disposal only if we did the unlisting */
+                if (ctx_unlist_kr(ctx, 1)) {
+                        hlist_add_head(&ctx->cc_hash, &freelist);
+                        CWARN("unlisted ctx %p\n", ctx);
+                } else
+                        CWARN("ctx %p: unlist return 0, let it go\n", ctx);
+
+        }
+        spin_unlock(&sec->ps_lock);
+
+        /* dispose outside of the spinlock */
+        dispose_ctx_list_kr(&freelist);
+        EXIT;
+}
+
+/*
+ * flush_ctx_cache hook: a root (0) or wildcard (-1) @uid is flushed by
+ * walking the sec's ctx list, any other uid through the keyring lookup.
+ * always returns 0.
+ */
+static
+int gss_sec_flush_ctx_cache_kr(struct ptlrpc_sec *sec,
+                               uid_t uid,
+                               int grace, int force)
+{
+        ENTRY;
+
+        CWARN("sec %p(%d, busy %d), uid %d, grace %d, force %d\n",
+              sec, atomic_read(&sec->ps_refcount), atomic_read(&sec->ps_busy),
+              uid, grace, force);
+
+        if (uid == -1 || uid == 0)
+                flush_spec_ctx_cache_kr(sec, uid, grace, force);
+        else
+                flush_user_ctx_cache_kr(sec, uid, grace, force);
+
+        RETURN(0);
+}
+
+/*
+ * gc_ctx hook: unlist every ctx of @sec for which cli_ctx_check_death()
+ * returns non-zero, then dispose the collected ones outside of the
+ * spinlock.
+ */
+static
+void gss_sec_gc_ctx_kr(struct ptlrpc_sec *sec)
+{
+        struct gss_sec_keyring *gsec_kr = sec2gsec_keyring(sec);
+        struct hlist_head       dead_list = HLIST_HEAD_INIT;
+        struct hlist_node      *cur, *tmp;
+        struct ptlrpc_cli_ctx  *ctx;
+        ENTRY;
+
+        CWARN("running gc\n");
+
+        spin_lock(&sec->ps_lock);
+        hlist_for_each_entry_safe(ctx, cur, tmp,
+                                  &gsec_kr->gsk_clist, cc_hash) {
+                LASSERT(atomic_read(&ctx->cc_refcount) > 0);
+
+                if (!cli_ctx_check_death(ctx))
+                        continue;
+
+                /* somebody else may have unlisted it already */
+                if (!ctx_unlist_kr(ctx, 1))
+                        continue;
+
+                hlist_add_head(&ctx->cc_hash, &dead_list);
+                CWARN("unhashed ctx %p\n", ctx);
+        }
+        spin_unlock(&sec->ps_lock);
+
+        dispose_ctx_list_kr(&dead_list);
+        EXIT;
+}
+
+/*
+ * display hook: print a header line followed by one line per ctx on the
+ * sec's ctx list into @buf, which is @bufsize bytes long.
+ *
+ * returns the number of characters actually stored in @buf, not counting
+ * the terminating NUL. output which does not fit is truncated and the
+ * walk stops; the return value never exceeds @bufsize - 1.
+ */
+static
+int gss_sec_display_kr(struct ptlrpc_sec *sec, char *buf, int bufsize)
+{
+        struct gss_sec_keyring *gsec_kr = sec2gsec_keyring(sec);
+        struct hlist_node      *pos, *next;
+        struct ptlrpc_cli_ctx  *ctx;
+        int                     written = 0;
+        int                     len;
+        ENTRY;
+
+        if (bufsize <= 0)
+                RETURN(0);
+
+        /* snprintf() returns the length it wanted to write, so a result
+         * >= the space offered means the output was truncated */
+        len = snprintf(buf, bufsize, "context list ===>\n");
+        if (len < 0)
+                RETURN(0);
+        if (len >= bufsize)
+                RETURN(bufsize - 1);
+        written = len;
+
+        spin_lock(&sec->ps_lock);
+        hlist_for_each_entry_safe(ctx, pos, next,
+                                  &gsec_kr->gsk_clist, cc_hash) {
+                struct key *key;
+                int         room = bufsize - written;
+
+                key = ctx2gctx_keyring(ctx)->gck_key;
+
+                len = snprintf(buf + written, room,
+                               "%p(%d): expire %ld(%ld), "
+                               "uid %u, flags 0x%lx, key %08x(%d)\n",
+                               ctx, atomic_read(&ctx->cc_refcount),
+                               ctx->cc_expire,
+                               ctx->cc_expire - cfs_time_current_sec(),
+                               ctx->cc_vcred.vc_uid,
+                               ctx->cc_flags,
+                               key ? key->serial : 0,
+                               key ? atomic_read(&key->usage) : 0);
+                if (len < 0)
+                        break;
+
+                if (len >= room) {
+                        /* truncated: the buffer is full up to the NUL */
+                        written = bufsize - 1;
+                        break;
+                }
+
+                written += len;
+        }
+        spin_unlock(&sec->ps_lock);
+
+        RETURN(written);
+}
+
+/****************************************
+ * cli_ctx apis                         *
+ ****************************************/
+
+/*
+ * refresh hook: performs no work and always returns 0.
+ */
+static
+int gss_cli_ctx_refresh_kr(struct ptlrpc_cli_ctx *ctx)
+{
+        /* upcall is already on the way */
+        return 0;
+}
+
+/*
+ * validate hook: returns 0 when @ctx is uptodate, 1 otherwise. a ctx for
+ * which cli_ctx_check_death() returns non-zero is killed before
+ * reporting 1.
+ */
+static
+int gss_cli_ctx_validate_kr(struct ptlrpc_cli_ctx *ctx)
+{
+        LASSERT(atomic_read(&ctx->cc_refcount) > 0);
+        LASSERT(ctx->cc_sec);
+
+        if (cli_ctx_check_death(ctx)) {
+                kill_ctx_kr(ctx);
+                return 1;
+        }
+
+        return cli_ctx_is_uptodate(ctx) ? 0 : 1;
+}
+
+/*
+ * die hook: expire @ctx via cli_ctx_expire() and kill it via
+ * kill_ctx_kr(). @grace is not used.
+ */
+static
+void gss_cli_ctx_die_kr(struct ptlrpc_cli_ctx *ctx, int grace)
+{
+        LASSERT(atomic_read(&ctx->cc_refcount) > 0);
+        LASSERT(ctx->cc_sec);
+
+        CWARN("ctx %p(%d)\n", ctx, atomic_read(&ctx->cc_refcount));
+        cli_ctx_expire(ctx);
+        kill_ctx_kr(ctx);
+}
+
+/****************************************
+ * (reverse) service                    *
+ ****************************************/
+
+/*
+ * reverse context could have nothing to do with keyrings. here we still keep
+ * the version which bind to a key, for future reference.
+ */
+#define HAVE_REVERSE_CTX_NOKEY
+
+#ifdef HAVE_REVERSE_CTX_NOKEY
+
+/*
+ * create a cli ctx for uid/gid 0, fill it from @svc_ctx and install it as
+ * the root ctx of @sec, without involving any key.
+ * returns 0 on success, -ENOMEM or the gss_copy_rvc_cli_ctx() error
+ * otherwise.
+ */
+static
+int sec_install_rctx_kr(struct ptlrpc_sec *sec,
+                        struct ptlrpc_svc_ctx *svc_ctx)
+{
+        struct vfs_cred          root_cred = { 0, 0 };
+        struct ptlrpc_cli_ctx   *rctx;
+        int                      rc;
+
+        LASSERT(sec);
+        LASSERT(svc_ctx);
+
+        rctx = ctx_create_kr(sec, &root_cred);
+        if (rctx == NULL)
+                return -ENOMEM;
+
+        rc = gss_copy_rvc_cli_ctx(rctx, svc_ctx);
+        if (rc == 0)
+                sec_replace_root_ctx_kr(sec, rctx, NULL);
+        else
+                CERROR("failed copy reverse cli ctx: %d\n", rc);
+
+        /* our reference is dropped on both the success and error path */
+        ctx_put_kr(rctx);
+
+        return rc;
+}
+
+#else /* ! HAVE_REVERSE_CTX_NOKEY */
+
+/*
+ * key-bound variant of sec_install_rctx_kr(); only compiled when
+ * HAVE_REVERSE_CTX_NOKEY is not defined.
+ *
+ * allocates and instantiates a key described for uid 0, creates a cli ctx
+ * filled from @svc_ctx and installs it as the root ctx of @sec bound to
+ * that key. returns 0 on success or a negative errno; on failure the key
+ * is revoked.
+ */
+static
+int sec_install_rctx_kr(struct ptlrpc_sec *sec,
+                        struct ptlrpc_svc_ctx *svc_ctx)
+{
+        struct ptlrpc_cli_ctx   *cli_ctx = NULL;
+        struct key              *key;
+        struct vfs_cred          vcred = { 0, 0 };
+        char                     desc[64];
+        int                      rc;
+
+        LASSERT(sec);
+        LASSERT(svc_ctx);
+        CWARN("called\n");
+
+        construct_key_desc(desc, sizeof(desc), sec, 0);
+
+        key = key_alloc(&gss_key_type, desc, 0, 0,
+                        KEY_POS_ALL | KEY_USR_ALL, 1);
+        if (IS_ERR(key)) {
+                CERROR("failed to alloc key: %ld\n", PTR_ERR(key));
+                return PTR_ERR(key);
+        }
+
+        rc = key_instantiate_and_link(key, NULL, 0, NULL, NULL);
+        if (rc) {
+                CERROR("failed to instantiate key: %d\n", rc);
+                goto err_revoke;
+        }
+
+        /* key->sem stays write locked while the ctx is created and bound */
+        down_write(&key->sem);
+
+        LASSERT(key->payload.data == NULL);
+
+        cli_ctx = ctx_create_kr(sec, &vcred);
+        if (cli_ctx == NULL) {
+                rc = -ENOMEM;
+                goto err_up;
+        }
+
+        rc = gss_copy_rvc_cli_ctx(cli_ctx, svc_ctx);
+        if (rc) {
+                CERROR("failed copy reverse cli ctx: %d\n", rc);
+                goto err_put;
+        }
+
+        sec_replace_root_ctx_kr(sec, cli_ctx, key);
+
+        ctx_put_kr(cli_ctx);
+        up_write(&key->sem);
+
+        rc = 0;
+        CWARN("ok!\n");
+out:
+        /* common exit: the key reference is dropped on every path */
+        key_put(key);
+        return rc;
+
+        /* error unwinding, in reverse order of acquisition */
+err_put:
+        ctx_put_kr(cli_ctx);
+err_up:
+        up_write(&key->sem);
+err_revoke:
+        key_revoke(key);
+        goto out;
+}
+
+#endif /* HAVE_REVERSE_CTX_NOKEY */
+
+/****************************************
+ * service apis                         *
+ ****************************************/
+
+/*
+ * accept hook: forwards @req to gss_svc_accept() with the keyring policy.
+ */
+static
+int gss_svc_accept_kr(struct ptlrpc_request *req)
+{
+        return gss_svc_accept(&gss_policy_keyring, req);
+}
+
+/*
+ * install_rctx hook: installs a reverse ctx built from @svc_ctx into the
+ * sec of @imp, which must already be set.
+ */
+static
+int gss_svc_install_rctx_kr(struct obd_import *imp,
+                            struct ptlrpc_svc_ctx *svc_ctx)
+{
+        LASSERT(imp->imp_sec);
+
+        return sec_install_rctx_kr(imp->imp_sec, svc_ctx);
+}
+
+/****************************************
+ * key apis                             *
+ ****************************************/
+
+/*
+ * instantiate callback of the key type. the key must be instantiated
+ * without any data and must not carry a payload yet; the ctx is attached
+ * to the key elsewhere.
+ *
+ * returns 0 on success, -EINVAL when data is supplied or a payload is
+ * already present.
+ */
+static
+int gss_kt_instantiate(struct key *key, const void *data, size_t datalen)
+{
+        ENTRY;
+
+        if (data != NULL || datalen != 0) {
+                /* datalen is a size_t: cast it to match the format */
+                CERROR("invalid: data %p, len %lu\n",
+                       data, (unsigned long) datalen);
+                RETURN(-EINVAL);
+        }
+
+        if (key->payload.data != NULL) {
+                CERROR("key already have payload\n");
+                RETURN(-EINVAL);
+        }
+
+        /* XXX */
+        key->perm |= KEY_POS_ALL | KEY_USR_ALL;
+        CWARN("key %p instantiated, ctx %p\n", key, key->payload.data);
+        RETURN(0);
+}
+
+/*
+ * update callback of the key type: consumes the negotiation result in
+ * @data (@datalen bytes) and applies it to the ctx attached to @key.
+ *
+ * called with key semaphore write locked. it means we can operate
+ * on the context without fear of losing refcount.
+ *
+ * the data starts with the sequence window. a zero window is followed by
+ * the rpc and gss error codes of a failed negotiation; otherwise the
+ * context handle and the mechanism context to import follow.
+ *
+ * returns -EINVAL for empty data, -EAGAIN (or the key_validate() error)
+ * when no ctx is attached yet, and 0 in all other cases, including a
+ * failed negotiation, which is recorded on the ctx instead.
+ */
+static
+int gss_kt_update(struct key *key, const void *data, size_t datalen)
+{
+        struct ptlrpc_cli_ctx   *ctx = key->payload.data;
+        struct gss_cli_ctx      *gctx;
+        rawobj_t                 tmpobj = RAWOBJ_EMPTY;
+        int                      rc;
+        ENTRY;
+
+        if (data == NULL || datalen == 0) {
+                /* datalen is a size_t: cast it to match the format */
+                CWARN("invalid: data %p, len %lu\n",
+                      data, (unsigned long) datalen);
+                RETURN(-EINVAL);
+        }
+
+        /*
+         * there's a race between userspace parent - child processes. if
+         * child finish negotiation too fast and call kt_update(), the ctx
+         * might be still NULL. but the key will finally be associate
+         * with a context, or be revoked. if key status is fine, return
+         * -EAGAIN to allow userspace sleep a while and call again.
+         */
+        if (ctx == NULL) {
+                CWARN("race in userspace. key %p(%x) flags %lx\n",
+                      key, key->serial, key->flags);
+
+                rc = key_validate(key);
+                if (rc == 0)
+                        RETURN(-EAGAIN);
+                else
+                        RETURN(rc);
+        }
+
+        LASSERT(atomic_read(&ctx->cc_refcount) > 0);
+        LASSERT(ctx->cc_sec);
+
+        ctx_clear_timer_kr(ctx);
+
+        /* don't proceed if already refreshed */
+        if (cli_ctx_is_refreshed(ctx)) {
+                CWARN("ctx already done refresh\n");
+                sptlrpc_cli_ctx_wakeup(ctx);
+                RETURN(0);
+        }
+
+        /* hold the ctx while parsing; released at the end of the function */
+        sptlrpc_cli_ctx_get(ctx);
+        gctx = ctx2gctx(ctx);
+        /* default result for every parse failure below */
+        rc = -EFAULT;
+
+        if (buffer_extract_bytes(&data, &datalen,
+                                 &gctx->gc_win, sizeof(gctx->gc_win))) {
+                CERROR("failed extract seq_win\n");
+                goto out;
+        }
+
+        CWARN("secwin is %d\n", gctx->gc_win);
+        if (gctx->gc_win == 0) {
+                /* negotiation failed: only error codes follow */
+                __u32   nego_rpc_err, nego_gss_err;
+
+                if (buffer_extract_bytes(&data, &datalen,
+                                         &nego_rpc_err, sizeof(nego_rpc_err))) {
+                        CERROR("failed to extrace rpc rc\n");
+                        goto out;
+                }
+
+                if (buffer_extract_bytes(&data, &datalen,
+                                         &nego_gss_err, sizeof(nego_gss_err))) {
+                        CERROR("failed to extrace gss rc\n");
+                        goto out;
+                }
+
+                CERROR("negotiation: rpc err %d, gss err %x\n",
+                       nego_rpc_err, nego_gss_err);
+
+                /* rc stays -EFAULT when no rpc error was reported */
+                if (nego_rpc_err)
+                        rc = nego_rpc_err;
+        } else {
+                if (rawobj_extract_local_alloc(&gctx->gc_handle,
+                                               (__u32 **)&data, &datalen)) {
+                        CERROR("failed extract handle\n");
+                        goto out;
+                }
+
+                if (rawobj_extract_local(&tmpobj, (__u32 **)&data, &datalen)) {
+                        CERROR("failed extract mech\n");
+                        goto out;
+                }
+
+                if (lgss_import_sec_context(&tmpobj,
+                                            sec2gsec(ctx->cc_sec)->gs_mech,
+                                            &gctx->gc_mechctx) !=
+                    GSS_S_COMPLETE) {
+                        CERROR("failed import context\n");
+                        goto out;
+                }
+
+                rc = 0;
+        }
+out:
+        /* we don't care what current status of this ctx, even someone else
+         * is operating on the ctx at the same time. we just add up our own
+         * opinions here.
+         */
+        if (rc == 0) {
+                gss_cli_ctx_uptodate(gctx);
+        } else {
+                cli_ctx_expire(ctx);
+
+                if (rc != -ERESTART)
+                        set_bit(PTLRPC_CTX_ERROR_BIT, &ctx->cc_flags);
+
+                /* this will also revoke the key. has to be done before
+                 * wakeup waiters otherwise they can find the stale key
+                 */
+                kill_key_locked(key);
+        }
+
+        sptlrpc_cli_ctx_wakeup(ctx);
+
+        /* let user space think it's a success */
+        sptlrpc_cli_ctx_put(ctx, 1);
+        RETURN(0);
+}
+
+/*
+ * match callback: non-zero when the key's description equals the
+ * requested description string @desc.
+ */
+static
+int gss_kt_match(const struct key *key, const void *desc)
+{
+        const char *wanted = desc;
+
+        return strcmp(key->description, wanted) == 0;
+}
+
+/*
+ * destroy callback: the key must no longer carry a payload (ctx) by the
+ * time it is destroyed; nothing else is released here.
+ */
+static
+void gss_kt_destroy(struct key *key)
+{
+        ENTRY;
+        LASSERT(key->payload.data == NULL);
+        CWARN("destroy key %p\n", key);
+        EXIT;
+}
+
+/*
+ * describe callback: emit the key description into @s, or "[null]" when
+ * the key has none.
+ */
+static
+void gss_kt_describe(const struct key *key, struct seq_file *s)
+{
+        seq_puts(s, key->description != NULL ? key->description : "[null]");
+}
+
+/*
+ * key type "lgssc", wired to the gss_kt_*() callbacks of this file.
+ * def_datalen is 0.
+ */
+static struct key_type gss_key_type =
+{
+        .name           = "lgssc",
+        .def_datalen    = 0,
+        .instantiate    = gss_kt_instantiate,
+        .update         = gss_kt_update,
+        .match          = gss_kt_match,
+        .destroy        = gss_kt_destroy,
+        .describe       = gss_kt_describe,
+};
+
+/****************************************
+ * lustre gss keyring policy            *
+ ****************************************/
+
+/*
+ * client ctx operations of the keyring policy. only refresh, validate and
+ * die are keyring specific (*_kr); the rest are gss_cli_ctx_*() routines
+ * not defined in this part of the file.
+ */
+static struct ptlrpc_ctx_ops gss_keyring_ctxops = {
+        .match                  = gss_cli_ctx_match,
+        .refresh                = gss_cli_ctx_refresh_kr,
+        .validate               = gss_cli_ctx_validate_kr,
+        .die                    = gss_cli_ctx_die_kr,
+        .display                = gss_cli_ctx_display,
+        .sign                   = gss_cli_ctx_sign,
+        .verify                 = gss_cli_ctx_verify,
+        .seal                   = gss_cli_ctx_seal,
+        .unseal                 = gss_cli_ctx_unseal,
+        .wrap_bulk              = gss_cli_ctx_wrap_bulk,
+        .unwrap_bulk            = gss_cli_ctx_unwrap_bulk,
+};
+
+/*
+ * client side sec operations of the keyring policy. sec life cycle, ctx
+ * lookup/release/flush/gc and display are keyring specific (*_kr); the
+ * remaining entries are gss routines not defined in this part of the file.
+ */
+static struct ptlrpc_sec_cops gss_sec_keyring_cops = {
+        .create_sec             = gss_sec_create_kr,
+        .destroy_sec            = gss_sec_destroy_kr,
+        .lookup_ctx             = gss_sec_lookup_ctx_kr,
+        .release_ctx            = gss_sec_release_ctx_kr,
+        .flush_ctx_cache        = gss_sec_flush_ctx_cache_kr,
+        .gc_ctx                 = gss_sec_gc_ctx_kr,
+        .install_rctx           = gss_sec_install_rctx,
+        .alloc_reqbuf           = gss_alloc_reqbuf,
+        .free_reqbuf            = gss_free_reqbuf,
+        .alloc_repbuf           = gss_alloc_repbuf,
+        .free_repbuf            = gss_free_repbuf,
+        .enlarge_reqbuf         = gss_enlarge_reqbuf,
+        .display                = gss_sec_display_kr,
+};
+
+/*
+ * server side sec operations of the keyring policy. accept and
+ * install_rctx are keyring specific (*_kr); the rest are gss_svc_*()
+ * routines not defined in this part of the file.
+ */
+static struct ptlrpc_sec_sops gss_sec_keyring_sops = {
+        .accept                 = gss_svc_accept_kr,
+        .invalidate_ctx         = gss_svc_invalidate_ctx,
+        .alloc_rs               = gss_svc_alloc_rs,
+        .authorize              = gss_svc_authorize,
+        .free_rs                = gss_svc_free_rs,
+        .free_ctx               = gss_svc_free_ctx,
+        .unwrap_bulk            = gss_svc_unwrap_bulk,
+        .wrap_bulk              = gss_svc_wrap_bulk,
+        .install_rctx           = gss_svc_install_rctx_kr,
+};
+
+/*
+ * the "gss.keyring" security policy: ties the client and server operation
+ * tables above to SPTLRPC_POLICY_GSS.
+ */
+static struct ptlrpc_sec_policy gss_policy_keyring = {
+        .sp_owner               = THIS_MODULE,
+        .sp_name                = "gss.keyring",
+        .sp_policy              = SPTLRPC_POLICY_GSS,
+        .sp_cops                = &gss_sec_keyring_cops,
+        .sp_sops                = &gss_sec_keyring_sops,
+};
+
+
+/*
+ * register the key type and then the keyring policy. returns 0 on
+ * success or the error of the failing registration; a key type which was
+ * already registered is unregistered again when the policy fails.
+ */
+int __init gss_init_keyring(void)
+{
+        int rc = register_key_type(&gss_key_type);
+
+        if (rc != 0) {
+                CERROR("failed to register keyring type: %d\n", rc);
+                return rc;
+        }
+
+        rc = sptlrpc_register_policy(&gss_policy_keyring);
+        if (rc != 0)
+                unregister_key_type(&gss_key_type);
+
+        return rc;
+}
+
+/*
+ * module exit: unregister the key type and the keyring policy.
+ */
+void __exit gss_exit_keyring(void)
+{
+        unregister_key_type(&gss_key_type);
+        sptlrpc_unregister_policy(&gss_policy_keyring);
+}
index b0f9292..1b8d7a4 100644 (file)
@@ -57,6 +57,7 @@
 #include <linux/slab.h>
 #include <linux/crypto.h>
 #include <linux/random.h>
+#include <linux/mutex.h>
 #else
 #include <liblustre.h>
 #endif
@@ -103,7 +104,7 @@ static struct krb5_enctype enctypes[] = {
                 0,
         },
         [ENCTYPE_DES3_CBC_RAW] = {              /* des3-hmac-sha1 */
-                "des-hmac-sha1",
+                "des3-hmac-sha1",
                 "des3_ede",
                 "sha1",
                 CRYPTO_TFM_MODE_CBC,
index 4e2b17e..53e11e6 100644 (file)
@@ -50,6 +50,7 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/slab.h>
+#include <linux/mutex.h>
 #else
 #include <liblustre.h>
 #endif
diff --git a/lustre/ptlrpc/gss/gss_pipefs.c b/lustre/ptlrpc/gss/gss_pipefs.c
new file mode 100644 (file)
index 0000000..9cc2a07
--- /dev/null
@@ -0,0 +1,1260 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Modifications for Lustre
+ * Copyright 2004 - 2006, Cluster File Systems, Inc.
+ * All rights reserved
+ * Author: Eric Mei <ericm@clusterfs.com>
+ */
+
+/*
+ * linux/net/sunrpc/auth_gss.c
+ *
+ * RPCSEC_GSS client authentication.
+ *
+ *  Copyright (c) 2000 The Regents of the University of Michigan.
+ *  All rights reserved.
+ *
+ *  Dug Song       <dugsong@monkey.org>
+ *  Andy Adamson   <andros@umich.edu>
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *  3. Neither the name of the University nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef EXPORT_SYMTAB
+# define EXPORT_SYMTAB
+#endif
+#define DEBUG_SUBSYSTEM S_SEC
+#ifdef __KERNEL__
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/dcache.h>
+#include <linux/fs.h>
+#include <linux/random.h>
+#include <linux/mutex.h>
+#include <linux/crypto.h>
+#include <asm/atomic.h>
+struct rpc_clnt; /* for rpc_pipefs */
+#include <linux/sunrpc/rpc_pipe_fs.h>
+#else
+#include <liblustre.h>
+#endif
+
+#include <obd.h>
+#include <obd_class.h>
+#include <obd_support.h>
+#include <lustre/lustre_idl.h>
+#include <lustre_sec.h>
+#include <lustre_net.h>
+#include <lustre_import.h>
+
+#include "gss_err.h"
+#include "gss_internal.h"
+#include "gss_api.h"
+
+static struct ptlrpc_sec_policy gss_policy_pipefs;
+static struct ptlrpc_ctx_ops gss_pipefs_ctxops;
+
+static int gss_cli_ctx_refresh_pf(struct ptlrpc_cli_ctx *ctx);
+
+static int gss_sec_pipe_upcall_init(struct gss_sec *gsec)
+{
+        return 0;       /* stub: no per-sec upcall setup is done here, always succeeds */
+}
+
+static void gss_sec_pipe_upcall_fini(struct gss_sec *gsec)
+{       /* stub: counterpart of gss_sec_pipe_upcall_init(), intentionally empty */
+}
+
+/****************************************
+ * internal context helpers             *
+ ****************************************/
+
+/*
+ * Allocate a gss client context for @vcred under @sec and run the common
+ * gss context initialization on it with the pipefs context ops.
+ *
+ * Returns the embedded ptlrpc_cli_ctx, or NULL when either the allocation
+ * or the common initialization fails (the allocation is released then).
+ */
+static
+struct ptlrpc_cli_ctx *ctx_create_pf(struct ptlrpc_sec *sec,
+                                     struct vfs_cred *vcred)
+{
+        struct gss_cli_ctx *gss_ctx;
+
+        OBD_ALLOC_PTR(gss_ctx);
+        if (!gss_ctx)
+                return NULL;
+
+        if (!gss_cli_ctx_init_common(sec, &gss_ctx->gc_base,
+                                     &gss_pipefs_ctxops, vcred))
+                return &gss_ctx->gc_base;
+
+        /* common init failed: give the memory back */
+        OBD_FREE_PTR(gss_ctx);
+        return NULL;
+}
+
+/*
+ * Finalize and free a client context.  A non-zero result from
+ * gss_cli_ctx_fini_common() is taken to mean this was the last context of
+ * @sec (see the warning below), in which case the sec is destroyed too.
+ */
+static
+void ctx_destroy_pf(struct ptlrpc_sec *sec, struct ptlrpc_cli_ctx *ctx)
+{
+        struct gss_cli_ctx *gss_ctx = ctx2gctx(ctx);
+        int                 was_last;
+
+        was_last = gss_cli_ctx_fini_common(sec, ctx);
+        OBD_FREE_PTR(gss_ctx);
+
+        if (!was_last)
+                return;
+
+        CWARN("released the last ctx, proceed to destroy sec %s@%p\n",
+              sec->ps_policy->sp_name, sec);
+        sptlrpc_sec_destroy(sec);
+}
+
+/*
+ * Put @ctx at the head of the hash chain @hash: flag it as cached and take
+ * the reference that the cache holds on it.  No locking is done here.
+ */
+static
+void ctx_enhash_pf(struct ptlrpc_cli_ctx *ctx, struct hlist_head *hash)
+{
+        struct hlist_node *node = &ctx->cc_hash;
+
+        set_bit(PTLRPC_CTX_CACHED_BIT, &ctx->cc_flags);
+        atomic_inc(&ctx->cc_refcount);          /* the cache's reference */
+        hlist_add_head(node, hash);
+}
+
+/*
+ * Take a cached context off its hash chain and drop the reference the
+ * cache held on it.  When that was the last reference the context is moved
+ * onto @freelist for the caller to destroy; otherwise its hash node is
+ * simply re-initialized.
+ *
+ * caller must hold spinlock
+ */
+static
+void ctx_unhash_pf(struct ptlrpc_cli_ctx *ctx, struct hlist_head *freelist)
+{
+        LASSERT_SPIN_LOCKED(&ctx->cc_sec->ps_lock);
+        LASSERT(atomic_read(&ctx->cc_refcount) > 0);
+        LASSERT(test_bit(PTLRPC_CTX_CACHED_BIT, &ctx->cc_flags));
+        LASSERT(!hlist_unhashed(&ctx->cc_hash));
+
+        clear_bit(PTLRPC_CTX_CACHED_BIT, &ctx->cc_flags);
+
+        if (!atomic_dec_and_test(&ctx->cc_refcount)) {
+                /* still referenced elsewhere: only unlink it */
+                hlist_del_init(&ctx->cc_hash);
+                return;
+        }
+
+        /* no reference left: hand it over to the caller's free list */
+        __hlist_del(&ctx->cc_hash);
+        hlist_add_head(&ctx->cc_hash, freelist);
+}
+
+/*
+ * Ask cli_ctx_check_death() whether @ctx is dead.  A dead context is also
+ * unhashed onto @freelist when a free list is supplied.
+ *
+ * return 1 if the context is dead, 0 otherwise.
+ */
+static
+int ctx_check_death_pf(struct ptlrpc_cli_ctx *ctx, struct hlist_head *freelist)
+{
+        if (!cli_ctx_check_death(ctx))
+                return 0;
+
+        if (freelist != NULL)
+                ctx_unhash_pf(ctx, freelist);
+
+        return 1;
+}
+
+/*
+ * Variant of ctx_check_death_pf() for contexts found in the hash cache:
+ * asserts that @ctx has a sec, is referenced and is flagged as cached
+ * before doing the actual check.
+ */
+static inline
+int ctx_check_death_locked_pf(struct ptlrpc_cli_ctx *ctx,
+                              struct hlist_head *freelist)
+{
+        int dead;
+
+        LASSERT(ctx->cc_sec);
+        LASSERT(atomic_read(&ctx->cc_refcount) > 0);
+        LASSERT(test_bit(PTLRPC_CTX_CACHED_BIT, &ctx->cc_flags));
+
+        dead = ctx_check_death_pf(ctx, freelist);
+        return dead;
+}
+
+/*
+ * Does @ctx match the credential @vcred?  Delegates to the context's
+ * ->match op; a context without one matches everything (a small
+ * optimization for the null policy).
+ */
+static inline
+int ctx_match_pf(struct ptlrpc_cli_ctx *ctx, struct vfs_cred *vcred)
+{
+        if (ctx->cc_ops->match != NULL)
+                return ctx->cc_ops->match(ctx, vcred);
+
+        return 1;
+}
+
+/*
+ * Destroy every context queued on the free list @head.  Each entry must be
+ * unreferenced and no longer flagged as cached.
+ */
+static
+void ctx_list_destroy_pf(struct hlist_head *head)
+{
+        struct hlist_node *node;
+
+        while ((node = head->first) != NULL) {
+                struct ptlrpc_cli_ctx *victim;
+
+                victim = hlist_entry(node, struct ptlrpc_cli_ctx, cc_hash);
+
+                LASSERT(atomic_read(&victim->cc_refcount) == 0);
+                LASSERT(test_bit(PTLRPC_CTX_CACHED_BIT,
+                                 &victim->cc_flags) == 0);
+
+                hlist_del_init(&victim->cc_hash);
+                ctx_destroy_pf(victim->cc_sec, victim);
+        }
+}
+
+/****************************************
+ * context apis                         *
+ ****************************************/
+
+/*
+ * Validate a context: returns 0 only when it is not dead and is up to
+ * date, 1 in every other case.  A dead context is not unhashed here.
+ */
+static
+int gss_cli_ctx_validate_pf(struct ptlrpc_cli_ctx *ctx)
+{
+        if (ctx_check_death_pf(ctx, NULL))
+                return 1;
+
+        return cli_ctx_is_uptodate(ctx) ? 0 : 1;
+}
+
+/*
+ * Kill a context: expire it and, if it is still in the hash cache, unlink
+ * it and drop the cache's reference.  The caller holds its own reference,
+ * so dropping the cache's one must never be the final put.
+ * @grace is not used by this implementation.
+ */
+static
+void gss_cli_ctx_die_pf(struct ptlrpc_cli_ctx *ctx, int grace)
+{
+        struct ptlrpc_sec *sec;
+
+        LASSERT(ctx->cc_sec);
+        LASSERT(atomic_read(&ctx->cc_refcount) > 0);
+
+        cli_ctx_expire(ctx);
+
+        sec = ctx->cc_sec;
+        spin_lock(&sec->ps_lock);
+
+        if (test_and_clear_bit(PTLRPC_CTX_CACHED_BIT, &ctx->cc_flags)) {
+                LASSERT(!hlist_unhashed(&ctx->cc_hash));
+                LASSERT(atomic_read(&ctx->cc_refcount) > 1);
+
+                hlist_del_init(&ctx->cc_hash);
+                /* caller's reference remains, this cannot hit zero */
+                if (atomic_dec_and_test(&ctx->cc_refcount))
+                        LBUG();
+        }
+
+        spin_unlock(&sec->ps_lock);
+}
+
+/****************************************
+ * reverse context installation         *
+ ****************************************/
+
+/*
+ * Map @key to a chain index by masking it with (@hashsize - 1).
+ * NOTE(review): the result only stays below @hashsize when @hashsize is a
+ * power of two; the sizes visible in this file are 1 and 32.
+ */
+static inline
+unsigned int ctx_hash_index(int hashsize, __u64 key)
+{
+        __u64 mask = (__u64) hashsize - 1;
+
+        return (unsigned int) (key & mask);
+}
+
+/*
+ * Install @new into the context cache of @gsec.  The first cached context
+ * on the same hash chain that matches @new's credential (if any) is
+ * expired and unhashed; @new is then hashed in and ps_busy is bumped.
+ * Contexts that lost their last reference are destroyed after ps_lock has
+ * been dropped.
+ */
+static
+void gss_sec_ctx_replace_pf(struct gss_sec *gsec,
+                            struct ptlrpc_cli_ctx *new)
+{
+        struct gss_sec_pipefs *gsec_pf;
+        struct ptlrpc_cli_ctx *old;
+        struct hlist_head     *chain;
+        struct hlist_node     *pos, *next;
+        HLIST_HEAD(freelist);
+        unsigned int           idx;
+        ENTRY;
+
+        gsec_pf = container_of(gsec, struct gss_sec_pipefs, gsp_base);
+
+        idx = ctx_hash_index(gsec_pf->gsp_chash_size,
+                             (__u64) new->cc_vcred.vc_uid);
+        LASSERT(idx < gsec_pf->gsp_chash_size);
+        chain = &gsec_pf->gsp_chash[idx];
+
+        spin_lock(&gsec->gs_base.ps_lock);
+
+        hlist_for_each_entry_safe(old, pos, next, chain, cc_hash) {
+                if (ctx_match_pf(old, &new->cc_vcred)) {
+                        /* only the first match is replaced */
+                        cli_ctx_expire(old);
+                        ctx_unhash_pf(old, &freelist);
+                        break;
+                }
+        }
+
+        ctx_enhash_pf(new, chain);
+        atomic_inc(&gsec->gs_base.ps_busy);
+
+        spin_unlock(&gsec->gs_base.ps_lock);
+
+        ctx_list_destroy_pf(&freelist);
+        EXIT;
+}
+
+/*
+ * Build a client context for uid/gid 0 from the server context @svc_ctx
+ * and install it into @gsec's cache, replacing a matching cached one.
+ *
+ * Returns 0 on success, -ENOMEM if the context cannot be created, or the
+ * error from gss_copy_rvc_cli_ctx().
+ */
+static
+int gss_install_rvs_cli_ctx_pf(struct gss_sec *gsec,
+                               struct ptlrpc_svc_ctx *svc_ctx)
+{
+        struct ptlrpc_cli_ctx   *rvs_ctx;
+        struct vfs_cred          root_cred;
+        int                      err;
+        ENTRY;
+
+        root_cred.vc_uid = 0;
+        root_cred.vc_gid = 0;
+
+        rvs_ctx = ctx_create_pf(&gsec->gs_base, &root_cred);
+        if (rvs_ctx == NULL)
+                RETURN(-ENOMEM);
+
+        err = gss_copy_rvc_cli_ctx(rvs_ctx, svc_ctx);
+        if (err != 0) {
+                ctx_destroy_pf(rvs_ctx->cc_sec, rvs_ctx);
+                RETURN(err);
+        }
+
+        gss_sec_ctx_replace_pf(gsec, rvs_ctx);
+        RETURN(0);
+}
+
+/*
+ * Garbage-collect the whole context cache of @gsec_pf: every cached
+ * context is checked for death and dead ones are unhashed onto @freelist.
+ * The next gc time is then set ps_gc_interval seconds from now.
+ *
+ * The unhash helper asserts that ps_lock is held, so this must be called
+ * with the sec's ps_lock taken.
+ */
+static
+void gss_ctx_cache_gc_pf(struct gss_sec_pipefs *gsec_pf,
+                         struct hlist_head *freelist)
+{
+        struct ptlrpc_sec       *sec = &gsec_pf->gsp_base.gs_base;
+        struct ptlrpc_cli_ctx   *cur;
+        struct hlist_node       *pos, *next;
+        int                      chain;
+        ENTRY;
+
+        CDEBUG(D_SEC, "do gc on sec %s@%p\n", sec->ps_policy->sp_name, sec);
+
+        for (chain = 0; chain < gsec_pf->gsp_chash_size; chain++) {
+                hlist_for_each_entry_safe(cur, pos, next,
+                                          &gsec_pf->gsp_chash[chain],
+                                          cc_hash) {
+                        ctx_check_death_locked_pf(cur, freelist);
+                }
+        }
+
+        sec->ps_gc_next = cfs_time_current_sec() + sec->ps_gc_interval;
+        EXIT;
+}
+
+static
+struct ptlrpc_sec* gss_sec_create_pf(struct obd_import *imp,
+                                     struct ptlrpc_svc_ctx *ctx,
+                                     __u32 flavor,
+                                     unsigned long flags)
+{
+        struct gss_sec_pipefs   *gsec_pf;
+        int                      alloc_size, hash_size, i;
+        ENTRY;
+
+#define GSS_SEC_PIPEFS_CTX_HASH_SIZE    (32)
+
+        if (ctx || flags & (PTLRPC_SEC_FL_ROOTONLY | PTLRPC_SEC_FL_REVERSE))
+                hash_size = 1;
+        else
+                hash_size = GSS_SEC_PIPEFS_CTX_HASH_SIZE;
+
+        alloc_size = sizeof(*gsec_pf) +
+                     sizeof(struct hlist_head) * hash_size;
+
+        OBD_ALLOC(gsec_pf, alloc_size);
+        if (!gsec_pf)
+                RETURN(NULL);
+
+        gsec_pf->gsp_chash_size = hash_size;
+        for (i = 0; i < hash_size; i++)
+                INIT_HLIST_HEAD(&gsec_pf->gsp_chash[i]);
+
+        if (gss_sec_create_common(&gsec_pf->gsp_base, &gss_policy_pipefs,
+                                  imp, ctx, flavor, flags))
+                goto err_free;
+
+        if (ctx == NULL) {
+                if (gss_sec_pipe_upcall_init(&gsec_pf->gsp_base))
+                        goto err_destroy;
+        } else {
+                if (gss_install_rvs_cli_ctx_pf(&gsec_pf->gsp_base, ctx))
+                        goto err_destroy;
+        }
+
+        RETURN(&gsec_pf->gsp_base.gs_base);
+
+err_destroy:
+        gss_sec_destroy_common(&gsec_pf->gsp_base);
+err_free:
+        OBD_FREE(gsec_pf, alloc_size);
+        RETURN(NULL);
+}
+
+/*
+ * Destroy a pipefs-flavor gss sec created by gss_sec_create_pf(): run the
+ * upcall fini hook and the common gss teardown, then release the single
+ * allocation that holds the sec together with its context hash table.
+ *
+ * The allocation size is computed up front and the memory is released
+ * through the pointer it was allocated as (the gss_sec_pipefs), so the
+ * free neither depends on gsp_base being the first member nor reads the
+ * object being freed from inside the OBD_FREE() macro arguments.
+ */
+static
+void gss_sec_destroy_pf(struct ptlrpc_sec *sec)
+{
+        struct gss_sec_pipefs   *gsec_pf;
+        struct gss_sec          *gsec;
+        int                      alloc_size;
+
+        CWARN("destroy %s@%p\n", sec->ps_policy->sp_name, sec);
+
+        gsec = container_of(sec, struct gss_sec, gs_base);
+        gsec_pf = container_of(gsec, struct gss_sec_pipefs, gsp_base);
+
+        LASSERT(gsec_pf->gsp_chash);
+        LASSERT(gsec_pf->gsp_chash_size);
+
+        /* same formula as in gss_sec_create_pf() */
+        alloc_size = sizeof(*gsec_pf) +
+                     sizeof(struct hlist_head) * gsec_pf->gsp_chash_size;
+
+        gss_sec_pipe_upcall_fini(gsec);
+
+        gss_sec_destroy_common(gsec);
+
+        OBD_FREE(gsec_pf, alloc_size);
+}
+
+/*
+ * Look up the client context matching @vcred in the hash cache of @sec.
+ *
+ * @create:      when no match is cached, allocate a new context, hash it
+ *               and give it the first push to refresh.
+ * @remove_dead: unhash dead contexts met on the way (and run a full gc
+ *               pass when the gc time has come) instead of just skipping
+ *               them.
+ *
+ * Returns the context with an extra reference held for the caller, or NULL
+ * if nothing matched and nothing was (or could be) created.  Reverse secs
+ * never get a context allocated here.
+ *
+ * A new context is allocated with ps_lock dropped, so the lookup is
+ * retried afterwards; if somebody else hashed a matching context in the
+ * meantime the freshly allocated one is discarded.
+ *
+ * All exits go through the common tail so that contexts collected on the
+ * local free list are always destroyed, and ps_lock is unlocked exactly
+ * once on every path.
+ */
+static
+struct ptlrpc_cli_ctx * gss_sec_lookup_ctx_pf(struct ptlrpc_sec *sec,
+                                              struct vfs_cred *vcred,
+                                              int create, int remove_dead)
+{
+        struct gss_sec         *gsec;
+        struct gss_sec_pipefs  *gsec_pf;
+        struct ptlrpc_cli_ctx  *ctx = NULL, *new = NULL;
+        struct hlist_head      *hash_head;
+        struct hlist_node      *pos, *next;
+        HLIST_HEAD(freelist);
+        unsigned int            hash, gc = 0, found = 0;
+        ENTRY;
+
+        might_sleep();
+
+        gsec = container_of(sec, struct gss_sec, gs_base);
+        gsec_pf = container_of(gsec, struct gss_sec_pipefs, gsp_base);
+
+        hash = ctx_hash_index(gsec_pf->gsp_chash_size,
+                              (__u64) vcred->vc_uid);
+        LASSERT(hash < gsec_pf->gsp_chash_size);
+        hash_head = &gsec_pf->gsp_chash[hash];
+
+retry:
+        spin_lock(&sec->ps_lock);
+
+        /* gc_next == 0 means never do gc */
+        if (remove_dead && sec->ps_gc_next &&
+            cfs_time_after(cfs_time_current_sec(), sec->ps_gc_next)) {
+                gss_ctx_cache_gc_pf(gsec_pf, &freelist);
+                gc = 1;
+        }
+
+        hlist_for_each_entry_safe(ctx, pos, next, hash_head, cc_hash) {
+                /* a full gc pass already weeded out the dead ones */
+                if (gc == 0 &&
+                    ctx_check_death_locked_pf(ctx,
+                                              remove_dead ? &freelist : NULL))
+                        continue;
+
+                if (ctx_match_pf(ctx, vcred)) {
+                        found = 1;
+                        break;
+                }
+        }
+
+        /*
+         * Without a match the loop cursor does not point at a usable
+         * context, so every !found branch below assigns ctx explicitly.
+         */
+        if (found) {
+                if (new && new != ctx) {
+                        /* lost the race, just free it */
+                        hlist_add_head(&new->cc_hash, &freelist);
+                        new = NULL;
+                }
+
+                /* hot node, move to head */
+                if (hash_head->first != &ctx->cc_hash) {
+                        __hlist_del(&ctx->cc_hash);
+                        hlist_add_head(&ctx->cc_hash, hash_head);
+                }
+        } else if (sec->ps_flags & PTLRPC_SEC_FL_REVERSE) {
+                /* don't allocate for reverse sec */
+                ctx = NULL;
+        } else if (new) {
+                ctx_enhash_pf(new, hash_head);
+                ctx = new;
+        } else if (create) {
+                spin_unlock(&sec->ps_lock);
+                new = ctx_create_pf(sec, vcred);
+                if (new) {
+                        clear_bit(PTLRPC_CTX_NEW_BIT, &new->cc_flags);
+                        goto retry;
+                }
+
+                /*
+                 * Allocation failed and ps_lock is already dropped: return
+                 * NULL without touching the lock or the loop cursor again.
+                 */
+                ctx = NULL;
+                goto out;
+        } else {
+                ctx = NULL;
+        }
+
+        /* hold a ref */
+        if (ctx)
+                atomic_inc(&ctx->cc_refcount);
+
+        spin_unlock(&sec->ps_lock);
+
+        /* the allocator of the context must give the first push to refresh */
+        if (new) {
+                LASSERT(new == ctx);
+                gss_cli_ctx_refresh_pf(new);
+        }
+
+out:
+        ctx_list_destroy_pf(&freelist);
+        RETURN(ctx);
+}
+
+/*
+ * Release a context that is no longer cached nor hashed by destroying it.
+ * For an asynchronous release (@sync == 0) the UPTODATE bit is cleared
+ * first so that the destroy procedure does not issue extra rpcs.
+ */
+static
+void gss_sec_release_ctx_pf(struct ptlrpc_sec *sec,
+                            struct ptlrpc_cli_ctx *ctx,
+                            int sync)
+{
+        LASSERT(test_bit(PTLRPC_CTX_CACHED_BIT, &ctx->cc_flags) == 0);
+        LASSERT(hlist_unhashed(&ctx->cc_hash));
+
+        if (sync == 0) {
+                /* async: keep the destroy procedure quiet on the wire */
+                clear_bit(PTLRPC_CTX_UPTODATE_BIT, &ctx->cc_flags);
+        }
+
+        ctx_destroy_pf(sec, ctx);
+}
+
+/*
+ * Flush contexts out of the hash cache of @sec.
+ *
+ * @uid: which user. "-1" means flush all.
+ * @grace: mark context DEAD, allow graceful destroy like notify
+ *         server side, etc.  When zero, the UPTODATE bit is cleared as
+ *         well before the context is destroyed.
+ * @force: also flush busy entries, i.e. contexts referenced by somebody
+ *         besides the cache.
+ *
+ * Every selected context is unhashed under ps_lock; those that drop to
+ * zero references are collected on a local list and destroyed after the
+ * lock has been released.
+ *
+ * Returns the number of busy contexts encountered (counted whether or not
+ * they were flushed).
+ *
+ * In any case, never touch "eternal" contexts.
+ * NOTE(review): no explicit check for eternal contexts is visible in this
+ * function - confirm where that guarantee comes from.
+ */
+static
+int gss_sec_flush_ctx_cache_pf(struct ptlrpc_sec *sec,
+                               uid_t uid,
+                               int grace, int force)
+{
+        struct gss_sec          *gsec;
+        struct gss_sec_pipefs   *gsec_pf;
+        struct ptlrpc_cli_ctx   *ctx;
+        struct hlist_node *pos, *next;
+        HLIST_HEAD(freelist);
+        int i, busy = 0;
+        ENTRY;
+
+        might_sleep_if(grace);
+
+        gsec = container_of(sec, struct gss_sec, gs_base);
+        gsec_pf = container_of(gsec, struct gss_sec_pipefs, gsp_base);
+
+        spin_lock(&sec->ps_lock);
+        for (i = 0; i < gsec_pf->gsp_chash_size; i++) {
+                hlist_for_each_entry_safe(ctx, pos, next,
+                                          &gsec_pf->gsp_chash[i], cc_hash) {
+                        LASSERT(atomic_read(&ctx->cc_refcount) > 0);
+
+                        if (uid != -1 && uid != ctx->cc_vcred.vc_uid)
+                                continue;
+
+                        if (atomic_read(&ctx->cc_refcount) > 1) {       /* more than the cache's own ref */
+                                busy++;
+                                if (!force)
+                                        continue;
+
+                                CWARN("flush busy(%d) ctx %p(%u->%s) by force, "
+                                      "grace %d\n",
+                                      atomic_read(&ctx->cc_refcount),
+                                      ctx, ctx->cc_vcred.vc_uid,
+                                      sec2target_str(ctx->cc_sec), grace);
+                        }
+                        ctx_unhash_pf(ctx, &freelist);
+
+                        set_bit(PTLRPC_CTX_DEAD_BIT, &ctx->cc_flags);
+                        if (!grace)
+                                clear_bit(PTLRPC_CTX_UPTODATE_BIT,
+                                          &ctx->cc_flags);
+                }
+        }
+        spin_unlock(&sec->ps_lock);
+
+        ctx_list_destroy_pf(&freelist);         /* destroy outside the spinlock */
+        RETURN(busy);
+}
+
+/****************************************
+ * service apis                         *
+ ****************************************/
+
+static
+int gss_svc_accept_pf(struct ptlrpc_request *req)
+{
+        return gss_svc_accept(&gss_policy_pipefs, req); /* common gss accept, bound to the pipefs policy */
+}
+
+/*
+ * Install a reverse client context, derived from the server context @ctx,
+ * into the gss sec attached to @imp.  Both the import's sec and @ctx must
+ * be set.  Returns the result of gss_install_rvs_cli_ctx_pf().
+ */
+static
+int gss_svc_install_rctx_pf(struct obd_import *imp,
+                            struct ptlrpc_svc_ctx *ctx)
+{
+        LASSERT(imp->imp_sec);
+        LASSERT(ctx);
+
+        return gss_install_rvs_cli_ctx_pf(container_of(imp->imp_sec,
+                                                       struct gss_sec,
+                                                       gs_base),
+                                          ctx);
+}
+
+/****************************************
+ * rpc_pipefs definitions               *
+ ****************************************/
+
+#define LUSTRE_PIPE_ROOT        "/lustre"
+#define LUSTRE_PIPE_KRB5        LUSTRE_PIPE_ROOT"/krb5"
+
+struct gss_upcall_msg_data {            /* payload handed to user space through the pipe */
+        __u32                           gum_seq;        /* upcall sequence number, echoed back in the downcall */
+        __u32                           gum_uid;        /* uid of the context being refreshed */
+        __u32                           gum_gid;        /* set to 0, not used for now */
+        __u32                           gum_svc;        /* MDS/OSS... */
+        __u64                           gum_nid;        /* peer NID */
+        __u8                            gum_obd[64];    /* client obd name */
+};
+
+struct gss_upcall_msg {                 /* one pending upcall */
+        struct rpc_pipe_msg             gum_base;       /* rpc_pipefs message, its data points at gum_data */
+        atomic_t                        gum_refcount;   /* freed by gss_release_msg() when it drops to 0 */
+        struct list_head                gum_list;       /* link in upcall_lists[gum_mechidx] */
+        __u32                           gum_mechidx;    /* mechanism index, below MECH_MAX */
+        struct gss_sec                 *gum_gsec;       /* sec the upcall was issued for */
+        struct gss_cli_ctx             *gum_gctx;       /* context being refreshed; its ref is put on final release */
+        struct gss_upcall_msg_data      gum_data;       /* the payload read by user space */
+};
+
+static atomic_t upcall_seq = ATOMIC_INIT(0);
+
+/*
+ * Hand out the next upcall sequence number by atomically incrementing the
+ * global upcall_seq counter.
+ */
+static inline
+__u32 upcall_get_sequence(void)
+{
+        int next = atomic_inc_return(&upcall_seq);
+
+        return (__u32) next;
+}
+
+enum mech_idx_t {
+        MECH_KRB5   = 0,        /* the only index mech_name2idx() returns */
+        MECH_MAX                /* sizes the per-mechanism arrays in this file */
+};
+
+/*
+ * Translate a mechanism name into its index.  Only "krb5" exists, any
+ * other name trips the assertion.
+ */
+static inline
+__u32 mech_name2idx(const char *name)
+{
+        LASSERT(strcmp(name, "krb5") == 0);
+
+        return MECH_KRB5;
+}
+
+/* pipefs dentries, one for each mechanism */
+static struct dentry *de_pipes[MECH_MAX] = { NULL, };
+/* all upcall messages are linked here */
+static struct list_head upcall_lists[MECH_MAX];
+/* and protected by this */
+static spinlock_t upcall_locks[MECH_MAX];
+
+static inline
+void upcall_list_lock(int idx)
+{
+        spin_lock(&upcall_locks[idx]);          /* protects upcall_lists[idx] */
+}
+
+static inline
+void upcall_list_unlock(int idx)
+{
+        spin_unlock(&upcall_locks[idx]);        /* pairs with upcall_list_lock(idx) */
+}
+
+/*
+ * Link @msg onto the upcall list of its mechanism, under that list's
+ * lock.  The message refcount is not touched here.
+ */
+static
+void upcall_msg_enlist(struct gss_upcall_msg *msg)
+{
+        const __u32 mech = msg->gum_mechidx;
+
+        upcall_list_lock(mech);
+        list_add(&msg->gum_list, &upcall_lists[mech]);
+        upcall_list_unlock(mech);
+}
+
+/*
+ * Unlink @msg from the upcall list of its mechanism, under that list's
+ * lock.  The message refcount is not touched here.
+ */
+static
+void upcall_msg_delist(struct gss_upcall_msg *msg)
+{
+        const __u32 mech = msg->gum_mechidx;
+
+        upcall_list_lock(mech);
+        list_del_init(&msg->gum_list);
+        upcall_list_unlock(mech);
+}
+
+/****************************************
+ * rpc_pipefs upcall helpers            *
+ ****************************************/
+
+/*
+ * Drop one reference on an upcall message.  On the final put the attached
+ * context (if any) is woken up and released, and the message, which must
+ * be off both the upcall list and the pipe's list by then, is freed.
+ */
+static
+void gss_release_msg(struct gss_upcall_msg *gmsg)
+{
+        ENTRY;
+        LASSERT(atomic_read(&gmsg->gum_refcount) > 0);
+
+        if (atomic_dec_and_test(&gmsg->gum_refcount)) {
+                struct gss_cli_ctx *gctx = gmsg->gum_gctx;
+
+                if (gctx != NULL) {
+                        sptlrpc_cli_ctx_wakeup(&gctx->gc_base);
+                        sptlrpc_cli_ctx_put(&gctx->gc_base, 1);
+                        gmsg->gum_gctx = NULL;
+                }
+
+                LASSERT(list_empty(&gmsg->gum_list));
+                LASSERT(list_empty(&gmsg->gum_base.list));
+                OBD_FREE_PTR(gmsg);
+        }
+
+        EXIT;
+}
+
+/*
+ * Take @gmsg off its mechanism's upcall list and drop the reference the
+ * list held.  Does nothing if the message is not on the list.  The
+ * caller must hold the upcall lock of the message's mechanism and a
+ * reference of its own.
+ */
+static
+void gss_unhash_msg_nolock(struct gss_upcall_msg *gmsg)
+{
+        __u32 mech = gmsg->gum_mechidx;
+
+        LASSERT(mech < MECH_MAX);
+        LASSERT_SPIN_LOCKED(&upcall_locks[mech]);
+
+        if (!list_empty(&gmsg->gum_list)) {
+                list_del_init(&gmsg->gum_list);
+                /* the list's ref goes away, the caller's one stays */
+                LASSERT(atomic_read(&gmsg->gum_refcount) > 1);
+                atomic_dec(&gmsg->gum_refcount);
+        }
+}
+
+/*
+ * Locked wrapper around gss_unhash_msg_nolock(): takes the upcall lock of
+ * the message's mechanism for the duration of the unhash.
+ */
+static
+void gss_unhash_msg(struct gss_upcall_msg *gmsg)
+{
+        __u32 mech = gmsg->gum_mechidx;
+
+        LASSERT(mech < MECH_MAX);
+
+        upcall_list_lock(mech);
+        gss_unhash_msg_nolock(gmsg);
+        upcall_list_unlock(mech);
+}
+
+/*
+ * Fail the context attached to an upcall message, if there is one:
+ * expire it and set its ERROR bit.
+ */
+static
+void gss_msg_fail_ctx(struct gss_upcall_msg *gmsg)
+{
+        struct ptlrpc_cli_ctx *ctx;
+
+        if (gmsg->gum_gctx == NULL)
+                return;
+
+        ctx = &gmsg->gum_gctx->gc_base;
+
+        LASSERT(atomic_read(&ctx->cc_refcount) > 0);
+        sptlrpc_cli_ctx_expire(ctx);
+        set_bit(PTLRPC_CTX_ERROR_BIT, &ctx->cc_flags);
+}
+
+/*
+ * Find the pending upcall of mechanism @mechidx whose sequence number is
+ * @seq.  Returns it with an extra reference taken (to be dropped with
+ * gss_release_msg()), or NULL if no such upcall is listed.
+ */
+static
+struct gss_upcall_msg * gss_find_upcall(__u32 mechidx, __u32 seq)
+{
+        struct gss_upcall_msg *cur;
+        struct gss_upcall_msg *hit = NULL;
+
+        upcall_list_lock(mechidx);
+        list_for_each_entry(cur, &upcall_lists[mechidx], gum_list) {
+                if (cur->gum_data.gum_seq == seq) {
+                        LASSERT(atomic_read(&cur->gum_refcount) > 0);
+                        LASSERT(cur->gum_mechidx == mechidx);
+
+                        atomic_inc(&cur->gum_refcount);
+                        hit = cur;
+                        break;
+                }
+        }
+        upcall_list_unlock(mechidx);
+
+        return hit;
+}
+
+/*
+ * Consume @reslen bytes from the front of a buffer cursor: copy them to
+ * @res, advance *@buf and shrink *@buflen accordingly.
+ *
+ * Returns 0 on success, or -EINVAL (leaving the cursor untouched) when
+ * fewer than @reslen bytes remain.
+ */
+static
+int simple_get_bytes(char **buf, __u32 *buflen, void *res, __u32 reslen)
+{
+        if (reslen > *buflen) {
+                CERROR("buflen %u < %u\n", *buflen, reslen);
+                return -EINVAL;
+        }
+
+        memcpy(res, *buf, reslen);
+        *buflen -= reslen;
+        *buf += reslen;
+
+        return 0;
+}
+
+/****************************************
+ * rpc_pipefs apis                      *
+ ****************************************/
+
+/*
+ * rpc_pipefs ->upcall handler: copy the not yet delivered part of @msg to
+ * the user buffer @dst of size @buflen.
+ *
+ * Only the bytes left after msg->copied are offered, so a message read in
+ * several pieces never copies past its end.  copy_to_user() returns the
+ * number of bytes it could NOT copy (never a negative value), so a fault
+ * is detected by nothing having been copied at all.
+ *
+ * Returns the number of bytes copied (msg->copied is advanced by the same
+ * amount and msg->errno cleared), or -EFAULT with msg->errno set when no
+ * byte of a non-empty request could be copied.
+ */
+static
+ssize_t gss_pipe_upcall(struct file *filp, struct rpc_pipe_msg *msg,
+                        char *dst, size_t buflen)
+{
+        char          *data = (char *)msg->data + msg->copied;
+        size_t         mlen = msg->len - msg->copied;
+        unsigned long  left;
+        ENTRY;
+
+        if (mlen > buflen)
+                mlen = buflen;
+
+        left = copy_to_user(dst, data, mlen);
+        if (mlen != 0 && left == mlen) {
+                /* nothing at all reached user space */
+                msg->errno = -EFAULT;
+                RETURN(-EFAULT);
+        }
+
+        mlen -= left;
+        msg->copied += mlen;
+        msg->errno = 0;
+        RETURN(mlen);
+}
+
+/*
+ * rpc_pipefs ->downcall handler: user space writes the result of a context
+ * negotiation back to the kernel.
+ *
+ * The @mlen bytes at @src are copied into a kernel buffer and parsed with
+ * a (pointer, remaining length) cursor.  The remaining length is a __u32,
+ * the type both simple_get_bytes() and rawobj_extract_local() take it as.
+ *
+ * The pending upcall is found by the leading sequence number, taken off
+ * the upcall list and its context updated: on success the handle and the
+ * mechanism context are imported and the context marked up to date; on
+ * failure the context is expired and, unless the failure is a plain
+ * -ERESTART with no gss error, flagged with the ERROR bit.
+ *
+ * Returns -ENOMEM if the kernel buffer cannot be allocated.  In every
+ * other case @mlen is returned, even when parsing failed (see the FIXME
+ * at the end of the function).
+ */
+static
+ssize_t gss_pipe_downcall(struct file *filp, const char *src, size_t mlen)
+{
+        struct rpc_inode        *rpci = RPC_I(filp->f_dentry->d_inode);
+        struct gss_upcall_msg   *gss_msg;
+        struct ptlrpc_cli_ctx   *ctx;
+        struct gss_cli_ctx      *gctx = NULL;
+        char                    *buf, *data;
+        __u32                    datalen;
+        int                      timeout, rc;
+        __u32                    mechidx, seq, gss_err;
+        ENTRY;
+
+        mechidx = (__u32) (long) rpci->private;
+        LASSERT(mechidx < MECH_MAX);
+
+        OBD_ALLOC(buf, mlen);
+        if (!buf)
+                RETURN(-ENOMEM);
+
+        if (copy_from_user(buf, src, mlen)) {
+                CERROR("failed copy user space data\n");
+                GOTO(out_free, rc = -EFAULT);
+        }
+        data = buf;
+        datalen = mlen;
+
+        /* data passed down format:
+         *  - seq
+         *  - timeout
+         *  - gc_win / error
+         *  - wire_ctx (rawobj)
+         *  - mech_ctx (rawobj)
+         */
+        if (simple_get_bytes(&data, &datalen, &seq, sizeof(seq))) {
+                CERROR("fail to get seq\n");
+                GOTO(out_free, rc = -EFAULT);
+        }
+
+        /* on success this holds a reference, dropped at out_msg */
+        gss_msg = gss_find_upcall(mechidx, seq);
+        if (!gss_msg) {
+                CERROR("upcall %u has aborted earlier\n", seq);
+                GOTO(out_free, rc = -EINVAL);
+        }
+
+        gss_unhash_msg(gss_msg);
+        gctx = gss_msg->gum_gctx;
+        LASSERT(gctx);
+        LASSERT(atomic_read(&gctx->gc_base.cc_refcount) > 0);
+
+        /* timeout is not in use for now */
+        if (simple_get_bytes(&data, &datalen, &timeout, sizeof(timeout)))
+                GOTO(out_msg, rc = -EFAULT);
+
+        /* lgssd signal an error by gc_win == 0 */
+        if (simple_get_bytes(&data, &datalen, &gctx->gc_win,
+                             sizeof(gctx->gc_win)))
+                GOTO(out_msg, rc = -EFAULT);
+
+        if (gctx->gc_win == 0) {
+                /* followed by:
+                 * - rpc error
+                 * - gss error
+                 */
+                if (simple_get_bytes(&data, &datalen, &rc, sizeof(rc)))
+                        GOTO(out_msg, rc = -EFAULT);
+                if (simple_get_bytes(&data, &datalen, &gss_err,sizeof(gss_err)))
+                        GOTO(out_msg, rc = -EFAULT);
+
+                if (rc == 0 && gss_err == GSS_S_COMPLETE) {
+                        CWARN("both rpc & gss error code not set\n");
+                        rc = -EPERM;
+                }
+        } else {
+                rawobj_t tmpobj;
+
+                /* handle */
+                if (rawobj_extract_local(&tmpobj, (__u32 **) &data, &datalen))
+                        GOTO(out_msg, rc = -EFAULT);
+                if (rawobj_dup(&gctx->gc_handle, &tmpobj))
+                        GOTO(out_msg, rc = -ENOMEM);
+
+                /* mechctx */
+                if (rawobj_extract_local(&tmpobj, (__u32 **) &data, &datalen))
+                        GOTO(out_msg, rc = -EFAULT);
+                gss_err = lgss_import_sec_context(&tmpobj,
+                                                  gss_msg->gum_gsec->gs_mech,
+                                                  &gctx->gc_mechctx);
+                rc = 0;
+        }
+
+        if (likely(rc == 0 && gss_err == GSS_S_COMPLETE)) {
+                gss_cli_ctx_uptodate(gctx);
+        } else {
+                ctx = &gctx->gc_base;
+                sptlrpc_cli_ctx_expire(ctx);
+                if (rc != -ERESTART || gss_err != GSS_S_COMPLETE)
+                        set_bit(PTLRPC_CTX_ERROR_BIT, &ctx->cc_flags);
+
+                CERROR("refresh ctx %p(uid %d) failed: %d/0x%08x: %s\n",
+                       ctx, ctx->cc_vcred.vc_uid, rc, gss_err,
+                       test_bit(PTLRPC_CTX_ERROR_BIT, &ctx->cc_flags) ?
+                       "fatal error" : "non-fatal");
+        }
+
+        rc = mlen;
+
+out_msg:
+        gss_release_msg(gss_msg);
+
+out_free:
+        OBD_FREE(buf, mlen);
+        /* FIXME
+         * hack pipefs: always return asked length unless all following
+         * downcalls might be messed up.
+         */
+        rc = mlen;
+        RETURN(rc);
+}
+
+/*
+ * rpc_pipefs ->destroy_msg handler.  Nothing is done for a message whose
+ * errno is not negative.  Otherwise the failure is logged, the message is
+ * taken off the upcall list and its context is failed.  A "is lgssd
+ * running?" hint is printed for -ETIMEDOUT/-EPIPE, at most once every 15
+ * seconds.
+ */
+static
+void gss_pipe_destroy_msg(struct rpc_pipe_msg *msg)
+{
+        struct gss_upcall_msg          *gmsg;
+        struct gss_upcall_msg_data     *payload;
+        static cfs_time_t               ratelimit = 0;
+        ENTRY;
+
+        LASSERT(list_empty(&msg->list));
+
+        /* normally errno is >= 0 */
+        if (msg->errno >= 0) {
+                EXIT;
+                return;
+        }
+
+        gmsg = container_of(msg, struct gss_upcall_msg, gum_base);
+        payload = &gmsg->gum_data;
+        LASSERT(atomic_read(&gmsg->gum_refcount) > 0);
+
+        CERROR("failed msg %p (seq %u, uid %u, svc %u, nid "LPX64", obd %.*s): "
+               "errno %d\n", msg, payload->gum_seq, payload->gum_uid,
+               payload->gum_svc, payload->gum_nid,
+               (int) sizeof(payload->gum_obd), payload->gum_obd, msg->errno);
+
+        /* take a reference of our own; it is dropped again at the end */
+        atomic_inc(&gmsg->gum_refcount);
+        gss_unhash_msg(gmsg);
+
+        switch (msg->errno) {
+        case -ETIMEDOUT:
+        case -EPIPE: {
+                cfs_time_t now = cfs_time_current_sec();
+
+                if (cfs_time_after(now, ratelimit)) {
+                        CWARN("upcall timed out, is lgssd running?\n");
+                        ratelimit = now + 15;
+                }
+                break;
+        }
+        default:
+                break;
+        }
+
+        gss_msg_fail_ctx(gmsg);
+        gss_release_msg(gmsg);
+        EXIT;
+}
+
+/*
+ * rpc_pipefs ->release_pipe handler: fail every upcall still listed for
+ * the pipe's mechanism.  Each message gets errno -EPIPE, is unhashed and
+ * has its context failed.  The list lock is dropped around
+ * gss_release_msg() and re-taken before the list is looked at again.
+ */
+static
+void gss_pipe_release(struct inode *inode)
+{
+        struct rpc_inode *rpci = RPC_I(inode);
+        struct list_head *pending;
+        __u32             mech;
+        ENTRY;
+
+        mech = (__u32) (long) rpci->private;
+        LASSERT(mech < MECH_MAX);
+        pending = &upcall_lists[mech];
+
+        upcall_list_lock(mech);
+        while (!list_empty(pending)) {
+                struct gss_upcall_msg      *gmsg;
+                struct gss_upcall_msg_data *payload;
+
+                gmsg = list_entry(pending->next,
+                                  struct gss_upcall_msg, gum_list);
+                payload = &gmsg->gum_data;
+                LASSERT(list_empty(&gmsg->gum_base.list));
+
+                CERROR("failing remaining msg %p:seq %u, uid %u, svc %u, "
+                       "nid "LPX64", obd %.*s\n", gmsg,
+                       payload->gum_seq, payload->gum_uid, payload->gum_svc,
+                       payload->gum_nid, (int) sizeof(payload->gum_obd),
+                       payload->gum_obd);
+
+                gmsg->gum_base.errno = -EPIPE;
+                /* our own ref, so the unhash cannot be the last put */
+                atomic_inc(&gmsg->gum_refcount);
+                gss_unhash_msg_nolock(gmsg);
+
+                gss_msg_fail_ctx(gmsg);
+
+                upcall_list_unlock(mech);
+                gss_release_msg(gmsg);
+                upcall_list_lock(mech);
+        }
+        upcall_list_unlock(mech);
+        EXIT;
+}
+
+/*
+ * rpc_pipefs operations of the lustre gss upcall pipe; handed to
+ * rpc_mkpipe() in gss_init_pipefs_upcall().
+ */
+static struct rpc_pipe_ops gss_upcall_ops = {
+        .upcall         = gss_pipe_upcall,
+        .downcall       = gss_pipe_downcall,
+        .destroy_msg    = gss_pipe_destroy_msg,
+        .release_pipe   = gss_pipe_release,
+};
+
+/****************************************
+ * upcall helper functions              *
+ ****************************************/
+
+/*
+ * Start a refresh of @ctx through the pipefs upcall: build an upcall
+ * message describing the context (uid, service, peer nid, obd name), put
+ * it on the per-mechanism upcall list and queue it on that mechanism's
+ * rpc pipe.
+ *
+ * The message holds a reference on @ctx (gum_gctx). On every failure
+ * path of this function that reference is dropped again before the
+ * message is freed.
+ *
+ * Returns 0 once the message has been queued, -EINVAL if the import has
+ * no connection, -EIO if the context already carries a status flag,
+ * -ENOMEM if the message cannot be allocated, or the error returned by
+ * rpc_queue_upcall().
+ */
+static
+int gss_ctx_refresh_pf(struct ptlrpc_cli_ctx *ctx)
+{
+        struct obd_import          *imp;
+        struct gss_sec             *gsec;
+        struct gss_upcall_msg      *gmsg;
+        int                         rc;
+        ENTRY;
+
+        might_sleep();
+
+        LASSERT(ctx->cc_sec);
+        LASSERT(ctx->cc_sec->ps_import);
+        LASSERT(ctx->cc_sec->ps_import->imp_obd);
+
+        imp = ctx->cc_sec->ps_import;
+        if (!imp->imp_connection) {
+                CERROR("import has no connection set\n");
+                RETURN(-EINVAL);
+        }
+
+        /* This only could happen when sysadmin set it dead/expired
+         * using lctl by force.
+         *
+         * Checked before anything is allocated or referenced, so that
+         * this path has nothing to undo.
+         */
+        if (ctx->cc_flags & PTLRPC_CTX_STATUS_MASK) {
+                CWARN("ctx %p(%u->%s) was set flags %lx unexpectedly\n",
+                      ctx, ctx->cc_vcred.vc_uid, sec2target_str(ctx->cc_sec),
+                      ctx->cc_flags);
+
+                LASSERT(!(ctx->cc_flags & PTLRPC_CTX_UPTODATE));
+                ctx->cc_flags |= PTLRPC_CTX_DEAD | PTLRPC_CTX_ERROR;
+
+                RETURN(-EIO);
+        }
+
+        gsec = container_of(ctx->cc_sec, struct gss_sec, gs_base);
+
+        OBD_ALLOC_PTR(gmsg);
+        if (!gmsg)
+                RETURN(-ENOMEM);
+
+        /* initialize pipefs base msg */
+        INIT_LIST_HEAD(&gmsg->gum_base.list);
+        gmsg->gum_base.data = &gmsg->gum_data;
+        gmsg->gum_base.len = sizeof(gmsg->gum_data);
+        gmsg->gum_base.copied = 0;
+        gmsg->gum_base.errno = 0;
+
+        /* init upcall msg */
+        atomic_set(&gmsg->gum_refcount, 1);
+        gmsg->gum_mechidx = mech_name2idx(gsec->gs_mech->gm_name);
+        gmsg->gum_gsec = gsec;
+        /* the message keeps its own reference on the context */
+        gmsg->gum_gctx = container_of(sptlrpc_cli_ctx_get(ctx),
+                                      struct gss_cli_ctx, gc_base);
+        gmsg->gum_data.gum_seq = upcall_get_sequence();
+        gmsg->gum_data.gum_uid = ctx->cc_vcred.vc_uid;
+        gmsg->gum_data.gum_gid = 0; /* not used for now */
+        gmsg->gum_data.gum_svc = import_to_gss_svc(imp);
+        gmsg->gum_data.gum_nid = imp->imp_connection->c_peer.nid;
+        /* gum_obd is a fixed-size field: it is not NUL terminated when the
+         * obd name fills it completely, and is printed with "%.*s" */
+        strncpy(gmsg->gum_data.gum_obd, imp->imp_obd->obd_name,
+                sizeof(gmsg->gum_data.gum_obd));
+
+        upcall_msg_enlist(gmsg);
+
+        rc = rpc_queue_upcall(de_pipes[gmsg->gum_mechidx]->d_inode,
+                              &gmsg->gum_base);
+        if (rc) {
+                CERROR("rpc_queue_upcall failed: %d\n", rc);
+
+                upcall_msg_delist(gmsg);
+                /* drop the reference taken for gum_gctx above, otherwise
+                 * the context could never be destroyed; the caller is
+                 * expected to still hold its own reference, so this cannot
+                 * be the final put */
+                sptlrpc_cli_ctx_put(ctx, 1);
+                OBD_FREE_PTR(gmsg);
+                RETURN(rc);
+        }
+
+        RETURN(0);
+}
+
+/*
+ * .refresh method of gss_pipefs_ctxops.
+ *
+ * A refresh on behalf of root (uid 0) first assigns a new reverse handle
+ * index to the owning gss_sec, so reverse contexts do not get confused;
+ * the actual refresh is then started by gss_ctx_refresh_pf().
+ */
+static
+int gss_cli_ctx_refresh_pf(struct ptlrpc_cli_ctx *ctx)
+{
+        struct gss_sec *owner;
+
+        if (ctx->cc_vcred.vc_uid != 0)
+                return gss_ctx_refresh_pf(ctx);
+
+        owner = container_of(ctx->cc_sec, struct gss_sec, gs_base);
+        owner->gs_rvs_hdl = gss_get_next_ctx_index();
+
+        return gss_ctx_refresh_pf(ctx);
+}
+
+/****************************************
+ * lustre gss pipefs policy             *
+ ****************************************/
+
+/*
+ * Client context method table of the pipefs flavor. refresh, validate and
+ * die use the pipefs specific (_pf) implementations; the remaining
+ * entries carry no _pf suffix (presumably the generic gss
+ * implementations shared with other gss policies).
+ */
+static struct ptlrpc_ctx_ops gss_pipefs_ctxops = {
+        .match                  = gss_cli_ctx_match,
+        .refresh                = gss_cli_ctx_refresh_pf,
+        .validate               = gss_cli_ctx_validate_pf,
+        .die                    = gss_cli_ctx_die_pf,
+        .display                = gss_cli_ctx_display,
+        .sign                   = gss_cli_ctx_sign,
+        .verify                 = gss_cli_ctx_verify,
+        .seal                   = gss_cli_ctx_seal,
+        .unseal                 = gss_cli_ctx_unseal,
+        .wrap_bulk              = gss_cli_ctx_wrap_bulk,
+        .unwrap_bulk            = gss_cli_ctx_unwrap_bulk,
+};
+
+/*
+ * Client side security operations of the pipefs policy (installed as
+ * sp_cops of gss_policy_pipefs): sec creation/destruction and context
+ * cache handling are pipefs specific (_pf), the remaining entries
+ * (reverse context install, request/reply buffers) carry no _pf suffix.
+ */
+static struct ptlrpc_sec_cops gss_sec_pipefs_cops = {
+        .create_sec             = gss_sec_create_pf,
+        .destroy_sec            = gss_sec_destroy_pf,
+        .lookup_ctx             = gss_sec_lookup_ctx_pf,
+        .release_ctx            = gss_sec_release_ctx_pf,
+        .flush_ctx_cache        = gss_sec_flush_ctx_cache_pf,
+        .install_rctx           = gss_sec_install_rctx,
+        .alloc_reqbuf           = gss_alloc_reqbuf,
+        .free_reqbuf            = gss_free_reqbuf,
+        .alloc_repbuf           = gss_alloc_repbuf,
+        .free_repbuf            = gss_free_repbuf,
+        .enlarge_reqbuf         = gss_enlarge_reqbuf,
+};
+
+/*
+ * Server side security operations of the pipefs policy (installed as
+ * sp_sops of gss_policy_pipefs). Only accept and install_rctx are pipefs
+ * specific (_pf); the other entries carry no _pf suffix.
+ */
+static struct ptlrpc_sec_sops gss_sec_pipefs_sops = {
+        .accept                 = gss_svc_accept_pf,
+        .invalidate_ctx         = gss_svc_invalidate_ctx,
+        .alloc_rs               = gss_svc_alloc_rs,
+        .authorize              = gss_svc_authorize,
+        .free_rs                = gss_svc_free_rs,
+        .free_ctx               = gss_svc_free_ctx,
+        .unwrap_bulk            = gss_svc_unwrap_bulk,
+        .wrap_bulk              = gss_svc_wrap_bulk,
+        .install_rctx           = gss_svc_install_rctx_pf,
+};
+
+/*
+ * The "gss.pipefs" security policy: ties the client and server operation
+ * tables above together. Registered by gss_init_pipefs() and unregistered
+ * by gss_exit_pipefs().
+ */
+static struct ptlrpc_sec_policy gss_policy_pipefs = {
+        .sp_owner               = THIS_MODULE,
+        .sp_name                = "gss.pipefs",
+        .sp_policy              = SPTLRPC_POLICY_GSS_PIPEFS,
+        .sp_cops                = &gss_sec_pipefs_cops,
+        .sp_sops                = &gss_sec_pipefs_sops,
+};
+
+/*
+ * Create the lustre directory and the krb5 upcall pipe in rpc_pipefs and
+ * set up the per-mechanism upcall list and lock.
+ *
+ * Returns 0 on success or a negative errno. A NULL return from
+ * rpc_mkpipe() is reported as -ENOMEM instead of being mistaken for
+ * success (PTR_ERR(NULL) is 0).
+ */
+static
+int __init gss_init_pipefs_upcall(void)
+{
+        struct dentry   *de;
+        long             rc;
+
+        /* pipe dir; an already existing directory is not an error */
+        de = rpc_mkdir(LUSTRE_PIPE_ROOT, NULL);
+        if (IS_ERR(de) && PTR_ERR(de) != -EEXIST) {
+                CERROR("Failed to create gss pipe dir: %ld\n", PTR_ERR(de));
+                return PTR_ERR(de);
+        }
+        /* FIXME
+         * hack pipefs: dput will sometimes cause oops during module unload
+         * and lgssd close the pipe fds.
+         */
+        //dput(de);
+
+        /* the list and lock are used by the pipe callbacks (see
+         * gss_pipe_release()), so they must be valid before the pipe
+         * becomes visible to user space */
+        INIT_LIST_HEAD(&upcall_lists[MECH_KRB5]);
+        upcall_locks[MECH_KRB5] = SPIN_LOCK_UNLOCKED;
+
+        /* krb5 mechanism */
+        de = rpc_mkpipe(LUSTRE_PIPE_KRB5, (void *) MECH_KRB5, &gss_upcall_ops,
+                        RPC_PIPE_WAIT_FOR_OPEN);
+        if (!de || IS_ERR(de)) {
+                rc = de ? PTR_ERR(de) : -ENOMEM;
+                CERROR("failed to make rpc_pipe %s: %ld\n",
+                       LUSTRE_PIPE_KRB5, rc);
+                rpc_rmdir(LUSTRE_PIPE_ROOT);
+                return rc;
+        }
+
+        de_pipes[MECH_KRB5] = de;
+
+        return 0;
+}
+
+/*
+ * Undo gss_init_pipefs_upcall(): forget the pipe dentries and remove the
+ * krb5 pipe and the lustre directory from rpc_pipefs. All upcall lists
+ * must be empty by now.
+ *
+ * Deliberately not marked __exit: gss_init_pipefs(), an __init function,
+ * calls this on its error path, and exit-section code may be discarded
+ * when the code is built in.
+ */
+static
+void gss_exit_pipefs_upcall(void)
+{
+        __u32   i;
+
+        for (i = 0; i < MECH_MAX; i++) {
+                LASSERT(list_empty(&upcall_lists[i]));
+                /*
+                 * dput pipe dentry here might cause lgssd oops.
+                 */
+                //dput(de_pipes[i]);
+                de_pipes[i] = NULL;
+        }
+
+        rpc_unlink(LUSTRE_PIPE_KRB5);
+        rpc_rmdir(LUSTRE_PIPE_ROOT);
+}
+
+/*
+ * Bring up the pipefs flavor: create the upcall pipe first, then register
+ * the "gss.pipefs" policy. If registration fails the pipe is torn down
+ * again. Returns 0 or the error of the failing step.
+ */
+int __init gss_init_pipefs(void)
+{
+        int err = gss_init_pipefs_upcall();
+
+        if (err == 0) {
+                err = sptlrpc_register_policy(&gss_policy_pipefs);
+                if (err != 0)
+                        gss_exit_pipefs_upcall();
+        }
+
+        return err;
+}
+
+/*
+ * Shut down the pipefs flavor in the reverse order of gss_init_pipefs():
+ * unregister the policy first, then remove the upcall pipe. Removing the
+ * pipe first would leave a registered policy whose refresh path
+ * dereferences de_pipes[], which gss_exit_pipefs_upcall() sets to NULL.
+ */
+void __exit gss_exit_pipefs(void)
+{
+        sptlrpc_unregister_policy(&gss_policy_pipefs);
+        gss_exit_pipefs_upcall();
+}
index 847cb4d..99facc7 100644 (file)
@@ -24,6 +24,8 @@
 #endif
 #define DEBUG_SUBSYSTEM S_SEC
 
+#include <linux/mutex.h>
+
 #include <obd.h>
 #include <obd_class.h>
 #include <obd_support.h>
@@ -170,6 +172,11 @@ int rawobj_extract_local(rawobj_t *obj, __u32 **buf, __u32 *buflen)
         return __rawobj_extract(obj, buf, buflen, 0, 1);
 }
 
+/*
+ * Same as rawobj_extract_local(), except that 1 instead of 0 is passed
+ * as the fourth argument of __rawobj_extract() (presumably the "allocate
+ * a copy of the data" switch, going by the function name - confirm
+ * against __rawobj_extract()).
+ */
+int rawobj_extract_local_alloc(rawobj_t *obj, __u32 **buf, __u32 *buflen)
+{
+        return __rawobj_extract(obj, buf, buflen, 1, 1);
+}
+
 int rawobj_from_netobj(rawobj_t *rawobj, netobj_t *netobj)
 {
         rawobj->len = netobj->len;
@@ -193,3 +200,21 @@ int rawobj_from_netobj_alloc(rawobj_t *rawobj, netobj_t *netobj)
         memcpy(rawobj->data, netobj->data, netobj->len);
         return 0;
 }
+
+/****************************************
+ * misc more                            *
+ ****************************************/
+
+/*
+ * Copy @reslen bytes from the front of the buffer described by
+ * *@buf / *@buflen into @res, then advance *@buf and shrink *@buflen by
+ * the same amount.
+ *
+ * Returns 0 on success, or -EINVAL (leaving everything untouched) when
+ * fewer than @reslen bytes remain.
+ */
+int buffer_extract_bytes(const void **buf, __u32 *buflen,
+                         void *res, __u32 reslen)
+{
+        const char *cursor = *buf;
+
+        if (reslen > *buflen) {
+                CERROR("buflen %u < %u\n", *buflen, reslen);
+                return -EINVAL;
+        }
+
+        memcpy(res, cursor, reslen);
+
+        /* consume what has just been copied */
+        *buflen -= reslen;
+        *buf = cursor + reslen;
+        return 0;
+}
index bc6c4f0..277fc23 100644 (file)
@@ -53,6 +53,7 @@
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/hash.h>
+#include <linux/mutex.h>
 #else
 #include <liblustre.h>
 #endif
@@ -948,7 +949,7 @@ void gss_svc_upcall_destroy_ctx(struct gss_svc_ctx *ctx)
         set_bit(CACHE_NEGATIVE, &rsc->h.flags);
 }
 
-int __init gss_svc_init_upcall(void)
+int __init gss_init_svc_upcall(void)
 {
         int     i;
 
@@ -984,7 +985,7 @@ int __init gss_svc_init_upcall(void)
         return 0;
 }
 
-void __exit gss_svc_exit_upcall(void)
+void __exit gss_exit_svc_upcall(void)
 {
         int rc;
 
index df50946..9523287 100644 (file)
@@ -1,8 +1,8 @@
 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
  * vim:expandtab:shiftwidth=8:tabstop=8:
  *
- *  Copyright (C) 2001-2003 Cluster File Systems, Inc.
- *   Author Peter Braam <braam@clusterfs.com>
+ *  Copyright (C) 2006 Cluster File Systems, Inc.
+ *   Author Eric Mei <ericm@clusterfs.com>
  *
  *   This file is part of the Lustre file system, http://www.lustre.org
  *   Lustre is a trademark of Cluster File Systems, Inc.
@@ -35,6 +35,7 @@
 #include <linux/dcache.h>
 #include <linux/fs.h>
 #include <linux/random.h>
+#include <linux/mutex.h>
 #else
 #include <liblustre.h>
 #endif
@@ -142,7 +143,9 @@ static struct lprocfs_vars gss_lprocfs_vars[] = {
 
 int gss_init_lproc(void)
 {
-        int rc;
+        struct proc_dir_entry  *ent;
+        int                     rc;
+
         gss_proc_root = lprocfs_register("gss", sptlrpc_proc_root,
                                          gss_lprocfs_vars, NULL);
 
@@ -153,6 +156,17 @@ int gss_init_lproc(void)
                 return rc;
         }
 
+        /* Make the "init_channel" proc entry writable.
+         *
+         * FIXME
+         * here we should hold proc_subdir_lock which is not exported
+         */
+        /* step through the sibling chain; without advancing ent the loop
+         * never terminates unless the first entry is the wanted one */
+        for (ent = gss_proc_root->subdir; ent != NULL; ent = ent->next) {
+                if (strcmp(ent->name, "init_channel") == 0) {
+                        ent->mode |= 0222;
+                        break;
+                }
+        }
+        
         return 0;
 }
 
index 121a5de..382c966 100644 (file)
@@ -2,7 +2,7 @@
  * vim:expandtab:shiftwidth=8:tabstop=8:
  *
  * Modifications for Lustre
- * Copyright 2004 - 2006, Cluster File Systems, Inc.
+ * Copyright 2004 - 2007, Cluster File Systems, Inc.
  * All rights reserved
  * Author: Eric Mei <ericm@clusterfs.com>
  */
@@ -56,6 +56,7 @@
 #include <linux/dcache.h>
 #include <linux/fs.h>
 #include <linux/random.h>
+#include <linux/mutex.h>
 #include <asm/atomic.h>
 #else
 #include <liblustre.h>
 
 #include <linux/crypto.h>
 
-/* pre-definition */
-static struct ptlrpc_sec_policy gss_policy;
-static struct ptlrpc_cli_ctx * gss_sec_create_ctx(struct ptlrpc_sec *sec,
-                                                  struct vfs_cred *vcred);
-static void gss_sec_destroy_ctx(struct ptlrpc_sec *sec,
-                                struct ptlrpc_cli_ctx *ctx);
 /********************************************
  * wire data swabber                        *
  ********************************************/
@@ -303,6 +298,55 @@ out_free:
  * gss client context manipulation helpers  *
  ********************************************/
 
+/*
+ * Mark @ctx dead. Returns 1 if this call made the transition (the
+ * UPTODATE bit is cleared and the reason is logged), 0 if the context had
+ * already been marked dead.
+ */
+int cli_ctx_expire(struct ptlrpc_cli_ctx *ctx)
+{
+        cfs_time_t cur;
+
+        LASSERT(atomic_read(&ctx->cc_refcount));
+
+        /* the DEAD bit was already set: nothing left to do */
+        if (test_and_set_bit(PTLRPC_CTX_DEAD_BIT, &ctx->cc_flags))
+                return 0;
+
+        clear_bit(PTLRPC_CTX_UPTODATE_BIT, &ctx->cc_flags);
+
+        cur = cfs_time_current_sec();
+        if (ctx->cc_expire && cfs_time_aftereq(cur, ctx->cc_expire)) {
+                /* the expiration time has really passed */
+                CWARN("ctx %p(%u->%s): get expired (%lds exceeds)\n",
+                      ctx, ctx->cc_vcred.vc_uid,
+                      sec2target_str(ctx->cc_sec),
+                      cfs_time_sub(cur, ctx->cc_expire));
+        } else {
+                /* killed before its time (or it never expires) */
+                CWARN("ctx %p(%u->%s): force to die (%lds remains)\n",
+                      ctx, ctx->cc_vcred.vc_uid,
+                      sec2target_str(ctx->cc_sec),
+                      ctx->cc_expire == 0 ? 0 :
+                      cfs_time_sub(ctx->cc_expire, cur));
+        }
+
+        return 1;
+}
+
+/*
+ * Return 1 if the context is dead, 0 otherwise. A context whose
+ * expiration time has been reached is marked dead on the way.
+ */
+int cli_ctx_check_death(struct ptlrpc_cli_ctx *ctx)
+{
+        if (unlikely(cli_ctx_is_dead(ctx)))
+                return 1;
+
+        /* An expiration of 0 means "never expires"; a newly created gss
+         * context may still have it while its upcall is in progress.
+         * Otherwise the context is alive as long as the expiration time
+         * lies in the future.
+         */
+        if (ctx->cc_expire == 0 ||
+            cfs_time_after(ctx->cc_expire, cfs_time_current_sec()))
+                return 0;
+
+        /* really expired: mark it dead */
+        cli_ctx_expire(ctx);
+        return 1;
+}
+
 void gss_cli_ctx_uptodate(struct gss_cli_ctx *gctx)
 {
         struct ptlrpc_cli_ctx *ctx = &gctx->gc_base;
@@ -499,23 +543,6 @@ int gss_cli_payload(struct ptlrpc_cli_ctx *ctx,
         return gss_estimate_payload(NULL, msgsize, privacy);
 }
 
-static
-int gss_cli_ctx_refresh(struct ptlrpc_cli_ctx *ctx)
-{
-        /* if we are refreshing for root, also update the reverse
-         * handle index, do not confuse reverse contexts.
-         */
-        if (ctx->cc_vcred.vc_uid == 0) {
-                struct gss_sec *gsec;
-
-                gsec = container_of(ctx->cc_sec, struct gss_sec, gs_base);
-                gsec->gs_rvs_hdl = gss_get_next_ctx_index();
-        }
-
-        return gss_ctx_refresh_pipefs(ctx);
-}
-
-static
 int gss_cli_ctx_match(struct ptlrpc_cli_ctx *ctx, struct vfs_cred *vcred)
 {
         return (ctx->cc_vcred.vc_uid == vcred->vc_uid);
@@ -532,8 +559,8 @@ void gss_cli_ctx_flags2str(unsigned long flags, char *buf, int bufsize)
                 strncat(buf, "dead,", bufsize);
         if (flags & PTLRPC_CTX_ERROR)
                 strncat(buf, "error,", bufsize);
-        if (flags & PTLRPC_CTX_HASHED)
-                strncat(buf, "hashed,", bufsize);
+        if (flags & PTLRPC_CTX_CACHED)
+                strncat(buf, "cached,", bufsize);
         if (flags & PTLRPC_CTX_ETERNAL)
                 strncat(buf, "eternal,", bufsize);
         if (buf[0] == '\0')
@@ -542,7 +569,6 @@ void gss_cli_ctx_flags2str(unsigned long flags, char *buf, int bufsize)
         buf[strlen(buf) - 1] = '\0';
 }
 
-static
 int gss_cli_ctx_display(struct ptlrpc_cli_ctx *ctx, char *buf, int bufsize)
 {
         struct gss_cli_ctx     *gctx;
@@ -571,7 +597,6 @@ int gss_cli_ctx_display(struct ptlrpc_cli_ctx *ctx, char *buf, int bufsize)
         return written;
 }
 
-static
 int gss_cli_ctx_sign(struct ptlrpc_cli_ctx *ctx,
                      struct ptlrpc_request *req)
 {
@@ -658,7 +683,7 @@ int gss_cli_ctx_handle_err_notify(struct ptlrpc_cli_ctx *ctx,
                       errhdr->gh_major == GSS_S_NO_CONTEXT ?
                       "NO_CONTEXT" : "BAD_SIG");
 
-                sptlrpc_ctx_expire(ctx);
+                sptlrpc_cli_ctx_expire(ctx);
                 /*
                  * we need replace the ctx right here, otherwise during
                  * resent we'll hit the logic in sptlrpc_req_refresh_ctx()
@@ -677,7 +702,6 @@ int gss_cli_ctx_handle_err_notify(struct ptlrpc_cli_ctx *ctx,
         return rc;
 }
 
-static
 int gss_cli_ctx_verify(struct ptlrpc_cli_ctx *ctx,
                        struct ptlrpc_request *req)
 {
@@ -769,7 +793,6 @@ int gss_cli_ctx_verify(struct ptlrpc_cli_ctx *ctx,
         RETURN(rc);
 }
 
-static
 int gss_cli_ctx_seal(struct ptlrpc_cli_ctx *ctx,
                      struct ptlrpc_request *req)
 {
@@ -882,7 +905,6 @@ err_free:
         RETURN(rc);
 }
 
-static
 int gss_cli_ctx_unseal(struct ptlrpc_cli_ctx *ctx,
                        struct ptlrpc_request *req)
 {
@@ -962,72 +984,9 @@ int gss_cli_ctx_unseal(struct ptlrpc_cli_ctx *ctx,
         RETURN(rc);
 }
 
-static struct ptlrpc_ctx_ops gss_ctxops = {
-        .refresh        = gss_cli_ctx_refresh,
-        .match          = gss_cli_ctx_match,
-        .display        = gss_cli_ctx_display,
-        .sign           = gss_cli_ctx_sign,
-        .verify         = gss_cli_ctx_verify,
-        .seal           = gss_cli_ctx_seal,
-        .unseal         = gss_cli_ctx_unseal,
-        .wrap_bulk      = gss_cli_ctx_wrap_bulk,
-        .unwrap_bulk    = gss_cli_ctx_unwrap_bulk,
-};
-
 /*********************************************
  * reverse context installation              *
  *********************************************/
-static
-int gss_install_rvs_cli_ctx(struct gss_sec *gsec,
-                            struct ptlrpc_svc_ctx *svc_ctx)
-{
-        struct vfs_cred          vcred;
-        struct gss_svc_reqctx   *grctx;
-        struct ptlrpc_cli_ctx   *cli_ctx;
-        struct gss_cli_ctx      *cli_gctx;
-        struct gss_ctx          *mechctx = NULL;
-        __u32                    major;
-        int                      rc;
-        ENTRY;
-
-        vcred.vc_uid = 0;
-        vcred.vc_gid = 0;
-
-        cli_ctx = gss_sec_create_ctx(&gsec->gs_base, &vcred);
-        if (!cli_ctx)
-                RETURN(-ENOMEM);
-
-        grctx = container_of(svc_ctx, struct gss_svc_reqctx, src_base);
-        LASSERT(grctx);
-        LASSERT(grctx->src_ctx);
-        LASSERT(grctx->src_ctx->gsc_mechctx);
-
-        major = lgss_copy_reverse_context(grctx->src_ctx->gsc_mechctx, &mechctx);
-        if (major != GSS_S_COMPLETE)
-                GOTO(err_ctx, rc = -ENOMEM);
-
-        cli_gctx = container_of(cli_ctx, struct gss_cli_ctx, gc_base);
-
-        cli_gctx->gc_proc = PTLRPC_GSS_PROC_DATA;
-        cli_gctx->gc_win = GSS_SEQ_WIN;
-        atomic_set(&cli_gctx->gc_seq, 0);
-
-        if (rawobj_dup(&cli_gctx->gc_handle, &grctx->src_ctx->gsc_rvs_hdl))
-                GOTO(err_mechctx, rc = -ENOMEM);
-
-        cli_gctx->gc_mechctx = mechctx;
-        gss_cli_ctx_uptodate(cli_gctx);
-
-        sptlrpc_ctx_replace(&gsec->gs_base, cli_ctx);
-        RETURN(0);
-
-err_mechctx:
-        lgss_delete_sec_context(&mechctx);
-err_ctx:
-        gss_sec_destroy_ctx(cli_ctx->cc_sec, cli_ctx);
-        return rc;
-}
-
 
 static inline
 int gss_install_rvs_svc_ctx(struct obd_import *imp,
@@ -1040,121 +999,43 @@ int gss_install_rvs_svc_ctx(struct obd_import *imp,
 /*********************************************
  * GSS security APIs                         *
  *********************************************/
-
-static
-struct ptlrpc_cli_ctx * gss_sec_create_ctx(struct ptlrpc_sec *sec,
-                                           struct vfs_cred *vcred)
+int gss_sec_create_common(struct gss_sec *gsec,
+                          struct ptlrpc_sec_policy *policy,
+                          struct obd_import *imp,
+                          struct ptlrpc_svc_ctx *ctx,
+                          __u32 flavor,
+                          unsigned long flags)
 {
-        struct gss_cli_ctx    *gctx;
-        struct ptlrpc_cli_ctx *ctx;
-        ENTRY;
-
-        OBD_ALLOC_PTR(gctx);
-        if (!gctx)
-                RETURN(NULL);
-
-        gctx->gc_win = 0;
-        atomic_set(&gctx->gc_seq, 0);
-
-        ctx = &gctx->gc_base;
-        INIT_HLIST_NODE(&ctx->cc_hash);
-        atomic_set(&ctx->cc_refcount, 0);
-        ctx->cc_sec = sec;
-        ctx->cc_ops = &gss_ctxops;
-        ctx->cc_expire = 0;
-        ctx->cc_flags = 0;
-        ctx->cc_vcred = *vcred;
-        spin_lock_init(&ctx->cc_lock);
-        INIT_LIST_HEAD(&ctx->cc_req_list);
-
-        CDEBUG(D_SEC, "create a gss cred at %p(uid %u)\n", ctx, vcred->vc_uid);
-        RETURN(ctx);
-}
-
-static
-void gss_sec_destroy_ctx(struct ptlrpc_sec *sec, struct ptlrpc_cli_ctx *ctx)
-{
-        struct gss_cli_ctx *gctx;
-        ENTRY;
-
-        LASSERT(ctx);
-        LASSERT(atomic_read(&ctx->cc_refcount) == 0);
-
-        gctx = container_of(ctx, struct gss_cli_ctx, gc_base);
-        if (gctx->gc_mechctx) {
-                gss_do_ctx_fini_rpc(gctx);
-                gss_cli_ctx_finalize(gctx);
-        }
-
-        CWARN("%s@%p: destroy ctx %p(%u->%s)\n",
-              ctx->cc_sec->ps_policy->sp_name, ctx->cc_sec,
-              ctx, ctx->cc_vcred.vc_uid, sec2target_str(ctx->cc_sec));
-
-        OBD_FREE_PTR(gctx);
-        EXIT;
-}
-
-#define GSS_CCACHE_SIZE         (32)
-
-static
-struct ptlrpc_sec* gss_sec_create(struct obd_import *imp,
-                                  struct ptlrpc_svc_ctx *ctx,
-                                  __u32 flavor,
-                                  unsigned long flags)
-{
-        struct gss_sec      *gsec;
         struct ptlrpc_sec   *sec;
-        int                  alloc_size, cache_size, i;
-        ENTRY;
 
         LASSERT(imp);
         LASSERT(SEC_FLAVOR_POLICY(flavor) == SPTLRPC_POLICY_GSS);
 
-        if (ctx || flags & (PTLRPC_SEC_FL_ROOTONLY | PTLRPC_SEC_FL_REVERSE))
-                cache_size = 1;
-        else
-                cache_size = GSS_CCACHE_SIZE;
-
-        alloc_size = sizeof(*gsec) + sizeof(struct list_head) * cache_size;
-
-        OBD_ALLOC(gsec, alloc_size);
-        if (!gsec)
-                RETURN(NULL);
-
         gsec->gs_mech = lgss_subflavor_to_mech(SEC_FLAVOR_SUB(flavor));
         if (!gsec->gs_mech) {
                 CERROR("gss backend 0x%x not found\n", SEC_FLAVOR_SUB(flavor));
-                goto err_free;
+                return -EOPNOTSUPP;
         }
 
         spin_lock_init(&gsec->gs_lock);
-        gsec->gs_rvs_hdl = 0ULL; /* will be updated later */
+        gsec->gs_rvs_hdl = 0ULL;
 
+        /* initialize upper ptlrpc_sec */
         sec = &gsec->gs_base;
-        sec->ps_policy = &gss_policy;
+        sec->ps_policy = policy;
         sec->ps_flavor = flavor;
         sec->ps_flags = flags;
         sec->ps_import = class_import_get(imp);
         sec->ps_lock = SPIN_LOCK_UNLOCKED;
-        sec->ps_ccache_size = cache_size;
-        sec->ps_ccache = (struct hlist_head *) (gsec + 1);
         atomic_set(&sec->ps_busy, 0);
-
-        for (i = 0; i < cache_size; i++)
-                INIT_HLIST_HEAD(&sec->ps_ccache[i]);
+        INIT_LIST_HEAD(&sec->ps_gc_list);
 
         if (!ctx) {
-                if (gss_sec_upcall_init(gsec))
-                        goto err_mech;
-
-                sec->ps_gc_interval = 30 * 60; /* 30 minutes */
+                sec->ps_gc_interval = GSS_GC_INTERVAL;
                 sec->ps_gc_next = cfs_time_current_sec() + sec->ps_gc_interval;
         } else {
                 LASSERT(sec->ps_flags & PTLRPC_SEC_FL_REVERSE);
 
-                if (gss_install_rvs_cli_ctx(gsec, ctx))
-                        goto err_mech;
-
                 /* never do gc on reverse sec */
                 sec->ps_gc_interval = 0;
                 sec->ps_gc_next = 0;
@@ -1165,34 +1046,23 @@ struct ptlrpc_sec* gss_sec_create(struct obd_import *imp,
                 sptlrpc_enc_pool_add_user();
 
         CWARN("create %s%s@%p\n", (ctx ? "reverse " : ""),
-              gss_policy.sp_name, gsec);
-        RETURN(sec);
-
-err_mech:
-        lgss_mech_put(gsec->gs_mech);
-err_free:
-        OBD_FREE(gsec, alloc_size);
-        RETURN(NULL);
+              policy->sp_name, gsec);
+        return 0;
 }
 
-static
-void gss_sec_destroy(struct ptlrpc_sec *sec)
+void gss_sec_destroy_common(struct gss_sec *gsec)
 {
-        struct gss_sec *gsec;
+        struct ptlrpc_sec      *sec = &gsec->gs_base;
         ENTRY;
 
-        gsec = container_of(sec, struct gss_sec, gs_base);
-        CWARN("destroy %s@%p\n", gss_policy.sp_name, gsec);
-
-        LASSERT(gsec->gs_mech);
         LASSERT(sec->ps_import);
-        LASSERT(sec->ps_ccache);
-        LASSERT(sec->ps_ccache_size);
         LASSERT(atomic_read(&sec->ps_refcount) == 0);
         LASSERT(atomic_read(&sec->ps_busy) == 0);
 
-        gss_sec_upcall_cleanup(gsec);
-        lgss_mech_put(gsec->gs_mech);
+        if (gsec->gs_mech) {
+                lgss_mech_put(gsec->gs_mech);
+                gsec->gs_mech = NULL;
+        }
 
         class_import_put(sec->ps_import);
 
@@ -1200,11 +1070,68 @@ void gss_sec_destroy(struct ptlrpc_sec *sec)
             sec->ps_flags & PTLRPC_SEC_FL_BULK)
                 sptlrpc_enc_pool_del_user();
 
-        OBD_FREE(gsec, sizeof(*gsec) +
-                       sizeof(struct list_head) * sec->ps_ccache_size);
         EXIT;
 }
 
+/*
+ * Initialize the parts of a client context common to all gss policies:
+ * the generic ptlrpc_cli_ctx fields (owner sec, ops, credential, flags,
+ * refcount, lock, lists) and the gss sequence state. The context takes a
+ * busy reference on @sec. Always returns 0.
+ */
+int gss_cli_ctx_init_common(struct ptlrpc_sec *sec,
+                            struct ptlrpc_cli_ctx *ctx,
+                            struct ptlrpc_ctx_ops *ctxops,
+                            struct vfs_cred *vcred)
+{
+        struct gss_cli_ctx *gss_part = ctx2gctx(ctx);
+
+        /* generic client context */
+        ctx->cc_sec = sec;
+        ctx->cc_ops = ctxops;
+        ctx->cc_vcred = *vcred;
+        ctx->cc_flags = PTLRPC_CTX_NEW;
+        ctx->cc_expire = 0;
+        atomic_set(&ctx->cc_refcount, 0);
+        spin_lock_init(&ctx->cc_lock);
+        INIT_HLIST_NODE(&ctx->cc_hash);
+        INIT_LIST_HEAD(&ctx->cc_req_list);
+
+        /* gss sequence window and counter */
+        gss_part->gc_win = 0;
+        atomic_set(&gss_part->gc_seq, 0);
+
+        /* take a ref on belonging sec */
+        atomic_inc(&sec->ps_busy);
+
+        CWARN("%s@%p: create ctx %p(%u->%s)\n",
+              sec->ps_policy->sp_name, ctx->cc_sec,
+              ctx, ctx->cc_vcred.vc_uid, sec2target_str(ctx->cc_sec));
+        return 0;
+}
+
+/*
+ * Common teardown of a gss client context: if a mechanism context is
+ * attached, send the context fini rpc and finalize the gss context, then
+ * drop the busy reference the context held on @sec.
+ *
+ * return 1 if the busy count of the sec dropped to zero, then usually caller
+ * should destroy the sec too; otherwise return 0.
+ */
+int gss_cli_ctx_fini_common(struct ptlrpc_sec *sec,
+                            struct ptlrpc_cli_ctx *ctx)
+{
+        struct gss_cli_ctx *gctx = ctx2gctx(ctx);
+
+        LASSERT(ctx->cc_sec == sec);
+        LASSERT(atomic_read(&ctx->cc_refcount) == 0);
+        LASSERT(atomic_read(&sec->ps_busy) > 0);
+
+        /* only a context with a mechanism context attached needs the
+         * fini rpc and finalization */
+        if (gctx->gc_mechctx) {
+                gss_do_ctx_fini_rpc(gctx);
+                gss_cli_ctx_finalize(gctx);
+        }
+
+        CWARN("%s@%p: destroy ctx %p(%u->%s)\n",
+              sec->ps_policy->sp_name, ctx->cc_sec,
+              ctx, ctx->cc_vcred.vc_uid, sec2target_str(ctx->cc_sec));
+
+        /* this was the last busy reference; the sec must no longer be
+         * referenced by anyone else at this point */
+        if (atomic_dec_and_test(&sec->ps_busy)) {
+                LASSERT(atomic_read(&sec->ps_refcount) == 0);
+                return 1;
+        }
+
+        return 0;
+}
+
 static
 int gss_alloc_reqbuf_auth(struct ptlrpc_sec *sec,
                           struct ptlrpc_request *req,
@@ -1358,7 +1285,6 @@ int gss_alloc_reqbuf_priv(struct ptlrpc_sec *sec,
  * NOTE: any change of request buffer allocation should also consider
  * changing enlarge_reqbuf() series functions.
  */
-static
 int gss_alloc_reqbuf(struct ptlrpc_sec *sec,
                      struct ptlrpc_request *req,
                      int msgsize)
@@ -1378,7 +1304,6 @@ int gss_alloc_reqbuf(struct ptlrpc_sec *sec,
         return 0;
 }
 
-static
 void gss_free_reqbuf(struct ptlrpc_sec *sec,
                      struct ptlrpc_request *req)
 {
@@ -1415,7 +1340,6 @@ release_reqbuf:
         EXIT;
 }
 
-static
 int gss_alloc_repbuf(struct ptlrpc_sec *sec,
                      struct ptlrpc_request *req,
                      int msgsize)
@@ -1474,7 +1398,6 @@ int gss_alloc_repbuf(struct ptlrpc_sec *sec,
         return 0;
 }
 
-static
 void gss_free_repbuf(struct ptlrpc_sec *sec,
                      struct ptlrpc_request *req)
 {
@@ -1677,7 +1600,6 @@ int gss_enlarge_reqbuf_priv(struct ptlrpc_sec *sec,
         RETURN(0);
 }
 
-static
 int gss_enlarge_reqbuf(struct ptlrpc_sec *sec,
                        struct ptlrpc_request *req,
                        int segment, int newsize)
@@ -1695,7 +1617,6 @@ int gss_enlarge_reqbuf(struct ptlrpc_sec *sec,
         }
 }
 
-static
 int gss_sec_install_rctx(struct obd_import *imp,
                          struct ptlrpc_sec *sec,
                          struct ptlrpc_cli_ctx *ctx)
@@ -1711,19 +1632,6 @@ int gss_sec_install_rctx(struct obd_import *imp,
         return rc;
 }
 
-static struct ptlrpc_sec_cops gss_sec_cops = {
-        .create_sec             = gss_sec_create,
-        .destroy_sec            = gss_sec_destroy,
-        .create_ctx             = gss_sec_create_ctx,
-        .destroy_ctx            = gss_sec_destroy_ctx,
-        .install_rctx           = gss_sec_install_rctx,
-        .alloc_reqbuf           = gss_alloc_reqbuf,
-        .free_reqbuf            = gss_free_reqbuf,
-        .alloc_repbuf           = gss_alloc_repbuf,
-        .free_repbuf            = gss_free_repbuf,
-        .enlarge_reqbuf         = gss_enlarge_reqbuf,
-};
-
 /********************************************
  * server side API                          *
  ********************************************/
@@ -2146,8 +2054,7 @@ int gss_svc_handle_destroy(struct ptlrpc_request *req,
         RETURN(SECSVC_OK);
 }
 
-static
-int gss_svc_accept(struct ptlrpc_request *req)
+int gss_svc_accept(struct ptlrpc_sec_policy *policy, struct ptlrpc_request *req)
 {
         struct gss_header      *ghdr;
         struct gss_svc_reqctx  *grctx;
@@ -2182,7 +2089,7 @@ int gss_svc_accept(struct ptlrpc_request *req)
                 CERROR("fail to alloc svc reqctx\n");
                 RETURN(SECSVC_DROP);
         }
-        grctx->src_base.sc_policy = sptlrpc_policy_get(&gss_policy);
+        grctx->src_base.sc_policy = sptlrpc_policy_get(policy);
         atomic_set(&grctx->src_base.sc_refcount, 1);
         req->rq_svc_ctx = &grctx->src_base;
         gw = &grctx->src_wirectx;
@@ -2236,7 +2143,6 @@ int gss_svc_accept(struct ptlrpc_request *req)
         RETURN(rc);
 }
 
-static
 void gss_svc_invalidate_ctx(struct ptlrpc_svc_ctx *svc_ctx)
 {
         struct gss_svc_reqctx  *grctx;
@@ -2265,7 +2171,6 @@ int gss_svc_payload(struct gss_svc_reqctx *grctx, int msgsize, int privacy)
         return gss_estimate_payload(NULL, msgsize, privacy);
 }
 
-static
 int gss_svc_alloc_rs(struct ptlrpc_request *req, int msglen)
 {
         struct gss_svc_reqctx       *grctx;
@@ -2504,7 +2409,6 @@ out:
         RETURN(rc);
 }
 
-static
 void gss_svc_free_rs(struct ptlrpc_reply_state *rs)
 {
         struct gss_svc_reqctx *grctx;
@@ -2519,44 +2423,44 @@ void gss_svc_free_rs(struct ptlrpc_reply_state *rs)
                 OBD_FREE(rs, rs->rs_size);
 }
 
-static
 void gss_svc_free_ctx(struct ptlrpc_svc_ctx *ctx)
 {
         LASSERT(atomic_read(&ctx->sc_refcount) == 0);
         gss_svc_reqctx_free(gss_svc_ctx2reqctx(ctx));
 }
 
-static
-int gss_svc_install_rctx(struct obd_import *imp, struct ptlrpc_svc_ctx *ctx)
+int gss_copy_rvc_cli_ctx(struct ptlrpc_cli_ctx *cli_ctx,
+                         struct ptlrpc_svc_ctx *svc_ctx)
 {
-        struct gss_sec *gsec;
+        struct gss_cli_ctx     *cli_gctx = ctx2gctx(cli_ctx);
+        struct gss_svc_reqctx  *grctx;
+        struct gss_ctx         *mechctx = NULL;
+
+        cli_gctx->gc_proc = PTLRPC_GSS_PROC_DATA;
+        cli_gctx->gc_win = GSS_SEQ_WIN;
+        atomic_set(&cli_gctx->gc_seq, 0);
 
-        LASSERT(imp->imp_sec);
-        LASSERT(ctx);
+        grctx = container_of(svc_ctx, struct gss_svc_reqctx, src_base);
+        LASSERT(grctx->src_ctx);
+        LASSERT(grctx->src_ctx->gsc_mechctx);
 
-        gsec = container_of(imp->imp_sec, struct gss_sec, gs_base);
-        return gss_install_rvs_cli_ctx(gsec, ctx);
-}
+        if (lgss_copy_reverse_context(grctx->src_ctx->gsc_mechctx, &mechctx) !=
+            GSS_S_COMPLETE) {
+                CERROR("failed to copy mech context\n");
+                return -ENOMEM;
+        }
 
-static struct ptlrpc_sec_sops gss_sec_sops = {
-        .accept                 = gss_svc_accept,
-        .invalidate_ctx         = gss_svc_invalidate_ctx,
-        .alloc_rs               = gss_svc_alloc_rs,
-        .authorize              = gss_svc_authorize,
-        .free_rs                = gss_svc_free_rs,
-        .free_ctx               = gss_svc_free_ctx,
-        .unwrap_bulk            = gss_svc_unwrap_bulk,
-        .wrap_bulk              = gss_svc_wrap_bulk,
-        .install_rctx           = gss_svc_install_rctx,
-};
-
-static struct ptlrpc_sec_policy gss_policy = {
-        .sp_owner               = THIS_MODULE,
-        .sp_name                = "sec.gss",
-        .sp_policy              = SPTLRPC_POLICY_GSS,
-        .sp_cops                = &gss_sec_cops,
-        .sp_sops                = &gss_sec_sops,
-};
+        if (rawobj_dup(&cli_gctx->gc_handle, &grctx->src_ctx->gsc_rvs_hdl)) {
+                CERROR("failed to dup reverse handle\n");
+                lgss_delete_sec_context(&mechctx);
+                return -ENOMEM;
+        }
+
+        cli_gctx->gc_mechctx = mechctx;
+        gss_cli_ctx_uptodate(cli_gctx);
+
+        return 0;
+}
 
 int __init sptlrpc_gss_init(void)
 {
@@ -2566,27 +2470,46 @@ int __init sptlrpc_gss_init(void)
         if (rc)
                 return rc;
 
-        rc = gss_init_upcall();
+        rc = gss_init_cli_upcall();
         if (rc)
                 goto out_lproc;
 
+        rc = gss_init_svc_upcall();
+        if (rc)
+                goto out_cli_upcall;
+
         rc = init_kerberos_module();
         if (rc)
-                goto out_upcall;
+                goto out_svc_upcall;
 
         /*
          * register policy after all other stuff be intialized, because it
          * might be in used immediately after the registration.
          */
-        rc = sptlrpc_register_policy(&gss_policy);
+
+        rc = gss_init_keyring();
         if (rc)
                 goto out_kerberos;
 
+#ifdef HAVE_GSS_PIPEFS
+        rc = gss_init_pipefs();
+        if (rc)
+                goto out_keyring;
+#endif
+
         return 0;
+
+#ifdef HAVE_GSS_PIPEFS
+out_keyring:
+        gss_exit_keyring();
+#endif
+
 out_kerberos:
         cleanup_kerberos_module();
-out_upcall:
-        gss_exit_upcall();
+out_svc_upcall:
+        gss_exit_svc_upcall();
+out_cli_upcall:
+        gss_exit_cli_upcall();
 out_lproc:
         gss_exit_lproc();
         return rc;
@@ -2594,9 +2517,13 @@ out_lproc:
 
 static void __exit sptlrpc_gss_exit(void)
 {
-        sptlrpc_unregister_policy(&gss_policy);
+        gss_exit_keyring();
+#ifdef HAVE_GSS_PIPEFS
+        gss_exit_pipefs();
+#endif
         cleanup_kerberos_module();
-        gss_exit_upcall();
+        gss_exit_svc_upcall();
+        gss_exit_cli_upcall();
         gss_exit_lproc();
 }
 
index 5d09f96..c2610aa 100644 (file)
@@ -196,8 +196,14 @@ const char * sptlrpc_bulk_csum_alg2name(__u32 csum_alg);
 int  sptlrpc_lproc_init(void);
 void sptlrpc_lproc_fini(void);
 
+/* sec_gc.c */
+void sptlrpc_gc_add_sec(struct ptlrpc_sec *sec);
+void sptlrpc_gc_del_sec(struct ptlrpc_sec *sec);
+int sptlrpc_gc_start_thread(void);
+void sptlrpc_gc_stop_thread(void);
+
 /* sec.c */
-int  sptlrpc_init(void);
-void sptlrpc_fini(void);
+int  __init sptlrpc_init(void);
+void __exit sptlrpc_fini(void);
 
 #endif /* PTLRPC_INTERNAL_H */
index 2aab749..76b40c5 100644 (file)
@@ -30,6 +30,7 @@
 #include <libcfs/list.h>
 #else
 #include <linux/crypto.h>
+#include <linux/key.h>
 #endif
 
 #include <obd.h>
 
 #include "ptlrpc_internal.h"
 
-static void sptlrpc_sec_destroy(struct ptlrpc_sec *sec);
-static int sptlrpc_sec_destroy_ctx(struct ptlrpc_sec *sec,
-                                   struct ptlrpc_cli_ctx *ctx);
-static void sptlrpc_ctx_refresh(struct ptlrpc_cli_ctx *ctx);
-
 /***********************************************
  * policy registers                            *
  ***********************************************/
@@ -129,7 +125,9 @@ again:
 #ifdef CONFIG_KMOD
         /* if failure, try to load gss module, once */
         if (unlikely(policy == NULL) &&
-            number == SPTLRPC_POLICY_GSS && flag == 0) {
+            flag == 0 &&
+            (number == SPTLRPC_POLICY_GSS ||
+             number == SPTLRPC_POLICY_GSS_PIPEFS)) {
                 mutex_down(&load_mutex);
                 if (atomic_read(&loaded) == 0) {
                         if (request_module("ptlrpc_gss") != 0)
@@ -187,359 +185,18 @@ char *sptlrpc_flavor2name(ptlrpc_sec_flavor_t flavor)
 }
 EXPORT_SYMBOL(sptlrpc_flavor2name);
 
-/***********************************************
- * context helpers                             *
- * internal APIs                               *
- * cache management                            *
- ***********************************************/
-
-static inline
-unsigned long ctx_status(struct ptlrpc_cli_ctx *ctx)
-{
-        smp_mb();
-        return (ctx->cc_flags & PTLRPC_CTX_STATUS_MASK);
-}
-
-static inline
-int ctx_is_uptodate(struct ptlrpc_cli_ctx *ctx)
-{
-        return (ctx_status(ctx) == PTLRPC_CTX_UPTODATE);
-}
-
-static inline
-int ctx_is_refreshed(struct ptlrpc_cli_ctx *ctx)
-{
-        return (ctx_status(ctx) != 0);
-}
-
-static inline
-int ctx_is_dead(struct ptlrpc_cli_ctx *ctx)
-{
-        smp_mb();
-        return ((ctx->cc_flags & (PTLRPC_CTX_DEAD | PTLRPC_CTX_ERROR)) != 0);
-}
-
-static inline
-int ctx_is_eternal(struct ptlrpc_cli_ctx *ctx)
-{
-        smp_mb();
-        return ((ctx->cc_flags & PTLRPC_CTX_ETERNAL) != 0);
-}
-
-static
-int ctx_expire(struct ptlrpc_cli_ctx *ctx)
-{
-        LASSERT(atomic_read(&ctx->cc_refcount));
-
-        if (!test_and_set_bit(PTLRPC_CTX_DEAD_BIT, &ctx->cc_flags)) {
-                cfs_time_t now = cfs_time_current_sec();
-
-                smp_mb();
-                clear_bit(PTLRPC_CTX_UPTODATE_BIT, &ctx->cc_flags);
-
-                if (ctx->cc_expire && cfs_time_aftereq(now, ctx->cc_expire))
-                        CWARN("ctx %p(%u->%s): get expired (%lds exceeds)\n",
-                              ctx, ctx->cc_vcred.vc_uid,
-                              sec2target_str(ctx->cc_sec),
-                              cfs_time_sub(now, ctx->cc_expire));
-                else
-                        CWARN("ctx %p(%u->%s): force to die (%lds remains)\n",
-                              ctx, ctx->cc_vcred.vc_uid,
-                              sec2target_str(ctx->cc_sec),
-                              ctx->cc_expire == 0 ? 0 :
-                              cfs_time_sub(ctx->cc_expire, now));
-
-                return 1;
-        }
-        return 0;
-}
-
-static
-void ctx_enhash(struct ptlrpc_cli_ctx *ctx, struct hlist_head *hash)
-{
-        set_bit(PTLRPC_CTX_HASHED_BIT, &ctx->cc_flags);
-        atomic_inc(&ctx->cc_refcount);
-        hlist_add_head(&ctx->cc_hash, hash);
-}
-
-static
-void ctx_unhash(struct ptlrpc_cli_ctx *ctx, struct hlist_head *freelist)
-{
-        LASSERT_SPIN_LOCKED(&ctx->cc_sec->ps_lock);
-        LASSERT(atomic_read(&ctx->cc_refcount) > 0);
-        LASSERT(test_bit(PTLRPC_CTX_HASHED_BIT, &ctx->cc_flags));
-        LASSERT(!hlist_unhashed(&ctx->cc_hash));
-
-        clear_bit(PTLRPC_CTX_HASHED_BIT, &ctx->cc_flags);
-
-        if (atomic_dec_and_test(&ctx->cc_refcount)) {
-                __hlist_del(&ctx->cc_hash);
-                hlist_add_head(&ctx->cc_hash, freelist);
-        } else
-                hlist_del_init(&ctx->cc_hash);
-}
-
-/*
- * return 1 if the context is dead.
- */
-static
-int ctx_check_death(struct ptlrpc_cli_ctx *ctx, struct hlist_head *freelist)
-{
-        if (unlikely(ctx_is_dead(ctx)))
-                goto unhash;
-
-        /* expire is 0 means never expire. a newly created gss context
-         * which during upcall also has 0 expiration
-         */
-        smp_mb();
-        if (ctx->cc_expire == 0)
-                return 0;
-
-        /* check real expiration */
-        smp_mb();
-        if (cfs_time_after(ctx->cc_expire, cfs_time_current_sec()))
-                return 0;
-
-        ctx_expire(ctx);
-
-unhash:
-        if (freelist)
-                ctx_unhash(ctx, freelist);
-
-        return 1;
-}
-
-static inline
-int ctx_check_death_locked(struct ptlrpc_cli_ctx *ctx,
-                           struct hlist_head *freelist)
-{
-        LASSERT(ctx->cc_sec);
-        LASSERT(atomic_read(&ctx->cc_refcount) > 0);
-        LASSERT_SPIN_LOCKED(&ctx->cc_sec->ps_lock);
-        LASSERT(test_bit(PTLRPC_CTX_HASHED_BIT, &ctx->cc_flags));
-
-        return ctx_check_death(ctx, freelist);
-}
-
-static
-int ctx_check_uptodate(struct ptlrpc_cli_ctx *ctx)
-{
-        LASSERT(ctx->cc_sec);
-        LASSERT(atomic_read(&ctx->cc_refcount) > 0);
-
-        if (!ctx_check_death(ctx, NULL) && ctx_is_uptodate(ctx))
-                return 1;
-        return 0;
-}
-
-static inline
-int ctx_match(struct ptlrpc_cli_ctx *ctx, struct vfs_cred *vcred)
-{
-        /* a little bit optimization for null policy */
-        if (!ctx->cc_ops->match)
-                return 1;
-
-        return ctx->cc_ops->match(ctx, vcred);
-}
-
-static
-void ctx_list_destroy(struct hlist_head *head)
-{
-        struct ptlrpc_cli_ctx *ctx;
-
-        while (!hlist_empty(head)) {
-                ctx = hlist_entry(head->first, struct ptlrpc_cli_ctx, cc_hash);
-
-                LASSERT(atomic_read(&ctx->cc_refcount) == 0);
-                LASSERT(test_bit(PTLRPC_CTX_HASHED_BIT, &ctx->cc_flags) == 0);
-
-                hlist_del_init(&ctx->cc_hash);
-                sptlrpc_sec_destroy_ctx(ctx->cc_sec, ctx);
-        }
-}
-
-static
-void ctx_cache_gc(struct ptlrpc_sec *sec, struct hlist_head *freelist)
-{
-        struct ptlrpc_cli_ctx *ctx;
-        struct hlist_node *pos, *next;
-        int i;
-        ENTRY;
-
-        CDEBUG(D_SEC, "do gc on sec %s@%p\n", sec->ps_policy->sp_name, sec);
-
-        for (i = 0; i < sec->ps_ccache_size; i++) {
-                hlist_for_each_entry_safe(ctx, pos, next,
-                                          &sec->ps_ccache[i], cc_hash)
-                        ctx_check_death_locked(ctx, freelist);
-        }
-
-        sec->ps_gc_next = cfs_time_current_sec() + sec->ps_gc_interval;
-        EXIT;
-}
-
-/*
- * @uid: which user. "-1" means flush all.
- * @grace: mark context DEAD, allow graceful destroy like notify
- *         server side, etc.
- * @force: also flush busy entries.
- *
- * return the number of busy context encountered.
- *
- * In any cases, never touch "eternal" contexts.
- */
-static
-int ctx_cache_flush(struct ptlrpc_sec *sec, uid_t uid, int grace, int force)
-{
-        struct ptlrpc_cli_ctx *ctx;
-        struct hlist_node *pos, *next;
-        HLIST_HEAD(freelist);
-        int i, busy = 0;
-        ENTRY;
-
-        might_sleep_if(grace);
-
-        spin_lock(&sec->ps_lock);
-        for (i = 0; i < sec->ps_ccache_size; i++) {
-                hlist_for_each_entry_safe(ctx, pos, next,
-                                          &sec->ps_ccache[i], cc_hash) {
-                        LASSERT(atomic_read(&ctx->cc_refcount) > 0);
-
-                        if (ctx_is_eternal(ctx))
-                                continue;
-                        if (uid != -1 && uid != ctx->cc_vcred.vc_uid)
-                                continue;
-
-                        if (atomic_read(&ctx->cc_refcount) > 1) {
-                                busy++;
-                                if (!force)
-                                        continue;
-
-                                CWARN("flush busy(%d) ctx %p(%u->%s) by force, "
-                                      "grace %d\n",
-                                      atomic_read(&ctx->cc_refcount),
-                                      ctx, ctx->cc_vcred.vc_uid,
-                                      sec2target_str(ctx->cc_sec), grace);
-                        }
-                        ctx_unhash(ctx, &freelist);
-
-                        set_bit(PTLRPC_CTX_DEAD_BIT, &ctx->cc_flags);
-                        if (!grace)
-                                clear_bit(PTLRPC_CTX_UPTODATE_BIT,
-                                          &ctx->cc_flags);
-                }
-        }
-        spin_unlock(&sec->ps_lock);
-
-        ctx_list_destroy(&freelist);
-        RETURN(busy);
-}
-
-static inline
-unsigned int ctx_hash_index(struct ptlrpc_sec *sec, __u64 key)
-{
-        return (unsigned int) (key & (sec->ps_ccache_size - 1));
-}
+/**************************************************
+ * client context APIs                            *
+ **************************************************/
 
-/*
- * return matched context. If it's a newly created one, we also give the
- * first push to refresh. return NULL if error happens.
- */
 static
-struct ptlrpc_cli_ctx * ctx_cache_lookup(struct ptlrpc_sec *sec,
-                                         struct vfs_cred *vcred,
-                                         int create, int remove_dead)
-{
-        struct ptlrpc_cli_ctx *ctx = NULL, *new = NULL;
-        struct hlist_head *hash_head;
-        struct hlist_node *pos, *next;
-        HLIST_HEAD(freelist);
-        unsigned int hash, gc = 0, found = 0;
-        ENTRY;
-
-        might_sleep();
-
-        hash = ctx_hash_index(sec, (__u64) vcred->vc_uid);
-        LASSERT(hash < sec->ps_ccache_size);
-        hash_head = &sec->ps_ccache[hash];
-
-retry:
-        spin_lock(&sec->ps_lock);
-
-        /* gc_next == 0 means never do gc */
-        if (remove_dead && sec->ps_gc_next &&
-            cfs_time_after(cfs_time_current_sec(), sec->ps_gc_next)) {
-                ctx_cache_gc(sec, &freelist);
-                gc = 1;
-        }
-
-        hlist_for_each_entry_safe(ctx, pos, next, hash_head, cc_hash) {
-                if (gc == 0 &&
-                    ctx_check_death_locked(ctx, remove_dead ? &freelist : NULL))
-                        continue;
-
-                if (ctx_match(ctx, vcred)) {
-                        found = 1;
-                        break;
-                }
-        }
-
-        if (found) {
-                if (new && new != ctx) {
-                        /* lost the race, just free it */
-                        hlist_add_head(&new->cc_hash, &freelist);
-                        new = NULL;
-                }
-
-                /* hot node, move to head */
-                if (hash_head->first != &ctx->cc_hash) {
-                        __hlist_del(&ctx->cc_hash);
-                        hlist_add_head(&ctx->cc_hash, hash_head);
-                }
-        } else {
-                /* don't allocate for reverse sec */
-                if (sec->ps_flags & PTLRPC_SEC_FL_REVERSE) {
-                        spin_unlock(&sec->ps_lock);
-                        RETURN(NULL);
-                }
-
-                if (new) {
-                        ctx_enhash(new, hash_head);
-                        ctx = new;
-                } else if (create) {
-                        spin_unlock(&sec->ps_lock);
-                        new = sec->ps_policy->sp_cops->create_ctx(sec, vcred);
-                        if (new) {
-                                atomic_inc(&sec->ps_busy);
-                                goto retry;
-                        }
-                } else
-                        ctx = NULL;
-        }
-
-        /* hold a ref */
-        if (ctx)
-                atomic_inc(&ctx->cc_refcount);
-
-        spin_unlock(&sec->ps_lock);
-
-        /* the allocator of the context must give the first push to refresh */
-        if (new) {
-                LASSERT(new == ctx);
-                sptlrpc_ctx_refresh(new);
-        }
-
-        ctx_list_destroy(&freelist);
-        RETURN(ctx);
-}
-
-static inline
 struct ptlrpc_cli_ctx *get_my_ctx(struct ptlrpc_sec *sec)
 {
         struct vfs_cred vcred;
         int create = 1, remove_dead = 1;
 
         LASSERT(sec);
+        LASSERT(sec->ps_policy->sp_cops->lookup_ctx);
 
         if (sec->ps_flags & (PTLRPC_SEC_FL_REVERSE | PTLRPC_SEC_FL_ROOTONLY)) {
                 vcred.vc_uid = 0;
@@ -553,34 +210,19 @@ struct ptlrpc_cli_ctx *get_my_ctx(struct ptlrpc_sec *sec)
                 vcred.vc_gid = cfs_current()->gid;
         }
 
-        if (sec->ps_policy->sp_cops->lookup_ctx)
-                return sec->ps_policy->sp_cops->lookup_ctx(sec, &vcred);
-        else
-                return ctx_cache_lookup(sec, &vcred, create, remove_dead);
-}
-
-/**************************************************
- * client context APIs                            *
- **************************************************/
-
-static
-void sptlrpc_ctx_refresh(struct ptlrpc_cli_ctx *ctx)
-{
-        LASSERT(atomic_read(&ctx->cc_refcount) > 0);
-
-        if (!ctx_is_refreshed(ctx) && ctx->cc_ops->refresh)
-                ctx->cc_ops->refresh(ctx);
+        return sec->ps_policy->sp_cops->lookup_ctx(sec, &vcred,
+                                                   create, remove_dead);
 }
 
-struct ptlrpc_cli_ctx *sptlrpc_ctx_get(struct ptlrpc_cli_ctx *ctx)
+struct ptlrpc_cli_ctx *sptlrpc_cli_ctx_get(struct ptlrpc_cli_ctx *ctx)
 {
         LASSERT(atomic_read(&ctx->cc_refcount) > 0);
         atomic_inc(&ctx->cc_refcount);
         return ctx;
 }
-EXPORT_SYMBOL(sptlrpc_ctx_get);
+EXPORT_SYMBOL(sptlrpc_cli_ctx_get);
 
-void sptlrpc_ctx_put(struct ptlrpc_cli_ctx *ctx, int sync)
+void sptlrpc_cli_ctx_put(struct ptlrpc_cli_ctx *ctx, int sync)
 {
         struct ptlrpc_sec *sec = ctx->cc_sec;
 
@@ -590,85 +232,43 @@ void sptlrpc_ctx_put(struct ptlrpc_cli_ctx *ctx, int sync)
         if (!atomic_dec_and_test(&ctx->cc_refcount))
                 return;
 
-        LASSERT(test_bit(PTLRPC_CTX_HASHED_BIT, &ctx->cc_flags) == 0);
-        LASSERT(hlist_unhashed(&ctx->cc_hash));
-
-        /* if required async, we must clear the UPTODATE bit to prevent extra
-         * rpcs during destroy procedure.
-         */
-        if (!sync)
-                clear_bit(PTLRPC_CTX_UPTODATE_BIT, &ctx->cc_flags);
-
-        /* destroy this context */
-        if (!sptlrpc_sec_destroy_ctx(sec, ctx))
-                return;
-
-        CWARN("%s@%p: put last ctx, also destroy the sec\n",
-              sec->ps_policy->sp_name, sec);
-
-        sptlrpc_sec_destroy(sec);
+        sec->ps_policy->sp_cops->release_ctx(sec, ctx, sync);
 }
-EXPORT_SYMBOL(sptlrpc_ctx_put);
+EXPORT_SYMBOL(sptlrpc_cli_ctx_put);
 
 /*
- * mark a ctx as DEAD, and pull it out from hash table.
- *
- * NOTE: the caller must hold at least 1 ref on the ctx.
+ * expire the context immediately.
+ * the caller must hold at least 1 ref on the ctx.
  */
-void sptlrpc_ctx_expire(struct ptlrpc_cli_ctx *ctx)
+void sptlrpc_cli_ctx_expire(struct ptlrpc_cli_ctx *ctx)
 {
-        LASSERT(ctx->cc_sec);
-        LASSERT(atomic_read(&ctx->cc_refcount) > 0);
-
-        ctx_expire(ctx);
-
-        spin_lock(&ctx->cc_sec->ps_lock);
-
-        if (test_and_clear_bit(PTLRPC_CTX_HASHED_BIT, &ctx->cc_flags)) {
-                LASSERT(!hlist_unhashed(&ctx->cc_hash));
-                LASSERT(atomic_read(&ctx->cc_refcount) > 1);
-
-                hlist_del_init(&ctx->cc_hash);
-                if (atomic_dec_and_test(&ctx->cc_refcount))
-                        LBUG();
-        }
-
-        spin_unlock(&ctx->cc_sec->ps_lock);
+        LASSERT(ctx->cc_ops->die);
+        ctx->cc_ops->die(ctx, 0);
 }
-EXPORT_SYMBOL(sptlrpc_ctx_expire);
+EXPORT_SYMBOL(sptlrpc_cli_ctx_expire);
 
-void sptlrpc_ctx_replace(struct ptlrpc_sec *sec, struct ptlrpc_cli_ctx *new)
+void sptlrpc_cli_ctx_wakeup(struct ptlrpc_cli_ctx *ctx)
 {
-        struct ptlrpc_cli_ctx *ctx;
-        struct hlist_node *pos, *next;
-        HLIST_HEAD(freelist);
-        unsigned int hash;
-        ENTRY;
-
-        hash = ctx_hash_index(sec, (__u64) new->cc_vcred.vc_uid);
-        LASSERT(hash < sec->ps_ccache_size);
-
-        spin_lock(&sec->ps_lock);
-
-        hlist_for_each_entry_safe(ctx, pos, next,
-                                  &sec->ps_ccache[hash], cc_hash) {
-                if (!ctx_match(ctx, &new->cc_vcred))
-                        continue;
+        struct ptlrpc_request *req, *next;
 
-                ctx_expire(ctx);
-                ctx_unhash(ctx, &freelist);
-                break;
+        spin_lock(&ctx->cc_lock);
+        list_for_each_entry_safe(req, next, &ctx->cc_req_list, rq_ctx_chain) {
+                list_del_init(&req->rq_ctx_chain);
+                ptlrpc_wake_client_req(req);
         }
+        spin_unlock(&ctx->cc_lock);
+}
+EXPORT_SYMBOL(sptlrpc_cli_ctx_wakeup);
 
-        ctx_enhash(new, &sec->ps_ccache[hash]);
-        atomic_inc(&sec->ps_busy);
+int sptlrpc_cli_ctx_display(struct ptlrpc_cli_ctx *ctx, char *buf, int bufsize)
+{
+        LASSERT(ctx->cc_ops);
 
-        spin_unlock(&sec->ps_lock);
+        if (ctx->cc_ops->display == NULL)
+                return 0;
 
-        ctx_list_destroy(&freelist);
-        EXIT;
+        return ctx->cc_ops->display(ctx, buf, bufsize);
 }
-EXPORT_SYMBOL(sptlrpc_ctx_replace);
 
 int sptlrpc_req_get_ctx(struct ptlrpc_request *req)
 {
@@ -687,36 +287,13 @@ int sptlrpc_req_get_ctx(struct ptlrpc_request *req)
         req->rq_cli_ctx = get_my_ctx(imp->imp_sec);
 
         if (!req->rq_cli_ctx) {
-                CERROR("req %p: fail to get context from cache\n", req);
+                CERROR("req %p: fail to get context\n", req);
                 RETURN(-ENOMEM);
         }
 
         RETURN(0);
 }
 
-void sptlrpc_ctx_wakeup(struct ptlrpc_cli_ctx *ctx)
-{
-        struct ptlrpc_request *req, *next;
-
-        spin_lock(&ctx->cc_lock);
-        list_for_each_entry_safe(req, next, &ctx->cc_req_list, rq_ctx_chain) {
-                list_del_init(&req->rq_ctx_chain);
-                ptlrpc_wake_client_req(req);
-        }
-        spin_unlock(&ctx->cc_lock);
-}
-EXPORT_SYMBOL(sptlrpc_ctx_wakeup);
-
-int sptlrpc_ctx_display(struct ptlrpc_cli_ctx *ctx, char *buf, int bufsize)
-{
-        LASSERT(ctx->cc_ops);
-
-        if (ctx->cc_ops->display == NULL)
-                return 0;
-
-        return ctx->cc_ops->display(ctx, buf, bufsize);
-}
-
 void sptlrpc_req_put_ctx(struct ptlrpc_request *req)
 {
         ENTRY;
@@ -734,7 +311,7 @@ void sptlrpc_req_put_ctx(struct ptlrpc_request *req)
         }
 
         /* this could be called with spinlock hold, use async mode */
-        sptlrpc_ctx_put(req->rq_cli_ctx, 0);
+        sptlrpc_cli_ctx_put(req->rq_cli_ctx, 0);
         req->rq_cli_ctx = NULL;
         EXIT;
 }
@@ -757,13 +334,12 @@ int sptlrpc_req_replace_dead_ctx(struct ptlrpc_request *req)
         list_del_init(&req->rq_ctx_chain);
         spin_unlock(&ctx->cc_lock);
 
-        sptlrpc_ctx_get(ctx);
+        sptlrpc_cli_ctx_get(ctx);
         sptlrpc_req_put_ctx(req);
         rc = sptlrpc_req_get_ctx(req);
         if (!rc) {
                 LASSERT(req->rq_cli_ctx);
-                LASSERT(req->rq_cli_ctx != ctx);
-                sptlrpc_ctx_put(ctx, 1);
+                sptlrpc_cli_ctx_put(ctx, 1);
         } else {
                 LASSERT(!req->rq_cli_ctx);
                 req->rq_cli_ctx = ctx;
@@ -775,8 +351,7 @@ EXPORT_SYMBOL(sptlrpc_req_replace_dead_ctx);
 static
 int ctx_check_refresh(struct ptlrpc_cli_ctx *ctx)
 {
-        smp_mb();
-        if (ctx_is_refreshed(ctx))
+        if (cli_ctx_is_refreshed(ctx))
                 return 1;
         return 0;
 }
@@ -798,7 +373,7 @@ int ctx_refresh_timeout(void *data)
          * later than the context refresh expire time.
          */
         if (rc == 0)
-                ctx_expire(req->rq_cli_ctx);
+                req->rq_cli_ctx->cc_ops->die(req->rq_cli_ctx, 0);
         return rc;
 }
 
@@ -808,10 +383,19 @@ void ctx_refresh_interrupt(void *data)
         /* do nothing */
 }
 
+static
+void req_off_ctx_list(struct ptlrpc_request *req, struct ptlrpc_cli_ctx *ctx)
+{
+        spin_lock(&ctx->cc_lock);
+        if (!list_empty(&req->rq_ctx_chain))
+                list_del_init(&req->rq_ctx_chain);
+        spin_unlock(&ctx->cc_lock);
+}
+
 /*
  * the status of context could be subject to be changed by other threads at any
  * time. we allow this race. but once we return with 0, the caller will
- * suppose it's uptodated and keep using it until the affected rpc is done.
+ * assume it's up to date and keep using it until the owning rpc is done.
  *
  * @timeout:
  *    < 0  - don't wait
@@ -829,28 +413,24 @@ int sptlrpc_req_refresh_ctx(struct ptlrpc_request *req, long timeout)
 
         LASSERT(ctx);
 
-        /* special ctxs */
-        if (ctx_is_eternal(ctx) || req->rq_ctx_init || req->rq_ctx_fini)
-                RETURN(0);
-
-        /* reverse ctxs, don't refresh */
+        /* skip reverse ctxs */
         if (ctx->cc_sec->ps_flags & PTLRPC_SEC_FL_REVERSE)
                 RETURN(0);
 
-        spin_lock(&ctx->cc_lock);
-again:
-        if (ctx_check_uptodate(ctx)) {
-                if (!list_empty(&req->rq_ctx_chain))
-                        list_del_init(&req->rq_ctx_chain);
-                spin_unlock(&ctx->cc_lock);
+        /* skip special ctxs */
+        if (cli_ctx_is_eternal(ctx) || req->rq_ctx_init || req->rq_ctx_fini)
                 RETURN(0);
+
+        if (test_bit(PTLRPC_CTX_NEW_BIT, &ctx->cc_flags)) {
+                LASSERT(ctx->cc_ops->refresh);
+                ctx->cc_ops->refresh(ctx);
         }
+        LASSERT(test_bit(PTLRPC_CTX_NEW_BIT, &ctx->cc_flags) == 0);
 
-        if (test_bit(PTLRPC_CTX_ERROR_BIT, &ctx->cc_flags)) {
+again:
+        if (unlikely(test_bit(PTLRPC_CTX_ERROR_BIT, &ctx->cc_flags))) {
                 req->rq_err = 1;
-                if (!list_empty(&req->rq_ctx_chain))
-                        list_del_init(&req->rq_ctx_chain);
-                spin_unlock(&ctx->cc_lock);
+                req_off_ctx_list(req, ctx);
                 RETURN(-EPERM);
         }
 
@@ -879,19 +459,15 @@ again:
          *     never really send request with old context before.
          */
         if (test_bit(PTLRPC_CTX_UPTODATE_BIT, &ctx->cc_flags) &&
-            req->rq_reqmsg &&
+            unlikely(req->rq_reqmsg) &&
             lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT) {
-                if (!list_empty(&req->rq_ctx_chain))
-                        list_del_init(&req->rq_ctx_chain);
-                spin_unlock(&ctx->cc_lock);
+                req_off_ctx_list(req, ctx);
                 RETURN(0);
         }
 
         if (unlikely(test_bit(PTLRPC_CTX_DEAD_BIT, &ctx->cc_flags))) {
-                spin_unlock(&ctx->cc_lock);
-
                 /* don't have to, but we don't want to release it too soon */
-                sptlrpc_ctx_get(ctx);
+                sptlrpc_cli_ctx_get(ctx);
 
                 rc = sptlrpc_req_replace_dead_ctx(req);
                 if (rc) {
@@ -900,29 +476,40 @@ again:
                                 req, ctx);
                         req->rq_err = 1;
                         LASSERT(list_empty(&req->rq_ctx_chain));
-                        sptlrpc_ctx_put(ctx, 1);
+                        sptlrpc_cli_ctx_put(ctx, 1);
                         RETURN(-ENOMEM);
                 }
 
-                LASSERT(ctx != req->rq_cli_ctx);
+                /* FIXME
+                 * if the ctx didn't really switch (the cpu may be busy, or
+                 * something similar), we just yield the cpu for a moment.
+                 */
+                if (ctx == req->rq_cli_ctx)
+                        schedule();
+
                 CWARN("req %p: replace dead ctx %p(%u->%s) => %p\n",
                       req, ctx, ctx->cc_vcred.vc_uid,
                       sec2target_str(ctx->cc_sec), req->rq_cli_ctx);
 
-                sptlrpc_ctx_put(ctx, 1);
+                sptlrpc_cli_ctx_put(ctx, 1);
                 ctx = req->rq_cli_ctx;
                 LASSERT(list_empty(&req->rq_ctx_chain));
 
-                spin_lock(&ctx->cc_lock);
                 goto again;
         }
 
+        LASSERT(ctx->cc_ops->validate);
+        if (ctx->cc_ops->validate(ctx) == 0) {
+                req_off_ctx_list(req, ctx);
+                RETURN(0);
+        }
+
         /* Now we're sure this context is during upcall, add myself into
          * waiting list
          */
+        spin_lock(&ctx->cc_lock);
         if (list_empty(&req->rq_ctx_chain))
                 list_add(&req->rq_ctx_chain, &ctx->cc_req_list);
-
         spin_unlock(&ctx->cc_lock);
 
         if (timeout < 0) {
@@ -942,7 +529,6 @@ again:
                                ctx_refresh_timeout, ctx_refresh_interrupt, req);
         rc = l_wait_event(req->rq_reply_waitq, ctx_check_refresh(ctx), &lwi);
 
-        spin_lock(&ctx->cc_lock);
         /* five cases we are here:
          * 1. successfully refreshed;
          * 2. someone else mark this ctx dead by force;
@@ -950,10 +536,9 @@ again:
          * 4. timedout, and we don't want recover from the failure;
          * 5. timedout, and waked up upon recovery finished;
          */
-        if (!ctx_is_refreshed(ctx)) {
+        if (!cli_ctx_is_refreshed(ctx)) {
                 /* timed out or interruptted */
-                list_del_init(&req->rq_ctx_chain);
-                spin_unlock(&ctx->cc_lock);
+                req_off_ctx_list(req, ctx);
 
                 LASSERT(rc != 0);
                 RETURN(rc);
@@ -1053,8 +638,9 @@ int sptlrpc_import_check_ctx(struct obd_import *imp)
         if (!ctx)
                 RETURN(1);
 
-        if (ctx_is_eternal(ctx)) {
-                sptlrpc_ctx_put(ctx, 1);
+        if (cli_ctx_is_eternal(ctx) ||
+            ctx->cc_ops->validate(ctx) == 0) {
+                sptlrpc_cli_ctx_put(ctx, 1);
                 RETURN(0);
         }
 
@@ -1071,7 +657,7 @@ int sptlrpc_import_check_ctx(struct obd_import *imp)
 
         rc = sptlrpc_req_refresh_ctx(req, 0);
         LASSERT(list_empty(&req->rq_ctx_chain));
-        sptlrpc_ctx_put(req->rq_cli_ctx, 1);
+        sptlrpc_cli_ctx_put(req->rq_cli_ctx, 1);
         OBD_FREE_PTR(req);
 
         RETURN(rc);
@@ -1191,18 +777,51 @@ int sptlrpc_cli_unwrap_reply(struct ptlrpc_request *req)
 }
 
 /**************************************************
- * security APIs                                  *
+ * client side high-level security APIs           *
  **************************************************/
 
+/*
+ * Hand @sec to its policy's destroy_sec hook and then drop the policy
+ * reference. Both ps_refcount and ps_busy must already be zero.
+ */
+static
+void sec_cop_destroy_sec(struct ptlrpc_sec *sec)
+{
+        /* read the policy up front: it is still needed after the hook ran */
+        struct ptlrpc_sec_policy *policy = sec->ps_policy;
+
+        LASSERT(atomic_read(&sec->ps_refcount) == 0);
+        LASSERT(atomic_read(&sec->ps_busy) == 0);
+        LASSERT(policy->sp_cops->destroy_sec);
+
+        CWARN("%s@%p: being destroied\n", sec->ps_policy->sp_name, sec);
+
+        policy->sp_cops->destroy_sec(sec);
+        sptlrpc_policy_put(policy);
+}
+
+/*
+ * Forward a context-cache flush to the policy that owns @sec.
+ *
+ * @uid, @grace and @force are passed through untouched; their meaning is
+ * defined by the policy's flush_ctx_cache hook. Callers in this file pass
+ * -1, 0 or the current task's uid as @uid.
+ *
+ * Returns whatever the policy hook returns.
+ */
+static
+int sec_cop_flush_ctx_cache(struct ptlrpc_sec *sec, uid_t uid,
+                            int grace, int force)
+{
+        struct ptlrpc_sec_policy *policy = sec->ps_policy;
+
+        /* every policy is expected to provide this hook */
+        LASSERT(policy->sp_cops);
+        LASSERT(policy->sp_cops->flush_ctx_cache);
+
+        return policy->sp_cops->flush_ctx_cache(sec, uid, grace, force);
+}
+
+/*
+ * Exported entry point for destroying @sec; simply calls
+ * sec_cop_destroy_sec(), which asserts that @sec is no longer
+ * referenced or busy.
+ */
+void sptlrpc_sec_destroy(struct ptlrpc_sec *sec)
+{
+        sec_cop_destroy_sec(sec);
+}
+EXPORT_SYMBOL(sptlrpc_sec_destroy);
+
 /*
  * let policy module to determine whether take refrence of
  * import or not.
  */
 static
-struct ptlrpc_sec * sptlrpc_sec_create(struct obd_import *imp,
-                                       struct ptlrpc_svc_ctx *ctx,
-                                       __u32 flavor,
-                                       unsigned long flags)
+struct ptlrpc_sec * import_create_sec(struct obd_import *imp,
+                                      struct ptlrpc_svc_ctx *ctx,
+                                      __u32 flavor,
+                                      unsigned long flags)
 {
         struct ptlrpc_sec_policy *policy;
         struct ptlrpc_sec *sec;
@@ -1243,69 +862,93 @@ struct ptlrpc_sec * sptlrpc_sec_create(struct obd_import *imp,
                  * balanced in sptlrpc_set_put()
                  */
                 atomic_inc(&sec->ps_busy);
+
+                if (sec->ps_gc_interval && policy->sp_cops->gc_ctx)
+                        sptlrpc_gc_add_sec(sec);
         } else
                 sptlrpc_policy_put(policy);
 
         RETURN(sec);
 }
 
-static
-void sptlrpc_sec_destroy(struct ptlrpc_sec *sec)
+int sptlrpc_import_get_sec(struct obd_import *imp,
+                           struct ptlrpc_svc_ctx *ctx,
+                           __u32 flavor,
+                           unsigned long flags)
 {
-        struct ptlrpc_sec_policy *policy = sec->ps_policy;
+        might_sleep();
 
-        LASSERT(policy);
-        LASSERT(atomic_read(&sec->ps_refcount) == 0);
-        LASSERT(atomic_read(&sec->ps_busy) == 0);
-        LASSERT(policy->sp_cops->destroy_sec);
+        /* old sec might be still there in reconnecting */
+        if (imp->imp_sec)
+                return 0;
 
-        policy->sp_cops->destroy_sec(sec);
-        sptlrpc_policy_put(policy);
+        imp->imp_sec = import_create_sec(imp, ctx, flavor, flags);
+        if (!imp->imp_sec)
+                return -EINVAL;
+
+        return 0;
 }
 
-static
-void sptlrpc_sec_put(struct ptlrpc_sec *sec)
+void sptlrpc_import_put_sec(struct obd_import *imp)
 {
-        struct ptlrpc_sec_policy *policy = sec->ps_policy;
+        struct ptlrpc_sec        *sec;
+        struct ptlrpc_sec_policy *policy;
+
+        might_sleep();
+
+        if (imp->imp_sec == NULL)
+                return;
+
+        sec = imp->imp_sec;
+        policy = sec->ps_policy;
 
         if (!atomic_dec_and_test(&sec->ps_refcount)) {
                 sptlrpc_policy_put(policy);
-                return;
+                goto out;
         }
 
-        ctx_cache_flush(sec, -1, 1, 1);
+        sec_cop_flush_ctx_cache(sec, -1, 1, 1);
+        sptlrpc_gc_del_sec(sec);
 
         if (atomic_dec_and_test(&sec->ps_busy))
-                sptlrpc_sec_destroy(sec);
-        else
+                sec_cop_destroy_sec(sec);
+        else {
                 CWARN("delay to destroy %s@%p: busy contexts\n",
                       policy->sp_name, sec);
+        }
+
+out:
+        imp->imp_sec = NULL;
 }
 
-/*
- * return 1 means we should also destroy the sec structure.
- * normally return 0
- */
-static
-int sptlrpc_sec_destroy_ctx(struct ptlrpc_sec *sec,
-                            struct ptlrpc_cli_ctx *ctx)
+void sptlrpc_import_flush_root_ctx(struct obd_import *imp)
+{
+        if (imp == NULL || imp->imp_sec == NULL)
+                return;
+
+        /* it's important to use grace mode; see the explanation in
+         * sptlrpc_req_refresh_ctx()
+         */
+        sec_cop_flush_ctx_cache(imp->imp_sec, 0, 1, 1);
+}
+
+void sptlrpc_import_flush_my_ctx(struct obd_import *imp)
 {
-        LASSERT(sec == ctx->cc_sec);
-        LASSERT(atomic_read(&sec->ps_busy));
-        LASSERT(atomic_read(&ctx->cc_refcount) == 0);
-        LASSERT(hlist_unhashed(&ctx->cc_hash));
-        LASSERT(list_empty(&ctx->cc_req_list));
-        LASSERT(sec->ps_policy->sp_cops->destroy_ctx);
+        if (imp == NULL || imp->imp_sec == NULL)
+                return;
 
-        sec->ps_policy->sp_cops->destroy_ctx(sec, ctx);
+        sec_cop_flush_ctx_cache(imp->imp_sec, cfs_current()->uid, 1, 1);
+}
+EXPORT_SYMBOL(sptlrpc_import_flush_my_ctx);
 
-        if (atomic_dec_and_test(&sec->ps_busy)) {
-                LASSERT(atomic_read(&sec->ps_refcount) == 0);
-                return 1;
-        }
+void sptlrpc_import_flush_all_ctx(struct obd_import *imp)
+{
+        if (imp == NULL || imp->imp_sec == NULL)
+                return;
 
-        return 0;
+        sec_cop_flush_ctx_cache(imp->imp_sec, -1, 0, 1);
 }
+EXPORT_SYMBOL(sptlrpc_import_flush_all_ctx);
 
 /*
  * when complete successfully, req->rq_reqmsg should point to the
@@ -1460,66 +1103,6 @@ void sptlrpc_cli_free_repbuf(struct ptlrpc_request *req)
         EXIT;
 }
 
-int sptlrpc_import_get_sec(struct obd_import *imp,
-                           struct ptlrpc_svc_ctx *ctx,
-                           __u32 flavor,
-                           unsigned long flags)
-{
-        struct obd_device *obd = imp->imp_obd;
-        ENTRY;
-
-        LASSERT(obd);
-        LASSERT(obd->obd_type);
-
-        /* old sec might be still there in reconnecting */
-        if (imp->imp_sec)
-                RETURN(0);
-
-        imp->imp_sec = sptlrpc_sec_create(imp, ctx, flavor, flags);
-        if (!imp->imp_sec)
-                RETURN(-EINVAL);
-
-        RETURN(0);
-}
-
-void sptlrpc_import_put_sec(struct obd_import *imp)
-{
-        if (imp->imp_sec == NULL)
-                return;
-
-        sptlrpc_sec_put(imp->imp_sec);
-        imp->imp_sec = NULL;
-}
-
-void sptlrpc_import_flush_root_ctx(struct obd_import *imp)
-{
-        if (imp == NULL || imp->imp_sec == NULL)
-                return;
-
-        /* use 'grace' mode, it's crutial see explain in
-         * sptlrpc_req_refresh_ctx()
-         */
-        ctx_cache_flush(imp->imp_sec, 0, 1, 1);
-}
-
-void sptlrpc_import_flush_my_ctx(struct obd_import *imp)
-{
-        if (imp == NULL || imp->imp_sec == NULL)
-                return;
-
-        ctx_cache_flush(imp->imp_sec, cfs_current()->uid, 1, 1);
-}
-EXPORT_SYMBOL(sptlrpc_import_flush_my_ctx);
-
-void sptlrpc_import_flush_all_ctx(struct obd_import *imp)
-{
-        if (imp == NULL || imp->imp_sec == NULL)
-                return;
-
-        ctx_cache_flush(imp->imp_sec, -1, 0, 1);
-}
-EXPORT_SYMBOL(sptlrpc_import_flush_all_ctx);
-
 int sptlrpc_cli_install_rvs_ctx(struct obd_import *imp,
                                 struct ptlrpc_cli_ctx *ctx)
 {
@@ -2167,14 +1750,18 @@ EXPORT_SYMBOL(sec2target_str);
  * initialize/finalize                  *
  ****************************************/
 
-int sptlrpc_init(void)
+int __init sptlrpc_init(void)
 {
         int rc;
 
-        rc = sptlrpc_enc_pool_init();
+        rc = sptlrpc_gc_start_thread();
         if (rc)
                 goto out;
 
+        rc = sptlrpc_enc_pool_init();
+        if (rc)
+                goto out_gc;
+
         rc = sptlrpc_null_init();
         if (rc)
                 goto out_pool;
@@ -2195,14 +1782,17 @@ out_null:
         sptlrpc_null_fini();
 out_pool:
         sptlrpc_enc_pool_fini();
+out_gc:
+        sptlrpc_gc_stop_thread();
 out:
         return rc;
 }
 
-void sptlrpc_fini(void)
+void __exit sptlrpc_fini(void)
 {
         sptlrpc_lproc_fini();
         sptlrpc_plain_fini();
         sptlrpc_null_fini();
         sptlrpc_enc_pool_fini();
+        sptlrpc_gc_stop_thread();
 }
diff --git a/lustre/ptlrpc/sec_gc.c b/lustre/ptlrpc/sec_gc.c
new file mode 100644 (file)
index 0000000..f99c0a4
--- /dev/null
@@ -0,0 +1,199 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2007 Cluster File Systems, Inc.
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef EXPORT_SYMTAB
+# define EXPORT_SYMTAB
+#endif
+#define DEBUG_SUBSYSTEM S_SEC
+
+#ifndef __KERNEL__
+#include <liblustre.h>
+#endif
+
+#include <obd_support.h>
+#include <obd_class.h>
+#include <lustre_net.h>
+#include <lustre_sec.h>
+
+#define SEC_GC_INTERVAL (30 * 60)
+
+#ifdef __KERNEL__
+
+/*
+ * Held by the gc thread for the whole of each pass over sec_gc_list;
+ * sptlrpc_gc_del_sec() takes and releases it as a barrier against a
+ * pass that is still running.
+ */
+static DECLARE_MUTEX(sec_gc_mutex);
+/* secs registered for periodic context gc, linked through ps_gc_list */
+static LIST_HEAD(sec_gc_list);
+/* taken around insertion into and removal from sec_gc_list */
+static spinlock_t sec_gc_list_lock = SPIN_LOCK_UNLOCKED;
+
+/* control block (state flags and wait queue) of the gc thread */
+static struct ptlrpc_thread sec_gc_thread;
+/* number of sptlrpc_gc_del_sec() callers currently inside the barrier */
+static atomic_t sec_gc_wait_del = ATOMIC_INIT(0);
+
+/*
+ * Register @sec for periodic context garbage collection.
+ *
+ * The sec is appended to the global sec_gc_list, which the gc thread
+ * walks on every pass. A sec whose ps_gc_list node is already linked is
+ * reported with CERROR and left alone.
+ */
+void sptlrpc_gc_add_sec(struct ptlrpc_sec *sec)
+{
+        CWARN("add sec %p(%s)\n", sec, sec->ps_policy->sp_name);
+        if (!list_empty(&sec->ps_gc_list)) {
+                CERROR("sec %p(%s) already in gc list\n",
+                       sec, sec->ps_policy->sp_name);
+                return;
+        }
+
+        spin_lock(&sec_gc_list_lock);
+        /*
+         * list_add_tail(new, head): the sec's own node is the new entry
+         * and sec_gc_list is the head. Passing them the other way round
+         * re-links the global head into the sec's private list and
+         * corrupts sec_gc_list as soon as a second sec is registered.
+         */
+        list_add_tail(&sec->ps_gc_list, &sec_gc_list);
+        spin_unlock(&sec_gc_list_lock);
+}
+
+/*
+ * Take @sec off the gc list and wait until the gc thread can no longer
+ * be working on it. Returns immediately if @sec is not on the list.
+ * May sleep, because it acquires sec_gc_mutex.
+ */
+void sptlrpc_gc_del_sec(struct ptlrpc_sec *sec)
+{
+        CWARN("del sec %p(%s)\n", sec, sec->ps_policy->sp_name);
+        if (list_empty(&sec->ps_gc_list))
+                return;
+
+        might_sleep();
+
+        spin_lock(&sec_gc_list_lock);
+        list_del_init(&sec->ps_gc_list);
+        spin_unlock(&sec_gc_list_lock);
+
+        /*
+         * barrier: the gc thread holds sec_gc_mutex while it walks the
+         * list, so once we have acquired and released the mutex any pass
+         * that might still have been looking at @sec is over. The raised
+         * sec_gc_wait_del counter tells the gc thread to abandon its
+         * current pass so that we get the mutex quickly.
+         */
+        atomic_inc(&sec_gc_wait_del);
+        mutex_down(&sec_gc_mutex);
+        mutex_up(&sec_gc_mutex);
+        atomic_dec(&sec_gc_wait_del);
+}
+
+/*
+ * Run one gc check on @sec: if its next-gc time has been reached, call
+ * the policy's gc_ctx hook and schedule the following run one
+ * ps_gc_interval from now. Secs with a zero ps_gc_next or without a
+ * gc_ctx hook are only warned about.
+ */
+static void sec_do_gc(struct ptlrpc_sec *sec)
+{
+        struct ptlrpc_sec_policy *policy = sec->ps_policy;
+        cfs_time_t                now = cfs_time_current_sec();
+
+        if (unlikely(sec->ps_gc_next == 0)) {
+                CWARN("sec %p(%s) has 0 gc time\n", sec, policy->sp_name);
+                return;
+        }
+
+        if (unlikely(policy->sp_cops->gc_ctx == NULL)) {
+                CWARN("sec %p(%s) is not prepared for gc\n",
+                      sec, policy->sp_name);
+                return;
+        }
+
+        CWARN("check on sec %p(%s)\n", sec, policy->sp_name);
+
+        /* only collect once the scheduled time is no longer in the future */
+        if (!time_after(sec->ps_gc_next, now)) {
+                policy->sp_cops->gc_ctx(sec);
+                sec->ps_gc_next = now + sec->ps_gc_interval;
+        }
+}
+
+/*
+ * Body of the gc thread. @arg is the struct ptlrpc_thread control block.
+ *
+ * Repeatedly walks sec_gc_list calling sec_do_gc() on each entry, then
+ * sleeps for SEC_GC_INTERVAL seconds or until SVC_STOPPING is set in
+ * t_flags. On stop it marks the thread SVC_STOPPED, signals the control
+ * wait queue and returns 0.
+ *
+ * NOTE(review): the walk is protected by sec_gc_mutex only, while
+ * sptlrpc_gc_add_sec() links entries under sec_gc_list_lock only --
+ * confirm that an add cannot race with a pass in progress.
+ */
+static int sec_gc_main(void *arg)
+{
+        struct ptlrpc_thread *thread = (struct ptlrpc_thread *) arg;
+        struct l_wait_info    lwi;
+
+        cfs_daemonize("sptlrpc_ctx_gc");
+
+        /* Record that the thread is running */
+        thread->t_flags = SVC_RUNNING;
+        cfs_waitq_signal(&thread->t_ctl_waitq);
+
+        while (1) {
+                struct ptlrpc_sec *sec, *next;
+
+                /* a pass that was abandoned for a deleter restarts here */
+again:
+                mutex_down(&sec_gc_mutex);
+                list_for_each_entry_safe(sec, next, &sec_gc_list, ps_gc_list) {
+                        /*
+                         * if someone is waiting to be deleted, let it
+                         * proceed as soon as possible.
+                         */
+                        if (atomic_read(&sec_gc_wait_del)) {
+                                CWARN("deletion pending, retry\n");
+                                mutex_up(&sec_gc_mutex);
+                                goto again;
+                        }
+
+                        sec_do_gc(sec);
+                }
+                mutex_up(&sec_gc_mutex);
+
+                /* sleep until the next pass is due or a stop is requested */
+                lwi = LWI_TIMEOUT(SEC_GC_INTERVAL * HZ, NULL, NULL);
+                l_wait_event(thread->t_ctl_waitq,
+                             thread->t_flags & SVC_STOPPING,
+                             &lwi);
+
+                if (thread->t_flags & SVC_STOPPING) {
+                        thread->t_flags &= ~SVC_STOPPING;
+                        break;
+                }
+        }
+
+        /* tell sptlrpc_gc_stop_thread() that we are done */
+        thread->t_flags = SVC_STOPPED;
+        cfs_waitq_signal(&thread->t_ctl_waitq);
+        return 0;
+}
+
+/*
+ * Reset the gc thread control block, spawn the gc thread and wait until
+ * it has flagged itself SVC_RUNNING.
+ *
+ * Returns 0 on success, or the negative value returned by
+ * cfs_kernel_thread() if the thread could not be created.
+ */
+int sptlrpc_gc_start_thread(void)
+{
+        struct ptlrpc_thread *thread = &sec_gc_thread;
+        struct l_wait_info    lwi = { 0 };
+        int                   rc;
+
+        /* start from a clean control block with a usable wait queue */
+        memset(thread, 0, sizeof(*thread));
+        cfs_waitq_init(&thread->t_ctl_waitq);
+
+        rc = cfs_kernel_thread(sec_gc_main, thread,
+                               CLONE_VM | CLONE_FILES);
+        if (rc >= 0) {
+                /* sec_gc_main() signals this queue once it is running */
+                l_wait_event(thread->t_ctl_waitq,
+                             thread->t_flags & SVC_RUNNING, &lwi);
+                return 0;
+        }
+
+        CERROR("can't start gc thread: %d\n", rc);
+        return rc;
+}
+
+/*
+ * Ask the gc thread to stop and block until it has flagged itself
+ * SVC_STOPPED.
+ */
+void sptlrpc_gc_stop_thread(void)
+{
+        struct l_wait_info lwi = { 0 };
+
+        /* sec_gc_main() checks this flag whenever its wait is woken */
+        sec_gc_thread.t_flags = SVC_STOPPING;
+        cfs_waitq_signal(&sec_gc_thread.t_ctl_waitq);
+
+        l_wait_event(sec_gc_thread.t_ctl_waitq,
+                     sec_gc_thread.t_flags & SVC_STOPPED, &lwi);
+}
+
+#else /* !__KERNEL__ */
+
+/*
+ * Builds without __KERNEL__ have no gc thread: adding and deleting a
+ * sec and stopping the thread are no-ops, and starting the thread
+ * always reports success.
+ */
+void sptlrpc_gc_add_sec(struct ptlrpc_sec *sec)
+{
+}
+void sptlrpc_gc_del_sec(struct ptlrpc_sec *sec)
+{
+}
+int sptlrpc_gc_start_thread(void)
+{
+        return 0;
+}
+void sptlrpc_gc_stop_thread(void)
+{
+}
+
+#endif /* __KERNEL__ */
index 77c7cf4..0d1d571 100644 (file)
@@ -70,10 +70,8 @@ int sptlrpc_lprocfs_rd(char *page, char **start, off_t off, int count,
         struct obd_device        *obd = data;
         struct sec_flavor_config *conf = &obd->u.cli.cl_sec_conf;
         struct ptlrpc_sec        *sec = NULL;
-        struct ptlrpc_cli_ctx    *ctx;
-        struct hlist_node        *pos, *next;
         char                      flags_str[32];
-        int                       written, i;
+        int                       written;
 
         if (obd == NULL)
                 return 0;
@@ -99,7 +97,6 @@ int sptlrpc_lprocfs_rd(char *page, char **start, off_t off, int count,
                         "bulk checksum:         %s\n"
                         "bulk encrypt:          %s\n"
                         "flags:                 %s\n"
-                        "ctx cache size         %u\n"
                         "ctx cache busy         %d\n"
                         "gc interval            %lu\n"
                         "gc next                %ld\n",
@@ -107,12 +104,17 @@ int sptlrpc_lprocfs_rd(char *page, char **start, off_t off, int count,
                         sptlrpc_bulk_csum_alg2name(conf->sfc_bulk_csum),
                         sptlrpc_bulk_priv_alg2name(conf->sfc_bulk_priv),
                         flags_str,
-                        sec->ps_ccache_size,
                         atomic_read(&sec->ps_busy),
                         sec->ps_gc_interval,
                         sec->ps_gc_interval ?
                                 sec->ps_gc_next - cfs_time_current_sec() : 0
                           );
+
+        if (sec->ps_policy->sp_cops->display) {
+                written += sec->ps_policy->sp_cops->display(
+                                        sec, page + written, count - written);
+        }
+#if 0
         /*
          * list contexts
          */
@@ -128,11 +130,12 @@ int sptlrpc_lprocfs_rd(char *page, char **start, off_t off, int count,
                                           &sec->ps_ccache[i], cc_hash) {
                         if (written >= count)
                                 break;
-                        written += sptlrpc_ctx_display(ctx, page + written,
-                                                       count - written);
+                        written += sptlrpc_cli_ctx_display(ctx, page + written,
+                                                           count - written);
                 }
         }
         spin_unlock(&sec->ps_lock);
+#endif
 
 out:
         return written;
index 2b06e00..6d96c01 100644 (file)
@@ -92,13 +92,22 @@ void null_destroy_sec(struct ptlrpc_sec *sec)
 
 static
 struct ptlrpc_cli_ctx *null_lookup_ctx(struct ptlrpc_sec *sec,
-                                       struct vfs_cred *vcred)
+                                       struct vfs_cred *vcred,
+                                       int create, int remove_dead)
 {
         atomic_inc(&null_cli_ctx.cc_refcount);
         return &null_cli_ctx;
 }
 
 static
+int null_flush_ctx_cache(struct ptlrpc_sec *sec,
+                         uid_t uid,
+                         int grace, int force)
+{
+        return 0;
+}
+
+static
 int null_alloc_reqbuf(struct ptlrpc_sec *sec,
                       struct ptlrpc_request *req,
                       int msgsize)
@@ -287,6 +296,7 @@ static struct ptlrpc_sec_cops null_sec_cops = {
         .create_sec             = null_create_sec,
         .destroy_sec            = null_destroy_sec,
         .lookup_ctx             = null_lookup_ctx,
+        .flush_ctx_cache        = null_flush_ctx_cache,
         .alloc_reqbuf           = null_alloc_reqbuf,
         .alloc_repbuf           = null_alloc_repbuf,
         .free_reqbuf            = null_free_reqbuf,
@@ -319,19 +329,18 @@ void null_init_internal(void)
         null_sec.ps_import = NULL;
         null_sec.ps_flavor = SPTLRPC_FLVR_NULL;
         null_sec.ps_flags = 0;
-        null_sec.ps_gc_interval = 0;
-        null_sec.ps_gc_next = 0;
         spin_lock_init(&null_sec.ps_lock);
-        null_sec.ps_ccache_size = 1;
-        null_sec.ps_ccache = &__list;
         atomic_set(&null_sec.ps_busy, 1);         /* for "null_cli_ctx" */
+        INIT_LIST_HEAD(&null_sec.ps_gc_list);
+        null_sec.ps_gc_interval = 0;
+        null_sec.ps_gc_next = 0;
 
         hlist_add_head(&null_cli_ctx.cc_hash, &__list);
         atomic_set(&null_cli_ctx.cc_refcount, 1);    /* for hash */
         null_cli_ctx.cc_sec = &null_sec;
         null_cli_ctx.cc_ops = &null_ctx_ops;
         null_cli_ctx.cc_expire = 0;
-        null_cli_ctx.cc_flags = PTLRPC_CTX_HASHED | PTLRPC_CTX_ETERNAL |
+        null_cli_ctx.cc_flags = PTLRPC_CTX_CACHED | PTLRPC_CTX_ETERNAL |
                                 PTLRPC_CTX_UPTODATE;
         null_cli_ctx.cc_vcred.vc_uid = 0;
         spin_lock_init(&null_cli_ctx.cc_lock);
@@ -346,7 +355,7 @@ int sptlrpc_null_init(void)
 
         rc = sptlrpc_register_policy(&null_policy);
         if (rc)
-                CERROR("failed to register sec.null: %d\n", rc);
+                CERROR("failed to register %s: %d\n", null_policy.sp_name, rc);
 
         return rc;
 }
@@ -357,5 +366,5 @@ void sptlrpc_null_fini(void)
 
         rc = sptlrpc_unregister_policy(&null_policy);
         if (rc)
-                CERROR("cannot unregister sec.null: %d\n", rc);
+                CERROR("failed to unregister %s: %d\n", null_policy.sp_name,rc);
 }
index 6993852..ee87465 100644 (file)
@@ -150,7 +150,8 @@ void plain_destroy_sec(struct ptlrpc_sec *sec)
 
 static
 struct ptlrpc_cli_ctx *plain_lookup_ctx(struct ptlrpc_sec *sec,
-                                        struct vfs_cred *vcred)
+                                        struct vfs_cred *vcred,
+                                        int create, int remove_dead)
 {
         ENTRY;
         atomic_inc(&plain_cli_ctx.cc_refcount);
@@ -158,6 +159,14 @@ struct ptlrpc_cli_ctx *plain_lookup_ctx(struct ptlrpc_sec *sec,
 }
 
 static
+int plain_flush_ctx_cache(struct ptlrpc_sec *sec,
+                          uid_t uid,
+                          int grace, int force)
+{
+        return 0;
+}
+
+static
 int plain_alloc_reqbuf(struct ptlrpc_sec *sec,
                        struct ptlrpc_request *req,
                        int msgsize)
@@ -477,6 +486,7 @@ static struct ptlrpc_sec_cops plain_sec_cops = {
         .create_sec             = plain_create_sec,
         .destroy_sec            = plain_destroy_sec,
         .lookup_ctx             = plain_lookup_ctx,
+        .flush_ctx_cache        = plain_flush_ctx_cache,
         .alloc_reqbuf           = plain_alloc_reqbuf,
         .alloc_repbuf           = plain_alloc_repbuf,
         .free_reqbuf            = plain_free_reqbuf,
@@ -511,19 +521,18 @@ void plain_init_internal(void)
         plain_sec.ps_import = NULL;
         plain_sec.ps_flavor = SPTLRPC_FLVR_PLAIN;
         plain_sec.ps_flags = 0;
-        plain_sec.ps_gc_interval = 0;
-        plain_sec.ps_gc_next = 0;
         spin_lock_init(&plain_sec.ps_lock);
-        plain_sec.ps_ccache_size = 1;
-        plain_sec.ps_ccache = &__list;
         atomic_set(&plain_sec.ps_busy, 1);         /* for "plain_cli_ctx" */
+        INIT_LIST_HEAD(&plain_sec.ps_gc_list);
+        plain_sec.ps_gc_interval = 0;
+        plain_sec.ps_gc_next = 0;
 
         hlist_add_head(&plain_cli_ctx.cc_hash, &__list);
         atomic_set(&plain_cli_ctx.cc_refcount, 1);    /* for hash */
         plain_cli_ctx.cc_sec = &plain_sec;
         plain_cli_ctx.cc_ops = &plain_ctx_ops;
         plain_cli_ctx.cc_expire = 0;
-        plain_cli_ctx.cc_flags = PTLRPC_CTX_HASHED | PTLRPC_CTX_ETERNAL |
+        plain_cli_ctx.cc_flags = PTLRPC_CTX_CACHED | PTLRPC_CTX_ETERNAL |
                                  PTLRPC_CTX_UPTODATE;
         plain_cli_ctx.cc_vcred.vc_uid = 0;
         spin_lock_init(&plain_cli_ctx.cc_lock);
index c6a0f3c..d52cc1f 100644 (file)
@@ -11,6 +11,9 @@ ONLY=${ONLY:-"$*"}
 # bug number for skipped test:
 ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-""}
 # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
+if [ "x$GSS_PIPEFS" != "xy" ]; then
+    ALWAYS_EXCEPT="$ALWAYS_EXCEPT 4"
+fi
 
 [ "$SLOW" = "no" ] && EXCEPT="$EXCEPT"
 
@@ -192,6 +195,7 @@ test_3() {
     # because we always use root credential to OSTs
     $RUNAS kdestroy
     $RUNAS $LFS flushctx
+    echo "destroied credentials/contexs for $RUNAS_ID"
     $RUNAS $CHECKSTAT -p 0666 $file && error "checkstat succeed"
     kill -s 10 $OPPID
     wait $OPPID || error "read file data failed"
@@ -199,9 +203,13 @@ test_3() {
 
     # restore and check again
     restore_krb5_cred
+    echo "restored credentials for $RUNAS_ID"
     $RUNAS $CHECKSTAT -p 0666 $file || error "$RUNAS_ID checkstat (2) error"
+    echo "$RUNAS_ID checkstat OK"
     $CHECKSTAT -p 0666 $file || error "$UID checkstat (2) error"
+    echo "$UID checkstat OK"
     $RUNAS cat $file > /dev/null || error "$RUNAS_ID cat (2) error"
+    echo "$RUNAS_ID read file data OK"
 }
 run_test 3 "local cache under DLM lock"
 
@@ -326,23 +334,21 @@ run_test 7 "exercise enlarge_reqbuf()"
 
 check_multiple_gss_daemons() {
     local facet=$1
+    local gssd=$2
+    local gssd_name=`basename $gssd`
 
     for ((i=0;i<10;i++)); do
-        do_facet $facet "$LSVCGSSD -v &"
-    done
-    for ((i=0;i<10;i++)); do
-        do_facet $facet "$LGSSD -v &"
+        do_facet $facet "$gssd -v &"
     done
 
     # wait daemons entering "stable" status
     sleep 5
 
-    numc=`do_facet $facet ps -o cmd -C lgssd | grep lgssd | wc -l`
-    nums=`do_facet $facet ps -o cmd -C lgssd | grep lgssd | wc -l`
-    echo "$numc lgssd and $nums lsvcgssd are running"
+    num=`do_facet $facet ps -o cmd -C $gssd_name | grep $gssd_name | wc -l`
+    echo "$num instance(s) of $gssd_name are running"
 
-    if [ $numc -ne 1 -o $nums -ne 1 ]; then
-        error "lgssd/lsvcgssd not unique"
+    if [ $num -ne 1 ]; then
+        error "$gssd_name not unique"
     fi
 }
 
@@ -356,23 +362,32 @@ test_100() {
     start_gss_daemons
 
     echo "check with someone already running..."
-    check_multiple_gss_daemons $facet
+    check_multiple_gss_daemons $facet $LSVCGSSD
+    if [ "x$GSS_PIPEFS" == "xy" ]; then
+        check_multiple_gss_daemons $facet $LGSSD
+    fi
 
     echo "check with someone run & finished..."
     do_facet $facet killall -q -2 lgssd lsvcgssd || true
     sleep 5 # wait fully exit
-    check_multiple_gss_daemons $facet
+    check_multiple_gss_daemons $facet $LSVCGSSD
+    if [ "x$GSS_PIPEFS" == "xy" ]; then
+        check_multiple_gss_daemons $facet $LGSSD
+    fi
 
     echo "check refresh..."
     do_facet $facet killall -q -2 lgssd lsvcgssd || true
     sleep 5 # wait fully exit
     do_facet $facet ipcrm -S 0x3b92d473
-    do_facet $facet ipcrm -S 0x3a92d473
-    check_multiple_gss_daemons $facet
+    check_multiple_gss_daemons $facet $LSVCGSSD
+    if [ "x$GSS_PIPEFS" == "xy" ]; then
+        do_facet $facet ipcrm -S 0x3a92d473
+        check_multiple_gss_daemons $facet $LGSSD
+    fi
 
     stop_gss_daemons
 }
-run_test 100 "start more multiple gss daemons"
+run_test 100 "start multiple gss daemons"
 
 TMPDIR=$OLDTMPDIR
 TMP=$OLDTMP
index dbf0979..8141bb0 100644 (file)
@@ -250,7 +250,9 @@ start_gss_daemons() {
     # starting on MDT
     for num in `seq $MDSCOUNT`; do
         do_facet mds$num "$LSVCGSSD -v"
-        do_facet mds$num "$LGSSD -v"
+        if [ "x$GSS_PIPEFS" == "xy" ]; then
+            do_facet mds$num "$LGSSD -v"
+        fi
     done
     # starting on OSTs
     for num in `seq $OSTCOUNT`; do
@@ -258,7 +260,9 @@ start_gss_daemons() {
     done
     # starting on client
     # FIXME: is "client" the right facet name?
-    do_facet client "$LGSSD -v"
+    if [ "x$GSS_PIPEFS" == "xy" ]; then
+        do_facet client "$LGSSD -v"
+    fi
 
     # wait daemons entering "stable" status
     sleep 5
@@ -268,12 +272,16 @@ start_gss_daemons() {
     #
     for num in `seq $MDSCOUNT`; do
         check_gss_daemon_facet mds$num lsvcgssd
-        check_gss_daemon_facet mds$num lgssd
+        if [ "x$GSS_PIPEFS" == "xy" ]; then
+            check_gss_daemon_facet mds$num lgssd
+        fi
     done
     for num in `seq $OSTCOUNT`; do
         check_gss_daemon_facet ost$num lsvcgssd
     done
-    check_gss_daemon_facet client lgssd
+    if [ "x$GSS_PIPEFS" == "xy" ]; then
+        check_gss_daemon_facet client lgssd
+    fi
 }
 
 stop_gss_daemons() {
index 01ee650..fd63d29 100644 (file)
@@ -8,7 +8,15 @@ AM_LDFLAGS := -L$(top_builddir)/lnet/utils
 
 LIBPTLCTL := $(top_builddir)/lnet/utils/libptlctl.a
 
-sbin_PROGRAMS = lgssd lsvcgssd l_idmap
+sbin_PROGRAMS := lsvcgssd l_idmap
+
+if GSS_KEYRING
+sbin_PROGRAMS += lgss_keyring
+endif
+
+if GSS_PIPEFS
+sbin_PROGRAMS += lgssd
+endif
 
 COMMON_SRCS = \
         context.c \
@@ -25,7 +33,7 @@ COMMON_SRCS = \
         err_util.h \
         gss_oids.h \
         gss_util.h \
-       lsupport.h
+        lsupport.h
 
 lgssd_SOURCES = \
         $(COMMON_SRCS) \
@@ -64,4 +72,22 @@ l_idmap_SOURCES = \
        \
        lsupport.h
 
+lgss_keyring_SOURCES = \
+       lgss_keyring.c \
+       context.c \
+       context_lucid.c \
+       context_mit.c \
+       context_heimdal.c \
+       lgss_krb5_utils.c \
+       lgss_utils.c \
+       lsupport.c \
+       \
+       lgss_krb5_utils.h \
+       lgss_utils.h \
+       lsupport.h
+
+lgss_keyring_CFLAGS = $(AM_CFLAGS) $(CFLAGS) $(KRBCFLAGS) -D _NEW_BUILD_
+lgss_keyring_LDADD = -lkeyutils $(GSSAPI_LIBS) $(KRBLIBS)
+lgss_keyring_LDFLAGS = $(KRBLDFLAGS)
+
 EXTRA_DIST =
index 94d1dda..ca6a10c 100644 (file)
@@ -3,10 +3,10 @@ lustre/utils/gss: client & server side gss daemons for Lustre.
 All files came from standard nfs-utils package, applied with patches
 created by Cluster File Systems Inc.
 
-1. Stock nfs-utils-1.0.10.tgz
-2. Apply nfs-utils-1.0.10-CITI_NFS4_ALL-3.dif from Center for Information
+1. Stock nfs-utils-1.0.11.tgz
+2. Apply nfs-utils-1.0.11-CITI_NFS4_ALL-1.dif from Center for Information
    Technology Integration, University of Michigan
    (http://www.citi.umich.edu/projects/nfsv4/linux/)
-3. Apply lustre patch: nfs-utils-1.0.10-lustre.diff
+3. Apply lustre patch: nfs-utils-1.0.11-lustre.diff
 4. Copy nfs-utils-1.0.10/aclocal/kerberos5.m4 to lustre/autoconf
 5. Copy nfs-utils-1.0.10/utils/gssd/*.[ch] to here
index 3b39316..0a268d4 100644 (file)
@@ -132,6 +132,17 @@ void qword_addint(char **bpp, int *lp, int n)
        *lp -= len;
 }
 
+/*
+ * Append the decimal form of n followed by one space to the buffer at
+ * *bpp, which has *lp bytes left. Afterwards *bpp is advanced and *lp is
+ * reduced by the formatted length, capped at the space that was left.
+ */
+void qword_adduint(char **bpp, int *lp, unsigned int n)
+{
+       int written = snprintf(*bpp, *lp, "%u ", n);
+
+       /* snprintf reports the untruncated length; never step past the end */
+       if (written > *lp)
+               written = *lp;
+
+       *lp -= written;
+       *bpp += written;
+}
+
 void qword_addeol(char **bpp, int *lp)
 {
        if (*lp <= 0)
@@ -173,11 +184,13 @@ void qword_printint(FILE *f, int num)
        printerr(2, "%d ", num);
 }
 
-void qword_eol(FILE *f)
+int qword_eol(FILE *f)
 {
+       int err;
        fprintf(f,"\n");
-       fflush(f);
+       err = fflush(f);
        printerr(2, "\n");
+       return err;
 }
 
 
index cc97b36..6585fc7 100644 (file)
 void qword_add(char **bpp, int *lp, char *str);
 void qword_addhex(char **bpp, int *lp, char *buf, int blen);
 void qword_addint(char **bpp, int *lp, int n);
+void qword_adduint(char **bpp, int *lp, unsigned int n);
 void qword_addeol(char **bpp, int *lp);
 void qword_print(FILE *f, char *str);
 void qword_printhex(FILE *f, char *str, int slen);
 void qword_printint(FILE *f, int num);
-void qword_eol(FILE *f);
+int qword_eol(FILE *f);
 int readline(int fd, char **buf, int *lenp);
 int qword_get(char **bpp, char *dest, int bufsize);
 int qword_get_int(char **bpp, int *anint);
index 5f347bb..f45caca 100644 (file)
 #include <syslog.h>
 #include <string.h>
 #include <gssapi/gssapi.h>
-#include "gss_util.h"
-#include "gss_oids.h"
-#include "err_util.h"
+
+#ifdef _NEW_BUILD_
+# include "lgss_utils.h"
+#else
+# include "gss_util.h"
+# include "gss_oids.h"
+# include "err_util.h"
+#endif
 #include "context.h"
 
 int
index 5520cbc..25faa71 100644 (file)
 #ifdef HAVE_COM_ERR_H
 #include <com_err.h>
 #endif
-#include "err_util.h"
-#include "gss_oids.h"
+
+#ifdef _NEW_BUILD_
+# include "lgss_utils.h"
+#else
+# include "err_util.h"
+# include "gss_oids.h"
+#endif
 #include "write_bytes.h"
 
 int write_heimdal_keyblock(char **p, char *end, krb5_keyblock *key)
index 2f802de..3b53fc8 100644 (file)
@@ -49,9 +49,14 @@ typedef uint64_t OM_uint64;
 #endif
 #include <gssapi/gssapi_krb5.h>
 
-#include "gss_util.h"
-#include "gss_oids.h"
-#include "err_util.h"
+#ifdef _NEW_BUILD_
+# include "lgss_utils.h"
+#else
+# include "gss_util.h"
+# include "gss_oids.h"
+# include "err_util.h"
+#endif
+#include "write_bytes.h"
 #include "context.h"
 
 static int
@@ -389,7 +394,7 @@ prepare_krb5_rfc4121_buffer(gss_krb5_lucid_context_v1_t *lctx,
        if (WRITE_BYTES(&p, end, lctx->send_seq)) goto out_err;
 
        /* Protocol 0 here implies DES3 or RC4 */
-       printerr(2, "%s: protocol %d\n", __FUNCTION__, lctx->protocol);
+       printerr(3, "protocol %d\n", lctx->protocol);
        if (lctx->protocol == 0) {
                enctype = lctx->rfc1964_kd.ctx_key.type;
 #ifdef HAVE_HEIMDAL
@@ -417,8 +422,8 @@ prepare_krb5_rfc4121_buffer(gss_krb5_lucid_context_v1_t *lctx,
                }
                numkeys = 3;
        }
-       printerr(2, "%s: serializing %d keys with enctype %d and size %d\n",
-                __FUNCTION__, numkeys, enctype, keysize);
+       printerr(3, "serializing %d keys with enctype %d and size %d\n",
+                numkeys, enctype, keysize);
        if (WRITE_BYTES(&p, end, enctype)) goto out_err;
        if (WRITE_BYTES(&p, end, keysize)) goto out_err;
        if (WRITE_BYTES(&p, end, numkeys)) goto out_err;
@@ -542,7 +547,7 @@ serialize_krb5_ctx(gss_ctx_id_t ctx, gss_buffer_desc *buf)
        gss_krb5_lucid_context_v1_t *lctx = 0;
        int retcode = 0;
 
-       printerr(2, "DEBUG: %s: lucid version!\n", __FUNCTION__);
+       printerr(3, "lucid version!\n");
        maj_stat = gss_export_lucid_sec_context(&min_stat, &ctx,
                                                1, &return_ctx);
        if (maj_stat != GSS_S_COMPLETE) {
index 43fc81d..1d734f8 100644 (file)
 #include <errno.h>
 #include <gssapi/gssapi.h>
 #include <rpc/rpc.h>
-#include "gss_util.h"
-#include "gss_oids.h"
-#include "err_util.h"
+
+#ifdef _NEW_BUILD_
+# include "lgss_utils.h"
+#else
+# include "gss_util.h"
+# include "gss_oids.h"
+# include "err_util.h"
+#endif
 #include "context.h"
 
 #include <krb5.h>
index c23e644..6650648 100644 (file)
@@ -203,7 +203,7 @@ main(int argc, char *argv[])
                                break;
                        case 'd':
                                strncpy(ccachedir, optarg, sizeof(ccachedir));
-                               if (ccachedir[sizeof(ccachedir-1)] != '\0')
+                               if (ccachedir[sizeof(ccachedir)-1] != '\0')
                                        errx(1, "ccachedir path name too long");
                                break;
                        default:
index a44724d..afc246a 100644 (file)
@@ -941,6 +941,18 @@ handle_krb5_upcall(struct clnt_info *clp)
                return;
        }
 
+       /* FIXME temporary fix, do this before fork.
+        * in case of errors could have memory leak!!!
+        */
+       if (updata.uid == 0) {
+               if (gssd_get_krb5_machine_cred_list(&credlist)) {
+                       printerr(0, "ERROR: Failed to obtain machine "
+                                   "credentials\n");
+                       do_error_downcall(clp->krb5_fd, updata.seq, -EPERM, 0);
+                       return;
+               }
+       }
+
        /* fork child process */
        pid = fork();
        if (pid < 0) {
@@ -975,11 +987,13 @@ handle_krb5_upcall(struct clnt_info *clp)
                 * Get a list of credential cache names and try each
                 * of them until one works or we've tried them all
                 */
+/*
                if (gssd_get_krb5_machine_cred_list(&credlist)) {
                        printerr(0, "ERROR: Failed to obtain machine "
                                    "credentials for %s\n", clp->servicename);
                        goto out_return_error;
                }
+*/
                for (ccname = credlist; ccname && *ccname; ccname++) {
                        gssd_setup_krb5_machine_gss_ccache(*ccname);
                        if ((gssd_create_lgd(clp, &lgd, &updata,
index 629c279..ab73add 100644 (file)
@@ -240,72 +240,66 @@ gssd_find_existing_krb5_ccache(uid_t uid, struct dirent **d)
                perror("scandir looking for krb5 credentials caches");
        }
        else if (n > 0) {
-               char substring[128];
-               char fullstring[128];
                char statname[1024];
-               snprintf(substring, sizeof(substring), "_%d_", uid);
-               snprintf(fullstring, sizeof(fullstring), "_%d", uid);
                for (i = 0; i < n; i++) {
                        printerr(3, "CC file '%s' being considered\n",
                                 namelist[i]->d_name);
-                       if (strstr(namelist[i]->d_name, substring) ||
-                           !strcmp(namelist[i]->d_name, fullstring)) {
-                               snprintf(statname, sizeof(statname),
-                                        "%s/%s", ccachedir,
-                                        namelist[i]->d_name);
-                               if (stat(statname, &tmp_stat)) {
-                                       printerr(0, "Error doing stat "
-                                                   "on file '%s'\n",
-                                                statname);
-                                       continue;
-                               }
-                               if (!S_ISREG(tmp_stat.st_mode)) {
-                                       printerr(3, "File '%s' is not "
-                                                   "a regular file\n",
-                                                statname);
-                                       continue;
-                               }
-                               printerr(3, "CC file '%s' matches "
-                                           "name check and has "
-                                           "mtime of %u\n",
-                                        namelist[i]->d_name,
-                                        tmp_stat.st_mtime);
-                               /* if more than one match is found,
-                                * return the most recent (the one
-                                * with the latest mtime),
-                                * and don't free the dirent */
-                               if (!found) {
+                       snprintf(statname, sizeof(statname),
+                                "%s/%s", ccachedir, namelist[i]->d_name);
+                       if (stat(statname, &tmp_stat)) {
+                               printerr(0, "Error doing stat on file '%s'\n",
+                                        statname);
+                               free(namelist[i]);
+                               continue;
+                       }
+                       /* Only pick caches owned by the user (uid) */
+                       if (tmp_stat.st_uid != uid) {
+                               printerr(3, "'%s' owned by %u, not %u\n",
+                                        statname, tmp_stat.st_uid, uid);
+                               free(namelist[i]);
+                               continue;
+                       }
+                       if (!S_ISREG(tmp_stat.st_mode)) {
+                               printerr(3, "'%s' is not a regular file\n",
+                                        statname);
+                               free(namelist[i]);
+                               continue;
+                       }
+                       printerr(3, "CC file '%s' matches owner check and has "
+                                "mtime of %u\n",
+                                namelist[i]->d_name, tmp_stat.st_mtime);
+                       /*
+                        * if more than one match is found, return the most
+                        * recent (the one with the latest mtime), and
+                        * don't free the dirent
+                        */
+                       if (!found) {
+                               best_match_dir = namelist[i];
+                               best_match_stat = tmp_stat;
+                               found++;
+                       }
+                       else {
+                               /*
+                                * If the current match has an mtime later
+                                * than the one we are looking at, then use
+                                * the current match.  Otherwise, we still
+                                * have the best match.
+                                */
+                               if (tmp_stat.st_mtime >
+                                           best_match_stat.st_mtime) {
+                                       free(best_match_dir);
                                        best_match_dir = namelist[i];
                                        best_match_stat = tmp_stat;
-                                       found++;
                                }
                                else {
-                                       /*
-                                        * If the current match has
-                                        * an mtime later than the
-                                        * one we are looking at,
-                                        * then use the current match.
-                                        * Otherwise, we still have
-                                        * the best match.
-                                        */
-                                       if (tmp_stat.st_mtime >
-                                                   best_match_stat.st_mtime) {
-                                               free(best_match_dir);
-                                               best_match_dir = namelist[i];
-                                               best_match_stat = tmp_stat;
-                                       }
-                                       else {
-                                               free(namelist[i]);
-                                       }
-                                       printerr(3, "CC file '%s' is our "
-                                                   "current best match "
-                                                   "with mtime of %u\n",
-                                                best_match_dir->d_name,
-                                                best_match_stat.st_mtime);
+                                       free(namelist[i]);
                                }
+                               printerr(3, "CC file '%s' is our "
+                                           "current best match "
+                                           "with mtime of %u\n",
+                                        best_match_dir->d_name,
+                                        best_match_stat.st_mtime);
                        }
-                       else
-                               free(namelist[i]);
                }
                free(namelist);
        }
@@ -1056,6 +1050,7 @@ limit_krb5_enctypes(struct rpc_gss_sec *sec, uid_t uid)
 #endif /* HAVE_SET_ALLOWABLE_ENCTYPES */
 #endif
 
+#if 0
 /*
  * Obtain supported enctypes from kernel.
  * Set defaults if info is not available.
@@ -1122,3 +1117,4 @@ gssd_obtain_kernel_krb5_info(void)
                         code);
        }
 }
+#endif
diff --git a/lustre/utils/gss/lgss_keyring.c b/lustre/utils/gss/lgss_keyring.c
new file mode 100644 (file)
index 0000000..cf267ad
--- /dev/null
@@ -0,0 +1,723 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  lgss_keyring.c
+ *  user-space upcall to create GSS context, using keyring interface to kernel
+ *
+ *  Copyright (c) 2007 Cluster File Systems, Inc.
+ *   Author: Eric Mei <ericm@clusterfs.com>
+ *
+ *   This file is part of the Lustre file system, http://www.lustre.org
+ *   Lustre is a trademark of Cluster File Systems, Inc.
+ *
+ *   You may have signed or agreed to another license before downloading
+ *   this software.  If so, you are bound by the terms and conditions
+ *   of that agreement, and the following does not apply to you.  See the
+ *   LICENSE file included with this distribution for more information.
+ *
+ *   If you did not agree to a different license, then this copy of Lustre
+ *   is open source software; you can redistribute it and/or modify it
+ *   under the terms of version 2 of the GNU General Public License as
+ *   published by the Free Software Foundation.
+ *
+ *   In either case, Lustre is distributed in the hope that it will be
+ *   useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+ *   of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   license text for more details.
+ */
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <fcntl.h>
+#include <string.h>
+#include <errno.h>
+#include <pwd.h>
+#include <keyutils.h>
+#include <gssapi/gssapi.h>
+
+#include <libcfs/libcfs.h>
+
+#include "lsupport.h"
+#include "lgss_utils.h"
+#include "write_bytes.h"
+#include "context.h"
+
+/*
+ * gss target string of lustre service we are negotiating for
+ */
+static char *g_service = NULL;
+
+/*
+ * all data about negotiation
+ *
+ * One instance describes a single context negotiation.  It is reset and
+ * filled by lgssc_init_nego_data(), driven by lgssc_negotiation() and
+ * partially released by lgssc_fini_nego_data().
+ */
+struct lgss_nego_data {
+        /* set to 1 by lgssc_negotiation() once gss_init_sec_context()
+         * has returned GSS_S_COMPLETE */
+        uint32_t        lnd_established:1;
+        /* copied from keyring_upcall_param::kup_uid */
+        uint32_t        lnd_uid;
+        /* copied from keyring_upcall_param::kup_svc */
+        uint32_t        lnd_lsvc;
+        /* points at keyring_upcall_param::kup_tgt (not a private copy) */
+        char           *lnd_uuid;
+
+        gss_OID         lnd_mech;               /* mech OID */
+        gss_name_t      lnd_svc_name;           /* service name */
+        u_int           lnd_req_flags;          /* request flags */
+        gss_cred_id_t   lnd_cred;               /* credential */
+        gss_ctx_id_t    lnd_ctx;                /* session context */
+        gss_buffer_desc lnd_rmt_ctx;            /* remote handle of context */
+        uint32_t        lnd_seq_win;            /* sequence window */
+
+        /* return value of do_nego_rpc(), or -EACCES on local failure */
+        int             lnd_rpc_err;
+        /* GSS major status of the step which failed, if any */
+        int             lnd_gss_err;
+};
+
+/*
+ * context creation response
+ *
+ * Decoded by do_nego_rpc() from the reply buffer of one negotiation rpc.
+ * The values of gr_ctx and gr_token are malloc'ed there.
+ */
+struct lgss_init_res {
+        gss_buffer_desc gr_ctx;         /* context handle */
+        u_int           gr_major;       /* major status */
+        u_int           gr_minor;       /* minor status */
+        u_int           gr_win;         /* sequence window */
+        gss_buffer_desc gr_token;       /* token */
+};
+
+/*
+ * parameters of one keyring upcall, collected in main() from the command
+ * line and from the call out info string (see parse_callout_info()).
+ */
+struct keyring_upcall_param {
+        uint32_t        kup_ver;        /* NOTE(review): not set by the code
+                                         * visible here - confirm usage */
+        uint32_t        kup_uid;        /* parsed from argv[6] */
+        uint32_t        kup_gid;        /* gid the child switches to;
+                                         * presumably from argv[7] - confirm */
+        uint32_t        kup_svc;        /* call out info [2]: lustre_svc */
+        uint64_t        kup_nid;        /* call out info [3]: target_nid */
+        char            kup_tgt[64];    /* call out info [4]: target_uuid */
+        char            kup_mech[16];   /* call out info [0]: mech_name */
+        int             kup_is_root;    /* call out info [1] has 'r' */
+        int             kup_is_mds;     /* call out info [1] has 'm' */
+};
+
+/****************************************
+ * child process: gss negotiation       *
+ ****************************************/
+
+#define INIT_CHANNEL    "/proc/fs/lustre/sptlrpc/gss/init_channel"
+
+/*
+ * Extract one length-prefixed buffer from the reply words at *pp.
+ *
+ * The wire format is a 32-bit length followed by the data, padded up to a
+ * multiple of 4 bytes.  *remain is the number of bytes still available in
+ * the reply buffer; both *pp and *remain are advanced on success.
+ *
+ * On success @buf owns a malloc'ed copy of the data (value is NULL when the
+ * length is 0).  On failure @buf is left empty.
+ *
+ * Returns 0 on success, -EACCES if the length does not fit into what is
+ * left of the reply, -ENOMEM if the copy can't be allocated.
+ */
+static int nego_reply_get_buffer(unsigned int **pp, size_t *remain,
+                                 gss_buffer_desc *buf)
+{
+        unsigned int   *p = *pp;
+        size_t          len, padded;
+
+        buf->length = 0;
+        buf->value = NULL;
+
+        if (*remain < sizeof(*p))
+                return -EACCES;
+        len = *p++;
+        *remain -= sizeof(*p);
+
+        padded = (len + 3) & ~(size_t) 3;
+        if (len > *remain || padded > *remain)
+                return -EACCES;
+
+        if (len != 0) {
+                buf->value = malloc(len);
+                if (buf->value == NULL)
+                        return -ENOMEM;
+                memcpy(buf->value, p, len);
+                buf->length = len;
+        }
+
+        *pp = p + padded / sizeof(*p);
+        *remain -= padded;
+        return 0;
+}
+
+/*
+ * Send one gss token to the server through the kernel and decode the reply.
+ *
+ * The request is described by a struct lgssd_ioctl_param which is written
+ * to INIT_CHANNEL; after the write the status is read back from
+ * param.status and the reply from the supplied reply buffer.
+ *
+ * @lnd        negotiation data (uid, lustre svc, target uuid are used)
+ * @gss_token  token produced by gss_init_sec_context() to be sent
+ * @gr         filled with the decoded reply; gr_ctx.value and
+ *             gr_token.value are malloc'ed and owned by the caller on
+ *             success, and left empty on failure
+ *
+ * Returns 0 on success, -ERESTART if the kernel reported -ETIMEDOUT (the
+ * caller should restart the negotiation), other negative errno on failure.
+ */
+int do_nego_rpc(struct lgss_nego_data *lnd,
+                gss_buffer_desc *gss_token,
+                struct lgss_init_res *gr)
+{
+        struct lgssd_ioctl_param  param;
+        struct passwd            *pw;
+        int                       fd, ret, rc;
+        char                      outbuf[8192];
+        unsigned int             *p;
+        size_t                    remain;
+
+        logmsg(LL_TRACE, "start negotiation rpc\n");
+
+        pw = getpwuid(lnd->lnd_uid);
+        if (!pw) {
+                logmsg(LL_ERR, "no uid %u in local user database\n",
+                       lnd->lnd_uid);
+                return -EACCES;
+        }
+
+        /* don't hand stack garbage to the kernel, nor parse it afterwards */
+        memset(&param, 0, sizeof(param));
+        memset(outbuf, 0, sizeof(outbuf));
+
+        param.version = GSSD_INTERFACE_VERSION;
+        param.uuid = lnd->lnd_uuid;
+        param.lustre_svc = lnd->lnd_lsvc;
+        param.uid = lnd->lnd_uid;
+        param.gid = pw->pw_gid;
+        param.send_token_size = gss_token->length;
+        param.send_token = (char *) gss_token->value;
+        param.reply_buf_size = sizeof(outbuf);
+        param.reply_buf = outbuf;
+
+        logmsg(LL_TRACE, "to open " INIT_CHANNEL "\n");
+
+        fd = open(INIT_CHANNEL, O_WRONLY);
+        if (fd < 0) {
+                logmsg(LL_ERR, "can't open " INIT_CHANNEL "\n");
+                return -EACCES;
+        }
+
+        logmsg(LL_TRACE, "to down-write\n");
+
+        ret = write(fd, &param, sizeof(param));
+        if (ret != (int) sizeof(param)) {
+                /* strerror() yields a string, so it must be printed by %s */
+                logmsg(LL_ERR, "lustre ioctl err: %s\n", strerror(errno));
+                close(fd);
+                return -EACCES;
+        }
+        close(fd);
+
+        logmsg(LL_TRACE, "do_nego_rpc: to parse reply\n");
+        if (param.status) {
+                rc = (int) param.status;
+
+                /* status is compared against -ETIMEDOUT below, i.e. it is
+                 * a negative errno: strerror() wants the positive value
+                 */
+                logmsg(LL_ERR, "status: %d (%s)\n",
+                       rc, strerror(rc < 0 ? -rc : rc));
+
+                /* kernel return -ETIMEDOUT means the rpc timedout, we should
+                 * notify the caller to reinitiate the gss negotiation, by
+                 * returning -ERESTART
+                 */
+                if (rc == -ETIMEDOUT)
+                        return -ERESTART;
+                else
+                        return rc;
+        }
+
+        /*
+         * reply layout, in 32-bit words: one word which is not used here,
+         * major, minor, seq window, then the context handle and the token,
+         * each as a length-prefixed padded buffer.
+         */
+        p = (unsigned int *) outbuf;
+        p++;
+        gr->gr_major = *p++;
+        gr->gr_minor = *p++;
+        gr->gr_win = *p++;
+        remain = sizeof(outbuf) - 4 * sizeof(*p);
+
+        rc = nego_reply_get_buffer(&p, &remain, &gr->gr_ctx);
+        if (rc == 0) {
+                rc = nego_reply_get_buffer(&p, &remain, &gr->gr_token);
+                if (rc) {
+                        /* don't leave a half-filled result behind */
+                        free(gr->gr_ctx.value);
+                        gr->gr_ctx.value = NULL;
+                        gr->gr_ctx.length = 0;
+                }
+        }
+        if (rc) {
+                logmsg(LL_ERR, "do_nego_rpc: can't parse reply: %d\n", rc);
+                return rc;
+        }
+
+        logmsg(LL_DEBUG, "do_nego_rpc: receive handle len %d, token len %d\n",
+               (int) gr->gr_ctx.length, (int) gr->gr_token.length);
+        return 0;
+}
+
+/*
+ * Run the GSS context establishment loop for @lnd.
+ *
+ * Each round calls gss_init_sec_context(); a token it produces is sent to
+ * the peer with do_nego_rpc() and the token coming back is fed into the
+ * next round.  The loop ends when gss reports GSS_S_COMPLETE or on error.
+ *
+ * Returns 0 with lnd_established set on success, -1 otherwise.
+ * if return error, the lnd_rpc_err or lnd_gss_err is set.
+ */
+int lgssc_negotiation(struct lgss_nego_data *lnd)
+{
+        struct lgss_init_res    gr;
+        gss_buffer_desc        *recv_tokenp, send_token;
+        OM_uint32               maj_stat, min_stat, ret_flags;
+
+        logmsg(LL_TRACE, "start gss negotiation\n");
+
+        /* GSS context establishment loop. */
+        memset(&gr, 0, sizeof(gr));
+        recv_tokenp = GSS_C_NO_BUFFER;
+
+        for (;;) {
+                maj_stat = gss_init_sec_context(&min_stat,
+                                                lnd->lnd_cred,
+                                                &lnd->lnd_ctx,
+                                                lnd->lnd_svc_name,
+                                                lnd->lnd_mech,
+                                                lnd->lnd_req_flags,
+                                                0,            /* time req */
+                                                NULL,         /* channel */
+                                                recv_tokenp,
+                                                NULL,         /* used mech */
+                                                &send_token,
+                                                &ret_flags,
+                                                NULL);        /* time rec */
+
+                /* the token received in the previous round has been fed
+                 * to gss above, it is not needed any longer */
+                if (recv_tokenp != GSS_C_NO_BUFFER) {
+                        gss_release_buffer(&min_stat, &gr.gr_token);
+                        recv_tokenp = GSS_C_NO_BUFFER;
+                }
+
+                if (maj_stat != GSS_S_COMPLETE &&
+                    maj_stat != GSS_S_CONTINUE_NEEDED) {
+                        lnd->lnd_gss_err = maj_stat;
+
+                        logmsg_gss(LL_ERR, lnd->lnd_mech, maj_stat, min_stat,
+                                   "failed init context");
+                        break;
+                }
+
+                /* gss has something to tell the peer: send it and collect
+                 * the reply into gr */
+                if (send_token.length != 0) {
+                        memset(&gr, 0, sizeof(gr));
+
+                        lnd->lnd_rpc_err = do_nego_rpc(lnd, &send_token, &gr);
+                        gss_release_buffer(&min_stat, &send_token);
+
+                        if (lnd->lnd_rpc_err) {
+                                logmsg(LL_ERR, "negotiation rpc error: %d\n",
+                                       lnd->lnd_rpc_err);
+                                return -1;
+                        }
+
+                        /* NOTE(review): gr.gr_ctx / gr.gr_token filled by
+                         * do_nego_rpc() are not released on this return */
+                        if (gr.gr_major != GSS_S_COMPLETE &&
+                            gr.gr_major != GSS_S_CONTINUE_NEEDED) {
+                                lnd->lnd_gss_err = gr.gr_major;
+
+                                logmsg(LL_ERR, "negotiation gss error %x\n",
+                                       lnd->lnd_gss_err);
+                                return -1;
+                        }
+
+                        /* remember the latest remote handle; the buffer
+                         * is referenced from lnd from now on */
+                        if (gr.gr_ctx.length != 0) {
+                                if (lnd->lnd_rmt_ctx.value)
+                                        gss_release_buffer(&min_stat,
+                                                           &lnd->lnd_rmt_ctx);
+                                lnd->lnd_rmt_ctx = gr.gr_ctx;
+                        }
+
+                        /* the peer answered with a token: it is only
+                         * usable if local gss still expects one; otherwise
+                         * leave the loop without lnd_established, which is
+                         * reported as failure below */
+                        if (gr.gr_token.length != 0) {
+                                if (maj_stat != GSS_S_CONTINUE_NEEDED)
+                                        break;
+                                recv_tokenp = &gr.gr_token;
+                        }
+                }
+
+                /* GSS_S_COMPLETE => check gss header verifier,
+                 * usually checked in gss_validate
+                 */
+                if (maj_stat == GSS_S_COMPLETE) {
+                        lnd->lnd_established = 1;
+                        lnd->lnd_seq_win = gr.gr_win;
+                        break;
+                }
+        }
+
+        /* End context negotiation loop. */
+        if (!lnd->lnd_established) {
+                if (gr.gr_token.length != 0)
+                        gss_release_buffer(&min_stat, &gr.gr_token);
+
+                /* no gss error was recorded, so report a generic rpc
+                 * level failure instead */
+                if (lnd->lnd_gss_err == GSS_S_COMPLETE)
+                        lnd->lnd_rpc_err = -EACCES;
+
+                logmsg(LL_ERR, "context negotiation failed\n");
+                return -1;
+        }
+
+        logmsg(LL_DEBUG, "successfully negotiated a context\n");
+        return 0;
+}
+
+/*
+ * Reset @lnd and prepare it for a negotiation described by @kup, using
+ * mechanism @mech.  The service name is imported from the global
+ * g_service string.
+ *
+ * Returns 0 on success, -1 on failure.
+ * if return error, the lnd_rpc_err or lnd_gss_err is set.
+ */
+int lgssc_init_nego_data(struct lgss_nego_data *lnd,
+                         struct keyring_upcall_param *kup,
+                         lgss_mech_t mech)
+{
+        gss_buffer_desc         svc_buf;
+        OM_uint32               major, minor;
+
+        memset(lnd, 0, sizeof(*lnd));
+
+        /* gss side starts out empty */
+        lnd->lnd_established = 0;
+        lnd->lnd_seq_win = 0;
+        lnd->lnd_ctx = GSS_C_NO_CONTEXT;
+        lnd->lnd_cred = GSS_C_NO_CREDENTIAL;
+        lnd->lnd_svc_name = GSS_C_NO_NAME;
+        lnd->lnd_rmt_ctx = (gss_buffer_desc) GSS_C_EMPTY_BUFFER;
+
+        /* who and what the negotiation is for, as given by the upcall */
+        lnd->lnd_uuid = kup->kup_tgt;
+        lnd->lnd_lsvc = kup->kup_svc;
+        lnd->lnd_uid = kup->kup_uid;
+
+        /* krb5 is the only mechanism handled here */
+        if (mech != LGSS_MECH_KRB5) {
+                logmsg(LL_ERR, "invalid mech: %d\n", mech);
+                lnd->lnd_rpc_err = -EACCES;
+                return -1;
+        }
+        lnd->lnd_mech = (gss_OID) &krb5oid;
+        lnd->lnd_req_flags = GSS_C_MUTUAL_FLAG;
+
+        svc_buf.length = strlen(g_service);
+        svc_buf.value = g_service;
+
+        major = gss_import_name(&minor, &svc_buf,
+                                (gss_OID) GSS_C_NT_HOSTBASED_SERVICE,
+                                &lnd->lnd_svc_name);
+        if (major == GSS_S_COMPLETE)
+                return 0;
+
+        logmsg_gss(LL_ERR, lnd->lnd_mech, major, minor,
+                   "can't import svc name");
+        lnd->lnd_gss_err = major;
+        return -1;
+}
+
+/*
+ * Release the service name and the credential held by @lnd, if any.
+ * Failures are only logged.  lnd_ctx and lnd_rmt_ctx are not touched here.
+ */
+void lgssc_fini_nego_data(struct lgss_nego_data *lnd)
+{
+        OM_uint32       major, minor;
+
+        if (lnd->lnd_svc_name != GSS_C_NO_NAME) {
+                major = gss_release_name(&minor, &lnd->lnd_svc_name);
+                if (major != GSS_S_COMPLETE) {
+                        logmsg_gss(LL_ERR, lnd->lnd_mech, major, minor,
+                                   "can't release service name");
+                }
+        }
+
+        if (lnd->lnd_cred == GSS_C_NO_CREDENTIAL)
+                return;
+
+        major = gss_release_cred(&minor, &lnd->lnd_cred);
+        if (major != GSS_S_COMPLETE) {
+                logmsg_gss(LL_ERR, lnd->lnd_mech, major, minor,
+                           "can't release credential");
+        }
+}
+
+/*
+ * Report a failed negotiation to the kernel by updating key @keyid with
+ * a zero sequence window followed by @rpc_error and @gss_error.
+ * The update is retried every 2 seconds for as long as it fails with EAGAIN.
+ *
+ * Returns 0 once the key is updated, -1 on any other error.
+ */
+static
+int error_kernel_key(key_serial_t keyid, int rpc_error, int gss_error)
+{
+        char     payload[32];
+        char    *cur = payload;
+        char    *limit = payload + sizeof(payload);
+        int      seqwin = 0;
+
+        logmsg(LL_TRACE, "revoking kernel key %08x\n", keyid);
+
+        WRITE_BYTES(&cur, limit, seqwin);
+        WRITE_BYTES(&cur, limit, rpc_error);
+        WRITE_BYTES(&cur, limit, gss_error);
+
+        while (keyctl_update(keyid, payload, cur - payload)) {
+                if (errno != EAGAIN) {
+                        logmsg(LL_ERR, "revoke key %08x: %s\n",
+                               keyid, strerror(errno));
+                        return -1;
+                }
+
+                logmsg(LL_WARN, "key %08x: revoking too soon, try again\n",
+                       keyid);
+                sleep(2);
+        }
+
+        logmsg(LL_INFO, "key %08x: revoked\n", keyid);
+        return 0;
+}
+
+/*
+ * Hand a negotiated context over to the kernel by updating key @keyid.
+ * The payload is the sequence window, the remote context handle and the
+ * serialized context @ctx_token, in that order.
+ * The update is retried every 2 seconds for as long as it fails with EAGAIN.
+ *
+ * Returns 0 on success, 1 if the payload can't be allocated, -1 otherwise.
+ */
+static
+int update_kernel_key(key_serial_t keyid,
+                      struct lgss_nego_data *lnd,
+                      gss_buffer_desc *ctx_token)
+{
+        unsigned int buf_size;
+        char        *buf, *p, *end;
+        int          rc = -1;
+
+        logmsg(LL_TRACE, "updating kernel key %08x\n", keyid);
+
+        buf_size = sizeof(lnd->lnd_seq_win) +
+                   sizeof(lnd->lnd_rmt_ctx.length) + lnd->lnd_rmt_ctx.length +
+                   sizeof(ctx_token->length) + ctx_token->length;
+        buf = malloc(buf_size);
+        if (buf == NULL) {
+                logmsg(LL_ERR, "key %08x: can't alloc update buf: size %d\n",
+                       keyid, buf_size);
+                return 1;
+        }
+
+        p = buf;
+        end = buf + buf_size;
+
+        /* pack the payload, giving up at the first piece which won't fit */
+        if (WRITE_BYTES(&p, end, lnd->lnd_seq_win) ||
+            write_buffer(&p, end, &lnd->lnd_rmt_ctx) ||
+            write_buffer(&p, end, ctx_token))
+                goto out;
+
+        while (keyctl_update(keyid, buf, p - buf)) {
+                if (errno != EAGAIN) {
+                        logmsg(LL_ERR, "update key %08x: %s\n",
+                               keyid, strerror(errno));
+                        goto out;
+                }
+
+                logmsg(LL_DEBUG, "key %08x: updating too soon, try again\n",
+                       keyid);
+                sleep(2);
+        }
+
+        rc = 0;
+        logmsg(LL_DEBUG, "key %08x: updated\n", keyid);
+out:
+        free(buf);
+        return rc;
+}
+
+/*
+ * Negotiate a gss context on behalf of key @keyid and push the result
+ * (or the error) into the kernel key.
+ *
+ * Steps: switch to the gid/uid of the upcall, link the key into the session
+ * keyring, build the service string, select the credential, negotiate,
+ * serialize the context and update the key.  Whenever a step fails the key
+ * is updated with an error through error_kernel_key() instead.
+ *
+ * @cred is released before returning in all cases.
+ * Returns 0 on success, non-zero on failure.
+ *
+ * note we can't assume authority in child process
+ */
+int lgssc_kr_negotiate(key_serial_t keyid, struct lgss_cred *cred,
+                       struct keyring_upcall_param *kup)
+{
+        struct lgss_nego_data   lnd;
+        gss_buffer_desc         token = GSS_C_EMPTY_BUFFER;
+        OM_uint32               min_stat;
+        int                     rc = -1;
+
+        logmsg(LL_TRACE, "child start on behalf of key %08x: "
+               "cred %p, uid %u, svc %u, nid %Lx\n", keyid, cred,
+               cred->lc_uid, cred->lc_tgt_svc, cred->lc_tgt_nid);
+
+        /* NOTE(review): a failure to switch gid/uid is only logged and the
+         * negotiation goes on with the current ids - confirm this is
+         * intended */
+        if (kup->kup_gid != 0 && setregid(kup->kup_gid, kup->kup_gid)) {
+                logmsg(LL_WARN, "key %08x, failed set gids to %u: %s\n",
+                       keyid, kup->kup_gid, strerror(errno));
+        }
+
+        if (kup->kup_uid != 0 && setreuid(kup->kup_uid, kup->kup_uid)) {
+                logmsg(LL_WARN, "key %08x, failed set uids to %u: %s\n",
+                       keyid, kup->kup_uid, strerror(errno));
+        }
+
+        /*
+         * link to session keyring, allow the key to be found.
+         */
+        if (keyctl_link(keyid, KEY_SPEC_SESSION_KEYRING)) {
+                logmsg(LL_ERR, "key %08x, failed to link to session "
+                       "keyring: %s\n", keyid, strerror(errno));
+                error_kernel_key(keyid, -EACCES, 0);
+                goto out_cred;
+        }
+
+        /* lnd is not initialized before lgssc_init_nego_data(), so the
+         * failures down to and including it skip lgssc_fini_nego_data() */
+        if (lgss_get_service_str(&g_service, kup->kup_svc, kup->kup_nid)) {
+                logmsg(LL_ERR, "key %08x: failed to construct service "
+                       "string\n", keyid);
+                error_kernel_key(keyid, -EACCES, 0);
+                goto out_unlink;
+        }
+
+        if (lgss_using_cred(cred)) {
+                logmsg(LL_ERR, "key %08x: can't using cred\n", keyid);
+                error_kernel_key(keyid, -EACCES, 0);
+                goto out_unlink;
+        }
+
+        if (lgssc_init_nego_data(&lnd, kup, cred->lc_mech->lmt_mech_n)) {
+                logmsg(LL_ERR, "key %08x: failed to initialize "
+                       "negotiation data\n", keyid);
+                error_kernel_key(keyid, lnd.lnd_rpc_err, lnd.lnd_gss_err);
+                goto out_unlink;
+        }
+
+        rc = lgssc_negotiation(&lnd);
+        if (rc) {
+                logmsg(LL_ERR, "key %08x: failed to negotiation\n", keyid);
+                error_kernel_key(keyid, lnd.lnd_rpc_err, lnd.lnd_gss_err);
+                goto out;
+        }
+
+        rc = serialize_context_for_kernel(lnd.lnd_ctx, &token, lnd.lnd_mech);
+        if (rc) {
+                logmsg(LL_ERR, "key %08x: failed to export context\n", keyid);
+                /* the serialization result is reported as the rpc error */
+                error_kernel_key(keyid, rc, lnd.lnd_gss_err);
+                goto out;
+        }
+
+        /* NOTE(review): if this update fails the key is not updated with
+         * an error either - confirm the kernel side copes with that */
+        rc = update_kernel_key(keyid,  &lnd, &token);
+        if (rc)
+                goto out;
+
+        rc = 0;
+        logmsg(LL_INFO, "key %08x for user %u is updated OK!\n",
+               keyid, kup->kup_uid);
+out:
+        if (token.length != 0)
+                gss_release_buffer(&min_stat, &token);
+
+        lgssc_fini_nego_data(&lnd);
+
+out_unlink:
+        if (keyctl_unlink(keyid, KEY_SPEC_SESSION_KEYRING)) {
+                logmsg(LL_WARN, "failed to unlink key %08x: %s\n",
+                       keyid, strerror(errno));
+        }
+
+out_cred:
+        lgss_release_cred(cred);
+        return rc;
+}
+
+/*
+ * Split the call out info string @coinfo and store the result in @uparam.
+ *
+ * call out info format: s[:s]...
+ *  [0]: mech_name      (string)
+ *  [1]: flags          (chars) FMT: r-root; m-mds
+ *  [2]: lustre_svc     (uint)
+ *  [3]: target_nid     (uint64)
+ *  [4]: target_uuid    (string)
+ *
+ * The last component takes the rest of the string, including any further
+ * ':'.  mech_name and target_uuid are truncated to fit their fields and are
+ * always NUL terminated.  The flags are only ever set, never cleared, so
+ * the caller is expected to pass a zeroed @uparam.
+ *
+ * Returns 0 on success, -1 if @coinfo is too long or has too few components.
+ */
+static
+int parse_callout_info(const char *coinfo,
+                       struct keyring_upcall_param *uparam)
+{
+        char    buf[1024];
+        char   *string = buf;
+        size_t  length;
+        int     i;
+        char   *data[5];
+        char   *pos;
+
+        /* work on a private copy, the separators get overwritten below */
+        length = strlen(coinfo) + 1;
+        if (length > sizeof(buf)) {
+                logmsg(LL_ERR, "coinfo too long\n");
+                return -1;
+        }
+        memcpy(buf, coinfo, length);
+
+        /* cut off the first 4 components ... */
+        for (i = 0; i < 4; i++) {
+                pos = strchr(string, ':');
+                if (pos == NULL) {
+                        logmsg(LL_ERR, "short of components\n");
+                        return -1;
+                }
+
+                *pos = '\0';
+                data[i] = string;
+                string = pos + 1;
+        }
+        /* ... whatever is left is the 5th one */
+        data[i] = string;
+
+        /* data[] has exactly 5 entries, one per %s */
+        logmsg(LL_TRACE, "components: %s,%s,%s,%s,%s\n",
+               data[0], data[1], data[2], data[3], data[4]);
+
+        /* strncpy() doesn't terminate on truncation, do it by hand */
+        strncpy(uparam->kup_mech, data[0], sizeof(uparam->kup_mech) - 1);
+        uparam->kup_mech[sizeof(uparam->kup_mech) - 1] = '\0';
+        if (strchr(data[1], 'r'))
+                uparam->kup_is_root = 1;
+        if (strchr(data[1], 'm'))
+                uparam->kup_is_mds = 1;
+        /* both fields are unsigned; the signed conversions would clamp
+         * values which have the top bit set */
+        uparam->kup_svc = strtoul(data[2], NULL, 0);
+        uparam->kup_nid = strtoull(data[3], NULL, 0);
+        strncpy(uparam->kup_tgt, data[4], sizeof(uparam->kup_tgt) - 1);
+        uparam->kup_tgt[sizeof(uparam->kup_tgt) - 1] = '\0';
+
+        logmsg(LL_DEBUG, "parse call out info: mech %s, is_root %d, "
+               "is_mds %d, svc %d, nid 0x%llx, tgt %s\n",
+               uparam->kup_mech, uparam->kup_is_root, uparam->kup_is_mds,
+               uparam->kup_svc, (unsigned long long) uparam->kup_nid,
+               uparam->kup_tgt);
+        return 0;
+}
+
+/****************************************
+ * main process                         *
+ ****************************************/
+
+/*
+ * Upcall helper entry point: check the arguments, build and prepare a cred
+ * for the requested mech, instantiate the key, then fork a child which runs
+ * the gss negotiation while the parent returns right away.
+ *
+ * Returns 0 in the parent once the child is forked, 1 on any failure; the
+ * child returns whatever lgssc_kr_negotiate() returns.
+ */
+int main(int argc, char *argv[])
+{
+        struct keyring_upcall_param     uparam;
+        struct lgss_mech_type          *mech;
+        struct lgss_cred               *cred;
+        key_serial_t                    key;
+        key_serial_t                    session_ring;
+        key_serial_t                    dest_ring;
+        pid_t                           pid;
+
+        /*
+         * the upcall is expected to pass exactly these arguments:
+         * [1]:  operation
+         * [2]:  key ID
+         * [3]:  key type
+         * [4]:  key description
+         * [5]:  call out info
+         * [6]:  UID
+         * [7]:  GID
+         * [8]:  thread keyring
+         * [9]:  process keyring
+         * [10]: session keyring
+         */
+        if (argc != 10 + 1) {
+                logmsg(LL_ERR, "invalid parameter number %d\n", argc);
+                return 1;
+        }
+
+        logmsg(LL_INFO, "key %s, desc %s, uid %s, sring %s, coinfo %s\n",
+               argv[2], argv[4], argv[6], argv[10], argv[5]);
+
+        /* "create" is the only operation handled */
+        if (strcmp(argv[1], "create") != 0) {
+                logmsg(LL_ERR, "invalid OP %s\n", argv[1]);
+                return 1;
+        }
+
+        memset(&uparam, 0, sizeof(uparam));
+
+        if (sscanf(argv[2], "%d", &key) != 1) {
+                logmsg(LL_ERR, "can't extract KeyID: %s\n", argv[2]);
+                return 1;
+        }
+
+        if (sscanf(argv[6], "%d", &uparam.kup_uid) != 1) {
+                logmsg(LL_ERR, "can't extract UID: %s\n", argv[6]);
+                return 1;
+        }
+
+        if (sscanf(argv[10], "%d", &session_ring) != 1) {
+                logmsg(LL_ERR, "can't extract session keyring: %s\n", argv[10]);
+                return 1;
+        }
+
+        if (parse_callout_info(argv[5], &uparam) != 0) {
+                logmsg(LL_ERR, "can't extract callout info: %s\n", argv[5]);
+                return 1;
+        }
+
+        logmsg(LL_TRACE, "parsing parameters OK\n");
+
+        /*
+         * look up the mech and build a cred for it
+         */
+        mech = lgss_name2mech(uparam.kup_mech);
+        if (!mech) {
+                logmsg(LL_ERR, "key %08x: unsupported mech: %s\n",
+                       key, uparam.kup_mech);
+                return 1;
+        }
+
+        if (lgss_mech_initialize(mech) != 0) {
+                logmsg(LL_ERR, "key %08x: can't initialize mech %s\n",
+                       key, mech->lmt_name);
+                return 1;
+        }
+
+        cred = lgss_create_cred(mech);
+        if (!cred) {
+                logmsg(LL_ERR, "key %08x: can't create a new %s cred\n",
+                       key, mech->lmt_name);
+                return 1;
+        }
+
+        cred->lc_uid = uparam.kup_uid;
+        cred->lc_fl_root = (uparam.kup_is_root != 0);
+        cred->lc_fl_mds = (uparam.kup_is_mds != 0);
+        cred->lc_tgt_nid = uparam.kup_nid;
+        cred->lc_tgt_svc = uparam.kup_svc;
+
+        if (lgss_prepare_cred(cred) != 0) {
+                logmsg(LL_ERR, "key %08x: failed to prepare credentials "
+                       "for user %d\n", key, uparam.kup_uid);
+                return 1;
+        }
+
+        /*
+         * instantiate the key with an empty payload; root and mds keys
+         * are given keyring 0, all others the session keyring
+         */
+        if (cred->lc_fl_root || cred->lc_fl_mds)
+                dest_ring = 0;
+        else
+                dest_ring = KEY_SPEC_SESSION_KEYRING;
+
+        if (keyctl_instantiate(key, NULL, 0, dest_ring) != 0) {
+                logmsg(LL_ERR, "instantiate key %08x: %s\n",
+                       key, strerror(errno));
+                return 1;
+        }
+
+        logmsg(LL_TRACE, "instantiated kernel key %08x\n", key);
+
+        /*
+         * the real gss negotiation is done by a forked child
+         */
+        pid = fork();
+        switch (pid) {
+        case -1:
+                logmsg(LL_ERR, "key %08x: can't create child: %s\n",
+                       key, strerror(errno));
+                return 1;
+        case 0:
+                return lgssc_kr_negotiate(key, cred, &uparam);
+        default:
+                break;
+        }
+
+        logmsg(LL_TRACE, "forked child %d\n", pid);
+        return 0;
+}
diff --git a/lustre/utils/gss/lgss_krb5_utils.c b/lustre/utils/gss/lgss_krb5_utils.c
new file mode 100644 (file)
index 0000000..0a4d44d
--- /dev/null
@@ -0,0 +1,793 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Modifications for Lustre
+ * Copyright 2007, Cluster File Systems, Inc.
+ * All rights reserved
+ * Author: Eric Mei <ericm@clusterfs.com>
+ */
+
+/*
+ *  Adapted in part from MIT Kerberos 5-1.2.1 slave/kprop.c and from
+ *  http://docs.sun.com/?p=/doc/816-1331/6m7oo9sms&a=view
+ *
+ *  Copyright (c) 2002-2004 The Regents of the University of Michigan.
+ *  All rights reserved.
+ *
+ *  Andy Adamson <andros@umich.edu>
+ *  J. Bruce Fields <bfields@umich.edu>
+ *  Marius Aamodt Eriksen <marius@umich.edu>
+ *  Kevin Coffman <kwc@umich.edu>
+ */
+
+/*
+ * slave/kprop.c
+ *
+ * Copyright 1990,1991 by the Massachusetts Institute of Technology.
+ * All Rights Reserved.
+ *
+ * Export of this software from the United States of America may
+ *   require a specific license from the United States Government.
+ *   It is the responsibility of any person or organization contemplating
+ *   export to obtain such a license before exporting.
+ *
+ * WITHIN THAT CONSTRAINT, permission to use, copy, modify, and
+ * distribute this software and its documentation for any purpose and
+ * without fee is hereby granted, provided that the above copyright
+ * notice appear in all copies and that both that copyright notice and
+ * this permission notice appear in supporting documentation, and that
+ * the name of M.I.T. not be used in advertising or publicity pertaining
+ * to distribution of the software without specific, written prior
+ * permission.  Furthermore if you modify this software you must label
+ * your software as modified software and not distribute it in such a
+ * fashion that it might be confused with the original M.I.T. software.
+ * M.I.T. makes no representations about the suitability of
+ * this software for any purpose.  It is provided "as is" without express
+ * or implied warranty.
+ */
+
+/*
+ * Copyright 1994 by OpenVision Technologies, Inc.
+ *
+ * Permission to use, copy, modify, distribute, and sell this software
+ * and its documentation for any purpose is hereby granted without fee,
+ * provided that the above copyright notice appears in all copies and
+ * that both that copyright notice and this permission notice appear in
+ * supporting documentation, and that the name of OpenVision not be used
+ * in advertising or publicity pertaining to distribution of the software
+ * without specific, written prior permission. OpenVision makes no
+ * representations about the suitability of this software for any
+ * purpose.  It is provided "as is" without express or implied warranty.
+ *
+ * OPENVISION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
+ * EVENT SHALL OPENVISION BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+ * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
+ * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
+ * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+/*
+  krb5_util.c
+
+  Copyright (c) 2004 The Regents of the University of Michigan.
+  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+
+  1. Redistributions of source code must retain the above copyright
+     notice, this list of conditions and the following disclaimer.
+  2. Redistributions in binary form must reproduce the above copyright
+     notice, this list of conditions and the following disclaimer in the
+     documentation and/or other materials provided with the distribution.
+  3. Neither the name of the University nor the names of its
+     contributors may be used to endorse or promote products derived
+     from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+*/
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include "config.h"
+#include <sys/param.h>
+//#include <rpc/rpc.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/utsname.h>
+#include <sys/socket.h>
+#include <arpa/inet.h>
+
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <netdb.h>
+#include <dirent.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <time.h>
+#include <gssapi/gssapi.h>
+#ifdef USE_PRIVATE_KRB5_FUNCTIONS
+#include <gssapi/gssapi_krb5.h>
+#endif
+#include <krb5.h>
+
+#include "lgss_utils.h"
+#include "lgss_krb5_utils.h"
+
+/*
+ * Take the LGSS_MUTEX_KRB5 lock; the process exits if it can't be taken.
+ */
+static void lgss_krb5_mutex_lock(void)
+{
+        if (lgss_mutex_lock(LGSS_MUTEX_KRB5) == 0)
+                return;
+
+        logmsg(LL_ERR, "can't lock process, abort!\n");
+        exit(-1);
+}
+
+/*
+ * Drop the LGSS_MUTEX_KRB5 lock; a failure is only warned about.
+ */
+static void lgss_krb5_mutex_unlock(void)
+{
+        int     rc = lgss_mutex_unlock(LGSS_MUTEX_KRB5);
+
+        if (rc != 0) {
+                logmsg(LL_WARN, "can't unlock process, other processes "
+                       "might need to wait long time\n");
+        }
+}
+
+/*
+ * NOTE
+ *  - currently we only support "normal" cache types: "FILE" and "MEMORY".
+ */
+
+/* turn a krb5 error code into a message string via error_message() */
+#define krb5_err_msg(code)      error_message(code)
+
+/* name prefixes of the two supported credential cache types */
+const char *krb5_cc_type_mem    = "MEMORY:";
+const char *krb5_cc_type_file   = "FILE:";
+
+/* default realm; NULL until lgss_krb5_get_local_realm() has filled it in */
+char    *krb5_this_realm        = NULL;
+/* keytab location */
+char    *krb5_keytab_file       = "/etc/krb5.keytab";
+/* cache type in use */
+char    *krb5_cc_type           = "FILE:";
+/*
+ * NOTE(review): the three strings below are presumably the pieces a cache
+ * file name is put together from (directory, name prefix, suffix used for
+ * the root cred) -- confirm against the code that builds the name.
+ */
+char    *krb5_cc_dir            = "/tmp";
+char    *krb5_cred_prefix       = "krb5cc_";
+char    *krb5_cred_root_suffix  = "lustre_root";
+
+/*
+ * krb5 specific data of a cred.
+ */
+struct lgss_krb5_cred {
+        /* presumably the name of the credential cache in use -- confirm */
+        char            kc_ccname[128];
+        int             kc_remove;        /* remove cache upon release */
+};
+
+/*
+ * Select @ccname as the credential cache to be used, either through
+ * gss_krb5_ccache_name() or, when USE_GSS_KRB5_CCACHE_NAME is not defined,
+ * through the KRB5CCNAME environment variable.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+static
+int lgss_krb5_set_ccache_name(const char *ccname)
+{
+#ifdef USE_GSS_KRB5_CCACHE_NAME
+        unsigned int    major, minor;
+
+        major = gss_krb5_ccache_name(&minor, ccname, NULL);
+        if (major != GSS_S_COMPLETE) {
+                logmsg(LL_ERR, "failed to set ccache name\n");
+                return -1;
+        }
+#else
+        /*
+         * There is no generic gssapi equivalent of the private function
+         * used in the other branch, so point the krb5 code at the cache
+         * by overwriting KRB5CCNAME in the environment.
+         */
+        if (setenv("KRB5CCNAME", ccname, 1) != 0) {
+                logmsg(LL_ERR, "set env of krb5 ccname: %s\n",
+                       strerror(errno));
+                return -1;
+        }
+#endif
+        logmsg(LL_DEBUG, "set cc: %s\n", ccname);
+        return 0;
+}
+
+/*
+ * Make sure krb5_this_realm holds the default realm, querying it through a
+ * temporary krb5 context on the first call; later calls return at once.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+static
+int lgss_krb5_get_local_realm(void)
+{
+        krb5_context    ctx = NULL;
+        krb5_error_code err;
+        int             rc;
+
+        /* already known, nothing to do */
+        if (krb5_this_realm)
+                return 0;
+
+        err = krb5_init_context(&ctx);
+        if (err) {
+                logmsg(LL_ERR, "init ctx: %s\n", krb5_err_msg(err));
+                return -1;
+        }
+
+        err = krb5_get_default_realm(ctx, &krb5_this_realm);
+        if (err) {
+                logmsg(LL_ERR, "get default realm: %s\n", krb5_err_msg(err));
+                rc = -1;
+        } else {
+                logmsg(LL_DEBUG, "Local realm: %s\n", krb5_this_realm);
+                rc = 0;
+        }
+
+        /* the context was only needed for the query */
+        krb5_free_context(ctx);
+        return rc;
+}
+
+static
+int princ_is_local_realm(krb5_context ctx, krb5_principal princ)
+{
+        return (lgss_krb5_strcasecmp(krb5_princ_realm(ctx, princ),
+                &nb