Whamcloud - gitweb
branch: b_new_cmd
authorericm <ericm>
Mon, 11 Sep 2006 16:53:38 +0000 (16:53 +0000)
committerericm <ericm>
Mon, 11 Sep 2006 16:53:38 +0000 (16:53 +0000)
land the first part of secure ptlrpc support.

46 files changed:
lustre/include/liblustre.h
lustre/include/lustre/lustre_idl.h
lustre/include/lustre/lustre_user.h
lustre/include/lustre_cfg.h
lustre/include/lustre_disk.h
lustre/include/lustre_import.h
lustre/include/lustre_net.h
lustre/include/lustre_param.h
lustre/include/lustre_sec.h [new file with mode: 0644]
lustre/include/obd.h
lustre/ldlm/ldlm_lib.c
lustre/ldlm/ldlm_lock.c
lustre/liblustre/lutil.c
lustre/llite/dir.c
lustre/llite/file.c
lustre/llite/llite_internal.h
lustre/llite/llite_lib.c
lustre/lmv/lmv_obd.c
lustre/lov/lov_obd.c
lustre/mdc/lproc_mdc.c
lustre/mdc/mdc_request.c
lustre/mds/handler.c
lustre/mdt/mdt_handler.c
lustre/mgs/mgs_llog.c
lustre/obdclass/genops.c
lustre/obdclass/obd_config.c
lustre/obdclass/obd_mount.c
lustre/osc/lproc_osc.c
lustre/osc/osc_request.c
lustre/ost/ost_handler.c
lustre/ptlrpc/Makefile.in
lustre/ptlrpc/autoMakefile.am
lustre/ptlrpc/client.c
lustre/ptlrpc/events.c
lustre/ptlrpc/import.c
lustre/ptlrpc/niobuf.c
lustre/ptlrpc/pack_generic.c
lustre/ptlrpc/pers.c
lustre/ptlrpc/pinger.c
lustre/ptlrpc/ptlrpc_internal.h
lustre/ptlrpc/ptlrpc_module.c
lustre/ptlrpc/sec.c [new file with mode: 0644]
lustre/ptlrpc/sec_null.c [new file with mode: 0644]
lustre/ptlrpc/sec_plain.c [new file with mode: 0644]
lustre/ptlrpc/service.c
lustre/utils/lfs.c

index fa39903..e23fec2 100644 (file)
@@ -583,6 +583,8 @@ struct task_struct {
         int state;
         struct signal pending;
         char comm[32];
+        int uid;
+        int gid;
         int pid;
         int fsuid;
         int fsgid;
@@ -705,6 +707,7 @@ static inline void del_timer(struct timer_list *l)
 
 typedef struct { volatile int counter; } atomic_t;
 
+#define ATOMIC_INIT(i) { (i) }  /* brace initializer for atomic_t's only member, counter */
 #define atomic_read(a) ((a)->counter)
 #define atomic_set(a,b) do {(a)->counter = b; } while (0)
 #define atomic_dec_and_test(a) ((--((a)->counter)) == 0)
@@ -721,6 +724,40 @@ typedef struct { volatile int counter; } atomic_t;
 #define unlikely(exp) (exp)
 #endif
 
+#define might_sleep()           /* stub: expands to nothing */
+#define might_sleep_if(c)       /* stub: expands to nothing, c is never evaluated */
+#define smp_mb()                /* stub: expands to nothing, no barrier is emitted */
+
+/* Set bit nr of the bitmap at addr; return 1 if it was already set, else 0. */
+static inline int test_and_set_bit(int nr, unsigned long *addr)
+{
+        const int bits = 8 * sizeof(long);      /* bits per word, not bytes */
+        unsigned long mask;
+        int oldbit;
+
+        addr += nr / bits;                      /* word that holds bit nr */
+        mask = 1UL << (nr % bits);              /* 1UL: shift at word width */
+
+        oldbit = (*addr & mask) != 0;           /* 0/1, safe to keep in an int */
+        *addr |= mask;                          /* plain (non-atomic) update */
+        return oldbit;
+}
+
+/* Clear bit nr of the bitmap at addr; return 1 if it was set before, else 0. */
+static inline int test_and_clear_bit(int nr, unsigned long *addr)
+{
+        const int bits = 8 * sizeof(long);      /* bits per word, not bytes */
+        unsigned long mask;
+        int oldbit;
+
+        addr += nr / bits;                      /* word that holds bit nr */
+        mask = 1UL << (nr % bits);              /* 1UL: shift at word width */
+
+        oldbit = (*addr & mask) != 0;           /* 0/1, safe to keep in an int */
+        *addr &= ~mask;                         /* plain (non-atomic) update */
+        return oldbit;
+}
+
 /* FIXME sys/capability will finally included linux/fs.h thus
  * cause numerous trouble on x86-64. as temporary solution for
  * build broken at cary, we copy definition we need from capability.h
index 30be4cb..17fd2d2 100644 (file)
@@ -1784,4 +1784,13 @@ static inline int fid_res_name_eq(const struct lu_fid *f,
 }
 
 #define JOIN_FILE_ALIGN 4096
+
+/* security opcodes */
+typedef enum {
+        SEC_CTX_INIT            = 801,
+        SEC_CTX_INIT_CONT       = 802,
+        SEC_CTX_FINI            = 803,
+        SEC_LAST_OPC                    /* 804: one past the last opcode above */
+} sec_cmd_t;
+
 #endif
index 5183732..f1119e8 100644 (file)
@@ -58,6 +58,7 @@ struct obd_statfs;
 #define LL_IOC_JOIN                     _IOW ('f', 163, long)
 #define IOC_OBD_STATFS                  _IOWR('f', 164, struct obd_statfs *)
 #define IOC_LOV_GETINFO                 _IOWR('f', 165, struct lov_user_mds_data *)
+#define LL_IOC_FLUSHCTX                 _IOW ('f', 166, long)
 
 #define LL_STATFS_MDC           1
 #define LL_STATFS_LOV           2
index f508bdf..ae21989 100644 (file)
@@ -58,6 +58,7 @@ enum lcfg_command_type {
         LCFG_LOV_ADD_INA    = 0x00ce013,
         LCFG_ADD_MDC        = 0x00cf014,
         LCFG_DEL_MDC        = 0x00cf015,
+        LCFG_SEC_FLAVOR     = 0x00ce016,
 };
 
 struct lustre_cfg_bufs {
index b2b7d48..69f07d7 100644 (file)
@@ -138,6 +138,8 @@ struct lustre_mount_data {
         int        lmd_exclude_count;
         char      *lmd_dev;           /* device name */
         char      *lmd_profile;       /* client only */
+        char      *lmd_sec_mdt;       /* sec from mdt (to ost/mdt) */
+        char      *lmd_sec_cli;       /* sec from client (to ost/mdt) */
         char      *lmd_opts;          /* lustre mount options (as opposed to 
                                          _device_ mount options) */
         __u32     *lmd_exclude;       /* array of OSTs to ignore */
index 0639d79..f29ea11 100644 (file)
@@ -64,6 +64,7 @@ struct obd_import {
         struct list_head          imp_delayed_list;
 
         struct obd_device        *imp_obd;
+        struct ptlrpc_sec        *imp_sec;
         cfs_waitq_t               imp_recovery_waitq;
 
         atomic_t                  imp_inflight;
@@ -78,6 +79,7 @@ struct obd_import {
         struct lustre_handle      imp_remote_handle;
         cfs_time_t                imp_next_ping;   /* jiffies */
         __u64                     imp_last_success_conn;   /* jiffies, 64-bit */
+        cfs_time_t                imp_next_reconnect;      /* seconds */
 
         /* all available obd_import_conn linked here */
         struct list_head          imp_conn_list;
@@ -98,7 +100,10 @@ struct obd_import {
                                   imp_pingable:1,         /* pingable */
                                   imp_resend_replay:1,    /* resend for replay */
                                   imp_recon_bk:1,         /* turn off reconnect if all failovers fail */
-                                  imp_last_recon:1;       /* internally used by above */
+                                  imp_last_recon:1,       /* internally used by above */
+                                  imp_force_reconnect:1;  /* need to reconnect 
+                                                           * even the status is
+                                                           * FULL */
         __u32                     imp_connect_op;
         struct obd_connect_data   imp_connect_data;
         __u64                     imp_connect_flags_orig;
index a2fd0df..8c2f0af 100644 (file)
@@ -38,6 +38,7 @@
 #include <lnet/lnet.h>
 #include <lustre/lustre_idl.h>
 #include <lustre_ha.h>
+#include <lustre_sec.h>
 #include <lustre_import.h>
 #include <lprocfs_status.h>
 
@@ -278,12 +279,16 @@ struct ptlrpc_reply_state {
         lnet_handle_md_t       rs_md_h;
         atomic_t               rs_refcount;
 
+        struct ptlrpc_svc_ctx *rs_svc_ctx;
+        struct lustre_msg     *rs_repbuf;       /* wrapper */
+        int                    rs_repbuf_len;   /* wrapper buf length */
+        int                    rs_repdata_len;  /* wrapper msg length */
+        struct lustre_msg     *rs_msg;          /* reply message */
+
         /* locks awaiting client reply ACK */
         int                    rs_nlocks;
         struct lustre_handle   rs_locks[RS_MAX_LOCKS];
         ldlm_mode_t            rs_modes[RS_MAX_LOCKS];
-        /* last member: variable sized reply message */
-        struct lustre_msg     *rs_msg;
 };
 
 struct ptlrpc_thread;
@@ -324,7 +329,7 @@ struct ptlrpc_request {
                  */
                 rq_replay:1,
                 rq_no_resend:1, rq_waiting:1, rq_receiving_reply:1,
-                rq_no_delay:1, rq_net_err:1;
+                rq_no_delay:1, rq_net_err:1, rq_wait_ctx:1;
         enum rq_phase rq_phase; /* one of RQ_PHASE_* */
         atomic_t rq_refcount;   /* client-side refcount for SENT race */
 
@@ -345,6 +350,38 @@ struct ptlrpc_request {
         __u64 rq_xid;
         struct list_head rq_replay_list;
 
+        struct ptlrpc_cli_ctx   *rq_cli_ctx;     /* client's half ctx */
+        struct ptlrpc_svc_ctx   *rq_svc_ctx;     /* server's half ctx */
+        struct list_head         rq_ctx_chain;   /* link to waited ctx */
+        ptlrpc_flavor_t          rq_sec_flavor;  /* client & server */
+                                 /* client security flags */
+        unsigned int             rq_ctx_init:1,      /* context initiation */
+                                 rq_ctx_fini:1,      /* context destroy */
+                                 rq_bulk_read:1,     /* request bulk read */
+                                 rq_bulk_write:1,    /* request bulk write */
+                                 /* server authentication flags */
+                                 rq_auth_gss:1,      /* authenticated by gss */
+                                 rq_auth_remote:1,   /* authed as remote user */
+                                 rq_auth_usr_root:1, /* authed as root */
+                                 rq_auth_usr_mds:1;  /* authed as mds */
+
+        uid_t                    rq_auth_uid;        /* authed uid */
+        uid_t                    rq_auth_mapped_uid; /* authed uid mapped to */
+
+        /* (server side), pointed directly into req buffer */
+        struct ptlrpc_user_desc *rq_user_desc;
+
+        /* various buffer pointers */
+        struct lustre_msg       *rq_reqbuf;      /* req wrapper */
+        int                      rq_reqbuf_len;  /* req wrapper buf len */
+        int                      rq_reqdata_len; /* req wrapper msg len */
+        struct lustre_msg       *rq_repbuf;      /* rep wrapper */
+        int                      rq_repbuf_len;  /* rep wrapper buf len */
+        int                      rq_repdata_len; /* rep wrapper msg len */
+        struct lustre_msg       *rq_clrbuf;      /* only in priv mode */
+        int                      rq_clrbuf_len;  /* only in priv mode */
+        int                      rq_clrdata_len; /* only in priv mode */
+
 #if SWAB_PARANOIA
         __u32 rq_req_swab_mask;
         __u32 rq_rep_swab_mask;
@@ -421,9 +458,10 @@ ptlrpc_rqphase2str(const struct ptlrpc_request *req)
         FLAG(req->rq_timedout, "X") /* eXpired */, FLAG(req->rq_resend, "S"),   \
         FLAG(req->rq_restart, "T"), FLAG(req->rq_replay, "P"),                  \
         FLAG(req->rq_no_resend, "N"),                                           \
-        FLAG(req->rq_waiting, "W")
+        FLAG(req->rq_waiting, "W"),                                             \
+        FLAG(req->rq_wait_ctx, "C")
 
-#define REQ_FLAGS_FMT "%s:%s%s%s%s%s%s%s%s%s"
+#define REQ_FLAGS_FMT "%s:%s%s%s%s%s%s%s%s%s%s"
 
 #define __DEBUG_REQ(CDEB_TYPE, level, req, fmt, args...)                       \
 CDEB_TYPE(level, "@@@ " fmt                                                    \
@@ -490,8 +528,10 @@ struct ptlrpc_bulk_desc {
         lnet_handle_md_t        bd_md_h;         /* associated MD */
 
 #if defined(__KERNEL__)
+        lnet_kiov_t            *bd_enc_iov;     /* used in privacy mode */
         lnet_kiov_t             bd_iov[0];
 #else
+        lnet_md_iovec_t        *bd_enc_iov;
         lnet_md_iovec_t         bd_iov[0];
 #endif
 };
@@ -709,7 +749,8 @@ struct ptlrpc_request *ptlrpc_prep_req(struct obd_import *imp, __u32 version,
 struct ptlrpc_request *ptlrpc_prep_req_pool(struct obd_import *imp,
                                              __u32 version, int opcode,
                                             int count, int *lengths, char **bufs,
-                                            struct ptlrpc_request_pool *pool);
+                                            struct ptlrpc_request_pool *pool,
+                                            struct ptlrpc_cli_ctx *ctx);
 void ptlrpc_free_req(struct ptlrpc_request *request);
 void ptlrpc_req_finished(struct ptlrpc_request *request);
 void ptlrpc_req_finished_with_imp_lock(struct ptlrpc_request *request);
@@ -785,16 +826,23 @@ int ptlrpc_import_recovery_state_machine(struct obd_import *imp);
 /* ptlrpc/pack_generic.c */
 int lustre_msg_swabbed(struct lustre_msg *msg);
 int lustre_msg_check_version(struct lustre_msg *msg, __u32 version);
+void lustre_init_msg_v2(struct lustre_msg_v2 *msg, int count, int *lens,
+                        char **bufs);
 int lustre_pack_request(struct ptlrpc_request *, __u32 magic, int count,
                         int *lens, char **bufs);
 int lustre_pack_reply(struct ptlrpc_request *, int count, int *lens,
                       char **bufs);
-void lustre_shrink_reply(struct ptlrpc_request *req, int segment,
-                         unsigned int newlen, int move_data);
+int lustre_pack_reply_v2(struct ptlrpc_request *req, int count,
+                         int *lens, char **bufs);
+int lustre_shrink_msg(struct lustre_msg *msg, int segment,
+                      unsigned int newlen, int move_data);
 void lustre_free_reply_state(struct ptlrpc_reply_state *rs);
 int lustre_msg_size(__u32 magic, int count, int *lengths);
+int lustre_msg_size_v2(int count, int *lengths);
 int lustre_unpack_msg(struct lustre_msg *m, int len);
 int lustre_unpack_ptlrpc_body(struct lustre_msg *m);
+void *lustre_msg_buf_v1(void *msg, int n, int min_size);
+void *lustre_msg_buf_v2(struct lustre_msg_v2 *m, int n, int min_size);
 void *lustre_msg_buf(struct lustre_msg *m, int n, int minlen);
 int lustre_msg_buflen(struct lustre_msg *m, int n);
 void lustre_msg_set_buflen(struct lustre_msg *m, int n, int len);
@@ -833,6 +881,16 @@ void lustre_msg_set_status(struct lustre_msg *msg, __u32 status);
 void lustre_msg_set_conn_cnt(struct lustre_msg *msg, __u32 conn_cnt);
 
 static inline void
+lustre_shrink_reply(struct ptlrpc_request *req, int segment,
+                    unsigned int newlen, int move_data)
+{
+        LASSERT(req->rq_reply_state);   /* a reply state must be attached */
+        LASSERT(req->rq_repmsg);        /* and a reply message to shrink */
+        req->rq_replen = lustre_shrink_msg(req->rq_repmsg, segment,
+                                           newlen, move_data); /* result kept in rq_replen */
+}
+
+static inline void
 ptlrpc_rs_addref(struct ptlrpc_reply_state *rs)
 {
         LASSERT(atomic_read(&rs->rs_refcount) > 0);
@@ -887,6 +945,10 @@ int client_import_add_conn(struct obd_import *imp, struct obd_uuid *uuid,
 int client_import_del_conn(struct obd_import *imp, struct obd_uuid *uuid);
 int import_set_conn_priority(struct obd_import *imp, struct obd_uuid *uuid);
 
+/* ptlrpc/pers.c */
+int ptlrpc_bulk_alloc_enc_pages(struct ptlrpc_bulk_desc *desc);
+void ptlrpc_bulk_free_enc_pages(struct ptlrpc_bulk_desc *desc);
+
 /* ptlrpc/pinger.c */
 int ptlrpc_pinger_add_import(struct obd_import *imp);
 int ptlrpc_pinger_del_import(struct obd_import *imp);
index 427973d..adff8f1 100644 (file)
@@ -53,5 +53,9 @@ int do_lcfg(char *cfgname, lnet_nid_t nid, int cmd,
 #define PARAM_LOV_STRIPE_COUNT     PARAM_LOV"stripecount="
 #define PARAM_LOV_STRIPE_OFFSET    PARAM_LOV"stripeoffset="
 #define PARAM_LOV_STRIPE_PATTERN   PARAM_LOV"stripetype="
+#define PARAM_SEC                  "security."
+#define PARAM_SEC_RPC              PARAM_SEC"rpc."
+#define PARAM_SEC_RPC_MDT          PARAM_SEC_RPC"mdt="
+#define PARAM_SEC_RPC_CLI          PARAM_SEC_RPC"cli="
 
 #endif // _LUSTRE_PARAM_H
diff --git a/lustre/include/lustre_sec.h b/lustre/include/lustre_sec.h
new file mode 100644 (file)
index 0000000..1170cc5
--- /dev/null
@@ -0,0 +1,514 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2004-2006 Cluster File Systems, Inc.
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef _LUSTRE_SEC_H_
+#define _LUSTRE_SEC_H_
+
+/*
+ * to avoid include
+ */
+struct obd_import;
+struct ptlrpc_request;
+struct ptlrpc_reply_state;
+struct ptlrpc_bulk_desc;
+struct brw_page;
+
+/*
+ * forward declaration
+ */
+struct ptlrpc_sec_policy;
+struct ptlrpc_sec_cops;
+struct ptlrpc_sec_sops;
+struct ptlrpc_sec;
+struct ptlrpc_svc_ctx;
+struct ptlrpc_cli_ctx;
+struct ptlrpc_ctx_ops;
+
+/*
+ * flavor constants
+ */
+enum sptlrpc_policies {                         /* value for the 6-bit policy field */
+        SPTLRPC_POLICY_NULL             = 0,
+        SPTLRPC_POLICY_PLAIN            = 1,
+        SPTLRPC_POLICY_GSS              = 2,
+        SPTLRPC_POLICY_MAX,                     /* 3: count of policies above */
+};
+
+enum sptlrpc_subpolicy_null {
+        SPTLRPC_SUBPOLICY_NULL          = 0,
+        SPTLRPC_SUBPOLICY_NULL_MAX,             /* 1: count of entries above */
+};
+
+enum sptlrpc_subpolicy_plain {
+        SPTLRPC_SUBPOLICY_PLAIN         = 0,
+        SPTLRPC_SUBPOLICY_PLAIN_MAX,            /* 1: count of entries above */
+};
+
+enum sptlrpc_subpolicy_gss {
+        SPTLRPC_SUBPOLICY_GSS_NONE      = 0,
+        SPTLRPC_SUBPOLICY_GSS_KRB5      = 1,
+        SPTLRPC_SUBPOLICY_GSS_MAX,              /* 2: count of entries above */
+};
+
+enum sptlrpc_service_type {                     /* value for the 4-bit svc field */
+        SPTLRPC_SVC_NONE                = 0,    /* no security */
+        SPTLRPC_SVC_AUTH                = 1,    /* authentication */
+        SPTLRPC_SVC_PRIV                = 2,    /* privacy */
+        SPTLRPC_SVC_MAX,                        /* 3: count of entries above */
+};
+
+/*
+ * flavor compose/extract
+ */
+
+typedef __u32 ptlrpc_flavor_t;
+
+/*
+ *  8b (reserved) | 8b (flags) | 6b (policy) | 6b (subpolicy) | 4b (svc)
+ */
+#define SEC_FLAVOR_FLAGS_OFFSET         (16)    /* flags:     bits 16..23 */
+#define SEC_FLAVOR_POLICY_OFFSET        (10)    /* policy:    bits 10..15 */
+#define SEC_FLAVOR_SUBPOLICY_OFFSET     (4)     /* subpolicy: bits 4..9 */
+#define SEC_FLAVOR_SVC_OFFSET           (0)     /* svc:       bits 0..3 */
+
+#define SEC_MAKE_RPC_FLAVOR(policy, subpolicy, svc)                     \
+        (((__u32)(policy) << SEC_FLAVOR_POLICY_OFFSET) |                \
+         ((__u32)(subpolicy) << SEC_FLAVOR_SUBPOLICY_OFFSET) |          \
+         ((__u32)(svc) << SEC_FLAVOR_SVC_OFFSET)) /* inputs are not masked to field width */
+
+#define SEC_MAKE_RPC_SUBFLAVOR(subpolicy, svc)                          \
+        (((__u32)(subpolicy) << SEC_FLAVOR_SUBPOLICY_OFFSET) |          \
+         ((__u32)(svc) << SEC_FLAVOR_SVC_OFFSET)) /* policy bits are left zero */
+
+#define SEC_FLAVOR_POLICY(flavor)                                       \
+        ((((__u32)(flavor)) >> SEC_FLAVOR_POLICY_OFFSET) & 0x3F) /* 6-bit field */
+#define SEC_FLAVOR_SUBPOLICY(flavor)                                    \
+        ((((__u32)(flavor)) >> SEC_FLAVOR_SUBPOLICY_OFFSET) & 0x3F) /* 6-bit field */
+#define SEC_FLAVOR_SVC(flavor)                                          \
+        ((((__u32)(flavor)) >> SEC_FLAVOR_SVC_OFFSET) & 0xF) /* 4-bit field */
+#define SEC_FLAVOR_SUB(flavor)                                          \
+        ((((__u32)(flavor)) >> SEC_FLAVOR_SVC_OFFSET) & 0x3FF) /* subpolicy + svc, 10 bits */
+
+#define SEC_FLAVOR_RPC(f)                                               \
+        (((__u32)(f)) & ((1 << SEC_FLAVOR_FLAGS_OFFSET) - 1)) /* keep bits 0..15, drop flags */
+
+/*
+ * general gss flavors
+ */
+#define SPTLRPC_FLVR_GSS_NONE                                   \
+        SEC_MAKE_RPC_FLAVOR(SPTLRPC_POLICY_GSS,                 \
+                            SPTLRPC_SUBPOLICY_GSS_NONE,         \
+                            SPTLRPC_SVC_NONE)           /* 0x800 */
+#define SPTLRPC_FLVR_GSS_AUTH                                   \
+        SEC_MAKE_RPC_FLAVOR(SPTLRPC_POLICY_GSS,                 \
+                            SPTLRPC_SUBPOLICY_GSS_NONE,         \
+                            SPTLRPC_SVC_AUTH)           /* 0x801 */
+#define SPTLRPC_FLVR_GSS_PRIV                                   \
+        SEC_MAKE_RPC_FLAVOR(SPTLRPC_POLICY_GSS,                 \
+                            SPTLRPC_SUBPOLICY_GSS_NONE,         \
+                            SPTLRPC_SVC_PRIV)           /* 0x802 */
+
+/*
+ * gss subflavors
+ */
+#define SPTLRPC_SUBFLVR_KRB5                                    \
+        SEC_MAKE_RPC_SUBFLAVOR(SPTLRPC_SUBPOLICY_GSS_KRB5,      \
+                               SPTLRPC_SVC_NONE)        /* 0x10 */
+#define SPTLRPC_SUBFLVR_KRB5I                                   \
+        SEC_MAKE_RPC_SUBFLAVOR(SPTLRPC_SUBPOLICY_GSS_KRB5,      \
+                               SPTLRPC_SVC_AUTH)        /* 0x11 */
+#define SPTLRPC_SUBFLVR_KRB5P                                   \
+        SEC_MAKE_RPC_SUBFLAVOR(SPTLRPC_SUBPOLICY_GSS_KRB5,      \
+                               SPTLRPC_SVC_PRIV)        /* 0x12 */
+
+/*
+ * "end user" flavors
+ */
+#define SPTLRPC_FLVR_NULL                                       \
+        SEC_MAKE_RPC_FLAVOR(SPTLRPC_POLICY_NULL,                \
+                            SPTLRPC_SUBPOLICY_NULL,             \
+                            SPTLRPC_SVC_NONE)           /* 0x000 */
+#define SPTLRPC_FLVR_PLAIN                                      \
+        SEC_MAKE_RPC_FLAVOR(SPTLRPC_POLICY_PLAIN,               \
+                            SPTLRPC_SUBPOLICY_PLAIN,            \
+                            SPTLRPC_SVC_NONE)           /* 0x400 */
+#define SPTLRPC_FLVR_KRB5                                       \
+        SEC_MAKE_RPC_FLAVOR(SPTLRPC_POLICY_GSS,                 \
+                            SPTLRPC_SUBPOLICY_GSS_KRB5,         \
+                            SPTLRPC_SVC_NONE)           /* 0x810 */
+#define SPTLRPC_FLVR_KRB5I                                      \
+        SEC_MAKE_RPC_FLAVOR(SPTLRPC_POLICY_GSS,                 \
+                            SPTLRPC_SUBPOLICY_GSS_KRB5,         \
+                            SPTLRPC_SVC_AUTH)           /* 0x811 */
+#define SPTLRPC_FLVR_KRB5P                                      \
+        SEC_MAKE_RPC_FLAVOR(SPTLRPC_POLICY_GSS,                 \
+                            SPTLRPC_SUBPOLICY_GSS_KRB5,         \
+                            SPTLRPC_SVC_PRIV)           /* 0x812 */
+
+#define SPTLRPC_FLVR_INVALID            (-1)
+
+#define SPTLRPC_FLVR_DEFAULT            SPTLRPC_FLVR_NULL
+
+/*
+ * flavor flags (maximum 8 flags)
+ */
+#define SEC_FLAVOR_FL_BULK              (1 << (0 + SEC_FLAVOR_FLAGS_OFFSET)) /* bit 16, 0x10000 */
+#define SEC_FLAVOR_FL_USER              (1 << (1 + SEC_FLAVOR_FLAGS_OFFSET)) /* bit 17, 0x20000 */
+
+#define SEC_FLAVOR_HAS_BULK(flavor)             \
+        (((flavor) & SEC_FLAVOR_FL_BULK) != 0)  /* evaluates to 0 or 1 */
+#define SEC_FLAVOR_HAS_USER(flavor)             \
+        (((flavor) & SEC_FLAVOR_FL_USER) != 0)  /* evaluates to 0 or 1 */
+
+
+struct sec_flavor_config {
+        __u32   sfc_rpc_flavor; /* main rpc flavor */
+        __u32   sfc_bulk_priv;  /* bulk encryption algorithm */
+        __u32   sfc_bulk_csum;  /* bulk checksum algorithm */
+        __u32   sfc_flags;      /* extra flags */
+};
+
+enum lustre_part {              /* type of sptlrpc_parse_flavor()'s from/to arguments */
+        LUSTRE_CLI      = 0,
+        LUSTRE_MDT,             /* 1 */
+        LUSTRE_OST,             /* 2 */
+        LUSTRE_MGC,             /* 3 */
+        LUSTRE_MGS,             /* 4 */
+};
+
+/* The maximum length of security payload. 1024 is enough for Kerberos 5,
+ * and should be enough for other future mechanisms but not sure.
+ * Only used by pre-allocated request/reply pool.
+ */
+#define SPTLRPC_MAX_PAYLOAD     (1024)
+
+
+struct vfs_cred {               /* passed to the match, lookup_ctx and create_ctx ops */
+        uint32_t        vc_uid; /* user id */
+        uint32_t        vc_gid; /* group id */
+};
+
+struct ptlrpc_ctx_ops {         /* op table pointed to by ptlrpc_cli_ctx.cc_ops */
+        int     (*match)       (struct ptlrpc_cli_ctx *ctx,
+                                struct vfs_cred *vcred);
+        int     (*refresh)     (struct ptlrpc_cli_ctx *ctx);
+        /*
+         * rpc data transform
+         */
+        int     (*sign)        (struct ptlrpc_cli_ctx *ctx,
+                                struct ptlrpc_request *req);
+        int     (*verify)      (struct ptlrpc_cli_ctx *ctx,
+                                struct ptlrpc_request *req);
+        int     (*seal)        (struct ptlrpc_cli_ctx *ctx,
+                                struct ptlrpc_request *req);
+        int     (*unseal)      (struct ptlrpc_cli_ctx *ctx,
+                                struct ptlrpc_request *req);
+        /*
+         * bulk transform
+         */
+        int     (*wrap_bulk)   (struct ptlrpc_cli_ctx *ctx,
+                                struct ptlrpc_request *req,
+                                struct ptlrpc_bulk_desc *desc);
+        int     (*unwrap_bulk) (struct ptlrpc_cli_ctx *ctx,
+                                struct ptlrpc_request *req,
+                                struct ptlrpc_bulk_desc *desc);
+};
+
+#define PTLRPC_CTX_UPTODATE_BIT        (0)  /* uptodate */
+#define PTLRPC_CTX_DEAD_BIT            (1)  /* mark expired gracefully */
+#define PTLRPC_CTX_ERROR_BIT           (2)  /* fatal error (refresh, etc.) */
+#define PTLRPC_CTX_HASHED_BIT          (8)  /* in hash table */
+#define PTLRPC_CTX_ETERNAL_BIT         (9)  /* always valid */
+
+#define PTLRPC_CTX_UPTODATE            (1 << PTLRPC_CTX_UPTODATE_BIT)   /* 0x001 */
+#define PTLRPC_CTX_DEAD                (1 << PTLRPC_CTX_DEAD_BIT)       /* 0x002 */
+#define PTLRPC_CTX_ERROR               (1 << PTLRPC_CTX_ERROR_BIT)      /* 0x004 */
+#define PTLRPC_CTX_HASHED              (1 << PTLRPC_CTX_HASHED_BIT)     /* 0x100 */
+#define PTLRPC_CTX_ETERNAL             (1 << PTLRPC_CTX_ETERNAL_BIT)    /* 0x200 */
+
+#define PTLRPC_CTX_STATUS_MASK         (PTLRPC_CTX_UPTODATE   |       \
+                                        PTLRPC_CTX_DEAD       |       \
+                                        PTLRPC_CTX_ERROR) /* 0x007; HASHED/ETERNAL excluded */
+
+struct ptlrpc_cli_ctx {
+        struct hlist_node       cc_hash;       /* linked into hash table */
+        atomic_t                cc_refcount;   /* reference count */
+        struct ptlrpc_sec      *cc_sec;        /* presumably the owning sec -- confirm */
+        struct ptlrpc_ctx_ops  *cc_ops;        /* operation table for this ctx */
+        cfs_time_t              cc_expire;     /* in seconds */
+        unsigned long           cc_flags;      /* presumably PTLRPC_CTX_* bits -- confirm */
+        struct vfs_cred         cc_vcred;      /* uid/gid pair */
+        spinlock_t              cc_lock;       /* NOTE(review): guarded data not shown here */
+        struct list_head        cc_req_list;   /* waiting reqs linked here */
+};
+
+struct ptlrpc_sec_cops {        /* client op table, see ptlrpc_sec_policy.sp_cops */
+        /*
+         * ptlrpc_sec constructor/destructor
+         */
+        struct ptlrpc_sec *     (*create_sec)  (struct obd_import *imp,
+                                                struct ptlrpc_svc_ctx *ctx,
+                                                __u32 flavor,
+                                                unsigned long flags);
+        void                    (*destroy_sec) (struct ptlrpc_sec *sec);
+        /*
+         * search ctx for a certain user, if this function is missing,
+         * a generic function will be invoked by caller. implement this
+         * for any special need.
+         */
+        struct ptlrpc_cli_ctx * (*lookup_ctx)  (struct ptlrpc_sec *sec,
+                                                struct vfs_cred *vcred);
+        /*
+         * ptlrpc_cli_ctx constructor/destructor
+         */
+        struct ptlrpc_cli_ctx * (*create_ctx)  (struct ptlrpc_sec *sec,
+                                                struct vfs_cred *vcred);
+        void                    (*destroy_ctx) (struct ptlrpc_sec *sec,
+                                                struct ptlrpc_cli_ctx *ctx);
+        /* reverse service */
+        int                     (*install_rctx)(struct obd_import *imp,
+                                                struct ptlrpc_sec *sec,
+                                                struct ptlrpc_cli_ctx *ctx);
+        /*
+         * request/reply buffer manipulation
+         */
+        int                     (*alloc_reqbuf)(struct ptlrpc_sec *sec,
+                                                struct ptlrpc_request *req,
+                                                int lustre_msg_size);
+        void                    (*free_reqbuf) (struct ptlrpc_sec *sec,
+                                                struct ptlrpc_request *req);
+        int                     (*alloc_repbuf)(struct ptlrpc_sec *sec,
+                                                struct ptlrpc_request *req,
+                                                int lustre_msg_size);
+        void                    (*free_repbuf) (struct ptlrpc_sec *sec,
+                                                struct ptlrpc_request *req);
+};
+
+struct ptlrpc_sec_sops {        /* server op table, see ptlrpc_sec_policy.sp_sops */
+        int                     (*accept)      (struct ptlrpc_request *req);
+        int                     (*authorize)   (struct ptlrpc_request *req);
+        /* buffer manipulation */
+        int                     (*alloc_rs)    (struct ptlrpc_request *req,
+                                                int msgsize);
+        void                    (*free_rs)     (struct ptlrpc_reply_state *rs);
+        void                    (*free_ctx)    (struct ptlrpc_svc_ctx *ctx);
+        /* reverse credential */
+        int                     (*install_rctx)(struct obd_import *imp,
+                                                struct ptlrpc_svc_ctx *ctx);
+        /* bulk transform */
+        int                     (*unwrap_bulk) (struct ptlrpc_request *req,
+                                                struct ptlrpc_bulk_desc *desc);
+        int                     (*wrap_bulk)   (struct ptlrpc_request *req,
+                                                struct ptlrpc_bulk_desc *desc);
+};
+
+struct ptlrpc_sec_policy {
+        struct module                  *sp_owner;  /* used by sptlrpc_policy_get/put */
+        char                           *sp_name;   /* policy name string */
+        __u32                           sp_policy; /* policy number */
+        struct ptlrpc_sec_cops         *sp_cops;   /* client ops */
+        struct ptlrpc_sec_sops         *sp_sops;   /* server ops */
+};
+
+#define PTLRPC_SEC_FL_REVERSE           0x0001 /* reverse sec */
+#define PTLRPC_SEC_FL_ROOTONLY          0x0002 /* treat everyone as root */
+
+struct ptlrpc_sec {
+        struct ptlrpc_sec_policy       *ps_policy;      /* policy of this sec */
+        atomic_t                        ps_refcount;    /* reference count */
+        __u32                           ps_flavor;      /* rpc flavor */
+        unsigned long                   ps_flags;       /* PTLRPC_SEC_FL_XX */
+        struct obd_import              *ps_import;      /* owning import */
+        spinlock_t                      ps_lock;        /* protect ccache */
+        int                             ps_ccache_size; /* must be 2^n */
+        struct hlist_head              *ps_ccache;      /* ctx cache hash */
+        atomic_t                        ps_busy;        /* busy count */
+        cfs_time_t                      ps_gc_interval; /* in seconds */
+        cfs_time_t                      ps_gc_next;     /* in seconds */
+};
+
+struct ptlrpc_svc_ctx {
+        atomic_t                        sc_refcount;    /* reference count */
+        struct ptlrpc_sec_policy       *sc_policy;      /* policy of this ctx */
+};
+
+/*
+ * user identity descriptor
+ */
+#define LUSTRE_MAX_GROUPS               (128)
+
+struct ptlrpc_user_desc {
+        __u32           pud_uid;
+        __u32           pud_gid;
+        __u32           pud_fsuid;
+        __u32           pud_fsgid;
+        __u32           pud_cap;
+        __u32           pud_ngroups;    /* presumably the entry count of pud_groups -- confirm */
+        __u32           pud_groups[0];  /* zero-length tail; data follows the header */
+};
+
+/*
+ * bulk flavors
+ */
+enum bulk_checksum_alg {
+        BULK_CSUM_ALG_NULL      = 0,
+        BULK_CSUM_ALG_CRC32,            /* 1 */
+        BULK_CSUM_ALG_MD5,              /* 2 */
+        BULK_CSUM_ALG_SHA1,             /* 3 */
+        BULK_CSUM_ALG_SHA256,           /* 4 */
+        BULK_CSUM_ALG_SHA384,           /* 5 */
+        BULK_CSUM_ALG_SHA512,           /* 6 */
+        BULK_CSUM_ALG_MAX               /* 7: count of algorithms above */
+};
+
+enum bulk_encrypt_alg {
+        BULK_PRIV_ALG_NULL      = 0,
+        BULK_PRIV_ALG_ARC4,             /* 1 */
+        BULK_PRIV_ALG_MAX               /* 2: count of algorithms above */
+};
+
+struct ptlrpc_bulk_sec_desc {
+        __u32           bsd_version;
+        __u32           bsd_pad;
+        __u32           bsd_csum_alg;   /* checksum algorithm */
+        __u32           bsd_priv_alg;   /* encrypt algorithm */
+        __u8            bsd_iv[16];     /* encrypt iv */
+        __u8            bsd_csum[0];    /* zero-length tail; presumably the checksum -- confirm */
+};
+
+/*
+ * security type
+ */
+int sptlrpc_register_policy(struct ptlrpc_sec_policy *policy);
+int sptlrpc_unregister_policy(struct ptlrpc_sec_policy *policy);
+
+__u32 sptlrpc_name2flavor(const char *name);
+char *sptlrpc_flavor2name(__u32 flavor);
+
+static inline
+struct ptlrpc_sec_policy *sptlrpc_policy_get(struct ptlrpc_sec_policy *policy)
+{
+        __module_get(policy->sp_owner);         /* policy must be non-NULL */
+        return policy;                          /* same pointer handed back */
+}
+
+static inline
+void sptlrpc_policy_put(struct ptlrpc_sec_policy *policy)
+{
+        module_put(policy->sp_owner);           /* counterpart of sptlrpc_policy_get() */
+}
+
+/*
+ * client credential
+ */
+struct ptlrpc_cli_ctx *sptlrpc_ctx_get(struct ptlrpc_cli_ctx *ctx);
+void sptlrpc_ctx_put(struct ptlrpc_cli_ctx *ctx, int sync);
+void sptlrpc_ctx_expire(struct ptlrpc_cli_ctx *ctx);
+void sptlrpc_ctx_replace(struct ptlrpc_sec *sec, struct ptlrpc_cli_ctx *new);
+void sptlrpc_ctx_wakeup(struct ptlrpc_cli_ctx *ctx);
+
+/*
+ * client wrap/buffers
+ */
+int sptlrpc_cli_wrap_request(struct ptlrpc_request *req);
+int sptlrpc_cli_unwrap_reply(struct ptlrpc_request *req);
+int sptlrpc_cli_alloc_reqbuf(struct ptlrpc_request *req, int msgsize);
+int sptlrpc_cli_alloc_repbuf(struct ptlrpc_request *req, int msgsize);
+void sptlrpc_cli_free_reqbuf(struct ptlrpc_request *req);
+void sptlrpc_cli_free_repbuf(struct ptlrpc_request *req);
+void sptlrpc_request_out_callback(struct ptlrpc_request *req);
+
+/*
+ * higher-level interface for import & request
+ */
+int sptlrpc_import_get_sec(struct obd_import *imp, struct ptlrpc_svc_ctx *svc_ctx,
+                           __u32 flavor, unsigned long flags);
+void sptlrpc_import_put_sec(struct obd_import *imp);
+int sptlrpc_import_check_ctx(struct obd_import *imp);
+void sptlrpc_import_flush_root_ctx(struct obd_import *imp);
+void sptlrpc_import_flush_my_ctx(struct obd_import *imp);
+int  sptlrpc_req_get_ctx(struct ptlrpc_request *req);
+void sptlrpc_req_put_ctx(struct ptlrpc_request *req);
+int  sptlrpc_req_refresh_ctx(struct ptlrpc_request *req, long timeout);
+void sptlrpc_req_set_flavor(struct ptlrpc_request *req, int opcode);
+
+int sptlrpc_parse_flavor(enum lustre_part from, enum lustre_part to,
+                         char *str, struct sec_flavor_config *conf);
+/* misc */
+const char * sec2target_str(struct ptlrpc_sec *sec);
+int sptlrpc_lprocfs_rd(char *page, char **start, off_t off, int count,
+                       int *eof, void *data);
+
+/*
+ * server side
+ */
+/*
+ * Result codes, numbered consecutively from 0.
+ * NOTE(review): the name suggests these are outcomes of accepting an incoming
+ * request on the server side -- confirm where the values are produced.
+ */
+enum secsvc_accept_res {
+        SECSVC_OK       = 0,
+        SECSVC_COMPLETE,
+        SECSVC_DROP,
+};
+
+int  sptlrpc_svc_unwrap_request(struct ptlrpc_request *req);
+int  sptlrpc_svc_alloc_rs(struct ptlrpc_request *req, int msglen);
+int  sptlrpc_svc_wrap_reply(struct ptlrpc_request *req);
+void sptlrpc_svc_free_rs(struct ptlrpc_reply_state *rs);
+void sptlrpc_svc_ctx_addref(struct ptlrpc_request *req);
+void sptlrpc_svc_ctx_decref(struct ptlrpc_request *req);
+
+/*
+ * reverse context
+ */
+int sptlrpc_svc_install_rvs_ctx(struct obd_import *imp,
+                                struct ptlrpc_svc_ctx *ctx);
+int sptlrpc_cli_install_rvs_ctx(struct obd_import *imp,
+                                struct ptlrpc_cli_ctx *ctx);
+
+/* bulk security api */
+int sptlrpc_cli_wrap_bulk(struct ptlrpc_request *req,
+                          struct ptlrpc_bulk_desc *desc);
+int sptlrpc_cli_unwrap_bulk_read(struct ptlrpc_request *req,
+                                 int nob, obd_count pg_count,
+                                 struct brw_page **pga);
+int sptlrpc_cli_unwrap_bulk_write(struct ptlrpc_request *req,
+                                  struct ptlrpc_bulk_desc *desc);
+int sptlrpc_svc_wrap_bulk(struct ptlrpc_request *req,
+                          struct ptlrpc_bulk_desc *desc);
+int sptlrpc_svc_unwrap_bulk(struct ptlrpc_request *req,
+                            struct ptlrpc_bulk_desc *desc);
+
+/* user descriptor helpers */
+int sptlrpc_user_desc_size(void);
+int sptlrpc_pack_user_desc(struct lustre_msg *msg, int offset);
+int sptlrpc_unpack_user_desc(struct lustre_msg *msg, int offset);
+
+/* bulk helpers (internal use only by policies) */
+int bulk_sec_desc_size(__u32 csum_alg, int request, int read);
+int bulk_sec_desc_unpack(struct lustre_msg *msg, int offset);
+
+int bulk_csum_cli_request(struct ptlrpc_bulk_desc *desc, int read,
+                          __u32 alg, struct lustre_msg *rmsg, int roff);
+int bulk_csum_cli_reply(struct ptlrpc_bulk_desc *desc, int read,
+                        struct lustre_msg *rmsg, int roff,
+                        struct lustre_msg *vmsg, int voff);
+int bulk_csum_svc(struct ptlrpc_bulk_desc *desc, int read,
+                  struct lustre_msg *vmsg, int voff,
+                  struct lustre_msg *rmsg, int roff);
+#endif /* _LUSTRE_SEC_H_ */
index 6192e9a..debc5cf 100644 (file)
@@ -386,6 +386,9 @@ struct client_obd {
         int                      cl_max_mds_cookiesize;
         kdev_t                   cl_sandev;
 
+        /* security configuration */
+        struct sec_flavor_config cl_sec_conf;
+
         //struct llog_canceld_ctxt *cl_llcd; /* it's included by obd_llog_ctxt */
         void                    *cl_llcd_offset;
 
@@ -918,11 +921,12 @@ enum obd_cleanup_stage {
 };
 
 /* get/set_info keys */
-#define KEY_MDS_CONN "mds_conn"
-#define KEY_NEXT_ID  "next_id"
-#define KEY_LOVDESC  "lovdesc"
-#define KEY_INIT_RECOV "initial_recov"
-#define KEY_INIT_RECOV_BACKUP "init_recov_bk"
+#define KEY_MDS_CONN            "mds_conn"
+#define KEY_NEXT_ID             "next_id"
+#define KEY_LOVDESC             "lovdesc"
+#define KEY_INIT_RECOV          "initial_recov"
+#define KEY_INIT_RECOV_BACKUP   "init_recov_bk"
+#define KEY_FLUSH_CTX           "flush_ctx"
 
 struct lu_context;
 
index 41742c6..5ccc999 100644 (file)
@@ -36,6 +36,7 @@
 #include <lustre_mds.h>
 #include <lustre_dlm.h>
 #include <lustre_net.h>
+#include <lustre_sec.h>
 #include <lustre_ver.h>
 
 /* @priority: if non-zero, move the selected to the list head
@@ -176,6 +177,18 @@ out:
         RETURN(rc);
 }
 
+/*
+ * Destroy @imp and release the security instance attached to it.
+ *
+ * Calls, in this order: class_import_get(), class_destroy_import(),
+ * sptlrpc_import_put_sec(), class_import_put().
+ * NOTE(review): the get/put pair presumably keeps @imp valid across
+ * class_destroy_import() so that sptlrpc_import_put_sec() can still use
+ * it -- confirm against class_destroy_import().
+ */
+static
+void destroy_import(struct obd_import *imp)
+{
+        /* drop the security policy instance only after all RPCs have
+         * finished/aborted, so that all busy credentials get released.
+         */
+        class_import_get(imp);
+        class_destroy_import(imp);
+        sptlrpc_import_put_sec(imp);
+        class_import_put(imp);
+}
+
 /* configure an RPC client OBD device
  *
  * lcfg parameters:
@@ -235,6 +248,10 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg)
 
         sema_init(&cli->cl_sem, 1);
         sema_init(&cli->cl_mgc_sem, 1);
+        cli->cl_sec_conf.sfc_rpc_flavor = SPTLRPC_FLVR_NULL;
+        cli->cl_sec_conf.sfc_bulk_csum = BULK_CSUM_ALG_NULL;
+        cli->cl_sec_conf.sfc_bulk_priv = BULK_PRIV_ALG_NULL;
+        cli->cl_sec_conf.sfc_flags = 0;
         cli->cl_conn_count = 0;
         memcpy(server_uuid.uuid, lustre_cfg_buf(lcfg, 2),
                min_t(unsigned int, LUSTRE_CFG_BUFLEN(lcfg, 2),
@@ -374,6 +391,11 @@ int client_connect_import(const struct lu_context *ctx,
         if (rc != 0)
                 GOTO(out_ldlm, rc);
 
+        rc = sptlrpc_import_get_sec(imp, NULL, cli->cl_sec_conf.sfc_rpc_flavor,
+                                    cli->cl_sec_conf.sfc_flags);
+        if (rc)
+                GOTO(out_ldlm, rc);
+
         ocd = &imp->imp_connect_data;
         if (data) {
                 *ocd = *data;
@@ -465,7 +487,7 @@ int client_disconnect_export(struct obd_export *exp)
 
         ptlrpc_invalidate_import(imp);
         ptlrpc_free_rq_pool(imp->imp_rq_pool);
-        class_destroy_import(imp);
+        destroy_import(imp);
         cli->cl_import = NULL;
 
         EXIT;
@@ -776,8 +798,12 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler)
                                                        req->rq_self,
                                                        &remote_uuid);
 
-        if (lustre_msg_get_op_flags(req->rq_repmsg) & MSG_CONNECT_RECONNECT)
+        if (lustre_msg_get_op_flags(req->rq_repmsg) & MSG_CONNECT_RECONNECT) {
+                LASSERT(export->exp_imp_reverse);
+                sptlrpc_svc_install_rvs_ctx(export->exp_imp_reverse,
+                                            req->rq_svc_ctx);
                 GOTO(out, rc = 0);
+        }
 
         if (target->obd_recovering)
                 target->obd_connected_clients++;
@@ -787,7 +813,7 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler)
                sizeof conn);
 
         if (export->exp_imp_reverse != NULL)
-                class_destroy_import(export->exp_imp_reverse);
+                destroy_import(export->exp_imp_reverse);
         revimp = export->exp_imp_reverse = class_new_import(target);
         revimp->imp_connection = ptlrpc_connection_addref(export->exp_connection);
         revimp->imp_client = &export->exp_obd->obd_ldlm_client;
@@ -800,6 +826,14 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler)
                 lustre_msg_add_op_flags(req->rq_repmsg, MSG_CONNECT_NEXT_VER);
         }
 
+        rc = sptlrpc_import_get_sec(revimp, req->rq_svc_ctx,
+                                    req->rq_sec_flavor, 0);
+        if (rc) {
+                CERROR("Failed to get sec for reverse import: %d\n", rc);
+                export->exp_imp_reverse = NULL;
+                class_destroy_import(revimp);
+        }
+
         class_import_put(revimp);
 out:
         if (export)
@@ -830,7 +864,7 @@ void target_destroy_export(struct obd_export *exp)
         /* exports created from last_rcvd data, and "fake"
            exports created by lctl don't have an import */
         if (exp->exp_imp_reverse != NULL)
-                class_destroy_import(exp->exp_imp_reverse);
+                destroy_import(exp->exp_imp_reverse);
 
         /* We cancel locks at disconnect time, but this will catch any locks
          * granted in a race with recovery-induced disconnect. */
@@ -843,16 +877,53 @@ void target_destroy_export(struct obd_export *exp)
  */
 
 
-static void target_release_saved_req(struct ptlrpc_request *req)
+static
+struct ptlrpc_request *target_save_req(struct ptlrpc_request *src)
 {
-        if (req->rq_reply_state != NULL) {
-                ptlrpc_rs_decref(req->rq_reply_state);
-                /* req->rq_reply_state = NULL; */
+        struct ptlrpc_request *req;
+        struct lustre_msg *reqmsg;
+
+        OBD_ALLOC(req, sizeof(*req));
+        if (!req)
+                return NULL;
+
+        OBD_ALLOC(reqmsg, src->rq_reqlen);
+        if (!reqmsg) {
+                OBD_FREE(req, sizeof(*req));
+                return NULL;
         }
 
+        memcpy(req, src, sizeof(*req));
+        memcpy(reqmsg, src->rq_reqmsg, src->rq_reqlen);
+        req->rq_reqmsg = reqmsg;
+
+        class_export_get(req->rq_export);
+        CFS_INIT_LIST_HEAD(&req->rq_list);
+        sptlrpc_svc_ctx_addref(req);
+        if (req->rq_reply_state)
+                ptlrpc_rs_addref(req->rq_reply_state);
+
+        /* repmsg has been taken over; in privacy mode it might point to
+         * invalid data, so prevent further access to it.
+         */
+        src->rq_repmsg = NULL;
+        src->rq_replen = 0;
+
+        return req;
+}
+
+static
+void target_release_saved_req(struct ptlrpc_request *req)
+{
+        if (req->rq_reply_state) {
+                ptlrpc_rs_decref(req->rq_reply_state);
+                req->rq_reply_state = NULL;
+        }
+        sptlrpc_svc_ctx_decref(req);
         class_export_put(req->rq_export);
+
         OBD_FREE(req->rq_reqmsg, req->rq_reqlen);
-        OBD_FREE(req, sizeof *req);
+        OBD_FREE(req, sizeof(*req));
 }
 
 static void target_finish_recovery(struct obd_device *obd)
@@ -1108,13 +1179,8 @@ static void process_recovery_queue(struct obd_device *obd)
                 reset_recovery_timer(obd);
                 /* bug 1580: decide how to properly sync() in recovery */
                 //mds_fsync_super(obd->u.obt.obt_sb);
-                class_export_put(req->rq_export);
-                if (req->rq_reply_state != NULL) {
-                        ptlrpc_rs_decref(req->rq_reply_state);
-                        /* req->rq_reply_state = NULL; */
-                }
-                OBD_FREE(req->rq_reqmsg, req->rq_reqlen);
-                OBD_FREE(req, sizeof *req);
+                target_release_saved_req(req);
+
                 spin_lock_bh(&obd->obd_processing_task_lock);
                 obd->obd_next_recovery_transno++;
                 if (list_empty(&obd->obd_recovery_queue)) {
@@ -1134,7 +1200,6 @@ int target_queue_recovery_request(struct ptlrpc_request *req,
         int inserted = 0;
         __u64 transno = lustre_msg_get_transno(req->rq_reqmsg);
         struct ptlrpc_request *saved_req;
-        struct lustre_msg *reqmsg;
 
         /* CAVEAT EMPTOR: The incoming request message has been swabbed
          * (i.e. buflens etc are in my own byte order), but type-dependent
@@ -1147,13 +1212,9 @@ int target_queue_recovery_request(struct ptlrpc_request *req,
         }
 
         /* XXX If I were a real man, these LBUGs would be sane cleanups. */
-        /* XXX just like the request-dup code in queue_final_reply */
-        OBD_ALLOC(saved_req, sizeof *saved_req);
+        saved_req = target_save_req(req);
         if (!saved_req)
                 LBUG();
-        OBD_ALLOC(reqmsg, req->rq_reqlen);
-        if (!reqmsg)
-                LBUG();
 
         spin_lock_bh(&obd->obd_processing_task_lock);
 
@@ -1172,8 +1233,8 @@ int target_queue_recovery_request(struct ptlrpc_request *req,
                 /* Processing the queue right now, don't re-add. */
                 LASSERT(list_empty(&req->rq_list));
                 spin_unlock_bh(&obd->obd_processing_task_lock);
-                OBD_FREE(reqmsg, req->rq_reqlen);
-                OBD_FREE(saved_req, sizeof *saved_req);
+
+                target_release_saved_req(saved_req);
                 return 1;
         }
 
@@ -1183,17 +1244,12 @@ int target_queue_recovery_request(struct ptlrpc_request *req,
             (MSG_RESENT | MSG_REPLAY)) {
                 DEBUG_REQ(D_ERROR, req, "dropping resent queued req");
                 spin_unlock_bh(&obd->obd_processing_task_lock);
-                OBD_FREE(reqmsg, req->rq_reqlen);
-                OBD_FREE(saved_req, sizeof *saved_req);
+
+                target_release_saved_req(saved_req);
                 return 0;
         }
 
-        memcpy(saved_req, req, sizeof *req);
-        memcpy(reqmsg, req->rq_reqmsg, req->rq_reqlen);
         req = saved_req;
-        req->rq_reqmsg = reqmsg;
-        class_export_get(req->rq_export);
-        CFS_INIT_LIST_HEAD(&req->rq_list);
 
         /* XXX O(n^2) */
         list_for_each(tmp, &obd->obd_recovery_queue) {
@@ -1241,7 +1297,6 @@ int target_queue_final_reply(struct ptlrpc_request *req, int rc)
 {
         struct obd_device *obd = target_req2obd(req);
         struct ptlrpc_request *saved_req;
-        struct lustre_msg *reqmsg;
         int recovery_done = 0;
 
         LASSERT ((rc == 0) == (req->rq_reply_state != NULL));
@@ -1255,30 +1310,22 @@ int target_queue_final_reply(struct ptlrpc_request *req, int rc)
 
         LASSERT (!req->rq_reply_state->rs_difficult);
         LASSERT(list_empty(&req->rq_list));
-        /* XXX a bit like the request-dup code in queue_recovery_request */
-        OBD_ALLOC(saved_req, sizeof *saved_req);
+
+        saved_req = target_save_req(req);
         if (!saved_req)
                 LBUG();
-        OBD_ALLOC(reqmsg, req->rq_reqlen);
-        if (!reqmsg)
-                LBUG();
-        *saved_req = *req;
-        memcpy(reqmsg, req->rq_reqmsg, req->rq_reqlen);
 
         /* Don't race cleanup */
         spin_lock_bh(&obd->obd_processing_task_lock);
         if (obd->obd_stopping) {
                 spin_unlock_bh(&obd->obd_processing_task_lock);
-                OBD_FREE(reqmsg, req->rq_reqlen);
-                OBD_FREE(saved_req, sizeof *req);
+                target_release_saved_req(saved_req);
                 req->rq_status = -ENOTCONN;
                 /* rv is ignored anyhow */
                 return -ENOTCONN;
         }
-        ptlrpc_rs_addref(req->rq_reply_state);  /* +1 ref for saved reply */
+
         req = saved_req;
-        req->rq_reqmsg = reqmsg;
-        class_export_get(req->rq_export);
         list_add(&req->rq_list, &obd->obd_delayed_reply_queue);
 
         /* only count the first "replay over" request from each
index 3c21a00..fa4657b 100644 (file)
@@ -757,6 +757,10 @@ void ldlm_lock_allow_match(struct ldlm_lock *lock)
  *
  * Returns 1 if it finds an already-existing lock that is compatible; in this
  * case, lockh is filled in with a addref()ed lock
+ *
+ * We also check the security context; if that check fails we simply return 0
+ * (to keep caller code unchanged), and the context failure will be discovered
+ * by the caller some time later.
  */
 int ldlm_lock_match(struct ldlm_namespace *ns, int flags,
                     struct ldlm_res_id *res_id, ldlm_type_t type,
@@ -836,6 +840,18 @@ int ldlm_lock_match(struct ldlm_namespace *ns, int flags,
                                 res_id->name[2] : policy->l_extent.start,
                            (type == LDLM_PLAIN || type == LDLM_IBITS) ?
                                 res_id->name[3] : policy->l_extent.end);
+
+                /* check user's security context */
+                if (lock->l_conn_export &&
+                    sptlrpc_import_check_ctx(
+                                class_exp2cliimp(lock->l_conn_export))) {
+                        if (!(flags & LDLM_FL_TEST_LOCK))
+                                ldlm_lock_decref_internal(lock, mode);
+                        rc = 0;
+                }
+
+                if (flags & LDLM_FL_TEST_LOCK)
+                        LDLM_LOCK_PUT(lock);
         } else if (!(flags & LDLM_FL_TEST_LOCK)) {/*less verbose for test-only*/
                 LDLM_DEBUG_NOLOCK("not matched ns %p type %u mode %u res "
                                   LPU64"/"LPU64" ("LPU64" "LPU64")", ns,
@@ -847,8 +863,6 @@ int ldlm_lock_match(struct ldlm_namespace *ns, int flags,
         }
         if (old_lock)
                 LDLM_LOCK_PUT(old_lock);
-        if (flags & LDLM_FL_TEST_LOCK && rc)
-                LDLM_LOCK_PUT(lock);
 
         return rc;
 }
index b4689d5..0789c72 100644 (file)
@@ -217,6 +217,7 @@ int liblustre_init_current(char *comm)
 
         strncpy(current->comm, comm, sizeof(current->comm));
         current->pid = getpid();
+        current->gid = getgid();
         current->fsuid = geteuid();
         current->fsgid = getegid();
         memset(&current->pending, 0, sizeof(current->pending));
index 1eb10e2..807bb2d 100644 (file)
@@ -1090,6 +1090,9 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file,
                         RETURN (-EFAULT);
                 RETURN(0);
         }
+        case LL_IOC_FLUSHCTX:
+                RETURN(ll_flush_ctx(inode));
+
         default:
                 RETURN(obd_iocontrol(cmd, sbi->ll_dt_exp,0,NULL,(void *)arg));
         }
index 5d682fa..21052ba 100644 (file)
@@ -2017,6 +2017,8 @@ int ll_file_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
         case EXT3_IOC_SETVERSION_OLD:
         case EXT3_IOC_SETVERSION:
         */
+        case LL_IOC_FLUSHCTX:
+                RETURN(ll_flush_ctx(inode));
         default:
                 RETURN(obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
                                      (void *)arg));
index 49f4cd4..523ac02 100644 (file)
@@ -486,6 +486,7 @@ void ll_read_inode2(struct inode *inode, void *opaque);
 void ll_delete_inode(struct inode *inode);
 int ll_iocontrol(struct inode *inode, struct file *file,
                  unsigned int cmd, unsigned long arg);
+int ll_flush_ctx(struct inode *inode);
 void ll_umount_begin(struct super_block *sb);
 int ll_remount_fs(struct super_block *sb, int *flags, char *data);
 int ll_prep_inode(struct inode **inode, struct ptlrpc_request *req,
index 09a9120..58df98f 100644 (file)
@@ -1835,6 +1835,21 @@ int ll_iocontrol(struct inode *inode, struct file *file,
         RETURN(0);
 }
 
+/*
+ * Send the KEY_FLUSH_CTX set_info key, with no value, to the two exports of
+ * the superblock that owns @inode: first ll_md_exp, then ll_dt_exp.
+ *
+ * The results of both obd_set_info_async() calls are ignored; the function
+ * always returns 0.
+ */
+int ll_flush_ctx(struct inode *inode)
+{
+        struct ll_sb_info *sbi    = ll_i2sbi(inode);
+        /* key length excludes the terminating NUL of the string literal */
+        obd_count          keylen = sizeof(KEY_FLUSH_CTX) - 1;
+
+        CDEBUG(D_SEC, "flush context for user %d\n", current->uid);
+
+        obd_set_info_async(sbi->ll_md_exp, keylen, KEY_FLUSH_CTX,
+                           0, NULL, NULL);
+        obd_set_info_async(sbi->ll_dt_exp, keylen, KEY_FLUSH_CTX,
+                           0, NULL, NULL);
+
+        return 0;
+}
+
 /* umount -f client means force down, don't save state */
 void ll_umount_begin(struct super_block *sb)
 {
index 8bd4538..d9342be 100644 (file)
@@ -2080,62 +2080,22 @@ int lmv_set_info_async(struct obd_export *exp, obd_count keylen,
         }
         lmv = &obd->u.lmv;
 
-        /* maybe this could be default */
-        if ((keylen == strlen("sec") && strcmp(key, "sec") == 0) ||
-            (keylen == strlen("sec_flags") && strcmp(key, "sec_flags") == 0) ||
-            (keylen == strlen("nllu") && strcmp(key, "nllu") == 0)) {
-                struct obd_export *exp;
-                int err, i;
-
-                spin_lock(&lmv->lmv_lock);
-                for (i = 0, tgt = lmv->tgts; i < lmv->desc.ld_tgt_count;
-                     i++, tgt++) {
-                        exp = tgt->ltd_exp;
-                        /* during setup time the connections to mdc might
-                         * haven't been established.
-                         */
-                        if (exp == NULL) {
-                                struct obd_device *tgt_obd;
-
-                                tgt_obd = class_find_client_obd(&tgt->uuid,
-                                                                LUSTRE_MDC_NAME,
-                                                                &obd->obd_uuid);
-                                if (!tgt_obd) {
-                                        CERROR("can't set info %s, "
-                                               "device %s not attached?\n",
-                                                (char *) key, tgt->uuid.uuid);
-                                        rc = -EINVAL;
-                                        continue;
-                                }
-                                exp = tgt_obd->obd_self_export;
-                        }
-
-                        err = obd_set_info_async(exp, keylen, key, vallen, val, set);
-                        if (!rc)
-                                rc = err;
-                }
-                spin_unlock(&lmv->lmv_lock);
+        if (KEY_IS(KEY_FLUSH_CTX)) {
+                int i, err = 0;
 
-                RETURN(rc);
-        }
-        if (((keylen == strlen("flush_cred") &&
-             strcmp(key, "flush_cred") == 0)) ||
-             ((keylen == strlen("crypto_type") &&
-             strcmp(key, "crypto_type") == 0))) {
-                int i;
+                for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
+                        tgt = &lmv->tgts[i];
 
-                for (i = 0, tgt = lmv->tgts; i < lmv->desc.ld_tgt_count;
-                     i++, tgt++) {
                         if (!tgt->ltd_exp)
                                 continue;
-                        rc = obd_set_info_async(tgt->ltd_exp,
-                                                keylen, key, vallen,
-                                                val, set);
-                        if (rc)
-                                RETURN(rc);
+
+                        err = obd_set_info_async(tgt->ltd_exp,
+                                                 keylen, key, vallen, val, set);
+                        if (err && rc == 0)
+                                rc = err;
                 }
 
-                RETURN(0);
+                RETURN(rc);
         }
 
         RETURN(-EINVAL);
index 50c7a85..48b7c32 100644 (file)
@@ -2468,6 +2468,8 @@ static int lov_set_info_async(struct obd_export *exp, obd_count keylen,
         if (KEY_IS("unlinked")) {
                 if (vallen != 0 && KEY_IS("unlinked"))
                         GOTO(out, rc = -EINVAL);
+        } else if (KEY_IS(KEY_FLUSH_CTX)) {
+                /* fall through */
         } else {
                 GOTO(out, rc = -EINVAL);
         }
index 27107cd..d7b00dc 100644 (file)
@@ -79,6 +79,7 @@ static struct lprocfs_vars lprocfs_obd_vars[] = {
         { "mds_conn_uuid",   lprocfs_rd_conn_uuid,   0, 0 },
         { "max_rpcs_in_flight", mdc_rd_max_rpcs_in_flight,
                                 mdc_wr_max_rpcs_in_flight, 0 },
+        { "sptlrpc",         sptlrpc_lprocfs_rd, 0, 0 },
         { 0 }
 };
 
index 8ed7055..6a7edaa 100644 (file)
@@ -943,6 +943,11 @@ int mdc_set_info_async(struct obd_export *exp, obd_count keylen,
                 RETURN(rc);
         }
 
+        if (KEY_IS(KEY_FLUSH_CTX)) {
+                sptlrpc_import_flush_my_ctx(imp);
+                RETURN(0);
+        }
+
         RETURN(rc);
 }
 
index 9cf8423..4458f76 100644 (file)
@@ -1347,6 +1347,9 @@ int mds_msg_check_version(struct lustre_msg *msg)
         case MDS_CONNECT:
         case MDS_DISCONNECT:
         case OBD_PING:
+        case SEC_CTX_INIT:
+        case SEC_CTX_INIT_CONT:
+        case SEC_CTX_FINI:
                 rc = lustre_msg_check_version(msg, LUSTRE_OBD_VERSION);
                 if (rc)
                         CERROR("bad opc %u version %08x, expecting %08x\n",
index 4f8900f..a4ccba6 100644 (file)
@@ -1420,8 +1420,13 @@ static int mdt_recovery(struct ptlrpc_request *req)
 
         ENTRY;
 
-        if (lustre_msg_get_opc(req->rq_reqmsg) == MDS_CONNECT)
+        switch (lustre_msg_get_opc(req->rq_reqmsg)) {
+        case MDS_CONNECT:
+        case SEC_CTX_INIT:
+        case SEC_CTX_INIT_CONT:
+        case SEC_CTX_FINI:
                 RETURN(+1);
+        }
 
         if (req->rq_export == NULL) {
                 CERROR("operation %d on unconnected MDS from %s\n",
@@ -3411,6 +3416,9 @@ static struct mdt_handler mdt_dlm_ops[] = {
 static struct mdt_handler mdt_llog_ops[] = {
 };
 
+static struct mdt_handler mdt_sec_ops[] = {
+};
+
 static struct mdt_opc_slice mdt_regular_handlers[] = {
         {
                 .mos_opc_start = MDS_GETATTR,
@@ -3433,6 +3441,11 @@ static struct mdt_opc_slice mdt_regular_handlers[] = {
                 .mos_hs        = mdt_llog_ops
         },
         {
+                .mos_opc_start = SEC_CTX_INIT,
+                .mos_opc_end   = SEC_LAST_OPC,
+                .mos_hs        = mdt_sec_ops
+        },
+        {
                 .mos_hs        = NULL
         }
 };
index 9ff6eb2..dfca3ee 100644 (file)
@@ -45,6 +45,7 @@
 #include <lustre_disk.h>
 #include <lustre_param.h>
 #include <lustre_ver.h>
+#include <lustre_sec.h>
 #include "mgs_internal.h"
 
 /********************** Class fns ********************/
@@ -617,6 +618,24 @@ static inline int record_setup(struct obd_device *obd, struct llog_handle *llh,
         return record_base(obd,llh,devname,0,LCFG_SETUP,s1,s2,s3,s4);
 }
 
+/*
+ * Write one LCFG_SEC_FLAVOR record for device @devname into the llog @llh.
+ *
+ * The record carries @devname in the buffer set by lustre_cfg_bufs_reset()
+ * and a copy of *@conf (sizeof(*conf) bytes) in buffer 1.
+ *
+ * Returns the result of record_lcfg(), or -ENOMEM if the temporary
+ * lustre_cfg could not be allocated.
+ */
+static inline int record_sec_flavor(struct obd_device *obd,
+                                    struct llog_handle *llh, char *devname,
+                                    struct sec_flavor_config *conf)
+{
+        struct lustre_cfg_bufs bufs;
+        struct lustre_cfg *lcfg;
+        int rc;
+
+        lustre_cfg_bufs_reset(&bufs, devname);
+        lustre_cfg_bufs_set(&bufs, 1, conf, sizeof(*conf));
+        lcfg = lustre_cfg_new(LCFG_SEC_FLAVOR, &bufs);
+        /* do not hand a failed allocation to record_lcfg()/lustre_cfg_free() */
+        if (lcfg == NULL)
+                return -ENOMEM;
+
+        rc = record_lcfg(obd, llh, lcfg);
+
+        /* the cfg was only needed to build the record */
+        lustre_cfg_free(lcfg);
+        return rc;
+}
+
 static int record_lov_setup(struct obd_device *obd, struct llog_handle *llh,
                             char *devname, struct lov_desc *desc)
 {
@@ -854,14 +873,16 @@ int mgs_write_log_direct_all(struct obd_device *obd, struct fs_db *fsdb,
 }
 struct temp_comp
 {
-        struct mgs_target_info *comp_tmti;
-        struct mgs_target_info *comp_mti;
-        struct fs_db *comp_fsdb;
-        struct obd_device *comp_obd;
+        struct mgs_target_info   *comp_tmti;
+        struct mgs_target_info   *comp_mti;
+        struct fs_db             *comp_fsdb;
+        struct obd_device        *comp_obd;
+        struct sec_flavor_config  comp_sec;
 };
 
 static int mgs_write_log_mdc_to_mdt(struct obd_device *, struct fs_db *,
-                                    struct mgs_target_info *, char *);
+                                    struct mgs_target_info *,
+                                    struct sec_flavor_config *, char *);
 
 static int mgs_steal_llog_handler(struct llog_handle *llh,
                                   struct llog_rec_hdr *rec,
@@ -873,6 +894,7 @@ static int mgs_steal_llog_handler(struct llog_handle *llh,
         int cfg_len = rec->lrh_len;
         char *cfg_buf = (char*) (rec + 1);
         struct lustre_cfg *lcfg;
+        struct sec_flavor_config *sec_conf;
         int rc = 0;
         struct llog_handle *mdt_llh = NULL;
         static int got_an_osc_or_mdc = 0;
@@ -888,6 +910,7 @@ static int mgs_steal_llog_handler(struct llog_handle *llh,
         tmti = ((struct temp_comp*)data)->comp_tmti;
         fsdb = ((struct temp_comp*)data)->comp_fsdb;
         obd = ((struct temp_comp*)data)->comp_obd;
+        sec_conf = &((struct temp_comp*)data)->comp_sec;
 
         if (rec->lrh_type != OBD_CFG_REC) {
                 CERROR("unhandled lrh_type: %#x\n", rec->lrh_type);
@@ -966,17 +989,24 @@ static int mgs_steal_llog_handler(struct llog_handle *llh,
                 RETURN(rc);
         }
 
+        if (lcfg->lcfg_command == LCFG_SEC_FLAVOR) {
+                memcpy(sec_conf, lustre_cfg_buf(lcfg, 1), sizeof(*sec_conf));
+
+                RETURN(rc);
+        }
+
         if (lcfg->lcfg_command == LCFG_ADD_MDC) {
                 int index;
 
                 if (sscanf(lustre_cfg_buf(lcfg, 2), "%d", &index) != 1)
                         RETURN (-EINVAL);
-                
+
                 memcpy(tmti->mti_fsname, mti->mti_fsname,
                        strlen(mti->mti_fsname));
                 tmti->mti_stripe_index = index;
-                
-                mgs_write_log_mdc_to_mdt(obd, fsdb, tmti, mti->mti_svname);
+
+                mgs_write_log_mdc_to_mdt(obd, fsdb, tmti, sec_conf,
+                                         mti->mti_svname);
                 memset(tmti, 0, sizeof(*tmti));
                 RETURN(rc);
         }
@@ -1155,10 +1185,35 @@ static int mgs_write_log_failnids(struct obd_device *obd,
         return rc;
 }
 
+/*
+ * Copy the value of parameter @key out of the parameter string @params.
+ *
+ * The value starts where class_find_param() points and ends at the first
+ * space or at the end of the string. On success *@ptr is a newly allocated,
+ * NUL-terminated copy of (length + 1) bytes which the caller must free.
+ * *@ptr is left NULL when class_find_param() returns non-zero or when the
+ * allocation fails.
+ */
+static
+void extract_sec_flavor(char *params, char *key, char **ptr)
+{
+        char *start = NULL;
+        char *space;
+        int   nchars;
+
+        *ptr = NULL;
+
+        if (class_find_param(params, key, &start) != 0)
+                return;
+
+        /* the value runs up to the next space, or to the end of the string */
+        space = strchr(start, ' ');
+        nchars = (space != NULL) ? (int)(space - start) : (int)strlen(start);
+
+        OBD_ALLOC(*ptr, nchars + 1);
+        if (*ptr == NULL)
+                return;
+
+        memcpy(*ptr, start, nchars);
+        (*ptr)[nchars] = '\0';
+}
 
 /***************************************BEGIN PROTO****************************/
 static int mgs_write_log_mdc_to_lmv(struct obd_device *obd, struct fs_db *fsdb,
                                     struct mgs_target_info *mti,
+                                    struct sec_flavor_config *sec_conf,
                                     char *logname, char *lmvname)
 {
         struct llog_handle *llh = NULL;
@@ -1193,6 +1248,7 @@ static int mgs_write_log_mdc_to_lmv(struct obd_device *obd, struct fs_db *fsdb,
 
         rc = record_attach(obd, llh, mdcname, LUSTRE_MDC_NAME, lmvuuid);
         rc = record_setup(obd, llh, mdcname, mti->mti_uuid, nodeuuid, 0, 0);
+        rc = record_sec_flavor(obd, llh, mdcname, sec_conf);
         rc = mgs_write_log_failnids(obd, mti, llh, mdcname);
         snprintf(index, sizeof(index), "%d", mti->mti_stripe_index);
         rc = record_mdc_add(obd, llh, lmvname, mdcuuid, mti->mti_uuid,
@@ -1210,7 +1266,9 @@ static int mgs_write_log_mdc_to_lmv(struct obd_device *obd, struct fs_db *fsdb,
 
 /* add new mdc to already existent MDS */
 static int mgs_write_log_mdc_to_mdt(struct obd_device *obd, struct fs_db *fsdb,
-                                    struct mgs_target_info *mti, char *logname)
+                                    struct mgs_target_info *mti,
+                                    struct sec_flavor_config *sec_conf,
+                                    char *logname)
 {
         struct llog_handle *llh = NULL;
         char *nodeuuid, *mdcname, *mdcuuid, *mdtuuid;
@@ -1241,6 +1299,7 @@ static int mgs_write_log_mdc_to_mdt(struct obd_device *obd, struct fs_db *fsdb,
         }
         rc = record_attach(obd, llh, mdcname, LUSTRE_MDC_NAME, mdcuuid);
         rc = record_setup(obd, llh, mdcname, mti->mti_uuid, nodeuuid, 0, 0);
+        rc = record_sec_flavor(obd, llh, mdcname, sec_conf);
         rc = mgs_write_log_failnids(obd, mti, llh, mdcname);
         snprintf(index, sizeof(index), "%d", idx);
 
@@ -1299,9 +1358,10 @@ out:
 static int mgs_write_log_mdt(struct obd_device *obd, struct fs_db *fsdb,
                               struct mgs_target_info *mti)
 {
-        char *cliname;
+        char *cliname, *sec;
         struct llog_handle *llh = NULL;
         struct temp_comp comp = { 0 };
+        struct sec_flavor_config sec_conf_mdt, sec_conf_cli;
         char mdt_index[9];
         int rc, i = 0;
         ENTRY;
@@ -1330,6 +1390,19 @@ static int mgs_write_log_mdt(struct obd_device *obd, struct fs_db *fsdb,
                          "%s_UUID", mti->mti_svname);
         }
 
+        /* security flavor */
+        extract_sec_flavor(mti->mti_params, PARAM_SEC_RPC_MDT, &sec);
+        rc = sptlrpc_parse_flavor(LUSTRE_MDT, LUSTRE_MDT, sec, &sec_conf_mdt);
+        name_destroy(sec);
+        if (rc)
+                RETURN(rc);
+
+        extract_sec_flavor(mti->mti_params, PARAM_SEC_RPC_CLI, &sec);
+        rc = sptlrpc_parse_flavor(LUSTRE_CLI, LUSTRE_MDT, sec, &sec_conf_cli);
+        name_destroy(sec);
+        if (rc)
+                RETURN(rc);
+
         /* add mdt */
         rc = mgs_write_log_mdt0(obd, fsdb, mti);
         
@@ -1384,8 +1457,8 @@ static int mgs_write_log_mdt(struct obd_device *obd, struct fs_db *fsdb,
                 rc = mgs_steal_llog_for_mdt_from_client(obd, cliname, 
                                                         &comp);
 
-                rc = mgs_write_log_mdc_to_lmv(obd, fsdb, mti, cliname, 
-                                              fsdb->fsdb_clilmv);
+                rc = mgs_write_log_mdc_to_lmv(obd, fsdb, mti, &sec_conf_cli,
+                                              cliname, fsdb->fsdb_clilmv);
                 /* add mountopts */
                 rc = record_start_log(obd, &llh, cliname);
                 if (rc) 
@@ -1411,7 +1484,8 @@ out:
                         sprintf(mdt_index,"-MDT%04x",i);
                         
                         name_create(&mdtname, mti->mti_fsname, mdt_index);
-                        rc = mgs_write_log_mdc_to_mdt(obd, fsdb, mti, mdtname);
+                        rc = mgs_write_log_mdc_to_mdt(obd, fsdb, mti,
+                                                      &sec_conf_mdt, mdtname);
                         name_destroy(mdtname);
                 }
         }
@@ -1422,7 +1496,9 @@ out:
 /* Add the ost info to the client/mdt lov */
 static int mgs_write_log_osc_to_lov(struct obd_device *obd, struct fs_db *fsdb,
                                     struct mgs_target_info *mti,
-                                    char *logname, char *lovname, int flags)
+                                    char *logname, char *lovname,
+                                    struct sec_flavor_config *sec_conf,
+                                    int flags)
 {
         struct llog_handle *llh = NULL;
         char *nodeuuid, *svname, *oscname, *oscuuid, *lovuuid;
@@ -1470,6 +1546,7 @@ static int mgs_write_log_osc_to_lov(struct obd_device *obd, struct fs_db *fsdb,
         }
         rc = record_attach(obd, llh, oscname, LUSTRE_OSC_NAME, lovuuid);
         rc = record_setup(obd, llh, oscname, mti->mti_uuid, nodeuuid, 0, 0);
+        rc = record_sec_flavor(obd, llh, oscname, sec_conf);
         rc = mgs_write_log_failnids(obd, mti, llh, oscname);
         snprintf(index, sizeof(index), "%d", mti->mti_stripe_index);
         rc = record_lov_add(obd, llh, lovname, mti->mti_uuid, index, "1");
@@ -1489,9 +1566,10 @@ static int mgs_write_log_ost(struct obd_device *obd, struct fs_db *fsdb,
                              struct mgs_target_info *mti)
 {
         struct llog_handle *llh = NULL;
-        char *logname, *lovname;
+        char *logname, *lovname, *sec;
         char mdt_index[9];
         char *ptr = mti->mti_params;
+        struct sec_flavor_config sec_conf_mdt, sec_conf_cli;
         int rc, flags = 0, failout = 0, i;
         ENTRY;
         
@@ -1509,6 +1587,20 @@ static int mgs_write_log_ost(struct obd_device *obd, struct fs_db *fsdb,
                                " all logs.\n", mti->mti_svname);
                 RETURN(-EALREADY);
         }
+
+        /* security flavors */
+        extract_sec_flavor(mti->mti_params, PARAM_SEC_RPC_MDT, &sec);
+        rc = sptlrpc_parse_flavor(LUSTRE_MDT, LUSTRE_OST, sec, &sec_conf_mdt);
+        name_destroy(sec);
+        if (rc)
+                RETURN(rc);
+
+        extract_sec_flavor(mti->mti_params, PARAM_SEC_RPC_CLI, &sec);
+        rc = sptlrpc_parse_flavor(LUSTRE_CLI, LUSTRE_OST, sec, &sec_conf_cli);
+        name_destroy(sec);
+        if (rc)
+                RETURN(rc);
+
         /*
         attach obdfilter ost1 ost1_UUID
         setup /dev/loop2 ldiskfs f|n errors=remount-ro,user_xattr
@@ -1547,7 +1639,7 @@ static int mgs_write_log_ost(struct obd_device *obd, struct fs_db *fsdb,
                         name_create(&logname, mti->mti_fsname, mdt_index);
                         name_create(&lovname, logname, "-mdtlov");
                         mgs_write_log_osc_to_lov(obd, fsdb, mti, logname, 
-                                                 lovname, flags);
+                                                 lovname, &sec_conf_mdt, flags);
                         name_destroy(logname);
                         name_destroy(lovname);
                 }
@@ -1556,7 +1648,8 @@ static int mgs_write_log_ost(struct obd_device *obd, struct fs_db *fsdb,
     
         /* Append ost info to the client log */
         name_create(&logname, mti->mti_fsname, "-client");
-        mgs_write_log_osc_to_lov(obd, fsdb, mti, logname, fsdb->fsdb_clilov, 0);
+        mgs_write_log_osc_to_lov(obd, fsdb, mti, logname, fsdb->fsdb_clilov,
+                                 &sec_conf_cli, 0);
         name_destroy(logname);
         
         RETURN(rc);
@@ -1664,6 +1757,11 @@ static int mgs_write_log_params(struct obd_device *obd, struct fs_db *fsdb,
                 if (class_match_param(ptr, PARAM_FAILMODE, NULL) == 0) 
                         GOTO(end_while, rc);
 
+                /* Processed in mgs_write_log_mdt/mgs_write_log_ost */
+                if (class_match_param(ptr, PARAM_SEC_RPC_MDT, NULL) == 0 ||
+                    class_match_param(ptr, PARAM_SEC_RPC_CLI, NULL) == 0)
+                        GOTO(end_while, rc);
+
                 if (class_match_param(ptr, PARAM_FAILNODE, NULL) == 0) {
                         /* Add a failover nidlist */
                         rc = 0;
index 9613962..49a686f 100644 (file)
@@ -749,6 +749,7 @@ void class_import_put(struct obd_import *import)
         }
 
         LASSERT(list_empty(&import->imp_handle.h_link));
+        LASSERT(import->imp_sec == NULL);
         class_decref(import->imp_obd);
         OBD_FREE(import, sizeof(*import));
         EXIT;
index bc41a0b..f0c4ad8 100644 (file)
@@ -559,6 +559,36 @@ int class_del_conn(struct obd_device *obd, struct lustre_cfg *lcfg)
         RETURN(rc);
 }
 
+/*
+ * Handle an LCFG_SEC_FLAVOR config record: copy the security flavor
+ * configuration carried in config buffer 1 into the client obd's
+ * cl_sec_conf.
+ *
+ * Only obds whose type name is LUSTRE_MDC_NAME or LUSTRE_OSC_NAME are
+ * accepted, and buffer 1 must be exactly sizeof(struct sec_flavor_config)
+ * bytes long.
+ *
+ * Returns 0 on success, -EINVAL for a wrong obd type or buffer length.
+ */
+int class_sec_flavor(struct obd_device *obd, struct lustre_cfg *lcfg)
+{
+        struct sec_flavor_config *conf = &obd->u.cli.cl_sec_conf;
+        ENTRY;
+
+        if (!(strcmp(obd->obd_type->typ_name, LUSTRE_MDC_NAME) == 0 ||
+              strcmp(obd->obd_type->typ_name, LUSTRE_OSC_NAME) == 0)) {
+                CERROR("Can't set security flavor on obd %s\n",
+                       obd->obd_type->typ_name);
+                RETURN(-EINVAL);
+        }
+
+        if (LUSTRE_CFG_BUFLEN(lcfg, 1) != sizeof(*conf)) {
+                CERROR("invalid data\n");
+                RETURN(-EINVAL);
+        }
+
+        memcpy(conf, lustre_cfg_buf(lcfg, 1), sizeof(*conf));
+
+        /* NOTE(review): the four 32-bit fields are byte-swapped only when
+         * __BIG_ENDIAN is defined, which presumably means the record is
+         * stored little-endian.  Confirm that __BIG_ENDIAN is defined only
+         * on big-endian builds in every environment this file is compiled
+         * for.
+         */
+#ifdef __BIG_ENDIAN
+        __swab32s(&conf->sfc_rpc_flavor);
+        __swab32s(&conf->sfc_bulk_csum);
+        __swab32s(&conf->sfc_bulk_priv);
+        __swab32s(&conf->sfc_flags);
+#endif
+
+        RETURN(0);
+}
+
 CFS_LIST_HEAD(lustre_profile_list);
 
 struct lustre_profile *class_get_profile(const char * prof)
@@ -787,6 +817,10 @@ int class_process_config(struct lustre_cfg *lcfg)
                 err = class_del_conn(obd, lcfg);
                 GOTO(out, err = 0);
         }
+        case LCFG_SEC_FLAVOR: {
+                err = class_sec_flavor(obd, lcfg);
+                GOTO(out, err = 0);
+        }
         default: {
                 err = obd_process_config(obd, sizeof(*lcfg), lcfg);
                 GOTO(out, err);
index 7779095..83062ac 100644 (file)
@@ -851,11 +851,82 @@ int server_mti_print(char *title, struct mgs_target_info *mti)
         return(0);
 }
 
+/*
+ * Let the security flavors given at mount time (lmd->lmd_sec_mdt and
+ * lmd->lmd_sec_cli) replace the ones already present in mti->mti_params.
+ *
+ * When neither mount option is set, mti_params is left untouched.
+ * Otherwise every space-separated token that starts with PARAM_SEC_RPC_MDT
+ * or PARAM_SEC_RPC_CLI is removed from mti_params (both kinds, even if only
+ * one mount option was given), and one new token is appended for each
+ * mount option that is set.
+ *
+ * Returns 0 on success, or -ENOMEM when an appended token does not fit in
+ * mti_params.
+ */
+static
+int mti_set_sec_opts(struct mgs_target_info *mti, struct lustre_mount_data *lmd)
+{
+        char *cur, *next;
+
+        if (lmd->lmd_sec_mdt == NULL && lmd->lmd_sec_cli == NULL) {
+                /* Nothing was given at mount time, so the on-disk params
+                 * stand as they are.  This relies on the assumption that
+                 * any change made to the on-disk data by tune2fs leads to
+                 * the server rewriting its log.
+                 */
+                return 0;
+        }
+
+        /* strip the sec options that are already in the params */
+        for (cur = mti->mti_params; *cur != '\0'; ) {
+                int is_sec;
+
+                while (*cur == ' ')
+                        cur++;
+
+                is_sec = strncmp(cur, PARAM_SEC_RPC_MDT,
+                                 sizeof(PARAM_SEC_RPC_MDT) - 1) == 0 ||
+                         strncmp(cur, PARAM_SEC_RPC_CLI,
+                                 sizeof(PARAM_SEC_RPC_CLI) - 1) == 0;
+
+                next = strchr(cur, ' ');
+                if (next == NULL) {
+                        /* last token: cut the string here if it is ours */
+                        if (is_sec)
+                                *cur = '\0';
+                        break;
+                }
+                next++;
+
+                if (is_sec) {
+                        /* close the gap; cur now points at the token that
+                         * followed, so it must not be advanced */
+                        memmove(cur, next, strlen(next) + 1);
+                } else {
+                        cur = next;
+                }
+        }
+
+        /* Append the sec options from lmd.
+         * FIXME: also set LDD_F_UPDATE in mti->mti_flags for each appended
+         * option once mountconf starts supporting log updating.
+         */
+        if (lmd->lmd_sec_mdt != NULL) {
+                /* current params + separating space + key (its sizeof
+                 * already counts the terminating NUL) + value */
+                size_t need = strlen(mti->mti_params) + 1 +
+                              sizeof(PARAM_SEC_RPC_MDT) +
+                              strlen(lmd->lmd_sec_mdt);
+
+                /* ">=": a result that would exactly fill the buffer is
+                 * rejected as well */
+                if (need >= sizeof(mti->mti_params)) {
+                        CERROR("security params too big for mti\n");
+                        return -ENOMEM;
+                }
+                strcat(mti->mti_params, " "PARAM_SEC_RPC_MDT);
+                strcat(mti->mti_params, lmd->lmd_sec_mdt);
+        }
+        if (lmd->lmd_sec_cli != NULL) {
+                size_t need = strlen(mti->mti_params) + 1 +
+                              sizeof(PARAM_SEC_RPC_CLI) +
+                              strlen(lmd->lmd_sec_cli);
+
+                if (need >= sizeof(mti->mti_params)) {
+                        CERROR("security params too big for mti\n");
+                        return -ENOMEM;
+                }
+                strcat(mti->mti_params, " "PARAM_SEC_RPC_CLI);
+                strcat(mti->mti_params, lmd->lmd_sec_cli);
+        }
+
+        return 0;
+}
+
 static int server_sb2mti(struct super_block *sb, struct mgs_target_info *mti)
 {
-        struct lustre_sb_info   *lsi = s2lsi(sb);
-        struct lustre_disk_data *ldd = lsi->lsi_ldd;
-        lnet_process_id_t        id;
+        struct lustre_sb_info    *lsi = s2lsi(sb);
+        struct lustre_disk_data  *ldd = lsi->lsi_ldd;
+        struct lustre_mount_data *lmd = lsi->lsi_lmd;
+        lnet_process_id_t         id;
         int i = 0;
         ENTRY;
 
@@ -891,7 +962,8 @@ static int server_sb2mti(struct super_block *sb, struct mgs_target_info *mti)
                 /* FIXME we can't send a msg much bigger than 4k - use bulk? */
         }
         memcpy(mti->mti_params, ldd->ldd_params, sizeof(mti->mti_params));
-        RETURN(0);
+
+        RETURN(mti_set_sec_opts(mti, lmd));
 }
 
 /* Register an old or new target with the MGS. If needed MGS will construct
@@ -1139,6 +1211,12 @@ static int lustre_free_lsi(struct super_block *sb)
                 if (lsi->lsi_lmd->lmd_profile != NULL)
                         OBD_FREE(lsi->lsi_lmd->lmd_profile,
                                  strlen(lsi->lsi_lmd->lmd_profile) + 1);
+                if (lsi->lsi_lmd->lmd_sec_mdt != NULL)
+                        OBD_FREE(lsi->lsi_lmd->lmd_sec_mdt,
+                                 strlen(lsi->lsi_lmd->lmd_sec_mdt) + 1);
+                if (lsi->lsi_lmd->lmd_sec_cli != NULL)
+                        OBD_FREE(lsi->lsi_lmd->lmd_sec_cli,
+                                 strlen(lsi->lsi_lmd->lmd_sec_cli) + 1);
                 if (lsi->lsi_lmd->lmd_opts != NULL)
                         OBD_FREE(lsi->lsi_lmd->lmd_opts,
                                  strlen(lsi->lsi_lmd->lmd_opts) + 1);
@@ -1591,6 +1669,10 @@ static void lmd_print(struct lustre_mount_data *lmd)
                 PRINT_CMD(PRINT_MASK, "profile: %s\n", lmd->lmd_profile);
         PRINT_CMD(PRINT_MASK, "device:  %s\n", lmd->lmd_dev);
         PRINT_CMD(PRINT_MASK, "flags:   %x\n", lmd->lmd_flags);
+        if (lmd->lmd_sec_mdt)
+                PRINT_CMD(PRINT_MASK, "sec_mdt: %s\n", lmd->lmd_sec_mdt);
+        if (lmd->lmd_sec_cli)
+                PRINT_CMD(PRINT_MASK, "sec_cli: %s\n", lmd->lmd_sec_cli);
         if (lmd->lmd_opts)
                 PRINT_CMD(PRINT_MASK, "options: %s\n", lmd->lmd_opts);
         for (i = 0; i < lmd->lmd_exclude_count; i++) {
@@ -1676,6 +1758,66 @@ static int lmd_make_exclusion(struct lustre_mount_data *lmd, char *ptr)
         RETURN(rc);
 }
 
+/*
+ * Replace the string held in *@set with a NUL-terminated copy of the first
+ * @length bytes of @opts.
+ *
+ * A previous string, if any, is freed before the new one is allocated, so
+ * it is gone even when the allocation then fails.
+ *
+ * Returns 0 on success, -ENOMEM if the allocation fails.
+ */
+static
+int lmd_set_sec_opts(char **set, char *opts, int length)
+{
+        if (*set != NULL)
+                OBD_FREE(*set, strlen(*set) + 1);
+
+        OBD_ALLOC(*set, length + 1);
+        if (*set != NULL) {
+                memcpy(*set, opts, length);
+                (*set)[length] = '\0';
+                return 0;
+        }
+
+        return -ENOMEM;
+}
+
+/*
+ * Parse one security mount option starting at @ptr and record its value in
+ * @lmd.  The value runs up to the next ',' or to the end of the string.
+ *
+ *   sec_mdt=<value>  sets lmd->lmd_sec_mdt, replacing any earlier value
+ *   sec_cli=<value>  sets lmd->lmd_sec_cli, replacing any earlier value
+ *   sec=<value>      sets whichever of the two is still NULL, so a value
+ *                    already given for one peer is kept
+ *
+ * Returns 0 on success, -EINVAL if @ptr starts with none of the three
+ * option names, or the error returned by lmd_set_sec_opts() (-ENOMEM) when
+ * the value cannot be stored.
+ */
+static
+int lmd_parse_sec_opts(struct lustre_mount_data *lmd, char *ptr)
+{
+        char  *tail;
+        char **set = NULL;
+        int    length;
+        int    rc;
+
+        /* check peer name */
+        if (strncmp(ptr, "sec_mdt=", 8) == 0) {
+                set = &lmd->lmd_sec_mdt;
+                ptr += 8;
+        } else if (strncmp(ptr, "sec_cli=", 8) == 0) {
+                set = &lmd->lmd_sec_cli;
+                ptr += 8;
+        } else if (strncmp(ptr, "sec=", 4) == 0) {
+                /* leave 'set' NULL: the value applies to both peers */
+                ptr += 4;
+        } else {
+                CERROR("invalid security options: %s\n", ptr);
+                return -EINVAL;
+        }
+
+        tail = strchr(ptr, ',');
+        if (tail == NULL)
+                length = strlen(ptr);
+        else
+                length = tail - ptr;
+
+        if (set) {
+                /* pass the real error on: an allocation failure is not an
+                 * invalid option */
+                rc = lmd_set_sec_opts(set, ptr, length);
+                if (rc)
+                        return rc;
+        } else {
+                if (lmd->lmd_sec_mdt == NULL) {
+                        rc = lmd_set_sec_opts(&lmd->lmd_sec_mdt, ptr, length);
+                        if (rc)
+                                return rc;
+                }
+
+                if (lmd->lmd_sec_cli == NULL) {
+                        rc = lmd_set_sec_opts(&lmd->lmd_sec_cli, ptr, length);
+                        if (rc)
+                                return rc;
+                }
+        }
+
+        return 0;
+}
+
 /* mount -v -t lustre uml1:uml2:/lustre-client /mnt/lustre */
 static int lmd_parse(char *options, struct lustre_mount_data *lmd)
 {
@@ -1726,6 +1868,11 @@ static int lmd_parse(char *options, struct lustre_mount_data *lmd)
                         if (rc)
                                 goto invalid;
                         clear++;
+                } else if (strncmp(s1, "sec", 3) == 0) {
+                        rc = lmd_parse_sec_opts(lmd, s1);
+                        if (rc)
+                                goto invalid;
+                        clear++;
                 }
 
                 /* Linux 2.4 doesn't pass the device, so we stuck it at the
index 7079547..439671e 100644 (file)
@@ -286,6 +286,7 @@ static struct lprocfs_vars lprocfs_obd_vars[] = {
         { "prealloc_next_id", osc_rd_prealloc_next_id, 0, 0 },
         { "prealloc_last_id", osc_rd_prealloc_last_id, 0, 0 },
         { "checksums",       osc_rd_checksum, osc_wr_checksum, 0 },
+        { "sptlrpc",         sptlrpc_lprocfs_rd, 0, 0 },
         { 0 }
 };
 
index 495af7b..33a9710 100644 (file)
@@ -40,7 +40,7 @@
 # include <liblustre.h>
 #endif
 
-# include <lustre_dlm.h>
+#include <lustre_dlm.h>
 #include <libcfs/kp30.h>
 #include <lustre_net.h>
 #include <lustre/lustre_user.h>
@@ -844,7 +844,7 @@ static int osc_brw_prep_request(int cmd, struct obd_import *imp,struct obdo *oa,
 
         OBD_FAIL_RETURN(OBD_FAIL_OSC_BRW_PREP_REQ, -ENOMEM);
         req = ptlrpc_prep_req_pool(imp, LUSTRE_OST_VERSION, opc, 4, size, NULL,
-                                   pool);
+                                   pool, NULL);
         if (req == NULL)
                 RETURN (-ENOMEM);
 
@@ -1032,6 +1032,8 @@ static int osc_brw_fini_request(struct ptlrpc_request *req, struct obdo *oa,
                                          requested_nob, page_count, pga);
                 }
 
+                sptlrpc_cli_unwrap_bulk_write(req, req->rq_bulk);
+
                 RETURN(check_write_rcs(req, requested_nob, niocount,
                                        page_count, pga));
         }
@@ -1085,6 +1087,8 @@ static int osc_brw_fini_request(struct ptlrpc_request *req, struct obdo *oa,
                                cksum_missed, libcfs_nid2str(peer->nid));
         }
 
+        sptlrpc_cli_unwrap_bulk_read(req, rc, page_count, pga);
+
         RETURN(0);
 }
 
@@ -2635,7 +2639,8 @@ static int sanosc_brw_write(struct obd_export *exp, struct obd_info *oinfo,
         size[REQ_REC_OFF + 2] = page_count * sizeof(*nioptr);
 
         req = ptlrpc_prep_req_pool(class_exp2cliimp(exp), LUSTRE_OST_VERSION,
-                                OST_SAN_WRITE, 4, size, NULL, imp->imp_rq_pool);
+                                   OST_SAN_WRITE, 4, size, NULL,
+                                   imp->imp_rq_pool, NULL);
         if (!req)
                 RETURN(-ENOMEM);
 
@@ -3462,6 +3467,11 @@ static int osc_set_info_async(struct obd_export *exp, obd_count keylen,
                 RETURN(0);
         }
 
+        if (KEY_IS(KEY_FLUSH_CTX)) {
+                sptlrpc_import_flush_my_ctx(imp);
+                RETURN(0);
+        }
+
         if (!set)
                 RETURN(-EINVAL);
 
index 3643451..c2183cf 100644 (file)
@@ -628,6 +628,8 @@ static int ost_brw_read(struct ptlrpc_request *req, struct obd_trans_info *oti)
         int comms_error = 0, niocount, npages, nob = 0, rc, i, do_checksum;
         ENTRY;
 
+        req->rq_bulk_read = 1;
+
         if (OBD_FAIL_CHECK(OBD_FAIL_OST_BRW_READ_BULK))
                 GOTO(out, rc = -EIO);
 
@@ -740,8 +742,12 @@ static int ost_brw_read(struct ptlrpc_request *req, struct obd_trans_info *oti)
         if (rc == 0) {
                 if (desc->bd_export->exp_failed)
                         rc = -ENOTCONN;
-                else
+                else {
+                        sptlrpc_svc_wrap_bulk(req, desc);
+
                         rc = ptlrpc_start_bulk_transfer(desc);
+                }
+
                 if (rc == 0) {
                         lwi = LWI_TIMEOUT_INTERVAL(obd_timeout * HZ / 4, HZ,
                                                    ost_bulk_timeout, desc);
@@ -839,6 +845,8 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti)
         int rc, swab, i, j, do_checksum;
         ENTRY;
 
+        req->rq_bulk_write = 1;
+
         if (OBD_FAIL_CHECK(OBD_FAIL_OST_BRW_WRITE_BULK))
                 GOTO(out, rc = -EIO);
 
@@ -1010,6 +1018,8 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti)
                 }
         }
 
+        sptlrpc_svc_unwrap_bulk(req, desc);
+
         /* Must commit after prep above in all cases */
         rc = obd_commitrw(OBD_BRW_WRITE, req->rq_export, &repbody->oa,
                            objcount, ioo, npages, local_nb, oti, rc);
@@ -1297,6 +1307,9 @@ int ost_msg_check_version(struct lustre_msg *msg)
         case OST_CONNECT:
         case OST_DISCONNECT:
         case OBD_PING:
+        case SEC_CTX_INIT:
+        case SEC_CTX_INIT_CONT:
+        case SEC_CTX_FINI:
                 rc = lustre_msg_check_version(msg, LUSTRE_OBD_VERSION);
                 if (rc)
                         CERROR("bad opc %u version %08x, expecting %08x\n",
@@ -1363,6 +1376,15 @@ static int ost_handle(struct ptlrpc_request *req)
         ENTRY;
 
         LASSERT(current->journal_info == NULL);
+
+        /* primordial rpcs don't affect server recovery */
+        switch (lustre_msg_get_opc(req->rq_reqmsg)) {
+        case SEC_CTX_INIT:
+        case SEC_CTX_INIT_CONT:
+        case SEC_CTX_FINI:
+                GOTO(out, rc = 0);
+        }
+
         /* XXX identical to MDS */
         if (lustre_msg_get_opc(req->rq_reqmsg) != OST_CONNECT) {
                 int abort_recovery, recovering;
@@ -1397,10 +1419,6 @@ static int ost_handle(struct ptlrpc_request *req)
         if (rc)
                 RETURN(rc);
 
-        rc = ost_msg_check_version(req->rq_reqmsg);
-        if (rc)
-                RETURN(rc);
-
         switch (lustre_msg_get_opc(req->rq_reqmsg)) {
         case OST_CONNECT: {
                 CDEBUG(D_INODE, "connect\n");
index 38a9b33..405c896 100644 (file)
@@ -13,6 +13,7 @@ ptlrpc_objs := client.o recover.o connection.o niobuf.o pack_generic.o
 ptlrpc_objs += events.o ptlrpc_module.o service.o pinger.o recov_thread.o
 ptlrpc_objs += llog_net.o llog_client.o llog_server.o import.o ptlrpcd.o
 ptlrpc_objs += pers.o lproc_ptlrpc.o wiretest.o layout.o
+ptlrpc_objs += sec.o sec_null.o sec_plain.o
 
 ptlrpc-objs := $(ldlm_objs) $(ptlrpc_objs)
 
index c80a8ca..1f1bd42 100644 (file)
@@ -18,7 +18,8 @@ LDLM_COMM_SOURCES= $(top_srcdir)/lustre/ldlm/l_lock.c \
 COMMON_SOURCES =  client.c recover.c connection.c niobuf.c pack_generic.c   \
     events.c ptlrpc_module.c service.c pinger.c recov_thread.c llog_net.c   \
     llog_client.c llog_server.c import.c ptlrpcd.c pers.c wiretest.c               \
-    ptlrpc_internal.h layout.c $(LDLM_COMM_SOURCES)
+    ptlrpc_internal.h layout.c sec.c sec_null.c sec_plain.c                 \
+    $(LDLM_COMM_SOURCES)
 
 if LIBLUSTRE
 
@@ -57,6 +58,9 @@ ptlrpc_SOURCES := \
         recov_thread.c \
         service.c \
        wiretest.c \
+       sec.c \
+       sec_null.c \
+       sec_plain.c \
         $(LDLM_COMM_SOURCES)
 
 ptlrpc_CFLAGS := $(EXTRA_KCFLAGS)
index d636621..9b79a1e 100644 (file)
@@ -179,6 +179,9 @@ void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *desc)
         LASSERT(desc->bd_iov_count != LI_POISON); /* not freed already */
         LASSERT(!desc->bd_network_rw);         /* network hands off or */
         LASSERT((desc->bd_export != NULL) ^ (desc->bd_import != NULL));
+
+        ptlrpc_bulk_free_enc_pages(desc);
+
         if (desc->bd_export)
                 class_export_put(desc->bd_export);
         else
@@ -200,8 +203,9 @@ void ptlrpc_free_rq_pool(struct ptlrpc_request_pool *pool)
         list_for_each_safe(l, tmp, &pool->prp_req_list) {
                 req = list_entry(l, struct ptlrpc_request, rq_list);
                 list_del(&req->rq_list);
-                LASSERT (req->rq_reqmsg);
-                OBD_FREE(req->rq_reqmsg, pool->prp_rq_size);
+                LASSERT(req->rq_reqbuf);
+                LASSERT(req->rq_reqbuf_len == pool->prp_rq_size);
+                OBD_FREE(req->rq_reqbuf, pool->prp_rq_size);
                 OBD_FREE(req, sizeof(*req));
         }
         OBD_FREE(pool, sizeof(*pool));
@@ -212,7 +216,7 @@ void ptlrpc_add_rqs_to_pool(struct ptlrpc_request_pool *pool, int num_rq)
         int i;
         int size = 1;
 
-        while (size < pool->prp_rq_size)
+        while (size < pool->prp_rq_size + SPTLRPC_MAX_PAYLOAD)
                 size <<= 1;
 
         LASSERTF(list_empty(&pool->prp_req_list) || size == pool->prp_rq_size,
@@ -234,7 +238,8 @@ void ptlrpc_add_rqs_to_pool(struct ptlrpc_request_pool *pool, int num_rq)
                         OBD_FREE(req, sizeof(struct ptlrpc_request));
                         return;
                 }
-                req->rq_reqmsg = msg;
+                req->rq_reqbuf = msg;
+                req->rq_reqbuf_len = size;
                 req->rq_pool = pool;
                 spin_lock(&pool->prp_lock);
                 list_add_tail(&req->rq_list, &pool->prp_req_list);
@@ -273,7 +278,7 @@ struct ptlrpc_request_pool *ptlrpc_init_rq_pool(int num_rq, int msgsize,
 static struct ptlrpc_request *ptlrpc_prep_req_from_pool(struct ptlrpc_request_pool *pool)
 {
         struct ptlrpc_request *request;
-        struct lustre_msg *reqmsg;
+        struct lustre_msg *reqbuf;
 
         if (!pool)
                 return NULL;
@@ -294,21 +299,31 @@ static struct ptlrpc_request *ptlrpc_prep_req_from_pool(struct ptlrpc_request_po
         list_del(&request->rq_list);
         spin_unlock(&pool->prp_lock);
 
-        LASSERT(request->rq_reqmsg);
+        LASSERT(request->rq_reqbuf);
         LASSERT(request->rq_pool);
 
-        reqmsg = request->rq_reqmsg;
+        reqbuf = request->rq_reqbuf;
         memset(request, 0, sizeof(*request));
-        request->rq_reqmsg = reqmsg;
+        request->rq_reqbuf = reqbuf;
+        request->rq_reqbuf_len = pool->prp_rq_size;
         request->rq_pool = pool;
-        request->rq_reqlen = pool->prp_rq_size;
         return request;
 }
 
+/*
+ * Put @request back at the tail of its pool's request list
+ * (rq_pool->prp_req_list), holding the pool's prp_lock while the list is
+ * modified.  @request->rq_pool is dereferenced, so it must be set.
+ */
+static void __ptlrpc_free_req_to_pool(struct ptlrpc_request *request)
+{
+        spin_lock(&request->rq_pool->prp_lock);
+        list_add_tail(&request->rq_list, &request->rq_pool->prp_req_list);
+        spin_unlock(&request->rq_pool->prp_lock);
+}
+
 struct ptlrpc_request *
 ptlrpc_prep_req_pool(struct obd_import *imp, __u32 version, int opcode,
                      int count, int *lengths, char **bufs,
-                     struct ptlrpc_request_pool *pool)
+                     struct ptlrpc_request_pool *pool,
+                     struct ptlrpc_cli_ctx *ctx)
 {
         struct ptlrpc_request *request = NULL;
         int rc;
@@ -330,12 +345,23 @@ ptlrpc_prep_req_pool(struct obd_import *imp, __u32 version, int opcode,
                 RETURN(NULL);
         }
 
+        request->rq_import = class_import_get(imp);
+
+        if (unlikely(ctx))
+                request->rq_cli_ctx = sptlrpc_ctx_get(ctx);
+        else {
+                rc = sptlrpc_req_get_ctx(request);
+                if (rc)
+                        GOTO(out_free, rc);
+        }
+
+        sptlrpc_req_set_flavor(request, opcode);
+
         rc = lustre_pack_request(request, imp->imp_msg_magic, count, lengths,
                                  bufs);
         if (rc) {
                 LASSERT(!request->rq_pool);
-                OBD_FREE(request, sizeof(*request));
-                RETURN(NULL);
+                GOTO(out_ctx, rc);
         }
 
         lustre_msg_add_version(request->rq_reqmsg, version);
@@ -346,7 +372,6 @@ ptlrpc_prep_req_pool(struct obd_import *imp, __u32 version, int opcode,
                 request->rq_timeout = obd_timeout;
         request->rq_send_state = LUSTRE_IMP_FULL;
         request->rq_type = PTL_RPC_MSG_REQUEST;
-        request->rq_import = class_import_get(imp);
         request->rq_export = NULL;
 
         request->rq_req_cbid.cbid_fn  = request_out_callback;
@@ -364,6 +389,7 @@ ptlrpc_prep_req_pool(struct obd_import *imp, __u32 version, int opcode,
         spin_lock_init(&request->rq_lock);
         CFS_INIT_LIST_HEAD(&request->rq_list);
         CFS_INIT_LIST_HEAD(&request->rq_replay_list);
+        CFS_INIT_LIST_HEAD(&request->rq_ctx_chain);
         CFS_INIT_LIST_HEAD(&request->rq_set_chain);
         cfs_waitq_init(&request->rq_reply_waitq);
         request->rq_xid = ptlrpc_next_xid();
@@ -373,6 +399,15 @@ ptlrpc_prep_req_pool(struct obd_import *imp, __u32 version, int opcode,
         lustre_msg_set_flags(request->rq_reqmsg, 0);
 
         RETURN(request);
+out_ctx:
+        sptlrpc_req_put_ctx(request);
+out_free:
+        class_import_put(imp);
+        if (request->rq_pool)
+                __ptlrpc_free_req_to_pool(request);
+        else
+                OBD_FREE(request, sizeof(*request));
+        return NULL;
 }
 
 struct ptlrpc_request *
@@ -380,7 +415,7 @@ ptlrpc_prep_req(struct obd_import *imp, __u32 version, int opcode, int count,
                 int *lengths, char **bufs)
 {
         return ptlrpc_prep_req_pool(imp, version, opcode, count, lengths, bufs,
-                                    NULL);
+                                    NULL, NULL);
 }
 
 struct ptlrpc_request_set *ptlrpc_prep_set(void)
@@ -497,7 +532,9 @@ static int ptlrpc_import_delay_req(struct obd_import *imp,
         LASSERT (status != NULL);
         *status = 0;
 
-        if (imp->imp_state == LUSTRE_IMP_NEW) {
+        if (req->rq_ctx_init || req->rq_ctx_fini) {
+                /* always allow ctx init/fini rpc go through */
+        } else if (imp->imp_state == LUSTRE_IMP_NEW) {
                 DEBUG_REQ(D_ERROR, req, "Uninitialized import.");
                 *status = -EIO;
                 LBUG();
@@ -597,6 +634,7 @@ static int after_reply(struct ptlrpc_request *req)
         ENTRY;
 
         LASSERT(!req->rq_receiving_reply);
+        LASSERT(req->rq_nob_received <= req->rq_repbuf_len);
 
         /* NB Until this point, the whole of the incoming message,
          * including buflens, status etc is in the sender's byte order. */
@@ -605,8 +643,17 @@ static int after_reply(struct ptlrpc_request *req)
         /* Clear reply swab mask; this is a new reply in sender's byte order */
         req->rq_rep_swab_mask = 0;
 #endif
-        LASSERT (req->rq_nob_received <= req->rq_replen);
-        rc = lustre_unpack_msg(req->rq_repmsg, req->rq_nob_received);
+        rc = sptlrpc_cli_unwrap_reply(req);
+        if (rc) {
+                DEBUG_REQ(D_ERROR, req, "unwrap reply failed (%d):", rc);
+                RETURN(rc);
+        }
+
+        /* security layer unwrap might ask resend this request */
+        if (req->rq_resend)
+                RETURN(0);
+
+        rc = lustre_unpack_msg(req->rq_repmsg, req->rq_replen);
         if (rc) {
                 DEBUG_REQ(D_ERROR, req, "unpack_rep failed: %d\n", rc);
                 RETURN(-EPROTO);
@@ -710,6 +757,20 @@ static int ptlrpc_send_new_req(struct ptlrpc_request *req)
         spin_unlock(&imp->imp_lock);
 
         lustre_msg_set_status(req->rq_reqmsg, cfs_curproc_pid());
+
+        rc = sptlrpc_req_refresh_ctx(req, -1);
+        if (rc) {
+                if (req->rq_err) {
+                        req->rq_status = rc;
+                        RETURN(1);
+                } else {
+                        /* here begins timeout counting */
+                        req->rq_sent = CURRENT_SECONDS;
+                        req->rq_wait_ctx = 1;
+                        RETURN(0);
+                }
+        }
+
         CDEBUG(D_RPCTRACE, "Sending RPC pname:cluuid:pid:xid:nid:opc"
                " %s:%s:%d:"LPU64":%s:%d\n", cfs_curproc_comm(),
                imp->imp_obd->obd_uuid.uuid,
@@ -782,7 +843,8 @@ int ptlrpc_check_set(struct ptlrpc_request_set *set)
                  * path sets rq_intr irrespective of whether ptlrpcd has
                  * seen a timeout.  our policy is to only interpret
                  * interrupted rpcs after they have timed out */
-                if (req->rq_intr && (req->rq_timedout || req->rq_waiting)) {
+                if (req->rq_intr && (req->rq_timedout || req->rq_waiting ||
+                                     req->rq_wait_ctx)) {
                         /* NB could be on delayed list */
                         ptlrpc_unregister_reply(req);
                         req->rq_status = -EINTR;
@@ -796,9 +858,16 @@ int ptlrpc_check_set(struct ptlrpc_request_set *set)
                 }
 
                 if (req->rq_phase == RQ_PHASE_RPC) {
-                        if (req->rq_timedout||req->rq_waiting||req->rq_resend) {
+                        if (req->rq_timedout || req->rq_resend ||
+                            req->rq_waiting || req->rq_wait_ctx) {
                                 int status;
 
+                                /* rq_wait_ctx is only touched in ptlrpcd,
+                                 * no lock needed here.
+                                 */
+                                if (req->rq_wait_ctx)
+                                        goto check_ctx;
+
                                 ptlrpc_unregister_reply(req);
 
                                 spin_lock(&imp->imp_lock);
@@ -815,7 +884,7 @@ int ptlrpc_check_set(struct ptlrpc_request_set *set)
                                         spin_unlock(&imp->imp_lock);
                                         GOTO(interpret, req->rq_status);
                                 }
-                                if (req->rq_no_resend) {
+                                if (req->rq_no_resend && !req->rq_wait_ctx) {
                                         req->rq_status = -ENOTCONN;
                                         req->rq_phase = RQ_PHASE_INTERPRET;
                                         spin_unlock(&imp->imp_lock);
@@ -843,6 +912,23 @@ int ptlrpc_check_set(struct ptlrpc_request_set *set)
                                                        old_xid, req->rq_xid);
                                         }
                                 }
+check_ctx:
+                                status = sptlrpc_req_refresh_ctx(req, -1);
+                                if (status) {
+                                        if (req->rq_err) {
+                                                req->rq_status = status;
+                                                force_timer_recalc = 1;
+                                        }
+                                        if (!req->rq_wait_ctx) {
+                                                /* begins timeout counting */
+                                                req->rq_sent = CURRENT_SECONDS;
+                                                req->rq_wait_ctx = 1;
+                                        }
+                                        continue;
+                                } else {
+                                        req->rq_sent = 0;
+                                        req->rq_wait_ctx = 0;
+                                }
 
                                 rc = ptl_send_rpc(req, 0);
                                 if (rc) {
@@ -951,6 +1037,7 @@ int ptlrpc_expire_one_request(struct ptlrpc_request *req)
 
         spin_lock(&req->rq_lock);
         req->rq_timedout = 1;
+        req->rq_wait_ctx = 0;
         spin_unlock(&req->rq_lock);
 
         ptlrpc_unregister_reply (req);
@@ -972,7 +1059,8 @@ int ptlrpc_expire_one_request(struct ptlrpc_request *req)
 
         /* If this request is for recovery or other primordial tasks,
          * then error it out here. */
-        if (req->rq_send_state != LUSTRE_IMP_FULL ||
+        if (req->rq_ctx_init || req->rq_ctx_fini ||
+            req->rq_send_state != LUSTRE_IMP_FULL ||
             imp->imp_obd->obd_no_recov) {
                 spin_lock(&req->rq_lock);
                 req->rq_status = -ETIMEDOUT;
@@ -1138,15 +1226,6 @@ int ptlrpc_set_wait(struct ptlrpc_request_set *set)
         RETURN(rc);
 }
 
-static void __ptlrpc_free_req_to_pool(struct ptlrpc_request *request)
-{
-        struct ptlrpc_request_pool *pool = request->rq_pool;
-
-        spin_lock(&pool->prp_lock);
-        list_add_tail(&request->rq_list, &pool->prp_req_list);
-        spin_unlock(&pool->prp_lock);
-}
-
 static void __ptlrpc_free_req(struct ptlrpc_request *request, int locked)
 {
         ENTRY;
@@ -1159,6 +1238,7 @@ static void __ptlrpc_free_req(struct ptlrpc_request *request, int locked)
         LASSERTF(request->rq_rqbd == NULL, "req %p\n",request);/* client-side */
         LASSERTF(list_empty(&request->rq_list), "req %p\n", request);
         LASSERTF(list_empty(&request->rq_set_chain), "req %p\n", request);
+        LASSERT(request->rq_cli_ctx);
 
         /* We must take it off the imp_replay_list first.  Otherwise, we'll set
          * request->rq_reqmsg to NULL while osc_close is dereferencing it. */
@@ -1177,10 +1257,8 @@ static void __ptlrpc_free_req(struct ptlrpc_request *request, int locked)
                 LBUG();
         }
 
-        if (request->rq_repmsg != NULL) {
-                OBD_FREE(request->rq_repmsg, request->rq_replen);
-                request->rq_repmsg = NULL;
-        }
+        if (request->rq_repbuf != NULL)
+                sptlrpc_cli_free_repbuf(request);
         if (request->rq_export != NULL) {
                 class_export_put(request->rq_export);
                 request->rq_export = NULL;
@@ -1192,15 +1270,15 @@ static void __ptlrpc_free_req(struct ptlrpc_request *request, int locked)
         if (request->rq_bulk != NULL)
                 ptlrpc_free_bulk(request->rq_bulk);
 
-        if (request->rq_pool) {
+        if (request->rq_reqbuf != NULL || request->rq_clrbuf != NULL)
+                sptlrpc_cli_free_reqbuf(request);
+
+        sptlrpc_req_put_ctx(request);
+
+        if (request->rq_pool)
                 __ptlrpc_free_req_to_pool(request);
-        } else {
-                if (request->rq_reqmsg != NULL) {
-                        OBD_FREE(request->rq_reqmsg, request->rq_reqlen);
-                        request->rq_reqmsg = NULL;
-                }
+        else
                 OBD_FREE(request, sizeof(*request));
-        }
         EXIT;
 }
 
@@ -1563,6 +1641,23 @@ restart:
         list_add_tail(&req->rq_list, &imp->imp_sending_list);
         spin_unlock(&imp->imp_lock);
 
+        rc = sptlrpc_req_refresh_ctx(req, 0);
+        if (rc) {
+                if (req->rq_err) {
+                        /* we got fatal ctx refresh error, directly jump out
+                         * thus we can pass back the actual error code.
+                         */
+                        spin_lock(&imp->imp_lock);
+                        list_del_init(&req->rq_list);
+                        spin_unlock(&imp->imp_lock);
+
+                        CERROR("Failed to refresh ctx of req %p: %d\n", req, rc);
+                        GOTO(out, rc);
+                }
+                /* simulating we got error during send rpc */
+                goto after_send;
+        }
+
         rc = ptl_send_rpc(req, 0);
         if (rc) {
                 DEBUG_REQ(D_HA, req, "send failed (%d); recovering", rc);
@@ -1577,6 +1672,7 @@ restart:
         l_wait_event(req->rq_reply_waitq, ptlrpc_check_reply(req), &lwi);
         DEBUG_REQ(D_NET, req, "-- done sleeping");
 
+after_send:
         CDEBUG(D_RPCTRACE, "Completed RPC pname:cluuid:pid:xid:nid:opc "
                "%s:%s:%d:"LPU64":%s:%d\n", cfs_curproc_comm(),
                imp->imp_obd->obd_uuid.uuid,
index 96aa14b..8863dc2 100644 (file)
@@ -30,6 +30,7 @@
 #endif
 #include <obd_class.h>
 #include <lustre_net.h>
+#include <lustre_sec.h>
 #include "ptlrpc_internal.h"
 
 lnet_handle_eq_t   ptlrpc_eq_h;
@@ -50,6 +51,8 @@ void request_out_callback(lnet_event_t *ev)
         DEBUG_REQ((ev->status == 0) ? D_NET : D_ERROR, req,
                   "type %d, status %d", ev->type, ev->status);
 
+        sptlrpc_request_out_callback(req);
+
         if (ev->type == LNET_EVENT_UNLINK || ev->status != 0) {
 
                 /* Failed send: make it seem like the reply timed out, just
@@ -81,9 +84,9 @@ void reply_in_callback(lnet_event_t *ev)
         LASSERT (ev->type == LNET_EVENT_PUT ||
                  ev->type == LNET_EVENT_UNLINK);
         LASSERT (ev->unlinked);
-        LASSERT (ev->md.start == req->rq_repmsg);
+        LASSERT (ev->md.start == req->rq_repbuf);
         LASSERT (ev->offset == 0);
-        LASSERT (ev->mlength <= req->rq_replen);
+        LASSERT (ev->mlength <= req->rq_repbuf_len);
 
         DEBUG_REQ((ev->status == 0) ? D_NET : D_ERROR, req,
                   "type %d, status %d", ev->type, ev->status);
@@ -136,6 +139,8 @@ void client_bulk_callback (lnet_event_t *ev)
                 desc->bd_nob_transferred = ev->mlength;
         }
 
+        ptlrpc_bulk_free_enc_pages(desc);
+
         /* NB don't unlock till after wakeup; desc can disappear under us
          * otherwise */
         ptlrpc_wake_client_req(desc->bd_req);
@@ -193,9 +198,9 @@ void request_in_callback(lnet_event_t *ev)
          * flags are reset and scalars are zero.  We only set the message
          * size to non-zero if this was a successful receive. */
         req->rq_xid = ev->match_bits;
-        req->rq_reqmsg = ev->md.start + ev->offset;
+        req->rq_reqbuf = ev->md.start + ev->offset;
         if (ev->type == LNET_EVENT_PUT && ev->status == 0)
-                req->rq_reqlen = ev->mlength;
+                req->rq_reqdata_len = ev->mlength;
         do_gettimeofday(&req->rq_arrival_time);
         req->rq_peer = ev->initiator;
         req->rq_self = ev->target.nid;
index b7a9e49..600b514 100644 (file)
@@ -350,7 +350,8 @@ int ptlrpc_connect_import(struct obd_import *imp, char *new_uuid)
                 spin_unlock(&imp->imp_lock);
                 CERROR("can't connect to a closed import\n");
                 RETURN(-EINVAL);
-        } else if (imp->imp_state == LUSTRE_IMP_FULL) {
+        } else if (imp->imp_state == LUSTRE_IMP_FULL &&
+                   imp->imp_force_reconnect == 0) {
                 spin_unlock(&imp->imp_lock);
                 CERROR("already connected\n");
                 RETURN(0);
@@ -499,11 +500,16 @@ static int ptlrpc_connect_interpret(struct ptlrpc_request *request,
                 spin_unlock(&imp->imp_lock);
                 RETURN(0);
         }
+        imp->imp_force_reconnect = 0;
         spin_unlock(&imp->imp_lock);
 
         if (rc)
                 GOTO(out, rc);
 
+        rc = sptlrpc_cli_install_rvs_ctx(imp, request->rq_cli_ctx);
+        if (rc)
+                GOTO(out, rc);
+
         LASSERT(imp->imp_conn_current);
 
         msg_flags = lustre_msg_get_op_flags(request->rq_repmsg);
@@ -719,6 +725,11 @@ finish:
 
                 if (rc == -EPROTO) {
                         struct obd_connect_data *ocd;
+
+                        /* reply message might not be ready */
+                        if (request->rq_repmsg != NULL)
+                                RETURN(-EPROTO);
+
                         ocd = lustre_swab_repbuf(request, REPLY_REC_OFF,
                                                  sizeof *ocd,
                                                  lustre_swab_connect);
index 53fac91..5aa2460 100644 (file)
@@ -311,17 +311,19 @@ int ptlrpc_send_reply (struct ptlrpc_request *req, int may_be_difficult)
         int                        rc;
 
         /* We must already have a reply buffer (only ptlrpc_error() may be
-         * called without one).  We must also have a request buffer which
-         * is either the actual (swabbed) incoming request, or a saved copy
-         * if this is a req saved in target_queue_final_reply(). */
-        LASSERT (req->rq_reqmsg != NULL);
+         * called without one). The reply generated by security layer (e.g.
+         * error notify, etc.) might have NULL rq->reqmsg; Otherwise we must
+         * have a request buffer which is either the actual (swabbed) incoming
+         * request, or a saved copy if this is a req saved in
+         * target_queue_final_reply().
+         */
+        LASSERT (req->rq_reqbuf != NULL);
         LASSERT (rs != NULL);
-        LASSERT (req->rq_repmsg != NULL);
         LASSERT (may_be_difficult || !rs->rs_difficult);
+        LASSERT (req->rq_repmsg != NULL);
         LASSERT (req->rq_repmsg == rs->rs_msg);
         LASSERT (rs->rs_cb_id.cbid_fn == reply_out_callback);
         LASSERT (rs->rs_cb_id.cbid_arg == rs);
-        LASSERT (req->rq_repmsg != NULL);
 
         if (req->rq_export && req->rq_export->exp_obd &&
             req->rq_export->exp_obd->obd_fail) {
@@ -337,7 +339,8 @@ int ptlrpc_send_reply (struct ptlrpc_request *req, int may_be_difficult)
 
         lustre_msg_set_type(req->rq_repmsg, req->rq_type);
         lustre_msg_set_status(req->rq_repmsg, req->rq_status);
-        lustre_msg_set_opc(req->rq_repmsg, lustre_msg_get_opc(req->rq_reqmsg));
+        lustre_msg_set_opc(req->rq_repmsg,
+                req->rq_reqmsg ? lustre_msg_get_opc(req->rq_reqmsg) : 0);
 
         if (req->rq_export == NULL || req->rq_export->exp_connection == NULL)
                 conn = ptlrpc_get_connection(req->rq_peer, req->rq_self, NULL);
@@ -351,10 +354,15 @@ int ptlrpc_send_reply (struct ptlrpc_request *req, int may_be_difficult)
         atomic_inc (&svc->srv_outstanding_replies);
         ptlrpc_rs_addref(rs);                   /* +1 ref for the network */
 
-        rc = ptl_send_buf (&rs->rs_md_h, req->rq_repmsg, req->rq_replen,
+        rc = sptlrpc_svc_wrap_reply(req);
+        if (rc)
+                goto out;
+
+        rc = ptl_send_buf (&rs->rs_md_h, rs->rs_repbuf, rs->rs_repdata_len,
                            rs->rs_difficult ? LNET_ACK_REQ : LNET_NOACK_REQ,
                            &rs->rs_cb_id, conn,
                            svc->srv_rep_portal, req->rq_xid);
+out:
         if (rc != 0) {
                 atomic_dec (&svc->srv_outstanding_replies);
                 ptlrpc_rs_decref(rs);
@@ -413,24 +421,31 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
         
         connection = request->rq_import->imp_connection;
 
-        if (request->rq_bulk != NULL) {
-                rc = ptlrpc_register_bulk (request);
-                if (rc != 0)
-                        RETURN(rc);
-        }
-
         lustre_msg_set_handle(request->rq_reqmsg,
                               &request->rq_import->imp_remote_handle);
         lustre_msg_set_type(request->rq_reqmsg, PTL_RPC_MSG_REQUEST);
         lustre_msg_set_conn_cnt(request->rq_reqmsg,
                                 request->rq_import->imp_conn_cnt);
 
+        rc = sptlrpc_cli_wrap_request(request);
+        if (rc)
+                RETURN(rc);
+
+        /* bulk register should be done after wrap_request() */
+        if (request->rq_bulk != NULL) {
+                rc = ptlrpc_register_bulk (request);
+                if (rc != 0)
+                        RETURN(rc);
+        }
+
         if (!noreply) {
                 LASSERT (request->rq_replen != 0);
-                if (request->rq_repmsg == NULL)
-                        OBD_ALLOC(request->rq_repmsg, request->rq_replen);
-                if (request->rq_repmsg == NULL)
-                        GOTO(cleanup_bulk, rc = -ENOMEM);
+                if (request->rq_repbuf == NULL) {
+                        rc = sptlrpc_cli_alloc_repbuf(request,
+                                                      request->rq_replen);
+                        if (rc)
+                                GOTO(cleanup_bulk, rc);
+                }
 
                 rc = LNetMEAttach(request->rq_reply_portal,/*XXX FIXME bug 249*/
                                   connection->c_peer, request->rq_xid, 0,
@@ -438,7 +453,7 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
                 if (rc != 0) {
                         CERROR("LNetMEAttach failed: %d\n", rc);
                         LASSERT (rc == -ENOMEM);
-                        GOTO(cleanup_repmsg, rc = -ENOMEM);
+                        GOTO(cleanup_bulk, rc = -ENOMEM);
                 }
         }
 
@@ -455,8 +470,8 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
         spin_unlock(&request->rq_lock);
 
         if (!noreply) {
-                reply_md.start     = request->rq_repmsg;
-                reply_md.length    = request->rq_replen;
+                reply_md.start     = request->rq_repbuf;
+                reply_md.length    = request->rq_repbuf_len;
                 reply_md.threshold = 1;
                 reply_md.options   = PTLRPC_MD_OPTIONS | LNET_MD_OP_PUT;
                 reply_md.user_ptr  = &request->rq_reply_cbid;
@@ -476,7 +491,7 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
 
                 CDEBUG(D_NET, "Setup reply buffer: %u bytes, xid "LPU64
                        ", portal %u\n",
-                       request->rq_replen, request->rq_xid,
+                       request->rq_repbuf_len, request->rq_xid,
                        request->rq_reply_portal);
         }
 
@@ -489,8 +504,8 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
         request->rq_sent = CURRENT_SECONDS;
         ptlrpc_pinger_sending_on_import(request->rq_import);
         rc = ptl_send_buf(&request->rq_req_md_h,
-                          request->rq_reqmsg, request->rq_reqlen,
-                          LNET_NOACK_REQ, &request->rq_req_cbid,
+                          request->rq_reqbuf, request->rq_reqdata_len,
+                          LNET_NOACK_REQ, &request->rq_req_cbid, 
                           connection,
                           request->rq_request_portal,
                           request->rq_xid);
@@ -516,10 +531,6 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
         /* UNLINKED callback called synchronously */
         LASSERT (!request->rq_receiving_reply);
 
- cleanup_repmsg:
-        OBD_FREE(request->rq_repmsg, request->rq_replen);
-        request->rq_repmsg = NULL;
-
  cleanup_bulk:
         if (request->rq_bulk != NULL)
                 ptlrpc_unregister_bulk(request);
index 49e571c..5724089 100644 (file)
@@ -98,7 +98,7 @@ static inline int lustre_msg_size_v1(int count, int *lengths)
         return size;
 }
 
-static inline int lustre_msg_size_v2(int count, int *lengths)
+int lustre_msg_size_v2(int count, int *lengths)
 {
         int size;
         int i;
@@ -109,6 +109,7 @@ static inline int lustre_msg_size_v2(int count, int *lengths)
 
         return size;
 }
+EXPORT_SYMBOL(lustre_msg_size_v2);
 
 /* This returns the size of the buffer that is required to hold a lustre_msg
  * with the given sub-buffer lengths. */
@@ -135,8 +136,8 @@ int lustre_msg_size(__u32 magic, int count, int *lens)
         }
 }
 
-static void
-lustre_init_msg_v1(void *m, int count, int *lens, char **bufs)
+static
+void lustre_init_msg_v1(void *m, int count, int *lens, char **bufs)
 {
         struct lustre_msg_v1 *msg = (struct lustre_msg_v1 *)m;
         char *ptr;
@@ -160,8 +161,8 @@ lustre_init_msg_v1(void *m, int count, int *lens, char **bufs)
         }
 }
 
-static void
-lustre_init_msg_v2(struct lustre_msg_v2 *msg, int count, int *lens, char **bufs)
+void lustre_init_msg_v2(struct lustre_msg_v2 *msg, int count, int *lens,
+                        char **bufs)
 {
         char *ptr;
         int i;
@@ -182,32 +183,18 @@ lustre_init_msg_v2(struct lustre_msg_v2 *msg, int count, int *lens, char **bufs)
                 LOGL(tmp, lens[i], ptr);
         }
 }
+EXPORT_SYMBOL(lustre_init_msg_v2);
 
 static int lustre_pack_request_v1(struct ptlrpc_request *req,
                                   int count, int *lens, char **bufs)
 {
-        int reqlen;
+        int reqlen, rc;
 
         reqlen = lustre_msg_size_v1(count, lens);
 
-        /* See if we got it from prealloc pool */
-        if (req->rq_reqmsg) {
-                /* Cannot return error here, that would create
-                   infinite loop in ptlrpc_prep_req_pool */
-                /* In this case ptlrpc_prep_req_from_pool sets req->rq_reqlen
-                   to maximum size that would fit into this preallocated
-                   request */
-                LASSERTF(req->rq_reqlen >= reqlen, "req->rq_reqlen %d, "
-                                                   "reqlen %d\n",req->rq_reqlen,
-                                                    reqlen);
-                memset(req->rq_reqmsg, 0, reqlen);
-        } else {
-                OBD_ALLOC(req->rq_reqmsg, reqlen);
-                if (req->rq_reqmsg == NULL) {
-                        CERROR("alloc reqmsg (len %d) failed\n", reqlen);
-                        return -ENOMEM;
-                }
-        }
+        rc = sptlrpc_cli_alloc_reqbuf(req, reqlen);
+        if (rc)
+                return rc;
 
         req->rq_reqlen = reqlen;
 
@@ -218,28 +205,13 @@ static int lustre_pack_request_v1(struct ptlrpc_request *req,
 static int lustre_pack_request_v2(struct ptlrpc_request *req,
                                   int count, int *lens, char **bufs)
 {
-        int reqlen;
+        int reqlen, rc;
 
         reqlen = lustre_msg_size_v2(count, lens);
 
-        /* See if we got it from prealloc pool */
-        if (req->rq_reqmsg) {
-                /* Cannot return error here, that would create
-                   infinite loop in ptlrpc_prep_req_pool */
-                /* In this case ptlrpc_prep_req_from_pool sets req->rq_reqlen
-                   to maximum size that would fit into this preallocated
-                   request */
-                LASSERTF(req->rq_reqlen >= reqlen, "req->rq_reqlen %d, "
-                                                   "reqlen %d\n",req->rq_reqlen,
-                                                    reqlen);
-                memset(req->rq_reqmsg, 0, reqlen);
-        } else {
-                OBD_ALLOC(req->rq_reqmsg, reqlen);
-                if (req->rq_reqmsg == NULL) {
-                        CERROR("alloc reqmsg (len %d) failed\n", reqlen);
-                        return -ENOMEM;
-                }
-        }
+        rc = sptlrpc_cli_alloc_reqbuf(req, reqlen);
+        if (rc)
+                return rc;
 
         req->rq_reqlen = reqlen;
 
@@ -261,6 +233,13 @@ int lustre_pack_request(struct ptlrpc_request *req, __u32 magic, int count,
         LASSERT(count > 0);
         LASSERT(lens[MSG_PTLRPC_BODY_OFF] == sizeof(struct ptlrpc_body));
 
+        /* if we choose a policy other than null, we have also chosen
+         * to use the new message format.
+         */
+        if (magic == LUSTRE_MSG_MAGIC_V1 &&
+            req->rq_sec_flavor != SPTLRPC_FLVR_NULL)
+                magic = LUSTRE_MSG_MAGIC_V2;
+
         switch (magic) {
         case LUSTRE_MSG_MAGIC_V1:
                 return lustre_pack_request_v1(req, count - 1, lens + 1,
@@ -295,8 +274,7 @@ do {                                            \
 # define PTLRPC_RS_DEBUG_LRU_DEL(rs) do {} while(0)
 #endif
 
-static struct ptlrpc_reply_state *lustre_get_emerg_rs(struct ptlrpc_service *svc,
-                                                      int size)
+struct ptlrpc_reply_state *lustre_get_emerg_rs(struct ptlrpc_service *svc)
 {
         struct ptlrpc_reply_state *rs = NULL;
 
@@ -321,40 +299,46 @@ static struct ptlrpc_reply_state *lustre_get_emerg_rs(struct ptlrpc_service *svc
         list_del(&rs->rs_list);
         spin_unlock(&svc->srv_lock);
         LASSERT(rs);
-        LASSERTF(svc->srv_max_reply_size > size, "Want %d, prealloc %d\n", size,
-                 svc->srv_max_reply_size);
-        memset(rs, 0, size);
+        memset(rs, 0, svc->srv_max_reply_size);
+        rs->rs_service = svc;
         rs->rs_prealloc = 1;
 out:
         return rs;
 }
 
+/*
+ * Hand a reply state back to the service recorded in rs->rs_service:
+ * link @rs onto that service's srv_free_rs_list while holding srv_lock,
+ * then signal srv_free_rs_waitq.
+ *
+ * @rs must have rs_service set (asserted).
+ */
+void lustre_put_emerg_rs(struct ptlrpc_reply_state *rs)
+{
+        struct ptlrpc_service *svc = rs->rs_service;
+
+        LASSERT(svc);
+
+        spin_lock(&svc->srv_lock);
+        list_add(&rs->rs_list, &svc->srv_free_rs_list);
+        spin_unlock(&svc->srv_lock);
+        /* signalled only after srv_lock has been dropped */
+        cfs_waitq_signal(&svc->srv_free_rs_waitq);
+}
+
 static int lustre_pack_reply_v1(struct ptlrpc_request *req, int count,
                                 int *lens, char **bufs)
 {
         struct ptlrpc_reply_state *rs;
-        int                        msg_len;
-        int                        size;
+        int                        msg_len, rc;
         ENTRY;
 
         LASSERT (req->rq_reply_state == NULL);
 
         msg_len = lustre_msg_size_v1(count, lens);
-        size = sizeof(struct ptlrpc_reply_state) + msg_len;
-        OBD_ALLOC(rs, size);
-        if (unlikely(rs == NULL)) {
-                rs = lustre_get_emerg_rs(req->rq_rqbd->rqbd_service, size);
-                if (!rs)
-                        RETURN (-ENOMEM);
-        }
+        rc = sptlrpc_svc_alloc_rs(req, msg_len);
+        if (rc)
+                RETURN(rc);
+
+        rs = req->rq_reply_state;
         atomic_set(&rs->rs_refcount, 1);        /* 1 ref for rq_reply_state */
         rs->rs_cb_id.cbid_fn = reply_out_callback;
         rs->rs_cb_id.cbid_arg = rs;
         rs->rs_service = req->rq_rqbd->rqbd_service;
-        rs->rs_size = size;
         CFS_INIT_LIST_HEAD(&rs->rs_exp_list);
         CFS_INIT_LIST_HEAD(&rs->rs_obd_list);
-        rs->rs_msg = (struct lustre_msg *)(rs + 1);
 
         req->rq_replen = msg_len;
         req->rq_reply_state = rs;
@@ -366,32 +350,27 @@ static int lustre_pack_reply_v1(struct ptlrpc_request *req, int count,
         RETURN (0);
 }
 
-static int lustre_pack_reply_v2(struct ptlrpc_request *req, int count,
-                                int *lens, char **bufs)
+int lustre_pack_reply_v2(struct ptlrpc_request *req, int count,
+                         int *lens, char **bufs)
 {
         struct ptlrpc_reply_state *rs;
-        int                        msg_len;
-        int                        size;
+        int                        msg_len, rc;
         ENTRY;
 
         LASSERT(req->rq_reply_state == NULL);
 
         msg_len = lustre_msg_size_v2(count, lens);
-        size = sizeof(struct ptlrpc_reply_state) + msg_len;
-        OBD_ALLOC(rs, size);
-        if (unlikely(rs == NULL)) {
-                rs = lustre_get_emerg_rs(req->rq_rqbd->rqbd_service, size);
-                if (!rs)
-                        RETURN (-ENOMEM);
-        }
+        rc = sptlrpc_svc_alloc_rs(req, msg_len);
+        if (rc)
+                RETURN(rc);
+
+        rs = req->rq_reply_state;
         atomic_set(&rs->rs_refcount, 1);        /* 1 ref for rq_reply_state */
         rs->rs_cb_id.cbid_fn = reply_out_callback;
         rs->rs_cb_id.cbid_arg = rs;
         rs->rs_service = req->rq_rqbd->rqbd_service;
-        rs->rs_size = size;
         CFS_INIT_LIST_HEAD(&rs->rs_exp_list);
         CFS_INIT_LIST_HEAD(&rs->rs_obd_list);
-        rs->rs_msg = (struct lustre_msg *)(rs + 1);
 
         req->rq_replen = msg_len;
         req->rq_reply_state = rs;
@@ -403,6 +382,7 @@ static int lustre_pack_reply_v2(struct ptlrpc_request *req, int count,
 
         RETURN(0);
 }
+EXPORT_SYMBOL(lustre_pack_reply_v2);
 
 int lustre_pack_reply(struct ptlrpc_request *req, int count, int *lens,
                       char **bufs)
@@ -505,21 +485,19 @@ void *lustre_msg_buf(struct lustre_msg *m, int n, int min_size)
         }
 }
 
-void lustre_shrink_reply_v1(struct ptlrpc_request *req, int segment,
-                            unsigned int newlen, int move_data)
+int lustre_shrink_msg_v1(struct lustre_msg_v1 *msg, int segment,
+                         unsigned int newlen, int move_data)
 {
-        struct lustre_msg_v1 *msg = (struct lustre_msg_v1 *)req->rq_repmsg;
-        char *tail = NULL, *newpos;
-        int tail_len = 0, n;
+        char   *tail = NULL, *newpos;
+        int     tail_len = 0, n;
 
-        LASSERT(req->rq_reply_state);
         LASSERT(msg);
         LASSERT(segment >= 0);
         LASSERT(msg->lm_bufcount > segment);
         LASSERT(msg->lm_buflens[segment] >= newlen);
 
         if (msg->lm_buflens[segment] == newlen)
-                return;
+                goto out;
 
         if (move_data && msg->lm_bufcount > segment + 1) {
                 tail = lustre_msg_buf_v1(msg, segment + 1, 0);
@@ -542,23 +520,22 @@ void lustre_shrink_reply_v1(struct ptlrpc_request *req, int segment,
                 msg->lm_buflens[msg->lm_bufcount - 1] = 0;
         }
 
-        req->rq_replen = lustre_msg_size_v1(msg->lm_bufcount, msg->lm_buflens);
+out:
+        return lustre_msg_size_v1(msg->lm_bufcount, msg->lm_buflens);
 }
 
-void lustre_shrink_reply_v2(struct ptlrpc_request *req, int segment,
-                            unsigned int newlen, int move_data)
+int lustre_shrink_msg_v2(struct lustre_msg_v2 *msg, int segment,
+                         unsigned int newlen, int move_data)
 {
-        struct lustre_msg_v2 *msg = req->rq_repmsg;
-        char *tail = NULL, *newpos;
-        int tail_len = 0, n;
+        char   *tail = NULL, *newpos;
+        int     tail_len = 0, n;
 
-        LASSERT(req->rq_reply_state);
         LASSERT(msg);
         LASSERT(msg->lm_bufcount > segment);
         LASSERT(msg->lm_buflens[segment] >= newlen);
 
         if (msg->lm_buflens[segment] == newlen)
-                return;
+                goto out;
 
         if (move_data && msg->lm_bufcount > segment + 1) {
                 tail = lustre_msg_buf_v2(msg, segment + 1, 0);
@@ -581,36 +558,37 @@ void lustre_shrink_reply_v2(struct ptlrpc_request *req, int segment,
                 msg->lm_buflens[msg->lm_bufcount - 1] = 0;
         }
 
-        req->rq_replen = lustre_msg_size_v2(msg->lm_bufcount, msg->lm_buflens);
+out:
+        return lustre_msg_size_v2(msg->lm_bufcount, msg->lm_buflens);
 }
 
 /*
- * shrink @segment to size @newlen. if @move_data is non-zero, we also move
- * data forward from @segment + 1.
+ * for @msg, shrink @segment to size @newlen. if @move_data is non-zero,
+ * we also move data forward from @segment + 1.
  * 
  * if @newlen == 0, we remove the segment completely, but we still keep the
  * totally bufcount the same to save possible data moving. this will leave a
  * unused segment with size 0 at the tail, but that's ok.
  *
+ * return new msg size after shrinking.
+ *
  * CAUTION:
  * + if any buffers higher than @segment has been filled in, must call shrink
  *   with non-zero @move_data.
  * + caller should NOT keep pointers to msg buffers which higher than @segment
  *   after call shrink.
  */
-void lustre_shrink_reply(struct ptlrpc_request *req, int segment,
-                        unsigned int newlen, int move_data)
+int lustre_shrink_msg(struct lustre_msg *msg, int segment,
+                      unsigned int newlen, int move_data)
 {
-        switch (req->rq_repmsg->lm_magic) {
+        switch (msg->lm_magic) {
         case LUSTRE_MSG_MAGIC_V1:
-                lustre_shrink_reply_v1(req, segment - 1, newlen, move_data);
-                return;
+                return lustre_shrink_msg_v1((struct lustre_msg_v1 *) msg,
+                                            segment - 1, newlen, move_data);
         case LUSTRE_MSG_MAGIC_V2:
-                lustre_shrink_reply_v2(req, segment, newlen, move_data);
-                return;
+                return lustre_shrink_msg_v2(msg, segment, newlen, move_data);
         default:
-                LASSERTF(0, "incorrect message magic: %08x\n",
-                         req->rq_repmsg->lm_magic);
+                LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
+                /* only reachable if the assertion above does not stop
+                 * execution; return a definite value instead of falling
+                 * off the end of an int-returning function */
+                return 0;
         }
 }
 
@@ -627,17 +605,7 @@ void lustre_free_reply_state(struct ptlrpc_reply_state *rs)
         LASSERT (list_empty(&rs->rs_exp_list));
         LASSERT (list_empty(&rs->rs_obd_list));
 
-        if (unlikely(rs->rs_prealloc)) {
-                struct ptlrpc_service *svc = rs->rs_service;
-
-                spin_lock(&svc->srv_lock);
-                list_add(&rs->rs_list,
-                         &svc->srv_free_rs_list);
-                spin_unlock(&svc->srv_lock);
-                cfs_waitq_signal(&svc->srv_free_rs_waitq);
-        } else {
-                OBD_FREE(rs, rs->rs_size);
-        }
+        sptlrpc_svc_free_rs(rs);
 }
 
 int lustre_unpack_msg_v1(void *msg, int len)
index 865dcf0..8e92509 100644 (file)
@@ -45,7 +45,7 @@ void ptlrpc_fill_bulk_md (lnet_md_t *md, struct ptlrpc_bulk_desc *desc)
         LASSERT (!(md->options & (LNET_MD_IOVEC | LNET_MD_KIOV | LNET_MD_PHYS)));
 
         md->options |= LNET_MD_KIOV;
-        md->start = &desc->bd_iov[0];
+        md->start = desc->bd_enc_iov ? desc->bd_enc_iov : &desc->bd_iov[0];
         md->length = desc->bd_iov_count;
 }
 
@@ -73,6 +73,61 @@ void ptl_rpc_wipe_bulk_pages(struct ptlrpc_bulk_desc *desc)
         }
 }
 
+int ptlrpc_bulk_alloc_enc_pages(struct ptlrpc_bulk_desc *desc)
+{
+        int i, alloc_size;
+
+        LASSERT(desc->bd_enc_iov == NULL);
+
+        if (desc->bd_iov_count == 0)
+                return 0;
+
+        alloc_size = desc->bd_iov_count * sizeof(desc->bd_enc_iov[0]);
+
+        OBD_ALLOC(desc->bd_enc_iov, alloc_size);
+        if (desc->bd_enc_iov == NULL)
+                return -ENOMEM;
+
+        memcpy(desc->bd_enc_iov, desc->bd_iov, alloc_size);
+
+        for (i = 0; i < desc->bd_iov_count; i++) {
+                desc->bd_enc_iov[i].kiov_page =
+                        cfs_alloc_page(CFS_ALLOC_IO | CFS_ALLOC_HIGH);
+                if (desc->bd_enc_iov[i].kiov_page == NULL) {
+                        CERROR("Failed to alloc %d encryption pages\n",
+                               desc->bd_iov_count);
+                        break;
+                }
+        }
+
+        if (i == desc->bd_iov_count)
+                return 0;
+
+        /* error, cleanup */
+        for (i = i - 1; i >= 0; i--)
+                __free_page(desc->bd_enc_iov[i].kiov_page);
+        OBD_FREE(desc->bd_enc_iov, alloc_size);
+        desc->bd_enc_iov = NULL;
+        return -ENOMEM;
+}
+
+void ptlrpc_bulk_free_enc_pages(struct ptlrpc_bulk_desc *desc)
+{
+        int     i;
+
+        if (desc->bd_enc_iov == NULL)
+                return;
+
+        for (i = 0; i < desc->bd_iov_count; i++) {
+                LASSERT(desc->bd_enc_iov[i].kiov_page);
+                __free_page(desc->bd_enc_iov[i].kiov_page);
+        }
+
+        OBD_FREE(desc->bd_enc_iov,
+                 desc->bd_iov_count * sizeof(desc->bd_enc_iov[0]));
+        desc->bd_enc_iov = NULL;
+}
+
 #else /* !__KERNEL__ */
 
 void ptlrpc_fill_bulk_md(lnet_md_t *md, struct ptlrpc_bulk_desc *desc)
@@ -127,4 +182,12 @@ void ptl_rpc_wipe_bulk_pages(struct ptlrpc_bulk_desc *desc)
                 memset(iov->iov_base, 0xab, iov->iov_len);
         }
 }
+
+int ptlrpc_bulk_alloc_enc_pages(struct ptlrpc_bulk_desc *desc)
+{
+        return 0;
+}
+void ptlrpc_bulk_free_enc_pages(struct ptlrpc_bulk_desc *desc)
+{
+}
 #endif /* !__KERNEL__ */
index 787074c..9b00e1e 100644 (file)
@@ -76,6 +76,50 @@ void ptlrpc_ping_import_soon(struct obd_import *imp)
 }
 
 #ifdef __KERNEL__
+static
+int check_import_reconnect(struct obd_import *imp)
+{
+        spin_lock(&imp->imp_lock);
+
+        /* next_reconnect == 0 means a reconnect is never needed.
+         */
+        if (imp->imp_next_reconnect == 0 ||
+            cfs_time_before(cfs_time_current_sec(), imp->imp_next_reconnect)) {
+                spin_unlock(&imp->imp_lock);
+                return 0;
+        }
+
+        if (imp->imp_state != LUSTRE_IMP_FULL ||
+            imp->imp_force_reconnect == 1) {
+                spin_unlock(&imp->imp_lock);
+                return 0;
+        }
+
+        imp->imp_force_reconnect = 1;
+
+        /* prevent concurrent reconnect. if this reconnect fails, the import
+         * will be set to non-FULL; if it succeeds, the next_reconnect value
+         * will be updated by the security module.
+         */
+        imp->imp_next_reconnect = 0;
+
+        spin_unlock(&imp->imp_lock);
+
+        CWARN("issue a force reconnect on imp %p(%s) to %s\n",
+              imp, ptlrpc_import_state_name(imp->imp_state),
+              imp->imp_obd->u.cli.cl_target_uuid.uuid);
+
+        /* usually the root context should still be valid, because import
+         * reconnect is issued a nice time in advance, thus there is little
+         * chance that a newly created & refreshing context gets wrongly
+         * flushed by us. but even then we are still fine.
+         */
+        sptlrpc_import_flush_root_ctx(imp);
+
+        ptlrpc_connect_import(imp, NULL);
+        return 1;
+}
+
 static int ptlrpc_pinger_main(void *arg)
 {
         struct ptlrpc_svc_data *data = (struct ptlrpc_svc_data *)arg;
@@ -102,6 +146,15 @@ static int ptlrpc_pinger_main(void *arg)
                                            imp_pinger_chain);
                         int force, level;
 
+                        if (check_import_reconnect(imp)) {
+                                /* if a forced reconnect was issued, we don't
+                                 * need additional ping at this time.
+                                 */
+                                if (imp->imp_pingable)
+                                        ptlrpc_update_next_ping(imp);
+                                continue;
+                        }
+
                         spin_lock(&imp->imp_lock);
                         level = imp->imp_state;
                         force = imp->imp_force_verify;
index 54abe7a..bd92a26 100644 (file)
@@ -126,6 +126,10 @@ void ptlrpc_add_bulk_page(struct ptlrpc_bulk_desc *desc, cfs_page_t *page,
                           int pageoffset, int len);
 void ptl_rpc_wipe_bulk_pages(struct ptlrpc_bulk_desc *desc);
 
+/* pack_generic.c */
+struct ptlrpc_reply_state *lustre_get_emerg_rs(struct ptlrpc_service *svc);
+void lustre_put_emerg_rs(struct ptlrpc_reply_state *rs);
+
 /* pinger.c */
 int ptlrpc_start_pinger(void);
 int ptlrpc_stop_pinger(void);
@@ -138,4 +142,16 @@ int ping_evictor_wake(struct obd_export *exp);
 #define ping_evictor_wake(exp)     1
 #endif
 
+/* sec_null.c */
+int sptlrpc_null_init(void);
+int sptlrpc_null_exit(void);
+
+/* sec_plain.c */
+int sptlrpc_plain_init(void);
+int sptlrpc_plain_exit(void);
+
+/* sec.c */
+int sptlrpc_init(void);
+int sptlrpc_exit(void);
+
 #endif /* PTLRPC_INTERNAL_H */
index c809437..be470c9 100644 (file)
@@ -85,10 +85,18 @@ __init int ptlrpc_init(void)
         rc = ldlm_init();
         if (rc)
                 GOTO(cleanup, rc);
+        cleanup_phase = 5;
+
+        rc = sptlrpc_init();
+        if (rc)
+                GOTO(cleanup, rc);
+
         RETURN(0);
 
 cleanup:
         switch(cleanup_phase) {
+        case 5:
+                ldlm_exit();
         case 4:
                 ptlrpc_stop_pinger();
         case 3:
@@ -107,6 +115,7 @@ cleanup:
 #ifdef __KERNEL__
 static void __exit ptlrpc_exit(void)
 {
+        sptlrpc_exit();
         ldlm_exit();
         ptlrpc_stop_pinger();
         ptlrpc_exit_portals();
@@ -187,7 +196,7 @@ EXPORT_SYMBOL(lustre_msg_swabbed);
 EXPORT_SYMBOL(lustre_msg_check_version);
 EXPORT_SYMBOL(lustre_pack_request);
 EXPORT_SYMBOL(lustre_pack_reply);
-EXPORT_SYMBOL(lustre_shrink_reply);
+EXPORT_SYMBOL(lustre_shrink_msg);
 EXPORT_SYMBOL(lustre_free_reply_state);
 EXPORT_SYMBOL(lustre_msg_size);
 EXPORT_SYMBOL(lustre_unpack_msg);
@@ -277,6 +286,10 @@ EXPORT_SYMBOL(ptlrpc_invalidate_import);
 EXPORT_SYMBOL(ptlrpc_fail_import);
 EXPORT_SYMBOL(ptlrpc_recover_import);
 
+/* pers.c */
+EXPORT_SYMBOL(ptlrpc_bulk_alloc_enc_pages);
+EXPORT_SYMBOL(ptlrpc_bulk_free_enc_pages);
+
 /* pinger.c */
 EXPORT_SYMBOL(ptlrpc_pinger_add_import);
 EXPORT_SYMBOL(ptlrpc_pinger_del_import);
diff --git a/lustre/ptlrpc/sec.c b/lustre/ptlrpc/sec.c
new file mode 100644 (file)
index 0000000..b9dddba
--- /dev/null
@@ -0,0 +1,2495 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2004-2006 Cluster File Systems, Inc.
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef EXPORT_SYMTAB
+#define EXPORT_SYMTAB
+#endif
+#define DEBUG_SUBSYSTEM S_SEC
+
+#include <libcfs/libcfs.h>
+#ifndef __KERNEL__
+#include <liblustre.h>
+#include <libcfs/list.h>
+#else
+#include <linux/crypto.h>
+#endif
+
+#include <obd.h>
+#include <obd_class.h>
+#include <obd_support.h>
+#include <lustre_net.h>
+#include <lustre_import.h>
+#include <lustre_dlm.h>
+#include <lustre_sec.h>
+
+#include "ptlrpc_internal.h"
+
+static void sptlrpc_sec_destroy(struct ptlrpc_sec *sec);
+static int sptlrpc_sec_destroy_ctx(struct ptlrpc_sec *sec,
+                                   struct ptlrpc_cli_ctx *ctx);
+static void sptlrpc_ctx_refresh(struct ptlrpc_cli_ctx *ctx);
+
+/***********************************************
+ * policy registers                            *
+ ***********************************************/
+
+static spinlock_t policy_lock = SPIN_LOCK_UNLOCKED;
+static struct ptlrpc_sec_policy *policies[SPTLRPC_POLICY_MAX] = {
+        NULL,
+};
+
+int sptlrpc_register_policy(struct ptlrpc_sec_policy *policy)
+{
+        __u32 number = policy->sp_policy;
+
+        LASSERT(policy->sp_name);
+        LASSERT(policy->sp_cops);
+        LASSERT(policy->sp_sops);
+
+        if (number >= SPTLRPC_POLICY_MAX)
+                return -EINVAL;
+
+        spin_lock(&policy_lock);
+        if (policies[number]) {
+                spin_unlock(&policy_lock);
+                return -EALREADY;
+        }
+        policies[number] = policy;
+        spin_unlock(&policy_lock);
+
+        CDEBUG(D_SEC, "%s: registered\n", policy->sp_name);
+        return 0;
+}
+EXPORT_SYMBOL(sptlrpc_register_policy);
+
+int sptlrpc_unregister_policy(struct ptlrpc_sec_policy *policy)
+{
+        __u32 number = policy->sp_policy;
+
+        LASSERT(number < SPTLRPC_POLICY_MAX);
+
+        spin_lock(&policy_lock);
+        if (!policies[number]) {
+                spin_unlock(&policy_lock);
+                CERROR("%s: already unregistered\n", policy->sp_name);
+                return -EINVAL;
+        }
+
+        LASSERT(policies[number] == policy);
+        policies[number] = NULL;
+        spin_unlock(&policy_lock);
+
+        CDEBUG(D_SEC, "%s: unregistered\n", policy->sp_name);
+        return 0;
+}
+EXPORT_SYMBOL(sptlrpc_unregister_policy);
+
+static
+struct ptlrpc_sec_policy * sptlrpc_flavor2policy(ptlrpc_flavor_t flavor)
+{
+        static int load_module = 0;
+        struct ptlrpc_sec_policy *policy;
+        __u32 number = SEC_FLAVOR_POLICY(flavor);
+
+        if (number >= SPTLRPC_POLICY_MAX)
+                return NULL;
+
+again:
+        spin_lock(&policy_lock);
+        policy = policies[number];
+        if (policy && !try_module_get(policy->sp_owner))
+                policy = NULL;
+        spin_unlock(&policy_lock);
+
+        /* if failure, try to load gss module, once */
+        if (policy == NULL && load_module == 0 &&
+            number == SPTLRPC_POLICY_GSS) {
+                load_module = 1;
+                if (request_module("ptlrpc_gss") == 0)
+                        goto again;
+        }
+
+        return policy;
+}
+
+ptlrpc_flavor_t sptlrpc_name2flavor(const char *name)
+{
+        if (!strcmp(name, "null"))
+                return SPTLRPC_FLVR_NULL;
+        if (!strcmp(name, "plain"))
+                return SPTLRPC_FLVR_PLAIN;
+        if (!strcmp(name, "krb5"))
+                return SPTLRPC_FLVR_KRB5;
+        if (!strcmp(name, "krb5i"))
+                return SPTLRPC_FLVR_KRB5I;
+        if (!strcmp(name, "krb5p"))
+                return SPTLRPC_FLVR_KRB5P;
+
+        return SPTLRPC_FLVR_INVALID;
+}
+EXPORT_SYMBOL(sptlrpc_name2flavor);
+
+char *sptlrpc_flavor2name(ptlrpc_flavor_t flavor)
+{
+        switch (flavor) {
+        case SPTLRPC_FLVR_NULL:
+                return "null";
+        case SPTLRPC_FLVR_PLAIN:
+                return "plain";
+        case SPTLRPC_FLVR_KRB5:
+                return "krb5";
+        case SPTLRPC_FLVR_KRB5I:
+                return "krb5i";
+        case SPTLRPC_FLVR_KRB5P:
+                return "krb5p";
+        default:
+                CERROR("invalid flavor 0x%x(p%u,s%u,v%u)\n", flavor,
+                       SEC_FLAVOR_POLICY(flavor), SEC_FLAVOR_SUBPOLICY(flavor),
+                       SEC_FLAVOR_SVC(flavor));
+        }
+        return "UNKNOWN";
+}
+EXPORT_SYMBOL(sptlrpc_flavor2name);
+
+/***********************************************
+ * context helpers                             *
+ * internal APIs                               *
+ * cache management                            *
+ ***********************************************/
+
+static inline
+unsigned long ctx_status(struct ptlrpc_cli_ctx *ctx)
+{
+        smp_mb();
+        return (ctx->cc_flags & PTLRPC_CTX_STATUS_MASK);
+}
+
+static inline
+int ctx_is_uptodate(struct ptlrpc_cli_ctx *ctx)
+{
+        return (ctx_status(ctx) == PTLRPC_CTX_UPTODATE);
+}
+
+static inline
+int ctx_is_refreshed(struct ptlrpc_cli_ctx *ctx)
+{
+        return (ctx_status(ctx) != 0);
+}
+
+static inline
+int ctx_is_dead(struct ptlrpc_cli_ctx *ctx)
+{
+        smp_mb();
+        return ((ctx->cc_flags & (PTLRPC_CTX_DEAD | PTLRPC_CTX_ERROR)) != 0);
+}
+
+static inline
+int ctx_is_eternal(struct ptlrpc_cli_ctx *ctx)
+{
+        smp_mb();
+        return ((ctx->cc_flags & PTLRPC_CTX_ETERNAL) != 0);
+}
+
+static
+int ctx_expire(struct ptlrpc_cli_ctx *ctx)
+{
+        LASSERT(atomic_read(&ctx->cc_refcount));
+
+        if (!test_and_set_bit(PTLRPC_CTX_DEAD_BIT, &ctx->cc_flags)) {
+                cfs_time_t now = cfs_time_current_sec();
+
+                smp_mb();
+                clear_bit(PTLRPC_CTX_UPTODATE_BIT, &ctx->cc_flags);
+
+                if (ctx->cc_expire && cfs_time_aftereq(now, ctx->cc_expire))
+                        CWARN("ctx %p(%u->%s): get expired (%lds exceeds)\n",
+                              ctx, ctx->cc_vcred.vc_uid,
+                              sec2target_str(ctx->cc_sec),
+                              cfs_time_sub(now, ctx->cc_expire));
+                else
+                        CWARN("ctx %p(%u->%s): force to die (%lds remains)\n",
+                              ctx, ctx->cc_vcred.vc_uid,
+                              sec2target_str(ctx->cc_sec),
+                              ctx->cc_expire == 0 ? 0 :
+                              cfs_time_sub(ctx->cc_expire, now));
+
+                return 1;
+        }
+        return 0;
+}
+
+static
+void ctx_enhash(struct ptlrpc_cli_ctx *ctx, struct hlist_head *hash)
+{
+        set_bit(PTLRPC_CTX_HASHED_BIT, &ctx->cc_flags);
+        atomic_inc(&ctx->cc_refcount);
+        hlist_add_head(&ctx->cc_hash, hash);
+}
+
+static
+void ctx_unhash(struct ptlrpc_cli_ctx *ctx, struct hlist_head *freelist)
+{
+        LASSERT_SPIN_LOCKED(&ctx->cc_sec->ps_lock);
+        LASSERT(atomic_read(&ctx->cc_refcount) > 0);
+        LASSERT(test_bit(PTLRPC_CTX_HASHED_BIT, &ctx->cc_flags));
+        LASSERT(!hlist_unhashed(&ctx->cc_hash));
+
+        clear_bit(PTLRPC_CTX_HASHED_BIT, &ctx->cc_flags);
+
+        if (atomic_dec_and_test(&ctx->cc_refcount)) {
+                __hlist_del(&ctx->cc_hash);
+                hlist_add_head(&ctx->cc_hash, freelist);
+        } else
+                hlist_del_init(&ctx->cc_hash);
+}
+
+/*
+ * check whether @ctx is dead or has passed its expiry time.
+ *
+ * @ctx:      context to examine.
+ * @freelist: if non-NULL, a dead or expired context is also passed to
+ *            ctx_unhash() together with this list; if NULL the context is
+ *            at most marked by ctx_expire() and stays where it is.
+ *
+ * return 1 if the context is dead (already flagged DEAD/ERROR, or its
+ * cc_expire time has been reached and ctx_expire() was called on it),
+ * return 0 if it is still alive.
+ */
+static
+int ctx_check_death(struct ptlrpc_cli_ctx *ctx, struct hlist_head *freelist)
+{
+        if (unlikely(ctx_is_dead(ctx)))
+                goto unhash;
+
+        /* an expire time of 0 means never expire. a newly created gss
+         * context which is still in its upcall also has 0 expiration
+         */
+        smp_mb();
+        if (ctx->cc_expire == 0)
+                return 0;
+
+        /* check real expiration: still alive while cc_expire is in the
+         * future */
+        smp_mb();
+        if (cfs_time_after(ctx->cc_expire, cfs_time_current_sec()))
+                return 0;
+
+        ctx_expire(ctx);
+
+unhash:
+        if (freelist)
+                ctx_unhash(ctx, freelist);
+
+        return 1;
+}
+
+static inline
+int ctx_check_death_locked(struct ptlrpc_cli_ctx *ctx,
+                           struct hlist_head *freelist)
+{
+        LASSERT(ctx->cc_sec);
+        LASSERT(atomic_read(&ctx->cc_refcount) > 0);
+        LASSERT_SPIN_LOCKED(&ctx->cc_sec->ps_lock);
+        LASSERT(test_bit(PTLRPC_CTX_HASHED_BIT, &ctx->cc_flags));
+
+        return ctx_check_death(ctx, freelist);
+}
+
+static
+int ctx_check_uptodate(struct ptlrpc_cli_ctx *ctx)
+{
+        LASSERT(ctx->cc_sec);
+        LASSERT(atomic_read(&ctx->cc_refcount) > 0);
+
+        if (!ctx_check_death(ctx, NULL) && ctx_is_uptodate(ctx))
+                return 1;
+        return 0;
+}
+
+static inline
+int ctx_match(struct ptlrpc_cli_ctx *ctx, struct vfs_cred *vcred)
+{
+        /* a little bit optimization for null policy */
+        if (!ctx->cc_ops->match)
+                return 1;
+
+        return ctx->cc_ops->match(ctx, vcred);
+}
+
+static
+void ctx_list_destroy(struct hlist_head *head)
+{
+        struct ptlrpc_cli_ctx *ctx;
+
+        while (!hlist_empty(head)) {
+                ctx = hlist_entry(head->first, struct ptlrpc_cli_ctx, cc_hash);
+
+                LASSERT(atomic_read(&ctx->cc_refcount) == 0);
+                LASSERT(test_bit(PTLRPC_CTX_HASHED_BIT, &ctx->cc_flags) == 0);
+
+                hlist_del_init(&ctx->cc_hash);
+                sptlrpc_sec_destroy_ctx(ctx->cc_sec, ctx);
+        }
+}
+
+static
+void ctx_cache_gc(struct ptlrpc_sec *sec, struct hlist_head *freelist)
+{
+        struct ptlrpc_cli_ctx *ctx;
+        struct hlist_node *pos, *next;
+        int i;
+        ENTRY;
+
+        CDEBUG(D_SEC, "do gc on sec %s@%p\n", sec->ps_policy->sp_name, sec);
+
+        for (i = 0; i < sec->ps_ccache_size; i++) {
+                hlist_for_each_entry_safe(ctx, pos, next,
+                                          &sec->ps_ccache[i], cc_hash)
+                        ctx_check_death_locked(ctx, freelist);
+        }
+
+        sec->ps_gc_next = cfs_time_current_sec() + sec->ps_gc_interval;
+        EXIT;
+}
+
+/*
+ * flush contexts out of the context cache of @sec. every flushed context
+ * is unhashed and marked DEAD; those whose last reference was the hash
+ * table's are destroyed after ps_lock is dropped.
+ *
+ * @uid: which user. "-1" means flush all.
+ * @grace: if zero, the UPTODATE bit is cleared as well; if non-zero it is
+ *         left alone, to allow graceful destroy like notify server side,
+ *         etc. (this function may sleep when @grace is set).
+ * @force: also flush busy entries, i.e. those with refcount > 1.
+ *
+ * return the number of busy contexts encountered, whether or not they
+ * were flushed.
+ *
+ * In any case, never touch "eternal" contexts.
+ */
+static
+int ctx_cache_flush(struct ptlrpc_sec *sec, uid_t uid, int grace, int force)
+{
+        struct ptlrpc_cli_ctx *ctx;
+        struct hlist_node *pos, *next;
+        HLIST_HEAD(freelist);
+        int i, busy = 0;
+        ENTRY;
+
+        might_sleep_if(grace);
+
+        spin_lock(&sec->ps_lock);
+        for (i = 0; i < sec->ps_ccache_size; i++) {
+                hlist_for_each_entry_safe(ctx, pos, next,
+                                          &sec->ps_ccache[i], cc_hash) {
+                        LASSERT(atomic_read(&ctx->cc_refcount) > 0);
+
+                        if (ctx_is_eternal(ctx))
+                                continue;
+                        if (uid != -1 && uid != ctx->cc_vcred.vc_uid)
+                                continue;
+
+                        if (atomic_read(&ctx->cc_refcount) > 1) {
+                                busy++;
+                                if (!force)
+                                        continue;
+
+                                CWARN("flush busy(%d) ctx %p(%u->%s) by force, "
+                                      "grace %d\n",
+                                      atomic_read(&ctx->cc_refcount),
+                                      ctx, ctx->cc_vcred.vc_uid,
+                                      sec2target_str(ctx->cc_sec), grace);
+                        }
+                        ctx_unhash(ctx, &freelist);
+
+                        set_bit(PTLRPC_CTX_DEAD_BIT, &ctx->cc_flags);
+                        if (!grace)
+                                clear_bit(PTLRPC_CTX_UPTODATE_BIT,
+                                          &ctx->cc_flags);
+                }
+        }
+        spin_unlock(&sec->ps_lock);
+
+        ctx_list_destroy(&freelist);
+        RETURN(busy);
+}
+
+static inline
+unsigned int ctx_hash_index(struct ptlrpc_sec *sec, __u64 key)
+{
+        return (unsigned int) (key & (sec->ps_ccache_size - 1));
+}
+
+/*
+ * look up the context matching @vcred in the context cache of @sec.
+ *
+ * @sec:         security instance whose cache (ps_ccache) is searched.
+ * @vcred:       credential to match; vc_uid selects the hash bucket.
+ * @create:      if non-zero and nothing matches, create a new context via
+ *               the policy's create_ctx() and insert it into the cache.
+ * @remove_dead: if non-zero, dead contexts met in the bucket are unhashed,
+ *               and a full gc pass runs first once ps_gc_next has passed.
+ *
+ * return the matched context with one reference held for the caller. If
+ * it's a newly created one, we also give the first push to refresh.
+ * return NULL if nothing matched and no context was created (a reverse sec
+ * never creates one), or if create_ctx() failed.
+ */
+static
+struct ptlrpc_cli_ctx * ctx_cache_lookup(struct ptlrpc_sec *sec,
+                                         struct vfs_cred *vcred,
+                                         int create, int remove_dead)
+{
+        struct ptlrpc_cli_ctx *ctx = NULL, *new = NULL;
+        struct hlist_head *hash_head;
+        struct hlist_node *pos, *next;
+        HLIST_HEAD(freelist);
+        unsigned int hash, gc = 0, found = 0;
+        ENTRY;
+
+        might_sleep();
+
+        hash = ctx_hash_index(sec, (__u64) vcred->vc_uid);
+        LASSERT(hash < sec->ps_ccache_size);
+        hash_head = &sec->ps_ccache[hash];
+
+retry:
+        spin_lock(&sec->ps_lock);
+
+        /* gc_next == 0 means never do gc */
+        if (remove_dead && sec->ps_gc_next &&
+            cfs_time_after(cfs_time_current_sec(), sec->ps_gc_next)) {
+                ctx_cache_gc(sec, &freelist);
+                gc = 1;
+        }
+
+        hlist_for_each_entry_safe(ctx, pos, next, hash_head, cc_hash) {
+                /* a gc pass has already checked every context, skip the
+                 * per-entry death check in that case */
+                if (gc == 0 &&
+                    ctx_check_death_locked(ctx, remove_dead ? &freelist : NULL))
+                        continue;
+
+                if (ctx_match(ctx, vcred)) {
+                        found = 1;
+                        break;
+                }
+        }
+
+        if (found) {
+                if (new && new != ctx) {
+                        /* lost the race, just free it */
+                        hlist_add_head(&new->cc_hash, &freelist);
+                        new = NULL;
+                }
+
+                /* hot node, move to head */
+                if (hash_head->first != &ctx->cc_hash) {
+                        __hlist_del(&ctx->cc_hash);
+                        hlist_add_head(&ctx->cc_hash, hash_head);
+                }
+        } else {
+                /* don't allocate for reverse sec */
+                if (sec->ps_flags & PTLRPC_SEC_FL_REVERSE) {
+                        spin_unlock(&sec->ps_lock);
+                        /* anything unhashed above must still be destroyed */
+                        ctx_list_destroy(&freelist);
+                        RETURN(NULL);
+                }
+
+                if (new) {
+                        ctx_enhash(new, hash_head);
+                        ctx = new;
+                } else if (create) {
+                        /* create_ctx() is called without ps_lock held; on
+                         * success redo the lookup under the lock, since
+                         * the cache may have changed meanwhile */
+                        spin_unlock(&sec->ps_lock);
+                        new = sec->ps_policy->sp_cops->create_ctx(sec, vcred);
+                        if (new) {
+                                atomic_inc(&sec->ps_busy);
+                                goto retry;
+                        }
+
+                        /* creation failed. ps_lock is already dropped and
+                         * @ctx was not set by a match, so we must not fall
+                         * through to the common exit, which would unlock
+                         * ps_lock a second time and might take a ref on a
+                         * stale pointer.
+                         */
+                        ctx_list_destroy(&freelist);
+                        RETURN(NULL);
+                } else
+                        ctx = NULL;
+        }
+
+        /* hold a ref */
+        if (ctx)
+                atomic_inc(&ctx->cc_refcount);
+
+        spin_unlock(&sec->ps_lock);
+
+        /* the allocator of the context must give the first push to refresh */
+        if (new) {
+                LASSERT(new == ctx);
+                sptlrpc_ctx_refresh(new);
+        }
+
+        ctx_list_destroy(&freelist);
+        RETURN(ctx);
+}
+
+static inline
+struct ptlrpc_cli_ctx *get_my_ctx(struct ptlrpc_sec *sec)
+{
+        struct vfs_cred vcred = { cfs_current()->uid, cfs_current()->gid };
+        int create = 1, remove_dead = 1;
+
+        if (sec->ps_flags & PTLRPC_SEC_FL_REVERSE) {
+                vcred.vc_uid = 0;
+                create = 0;
+                remove_dead = 0;
+        } else if (sec->ps_flags & PTLRPC_SEC_FL_ROOTONLY)
+                vcred.vc_uid = 0;
+
+        if (sec->ps_policy->sp_cops->lookup_ctx)
+                return sec->ps_policy->sp_cops->lookup_ctx(sec, &vcred);
+        else
+                return ctx_cache_lookup(sec, &vcred, create, remove_dead);
+}
+
+/**************************************************
+ * client context APIs                            *
+ **************************************************/
+
+static
+void sptlrpc_ctx_refresh(struct ptlrpc_cli_ctx *ctx)
+{
+        LASSERT(atomic_read(&ctx->cc_refcount) > 0);
+
+        if (!ctx_is_refreshed(ctx) && ctx->cc_ops->refresh)
+                ctx->cc_ops->refresh(ctx);
+}
+
+struct ptlrpc_cli_ctx *sptlrpc_ctx_get(struct ptlrpc_cli_ctx *ctx)
+{
+        LASSERT(atomic_read(&ctx->cc_refcount) > 0);
+        atomic_inc(&ctx->cc_refcount);
+        return ctx;
+}
+EXPORT_SYMBOL(sptlrpc_ctx_get);
+
+void sptlrpc_ctx_put(struct ptlrpc_cli_ctx *ctx, int sync)
+{
+        struct ptlrpc_sec *sec = ctx->cc_sec;
+
+        LASSERT(sec);
+        LASSERT(atomic_read(&ctx->cc_refcount));
+
+        if (!atomic_dec_and_test(&ctx->cc_refcount))
+                return;
+
+        LASSERT(test_bit(PTLRPC_CTX_HASHED_BIT, &ctx->cc_flags) == 0);
+        LASSERT(hlist_unhashed(&ctx->cc_hash));
+
+        /* if required async, we must clear the UPTODATE bit to prevent extra
+         * rpcs during destroy procedure.
+         */
+        if (!sync)
+                clear_bit(PTLRPC_CTX_UPTODATE_BIT, &ctx->cc_flags);
+
+        /* destroy this context */
+        if (!sptlrpc_sec_destroy_ctx(sec, ctx))
+                return;
+
+        CWARN("%s@%p: put last ctx, also destroy the sec\n",
+              sec->ps_policy->sp_name, sec);
+
+        sptlrpc_sec_destroy(sec);
+}
+EXPORT_SYMBOL(sptlrpc_ctx_put);
+
+/*
+ * mark a ctx as DEAD, and pull it out of the hash table if it is still
+ * hashed.
+ *
+ * when the ctx was hashed, the reference taken by ctx_enhash() is dropped
+ * here under ps_lock; it must never be the last one (LBUG otherwise).
+ *
+ * NOTE: the caller must hold at least 1 ref on the ctx.
+ */
+void sptlrpc_ctx_expire(struct ptlrpc_cli_ctx *ctx)
+{
+        LASSERT(ctx->cc_sec);
+        LASSERT(atomic_read(&ctx->cc_refcount) > 0);
+
+        ctx_expire(ctx);
+
+        spin_lock(&ctx->cc_sec->ps_lock);
+
+        if (test_and_clear_bit(PTLRPC_CTX_HASHED_BIT, &ctx->cc_flags)) {
+                LASSERT(!hlist_unhashed(&ctx->cc_hash));
+                LASSERT(atomic_read(&ctx->cc_refcount) > 1);
+
+                hlist_del_init(&ctx->cc_hash);
+                if (atomic_dec_and_test(&ctx->cc_refcount))
+                        LBUG();
+        }
+
+        spin_unlock(&ctx->cc_sec->ps_lock);
+}
+EXPORT_SYMBOL(sptlrpc_ctx_expire);
+
+void sptlrpc_ctx_replace(struct ptlrpc_sec *sec, struct ptlrpc_cli_ctx *new)
+{
+        struct ptlrpc_cli_ctx *ctx;
+        struct hlist_node *pos, *next;
+        HLIST_HEAD(freelist);
+        unsigned int hash;
+        ENTRY;
+
+        hash = ctx_hash_index(sec, (__u64) new->cc_vcred.vc_uid);
+        LASSERT(hash < sec->ps_ccache_size);
+
+        spin_lock(&sec->ps_lock);
+
+        hlist_for_each_entry_safe(ctx, pos, next,
+                                  &sec->ps_ccache[hash], cc_hash) {
+                if (!ctx_match(ctx, &new->cc_vcred))
+                        continue;
+
+                ctx_expire(ctx);
+                ctx_unhash(ctx, &freelist);
+                break;
+        }
+
+        ctx_enhash(new, &sec->ps_ccache[hash]);
+        atomic_inc(&sec->ps_busy);
+
+        spin_unlock(&sec->ps_lock);
+
+        ctx_list_destroy(&freelist);
+        EXIT;
+}
+EXPORT_SYMBOL(sptlrpc_ctx_replace);
+
+int sptlrpc_req_get_ctx(struct ptlrpc_request *req)
+{
+        struct obd_import *imp = req->rq_import;
+        ENTRY;
+
+        LASSERT(!req->rq_cli_ctx);
+        LASSERT(imp);
+
+        req->rq_cli_ctx = get_my_ctx(imp->imp_sec);
+
+        if (!req->rq_cli_ctx) {
+                CERROR("req %p: fail to get context from cache\n", req);
+                RETURN(-ENOMEM);
+        }
+
+        RETURN(0);
+}
+
+void sptlrpc_ctx_wakeup(struct ptlrpc_cli_ctx *ctx)
+{
+        struct ptlrpc_request *req, *next;
+
+        spin_lock(&ctx->cc_lock);
+        list_for_each_entry_safe(req, next, &ctx->cc_req_list, rq_ctx_chain) {
+                list_del_init(&req->rq_ctx_chain);
+                ptlrpc_wake_client_req(req);
+        }
+        spin_unlock(&ctx->cc_lock);
+}
+EXPORT_SYMBOL(sptlrpc_ctx_wakeup);
+
+void sptlrpc_req_put_ctx(struct ptlrpc_request *req)
+{
+        ENTRY;
+
+        LASSERT(req);
+        LASSERT(req->rq_cli_ctx);
+
+        /* request might be asked to release earlier while still
+         * in the context waiting list.
+         */
+        if (!list_empty(&req->rq_ctx_chain)) {
+                spin_lock(&req->rq_cli_ctx->cc_lock);
+                list_del_init(&req->rq_ctx_chain);
+                spin_unlock(&req->rq_cli_ctx->cc_lock);
+        }
+
+        /* this could be called with spinlock hold, use async mode */
+        sptlrpc_ctx_put(req->rq_cli_ctx, 0);
+        req->rq_cli_ctx = NULL;
+        EXIT;
+}
+
+/*
+ * switch @req from its current context, which must already be marked
+ * DEAD, to one freshly obtained through sptlrpc_req_get_ctx().
+ *
+ * request must have a context. if we fail to get a new context,
+ * just restore the old one.
+ *
+ * return 0 on success, otherwise the error from sptlrpc_req_get_ctx().
+ */
+int sptlrpc_req_replace_dead_ctx(struct ptlrpc_request *req)
+{
+        struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
+        int rc;
+        ENTRY;
+
+        LASSERT(ctx);
+        LASSERT(test_bit(PTLRPC_CTX_DEAD_BIT, &ctx->cc_flags));
+
+        /* make sure not on context waiting list */
+        spin_lock(&ctx->cc_lock);
+        list_del_init(&req->rq_ctx_chain);
+        spin_unlock(&ctx->cc_lock);
+
+        sptlrpc_ctx_get(ctx);   /* extra ref so the old ctx can be restored */
+        sptlrpc_req_put_ctx(req);
+        rc = sptlrpc_req_get_ctx(req);
+        if (!rc) {
+                LASSERT(req->rq_cli_ctx);
+                LASSERT(req->rq_cli_ctx != ctx);
+                sptlrpc_ctx_put(ctx, 1);        /* drop the extra ref */
+        } else {
+                LASSERT(!req->rq_cli_ctx);
+                req->rq_cli_ctx = ctx;  /* extra ref becomes the req's ref */
+        }
+        RETURN(rc);
+}
+
+static
+int ctx_check_refresh(struct ptlrpc_cli_ctx *ctx)
+{
+        smp_mb();
+        if (ctx_is_refreshed(ctx))
+                return 1;
+        return 0;
+}
+
+static
+int ctx_refresh_timeout(void *data)
+{
+        struct ptlrpc_request *req = data;
+        int rc;
+
+        /* conn_cnt is needed in expire_one_request */
+        lustre_msg_set_conn_cnt(req->rq_reqmsg, req->rq_import->imp_conn_cnt);
+
+        rc = ptlrpc_expire_one_request(req);
+        /* if we started recovery, we should mark this ctx dead; otherwise
+         * if lgssd has died nobody would retire this ctx, and following
+         * connect attempts would still find the same ctx and thus deadlock.
+         * there's an assumption that the expire time of the request is
+         * later than the context refresh expire time.
+         */
+        if (rc == 0)
+                ctx_expire(req->rq_cli_ctx);
+        return rc;
+}
+
+static
+void ctx_refresh_interrupt(void *data)
+{
+        /* do nothing */
+}
+
+/*
+ * the status of a context may be changed by other threads at any time.
+ * we allow this race. but once we return 0, the caller will assume the
+ * context is uptodate and keep using it until the affected rpc is done.
+ *
+ * @timeout:
+ *    < 0  - don't wait
+ *    = 0  - wait until success or fatal error occur
+ *    > 0  - timeout value
+ *
+ * return 0 only if the context is uptodate.
+ */
+int sptlrpc_req_refresh_ctx(struct ptlrpc_request *req, long timeout)
+{
+        struct ptlrpc_cli_ctx  *ctx = req->rq_cli_ctx;
+        struct l_wait_info      lwi;
+        int                     rc;
+        ENTRY;
+
+        LASSERT(ctx);
+
+        /* special ctxs */
+        if (ctx_is_eternal(ctx) || req->rq_ctx_init || req->rq_ctx_fini)
+                RETURN(0);
+
+        /* reverse ctxs, don't refresh */
+        if (ctx->cc_sec->ps_flags & PTLRPC_SEC_FL_REVERSE)
+                RETURN(0);
+
+        spin_lock(&ctx->cc_lock);
+again:
+        if (ctx_check_uptodate(ctx)) {
+                if (!list_empty(&req->rq_ctx_chain))
+                        list_del_init(&req->rq_ctx_chain);
+                spin_unlock(&ctx->cc_lock);
+                RETURN(0);
+        }
+
+        if (test_bit(PTLRPC_CTX_ERROR_BIT, &ctx->cc_flags)) {
+                req->rq_err = 1;
+                if (!list_empty(&req->rq_ctx_chain))
+                        list_del_init(&req->rq_ctx_chain);
+                spin_unlock(&ctx->cc_lock);
+                RETURN(-EPERM);
+        }
+
+        /* This is subtle. For resent message we have to keep original
+         * context to survive following situation:
+         *  1. the request sent to server
+         *  2. recovery was kick start
+         *  3. recovery finished, the request marked as resent
+         *  4. resend the request
+         *  5. old reply from server received (because xid is the same)
+         *  6. verify reply (has to be success)
+         *  7. new reply from server received, lnet drop it
+         *
+         * Note we can't simply change xid for resent request because
+         * server reply on it for reply reconstruction.
+         *
+         * Commonly the original context should be uptodate because we
+         * have a expiry nice time; And server will keep their half part
+         * context because we at least hold a ref of old context which
+         * prevent the context detroy RPC be sent. So server still can
+         * accept the request and finish RPC. Two cases:
+         *  1. If server side context has been trimed, a NO_CONTEXT will
+         *     be returned, gss_cli_ctx_verify/unseal will switch to new
+         *     context by force.
+         *  2. Current context never be refreshed, then we are fine: we
+         *     never really send request with old context before.
+         */
+        if (test_bit(PTLRPC_CTX_UPTODATE, &ctx->cc_flags) &&
+            req->rq_reqmsg &&
+            lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT) {
+                if (!list_empty(&req->rq_ctx_chain))
+                        list_del_init(&req->rq_ctx_chain);
+                spin_unlock(&ctx->cc_lock);
+                RETURN(0);
+        }
+
+        if (unlikely(test_bit(PTLRPC_CTX_DEAD_BIT, &ctx->cc_flags))) {
+                spin_unlock(&ctx->cc_lock);
+
+                /* don't have to, but we don't want to release it too soon */
+                sptlrpc_ctx_get(ctx);
+
+                rc = sptlrpc_req_replace_dead_ctx(req);
+                if (rc) {
+                        LASSERT(ctx == req->rq_cli_ctx);
+                        CERROR("req %p: failed to replace dead ctx %p\n",
+                                req, ctx);
+                        req->rq_err = 1;
+                        LASSERT(list_empty(&req->rq_ctx_chain));
+                        sptlrpc_ctx_put(ctx, 1);
+                        RETURN(-ENOMEM);
+                }
+
+                LASSERT(ctx != req->rq_cli_ctx);
+                CWARN("req %p: replace dead ctx %p(%u->%s) => %p\n",
+                      req, ctx, ctx->cc_vcred.vc_uid,
+                      sec2target_str(ctx->cc_sec), req->rq_cli_ctx);
+
+                sptlrpc_ctx_put(ctx, 1);
+                ctx = req->rq_cli_ctx;
+                LASSERT(list_empty(&req->rq_ctx_chain));
+
+                spin_lock(&ctx->cc_lock);
+                goto again;
+        }
+
+        /* Now we're sure this context is during upcall, add myself into
+         * waiting list
+         */
+        if (list_empty(&req->rq_ctx_chain))
+                list_add(&req->rq_ctx_chain, &ctx->cc_req_list);
+
+        spin_unlock(&ctx->cc_lock);
+
+        if (timeout < 0) {
+                RETURN(-EWOULDBLOCK);
+        }
+
+        /* Clear any flags that may be present from previous sends */
+        LASSERT(req->rq_receiving_reply == 0);
+        spin_lock(&req->rq_lock);
+        req->rq_err = 0;
+        req->rq_timedout = 0;
+        req->rq_resend = 0;
+        req->rq_restart = 0;
+        spin_unlock(&req->rq_lock);
+
+        lwi = LWI_TIMEOUT_INTR(timeout == 0 ? LONG_MAX : timeout * HZ,
+                               ctx_refresh_timeout, ctx_refresh_interrupt, req);
+        rc = l_wait_event(req->rq_reply_waitq, ctx_check_refresh(ctx), &lwi);
+
+        spin_lock(&ctx->cc_lock);
+        /* five cases we are here:
+         * 1. successfully refreshed;
+         * 2. someone else mark this ctx dead by force;
+         * 3. interruptted;
+         * 4. timedout, and we don't want recover from the failure;
+         * 5. timedout, and waked up upon recovery finished;
+         */
+        if (!ctx_is_refreshed(ctx)) {
+                /* timed out or interruptted */
+                list_del_init(&req->rq_ctx_chain);
+                spin_unlock(&ctx->cc_lock);
+
+                LASSERT(rc != 0);
+                RETURN(rc);
+        }
+
+        goto again;
+}
+
+/*
+ * set the security flags and the wire flavor of @req, based on @opcode,
+ * the flavor of the sec its context belongs to, and the security
+ * configuration of the import's client obd.
+ */
+void sptlrpc_req_set_flavor(struct ptlrpc_request *req, int opcode)
+{
+        struct sec_flavor_config *conf;
+
+        LASSERT(req->rq_import);
+        LASSERT(req->rq_import->imp_sec);
+        LASSERT(req->rq_cli_ctx);
+        LASSERT(req->rq_cli_ctx->cc_sec);
+        LASSERT(req->rq_bulk_read == 0 || req->rq_bulk_write == 0);
+
+        /* special security flags according to opcode */
+        if (opcode == OST_READ || opcode == OST_SAN_READ)
+                req->rq_bulk_read = 1;
+        else if (opcode == OST_WRITE || opcode == OST_SAN_WRITE)
+                req->rq_bulk_write = 1;
+        else if (opcode == SEC_CTX_INIT)
+                req->rq_ctx_init = 1;
+        else if (opcode == SEC_CTX_FINI)
+                req->rq_ctx_fini = 1;
+
+        req->rq_sec_flavor = req->rq_cli_ctx->cc_sec->ps_flavor;
+
+        /* context initiation rpc is forced to SVC_NONE, context
+         * destruction rpc is forced to SVC_AUTH
+         */
+        if (unlikely(req->rq_ctx_init)) {
+                req->rq_sec_flavor = SEC_MAKE_RPC_FLAVOR(
+                                SEC_FLAVOR_POLICY(req->rq_sec_flavor),
+                                SEC_FLAVOR_SUBPOLICY(req->rq_sec_flavor),
+                                SEC_FLAVOR_SVC(SPTLRPC_SVC_NONE));
+        } else if (unlikely(req->rq_ctx_fini)) {
+                req->rq_sec_flavor = SEC_MAKE_RPC_FLAVOR(
+                                SEC_FLAVOR_POLICY(req->rq_sec_flavor),
+                                SEC_FLAVOR_SUBPOLICY(req->rq_sec_flavor),
+                                SEC_FLAVOR_SVC(SPTLRPC_SVC_AUTH));
+        }
+
+        conf = &req->rq_import->imp_obd->u.cli.cl_sec_conf;
+
+        /* user descriptor flag: not needed for ROOTONLY, not possible
+         * for null security
+         */
+        if (!(conf->sfc_flags & PTLRPC_SEC_FL_ROOTONLY) &&
+            req->rq_sec_flavor != SPTLRPC_FLVR_NULL)
+                req->rq_sec_flavor |= SEC_FLAVOR_FL_USER;
+
+        /* bulk security flag only applies to bulk read/write requests */
+        if (!req->rq_bulk_read && !req->rq_bulk_write)
+                return;
+
+        if (conf->sfc_bulk_priv != BULK_PRIV_ALG_NULL ||
+            conf->sfc_bulk_csum != BULK_CSUM_ALG_NULL)
+                req->rq_sec_flavor |= SEC_FLAVOR_FL_BULK;
+}
+
+/*
+ * for a request using the privacy service, free its non-pooled request
+ * buffer (rq_reqbuf) and reset the pointer and length.
+ */
+void sptlrpc_request_out_callback(struct ptlrpc_request *req)
+{
+        if (SEC_FLAVOR_SVC(req->rq_sec_flavor) == SPTLRPC_SVC_PRIV) {
+                LASSERT(req->rq_clrbuf);
+
+                /* pooled buffers and already-released buffers stay as is */
+                if (!req->rq_pool && req->rq_reqbuf) {
+                        OBD_FREE(req->rq_reqbuf, req->rq_reqbuf_len);
+                        req->rq_reqbuf = NULL;
+                        req->rq_reqbuf_len = 0;
+                }
+        }
+}
+
+/*
+ * check whether current user have valid context for an import or not.
+ * might repeatedly try in case of non-fatal errors.
+ *
+ * a temporary dummy request is allocated only to drive
+ * sptlrpc_req_refresh_ctx(); it is freed before returning.
+ *
+ * return 0 on success, 1 if get_my_ctx() found no context,
+ * < 0 on failure (-ENOMEM, or the error from the refresh).
+ */
+int sptlrpc_import_check_ctx(struct obd_import *imp)
+{
+        struct ptlrpc_cli_ctx *ctx;
+        struct ptlrpc_request *req = NULL;
+        int rc;
+        ENTRY;
+
+        might_sleep();
+
+        ctx = get_my_ctx(imp->imp_sec);
+        if (!ctx)
+                RETURN(1);
+
+        if (ctx_is_eternal(ctx)) {
+                sptlrpc_ctx_put(ctx, 1);
+                RETURN(0);
+        }
+
+        OBD_ALLOC(req, sizeof(*req));
+        if (!req) {
+                /* drop the reference taken by get_my_ctx(), otherwise
+                 * the context is leaked on this error path
+                 */
+                sptlrpc_ctx_put(ctx, 1);
+                RETURN(-ENOMEM);
+        }
+
+        spin_lock_init(&req->rq_lock);
+        atomic_set(&req->rq_refcount, 10000);
+        INIT_LIST_HEAD(&req->rq_ctx_chain);
+        init_waitqueue_head(&req->rq_reply_waitq);
+        req->rq_import = imp;
+        req->rq_cli_ctx = ctx;
+
+        rc = sptlrpc_req_refresh_ctx(req, 0);
+        LASSERT(list_empty(&req->rq_ctx_chain));
+        /* the refresh may have replaced the context, so put whatever the
+         * request holds now rather than the local @ctx
+         */
+        sptlrpc_ctx_put(req->rq_cli_ctx, 1);
+        OBD_FREE(req, sizeof(*req));
+
+        RETURN(rc);
+}
+
+/*
+ * client side: wrap the bulk descriptor (if any), then sign or seal the
+ * request through the context operations according to the service part
+ * of the request flavor. returns 0 on success, otherwise the error from
+ * the bulk wrap or the context operation.
+ */
+int sptlrpc_cli_wrap_request(struct ptlrpc_request *req)
+{
+        struct ptlrpc_cli_ctx *cli_ctx = req->rq_cli_ctx;
+        int rc = 0;
+        ENTRY;
+
+        LASSERT(cli_ctx);
+        LASSERT(cli_ctx->cc_sec);
+        LASSERT(req->rq_reqbuf || req->rq_clrbuf);
+
+        /* bulk is wrapped here because only now can we be sure that
+         * the context is uptodate.
+         */
+        if (req->rq_bulk) {
+                rc = sptlrpc_cli_wrap_bulk(req, req->rq_bulk);
+                if (rc)
+                        RETURN(rc);
+        }
+
+        switch (SEC_FLAVOR_SVC(req->rq_sec_flavor)) {
+        case SPTLRPC_SVC_NONE:
+        case SPTLRPC_SVC_AUTH:
+                LASSERT(cli_ctx->cc_ops->sign);
+                rc = cli_ctx->cc_ops->sign(cli_ctx, req);
+                break;
+        case SPTLRPC_SVC_PRIV:
+                LASSERT(cli_ctx->cc_ops->seal);
+                rc = cli_ctx->cc_ops->seal(cli_ctx, req);
+                break;
+        default:
+                LBUG();
+        }
+
+        if (rc != 0)
+                RETURN(rc);
+
+        /* a wrapped request must be non-empty, 8-byte aligned in length
+         * and fit into the request buffer
+         */
+        LASSERT(req->rq_reqdata_len);
+        LASSERT(req->rq_reqdata_len % 8 == 0);
+        LASSERT(req->rq_reqdata_len <= req->rq_reqbuf_len);
+
+        RETURN(rc);
+}
+
+/*
+ * client side: sanity check a received reply against the request flavor,
+ * then verify or unseal it through the context operations.
+ * rq_nob_received is the actual received data length.
+ * returns -EPROTO on a malformed or mismatching reply, otherwise the
+ * result of the verify/unseal operation.
+ */
+int sptlrpc_cli_unwrap_reply(struct ptlrpc_request *req)
+{
+        struct ptlrpc_cli_ctx *cli_ctx = req->rq_cli_ctx;
+        int is_v1;
+        int rc;
+        ENTRY;
+
+        LASSERT(cli_ctx);
+        LASSERT(cli_ctx->cc_sec);
+        LASSERT(cli_ctx->cc_ops);
+        LASSERT(req->rq_repbuf);
+
+        req->rq_repdata_len = req->rq_nob_received;
+
+        if (req->rq_nob_received < sizeof(struct lustre_msg)) {
+                CERROR("replied data length %d too small\n",
+                       req->rq_nob_received);
+                RETURN(-EPROTO);
+        }
+
+        is_v1 = req->rq_repbuf->lm_magic == LUSTRE_MSG_MAGIC_V1 ||
+                req->rq_repbuf->lm_magic == LUSTRE_MSG_MAGIC_V1_SWABBED;
+
+        if (!is_v1) {
+                /* v2 message: the reply carries its own flavor */
+                ptlrpc_flavor_t reply_flvr = req->rq_repbuf->lm_secflvr;
+
+                if (req->rq_repbuf->lm_magic == LUSTRE_MSG_MAGIC_V2_SWABBED)
+                        __swab32s(&reply_flvr);
+
+                if (SEC_FLAVOR_POLICY(reply_flvr) !=
+                    SEC_FLAVOR_POLICY(req->rq_sec_flavor)) {
+                        CERROR("request policy %u while reply with %d\n",
+                               SEC_FLAVOR_POLICY(req->rq_sec_flavor),
+                               SEC_FLAVOR_POLICY(reply_flvr));
+                        RETURN(-EPROTO);
+                }
+
+                /* only non-null policies unpack the message here */
+                if (SEC_FLAVOR_POLICY(req->rq_sec_flavor) !=
+                    SPTLRPC_POLICY_NULL) {
+                        if (lustre_unpack_msg(req->rq_repbuf,
+                                              req->rq_nob_received))
+                                RETURN(-EPROTO);
+                }
+        } else if (SEC_FLAVOR_POLICY(req->rq_sec_flavor) !=
+                   SPTLRPC_POLICY_NULL) {
+                /* a v1 reply must be null flavor, so our request should
+                 * have been in null flavor too */
+                CERROR("request flavor is %x but reply with null\n",
+                       req->rq_sec_flavor);
+                RETURN(-EPROTO);
+        }
+
+        switch (SEC_FLAVOR_SVC(req->rq_sec_flavor)) {
+        case SPTLRPC_SVC_NONE:
+        case SPTLRPC_SVC_AUTH:
+                LASSERT(cli_ctx->cc_ops->verify);
+                rc = cli_ctx->cc_ops->verify(cli_ctx, req);
+                break;
+        case SPTLRPC_SVC_PRIV:
+                LASSERT(cli_ctx->cc_ops->unseal);
+                rc = cli_ctx->cc_ops->unseal(cli_ctx, req);
+                break;
+        default:
+                LBUG();
+        }
+
+        /* on success the policy must have set rq_repmsg */
+        LASSERT(rc || req->rq_repmsg);
+        RETURN(rc);
+}
+
+/**************************************************
+ * security APIs                                  *
+ **************************************************/
+
+/*
+ * create a sec for @imp: a reverse sec using the policy of @ctx when a
+ * server context is given, otherwise one selected by @flavor.
+ * let the policy module determine whether to take a reference of the
+ * import or not.
+ * returns the new sec, or NULL on an invalid flavor or creation failure.
+ */
+static
+struct ptlrpc_sec * sptlrpc_sec_create(struct obd_import *imp,
+                                       struct ptlrpc_svc_ctx *ctx,
+                                       __u32 flavor,
+                                       unsigned long flags)
+{
+        struct ptlrpc_sec_policy *policy;
+        struct ptlrpc_sec *new_sec;
+        ENTRY;
+
+        flavor = SEC_FLAVOR_RPC(flavor);
+
+        if (ctx == NULL) {
+                LASSERT(imp->imp_dlm_fake == 0);
+
+                CDEBUG(D_SEC, "%s %s: select security flavor %s\n",
+                       imp->imp_obd->obd_type->typ_name,
+                       imp->imp_obd->obd_name,
+                       sptlrpc_flavor2name(flavor));
+
+                policy = sptlrpc_flavor2policy(flavor);
+                if (policy == NULL) {
+                        CERROR("invalid flavor 0x%x\n", flavor);
+                        RETURN(NULL);
+                }
+        } else {
+                LASSERT(imp->imp_dlm_fake == 1);
+
+                CDEBUG(D_SEC, "%s %s: reverse sec using flavor %s\n",
+                       imp->imp_obd->obd_type->typ_name,
+                       imp->imp_obd->obd_name,
+                       sptlrpc_flavor2name(flavor));
+
+                policy = sptlrpc_policy_get(ctx->sc_policy);
+                flags |= PTLRPC_SEC_FL_REVERSE | PTLRPC_SEC_FL_ROOTONLY;
+        }
+
+        new_sec = policy->sp_cops->create_sec(imp, ctx, flavor, flags);
+        if (new_sec == NULL) {
+                /* creation failed, give the policy reference back */
+                sptlrpc_policy_put(policy);
+                RETURN(NULL);
+        }
+
+        atomic_inc(&new_sec->ps_refcount);
+
+        /* take 1 busy count on behalf of sec itself,
+         * balanced in sptlrpc_sec_put()
+         */
+        atomic_inc(&new_sec->ps_busy);
+
+        RETURN(new_sec);
+}
+
+/*
+ * destroy a sec which has neither references nor busy counts left, then
+ * drop one reference on its policy.
+ */
+static void sptlrpc_sec_destroy(struct ptlrpc_sec *sec)
+{
+        /* remember the policy first: @sec is handed to destroy_sec() */
+        struct ptlrpc_sec_policy *sec_policy = sec->ps_policy;
+
+        LASSERT(sec_policy);
+        LASSERT(atomic_read(&sec->ps_refcount) == 0);
+        LASSERT(atomic_read(&sec->ps_busy) == 0);
+        LASSERT(sec_policy->sp_cops->destroy_sec);
+
+        sec_policy->sp_cops->destroy_sec(sec);
+        sptlrpc_policy_put(sec_policy);
+}
+
+/*
+ * drop one reference on @sec. if it was not the last one only a policy
+ * reference is put; otherwise the context cache is flushed and the sec
+ * is destroyed once its busy count also drops to zero.
+ */
+static void sptlrpc_sec_put(struct ptlrpc_sec *sec)
+{
+        struct ptlrpc_sec_policy *sec_policy = sec->ps_policy;
+
+        if (atomic_dec_and_test(&sec->ps_refcount)) {
+                ctx_cache_flush(sec, -1, 1, 1);
+
+                if (atomic_dec_and_test(&sec->ps_busy))
+                        sptlrpc_sec_destroy(sec);
+                else
+                        CWARN("delay to destroy %s@%p: busy contexts\n",
+                              sec_policy->sp_name, sec);
+        } else {
+                sptlrpc_policy_put(sec_policy);
+        }
+}
+
+/*
+ * destroy an unreferenced, unhashed context through its policy and drop
+ * the busy count it held on @sec.
+ * return 1 when that was the last busy count, meaning the caller should
+ * also destroy the sec structure; normally return 0.
+ */
+static
+int sptlrpc_sec_destroy_ctx(struct ptlrpc_sec *sec,
+                            struct ptlrpc_cli_ctx *ctx)
+{
+        int last_busy;
+
+        LASSERT(sec == ctx->cc_sec);
+        LASSERT(atomic_read(&sec->ps_busy));
+        LASSERT(atomic_read(&ctx->cc_refcount) == 0);
+        LASSERT(hlist_unhashed(&ctx->cc_hash));
+        LASSERT(list_empty(&ctx->cc_req_list));
+        LASSERT(sec->ps_policy->sp_cops->destroy_ctx);
+
+        sec->ps_policy->sp_cops->destroy_ctx(sec, ctx);
+
+        last_busy = atomic_dec_and_test(&sec->ps_busy);
+        if (!last_busy)
+                return 0;
+
+        LASSERT(atomic_read(&sec->ps_refcount) == 0);
+        return 1;
+}
+
+/*
+ * allocate the request buffer of @req through the policy of its context.
+ * when this completes successfully, req->rq_reqmsg points to the right
+ * place. returns 0 on success or the policy's error code.
+ */
+int sptlrpc_cli_alloc_reqbuf(struct ptlrpc_request *req, int msgsize)
+{
+        struct ptlrpc_cli_ctx *cli_ctx = req->rq_cli_ctx;
+        int rc;
+
+        LASSERT(cli_ctx);
+        LASSERT(atomic_read(&cli_ctx->cc_refcount));
+        LASSERT(cli_ctx->cc_sec);
+        LASSERT(cli_ctx->cc_sec->ps_policy);
+        LASSERT(req->rq_reqmsg == NULL);
+
+        rc = cli_ctx->cc_sec->ps_policy->sp_cops->alloc_reqbuf(
+                                        cli_ctx->cc_sec, req, msgsize);
+        if (rc)
+                return rc;
+
+        LASSERT(req->rq_reqmsg);
+        LASSERT(req->rq_reqbuf || req->rq_clrbuf);
+
+        /* a preallocated (pooled) buffer has to be zeroed explicitly */
+        if (req->rq_pool)
+                memset(req->rq_reqmsg, 0, msgsize);
+
+        return rc;
+}
+
+/*
+ * release the request buffer of @req through the policy of its context.
+ */
+void sptlrpc_cli_free_reqbuf(struct ptlrpc_request *req)
+{
+        struct ptlrpc_cli_ctx *cli_ctx = req->rq_cli_ctx;
+
+        LASSERT(cli_ctx);
+        LASSERT(atomic_read(&cli_ctx->cc_refcount));
+        LASSERT(cli_ctx->cc_sec);
+        LASSERT(cli_ctx->cc_sec->ps_policy);
+        LASSERT(req->rq_reqbuf || req->rq_clrbuf);
+
+        cli_ctx->cc_sec->ps_policy->sp_cops->free_reqbuf(cli_ctx->cc_sec,
+                                                         req);
+}
+
+/*
+ * allocate the reply buffer of @req through the policy of its context,
+ * unless the request already has one. returns 0 or the policy's result.
+ */
+int sptlrpc_cli_alloc_repbuf(struct ptlrpc_request *req, int msgsize)
+{
+        struct ptlrpc_cli_ctx *cli_ctx = req->rq_cli_ctx;
+        ENTRY;
+
+        LASSERT(cli_ctx);
+        LASSERT(atomic_read(&cli_ctx->cc_refcount));
+        LASSERT(cli_ctx->cc_sec);
+        LASSERT(cli_ctx->cc_sec->ps_policy);
+
+        /* keep an already allocated reply buffer */
+        if (req->rq_repbuf)
+                RETURN(0);
+
+        RETURN(cli_ctx->cc_sec->ps_policy->sp_cops->alloc_repbuf(
+                                        cli_ctx->cc_sec, req, msgsize));
+}
+
+/*
+ * release the reply buffer of @req through the policy of its context.
+ */
+void sptlrpc_cli_free_repbuf(struct ptlrpc_request *req)
+{
+        struct ptlrpc_cli_ctx *cli_ctx = req->rq_cli_ctx;
+        ENTRY;
+
+        LASSERT(cli_ctx);
+        LASSERT(atomic_read(&cli_ctx->cc_refcount));
+        LASSERT(cli_ctx->cc_sec);
+        LASSERT(cli_ctx->cc_sec->ps_policy);
+        LASSERT(req->rq_repbuf);
+
+        cli_ctx->cc_sec->ps_policy->sp_cops->free_repbuf(cli_ctx->cc_sec,
+                                                         req);
+        EXIT;
+}
+
+/*
+ * make sure @imp has a sec attached, creating one if there is none yet.
+ * returns 0 on success, -EINVAL if the sec could not be created.
+ */
+int sptlrpc_import_get_sec(struct obd_import *imp,
+                           struct ptlrpc_svc_ctx *ctx,
+                           __u32 flavor,
+                           unsigned long flags)
+{
+        struct obd_device *imp_obd = imp->imp_obd;
+        ENTRY;
+
+        LASSERT(imp_obd);
+        LASSERT(imp_obd->obd_type);
+
+        /* the old sec might still be there when reconnecting */
+        if (imp->imp_sec == NULL) {
+                imp->imp_sec = sptlrpc_sec_create(imp, ctx, flavor, flags);
+                if (imp->imp_sec == NULL)
+                        RETURN(-EINVAL);
+        }
+
+        RETURN(0);
+}
+
+/*
+ * drop the import's reference on its sec, if any, and detach it.
+ */
+void sptlrpc_import_put_sec(struct obd_import *imp)
+{
+        if (imp->imp_sec != NULL) {
+                sptlrpc_sec_put(imp->imp_sec);
+                imp->imp_sec = NULL;
+        }
+}
+
+/*
+ * flush the cached context of uid 0 from the import's sec; a NULL import
+ * or an import without sec is ignored.
+ */
+void sptlrpc_import_flush_root_ctx(struct obd_import *imp)
+{
+        if (imp != NULL && imp->imp_sec != NULL) {
+                /* use 'grace' mode, it's crucial: see the explanation
+                 * in sptlrpc_req_refresh_ctx()
+                 */
+                ctx_cache_flush(imp->imp_sec, 0, 1, 1);
+        }
+}
+
+/*
+ * flush the cached context of the current task's uid from the import's
+ * sec; a NULL import or an import without sec is ignored.
+ */
+void sptlrpc_import_flush_my_ctx(struct obd_import *imp)
+{
+        if (imp != NULL && imp->imp_sec != NULL)
+                ctx_cache_flush(imp->imp_sec, cfs_current()->uid, 1, 1);
+}
+EXPORT_SYMBOL(sptlrpc_import_flush_my_ctx);
+
+/*
+ * client side: call the policy's optional install_rctx hook for @ctx.
+ * returns 0 when the policy has no such hook, else the hook's result.
+ */
+int sptlrpc_cli_install_rvs_ctx(struct obd_import *imp,
+                                struct ptlrpc_cli_ctx *ctx)
+{
+        struct ptlrpc_sec_policy *sec_policy = ctx->cc_sec->ps_policy;
+
+        if (sec_policy->sp_cops->install_rctx)
+                return sec_policy->sp_cops->install_rctx(imp, ctx->cc_sec,
+                                                         ctx);
+        return 0;
+}
+
+/*
+ * server side: call the policy's optional install_rctx hook for @ctx.
+ * returns 0 when the policy has no such hook, else the hook's result.
+ */
+int sptlrpc_svc_install_rvs_ctx(struct obd_import *imp,
+                                struct ptlrpc_svc_ctx *ctx)
+{
+        struct ptlrpc_sec_policy *svc_policy = ctx->sc_policy;
+
+        if (svc_policy->sp_sops->install_rctx)
+                return svc_policy->sp_sops->install_rctx(imp, ctx);
+        return 0;
+}
+
+/****************************************
+ * server side security                 *
+ ****************************************/
+
+/*
+ * server side: determine the security flavor of an incoming request and
+ * let the matching policy accept it. returns SECSVC_DROP on a malformed
+ * request or unsupported flavor, otherwise the policy's accept() result.
+ */
+int sptlrpc_svc_unwrap_request(struct ptlrpc_request *req)
+{
+        struct ptlrpc_sec_policy *policy;
+        struct lustre_msg *msg = req->rq_reqbuf;
+        int is_v1;
+        int rc;
+        ENTRY;
+
+        LASSERT(msg);
+        LASSERT(req->rq_reqmsg == NULL);
+        LASSERT(req->rq_repmsg == NULL);
+
+        /*
+         * in any case we avoid calling unpack_msg() for a request of null
+         * flavor, which will later be done by
+         * ptlrpc_server_handle_request().
+         */
+        if (req->rq_reqdata_len < sizeof(struct lustre_msg)) {
+                CERROR("request size %d too small\n", req->rq_reqdata_len);
+                RETURN(SECSVC_DROP);
+        }
+
+        is_v1 = msg->lm_magic == LUSTRE_MSG_MAGIC_V1 ||
+                msg->lm_magic == LUSTRE_MSG_MAGIC_V1_SWABBED;
+
+        if (!is_v1) {
+                req->rq_sec_flavor = msg->lm_secflvr;
+
+                if (msg->lm_magic == LUSTRE_MSG_MAGIC_V2_SWABBED)
+                        __swab32s(&req->rq_sec_flavor);
+
+                if (SEC_FLAVOR_POLICY(req->rq_sec_flavor) !=
+                    SPTLRPC_POLICY_NULL) {
+                        if (lustre_unpack_msg(msg, req->rq_reqdata_len))
+                                RETURN(SECSVC_DROP);
+                }
+        } else {
+                /* v1 messages are always treated as null flavor */
+                req->rq_sec_flavor = SPTLRPC_FLVR_NULL;
+        }
+
+        policy = sptlrpc_flavor2policy(req->rq_sec_flavor);
+        if (policy == NULL) {
+                CERROR("unsupported security flavor %x\n", req->rq_sec_flavor);
+                RETURN(SECSVC_DROP);
+        }
+
+        LASSERT(policy->sp_sops->accept);
+        rc = policy->sp_sops->accept(req);
+
+        LASSERT(req->rq_reqmsg || rc != SECSVC_OK);
+        sptlrpc_policy_put(policy);
+
+        /* FIXME move to proper place */
+        if (rc == SECSVC_OK) {
+                __u32 opc = lustre_msg_get_opc(req->rq_reqmsg);
+
+                switch (opc) {
+                case OST_WRITE:
+                case OST_SAN_WRITE:
+                        req->rq_bulk_write = 1;
+                        break;
+                case OST_READ:
+                case OST_SAN_READ:
+                        req->rq_bulk_read = 1;
+                        break;
+                default:
+                        break;
+                }
+        }
+
+        RETURN(rc);
+}
+
+/*
+ * server side: allocate the reply state of @req through its policy. if
+ * that fails with -ENOMEM, retry once with a reply state taken from the
+ * service's emergency pool. returns 0 on success or an error code.
+ */
+int sptlrpc_svc_alloc_rs(struct ptlrpc_request *req,
+                         int msglen)
+{
+        struct ptlrpc_sec_policy *svc_policy;
+        struct ptlrpc_reply_state *emerg_rs;
+        int rc;
+        ENTRY;
+
+        LASSERT(req->rq_svc_ctx);
+        LASSERT(req->rq_svc_ctx->sc_policy);
+
+        svc_policy = req->rq_svc_ctx->sc_policy;
+        LASSERT(svc_policy->sp_sops->alloc_rs);
+
+        rc = svc_policy->sp_sops->alloc_rs(req, msglen);
+        if (unlikely(rc == -ENOMEM)) {
+                /* allocation failed, fall back to the emergency pool */
+                emerg_rs = lustre_get_emerg_rs(req->rq_rqbd->rqbd_service);
+                if (emerg_rs == NULL)
+                        RETURN(-ENOMEM);
+
+                req->rq_reply_state = emerg_rs;
+                rc = svc_policy->sp_sops->alloc_rs(req, msglen);
+                if (rc) {
+                        /* still failing: hand the emergency rs back */
+                        lustre_put_emerg_rs(emerg_rs);
+                        req->rq_reply_state = NULL;
+                }
+        }
+
+        LASSERT(rc != 0 ||
+                (req->rq_reply_state && req->rq_reply_state->rs_msg));
+
+        RETURN(rc);
+}
+
+/*
+ * server side: let the policy of the request's context authorize (wrap)
+ * the reply. returns the result of the policy's authorize() hook.
+ */
+int sptlrpc_svc_wrap_reply(struct ptlrpc_request *req)
+{
+        struct ptlrpc_sec_policy *svc_policy;
+        int rc;
+        ENTRY;
+
+        LASSERT(req->rq_svc_ctx);
+        LASSERT(req->rq_svc_ctx->sc_policy);
+
+        svc_policy = req->rq_svc_ctx->sc_policy;
+        LASSERT(svc_policy->sp_sops->authorize);
+
+        rc = svc_policy->sp_sops->authorize(req);
+        /* a successfully wrapped reply must carry some data */
+        LASSERT(rc || req->rq_reply_state->rs_repdata_len);
+
+        RETURN(rc);
+}
+
+/*
+ * server side: release a reply state through its policy; a preallocated
+ * one is additionally handed back via lustre_put_emerg_rs().
+ */
+void sptlrpc_svc_free_rs(struct ptlrpc_reply_state *rs)
+{
+        struct ptlrpc_sec_policy *svc_policy;
+        unsigned int was_prealloc;
+        ENTRY;
+
+        LASSERT(rs->rs_svc_ctx);
+        LASSERT(rs->rs_svc_ctx->sc_policy);
+
+        svc_policy = rs->rs_svc_ctx->sc_policy;
+        LASSERT(svc_policy->sp_sops->free_rs);
+
+        /* sample the flag before the policy's free_rs() runs */
+        was_prealloc = rs->rs_prealloc;
+        svc_policy->sp_sops->free_rs(rs);
+
+        if (was_prealloc)
+                lustre_put_emerg_rs(rs);
+        EXIT;
+}
+
+/*
+ * take one more reference on the server context of @req, if it has one.
+ */
+void sptlrpc_svc_ctx_addref(struct ptlrpc_request *req)
+{
+        struct ptlrpc_svc_ctx *svc_ctx = req->rq_svc_ctx;
+
+        if (svc_ctx != NULL) {
+                LASSERT(atomic_read(&svc_ctx->sc_refcount) > 0);
+                atomic_inc(&svc_ctx->sc_refcount);
+        }
+}
+
+/*
+ * drop the request's reference on its server context, if it has one, and
+ * detach it from @req. on the last reference the policy's optional
+ * free_ctx hook is called.
+ */
+void sptlrpc_svc_ctx_decref(struct ptlrpc_request *req)
+{
+        struct ptlrpc_svc_ctx *svc_ctx = req->rq_svc_ctx;
+
+        if (svc_ctx != NULL) {
+                LASSERT(atomic_read(&svc_ctx->sc_refcount) > 0);
+
+                if (atomic_dec_and_test(&svc_ctx->sc_refcount) &&
+                    svc_ctx->sc_policy->sp_sops->free_ctx)
+                        svc_ctx->sc_policy->sp_sops->free_ctx(svc_ctx);
+
+                req->rq_svc_ctx = NULL;
+        }
+}
+
+/****************************************
+ * bulk security                        *
+ ****************************************/
+
+/*
+ * client side: wrap a bulk descriptor through the context's optional
+ * wrap_bulk hook. returns 0 when the request flavor has no bulk flag or
+ * the hook is absent, otherwise the hook's result.
+ */
+int sptlrpc_cli_wrap_bulk(struct ptlrpc_request *req,
+                          struct ptlrpc_bulk_desc *desc)
+{
+        struct ptlrpc_cli_ctx *cli_ctx;
+
+        if (!SEC_FLAVOR_HAS_BULK(req->rq_sec_flavor))
+                return 0;
+
+        LASSERT(req->rq_bulk_read || req->rq_bulk_write);
+
+        cli_ctx = req->rq_cli_ctx;
+        if (!cli_ctx->cc_ops->wrap_bulk)
+                return 0;
+
+        return cli_ctx->cc_ops->wrap_bulk(cli_ctx, req, desc);
+}
+EXPORT_SYMBOL(sptlrpc_cli_wrap_bulk);
+
+/*
+ * fill the iov array of @desc from the page array @pga, covering at most
+ * @pg_count pages and stopping once @nob bytes have been accounted for.
+ * the length of each entry is capped at the bytes still remaining.
+ */
+static
+void pga_to_bulk_desc(int nob, obd_count pg_count, struct brw_page **pga,
+                      struct ptlrpc_bulk_desc *desc)
+{
+        int idx = 0;
+
+        LASSERT(pga);
+        LASSERT(*pga);
+
+        while (idx < pg_count && nob > 0) {
+#ifdef __KERNEL__
+                desc->bd_iov[idx].kiov_page = pga[idx]->pg;
+                desc->bd_iov[idx].kiov_len = pga[idx]->count > nob ?
+                                             nob : pga[idx]->count;
+                desc->bd_iov[idx].kiov_offset = pga[idx]->off & ~CFS_PAGE_MASK;
+#else
+#warning FIXME for liblustre!
+                desc->bd_iov[idx].iov_base = pga[idx]->pg->addr;
+                desc->bd_iov[idx].iov_len = pga[idx]->count > nob ?
+                                            nob : pga[idx]->count;
+#endif
+
+                desc->bd_iov_count++;
+                nob -= pga[idx]->count;
+                idx++;
+        }
+}
+
+/*
+ * client side: unwrap bulk read data given as a page array. a temporary
+ * bulk descriptor is built from @pga, passed to the context's optional
+ * unwrap_bulk hook and freed again.
+ * returns 0 when the flavor has no bulk flag or the hook is absent,
+ * -ENOMEM if the descriptor can't be allocated, else the hook's result.
+ */
+int sptlrpc_cli_unwrap_bulk_read(struct ptlrpc_request *req,
+                                 int nob, obd_count pg_count,
+                                 struct brw_page **pga)
+{
+        struct ptlrpc_bulk_desc *tmp_desc;
+        struct ptlrpc_cli_ctx *cli_ctx;
+        int rc = 0;
+
+        if (!SEC_FLAVOR_HAS_BULK(req->rq_sec_flavor))
+                return 0;
+
+        LASSERT(req->rq_bulk_read && !req->rq_bulk_write);
+
+        OBD_ALLOC(tmp_desc,
+                  offsetof(struct ptlrpc_bulk_desc, bd_iov[pg_count]));
+        if (tmp_desc == NULL) {
+                CERROR("out of memory, can't verify bulk read data\n");
+                return -ENOMEM;
+        }
+
+        pga_to_bulk_desc(nob, pg_count, pga, tmp_desc);
+
+        cli_ctx = req->rq_cli_ctx;
+        if (cli_ctx->cc_ops->unwrap_bulk)
+                rc = cli_ctx->cc_ops->unwrap_bulk(cli_ctx, req, tmp_desc);
+
+        OBD_FREE(tmp_desc,
+                 offsetof(struct ptlrpc_bulk_desc, bd_iov[pg_count]));
+
+        return rc;
+}
+EXPORT_SYMBOL(sptlrpc_cli_unwrap_bulk_read);
+
+/*
+ * client side: unwrap a bulk write descriptor through the context's
+ * optional unwrap_bulk hook. returns 0 when the flavor has no bulk flag
+ * or the hook is absent, otherwise the hook's result.
+ */
+int sptlrpc_cli_unwrap_bulk_write(struct ptlrpc_request *req,
+                                  struct ptlrpc_bulk_desc *desc)
+{
+        struct ptlrpc_cli_ctx *cli_ctx;
+
+        if (!SEC_FLAVOR_HAS_BULK(req->rq_sec_flavor))
+                return 0;
+
+        LASSERT(!req->rq_bulk_read && req->rq_bulk_write);
+
+        cli_ctx = req->rq_cli_ctx;
+        if (!cli_ctx->cc_ops->unwrap_bulk)
+                return 0;
+
+        return cli_ctx->cc_ops->unwrap_bulk(cli_ctx, req, desc);
+}
+EXPORT_SYMBOL(sptlrpc_cli_unwrap_bulk_write);
+
+/*
+ * server side: wrap a bulk descriptor through the policy's optional
+ * wrap_bulk hook. returns 0 when the flavor has no bulk flag or the hook
+ * is absent, otherwise the hook's result.
+ */
+int sptlrpc_svc_wrap_bulk(struct ptlrpc_request *req,
+                          struct ptlrpc_bulk_desc *desc)
+{
+        struct ptlrpc_svc_ctx *svc_ctx;
+
+        if (!SEC_FLAVOR_HAS_BULK(req->rq_sec_flavor))
+                return 0;
+
+        LASSERT(req->rq_bulk_read || req->rq_bulk_write);
+
+        svc_ctx = req->rq_svc_ctx;
+        if (!svc_ctx->sc_policy->sp_sops->wrap_bulk)
+                return 0;
+
+        return svc_ctx->sc_policy->sp_sops->wrap_bulk(req, desc);
+}
+EXPORT_SYMBOL(sptlrpc_svc_wrap_bulk);
+
+/*
+ * server side: unwrap a bulk descriptor through the policy's optional
+ * unwrap_bulk hook. returns 0 when the flavor has no bulk flag or the
+ * hook is absent, otherwise the hook's result.
+ */
+int sptlrpc_svc_unwrap_bulk(struct ptlrpc_request *req,
+                            struct ptlrpc_bulk_desc *desc)
+{
+        struct ptlrpc_svc_ctx *ctx;
+
+        if (!SEC_FLAVOR_HAS_BULK(req->rq_sec_flavor))
+                return 0;
+
+        LASSERT(req->rq_bulk_read || req->rq_bulk_write);
+
+        ctx = req->rq_svc_ctx;
+        /* the hook is optional: only call it when the policy provides
+         * one (a stray ';' after this test used to make the call
+         * unconditional, dereferencing a NULL hook)
+         */
+        if (ctx->sc_policy->sp_sops->unwrap_bulk)
+                return ctx->sc_policy->sp_sops->unwrap_bulk(req, desc);
+
+        return 0;
+}
+EXPORT_SYMBOL(sptlrpc_svc_unwrap_bulk);
+
+
+/****************************************
+ * user descriptor helpers              *
+ ****************************************/
+
+/*
+ * size in bytes of a user descriptor for the current task: the fixed
+ * header plus, in kernel builds, one __u32 per supplementary group,
+ * capped at LUSTRE_MAX_GROUPS.
+ */
+int sptlrpc_user_desc_size(void)
+{
+        int desc_size = sizeof(struct ptlrpc_user_desc);
+#ifdef __KERNEL__
+        int group_cnt = current_ngroups;
+
+        if (group_cnt > LUSTRE_MAX_GROUPS)
+                group_cnt = LUSTRE_MAX_GROUPS;
+
+        desc_size += group_cnt * sizeof(__u32);
+#endif
+        return desc_size;
+}
+EXPORT_SYMBOL(sptlrpc_user_desc_size);
+
+/*
+ * fill the user descriptor stored in buffer @offset of @msg with the
+ * identity of the current task (uid/gid, fsuid/fsgid, effective
+ * capability and, in kernel builds, supplementary groups).
+ * the number of groups is limited by the room left in the buffer after
+ * the fixed header and by the task's group count.
+ * returns 0 on success, -EINVAL if the buffer is missing or too small to
+ * hold the fixed header.
+ */
+int sptlrpc_pack_user_desc(struct lustre_msg *msg, int offset)
+{
+        struct ptlrpc_user_desc *pud;
+
+        /* require room for the fixed header, as the unpack side does;
+         * otherwise the stores below would run past the buffer and the
+         * ngroups computation would underflow
+         */
+        pud = lustre_msg_buf(msg, offset, sizeof(*pud));
+        if (!pud)
+                return -EINVAL;
+
+        pud->pud_uid = cfs_current()->uid;
+        pud->pud_gid = cfs_current()->gid;
+        pud->pud_fsuid = cfs_current()->fsuid;
+        pud->pud_fsgid = cfs_current()->fsgid;
+        pud->pud_cap = cfs_current()->cap_effective;
+        pud->pud_ngroups = (msg->lm_buflens[offset] - sizeof(*pud)) /
+                           sizeof(__u32);
+
+#ifdef __KERNEL__
+        task_lock(current);
+        if (pud->pud_ngroups > current_ngroups)
+                pud->pud_ngroups = current_ngroups;
+        /* NOTE(review): only blocks[0] of group_info is copied; this
+         * presumably assumes all packed groups fit in the first block -
+         * confirm against the group_info layout and LUSTRE_MAX_GROUPS
+         */
+        memcpy(pud->pud_groups, cfs_current()->group_info->blocks[0],
+               pud->pud_ngroups * sizeof(__u32));
+        task_unlock(current);
+#endif
+
+        return 0;
+}
+EXPORT_SYMBOL(sptlrpc_pack_user_desc);
+
+/*
+ * validate the user descriptor stored in buffer @offset of @msg, byte
+ * swapping it in place when the message is swabbed.
+ * returns 0 on success, -EINVAL if the buffer is missing/too small, the
+ * group count exceeds LUSTRE_MAX_GROUPS, or the claimed groups don't fit
+ * into the buffer.
+ */
+int sptlrpc_unpack_user_desc(struct lustre_msg *msg, int offset)
+{
+        struct ptlrpc_user_desc *desc;
+        int                      idx;
+
+        desc = lustre_msg_buf(msg, offset, sizeof(*desc));
+        if (desc == NULL)
+                return -EINVAL;
+
+        /* fixed header first, so that pud_ngroups can be trusted below */
+        if (lustre_msg_swabbed(msg)) {
+                __swab32s(&desc->pud_uid);
+                __swab32s(&desc->pud_gid);
+                __swab32s(&desc->pud_fsuid);
+                __swab32s(&desc->pud_fsgid);
+                __swab32s(&desc->pud_cap);
+                __swab32s(&desc->pud_ngroups);
+        }
+
+        if (desc->pud_ngroups > LUSTRE_MAX_GROUPS) {
+                CERROR("%u groups is too large\n", desc->pud_ngroups);
+                return -EINVAL;
+        }
+
+        if (msg->lm_buflens[offset] <
+            sizeof(*desc) + desc->pud_ngroups * sizeof(__u32)) {
+                CERROR("%u groups are claimed but bufsize only %u\n",
+                       desc->pud_ngroups, msg->lm_buflens[offset]);
+                return -EINVAL;
+        }
+
+        /* the group array is swabbed only after its size was validated */
+        if (lustre_msg_swabbed(msg)) {
+                for (idx = 0; idx < desc->pud_ngroups; idx++)
+                        __swab32s(&desc->pud_groups[idx]);
+        }
+
+        return 0;
+}
+EXPORT_SYMBOL(sptlrpc_unpack_user_desc);
+
+/****************************************
+ * Helpers to assist policy modules to  *
+ * implement checksum functionality     *
+ ****************************************/
+
+/*
+ * Table of bulk checksum algorithms, indexed by the BULK_CSUM_ALG_*
+ * constants.  @name is the string used when parsing flavor strings, when
+ * allocating a crypto transform and in log messages; @size is the number of
+ * bytes of checksum data stored behind a bulk security descriptor.
+ */
+struct {
+        char    *name;
+        int      size;
+} csum_types[] = {
+        [BULK_CSUM_ALG_NULL]    = { "null",     0 },
+        [BULK_CSUM_ALG_CRC32]   = { "crc32",    4 },
+        [BULK_CSUM_ALG_MD5]     = { "md5",     16 },
+        [BULK_CSUM_ALG_SHA1]    = { "sha1",    20 },
+        [BULK_CSUM_ALG_SHA256]  = { "sha256",  32 },
+        [BULK_CSUM_ALG_SHA384]  = { "sha384",  48 },
+        [BULK_CSUM_ALG_SHA512]  = { "sha512",  64 },
+};
+
+/*
+ * Return the buffer size needed for a bulk security descriptor using
+ * checksum algorithm @csum_alg.
+ *
+ * @request and @read select the message being sized: only a read request
+ * gets the bare descriptor, every other combination also reserves room for
+ * the checksum data of @csum_alg.
+ */
+int bulk_sec_desc_size(__u32 csum_alg, int request, int read)
+{
+        int hdr_size = sizeof(struct ptlrpc_bulk_sec_desc);
+
+        LASSERT(csum_alg < BULK_CSUM_ALG_MAX);
+
+        /* a read request carries no checksum data */
+        if (read && request)
+                return hdr_size;
+
+        return hdr_size + csum_types[csum_alg].size;
+}
+EXPORT_SYMBOL(bulk_sec_desc_size);
+
+/*
+ * Validate the bulk security descriptor in buffer @offset of @msg and, when
+ * the message was swabbed, convert its header fields in place.
+ *
+ * Returns 0 if the descriptor is acceptable, -EPROTO for an unexpected
+ * version, and -EINVAL for a missing/short buffer, an unknown checksum or
+ * cipher algorithm, or a buffer that is longer than the header but too short
+ * for the checksum of the advertised algorithm.
+ */
+int bulk_sec_desc_unpack(struct lustre_msg *msg, int offset)
+{
+        struct ptlrpc_bulk_sec_desc *bsd;
+        int    buflen = msg->lm_buflens[offset];
+
+        bsd = lustre_msg_buf(msg, offset, sizeof(*bsd));
+        if (!bsd) {
+                CERROR("Invalid bulk sec desc: size %d\n", buflen);
+                return -EINVAL;
+        }
+
+        if (lustre_msg_swabbed(msg)) {
+                __swab32s(&bsd->bsd_version);
+                __swab32s(&bsd->bsd_pad);
+                __swab32s(&bsd->bsd_csum_alg);
+                __swab32s(&bsd->bsd_priv_alg);
+        }
+
+        if (bsd->bsd_version != 0) {
+                CERROR("Unexpected version %u\n", bsd->bsd_version);
+                return -EPROTO;
+        }
+
+        /* both algorithm ids must be known before they are used as
+         * table indices */
+        if (bsd->bsd_csum_alg >= BULK_CSUM_ALG_MAX) {
+                CERROR("Unsupported checksum algorithm %u\n",
+                       bsd->bsd_csum_alg);
+                return -EINVAL;
+        }
+        if (bsd->bsd_priv_alg >= BULK_PRIV_ALG_MAX) {
+                CERROR("Unsupported cipher algorithm %u\n",
+                       bsd->bsd_priv_alg);
+                return -EINVAL;
+        }
+
+        /* a bare header is fine; anything longer has to be able to hold a
+         * complete checksum of the advertised algorithm */
+        if (buflen <= sizeof(*bsd) ||
+            buflen >= sizeof(*bsd) + csum_types[bsd->bsd_csum_alg].size)
+                return 0;
+
+        CERROR("Mal-formed checksum data: csum alg %u, size %d\n",
+               bsd->bsd_csum_alg, buflen);
+        return -EINVAL;
+}
+EXPORT_SYMBOL(bulk_sec_desc_unpack);
+
+#ifdef __KERNEL__
+/*
+ * Compute a crc32_le() checksum, seeded with ~0, over every fragment of
+ * @desc and store the 32-bit result at @buf.  Each page is mapped with
+ * cfs_kmap() only for the duration of its own update.  Always returns 0.
+ */
+static
+int do_bulk_checksum_crc32(struct ptlrpc_bulk_desc *desc, void *buf)
+{
+        __u32 crc = ~0;
+        int   idx;
+
+        for (idx = 0; idx < desc->bd_iov_count; idx++) {
+                struct page *pg = desc->bd_iov[idx].kiov_page;
+                int          pgoff;
+                int          nob;
+                char        *addr;
+
+                pgoff = desc->bd_iov[idx].kiov_offset & ~CFS_PAGE_MASK;
+                addr = cfs_kmap(pg) + pgoff;
+                nob = desc->bd_iov[idx].kiov_len;
+
+                crc = crc32_le(crc, addr, nob);
+
+                cfs_kunmap(pg);
+        }
+
+        *((__u32 *) buf) = crc;
+        return 0;
+}
+
+/*
+ * Kernel implementation: compute the @alg checksum over all fragments of
+ * @desc and write the digest to @buf.
+ *
+ * CRC32 is handled by do_bulk_checksum_crc32(); every other algorithm goes
+ * through a crypto transform allocated by the name in csum_types[], fed with
+ * one scatterlist entry per fragment.
+ *
+ * Returns 0 on success, -ENOMEM if the transform or the scatterlist cannot
+ * be allocated.
+ */
+static
+int do_bulk_checksum(struct ptlrpc_bulk_desc *desc, __u32 alg, void *buf)
+{
+        struct crypto_tfm *tfm;
+        struct scatterlist *sl;
+        int idx, rc = 0;
+
+        LASSERT(alg > BULK_CSUM_ALG_NULL &&
+                alg < BULK_CSUM_ALG_MAX);
+
+        if (alg == BULK_CSUM_ALG_CRC32)
+                return do_bulk_checksum_crc32(desc, buf);
+
+        tfm = crypto_alloc_tfm(csum_types[alg].name, 0);
+        if (!tfm) {
+                CERROR("Unable to allocate tfm %s\n", csum_types[alg].name);
+                return -ENOMEM;
+        }
+
+        OBD_ALLOC(sl, sizeof(*sl) * desc->bd_iov_count);
+        if (sl != NULL) {
+                /* describe every bulk fragment to the crypto layer */
+                for (idx = 0; idx < desc->bd_iov_count; idx++) {
+                        sl[idx].page = desc->bd_iov[idx].kiov_page;
+                        sl[idx].offset = desc->bd_iov[idx].kiov_offset &
+                                         ~CFS_PAGE_MASK;
+                        sl[idx].length = desc->bd_iov[idx].kiov_len;
+                }
+
+                crypto_digest_init(tfm);
+                crypto_digest_update(tfm, sl, desc->bd_iov_count);
+                crypto_digest_final(tfm, buf);
+
+                OBD_FREE(sl, sizeof(*sl) * desc->bd_iov_count);
+        } else {
+                rc = -ENOMEM;
+        }
+
+        /* the transform is released on both the success and error path */
+        crypto_free_tfm(tfm);
+        return rc;
+}
+                         
+#else /* !__KERNEL__ */
+/*
+ * Non-kernel implementation: only CRC32 is supported (asserted).  Runs
+ * crc32_le(), seeded with ~0, over every iovec of @desc and stores the
+ * 32-bit result at @buf.  Always returns 0.
+ */
+static
+int do_bulk_checksum(struct ptlrpc_bulk_desc *desc, __u32 alg, void *buf)
+{
+        __u32 sum = ~0;
+        int   idx = 0;
+
+        LASSERT(alg == BULK_CSUM_ALG_CRC32);
+
+        while (idx < desc->bd_iov_count) {
+                char *base = desc->bd_iov[idx].iov_base;
+                int   nob = desc->bd_iov[idx].iov_len;
+
+                sum = crc32_le(sum, base, nob);
+                idx++;
+        }
+
+        *((__u32 *) buf) = sum;
+        return 0;
+}
+#endif
+
+/*
+ * Compute the @alg checksum over @desc and store it in the checksum area of
+ * @bsd, whose buffer is @bsdsize bytes long.
+ *
+ * bsd->bsd_csum_alg is set to @alg only if a checksum was actually
+ * produced; for the null algorithm, or if anything goes wrong, it is left
+ * as BULK_CSUM_ALG_NULL.  Returns 0 or the error of do_bulk_checksum().
+ */
+static
+int generate_bulk_csum(struct ptlrpc_bulk_desc *desc, __u32 alg,
+                       struct ptlrpc_bulk_sec_desc *bsd, int bsdsize)
+{
+        int rc = 0;
+
+        LASSERT(bsd);
+        LASSERT(alg < BULK_CSUM_ALG_MAX);
+
+        /* start out as "no checksum present" */
+        bsd->bsd_csum_alg = BULK_CSUM_ALG_NULL;
+
+        if (alg != BULK_CSUM_ALG_NULL) {
+                LASSERT(bsdsize >= sizeof(*bsd) + csum_types[alg].size);
+
+                rc = do_bulk_checksum(desc, alg, bsd->bsd_csum);
+                if (!rc)
+                        bsd->bsd_csum_alg = alg;
+        }
+
+        return rc;
+}
+
+/*
+ * Recompute the checksum of @desc with the algorithm named in the verifier
+ * @bsdv (buffer of @bsdvsize bytes) and compare it with the checksum the
+ * verifier carries.  @read only selects the wording of the log messages.
+ *
+ * If @bsdr is non-NULL (buffer of @bsdrsize bytes) the locally computed
+ * checksum is written into it and its algorithm is set to the verifier's,
+ * whether or not the comparison succeeded.  If the checksum could not be
+ * computed at all, @bsdr is left at BULK_CSUM_ALG_NULL.
+ *
+ * Returns 0 if the verifier uses the null algorithm or the checksums match,
+ * -EINVAL if the verifier is too short or the checksums differ, -ENOMEM if
+ * the scratch buffer cannot be allocated, or the error returned by
+ * do_bulk_checksum().
+ */
+static
+int verify_bulk_csum(struct ptlrpc_bulk_desc *desc, int read,
+                     struct ptlrpc_bulk_sec_desc *bsdv, int bsdvsize,
+                     struct ptlrpc_bulk_sec_desc *bsdr, int bsdrsize)
+{
+        char *csum_p;
+        char *buf = NULL;
+        int   csum_size, rc = 0;
+
+        LASSERT(bsdv);
+        LASSERT(bsdv->bsd_csum_alg < BULK_CSUM_ALG_MAX);
+
+        if (bsdr)
+                bsdr->bsd_csum_alg = BULK_CSUM_ALG_NULL;
+
+        if (bsdv->bsd_csum_alg == BULK_CSUM_ALG_NULL)
+                return 0;
+
+        /* for all supported algorithms */
+        csum_size = csum_types[bsdv->bsd_csum_alg].size;
+
+        if (bsdvsize < sizeof(*bsdv) + csum_size) {
+                /* sizeof() yields size_t, cast it to match "%d" */
+                CERROR("verifier size %d too small, require %d\n",
+                       bsdvsize, (int) (sizeof(*bsdv) + csum_size));
+                return -EINVAL;
+        }
+
+        if (bsdr) {
+                /* compute straight into the reply descriptor */
+                LASSERT(bsdrsize >= sizeof(*bsdr) + csum_size);
+                csum_p = (char *) bsdr->bsd_csum;
+        } else {
+                OBD_ALLOC(buf, csum_size);
+                if (buf == NULL)
+                        return -ENOMEM;
+                csum_p = buf;
+        }
+
+        rc = do_bulk_checksum(desc, bsdv->bsd_csum_alg, csum_p);
+        if (rc) {
+                /* no checksum was produced: do not misreport this as data
+                 * corruption and do not publish an undefined checksum */
+                CERROR("bulk %s: failed to perform %s checksum: %d\n",
+                       read ? "read" : "write",
+                       csum_types[bsdv->bsd_csum_alg].name, rc);
+                goto out;
+        }
+
+        if (memcmp(bsdv->bsd_csum, csum_p, csum_size)) {
+                CERROR("BAD %s CHECKSUM (%s), data mutated during "
+                       "transfer!\n", read ? "READ" : "WRITE",
+                       csum_types[bsdv->bsd_csum_alg].name);
+                rc = -EINVAL;
+        } else {
+                CDEBUG(D_SEC, "bulk %s checksum (%s) verified\n",
+                      read ? "read" : "write",
+                      csum_types[bsdv->bsd_csum_alg].name);
+        }
+
+        /* the checksum already sits in bsdr->bsd_csum (csum_p points at
+         * it), so only the algorithm id has to be filled in */
+        if (bsdr)
+                bsdr->bsd_csum_alg = bsdv->bsd_csum_alg;
+
+out:
+        if (buf)
+                OBD_FREE(buf, csum_size);
+
+        return rc;
+}
+
+/*
+ * Client side, request phase: prepare the bulk security descriptor held in
+ * buffer @roff of the request message @rmsg.
+ *
+ * For a bulk read only the wanted algorithm @alg is recorded.  For a bulk
+ * write the checksum of @desc is generated into the descriptor.
+ *
+ * Returns 0 on success or the error returned by generate_bulk_csum().
+ */
+int bulk_csum_cli_request(struct ptlrpc_bulk_desc *desc, int read,
+                          __u32 alg, struct lustre_msg *rmsg, int roff)
+{
+        struct ptlrpc_bulk_sec_desc *bsdr;
+        int    rsize, rc;
+
+        rsize = rmsg->lm_buflens[roff];
+        bsdr = lustre_msg_buf(rmsg, roff, sizeof(*bsdr));
+
+        LASSERT(bsdr);
+        LASSERT(rsize >= sizeof(*bsdr));
+        LASSERT(alg < BULK_CSUM_ALG_MAX);
+
+        if (read) {
+                /* nothing to checksum yet, just name the algorithm */
+                bsdr->bsd_csum_alg = alg;
+                return 0;
+        }
+
+        rc = generate_bulk_csum(desc, alg, bsdr, rsize);
+        if (rc)
+                CERROR("client bulk write: failed to perform "
+                       "checksum: %d\n", rc);
+
+        return rc;
+}
+EXPORT_SYMBOL(bulk_csum_cli_request);
+
+/*
+ * Client side, reply phase: check the bulk transfer described by @desc
+ * against the bulk security descriptor the server returned.
+ *
+ * @rmsg/@roff locate the descriptor the client sent with the request,
+ * @vmsg/@voff the verifier found in the server reply.
+ *
+ * Bulk read: the data received is checksummed and compared with the
+ * server's checksum via verify_bulk_csum().
+ * Bulk write: the checksum echoed by the server is compared with the one
+ * sent in the request; on mismatch the pages are checksummed again purely
+ * to tell apart where the data changed.
+ *
+ * Returns 0 if the checksum is confirmed (or the request used the null
+ * algorithm on write), -EINVAL for a malformed verifier or a checksum
+ * mismatch, -ENOMEM if the scratch buffer cannot be allocated.
+ */
+int bulk_csum_cli_reply(struct ptlrpc_bulk_desc *desc, int read,
+                        struct lustre_msg *rmsg, int roff,
+                        struct lustre_msg *vmsg, int voff)
+{
+        struct ptlrpc_bulk_sec_desc *bsdv, *bsdr;
+        int    rsize, vsize;
+
+        rsize = rmsg->lm_buflens[roff];
+        vsize = vmsg->lm_buflens[voff];
+        bsdr = lustre_msg_buf(rmsg, roff, 0);
+        bsdv = lustre_msg_buf(vmsg, voff, 0);
+
+        if (bsdv == NULL || vsize < sizeof(*bsdv)) {
+                CERROR("Invalid checksum verifier from server: size %d\n",
+                       vsize);
+                return -EINVAL;
+        }
+
+        LASSERT(bsdr);
+        LASSERT(rsize >= sizeof(*bsdr));
+        LASSERT(vsize >= sizeof(*bsdv));
+        /* our own request: must be sane before it indexes csum_types[] */
+        LASSERT(bsdr->bsd_csum_alg < BULK_CSUM_ALG_MAX);
+
+        /* the verifier comes off the wire: never index csum_types[] with
+         * an algorithm id that has not been range checked */
+        if (bsdv->bsd_csum_alg >= BULK_CSUM_ALG_MAX) {
+                CERROR("Unsupported checksum algorithm %u from server\n",
+                       bsdv->bsd_csum_alg);
+                return -EINVAL;
+        }
+
+        if (bsdr->bsd_csum_alg != bsdv->bsd_csum_alg) {
+                CERROR("bulk %s: checksum algorithm mismatch: client request "
+                       "%s but server reply with %s. try to use the new one "
+                       "for checksum verification\n",
+                       read ? "read" : "write",
+                       csum_types[bsdr->bsd_csum_alg].name,
+                       csum_types[bsdv->bsd_csum_alg].name);
+        }
+
+        if (read)
+                return verify_bulk_csum(desc, 1, bsdv, vsize, NULL, 0);
+        else {
+                char *cli, *srv, *new = NULL;
+                int csum_size = csum_types[bsdr->bsd_csum_alg].size;
+                int rc;
+
+                if (bsdr->bsd_csum_alg == BULK_CSUM_ALG_NULL)
+                        return 0;
+
+                if (vsize < sizeof(*bsdv) + csum_size) {
+                        /* sizeof() yields size_t, cast it to match "%d" */
+                        CERROR("verifier size %d too small, require %d\n",
+                               vsize, (int) (sizeof(*bsdv) + csum_size));
+                        return -EINVAL;
+                }
+
+                /* checksum data directly follows each descriptor header */
+                cli = (char *) (bsdr + 1);
+                srv = (char *) (bsdv + 1);
+
+                if (!memcmp(cli, srv, csum_size)) {
+                        /* checksum confirmed */
+                        CDEBUG(D_SEC, "bulk write checksum (%s) confirmed\n",
+                              csum_types[bsdr->bsd_csum_alg].name);
+                        return 0;
+                }
+
+                /* checksum mismatch, re-compute a new one and compare with
+                 * others, give out proper warnings.
+                 */
+                OBD_ALLOC(new, csum_size);
+                if (new == NULL)
+                        return -ENOMEM;
+
+                rc = do_bulk_checksum(desc, bsdr->bsd_csum_alg, new);
+
+                if (rc) {
+                        /* without a fresh checksum no diagnosis is
+                         * possible; the mismatch itself still stands */
+                        CERROR("BAD WRITE CHECKSUM (%s): failed to re-compute "
+                               "checksum for diagnosis: %d\n",
+                               csum_types[bsdr->bsd_csum_alg].name, rc);
+                } else if (!memcmp(new, srv, csum_size)) {
+                        CERROR("BAD WRITE CHECKSUM (%s): pages were mutated "
+                               "on the client after we checksummed them\n",
+                               csum_types[bsdr->bsd_csum_alg].name);
+                } else if (!memcmp(new, cli, csum_size)) {
+                        CERROR("BAD WRITE CHECKSUM (%s): pages were mutated "
+                               "in transit\n",
+                               csum_types[bsdr->bsd_csum_alg].name);
+                } else {
+                        CERROR("BAD WRITE CHECKSUM (%s): pages were mutated "
+                               "in transit, and the current page contents "
+                               "don't match the originals and what the server "
+                               "received\n",
+                               csum_types[bsdr->bsd_csum_alg].name);
+                }
+                OBD_FREE(new, csum_size);
+
+                return -EINVAL;
+        }
+}
+EXPORT_SYMBOL(bulk_csum_cli_reply);
+
+/*
+ * Server side bulk checksum handling.
+ *
+ * @vmsg/@voff locate the descriptor received from the client, @rmsg/@roff
+ * the descriptor to fill in for the reply.
+ *
+ * Bulk write: the data received is verified against the client's checksum
+ * and the locally computed checksum is recorded in the reply descriptor.
+ * Bulk read: a checksum of the data is generated into the reply descriptor
+ * using the algorithm named by the client.
+ *
+ * Returns the result of verify_bulk_csum() or generate_bulk_csum().
+ */
+int bulk_csum_svc(struct ptlrpc_bulk_desc *desc, int read,
+                  struct lustre_msg *vmsg, int voff,
+                  struct lustre_msg *rmsg, int roff)
+{
+        struct ptlrpc_bulk_sec_desc *bsdv, *bsdr;
+        int    vsize, rsize, rc;
+
+        vsize = vmsg->lm_buflens[voff];
+        rsize = rmsg->lm_buflens[roff];
+        bsdv = lustre_msg_buf(vmsg, voff, 0);
+        bsdr = lustre_msg_buf(rmsg, roff, 0);
+
+        LASSERT(vsize >= sizeof(*bsdv));
+        LASSERT(rsize >= sizeof(*bsdr));
+        LASSERT(bsdv && bsdr);
+
+        if (!read)
+                return verify_bulk_csum(desc, 0, bsdv, vsize, bsdr, rsize);
+
+        rc = generate_bulk_csum(desc, bsdv->bsd_csum_alg, bsdr, rsize);
+        if (rc != 0)
+                CERROR("bulk read: server failed to generate %s "
+                       "checksum: %d\n",
+                       csum_types[bsdv->bsd_csum_alg].name, rc);
+
+        return rc;
+}
+EXPORT_SYMBOL(bulk_csum_svc);
+
+/****************************************
+ * user supplied flavor string parsing  *
+ ****************************************/
+
+/*
+ * Fill @conf with the default flavor for connections to @to_part: plain
+ * RPC flavor towards an MDT, null RPC flavor towards an OST, no bulk
+ * checksum or encryption and no flags in either case.
+ *
+ * Returns 0 for a known target.  For any other target the null defaults
+ * are still stored in @conf, an error is logged and -EINVAL is returned.
+ */
+static
+int get_default_flavor(enum lustre_part to_part, struct sec_flavor_config *conf)
+{
+        conf->sfc_flags = 0;
+        conf->sfc_bulk_csum = BULK_CSUM_ALG_NULL;
+        conf->sfc_bulk_priv = BULK_PRIV_ALG_NULL;
+
+        if (to_part == LUSTRE_MDT) {
+                conf->sfc_rpc_flavor = SPTLRPC_FLVR_PLAIN;
+                return 0;
+        }
+
+        if (to_part == LUSTRE_OST) {
+                conf->sfc_rpc_flavor = SPTLRPC_FLVR_NULL;
+                return 0;
+        }
+
+        CERROR("Unknown to lustre part %d, apply defaults\n", to_part);
+        conf->sfc_rpc_flavor = SPTLRPC_FLVR_NULL;
+        return -EINVAL;
+}
+
+/*
+ * Initialize @conf for @rpc_flavor with that flavor's default bulk
+ * settings: none for null and plain, sha1 checksum for krb5i, sha1 checksum
+ * plus arc4 encryption for krb5p.  Flags are cleared.  Any other flavor
+ * value trips LBUG().
+ */
+static
+void get_flavor_by_rpc(__u32 rpc_flavor, struct sec_flavor_config *conf)
+{
+        conf->sfc_rpc_flavor = rpc_flavor;
+        conf->sfc_bulk_priv = BULK_PRIV_ALG_NULL;
+        conf->sfc_bulk_csum = BULK_CSUM_ALG_NULL;
+        conf->sfc_flags = 0;
+
+        switch (rpc_flavor) {
+        case SPTLRPC_FLVR_NULL:
+        case SPTLRPC_FLVR_PLAIN:
+                /* no bulk protection by default */
+                break;
+        case SPTLRPC_FLVR_KRB5I:
+                conf->sfc_bulk_csum = BULK_CSUM_ALG_SHA1;
+                break;
+        case SPTLRPC_FLVR_KRB5P:
+                conf->sfc_bulk_priv = BULK_PRIV_ALG_ARC4;
+                conf->sfc_bulk_csum = BULK_CSUM_ALG_SHA1;
+                break;
+        default:
+                LBUG();
+        }
+}
+
+/*
+ * Set the bulk part of @conf for an explicitly requested bulk mode:
+ * arc4 encryption if @bulk_priv is non-zero and none otherwise, and the
+ * default checksum for @rpc_flavor (md5 for plain, sha1 for krb5i/krb5p).
+ * Any other flavor value trips LBUG().
+ */
+static
+void get_flavor_by_rpc_bulk(__u32 rpc_flavor, int bulk_priv,
+                            struct sec_flavor_config *conf)
+{
+        conf->sfc_bulk_priv = bulk_priv ? BULK_PRIV_ALG_ARC4 :
+                                          BULK_PRIV_ALG_NULL;
+
+        switch (rpc_flavor) {
+        case SPTLRPC_FLVR_KRB5I:
+        case SPTLRPC_FLVR_KRB5P:
+                conf->sfc_bulk_csum = BULK_CSUM_ALG_SHA1;
+                break;
+        case SPTLRPC_FLVR_PLAIN:
+                conf->sfc_bulk_csum = BULK_CSUM_ALG_MD5;
+                break;
+        default:
+                LBUG();
+        }
+}
+
+/*
+ * RPC flavors whose names sptlrpc_parse_flavor() tries to match against the
+ * beginning of a flavor string.
+ */
+static __u32 __flavors[] = {
+        SPTLRPC_FLVR_NULL,
+        SPTLRPC_FLVR_PLAIN,
+        SPTLRPC_FLVR_KRB5I,
+        SPTLRPC_FLVR_KRB5P,
+};
+
+/* number of entries in __flavors[] */
+#define __nflavors      (sizeof(__flavors)/sizeof(__u32))
+
+/*
+ * flavor string format: rpc[-bulk[:cksum/enc]]
+ * for examples:
+ *  null
+ *  plain-bulki
+ *  krb5p-bulkn
+ *  krb5i-bulkp
+ *  krb5i-bulkp:sha512/arc4
+ */
+int sptlrpc_parse_flavor(enum lustre_part from_part, enum lustre_part to_part,
+                         char *str, struct sec_flavor_config *conf)
+{
+        char   *f, *bulk, *alg, *enc;
+        char    buf[64];
+        int     i, bulk_priv;
+        ENTRY;
+
+        if (str == NULL) {
+                if (get_default_flavor(to_part, conf))
+                        return -EINVAL;
+                goto set_flags;
+        }
+
+        for (i = 0; i < __nflavors; i++) {
+                f = sptlrpc_flavor2name(__flavors[i]);
+                if (strncmp(str, f, strlen(f)) == 0)
+                        break;
+        }
+
+        if (i >= __nflavors)
+                GOTO(invalid, -EINVAL);
+
+        /* prepare local buffer thus we can modify it as we want */
+        strncpy(buf, str, 64);
+        buf[64 - 1] = '\0';
+
+        /* find bulk string */
+        bulk = strchr(buf, '-');
+        if (bulk)
+                *bulk++ = '\0';
+
+        /* now the first part must equal to rpc flavor name */
+        if (strcmp(buf, f) != 0)
+                GOTO(invalid, -EINVAL);
+
+        get_flavor_by_rpc(__flavors[i], conf);
+
+        if (bulk == NULL)
+                goto set_flags;
+
+        /* null flavor should not have any suffix */
+        if (__flavors[i] == SPTLRPC_FLVR_NULL)
+                GOTO(invalid, -EINVAL);
+
+        /* find bulk algorithm string */
+        alg = strchr(bulk, ':');
+        if (alg)
+                *alg++ = '\0';
+
+        /* verify bulk section */
+        if (strcmp(bulk, "bulkn") == 0) {
+                conf->sfc_bulk_csum = BULK_CSUM_ALG_NULL;
+                conf->sfc_bulk_priv = BULK_PRIV_ALG_NULL;
+                goto set_flags;
+        }
+
+        if (strcmp(bulk, "bulki") == 0)
+                bulk_priv = 0;
+        else if (strcmp(bulk, "bulkp") == 0)
+                bulk_priv = 1;
+        else
+                GOTO(invalid, -EINVAL);
+
+        /* plain policy dosen't support bulk encryption */
+        if (bulk_priv && __flavors[i] == SPTLRPC_FLVR_PLAIN)
+                GOTO(invalid, -EINVAL);
+
+        get_flavor_by_rpc_bulk(__flavors[i], bulk_priv, conf);
+
+        if (alg == NULL)
+                goto set_flags;
+
+        /* find encryption algorithm string */
+        enc = strchr(alg, '/');
+        if (enc)
+                *enc++ = '\0';
+
+        /* bulk combination sanity check */
+        if ((bulk_priv && enc == NULL) || (bulk_priv == 0 && enc))
+                GOTO(invalid, -EINVAL);
+
+        /* checksum algorithm */
+        for (i = 0; i < BULK_CSUM_ALG_MAX; i++) {
+                if (strcmp(alg, csum_types[i].name) == 0) {
+                        conf->sfc_bulk_csum = i;
+                        break;
+                }
+        }
+        if (i >= BULK_CSUM_ALG_MAX)
+                GOTO(invalid, -EINVAL);
+
+        /* privacy algorithm */
+        if (enc) {
+                if (strcmp(enc, "arc4") != 0)
+                        GOTO(invalid, -EINVAL);
+                conf->sfc_bulk_priv = BULK_PRIV_ALG_ARC4;
+        }
+
+set_flags:
+        /* set ROOTONLY flag to:
+         *  - to OST
+         *  - from MDT to MDT
+         */
+        if ((to_part == LUSTRE_MDT && from_part == LUSTRE_MDT) ||
+            to_part == LUSTRE_OST)
+                conf->sfc_flags |= PTLRPC_SEC_FL_ROOTONLY;
+
+#ifdef __BIG_ENDIAN
+        __swab32s(&conf->sfc_rpc_flavor);
+        __swab32s(&conf->sfc_bulk_csum);
+        __swab32s(&conf->sfc_bulk_priv);
+        __swab32s(&conf->sfc_flags);
+#endif
+        return 0;
+invalid:
+        CERROR("invalid flavor string: %s\n", str);
+        return -EINVAL;
+}
+EXPORT_SYMBOL(sptlrpc_parse_flavor);
+
+/****************************************
+ * misc helpers                         *
+ ****************************************/
+
+/*
+ * Return a short string naming the peer of @sec for log messages: "*" when
+ * @sec, its import or the import's obd device is missing, "c" for a sec
+ * flagged PTLRPC_SEC_FL_REVERSE, otherwise the target uuid string of the
+ * import's client obd.
+ */
+const char * sec2target_str(struct ptlrpc_sec *sec)
+{
+        if (sec == NULL || sec->ps_import == NULL ||
+            sec->ps_import->imp_obd == NULL)
+                return "*";
+
+        return (sec->ps_flags & PTLRPC_SEC_FL_REVERSE) ? "c" :
+               obd_uuid2str(&sec->ps_import->imp_obd->u.cli.cl_target_uuid);
+}
+EXPORT_SYMBOL(sec2target_str);
+
+/*
+ * procfs read handler reporting the security configuration of a client obd
+ * device (asserted to be an OSC, MDC or MGC).
+ *
+ * @page:  output buffer
+ * @count: size of @page
+ * @data:  the struct obd_device to report on
+ * @start, @off and @eof are not used.
+ *
+ * Prints the rpc flavor, bulk checksum/encryption, flags, and - when the
+ * import has a sec attached - context cache and gc information.
+ *
+ * Returns the value of snprintf(), or 0 if @data is NULL.
+ */
+int sptlrpc_lprocfs_rd(char *page, char **start, off_t off, int count,
+                       int *eof, void *data)
+{
+        struct obd_device        *obd = data;
+        struct sec_flavor_config *conf;
+        struct ptlrpc_sec        *sec = NULL;
+        char                      flags_str[20];
+
+        if (obd == NULL)
+                return 0;
+
+        /* only reach into the device once it is known to exist */
+        conf = &obd->u.cli.cl_sec_conf;
+
+        LASSERT(strcmp(obd->obd_type->typ_name, LUSTRE_OSC_NAME) == 0 ||
+                strcmp(obd->obd_type->typ_name, LUSTRE_MDC_NAME) == 0 ||
+                strcmp(obd->obd_type->typ_name, LUSTRE_MGC_NAME) == 0);
+        LASSERT(conf->sfc_bulk_csum < BULK_CSUM_ALG_MAX);
+        LASSERT(conf->sfc_bulk_priv < BULK_PRIV_ALG_MAX);
+
+        if (obd->u.cli.cl_import)
+                sec = obd->u.cli.cl_import->imp_sec;
+
+        /* strncat()'s bound is the number of characters that may still be
+         * appended, not the size of the destination buffer */
+        flags_str[0] = '\0';
+        if (conf->sfc_flags & PTLRPC_SEC_FL_REVERSE)
+                strncat(flags_str, "reverse,",
+                        sizeof(flags_str) - strlen(flags_str) - 1);
+        if (conf->sfc_flags & PTLRPC_SEC_FL_ROOTONLY)
+                strncat(flags_str, "rootonly,",
+                        sizeof(flags_str) - strlen(flags_str) - 1);
+        /* drop the trailing comma */
+        if (flags_str[0] != '\0')
+                flags_str[strlen(flags_str) - 1] = '\0';
+
+        return snprintf(page, count,
+                        "rpc_flavor:  %s\n"
+                        "bulk_flavor: %s checksum, %s encryption\n"
+                        "flags:       %s\n"
+                        "ctx_cache:   size %u, busy %d\n"
+                        "gc:          interval %lus, next %lds\n",
+                        sptlrpc_flavor2name(conf->sfc_rpc_flavor),
+                        csum_types[conf->sfc_bulk_csum].name,
+                        conf->sfc_bulk_priv == BULK_PRIV_ALG_NULL ?
+                        "null" : "arc4", // XXX
+                        flags_str,
+                        sec ? sec->ps_ccache_size : 0,
+                        sec ? atomic_read(&sec->ps_busy) : 0,
+                        sec ? sec->ps_gc_interval: 0,
+                        sec ? (sec->ps_gc_interval ?
+                               sec->ps_gc_next - cfs_time_current_sec() : 0)
+                              : 0);
+}
+EXPORT_SYMBOL(sptlrpc_lprocfs_rd);
+
+
+/*
+ * Bring up the built-in security policies: null first, then plain.  If
+ * plain fails to initialize, null is torn down again.
+ *
+ * Returns 0 on success or the error of the policy that failed.
+ */
+int sptlrpc_init(void)
+{
+        int rc;
+
+        rc = sptlrpc_null_init();
+        if (rc)
+                return rc;
+
+        rc = sptlrpc_plain_init();
+        if (rc)
+                sptlrpc_null_exit();    /* undo the first step */
+
+        return rc;
+}
+
+/*
+ * Shut down the built-in security policies in the reverse order of
+ * sptlrpc_init().  The results of the individual exit calls are not
+ * looked at; always returns 0.
+ */
+int sptlrpc_exit(void)
+{
+        sptlrpc_plain_exit();
+        sptlrpc_null_exit();
+        return 0;
+}
diff --git a/lustre/ptlrpc/sec_null.c b/lustre/ptlrpc/sec_null.c
new file mode 100644 (file)
index 0000000..7b1d391
--- /dev/null
@@ -0,0 +1,305 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2004-2006 Cluster File Systems, Inc.
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef EXPORT_SYMTAB
+# define EXPORT_SYMTAB
+#endif
+#define DEBUG_SUBSYSTEM S_SEC
+
+#ifndef __KERNEL__
+#include <liblustre.h>
+#endif
+
+#include <obd_support.h>
+#include <obd_class.h>
+#include <lustre_net.h>
+#include <lustre_sec.h>
+
+/*
+ * File-scope singletons of the null policy, declared up front so the code
+ * below can refer to them.  null_policy and null_svc_ctx get their
+ * initializers further down in this file; null_sec and null_cli_ctx are
+ * filled in by null_init_internal().
+ */
+static struct ptlrpc_sec_policy null_policy;
+static struct ptlrpc_sec        null_sec;
+static struct ptlrpc_cli_ctx    null_cli_ctx;
+static struct ptlrpc_svc_ctx    null_svc_ctx;
+
+/*
+ * Context refresh hook of the null policy.  It is not expected to be
+ * called and trips LBUG() if it ever is; the return statement is
+ * presumably only there to satisfy the signature.
+ */
+static
+int null_ctx_refresh(struct ptlrpc_cli_ctx *ctx)
+{
+        /* should never reach here */
+        LBUG();
+        return 0;
+}
+
+/*
+ * "Sign" an outgoing request for the null policy: the data to send is the
+ * request message itself, so its length becomes the on-wire length.  Unless
+ * the buffer carries the V1 magic, its flavor field is stamped with
+ * SPTLRPC_FLVR_NULL.  Always returns 0.
+ */
+static
+int null_ctx_sign(struct ptlrpc_cli_ctx *ctx, struct ptlrpc_request *req)
+{
+        req->rq_reqdata_len = req->rq_reqlen;
+
+        if (req->rq_reqbuf->lm_magic == LUSTRE_MSG_MAGIC_V1)
+                return 0;
+
+        req->rq_reqbuf->lm_secflvr = SPTLRPC_FLVR_NULL;
+        return 0;
+}
+
+/*
+ * "Verify" a received reply for the null policy: the reply buffer is used
+ * as the reply message as is, and the received data length becomes the
+ * reply length.  Always returns 0.
+ */
+static
+int null_ctx_verify(struct ptlrpc_cli_ctx *ctx, struct ptlrpc_request *req)
+{
+        req->rq_replen = req->rq_repdata_len;
+        req->rq_repmsg = req->rq_repbuf;
+
+        return 0;
+}
+
+/* client context operations of the null policy */
+static struct ptlrpc_ctx_ops null_ctx_ops = {
+        .refresh        = null_ctx_refresh,
+        .sign           = null_ctx_sign,
+        .verify         = null_ctx_verify,
+};
+
+/*
+ * The single server side context handed to every request accepted by the
+ * null policy.  It starts out with one reference; null_accept() and
+ * null_alloc_rs() take additional ones.
+ */
+static struct ptlrpc_svc_ctx null_svc_ctx = {
+        .sc_refcount    = ATOMIC_INIT(1),
+        .sc_policy      = &null_policy,
+};
+
+/*
+ * create_sec hook: asserts that @flavor belongs to the null policy and
+ * returns the static null_sec.  @imp, @ctx and @flags are not used.
+ */
+static
+struct ptlrpc_sec* null_create_sec(struct obd_import *imp,
+                                   struct ptlrpc_svc_ctx *ctx,
+                                   __u32 flavor,
+                                   unsigned long flags)
+{
+        LASSERT(SEC_FLAVOR_POLICY(flavor) == SPTLRPC_POLICY_NULL);
+        return &null_sec;
+}
+
+/*
+ * destroy_sec hook: null_sec is static, so there is nothing to release;
+ * only asserts that @sec is indeed null_sec.
+ */
+static
+void null_destroy_sec(struct ptlrpc_sec *sec)
+{
+        LASSERT(sec == &null_sec);
+}
+
+/*
+ * lookup_ctx hook: every lookup yields the one static client context,
+ * with an extra reference taken for the caller.  @sec and @vcred are not
+ * used.
+ */
+static
+struct ptlrpc_cli_ctx *null_lookup_ctx(struct ptlrpc_sec *sec,
+                                       struct vfs_cred *vcred)
+{
+        atomic_inc(&null_cli_ctx.cc_refcount);
+        return &null_cli_ctx;
+}
+
+/*
+ * alloc_reqbuf hook: make sure @req has a request buffer of at least
+ * @msgsize bytes and use it directly as the request message.
+ *
+ * A request that already has a buffer must come from a pool; the first
+ * @msgsize bytes of that buffer are cleared.  Otherwise a buffer of exactly
+ * @msgsize bytes is allocated.
+ *
+ * Returns 0 on success, -ENOMEM if the allocation fails.
+ */
+static
+int null_alloc_reqbuf(struct ptlrpc_sec *sec,
+                      struct ptlrpc_request *req,
+                      int msgsize)
+{
+        if (req->rq_reqbuf) {
+                /* buffer supplied by the request pool: recycle it */
+                LASSERT(req->rq_pool);
+                LASSERT(req->rq_reqbuf_len >= msgsize);
+                memset(req->rq_reqbuf, 0, msgsize);
+        } else {
+                LASSERT(!req->rq_pool);
+                OBD_ALLOC(req->rq_reqbuf, msgsize);
+                if (req->rq_reqbuf == NULL)
+                        return -ENOMEM;
+
+                req->rq_reqbuf_len = msgsize;
+        }
+
+        req->rq_reqmsg = req->rq_reqbuf;
+        return 0;
+}
+
+/*
+ * free_reqbuf hook: release the request buffer of @req and reset the
+ * buffer bookkeeping, unless the request belongs to a pool, in which case
+ * nothing is done here.
+ */
+static
+void null_free_reqbuf(struct ptlrpc_sec *sec,
+                      struct ptlrpc_request *req)
+{
+        if (req->rq_pool)
+                return;
+
+        OBD_FREE(req->rq_reqbuf, req->rq_reqbuf_len);
+        req->rq_reqbuf = NULL;
+        req->rq_reqbuf_len = 0;
+}
+
+/*
+ * alloc_repbuf hook: allocate a reply buffer of @msgsize bytes for @req.
+ * Returns 0 on success, -ENOMEM if the allocation fails.
+ */
+static
+int null_alloc_repbuf(struct ptlrpc_sec *sec,
+                      struct ptlrpc_request *req,
+                      int msgsize)
+{
+        OBD_ALLOC(req->rq_repbuf, msgsize);
+        if (req->rq_repbuf != NULL) {
+                req->rq_repbuf_len = msgsize;
+                return 0;
+        }
+
+        return -ENOMEM;
+}
+
+/*
+ * free_repbuf hook: release the reply buffer of @req and reset the buffer
+ * pointer and length.
+ */
+static
+void null_free_repbuf(struct ptlrpc_sec *sec,
+                      struct ptlrpc_request *req)
+{
+        OBD_FREE(req->rq_repbuf, req->rq_repbuf_len);
+        req->rq_repbuf = NULL;
+        req->rq_repbuf_len = 0;
+}
+
+/*
+ * accept hook: take in an incoming request for the null policy.
+ *
+ * The request's flavor must be exactly SPTLRPC_FLVR_NULL, otherwise the
+ * request is dropped.  An accepted request uses its receive buffer as the
+ * request message and is attached to the static server context, on which a
+ * reference is taken.
+ *
+ * Returns SECSVC_OK or SECSVC_DROP.
+ */
+static
+int null_accept(struct ptlrpc_request *req)
+{
+        LASSERT(SEC_FLAVOR_POLICY(req->rq_sec_flavor) == SPTLRPC_POLICY_NULL);
+
+        if (SEC_FLAVOR_RPC(req->rq_sec_flavor) == SPTLRPC_FLVR_NULL) {
+                req->rq_reqmsg = req->rq_reqbuf;
+                req->rq_reqlen = req->rq_reqdata_len;
+
+                req->rq_svc_ctx = &null_svc_ctx;
+                atomic_inc(&req->rq_svc_ctx->sc_refcount);
+
+                return SECSVC_OK;
+        }
+
+        CERROR("Invalid flavor 0x%x\n", req->rq_sec_flavor);
+        return SECSVC_DROP;
+}
+
+/*
+ * alloc_rs hook: set up the reply state of @req with room for a reply
+ * message of @msgsize bytes (asserted to be a multiple of 8).
+ *
+ * A reply state already attached to the request is reused after checking
+ * that it is large enough; otherwise one is allocated with the message
+ * buffer placed right behind the structure.  The reply state takes a
+ * reference on the request's server context.
+ *
+ * Returns 0 on success, -ENOMEM if the allocation fails.
+ */
+static
+int null_alloc_rs(struct ptlrpc_request *req, int msgsize)
+{
+        struct ptlrpc_reply_state *rs = req->rq_reply_state;
+        int rs_size = sizeof(*rs) + msgsize;
+
+        LASSERT(msgsize % 8 == 0);
+
+        if (rs == NULL) {
+                OBD_ALLOC(rs, rs_size);
+                if (rs == NULL)
+                        return -ENOMEM;
+
+                rs->rs_size = rs_size;
+        } else {
+                /* pre-allocated */
+                LASSERT(rs->rs_size >= rs_size);
+        }
+
+        rs->rs_svc_ctx = req->rq_svc_ctx;
+        atomic_inc(&req->rq_svc_ctx->sc_refcount);
+
+        /* the reply message lives directly behind the reply state */
+        rs->rs_repbuf = (struct lustre_msg *) (rs + 1);
+        rs->rs_repbuf_len = rs_size - sizeof(*rs);
+        rs->rs_msg = rs->rs_repbuf;
+
+        req->rq_reply_state = rs;
+        return 0;
+}
+
+/*
+ * free_rs hook: drop the reference the reply state holds on its server
+ * context and free the reply state unless it is flagged as pre-allocated.
+ */
+static
+void null_free_rs(struct ptlrpc_reply_state *rs)
+{
+        /* the reference being dropped must not be the last one */
+        LASSERT(atomic_read(&rs->rs_svc_ctx->sc_refcount) > 1);
+        atomic_dec(&rs->rs_svc_ctx->sc_refcount);
+
+        if (!rs->rs_prealloc)
+                OBD_FREE(rs, rs->rs_size);
+}
+
+/*
+ * authorize hook: finish the reply of @req for sending.  The reply length
+ * of the request becomes the reply data length and, unless the reply
+ * buffer carries the V1 magic, its flavor field is stamped with
+ * SPTLRPC_FLVR_NULL.  Always returns 0.
+ */
+static
+int null_authorize(struct ptlrpc_request *req)
+{
+        struct ptlrpc_reply_state *rs = req->rq_reply_state;
+
+        LASSERT(rs);
+
+        rs->rs_repdata_len = req->rq_replen;
+
+        if (rs->rs_repbuf->lm_magic == LUSTRE_MSG_MAGIC_V1)
+                return 0;
+
+        rs->rs_repbuf->lm_secflvr = SPTLRPC_FLVR_NULL;
+        return 0;
+}
+
+/* client side operations of the null policy */
+static struct ptlrpc_sec_cops null_sec_cops = {
+        .create_sec             = null_create_sec,
+        .destroy_sec            = null_destroy_sec,
+        .lookup_ctx             = null_lookup_ctx,
+        .alloc_reqbuf           = null_alloc_reqbuf,
+        .alloc_repbuf           = null_alloc_repbuf,
+        .free_reqbuf            = null_free_reqbuf,
+        .free_repbuf            = null_free_repbuf,
+};
+
+/* server side operations of the null policy */
+static struct ptlrpc_sec_sops null_sec_sops = {
+        .accept                 = null_accept,
+        .alloc_rs               = null_alloc_rs,
+        .authorize              = null_authorize,
+        .free_rs                = null_free_rs,
+};
+
+/*
+ * The null policy descriptor, registered by sptlrpc_null_init() and
+ * unregistered by sptlrpc_null_exit().
+ */
+static struct ptlrpc_sec_policy null_policy = {
+        .sp_owner               = THIS_MODULE,
+        .sp_name                = "sec.null",
+        .sp_policy              = SPTLRPC_POLICY_NULL,
+        .sp_cops                = &null_sec_cops,
+        .sp_sops                = &null_sec_sops,
+};
+
+/*
+ * Initialize the static sec and client context of the null policy.
+ *
+ * null_sec gets a one-entry context cache whose only member is
+ * null_cli_ctx; gc is disabled (interval and next both 0).  null_cli_ctx
+ * is set up for uid 0, never expires and is flagged hashed, eternal and
+ * up to date.
+ */
+static
+void null_init_internal(void)
+{
+        static HLIST_HEAD(null_ctx_hash);
+
+        /* the one and only client context */
+        atomic_set(&null_cli_ctx.cc_refcount, 1);    /* for hash */
+        null_cli_ctx.cc_sec = &null_sec;
+        null_cli_ctx.cc_ops = &null_ctx_ops;
+        null_cli_ctx.cc_expire = 0;
+        null_cli_ctx.cc_flags = PTLRPC_CTX_HASHED | PTLRPC_CTX_ETERNAL |
+                                PTLRPC_CTX_UPTODATE;
+        null_cli_ctx.cc_vcred.vc_uid = 0;
+        spin_lock_init(&null_cli_ctx.cc_lock);
+        INIT_LIST_HEAD(&null_cli_ctx.cc_req_list);
+        hlist_add_head(&null_cli_ctx.cc_hash, &null_ctx_hash);
+
+        /* the sec owning it */
+        null_sec.ps_policy = &null_policy;
+        null_sec.ps_import = NULL;
+        null_sec.ps_flavor = SPTLRPC_FLVR_NULL;
+        null_sec.ps_flags = 0;
+        null_sec.ps_gc_interval = 0;
+        null_sec.ps_gc_next = 0;
+        null_sec.ps_ccache_size = 1;
+        null_sec.ps_ccache = &null_ctx_hash;
+        spin_lock_init(&null_sec.ps_lock);
+        atomic_set(&null_sec.ps_refcount, 1);     /* always busy */
+        atomic_set(&null_sec.ps_busy, 1);         /* for "null_cli_ctx" */
+}
+
+/*
+ * Set up the static objects of the null policy and register the policy.
+ * Returns 0 on success or the error of sptlrpc_register_policy(), which is
+ * also logged.
+ */
+int sptlrpc_null_init(void)
+{
+        int rc;
+
+        null_init_internal();
+
+        rc = sptlrpc_register_policy(&null_policy);
+        if (rc == 0)
+                return 0;
+
+        CERROR("failed to register sec.null: %d\n", rc);
+        return rc;
+}
+
+/*
+ * Unregister the null policy.  Returns 0 on success or the error of
+ * sptlrpc_unregister_policy(), which is also logged.
+ */
+int sptlrpc_null_exit(void)
+{
+        int rc = sptlrpc_unregister_policy(&null_policy);
+
+        if (rc != 0)
+                CERROR("cannot unregister sec.null: %d\n", rc);
+
+        return rc;
+}
diff --git a/lustre/ptlrpc/sec_plain.c b/lustre/ptlrpc/sec_plain.c
new file mode 100644 (file)
index 0000000..c2c7df4
--- /dev/null
@@ -0,0 +1,498 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2006 Cluster File Systems, Inc.
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef EXPORT_SYMTAB
+# define EXPORT_SYMTAB
+#endif
+#define DEBUG_SUBSYSTEM S_SEC
+
+#ifndef __KERNEL__
+#include <liblustre.h>
+#endif
+
+#include <obd_support.h>
+#include <obd_class.h>
+#include <lustre_net.h>
+#include <lustre_sec.h>
+
+static struct ptlrpc_sec_policy plain_policy;   /* initialized near the end of this file */
+static struct ptlrpc_sec        plain_sec;      /* filled in by plain_init_internal() */
+static struct ptlrpc_cli_ctx    plain_cli_ctx;  /* filled in by plain_init_internal() */
+static struct ptlrpc_svc_ctx    plain_svc_ctx;  /* initialized below, after plain_ctx_ops */
+
+/*
+ * Refresh hook of the plain client context.  Reaching it is treated as a
+ * bug (LBUG); the trailing return only satisfies the signature.
+ */
+static int plain_ctx_refresh(struct ptlrpc_cli_ctx *ctx)
+{
+        LBUG();         /* should never reach here */
+        return 0;
+}
+
+/*
+ * "Sign" an outgoing request: record the total wire length of the request
+ * buffer and stamp the request's security flavor into its header.
+ * Always returns 0.
+ */
+static int plain_ctx_sign(struct ptlrpc_cli_ctx *ctx,
+                          struct ptlrpc_request *req)
+{
+        struct lustre_msg_v2 *reqbuf = req->rq_reqbuf;
+        ENTRY;
+
+        req->rq_reqdata_len = lustre_msg_size_v2(reqbuf->lm_bufcount,
+                                                 reqbuf->lm_buflens);
+        reqbuf->lm_secflvr = req->rq_sec_flavor;
+
+        RETURN(0);
+}
+
+/*
+ * Check a received reply and expose its embedded message.
+ *
+ * When the request flavor carries bulk, the reply must consist of exactly
+ * two segments and segment 1 must unpack as a bulk security descriptor.
+ * On success rq_repmsg/rq_replen are pointed at segment 0.
+ *
+ * Returns 0, -EPROTO on a wrong segment count, or -EINVAL when the bulk
+ * descriptor cannot be unpacked.
+ */
+static int plain_ctx_verify(struct ptlrpc_cli_ctx *ctx,
+                            struct ptlrpc_request *req)
+{
+        struct lustre_msg *reply = req->rq_repbuf;
+        ENTRY;
+
+        if (SEC_FLAVOR_HAS_BULK(req->rq_sec_flavor)) {
+                if (reply->lm_bufcount != 2) {
+                        CERROR("Protocol error: invalid buf count %d\n",
+                               reply->lm_bufcount);
+                        RETURN(-EPROTO);
+                }
+
+                if (bulk_sec_desc_unpack(reply, 1)) {
+                        CERROR("Mal-formed bulk checksum reply\n");
+                        RETURN(-EINVAL);
+                }
+        }
+
+        /* segment 0 is the embedded reply message */
+        req->rq_replen = reply->lm_buflens[0];
+        req->rq_repmsg = lustre_msg_buf(reply, 0, 0);
+
+        RETURN(0);
+}
+
+/*
+ * Client side, before a bulk transfer: hand the bulk descriptor to
+ * bulk_csum_cli_request() together with the checksum algorithm configured
+ * on the import's client obd.  The last segment of the request buffer is
+ * passed as the bulk security descriptor slot.
+ */
+static int plain_cli_wrap_bulk(struct ptlrpc_cli_ctx *ctx,
+                               struct ptlrpc_request *req,
+                               struct ptlrpc_bulk_desc *desc)
+{
+        struct sec_flavor_config *conf;
+        int                       last_seg;
+
+        LASSERT(req->rq_import);
+        LASSERT(SEC_FLAVOR_HAS_BULK(req->rq_sec_flavor));
+        LASSERT(req->rq_reqbuf->lm_bufcount >= 2);
+
+        conf = &req->rq_import->imp_obd->u.cli.cl_sec_conf;
+        last_seg = req->rq_reqbuf->lm_bufcount - 1;
+
+        return bulk_csum_cli_request(desc, req->rq_bulk_read,
+                                     conf->sfc_bulk_csum,
+                                     req->rq_reqbuf, last_seg);
+}
+
+/*
+ * Client side, after a bulk transfer: pass the last segment of both the
+ * request and the reply buffer to bulk_csum_cli_reply() and return its
+ * result.
+ */
+static int plain_cli_unwrap_bulk(struct ptlrpc_cli_ctx *ctx,
+                                 struct ptlrpc_request *req,
+                                 struct ptlrpc_bulk_desc *desc)
+{
+        int req_seg, rep_seg;
+
+        LASSERT(SEC_FLAVOR_HAS_BULK(req->rq_sec_flavor));
+        LASSERT(req->rq_reqbuf->lm_bufcount >= 2);
+        LASSERT(req->rq_repbuf->lm_bufcount >= 2);
+
+        req_seg = req->rq_reqbuf->lm_bufcount - 1;
+        rep_seg = req->rq_repbuf->lm_bufcount - 1;
+
+        return bulk_csum_cli_reply(desc, req->rq_bulk_read,
+                                   req->rq_reqbuf, req_seg,
+                                   req->rq_repbuf, rep_seg);
+}
+
+/* Per-context operations installed on plain_cli_ctx. */
+static struct ptlrpc_ctx_ops plain_ctx_ops = {
+        .sign           = plain_ctx_sign,
+        .verify         = plain_ctx_verify,
+        .refresh        = plain_ctx_refresh,
+        .wrap_bulk      = plain_cli_wrap_bulk,
+        .unwrap_bulk    = plain_cli_unwrap_bulk,
+};
+
+/* The single server-side context shared by all plain requests; it starts
+ * with one reference of its own. */
+static struct ptlrpc_svc_ctx plain_svc_ctx = {
+        .sc_policy      = &plain_policy,
+        .sc_refcount    = ATOMIC_INIT(1),
+};
+
+/*
+ * "Create" a sec for the plain policy: nothing is allocated, the shared
+ * plain_sec object is returned.  @flavor must belong to the plain policy.
+ */
+static struct ptlrpc_sec *plain_create_sec(struct obd_import *imp,
+                                           struct ptlrpc_svc_ctx *ctx,
+                                           __u32 flavor,
+                                           unsigned long flags)
+{
+        ENTRY;
+
+        LASSERT(SEC_FLAVOR_POLICY(flavor) == SPTLRPC_POLICY_PLAIN);
+
+        RETURN(&plain_sec);
+}
+
+/*
+ * Counterpart of plain_create_sec(): only checks that @sec is the shared
+ * plain_sec object; nothing is released.
+ */
+static void plain_destroy_sec(struct ptlrpc_sec *sec)
+{
+        ENTRY;
+
+        LASSERT(sec == &plain_sec);
+
+        EXIT;
+}
+
+/*
+ * Context lookup for the plain policy: every credential maps to the one
+ * shared plain_cli_ctx, which is returned with an extra reference taken.
+ */
+static struct ptlrpc_cli_ctx *plain_lookup_ctx(struct ptlrpc_sec *sec,
+                                               struct vfs_cred *vcred)
+{
+        struct ptlrpc_cli_ctx *found = &plain_cli_ctx;
+        ENTRY;
+
+        atomic_inc(&found->cc_refcount);
+
+        RETURN(found);
+}
+
+/*
+ * Allocate (or reuse a pooled) request buffer for the plain flavor and lay
+ * it out as a v2 lustre message.
+ *
+ * Segment 0 is the embedded request of @msgsize bytes.  A user descriptor
+ * segment follows when the flavor carries user info, and a bulk security
+ * descriptor segment follows when it carries bulk, so up to three segments
+ * can be required.
+ *
+ * Returns 0 on success, or -ENOMEM when a fresh buffer cannot be allocated.
+ */
+static
+int plain_alloc_reqbuf(struct ptlrpc_sec *sec,
+                       struct ptlrpc_request *req,
+                       int msgsize)
+{
+        struct sec_flavor_config *conf;
+        /* room for: message + optional user desc + optional bulk desc */
+        int bufcnt = 1, buflens[3], alloc_len;
+        ENTRY;
+
+        buflens[0] = msgsize;
+
+        if (SEC_FLAVOR_HAS_USER(req->rq_sec_flavor))
+                buflens[bufcnt++] = sptlrpc_user_desc_size();
+
+        if (SEC_FLAVOR_HAS_BULK(req->rq_sec_flavor)) {
+                LASSERT(req->rq_bulk_read || req->rq_bulk_write);
+
+                conf = &req->rq_import->imp_obd->u.cli.cl_sec_conf;
+                buflens[bufcnt++] = bulk_sec_desc_size(conf->sfc_bulk_csum, 1,
+                                                       req->rq_bulk_read);
+        }
+
+        alloc_len = lustre_msg_size_v2(bufcnt, buflens);
+
+        if (!req->rq_reqbuf) {
+                /* no buffer yet: must not be a pooled request */
+                LASSERT(!req->rq_pool);
+                OBD_ALLOC(req->rq_reqbuf, alloc_len);
+                if (!req->rq_reqbuf)
+                        RETURN(-ENOMEM);
+
+                req->rq_reqbuf_len = alloc_len;
+        } else {
+                /* buffer already attached: it must come from a pool and be
+                 * large enough; clear the part about to be used */
+                LASSERT(req->rq_pool);
+                LASSERT(req->rq_reqbuf_len >= alloc_len);
+                memset(req->rq_reqbuf, 0, alloc_len);
+        }
+
+        lustre_init_msg_v2(req->rq_reqbuf, bufcnt, buflens, NULL);
+        req->rq_reqmsg = lustre_msg_buf_v2(req->rq_reqbuf, 0, 0);
+
+        /* the user descriptor, when present, is always segment 1 */
+        if (SEC_FLAVOR_HAS_USER(req->rq_sec_flavor))
+                sptlrpc_pack_user_desc(req->rq_reqbuf, 1);
+
+        RETURN(0);
+}
+
+/*
+ * Release the request buffer.  Buffers of pooled requests (rq_pool set)
+ * are left alone; otherwise the buffer is freed and the request's
+ * pointer/length fields are cleared.
+ */
+static void plain_free_reqbuf(struct ptlrpc_sec *sec,
+                              struct ptlrpc_request *req)
+{
+        ENTRY;
+
+        if (req->rq_pool == NULL) {
+                OBD_FREE(req->rq_reqbuf, req->rq_reqbuf_len);
+                req->rq_reqbuf_len = 0;
+                req->rq_reqbuf = NULL;
+        }
+
+        EXIT;
+}
+
+/*
+ * Allocate the reply buffer: one segment of @msgsize bytes for the reply
+ * message plus, when the flavor carries bulk, one segment sized for a
+ * reply-side bulk security descriptor.
+ *
+ * Returns 0 on success or -ENOMEM.
+ */
+static int plain_alloc_repbuf(struct ptlrpc_sec *sec,
+                              struct ptlrpc_request *req,
+                              int msgsize)
+{
+        struct sec_flavor_config *conf;
+        int                       buflens[2];
+        int                       bufcnt = 0;
+        int                       alloc_len;
+        ENTRY;
+
+        buflens[bufcnt++] = msgsize;
+
+        if (SEC_FLAVOR_HAS_BULK(req->rq_sec_flavor)) {
+                LASSERT(req->rq_bulk_read || req->rq_bulk_write);
+
+                conf = &req->rq_import->imp_obd->u.cli.cl_sec_conf;
+                buflens[bufcnt++] = bulk_sec_desc_size(conf->sfc_bulk_csum, 0,
+                                                       req->rq_bulk_read);
+        }
+
+        alloc_len = lustre_msg_size_v2(bufcnt, buflens);
+
+        OBD_ALLOC(req->rq_repbuf, alloc_len);
+        if (req->rq_repbuf == NULL)
+                RETURN(-ENOMEM);
+
+        req->rq_repbuf_len = alloc_len;
+
+        RETURN(0);
+}
+
+/* Free the reply buffer and clear the request's pointer/length fields. */
+static void plain_free_repbuf(struct ptlrpc_sec *sec,
+                              struct ptlrpc_request *req)
+{
+        ENTRY;
+
+        OBD_FREE(req->rq_repbuf, req->rq_repbuf_len);
+        req->rq_repbuf_len = 0;
+        req->rq_repbuf = NULL;
+
+        EXIT;
+}
+
+/*
+ * Server side: validate an incoming plain-flavor request buffer.
+ *
+ * The expected layout is segment 0 = embedded request, then a user
+ * descriptor segment if the flavor carries user info, then a bulk security
+ * descriptor segment if it carries bulk.  On success rq_reqmsg/rq_reqlen
+ * point at segment 0 and the request takes a reference on the shared
+ * plain_svc_ctx.
+ *
+ * Returns SECSVC_OK, or SECSVC_DROP for a wrong RPC flavor, a wrong segment
+ * count, or a descriptor that fails to unpack.
+ */
+static
+int plain_accept(struct ptlrpc_request *req)
+{
+        struct lustre_msg *msg = req->rq_reqbuf;
+        int                bufcnt = 1;
+        ENTRY;
+
+        LASSERT(SEC_FLAVOR_POLICY(req->rq_sec_flavor) == SPTLRPC_POLICY_PLAIN);
+
+        if (SEC_FLAVOR_RPC(req->rq_sec_flavor) != SPTLRPC_FLVR_PLAIN) {
+                CERROR("Invalid flavor 0x%x\n", req->rq_sec_flavor);
+                /* leave through RETURN() like every other exit after ENTRY */
+                RETURN(SECSVC_DROP);
+        }
+
+        if (SEC_FLAVOR_HAS_USER(req->rq_sec_flavor)) {
+                /* bufcnt becomes the number of segments required so far */
+                if (msg->lm_bufcount < ++bufcnt) {
+                        CERROR("Protocal error: too small buf count %d\n",
+                               msg->lm_bufcount);
+                        RETURN(SECSVC_DROP);
+                }
+
+                if (sptlrpc_unpack_user_desc(msg, bufcnt - 1)) {
+                        CERROR("Mal-formed user descriptor\n");
+                        RETURN(SECSVC_DROP);
+                }
+
+                req->rq_user_desc = lustre_msg_buf(msg, bufcnt - 1, 0);
+        }
+
+        if (SEC_FLAVOR_HAS_BULK(req->rq_sec_flavor)) {
+                /* the bulk descriptor must be the last segment */
+                if (msg->lm_bufcount != ++bufcnt) {
+                        CERROR("Protocal error: invalid buf count %d\n",
+                               msg->lm_bufcount);
+                        RETURN(SECSVC_DROP);
+                }
+
+                if (bulk_sec_desc_unpack(msg, bufcnt - 1)) {
+                        CERROR("Mal-formed bulk checksum request\n");
+                        RETURN(SECSVC_DROP);
+                }
+        }
+
+        req->rq_reqmsg = lustre_msg_buf(msg, 0, 0);
+        req->rq_reqlen = msg->lm_buflens[0];
+
+        req->rq_svc_ctx = &plain_svc_ctx;
+        atomic_inc(&req->rq_svc_ctx->sc_refcount);
+
+        RETURN(SECSVC_OK);
+}
+
+/*
+ * Server side: set up the reply state for @req.
+ *
+ * The reply buffer lives directly behind the reply state structure and
+ * holds one segment of @msgsize bytes plus, for bulk requests, one segment
+ * sized from the checksum algorithm found in the last segment of the
+ * request buffer.  A reply state already attached to the request is reused
+ * if present; otherwise one is allocated.
+ *
+ * Returns 0 on success or -ENOMEM.
+ */
+static int plain_alloc_rs(struct ptlrpc_request *req, int msgsize)
+{
+        struct ptlrpc_reply_state   *rs = req->rq_reply_state;
+        struct ptlrpc_bulk_sec_desc *bsd;
+        int                          buflens[2];
+        int                          bufcnt = 0;
+        int                          rs_size = sizeof(*rs);
+        ENTRY;
+
+        LASSERT(msgsize % 8 == 0);
+
+        buflens[bufcnt++] = msgsize;
+        if (SEC_FLAVOR_HAS_BULK(req->rq_sec_flavor) &&
+            (req->rq_bulk_read || req->rq_bulk_write)) {
+                bsd = lustre_msg_buf(req->rq_reqbuf,
+                                     req->rq_reqbuf->lm_bufcount - 1,
+                                     sizeof(*bsd));
+                LASSERT(bsd);
+
+                buflens[bufcnt++] = bulk_sec_desc_size(bsd->bsd_csum_alg, 0,
+                                                       req->rq_bulk_read);
+        }
+        rs_size += lustre_msg_size_v2(bufcnt, buflens);
+
+        if (rs == NULL) {
+                OBD_ALLOC(rs, rs_size);
+                if (rs == NULL)
+                        RETURN(-ENOMEM);
+
+                rs->rs_size = rs_size;
+        } else {
+                /* pre-allocated */
+                LASSERT(rs->rs_size >= rs_size);
+        }
+
+        /* the reply state holds its own reference on the svc context */
+        rs->rs_svc_ctx = req->rq_svc_ctx;
+        atomic_inc(&req->rq_svc_ctx->sc_refcount);
+
+        /* reply buffer starts right after the reply state header */
+        rs->rs_repbuf = (struct lustre_msg *) (rs + 1);
+        rs->rs_repbuf_len = rs_size - sizeof(*rs);
+
+        lustre_init_msg_v2(rs->rs_repbuf, bufcnt, buflens, NULL);
+        rs->rs_msg = lustre_msg_buf_v2(rs->rs_repbuf, 0, 0);
+
+        req->rq_reply_state = rs;
+
+        RETURN(0);
+}
+
+/*
+ * Drop the reply state's reference on its svc context and free the reply
+ * state unless it is marked as pre-allocated.
+ */
+static void plain_free_rs(struct ptlrpc_reply_state *rs)
+{
+        ENTRY;
+
+        /* this must never be the last reference on the context */
+        LASSERT(atomic_read(&rs->rs_svc_ctx->sc_refcount) > 1);
+        atomic_dec(&rs->rs_svc_ctx->sc_refcount);
+
+        if (rs->rs_prealloc) {
+                EXIT;
+                return;
+        }
+
+        OBD_FREE(rs, rs->rs_size);
+        EXIT;
+}
+
+/*
+ * Server side: finalize the reply before it is sent.
+ *
+ * If the handler produced a reply whose length differs from segment 0 of
+ * the reply buffer, the buffer is resized via lustre_shrink_msg();
+ * otherwise the full message size is used.  The request's security flavor
+ * is stamped into the reply header and the resulting length is stored in
+ * rs_repdata_len.  Always returns 0.
+ */
+static
+int plain_authorize(struct ptlrpc_request *req)
+{
+        struct ptlrpc_reply_state *rs = req->rq_reply_state;
+        struct lustre_msg_v2      *msg;
+        int                        len;
+        ENTRY;
+
+        /* verify rs before reading through it */
+        LASSERT(rs);
+        msg = rs->rs_repbuf;
+        LASSERT(msg);
+
+        if (req->rq_replen != msg->lm_buflens[0])
+                len = lustre_shrink_msg(msg, 0, req->rq_replen, 1);
+        else
+                len = lustre_msg_size_v2(msg->lm_bufcount, msg->lm_buflens);
+
+        msg->lm_secflvr = req->rq_sec_flavor;
+        rs->rs_repdata_len = len;
+        RETURN(0);
+}
+
+/*
+ * Server-side bulk unwrap hook: forwards to bulk_csum_svc() with the last
+ * segment of the request buffer and of the reply buffer.
+ */
+static int plain_svc_unwrap_bulk(struct ptlrpc_request *req,
+                                 struct ptlrpc_bulk_desc *desc)
+{
+        struct ptlrpc_reply_state *rs = req->rq_reply_state;
+        int                        req_seg, rep_seg;
+
+        LASSERT(rs);
+
+        req_seg = req->rq_reqbuf->lm_bufcount - 1;
+        rep_seg = rs->rs_repbuf->lm_bufcount - 1;
+
+        return bulk_csum_svc(desc, req->rq_bulk_read,
+                             req->rq_reqbuf, req_seg,
+                             rs->rs_repbuf, rep_seg);
+}
+
+/*
+ * Server-side bulk wrap hook: forwards to bulk_csum_svc() with the last
+ * segment of the request buffer and of the reply buffer.
+ */
+static int plain_svc_wrap_bulk(struct ptlrpc_request *req,
+                               struct ptlrpc_bulk_desc *desc)
+{
+        struct ptlrpc_reply_state *rs = req->rq_reply_state;
+        int                        req_seg, rep_seg;
+
+        LASSERT(rs);
+
+        req_seg = req->rq_reqbuf->lm_bufcount - 1;
+        rep_seg = rs->rs_repbuf->lm_bufcount - 1;
+
+        return bulk_csum_svc(desc, req->rq_bulk_read,
+                             req->rq_reqbuf, req_seg,
+                             rs->rs_repbuf, rep_seg);
+}
+
+/* Operation table referenced by plain_policy.sp_cops: sec lifetime,
+ * context lookup and request/reply buffer management. */
+static struct ptlrpc_sec_cops plain_sec_cops = {
+        /* sec and context */
+        .create_sec             = plain_create_sec,
+        .destroy_sec            = plain_destroy_sec,
+        .lookup_ctx             = plain_lookup_ctx,
+        /* buffers */
+        .alloc_reqbuf           = plain_alloc_reqbuf,
+        .free_reqbuf            = plain_free_reqbuf,
+        .alloc_repbuf           = plain_alloc_repbuf,
+        .free_repbuf            = plain_free_repbuf,
+};
+
+/* Operation table referenced by plain_policy.sp_sops: request acceptance,
+ * reply state handling and bulk hooks. */
+static struct ptlrpc_sec_sops plain_sec_sops = {
+        .accept                 = plain_accept,
+        .authorize              = plain_authorize,
+        .alloc_rs               = plain_alloc_rs,
+        .free_rs                = plain_free_rs,
+        .wrap_bulk              = plain_svc_wrap_bulk,
+        .unwrap_bulk            = plain_svc_unwrap_bulk,
+};
+
+/* Policy descriptor for "sec.plain"; handed to the policy registry by
+ * sptlrpc_plain_init() and removed again by sptlrpc_plain_exit(). */
+static struct ptlrpc_sec_policy plain_policy = {
+        .sp_name                = "sec.plain",
+        .sp_policy              = SPTLRPC_POLICY_PLAIN,
+        .sp_owner               = THIS_MODULE,
+        .sp_sops                = &plain_sec_sops,
+        .sp_cops                = &plain_sec_cops,
+};
+
+/*
+ * Fill in the static plain_sec and plain_cli_ctx objects: the sec gets a
+ * context cache made of one hash bucket, and the single context is linked
+ * into that bucket.
+ */
+static void plain_init_internal(void)
+{
+        static HLIST_HEAD(ctx_bucket);
+
+        /* the security instance */
+        plain_sec.ps_policy = &plain_policy;
+        plain_sec.ps_import = NULL;
+        plain_sec.ps_flavor = SPTLRPC_FLVR_PLAIN;
+        plain_sec.ps_flags = 0;
+        plain_sec.ps_gc_next = 0;
+        plain_sec.ps_gc_interval = 0;
+        plain_sec.ps_ccache = &ctx_bucket;
+        plain_sec.ps_ccache_size = 1;
+        spin_lock_init(&plain_sec.ps_lock);
+        atomic_set(&plain_sec.ps_refcount, 1);     /* always busy */
+        atomic_set(&plain_sec.ps_busy, 1);         /* for "plain_cli_ctx" */
+
+        /* the one and only client context */
+        plain_cli_ctx.cc_sec = &plain_sec;
+        plain_cli_ctx.cc_ops = &plain_ctx_ops;
+        plain_cli_ctx.cc_expire = 0;
+        plain_cli_ctx.cc_vcred.vc_uid = 0;
+        plain_cli_ctx.cc_flags = PTLRPC_CTX_UPTODATE | PTLRPC_CTX_ETERNAL |
+                                 PTLRPC_CTX_HASHED;
+        spin_lock_init(&plain_cli_ctx.cc_lock);
+        INIT_LIST_HEAD(&plain_cli_ctx.cc_req_list);
+        atomic_set(&plain_cli_ctx.cc_refcount, 1);    /* for hash */
+        hlist_add_head(&plain_cli_ctx.cc_hash, &ctx_bucket);
+}
+
+/*
+ * Prepare the plain sec/context objects and register the "sec.plain"
+ * policy.  Returns the result of sptlrpc_register_policy(); a non-zero
+ * result is logged.
+ */
+int sptlrpc_plain_init(void)
+{
+        int rc;
+
+        plain_init_internal();
+
+        rc = sptlrpc_register_policy(&plain_policy);
+        if (rc == 0)
+                return 0;
+
+        CERROR("failed to register sec.plain: %d\n", rc);
+        return rc;
+}
+
+/*
+ * Unregister the "sec.plain" policy.  Returns the result of
+ * sptlrpc_unregister_policy(); a non-zero result is logged.
+ */
+int sptlrpc_plain_exit(void)
+{
+        int rc = sptlrpc_unregister_policy(&plain_policy);
+
+        if (rc == 0)
+                return 0;
+
+        CERROR("cannot unregister sec.plain: %d\n", rc);
+        return rc;
+}
index 370ee76..928f988 100644 (file)
@@ -279,7 +279,7 @@ ptlrpc_init_svc(int nbufs, int bufsize, int max_req_size, int max_reply_size,
         ENTRY;
 
         LASSERT (nbufs > 0);
-        LASSERT (bufsize >= max_req_size);
+        LASSERT (bufsize >= max_req_size + SPTLRPC_MAX_PAYLOAD);
         LASSERT (ctx_tags != 0);
 
         OBD_ALLOC(service, sizeof(*service));
@@ -294,7 +294,7 @@ ptlrpc_init_svc(int nbufs, int bufsize, int max_req_size, int max_reply_size,
         cfs_waitq_init(&service->srv_waitq);
 
         service->srv_nbuf_per_group = test_req_buffer_pressure ? 1 : nbufs;
-        service->srv_max_req_size = max_req_size;
+        service->srv_max_req_size = max_req_size + SPTLRPC_MAX_PAYLOAD;
         service->srv_buf_size = bufsize;
         service->srv_rep_portal = rep_portal;
         service->srv_req_portal = req_portal;
@@ -333,7 +333,8 @@ ptlrpc_init_svc(int nbufs, int bufsize, int max_req_size, int max_reply_size,
         /* Now allocate pool of reply buffers */
         /* Increase max reply size to next power of two */
         service->srv_max_reply_size = 1;
-        while (service->srv_max_reply_size < max_reply_size)
+        while (service->srv_max_reply_size <
+               max_reply_size + SPTLRPC_MAX_PAYLOAD)
                 service->srv_max_reply_size <<= 1;
 
         if (proc_entry != NULL)
@@ -359,6 +360,8 @@ static void __ptlrpc_server_free_request(struct ptlrpc_request *req)
                 req->rq_reply_state = NULL;
         }
 
+        sptlrpc_svc_ctx_decref(req);
+
         if (req != &rqbd->rqbd_req) {
                 /* NB request buffers use an embedded
                  * req if the incoming req unlinked the
@@ -560,6 +563,19 @@ ptlrpc_server_handle_request(struct ptlrpc_service *svc,
                                     svc->srv_n_active_reqs);
         }
 
+        rc = sptlrpc_svc_unwrap_request(request);
+        switch (rc) {
+        case SECSVC_OK:
+                break;
+        case SECSVC_COMPLETE:
+                target_send_reply(request, 0, OBD_FAIL_MDS_ALL_REPLY_NET);
+                goto put_conn;
+        case SECSVC_DROP:
+                goto out;
+        default:
+                LBUG();
+        }
+
 #if SWAB_PARANOIA
         /* Clear request swab mask; this is a new request */
         request->rq_req_swab_mask = 0;
@@ -667,7 +683,9 @@ put_conn:
         if (timediff / 1000000 > (long)obd_timeout)
                 CERROR("request "LPU64" opc %u from %s processed in %lds "
                        "trans "LPU64" rc %d/%d\n",
-                       request->rq_xid, lustre_msg_get_opc(request->rq_reqmsg),
+                       request->rq_xid,
+                       request->rq_reqmsg ?
+                                lustre_msg_get_opc(request->rq_reqmsg) : 0,
                        libcfs_id2str(request->rq_peer),
                        cfs_timeval_sub(&work_end, &request->rq_arrival_time,
                                        NULL) / 1000000,
@@ -680,7 +698,9 @@ put_conn:
         else
                 CDEBUG(D_HA, "request "LPU64" opc %u from %s processed in "
                        "%ldus (%ldus total) trans "LPU64" rc %d/%d\n",
-                       request->rq_xid, lustre_msg_get_opc(request->rq_reqmsg),
+                       request->rq_xid,
+                       request->rq_reqmsg ?
+                                lustre_msg_get_opc(request->rq_reqmsg) : 0,
                        libcfs_id2str(request->rq_peer), timediff,
                        cfs_timeval_sub(&work_end, &request->rq_arrival_time,
                                        NULL),
@@ -689,7 +709,7 @@ put_conn:
                                 lustre_msg_get_status(request->rq_repmsg) :
                                 -999);
 
-        if (svc->srv_stats != NULL) {
+        if (svc->srv_stats != NULL && request->rq_reqmsg != NULL) {
                 int opc = opcode_offset(lustre_msg_get_opc(request->rq_reqmsg));
                 if (opc > 0) {
                         LASSERT(opc < LUSTRE_MAX_OPCODES);
index c45581e..2e35469 100644 (file)
@@ -67,6 +67,7 @@ static int lfs_quotaoff(int argc, char **argv);
 static int lfs_setquota(int argc, char **argv);
 static int lfs_quota(int argc, char **argv);
 #endif
+static int lfs_flushctx(int argc, char **argv);
 static int lfs_join(int argc, char **argv);
 
 /* all avaialable commands */
@@ -131,6 +132,8 @@ command_t cmdlist[] = {
         {"quota", lfs_quota, 0, "Display disk usage and limits.\n"
          "usage: quota [ -o obd_uuid ] [ -u | -g ] [name] <filesystem>"},
 #endif
+        {"flushctx", lfs_flushctx, 0, "Flush security context for current user.\n"
+         "usage: flushctx [-k] [mountpoint...]"},
         {"help", Parser_help, 0, "help"},
         {"exit", Parser_quit, 0, "quit"},
         {"quit", Parser_quit, 0, "quit"},
@@ -1478,6 +1481,92 @@ static int lfs_quota(int argc, char **argv)
 }
 #endif /* HAVE_QUOTA_SUPPORT */
 
+/*
+ * Open @mp read-only and issue LL_IOC_FLUSHCTX on it.  Failures of open()
+ * or ioctl() are reported on stderr.  Returns -1 if the open fails,
+ * otherwise the return value of ioctl().
+ */
+static int flushctx_ioctl(char *mp)
+{
+        int rc;
+        int fd = open(mp, O_RDONLY);
+
+        if (fd < 0) {
+                fprintf(stderr, "flushctx: error open %s: %s\n",
+                        mp, strerror(errno));
+                return -1;
+        }
+
+        rc = ioctl(fd, LL_IOC_FLUSHCTX);
+        if (rc == -1) {
+                /* report before close() gets a chance to touch errno */
+                fprintf(stderr, "flushctx: error ioctl %s: %s\n",
+                        mp, strerror(errno));
+        }
+
+        close(fd);
+        return rc;
+}
+
+/*
+ * "lfs flushctx [-k] [mountpoint...]"
+ *
+ * With -k, "kdestroy" is run first.  Each mount point given on the command
+ * line is then flushed via flushctx_ioctl().  When none is given,
+ * /proc/mounts is scanned and every entry whose filesystem type is
+ * "lustre" and whose device name contains '@' is flushed.
+ *
+ * Returns 0 on success, -1 if /proc/mounts cannot be opened or parsed or
+ * if any flush fails, and CMD_HELP on an unrecognized option.
+ */
+static int lfs_flushctx(int argc, char **argv)
+{
+        int     kdestroy = 0, c;
+        FILE   *proc;
+        char    procline[PATH_MAX], *line;
+        int     rc = 0;
+
+        optind = 0;
+        while ((c = getopt(argc, argv, "k")) != -1) {
+                switch (c) {
+                case 'k':
+                        kdestroy = 1;
+                        break;
+                default:
+                        fprintf(stderr, "error: %s: option '-%c' "
+                                        "unrecognized\n", argv[0], c);
+                        return CMD_HELP;
+                }
+        }
+
+        if (kdestroy)
+                system("kdestroy > /dev/null");
+
+        if (optind >= argc) {
+                /* flush for all mounted lustre fs. */
+                proc = fopen("/proc/mounts", "r");
+                if (!proc) {
+                        fprintf(stderr, "error: %s: can't open /proc/mounts\n",
+                                argv[0]);
+                        return -1;
+                }
+
+                while ((line = fgets(procline, PATH_MAX, proc)) != NULL) {
+                        /* a line holds fewer than PATH_MAX characters, so
+                         * no single field can overflow these buffers */
+                        char dev[PATH_MAX];
+                        char mp[PATH_MAX];
+                        char fs[PATH_MAX];
+
+                        if (sscanf(line, "%s %s %s", dev, mp, fs) != 3) {
+                                fprintf(stderr, "%s: unexpected format in "
+                                                "/proc/mounts\n",
+                                        argv[0]);
+                                /* stop scanning, but still close the stream */
+                                rc = -1;
+                                break;
+                        }
+
+                        if (strcmp(fs, "lustre") != 0)
+                                continue;
+                        /* we use '@' to determine it's a client. are there
+                         * any other better way?
+                         */
+                        if (strchr(dev, '@') == NULL)
+                                continue;
+
+                        if (flushctx_ioctl(mp))
+                                rc = -1;
+                }
+
+                fclose(proc);
+        } else {
+                /* flush fs as specified */
+                while (optind < argc) {
+                        if (flushctx_ioctl(argv[optind++]))
+                                rc = -1;
+                }
+        }
+
+        return rc;
+}
+
 int main(int argc, char **argv)
 {
         int rc;