Land the first part of secure ptlrpc support.
int state;
struct signal pending;
char comm[32];
+ int uid;
+ int gid;
int pid;
int fsuid;
int fsgid;
typedef struct { volatile int counter; } atomic_t;
+#define ATOMIC_INIT(i) { (i) }
#define atomic_read(a) ((a)->counter)
#define atomic_set(a,b) do {(a)->counter = b; } while (0)
#define atomic_dec_and_test(a) ((--((a)->counter)) == 0)
#define unlikely(exp) (exp)
#endif
+#define might_sleep()
+#define might_sleep_if(c)
+#define smp_mb()
+
+static inline
+int test_and_set_bit(int nr, unsigned long *addr)
+{
+ int oldbit;
+
+ /* locate the word containing bit 'nr'; a long holds 8 * sizeof(long) bits */
+ while (nr >= sizeof(long) * 8) {
+ nr -= sizeof(long) * 8;
+ addr++;
+ }
+
+ oldbit = ((*addr) & (1UL << nr)) != 0;
+ *addr |= (1UL << nr);
+ return oldbit;
+}
+
+static inline
+int test_and_clear_bit(int nr, unsigned long *addr)
+{
+ int oldbit;
+
+ while (nr >= sizeof(long) * 8) {
+ nr -= sizeof(long) * 8;
+ addr++;
+ }
+
+ oldbit = ((*addr) & (1UL << nr)) != 0;
+ *addr &= ~(1UL << nr);
+ return oldbit;
+}
+
/* FIXME sys/capability.h will eventually include linux/fs.h, thus
 * causing numerous troubles on x86-64. As a temporary fix for the
 * build break at cary, we copy the definitions we need from capability.h
}
#define JOIN_FILE_ALIGN 4096
+
+/* security opcodes */
+typedef enum {
+ SEC_CTX_INIT = 801,
+ SEC_CTX_INIT_CONT = 802,
+ SEC_CTX_FINI = 803,
+ SEC_LAST_OPC
+} sec_cmd_t;
+
#endif
#define LL_IOC_JOIN _IOW ('f', 163, long)
#define IOC_OBD_STATFS _IOWR('f', 164, struct obd_statfs *)
#define IOC_LOV_GETINFO _IOWR('f', 165, struct lov_user_mds_data *)
+#define LL_IOC_FLUSHCTX _IOW ('f', 166, long)
#define LL_STATFS_MDC 1
#define LL_STATFS_LOV 2
LCFG_LOV_ADD_INA = 0x00ce013,
LCFG_ADD_MDC = 0x00cf014,
LCFG_DEL_MDC = 0x00cf015,
+ LCFG_SEC_FLAVOR = 0x00ce016,
};
struct lustre_cfg_bufs {
int lmd_exclude_count;
char *lmd_dev; /* device name */
char *lmd_profile; /* client only */
+ char *lmd_sec_mdt; /* sec from mdt (to ost/mdt) */
+ char *lmd_sec_cli; /* sec from client (to ost/mdt) */
char *lmd_opts; /* lustre mount options (as opposed to
_device_ mount options) */
__u32 *lmd_exclude; /* array of OSTs to ignore */
struct list_head imp_delayed_list;
struct obd_device *imp_obd;
+ struct ptlrpc_sec *imp_sec;
cfs_waitq_t imp_recovery_waitq;
atomic_t imp_inflight;
struct lustre_handle imp_remote_handle;
cfs_time_t imp_next_ping; /* jiffies */
__u64 imp_last_success_conn; /* jiffies, 64-bit */
+ cfs_time_t imp_next_reconnect; /* seconds */
/* all available obd_import_conn linked here */
struct list_head imp_conn_list;
imp_pingable:1, /* pingable */
imp_resend_replay:1, /* resend for replay */
imp_recon_bk:1, /* turn off reconnect if all failovers fail */
- imp_last_recon:1; /* internally used by above */
+ imp_last_recon:1, /* internally used by above */
+ imp_force_reconnect:1; /* need to reconnect
+ * even if the status
+ * is FULL */
__u32 imp_connect_op;
struct obd_connect_data imp_connect_data;
__u64 imp_connect_flags_orig;
#include <lnet/lnet.h>
#include <lustre/lustre_idl.h>
#include <lustre_ha.h>
+#include <lustre_sec.h>
#include <lustre_import.h>
#include <lprocfs_status.h>
lnet_handle_md_t rs_md_h;
atomic_t rs_refcount;
+ struct ptlrpc_svc_ctx *rs_svc_ctx;
+ struct lustre_msg *rs_repbuf; /* wrapper */
+ int rs_repbuf_len; /* wrapper buf length */
+ int rs_repdata_len; /* wrapper msg length */
+ struct lustre_msg *rs_msg; /* reply message */
+
/* locks awaiting client reply ACK */
int rs_nlocks;
struct lustre_handle rs_locks[RS_MAX_LOCKS];
ldlm_mode_t rs_modes[RS_MAX_LOCKS];
- /* last member: variable sized reply message */
- struct lustre_msg *rs_msg;
};
struct ptlrpc_thread;
*/
rq_replay:1,
rq_no_resend:1, rq_waiting:1, rq_receiving_reply:1,
- rq_no_delay:1, rq_net_err:1;
+ rq_no_delay:1, rq_net_err:1, rq_wait_ctx:1;
enum rq_phase rq_phase; /* one of RQ_PHASE_* */
atomic_t rq_refcount; /* client-side refcount for SENT race */
__u64 rq_xid;
struct list_head rq_replay_list;
+ struct ptlrpc_cli_ctx *rq_cli_ctx; /* client's half ctx */
+ struct ptlrpc_svc_ctx *rq_svc_ctx; /* server's half ctx */
+ struct list_head rq_ctx_chain; /* link to waited ctx */
+ ptlrpc_flavor_t rq_sec_flavor; /* client & server */
+ /* client security flags */
+ unsigned int rq_ctx_init:1, /* context initiation */
+ rq_ctx_fini:1, /* context destroy */
+ rq_bulk_read:1, /* request bulk read */
+ rq_bulk_write:1, /* request bulk write */
+ /* server authentication flags */
+ rq_auth_gss:1, /* authenticated by gss */
+ rq_auth_remote:1, /* authed as remote user */
+ rq_auth_usr_root:1, /* authed as root */
+ rq_auth_usr_mds:1; /* authed as mds */
+
+ uid_t rq_auth_uid; /* authed uid */
+ uid_t rq_auth_mapped_uid; /* authed uid mapped to */
+
+ /* (server side) points directly into the req buffer */
+ struct ptlrpc_user_desc *rq_user_desc;
+
+ /* various buffer pointers */
+ struct lustre_msg *rq_reqbuf; /* req wrapper */
+ int rq_reqbuf_len; /* req wrapper buf len */
+ int rq_reqdata_len; /* req wrapper msg len */
+ struct lustre_msg *rq_repbuf; /* rep wrapper */
+ int rq_repbuf_len; /* rep wrapper buf len */
+ int rq_repdata_len; /* rep wrapper msg len */
+ struct lustre_msg *rq_clrbuf; /* only in priv mode */
+ int rq_clrbuf_len; /* only in priv mode */
+ int rq_clrdata_len; /* only in priv mode */
+
#if SWAB_PARANOIA
__u32 rq_req_swab_mask;
__u32 rq_rep_swab_mask;
FLAG(req->rq_timedout, "X") /* eXpired */, FLAG(req->rq_resend, "S"), \
FLAG(req->rq_restart, "T"), FLAG(req->rq_replay, "P"), \
FLAG(req->rq_no_resend, "N"), \
- FLAG(req->rq_waiting, "W")
+ FLAG(req->rq_waiting, "W"), \
+ FLAG(req->rq_wait_ctx, "C")
-#define REQ_FLAGS_FMT "%s:%s%s%s%s%s%s%s%s%s"
+#define REQ_FLAGS_FMT "%s:%s%s%s%s%s%s%s%s%s%s"
#define __DEBUG_REQ(CDEB_TYPE, level, req, fmt, args...) \
CDEB_TYPE(level, "@@@ " fmt \
lnet_handle_md_t bd_md_h; /* associated MD */
#if defined(__KERNEL__)
+ lnet_kiov_t *bd_enc_iov; /* used in privacy mode */
lnet_kiov_t bd_iov[0];
#else
+ lnet_md_iovec_t *bd_enc_iov;
lnet_md_iovec_t bd_iov[0];
#endif
};
struct ptlrpc_request *ptlrpc_prep_req_pool(struct obd_import *imp,
__u32 version, int opcode,
int count, int *lengths, char **bufs,
- struct ptlrpc_request_pool *pool);
+ struct ptlrpc_request_pool *pool,
+ struct ptlrpc_cli_ctx *ctx);
void ptlrpc_free_req(struct ptlrpc_request *request);
void ptlrpc_req_finished(struct ptlrpc_request *request);
void ptlrpc_req_finished_with_imp_lock(struct ptlrpc_request *request);
/* ptlrpc/pack_generic.c */
int lustre_msg_swabbed(struct lustre_msg *msg);
int lustre_msg_check_version(struct lustre_msg *msg, __u32 version);
+void lustre_init_msg_v2(struct lustre_msg_v2 *msg, int count, int *lens,
+ char **bufs);
int lustre_pack_request(struct ptlrpc_request *, __u32 magic, int count,
int *lens, char **bufs);
int lustre_pack_reply(struct ptlrpc_request *, int count, int *lens,
char **bufs);
-void lustre_shrink_reply(struct ptlrpc_request *req, int segment,
- unsigned int newlen, int move_data);
+int lustre_pack_reply_v2(struct ptlrpc_request *req, int count,
+ int *lens, char **bufs);
+int lustre_shrink_msg(struct lustre_msg *msg, int segment,
+ unsigned int newlen, int move_data);
void lustre_free_reply_state(struct ptlrpc_reply_state *rs);
int lustre_msg_size(__u32 magic, int count, int *lengths);
+int lustre_msg_size_v2(int count, int *lengths);
int lustre_unpack_msg(struct lustre_msg *m, int len);
int lustre_unpack_ptlrpc_body(struct lustre_msg *m);
+void *lustre_msg_buf_v1(void *msg, int n, int min_size);
+void *lustre_msg_buf_v2(struct lustre_msg_v2 *m, int n, int min_size);
void *lustre_msg_buf(struct lustre_msg *m, int n, int minlen);
int lustre_msg_buflen(struct lustre_msg *m, int n);
void lustre_msg_set_buflen(struct lustre_msg *m, int n, int len);
void lustre_msg_set_conn_cnt(struct lustre_msg *msg, __u32 conn_cnt);
static inline void
+lustre_shrink_reply(struct ptlrpc_request *req, int segment,
+ unsigned int newlen, int move_data)
+{
+ LASSERT(req->rq_reply_state);
+ LASSERT(req->rq_repmsg);
+ req->rq_replen = lustre_shrink_msg(req->rq_repmsg, segment,
+ newlen, move_data);
+}
+
+static inline void
ptlrpc_rs_addref(struct ptlrpc_reply_state *rs)
{
LASSERT(atomic_read(&rs->rs_refcount) > 0);
int client_import_del_conn(struct obd_import *imp, struct obd_uuid *uuid);
int import_set_conn_priority(struct obd_import *imp, struct obd_uuid *uuid);
+/* ptlrpc/pers.c */
+int ptlrpc_bulk_alloc_enc_pages(struct ptlrpc_bulk_desc *desc);
+void ptlrpc_bulk_free_enc_pages(struct ptlrpc_bulk_desc *desc);
+
/* ptlrpc/pinger.c */
int ptlrpc_pinger_add_import(struct obd_import *imp);
int ptlrpc_pinger_del_import(struct obd_import *imp);
#define PARAM_LOV_STRIPE_COUNT PARAM_LOV"stripecount="
#define PARAM_LOV_STRIPE_OFFSET PARAM_LOV"stripeoffset="
#define PARAM_LOV_STRIPE_PATTERN PARAM_LOV"stripetype="
+#define PARAM_SEC "security."
+#define PARAM_SEC_RPC PARAM_SEC"rpc."
+#define PARAM_SEC_RPC_MDT PARAM_SEC_RPC"mdt="
+#define PARAM_SEC_RPC_CLI PARAM_SEC_RPC"cli="
#endif // _LUSTRE_PARAM_H
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2004-2006 Cluster File Systems, Inc.
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef _LUSTRE_SEC_H_
+#define _LUSTRE_SEC_H_
+
+/*
+ * declarations of external structs, to avoid extra includes
+ */
+struct obd_import;
+struct ptlrpc_request;
+struct ptlrpc_reply_state;
+struct ptlrpc_bulk_desc;
+struct brw_page;
+
+/*
+ * forward declarations
+ */
+struct ptlrpc_sec_policy;
+struct ptlrpc_sec_cops;
+struct ptlrpc_sec_sops;
+struct ptlrpc_sec;
+struct ptlrpc_svc_ctx;
+struct ptlrpc_cli_ctx;
+struct ptlrpc_ctx_ops;
+
+/*
+ * flavor constants
+ */
+enum sptlrpc_policies {
+ SPTLRPC_POLICY_NULL = 0,
+ SPTLRPC_POLICY_PLAIN = 1,
+ SPTLRPC_POLICY_GSS = 2,
+ SPTLRPC_POLICY_MAX,
+};
+
+enum sptlrpc_subpolicy_null {
+ SPTLRPC_SUBPOLICY_NULL = 0,
+ SPTLRPC_SUBPOLICY_NULL_MAX,
+};
+
+enum sptlrpc_subpolicy_plain {
+ SPTLRPC_SUBPOLICY_PLAIN = 0,
+ SPTLRPC_SUBPOLICY_PLAIN_MAX,
+};
+
+enum sptlrpc_subpolicy_gss {
+ SPTLRPC_SUBPOLICY_GSS_NONE = 0,
+ SPTLRPC_SUBPOLICY_GSS_KRB5 = 1,
+ SPTLRPC_SUBPOLICY_GSS_MAX,
+};
+
+enum sptlrpc_service_type {
+ SPTLRPC_SVC_NONE = 0, /* no security */
+ SPTLRPC_SVC_AUTH = 1, /* authentication */
+ SPTLRPC_SVC_PRIV = 2, /* privacy */
+ SPTLRPC_SVC_MAX,
+};
+
+/*
+ * flavor compose/extract
+ */
+
+typedef __u32 ptlrpc_flavor_t;
+
+/*
+ * 8b (reserved) | 8b (flags) | 6b (policy) | 6b (subpolicy) | 4b (svc)
+ */
+#define SEC_FLAVOR_FLAGS_OFFSET (16)
+#define SEC_FLAVOR_POLICY_OFFSET (10)
+#define SEC_FLAVOR_SUBPOLICY_OFFSET (4)
+#define SEC_FLAVOR_SVC_OFFSET (0)
+
+#define SEC_MAKE_RPC_FLAVOR(policy, subpolicy, svc) \
+ (((__u32)(policy) << SEC_FLAVOR_POLICY_OFFSET) | \
+ ((__u32)(subpolicy) << SEC_FLAVOR_SUBPOLICY_OFFSET) | \
+ ((__u32)(svc) << SEC_FLAVOR_SVC_OFFSET))
+
+#define SEC_MAKE_RPC_SUBFLAVOR(subpolicy, svc) \
+ (((__u32)(subpolicy) << SEC_FLAVOR_SUBPOLICY_OFFSET) | \
+ ((__u32)(svc) << SEC_FLAVOR_SVC_OFFSET))
+
+#define SEC_FLAVOR_POLICY(flavor) \
+ ((((__u32)(flavor)) >> SEC_FLAVOR_POLICY_OFFSET) & 0x3F)
+#define SEC_FLAVOR_SUBPOLICY(flavor) \
+ ((((__u32)(flavor)) >> SEC_FLAVOR_SUBPOLICY_OFFSET) & 0x3F)
+#define SEC_FLAVOR_SVC(flavor) \
+ ((((__u32)(flavor)) >> SEC_FLAVOR_SVC_OFFSET) & 0xF)
+#define SEC_FLAVOR_SUB(flavor) \
+ ((((__u32)(flavor)) >> SEC_FLAVOR_SVC_OFFSET) & 0x3FF)
+
+#define SEC_FLAVOR_RPC(f) \
+ (((__u32) f) & ((1 << SEC_FLAVOR_FLAGS_OFFSET) - 1))
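+
+/*
+ * A worked example of the layout above: SPTLRPC_FLVR_KRB5P (defined
+ * below) is SEC_MAKE_RPC_FLAVOR(SPTLRPC_POLICY_GSS,
+ * SPTLRPC_SUBPOLICY_GSS_KRB5, SPTLRPC_SVC_PRIV)
+ * = (2 << 10) | (1 << 4) | 2 = 0x812, from which SEC_FLAVOR_POLICY()
+ * recovers 2 (gss) and SEC_FLAVOR_SUB() recovers 0x12
+ * (== SPTLRPC_SUBFLVR_KRB5P).
+ */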
+
+/*
+ * general gss flavors
+ */
+#define SPTLRPC_FLVR_GSS_NONE \
+ SEC_MAKE_RPC_FLAVOR(SPTLRPC_POLICY_GSS, \
+ SPTLRPC_SUBPOLICY_GSS_NONE, \
+ SPTLRPC_SVC_NONE)
+#define SPTLRPC_FLVR_GSS_AUTH \
+ SEC_MAKE_RPC_FLAVOR(SPTLRPC_POLICY_GSS, \
+ SPTLRPC_SUBPOLICY_GSS_NONE, \
+ SPTLRPC_SVC_AUTH)
+#define SPTLRPC_FLVR_GSS_PRIV \
+ SEC_MAKE_RPC_FLAVOR(SPTLRPC_POLICY_GSS, \
+ SPTLRPC_SUBPOLICY_GSS_NONE, \
+ SPTLRPC_SVC_PRIV)
+
+/*
+ * gss subflavors
+ */
+#define SPTLRPC_SUBFLVR_KRB5 \
+ SEC_MAKE_RPC_SUBFLAVOR(SPTLRPC_SUBPOLICY_GSS_KRB5, \
+ SPTLRPC_SVC_NONE)
+#define SPTLRPC_SUBFLVR_KRB5I \
+ SEC_MAKE_RPC_SUBFLAVOR(SPTLRPC_SUBPOLICY_GSS_KRB5, \
+ SPTLRPC_SVC_AUTH)
+#define SPTLRPC_SUBFLVR_KRB5P \
+ SEC_MAKE_RPC_SUBFLAVOR(SPTLRPC_SUBPOLICY_GSS_KRB5, \
+ SPTLRPC_SVC_PRIV)
+
+/*
+ * "end user" flavors
+ */
+#define SPTLRPC_FLVR_NULL \
+ SEC_MAKE_RPC_FLAVOR(SPTLRPC_POLICY_NULL, \
+ SPTLRPC_SUBPOLICY_NULL, \
+ SPTLRPC_SVC_NONE)
+#define SPTLRPC_FLVR_PLAIN \
+ SEC_MAKE_RPC_FLAVOR(SPTLRPC_POLICY_PLAIN, \
+ SPTLRPC_SUBPOLICY_PLAIN, \
+ SPTLRPC_SVC_NONE)
+#define SPTLRPC_FLVR_KRB5 \
+ SEC_MAKE_RPC_FLAVOR(SPTLRPC_POLICY_GSS, \
+ SPTLRPC_SUBPOLICY_GSS_KRB5, \
+ SPTLRPC_SVC_NONE)
+#define SPTLRPC_FLVR_KRB5I \
+ SEC_MAKE_RPC_FLAVOR(SPTLRPC_POLICY_GSS, \
+ SPTLRPC_SUBPOLICY_GSS_KRB5, \
+ SPTLRPC_SVC_AUTH)
+#define SPTLRPC_FLVR_KRB5P \
+ SEC_MAKE_RPC_FLAVOR(SPTLRPC_POLICY_GSS, \
+ SPTLRPC_SUBPOLICY_GSS_KRB5, \
+ SPTLRPC_SVC_PRIV)
+
+#define SPTLRPC_FLVR_INVALID (-1)
+
+#define SPTLRPC_FLVR_DEFAULT SPTLRPC_FLVR_NULL
+
+/*
+ * flavor flags (maximum 8 flags)
+ */
+#define SEC_FLAVOR_FL_BULK (1 << (0 + SEC_FLAVOR_FLAGS_OFFSET))
+#define SEC_FLAVOR_FL_USER (1 << (1 + SEC_FLAVOR_FLAGS_OFFSET))
+
+#define SEC_FLAVOR_HAS_BULK(flavor) \
+ (((flavor) & SEC_FLAVOR_FL_BULK) != 0)
+#define SEC_FLAVOR_HAS_USER(flavor) \
+ (((flavor) & SEC_FLAVOR_FL_USER) != 0)
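+
+/*
+ * Flags live above the rpc flavor bits, so a flavor carrying bulk data
+ * protection can be written e.g. (SPTLRPC_FLVR_KRB5I | SEC_FLAVOR_FL_BULK);
+ * SEC_FLAVOR_RPC() masks the flags back off, and SEC_FLAVOR_HAS_BULK()
+ * tests them.
+ */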
+
+
+struct sec_flavor_config {
+ __u32 sfc_rpc_flavor; /* main rpc flavor */
+ __u32 sfc_bulk_priv; /* bulk encryption algorithm */
+ __u32 sfc_bulk_csum; /* bulk checksum algorithm */
+ __u32 sfc_flags; /* extra flags */
+};
+
+enum lustre_part {
+ LUSTRE_CLI = 0,
+ LUSTRE_MDT,
+ LUSTRE_OST,
+ LUSTRE_MGC,
+ LUSTRE_MGS,
+};
+
+/* The maximum length of the security payload. 1024 is enough for
+ * Kerberos 5 and should suffice for future mechanisms, though this is
+ * not guaranteed. Only used by the pre-allocated request/reply pool.
+ */
+#define SPTLRPC_MAX_PAYLOAD (1024)
+
+
+struct vfs_cred {
+ uint32_t vc_uid;
+ uint32_t vc_gid;
+};
+
+struct ptlrpc_ctx_ops {
+ int (*match) (struct ptlrpc_cli_ctx *ctx,
+ struct vfs_cred *vcred);
+ int (*refresh) (struct ptlrpc_cli_ctx *ctx);
+ /*
+ * rpc data transform
+ */
+ int (*sign) (struct ptlrpc_cli_ctx *ctx,
+ struct ptlrpc_request *req);
+ int (*verify) (struct ptlrpc_cli_ctx *ctx,
+ struct ptlrpc_request *req);
+ int (*seal) (struct ptlrpc_cli_ctx *ctx,
+ struct ptlrpc_request *req);
+ int (*unseal) (struct ptlrpc_cli_ctx *ctx,
+ struct ptlrpc_request *req);
+ /*
+ * bulk transform
+ */
+ int (*wrap_bulk) (struct ptlrpc_cli_ctx *ctx,
+ struct ptlrpc_request *req,
+ struct ptlrpc_bulk_desc *desc);
+ int (*unwrap_bulk) (struct ptlrpc_cli_ctx *ctx,
+ struct ptlrpc_request *req,
+ struct ptlrpc_bulk_desc *desc);
+};
+
+#define PTLRPC_CTX_UPTODATE_BIT (0) /* uptodate */
+#define PTLRPC_CTX_DEAD_BIT (1) /* mark expired gracefully */
+#define PTLRPC_CTX_ERROR_BIT (2) /* fatal error (refresh, etc.) */
+#define PTLRPC_CTX_HASHED_BIT (8) /* in hash table */
+#define PTLRPC_CTX_ETERNAL_BIT (9) /* always valid */
+
+#define PTLRPC_CTX_UPTODATE (1 << PTLRPC_CTX_UPTODATE_BIT)
+#define PTLRPC_CTX_DEAD (1 << PTLRPC_CTX_DEAD_BIT)
+#define PTLRPC_CTX_ERROR (1 << PTLRPC_CTX_ERROR_BIT)
+#define PTLRPC_CTX_HASHED (1 << PTLRPC_CTX_HASHED_BIT)
+#define PTLRPC_CTX_ETERNAL (1 << PTLRPC_CTX_ETERNAL_BIT)
+
+#define PTLRPC_CTX_STATUS_MASK (PTLRPC_CTX_UPTODATE | \
+ PTLRPC_CTX_DEAD | \
+ PTLRPC_CTX_ERROR)
+
+struct ptlrpc_cli_ctx {
+ struct hlist_node cc_hash; /* linked into hash table */
+ atomic_t cc_refcount;
+ struct ptlrpc_sec *cc_sec;
+ struct ptlrpc_ctx_ops *cc_ops;
+ cfs_time_t cc_expire; /* in seconds */
+ unsigned long cc_flags;
+ struct vfs_cred cc_vcred;
+ spinlock_t cc_lock;
+ struct list_head cc_req_list; /* waiting reqs linked here */
+};
+
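+/*
+ * A minimal illustrative test of the status bits above (the helper name
+ * is a sketch, not part of this header): a context is usable only when
+ * UPTODATE is set and neither DEAD nor ERROR is:
+ *
+ * static inline int cli_ctx_is_uptodate(struct ptlrpc_cli_ctx *ctx)
+ * {
+ *         return (ctx->cc_flags & PTLRPC_CTX_STATUS_MASK) ==
+ *                PTLRPC_CTX_UPTODATE;
+ * }
+ */
+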
+struct ptlrpc_sec_cops {
+ /*
+ * ptlrpc_sec constructor/destructor
+ */
+ struct ptlrpc_sec * (*create_sec) (struct obd_import *imp,
+ struct ptlrpc_svc_ctx *ctx,
+ __u32 flavor,
+ unsigned long flags);
+ void (*destroy_sec) (struct ptlrpc_sec *sec);
+ /*
+ * Search for a ctx of a certain user. If this function is
+ * missing, a generic lookup will be invoked by the caller;
+ * implement it only for special needs.
+ */
+ struct ptlrpc_cli_ctx * (*lookup_ctx) (struct ptlrpc_sec *sec,
+ struct vfs_cred *vcred);
+ /*
+ * ptlrpc_cli_ctx constructor/destructor
+ */
+ struct ptlrpc_cli_ctx * (*create_ctx) (struct ptlrpc_sec *sec,
+ struct vfs_cred *vcred);
+ void (*destroy_ctx) (struct ptlrpc_sec *sec,
+ struct ptlrpc_cli_ctx *ctx);
+ /* reverse service */
+ int (*install_rctx)(struct obd_import *imp,
+ struct ptlrpc_sec *sec,
+ struct ptlrpc_cli_ctx *ctx);
+ /*
+ * request/reply buffer manipulation
+ */
+ int (*alloc_reqbuf)(struct ptlrpc_sec *sec,
+ struct ptlrpc_request *req,
+ int lustre_msg_size);
+ void (*free_reqbuf) (struct ptlrpc_sec *sec,
+ struct ptlrpc_request *req);
+ int (*alloc_repbuf)(struct ptlrpc_sec *sec,
+ struct ptlrpc_request *req,
+ int lustre_msg_size);
+ void (*free_repbuf) (struct ptlrpc_sec *sec,
+ struct ptlrpc_request *req);
+};
+
+struct ptlrpc_sec_sops {
+ int (*accept) (struct ptlrpc_request *req);
+ int (*authorize) (struct ptlrpc_request *req);
+ /* buffer manipulation */
+ int (*alloc_rs) (struct ptlrpc_request *req,
+ int msgsize);
+ void (*free_rs) (struct ptlrpc_reply_state *rs);
+ void (*free_ctx) (struct ptlrpc_svc_ctx *ctx);
+ /* reverse credential */
+ int (*install_rctx)(struct obd_import *imp,
+ struct ptlrpc_svc_ctx *ctx);
+ /* bulk transform */
+ int (*unwrap_bulk) (struct ptlrpc_request *req,
+ struct ptlrpc_bulk_desc *desc);
+ int (*wrap_bulk) (struct ptlrpc_request *req,
+ struct ptlrpc_bulk_desc *desc);
+};
+
+struct ptlrpc_sec_policy {
+ struct module *sp_owner;
+ char *sp_name;
+ __u32 sp_policy; /* policy number */
+ struct ptlrpc_sec_cops *sp_cops; /* client ops */
+ struct ptlrpc_sec_sops *sp_sops; /* server ops */
+};
+
+#define PTLRPC_SEC_FL_REVERSE 0x0001 /* reverse sec */
+#define PTLRPC_SEC_FL_ROOTONLY 0x0002 /* treat everyone as root */
+
+struct ptlrpc_sec {
+ struct ptlrpc_sec_policy *ps_policy;
+ atomic_t ps_refcount;
+ __u32 ps_flavor; /* rpc flavor */
+ unsigned long ps_flags; /* PTLRPC_SEC_FL_XX */
+ struct obd_import *ps_import; /* owning import */
+ spinlock_t ps_lock; /* protect ccache */
+ int ps_ccache_size; /* must be 2^n */
+ struct hlist_head *ps_ccache; /* ctx cache hash */
+ atomic_t ps_busy; /* busy count */
+ cfs_time_t ps_gc_interval; /* in seconds */
+ cfs_time_t ps_gc_next; /* in seconds */
+};
+
+struct ptlrpc_svc_ctx {
+ atomic_t sc_refcount;
+ struct ptlrpc_sec_policy *sc_policy;
+};
+
+/*
+ * user identity descriptor
+ */
+#define LUSTRE_MAX_GROUPS (128)
+
+struct ptlrpc_user_desc {
+ __u32 pud_uid;
+ __u32 pud_gid;
+ __u32 pud_fsuid;
+ __u32 pud_fsgid;
+ __u32 pud_cap;
+ __u32 pud_ngroups;
+ __u32 pud_groups[0];
+};
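+
+/*
+ * pud_groups[] is variable sized, holding up to LUSTRE_MAX_GROUPS
+ * entries; the on-wire size for n groups is
+ * sizeof(struct ptlrpc_user_desc) + n * sizeof(__u32).
+ */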
+
+/*
+ * bulk flavors
+ */
+enum bulk_checksum_alg {
+ BULK_CSUM_ALG_NULL = 0,
+ BULK_CSUM_ALG_CRC32,
+ BULK_CSUM_ALG_MD5,
+ BULK_CSUM_ALG_SHA1,
+ BULK_CSUM_ALG_SHA256,
+ BULK_CSUM_ALG_SHA384,
+ BULK_CSUM_ALG_SHA512,
+ BULK_CSUM_ALG_MAX
+};
+
+enum bulk_encrypt_alg {
+ BULK_PRIV_ALG_NULL = 0,
+ BULK_PRIV_ALG_ARC4,
+ BULK_PRIV_ALG_MAX
+};
+
+struct ptlrpc_bulk_sec_desc {
+ __u32 bsd_version;
+ __u32 bsd_pad;
+ __u32 bsd_csum_alg; /* checksum algorithm */
+ __u32 bsd_priv_alg; /* encrypt algorithm */
+ __u8 bsd_iv[16]; /* encrypt iv */
+ __u8 bsd_csum[0];
+};
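+
+/*
+ * bsd_csum[] is also variable sized: the descriptor occupies
+ * sizeof(struct ptlrpc_bulk_sec_desc) plus the digest length of
+ * bsd_csum_alg (e.g. 16 bytes for MD5, 20 for SHA1).
+ */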
+
+/*
+ * security policy & flavor helpers
+ */
+int sptlrpc_register_policy(struct ptlrpc_sec_policy *policy);
+int sptlrpc_unregister_policy(struct ptlrpc_sec_policy *policy);
+
+__u32 sptlrpc_name2flavor(const char *name);
+char *sptlrpc_flavor2name(__u32 flavor);
+
+static inline
+struct ptlrpc_sec_policy *sptlrpc_policy_get(struct ptlrpc_sec_policy *policy)
+{
+ __module_get(policy->sp_owner);
+ return policy;
+}
+
+static inline
+void sptlrpc_policy_put(struct ptlrpc_sec_policy *policy)
+{
+ module_put(policy->sp_owner);
+}
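+
+/*
+ * sp_owner pins the implementing module: a policy must stay loaded for
+ * as long as any ptlrpc_sec built from it exists, so users take a module
+ * reference via sptlrpc_policy_get() and drop it via sptlrpc_policy_put().
+ */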
+
+/*
+ * client credential
+ */
+struct ptlrpc_cli_ctx *sptlrpc_ctx_get(struct ptlrpc_cli_ctx *ctx);
+void sptlrpc_ctx_put(struct ptlrpc_cli_ctx *ctx, int sync);
+void sptlrpc_ctx_expire(struct ptlrpc_cli_ctx *ctx);
+void sptlrpc_ctx_replace(struct ptlrpc_sec *sec, struct ptlrpc_cli_ctx *new);
+void sptlrpc_ctx_wakeup(struct ptlrpc_cli_ctx *ctx);
+
+/*
+ * client wrap/buffers
+ */
+int sptlrpc_cli_wrap_request(struct ptlrpc_request *req);
+int sptlrpc_cli_unwrap_reply(struct ptlrpc_request *req);
+int sptlrpc_cli_alloc_reqbuf(struct ptlrpc_request *req, int msgsize);
+int sptlrpc_cli_alloc_repbuf(struct ptlrpc_request *req, int msgsize);
+void sptlrpc_cli_free_reqbuf(struct ptlrpc_request *req);
+void sptlrpc_cli_free_repbuf(struct ptlrpc_request *req);
+void sptlrpc_request_out_callback(struct ptlrpc_request *req);
+
+/*
+ * higher-level interfaces for imports & requests
+ */
+int sptlrpc_import_get_sec(struct obd_import *imp, struct ptlrpc_svc_ctx *svc_ctx,
+ __u32 flavor, unsigned long flags);
+void sptlrpc_import_put_sec(struct obd_import *imp);
+int sptlrpc_import_check_ctx(struct obd_import *imp);
+void sptlrpc_import_flush_root_ctx(struct obd_import *imp);
+void sptlrpc_import_flush_my_ctx(struct obd_import *imp);
+int sptlrpc_req_get_ctx(struct ptlrpc_request *req);
+void sptlrpc_req_put_ctx(struct ptlrpc_request *req);
+int sptlrpc_req_refresh_ctx(struct ptlrpc_request *req, long timeout);
+void sptlrpc_req_set_flavor(struct ptlrpc_request *req, int opcode);
+
+int sptlrpc_parse_flavor(enum lustre_part from, enum lustre_part to,
+ char *str, struct sec_flavor_config *conf);
+/* misc */
+const char * sec2target_str(struct ptlrpc_sec *sec);
+int sptlrpc_lprocfs_rd(char *page, char **start, off_t off, int count,
+ int *eof, void *data);
+
+/*
+ * server side
+ */
+enum secsvc_accept_res {
+ SECSVC_OK = 0,
+ SECSVC_COMPLETE,
+ SECSVC_DROP,
+};
+
+int sptlrpc_svc_unwrap_request(struct ptlrpc_request *req);
+int sptlrpc_svc_alloc_rs(struct ptlrpc_request *req, int msglen);
+int sptlrpc_svc_wrap_reply(struct ptlrpc_request *req);
+void sptlrpc_svc_free_rs(struct ptlrpc_reply_state *rs);
+void sptlrpc_svc_ctx_addref(struct ptlrpc_request *req);
+void sptlrpc_svc_ctx_decref(struct ptlrpc_request *req);
+
+/*
+ * reverse context
+ */
+int sptlrpc_svc_install_rvs_ctx(struct obd_import *imp,
+ struct ptlrpc_svc_ctx *ctx);
+int sptlrpc_cli_install_rvs_ctx(struct obd_import *imp,
+ struct ptlrpc_cli_ctx *ctx);
+
+/* bulk security api */
+int sptlrpc_cli_wrap_bulk(struct ptlrpc_request *req,
+ struct ptlrpc_bulk_desc *desc);
+int sptlrpc_cli_unwrap_bulk_read(struct ptlrpc_request *req,
+ int nob, obd_count pg_count,
+ struct brw_page **pga);
+int sptlrpc_cli_unwrap_bulk_write(struct ptlrpc_request *req,
+ struct ptlrpc_bulk_desc *desc);
+int sptlrpc_svc_wrap_bulk(struct ptlrpc_request *req,
+ struct ptlrpc_bulk_desc *desc);
+int sptlrpc_svc_unwrap_bulk(struct ptlrpc_request *req,
+ struct ptlrpc_bulk_desc *desc);
+
+/* user descriptor helpers */
+int sptlrpc_user_desc_size(void);
+int sptlrpc_pack_user_desc(struct lustre_msg *msg, int offset);
+int sptlrpc_unpack_user_desc(struct lustre_msg *msg, int offset);
+
+/* bulk helpers (internal use only by policies) */
+int bulk_sec_desc_size(__u32 csum_alg, int request, int read);
+int bulk_sec_desc_unpack(struct lustre_msg *msg, int offset);
+
+int bulk_csum_cli_request(struct ptlrpc_bulk_desc *desc, int read,
+ __u32 alg, struct lustre_msg *rmsg, int roff);
+int bulk_csum_cli_reply(struct ptlrpc_bulk_desc *desc, int read,
+ struct lustre_msg *rmsg, int roff,
+ struct lustre_msg *vmsg, int voff);
+int bulk_csum_svc(struct ptlrpc_bulk_desc *desc, int read,
+ struct lustre_msg *vmsg, int voff,
+ struct lustre_msg *rmsg, int roff);
+#endif /* _LUSTRE_SEC_H_ */
int cl_max_mds_cookiesize;
kdev_t cl_sandev;
+ /* security configuration */
+ struct sec_flavor_config cl_sec_conf;
+
//struct llog_canceld_ctxt *cl_llcd; /* it's included by obd_llog_ctxt */
void *cl_llcd_offset;
};
/* get/set_info keys */
-#define KEY_MDS_CONN "mds_conn"
-#define KEY_NEXT_ID "next_id"
-#define KEY_LOVDESC "lovdesc"
-#define KEY_INIT_RECOV "initial_recov"
-#define KEY_INIT_RECOV_BACKUP "init_recov_bk"
+#define KEY_MDS_CONN "mds_conn"
+#define KEY_NEXT_ID "next_id"
+#define KEY_LOVDESC "lovdesc"
+#define KEY_INIT_RECOV "initial_recov"
+#define KEY_INIT_RECOV_BACKUP "init_recov_bk"
+#define KEY_FLUSH_CTX "flush_ctx"
struct lu_context;
#include <lustre_mds.h>
#include <lustre_dlm.h>
#include <lustre_net.h>
+#include <lustre_sec.h>
#include <lustre_ver.h>
/* @priority: if non-zero, move the selected to the list head
RETURN(rc);
}
+static
+void destroy_import(struct obd_import *imp)
+{
+ /* Drop the security policy instance only after all RPCs have
+ * finished or been aborted, so that all busy credentials are
+ * released.
+ */
+ class_import_get(imp);
+ class_destroy_import(imp);
+ sptlrpc_import_put_sec(imp);
+ class_import_put(imp);
+}
+
/* configure an RPC client OBD device
*
* lcfg parameters:
sema_init(&cli->cl_sem, 1);
sema_init(&cli->cl_mgc_sem, 1);
+ cli->cl_sec_conf.sfc_rpc_flavor = SPTLRPC_FLVR_NULL;
+ cli->cl_sec_conf.sfc_bulk_csum = BULK_CSUM_ALG_NULL;
+ cli->cl_sec_conf.sfc_bulk_priv = BULK_PRIV_ALG_NULL;
+ cli->cl_sec_conf.sfc_flags = 0;
cli->cl_conn_count = 0;
memcpy(server_uuid.uuid, lustre_cfg_buf(lcfg, 2),
min_t(unsigned int, LUSTRE_CFG_BUFLEN(lcfg, 2),
if (rc != 0)
GOTO(out_ldlm, rc);
+ rc = sptlrpc_import_get_sec(imp, NULL, cli->cl_sec_conf.sfc_rpc_flavor,
+ cli->cl_sec_conf.sfc_flags);
+ if (rc)
+ GOTO(out_ldlm, rc);
+
ocd = &imp->imp_connect_data;
if (data) {
*ocd = *data;
ptlrpc_invalidate_import(imp);
ptlrpc_free_rq_pool(imp->imp_rq_pool);
- class_destroy_import(imp);
+ destroy_import(imp);
cli->cl_import = NULL;
EXIT;
req->rq_self,
&remote_uuid);
- if (lustre_msg_get_op_flags(req->rq_repmsg) & MSG_CONNECT_RECONNECT)
+ if (lustre_msg_get_op_flags(req->rq_repmsg) & MSG_CONNECT_RECONNECT) {
+ LASSERT(export->exp_imp_reverse);
+ sptlrpc_svc_install_rvs_ctx(export->exp_imp_reverse,
+ req->rq_svc_ctx);
GOTO(out, rc = 0);
+ }
if (target->obd_recovering)
target->obd_connected_clients++;
sizeof conn);
if (export->exp_imp_reverse != NULL)
- class_destroy_import(export->exp_imp_reverse);
+ destroy_import(export->exp_imp_reverse);
revimp = export->exp_imp_reverse = class_new_import(target);
revimp->imp_connection = ptlrpc_connection_addref(export->exp_connection);
revimp->imp_client = &export->exp_obd->obd_ldlm_client;
lustre_msg_add_op_flags(req->rq_repmsg, MSG_CONNECT_NEXT_VER);
}
+ rc = sptlrpc_import_get_sec(revimp, req->rq_svc_ctx,
+ req->rq_sec_flavor, 0);
+ if (rc) {
+ CERROR("Failed to get sec for reverse import: %d\n", rc);
+ export->exp_imp_reverse = NULL;
+ class_destroy_import(revimp);
+ }
+
class_import_put(revimp);
out:
if (export)
/* exports created from last_rcvd data, and "fake"
exports created by lctl don't have an import */
if (exp->exp_imp_reverse != NULL)
- class_destroy_import(exp->exp_imp_reverse);
+ destroy_import(exp->exp_imp_reverse);
/* We cancel locks at disconnect time, but this will catch any locks
* granted in a race with recovery-induced disconnect. */
*/
-static void target_release_saved_req(struct ptlrpc_request *req)
+static
+struct ptlrpc_request *target_save_req(struct ptlrpc_request *src)
{
- if (req->rq_reply_state != NULL) {
- ptlrpc_rs_decref(req->rq_reply_state);
- /* req->rq_reply_state = NULL; */
+ struct ptlrpc_request *req;
+ struct lustre_msg *reqmsg;
+
+ OBD_ALLOC(req, sizeof(*req));
+ if (!req)
+ return NULL;
+
+ OBD_ALLOC(reqmsg, src->rq_reqlen);
+ if (!reqmsg) {
+ OBD_FREE(req, sizeof(*req));
+ return NULL;
}
+ memcpy(req, src, sizeof(*req));
+ memcpy(reqmsg, src->rq_reqmsg, src->rq_reqlen);
+ req->rq_reqmsg = reqmsg;
+
+ class_export_get(req->rq_export);
+ CFS_INIT_LIST_HEAD(&req->rq_list);
+ sptlrpc_svc_ctx_addref(req);
+ if (req->rq_reply_state)
+ ptlrpc_rs_addref(req->rq_reply_state);
+
+ /* The repmsg has been taken over; in privacy mode it might point
+ * to invalid data. Prevent further access to it.
+ */
+ src->rq_repmsg = NULL;
+ src->rq_replen = 0;
+
+ return req;
+}
+
+static
+void target_release_saved_req(struct ptlrpc_request *req)
+{
+ if (req->rq_reply_state) {
+ ptlrpc_rs_decref(req->rq_reply_state);
+ req->rq_reply_state = NULL;
+ }
+ sptlrpc_svc_ctx_decref(req);
class_export_put(req->rq_export);
+
OBD_FREE(req->rq_reqmsg, req->rq_reqlen);
- OBD_FREE(req, sizeof *req);
+ OBD_FREE(req, sizeof(*req));
}
static void target_finish_recovery(struct obd_device *obd)
reset_recovery_timer(obd);
/* bug 1580: decide how to properly sync() in recovery */
//mds_fsync_super(obd->u.obt.obt_sb);
- class_export_put(req->rq_export);
- if (req->rq_reply_state != NULL) {
- ptlrpc_rs_decref(req->rq_reply_state);
- /* req->rq_reply_state = NULL; */
- }
- OBD_FREE(req->rq_reqmsg, req->rq_reqlen);
- OBD_FREE(req, sizeof *req);
+ target_release_saved_req(req);
+
spin_lock_bh(&obd->obd_processing_task_lock);
obd->obd_next_recovery_transno++;
if (list_empty(&obd->obd_recovery_queue)) {
int inserted = 0;
__u64 transno = lustre_msg_get_transno(req->rq_reqmsg);
struct ptlrpc_request *saved_req;
- struct lustre_msg *reqmsg;
/* CAVEAT EMPTOR: The incoming request message has been swabbed
* (i.e. buflens etc are in my own byte order), but type-dependent
}
/* XXX If I were a real man, these LBUGs would be sane cleanups. */
- /* XXX just like the request-dup code in queue_final_reply */
- OBD_ALLOC(saved_req, sizeof *saved_req);
+ saved_req = target_save_req(req);
if (!saved_req)
LBUG();
- OBD_ALLOC(reqmsg, req->rq_reqlen);
- if (!reqmsg)
- LBUG();
spin_lock_bh(&obd->obd_processing_task_lock);
/* Processing the queue right now, don't re-add. */
LASSERT(list_empty(&req->rq_list));
spin_unlock_bh(&obd->obd_processing_task_lock);
- OBD_FREE(reqmsg, req->rq_reqlen);
- OBD_FREE(saved_req, sizeof *saved_req);
+
+ target_release_saved_req(saved_req);
return 1;
}
(MSG_RESENT | MSG_REPLAY)) {
DEBUG_REQ(D_ERROR, req, "dropping resent queued req");
spin_unlock_bh(&obd->obd_processing_task_lock);
- OBD_FREE(reqmsg, req->rq_reqlen);
- OBD_FREE(saved_req, sizeof *saved_req);
+
+ target_release_saved_req(saved_req);
return 0;
}
- memcpy(saved_req, req, sizeof *req);
- memcpy(reqmsg, req->rq_reqmsg, req->rq_reqlen);
req = saved_req;
- req->rq_reqmsg = reqmsg;
- class_export_get(req->rq_export);
- CFS_INIT_LIST_HEAD(&req->rq_list);
/* XXX O(n^2) */
list_for_each(tmp, &obd->obd_recovery_queue) {
{
struct obd_device *obd = target_req2obd(req);
struct ptlrpc_request *saved_req;
- struct lustre_msg *reqmsg;
int recovery_done = 0;
LASSERT ((rc == 0) == (req->rq_reply_state != NULL));
LASSERT (!req->rq_reply_state->rs_difficult);
LASSERT(list_empty(&req->rq_list));
- /* XXX a bit like the request-dup code in queue_recovery_request */
- OBD_ALLOC(saved_req, sizeof *saved_req);
+
+ saved_req = target_save_req(req);
if (!saved_req)
LBUG();
- OBD_ALLOC(reqmsg, req->rq_reqlen);
- if (!reqmsg)
- LBUG();
- *saved_req = *req;
- memcpy(reqmsg, req->rq_reqmsg, req->rq_reqlen);
/* Don't race cleanup */
spin_lock_bh(&obd->obd_processing_task_lock);
if (obd->obd_stopping) {
spin_unlock_bh(&obd->obd_processing_task_lock);
- OBD_FREE(reqmsg, req->rq_reqlen);
- OBD_FREE(saved_req, sizeof *req);
+ target_release_saved_req(saved_req);
req->rq_status = -ENOTCONN;
/* rv is ignored anyhow */
return -ENOTCONN;
}
- ptlrpc_rs_addref(req->rq_reply_state); /* +1 ref for saved reply */
+
req = saved_req;
- req->rq_reqmsg = reqmsg;
- class_export_get(req->rq_export);
list_add(&req->rq_list, &obd->obd_delayed_reply_queue);
/* only count the first "replay over" request from each
*
* Returns 1 if it finds an already-existing lock that is compatible; in this
* case, lockh is filled in with a addref()ed lock
+ *
+ * We also check the security context; if that check fails we simply return 0
+ * (to keep the caller code unchanged), and the context failure will be
+ * discovered by the caller later.
*/
int ldlm_lock_match(struct ldlm_namespace *ns, int flags,
struct ldlm_res_id *res_id, ldlm_type_t type,
res_id->name[2] : policy->l_extent.start,
(type == LDLM_PLAIN || type == LDLM_IBITS) ?
res_id->name[3] : policy->l_extent.end);
+
+ /* check user's security context */
+ if (lock->l_conn_export &&
+ sptlrpc_import_check_ctx(
+ class_exp2cliimp(lock->l_conn_export))) {
+ if (!(flags & LDLM_FL_TEST_LOCK))
+ ldlm_lock_decref_internal(lock, mode);
+ rc = 0;
+ }
+
+ if (flags & LDLM_FL_TEST_LOCK)
+ LDLM_LOCK_PUT(lock);
} else if (!(flags & LDLM_FL_TEST_LOCK)) {/*less verbose for test-only*/
LDLM_DEBUG_NOLOCK("not matched ns %p type %u mode %u res "
LPU64"/"LPU64" ("LPU64" "LPU64")", ns,
}
if (old_lock)
LDLM_LOCK_PUT(old_lock);
- if (flags & LDLM_FL_TEST_LOCK && rc)
- LDLM_LOCK_PUT(lock);
return rc;
}
strncpy(current->comm, comm, sizeof(current->comm));
current->pid = getpid();
+ current->uid = getuid();
+ current->gid = getgid();
current->fsuid = geteuid();
current->fsgid = getegid();
memset(¤t->pending, 0, sizeof(current->pending));
RETURN (-EFAULT);
RETURN(0);
}
+ case LL_IOC_FLUSHCTX:
+ RETURN(ll_flush_ctx(inode));
+
default:
RETURN(obd_iocontrol(cmd, sbi->ll_dt_exp,0,NULL,(void *)arg));
}
case EXT3_IOC_SETVERSION_OLD:
case EXT3_IOC_SETVERSION:
*/
+ case LL_IOC_FLUSHCTX:
+ RETURN(ll_flush_ctx(inode));
default:
RETURN(obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
(void *)arg));
void ll_delete_inode(struct inode *inode);
int ll_iocontrol(struct inode *inode, struct file *file,
unsigned int cmd, unsigned long arg);
+int ll_flush_ctx(struct inode *inode);
void ll_umount_begin(struct super_block *sb);
int ll_remount_fs(struct super_block *sb, int *flags, char *data);
int ll_prep_inode(struct inode **inode, struct ptlrpc_request *req,
RETURN(0);
}
+int ll_flush_ctx(struct inode *inode)
+{
+ struct ll_sb_info *sbi = ll_i2sbi(inode);
+
+ CDEBUG(D_SEC, "flush context for user %d\n", current->uid);
+
+ obd_set_info_async(sbi->ll_md_exp,
+ sizeof(KEY_FLUSH_CTX) - 1, KEY_FLUSH_CTX,
+ 0, NULL, NULL);
+ obd_set_info_async(sbi->ll_dt_exp,
+ sizeof(KEY_FLUSH_CTX) - 1, KEY_FLUSH_CTX,
+ 0, NULL, NULL);
+ return 0;
+}
+
/* umount -f client means force down, don't save state */
void ll_umount_begin(struct super_block *sb)
{
}
lmv = &obd->u.lmv;
- /* maybe this could be default */
- if ((keylen == strlen("sec") && strcmp(key, "sec") == 0) ||
- (keylen == strlen("sec_flags") && strcmp(key, "sec_flags") == 0) ||
- (keylen == strlen("nllu") && strcmp(key, "nllu") == 0)) {
- struct obd_export *exp;
- int err, i;
-
- spin_lock(&lmv->lmv_lock);
- for (i = 0, tgt = lmv->tgts; i < lmv->desc.ld_tgt_count;
- i++, tgt++) {
- exp = tgt->ltd_exp;
- /* during setup time the connections to mdc might
- * haven't been established.
- */
- if (exp == NULL) {
- struct obd_device *tgt_obd;
-
- tgt_obd = class_find_client_obd(&tgt->uuid,
- LUSTRE_MDC_NAME,
- &obd->obd_uuid);
- if (!tgt_obd) {
- CERROR("can't set info %s, "
- "device %s not attached?\n",
- (char *) key, tgt->uuid.uuid);
- rc = -EINVAL;
- continue;
- }
- exp = tgt_obd->obd_self_export;
- }
-
- err = obd_set_info_async(exp, keylen, key, vallen, val, set);
- if (!rc)
- rc = err;
- }
- spin_unlock(&lmv->lmv_lock);
+ if (KEY_IS(KEY_FLUSH_CTX)) {
+ int i, err = 0;
- RETURN(rc);
- }
- if (((keylen == strlen("flush_cred") &&
- strcmp(key, "flush_cred") == 0)) ||
- ((keylen == strlen("crypto_type") &&
- strcmp(key, "crypto_type") == 0))) {
- int i;
+ for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
+ tgt = &lmv->tgts[i];
- for (i = 0, tgt = lmv->tgts; i < lmv->desc.ld_tgt_count;
- i++, tgt++) {
if (!tgt->ltd_exp)
continue;
- rc = obd_set_info_async(tgt->ltd_exp,
- keylen, key, vallen,
- val, set);
- if (rc)
- RETURN(rc);
+
+ err = obd_set_info_async(tgt->ltd_exp,
+ keylen, key, vallen, val, set);
+ if (err && rc == 0)
+ rc = err;
}
- RETURN(0);
+ RETURN(rc);
}
RETURN(-EINVAL);
if (KEY_IS("unlinked")) {
if (vallen != 0 && KEY_IS("unlinked"))
GOTO(out, rc = -EINVAL);
+ } else if (KEY_IS(KEY_FLUSH_CTX)) {
+ /* fall through */
} else {
GOTO(out, rc = -EINVAL);
}
{ "mds_conn_uuid", lprocfs_rd_conn_uuid, 0, 0 },
{ "max_rpcs_in_flight", mdc_rd_max_rpcs_in_flight,
mdc_wr_max_rpcs_in_flight, 0 },
+ { "sptlrpc", sptlrpc_lprocfs_rd, 0, 0 },
{ 0 }
};
RETURN(rc);
}
+ if (KEY_IS(KEY_FLUSH_CTX)) {
+ sptlrpc_import_flush_my_ctx(imp);
+ RETURN(0);
+ }
+
RETURN(rc);
}
case MDS_CONNECT:
case MDS_DISCONNECT:
case OBD_PING:
+ case SEC_CTX_INIT:
+ case SEC_CTX_INIT_CONT:
+ case SEC_CTX_FINI:
rc = lustre_msg_check_version(msg, LUSTRE_OBD_VERSION);
if (rc)
CERROR("bad opc %u version %08x, expecting %08x\n",
ENTRY;
- if (lustre_msg_get_opc(req->rq_reqmsg) == MDS_CONNECT)
+ switch (lustre_msg_get_opc(req->rq_reqmsg)) {
+ case MDS_CONNECT:
+ case SEC_CTX_INIT:
+ case SEC_CTX_INIT_CONT:
+ case SEC_CTX_FINI:
RETURN(+1);
+ }
if (req->rq_export == NULL) {
CERROR("operation %d on unconnected MDS from %s\n",
static struct mdt_handler mdt_llog_ops[] = {
};
+static struct mdt_handler mdt_sec_ops[] = {
+};
+
static struct mdt_opc_slice mdt_regular_handlers[] = {
{
.mos_opc_start = MDS_GETATTR,
.mos_hs = mdt_llog_ops
},
{
+ .mos_opc_start = SEC_CTX_INIT,
+ .mos_opc_end = SEC_LAST_OPC,
+ .mos_hs = mdt_sec_ops
+ },
+ {
.mos_hs = NULL
}
};
#include <lustre_disk.h>
#include <lustre_param.h>
#include <lustre_ver.h>
+#include <lustre_sec.h>
#include "mgs_internal.h"
/********************** Class fns ********************/
return record_base(obd,llh,devname,0,LCFG_SETUP,s1,s2,s3,s4);
}
+static inline int record_sec_flavor(struct obd_device *obd,
+ struct llog_handle *llh, char *devname,
+ struct sec_flavor_config *conf)
+{
+ struct lustre_cfg_bufs bufs;
+ struct lustre_cfg *lcfg;
+ int rc;
+
+ lustre_cfg_bufs_reset(&bufs, devname);
+ lustre_cfg_bufs_set(&bufs, 1, conf, sizeof(*conf));
+ lcfg = lustre_cfg_new(LCFG_SEC_FLAVOR, &bufs);
+
+ rc = record_lcfg(obd, llh, lcfg);
+
+ lustre_cfg_free(lcfg);
+ return rc;
+}
+
static int record_lov_setup(struct obd_device *obd, struct llog_handle *llh,
char *devname, struct lov_desc *desc)
{
}
struct temp_comp
{
- struct mgs_target_info *comp_tmti;
- struct mgs_target_info *comp_mti;
- struct fs_db *comp_fsdb;
- struct obd_device *comp_obd;
+ struct mgs_target_info *comp_tmti;
+ struct mgs_target_info *comp_mti;
+ struct fs_db *comp_fsdb;
+ struct obd_device *comp_obd;
+ struct sec_flavor_config comp_sec;
};
static int mgs_write_log_mdc_to_mdt(struct obd_device *, struct fs_db *,
- struct mgs_target_info *, char *);
+ struct mgs_target_info *,
+ struct sec_flavor_config *, char *);
static int mgs_steal_llog_handler(struct llog_handle *llh,
struct llog_rec_hdr *rec,
int cfg_len = rec->lrh_len;
char *cfg_buf = (char*) (rec + 1);
struct lustre_cfg *lcfg;
+ struct sec_flavor_config *sec_conf;
int rc = 0;
struct llog_handle *mdt_llh = NULL;
static int got_an_osc_or_mdc = 0;
tmti = ((struct temp_comp*)data)->comp_tmti;
fsdb = ((struct temp_comp*)data)->comp_fsdb;
obd = ((struct temp_comp*)data)->comp_obd;
+ sec_conf = &((struct temp_comp*)data)->comp_sec;
if (rec->lrh_type != OBD_CFG_REC) {
CERROR("unhandled lrh_type: %#x\n", rec->lrh_type);
RETURN(rc);
}
+ if (lcfg->lcfg_command == LCFG_SEC_FLAVOR) {
+ memcpy(sec_conf, lustre_cfg_buf(lcfg, 1), sizeof(*sec_conf));
+
+ RETURN(rc);
+ }
+
if (lcfg->lcfg_command == LCFG_ADD_MDC) {
int index;
if (sscanf(lustre_cfg_buf(lcfg, 2), "%d", &index) != 1)
RETURN (-EINVAL);
-
+
memcpy(tmti->mti_fsname, mti->mti_fsname,
strlen(mti->mti_fsname));
tmti->mti_stripe_index = index;
-
- mgs_write_log_mdc_to_mdt(obd, fsdb, tmti, mti->mti_svname);
+
+ mgs_write_log_mdc_to_mdt(obd, fsdb, tmti, sec_conf,
+ mti->mti_svname);
memset(tmti, 0, sizeof(*tmti));
RETURN(rc);
}
return rc;
}
+static
+void extract_sec_flavor(char *params, char *key, char **ptr)
+{
+ char *val = NULL, *tail;
+ int len;
+
+ *ptr = NULL;
+
+ if (class_find_param(params, key, &val))
+ return;
+
+ tail = strchr(val, ' ');
+ if (tail == NULL)
+ len = strlen(val);
+ else
+ len = tail - val;
+
+ OBD_ALLOC(*ptr, len + 1);
+ if (*ptr == NULL)
+ return;
+
+ memcpy(*ptr, val, len);
+ (*ptr)[len] = '\0';
+}
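+
+/*
+ * e.g. (illustrative): with params "security.rpc.mdt=krb5i failover.node=..."
+ * and key PARAM_SEC_RPC_MDT, this yields a freshly allocated "krb5i";
+ * callers free it with name_destroy().
+ */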
/***************************************BEGIN PROTO****************************/
static int mgs_write_log_mdc_to_lmv(struct obd_device *obd, struct fs_db *fsdb,
struct mgs_target_info *mti,
+ struct sec_flavor_config *sec_conf,
char *logname, char *lmvname)
{
struct llog_handle *llh = NULL;
rc = record_attach(obd, llh, mdcname, LUSTRE_MDC_NAME, lmvuuid);
rc = record_setup(obd, llh, mdcname, mti->mti_uuid, nodeuuid, 0, 0);
+ rc = record_sec_flavor(obd, llh, mdcname, sec_conf);
rc = mgs_write_log_failnids(obd, mti, llh, mdcname);
snprintf(index, sizeof(index), "%d", mti->mti_stripe_index);
rc = record_mdc_add(obd, llh, lmvname, mdcuuid, mti->mti_uuid,
/* add new mdc to already existent MDS */
static int mgs_write_log_mdc_to_mdt(struct obd_device *obd, struct fs_db *fsdb,
- struct mgs_target_info *mti, char *logname)
+ struct mgs_target_info *mti,
+ struct sec_flavor_config *sec_conf,
+ char *logname)
{
struct llog_handle *llh = NULL;
char *nodeuuid, *mdcname, *mdcuuid, *mdtuuid;
}
rc = record_attach(obd, llh, mdcname, LUSTRE_MDC_NAME, mdcuuid);
rc = record_setup(obd, llh, mdcname, mti->mti_uuid, nodeuuid, 0, 0);
+ rc = record_sec_flavor(obd, llh, mdcname, sec_conf);
rc = mgs_write_log_failnids(obd, mti, llh, mdcname);
snprintf(index, sizeof(index), "%d", idx);
static int mgs_write_log_mdt(struct obd_device *obd, struct fs_db *fsdb,
struct mgs_target_info *mti)
{
- char *cliname;
+ char *cliname, *sec;
struct llog_handle *llh = NULL;
struct temp_comp comp = { 0 };
+ struct sec_flavor_config sec_conf_mdt, sec_conf_cli;
char mdt_index[9];
int rc, i = 0;
ENTRY;
"%s_UUID", mti->mti_svname);
}
+ /* security flavor */
+ extract_sec_flavor(mti->mti_params, PARAM_SEC_RPC_MDT, &sec);
+ rc = sptlrpc_parse_flavor(LUSTRE_MDT, LUSTRE_MDT, sec, &sec_conf_mdt);
+ name_destroy(sec);
+ if (rc)
+ RETURN(rc);
+
+ extract_sec_flavor(mti->mti_params, PARAM_SEC_RPC_CLI, &sec);
+ rc = sptlrpc_parse_flavor(LUSTRE_CLI, LUSTRE_MDT, sec, &sec_conf_cli);
+ name_destroy(sec);
+ if (rc)
+ RETURN(rc);
+
/* add mdt */
rc = mgs_write_log_mdt0(obd, fsdb, mti);
rc = mgs_steal_llog_for_mdt_from_client(obd, cliname,
&comp);
- rc = mgs_write_log_mdc_to_lmv(obd, fsdb, mti, cliname,
- fsdb->fsdb_clilmv);
+ rc = mgs_write_log_mdc_to_lmv(obd, fsdb, mti, &sec_conf_cli,
+ cliname, fsdb->fsdb_clilmv);
/* add mountopts */
rc = record_start_log(obd, &llh, cliname);
if (rc)
sprintf(mdt_index,"-MDT%04x",i);
name_create(&mdtname, mti->mti_fsname, mdt_index);
- rc = mgs_write_log_mdc_to_mdt(obd, fsdb, mti, mdtname);
+ rc = mgs_write_log_mdc_to_mdt(obd, fsdb, mti,
+ &sec_conf_mdt, mdtname);
name_destroy(mdtname);
}
}
/* Add the ost info to the client/mdt lov */
static int mgs_write_log_osc_to_lov(struct obd_device *obd, struct fs_db *fsdb,
struct mgs_target_info *mti,
- char *logname, char *lovname, int flags)
+ char *logname, char *lovname,
+ struct sec_flavor_config *sec_conf,
+ int flags)
{
struct llog_handle *llh = NULL;
char *nodeuuid, *svname, *oscname, *oscuuid, *lovuuid;
}
rc = record_attach(obd, llh, oscname, LUSTRE_OSC_NAME, lovuuid);
rc = record_setup(obd, llh, oscname, mti->mti_uuid, nodeuuid, 0, 0);
+ rc = record_sec_flavor(obd, llh, oscname, sec_conf);
rc = mgs_write_log_failnids(obd, mti, llh, oscname);
snprintf(index, sizeof(index), "%d", mti->mti_stripe_index);
rc = record_lov_add(obd, llh, lovname, mti->mti_uuid, index, "1");
struct mgs_target_info *mti)
{
struct llog_handle *llh = NULL;
- char *logname, *lovname;
+ char *logname, *lovname, *sec;
char mdt_index[9];
char *ptr = mti->mti_params;
+ struct sec_flavor_config sec_conf_mdt, sec_conf_cli;
int rc, flags = 0, failout = 0, i;
ENTRY;
" all logs.\n", mti->mti_svname);
RETURN(-EALREADY);
}
+
+ /* security flavors */
+ extract_sec_flavor(mti->mti_params, PARAM_SEC_RPC_MDT, &sec);
+ rc = sptlrpc_parse_flavor(LUSTRE_MDT, LUSTRE_OST, sec, &sec_conf_mdt);
+ name_destroy(sec);
+ if (rc)
+ RETURN(rc);
+
+ extract_sec_flavor(mti->mti_params, PARAM_SEC_RPC_CLI, &sec);
+ rc = sptlrpc_parse_flavor(LUSTRE_CLI, LUSTRE_OST, sec, &sec_conf_cli);
+ name_destroy(sec);
+ if (rc)
+ RETURN(rc);
+
/*
attach obdfilter ost1 ost1_UUID
setup /dev/loop2 ldiskfs f|n errors=remount-ro,user_xattr
name_create(&logname, mti->mti_fsname, mdt_index);
name_create(&lovname, logname, "-mdtlov");
mgs_write_log_osc_to_lov(obd, fsdb, mti, logname,
- lovname, flags);
+ lovname, &sec_conf_mdt, flags);
name_destroy(logname);
name_destroy(lovname);
}
/* Append ost info to the client log */
name_create(&logname, mti->mti_fsname, "-client");
- mgs_write_log_osc_to_lov(obd, fsdb, mti, logname, fsdb->fsdb_clilov, 0);
+ mgs_write_log_osc_to_lov(obd, fsdb, mti, logname, fsdb->fsdb_clilov,
+ &sec_conf_cli, 0);
name_destroy(logname);
RETURN(rc);
if (class_match_param(ptr, PARAM_FAILMODE, NULL) == 0)
GOTO(end_while, rc);
+ /* Processed in mgs_write_log_mdt/mgs_write_log_ost */
+ if (class_match_param(ptr, PARAM_SEC_RPC_MDT, NULL) == 0 ||
+ class_match_param(ptr, PARAM_SEC_RPC_CLI, NULL) == 0)
+ GOTO(end_while, rc);
+
if (class_match_param(ptr, PARAM_FAILNODE, NULL) == 0) {
/* Add a failover nidlist */
rc = 0;
}
LASSERT(list_empty(&import->imp_handle.h_link));
+ LASSERT(import->imp_sec == NULL);
class_decref(import->imp_obd);
OBD_FREE(import, sizeof(*import));
EXIT;
RETURN(rc);
}
+int class_sec_flavor(struct obd_device *obd, struct lustre_cfg *lcfg)
+{
+ struct sec_flavor_config *conf;
+ ENTRY;
+
+ if (strcmp(obd->obd_type->typ_name, LUSTRE_MDC_NAME) &&
+ strcmp(obd->obd_type->typ_name, LUSTRE_OSC_NAME)) {
+ CERROR("Can't set security flavor on obd %s\n",
+ obd->obd_type->typ_name);
+ RETURN(-EINVAL);
+ }
+
+ if (LUSTRE_CFG_BUFLEN(lcfg, 1) != sizeof(*conf)) {
+ CERROR("invalid data\n");
+ RETURN(-EINVAL);
+ }
+
+ conf = &obd->u.cli.cl_sec_conf;
+ memcpy(conf, lustre_cfg_buf(lcfg, 1), sizeof(*conf));
+
+#ifdef __BIG_ENDIAN
+ __swab32s(&conf->sfc_rpc_flavor);
+ __swab32s(&conf->sfc_bulk_csum);
+ __swab32s(&conf->sfc_bulk_priv);
+ __swab32s(&conf->sfc_flags);
+#endif
+
+ RETURN(0);
+}
+
CFS_LIST_HEAD(lustre_profile_list);
struct lustre_profile *class_get_profile(const char * prof)
err = class_del_conn(obd, lcfg);
GOTO(out, err = 0);
}
+ case LCFG_SEC_FLAVOR: {
+ err = class_sec_flavor(obd, lcfg);
+ GOTO(out, err = 0);
+ }
default: {
err = obd_process_config(obd, sizeof(*lcfg), lcfg);
GOTO(out, err);
return(0);
}
+static
+int mti_set_sec_opts(struct mgs_target_info *mti, struct lustre_mount_data *lmd)
+{
+ char *s1, *s2;
+
+ if (lmd->lmd_sec_mdt == NULL && lmd->lmd_sec_cli == NULL) {
+ /* Just let the on-disk params do their work, under the
+ * assumption that any change of on-disk data via tune2fs
+ * leads the server to rewrite the log.
+ */
+ return 0;
+ }
+
+ /* filter out existing sec options */
+ s1 = mti->mti_params;
+ while (*s1) {
+ int clear;
+
+ while (*s1 == ' ')
+ s1++;
+
+ if (strncmp(s1, PARAM_SEC_RPC_MDT,
+ sizeof(PARAM_SEC_RPC_MDT) - 1) == 0 ||
+ strncmp(s1, PARAM_SEC_RPC_CLI,
+ sizeof(PARAM_SEC_RPC_CLI) - 1) == 0)
+ clear = 1;
+ else
+ clear = 0;
+
+ s2 = strchr(s1, ' ');
+ if (s2 == NULL) {
+ if (clear)
+ *s1 = '\0';
+ break;
+ }
+ s2++;
+ if (clear)
+ memmove(s1, s2, strlen(s2) + 1);
+ else
+ s1 = s2;
+ }
+
+ /* append sec options from lmd */
+ /* FIXME: add the LDD_F_UPDATE flag once mountconf starts
+ * supporting log updates.
+ */
+ if (lmd->lmd_sec_mdt) {
+ if (strlen(mti->mti_params) + strlen(lmd->lmd_sec_mdt) +
+ sizeof(PARAM_SEC_RPC_MDT) + 1 >= sizeof(mti->mti_params)) {
+ CERROR("security params too big for mti\n");
+ return -ENOMEM;
+ }
+ strcat(mti->mti_params, " "PARAM_SEC_RPC_MDT);
+ strcat(mti->mti_params, lmd->lmd_sec_mdt);
+ //mti->mti_flags |= LDD_F_UPDATE;
+ }
+ if (lmd->lmd_sec_cli) {
+ if (strlen(mti->mti_params) + strlen(lmd->lmd_sec_cli) +
+ sizeof(PARAM_SEC_RPC_CLI) + 1 >= sizeof(mti->mti_params)) {
+ CERROR("security params too big for mti\n");
+ return -ENOMEM;
+ }
+ strcat(mti->mti_params, " "PARAM_SEC_RPC_CLI);
+ strcat(mti->mti_params, lmd->lmd_sec_cli);
+ //mti->mti_flags |= LDD_F_UPDATE;
+ }
+
+ return 0;
+}
+
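+/*
+ * e.g. (illustrative): with mti_params "failover.node=... security.rpc.cli=null"
+ * and lmd_sec_cli "krb5i", the stale "security.rpc.cli=null" entry is
+ * filtered out and " security.rpc.cli=krb5i" is appended.
+ */
+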
static int server_sb2mti(struct super_block *sb, struct mgs_target_info *mti)
{
- struct lustre_sb_info *lsi = s2lsi(sb);
- struct lustre_disk_data *ldd = lsi->lsi_ldd;
- lnet_process_id_t id;
+ struct lustre_sb_info *lsi = s2lsi(sb);
+ struct lustre_disk_data *ldd = lsi->lsi_ldd;
+ struct lustre_mount_data *lmd = lsi->lsi_lmd;
+ lnet_process_id_t id;
int i = 0;
ENTRY;
/* FIXME we can't send a msg much bigger than 4k - use bulk? */
}
memcpy(mti->mti_params, ldd->ldd_params, sizeof(mti->mti_params));
- RETURN(0);
+
+ RETURN(mti_set_sec_opts(mti, lmd));
}
/* Register an old or new target with the MGS. If needed MGS will construct
if (lsi->lsi_lmd->lmd_profile != NULL)
OBD_FREE(lsi->lsi_lmd->lmd_profile,
strlen(lsi->lsi_lmd->lmd_profile) + 1);
+ if (lsi->lsi_lmd->lmd_sec_mdt != NULL)
+ OBD_FREE(lsi->lsi_lmd->lmd_sec_mdt,
+ strlen(lsi->lsi_lmd->lmd_sec_mdt) + 1);
+ if (lsi->lsi_lmd->lmd_sec_cli != NULL)
+ OBD_FREE(lsi->lsi_lmd->lmd_sec_cli,
+ strlen(lsi->lsi_lmd->lmd_sec_cli) + 1);
if (lsi->lsi_lmd->lmd_opts != NULL)
OBD_FREE(lsi->lsi_lmd->lmd_opts,
strlen(lsi->lsi_lmd->lmd_opts) + 1);
PRINT_CMD(PRINT_MASK, "profile: %s\n", lmd->lmd_profile);
PRINT_CMD(PRINT_MASK, "device: %s\n", lmd->lmd_dev);
PRINT_CMD(PRINT_MASK, "flags: %x\n", lmd->lmd_flags);
+ if (lmd->lmd_sec_mdt)
+ PRINT_CMD(PRINT_MASK, "sec_mdt: %s\n", lmd->lmd_sec_mdt);
+ if (lmd->lmd_sec_cli)
+ PRINT_CMD(PRINT_MASK, "sec_cli: %s\n", lmd->lmd_sec_cli);
if (lmd->lmd_opts)
PRINT_CMD(PRINT_MASK, "options: %s\n", lmd->lmd_opts);
for (i = 0; i < lmd->lmd_exclude_count; i++) {
RETURN(rc);
}
+static
+int lmd_set_sec_opts(char **set, char *opts, int length)
+{
+ if (*set)
+ OBD_FREE(*set, strlen(*set) + 1);
+
+ OBD_ALLOC(*set, length + 1);
+ if (*set == NULL)
+ return -ENOMEM;
+
+ memcpy(*set, opts, length);
+ (*set)[length] = '\0';
+
+ return 0;
+}
+
+static
+int lmd_parse_sec_opts(struct lustre_mount_data *lmd, char *ptr)
+{
+ char *tail;
+ char **set = NULL;
+ int length;
+
+ /* determine which setting this option selects */
+ if (strncmp(ptr, "sec_mdt=", 8) == 0) {
+ set = &lmd->lmd_sec_mdt;
+ ptr += 8;
+ } else if (strncmp(ptr, "sec_cli=", 8) == 0) {
+ set = &lmd->lmd_sec_cli;
+ ptr += 8;
+ } else if (strncmp(ptr, "sec=", 4) == 0) {
+ /* leave 'set' NULL: applies to both mdt and cli */
+ ptr += 4;
+ } else {
+ CERROR("invalid security options: %s\n", ptr);
+ return -EINVAL;
+ }
+
+ tail = strchr(ptr, ',');
+ if (tail == NULL)
+ length = strlen(ptr);
+ else
+ length = tail - ptr;
+
+ if (set) {
+ if (lmd_set_sec_opts(set, ptr, length))
+ return -EINVAL;
+ } else {
+ if (lmd->lmd_sec_mdt == NULL &&
+ lmd_set_sec_opts(&lmd->lmd_sec_mdt, ptr, length))
+ return -EINVAL;
+
+ if (lmd->lmd_sec_cli == NULL &&
+ lmd_set_sec_opts(&lmd->lmd_sec_cli, ptr, length))
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
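+/*
+ * Illustrative mount usage: "-o sec=krb5i" applies one flavor to both
+ * lmd_sec_mdt and lmd_sec_cli (where not already set), while
+ * "-o sec_mdt=krb5p,sec_cli=krb5i" sets them independently.
+ */
+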
/* mount -v -t lustre uml1:uml2:/lustre-client /mnt/lustre */
static int lmd_parse(char *options, struct lustre_mount_data *lmd)
{
if (rc)
goto invalid;
clear++;
+ } else if (strncmp(s1, "sec", 3) == 0) {
+ rc = lmd_parse_sec_opts(lmd, s1);
+ if (rc)
+ goto invalid;
+ clear++;
}
/* Linux 2.4 doesn't pass the device, so we stuck it at the
{ "prealloc_next_id", osc_rd_prealloc_next_id, 0, 0 },
{ "prealloc_last_id", osc_rd_prealloc_last_id, 0, 0 },
{ "checksums", osc_rd_checksum, osc_wr_checksum, 0 },
+ { "sptlrpc", sptlrpc_lprocfs_rd, 0, 0 },
{ 0 }
};
# include <liblustre.h>
#endif
-# include <lustre_dlm.h>
+#include <lustre_dlm.h>
#include <libcfs/kp30.h>
#include <lustre_net.h>
#include <lustre/lustre_user.h>
OBD_FAIL_RETURN(OBD_FAIL_OSC_BRW_PREP_REQ, -ENOMEM);
req = ptlrpc_prep_req_pool(imp, LUSTRE_OST_VERSION, opc, 4, size, NULL,
- pool);
+ pool, NULL);
if (req == NULL)
RETURN (-ENOMEM);
requested_nob, page_count, pga);
}
+ sptlrpc_cli_unwrap_bulk_write(req, req->rq_bulk);
+
RETURN(check_write_rcs(req, requested_nob, niocount,
page_count, pga));
}
cksum_missed, libcfs_nid2str(peer->nid));
}
+ sptlrpc_cli_unwrap_bulk_read(req, rc, page_count, pga);
+
RETURN(0);
}
size[REQ_REC_OFF + 2] = page_count * sizeof(*nioptr);
req = ptlrpc_prep_req_pool(class_exp2cliimp(exp), LUSTRE_OST_VERSION,
- OST_SAN_WRITE, 4, size, NULL, imp->imp_rq_pool);
+ OST_SAN_WRITE, 4, size, NULL,
+ imp->imp_rq_pool, NULL);
if (!req)
RETURN(-ENOMEM);
RETURN(0);
}
+ if (KEY_IS(KEY_FLUSH_CTX)) {
+ sptlrpc_import_flush_my_ctx(imp);
+ RETURN(0);
+ }
+
if (!set)
RETURN(-EINVAL);
int comms_error = 0, niocount, npages, nob = 0, rc, i, do_checksum;
ENTRY;
+ req->rq_bulk_read = 1;
+
if (OBD_FAIL_CHECK(OBD_FAIL_OST_BRW_READ_BULK))
GOTO(out, rc = -EIO);
if (rc == 0) {
if (desc->bd_export->exp_failed)
rc = -ENOTCONN;
- else
+ else {
+ sptlrpc_svc_wrap_bulk(req, desc);
+
rc = ptlrpc_start_bulk_transfer(desc);
+ }
+
if (rc == 0) {
lwi = LWI_TIMEOUT_INTERVAL(obd_timeout * HZ / 4, HZ,
ost_bulk_timeout, desc);
int rc, swab, i, j, do_checksum;
ENTRY;
+ req->rq_bulk_write = 1;
+
if (OBD_FAIL_CHECK(OBD_FAIL_OST_BRW_WRITE_BULK))
GOTO(out, rc = -EIO);
}
}
+ sptlrpc_svc_unwrap_bulk(req, desc);
+
/* Must commit after prep above in all cases */
rc = obd_commitrw(OBD_BRW_WRITE, req->rq_export, &repbody->oa,
objcount, ioo, npages, local_nb, oti, rc);
case OST_CONNECT:
case OST_DISCONNECT:
case OBD_PING:
+ case SEC_CTX_INIT:
+ case SEC_CTX_INIT_CONT:
+ case SEC_CTX_FINI:
rc = lustre_msg_check_version(msg, LUSTRE_OBD_VERSION);
if (rc)
CERROR("bad opc %u version %08x, expecting %08x\n",
ENTRY;
LASSERT(current->journal_info == NULL);
+
+ /* primordial rpcs don't affect server recovery */
+ switch (lustre_msg_get_opc(req->rq_reqmsg)) {
+ case SEC_CTX_INIT:
+ case SEC_CTX_INIT_CONT:
+ case SEC_CTX_FINI:
+ GOTO(out, rc = 0);
+ }
+
/* XXX identical to MDS */
if (lustre_msg_get_opc(req->rq_reqmsg) != OST_CONNECT) {
int abort_recovery, recovering;
if (rc)
RETURN(rc);
- rc = ost_msg_check_version(req->rq_reqmsg);
- if (rc)
- RETURN(rc);
-
switch (lustre_msg_get_opc(req->rq_reqmsg)) {
case OST_CONNECT: {
CDEBUG(D_INODE, "connect\n");
ptlrpc_objs += events.o ptlrpc_module.o service.o pinger.o recov_thread.o
ptlrpc_objs += llog_net.o llog_client.o llog_server.o import.o ptlrpcd.o
ptlrpc_objs += pers.o lproc_ptlrpc.o wiretest.o layout.o
+ptlrpc_objs += sec.o sec_null.o sec_plain.o
ptlrpc-objs := $(ldlm_objs) $(ptlrpc_objs)
COMMON_SOURCES = client.c recover.c connection.c niobuf.c pack_generic.c \
events.c ptlrpc_module.c service.c pinger.c recov_thread.c llog_net.c \
llog_client.c llog_server.c import.c ptlrpcd.c pers.c wiretest.c \
- ptlrpc_internal.h layout.c $(LDLM_COMM_SOURCES)
+ ptlrpc_internal.h layout.c sec.c sec_null.c sec_plain.c \
+ $(LDLM_COMM_SOURCES)
if LIBLUSTRE
recov_thread.c \
service.c \
wiretest.c \
+ sec.c \
+ sec_null.c \
+ sec_plain.c \
$(LDLM_COMM_SOURCES)
ptlrpc_CFLAGS := $(EXTRA_KCFLAGS)
LASSERT(desc->bd_iov_count != LI_POISON); /* not freed already */
LASSERT(!desc->bd_network_rw); /* network hands off or */
LASSERT((desc->bd_export != NULL) ^ (desc->bd_import != NULL));
+
+ ptlrpc_bulk_free_enc_pages(desc);
+
if (desc->bd_export)
class_export_put(desc->bd_export);
else
list_for_each_safe(l, tmp, &pool->prp_req_list) {
req = list_entry(l, struct ptlrpc_request, rq_list);
list_del(&req->rq_list);
- LASSERT (req->rq_reqmsg);
- OBD_FREE(req->rq_reqmsg, pool->prp_rq_size);
+ LASSERT(req->rq_reqbuf);
+ LASSERT(req->rq_reqbuf_len == pool->prp_rq_size);
+ OBD_FREE(req->rq_reqbuf, pool->prp_rq_size);
OBD_FREE(req, sizeof(*req));
}
OBD_FREE(pool, sizeof(*pool));
int i;
int size = 1;
- while (size < pool->prp_rq_size)
+ while (size < pool->prp_rq_size + SPTLRPC_MAX_PAYLOAD)
size <<= 1;
LASSERTF(list_empty(&pool->prp_req_list) || size == pool->prp_rq_size,
OBD_FREE(req, sizeof(struct ptlrpc_request));
return;
}
- req->rq_reqmsg = msg;
+ req->rq_reqbuf = msg;
+ req->rq_reqbuf_len = size;
req->rq_pool = pool;
spin_lock(&pool->prp_lock);
list_add_tail(&req->rq_list, &pool->prp_req_list);
static struct ptlrpc_request *ptlrpc_prep_req_from_pool(struct ptlrpc_request_pool *pool)
{
struct ptlrpc_request *request;
- struct lustre_msg *reqmsg;
+ struct lustre_msg *reqbuf;
if (!pool)
return NULL;
list_del(&request->rq_list);
spin_unlock(&pool->prp_lock);
- LASSERT(request->rq_reqmsg);
+ LASSERT(request->rq_reqbuf);
LASSERT(request->rq_pool);
- reqmsg = request->rq_reqmsg;
+ reqbuf = request->rq_reqbuf;
memset(request, 0, sizeof(*request));
- request->rq_reqmsg = reqmsg;
+ request->rq_reqbuf = reqbuf;
+ request->rq_reqbuf_len = pool->prp_rq_size;
request->rq_pool = pool;
- request->rq_reqlen = pool->prp_rq_size;
return request;
}
+static void __ptlrpc_free_req_to_pool(struct ptlrpc_request *request)
+{
+ struct ptlrpc_request_pool *pool = request->rq_pool;
+
+ spin_lock(&pool->prp_lock);
+ list_add_tail(&request->rq_list, &pool->prp_req_list);
+ spin_unlock(&pool->prp_lock);
+}
+
struct ptlrpc_request *
ptlrpc_prep_req_pool(struct obd_import *imp, __u32 version, int opcode,
int count, int *lengths, char **bufs,
- struct ptlrpc_request_pool *pool)
+ struct ptlrpc_request_pool *pool,
+ struct ptlrpc_cli_ctx *ctx)
{
struct ptlrpc_request *request = NULL;
int rc;
RETURN(NULL);
}
+ request->rq_import = class_import_get(imp);
+
+ if (unlikely(ctx))
+ request->rq_cli_ctx = sptlrpc_ctx_get(ctx);
+ else {
+ rc = sptlrpc_req_get_ctx(request);
+ if (rc)
+ GOTO(out_free, rc);
+ }
+
+ sptlrpc_req_set_flavor(request, opcode);
+
rc = lustre_pack_request(request, imp->imp_msg_magic, count, lengths,
bufs);
if (rc) {
LASSERT(!request->rq_pool);
- OBD_FREE(request, sizeof(*request));
- RETURN(NULL);
+ GOTO(out_ctx, rc);
}
lustre_msg_add_version(request->rq_reqmsg, version);
request->rq_timeout = obd_timeout;
request->rq_send_state = LUSTRE_IMP_FULL;
request->rq_type = PTL_RPC_MSG_REQUEST;
- request->rq_import = class_import_get(imp);
request->rq_export = NULL;
request->rq_req_cbid.cbid_fn = request_out_callback;
spin_lock_init(&request->rq_lock);
CFS_INIT_LIST_HEAD(&request->rq_list);
CFS_INIT_LIST_HEAD(&request->rq_replay_list);
+ CFS_INIT_LIST_HEAD(&request->rq_ctx_chain);
CFS_INIT_LIST_HEAD(&request->rq_set_chain);
cfs_waitq_init(&request->rq_reply_waitq);
request->rq_xid = ptlrpc_next_xid();
lustre_msg_set_flags(request->rq_reqmsg, 0);
RETURN(request);
+out_ctx:
+ sptlrpc_req_put_ctx(request);
+out_free:
+ class_import_put(imp);
+ if (request->rq_pool)
+ __ptlrpc_free_req_to_pool(request);
+ else
+ OBD_FREE(request, sizeof(*request));
+ return NULL;
}
struct ptlrpc_request *
int *lengths, char **bufs)
{
return ptlrpc_prep_req_pool(imp, version, opcode, count, lengths, bufs,
- NULL);
+ NULL, NULL);
}
struct ptlrpc_request_set *ptlrpc_prep_set(void)
LASSERT (status != NULL);
*status = 0;
- if (imp->imp_state == LUSTRE_IMP_NEW) {
+ if (req->rq_ctx_init || req->rq_ctx_fini) {
+ /* always allow ctx init/fini rpcs to go through */
+ } else if (imp->imp_state == LUSTRE_IMP_NEW) {
DEBUG_REQ(D_ERROR, req, "Uninitialized import.");
*status = -EIO;
LBUG();
ENTRY;
LASSERT(!req->rq_receiving_reply);
+ LASSERT(req->rq_nob_received <= req->rq_repbuf_len);
/* NB Until this point, the whole of the incoming message,
* including buflens, status etc is in the sender's byte order. */
/* Clear reply swab mask; this is a new reply in sender's byte order */
req->rq_rep_swab_mask = 0;
#endif
- LASSERT (req->rq_nob_received <= req->rq_replen);
- rc = lustre_unpack_msg(req->rq_repmsg, req->rq_nob_received);
+ rc = sptlrpc_cli_unwrap_reply(req);
+ if (rc) {
+ DEBUG_REQ(D_ERROR, req, "unwrap reply failed (%d):", rc);
+ RETURN(rc);
+ }
+
+ /* the security layer unwrap might ask to resend this request */
+ if (req->rq_resend)
+ RETURN(0);
+
+ rc = lustre_unpack_msg(req->rq_repmsg, req->rq_replen);
if (rc) {
DEBUG_REQ(D_ERROR, req, "unpack_rep failed: %d\n", rc);
RETURN(-EPROTO);
spin_unlock(&imp->imp_lock);
lustre_msg_set_status(req->rq_reqmsg, cfs_curproc_pid());
+
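+ /* make sure a usable security context exists before sending; the
+ * -1 timeout means don't block here if a refresh is still in
+ * progress (see sptlrpc_req_refresh_ctx). */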
+ rc = sptlrpc_req_refresh_ctx(req, -1);
+ if (rc) {
+ if (req->rq_err) {
+ req->rq_status = rc;
+ RETURN(1);
+ } else {
+ /* here begins timeout counting */
+ req->rq_sent = CURRENT_SECONDS;
+ req->rq_wait_ctx = 1;
+ RETURN(0);
+ }
+ }
+
CDEBUG(D_RPCTRACE, "Sending RPC pname:cluuid:pid:xid:nid:opc"
" %s:%s:%d:"LPU64":%s:%d\n", cfs_curproc_comm(),
imp->imp_obd->obd_uuid.uuid,
* path sets rq_intr irrespective of whether ptlrpcd has
* seen a timeout. our policy is to only interpret
* interrupted rpcs after they have timed out */
- if (req->rq_intr && (req->rq_timedout || req->rq_waiting)) {
+ if (req->rq_intr && (req->rq_timedout || req->rq_waiting ||
+ req->rq_wait_ctx)) {
/* NB could be on delayed list */
ptlrpc_unregister_reply(req);
req->rq_status = -EINTR;
}
if (req->rq_phase == RQ_PHASE_RPC) {
- if (req->rq_timedout||req->rq_waiting||req->rq_resend) {
+ if (req->rq_timedout || req->rq_resend ||
+ req->rq_waiting || req->rq_wait_ctx) {
int status;
+ /* rq_wait_ctx is only touched in ptlrpcd,
+ * no lock needed here.
+ */
+ if (req->rq_wait_ctx)
+ goto check_ctx;
+
ptlrpc_unregister_reply(req);
spin_lock(&imp->imp_lock);
spin_unlock(&imp->imp_lock);
GOTO(interpret, req->rq_status);
}
- if (req->rq_no_resend) {
+ if (req->rq_no_resend && !req->rq_wait_ctx) {
req->rq_status = -ENOTCONN;
req->rq_phase = RQ_PHASE_INTERPRET;
spin_unlock(&imp->imp_lock);
old_xid, req->rq_xid);
}
}
+check_ctx:
+ status = sptlrpc_req_refresh_ctx(req, -1);
+ if (status) {
+ if (req->rq_err) {
+ req->rq_status = status;
+ force_timer_recalc = 1;
+ }
+ if (!req->rq_wait_ctx) {
+ /* begins timeout counting */
+ req->rq_sent = CURRENT_SECONDS;
+ req->rq_wait_ctx = 1;
+ }
+ continue;
+ } else {
+ req->rq_sent = 0;
+ req->rq_wait_ctx = 0;
+ }
rc = ptl_send_rpc(req, 0);
if (rc) {
spin_lock(&req->rq_lock);
req->rq_timedout = 1;
+ req->rq_wait_ctx = 0;
spin_unlock(&req->rq_lock);
ptlrpc_unregister_reply (req);
/* If this request is for recovery or other primordial tasks,
* then error it out here. */
- if (req->rq_send_state != LUSTRE_IMP_FULL ||
+ if (req->rq_ctx_init || req->rq_ctx_fini ||
+ req->rq_send_state != LUSTRE_IMP_FULL ||
imp->imp_obd->obd_no_recov) {
spin_lock(&req->rq_lock);
req->rq_status = -ETIMEDOUT;
RETURN(rc);
}
-static void __ptlrpc_free_req_to_pool(struct ptlrpc_request *request)
-{
- struct ptlrpc_request_pool *pool = request->rq_pool;
-
- spin_lock(&pool->prp_lock);
- list_add_tail(&request->rq_list, &pool->prp_req_list);
- spin_unlock(&pool->prp_lock);
-}
-
static void __ptlrpc_free_req(struct ptlrpc_request *request, int locked)
{
ENTRY;
LASSERTF(request->rq_rqbd == NULL, "req %p\n",request);/* client-side */
LASSERTF(list_empty(&request->rq_list), "req %p\n", request);
LASSERTF(list_empty(&request->rq_set_chain), "req %p\n", request);
+ LASSERT(request->rq_cli_ctx);
/* We must take it off the imp_replay_list first. Otherwise, we'll set
* request->rq_reqmsg to NULL while osc_close is dereferencing it. */
LBUG();
}
- if (request->rq_repmsg != NULL) {
- OBD_FREE(request->rq_repmsg, request->rq_replen);
- request->rq_repmsg = NULL;
- }
+ if (request->rq_repbuf != NULL)
+ sptlrpc_cli_free_repbuf(request);
if (request->rq_export != NULL) {
class_export_put(request->rq_export);
request->rq_export = NULL;
if (request->rq_bulk != NULL)
ptlrpc_free_bulk(request->rq_bulk);
- if (request->rq_pool) {
+ if (request->rq_reqbuf != NULL || request->rq_clrbuf != NULL)
+ sptlrpc_cli_free_reqbuf(request);
+
+ sptlrpc_req_put_ctx(request);
+
+ if (request->rq_pool)
__ptlrpc_free_req_to_pool(request);
- } else {
- if (request->rq_reqmsg != NULL) {
- OBD_FREE(request->rq_reqmsg, request->rq_reqlen);
- request->rq_reqmsg = NULL;
- }
+ else
OBD_FREE(request, sizeof(*request));
- }
EXIT;
}
list_add_tail(&req->rq_list, &imp->imp_sending_list);
spin_unlock(&imp->imp_lock);
+ rc = sptlrpc_req_refresh_ctx(req, 0);
+ if (rc) {
+ if (req->rq_err) {
+ /* we got a fatal ctx refresh error; jump out directly
+ * so we can pass back the actual error code.
+ */
+ spin_lock(&imp->imp_lock);
+ list_del_init(&req->rq_list);
+ spin_unlock(&imp->imp_lock);
+
+ CERROR("Failed to refresh ctx of req %p: %d\n", req, rc);
+ GOTO(out, rc);
+ }
+ /* simulate an error during rpc send */
+ goto after_send;
+ }
+
rc = ptl_send_rpc(req, 0);
if (rc) {
DEBUG_REQ(D_HA, req, "send failed (%d); recovering", rc);
l_wait_event(req->rq_reply_waitq, ptlrpc_check_reply(req), &lwi);
DEBUG_REQ(D_NET, req, "-- done sleeping");
+after_send:
CDEBUG(D_RPCTRACE, "Completed RPC pname:cluuid:pid:xid:nid:opc "
"%s:%s:%d:"LPU64":%s:%d\n", cfs_curproc_comm(),
imp->imp_obd->obd_uuid.uuid,
#endif
#include <obd_class.h>
#include <lustre_net.h>
+#include <lustre_sec.h>
#include "ptlrpc_internal.h"
lnet_handle_eq_t ptlrpc_eq_h;
DEBUG_REQ((ev->status == 0) ? D_NET : D_ERROR, req,
"type %d, status %d", ev->type, ev->status);
+ sptlrpc_request_out_callback(req);
+
if (ev->type == LNET_EVENT_UNLINK || ev->status != 0) {
/* Failed send: make it seem like the reply timed out, just
LASSERT (ev->type == LNET_EVENT_PUT ||
ev->type == LNET_EVENT_UNLINK);
LASSERT (ev->unlinked);
- LASSERT (ev->md.start == req->rq_repmsg);
+ LASSERT (ev->md.start == req->rq_repbuf);
LASSERT (ev->offset == 0);
- LASSERT (ev->mlength <= req->rq_replen);
+ LASSERT (ev->mlength <= req->rq_repbuf_len);
DEBUG_REQ((ev->status == 0) ? D_NET : D_ERROR, req,
"type %d, status %d", ev->type, ev->status);
desc->bd_nob_transferred = ev->mlength;
}
+ ptlrpc_bulk_free_enc_pages(desc);
+
/* NB don't unlock till after wakeup; desc can disappear under us
* otherwise */
ptlrpc_wake_client_req(desc->bd_req);
* flags are reset and scalars are zero. We only set the message
* size to non-zero if this was a successful receive. */
req->rq_xid = ev->match_bits;
- req->rq_reqmsg = ev->md.start + ev->offset;
+ req->rq_reqbuf = ev->md.start + ev->offset;
if (ev->type == LNET_EVENT_PUT && ev->status == 0)
- req->rq_reqlen = ev->mlength;
+ req->rq_reqdata_len = ev->mlength;
do_gettimeofday(&req->rq_arrival_time);
req->rq_peer = ev->initiator;
req->rq_self = ev->target.nid;
spin_unlock(&imp->imp_lock);
CERROR("can't connect to a closed import\n");
RETURN(-EINVAL);
- } else if (imp->imp_state == LUSTRE_IMP_FULL) {
+ } else if (imp->imp_state == LUSTRE_IMP_FULL &&
+ imp->imp_force_reconnect == 0) {
spin_unlock(&imp->imp_lock);
CERROR("already connected\n");
RETURN(0);
spin_unlock(&imp->imp_lock);
RETURN(0);
}
+ imp->imp_force_reconnect = 0;
spin_unlock(&imp->imp_lock);
if (rc)
GOTO(out, rc);
+ rc = sptlrpc_cli_install_rvs_ctx(imp, request->rq_cli_ctx);
+ if (rc)
+ GOTO(out, rc);
+
LASSERT(imp->imp_conn_current);
msg_flags = lustre_msg_get_op_flags(request->rq_repmsg);
if (rc == -EPROTO) {
struct obd_connect_data *ocd;
+
+ /* reply message might not be ready */
+ if (request->rq_repmsg == NULL)
+ RETURN(-EPROTO);
+
ocd = lustre_swab_repbuf(request, REPLY_REC_OFF,
sizeof *ocd,
lustre_swab_connect);
int rc;
/* We must already have a reply buffer (only ptlrpc_error() may be
- * called without one). We must also have a request buffer which
- * is either the actual (swabbed) incoming request, or a saved copy
- * if this is a req saved in target_queue_final_reply(). */
- LASSERT (req->rq_reqmsg != NULL);
+ * called without one). A reply generated by the security layer (e.g.
+ * error notify, etc.) might have a NULL rq_reqmsg; otherwise we must
+ * have a request buffer which is either the actual (swabbed) incoming
+ * request, or a saved copy if this is a req saved in
+ * target_queue_final_reply().
+ */
+ LASSERT (req->rq_reqbuf != NULL);
LASSERT (rs != NULL);
- LASSERT (req->rq_repmsg != NULL);
LASSERT (may_be_difficult || !rs->rs_difficult);
+ LASSERT (req->rq_repmsg != NULL);
LASSERT (req->rq_repmsg == rs->rs_msg);
LASSERT (rs->rs_cb_id.cbid_fn == reply_out_callback);
LASSERT (rs->rs_cb_id.cbid_arg == rs);
- LASSERT (req->rq_repmsg != NULL);
if (req->rq_export && req->rq_export->exp_obd &&
req->rq_export->exp_obd->obd_fail) {
lustre_msg_set_type(req->rq_repmsg, req->rq_type);
lustre_msg_set_status(req->rq_repmsg, req->rq_status);
- lustre_msg_set_opc(req->rq_repmsg, lustre_msg_get_opc(req->rq_reqmsg));
+ lustre_msg_set_opc(req->rq_repmsg,
+ req->rq_reqmsg ? lustre_msg_get_opc(req->rq_reqmsg) : 0);
if (req->rq_export == NULL || req->rq_export->exp_connection == NULL)
conn = ptlrpc_get_connection(req->rq_peer, req->rq_self, NULL);
atomic_inc (&svc->srv_outstanding_replies);
ptlrpc_rs_addref(rs); /* +1 ref for the network */
- rc = ptl_send_buf (&rs->rs_md_h, req->rq_repmsg, req->rq_replen,
+ rc = sptlrpc_svc_wrap_reply(req);
+ if (rc)
+ goto out;
+
+ rc = ptl_send_buf (&rs->rs_md_h, rs->rs_repbuf, rs->rs_repdata_len,
rs->rs_difficult ? LNET_ACK_REQ : LNET_NOACK_REQ,
&rs->rs_cb_id, conn,
svc->srv_rep_portal, req->rq_xid);
+out:
if (rc != 0) {
atomic_dec (&svc->srv_outstanding_replies);
ptlrpc_rs_decref(rs);
connection = request->rq_import->imp_connection;
- if (request->rq_bulk != NULL) {
- rc = ptlrpc_register_bulk (request);
- if (rc != 0)
- RETURN(rc);
- }
-
lustre_msg_set_handle(request->rq_reqmsg,
&request->rq_import->imp_remote_handle);
lustre_msg_set_type(request->rq_reqmsg, PTL_RPC_MSG_REQUEST);
lustre_msg_set_conn_cnt(request->rq_reqmsg,
request->rq_import->imp_conn_cnt);
+ rc = sptlrpc_cli_wrap_request(request);
+ if (rc)
+ RETURN(rc);
+
+ /* bulk register should be done after wrap_request() */
+ if (request->rq_bulk != NULL) {
+ rc = ptlrpc_register_bulk (request);
+ if (rc != 0)
+ RETURN(rc);
+ }
+
if (!noreply) {
LASSERT (request->rq_replen != 0);
- if (request->rq_repmsg == NULL)
- OBD_ALLOC(request->rq_repmsg, request->rq_replen);
- if (request->rq_repmsg == NULL)
- GOTO(cleanup_bulk, rc = -ENOMEM);
+ if (request->rq_repbuf == NULL) {
+ rc = sptlrpc_cli_alloc_repbuf(request,
+ request->rq_replen);
+ if (rc)
+ GOTO(cleanup_bulk, rc);
+ }
rc = LNetMEAttach(request->rq_reply_portal,/*XXX FIXME bug 249*/
connection->c_peer, request->rq_xid, 0,
if (rc != 0) {
CERROR("LNetMEAttach failed: %d\n", rc);
LASSERT (rc == -ENOMEM);
- GOTO(cleanup_repmsg, rc = -ENOMEM);
+ GOTO(cleanup_bulk, rc = -ENOMEM);
}
}
spin_unlock(&request->rq_lock);
if (!noreply) {
- reply_md.start = request->rq_repmsg;
- reply_md.length = request->rq_replen;
+ reply_md.start = request->rq_repbuf;
+ reply_md.length = request->rq_repbuf_len;
reply_md.threshold = 1;
reply_md.options = PTLRPC_MD_OPTIONS | LNET_MD_OP_PUT;
reply_md.user_ptr = &request->rq_reply_cbid;
CDEBUG(D_NET, "Setup reply buffer: %u bytes, xid "LPU64
", portal %u\n",
- request->rq_replen, request->rq_xid,
+ request->rq_repbuf_len, request->rq_xid,
request->rq_reply_portal);
}
request->rq_sent = CURRENT_SECONDS;
ptlrpc_pinger_sending_on_import(request->rq_import);
rc = ptl_send_buf(&request->rq_req_md_h,
- request->rq_reqmsg, request->rq_reqlen,
- LNET_NOACK_REQ, &request->rq_req_cbid,
+ request->rq_reqbuf, request->rq_reqdata_len,
+ LNET_NOACK_REQ, &request->rq_req_cbid,
connection,
request->rq_request_portal,
request->rq_xid);
/* UNLINKED callback called synchronously */
LASSERT (!request->rq_receiving_reply);
- cleanup_repmsg:
- OBD_FREE(request->rq_repmsg, request->rq_replen);
- request->rq_repmsg = NULL;
-
cleanup_bulk:
if (request->rq_bulk != NULL)
ptlrpc_unregister_bulk(request);
return size;
}
-static inline int lustre_msg_size_v2(int count, int *lengths)
+int lustre_msg_size_v2(int count, int *lengths)
{
int size;
int i;
return size;
}
+EXPORT_SYMBOL(lustre_msg_size_v2);
/* This returns the size of the buffer that is required to hold a lustre_msg
* with the given sub-buffer lengths. */
}
}
-static void
-lustre_init_msg_v1(void *m, int count, int *lens, char **bufs)
+static
+void lustre_init_msg_v1(void *m, int count, int *lens, char **bufs)
{
struct lustre_msg_v1 *msg = (struct lustre_msg_v1 *)m;
char *ptr;
}
}
-static void
-lustre_init_msg_v2(struct lustre_msg_v2 *msg, int count, int *lens, char **bufs)
+void lustre_init_msg_v2(struct lustre_msg_v2 *msg, int count, int *lens,
+ char **bufs)
{
char *ptr;
int i;
LOGL(tmp, lens[i], ptr);
}
}
+EXPORT_SYMBOL(lustre_init_msg_v2);
static int lustre_pack_request_v1(struct ptlrpc_request *req,
int count, int *lens, char **bufs)
{
- int reqlen;
+ int reqlen, rc;
reqlen = lustre_msg_size_v1(count, lens);
- /* See if we got it from prealloc pool */
- if (req->rq_reqmsg) {
- /* Cannot return error here, that would create
- infinite loop in ptlrpc_prep_req_pool */
- /* In this case ptlrpc_prep_req_from_pool sets req->rq_reqlen
- to maximum size that would fit into this preallocated
- request */
- LASSERTF(req->rq_reqlen >= reqlen, "req->rq_reqlen %d, "
- "reqlen %d\n",req->rq_reqlen,
- reqlen);
- memset(req->rq_reqmsg, 0, reqlen);
- } else {
- OBD_ALLOC(req->rq_reqmsg, reqlen);
- if (req->rq_reqmsg == NULL) {
- CERROR("alloc reqmsg (len %d) failed\n", reqlen);
- return -ENOMEM;
- }
- }
+ rc = sptlrpc_cli_alloc_reqbuf(req, reqlen);
+ if (rc)
+ return rc;
req->rq_reqlen = reqlen;
static int lustre_pack_request_v2(struct ptlrpc_request *req,
int count, int *lens, char **bufs)
{
- int reqlen;
+ int reqlen, rc;
reqlen = lustre_msg_size_v2(count, lens);
- /* See if we got it from prealloc pool */
- if (req->rq_reqmsg) {
- /* Cannot return error here, that would create
- infinite loop in ptlrpc_prep_req_pool */
- /* In this case ptlrpc_prep_req_from_pool sets req->rq_reqlen
- to maximum size that would fit into this preallocated
- request */
- LASSERTF(req->rq_reqlen >= reqlen, "req->rq_reqlen %d, "
- "reqlen %d\n",req->rq_reqlen,
- reqlen);
- memset(req->rq_reqmsg, 0, reqlen);
- } else {
- OBD_ALLOC(req->rq_reqmsg, reqlen);
- if (req->rq_reqmsg == NULL) {
- CERROR("alloc reqmsg (len %d) failed\n", reqlen);
- return -ENOMEM;
- }
- }
+ rc = sptlrpc_cli_alloc_reqbuf(req, reqlen);
+ if (rc)
+ return rc;
req->rq_reqlen = reqlen;
LASSERT(count > 0);
LASSERT(lens[MSG_PTLRPC_BODY_OFF] == sizeof(struct ptlrpc_body));
+ /* if we chose a policy other than null, we have also chosen
+ * to use the new message format.
+ */
+ if (magic == LUSTRE_MSG_MAGIC_V1 &&
+ req->rq_sec_flavor != SPTLRPC_FLVR_NULL)
+ magic = LUSTRE_MSG_MAGIC_V2;
+
switch (magic) {
case LUSTRE_MSG_MAGIC_V1:
return lustre_pack_request_v1(req, count - 1, lens + 1,
# define PTLRPC_RS_DEBUG_LRU_DEL(rs) do {} while(0)
#endif
-static struct ptlrpc_reply_state *lustre_get_emerg_rs(struct ptlrpc_service *svc,
- int size)
+struct ptlrpc_reply_state *lustre_get_emerg_rs(struct ptlrpc_service *svc)
{
struct ptlrpc_reply_state *rs = NULL;
list_del(&rs->rs_list);
spin_unlock(&svc->srv_lock);
LASSERT(rs);
- LASSERTF(svc->srv_max_reply_size > size, "Want %d, prealloc %d\n", size,
- svc->srv_max_reply_size);
- memset(rs, 0, size);
+ memset(rs, 0, svc->srv_max_reply_size);
+ rs->rs_service = svc;
rs->rs_prealloc = 1;
out:
return rs;
}
+void lustre_put_emerg_rs(struct ptlrpc_reply_state *rs)
+{
+ struct ptlrpc_service *svc = rs->rs_service;
+
+ LASSERT(svc);
+
+ spin_lock(&svc->srv_lock);
+ list_add(&rs->rs_list, &svc->srv_free_rs_list);
+ spin_unlock(&svc->srv_lock);
+ cfs_waitq_signal(&svc->srv_free_rs_waitq);
+}
+
static int lustre_pack_reply_v1(struct ptlrpc_request *req, int count,
int *lens, char **bufs)
{
struct ptlrpc_reply_state *rs;
- int msg_len;
- int size;
+ int msg_len, rc;
ENTRY;
LASSERT (req->rq_reply_state == NULL);
msg_len = lustre_msg_size_v1(count, lens);
- size = sizeof(struct ptlrpc_reply_state) + msg_len;
- OBD_ALLOC(rs, size);
- if (unlikely(rs == NULL)) {
- rs = lustre_get_emerg_rs(req->rq_rqbd->rqbd_service, size);
- if (!rs)
- RETURN (-ENOMEM);
- }
+ rc = sptlrpc_svc_alloc_rs(req, msg_len);
+ if (rc)
+ RETURN(rc);
+
+ rs = req->rq_reply_state;
atomic_set(&rs->rs_refcount, 1); /* 1 ref for rq_reply_state */
rs->rs_cb_id.cbid_fn = reply_out_callback;
rs->rs_cb_id.cbid_arg = rs;
rs->rs_service = req->rq_rqbd->rqbd_service;
- rs->rs_size = size;
CFS_INIT_LIST_HEAD(&rs->rs_exp_list);
CFS_INIT_LIST_HEAD(&rs->rs_obd_list);
- rs->rs_msg = (struct lustre_msg *)(rs + 1);
req->rq_replen = msg_len;
req->rq_reply_state = rs;
RETURN (0);
}
-static int lustre_pack_reply_v2(struct ptlrpc_request *req, int count,
- int *lens, char **bufs)
+int lustre_pack_reply_v2(struct ptlrpc_request *req, int count,
+ int *lens, char **bufs)
{
struct ptlrpc_reply_state *rs;
- int msg_len;
- int size;
+ int msg_len, rc;
ENTRY;
LASSERT(req->rq_reply_state == NULL);
msg_len = lustre_msg_size_v2(count, lens);
- size = sizeof(struct ptlrpc_reply_state) + msg_len;
- OBD_ALLOC(rs, size);
- if (unlikely(rs == NULL)) {
- rs = lustre_get_emerg_rs(req->rq_rqbd->rqbd_service, size);
- if (!rs)
- RETURN (-ENOMEM);
- }
+ rc = sptlrpc_svc_alloc_rs(req, msg_len);
+ if (rc)
+ RETURN(rc);
+
+ rs = req->rq_reply_state;
atomic_set(&rs->rs_refcount, 1); /* 1 ref for rq_reply_state */
rs->rs_cb_id.cbid_fn = reply_out_callback;
rs->rs_cb_id.cbid_arg = rs;
rs->rs_service = req->rq_rqbd->rqbd_service;
- rs->rs_size = size;
CFS_INIT_LIST_HEAD(&rs->rs_exp_list);
CFS_INIT_LIST_HEAD(&rs->rs_obd_list);
- rs->rs_msg = (struct lustre_msg *)(rs + 1);
req->rq_replen = msg_len;
req->rq_reply_state = rs;
RETURN(0);
}
+EXPORT_SYMBOL(lustre_pack_reply_v2);
int lustre_pack_reply(struct ptlrpc_request *req, int count, int *lens,
char **bufs)
}
}
-void lustre_shrink_reply_v1(struct ptlrpc_request *req, int segment,
- unsigned int newlen, int move_data)
+int lustre_shrink_msg_v1(struct lustre_msg_v1 *msg, int segment,
+ unsigned int newlen, int move_data)
{
- struct lustre_msg_v1 *msg = (struct lustre_msg_v1 *)req->rq_repmsg;
- char *tail = NULL, *newpos;
- int tail_len = 0, n;
+ char *tail = NULL, *newpos;
+ int tail_len = 0, n;
- LASSERT(req->rq_reply_state);
LASSERT(msg);
LASSERT(segment >= 0);
LASSERT(msg->lm_bufcount > segment);
LASSERT(msg->lm_buflens[segment] >= newlen);
if (msg->lm_buflens[segment] == newlen)
- return;
+ goto out;
if (move_data && msg->lm_bufcount > segment + 1) {
tail = lustre_msg_buf_v1(msg, segment + 1, 0);
msg->lm_buflens[msg->lm_bufcount - 1] = 0;
}
- req->rq_replen = lustre_msg_size_v1(msg->lm_bufcount, msg->lm_buflens);
+out:
+ return lustre_msg_size_v1(msg->lm_bufcount, msg->lm_buflens);
}
-void lustre_shrink_reply_v2(struct ptlrpc_request *req, int segment,
- unsigned int newlen, int move_data)
+int lustre_shrink_msg_v2(struct lustre_msg_v2 *msg, int segment,
+ unsigned int newlen, int move_data)
{
- struct lustre_msg_v2 *msg = req->rq_repmsg;
- char *tail = NULL, *newpos;
- int tail_len = 0, n;
+ char *tail = NULL, *newpos;
+ int tail_len = 0, n;
- LASSERT(req->rq_reply_state);
LASSERT(msg);
LASSERT(msg->lm_bufcount > segment);
LASSERT(msg->lm_buflens[segment] >= newlen);
if (msg->lm_buflens[segment] == newlen)
- return;
+ goto out;
if (move_data && msg->lm_bufcount > segment + 1) {
tail = lustre_msg_buf_v2(msg, segment + 1, 0);
msg->lm_buflens[msg->lm_bufcount - 1] = 0;
}
- req->rq_replen = lustre_msg_size_v2(msg->lm_bufcount, msg->lm_buflens);
+out:
+ return lustre_msg_size_v2(msg->lm_bufcount, msg->lm_buflens);
}
/*
- * shrink @segment to size @newlen. if @move_data is non-zero, we also move
- * data forward from @segment + 1.
+ * for @msg, shrink @segment to size @newlen. if @move_data is non-zero,
+ * we also move data forward from @segment + 1.
*
* if @newlen == 0, we remove the segment completely, but we still keep the
 * total bufcount the same to avoid possible data moving. this will leave an
 * unused segment with size 0 at the tail, but that's ok.
*
+ * return new msg size after shrinking.
+ *
* CAUTION:
 * + if any buffers higher than @segment have been filled in, shrink must be
 * called with non-zero @move_data.
 * + the caller should NOT keep pointers to msg buffers higher than @segment
 * after calling shrink.
*/
-void lustre_shrink_reply(struct ptlrpc_request *req, int segment,
- unsigned int newlen, int move_data)
+int lustre_shrink_msg(struct lustre_msg *msg, int segment,
+ unsigned int newlen, int move_data)
{
- switch (req->rq_repmsg->lm_magic) {
+ switch (msg->lm_magic) {
case LUSTRE_MSG_MAGIC_V1:
- lustre_shrink_reply_v1(req, segment - 1, newlen, move_data);
- return;
+ return lustre_shrink_msg_v1((struct lustre_msg_v1 *) msg,
+ segment - 1, newlen, move_data);
case LUSTRE_MSG_MAGIC_V2:
- lustre_shrink_reply_v2(req, segment, newlen, move_data);
- return;
+ return lustre_shrink_msg_v2(msg, segment, newlen, move_data);
default:
- LASSERTF(0, "incorrect message magic: %08x\n",
- req->rq_repmsg->lm_magic);
+ LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
}
}
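+/* Usage sketch (illustrative only): since lustre_shrink_msg() now returns
+ * the new message size instead of updating the request, a caller that used
+ * to rely on lustre_shrink_reply() is expected to do something like:
+ *
+ *	req->rq_replen = lustre_shrink_msg(req->rq_repmsg, 1, newlen, 1);
+ *
+ * where segment index 1 and newlen are hypothetical values for this example.
+ */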
LASSERT (list_empty(&rs->rs_exp_list));
LASSERT (list_empty(&rs->rs_obd_list));
- if (unlikely(rs->rs_prealloc)) {
- struct ptlrpc_service *svc = rs->rs_service;
-
- spin_lock(&svc->srv_lock);
- list_add(&rs->rs_list,
- &svc->srv_free_rs_list);
- spin_unlock(&svc->srv_lock);
- cfs_waitq_signal(&svc->srv_free_rs_waitq);
- } else {
- OBD_FREE(rs, rs->rs_size);
- }
+ sptlrpc_svc_free_rs(rs);
}
int lustre_unpack_msg_v1(void *msg, int len)
LASSERT (!(md->options & (LNET_MD_IOVEC | LNET_MD_KIOV | LNET_MD_PHYS)));
md->options |= LNET_MD_KIOV;
- md->start = &desc->bd_iov[0];
+ md->start = desc->bd_enc_iov ? desc->bd_enc_iov : &desc->bd_iov[0];
md->length = desc->bd_iov_count;
}
}
}
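+/*
+ * Allocate a shadow kiov array whose pages will hold the encrypted copy
+ * of the bulk data. On partial page-allocation failure everything
+ * allocated so far is unwound and bd_enc_iov is left NULL.
+ */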
+int ptlrpc_bulk_alloc_enc_pages(struct ptlrpc_bulk_desc *desc)
+{
+ int i, alloc_size;
+
+ LASSERT(desc->bd_enc_iov == NULL);
+
+ if (desc->bd_iov_count == 0)
+ return 0;
+
+ alloc_size = desc->bd_iov_count * sizeof(desc->bd_enc_iov[0]);
+
+ OBD_ALLOC(desc->bd_enc_iov, alloc_size);
+ if (desc->bd_enc_iov == NULL)
+ return -ENOMEM;
+
+ memcpy(desc->bd_enc_iov, desc->bd_iov, alloc_size);
+
+ for (i = 0; i < desc->bd_iov_count; i++) {
+ desc->bd_enc_iov[i].kiov_page =
+ cfs_alloc_page(CFS_ALLOC_IO | CFS_ALLOC_HIGH);
+ if (desc->bd_enc_iov[i].kiov_page == NULL) {
+ CERROR("Failed to alloc %d encryption pages\n",
+ desc->bd_iov_count);
+ break;
+ }
+ }
+
+ if (i == desc->bd_iov_count)
+ return 0;
+
+ /* error, cleanup */
+ for (i = i - 1; i >= 0; i--)
+ __free_page(desc->bd_enc_iov[i].kiov_page);
+ OBD_FREE(desc->bd_enc_iov, alloc_size);
+ desc->bd_enc_iov = NULL;
+ return -ENOMEM;
+}
+
+void ptlrpc_bulk_free_enc_pages(struct ptlrpc_bulk_desc *desc)
+{
+ int i;
+
+ if (desc->bd_enc_iov == NULL)
+ return;
+
+ for (i = 0; i < desc->bd_iov_count; i++) {
+ LASSERT(desc->bd_enc_iov[i].kiov_page);
+ __free_page(desc->bd_enc_iov[i].kiov_page);
+ }
+
+ OBD_FREE(desc->bd_enc_iov,
+ desc->bd_iov_count * sizeof(desc->bd_enc_iov[0]));
+ desc->bd_enc_iov = NULL;
+}
+
#else /* !__KERNEL__ */
void ptlrpc_fill_bulk_md(lnet_md_t *md, struct ptlrpc_bulk_desc *desc)
memset(iov->iov_base, 0xab, iov->iov_len);
}
}
+
+int ptlrpc_bulk_alloc_enc_pages(struct ptlrpc_bulk_desc *desc)
+{
+ return 0;
+}
+void ptlrpc_bulk_free_enc_pages(struct ptlrpc_bulk_desc *desc)
+{
+}
#endif /* !__KERNEL__ */
}
#ifdef __KERNEL__
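+/*
+ * called from the pinger: if the security module scheduled a forced
+ * reconnect (imp_next_reconnect) and it is now due, flush the root
+ * context and kick off a reconnect. returns 1 if a reconnect was issued.
+ */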
+static
+int check_import_reconnect(struct obd_import *imp)
+{
+ spin_lock(&imp->imp_lock);
+
+ /* next_reconnect == 0 means a reconnect is never needed.
+ */
+ if (imp->imp_next_reconnect == 0 ||
+ cfs_time_before(cfs_time_current_sec(), imp->imp_next_reconnect)) {
+ spin_unlock(&imp->imp_lock);
+ return 0;
+ }
+
+ if (imp->imp_state != LUSTRE_IMP_FULL ||
+ imp->imp_force_reconnect == 1) {
+ spin_unlock(&imp->imp_lock);
+ return 0;
+ }
+
+ imp->imp_force_reconnect = 1;
+
+ /* prevent concurrent reconnects. if this reconnect fails, the import
+ * will be set to non-FULL; on success, the next_reconnect value will
+ * be updated by the security module.
+ */
+ imp->imp_next_reconnect = 0;
+
+ spin_unlock(&imp->imp_lock);
+
+ CWARN("issue a force reconnect on imp %p(%s) to %s\n",
+ imp, ptlrpc_import_state_name(imp->imp_state),
+ imp->imp_obd->u.cli.cl_target_uuid.uuid);
+
+ /* usually the root context is still valid here, because the import
+ * reconnect is scheduled well in advance, so there is little chance
+ * that a newly created, still-refreshing context gets wrongly flushed
+ * by us. even if that happens we are still fine.
+ */
+ sptlrpc_import_flush_root_ctx(imp);
+
+ ptlrpc_connect_import(imp, NULL);
+ return 1;
+}
+
static int ptlrpc_pinger_main(void *arg)
{
struct ptlrpc_svc_data *data = (struct ptlrpc_svc_data *)arg;
imp_pinger_chain);
int force, level;
+ if (check_import_reconnect(imp)) {
+ /* if a forced reconnect was issued, we don't
+ * need an additional ping at this time.
+ */
+ if (imp->imp_pingable)
+ ptlrpc_update_next_ping(imp);
+ continue;
+ }
+
spin_lock(&imp->imp_lock);
level = imp->imp_state;
force = imp->imp_force_verify;
int pageoffset, int len);
void ptl_rpc_wipe_bulk_pages(struct ptlrpc_bulk_desc *desc);
+/* pack_generic.c */
+struct ptlrpc_reply_state *lustre_get_emerg_rs(struct ptlrpc_service *svc);
+void lustre_put_emerg_rs(struct ptlrpc_reply_state *rs);
+
/* pinger.c */
int ptlrpc_start_pinger(void);
int ptlrpc_stop_pinger(void);
#define ping_evictor_wake(exp) 1
#endif
+/* sec_null.c */
+int sptlrpc_null_init(void);
+int sptlrpc_null_exit(void);
+
+/* sec_plain.c */
+int sptlrpc_plain_init(void);
+int sptlrpc_plain_exit(void);
+
+/* sec.c */
+int sptlrpc_init(void);
+int sptlrpc_exit(void);
+
#endif /* PTLRPC_INTERNAL_H */
rc = ldlm_init();
if (rc)
GOTO(cleanup, rc);
+ cleanup_phase = 5;
+
+ rc = sptlrpc_init();
+ if (rc)
+ GOTO(cleanup, rc);
+
RETURN(0);
cleanup:
switch(cleanup_phase) {
+ case 5:
+ ldlm_exit();
case 4:
ptlrpc_stop_pinger();
case 3:
#ifdef __KERNEL__
static void __exit ptlrpc_exit(void)
{
+ sptlrpc_exit();
ldlm_exit();
ptlrpc_stop_pinger();
ptlrpc_exit_portals();
EXPORT_SYMBOL(lustre_msg_check_version);
EXPORT_SYMBOL(lustre_pack_request);
EXPORT_SYMBOL(lustre_pack_reply);
-EXPORT_SYMBOL(lustre_shrink_reply);
+EXPORT_SYMBOL(lustre_shrink_msg);
EXPORT_SYMBOL(lustre_free_reply_state);
EXPORT_SYMBOL(lustre_msg_size);
EXPORT_SYMBOL(lustre_unpack_msg);
EXPORT_SYMBOL(ptlrpc_fail_import);
EXPORT_SYMBOL(ptlrpc_recover_import);
+/* pers.c */
+EXPORT_SYMBOL(ptlrpc_bulk_alloc_enc_pages);
+EXPORT_SYMBOL(ptlrpc_bulk_free_enc_pages);
+
/* pinger.c */
EXPORT_SYMBOL(ptlrpc_pinger_add_import);
EXPORT_SYMBOL(ptlrpc_pinger_del_import);
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2004-2006 Cluster File Systems, Inc.
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef EXPORT_SYMTAB
+#define EXPORT_SYMTAB
+#endif
+#define DEBUG_SUBSYSTEM S_SEC
+
+#include <libcfs/libcfs.h>
+#ifndef __KERNEL__
+#include <liblustre.h>
+#include <libcfs/list.h>
+#else
+#include <linux/crypto.h>
+#endif
+
+#include <obd.h>
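+ /* pick up a security context early, either the caller-supplied one
+ * or one looked up in the import's context cache, so the request's
+ * security flavor is known before the request buffer is allocated. */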
+#include <obd_class.h>
+#include <obd_support.h>
+#include <lustre_net.h>
+#include <lustre_import.h>
+#include <lustre_dlm.h>
+#include <lustre_sec.h>
+
+#include "ptlrpc_internal.h"
+
+static void sptlrpc_sec_destroy(struct ptlrpc_sec *sec);
+static int sptlrpc_sec_destroy_ctx(struct ptlrpc_sec *sec,
+ struct ptlrpc_cli_ctx *ctx);
+static void sptlrpc_ctx_refresh(struct ptlrpc_cli_ctx *ctx);
+
+/***********************************************
+ * policy registers *
+ ***********************************************/
+
+static spinlock_t policy_lock = SPIN_LOCK_UNLOCKED;
+static struct ptlrpc_sec_policy *policies[SPTLRPC_POLICY_MAX] = {
+ NULL,
+};
+
+int sptlrpc_register_policy(struct ptlrpc_sec_policy *policy)
+{
+ __u32 number = policy->sp_policy;
+
+ LASSERT(policy->sp_name);
+ LASSERT(policy->sp_cops);
+ LASSERT(policy->sp_sops);
+
+ if (number >= SPTLRPC_POLICY_MAX)
+ return -EINVAL;
+
+ spin_lock(&policy_lock);
+ if (policies[number]) {
+ spin_unlock(&policy_lock);
+ return -EALREADY;
+ }
+ policies[number] = policy;
+ spin_unlock(&policy_lock);
+
+ CDEBUG(D_SEC, "%s: registered\n", policy->sp_name);
+ return 0;
+}
+EXPORT_SYMBOL(sptlrpc_register_policy);
+
+int sptlrpc_unregister_policy(struct ptlrpc_sec_policy *policy)
+{
+ __u32 number = policy->sp_policy;
+
+ LASSERT(number < SPTLRPC_POLICY_MAX);
+
+ spin_lock(&policy_lock);
+ if (!policies[number]) {
+ spin_unlock(&policy_lock);
+ CERROR("%s: already unregistered\n", policy->sp_name);
+ return -EINVAL;
+ }
+
+ LASSERT(policies[number] == policy);
+ policies[number] = NULL;
+ spin_unlock(&policy_lock);
+
+ CDEBUG(D_SEC, "%s: unregistered\n", policy->sp_name);
+ return 0;
+}
+EXPORT_SYMBOL(sptlrpc_unregister_policy);
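+
+/*
+ * Usage sketch (hypothetical policy module; field values here are
+ * assumptions, only the field names asserted above are given): a policy
+ * fills in a struct ptlrpc_sec_policy and registers it from module init:
+ *
+ *	static struct ptlrpc_sec_policy my_policy = {
+ *		.sp_name   = "example",
+ *		.sp_policy = SPTLRPC_POLICY_PLAIN,
+ *		.sp_cops   = &my_cli_ops,
+ *		.sp_sops   = &my_svc_ops,
+ *	};
+ *
+ *	rc = sptlrpc_register_policy(&my_policy);
+ *
+ * and calls sptlrpc_unregister_policy(&my_policy) on module exit.
+ */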
+
+static
+struct ptlrpc_sec_policy * sptlrpc_flavor2policy(ptlrpc_flavor_t flavor)
+{
+ static int load_module = 0;
+ struct ptlrpc_sec_policy *policy;
+ __u32 number = SEC_FLAVOR_POLICY(flavor);
+
+ if (number >= SPTLRPC_POLICY_MAX)
+ return NULL;
+
+again:
+ spin_lock(&policy_lock);
+ policy = policies[number];
+ if (policy && !try_module_get(policy->sp_owner))
+ policy = NULL;
+ spin_unlock(&policy_lock);
+
+ /* on failure, try to load the gss module, once */
+ if (policy == NULL && load_module == 0 &&
+ number == SPTLRPC_POLICY_GSS) {
+ load_module = 1;
+ if (request_module("ptlrpc_gss") == 0)
+ goto again;
+ }
+
+ return policy;
+}
+
+ptlrpc_flavor_t sptlrpc_name2flavor(const char *name)
+{
+ if (!strcmp(name, "null"))
+ return SPTLRPC_FLVR_NULL;
+ if (!strcmp(name, "plain"))
+ return SPTLRPC_FLVR_PLAIN;
+ if (!strcmp(name, "krb5"))
+ return SPTLRPC_FLVR_KRB5;
+ if (!strcmp(name, "krb5i"))
+ return SPTLRPC_FLVR_KRB5I;
+ if (!strcmp(name, "krb5p"))
+ return SPTLRPC_FLVR_KRB5P;
+
+ return SPTLRPC_FLVR_INVALID;
+}
+EXPORT_SYMBOL(sptlrpc_name2flavor);
+
+char *sptlrpc_flavor2name(ptlrpc_flavor_t flavor)
+{
+ switch (flavor) {
+ case SPTLRPC_FLVR_NULL:
+ return "null";
+ case SPTLRPC_FLVR_PLAIN:
+ return "plain";
+ case SPTLRPC_FLVR_KRB5:
+ return "krb5";
+ case SPTLRPC_FLVR_KRB5I:
+ return "krb5i";
+ case SPTLRPC_FLVR_KRB5P:
+ return "krb5p";
+ default:
+ CERROR("invalid flavor 0x%x(p%u,s%u,v%u)\n", flavor,
+ SEC_FLAVOR_POLICY(flavor), SEC_FLAVOR_SUBPOLICY(flavor),
+ SEC_FLAVOR_SVC(flavor));
+ }
+ return "UNKNOWN";
+}
+EXPORT_SYMBOL(sptlrpc_flavor2name);
+
+/***********************************************
+ * context helpers *
+ * internal APIs *
+ * cache management *
+ ***********************************************/
+
+static inline
+unsigned long ctx_status(struct ptlrpc_cli_ctx *ctx)
+{
+ smp_mb();
+ return (ctx->cc_flags & PTLRPC_CTX_STATUS_MASK);
+}
+
+static inline
+int ctx_is_uptodate(struct ptlrpc_cli_ctx *ctx)
+{
+ return (ctx_status(ctx) == PTLRPC_CTX_UPTODATE);
+}
+
+static inline
+int ctx_is_refreshed(struct ptlrpc_cli_ctx *ctx)
+{
+ return (ctx_status(ctx) != 0);
+}
+
+static inline
+int ctx_is_dead(struct ptlrpc_cli_ctx *ctx)
+{
+ smp_mb();
+ return ((ctx->cc_flags & (PTLRPC_CTX_DEAD | PTLRPC_CTX_ERROR)) != 0);
+}
+
+static inline
+int ctx_is_eternal(struct ptlrpc_cli_ctx *ctx)
+{
+ smp_mb();
+ return ((ctx->cc_flags & PTLRPC_CTX_ETERNAL) != 0);
+}
+
+static
+int ctx_expire(struct ptlrpc_cli_ctx *ctx)
+{
+ LASSERT(atomic_read(&ctx->cc_refcount));
+
+ if (!test_and_set_bit(PTLRPC_CTX_DEAD_BIT, &ctx->cc_flags)) {
+ cfs_time_t now = cfs_time_current_sec();
+
+ smp_mb();
+ clear_bit(PTLRPC_CTX_UPTODATE_BIT, &ctx->cc_flags);
+
+ if (ctx->cc_expire && cfs_time_aftereq(now, ctx->cc_expire))
+ CWARN("ctx %p(%u->%s): get expired (%lds exceeds)\n",
+ ctx, ctx->cc_vcred.vc_uid,
+ sec2target_str(ctx->cc_sec),
+ cfs_time_sub(now, ctx->cc_expire));
+ else
+ CWARN("ctx %p(%u->%s): force to die (%lds remains)\n",
+ ctx, ctx->cc_vcred.vc_uid,
+ sec2target_str(ctx->cc_sec),
+ ctx->cc_expire == 0 ? 0 :
+ cfs_time_sub(ctx->cc_expire, now));
+
+ return 1;
+ }
+ return 0;
+}
+
+static
+void ctx_enhash(struct ptlrpc_cli_ctx *ctx, struct hlist_head *hash)
+{
+ set_bit(PTLRPC_CTX_HASHED_BIT, &ctx->cc_flags);
+ atomic_inc(&ctx->cc_refcount);
+ hlist_add_head(&ctx->cc_hash, hash);
+}
+
+static
+void ctx_unhash(struct ptlrpc_cli_ctx *ctx, struct hlist_head *freelist)
+{
+ LASSERT_SPIN_LOCKED(&ctx->cc_sec->ps_lock);
+ LASSERT(atomic_read(&ctx->cc_refcount) > 0);
+ LASSERT(test_bit(PTLRPC_CTX_HASHED_BIT, &ctx->cc_flags));
+ LASSERT(!hlist_unhashed(&ctx->cc_hash));
+
+ clear_bit(PTLRPC_CTX_HASHED_BIT, &ctx->cc_flags);
+
+ if (atomic_dec_and_test(&ctx->cc_refcount)) {
+ __hlist_del(&ctx->cc_hash);
+ hlist_add_head(&ctx->cc_hash, freelist);
+ } else
+ hlist_del_init(&ctx->cc_hash);
+}
+
+/*
+ * return 1 if the context is dead.
+ */
+static
+int ctx_check_death(struct ptlrpc_cli_ctx *ctx, struct hlist_head *freelist)
+{
+ if (unlikely(ctx_is_dead(ctx)))
+ goto unhash;
+
+ /* expire == 0 means never expire. a newly created gss context
+ * that is still in upcall also has 0 expiration.
+ */
+ smp_mb();
+ if (ctx->cc_expire == 0)
+ return 0;
+
+ /* check real expiration */
+ smp_mb();
+ if (cfs_time_after(ctx->cc_expire, cfs_time_current_sec()))
+ return 0;
+
+ ctx_expire(ctx);
+
+unhash:
+ if (freelist)
+ ctx_unhash(ctx, freelist);
+
+ return 1;
+}
+
+static inline
+int ctx_check_death_locked(struct ptlrpc_cli_ctx *ctx,
+ struct hlist_head *freelist)
+{
+ LASSERT(ctx->cc_sec);
+ LASSERT(atomic_read(&ctx->cc_refcount) > 0);
+ LASSERT_SPIN_LOCKED(&ctx->cc_sec->ps_lock);
+ LASSERT(test_bit(PTLRPC_CTX_HASHED_BIT, &ctx->cc_flags));
+
+ return ctx_check_death(ctx, freelist);
+}
+
+static
+int ctx_check_uptodate(struct ptlrpc_cli_ctx *ctx)
+{
+ LASSERT(ctx->cc_sec);
+ LASSERT(atomic_read(&ctx->cc_refcount) > 0);
+
+ if (!ctx_check_death(ctx, NULL) && ctx_is_uptodate(ctx))
+ return 1;
+ return 0;
+}
+
+static inline
+int ctx_match(struct ptlrpc_cli_ctx *ctx, struct vfs_cred *vcred)
+{
+ /* a small optimization for the null policy */
+ if (!ctx->cc_ops->match)
+ return 1;
+
+ return ctx->cc_ops->match(ctx, vcred);
+}
+
+static
+void ctx_list_destroy(struct hlist_head *head)
+{
+ struct ptlrpc_cli_ctx *ctx;
+
+ while (!hlist_empty(head)) {
+ ctx = hlist_entry(head->first, struct ptlrpc_cli_ctx, cc_hash);
+
+ LASSERT(atomic_read(&ctx->cc_refcount) == 0);
+ LASSERT(test_bit(PTLRPC_CTX_HASHED_BIT, &ctx->cc_flags) == 0);
+
+ hlist_del_init(&ctx->cc_hash);
+ sptlrpc_sec_destroy_ctx(ctx->cc_sec, ctx);
+ }
+}
+
+static
+void ctx_cache_gc(struct ptlrpc_sec *sec, struct hlist_head *freelist)
+{
+ struct ptlrpc_cli_ctx *ctx;
+ struct hlist_node *pos, *next;
+ int i;
+ ENTRY;
+
+ CDEBUG(D_SEC, "do gc on sec %s@%p\n", sec->ps_policy->sp_name, sec);
+
+ for (i = 0; i < sec->ps_ccache_size; i++) {
+ hlist_for_each_entry_safe(ctx, pos, next,
+ &sec->ps_ccache[i], cc_hash)
+ ctx_check_death_locked(ctx, freelist);
+ }
+
+ sec->ps_gc_next = cfs_time_current_sec() + sec->ps_gc_interval;
+ EXIT;
+}
+
+/*
+ * @uid: which user. "-1" means flush all.
+ * @grace: mark context DEAD, allowing graceful destroy such as
+ * notifying the server side, etc.
+ * @force: also flush busy entries.
+ *
+ * return the number of busy contexts encountered.
+ *
+ * In any case, never touch "eternal" contexts.
+ */
+static
+int ctx_cache_flush(struct ptlrpc_sec *sec, uid_t uid, int grace, int force)
+{
+ struct ptlrpc_cli_ctx *ctx;
+ struct hlist_node *pos, *next;
+ HLIST_HEAD(freelist);
+ int i, busy = 0;
+ ENTRY;
+
+ might_sleep_if(grace);
+
+ spin_lock(&sec->ps_lock);
+ for (i = 0; i < sec->ps_ccache_size; i++) {
+ hlist_for_each_entry_safe(ctx, pos, next,
+ &sec->ps_ccache[i], cc_hash) {
+ LASSERT(atomic_read(&ctx->cc_refcount) > 0);
+
+ if (ctx_is_eternal(ctx))
+ continue;
+ if (uid != -1 && uid != ctx->cc_vcred.vc_uid)
+ continue;
+
+ if (atomic_read(&ctx->cc_refcount) > 1) {
+ busy++;
+ if (!force)
+ continue;
+
+ CWARN("flush busy(%d) ctx %p(%u->%s) by force, "
+ "grace %d\n",
+ atomic_read(&ctx->cc_refcount),
+ ctx, ctx->cc_vcred.vc_uid,
+ sec2target_str(ctx->cc_sec), grace);
+ }
+ ctx_unhash(ctx, &freelist);
+
+ set_bit(PTLRPC_CTX_DEAD_BIT, &ctx->cc_flags);
+ if (!grace)
+ clear_bit(PTLRPC_CTX_UPTODATE_BIT,
+ &ctx->cc_flags);
+ }
+ }
+ spin_unlock(&sec->ps_lock);
+
+ ctx_list_destroy(&freelist);
+ RETURN(busy);
+}
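+
+/*
+ * Example (hypothetical caller): gracefully flush all contexts of uid 500
+ * without forcing out busy ones, and report how many were left busy:
+ *
+ *	int busy = ctx_cache_flush(sec, 500, 1, 0);
+ *	if (busy)
+ *		CDEBUG(D_SEC, "%d contexts still busy\n", busy);
+ */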
+
+static inline
+unsigned int ctx_hash_index(struct ptlrpc_sec *sec, __u64 key)
+{
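+ /* ps_ccache_size is assumed to be a power of two, so masking with
+ * (size - 1) below is an exact modulo. */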
+ return (unsigned int) (key & (sec->ps_ccache_size - 1));
+}
+
+/*
+ * return the matched context. If it's a newly created one, we also give
+ * it the first push to refresh. return NULL if an error happens.
+ */
+static
+struct ptlrpc_cli_ctx * ctx_cache_lookup(struct ptlrpc_sec *sec,
+ struct vfs_cred *vcred,
+ int create, int remove_dead)
+{
+ struct ptlrpc_cli_ctx *ctx = NULL, *new = NULL;
+ struct hlist_head *hash_head;
+ struct hlist_node *pos, *next;
+ HLIST_HEAD(freelist);
+ unsigned int hash, gc = 0, found = 0;
+ ENTRY;
+
+ might_sleep();
+
+ hash = ctx_hash_index(sec, (__u64) vcred->vc_uid);
+ LASSERT(hash < sec->ps_ccache_size);
+ hash_head = &sec->ps_ccache[hash];
+
+retry:
+ spin_lock(&sec->ps_lock);
+
+ /* gc_next == 0 means never do gc */
+ if (remove_dead && sec->ps_gc_next &&
+ cfs_time_after(cfs_time_current_sec(), sec->ps_gc_next)) {
+ ctx_cache_gc(sec, &freelist);
+ gc = 1;
+ }
+
+ hlist_for_each_entry_safe(ctx, pos, next, hash_head, cc_hash) {
+ if (gc == 0 &&
+ ctx_check_death_locked(ctx, remove_dead ? &freelist : NULL))
+ continue;
+
+ if (ctx_match(ctx, vcred)) {
+ found = 1;
+ break;
+ }
+ }
+
+ if (found) {
+ if (new && new != ctx) {
+ /* lost the race, just free it */
+ hlist_add_head(&new->cc_hash, &freelist);
+ new = NULL;
+ }
+
+ /* hot node, move to head */
+ if (hash_head->first != &ctx->cc_hash) {
+ __hlist_del(&ctx->cc_hash);
+ hlist_add_head(&ctx->cc_hash, hash_head);
+ }
+ } else {
+ /* don't allocate for reverse sec */
+ if (sec->ps_flags & PTLRPC_SEC_FL_REVERSE) {
+ spin_unlock(&sec->ps_lock);
+ RETURN(NULL);
+ }
+
+ if (new) {
+ ctx_enhash(new, hash_head);
+ ctx = new;
+ } else if (create) {
+ spin_unlock(&sec->ps_lock);
+ new = sec->ps_policy->sp_cops->create_ctx(sec, vcred);
+ if (new) {
+ atomic_inc(&sec->ps_busy);
+ goto retry;
+ }
+ } else
+ ctx = NULL;
+ }
+
+ /* hold a ref */
+ if (ctx)
+ atomic_inc(&ctx->cc_refcount);
+
+ spin_unlock(&sec->ps_lock);
+
+ /* the allocator of the context must give the first push to refresh */
+ if (new) {
+ LASSERT(new == ctx);
+ sptlrpc_ctx_refresh(new);
+ }
+
+ ctx_list_destroy(&freelist);
+ RETURN(ctx);
+}
+
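+/*
+ * pick a context for the current user. reverse secs never create or gc
+ * contexts and always act as root; ROOTONLY secs map every user to root
+ * as well.
+ */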
+static inline
+struct ptlrpc_cli_ctx *get_my_ctx(struct ptlrpc_sec *sec)
+{
+ struct vfs_cred vcred = { cfs_current()->uid, cfs_current()->gid };
+ int create = 1, remove_dead = 1;
+
+ if (sec->ps_flags & PTLRPC_SEC_FL_REVERSE) {
+ vcred.vc_uid = 0;
+ create = 0;
+ remove_dead = 0;
+ } else if (sec->ps_flags & PTLRPC_SEC_FL_ROOTONLY)
+ vcred.vc_uid = 0;
+
+ if (sec->ps_policy->sp_cops->lookup_ctx)
+ return sec->ps_policy->sp_cops->lookup_ctx(sec, &vcred);
+ else
+ return ctx_cache_lookup(sec, &vcred, create, remove_dead);
+}
+
+/**************************************************
+ * client context APIs *
+ **************************************************/
+
+static
+void sptlrpc_ctx_refresh(struct ptlrpc_cli_ctx *ctx)
+{
+ LASSERT(atomic_read(&ctx->cc_refcount) > 0);
+
+ if (!ctx_is_refreshed(ctx) && ctx->cc_ops->refresh)
+ ctx->cc_ops->refresh(ctx);
+}
+
+struct ptlrpc_cli_ctx *sptlrpc_ctx_get(struct ptlrpc_cli_ctx *ctx)
+{
+ LASSERT(atomic_read(&ctx->cc_refcount) > 0);
+ atomic_inc(&ctx->cc_refcount);
+ return ctx;
+}
+EXPORT_SYMBOL(sptlrpc_ctx_get);
+
+void sptlrpc_ctx_put(struct ptlrpc_cli_ctx *ctx, int sync)
+{
+ struct ptlrpc_sec *sec = ctx->cc_sec;
+
+ LASSERT(sec);
+ LASSERT(atomic_read(&ctx->cc_refcount));
+
+ if (!atomic_dec_and_test(&ctx->cc_refcount))
+ return;
+
+ LASSERT(test_bit(PTLRPC_CTX_HASHED_BIT, &ctx->cc_flags) == 0);
+ LASSERT(hlist_unhashed(&ctx->cc_hash));
+
+ /* if async is required, we must clear the UPTODATE bit to prevent
+ * extra rpcs during the destroy procedure.
+ */
+ if (!sync)
+ clear_bit(PTLRPC_CTX_UPTODATE_BIT, &ctx->cc_flags);
+
+ /* destroy this context */
+ if (!sptlrpc_sec_destroy_ctx(sec, ctx))
+ return;
+
+ CWARN("%s@%p: put last ctx, also destroy the sec\n",
+ sec->ps_policy->sp_name, sec);
+
+ sptlrpc_sec_destroy(sec);
+}
+EXPORT_SYMBOL(sptlrpc_ctx_put);
+
+/*
+ * mark a ctx as DEAD, and pull it out from hash table.
+ *
+ * NOTE: the caller must hold at least 1 ref on the ctx.
+ */
+void sptlrpc_ctx_expire(struct ptlrpc_cli_ctx *ctx)
+{
+ LASSERT(ctx->cc_sec);
+ LASSERT(atomic_read(&ctx->cc_refcount) > 0);
+
+ ctx_expire(ctx);
+
+ spin_lock(&ctx->cc_sec->ps_lock);
+
+ if (test_and_clear_bit(PTLRPC_CTX_HASHED_BIT, &ctx->cc_flags)) {
+ LASSERT(!hlist_unhashed(&ctx->cc_hash));
+ LASSERT(atomic_read(&ctx->cc_refcount) > 1);
+
+ hlist_del_init(&ctx->cc_hash);
+ if (atomic_dec_and_test(&ctx->cc_refcount))
+ LBUG();
+ }
+
+ spin_unlock(&ctx->cc_sec->ps_lock);
+}
+EXPORT_SYMBOL(sptlrpc_ctx_expire);
+
+void sptlrpc_ctx_replace(struct ptlrpc_sec *sec, struct ptlrpc_cli_ctx *new)
+{
+ struct ptlrpc_cli_ctx *ctx;
+ struct hlist_node *pos, *next;
+ HLIST_HEAD(freelist);
+ unsigned int hash;
+ ENTRY;
+
+ hash = ctx_hash_index(sec, (__u64) new->cc_vcred.vc_uid);
+ LASSERT(hash < sec->ps_ccache_size);
+
+ spin_lock(&sec->ps_lock);
+
+ hlist_for_each_entry_safe(ctx, pos, next,
+ &sec->ps_ccache[hash], cc_hash) {
+ if (!ctx_match(ctx, &new->cc_vcred))
+ continue;
+
+ ctx_expire(ctx);
+ ctx_unhash(ctx, &freelist);
+ break;
+ }
+
+ ctx_enhash(new, &sec->ps_ccache[hash]);
+ atomic_inc(&sec->ps_busy);
+
+ spin_unlock(&sec->ps_lock);
+
+ ctx_list_destroy(&freelist);
+ EXIT;
+}
+EXPORT_SYMBOL(sptlrpc_ctx_replace);
+
+int sptlrpc_req_get_ctx(struct ptlrpc_request *req)
+{
+ struct obd_import *imp = req->rq_import;
+ ENTRY;
+
+ LASSERT(!req->rq_cli_ctx);
+ LASSERT(imp);
+
+ req->rq_cli_ctx = get_my_ctx(imp->imp_sec);
+
+ if (!req->rq_cli_ctx) {
+ CERROR("req %p: fail to get context from cache\n", req);
+ RETURN(-ENOMEM);
+ }
+
+ RETURN(0);
+}
+
+void sptlrpc_ctx_wakeup(struct ptlrpc_cli_ctx *ctx)
+{
+ struct ptlrpc_request *req, *next;
+
+ spin_lock(&ctx->cc_lock);
+ list_for_each_entry_safe(req, next, &ctx->cc_req_list, rq_ctx_chain) {
+ list_del_init(&req->rq_ctx_chain);
+ ptlrpc_wake_client_req(req);
+ }
+ spin_unlock(&ctx->cc_lock);
+}
+EXPORT_SYMBOL(sptlrpc_ctx_wakeup);
+
+void sptlrpc_req_put_ctx(struct ptlrpc_request *req)
+{
+ ENTRY;
+
+ LASSERT(req);
+ LASSERT(req->rq_cli_ctx);
+
+ /* the request might be asked to release its context early while
+ * still on the context waiting list.
+ */
+ if (!list_empty(&req->rq_ctx_chain)) {
+ spin_lock(&req->rq_cli_ctx->cc_lock);
+ list_del_init(&req->rq_ctx_chain);
+ spin_unlock(&req->rq_cli_ctx->cc_lock);
+ }
+
+ /* this could be called with a spinlock held, use async mode */
+ sptlrpc_ctx_put(req->rq_cli_ctx, 0);
+ req->rq_cli_ctx = NULL;
+ EXIT;
+}
+
+/*
+ * the request must have a context. if we fail to get a new context,
+ * just restore the old one.
+ */
+int sptlrpc_req_replace_dead_ctx(struct ptlrpc_request *req)
+{
+ struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
+ int rc;
+ ENTRY;
+
+ LASSERT(ctx);
+ LASSERT(test_bit(PTLRPC_CTX_DEAD_BIT, &ctx->cc_flags));
+
+ /* make sure not on context waiting list */
+ spin_lock(&ctx->cc_lock);
+ list_del_init(&req->rq_ctx_chain);
+ spin_unlock(&ctx->cc_lock);
+
+ sptlrpc_ctx_get(ctx);
+ sptlrpc_req_put_ctx(req);
+ rc = sptlrpc_req_get_ctx(req);
+ if (!rc) {
+ LASSERT(req->rq_cli_ctx);
+ LASSERT(req->rq_cli_ctx != ctx);
+ sptlrpc_ctx_put(ctx, 1);
+ } else {
+ LASSERT(!req->rq_cli_ctx);
+ req->rq_cli_ctx = ctx;
+ }
+ RETURN(rc);
+}
+
+static
+int ctx_check_refresh(struct ptlrpc_cli_ctx *ctx)
+{
+ smp_mb();
+ if (ctx_is_refreshed(ctx))
+ return 1;
+ return 0;
+}
+
+static
+int ctx_refresh_timeout(void *data)
+{
+ struct ptlrpc_request *req = data;
+ int rc;
+
+ /* conn_cnt is needed in expire_one_request */
+ lustre_msg_set_conn_cnt(req->rq_reqmsg, req->rq_import->imp_conn_cnt);
+
+ rc = ptlrpc_expire_one_request(req);
+ /* if we started recovery, we should mark this ctx dead; otherwise,
+ * in case lgssd died, nobody would retire this ctx and a following
+ * connect would still find the same ctx, causing deadlock.
+ * this assumes the expire time of the request is later than the
+ * context refresh expire time.
+ */
+ if (rc == 0)
+ ctx_expire(req->rq_cli_ctx);
+ return rc;
+}
+
+static
+void ctx_refresh_interrupt(void *data)
+{
+ /* do nothing */
+}
+
+/*
+ * the context status may be changed by other threads at any time; we
+ * allow this race. but once we return 0, the caller will assume the
+ * context is up to date and keep using it until the affected rpc is done.
+ *
+ * @timeout:
+ * < 0 - don't wait
+ * = 0 - wait until success or a fatal error occurs
+ * > 0 - timeout value (in seconds)
+ *
+ * return 0 only if the context is up to date.
+ */
+int sptlrpc_req_refresh_ctx(struct ptlrpc_request *req, long timeout)
+{
+ struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
+ struct l_wait_info lwi;
+ int rc;
+ ENTRY;
+
+ LASSERT(ctx);
+
+ /* special ctxs */
+ if (ctx_is_eternal(ctx) || req->rq_ctx_init || req->rq_ctx_fini)
+ RETURN(0);
+
+ /* reverse ctxs, don't refresh */
+ if (ctx->cc_sec->ps_flags & PTLRPC_SEC_FL_REVERSE)
+ RETURN(0);
+
+ spin_lock(&ctx->cc_lock);
+again:
+ if (ctx_check_uptodate(ctx)) {
+ if (!list_empty(&req->rq_ctx_chain))
+ list_del_init(&req->rq_ctx_chain);
+ spin_unlock(&ctx->cc_lock);
+ RETURN(0);
+ }
+
+ if (test_bit(PTLRPC_CTX_ERROR_BIT, &ctx->cc_flags)) {
+ req->rq_err = 1;
+ if (!list_empty(&req->rq_ctx_chain))
+ list_del_init(&req->rq_ctx_chain);
+ spin_unlock(&ctx->cc_lock);
+ RETURN(-EPERM);
+ }
+
+ /* This is subtle. For a resent message we have to keep the original
+ * context to survive the following situation:
+ * 1. the request is sent to the server
+ * 2. recovery is kick started
+ * 3. recovery finishes, the request is marked as resent
+ * 4. the request is resent
+ * 5. the old reply arrives from the server (because the xid is the same)
+ * 6. the reply is verified (this has to succeed)
+ * 7. a new reply arrives from the server, lnet drops it
+ *
+ * Note we can't simply change the xid for a resent request because
+ * the server relies on it for reply reconstruction.
+ *
+ * Commonly the original context should be uptodate because we
+ * have a nice expiry margin; and the server will keep its half of
+ * the context because we hold at least one ref on the old context,
+ * which prevents the context destroy RPC from being sent. So the
+ * server can still accept the request and finish the RPC. Two cases:
+ * 1. If the server-side context has been trimmed, a NO_CONTEXT will
+ * be returned, and gss_cli_ctx_verify/unseal will switch to the
+ * new context by force.
+ * 2. The current context was never refreshed, then we are fine: we
+ * never really sent a request with the old context before.
+ */
+ if (test_bit(PTLRPC_CTX_UPTODATE_BIT, &ctx->cc_flags) &&
+ req->rq_reqmsg &&
+ lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT) {
+ if (!list_empty(&req->rq_ctx_chain))
+ list_del_init(&req->rq_ctx_chain);
+ spin_unlock(&ctx->cc_lock);
+ RETURN(0);
+ }
+
+ if (unlikely(test_bit(PTLRPC_CTX_DEAD_BIT, &ctx->cc_flags))) {
+ spin_unlock(&ctx->cc_lock);
+
+ /* don't have to, but we don't want to release it too soon */
+ sptlrpc_ctx_get(ctx);
+
+ rc = sptlrpc_req_replace_dead_ctx(req);
+ if (rc) {
+ LASSERT(ctx == req->rq_cli_ctx);
+ CERROR("req %p: failed to replace dead ctx %p\n",
+ req, ctx);
+ req->rq_err = 1;
+ LASSERT(list_empty(&req->rq_ctx_chain));
+ sptlrpc_ctx_put(ctx, 1);
+ RETURN(-ENOMEM);
+ }
+
+ LASSERT(ctx != req->rq_cli_ctx);
+ CWARN("req %p: replace dead ctx %p(%u->%s) => %p\n",
+ req, ctx, ctx->cc_vcred.vc_uid,
+ sec2target_str(ctx->cc_sec), req->rq_cli_ctx);
+
+ sptlrpc_ctx_put(ctx, 1);
+ ctx = req->rq_cli_ctx;
+ LASSERT(list_empty(&req->rq_ctx_chain));
+
+ spin_lock(&ctx->cc_lock);
+ goto again;
+ }
+
+ /* Now we're sure this context is in upcall; add ourselves to the
+ * waiting list.
+ */
+ if (list_empty(&req->rq_ctx_chain))
+ list_add(&req->rq_ctx_chain, &ctx->cc_req_list);
+
+ spin_unlock(&ctx->cc_lock);
+
+ if (timeout < 0) {
+ RETURN(-EWOULDBLOCK);
+ }
+
+ /* Clear any flags that may be present from previous sends */
+ LASSERT(req->rq_receiving_reply == 0);
+ spin_lock(&req->rq_lock);
+ req->rq_err = 0;
+ req->rq_timedout = 0;
+ req->rq_resend = 0;
+ req->rq_restart = 0;
+ spin_unlock(&req->rq_lock);
+
+ lwi = LWI_TIMEOUT_INTR(timeout == 0 ? LONG_MAX : timeout * HZ,
+ ctx_refresh_timeout, ctx_refresh_interrupt, req);
+ rc = l_wait_event(req->rq_reply_waitq, ctx_check_refresh(ctx), &lwi);
+
+ spin_lock(&ctx->cc_lock);
+ /* we get here in one of five cases:
+ * 1. successfully refreshed;
+ * 2. someone else marked this ctx dead by force;
+ * 3. interrupted;
+ * 4. timed out, and we don't want to recover from the failure;
+ * 5. timed out, and woken up when recovery finished;
+ */
+ if (!ctx_is_refreshed(ctx)) {
+ /* timed out or interrupted */
+ list_del_init(&req->rq_ctx_chain);
+ spin_unlock(&ctx->cc_lock);
+
+ LASSERT(rc != 0);
+ RETURN(rc);
+ }
+
+ goto again;
+}
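+
+/*
+ * Caller sketch (illustrative, matching the send paths earlier in this
+ * patch): a synchronous sender waits without bound, while the async send
+ * paths poll and park the request on rq_wait_ctx instead, e.g.
+ *
+ *	rc = sptlrpc_req_refresh_ctx(req, 0);	// block until refreshed
+ *	rc = sptlrpc_req_refresh_ctx(req, -1);	// don't block; caller sets
+ *						// rq_wait_ctx on failure
+ */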
+
+void sptlrpc_req_set_flavor(struct ptlrpc_request *req, int opcode)
+{
+ struct sec_flavor_config *conf;
+
+ LASSERT(req->rq_import);
+ LASSERT(req->rq_import->imp_sec);
+ LASSERT(req->rq_cli_ctx);
+ LASSERT(req->rq_cli_ctx->cc_sec);
+ LASSERT(req->rq_bulk_read == 0 || req->rq_bulk_write == 0);
+
+ /* special security flags according to the opcode */
+ switch (opcode) {
+ case OST_READ:
+ case OST_SAN_READ:
+ req->rq_bulk_read = 1;
+ break;
+ case OST_WRITE:
+ case OST_SAN_WRITE:
+ req->rq_bulk_write = 1;
+ break;
+ case SEC_CTX_INIT:
+ req->rq_ctx_init = 1;
+ break;
+ case SEC_CTX_FINI:
+ req->rq_ctx_fini = 1;
+ break;
+ }
+
+ req->rq_sec_flavor = req->rq_cli_ctx->cc_sec->ps_flavor;
+
+ /* force SVC_NONE for context initiation rpc, SVC_AUTH for context
+ * destruction rpc
+ */
+ if (unlikely(req->rq_ctx_init)) {
+ req->rq_sec_flavor = SEC_MAKE_RPC_FLAVOR(
+ SEC_FLAVOR_POLICY(req->rq_sec_flavor),
+ SEC_FLAVOR_SUBPOLICY(req->rq_sec_flavor),
+ SEC_FLAVOR_SVC(SPTLRPC_SVC_NONE));
+ } else if (unlikely(req->rq_ctx_fini)) {
+ req->rq_sec_flavor = SEC_MAKE_RPC_FLAVOR(
+ SEC_FLAVOR_POLICY(req->rq_sec_flavor),
+ SEC_FLAVOR_SUBPOLICY(req->rq_sec_flavor),
+ SEC_FLAVOR_SVC(SPTLRPC_SVC_AUTH));
+ }
+
+ conf = &req->rq_import->imp_obd->u.cli.cl_sec_conf;
+
+ /* set the user descriptor flag, except for ROOTONLY, which doesn't
+ * need it, and null security, which can't carry it.
+ */
+ if ((conf->sfc_flags & PTLRPC_SEC_FL_ROOTONLY) == 0 &&
+ req->rq_sec_flavor != SPTLRPC_FLVR_NULL)
+ req->rq_sec_flavor |= SEC_FLAVOR_FL_USER;
+
+ /* bulk security flag */
+ if ((req->rq_bulk_read || req->rq_bulk_write) &&
+ (conf->sfc_bulk_priv != BULK_PRIV_ALG_NULL ||
+ conf->sfc_bulk_csum != BULK_CSUM_ALG_NULL))
+ req->rq_sec_flavor |= SEC_FLAVOR_FL_BULK;
+}
+
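+/*
+ * called once the request has gone out on the wire: for privacy-mode
+ * rpcs the cleartext buffer (rq_clrbuf) is the one callers keep using,
+ * so the separate wire buffer can be freed right away; pool-allocated
+ * buffers are kept for reuse.
+ */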
+void sptlrpc_request_out_callback(struct ptlrpc_request *req)
+{
+ if (SEC_FLAVOR_SVC(req->rq_sec_flavor) != SPTLRPC_SVC_PRIV)
+ return;
+
+ LASSERT(req->rq_clrbuf);
+ if (req->rq_pool || !req->rq_reqbuf)
+ return;
+
+ OBD_FREE(req->rq_reqbuf, req->rq_reqbuf_len);
+ req->rq_reqbuf = NULL;
+ req->rq_reqbuf_len = 0;
+}
+
+/*
+ * check whether the current user has a valid context for an import.
+ * may retry repeatedly in case of non-fatal errors.
+ * return 0 on success, < 0 on failure
+ */
+int sptlrpc_import_check_ctx(struct obd_import *imp)
+{
+ struct ptlrpc_cli_ctx *ctx;
+ struct ptlrpc_request *req = NULL;
+ int rc;
+ ENTRY;
+
+ might_sleep();
+
+ ctx = get_my_ctx(imp->imp_sec);
+ if (!ctx)
+ RETURN(1);
+
+ if (ctx_is_eternal(ctx)) {
+ sptlrpc_ctx_put(ctx, 1);
+ RETURN(0);
+ }
+
+ OBD_ALLOC(req, sizeof(*req));
+ if (!req)
+ RETURN(-ENOMEM);
+
+ spin_lock_init(&req->rq_lock);
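+ /* artificially high refcount: this stand-in request is freed
+ * manually below, never through the normal request release path. */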
+ atomic_set(&req->rq_refcount, 10000);
+ INIT_LIST_HEAD(&req->rq_ctx_chain);
+ init_waitqueue_head(&req->rq_reply_waitq);
+ req->rq_import = imp;
+ req->rq_cli_ctx = ctx;
+
+ rc = sptlrpc_req_refresh_ctx(req, 0);
+ LASSERT(list_empty(&req->rq_ctx_chain));
+ sptlrpc_ctx_put(req->rq_cli_ctx, 1);
+ OBD_FREE(req, sizeof(*req));
+
+ RETURN(rc);
+}
+
+int sptlrpc_cli_wrap_request(struct ptlrpc_request *req)
+{
+ struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
+ int rc = 0;
+ ENTRY;
+
+ LASSERT(ctx);
+ LASSERT(ctx->cc_sec);
+ LASSERT(req->rq_reqbuf || req->rq_clrbuf);
+
+ /* we wrap the bulk request here because by now we can be sure
+ * the context is uptodate.
+ */
+ if (req->rq_bulk) {
+ rc = sptlrpc_cli_wrap_bulk(req, req->rq_bulk);
+ if (rc)
+ RETURN(rc);
+ }
+
+ switch (SEC_FLAVOR_SVC(req->rq_sec_flavor)) {
+ case SPTLRPC_SVC_NONE:
+ case SPTLRPC_SVC_AUTH:
+ LASSERT(ctx->cc_ops->sign);
+ rc = ctx->cc_ops->sign(ctx, req);
+ break;
+ case SPTLRPC_SVC_PRIV:
+ LASSERT(ctx->cc_ops->seal);
+ rc = ctx->cc_ops->seal(ctx, req);
+ break;
+ default:
+ LBUG();
+ }
+
+ if (rc == 0) {
+ LASSERT(req->rq_reqdata_len);
+ LASSERT(req->rq_reqdata_len % 8 == 0);
+ LASSERT(req->rq_reqdata_len <= req->rq_reqbuf_len);
+ }
+
+ RETURN(rc);
+}
+
+/*
+ * rq_nob_received is the actual received data length
+ */
+int sptlrpc_cli_unwrap_reply(struct ptlrpc_request *req)
+{
+ struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
+ int rc;
+ ENTRY;
+
+ LASSERT(ctx);
+ LASSERT(ctx->cc_sec);
+ LASSERT(ctx->cc_ops);
+ LASSERT(req->rq_repbuf);
+
+ req->rq_repdata_len = req->rq_nob_received;
+
+ if (req->rq_nob_received < sizeof(struct lustre_msg)) {
+ CERROR("replied data length %d too small\n",
+ req->rq_nob_received);
+ RETURN(-EPROTO);
+ }
+
+ if (req->rq_repbuf->lm_magic == LUSTRE_MSG_MAGIC_V1 ||
+ req->rq_repbuf->lm_magic == LUSTRE_MSG_MAGIC_V1_SWABBED) {
+ /* it must be the null flavor, so our request should also be
+ * in null flavor */
+ if (SEC_FLAVOR_POLICY(req->rq_sec_flavor) !=
+ SPTLRPC_POLICY_NULL) {
+ CERROR("request flavor is %x but reply with null\n",
+ req->rq_sec_flavor);
+ RETURN(-EPROTO);
+ }
+ } else {
+ /* v2 message... */
+ ptlrpc_flavor_t tmpf = req->rq_repbuf->lm_secflvr;
+
+ if (req->rq_repbuf->lm_magic == LUSTRE_MSG_MAGIC_V2_SWABBED)
+ __swab32s(&tmpf);
+
+ if (SEC_FLAVOR_POLICY(tmpf) !=
+ SEC_FLAVOR_POLICY(req->rq_sec_flavor)) {
+ CERROR("request policy %u while reply with %d\n",
+ SEC_FLAVOR_POLICY(req->rq_sec_flavor),
+ SEC_FLAVOR_POLICY(tmpf));
+ RETURN(-EPROTO);
+ }
+
+ if ((SEC_FLAVOR_POLICY(req->rq_sec_flavor) !=
+ SPTLRPC_POLICY_NULL) &&
+ lustre_unpack_msg(req->rq_repbuf, req->rq_nob_received))
+ RETURN(-EPROTO);
+ }
+
+ switch (SEC_FLAVOR_SVC(req->rq_sec_flavor)) {
+ case SPTLRPC_SVC_NONE:
+ case SPTLRPC_SVC_AUTH:
+ LASSERT(ctx->cc_ops->verify);
+ rc = ctx->cc_ops->verify(ctx, req);
+ break;
+ case SPTLRPC_SVC_PRIV:
+ LASSERT(ctx->cc_ops->unseal);
+ rc = ctx->cc_ops->unseal(ctx, req);
+ break;
+ default:
+ LBUG();
+ }
+
+ LASSERT(rc || req->rq_repmsg);
+ RETURN(rc);
+}
+
+/**************************************************
+ * security APIs *
+ **************************************************/
+
+/*
+ * let the policy module decide whether to take a reference on
+ * the import or not.
+ */
+static
+struct ptlrpc_sec * sptlrpc_sec_create(struct obd_import *imp,
+ struct ptlrpc_svc_ctx *ctx,
+ __u32 flavor,
+ unsigned long flags)
+{
+ struct ptlrpc_sec_policy *policy;
+ struct ptlrpc_sec *sec;
+ ENTRY;
+
+ flavor = SEC_FLAVOR_RPC(flavor);
+
+ if (ctx) {
+ LASSERT(imp->imp_dlm_fake == 1);
+
+ CDEBUG(D_SEC, "%s %s: reverse sec using flavor %s\n",
+ imp->imp_obd->obd_type->typ_name,
+ imp->imp_obd->obd_name,
+ sptlrpc_flavor2name(flavor));
+
+ policy = sptlrpc_policy_get(ctx->sc_policy);
+ flags |= PTLRPC_SEC_FL_REVERSE | PTLRPC_SEC_FL_ROOTONLY;
+ } else {
+ LASSERT(imp->imp_dlm_fake == 0);
+
+ CDEBUG(D_SEC, "%s %s: select security flavor %s\n",
+ imp->imp_obd->obd_type->typ_name,
+ imp->imp_obd->obd_name,
+ sptlrpc_flavor2name(flavor));
+
+ policy = sptlrpc_flavor2policy(flavor);
+ if (!policy) {
+ CERROR("invalid flavor 0x%x\n", flavor);
+ RETURN(NULL);
+ }
+ }
+
+ sec = policy->sp_cops->create_sec(imp, ctx, flavor, flags);
+ if (sec) {
+ atomic_inc(&sec->ps_refcount);
+
+ /* take 1 busy count on behalf of the sec itself,
+ * balanced in sptlrpc_sec_put()
+ */
+ atomic_inc(&sec->ps_busy);
+ } else
+ sptlrpc_policy_put(policy);
+
+ RETURN(sec);
+}
+
+static
+void sptlrpc_sec_destroy(struct ptlrpc_sec *sec)
+{
+ struct ptlrpc_sec_policy *policy = sec->ps_policy;
+
+ LASSERT(policy);
+ LASSERT(atomic_read(&sec->ps_refcount) == 0);
+ LASSERT(atomic_read(&sec->ps_busy) == 0);
+ LASSERT(policy->sp_cops->destroy_sec);
+
+ policy->sp_cops->destroy_sec(sec);
+ sptlrpc_policy_put(policy);
+}
+
+static
+void sptlrpc_sec_put(struct ptlrpc_sec *sec)
+{
+ struct ptlrpc_sec_policy *policy = sec->ps_policy;
+
+ if (!atomic_dec_and_test(&sec->ps_refcount)) {
+ sptlrpc_policy_put(policy);
+ return;
+ }
+
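+ /* flush contexts of all users (uid -1), in grace and force mode */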
+ ctx_cache_flush(sec, -1, 1, 1);
+
+ if (atomic_dec_and_test(&sec->ps_busy))
+ sptlrpc_sec_destroy(sec);
+ else
+ CWARN("delay to destroy %s@%p: busy contexts\n",
+ policy->sp_name, sec);
+}
+
+/*
+ * return 1 if the caller should also destroy the sec structure;
+ * normally return 0.
+ */
+static
+int sptlrpc_sec_destroy_ctx(struct ptlrpc_sec *sec,
+ struct ptlrpc_cli_ctx *ctx)
+{
+ LASSERT(sec == ctx->cc_sec);
+ LASSERT(atomic_read(&sec->ps_busy));
+ LASSERT(atomic_read(&ctx->cc_refcount) == 0);
+ LASSERT(hlist_unhashed(&ctx->cc_hash));
+ LASSERT(list_empty(&ctx->cc_req_list));
+ LASSERT(sec->ps_policy->sp_cops->destroy_ctx);
+
+ sec->ps_policy->sp_cops->destroy_ctx(sec, ctx);
+
+ if (atomic_dec_and_test(&sec->ps_busy)) {
+ LASSERT(atomic_read(&sec->ps_refcount) == 0);
+ return 1;
+ }
+
+ return 0;
+}
+
+/*
+ * on successful completion, req->rq_reqmsg points into the newly
+ * prepared request buffer.
+ */
+int sptlrpc_cli_alloc_reqbuf(struct ptlrpc_request *req, int msgsize)
+{
+ struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
+ struct ptlrpc_sec_policy *policy;
+ int rc;
+
+ LASSERT(ctx);
+ LASSERT(atomic_read(&ctx->cc_refcount));
+ LASSERT(ctx->cc_sec);
+ LASSERT(ctx->cc_sec->ps_policy);
+ LASSERT(req->rq_reqmsg == NULL);
+
+ policy = ctx->cc_sec->ps_policy;
+ rc = policy->sp_cops->alloc_reqbuf(ctx->cc_sec, req, msgsize);
+ if (!rc) {
+ LASSERT(req->rq_reqmsg);
+ LASSERT(req->rq_reqbuf || req->rq_clrbuf);
+
+ /* zeroing preallocated buffer */
+ if (req->rq_pool)
+ memset(req->rq_reqmsg, 0, msgsize);
+ }
+
+ return rc;
+}
+
+void sptlrpc_cli_free_reqbuf(struct ptlrpc_request *req)
+{
+ struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
+ struct ptlrpc_sec_policy *policy;
+
+ LASSERT(ctx);
+ LASSERT(atomic_read(&ctx->cc_refcount));
+ LASSERT(ctx->cc_sec);
+ LASSERT(ctx->cc_sec->ps_policy);
+ LASSERT(req->rq_reqbuf || req->rq_clrbuf);
+
+ policy = ctx->cc_sec->ps_policy;
+ policy->sp_cops->free_reqbuf(ctx->cc_sec, req);
+}
+
+int sptlrpc_cli_alloc_repbuf(struct ptlrpc_request *req, int msgsize)
+{
+ struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
+ struct ptlrpc_sec_policy *policy;
+ ENTRY;
+
+ LASSERT(ctx);
+ LASSERT(atomic_read(&ctx->cc_refcount));
+ LASSERT(ctx->cc_sec);
+ LASSERT(ctx->cc_sec->ps_policy);
+
+ if (req->rq_repbuf)
+ RETURN(0);
+
+ policy = ctx->cc_sec->ps_policy;
+ RETURN(policy->sp_cops->alloc_repbuf(ctx->cc_sec, req, msgsize));
+}
+
+void sptlrpc_cli_free_repbuf(struct ptlrpc_request *req)
+{
+ struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
+ struct ptlrpc_sec_policy *policy;
+ ENTRY;
+
+ LASSERT(ctx);
+ LASSERT(atomic_read(&ctx->cc_refcount));
+ LASSERT(ctx->cc_sec);
+ LASSERT(ctx->cc_sec->ps_policy);
+ LASSERT(req->rq_repbuf);
+
+ policy = ctx->cc_sec->ps_policy;
+ policy->sp_cops->free_repbuf(ctx->cc_sec, req);
+ EXIT;
+}
+
+int sptlrpc_import_get_sec(struct obd_import *imp,
+ struct ptlrpc_svc_ctx *ctx,
+ __u32 flavor,
+ unsigned long flags)
+{
+ struct obd_device *obd = imp->imp_obd;
+ ENTRY;
+
+ LASSERT(obd);
+ LASSERT(obd->obd_type);
+
+ /* the old sec might still be there while reconnecting */
+ if (imp->imp_sec)
+ RETURN(0);
+
+ imp->imp_sec = sptlrpc_sec_create(imp, ctx, flavor, flags);
+ if (!imp->imp_sec)
+ RETURN(-EINVAL);
+
+ RETURN(0);
+}
+
+void sptlrpc_import_put_sec(struct obd_import *imp)
+{
+ if (imp->imp_sec == NULL)
+ return;
+
+ sptlrpc_sec_put(imp->imp_sec);
+ imp->imp_sec = NULL;
+}
+
+void sptlrpc_import_flush_root_ctx(struct obd_import *imp)
+{
+ if (imp == NULL || imp->imp_sec == NULL)
+ return;
+
+ /* use 'grace' mode: this is crucial, see the explanation in
+ * sptlrpc_req_refresh_ctx()
+ */
+ ctx_cache_flush(imp->imp_sec, 0, 1, 1);
+}
+
+void sptlrpc_import_flush_my_ctx(struct obd_import *imp)
+{
+ if (imp == NULL || imp->imp_sec == NULL)
+ return;
+
+ ctx_cache_flush(imp->imp_sec, cfs_current()->uid, 1, 1);
+}
+EXPORT_SYMBOL(sptlrpc_import_flush_my_ctx);
+
+int sptlrpc_cli_install_rvs_ctx(struct obd_import *imp,
+ struct ptlrpc_cli_ctx *ctx)
+{
+ struct ptlrpc_sec_policy *policy = ctx->cc_sec->ps_policy;
+
+ if (!policy->sp_cops->install_rctx)
+ return 0;
+ return policy->sp_cops->install_rctx(imp, ctx->cc_sec, ctx);
+}
+
+int sptlrpc_svc_install_rvs_ctx(struct obd_import *imp,
+ struct ptlrpc_svc_ctx *ctx)
+{
+ struct ptlrpc_sec_policy *policy = ctx->sc_policy;
+
+ if (!policy->sp_sops->install_rctx)
+ return 0;
+ return policy->sp_sops->install_rctx(imp, ctx);
+}
+
+/****************************************
+ * server side security *
+ ****************************************/
+
+int sptlrpc_svc_unwrap_request(struct ptlrpc_request *req)
+{
+ struct ptlrpc_sec_policy *policy;
+ struct lustre_msg *msg = req->rq_reqbuf;
+ int rc;
+ ENTRY;
+
+ LASSERT(msg);
+ LASSERT(req->rq_reqmsg == NULL);
+ LASSERT(req->rq_repmsg == NULL);
+
+ /*
+ * in any case we avoid calling lustre_unpack_msg() for requests
+ * of null flavor; that will be done later by
+ * ptlrpc_server_handle_request().
+ */
+ if (req->rq_reqdata_len < sizeof(struct lustre_msg)) {
+ CERROR("request size %d too small\n", req->rq_reqdata_len);
+ RETURN(SECSVC_DROP);
+ }
+
+ if (msg->lm_magic == LUSTRE_MSG_MAGIC_V1 ||
+ msg->lm_magic == LUSTRE_MSG_MAGIC_V1_SWABBED) {
+ req->rq_sec_flavor = SPTLRPC_FLVR_NULL;
+ } else {
+ req->rq_sec_flavor = msg->lm_secflvr;
+
+ if (msg->lm_magic == LUSTRE_MSG_MAGIC_V2_SWABBED)
+ __swab32s(&req->rq_sec_flavor);
+
+ if ((SEC_FLAVOR_POLICY(req->rq_sec_flavor) !=
+ SPTLRPC_POLICY_NULL) &&
+ lustre_unpack_msg(msg, req->rq_reqdata_len))
+ RETURN(SECSVC_DROP);
+ }
+
+ policy = sptlrpc_flavor2policy(req->rq_sec_flavor);
+ if (!policy) {
+ CERROR("unsupported security flavor %x\n", req->rq_sec_flavor);
+ RETURN(SECSVC_DROP);
+ }
+
+ LASSERT(policy->sp_sops->accept);
+ rc = policy->sp_sops->accept(req);
+
+ LASSERT(req->rq_reqmsg || rc != SECSVC_OK);
+ sptlrpc_policy_put(policy);
+
+ /* FIXME move to proper place */
+ if (rc == SECSVC_OK) {
+ __u32 opc = lustre_msg_get_opc(req->rq_reqmsg);
+
+ if (opc == OST_WRITE || opc == OST_SAN_WRITE)
+ req->rq_bulk_write = 1;
+ else if (opc == OST_READ || opc == OST_SAN_READ)
+ req->rq_bulk_read = 1;
+ }
+
+ RETURN(rc);
+}
+
+int sptlrpc_svc_alloc_rs(struct ptlrpc_request *req,
+ int msglen)
+{
+ struct ptlrpc_sec_policy *policy;
+ struct ptlrpc_reply_state *rs;
+ int rc;
+ ENTRY;
+
+ LASSERT(req->rq_svc_ctx);
+ LASSERT(req->rq_svc_ctx->sc_policy);
+
+ policy = req->rq_svc_ctx->sc_policy;
+ LASSERT(policy->sp_sops->alloc_rs);
+
+ rc = policy->sp_sops->alloc_rs(req, msglen);
+ if (unlikely(rc == -ENOMEM)) {
+ /* failed alloc, try emergency pool */
+ rs = lustre_get_emerg_rs(req->rq_rqbd->rqbd_service);
+ if (rs == NULL)
+ RETURN(-ENOMEM);
+
+ req->rq_reply_state = rs;
+ rc = policy->sp_sops->alloc_rs(req, msglen);
+ if (rc) {
+ lustre_put_emerg_rs(rs);
+ req->rq_reply_state = NULL;
+ }
+ }
+
+ LASSERT(rc != 0 ||
+ (req->rq_reply_state && req->rq_reply_state->rs_msg));
+
+ RETURN(rc);
+}
+
+int sptlrpc_svc_wrap_reply(struct ptlrpc_request *req)
+{
+ struct ptlrpc_sec_policy *policy;
+ int rc;
+ ENTRY;
+
+ LASSERT(req->rq_svc_ctx);
+ LASSERT(req->rq_svc_ctx->sc_policy);
+
+ policy = req->rq_svc_ctx->sc_policy;
+ LASSERT(policy->sp_sops->authorize);
+
+ rc = policy->sp_sops->authorize(req);
+ LASSERT(rc || req->rq_reply_state->rs_repdata_len);
+
+ RETURN(rc);
+}
+
+void sptlrpc_svc_free_rs(struct ptlrpc_reply_state *rs)
+{
+ struct ptlrpc_sec_policy *policy;
+ unsigned int prealloc;
+ ENTRY;
+
+ LASSERT(rs->rs_svc_ctx);
+ LASSERT(rs->rs_svc_ctx->sc_policy);
+
+ policy = rs->rs_svc_ctx->sc_policy;
+ LASSERT(policy->sp_sops->free_rs);
+
+ prealloc = rs->rs_prealloc;
+ policy->sp_sops->free_rs(rs);
+
+ if (prealloc)
+ lustre_put_emerg_rs(rs);
+ EXIT;
+}
+
+void sptlrpc_svc_ctx_addref(struct ptlrpc_request *req)
+{
+ struct ptlrpc_svc_ctx *ctx = req->rq_svc_ctx;
+
+ if (ctx == NULL)
+ return;
+
+ LASSERT(atomic_read(&ctx->sc_refcount) > 0);
+ atomic_inc(&ctx->sc_refcount);
+}
+
+void sptlrpc_svc_ctx_decref(struct ptlrpc_request *req)
+{
+ struct ptlrpc_svc_ctx *ctx = req->rq_svc_ctx;
+
+ if (ctx == NULL)
+ return;
+
+ LASSERT(atomic_read(&ctx->sc_refcount) > 0);
+ if (atomic_dec_and_test(&ctx->sc_refcount)) {
+ if (ctx->sc_policy->sp_sops->free_ctx)
+ ctx->sc_policy->sp_sops->free_ctx(ctx);
+ }
+ req->rq_svc_ctx = NULL;
+}
+
+/****************************************
+ * bulk security *
+ ****************************************/
+
+int sptlrpc_cli_wrap_bulk(struct ptlrpc_request *req,
+ struct ptlrpc_bulk_desc *desc)
+{
+ struct ptlrpc_cli_ctx *ctx;
+
+ if (!SEC_FLAVOR_HAS_BULK(req->rq_sec_flavor))
+ return 0;
+
+ LASSERT(req->rq_bulk_read || req->rq_bulk_write);
+
+ ctx = req->rq_cli_ctx;
+ if (ctx->cc_ops->wrap_bulk)
+ return ctx->cc_ops->wrap_bulk(ctx, req, desc);
+ return 0;
+}
+EXPORT_SYMBOL(sptlrpc_cli_wrap_bulk);
+
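+/*
+ * build a temporary bulk descriptor over a brw_page array, so the
+ * per-context unwrap_bulk hook can be reused for bulk reads.
+ */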
+static
+void pga_to_bulk_desc(int nob, obd_count pg_count, struct brw_page **pga,
+ struct ptlrpc_bulk_desc *desc)
+{
+ int i;
+
+ LASSERT(pga);
+ LASSERT(*pga);
+
+ for (i = 0; i < pg_count && nob > 0; i++) {
+#ifdef __KERNEL__
+ desc->bd_iov[i].kiov_page = pga[i]->pg;
+ desc->bd_iov[i].kiov_len = pga[i]->count > nob ?
+ nob : pga[i]->count;
+ desc->bd_iov[i].kiov_offset = pga[i]->off & ~CFS_PAGE_MASK;
+#else
+#warning FIXME for liblustre!
+ desc->bd_iov[i].iov_base = pga[i]->pg->addr;
+ desc->bd_iov[i].iov_len = pga[i]->count > nob ?
+ nob : pga[i]->count;
+#endif
+
+ desc->bd_iov_count++;
+ nob -= pga[i]->count;
+ }
+}
+
+int sptlrpc_cli_unwrap_bulk_read(struct ptlrpc_request *req,
+ int nob, obd_count pg_count,
+ struct brw_page **pga)
+{
+ struct ptlrpc_bulk_desc *desc;
+ struct ptlrpc_cli_ctx *ctx;
+ int rc = 0;
+
+ if (!SEC_FLAVOR_HAS_BULK(req->rq_sec_flavor))
+ return 0;
+
+ LASSERT(req->rq_bulk_read && !req->rq_bulk_write);
+
+ OBD_ALLOC(desc, offsetof(struct ptlrpc_bulk_desc, bd_iov[pg_count]));
+ if (desc == NULL) {
+ CERROR("out of memory, can't verify bulk read data\n");
+ return -ENOMEM;
+ }
+
+ pga_to_bulk_desc(nob, pg_count, pga, desc);
+
+ ctx = req->rq_cli_ctx;
+ if (ctx->cc_ops->unwrap_bulk)
+ rc = ctx->cc_ops->unwrap_bulk(ctx, req, desc);
+
+ OBD_FREE(desc, offsetof(struct ptlrpc_bulk_desc, bd_iov[pg_count]));
+
+ return rc;
+}
+EXPORT_SYMBOL(sptlrpc_cli_unwrap_bulk_read);
+
+int sptlrpc_cli_unwrap_bulk_write(struct ptlrpc_request *req,
+ struct ptlrpc_bulk_desc *desc)
+{
+ struct ptlrpc_cli_ctx *ctx;
+
+ if (!SEC_FLAVOR_HAS_BULK(req->rq_sec_flavor))
+ return 0;
+
+ LASSERT(!req->rq_bulk_read && req->rq_bulk_write);
+
+ ctx = req->rq_cli_ctx;
+ if (ctx->cc_ops->unwrap_bulk)
+ return ctx->cc_ops->unwrap_bulk(ctx, req, desc);
+
+ return 0;
+}
+EXPORT_SYMBOL(sptlrpc_cli_unwrap_bulk_write);
+
+int sptlrpc_svc_wrap_bulk(struct ptlrpc_request *req,
+ struct ptlrpc_bulk_desc *desc)
+{
+ struct ptlrpc_svc_ctx *ctx;
+
+ if (!SEC_FLAVOR_HAS_BULK(req->rq_sec_flavor))
+ return 0;
+
+ LASSERT(req->rq_bulk_read || req->rq_bulk_write);
+
+ ctx = req->rq_svc_ctx;
+ if (ctx->sc_policy->sp_sops->wrap_bulk)
+ return ctx->sc_policy->sp_sops->wrap_bulk(req, desc);
+
+ return 0;
+}
+EXPORT_SYMBOL(sptlrpc_svc_wrap_bulk);
+
+int sptlrpc_svc_unwrap_bulk(struct ptlrpc_request *req,
+ struct ptlrpc_bulk_desc *desc)
+{
+ struct ptlrpc_svc_ctx *ctx;
+
+ if (!SEC_FLAVOR_HAS_BULK(req->rq_sec_flavor))
+ return 0;
+
+ LASSERT(req->rq_bulk_read || req->rq_bulk_write);
+
+ ctx = req->rq_svc_ctx;
+ if (ctx->sc_policy->sp_sops->unwrap_bulk)
+ return ctx->sc_policy->sp_sops->unwrap_bulk(req, desc);
+
+ return 0;
+}
+EXPORT_SYMBOL(sptlrpc_svc_unwrap_bulk);
+
+
+/****************************************
+ * user descriptor helpers *
+ ****************************************/
+
+int sptlrpc_user_desc_size(void)
+{
+#ifdef __KERNEL__
+ int ngroups = current_ngroups;
+
+ if (ngroups > LUSTRE_MAX_GROUPS)
+ ngroups = LUSTRE_MAX_GROUPS;
+
+ return sizeof(struct ptlrpc_user_desc) + ngroups * sizeof(__u32);
+#else
+ return sizeof(struct ptlrpc_user_desc);
+#endif
+}
+EXPORT_SYMBOL(sptlrpc_user_desc_size);
+
+int sptlrpc_pack_user_desc(struct lustre_msg *msg, int offset)
+{
+ struct ptlrpc_user_desc *pud;
+
+ pud = lustre_msg_buf(msg, offset, 0);
+
+ pud->pud_uid = cfs_current()->uid;
+ pud->pud_gid = cfs_current()->gid;
+ pud->pud_fsuid = cfs_current()->fsuid;
+ pud->pud_fsgid = cfs_current()->fsgid;
+ pud->pud_cap = cfs_current()->cap_effective;
+ pud->pud_ngroups = (msg->lm_buflens[offset] - sizeof(*pud)) /
+ sizeof(__u32);
+
+#ifdef __KERNEL__
+ task_lock(current);
+ if (pud->pud_ngroups > current_ngroups)
+ pud->pud_ngroups = current_ngroups;
+ memcpy(pud->pud_groups, cfs_current()->group_info->blocks[0],
+ pud->pud_ngroups * sizeof(__u32));
+ task_unlock(current);
+#endif
+
+ return 0;
+}
+EXPORT_SYMBOL(sptlrpc_pack_user_desc);
+
+int sptlrpc_unpack_user_desc(struct lustre_msg *msg, int offset)
+{
+ struct ptlrpc_user_desc *pud;
+ int i;
+
+ pud = lustre_msg_buf(msg, offset, sizeof(*pud));
+ if (!pud)
+ return -EINVAL;
+
+ if (lustre_msg_swabbed(msg)) {
+ __swab32s(&pud->pud_uid);
+ __swab32s(&pud->pud_gid);
+ __swab32s(&pud->pud_fsuid);
+ __swab32s(&pud->pud_fsgid);
+ __swab32s(&pud->pud_cap);
+ __swab32s(&pud->pud_ngroups);
+ }
+
+ if (pud->pud_ngroups > LUSTRE_MAX_GROUPS) {
+ CERROR("%u groups is too large\n", pud->pud_ngroups);
+ return -EINVAL;
+ }
+
+ if (sizeof(*pud) + pud->pud_ngroups * sizeof(__u32) >
+ msg->lm_buflens[offset]) {
+ CERROR("%u groups are claimed but bufsize only %u\n",
+ pud->pud_ngroups, msg->lm_buflens[offset]);
+ return -EINVAL;
+ }
+
+ if (lustre_msg_swabbed(msg)) {
+ for (i = 0; i < pud->pud_ngroups; i++)
+ __swab32s(&pud->pud_groups[i]);
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(sptlrpc_unpack_user_desc);
+
+/****************************************
+ * Helpers to assist policy modules to *
+ * implement checksum functionality *
+ ****************************************/
+
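+/* bulk checksum algorithms: name and digest size in bytes,
+ * indexed by BULK_CSUM_ALG_* */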
+struct {
+ char *name;
+ int size;
+} csum_types[] = {
+ [BULK_CSUM_ALG_NULL] = { "null", 0 },
+ [BULK_CSUM_ALG_CRC32] = { "crc32", 4 },
+ [BULK_CSUM_ALG_MD5] = { "md5", 16 },
+ [BULK_CSUM_ALG_SHA1] = { "sha1", 20 },
+ [BULK_CSUM_ALG_SHA256] = { "sha256", 32 },
+ [BULK_CSUM_ALG_SHA384] = { "sha384", 48 },
+ [BULK_CSUM_ALG_SHA512] = { "sha512", 64 },
+};
+
+int bulk_sec_desc_size(__u32 csum_alg, int request, int read)
+{
+ int size = sizeof(struct ptlrpc_bulk_sec_desc);
+
+ LASSERT(csum_alg < BULK_CSUM_ALG_MAX);
+
+ /* a read request doesn't carry checksum data; the reply does */
+ if (!(read && request))
+ size += csum_types[csum_alg].size;
+
+ return size;
+}
+EXPORT_SYMBOL(bulk_sec_desc_size);
+
+int bulk_sec_desc_unpack(struct lustre_msg *msg, int offset)
+{
+ struct ptlrpc_bulk_sec_desc *bsd;
+ int size = msg->lm_buflens[offset];
+
+ bsd = lustre_msg_buf(msg, offset, sizeof(*bsd));
+ if (bsd == NULL) {
+ CERROR("Invalid bulk sec desc: size %d\n", size);
+ return -EINVAL;
+ }
+
+ if (lustre_msg_swabbed(msg)) {
+ __swab32s(&bsd->bsd_version);
+ __swab32s(&bsd->bsd_pad);
+ __swab32s(&bsd->bsd_csum_alg);
+ __swab32s(&bsd->bsd_priv_alg);
+ }
+
+ if (bsd->bsd_version != 0) {
+ CERROR("Unexpected version %u\n", bsd->bsd_version);
+ return -EPROTO;
+ }
+
+ if (bsd->bsd_csum_alg >= BULK_CSUM_ALG_MAX) {
+ CERROR("Unsupported checksum algorithm %u\n",
+ bsd->bsd_csum_alg);
+ return -EINVAL;
+ }
+ if (bsd->bsd_priv_alg >= BULK_PRIV_ALG_MAX) {
+ CERROR("Unsupported cipher algorithm %u\n",
+ bsd->bsd_priv_alg);
+ return -EINVAL;
+ }
+
+ if (size > sizeof(*bsd) &&
+ size < sizeof(*bsd) + csum_types[bsd->bsd_csum_alg].size) {
+ CERROR("Mal-formed checksum data: csum alg %u, size %d\n",
+ bsd->bsd_csum_alg, size);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(bulk_sec_desc_unpack);
+
+#ifdef __KERNEL__
+static
+int do_bulk_checksum_crc32(struct ptlrpc_bulk_desc *desc, void *buf)
+{
+ struct page *page;
+ int off;
+ char *ptr;
+ __u32 crc32 = ~0;
+ int len, i;
+
+ for (i = 0; i < desc->bd_iov_count; i++) {
+ page = desc->bd_iov[i].kiov_page;
+ off = desc->bd_iov[i].kiov_offset & ~CFS_PAGE_MASK;
+ ptr = cfs_kmap(page) + off;
+ len = desc->bd_iov[i].kiov_len;
+
+ crc32 = crc32_le(crc32, ptr, len);
+
+ cfs_kunmap(page);
+ }
+
+ *((__u32 *) buf) = crc32;
+ return 0;
+}
+
+static
+int do_bulk_checksum(struct ptlrpc_bulk_desc *desc, __u32 alg, void *buf)
+{
+ struct crypto_tfm *tfm;
+ struct scatterlist *sl;
+ int i, rc = 0;
+
+ LASSERT(alg > BULK_CSUM_ALG_NULL &&
+ alg < BULK_CSUM_ALG_MAX);
+
+ if (alg == BULK_CSUM_ALG_CRC32)
+ return do_bulk_checksum_crc32(desc, buf);
+
+ tfm = crypto_alloc_tfm(csum_types[alg].name, 0);
+ if (tfm == NULL) {
+ CERROR("Unable to allocate tfm %s\n", csum_types[alg].name);
+ return -ENOMEM;
+ }
+
+ OBD_ALLOC(sl, sizeof(*sl) * desc->bd_iov_count);
+ if (sl == NULL) {
+ rc = -ENOMEM;
+ goto out_tfm;
+ }
+
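+ /* map each bulk page into a scatterlist entry for the digest */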
+ for (i = 0; i < desc->bd_iov_count; i++) {
+ sl[i].page = desc->bd_iov[i].kiov_page;
+ sl[i].offset = desc->bd_iov[i].kiov_offset & ~CFS_PAGE_MASK;
+ sl[i].length = desc->bd_iov[i].kiov_len;
+ }
+
+ crypto_digest_init(tfm);
+ crypto_digest_update(tfm, sl, desc->bd_iov_count);
+ crypto_digest_final(tfm, buf);
+
+ OBD_FREE(sl, sizeof(*sl) * desc->bd_iov_count);
+
+out_tfm:
+ crypto_free_tfm(tfm);
+ return rc;
+}
+
+#else /* !__KERNEL__ */
+static
+int do_bulk_checksum(struct ptlrpc_bulk_desc *desc, __u32 alg, void *buf)
+{
+ __u32 crc32 = ~0;
+ int i;
+
+ LASSERT(alg == BULK_CSUM_ALG_CRC32);
+
+ for (i = 0; i < desc->bd_iov_count; i++) {
+ char *ptr = desc->bd_iov[i].iov_base;
+ int len = desc->bd_iov[i].iov_len;
+
+ crc32 = crc32_le(crc32, ptr, len);
+ }
+
+ *((__u32 *) buf) = crc32;
+ return 0;
+}
+#endif
+
+/*
+ * perform checksum algorithm @alg on @desc, store the result in
+ * @bsd->bsd_csum. if anything goes wrong, leave bsd_csum_alg set
+ * to BULK_CSUM_ALG_NULL.
+ */
+static
+int generate_bulk_csum(struct ptlrpc_bulk_desc *desc, __u32 alg,
+ struct ptlrpc_bulk_sec_desc *bsd, int bsdsize)
+{
+ int rc;
+
+ LASSERT(bsd);
+ LASSERT(alg < BULK_CSUM_ALG_MAX);
+
+ bsd->bsd_csum_alg = BULK_CSUM_ALG_NULL;
+
+ if (alg == BULK_CSUM_ALG_NULL)
+ return 0;
+
+ LASSERT(bsdsize >= sizeof(*bsd) + csum_types[alg].size);
+
+ rc = do_bulk_checksum(desc, alg, bsd->bsd_csum);
+ if (rc == 0)
+ bsd->bsd_csum_alg = alg;
+
+ return rc;
+}
+
+static
+int verify_bulk_csum(struct ptlrpc_bulk_desc *desc, int read,
+ struct ptlrpc_bulk_sec_desc *bsdv, int bsdvsize,
+ struct ptlrpc_bulk_sec_desc *bsdr, int bsdrsize)
+{
+ char *csum_p;
+ char *buf = NULL;
+ int csum_size, rc = 0;
+
+ LASSERT(bsdv);
+ LASSERT(bsdv->bsd_csum_alg < BULK_CSUM_ALG_MAX);
+
+ if (bsdr)
+ bsdr->bsd_csum_alg = BULK_CSUM_ALG_NULL;
+
+ if (bsdv->bsd_csum_alg == BULK_CSUM_ALG_NULL)
+ return 0;
+
+ /* every supported algorithm has an entry in csum_types[] */
+ csum_size = csum_types[bsdv->bsd_csum_alg].size;
+
+ if (bsdvsize < sizeof(*bsdv) + csum_size) {
+ CERROR("verifier size %d too small, require %d\n",
+ bsdvsize, sizeof(*bsdv) + csum_size);
+ return -EINVAL;
+ }
+
+ if (bsdr) {
+ LASSERT(bsdrsize >= sizeof(*bsdr) + csum_size);
+ csum_p = (char *) bsdr->bsd_csum;
+ } else {
+ OBD_ALLOC(buf, csum_size);
+ if (buf == NULL)
+ return -ENOMEM;
+ csum_p = buf;
+ }
+
+ rc = do_bulk_checksum(desc, bsdv->bsd_csum_alg, csum_p);
+
+ if (memcmp(bsdv->bsd_csum, csum_p, csum_size)) {
+ CERROR("BAD %s CHECKSUM (%s), data mutated during "
+ "transfer!\n", read ? "READ" : "WRITE",
+ csum_types[bsdv->bsd_csum_alg].name);
+ rc = -EINVAL;
+ } else {
+ CDEBUG(D_SEC, "bulk %s checksum (%s) verified\n",
+ read ? "read" : "write",
+ csum_types[bsdv->bsd_csum_alg].name);
+ }
+
+ if (bsdr) {
+ bsdr->bsd_csum_alg = bsdv->bsd_csum_alg;
+ memcpy(bsdr->bsd_csum, csum_p, csum_size);
+ } else {
+ LASSERT(buf);
+ OBD_FREE(buf, csum_size);
+ }
+
+ return rc;
+}
+
+int bulk_csum_cli_request(struct ptlrpc_bulk_desc *desc, int read,
+ __u32 alg, struct lustre_msg *rmsg, int roff)
+{
+ struct ptlrpc_bulk_sec_desc *bsdr;
+ int rsize, rc = 0;
+
+ rsize = rmsg->lm_buflens[roff];
+ bsdr = lustre_msg_buf(rmsg, roff, sizeof(*bsdr));
+
+ LASSERT(bsdr);
+ LASSERT(rsize >= sizeof(*bsdr));
+ LASSERT(alg < BULK_CSUM_ALG_MAX);
+
+ if (read)
+ bsdr->bsd_csum_alg = alg;
+ else {
+ rc = generate_bulk_csum(desc, alg, bsdr, rsize);
+ if (rc) {
+ CERROR("client bulk write: failed to perform "
+ "checksum: %d\n", rc);
+ }
+ }
+
+ return rc;
+}
+EXPORT_SYMBOL(bulk_csum_cli_request);
+
+int bulk_csum_cli_reply(struct ptlrpc_bulk_desc *desc, int read,
+ struct lustre_msg *rmsg, int roff,
+ struct lustre_msg *vmsg, int voff)
+{
+ struct ptlrpc_bulk_sec_desc *bsdv, *bsdr;
+ int rsize, vsize;
+
+ rsize = rmsg->lm_buflens[roff];
+ vsize = vmsg->lm_buflens[voff];
+ bsdr = lustre_msg_buf(rmsg, roff, 0);
+ bsdv = lustre_msg_buf(vmsg, voff, 0);
+
+ if (bsdv == NULL || vsize < sizeof(*bsdv)) {
+ CERROR("Invalid checksum verifier from server: size %d\n",
+ vsize);
+ return -EINVAL;
+ }
+
+ LASSERT(bsdr);
+ LASSERT(rsize >= sizeof(*bsdr));
+ LASSERT(vsize >= sizeof(*bsdv));
+
+ if (bsdr->bsd_csum_alg != bsdv->bsd_csum_alg) {
+ CERROR("bulk %s: checksum algorithm mismatch: client request "
+ "%s but server reply with %s. try to use the new one "
+ "for checksum verification\n",
+ read ? "read" : "write",
+ csum_types[bsdr->bsd_csum_alg].name,
+ csum_types[bsdv->bsd_csum_alg].name);
+ }
+
+ if (read)
+ return verify_bulk_csum(desc, 1, bsdv, vsize, NULL, 0);
+ else {
+ char *cli, *srv, *new = NULL;
+ int csum_size = csum_types[bsdr->bsd_csum_alg].size;
+
+ LASSERT(bsdr->bsd_csum_alg < BULK_CSUM_ALG_MAX);
+ if (bsdr->bsd_csum_alg == BULK_CSUM_ALG_NULL)
+ return 0;
+
+ if (vsize < sizeof(*bsdv) + csum_size) {
+ CERROR("verifier size %d too small, require %d\n",
+ vsize, sizeof(*bsdv) + csum_size);
+ return -EINVAL;
+ }
+
+ cli = (char *) (bsdr + 1);
+ srv = (char *) (bsdv + 1);
+
+ if (!memcmp(cli, srv, csum_size)) {
+ /* checksum confirmed */
+ CDEBUG(D_SEC, "bulk write checksum (%s) confirmed\n",
+ csum_types[bsdr->bsd_csum_alg].name);
+ return 0;
+ }
+
+ /* checksum mismatch: recompute a fresh checksum and compare
+ * it against both copies, then emit the appropriate warning.
+ */
+ OBD_ALLOC(new, csum_size);
+ if (new == NULL)
+ return -ENOMEM;
+
+ do_bulk_checksum(desc, bsdr->bsd_csum_alg, new);
+
+ if (!memcmp(new, srv, csum_size)) {
+ CERROR("BAD WRITE CHECKSUM (%s): pages were mutated "
+ "on the client after we checksummed them\n",
+ csum_types[bsdr->bsd_csum_alg].name);
+ } else if (!memcmp(new, cli, csum_size)) {
+ CERROR("BAD WRITE CHECKSUM (%s): pages were mutated "
+ "in transit\n",
+ csum_types[bsdr->bsd_csum_alg].name);
+ } else {
+ CERROR("BAD WRITE CHECKSUM (%s): pages were mutated "
+ "in transit, and the current page contents "
+ "don't match the originals and what the server "
+ "received\n",
+ csum_types[bsdr->bsd_csum_alg].name);
+ }
+ OBD_FREE(new, csum_size);
+
+ return -EINVAL;
+ }
+}
+EXPORT_SYMBOL(bulk_csum_cli_reply);
+
+int bulk_csum_svc(struct ptlrpc_bulk_desc *desc, int read,
+ struct lustre_msg *vmsg, int voff,
+ struct lustre_msg *rmsg, int roff)
+{
+ struct ptlrpc_bulk_sec_desc *bsdv, *bsdr;
+ int vsize, rsize, rc;
+
+ vsize = vmsg->lm_buflens[voff];
+ rsize = rmsg->lm_buflens[roff];
+ bsdv = lustre_msg_buf(vmsg, voff, 0);
+ bsdr = lustre_msg_buf(rmsg, roff, 0);
+
+ LASSERT(vsize >= sizeof(*bsdv));
+ LASSERT(rsize >= sizeof(*bsdr));
+ LASSERT(bsdv && bsdr);
+
+ if (read) {
+ rc = generate_bulk_csum(desc, bsdv->bsd_csum_alg, bsdr, rsize);
+ if (rc)
+ CERROR("bulk read: server failed to generate %s "
+ "checksum: %d\n",
+ csum_types[bsdv->bsd_csum_alg].name, rc);
+ } else
+ rc = verify_bulk_csum(desc, 0, bsdv, vsize, bsdr, rsize);
+
+ return rc;
+}
+EXPORT_SYMBOL(bulk_csum_svc);
+
+/****************************************
+ * user supplied flavor string parsing *
+ ****************************************/
+
+static
+int get_default_flavor(enum lustre_part to_part, struct sec_flavor_config *conf)
+{
+ conf->sfc_bulk_priv = BULK_PRIV_ALG_NULL;
+ conf->sfc_bulk_csum = BULK_CSUM_ALG_NULL;
+ conf->sfc_flags = 0;
+
+ switch (to_part) {
+ case LUSTRE_MDT:
+ conf->sfc_rpc_flavor = SPTLRPC_FLVR_PLAIN;
+ return 0;
+ case LUSTRE_OST:
+ conf->sfc_rpc_flavor = SPTLRPC_FLVR_NULL;
+ return 0;
+ default:
+ CERROR("Unknown to lustre part %d, apply defaults\n", to_part);
+ conf->sfc_rpc_flavor = SPTLRPC_FLVR_NULL;
+ return -EINVAL;
+ }
+}
+
+static
+void get_flavor_by_rpc(__u32 rpc_flavor, struct sec_flavor_config *conf)
+{
+ conf->sfc_rpc_flavor = rpc_flavor;
+ conf->sfc_bulk_priv = BULK_PRIV_ALG_NULL;
+ conf->sfc_bulk_csum = BULK_CSUM_ALG_NULL;
+ conf->sfc_flags = 0;
+
+ switch (rpc_flavor) {
+ case SPTLRPC_FLVR_NULL:
+ case SPTLRPC_FLVR_PLAIN:
+ break;
+ case SPTLRPC_FLVR_KRB5P:
+ conf->sfc_bulk_priv = BULK_PRIV_ALG_ARC4;
+ /* fall through */
+ case SPTLRPC_FLVR_KRB5I:
+ conf->sfc_bulk_csum = BULK_CSUM_ALG_SHA1;
+ break;
+ default:
+ LBUG();
+ }
+}
+
+static
+void get_flavor_by_rpc_bulk(__u32 rpc_flavor, int bulk_priv,
+ struct sec_flavor_config *conf)
+{
+ if (bulk_priv)
+ conf->sfc_bulk_priv = BULK_PRIV_ALG_ARC4;
+ else
+ conf->sfc_bulk_priv = BULK_PRIV_ALG_NULL;
+
+ switch (rpc_flavor) {
+ case SPTLRPC_FLVR_PLAIN:
+ conf->sfc_bulk_csum = BULK_CSUM_ALG_MD5;
+ break;
+ case SPTLRPC_FLVR_KRB5I:
+ case SPTLRPC_FLVR_KRB5P:
+ conf->sfc_bulk_csum = BULK_CSUM_ALG_SHA1;
+ break;
+ default:
+ LBUG();
+ }
+}
+
+static __u32 __flavors[] = {
+ SPTLRPC_FLVR_NULL,
+ SPTLRPC_FLVR_PLAIN,
+ SPTLRPC_FLVR_KRB5I,
+ SPTLRPC_FLVR_KRB5P,
+};
+
+#define __nflavors (sizeof(__flavors)/sizeof(__u32))
+
+/*
+ * flavor string format: rpc[-bulk[:cksum/enc]]
+ * for examples:
+ * null
+ * plain-bulki
+ * krb5p-bulkn
+ * krb5i-bulkp
+ * krb5i-bulkp:sha512/arc4
+ */
+int sptlrpc_parse_flavor(enum lustre_part from_part, enum lustre_part to_part,
+ char *str, struct sec_flavor_config *conf)
+{
+ char *f, *bulk, *alg, *enc;
+ char buf[64];
+ int i, bulk_priv;
+ ENTRY;
+
+ if (str == NULL) {
+ if (get_default_flavor(to_part, conf))
+ return -EINVAL;
+ goto set_flags;
+ }
+
+ for (i = 0; i < __nflavors; i++) {
+ f = sptlrpc_flavor2name(__flavors[i]);
+ if (strncmp(str, f, strlen(f)) == 0)
+ break;
+ }
+
+ if (i >= __nflavors)
+ GOTO(invalid, -EINVAL);
+
+ /* copy into a local buffer so we can modify it freely */
+ strncpy(buf, str, sizeof(buf));
+ buf[sizeof(buf) - 1] = '\0';
+
+ /* find bulk string */
+ bulk = strchr(buf, '-');
+ if (bulk)
+ *bulk++ = '\0';
+
+ /* the first part must equal the rpc flavor name */
+ if (strcmp(buf, f) != 0)
+ GOTO(invalid, -EINVAL);
+
+ get_flavor_by_rpc(__flavors[i], conf);
+
+ if (bulk == NULL)
+ goto set_flags;
+
+ /* null flavor should not have any suffix */
+ if (__flavors[i] == SPTLRPC_FLVR_NULL)
+ GOTO(invalid, -EINVAL);
+
+ /* find bulk algorithm string */
+ alg = strchr(bulk, ':');
+ if (alg)
+ *alg++ = '\0';
+
+ /* verify bulk section */
+ if (strcmp(bulk, "bulkn") == 0) {
+ conf->sfc_bulk_csum = BULK_CSUM_ALG_NULL;
+ conf->sfc_bulk_priv = BULK_PRIV_ALG_NULL;
+ goto set_flags;
+ }
+
+ if (strcmp(bulk, "bulki") == 0)
+ bulk_priv = 0;
+ else if (strcmp(bulk, "bulkp") == 0)
+ bulk_priv = 1;
+ else
+ GOTO(invalid, -EINVAL);
+
+ /* the plain policy doesn't support bulk encryption */
+ if (bulk_priv && __flavors[i] == SPTLRPC_FLVR_PLAIN)
+ GOTO(invalid, -EINVAL);
+
+ get_flavor_by_rpc_bulk(__flavors[i], bulk_priv, conf);
+
+ if (alg == NULL)
+ goto set_flags;
+
+ /* find encryption algorithm string */
+ enc = strchr(alg, '/');
+ if (enc)
+ *enc++ = '\0';
+
+ /* bulk combination sanity check */
+ if ((bulk_priv && enc == NULL) || (bulk_priv == 0 && enc))
+ GOTO(invalid, -EINVAL);
+
+ /* checksum algorithm */
+ for (i = 0; i < BULK_CSUM_ALG_MAX; i++) {
+ if (strcmp(alg, csum_types[i].name) == 0) {
+ conf->sfc_bulk_csum = i;
+ break;
+ }
+ }
+ if (i >= BULK_CSUM_ALG_MAX)
+ GOTO(invalid, -EINVAL);
+
+ /* privacy algorithm */
+ if (enc) {
+ if (strcmp(enc, "arc4") != 0)
+ GOTO(invalid, -EINVAL);
+ conf->sfc_bulk_priv = BULK_PRIV_ALG_ARC4;
+ }
+
+set_flags:
+ /* set the ROOTONLY flag for connections:
+ * - to an OST
+ * - from an MDT to another MDT
+ */
+ if ((to_part == LUSTRE_MDT && from_part == LUSTRE_MDT) ||
+ to_part == LUSTRE_OST)
+ conf->sfc_flags |= PTLRPC_SEC_FL_ROOTONLY;
+
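+ /* the parsed config is kept in little-endian form regardless of
+ * host byte order, hence the swab on big-endian hosts (this
+ * rationale is an assumption, not stated in the code) */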
+#ifdef __BIG_ENDIAN
+ __swab32s(&conf->sfc_rpc_flavor);
+ __swab32s(&conf->sfc_bulk_csum);
+ __swab32s(&conf->sfc_bulk_priv);
+ __swab32s(&conf->sfc_flags);
+#endif
+ return 0;
+invalid:
+ CERROR("invalid flavor string: %s\n", str);
+ return -EINVAL;
+}
+EXPORT_SYMBOL(sptlrpc_parse_flavor);
+
+/****************************************
+ * misc helpers *
+ ****************************************/
+
+const char * sec2target_str(struct ptlrpc_sec *sec)
+{
+ if (!sec || !sec->ps_import || !sec->ps_import->imp_obd)
+ return "*";
+ if (sec->ps_flags & PTLRPC_SEC_FL_REVERSE)
+ return "c";
+ return obd_uuid2str(&sec->ps_import->imp_obd->u.cli.cl_target_uuid);
+}
+EXPORT_SYMBOL(sec2target_str);
+
+int sptlrpc_lprocfs_rd(char *page, char **start, off_t off, int count,
+ int *eof, void *data)
+{
+ struct obd_device *obd = data;
+ struct sec_flavor_config *conf;
+ struct ptlrpc_sec *sec = NULL;
+ char flags_str[20];
+
+ if (obd == NULL)
+ return 0;
+
+ conf = &obd->u.cli.cl_sec_conf;
+
+ LASSERT(strcmp(obd->obd_type->typ_name, LUSTRE_OSC_NAME) == 0 ||
+ strcmp(obd->obd_type->typ_name, LUSTRE_MDC_NAME) == 0 ||
+ strcmp(obd->obd_type->typ_name, LUSTRE_MGC_NAME) == 0);
+ LASSERT(conf->sfc_bulk_csum < BULK_CSUM_ALG_MAX);
+ LASSERT(conf->sfc_bulk_priv < BULK_PRIV_ALG_MAX);
+
+ if (obd->u.cli.cl_import)
+ sec = obd->u.cli.cl_import->imp_sec;
+
+ flags_str[0] = '\0';
+ if (conf->sfc_flags & PTLRPC_SEC_FL_REVERSE)
+ strncat(flags_str, "reverse,",
+ sizeof(flags_str) - strlen(flags_str) - 1);
+ if (conf->sfc_flags & PTLRPC_SEC_FL_ROOTONLY)
+ strncat(flags_str, "rootonly,",
+ sizeof(flags_str) - strlen(flags_str) - 1);
+ if (flags_str[0] != '\0')
+ flags_str[strlen(flags_str) - 1] = '\0'; /* strip trailing ',' */
+
+ return snprintf(page, count,
+ "rpc_flavor: %s\n"
+ "bulk_flavor: %s checksum, %s encryption\n"
+ "flags: %s\n"
+ "ctx_cache: size %u, busy %d\n"
+ "gc: interval %lus, next %lds\n",
+ sptlrpc_flavor2name(conf->sfc_rpc_flavor),
+ csum_types[conf->sfc_bulk_csum].name,
+ conf->sfc_bulk_priv == BULK_PRIV_ALG_NULL ?
+ "null" : "arc4", // XXX
+ flags_str,
+ sec ? sec->ps_ccache_size : 0,
+ sec ? atomic_read(&sec->ps_busy) : 0,
+ sec ? sec->ps_gc_interval: 0,
+ sec ? (sec->ps_gc_interval ?
+ sec->ps_gc_next - cfs_time_current_sec() : 0)
+ : 0);
+}
+EXPORT_SYMBOL(sptlrpc_lprocfs_rd);
+
+
+int sptlrpc_init(void)
+{
+ int rc;
+
+ rc = sptlrpc_null_init();
+ if (rc)
+ goto out;
+
+ rc = sptlrpc_plain_init();
+ if (rc)
+ goto out_null;
+ return 0;
+
+out_null:
+ sptlrpc_null_exit();
+out:
+ return rc;
+}
+
+int sptlrpc_exit(void)
+{
+ sptlrpc_plain_exit();
+ sptlrpc_null_exit();
+ return 0;
+}
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2004-2006 Cluster File Systems, Inc.
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef EXPORT_SYMTAB
+# define EXPORT_SYMTAB
+#endif
+#define DEBUG_SUBSYSTEM S_SEC
+
+#ifndef __KERNEL__
+#include <liblustre.h>
+#endif
+
+#include <obd_support.h>
+#include <obd_class.h>
+#include <lustre_net.h>
+#include <lustre_sec.h>
+
+static struct ptlrpc_sec_policy null_policy;
+static struct ptlrpc_sec null_sec;
+static struct ptlrpc_cli_ctx null_cli_ctx;
+static struct ptlrpc_svc_ctx null_svc_ctx;
+
+static
+int null_ctx_refresh(struct ptlrpc_cli_ctx *ctx)
+{
+ /* should never reach here */
+ LBUG();
+ return 0;
+}
+
+static
+int null_ctx_sign(struct ptlrpc_cli_ctx *ctx, struct ptlrpc_request *req)
+{
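+ /* v1 messages carry no lm_secflvr field, so only stamp
+ * v2 buffers */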
+ if (req->rq_reqbuf->lm_magic != LUSTRE_MSG_MAGIC_V1)
+ req->rq_reqbuf->lm_secflvr = SPTLRPC_FLVR_NULL;
+ req->rq_reqdata_len = req->rq_reqlen;
+ return 0;
+}
+
+static
+int null_ctx_verify(struct ptlrpc_cli_ctx *ctx, struct ptlrpc_request *req)
+{
+ req->rq_repmsg = req->rq_repbuf;
+ req->rq_replen = req->rq_repdata_len;
+ return 0;
+}
+
+static struct ptlrpc_ctx_ops null_ctx_ops = {
+ .refresh = null_ctx_refresh,
+ .sign = null_ctx_sign,
+ .verify = null_ctx_verify,
+};
+
+static struct ptlrpc_svc_ctx null_svc_ctx = {
+ .sc_refcount = ATOMIC_INIT(1),
+ .sc_policy = &null_policy,
+};
+
+static
+struct ptlrpc_sec* null_create_sec(struct obd_import *imp,
+ struct ptlrpc_svc_ctx *ctx,
+ __u32 flavor,
+ unsigned long flags)
+{
+ LASSERT(SEC_FLAVOR_POLICY(flavor) == SPTLRPC_POLICY_NULL);
+ return &null_sec;
+}
+
+static
+void null_destroy_sec(struct ptlrpc_sec *sec)
+{
+ LASSERT(sec == &null_sec);
+}
+
+static
+struct ptlrpc_cli_ctx *null_lookup_ctx(struct ptlrpc_sec *sec,
+ struct vfs_cred *vcred)
+{
+ atomic_inc(&null_cli_ctx.cc_refcount);
+ return &null_cli_ctx;
+}
+
+static
+int null_alloc_reqbuf(struct ptlrpc_sec *sec,
+ struct ptlrpc_request *req,
+ int msgsize)
+{
+ if (!req->rq_reqbuf) {
+ LASSERT(!req->rq_pool);
+ OBD_ALLOC(req->rq_reqbuf, msgsize);
+ if (!req->rq_reqbuf)
+ return -ENOMEM;
+
+ req->rq_reqbuf_len = msgsize;
+ } else {
+ LASSERT(req->rq_pool);
+ LASSERT(req->rq_reqbuf_len >= msgsize);
+ memset(req->rq_reqbuf, 0, msgsize);
+ }
+
+ req->rq_reqmsg = req->rq_reqbuf;
+ return 0;
+}
+
+static
+void null_free_reqbuf(struct ptlrpc_sec *sec,
+ struct ptlrpc_request *req)
+{
+ if (!req->rq_pool) {
+ OBD_FREE(req->rq_reqbuf, req->rq_reqbuf_len);
+ req->rq_reqbuf = NULL;
+ req->rq_reqbuf_len = 0;
+ }
+}
+
+static
+int null_alloc_repbuf(struct ptlrpc_sec *sec,
+ struct ptlrpc_request *req,
+ int msgsize)
+{
+ OBD_ALLOC(req->rq_repbuf, msgsize);
+ if (!req->rq_repbuf)
+ return -ENOMEM;
+
+ req->rq_repbuf_len = msgsize;
+ return 0;
+}
+
+static
+void null_free_repbuf(struct ptlrpc_sec *sec,
+ struct ptlrpc_request *req)
+{
+ OBD_FREE(req->rq_repbuf, req->rq_repbuf_len);
+ req->rq_repbuf = NULL;
+ req->rq_repbuf_len = 0;
+}
+
+static
+int null_accept(struct ptlrpc_request *req)
+{
+ LASSERT(SEC_FLAVOR_POLICY(req->rq_sec_flavor) == SPTLRPC_POLICY_NULL);
+
+ if (SEC_FLAVOR_RPC(req->rq_sec_flavor) != SPTLRPC_FLVR_NULL) {
+ CERROR("Invalid flavor 0x%x\n", req->rq_sec_flavor);
+ return SECSVC_DROP;
+ }
+
+ req->rq_reqmsg = req->rq_reqbuf;
+ req->rq_reqlen = req->rq_reqdata_len;
+
+ req->rq_svc_ctx = &null_svc_ctx;
+ atomic_inc(&req->rq_svc_ctx->sc_refcount);
+
+ return SECSVC_OK;
+}
+
+static
+int null_alloc_rs(struct ptlrpc_request *req, int msgsize)
+{
+ struct ptlrpc_reply_state *rs;
+ int rs_size = sizeof(*rs) + msgsize;
+
+ LASSERT(msgsize % 8 == 0);
+
+ rs = req->rq_reply_state;
+
+ if (rs) {
+ /* pre-allocated */
+ LASSERT(rs->rs_size >= rs_size);
+ } else {
+ OBD_ALLOC(rs, rs_size);
+ if (rs == NULL)
+ return -ENOMEM;
+
+ rs->rs_size = rs_size;
+ }
+
+ rs->rs_svc_ctx = req->rq_svc_ctx;
+ atomic_inc(&req->rq_svc_ctx->sc_refcount);
+
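+ /* the reply buffer lives immediately after the reply state */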
+ rs->rs_repbuf = (struct lustre_msg *) (rs + 1);
+ rs->rs_repbuf_len = rs_size - sizeof(*rs);
+ rs->rs_msg = rs->rs_repbuf;
+
+ req->rq_reply_state = rs;
+ return 0;
+}
+
+static
+void null_free_rs(struct ptlrpc_reply_state *rs)
+{
+ LASSERT(atomic_read(&rs->rs_svc_ctx->sc_refcount) > 1);
+ atomic_dec(&rs->rs_svc_ctx->sc_refcount);
+
+ if (!rs->rs_prealloc)
+ OBD_FREE(rs, rs->rs_size);
+}
+
+static
+int null_authorize(struct ptlrpc_request *req)
+{
+ struct ptlrpc_reply_state *rs = req->rq_reply_state;
+
+ LASSERT(rs);
+ if (rs->rs_repbuf->lm_magic != LUSTRE_MSG_MAGIC_V1)
+ rs->rs_repbuf->lm_secflvr = SPTLRPC_FLVR_NULL;
+ rs->rs_repdata_len = req->rq_replen;
+ return 0;
+}
+
+static struct ptlrpc_sec_cops null_sec_cops = {
+ .create_sec = null_create_sec,
+ .destroy_sec = null_destroy_sec,
+ .lookup_ctx = null_lookup_ctx,
+ .alloc_reqbuf = null_alloc_reqbuf,
+ .alloc_repbuf = null_alloc_repbuf,
+ .free_reqbuf = null_free_reqbuf,
+ .free_repbuf = null_free_repbuf,
+};
+
+static struct ptlrpc_sec_sops null_sec_sops = {
+ .accept = null_accept,
+ .alloc_rs = null_alloc_rs,
+ .authorize = null_authorize,
+ .free_rs = null_free_rs,
+};
+
+static struct ptlrpc_sec_policy null_policy = {
+ .sp_owner = THIS_MODULE,
+ .sp_name = "sec.null",
+ .sp_policy = SPTLRPC_POLICY_NULL,
+ .sp_cops = &null_sec_cops,
+ .sp_sops = &null_sec_sops,
+};
+
+static
+void null_init_internal(void)
+{
+ static HLIST_HEAD(__list);
+
+ null_sec.ps_policy = &null_policy;
+ atomic_set(&null_sec.ps_refcount, 1); /* always busy */
+ null_sec.ps_import = NULL;
+ null_sec.ps_flavor = SPTLRPC_FLVR_NULL;
+ null_sec.ps_flags = 0;
+ null_sec.ps_gc_interval = 0;
+ null_sec.ps_gc_next = 0;
+ spin_lock_init(&null_sec.ps_lock);
+ null_sec.ps_ccache_size = 1;
+ null_sec.ps_ccache = &__list;
+ atomic_set(&null_sec.ps_busy, 1); /* for "null_cli_ctx" */
+
+ hlist_add_head(&null_cli_ctx.cc_hash, &__list);
+ atomic_set(&null_cli_ctx.cc_refcount, 1); /* for hash */
+ null_cli_ctx.cc_sec = &null_sec;
+ null_cli_ctx.cc_ops = &null_ctx_ops;
+ null_cli_ctx.cc_expire = 0;
+ null_cli_ctx.cc_flags = PTLRPC_CTX_HASHED | PTLRPC_CTX_ETERNAL |
+ PTLRPC_CTX_UPTODATE;
+ null_cli_ctx.cc_vcred.vc_uid = 0;
+ spin_lock_init(&null_cli_ctx.cc_lock);
+ INIT_LIST_HEAD(&null_cli_ctx.cc_req_list);
+}
+
+int sptlrpc_null_init(void)
+{
+ int rc;
+
+ null_init_internal();
+
+ rc = sptlrpc_register_policy(&null_policy);
+ if (rc)
+ CERROR("failed to register sec.null: %d\n", rc);
+
+ return rc;
+}
+
+int sptlrpc_null_exit(void)
+{
+ int rc;
+
+ rc = sptlrpc_unregister_policy(&null_policy);
+ if (rc)
+ CERROR("cannot unregister sec.null: %d\n", rc);
+
+ return rc;
+}
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2006 Cluster File Systems, Inc.
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef EXPORT_SYMTAB
+# define EXPORT_SYMTAB
+#endif
+#define DEBUG_SUBSYSTEM S_SEC
+
+#ifndef __KERNEL__
+#include <liblustre.h>
+#endif
+
+#include <obd_support.h>
+#include <obd_class.h>
+#include <lustre_net.h>
+#include <lustre_sec.h>
+
+static struct ptlrpc_sec_policy plain_policy;
+static struct ptlrpc_sec plain_sec;
+static struct ptlrpc_cli_ctx plain_cli_ctx;
+static struct ptlrpc_svc_ctx plain_svc_ctx;
+
+static
+int plain_ctx_refresh(struct ptlrpc_cli_ctx *ctx)
+{
+ /* should never reach here */
+ LBUG();
+ return 0;
+}
+
+static
+int plain_ctx_sign(struct ptlrpc_cli_ctx *ctx, struct ptlrpc_request *req)
+{
+ struct lustre_msg_v2 *msg = req->rq_reqbuf;
+ ENTRY;
+
+ msg->lm_secflvr = req->rq_sec_flavor;
+ req->rq_reqdata_len = lustre_msg_size_v2(msg->lm_bufcount,
+ msg->lm_buflens);
+ RETURN(0);
+}
+
+static
+int plain_ctx_verify(struct ptlrpc_cli_ctx *ctx, struct ptlrpc_request *req)
+{
+ struct lustre_msg *msg = req->rq_repbuf;
+ ENTRY;
+
+ if (SEC_FLAVOR_HAS_BULK(req->rq_sec_flavor)) {
+ if (msg->lm_bufcount != 2) {
+ CERROR("Protocol error: invalid buf count %d\n",
+ msg->lm_bufcount);
+ RETURN(-EPROTO);
+ }
+
+ if (bulk_sec_desc_unpack(msg, 1)) {
+ CERROR("Mal-formed bulk checksum reply\n");
+ RETURN(-EINVAL);
+ }
+ }
+
+ req->rq_repmsg = lustre_msg_buf(msg, 0, 0);
+ req->rq_replen = msg->lm_buflens[0];
+ RETURN(0);
+}
+
+static
+int plain_cli_wrap_bulk(struct ptlrpc_cli_ctx *ctx,
+ struct ptlrpc_request *req,
+ struct ptlrpc_bulk_desc *desc)
+{
+ struct sec_flavor_config *conf;
+
+ LASSERT(req->rq_import);
+ LASSERT(SEC_FLAVOR_HAS_BULK(req->rq_sec_flavor));
+ LASSERT(req->rq_reqbuf->lm_bufcount >= 2);
+
+ conf = &req->rq_import->imp_obd->u.cli.cl_sec_conf;
+ return bulk_csum_cli_request(desc, req->rq_bulk_read,
+ conf->sfc_bulk_csum,
+ req->rq_reqbuf,
+ req->rq_reqbuf->lm_bufcount - 1);
+}
+
+static
+int plain_cli_unwrap_bulk(struct ptlrpc_cli_ctx *ctx,
+ struct ptlrpc_request *req,
+ struct ptlrpc_bulk_desc *desc)
+{
+ LASSERT(SEC_FLAVOR_HAS_BULK(req->rq_sec_flavor));
+ LASSERT(req->rq_reqbuf->lm_bufcount >= 2);
+ LASSERT(req->rq_repbuf->lm_bufcount >= 2);
+
+ return bulk_csum_cli_reply(desc, req->rq_bulk_read,
+ req->rq_reqbuf,
+ req->rq_reqbuf->lm_bufcount - 1,
+ req->rq_repbuf,
+ req->rq_repbuf->lm_bufcount - 1);
+}
+
+static struct ptlrpc_ctx_ops plain_ctx_ops = {
+ .refresh = plain_ctx_refresh,
+ .sign = plain_ctx_sign,
+ .verify = plain_ctx_verify,
+ .wrap_bulk = plain_cli_wrap_bulk,
+ .unwrap_bulk = plain_cli_unwrap_bulk,
+};
+
+static struct ptlrpc_svc_ctx plain_svc_ctx = {
+ .sc_refcount = ATOMIC_INIT(1),
+ .sc_policy = &plain_policy,
+};
+
+static
+struct ptlrpc_sec* plain_create_sec(struct obd_import *imp,
+ struct ptlrpc_svc_ctx *ctx,
+ __u32 flavor,
+ unsigned long flags)
+{
+ ENTRY;
+ LASSERT(SEC_FLAVOR_POLICY(flavor) == SPTLRPC_POLICY_PLAIN);
+ RETURN(&plain_sec);
+}
+
+static
+void plain_destroy_sec(struct ptlrpc_sec *sec)
+{
+ ENTRY;
+ LASSERT(sec == &plain_sec);
+ EXIT;
+}
+
+static
+struct ptlrpc_cli_ctx *plain_lookup_ctx(struct ptlrpc_sec *sec,
+ struct vfs_cred *vcred)
+{
+ ENTRY;
+ atomic_inc(&plain_cli_ctx.cc_refcount);
+ RETURN(&plain_cli_ctx);
+}
+
+static
+int plain_alloc_reqbuf(struct ptlrpc_sec *sec,
+ struct ptlrpc_request *req,
+ int msgsize)
+{
+ struct sec_flavor_config *conf;
+ int bufcnt = 1, buflens[3], alloc_len; /* msg + user + bulk desc */
+ ENTRY;
+
+ buflens[0] = msgsize;
+
+ if (SEC_FLAVOR_HAS_USER(req->rq_sec_flavor))
+ buflens[bufcnt++] = sptlrpc_user_desc_size();
+
+ if (SEC_FLAVOR_HAS_BULK(req->rq_sec_flavor)) {
+ LASSERT(req->rq_bulk_read || req->rq_bulk_write);
+
+ conf = &req->rq_import->imp_obd->u.cli.cl_sec_conf;
+ buflens[bufcnt++] = bulk_sec_desc_size(conf->sfc_bulk_csum, 1,
+ req->rq_bulk_read);
+ }
+
+ alloc_len = lustre_msg_size_v2(bufcnt, buflens);
+
+ if (!req->rq_reqbuf) {
+ LASSERT(!req->rq_pool);
+ OBD_ALLOC(req->rq_reqbuf, alloc_len);
+ if (!req->rq_reqbuf)
+ RETURN(-ENOMEM);
+
+ req->rq_reqbuf_len = alloc_len;
+ } else {
+ LASSERT(req->rq_pool);
+ LASSERT(req->rq_reqbuf_len >= alloc_len);
+ memset(req->rq_reqbuf, 0, alloc_len);
+ }
+
+ lustre_init_msg_v2(req->rq_reqbuf, bufcnt, buflens, NULL);
+ req->rq_reqmsg = lustre_msg_buf_v2(req->rq_reqbuf, 0, 0);
+
+ if (SEC_FLAVOR_HAS_USER(req->rq_sec_flavor))
+ sptlrpc_pack_user_desc(req->rq_reqbuf, 1);
+
+ RETURN(0);
+}
+
+static
+void plain_free_reqbuf(struct ptlrpc_sec *sec,
+ struct ptlrpc_request *req)
+{
+ ENTRY;
+ if (!req->rq_pool) {
+ OBD_FREE(req->rq_reqbuf, req->rq_reqbuf_len);
+ req->rq_reqbuf = NULL;
+ req->rq_reqbuf_len = 0;
+ }
+ EXIT;
+}
+
+static
+int plain_alloc_repbuf(struct ptlrpc_sec *sec,
+ struct ptlrpc_request *req,
+ int msgsize)
+{
+ struct sec_flavor_config *conf;
+ int bufcnt = 1, buflens[2], alloc_len;
+ ENTRY;
+
+ buflens[0] = msgsize;
+
+ if (SEC_FLAVOR_HAS_BULK(req->rq_sec_flavor)) {
+ LASSERT(req->rq_bulk_read || req->rq_bulk_write);
+
+ conf = &req->rq_import->imp_obd->u.cli.cl_sec_conf;
+ buflens[bufcnt++] = bulk_sec_desc_size(conf->sfc_bulk_csum, 0,
+ req->rq_bulk_read);
+ }
+
+ alloc_len = lustre_msg_size_v2(bufcnt, buflens);
+
+ OBD_ALLOC(req->rq_repbuf, alloc_len);
+ if (!req->rq_repbuf)
+ RETURN(-ENOMEM);
+
+ req->rq_repbuf_len = alloc_len;
+ RETURN(0);
+}
+
+static
+void plain_free_repbuf(struct ptlrpc_sec *sec,
+ struct ptlrpc_request *req)
+{
+ ENTRY;
+ OBD_FREE(req->rq_repbuf, req->rq_repbuf_len);
+ req->rq_repbuf = NULL;
+ req->rq_repbuf_len = 0;
+ EXIT;
+}
+
+static
+int plain_accept(struct ptlrpc_request *req)
+{
+ struct lustre_msg *msg = req->rq_reqbuf;
+ int bufcnt = 1;
+ ENTRY;
+
+ LASSERT(SEC_FLAVOR_POLICY(req->rq_sec_flavor) == SPTLRPC_POLICY_PLAIN);
+
+ if (SEC_FLAVOR_RPC(req->rq_sec_flavor) != SPTLRPC_FLVR_PLAIN) {
+ CERROR("Invalid flavor 0x%x\n", req->rq_sec_flavor);
+ return SECSVC_DROP;
+ }
+
+ if (SEC_FLAVOR_HAS_USER(req->rq_sec_flavor)) {
+ if (msg->lm_bufcount < ++bufcnt) {
+ CERROR("Protocal error: too small buf count %d\n",
+ msg->lm_bufcount);
+ RETURN(SECSVC_DROP);
+ }
+
+ if (sptlrpc_unpack_user_desc(msg, bufcnt - 1)) {
+ CERROR("Mal-formed user descriptor\n");
+ RETURN(SECSVC_DROP);
+ }
+
+ req->rq_user_desc = lustre_msg_buf(msg, bufcnt - 1, 0);
+ }
+
+ if (SEC_FLAVOR_HAS_BULK(req->rq_sec_flavor)) {
+ if (msg->lm_bufcount != ++bufcnt) {
+ CERROR("Protocal error: invalid buf count %d\n",
+ msg->lm_bufcount);
+ RETURN(SECSVC_DROP);
+ }
+
+ if (bulk_sec_desc_unpack(msg, bufcnt - 1)) {
+ CERROR("Mal-formed bulk checksum request\n");
+ RETURN(SECSVC_DROP);
+ }
+ }
+
+ req->rq_reqmsg = lustre_msg_buf(msg, 0, 0);
+ req->rq_reqlen = msg->lm_buflens[0];
+
+ req->rq_svc_ctx = &plain_svc_ctx;
+ atomic_inc(&req->rq_svc_ctx->sc_refcount);
+
+ RETURN(SECSVC_OK);
+}
+
+static
+int plain_alloc_rs(struct ptlrpc_request *req, int msgsize)
+{
+ struct ptlrpc_reply_state *rs;
+ struct ptlrpc_bulk_sec_desc *bsd;
+ int bufcnt = 1, buflens[2];
+ int rs_size = sizeof(*rs);
+ ENTRY;
+
+ LASSERT(msgsize % 8 == 0);
+
+ buflens[0] = msgsize;
+ if (SEC_FLAVOR_HAS_BULK(req->rq_sec_flavor) &&
+ (req->rq_bulk_read || req->rq_bulk_write)) {
+ bsd = lustre_msg_buf(req->rq_reqbuf,
+ req->rq_reqbuf->lm_bufcount - 1,
+ sizeof(*bsd));
+ LASSERT(bsd);
+
+ buflens[bufcnt++] = bulk_sec_desc_size(bsd->bsd_csum_alg, 0,
+ req->rq_bulk_read);
+ }
+ rs_size += lustre_msg_size_v2(bufcnt, buflens);
+
+ rs = req->rq_reply_state;
+
+ if (rs) {
+ /* pre-allocated */
+ LASSERT(rs->rs_size >= rs_size);
+ } else {
+ OBD_ALLOC(rs, rs_size);
+ if (rs == NULL)
+ RETURN(-ENOMEM);
+
+ rs->rs_size = rs_size;
+ }
+
+ rs->rs_svc_ctx = req->rq_svc_ctx;
+ atomic_inc(&req->rq_svc_ctx->sc_refcount);
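+ /* as in the null policy, the reply buffer follows the reply state */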
+ rs->rs_repbuf = (struct lustre_msg *) (rs + 1);
+ rs->rs_repbuf_len = rs_size - sizeof(*rs);
+
+ lustre_init_msg_v2(rs->rs_repbuf, bufcnt, buflens, NULL);
+ rs->rs_msg = lustre_msg_buf_v2(rs->rs_repbuf, 0, 0);
+
+ req->rq_reply_state = rs;
+ RETURN(0);
+}
+
+static
+void plain_free_rs(struct ptlrpc_reply_state *rs)
+{
+ ENTRY;
+
+ LASSERT(atomic_read(&rs->rs_svc_ctx->sc_refcount) > 1);
+ atomic_dec(&rs->rs_svc_ctx->sc_refcount);
+
+ if (!rs->rs_prealloc)
+ OBD_FREE(rs, rs->rs_size);
+ EXIT;
+}
+
+static
+int plain_authorize(struct ptlrpc_request *req)
+{
+ struct ptlrpc_reply_state *rs = req->rq_reply_state;
+ struct lustre_msg_v2 *msg = rs->rs_repbuf;
+ int len;
+ ENTRY;
+
+ LASSERT(rs);
+ LASSERT(msg);
+
+ if (req->rq_replen != msg->lm_buflens[0])
+ len = lustre_shrink_msg(msg, 0, req->rq_replen, 1);
+ else
+ len = lustre_msg_size_v2(msg->lm_bufcount, msg->lm_buflens);
+
+ msg->lm_secflvr = req->rq_sec_flavor;
+ rs->rs_repdata_len = len;
+ RETURN(0);
+}
+
+static
+int plain_svc_unwrap_bulk(struct ptlrpc_request *req,
+ struct ptlrpc_bulk_desc *desc)
+{
+ struct ptlrpc_reply_state *rs = req->rq_reply_state;
+
+ LASSERT(rs);
+
+ return bulk_csum_svc(desc, req->rq_bulk_read,
+ req->rq_reqbuf, req->rq_reqbuf->lm_bufcount - 1,
+ rs->rs_repbuf, rs->rs_repbuf->lm_bufcount - 1);
+}
+
+static
+int plain_svc_wrap_bulk(struct ptlrpc_request *req,
+ struct ptlrpc_bulk_desc *desc)
+{
+ struct ptlrpc_reply_state *rs = req->rq_reply_state;
+
+ LASSERT(rs);
+
+ return bulk_csum_svc(desc, req->rq_bulk_read,
+ req->rq_reqbuf, req->rq_reqbuf->lm_bufcount - 1,
+ rs->rs_repbuf, rs->rs_repbuf->lm_bufcount - 1);
+}
+
+static struct ptlrpc_sec_cops plain_sec_cops = {
+ .create_sec = plain_create_sec,
+ .destroy_sec = plain_destroy_sec,
+ .lookup_ctx = plain_lookup_ctx,
+ .alloc_reqbuf = plain_alloc_reqbuf,
+ .alloc_repbuf = plain_alloc_repbuf,
+ .free_reqbuf = plain_free_reqbuf,
+ .free_repbuf = plain_free_repbuf,
+};
+
+static struct ptlrpc_sec_sops plain_sec_sops = {
+ .accept = plain_accept,
+ .alloc_rs = plain_alloc_rs,
+ .authorize = plain_authorize,
+ .free_rs = plain_free_rs,
+ .unwrap_bulk = plain_svc_unwrap_bulk,
+ .wrap_bulk = plain_svc_wrap_bulk,
+};
+
+static struct ptlrpc_sec_policy plain_policy = {
+ .sp_owner = THIS_MODULE,
+ .sp_name = "sec.plain",
+ .sp_policy = SPTLRPC_POLICY_PLAIN,
+ .sp_cops = &plain_sec_cops,
+ .sp_sops = &plain_sec_sops,
+};
+
+static
+void plain_init_internal(void)
+{
+ static HLIST_HEAD(__list);
+
+ plain_sec.ps_policy = &plain_policy;
+ atomic_set(&plain_sec.ps_refcount, 1); /* always busy */
+ plain_sec.ps_import = NULL;
+ plain_sec.ps_flavor = SPTLRPC_FLVR_PLAIN;
+ plain_sec.ps_flags = 0;
+ plain_sec.ps_gc_interval = 0;
+ plain_sec.ps_gc_next = 0;
+ spin_lock_init(&plain_sec.ps_lock);
+ plain_sec.ps_ccache_size = 1;
+ plain_sec.ps_ccache = &__list;
+ atomic_set(&plain_sec.ps_busy, 1); /* for "plain_cli_ctx" */
+
+ hlist_add_head(&plain_cli_ctx.cc_hash, &__list);
+ atomic_set(&plain_cli_ctx.cc_refcount, 1); /* for hash */
+ plain_cli_ctx.cc_sec = &plain_sec;
+ plain_cli_ctx.cc_ops = &plain_ctx_ops;
+ plain_cli_ctx.cc_expire = 0;
+ plain_cli_ctx.cc_flags = PTLRPC_CTX_HASHED | PTLRPC_CTX_ETERNAL |
+ PTLRPC_CTX_UPTODATE;
+ plain_cli_ctx.cc_vcred.vc_uid = 0;
+ spin_lock_init(&plain_cli_ctx.cc_lock);
+ INIT_LIST_HEAD(&plain_cli_ctx.cc_req_list);
+}
+
+int sptlrpc_plain_init(void)
+{
+ int rc;
+
+ plain_init_internal();
+
+ rc = sptlrpc_register_policy(&plain_policy);
+ if (rc)
+ CERROR("failed to register sec.plain: %d\n", rc);
+
+ return rc;
+}
+
+int sptlrpc_plain_exit(void)
+{
+ int rc;
+
+ rc = sptlrpc_unregister_policy(&plain_policy);
+ if (rc)
+ CERROR("cannot unregister sec.plain: %d\n", rc);
+
+ return rc;
+}
ENTRY;
LASSERT (nbufs > 0);
- LASSERT (bufsize >= max_req_size);
+ LASSERT (bufsize >= max_req_size + SPTLRPC_MAX_PAYLOAD);
LASSERT (ctx_tags != 0);
OBD_ALLOC(service, sizeof(*service));
cfs_waitq_init(&service->srv_waitq);
service->srv_nbuf_per_group = test_req_buffer_pressure ? 1 : nbufs;
- service->srv_max_req_size = max_req_size;
+ service->srv_max_req_size = max_req_size + SPTLRPC_MAX_PAYLOAD;
service->srv_buf_size = bufsize;
service->srv_rep_portal = rep_portal;
service->srv_req_portal = req_portal;
/* Now allocate pool of reply buffers */
/* Increase max reply size to next power of two */
service->srv_max_reply_size = 1;
- while (service->srv_max_reply_size < max_reply_size)
+ while (service->srv_max_reply_size <
+ max_reply_size + SPTLRPC_MAX_PAYLOAD)
service->srv_max_reply_size <<= 1;
if (proc_entry != NULL)
req->rq_reply_state = NULL;
}
+ sptlrpc_svc_ctx_decref(req);
+
if (req != &rqbd->rqbd_req) {
/* NB request buffers use an embedded
* req if the incoming req unlinked the
svc->srv_n_active_reqs);
}
+ rc = sptlrpc_svc_unwrap_request(request);
+ switch (rc) {
+ case SECSVC_OK:
+ break;
+ case SECSVC_COMPLETE:
+ target_send_reply(request, 0, OBD_FAIL_MDS_ALL_REPLY_NET);
+ goto put_conn;
+ case SECSVC_DROP:
+ goto out;
+ default:
+ LBUG();
+ }
+
#if SWAB_PARANOIA
/* Clear request swab mask; this is a new request */
request->rq_req_swab_mask = 0;
if (timediff / 1000000 > (long)obd_timeout)
CERROR("request "LPU64" opc %u from %s processed in %lds "
"trans "LPU64" rc %d/%d\n",
- request->rq_xid, lustre_msg_get_opc(request->rq_reqmsg),
+ request->rq_xid,
+ request->rq_reqmsg ?
+ lustre_msg_get_opc(request->rq_reqmsg) : 0,
libcfs_id2str(request->rq_peer),
cfs_timeval_sub(&work_end, &request->rq_arrival_time,
NULL) / 1000000,
else
CDEBUG(D_HA, "request "LPU64" opc %u from %s processed in "
"%ldus (%ldus total) trans "LPU64" rc %d/%d\n",
- request->rq_xid, lustre_msg_get_opc(request->rq_reqmsg),
+ request->rq_xid,
+ request->rq_reqmsg ?
+ lustre_msg_get_opc(request->rq_reqmsg) : 0,
libcfs_id2str(request->rq_peer), timediff,
cfs_timeval_sub(&work_end, &request->rq_arrival_time,
NULL),
lustre_msg_get_status(request->rq_repmsg) :
-999);
- if (svc->srv_stats != NULL) {
+ if (svc->srv_stats != NULL && request->rq_reqmsg != NULL) {
int opc = opcode_offset(lustre_msg_get_opc(request->rq_reqmsg));
if (opc > 0) {
LASSERT(opc < LUSTRE_MAX_OPCODES);
static int lfs_setquota(int argc, char **argv);
static int lfs_quota(int argc, char **argv);
#endif
+static int lfs_flushctx(int argc, char **argv);
static int lfs_join(int argc, char **argv);
/* all available commands */
{"quota", lfs_quota, 0, "Display disk usage and limits.\n"
"usage: quota [ -o obd_uuid ] [ -u | -g ] [name] <filesystem>"},
#endif
+ {"flushctx", lfs_flushctx, 0, "Flush security context for current user.\n"
+ "usage: flushctx [-k] [mountpoint...]"},
{"help", Parser_help, 0, "help"},
{"exit", Parser_quit, 0, "quit"},
{"quit", Parser_quit, 0, "quit"},
}
#endif /* HAVE_QUOTA_SUPPORT */
+static int flushctx_ioctl(char *mp)
+{
+ int fd, rc;
+
+ fd = open(mp, O_RDONLY);
+ if (fd == -1) {
+ fprintf(stderr, "flushctx: error open %s: %s\n",
+ mp, strerror(errno));
+ return -1;
+ }
+
+ rc = ioctl(fd, LL_IOC_FLUSHCTX);
+ if (rc == -1)
+ fprintf(stderr, "flushctx: error ioctl %s: %s\n",
+ mp, strerror(errno));
+
+ close(fd);
+ return rc;
+}
+
+static int lfs_flushctx(int argc, char **argv)
+{
+ int kdestroy = 0, c;
+ FILE *proc;
+ char procline[PATH_MAX], *line;
+ int rc = 0;
+
+ optind = 0;
+ while ((c = getopt(argc, argv, "k")) != -1) {
+ switch (c) {
+ case 'k':
+ kdestroy = 1;
+ break;
+ default:
+ fprintf(stderr, "error: %s: option '-%c' "
+ "unrecognized\n", argv[0], c);
+ return CMD_HELP;
+ }
+ }
+
+ if (kdestroy)
+ system("kdestroy > /dev/null");
+
+ if (optind >= argc) {
+ /* flush for all mounted lustre fs. */
+ proc = fopen("/proc/mounts", "r");
+ if (!proc) {
+ fprintf(stderr, "error: %s: can't open /proc/mounts\n",
+ argv[0]);
+ return -1;
+ }
+
+ while ((line = fgets(procline, PATH_MAX, proc)) != NULL) {
+ char dev[PATH_MAX];
+ char mp[PATH_MAX];
+ char fs[PATH_MAX];
+
+ if (sscanf(line, "%s %s %s", dev, mp, fs) != 3) {
+ fprintf(stderr, "%s: unexpected format in "
+ "/proc/mounts\n",
+ argv[0]);
+ fclose(proc);
+ return -1;
+ }
+
+ if (strcmp(fs, "lustre") != 0)
+ continue;
+ /* we use '@' in the device name to identify a client
+ * mount. is there a better way?
+ */
+ if (strchr(dev, '@') == NULL)
+ continue;
+
+ if (flushctx_ioctl(mp))
+ rc = -1;
+ }
+
+ fclose(proc);
+ } else {
+ /* flush fs as specified */
+ while (optind < argc) {
+ if (flushctx_ioctl(argv[optind++]))
+ rc = -1;
+ }
+ }
+
+ return rc;
+}
+
int main(int argc, char **argv)
{
int rc;