From 6c556a77fa1759d20c18f59024465e165504a288 Mon Sep 17 00:00:00 2001 From: ericm Date: Mon, 11 Sep 2006 16:53:38 +0000 Subject: [PATCH] branch: b_new_cmd land the first part of secure ptlrpc support. --- lustre/include/liblustre.h | 37 + lustre/include/lustre/lustre_idl.h | 9 + lustre/include/lustre/lustre_user.h | 1 + lustre/include/lustre_cfg.h | 1 + lustre/include/lustre_disk.h | 2 + lustre/include/lustre_import.h | 7 +- lustre/include/lustre_net.h | 78 +- lustre/include/lustre_param.h | 4 + lustre/include/lustre_sec.h | 514 ++++++++ lustre/include/obd.h | 14 +- lustre/ldlm/ldlm_lib.c | 135 +- lustre/ldlm/ldlm_lock.c | 18 +- lustre/liblustre/lutil.c | 1 + lustre/llite/dir.c | 3 + lustre/llite/file.c | 2 + lustre/llite/llite_internal.h | 1 + lustre/llite/llite_lib.c | 15 + lustre/lmv/lmv_obd.c | 60 +- lustre/lov/lov_obd.c | 2 + lustre/mdc/lproc_mdc.c | 1 + lustre/mdc/mdc_request.c | 5 + lustre/mds/handler.c | 3 + lustre/mdt/mdt_handler.c | 15 +- lustre/mgs/mgs_llog.c | 132 +- lustre/obdclass/genops.c | 1 + lustre/obdclass/obd_config.c | 34 + lustre/obdclass/obd_mount.c | 155 ++- lustre/osc/lproc_osc.c | 1 + lustre/osc/osc_request.c | 16 +- lustre/ost/ost_handler.c | 28 +- lustre/ptlrpc/Makefile.in | 1 + lustre/ptlrpc/autoMakefile.am | 6 +- lustre/ptlrpc/client.c | 178 ++- lustre/ptlrpc/events.c | 13 +- lustre/ptlrpc/import.c | 13 +- lustre/ptlrpc/niobuf.c | 67 +- lustre/ptlrpc/pack_generic.c | 188 ++- lustre/ptlrpc/pers.c | 65 +- lustre/ptlrpc/pinger.c | 53 + lustre/ptlrpc/ptlrpc_internal.h | 16 + lustre/ptlrpc/ptlrpc_module.c | 15 +- lustre/ptlrpc/sec.c | 2495 +++++++++++++++++++++++++++++++++++ lustre/ptlrpc/sec_null.c | 305 +++++ lustre/ptlrpc/sec_plain.c | 498 +++++++ lustre/ptlrpc/service.c | 32 +- lustre/utils/lfs.c | 89 ++ 46 files changed, 4996 insertions(+), 333 deletions(-) create mode 100644 lustre/include/lustre_sec.h create mode 100644 lustre/ptlrpc/sec.c create mode 100644 lustre/ptlrpc/sec_null.c create mode 100644 lustre/ptlrpc/sec_plain.c diff 
--git a/lustre/include/liblustre.h b/lustre/include/liblustre.h index fa39903..e23fec2 100644 --- a/lustre/include/liblustre.h +++ b/lustre/include/liblustre.h @@ -583,6 +583,8 @@ struct task_struct { int state; struct signal pending; char comm[32]; + int uid; + int gid; int pid; int fsuid; int fsgid; @@ -705,6 +707,7 @@ static inline void del_timer(struct timer_list *l) typedef struct { volatile int counter; } atomic_t; +#define ATOMIC_INIT(i) { (i) } #define atomic_read(a) ((a)->counter) #define atomic_set(a,b) do {(a)->counter = b; } while (0) #define atomic_dec_and_test(a) ((--((a)->counter)) == 0) @@ -721,6 +724,40 @@ typedef struct { volatile int counter; } atomic_t; #define unlikely(exp) (exp) #endif +#define might_sleep() +#define might_sleep_if(c) +#define smp_mb() + +static inline +int test_and_set_bit(int nr, unsigned long *addr) +{ + int oldbit; + + while (nr >= sizeof(long)) { + nr -= sizeof(long); + addr++; + } + + oldbit = (*addr) & (1 << nr); + *addr |= (1 << nr); + return oldbit; +} + +static inline +int test_and_clear_bit(int nr, unsigned long *addr) +{ + int oldbit; + + while (nr >= sizeof(long)) { + nr -= sizeof(long); + addr++; + } + + oldbit = (*addr) & (1 << nr); + *addr &= ~(1 << nr); + return oldbit; +} + /* FIXME sys/capability will finally included linux/fs.h thus * cause numerous trouble on x86-64. 
as temporary solution for * build broken at cary, we copy definition we need from capability.h diff --git a/lustre/include/lustre/lustre_idl.h b/lustre/include/lustre/lustre_idl.h index 30be4cb..17fd2d2 100644 --- a/lustre/include/lustre/lustre_idl.h +++ b/lustre/include/lustre/lustre_idl.h @@ -1784,4 +1784,13 @@ static inline int fid_res_name_eq(const struct lu_fid *f, } #define JOIN_FILE_ALIGN 4096 + +/* security opcodes */ +typedef enum { + SEC_CTX_INIT = 801, + SEC_CTX_INIT_CONT = 802, + SEC_CTX_FINI = 803, + SEC_LAST_OPC +} sec_cmd_t; + #endif diff --git a/lustre/include/lustre/lustre_user.h b/lustre/include/lustre/lustre_user.h index 5183732..f1119e8 100644 --- a/lustre/include/lustre/lustre_user.h +++ b/lustre/include/lustre/lustre_user.h @@ -58,6 +58,7 @@ struct obd_statfs; #define LL_IOC_JOIN _IOW ('f', 163, long) #define IOC_OBD_STATFS _IOWR('f', 164, struct obd_statfs *) #define IOC_LOV_GETINFO _IOWR('f', 165, struct lov_user_mds_data *) +#define LL_IOC_FLUSHCTX _IOW ('f', 166, long) #define LL_STATFS_MDC 1 #define LL_STATFS_LOV 2 diff --git a/lustre/include/lustre_cfg.h b/lustre/include/lustre_cfg.h index f508bdf..ae21989 100644 --- a/lustre/include/lustre_cfg.h +++ b/lustre/include/lustre_cfg.h @@ -58,6 +58,7 @@ enum lcfg_command_type { LCFG_LOV_ADD_INA = 0x00ce013, LCFG_ADD_MDC = 0x00cf014, LCFG_DEL_MDC = 0x00cf015, + LCFG_SEC_FLAVOR = 0x00ce016, }; struct lustre_cfg_bufs { diff --git a/lustre/include/lustre_disk.h b/lustre/include/lustre_disk.h index b2b7d48..69f07d7 100644 --- a/lustre/include/lustre_disk.h +++ b/lustre/include/lustre_disk.h @@ -138,6 +138,8 @@ struct lustre_mount_data { int lmd_exclude_count; char *lmd_dev; /* device name */ char *lmd_profile; /* client only */ + char *lmd_sec_mdt; /* sec from mdt (to ost/mdt) */ + char *lmd_sec_cli; /* sec from client (to ost/mdt) */ char *lmd_opts; /* lustre mount options (as opposed to _device_ mount options) */ __u32 *lmd_exclude; /* array of OSTs to ignore */ diff --git 
a/lustre/include/lustre_import.h b/lustre/include/lustre_import.h index 0639d79..f29ea11 100644 --- a/lustre/include/lustre_import.h +++ b/lustre/include/lustre_import.h @@ -64,6 +64,7 @@ struct obd_import { struct list_head imp_delayed_list; struct obd_device *imp_obd; + struct ptlrpc_sec *imp_sec; cfs_waitq_t imp_recovery_waitq; atomic_t imp_inflight; @@ -78,6 +79,7 @@ struct obd_import { struct lustre_handle imp_remote_handle; cfs_time_t imp_next_ping; /* jiffies */ __u64 imp_last_success_conn; /* jiffies, 64-bit */ + cfs_time_t imp_next_reconnect; /* seconds */ /* all available obd_import_conn linked here */ struct list_head imp_conn_list; @@ -98,7 +100,10 @@ struct obd_import { imp_pingable:1, /* pingable */ imp_resend_replay:1, /* resend for replay */ imp_recon_bk:1, /* turn off reconnect if all failovers fail */ - imp_last_recon:1; /* internally used by above */ + imp_last_recon:1, /* internally used by above */ + imp_force_reconnect:1; /* need to reconnect + * even the status is + * FULL */ __u32 imp_connect_op; struct obd_connect_data imp_connect_data; __u64 imp_connect_flags_orig; diff --git a/lustre/include/lustre_net.h b/lustre/include/lustre_net.h index a2fd0df..8c2f0af 100644 --- a/lustre/include/lustre_net.h +++ b/lustre/include/lustre_net.h @@ -38,6 +38,7 @@ #include #include #include +#include #include #include @@ -278,12 +279,16 @@ struct ptlrpc_reply_state { lnet_handle_md_t rs_md_h; atomic_t rs_refcount; + struct ptlrpc_svc_ctx *rs_svc_ctx; + struct lustre_msg *rs_repbuf; /* wrapper */ + int rs_repbuf_len; /* wrapper buf length */ + int rs_repdata_len; /* wrapper msg length */ + struct lustre_msg *rs_msg; /* reply message */ + /* locks awaiting client reply ACK */ int rs_nlocks; struct lustre_handle rs_locks[RS_MAX_LOCKS]; ldlm_mode_t rs_modes[RS_MAX_LOCKS]; - /* last member: variable sized reply message */ - struct lustre_msg *rs_msg; }; struct ptlrpc_thread; @@ -324,7 +329,7 @@ struct ptlrpc_request { */ rq_replay:1, rq_no_resend:1, 
rq_waiting:1, rq_receiving_reply:1, - rq_no_delay:1, rq_net_err:1; + rq_no_delay:1, rq_net_err:1, rq_wait_ctx:1; enum rq_phase rq_phase; /* one of RQ_PHASE_* */ atomic_t rq_refcount; /* client-side refcount for SENT race */ @@ -345,6 +350,38 @@ struct ptlrpc_request { __u64 rq_xid; struct list_head rq_replay_list; + struct ptlrpc_cli_ctx *rq_cli_ctx; /* client's half ctx */ + struct ptlrpc_svc_ctx *rq_svc_ctx; /* server's half ctx */ + struct list_head rq_ctx_chain; /* link to waited ctx */ + ptlrpc_flavor_t rq_sec_flavor; /* client & server */ + /* client security flags */ + unsigned int rq_ctx_init:1, /* context initiation */ + rq_ctx_fini:1, /* context destroy */ + rq_bulk_read:1, /* request bulk read */ + rq_bulk_write:1, /* request bulk write */ + /* server authentication flags */ + rq_auth_gss:1, /* authenticated by gss */ + rq_auth_remote:1, /* authed as remote user */ + rq_auth_usr_root:1, /* authed as root */ + rq_auth_usr_mds:1; /* authed as mds */ + + uid_t rq_auth_uid; /* authed uid */ + uid_t rq_auth_mapped_uid; /* authed uid mapped to */ + + /* (server side), pointed directly into req buffer */ + struct ptlrpc_user_desc *rq_user_desc; + + /* various buffer pointers */ + struct lustre_msg *rq_reqbuf; /* req wrapper */ + int rq_reqbuf_len; /* req wrapper buf len */ + int rq_reqdata_len; /* req wrapper msg len */ + struct lustre_msg *rq_repbuf; /* rep wrapper */ + int rq_repbuf_len; /* rep wrapper buf len */ + int rq_repdata_len; /* rep wrapper msg len */ + struct lustre_msg *rq_clrbuf; /* only in priv mode */ + int rq_clrbuf_len; /* only in priv mode */ + int rq_clrdata_len; /* only in priv mode */ + #if SWAB_PARANOIA __u32 rq_req_swab_mask; __u32 rq_rep_swab_mask; @@ -421,9 +458,10 @@ ptlrpc_rqphase2str(const struct ptlrpc_request *req) FLAG(req->rq_timedout, "X") /* eXpired */, FLAG(req->rq_resend, "S"), \ FLAG(req->rq_restart, "T"), FLAG(req->rq_replay, "P"), \ FLAG(req->rq_no_resend, "N"), \ - FLAG(req->rq_waiting, "W") + FLAG(req->rq_waiting, "W"), 
\ + FLAG(req->rq_wait_ctx, "C") -#define REQ_FLAGS_FMT "%s:%s%s%s%s%s%s%s%s%s" +#define REQ_FLAGS_FMT "%s:%s%s%s%s%s%s%s%s%s%s" #define __DEBUG_REQ(CDEB_TYPE, level, req, fmt, args...) \ CDEB_TYPE(level, "@@@ " fmt \ @@ -490,8 +528,10 @@ struct ptlrpc_bulk_desc { lnet_handle_md_t bd_md_h; /* associated MD */ #if defined(__KERNEL__) + lnet_kiov_t *bd_enc_iov; /* used in privacy mode */ lnet_kiov_t bd_iov[0]; #else + lnet_md_iovec_t *bd_enc_iov; lnet_md_iovec_t bd_iov[0]; #endif }; @@ -709,7 +749,8 @@ struct ptlrpc_request *ptlrpc_prep_req(struct obd_import *imp, __u32 version, struct ptlrpc_request *ptlrpc_prep_req_pool(struct obd_import *imp, __u32 version, int opcode, int count, int *lengths, char **bufs, - struct ptlrpc_request_pool *pool); + struct ptlrpc_request_pool *pool, + struct ptlrpc_cli_ctx *ctx); void ptlrpc_free_req(struct ptlrpc_request *request); void ptlrpc_req_finished(struct ptlrpc_request *request); void ptlrpc_req_finished_with_imp_lock(struct ptlrpc_request *request); @@ -785,16 +826,23 @@ int ptlrpc_import_recovery_state_machine(struct obd_import *imp); /* ptlrpc/pack_generic.c */ int lustre_msg_swabbed(struct lustre_msg *msg); int lustre_msg_check_version(struct lustre_msg *msg, __u32 version); +void lustre_init_msg_v2(struct lustre_msg_v2 *msg, int count, int *lens, + char **bufs); int lustre_pack_request(struct ptlrpc_request *, __u32 magic, int count, int *lens, char **bufs); int lustre_pack_reply(struct ptlrpc_request *, int count, int *lens, char **bufs); -void lustre_shrink_reply(struct ptlrpc_request *req, int segment, - unsigned int newlen, int move_data); +int lustre_pack_reply_v2(struct ptlrpc_request *req, int count, + int *lens, char **bufs); +int lustre_shrink_msg(struct lustre_msg *msg, int segment, + unsigned int newlen, int move_data); void lustre_free_reply_state(struct ptlrpc_reply_state *rs); int lustre_msg_size(__u32 magic, int count, int *lengths); +int lustre_msg_size_v2(int count, int *lengths); int 
lustre_unpack_msg(struct lustre_msg *m, int len); int lustre_unpack_ptlrpc_body(struct lustre_msg *m); +void *lustre_msg_buf_v1(void *msg, int n, int min_size); +void *lustre_msg_buf_v2(struct lustre_msg_v2 *m, int n, int min_size); void *lustre_msg_buf(struct lustre_msg *m, int n, int minlen); int lustre_msg_buflen(struct lustre_msg *m, int n); void lustre_msg_set_buflen(struct lustre_msg *m, int n, int len); @@ -833,6 +881,16 @@ void lustre_msg_set_status(struct lustre_msg *msg, __u32 status); void lustre_msg_set_conn_cnt(struct lustre_msg *msg, __u32 conn_cnt); static inline void +lustre_shrink_reply(struct ptlrpc_request *req, int segment, + unsigned int newlen, int move_data) +{ + LASSERT(req->rq_reply_state); + LASSERT(req->rq_repmsg); + req->rq_replen = lustre_shrink_msg(req->rq_repmsg, segment, + newlen, move_data); +} + +static inline void ptlrpc_rs_addref(struct ptlrpc_reply_state *rs) { LASSERT(atomic_read(&rs->rs_refcount) > 0); @@ -887,6 +945,10 @@ int client_import_add_conn(struct obd_import *imp, struct obd_uuid *uuid, int client_import_del_conn(struct obd_import *imp, struct obd_uuid *uuid); int import_set_conn_priority(struct obd_import *imp, struct obd_uuid *uuid); +/* ptlrpc/pers.c */ +int ptlrpc_bulk_alloc_enc_pages(struct ptlrpc_bulk_desc *desc); +void ptlrpc_bulk_free_enc_pages(struct ptlrpc_bulk_desc *desc); + /* ptlrpc/pinger.c */ int ptlrpc_pinger_add_import(struct obd_import *imp); int ptlrpc_pinger_del_import(struct obd_import *imp); diff --git a/lustre/include/lustre_param.h b/lustre/include/lustre_param.h index 427973d..adff8f1 100644 --- a/lustre/include/lustre_param.h +++ b/lustre/include/lustre_param.h @@ -53,5 +53,9 @@ int do_lcfg(char *cfgname, lnet_nid_t nid, int cmd, #define PARAM_LOV_STRIPE_COUNT PARAM_LOV"stripecount=" #define PARAM_LOV_STRIPE_OFFSET PARAM_LOV"stripeoffset=" #define PARAM_LOV_STRIPE_PATTERN PARAM_LOV"stripetype=" +#define PARAM_SEC "security." +#define PARAM_SEC_RPC PARAM_SEC"rpc." 
+#define PARAM_SEC_RPC_MDT PARAM_SEC_RPC"mdt=" +#define PARAM_SEC_RPC_CLI PARAM_SEC_RPC"cli=" #endif // _LUSTRE_PARAM_H diff --git a/lustre/include/lustre_sec.h b/lustre/include/lustre_sec.h new file mode 100644 index 0000000..1170cc5 --- /dev/null +++ b/lustre/include/lustre_sec.h @@ -0,0 +1,514 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2004-2006 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#ifndef _LUSTRE_SEC_H_ +#define _LUSTRE_SEC_H_ + +/* + * to avoid include + */ +struct obd_import; +struct ptlrpc_request; +struct ptlrpc_reply_state; +struct ptlrpc_bulk_desc; +struct brw_page; + +/* + * forward declaration + */ +struct ptlrpc_sec_policy; +struct ptlrpc_sec_cops; +struct ptlrpc_sec_sops; +struct ptlrpc_sec; +struct ptlrpc_svc_ctx; +struct ptlrpc_cli_ctx; +struct ptlrpc_ctx_ops; + +/* + * flavor constants + */ +enum sptlrpc_policies { + SPTLRPC_POLICY_NULL = 0, + SPTLRPC_POLICY_PLAIN = 1, + SPTLRPC_POLICY_GSS = 2, + SPTLRPC_POLICY_MAX, +}; + +enum sptlrpc_subpolicy_null { + SPTLRPC_SUBPOLICY_NULL = 0, + SPTLRPC_SUBPOLICY_NULL_MAX, +}; + +enum sptlrpc_subpolicy_plain { + SPTLRPC_SUBPOLICY_PLAIN = 0, + SPTLRPC_SUBPOLICY_PLAIN_MAX, +}; + +enum sptlrpc_subpolicy_gss { + SPTLRPC_SUBPOLICY_GSS_NONE = 0, + SPTLRPC_SUBPOLICY_GSS_KRB5 = 1, + SPTLRPC_SUBPOLICY_GSS_MAX, +}; + +enum sptlrpc_service_type { + SPTLRPC_SVC_NONE = 0, /* no security */ + SPTLRPC_SVC_AUTH = 1, /* authentication */ + SPTLRPC_SVC_PRIV = 2, /* privacy */ + SPTLRPC_SVC_MAX, +}; + +/* + * flavor compose/extract + */ + +typedef __u32 ptlrpc_flavor_t; + +/* + * 8b (reserved) | 8b (flags) | 6b (policy) | 6b (subpolicy) | 4b (svc) + */ +#define SEC_FLAVOR_FLAGS_OFFSET (16) +#define SEC_FLAVOR_POLICY_OFFSET (10) +#define SEC_FLAVOR_SUBPOLICY_OFFSET (4) +#define SEC_FLAVOR_SVC_OFFSET (0) + +#define SEC_MAKE_RPC_FLAVOR(policy, subpolicy, svc) \ + (((__u32)(policy) << SEC_FLAVOR_POLICY_OFFSET) | \ + ((__u32)(subpolicy) << SEC_FLAVOR_SUBPOLICY_OFFSET) | \ + ((__u32)(svc) << SEC_FLAVOR_SVC_OFFSET)) + +#define SEC_MAKE_RPC_SUBFLAVOR(subpolicy, svc) \ + (((__u32)(subpolicy) << SEC_FLAVOR_SUBPOLICY_OFFSET) | \ + ((__u32)(svc) << SEC_FLAVOR_SVC_OFFSET)) + +#define SEC_FLAVOR_POLICY(flavor) \ + ((((__u32)(flavor)) >> SEC_FLAVOR_POLICY_OFFSET) & 0x3F) +#define SEC_FLAVOR_SUBPOLICY(flavor) \ + ((((__u32)(flavor)) >> SEC_FLAVOR_SUBPOLICY_OFFSET) & 0x3F) +#define SEC_FLAVOR_SVC(flavor) \ + 
((((__u32)(flavor)) >> SEC_FLAVOR_SVC_OFFSET) & 0xF) +#define SEC_FLAVOR_SUB(flavor) \ + ((((__u32)(flavor)) >> SEC_FLAVOR_SVC_OFFSET) & 0x3FF) + +#define SEC_FLAVOR_RPC(f) \ + (((__u32) f) & ((1 << SEC_FLAVOR_FLAGS_OFFSET) - 1)) + +/* + * general gss flavors + */ +#define SPTLRPC_FLVR_GSS_NONE \ + SEC_MAKE_RPC_FLAVOR(SPTLRPC_POLICY_GSS, \ + SPTLRPC_SUBPOLICY_GSS_NONE, \ + SPTLRPC_SVC_NONE) +#define SPTLRPC_FLVR_GSS_AUTH \ + SEC_MAKE_RPC_FLAVOR(SPTLRPC_POLICY_GSS, \ + SPTLRPC_SUBPOLICY_GSS_NONE, \ + SPTLRPC_SVC_AUTH) +#define SPTLRPC_FLVR_GSS_PRIV \ + SEC_MAKE_RPC_FLAVOR(SPTLRPC_POLICY_GSS, \ + SPTLRPC_SUBPOLICY_GSS_NONE, \ + SPTLRPC_SVC_PRIV) + +/* + * gss subflavors + */ +#define SPTLRPC_SUBFLVR_KRB5 \ + SEC_MAKE_RPC_SUBFLAVOR(SPTLRPC_SUBPOLICY_GSS_KRB5, \ + SPTLRPC_SVC_NONE) +#define SPTLRPC_SUBFLVR_KRB5I \ + SEC_MAKE_RPC_SUBFLAVOR(SPTLRPC_SUBPOLICY_GSS_KRB5, \ + SPTLRPC_SVC_AUTH) +#define SPTLRPC_SUBFLVR_KRB5P \ + SEC_MAKE_RPC_SUBFLAVOR(SPTLRPC_SUBPOLICY_GSS_KRB5, \ + SPTLRPC_SVC_PRIV) + +/* + * "end user" flavors + */ +#define SPTLRPC_FLVR_NULL \ + SEC_MAKE_RPC_FLAVOR(SPTLRPC_POLICY_NULL, \ + SPTLRPC_SUBPOLICY_NULL, \ + SPTLRPC_SVC_NONE) +#define SPTLRPC_FLVR_PLAIN \ + SEC_MAKE_RPC_FLAVOR(SPTLRPC_POLICY_PLAIN, \ + SPTLRPC_SUBPOLICY_PLAIN, \ + SPTLRPC_SVC_NONE) +#define SPTLRPC_FLVR_KRB5 \ + SEC_MAKE_RPC_FLAVOR(SPTLRPC_POLICY_GSS, \ + SPTLRPC_SUBPOLICY_GSS_KRB5, \ + SPTLRPC_SVC_NONE) +#define SPTLRPC_FLVR_KRB5I \ + SEC_MAKE_RPC_FLAVOR(SPTLRPC_POLICY_GSS, \ + SPTLRPC_SUBPOLICY_GSS_KRB5, \ + SPTLRPC_SVC_AUTH) +#define SPTLRPC_FLVR_KRB5P \ + SEC_MAKE_RPC_FLAVOR(SPTLRPC_POLICY_GSS, \ + SPTLRPC_SUBPOLICY_GSS_KRB5, \ + SPTLRPC_SVC_PRIV) + +#define SPTLRPC_FLVR_INVALID (-1) + +#define SPTLRPC_FLVR_DEFAULT SPTLRPC_FLVR_NULL + +/* + * flavor flags (maximum 8 flags) + */ +#define SEC_FLAVOR_FL_BULK (1 << (0 + SEC_FLAVOR_FLAGS_OFFSET)) +#define SEC_FLAVOR_FL_USER (1 << (1 + SEC_FLAVOR_FLAGS_OFFSET)) + +#define SEC_FLAVOR_HAS_BULK(flavor) \ + (((flavor) & 
SEC_FLAVOR_FL_BULK) != 0) +#define SEC_FLAVOR_HAS_USER(flavor) \ + (((flavor) & SEC_FLAVOR_FL_USER) != 0) + + +struct sec_flavor_config { + __u32 sfc_rpc_flavor; /* main rpc flavor */ + __u32 sfc_bulk_priv; /* bulk encryption algorithm */ + __u32 sfc_bulk_csum; /* bulk checksum algorithm */ + __u32 sfc_flags; /* extra flags */ +}; + +enum lustre_part { + LUSTRE_CLI = 0, + LUSTRE_MDT, + LUSTRE_OST, + LUSTRE_MGC, + LUSTRE_MGS, +}; + +/* The maximum length of security payload. 1024 is enough for Kerberos 5, + * and should be enough for other future mechanisms but not sure. + * Only used by pre-allocated request/reply pool. + */ +#define SPTLRPC_MAX_PAYLOAD (1024) + + +struct vfs_cred { + uint32_t vc_uid; + uint32_t vc_gid; +}; + +struct ptlrpc_ctx_ops { + int (*match) (struct ptlrpc_cli_ctx *ctx, + struct vfs_cred *vcred); + int (*refresh) (struct ptlrpc_cli_ctx *ctx); + /* + * rpc data transform + */ + int (*sign) (struct ptlrpc_cli_ctx *ctx, + struct ptlrpc_request *req); + int (*verify) (struct ptlrpc_cli_ctx *ctx, + struct ptlrpc_request *req); + int (*seal) (struct ptlrpc_cli_ctx *ctx, + struct ptlrpc_request *req); + int (*unseal) (struct ptlrpc_cli_ctx *ctx, + struct ptlrpc_request *req); + /* + * bulk transform + */ + int (*wrap_bulk) (struct ptlrpc_cli_ctx *ctx, + struct ptlrpc_request *req, + struct ptlrpc_bulk_desc *desc); + int (*unwrap_bulk) (struct ptlrpc_cli_ctx *ctx, + struct ptlrpc_request *req, + struct ptlrpc_bulk_desc *desc); +}; + +#define PTLRPC_CTX_UPTODATE_BIT (0) /* uptodate */ +#define PTLRPC_CTX_DEAD_BIT (1) /* mark expired gracefully */ +#define PTLRPC_CTX_ERROR_BIT (2) /* fatal error (refresh, etc.) 
*/ +#define PTLRPC_CTX_HASHED_BIT (8) /* in hash table */ +#define PTLRPC_CTX_ETERNAL_BIT (9) /* always valid */ + +#define PTLRPC_CTX_UPTODATE (1 << PTLRPC_CTX_UPTODATE_BIT) +#define PTLRPC_CTX_DEAD (1 << PTLRPC_CTX_DEAD_BIT) +#define PTLRPC_CTX_ERROR (1 << PTLRPC_CTX_ERROR_BIT) +#define PTLRPC_CTX_HASHED (1 << PTLRPC_CTX_HASHED_BIT) +#define PTLRPC_CTX_ETERNAL (1 << PTLRPC_CTX_ETERNAL_BIT) + +#define PTLRPC_CTX_STATUS_MASK (PTLRPC_CTX_UPTODATE | \ + PTLRPC_CTX_DEAD | \ + PTLRPC_CTX_ERROR) + +struct ptlrpc_cli_ctx { + struct hlist_node cc_hash; /* linked into hash table */ + atomic_t cc_refcount; + struct ptlrpc_sec *cc_sec; + struct ptlrpc_ctx_ops *cc_ops; + cfs_time_t cc_expire; /* in seconds */ + unsigned long cc_flags; + struct vfs_cred cc_vcred; + spinlock_t cc_lock; + struct list_head cc_req_list; /* waiting reqs linked here */ +}; + +struct ptlrpc_sec_cops { + /* + * ptlrpc_sec constructor/destructor + */ + struct ptlrpc_sec * (*create_sec) (struct obd_import *imp, + struct ptlrpc_svc_ctx *ctx, + __u32 flavor, + unsigned long flags); + void (*destroy_sec) (struct ptlrpc_sec *sec); + /* + * search ctx for a certain user, if this function is missing, + * a generic function will be invoked by caller. implement this + * for any special need. 
+ */ + struct ptlrpc_cli_ctx * (*lookup_ctx) (struct ptlrpc_sec *sec, + struct vfs_cred *vcred); + /* + * ptlrpc_cli_ctx constructor/destructor + */ + struct ptlrpc_cli_ctx * (*create_ctx) (struct ptlrpc_sec *sec, + struct vfs_cred *vcred); + void (*destroy_ctx) (struct ptlrpc_sec *sec, + struct ptlrpc_cli_ctx *ctx); + /* reverse service */ + int (*install_rctx)(struct obd_import *imp, + struct ptlrpc_sec *sec, + struct ptlrpc_cli_ctx *ctx); + /* + * request/reply buffer manipulation + */ + int (*alloc_reqbuf)(struct ptlrpc_sec *sec, + struct ptlrpc_request *req, + int lustre_msg_size); + void (*free_reqbuf) (struct ptlrpc_sec *sec, + struct ptlrpc_request *req); + int (*alloc_repbuf)(struct ptlrpc_sec *sec, + struct ptlrpc_request *req, + int lustre_msg_size); + void (*free_repbuf) (struct ptlrpc_sec *sec, + struct ptlrpc_request *req); +}; + +struct ptlrpc_sec_sops { + int (*accept) (struct ptlrpc_request *req); + int (*authorize) (struct ptlrpc_request *req); + /* buffer manipulation */ + int (*alloc_rs) (struct ptlrpc_request *req, + int msgsize); + void (*free_rs) (struct ptlrpc_reply_state *rs); + void (*free_ctx) (struct ptlrpc_svc_ctx *ctx); + /* reverse credential */ + int (*install_rctx)(struct obd_import *imp, + struct ptlrpc_svc_ctx *ctx); + /* bulk transform */ + int (*unwrap_bulk) (struct ptlrpc_request *req, + struct ptlrpc_bulk_desc *desc); + int (*wrap_bulk) (struct ptlrpc_request *req, + struct ptlrpc_bulk_desc *desc); +}; + +struct ptlrpc_sec_policy { + struct module *sp_owner; + char *sp_name; + __u32 sp_policy; /* policy number */ + struct ptlrpc_sec_cops *sp_cops; /* client ops */ + struct ptlrpc_sec_sops *sp_sops; /* server ops */ +}; + +#define PTLRPC_SEC_FL_REVERSE 0x0001 /* reverse sec */ +#define PTLRPC_SEC_FL_ROOTONLY 0x0002 /* treat everyone as root */ + +struct ptlrpc_sec { + struct ptlrpc_sec_policy *ps_policy; + atomic_t ps_refcount; + __u32 ps_flavor; /* rpc flavor */ + unsigned long ps_flags; /* PTLRPC_SEC_FL_XX */ + struct 
obd_import *ps_import; /* owning import */ + spinlock_t ps_lock; /* protect ccache */ + int ps_ccache_size; /* must be 2^n */ + struct hlist_head *ps_ccache; /* ctx cache hash */ + atomic_t ps_busy; /* busy count */ + cfs_time_t ps_gc_interval; /* in seconds */ + cfs_time_t ps_gc_next; /* in seconds */ +}; + +struct ptlrpc_svc_ctx { + atomic_t sc_refcount; + struct ptlrpc_sec_policy *sc_policy; +}; + +/* + * user identity descriptor + */ +#define LUSTRE_MAX_GROUPS (128) + +struct ptlrpc_user_desc { + __u32 pud_uid; + __u32 pud_gid; + __u32 pud_fsuid; + __u32 pud_fsgid; + __u32 pud_cap; + __u32 pud_ngroups; + __u32 pud_groups[0]; +}; + +/* + * bulk flavors + */ +enum bulk_checksum_alg { + BULK_CSUM_ALG_NULL = 0, + BULK_CSUM_ALG_CRC32, + BULK_CSUM_ALG_MD5, + BULK_CSUM_ALG_SHA1, + BULK_CSUM_ALG_SHA256, + BULK_CSUM_ALG_SHA384, + BULK_CSUM_ALG_SHA512, + BULK_CSUM_ALG_MAX +}; + +enum bulk_encrypt_alg { + BULK_PRIV_ALG_NULL = 0, + BULK_PRIV_ALG_ARC4, + BULK_PRIV_ALG_MAX +}; + +struct ptlrpc_bulk_sec_desc { + __u32 bsd_version; + __u32 bsd_pad; + __u32 bsd_csum_alg; /* checksum algorithm */ + __u32 bsd_priv_alg; /* encrypt algorithm */ + __u8 bsd_iv[16]; /* encrypt iv */ + __u8 bsd_csum[0]; +}; + +/* + * security type + */ +int sptlrpc_register_policy(struct ptlrpc_sec_policy *policy); +int sptlrpc_unregister_policy(struct ptlrpc_sec_policy *policy); + +__u32 sptlrpc_name2flavor(const char *name); +char *sptlrpc_flavor2name(__u32 flavor); + +static inline +struct ptlrpc_sec_policy *sptlrpc_policy_get(struct ptlrpc_sec_policy *policy) +{ + __module_get(policy->sp_owner); + return policy; +} + +static inline +void sptlrpc_policy_put(struct ptlrpc_sec_policy *policy) +{ + module_put(policy->sp_owner); +} + +/* + * client credential + */ +struct ptlrpc_cli_ctx *sptlrpc_ctx_get(struct ptlrpc_cli_ctx *ctx); +void sptlrpc_ctx_put(struct ptlrpc_cli_ctx *ctx, int sync); +void sptlrpc_ctx_expire(struct ptlrpc_cli_ctx *ctx); +void sptlrpc_ctx_replace(struct ptlrpc_sec *sec, struct 
ptlrpc_cli_ctx *new); +void sptlrpc_ctx_wakeup(struct ptlrpc_cli_ctx *ctx); + +/* + * client wrap/buffers + */ +int sptlrpc_cli_wrap_request(struct ptlrpc_request *req); +int sptlrpc_cli_unwrap_reply(struct ptlrpc_request *req); +int sptlrpc_cli_alloc_reqbuf(struct ptlrpc_request *req, int msgsize); +int sptlrpc_cli_alloc_repbuf(struct ptlrpc_request *req, int msgsize); +void sptlrpc_cli_free_reqbuf(struct ptlrpc_request *req); +void sptlrpc_cli_free_repbuf(struct ptlrpc_request *req); +void sptlrpc_request_out_callback(struct ptlrpc_request *req); + +/* + * higher interface of import & request + */ +int sptlrpc_import_get_sec(struct obd_import *imp, struct ptlrpc_svc_ctx *svc_ctx, + __u32 flavor, unsigned long flags); +void sptlrpc_import_put_sec(struct obd_import *imp); +int sptlrpc_import_check_ctx(struct obd_import *imp); +void sptlrpc_import_flush_root_ctx(struct obd_import *imp); +void sptlrpc_import_flush_my_ctx(struct obd_import *imp); +int sptlrpc_req_get_ctx(struct ptlrpc_request *req); +void sptlrpc_req_put_ctx(struct ptlrpc_request *req); +int sptlrpc_req_refresh_ctx(struct ptlrpc_request *req, long timeout); +void sptlrpc_req_set_flavor(struct ptlrpc_request *req, int opcode); + +int sptlrpc_parse_flavor(enum lustre_part from, enum lustre_part to, + char *str, struct sec_flavor_config *conf); +/* misc */ +const char * sec2target_str(struct ptlrpc_sec *sec); +int sptlrpc_lprocfs_rd(char *page, char **start, off_t off, int count, + int *eof, void *data); + +/* + * server side + */ +enum secsvc_accept_res { + SECSVC_OK = 0, + SECSVC_COMPLETE, + SECSVC_DROP, +}; + +int sptlrpc_svc_unwrap_request(struct ptlrpc_request *req); +int sptlrpc_svc_alloc_rs(struct ptlrpc_request *req, int msglen); +int sptlrpc_svc_wrap_reply(struct ptlrpc_request *req); +void sptlrpc_svc_free_rs(struct ptlrpc_reply_state *rs); +void sptlrpc_svc_ctx_addref(struct ptlrpc_request *req); +void sptlrpc_svc_ctx_decref(struct ptlrpc_request *req); + +/* + * reverse context + */ +int 
sptlrpc_svc_install_rvs_ctx(struct obd_import *imp, + struct ptlrpc_svc_ctx *ctx); +int sptlrpc_cli_install_rvs_ctx(struct obd_import *imp, + struct ptlrpc_cli_ctx *ctx); + +/* bulk security api */ +int sptlrpc_cli_wrap_bulk(struct ptlrpc_request *req, + struct ptlrpc_bulk_desc *desc); +int sptlrpc_cli_unwrap_bulk_read(struct ptlrpc_request *req, + int nob, obd_count pg_count, + struct brw_page **pga); +int sptlrpc_cli_unwrap_bulk_write(struct ptlrpc_request *req, + struct ptlrpc_bulk_desc *desc); +int sptlrpc_svc_wrap_bulk(struct ptlrpc_request *req, + struct ptlrpc_bulk_desc *desc); +int sptlrpc_svc_unwrap_bulk(struct ptlrpc_request *req, + struct ptlrpc_bulk_desc *desc); + +/* user descriptor helpers */ +int sptlrpc_user_desc_size(void); +int sptlrpc_pack_user_desc(struct lustre_msg *msg, int offset); +int sptlrpc_unpack_user_desc(struct lustre_msg *msg, int offset); + +/* bulk helpers (internal use only by policies) */ +int bulk_sec_desc_size(__u32 csum_alg, int request, int read); +int bulk_sec_desc_unpack(struct lustre_msg *msg, int offset); + +int bulk_csum_cli_request(struct ptlrpc_bulk_desc *desc, int read, + __u32 alg, struct lustre_msg *rmsg, int roff); +int bulk_csum_cli_reply(struct ptlrpc_bulk_desc *desc, int read, + struct lustre_msg *rmsg, int roff, + struct lustre_msg *vmsg, int voff); +int bulk_csum_svc(struct ptlrpc_bulk_desc *desc, int read, + struct lustre_msg *vmsg, int voff, + struct lustre_msg *rmsg, int roff); +#endif /* _LUSTRE_SEC_H_ */ diff --git a/lustre/include/obd.h b/lustre/include/obd.h index 6192e9a..debc5cf 100644 --- a/lustre/include/obd.h +++ b/lustre/include/obd.h @@ -386,6 +386,9 @@ struct client_obd { int cl_max_mds_cookiesize; kdev_t cl_sandev; + /* security configuration */ + struct sec_flavor_config cl_sec_conf; + //struct llog_canceld_ctxt *cl_llcd; /* it's included by obd_llog_ctxt */ void *cl_llcd_offset; @@ -918,11 +921,12 @@ enum obd_cleanup_stage { }; /* get/set_info keys */ -#define KEY_MDS_CONN "mds_conn" -#define 
KEY_NEXT_ID "next_id" -#define KEY_LOVDESC "lovdesc" -#define KEY_INIT_RECOV "initial_recov" -#define KEY_INIT_RECOV_BACKUP "init_recov_bk" +#define KEY_MDS_CONN "mds_conn" +#define KEY_NEXT_ID "next_id" +#define KEY_LOVDESC "lovdesc" +#define KEY_INIT_RECOV "initial_recov" +#define KEY_INIT_RECOV_BACKUP "init_recov_bk" +#define KEY_FLUSH_CTX "flush_ctx" struct lu_context; diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index 41742c6..5ccc999 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -36,6 +36,7 @@ #include #include #include +#include #include /* @priority: if non-zero, move the selected to the list head @@ -176,6 +177,18 @@ out: RETURN(rc); } +static +void destroy_import(struct obd_import *imp) +{ + /* drop security policy instance after all rpc finished/aborted + * to let all busy credentials be released. + */ + class_import_get(imp); + class_destroy_import(imp); + sptlrpc_import_put_sec(imp); + class_import_put(imp); +} + /* configure an RPC client OBD device * * lcfg parameters: @@ -235,6 +248,10 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg) sema_init(&cli->cl_sem, 1); sema_init(&cli->cl_mgc_sem, 1); + cli->cl_sec_conf.sfc_rpc_flavor = SPTLRPC_FLVR_NULL; + cli->cl_sec_conf.sfc_bulk_csum = BULK_CSUM_ALG_NULL; + cli->cl_sec_conf.sfc_bulk_priv = BULK_PRIV_ALG_NULL; + cli->cl_sec_conf.sfc_flags = 0; cli->cl_conn_count = 0; memcpy(server_uuid.uuid, lustre_cfg_buf(lcfg, 2), min_t(unsigned int, LUSTRE_CFG_BUFLEN(lcfg, 2), @@ -374,6 +391,11 @@ int client_connect_import(const struct lu_context *ctx, if (rc != 0) GOTO(out_ldlm, rc); + rc = sptlrpc_import_get_sec(imp, NULL, cli->cl_sec_conf.sfc_rpc_flavor, + cli->cl_sec_conf.sfc_flags); + if (rc) + GOTO(out_ldlm, rc); + ocd = &imp->imp_connect_data; if (data) { *ocd = *data; @@ -465,7 +487,7 @@ int client_disconnect_export(struct obd_export *exp) ptlrpc_invalidate_import(imp); ptlrpc_free_rq_pool(imp->imp_rq_pool); - class_destroy_import(imp); + 
destroy_import(imp); cli->cl_import = NULL; EXIT; @@ -776,8 +798,12 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler) req->rq_self, &remote_uuid); - if (lustre_msg_get_op_flags(req->rq_repmsg) & MSG_CONNECT_RECONNECT) + if (lustre_msg_get_op_flags(req->rq_repmsg) & MSG_CONNECT_RECONNECT) { + LASSERT(export->exp_imp_reverse); + sptlrpc_svc_install_rvs_ctx(export->exp_imp_reverse, + req->rq_svc_ctx); GOTO(out, rc = 0); + } if (target->obd_recovering) target->obd_connected_clients++; @@ -787,7 +813,7 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler) sizeof conn); if (export->exp_imp_reverse != NULL) - class_destroy_import(export->exp_imp_reverse); + destroy_import(export->exp_imp_reverse); revimp = export->exp_imp_reverse = class_new_import(target); revimp->imp_connection = ptlrpc_connection_addref(export->exp_connection); revimp->imp_client = &export->exp_obd->obd_ldlm_client; @@ -800,6 +826,14 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler) lustre_msg_add_op_flags(req->rq_repmsg, MSG_CONNECT_NEXT_VER); } + rc = sptlrpc_import_get_sec(revimp, req->rq_svc_ctx, + req->rq_sec_flavor, 0); + if (rc) { + CERROR("Failed to get sec for reverse import: %d\n", rc); + export->exp_imp_reverse = NULL; + class_destroy_import(revimp); + } + class_import_put(revimp); out: if (export) @@ -830,7 +864,7 @@ void target_destroy_export(struct obd_export *exp) /* exports created from last_rcvd data, and "fake" exports created by lctl don't have an import */ if (exp->exp_imp_reverse != NULL) - class_destroy_import(exp->exp_imp_reverse); + destroy_import(exp->exp_imp_reverse); /* We cancel locks at disconnect time, but this will catch any locks * granted in a race with recovery-induced disconnect. 
*/ @@ -843,16 +877,53 @@ void target_destroy_export(struct obd_export *exp) */ -static void target_release_saved_req(struct ptlrpc_request *req) +static +struct ptlrpc_request *target_save_req(struct ptlrpc_request *src) { - if (req->rq_reply_state != NULL) { - ptlrpc_rs_decref(req->rq_reply_state); - /* req->rq_reply_state = NULL; */ + struct ptlrpc_request *req; + struct lustre_msg *reqmsg; + + OBD_ALLOC(req, sizeof(*req)); + if (!req) + return NULL; + + OBD_ALLOC(reqmsg, src->rq_reqlen); + if (!reqmsg) { + OBD_FREE(req, sizeof(*req)); + return NULL; } + memcpy(req, src, sizeof(*req)); + memcpy(reqmsg, src->rq_reqmsg, src->rq_reqlen); + req->rq_reqmsg = reqmsg; + + class_export_get(req->rq_export); + CFS_INIT_LIST_HEAD(&req->rq_list); + sptlrpc_svc_ctx_addref(req); + if (req->rq_reply_state) + ptlrpc_rs_addref(req->rq_reply_state); + + /* repmsg have been taken over, in privacy mode this might point to + * invalid data. prevent further access on it. + */ + src->rq_repmsg = NULL; + src->rq_replen = 0; + + return req; +} + +static +void target_release_saved_req(struct ptlrpc_request *req) +{ + if (req->rq_reply_state) { + ptlrpc_rs_decref(req->rq_reply_state); + req->rq_reply_state = NULL; + } + sptlrpc_svc_ctx_decref(req); class_export_put(req->rq_export); + OBD_FREE(req->rq_reqmsg, req->rq_reqlen); - OBD_FREE(req, sizeof *req); + OBD_FREE(req, sizeof(*req)); } static void target_finish_recovery(struct obd_device *obd) @@ -1108,13 +1179,8 @@ static void process_recovery_queue(struct obd_device *obd) reset_recovery_timer(obd); /* bug 1580: decide how to properly sync() in recovery */ //mds_fsync_super(obd->u.obt.obt_sb); - class_export_put(req->rq_export); - if (req->rq_reply_state != NULL) { - ptlrpc_rs_decref(req->rq_reply_state); - /* req->rq_reply_state = NULL; */ - } - OBD_FREE(req->rq_reqmsg, req->rq_reqlen); - OBD_FREE(req, sizeof *req); + target_release_saved_req(req); + spin_lock_bh(&obd->obd_processing_task_lock); obd->obd_next_recovery_transno++; if 
(list_empty(&obd->obd_recovery_queue)) { @@ -1134,7 +1200,6 @@ int target_queue_recovery_request(struct ptlrpc_request *req, int inserted = 0; __u64 transno = lustre_msg_get_transno(req->rq_reqmsg); struct ptlrpc_request *saved_req; - struct lustre_msg *reqmsg; /* CAVEAT EMPTOR: The incoming request message has been swabbed * (i.e. buflens etc are in my own byte order), but type-dependent @@ -1147,13 +1212,9 @@ int target_queue_recovery_request(struct ptlrpc_request *req, } /* XXX If I were a real man, these LBUGs would be sane cleanups. */ - /* XXX just like the request-dup code in queue_final_reply */ - OBD_ALLOC(saved_req, sizeof *saved_req); + saved_req = target_save_req(req); if (!saved_req) LBUG(); - OBD_ALLOC(reqmsg, req->rq_reqlen); - if (!reqmsg) - LBUG(); spin_lock_bh(&obd->obd_processing_task_lock); @@ -1172,8 +1233,8 @@ int target_queue_recovery_request(struct ptlrpc_request *req, /* Processing the queue right now, don't re-add. */ LASSERT(list_empty(&req->rq_list)); spin_unlock_bh(&obd->obd_processing_task_lock); - OBD_FREE(reqmsg, req->rq_reqlen); - OBD_FREE(saved_req, sizeof *saved_req); + + target_release_saved_req(saved_req); return 1; } @@ -1183,17 +1244,12 @@ int target_queue_recovery_request(struct ptlrpc_request *req, (MSG_RESENT | MSG_REPLAY)) { DEBUG_REQ(D_ERROR, req, "dropping resent queued req"); spin_unlock_bh(&obd->obd_processing_task_lock); - OBD_FREE(reqmsg, req->rq_reqlen); - OBD_FREE(saved_req, sizeof *saved_req); + + target_release_saved_req(saved_req); return 0; } - memcpy(saved_req, req, sizeof *req); - memcpy(reqmsg, req->rq_reqmsg, req->rq_reqlen); req = saved_req; - req->rq_reqmsg = reqmsg; - class_export_get(req->rq_export); - CFS_INIT_LIST_HEAD(&req->rq_list); /* XXX O(n^2) */ list_for_each(tmp, &obd->obd_recovery_queue) { @@ -1241,7 +1297,6 @@ int target_queue_final_reply(struct ptlrpc_request *req, int rc) { struct obd_device *obd = target_req2obd(req); struct ptlrpc_request *saved_req; - struct lustre_msg *reqmsg; int 
recovery_done = 0; LASSERT ((rc == 0) == (req->rq_reply_state != NULL)); @@ -1255,30 +1310,22 @@ int target_queue_final_reply(struct ptlrpc_request *req, int rc) LASSERT (!req->rq_reply_state->rs_difficult); LASSERT(list_empty(&req->rq_list)); - /* XXX a bit like the request-dup code in queue_recovery_request */ - OBD_ALLOC(saved_req, sizeof *saved_req); + + saved_req = target_save_req(req); if (!saved_req) LBUG(); - OBD_ALLOC(reqmsg, req->rq_reqlen); - if (!reqmsg) - LBUG(); - *saved_req = *req; - memcpy(reqmsg, req->rq_reqmsg, req->rq_reqlen); /* Don't race cleanup */ spin_lock_bh(&obd->obd_processing_task_lock); if (obd->obd_stopping) { spin_unlock_bh(&obd->obd_processing_task_lock); - OBD_FREE(reqmsg, req->rq_reqlen); - OBD_FREE(saved_req, sizeof *req); + target_release_saved_req(saved_req); req->rq_status = -ENOTCONN; /* rv is ignored anyhow */ return -ENOTCONN; } - ptlrpc_rs_addref(req->rq_reply_state); /* +1 ref for saved reply */ + req = saved_req; - req->rq_reqmsg = reqmsg; - class_export_get(req->rq_export); list_add(&req->rq_list, &obd->obd_delayed_reply_queue); /* only count the first "replay over" request from each diff --git a/lustre/ldlm/ldlm_lock.c b/lustre/ldlm/ldlm_lock.c index 3c21a00..fa4657b 100644 --- a/lustre/ldlm/ldlm_lock.c +++ b/lustre/ldlm/ldlm_lock.c @@ -757,6 +757,10 @@ void ldlm_lock_allow_match(struct ldlm_lock *lock) * * Returns 1 if it finds an already-existing lock that is compatible; in this * case, lockh is filled in with a addref()ed lock + * + * we also check security context, if that failed we simply return 0 (to keep + * caller code unchanged), the context failure will be discovered by caller + * sometime later. */ int ldlm_lock_match(struct ldlm_namespace *ns, int flags, struct ldlm_res_id *res_id, ldlm_type_t type, @@ -836,6 +840,18 @@ int ldlm_lock_match(struct ldlm_namespace *ns, int flags, res_id->name[2] : policy->l_extent.start, (type == LDLM_PLAIN || type == LDLM_IBITS) ? 
res_id->name[3] : policy->l_extent.end); + + /* check user's security context */ + if (lock->l_conn_export && + sptlrpc_import_check_ctx( + class_exp2cliimp(lock->l_conn_export))) { + if (!(flags & LDLM_FL_TEST_LOCK)) + ldlm_lock_decref_internal(lock, mode); + rc = 0; + } + + if (flags & LDLM_FL_TEST_LOCK) + LDLM_LOCK_PUT(lock); } else if (!(flags & LDLM_FL_TEST_LOCK)) {/*less verbose for test-only*/ LDLM_DEBUG_NOLOCK("not matched ns %p type %u mode %u res " LPU64"/"LPU64" ("LPU64" "LPU64")", ns, @@ -847,8 +863,6 @@ int ldlm_lock_match(struct ldlm_namespace *ns, int flags, } if (old_lock) LDLM_LOCK_PUT(old_lock); - if (flags & LDLM_FL_TEST_LOCK && rc) - LDLM_LOCK_PUT(lock); return rc; } diff --git a/lustre/liblustre/lutil.c b/lustre/liblustre/lutil.c index b4689d5..0789c72 100644 --- a/lustre/liblustre/lutil.c +++ b/lustre/liblustre/lutil.c @@ -217,6 +217,7 @@ int liblustre_init_current(char *comm) strncpy(current->comm, comm, sizeof(current->comm)); current->pid = getpid(); + current->gid = getgid(); current->fsuid = geteuid(); current->fsgid = getegid(); memset(¤t->pending, 0, sizeof(current->pending)); diff --git a/lustre/llite/dir.c b/lustre/llite/dir.c index 1eb10e2..807bb2d 100644 --- a/lustre/llite/dir.c +++ b/lustre/llite/dir.c @@ -1090,6 +1090,9 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file, RETURN (-EFAULT); RETURN(0); } + case LL_IOC_FLUSHCTX: + RETURN(ll_flush_ctx(inode)); + default: RETURN(obd_iocontrol(cmd, sbi->ll_dt_exp,0,NULL,(void *)arg)); } diff --git a/lustre/llite/file.c b/lustre/llite/file.c index 5d682fa..21052ba 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -2017,6 +2017,8 @@ int ll_file_ioctl(struct inode *inode, struct file *file, unsigned int cmd, case EXT3_IOC_SETVERSION_OLD: case EXT3_IOC_SETVERSION: */ + case LL_IOC_FLUSHCTX: + RETURN(ll_flush_ctx(inode)); default: RETURN(obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL, (void *)arg)); diff --git a/lustre/llite/llite_internal.h 
b/lustre/llite/llite_internal.h index 49f4cd4..523ac02 100644 --- a/lustre/llite/llite_internal.h +++ b/lustre/llite/llite_internal.h @@ -486,6 +486,7 @@ void ll_read_inode2(struct inode *inode, void *opaque); void ll_delete_inode(struct inode *inode); int ll_iocontrol(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg); +int ll_flush_ctx(struct inode *inode); void ll_umount_begin(struct super_block *sb); int ll_remount_fs(struct super_block *sb, int *flags, char *data); int ll_prep_inode(struct inode **inode, struct ptlrpc_request *req, diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c index 09a9120..58df98f 100644 --- a/lustre/llite/llite_lib.c +++ b/lustre/llite/llite_lib.c @@ -1835,6 +1835,21 @@ int ll_iocontrol(struct inode *inode, struct file *file, RETURN(0); } +int ll_flush_ctx(struct inode *inode) +{ + struct ll_sb_info *sbi = ll_i2sbi(inode); + + CDEBUG(D_SEC, "flush context for user %d\n", current->uid); + + obd_set_info_async(sbi->ll_md_exp, + sizeof(KEY_FLUSH_CTX) - 1, KEY_FLUSH_CTX, + 0, NULL, NULL); + obd_set_info_async(sbi->ll_dt_exp, + sizeof(KEY_FLUSH_CTX) - 1, KEY_FLUSH_CTX, + 0, NULL, NULL); + return 0; +} + /* umount -f client means force down, don't save state */ void ll_umount_begin(struct super_block *sb) { diff --git a/lustre/lmv/lmv_obd.c b/lustre/lmv/lmv_obd.c index 8bd4538..d9342be 100644 --- a/lustre/lmv/lmv_obd.c +++ b/lustre/lmv/lmv_obd.c @@ -2080,62 +2080,22 @@ int lmv_set_info_async(struct obd_export *exp, obd_count keylen, } lmv = &obd->u.lmv; - /* maybe this could be default */ - if ((keylen == strlen("sec") && strcmp(key, "sec") == 0) || - (keylen == strlen("sec_flags") && strcmp(key, "sec_flags") == 0) || - (keylen == strlen("nllu") && strcmp(key, "nllu") == 0)) { - struct obd_export *exp; - int err, i; - - spin_lock(&lmv->lmv_lock); - for (i = 0, tgt = lmv->tgts; i < lmv->desc.ld_tgt_count; - i++, tgt++) { - exp = tgt->ltd_exp; - /* during setup time the connections to mdc might - * 
haven't been established. - */ - if (exp == NULL) { - struct obd_device *tgt_obd; - - tgt_obd = class_find_client_obd(&tgt->uuid, - LUSTRE_MDC_NAME, - &obd->obd_uuid); - if (!tgt_obd) { - CERROR("can't set info %s, " - "device %s not attached?\n", - (char *) key, tgt->uuid.uuid); - rc = -EINVAL; - continue; - } - exp = tgt_obd->obd_self_export; - } - - err = obd_set_info_async(exp, keylen, key, vallen, val, set); - if (!rc) - rc = err; - } - spin_unlock(&lmv->lmv_lock); + if (KEY_IS(KEY_FLUSH_CTX)) { + int i, err = 0; - RETURN(rc); - } - if (((keylen == strlen("flush_cred") && - strcmp(key, "flush_cred") == 0)) || - ((keylen == strlen("crypto_type") && - strcmp(key, "crypto_type") == 0))) { - int i; + for (i = 0; i < lmv->desc.ld_tgt_count; i++) { + tgt = &lmv->tgts[i]; - for (i = 0, tgt = lmv->tgts; i < lmv->desc.ld_tgt_count; - i++, tgt++) { if (!tgt->ltd_exp) continue; - rc = obd_set_info_async(tgt->ltd_exp, - keylen, key, vallen, - val, set); - if (rc) - RETURN(rc); + + err = obd_set_info_async(tgt->ltd_exp, + keylen, key, vallen, val, set); + if (err && rc == 0) + rc = err; } - RETURN(0); + RETURN(rc); } RETURN(-EINVAL); diff --git a/lustre/lov/lov_obd.c b/lustre/lov/lov_obd.c index 50c7a85..48b7c326 100644 --- a/lustre/lov/lov_obd.c +++ b/lustre/lov/lov_obd.c @@ -2468,6 +2468,8 @@ static int lov_set_info_async(struct obd_export *exp, obd_count keylen, if (KEY_IS("unlinked")) { if (vallen != 0 && KEY_IS("unlinked")) GOTO(out, rc = -EINVAL); + } else if (KEY_IS(KEY_FLUSH_CTX)) { + /* fall through */ } else { GOTO(out, rc = -EINVAL); } diff --git a/lustre/mdc/lproc_mdc.c b/lustre/mdc/lproc_mdc.c index 27107cd..d7b00dc 100644 --- a/lustre/mdc/lproc_mdc.c +++ b/lustre/mdc/lproc_mdc.c @@ -79,6 +79,7 @@ static struct lprocfs_vars lprocfs_obd_vars[] = { { "mds_conn_uuid", lprocfs_rd_conn_uuid, 0, 0 }, { "max_rpcs_in_flight", mdc_rd_max_rpcs_in_flight, mdc_wr_max_rpcs_in_flight, 0 }, + { "sptlrpc", sptlrpc_lprocfs_rd, 0, 0 }, { 0 } }; diff --git 
a/lustre/mdc/mdc_request.c b/lustre/mdc/mdc_request.c index 8ed7055..6a7edaa 100644 --- a/lustre/mdc/mdc_request.c +++ b/lustre/mdc/mdc_request.c @@ -943,6 +943,11 @@ int mdc_set_info_async(struct obd_export *exp, obd_count keylen, RETURN(rc); } + if (KEY_IS(KEY_FLUSH_CTX)) { + sptlrpc_import_flush_my_ctx(imp); + RETURN(0); + } + RETURN(rc); } diff --git a/lustre/mds/handler.c b/lustre/mds/handler.c index 9cf8423..4458f76 100644 --- a/lustre/mds/handler.c +++ b/lustre/mds/handler.c @@ -1347,6 +1347,9 @@ int mds_msg_check_version(struct lustre_msg *msg) case MDS_CONNECT: case MDS_DISCONNECT: case OBD_PING: + case SEC_CTX_INIT: + case SEC_CTX_INIT_CONT: + case SEC_CTX_FINI: rc = lustre_msg_check_version(msg, LUSTRE_OBD_VERSION); if (rc) CERROR("bad opc %u version %08x, expecting %08x\n", diff --git a/lustre/mdt/mdt_handler.c b/lustre/mdt/mdt_handler.c index 4f8900f..a4ccba6 100644 --- a/lustre/mdt/mdt_handler.c +++ b/lustre/mdt/mdt_handler.c @@ -1420,8 +1420,13 @@ static int mdt_recovery(struct ptlrpc_request *req) ENTRY; - if (lustre_msg_get_opc(req->rq_reqmsg) == MDS_CONNECT) + switch (lustre_msg_get_opc(req->rq_reqmsg)) { + case MDS_CONNECT: + case SEC_CTX_INIT: + case SEC_CTX_INIT_CONT: + case SEC_CTX_FINI: RETURN(+1); + } if (req->rq_export == NULL) { CERROR("operation %d on unconnected MDS from %s\n", @@ -3411,6 +3416,9 @@ static struct mdt_handler mdt_dlm_ops[] = { static struct mdt_handler mdt_llog_ops[] = { }; +static struct mdt_handler mdt_sec_ops[] = { +}; + static struct mdt_opc_slice mdt_regular_handlers[] = { { .mos_opc_start = MDS_GETATTR, @@ -3433,6 +3441,11 @@ static struct mdt_opc_slice mdt_regular_handlers[] = { .mos_hs = mdt_llog_ops }, { + .mos_opc_start = SEC_CTX_INIT, + .mos_opc_end = SEC_LAST_OPC, + .mos_hs = mdt_sec_ops + }, + { .mos_hs = NULL } }; diff --git a/lustre/mgs/mgs_llog.c b/lustre/mgs/mgs_llog.c index 9ff6eb2..dfca3ee 100644 --- a/lustre/mgs/mgs_llog.c +++ b/lustre/mgs/mgs_llog.c @@ -45,6 +45,7 @@ #include #include #include 
+#include #include "mgs_internal.h" /********************** Class fns ********************/ @@ -617,6 +618,24 @@ static inline int record_setup(struct obd_device *obd, struct llog_handle *llh, return record_base(obd,llh,devname,0,LCFG_SETUP,s1,s2,s3,s4); } +static inline int record_sec_flavor(struct obd_device *obd, + struct llog_handle *llh, char *devname, + struct sec_flavor_config *conf) +{ + struct lustre_cfg_bufs bufs; + struct lustre_cfg *lcfg; + int rc; + + lustre_cfg_bufs_reset(&bufs, devname); + lustre_cfg_bufs_set(&bufs, 1, conf, sizeof(*conf)); + lcfg = lustre_cfg_new(LCFG_SEC_FLAVOR, &bufs); + + rc = record_lcfg(obd, llh, lcfg); + + lustre_cfg_free(lcfg); + return rc; +} + static int record_lov_setup(struct obd_device *obd, struct llog_handle *llh, char *devname, struct lov_desc *desc) { @@ -854,14 +873,16 @@ int mgs_write_log_direct_all(struct obd_device *obd, struct fs_db *fsdb, } struct temp_comp { - struct mgs_target_info *comp_tmti; - struct mgs_target_info *comp_mti; - struct fs_db *comp_fsdb; - struct obd_device *comp_obd; + struct mgs_target_info *comp_tmti; + struct mgs_target_info *comp_mti; + struct fs_db *comp_fsdb; + struct obd_device *comp_obd; + struct sec_flavor_config comp_sec; }; static int mgs_write_log_mdc_to_mdt(struct obd_device *, struct fs_db *, - struct mgs_target_info *, char *); + struct mgs_target_info *, + struct sec_flavor_config *, char *); static int mgs_steal_llog_handler(struct llog_handle *llh, struct llog_rec_hdr *rec, @@ -873,6 +894,7 @@ static int mgs_steal_llog_handler(struct llog_handle *llh, int cfg_len = rec->lrh_len; char *cfg_buf = (char*) (rec + 1); struct lustre_cfg *lcfg; + struct sec_flavor_config *sec_conf; int rc = 0; struct llog_handle *mdt_llh = NULL; static int got_an_osc_or_mdc = 0; @@ -888,6 +910,7 @@ static int mgs_steal_llog_handler(struct llog_handle *llh, tmti = ((struct temp_comp*)data)->comp_tmti; fsdb = ((struct temp_comp*)data)->comp_fsdb; obd = ((struct temp_comp*)data)->comp_obd; + 
sec_conf = &((struct temp_comp*)data)->comp_sec; if (rec->lrh_type != OBD_CFG_REC) { CERROR("unhandled lrh_type: %#x\n", rec->lrh_type); @@ -966,17 +989,24 @@ static int mgs_steal_llog_handler(struct llog_handle *llh, RETURN(rc); } + if (lcfg->lcfg_command == LCFG_SEC_FLAVOR) { + memcpy(sec_conf, lustre_cfg_buf(lcfg, 1), sizeof(*sec_conf)); + + RETURN(rc); + } + if (lcfg->lcfg_command == LCFG_ADD_MDC) { int index; if (sscanf(lustre_cfg_buf(lcfg, 2), "%d", &index) != 1) RETURN (-EINVAL); - + memcpy(tmti->mti_fsname, mti->mti_fsname, strlen(mti->mti_fsname)); tmti->mti_stripe_index = index; - - mgs_write_log_mdc_to_mdt(obd, fsdb, tmti, mti->mti_svname); + + mgs_write_log_mdc_to_mdt(obd, fsdb, tmti, sec_conf, + mti->mti_svname); memset(tmti, 0, sizeof(*tmti)); RETURN(rc); } @@ -1155,10 +1185,35 @@ static int mgs_write_log_failnids(struct obd_device *obd, return rc; } +static +void extract_sec_flavor(char *params, char *key, char **ptr) +{ + char *val = NULL, *tail; + int len; + + *ptr = NULL; + + if (class_find_param(params, key, &val)) + return; + + tail = strchr(val, ' '); + if (tail == NULL) + len = strlen(val); + else + len = tail - val; + + OBD_ALLOC(*ptr, len + 1); + if (*ptr == NULL) + return; + + memcpy(*ptr, val, len); + (*ptr)[len] = '\0'; +} /***************************************BEGIN PROTO****************************/ static int mgs_write_log_mdc_to_lmv(struct obd_device *obd, struct fs_db *fsdb, struct mgs_target_info *mti, + struct sec_flavor_config *sec_conf, char *logname, char *lmvname) { struct llog_handle *llh = NULL; @@ -1193,6 +1248,7 @@ static int mgs_write_log_mdc_to_lmv(struct obd_device *obd, struct fs_db *fsdb, rc = record_attach(obd, llh, mdcname, LUSTRE_MDC_NAME, lmvuuid); rc = record_setup(obd, llh, mdcname, mti->mti_uuid, nodeuuid, 0, 0); + rc = record_sec_flavor(obd, llh, mdcname, sec_conf); rc = mgs_write_log_failnids(obd, mti, llh, mdcname); snprintf(index, sizeof(index), "%d", mti->mti_stripe_index); rc = record_mdc_add(obd, llh, 
lmvname, mdcuuid, mti->mti_uuid, @@ -1210,7 +1266,9 @@ static int mgs_write_log_mdc_to_lmv(struct obd_device *obd, struct fs_db *fsdb, /* add new mdc to already existent MDS */ static int mgs_write_log_mdc_to_mdt(struct obd_device *obd, struct fs_db *fsdb, - struct mgs_target_info *mti, char *logname) + struct mgs_target_info *mti, + struct sec_flavor_config *sec_conf, + char *logname) { struct llog_handle *llh = NULL; char *nodeuuid, *mdcname, *mdcuuid, *mdtuuid; @@ -1241,6 +1299,7 @@ static int mgs_write_log_mdc_to_mdt(struct obd_device *obd, struct fs_db *fsdb, } rc = record_attach(obd, llh, mdcname, LUSTRE_MDC_NAME, mdcuuid); rc = record_setup(obd, llh, mdcname, mti->mti_uuid, nodeuuid, 0, 0); + rc = record_sec_flavor(obd, llh, mdcname, sec_conf); rc = mgs_write_log_failnids(obd, mti, llh, mdcname); snprintf(index, sizeof(index), "%d", idx); @@ -1299,9 +1358,10 @@ out: static int mgs_write_log_mdt(struct obd_device *obd, struct fs_db *fsdb, struct mgs_target_info *mti) { - char *cliname; + char *cliname, *sec; struct llog_handle *llh = NULL; struct temp_comp comp = { 0 }; + struct sec_flavor_config sec_conf_mdt, sec_conf_cli; char mdt_index[9]; int rc, i = 0; ENTRY; @@ -1330,6 +1390,19 @@ static int mgs_write_log_mdt(struct obd_device *obd, struct fs_db *fsdb, "%s_UUID", mti->mti_svname); } + /* security flavor */ + extract_sec_flavor(mti->mti_params, PARAM_SEC_RPC_MDT, &sec); + rc = sptlrpc_parse_flavor(LUSTRE_MDT, LUSTRE_MDT, sec, &sec_conf_mdt); + name_destroy(sec); + if (rc) + RETURN(rc); + + extract_sec_flavor(mti->mti_params, PARAM_SEC_RPC_CLI, &sec); + rc = sptlrpc_parse_flavor(LUSTRE_CLI, LUSTRE_MDT, sec, &sec_conf_cli); + name_destroy(sec); + if (rc) + RETURN(rc); + /* add mdt */ rc = mgs_write_log_mdt0(obd, fsdb, mti); @@ -1384,8 +1457,8 @@ static int mgs_write_log_mdt(struct obd_device *obd, struct fs_db *fsdb, rc = mgs_steal_llog_for_mdt_from_client(obd, cliname, &comp); - rc = mgs_write_log_mdc_to_lmv(obd, fsdb, mti, cliname, - fsdb->fsdb_clilmv); 
+ rc = mgs_write_log_mdc_to_lmv(obd, fsdb, mti, &sec_conf_cli, + cliname, fsdb->fsdb_clilmv); /* add mountopts */ rc = record_start_log(obd, &llh, cliname); if (rc) @@ -1411,7 +1484,8 @@ out: sprintf(mdt_index,"-MDT%04x",i); name_create(&mdtname, mti->mti_fsname, mdt_index); - rc = mgs_write_log_mdc_to_mdt(obd, fsdb, mti, mdtname); + rc = mgs_write_log_mdc_to_mdt(obd, fsdb, mti, + &sec_conf_mdt, mdtname); name_destroy(mdtname); } } @@ -1422,7 +1496,9 @@ out: /* Add the ost info to the client/mdt lov */ static int mgs_write_log_osc_to_lov(struct obd_device *obd, struct fs_db *fsdb, struct mgs_target_info *mti, - char *logname, char *lovname, int flags) + char *logname, char *lovname, + struct sec_flavor_config *sec_conf, + int flags) { struct llog_handle *llh = NULL; char *nodeuuid, *svname, *oscname, *oscuuid, *lovuuid; @@ -1470,6 +1546,7 @@ static int mgs_write_log_osc_to_lov(struct obd_device *obd, struct fs_db *fsdb, } rc = record_attach(obd, llh, oscname, LUSTRE_OSC_NAME, lovuuid); rc = record_setup(obd, llh, oscname, mti->mti_uuid, nodeuuid, 0, 0); + rc = record_sec_flavor(obd, llh, oscname, sec_conf); rc = mgs_write_log_failnids(obd, mti, llh, oscname); snprintf(index, sizeof(index), "%d", mti->mti_stripe_index); rc = record_lov_add(obd, llh, lovname, mti->mti_uuid, index, "1"); @@ -1489,9 +1566,10 @@ static int mgs_write_log_ost(struct obd_device *obd, struct fs_db *fsdb, struct mgs_target_info *mti) { struct llog_handle *llh = NULL; - char *logname, *lovname; + char *logname, *lovname, *sec; char mdt_index[9]; char *ptr = mti->mti_params; + struct sec_flavor_config sec_conf_mdt, sec_conf_cli; int rc, flags = 0, failout = 0, i; ENTRY; @@ -1509,6 +1587,20 @@ static int mgs_write_log_ost(struct obd_device *obd, struct fs_db *fsdb, " all logs.\n", mti->mti_svname); RETURN(-EALREADY); } + + /* security flavors */ + extract_sec_flavor(mti->mti_params, PARAM_SEC_RPC_MDT, &sec); + rc = sptlrpc_parse_flavor(LUSTRE_MDT, LUSTRE_OST, sec, &sec_conf_mdt); + 
name_destroy(sec); + if (rc) + RETURN(rc); + + extract_sec_flavor(mti->mti_params, PARAM_SEC_RPC_CLI, &sec); + rc = sptlrpc_parse_flavor(LUSTRE_CLI, LUSTRE_OST, sec, &sec_conf_cli); + name_destroy(sec); + if (rc) + RETURN(rc); + /* attach obdfilter ost1 ost1_UUID setup /dev/loop2 ldiskfs f|n errors=remount-ro,user_xattr @@ -1547,7 +1639,7 @@ static int mgs_write_log_ost(struct obd_device *obd, struct fs_db *fsdb, name_create(&logname, mti->mti_fsname, mdt_index); name_create(&lovname, logname, "-mdtlov"); mgs_write_log_osc_to_lov(obd, fsdb, mti, logname, - lovname, flags); + lovname, &sec_conf_mdt, flags); name_destroy(logname); name_destroy(lovname); } @@ -1556,7 +1648,8 @@ static int mgs_write_log_ost(struct obd_device *obd, struct fs_db *fsdb, /* Append ost info to the client log */ name_create(&logname, mti->mti_fsname, "-client"); - mgs_write_log_osc_to_lov(obd, fsdb, mti, logname, fsdb->fsdb_clilov, 0); + mgs_write_log_osc_to_lov(obd, fsdb, mti, logname, fsdb->fsdb_clilov, + &sec_conf_cli, 0); name_destroy(logname); RETURN(rc); @@ -1664,6 +1757,11 @@ static int mgs_write_log_params(struct obd_device *obd, struct fs_db *fsdb, if (class_match_param(ptr, PARAM_FAILMODE, NULL) == 0) GOTO(end_while, rc); + /* Processed in mgs_write_log_mdt/mgs_write_log_ost */ + if (class_match_param(ptr, PARAM_SEC_RPC_MDT, NULL) == 0 || + class_match_param(ptr, PARAM_SEC_RPC_CLI, NULL) == 0) + GOTO(end_while, rc); + if (class_match_param(ptr, PARAM_FAILNODE, NULL) == 0) { /* Add a failover nidlist */ rc = 0; diff --git a/lustre/obdclass/genops.c b/lustre/obdclass/genops.c index 9613962..49a686f 100644 --- a/lustre/obdclass/genops.c +++ b/lustre/obdclass/genops.c @@ -749,6 +749,7 @@ void class_import_put(struct obd_import *import) } LASSERT(list_empty(&import->imp_handle.h_link)); + LASSERT(import->imp_sec == NULL); class_decref(import->imp_obd); OBD_FREE(import, sizeof(*import)); EXIT; diff --git a/lustre/obdclass/obd_config.c b/lustre/obdclass/obd_config.c index bc41a0b..f0c4ad8 
100644 --- a/lustre/obdclass/obd_config.c +++ b/lustre/obdclass/obd_config.c @@ -559,6 +559,36 @@ int class_del_conn(struct obd_device *obd, struct lustre_cfg *lcfg) RETURN(rc); } +int class_sec_flavor(struct obd_device *obd, struct lustre_cfg *lcfg) +{ + struct sec_flavor_config *conf; + ENTRY; + + if (strcmp(obd->obd_type->typ_name, LUSTRE_MDC_NAME) && + strcmp(obd->obd_type->typ_name, LUSTRE_OSC_NAME)) { + CERROR("Can't set security flavor on obd %s\n", + obd->obd_type->typ_name); + RETURN(-EINVAL); + } + + if (LUSTRE_CFG_BUFLEN(lcfg, 1) != sizeof(*conf)) { + CERROR("invalid data\n"); + RETURN(-EINVAL); + } + + conf = &obd->u.cli.cl_sec_conf; + memcpy(conf, lustre_cfg_buf(lcfg, 1), sizeof(*conf)); + +#ifdef __BIG_ENDIAN + __swab32s(&conf->sfc_rpc_flavor); + __swab32s(&conf->sfc_bulk_csum); + __swab32s(&conf->sfc_bulk_priv); + __swab32s(&conf->sfc_flags); +#endif + + RETURN(0); +} + CFS_LIST_HEAD(lustre_profile_list); struct lustre_profile *class_get_profile(const char * prof) @@ -787,6 +817,10 @@ int class_process_config(struct lustre_cfg *lcfg) err = class_del_conn(obd, lcfg); GOTO(out, err = 0); } + case LCFG_SEC_FLAVOR: { + err = class_sec_flavor(obd, lcfg); + GOTO(out, err = 0); + } default: { err = obd_process_config(obd, sizeof(*lcfg), lcfg); GOTO(out, err); diff --git a/lustre/obdclass/obd_mount.c b/lustre/obdclass/obd_mount.c index 7779095..83062ac 100644 --- a/lustre/obdclass/obd_mount.c +++ b/lustre/obdclass/obd_mount.c @@ -851,11 +851,82 @@ int server_mti_print(char *title, struct mgs_target_info *mti) return(0); } +static +int mti_set_sec_opts(struct mgs_target_info *mti, struct lustre_mount_data *lmd) +{ + char *s1, *s2; + + if (lmd->lmd_sec_mdt == NULL && lmd->lmd_sec_cli == NULL) { + /* just let on-disk params do its work. but we have an + * assumption that any changes of on-disk data by tune2fs + * should lead to server rewrite log. 
+ */ + return 0; + } + + /* filter out existing sec options */ + s1 = mti->mti_params; + while (*s1) { + int clear; + + while (*s1 == ' ') + s1++; + + if (strncmp(s1, PARAM_SEC_RPC_MDT, + sizeof(PARAM_SEC_RPC_MDT) - 1) == 0 || + strncmp(s1, PARAM_SEC_RPC_CLI, + sizeof(PARAM_SEC_RPC_CLI) - 1) == 0) + clear = 1; + else + clear = 0; + + s2 = strchr(s1, ' '); + if (s2 == NULL) { + if (clear) + *s1 = '\0'; + break; + } + s2++; + if (clear) + memmove(s1, s2, strlen(s2) + 1); + else + s1 = s2; + } + + /* append sec options from lmd */ + /* FIXME add flag LDD_F_UPDATE after mountconf start supporting + * log updating. + */ + if (lmd->lmd_sec_mdt) { + if (strlen(mti->mti_params) + strlen(lmd->lmd_sec_mdt) + + sizeof(PARAM_SEC_RPC_MDT) + 1 >= sizeof(mti->mti_params)) { + CERROR("security params too big for mti\n"); + return -ENOMEM; + } + strcat(mti->mti_params, " "PARAM_SEC_RPC_MDT); + strcat(mti->mti_params, lmd->lmd_sec_mdt); + //mti->mti_flags |= LDD_F_UPDATE; + } + if (lmd->lmd_sec_cli) { + if (strlen(mti->mti_params) + strlen(lmd->lmd_sec_cli) + + sizeof(PARAM_SEC_RPC_CLI) + 2 > sizeof(mti->mti_params)) { + CERROR("security params too big for mti\n"); + return -ENOMEM; + } + strcat(mti->mti_params, " "PARAM_SEC_RPC_CLI); + strcat(mti->mti_params, lmd->lmd_sec_cli); + //mti->mti_flags |= LDD_F_UPDATE; + } + + return 0; +} + static int server_sb2mti(struct super_block *sb, struct mgs_target_info *mti) { - struct lustre_sb_info *lsi = s2lsi(sb); - struct lustre_disk_data *ldd = lsi->lsi_ldd; - lnet_process_id_t id; + struct lustre_sb_info *lsi = s2lsi(sb); + struct lustre_disk_data *ldd = lsi->lsi_ldd; + struct lustre_mount_data *lmd = lsi->lsi_lmd; + lnet_process_id_t id; int i = 0; ENTRY; @@ -891,7 +962,8 @@ static int server_sb2mti(struct super_block *sb, struct mgs_target_info *mti) /* FIXME we can't send a msg much bigger than 4k - use bulk? 
*/ } memcpy(mti->mti_params, ldd->ldd_params, sizeof(mti->mti_params)); - RETURN(0); + + RETURN(mti_set_sec_opts(mti, lmd)); } /* Register an old or new target with the MGS. If needed MGS will construct @@ -1139,6 +1211,12 @@ static int lustre_free_lsi(struct super_block *sb) if (lsi->lsi_lmd->lmd_profile != NULL) OBD_FREE(lsi->lsi_lmd->lmd_profile, strlen(lsi->lsi_lmd->lmd_profile) + 1); + if (lsi->lsi_lmd->lmd_sec_mdt != NULL) + OBD_FREE(lsi->lsi_lmd->lmd_sec_mdt, + strlen(lsi->lsi_lmd->lmd_sec_mdt) + 1); + if (lsi->lsi_lmd->lmd_sec_cli != NULL) + OBD_FREE(lsi->lsi_lmd->lmd_sec_cli, + strlen(lsi->lsi_lmd->lmd_sec_cli) + 1); if (lsi->lsi_lmd->lmd_opts != NULL) OBD_FREE(lsi->lsi_lmd->lmd_opts, strlen(lsi->lsi_lmd->lmd_opts) + 1); @@ -1591,6 +1669,10 @@ static void lmd_print(struct lustre_mount_data *lmd) PRINT_CMD(PRINT_MASK, "profile: %s\n", lmd->lmd_profile); PRINT_CMD(PRINT_MASK, "device: %s\n", lmd->lmd_dev); PRINT_CMD(PRINT_MASK, "flags: %x\n", lmd->lmd_flags); + if (lmd->lmd_sec_mdt) + PRINT_CMD(PRINT_MASK, "sec_mdt: %s\n", lmd->lmd_sec_mdt); + if (lmd->lmd_sec_cli) + PRINT_CMD(PRINT_MASK, "sec_cli: %s\n", lmd->lmd_sec_cli); if (lmd->lmd_opts) PRINT_CMD(PRINT_MASK, "options: %s\n", lmd->lmd_opts); for (i = 0; i < lmd->lmd_exclude_count; i++) { @@ -1676,6 +1758,66 @@ static int lmd_make_exclusion(struct lustre_mount_data *lmd, char *ptr) RETURN(rc); } +static +int lmd_set_sec_opts(char **set, char *opts, int length) +{ + if (*set) + OBD_FREE(*set, strlen(*set) + 1); + + OBD_ALLOC(*set, length + 1); + if (*set == NULL) + return -ENOMEM; + + memcpy(*set, opts, length); + (*set)[length] = '\0'; + + return 0; +} + +static +int lmd_parse_sec_opts(struct lustre_mount_data *lmd, char *ptr) +{ + char *tail; + char **set = NULL; + int length; + + /* check peer name */ + if (strncmp(ptr, "sec_mdt=", 8) == 0) { + set = &lmd->lmd_sec_mdt; + ptr += 8; + } else if (strncmp(ptr, "sec_cli=", 8) == 0) { + set = &lmd->lmd_sec_cli; + ptr += 8; + } else if (strncmp(ptr, "sec=", 
4) == 0) { + /* leave 'set' be null */ + ptr += 4; + } else { + CERROR("invalid security options: %s\n", ptr); + return -EINVAL; + } + + tail = strchr(ptr, ','); + if (tail == NULL) + length = strlen(ptr); + else + length = tail - ptr; + + if (set) { + if (lmd_set_sec_opts(set, ptr, length)) + return -EINVAL; + } else { + if (lmd->lmd_sec_mdt == NULL && + lmd_set_sec_opts(&lmd->lmd_sec_mdt, ptr, length)) + return -EINVAL; + + if (lmd->lmd_sec_cli == NULL && + lmd_set_sec_opts(&lmd->lmd_sec_cli, ptr, length)) + return -EINVAL; + } + + return 0; +} + /* mount -v -t lustre uml1:uml2:/lustre-client /mnt/lustre */ static int lmd_parse(char *options, struct lustre_mount_data *lmd) { @@ -1726,6 +1868,11 @@ static int lmd_parse(char *options, struct lustre_mount_data *lmd) if (rc) goto invalid; clear++; + } else if (strncmp(s1, "sec", 3) == 0) { + rc = lmd_parse_sec_opts(lmd, s1); + if (rc) + goto invalid; + clear++; } /* Linux 2.4 doesn't pass the device, so we stuck it at the diff --git a/lustre/osc/lproc_osc.c b/lustre/osc/lproc_osc.c index 7079547..439671e 100644 --- a/lustre/osc/lproc_osc.c +++ b/lustre/osc/lproc_osc.c @@ -286,6 +286,7 @@ static struct lprocfs_vars lprocfs_obd_vars[] = { { "prealloc_next_id", osc_rd_prealloc_next_id, 0, 0 }, { "prealloc_last_id", osc_rd_prealloc_last_id, 0, 0 }, { "checksums", osc_rd_checksum, osc_wr_checksum, 0 }, + { "sptlrpc", sptlrpc_lprocfs_rd, 0, 0 }, { 0 } }; diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c index 495af7b..33a9710 100644 --- a/lustre/osc/osc_request.c +++ b/lustre/osc/osc_request.c @@ -40,7 +40,7 @@ # include #endif -# include +#include #include #include #include @@ -844,7 +844,7 @@ static int osc_brw_prep_request(int cmd, struct obd_import *imp,struct obdo *oa, OBD_FAIL_RETURN(OBD_FAIL_OSC_BRW_PREP_REQ, -ENOMEM); req = ptlrpc_prep_req_pool(imp, LUSTRE_OST_VERSION, opc, 4, size, NULL, - pool); + pool, NULL); if (req == NULL) RETURN (-ENOMEM); @@ -1032,6 +1032,8 @@ static int 
osc_brw_fini_request(struct ptlrpc_request *req, struct obdo *oa, requested_nob, page_count, pga); } + sptlrpc_cli_unwrap_bulk_write(req, req->rq_bulk); + RETURN(check_write_rcs(req, requested_nob, niocount, page_count, pga)); } @@ -1085,6 +1087,8 @@ static int osc_brw_fini_request(struct ptlrpc_request *req, struct obdo *oa, cksum_missed, libcfs_nid2str(peer->nid)); } + sptlrpc_cli_unwrap_bulk_read(req, rc, page_count, pga); + RETURN(0); } @@ -2635,7 +2639,8 @@ static int sanosc_brw_write(struct obd_export *exp, struct obd_info *oinfo, size[REQ_REC_OFF + 2] = page_count * sizeof(*nioptr); req = ptlrpc_prep_req_pool(class_exp2cliimp(exp), LUSTRE_OST_VERSION, - OST_SAN_WRITE, 4, size, NULL, imp->imp_rq_pool); + OST_SAN_WRITE, 4, size, NULL, + imp->imp_rq_pool, NULL); if (!req) RETURN(-ENOMEM); @@ -3462,6 +3467,11 @@ static int osc_set_info_async(struct obd_export *exp, obd_count keylen, RETURN(0); } + if (KEY_IS(KEY_FLUSH_CTX)) { + sptlrpc_import_flush_my_ctx(imp); + RETURN(0); + } + if (!set) RETURN(-EINVAL); diff --git a/lustre/ost/ost_handler.c b/lustre/ost/ost_handler.c index 3643451..c2183cf 100644 --- a/lustre/ost/ost_handler.c +++ b/lustre/ost/ost_handler.c @@ -628,6 +628,8 @@ static int ost_brw_read(struct ptlrpc_request *req, struct obd_trans_info *oti) int comms_error = 0, niocount, npages, nob = 0, rc, i, do_checksum; ENTRY; + req->rq_bulk_read = 1; + if (OBD_FAIL_CHECK(OBD_FAIL_OST_BRW_READ_BULK)) GOTO(out, rc = -EIO); @@ -740,8 +742,12 @@ static int ost_brw_read(struct ptlrpc_request *req, struct obd_trans_info *oti) if (rc == 0) { if (desc->bd_export->exp_failed) rc = -ENOTCONN; - else + else { + sptlrpc_svc_wrap_bulk(req, desc); + rc = ptlrpc_start_bulk_transfer(desc); + } + if (rc == 0) { lwi = LWI_TIMEOUT_INTERVAL(obd_timeout * HZ / 4, HZ, ost_bulk_timeout, desc); @@ -839,6 +845,8 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti) int rc, swab, i, j, do_checksum; ENTRY; + req->rq_bulk_write = 1; + if 
(OBD_FAIL_CHECK(OBD_FAIL_OST_BRW_WRITE_BULK)) GOTO(out, rc = -EIO); @@ -1010,6 +1018,8 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti) } } + sptlrpc_svc_unwrap_bulk(req, desc); + /* Must commit after prep above in all cases */ rc = obd_commitrw(OBD_BRW_WRITE, req->rq_export, &repbody->oa, objcount, ioo, npages, local_nb, oti, rc); @@ -1297,6 +1307,9 @@ int ost_msg_check_version(struct lustre_msg *msg) case OST_CONNECT: case OST_DISCONNECT: case OBD_PING: + case SEC_CTX_INIT: + case SEC_CTX_INIT_CONT: + case SEC_CTX_FINI: rc = lustre_msg_check_version(msg, LUSTRE_OBD_VERSION); if (rc) CERROR("bad opc %u version %08x, expecting %08x\n", @@ -1363,6 +1376,15 @@ static int ost_handle(struct ptlrpc_request *req) ENTRY; LASSERT(current->journal_info == NULL); + + /* primordial rpcs don't affect server recovery */ + switch (lustre_msg_get_opc(req->rq_reqmsg)) { + case SEC_CTX_INIT: + case SEC_CTX_INIT_CONT: + case SEC_CTX_FINI: + GOTO(out, rc = 0); + } + /* XXX identical to MDS */ if (lustre_msg_get_opc(req->rq_reqmsg) != OST_CONNECT) { int abort_recovery, recovering; @@ -1397,10 +1419,6 @@ static int ost_handle(struct ptlrpc_request *req) if (rc) RETURN(rc); - rc = ost_msg_check_version(req->rq_reqmsg); - if (rc) - RETURN(rc); - switch (lustre_msg_get_opc(req->rq_reqmsg)) { case OST_CONNECT: { CDEBUG(D_INODE, "connect\n"); diff --git a/lustre/ptlrpc/Makefile.in b/lustre/ptlrpc/Makefile.in index 38a9b33..405c896 100644 --- a/lustre/ptlrpc/Makefile.in +++ b/lustre/ptlrpc/Makefile.in @@ -13,6 +13,7 @@ ptlrpc_objs := client.o recover.o connection.o niobuf.o pack_generic.o ptlrpc_objs += events.o ptlrpc_module.o service.o pinger.o recov_thread.o ptlrpc_objs += llog_net.o llog_client.o llog_server.o import.o ptlrpcd.o ptlrpc_objs += pers.o lproc_ptlrpc.o wiretest.o layout.o +ptlrpc_objs += sec.o sec_null.o sec_plain.o ptlrpc-objs := $(ldlm_objs) $(ptlrpc_objs) diff --git a/lustre/ptlrpc/autoMakefile.am b/lustre/ptlrpc/autoMakefile.am index 
c80a8ca..1f1bd42 100644 --- a/lustre/ptlrpc/autoMakefile.am +++ b/lustre/ptlrpc/autoMakefile.am @@ -18,7 +18,8 @@ LDLM_COMM_SOURCES= $(top_srcdir)/lustre/ldlm/l_lock.c \ COMMON_SOURCES = client.c recover.c connection.c niobuf.c pack_generic.c \ events.c ptlrpc_module.c service.c pinger.c recov_thread.c llog_net.c \ llog_client.c llog_server.c import.c ptlrpcd.c pers.c wiretest.c \ - ptlrpc_internal.h layout.c $(LDLM_COMM_SOURCES) + ptlrpc_internal.h layout.c sec.c sec_null.c sec_plain.c \ + $(LDLM_COMM_SOURCES) if LIBLUSTRE @@ -57,6 +58,9 @@ ptlrpc_SOURCES := \ recov_thread.c \ service.c \ wiretest.c \ + sec.c \ + sec_null.c \ + sec_plain.c \ $(LDLM_COMM_SOURCES) ptlrpc_CFLAGS := $(EXTRA_KCFLAGS) diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c index d636621..9b79a1e 100644 --- a/lustre/ptlrpc/client.c +++ b/lustre/ptlrpc/client.c @@ -179,6 +179,9 @@ void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *desc) LASSERT(desc->bd_iov_count != LI_POISON); /* not freed already */ LASSERT(!desc->bd_network_rw); /* network hands off or */ LASSERT((desc->bd_export != NULL) ^ (desc->bd_import != NULL)); + + ptlrpc_bulk_free_enc_pages(desc); + if (desc->bd_export) class_export_put(desc->bd_export); else @@ -200,8 +203,9 @@ void ptlrpc_free_rq_pool(struct ptlrpc_request_pool *pool) list_for_each_safe(l, tmp, &pool->prp_req_list) { req = list_entry(l, struct ptlrpc_request, rq_list); list_del(&req->rq_list); - LASSERT (req->rq_reqmsg); - OBD_FREE(req->rq_reqmsg, pool->prp_rq_size); + LASSERT(req->rq_reqbuf); + LASSERT(req->rq_reqbuf_len == pool->prp_rq_size); + OBD_FREE(req->rq_reqbuf, pool->prp_rq_size); OBD_FREE(req, sizeof(*req)); } OBD_FREE(pool, sizeof(*pool)); @@ -212,7 +216,7 @@ void ptlrpc_add_rqs_to_pool(struct ptlrpc_request_pool *pool, int num_rq) int i; int size = 1; - while (size < pool->prp_rq_size) + while (size < pool->prp_rq_size + SPTLRPC_MAX_PAYLOAD) size <<= 1; LASSERTF(list_empty(&pool->prp_req_list) || size == pool->prp_rq_size, @@ -234,7 +238,8 @@ 
void ptlrpc_add_rqs_to_pool(struct ptlrpc_request_pool *pool, int num_rq) OBD_FREE(req, sizeof(struct ptlrpc_request)); return; } - req->rq_reqmsg = msg; + req->rq_reqbuf = msg; + req->rq_reqbuf_len = size; req->rq_pool = pool; spin_lock(&pool->prp_lock); list_add_tail(&req->rq_list, &pool->prp_req_list); @@ -273,7 +278,7 @@ struct ptlrpc_request_pool *ptlrpc_init_rq_pool(int num_rq, int msgsize, static struct ptlrpc_request *ptlrpc_prep_req_from_pool(struct ptlrpc_request_pool *pool) { struct ptlrpc_request *request; - struct lustre_msg *reqmsg; + struct lustre_msg *reqbuf; if (!pool) return NULL; @@ -294,21 +299,31 @@ static struct ptlrpc_request *ptlrpc_prep_req_from_pool(struct ptlrpc_request_po list_del(&request->rq_list); spin_unlock(&pool->prp_lock); - LASSERT(request->rq_reqmsg); + LASSERT(request->rq_reqbuf); LASSERT(request->rq_pool); - reqmsg = request->rq_reqmsg; + reqbuf = request->rq_reqbuf; memset(request, 0, sizeof(*request)); - request->rq_reqmsg = reqmsg; + request->rq_reqbuf = reqbuf; + request->rq_reqbuf_len = pool->prp_rq_size; request->rq_pool = pool; - request->rq_reqlen = pool->prp_rq_size; return request; } +static void __ptlrpc_free_req_to_pool(struct ptlrpc_request *request) +{ + struct ptlrpc_request_pool *pool = request->rq_pool; + + spin_lock(&pool->prp_lock); + list_add_tail(&request->rq_list, &pool->prp_req_list); + spin_unlock(&pool->prp_lock); +} + struct ptlrpc_request * ptlrpc_prep_req_pool(struct obd_import *imp, __u32 version, int opcode, int count, int *lengths, char **bufs, - struct ptlrpc_request_pool *pool) + struct ptlrpc_request_pool *pool, + struct ptlrpc_cli_ctx *ctx) { struct ptlrpc_request *request = NULL; int rc; @@ -330,12 +345,23 @@ ptlrpc_prep_req_pool(struct obd_import *imp, __u32 version, int opcode, RETURN(NULL); } + request->rq_import = class_import_get(imp); + + if (unlikely(ctx)) + request->rq_cli_ctx = sptlrpc_ctx_get(ctx); + else { + rc = sptlrpc_req_get_ctx(request); + if (rc) + GOTO(out_free, rc); + } + 
+ sptlrpc_req_set_flavor(request, opcode); + rc = lustre_pack_request(request, imp->imp_msg_magic, count, lengths, bufs); if (rc) { LASSERT(!request->rq_pool); - OBD_FREE(request, sizeof(*request)); - RETURN(NULL); + GOTO(out_ctx, rc); } lustre_msg_add_version(request->rq_reqmsg, version); @@ -346,7 +372,6 @@ ptlrpc_prep_req_pool(struct obd_import *imp, __u32 version, int opcode, request->rq_timeout = obd_timeout; request->rq_send_state = LUSTRE_IMP_FULL; request->rq_type = PTL_RPC_MSG_REQUEST; - request->rq_import = class_import_get(imp); request->rq_export = NULL; request->rq_req_cbid.cbid_fn = request_out_callback; @@ -364,6 +389,7 @@ ptlrpc_prep_req_pool(struct obd_import *imp, __u32 version, int opcode, spin_lock_init(&request->rq_lock); CFS_INIT_LIST_HEAD(&request->rq_list); CFS_INIT_LIST_HEAD(&request->rq_replay_list); + CFS_INIT_LIST_HEAD(&request->rq_ctx_chain); CFS_INIT_LIST_HEAD(&request->rq_set_chain); cfs_waitq_init(&request->rq_reply_waitq); request->rq_xid = ptlrpc_next_xid(); @@ -373,6 +399,15 @@ ptlrpc_prep_req_pool(struct obd_import *imp, __u32 version, int opcode, lustre_msg_set_flags(request->rq_reqmsg, 0); RETURN(request); +out_ctx: + sptlrpc_req_put_ctx(request); +out_free: + class_import_put(imp); + if (request->rq_pool) + __ptlrpc_free_req_to_pool(request); + else + OBD_FREE(request, sizeof(*request)); + return NULL; } struct ptlrpc_request * @@ -380,7 +415,7 @@ ptlrpc_prep_req(struct obd_import *imp, __u32 version, int opcode, int count, int *lengths, char **bufs) { return ptlrpc_prep_req_pool(imp, version, opcode, count, lengths, bufs, - NULL); + NULL, NULL); } struct ptlrpc_request_set *ptlrpc_prep_set(void) @@ -497,7 +532,9 @@ static int ptlrpc_import_delay_req(struct obd_import *imp, LASSERT (status != NULL); *status = 0; - if (imp->imp_state == LUSTRE_IMP_NEW) { + if (req->rq_ctx_init || req->rq_ctx_fini) { + /* always allow ctx init/fini rpc go through */ + } else if (imp->imp_state == LUSTRE_IMP_NEW) { DEBUG_REQ(D_ERROR, req, 
"Uninitialized import."); *status = -EIO; LBUG(); @@ -597,6 +634,7 @@ static int after_reply(struct ptlrpc_request *req) ENTRY; LASSERT(!req->rq_receiving_reply); + LASSERT(req->rq_nob_received <= req->rq_repbuf_len); /* NB Until this point, the whole of the incoming message, * including buflens, status etc is in the sender's byte order. */ @@ -605,8 +643,17 @@ static int after_reply(struct ptlrpc_request *req) /* Clear reply swab mask; this is a new reply in sender's byte order */ req->rq_rep_swab_mask = 0; #endif - LASSERT (req->rq_nob_received <= req->rq_replen); - rc = lustre_unpack_msg(req->rq_repmsg, req->rq_nob_received); + rc = sptlrpc_cli_unwrap_reply(req); + if (rc) { + DEBUG_REQ(D_ERROR, req, "unwrap reply failed (%d):", rc); + RETURN(rc); + } + + /* security layer unwrap might ask resend this request */ + if (req->rq_resend) + RETURN(0); + + rc = lustre_unpack_msg(req->rq_repmsg, req->rq_replen); if (rc) { DEBUG_REQ(D_ERROR, req, "unpack_rep failed: %d\n", rc); RETURN(-EPROTO); @@ -710,6 +757,20 @@ static int ptlrpc_send_new_req(struct ptlrpc_request *req) spin_unlock(&imp->imp_lock); lustre_msg_set_status(req->rq_reqmsg, cfs_curproc_pid()); + + rc = sptlrpc_req_refresh_ctx(req, -1); + if (rc) { + if (req->rq_err) { + req->rq_status = rc; + RETURN(1); + } else { + /* here begins timeout counting */ + req->rq_sent = CURRENT_SECONDS; + req->rq_wait_ctx = 1; + RETURN(0); + } + } + CDEBUG(D_RPCTRACE, "Sending RPC pname:cluuid:pid:xid:nid:opc" " %s:%s:%d:"LPU64":%s:%d\n", cfs_curproc_comm(), imp->imp_obd->obd_uuid.uuid, @@ -782,7 +843,8 @@ int ptlrpc_check_set(struct ptlrpc_request_set *set) * path sets rq_intr irrespective of whether ptlrpcd has * seen a timeout. 
our policy is to only interpret * interrupted rpcs after they have timed out */ - if (req->rq_intr && (req->rq_timedout || req->rq_waiting)) { + if (req->rq_intr && (req->rq_timedout || req->rq_waiting || + req->rq_wait_ctx)) { /* NB could be on delayed list */ ptlrpc_unregister_reply(req); req->rq_status = -EINTR; @@ -796,9 +858,16 @@ int ptlrpc_check_set(struct ptlrpc_request_set *set) } if (req->rq_phase == RQ_PHASE_RPC) { - if (req->rq_timedout||req->rq_waiting||req->rq_resend) { + if (req->rq_timedout || req->rq_resend || + req->rq_waiting || req->rq_wait_ctx) { int status; + /* rq_wait_ctx is only touched in ptlrpcd, + * no lock needed here. + */ + if (req->rq_wait_ctx) + goto check_ctx; + ptlrpc_unregister_reply(req); spin_lock(&imp->imp_lock); @@ -815,7 +884,7 @@ int ptlrpc_check_set(struct ptlrpc_request_set *set) spin_unlock(&imp->imp_lock); GOTO(interpret, req->rq_status); } - if (req->rq_no_resend) { + if (req->rq_no_resend && !req->rq_wait_ctx) { req->rq_status = -ENOTCONN; req->rq_phase = RQ_PHASE_INTERPRET; spin_unlock(&imp->imp_lock); @@ -843,6 +912,23 @@ int ptlrpc_check_set(struct ptlrpc_request_set *set) old_xid, req->rq_xid); } } +check_ctx: + status = sptlrpc_req_refresh_ctx(req, -1); + if (status) { + if (req->rq_err) { + req->rq_status = status; + force_timer_recalc = 1; + } + if (!req->rq_wait_ctx) { + /* begins timeout counting */ + req->rq_sent = CURRENT_SECONDS; + req->rq_wait_ctx = 1; + } + continue; + } else { + req->rq_sent = 0; + req->rq_wait_ctx = 0; + } rc = ptl_send_rpc(req, 0); if (rc) { @@ -951,6 +1037,7 @@ int ptlrpc_expire_one_request(struct ptlrpc_request *req) spin_lock(&req->rq_lock); req->rq_timedout = 1; + req->rq_wait_ctx = 0; spin_unlock(&req->rq_lock); ptlrpc_unregister_reply (req); @@ -972,7 +1059,8 @@ int ptlrpc_expire_one_request(struct ptlrpc_request *req) /* If this request is for recovery or other primordial tasks, * then error it out here. 
*/ - if (req->rq_send_state != LUSTRE_IMP_FULL || + if (req->rq_ctx_init || req->rq_ctx_fini || + req->rq_send_state != LUSTRE_IMP_FULL || imp->imp_obd->obd_no_recov) { spin_lock(&req->rq_lock); req->rq_status = -ETIMEDOUT; @@ -1138,15 +1226,6 @@ int ptlrpc_set_wait(struct ptlrpc_request_set *set) RETURN(rc); } -static void __ptlrpc_free_req_to_pool(struct ptlrpc_request *request) -{ - struct ptlrpc_request_pool *pool = request->rq_pool; - - spin_lock(&pool->prp_lock); - list_add_tail(&request->rq_list, &pool->prp_req_list); - spin_unlock(&pool->prp_lock); -} - static void __ptlrpc_free_req(struct ptlrpc_request *request, int locked) { ENTRY; @@ -1159,6 +1238,7 @@ static void __ptlrpc_free_req(struct ptlrpc_request *request, int locked) LASSERTF(request->rq_rqbd == NULL, "req %p\n",request);/* client-side */ LASSERTF(list_empty(&request->rq_list), "req %p\n", request); LASSERTF(list_empty(&request->rq_set_chain), "req %p\n", request); + LASSERT(request->rq_cli_ctx); /* We must take it off the imp_replay_list first. Otherwise, we'll set * request->rq_reqmsg to NULL while osc_close is dereferencing it. 
*/ @@ -1177,10 +1257,8 @@ static void __ptlrpc_free_req(struct ptlrpc_request *request, int locked) LBUG(); } - if (request->rq_repmsg != NULL) { - OBD_FREE(request->rq_repmsg, request->rq_replen); - request->rq_repmsg = NULL; - } + if (request->rq_repbuf != NULL) + sptlrpc_cli_free_repbuf(request); if (request->rq_export != NULL) { class_export_put(request->rq_export); request->rq_export = NULL; @@ -1192,15 +1270,15 @@ static void __ptlrpc_free_req(struct ptlrpc_request *request, int locked) if (request->rq_bulk != NULL) ptlrpc_free_bulk(request->rq_bulk); - if (request->rq_pool) { + if (request->rq_reqbuf != NULL || request->rq_clrbuf != NULL) + sptlrpc_cli_free_reqbuf(request); + + sptlrpc_req_put_ctx(request); + + if (request->rq_pool) __ptlrpc_free_req_to_pool(request); - } else { - if (request->rq_reqmsg != NULL) { - OBD_FREE(request->rq_reqmsg, request->rq_reqlen); - request->rq_reqmsg = NULL; - } + else OBD_FREE(request, sizeof(*request)); - } EXIT; } @@ -1563,6 +1641,23 @@ restart: list_add_tail(&req->rq_list, &imp->imp_sending_list); spin_unlock(&imp->imp_lock); + rc = sptlrpc_req_refresh_ctx(req, 0); + if (rc) { + if (req->rq_err) { + /* we got fatal ctx refresh error, directly jump out + * thus we can pass back the actual error code. 
+ */ + spin_lock(&imp->imp_lock); + list_del_init(&req->rq_list); + spin_unlock(&imp->imp_lock); + + CERROR("Failed to refresh ctx of req %p: %d\n", req, rc); + GOTO(out, rc); + } + /* simulating we got error during send rpc */ + goto after_send; + } + rc = ptl_send_rpc(req, 0); if (rc) { DEBUG_REQ(D_HA, req, "send failed (%d); recovering", rc); @@ -1577,6 +1672,7 @@ restart: l_wait_event(req->rq_reply_waitq, ptlrpc_check_reply(req), &lwi); DEBUG_REQ(D_NET, req, "-- done sleeping"); +after_send: CDEBUG(D_RPCTRACE, "Completed RPC pname:cluuid:pid:xid:nid:opc " "%s:%s:%d:"LPU64":%s:%d\n", cfs_curproc_comm(), imp->imp_obd->obd_uuid.uuid, diff --git a/lustre/ptlrpc/events.c b/lustre/ptlrpc/events.c index 96aa14b..8863dc2 100644 --- a/lustre/ptlrpc/events.c +++ b/lustre/ptlrpc/events.c @@ -30,6 +30,7 @@ #endif #include #include +#include #include "ptlrpc_internal.h" lnet_handle_eq_t ptlrpc_eq_h; @@ -50,6 +51,8 @@ void request_out_callback(lnet_event_t *ev) DEBUG_REQ((ev->status == 0) ? D_NET : D_ERROR, req, "type %d, status %d", ev->type, ev->status); + sptlrpc_request_out_callback(req); + if (ev->type == LNET_EVENT_UNLINK || ev->status != 0) { /* Failed send: make it seem like the reply timed out, just @@ -81,9 +84,9 @@ void reply_in_callback(lnet_event_t *ev) LASSERT (ev->type == LNET_EVENT_PUT || ev->type == LNET_EVENT_UNLINK); LASSERT (ev->unlinked); - LASSERT (ev->md.start == req->rq_repmsg); + LASSERT (ev->md.start == req->rq_repbuf); LASSERT (ev->offset == 0); - LASSERT (ev->mlength <= req->rq_replen); + LASSERT (ev->mlength <= req->rq_repbuf_len); DEBUG_REQ((ev->status == 0) ? 
D_NET : D_ERROR, req, "type %d, status %d", ev->type, ev->status); @@ -136,6 +139,8 @@ void client_bulk_callback (lnet_event_t *ev) desc->bd_nob_transferred = ev->mlength; } + ptlrpc_bulk_free_enc_pages(desc); + /* NB don't unlock till after wakeup; desc can disappear under us * otherwise */ ptlrpc_wake_client_req(desc->bd_req); @@ -193,9 +198,9 @@ void request_in_callback(lnet_event_t *ev) * flags are reset and scalars are zero. We only set the message * size to non-zero if this was a successful receive. */ req->rq_xid = ev->match_bits; - req->rq_reqmsg = ev->md.start + ev->offset; + req->rq_reqbuf = ev->md.start + ev->offset; if (ev->type == LNET_EVENT_PUT && ev->status == 0) - req->rq_reqlen = ev->mlength; + req->rq_reqdata_len = ev->mlength; do_gettimeofday(&req->rq_arrival_time); req->rq_peer = ev->initiator; req->rq_self = ev->target.nid; diff --git a/lustre/ptlrpc/import.c b/lustre/ptlrpc/import.c index b7a9e49..600b514 100644 --- a/lustre/ptlrpc/import.c +++ b/lustre/ptlrpc/import.c @@ -350,7 +350,8 @@ int ptlrpc_connect_import(struct obd_import *imp, char *new_uuid) spin_unlock(&imp->imp_lock); CERROR("can't connect to a closed import\n"); RETURN(-EINVAL); - } else if (imp->imp_state == LUSTRE_IMP_FULL) { + } else if (imp->imp_state == LUSTRE_IMP_FULL && + imp->imp_force_reconnect == 0) { spin_unlock(&imp->imp_lock); CERROR("already connected\n"); RETURN(0); @@ -499,11 +500,16 @@ static int ptlrpc_connect_interpret(struct ptlrpc_request *request, spin_unlock(&imp->imp_lock); RETURN(0); } + imp->imp_force_reconnect = 0; spin_unlock(&imp->imp_lock); if (rc) GOTO(out, rc); + rc = sptlrpc_cli_install_rvs_ctx(imp, request->rq_cli_ctx); + if (rc) + GOTO(out, rc); + LASSERT(imp->imp_conn_current); msg_flags = lustre_msg_get_op_flags(request->rq_repmsg); @@ -719,6 +725,11 @@ finish: if (rc == -EPROTO) { struct obd_connect_data *ocd; + + /* reply message might not be ready */ + if (request->rq_repmsg != NULL) + RETURN(-EPROTO); + ocd = lustre_swab_repbuf(request, 
REPLY_REC_OFF, sizeof *ocd, lustre_swab_connect); diff --git a/lustre/ptlrpc/niobuf.c b/lustre/ptlrpc/niobuf.c index 53fac91..5aa2460 100644 --- a/lustre/ptlrpc/niobuf.c +++ b/lustre/ptlrpc/niobuf.c @@ -311,17 +311,19 @@ int ptlrpc_send_reply (struct ptlrpc_request *req, int may_be_difficult) int rc; /* We must already have a reply buffer (only ptlrpc_error() may be - * called without one). We must also have a request buffer which - * is either the actual (swabbed) incoming request, or a saved copy - * if this is a req saved in target_queue_final_reply(). */ - LASSERT (req->rq_reqmsg != NULL); + * called without one). The reply generated by security layer (e.g. + * error notify, etc.) might have NULL rq->reqmsg; Otherwise we must + * have a request buffer which is either the actual (swabbed) incoming + * request, or a saved copy if this is a req saved in + * target_queue_final_reply(). + */ + LASSERT (req->rq_reqbuf != NULL); LASSERT (rs != NULL); - LASSERT (req->rq_repmsg != NULL); LASSERT (may_be_difficult || !rs->rs_difficult); + LASSERT (req->rq_repmsg != NULL); LASSERT (req->rq_repmsg == rs->rs_msg); LASSERT (rs->rs_cb_id.cbid_fn == reply_out_callback); LASSERT (rs->rs_cb_id.cbid_arg == rs); - LASSERT (req->rq_repmsg != NULL); if (req->rq_export && req->rq_export->exp_obd && req->rq_export->exp_obd->obd_fail) { @@ -337,7 +339,8 @@ int ptlrpc_send_reply (struct ptlrpc_request *req, int may_be_difficult) lustre_msg_set_type(req->rq_repmsg, req->rq_type); lustre_msg_set_status(req->rq_repmsg, req->rq_status); - lustre_msg_set_opc(req->rq_repmsg, lustre_msg_get_opc(req->rq_reqmsg)); + lustre_msg_set_opc(req->rq_repmsg, + req->rq_reqmsg ? 
lustre_msg_get_opc(req->rq_reqmsg) : 0); if (req->rq_export == NULL || req->rq_export->exp_connection == NULL) conn = ptlrpc_get_connection(req->rq_peer, req->rq_self, NULL); @@ -351,10 +354,15 @@ int ptlrpc_send_reply (struct ptlrpc_request *req, int may_be_difficult) atomic_inc (&svc->srv_outstanding_replies); ptlrpc_rs_addref(rs); /* +1 ref for the network */ - rc = ptl_send_buf (&rs->rs_md_h, req->rq_repmsg, req->rq_replen, + rc = sptlrpc_svc_wrap_reply(req); + if (rc) + goto out; + + rc = ptl_send_buf (&rs->rs_md_h, rs->rs_repbuf, rs->rs_repdata_len, rs->rs_difficult ? LNET_ACK_REQ : LNET_NOACK_REQ, &rs->rs_cb_id, conn, svc->srv_rep_portal, req->rq_xid); +out: if (rc != 0) { atomic_dec (&svc->srv_outstanding_replies); ptlrpc_rs_decref(rs); @@ -413,24 +421,31 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply) connection = request->rq_import->imp_connection; - if (request->rq_bulk != NULL) { - rc = ptlrpc_register_bulk (request); - if (rc != 0) - RETURN(rc); - } - lustre_msg_set_handle(request->rq_reqmsg, &request->rq_import->imp_remote_handle); lustre_msg_set_type(request->rq_reqmsg, PTL_RPC_MSG_REQUEST); lustre_msg_set_conn_cnt(request->rq_reqmsg, request->rq_import->imp_conn_cnt); + rc = sptlrpc_cli_wrap_request(request); + if (rc) + RETURN(rc); + + /* bulk register should be done after wrap_request() */ + if (request->rq_bulk != NULL) { + rc = ptlrpc_register_bulk (request); + if (rc != 0) + RETURN(rc); + } + if (!noreply) { LASSERT (request->rq_replen != 0); - if (request->rq_repmsg == NULL) - OBD_ALLOC(request->rq_repmsg, request->rq_replen); - if (request->rq_repmsg == NULL) - GOTO(cleanup_bulk, rc = -ENOMEM); + if (request->rq_repbuf == NULL) { + rc = sptlrpc_cli_alloc_repbuf(request, + request->rq_replen); + if (rc) + GOTO(cleanup_bulk, rc); + } rc = LNetMEAttach(request->rq_reply_portal,/*XXX FIXME bug 249*/ connection->c_peer, request->rq_xid, 0, @@ -438,7 +453,7 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply) if (rc != 
0) { CERROR("LNetMEAttach failed: %d\n", rc); LASSERT (rc == -ENOMEM); - GOTO(cleanup_repmsg, rc = -ENOMEM); + GOTO(cleanup_bulk, rc = -ENOMEM); } } @@ -455,8 +470,8 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply) spin_unlock(&request->rq_lock); if (!noreply) { - reply_md.start = request->rq_repmsg; - reply_md.length = request->rq_replen; + reply_md.start = request->rq_repbuf; + reply_md.length = request->rq_repbuf_len; reply_md.threshold = 1; reply_md.options = PTLRPC_MD_OPTIONS | LNET_MD_OP_PUT; reply_md.user_ptr = &request->rq_reply_cbid; @@ -476,7 +491,7 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply) CDEBUG(D_NET, "Setup reply buffer: %u bytes, xid "LPU64 ", portal %u\n", - request->rq_replen, request->rq_xid, + request->rq_repbuf_len, request->rq_xid, request->rq_reply_portal); } @@ -489,8 +504,8 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply) request->rq_sent = CURRENT_SECONDS; ptlrpc_pinger_sending_on_import(request->rq_import); rc = ptl_send_buf(&request->rq_req_md_h, - request->rq_reqmsg, request->rq_reqlen, - LNET_NOACK_REQ, &request->rq_req_cbid, + request->rq_reqbuf, request->rq_reqdata_len, + LNET_NOACK_REQ, &request->rq_req_cbid, connection, request->rq_request_portal, request->rq_xid); @@ -516,10 +531,6 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply) /* UNLINKED callback called synchronously */ LASSERT (!request->rq_receiving_reply); - cleanup_repmsg: - OBD_FREE(request->rq_repmsg, request->rq_replen); - request->rq_repmsg = NULL; - cleanup_bulk: if (request->rq_bulk != NULL) ptlrpc_unregister_bulk(request); diff --git a/lustre/ptlrpc/pack_generic.c b/lustre/ptlrpc/pack_generic.c index 49e571c..5724089 100644 --- a/lustre/ptlrpc/pack_generic.c +++ b/lustre/ptlrpc/pack_generic.c @@ -98,7 +98,7 @@ static inline int lustre_msg_size_v1(int count, int *lengths) return size; } -static inline int lustre_msg_size_v2(int count, int *lengths) +int lustre_msg_size_v2(int count, int *lengths) { 
int size; int i; @@ -109,6 +109,7 @@ static inline int lustre_msg_size_v2(int count, int *lengths) return size; } +EXPORT_SYMBOL(lustre_msg_size_v2); /* This returns the size of the buffer that is required to hold a lustre_msg * with the given sub-buffer lengths. */ @@ -135,8 +136,8 @@ int lustre_msg_size(__u32 magic, int count, int *lens) } } -static void -lustre_init_msg_v1(void *m, int count, int *lens, char **bufs) +static +void lustre_init_msg_v1(void *m, int count, int *lens, char **bufs) { struct lustre_msg_v1 *msg = (struct lustre_msg_v1 *)m; char *ptr; @@ -160,8 +161,8 @@ lustre_init_msg_v1(void *m, int count, int *lens, char **bufs) } } -static void -lustre_init_msg_v2(struct lustre_msg_v2 *msg, int count, int *lens, char **bufs) +void lustre_init_msg_v2(struct lustre_msg_v2 *msg, int count, int *lens, + char **bufs) { char *ptr; int i; @@ -182,32 +183,18 @@ lustre_init_msg_v2(struct lustre_msg_v2 *msg, int count, int *lens, char **bufs) LOGL(tmp, lens[i], ptr); } } +EXPORT_SYMBOL(lustre_init_msg_v2); static int lustre_pack_request_v1(struct ptlrpc_request *req, int count, int *lens, char **bufs) { - int reqlen; + int reqlen, rc; reqlen = lustre_msg_size_v1(count, lens); - /* See if we got it from prealloc pool */ - if (req->rq_reqmsg) { - /* Cannot return error here, that would create - infinite loop in ptlrpc_prep_req_pool */ - /* In this case ptlrpc_prep_req_from_pool sets req->rq_reqlen - to maximum size that would fit into this preallocated - request */ - LASSERTF(req->rq_reqlen >= reqlen, "req->rq_reqlen %d, " - "reqlen %d\n",req->rq_reqlen, - reqlen); - memset(req->rq_reqmsg, 0, reqlen); - } else { - OBD_ALLOC(req->rq_reqmsg, reqlen); - if (req->rq_reqmsg == NULL) { - CERROR("alloc reqmsg (len %d) failed\n", reqlen); - return -ENOMEM; - } - } + rc = sptlrpc_cli_alloc_reqbuf(req, reqlen); + if (rc) + return rc; req->rq_reqlen = reqlen; @@ -218,28 +205,13 @@ static int lustre_pack_request_v1(struct ptlrpc_request *req, static int 
lustre_pack_request_v2(struct ptlrpc_request *req, int count, int *lens, char **bufs) { - int reqlen; + int reqlen, rc; reqlen = lustre_msg_size_v2(count, lens); - /* See if we got it from prealloc pool */ - if (req->rq_reqmsg) { - /* Cannot return error here, that would create - infinite loop in ptlrpc_prep_req_pool */ - /* In this case ptlrpc_prep_req_from_pool sets req->rq_reqlen - to maximum size that would fit into this preallocated - request */ - LASSERTF(req->rq_reqlen >= reqlen, "req->rq_reqlen %d, " - "reqlen %d\n",req->rq_reqlen, - reqlen); - memset(req->rq_reqmsg, 0, reqlen); - } else { - OBD_ALLOC(req->rq_reqmsg, reqlen); - if (req->rq_reqmsg == NULL) { - CERROR("alloc reqmsg (len %d) failed\n", reqlen); - return -ENOMEM; - } - } + rc = sptlrpc_cli_alloc_reqbuf(req, reqlen); + if (rc) + return rc; req->rq_reqlen = reqlen; @@ -261,6 +233,13 @@ int lustre_pack_request(struct ptlrpc_request *req, __u32 magic, int count, LASSERT(count > 0); LASSERT(lens[MSG_PTLRPC_BODY_OFF] == sizeof(struct ptlrpc_body)); + /* if we choose policy other than null, we have also choosed + * to use new message format. 
+ */ + if (magic == LUSTRE_MSG_MAGIC_V1 && + req->rq_sec_flavor != SPTLRPC_FLVR_NULL) + magic = LUSTRE_MSG_MAGIC_V2; + switch (magic) { case LUSTRE_MSG_MAGIC_V1: return lustre_pack_request_v1(req, count - 1, lens + 1, @@ -295,8 +274,7 @@ do { \ # define PTLRPC_RS_DEBUG_LRU_DEL(rs) do {} while(0) #endif -static struct ptlrpc_reply_state *lustre_get_emerg_rs(struct ptlrpc_service *svc, - int size) +struct ptlrpc_reply_state *lustre_get_emerg_rs(struct ptlrpc_service *svc) { struct ptlrpc_reply_state *rs = NULL; @@ -321,40 +299,46 @@ static struct ptlrpc_reply_state *lustre_get_emerg_rs(struct ptlrpc_service *svc list_del(&rs->rs_list); spin_unlock(&svc->srv_lock); LASSERT(rs); - LASSERTF(svc->srv_max_reply_size > size, "Want %d, prealloc %d\n", size, - svc->srv_max_reply_size); - memset(rs, 0, size); + memset(rs, 0, svc->srv_max_reply_size); + rs->rs_service = svc; rs->rs_prealloc = 1; out: return rs; } +void lustre_put_emerg_rs(struct ptlrpc_reply_state *rs) +{ + struct ptlrpc_service *svc = rs->rs_service; + + LASSERT(svc); + + spin_lock(&svc->srv_lock); + list_add(&rs->rs_list, &svc->srv_free_rs_list); + spin_unlock(&svc->srv_lock); + cfs_waitq_signal(&svc->srv_free_rs_waitq); +} + static int lustre_pack_reply_v1(struct ptlrpc_request *req, int count, int *lens, char **bufs) { struct ptlrpc_reply_state *rs; - int msg_len; - int size; + int msg_len, rc; ENTRY; LASSERT (req->rq_reply_state == NULL); msg_len = lustre_msg_size_v1(count, lens); - size = sizeof(struct ptlrpc_reply_state) + msg_len; - OBD_ALLOC(rs, size); - if (unlikely(rs == NULL)) { - rs = lustre_get_emerg_rs(req->rq_rqbd->rqbd_service, size); - if (!rs) - RETURN (-ENOMEM); - } + rc = sptlrpc_svc_alloc_rs(req, msg_len); + if (rc) + RETURN(rc); + + rs = req->rq_reply_state; atomic_set(&rs->rs_refcount, 1); /* 1 ref for rq_reply_state */ rs->rs_cb_id.cbid_fn = reply_out_callback; rs->rs_cb_id.cbid_arg = rs; rs->rs_service = req->rq_rqbd->rqbd_service; - rs->rs_size = size; 
CFS_INIT_LIST_HEAD(&rs->rs_exp_list); CFS_INIT_LIST_HEAD(&rs->rs_obd_list); - rs->rs_msg = (struct lustre_msg *)(rs + 1); req->rq_replen = msg_len; req->rq_reply_state = rs; @@ -366,32 +350,27 @@ static int lustre_pack_reply_v1(struct ptlrpc_request *req, int count, RETURN (0); } -static int lustre_pack_reply_v2(struct ptlrpc_request *req, int count, - int *lens, char **bufs) +int lustre_pack_reply_v2(struct ptlrpc_request *req, int count, + int *lens, char **bufs) { struct ptlrpc_reply_state *rs; - int msg_len; - int size; + int msg_len, rc; ENTRY; LASSERT(req->rq_reply_state == NULL); msg_len = lustre_msg_size_v2(count, lens); - size = sizeof(struct ptlrpc_reply_state) + msg_len; - OBD_ALLOC(rs, size); - if (unlikely(rs == NULL)) { - rs = lustre_get_emerg_rs(req->rq_rqbd->rqbd_service, size); - if (!rs) - RETURN (-ENOMEM); - } + rc = sptlrpc_svc_alloc_rs(req, msg_len); + if (rc) + RETURN(rc); + + rs = req->rq_reply_state; atomic_set(&rs->rs_refcount, 1); /* 1 ref for rq_reply_state */ rs->rs_cb_id.cbid_fn = reply_out_callback; rs->rs_cb_id.cbid_arg = rs; rs->rs_service = req->rq_rqbd->rqbd_service; - rs->rs_size = size; CFS_INIT_LIST_HEAD(&rs->rs_exp_list); CFS_INIT_LIST_HEAD(&rs->rs_obd_list); - rs->rs_msg = (struct lustre_msg *)(rs + 1); req->rq_replen = msg_len; req->rq_reply_state = rs; @@ -403,6 +382,7 @@ static int lustre_pack_reply_v2(struct ptlrpc_request *req, int count, RETURN(0); } +EXPORT_SYMBOL(lustre_pack_reply_v2); int lustre_pack_reply(struct ptlrpc_request *req, int count, int *lens, char **bufs) @@ -505,21 +485,19 @@ void *lustre_msg_buf(struct lustre_msg *m, int n, int min_size) } } -void lustre_shrink_reply_v1(struct ptlrpc_request *req, int segment, - unsigned int newlen, int move_data) +int lustre_shrink_msg_v1(struct lustre_msg_v1 *msg, int segment, + unsigned int newlen, int move_data) { - struct lustre_msg_v1 *msg = (struct lustre_msg_v1 *)req->rq_repmsg; - char *tail = NULL, *newpos; - int tail_len = 0, n; + char *tail = NULL, *newpos; + 
int tail_len = 0, n; - LASSERT(req->rq_reply_state); LASSERT(msg); LASSERT(segment >= 0); LASSERT(msg->lm_bufcount > segment); LASSERT(msg->lm_buflens[segment] >= newlen); if (msg->lm_buflens[segment] == newlen) - return; + goto out; if (move_data && msg->lm_bufcount > segment + 1) { tail = lustre_msg_buf_v1(msg, segment + 1, 0); @@ -542,23 +520,22 @@ void lustre_shrink_reply_v1(struct ptlrpc_request *req, int segment, msg->lm_buflens[msg->lm_bufcount - 1] = 0; } - req->rq_replen = lustre_msg_size_v1(msg->lm_bufcount, msg->lm_buflens); +out: + return lustre_msg_size_v1(msg->lm_bufcount, msg->lm_buflens); } -void lustre_shrink_reply_v2(struct ptlrpc_request *req, int segment, - unsigned int newlen, int move_data) +int lustre_shrink_msg_v2(struct lustre_msg_v2 *msg, int segment, + unsigned int newlen, int move_data) { - struct lustre_msg_v2 *msg = req->rq_repmsg; - char *tail = NULL, *newpos; - int tail_len = 0, n; + char *tail = NULL, *newpos; + int tail_len = 0, n; - LASSERT(req->rq_reply_state); LASSERT(msg); LASSERT(msg->lm_bufcount > segment); LASSERT(msg->lm_buflens[segment] >= newlen); if (msg->lm_buflens[segment] == newlen) - return; + goto out; if (move_data && msg->lm_bufcount > segment + 1) { tail = lustre_msg_buf_v2(msg, segment + 1, 0); @@ -581,36 +558,37 @@ void lustre_shrink_reply_v2(struct ptlrpc_request *req, int segment, msg->lm_buflens[msg->lm_bufcount - 1] = 0; } - req->rq_replen = lustre_msg_size_v2(msg->lm_bufcount, msg->lm_buflens); +out: + return lustre_msg_size_v2(msg->lm_bufcount, msg->lm_buflens); } /* - * shrink @segment to size @newlen. if @move_data is non-zero, we also move - * data forward from @segment + 1. + * for @msg, shrink @segment to size @newlen. if @move_data is non-zero, + * we also move data forward from @segment + 1. * * if @newlen == 0, we remove the segment completely, but we still keep the * totally bufcount the same to save possible data moving. this will leave a * unused segment with size 0 at the tail, but that's ok. 
* + * return new msg size after shrinking. + * * CAUTION: * + if any buffers higher than @segment has been filled in, must call shrink * with non-zero @move_data. * + caller should NOT keep pointers to msg buffers which higher than @segment * after call shrink. */ -void lustre_shrink_reply(struct ptlrpc_request *req, int segment, - unsigned int newlen, int move_data) +int lustre_shrink_msg(struct lustre_msg *msg, int segment, + unsigned int newlen, int move_data) { - switch (req->rq_repmsg->lm_magic) { + switch (msg->lm_magic) { case LUSTRE_MSG_MAGIC_V1: - lustre_shrink_reply_v1(req, segment - 1, newlen, move_data); - return; + return lustre_shrink_msg_v1((struct lustre_msg_v1 *) msg, + segment - 1, newlen, move_data); case LUSTRE_MSG_MAGIC_V2: - lustre_shrink_reply_v2(req, segment, newlen, move_data); - return; + return lustre_shrink_msg_v2(msg, segment, newlen, move_data); default: - LASSERTF(0, "incorrect message magic: %08x\n", - req->rq_repmsg->lm_magic); + LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic); } } @@ -627,17 +605,7 @@ void lustre_free_reply_state(struct ptlrpc_reply_state *rs) LASSERT (list_empty(&rs->rs_exp_list)); LASSERT (list_empty(&rs->rs_obd_list)); - if (unlikely(rs->rs_prealloc)) { - struct ptlrpc_service *svc = rs->rs_service; - - spin_lock(&svc->srv_lock); - list_add(&rs->rs_list, - &svc->srv_free_rs_list); - spin_unlock(&svc->srv_lock); - cfs_waitq_signal(&svc->srv_free_rs_waitq); - } else { - OBD_FREE(rs, rs->rs_size); - } + sptlrpc_svc_free_rs(rs); } int lustre_unpack_msg_v1(void *msg, int len) diff --git a/lustre/ptlrpc/pers.c b/lustre/ptlrpc/pers.c index 865dcf0..8e92509 100644 --- a/lustre/ptlrpc/pers.c +++ b/lustre/ptlrpc/pers.c @@ -45,7 +45,7 @@ void ptlrpc_fill_bulk_md (lnet_md_t *md, struct ptlrpc_bulk_desc *desc) LASSERT (!(md->options & (LNET_MD_IOVEC | LNET_MD_KIOV | LNET_MD_PHYS))); md->options |= LNET_MD_KIOV; - md->start = &desc->bd_iov[0]; + md->start = desc->bd_enc_iov ? 
desc->bd_enc_iov : &desc->bd_iov[0]; md->length = desc->bd_iov_count; } @@ -73,6 +73,61 @@ void ptl_rpc_wipe_bulk_pages(struct ptlrpc_bulk_desc *desc) } } +int ptlrpc_bulk_alloc_enc_pages(struct ptlrpc_bulk_desc *desc) +{ + int i, alloc_size; + + LASSERT(desc->bd_enc_iov == NULL); + + if (desc->bd_iov_count == 0) + return 0; + + alloc_size = desc->bd_iov_count * sizeof(desc->bd_enc_iov[0]); + + OBD_ALLOC(desc->bd_enc_iov, alloc_size); + if (desc->bd_enc_iov == NULL) + return -ENOMEM; + + memcpy(desc->bd_enc_iov, desc->bd_iov, alloc_size); + + for (i = 0; i < desc->bd_iov_count; i++) { + desc->bd_enc_iov[i].kiov_page = + cfs_alloc_page(CFS_ALLOC_IO | CFS_ALLOC_HIGH); + if (desc->bd_enc_iov[i].kiov_page == NULL) { + CERROR("Failed to alloc %d encryption pages\n", + desc->bd_iov_count); + break; + } + } + + if (i == desc->bd_iov_count) + return 0; + + /* error, cleanup */ + for (i = i - 1; i >= 0; i--) + __free_page(desc->bd_enc_iov[i].kiov_page); + OBD_FREE(desc->bd_enc_iov, alloc_size); + desc->bd_enc_iov = NULL; + return -ENOMEM; +} + +void ptlrpc_bulk_free_enc_pages(struct ptlrpc_bulk_desc *desc) +{ + int i; + + if (desc->bd_enc_iov == NULL) + return; + + for (i = 0; i < desc->bd_iov_count; i++) { + LASSERT(desc->bd_enc_iov[i].kiov_page); + __free_page(desc->bd_enc_iov[i].kiov_page); + } + + OBD_FREE(desc->bd_enc_iov, + desc->bd_iov_count * sizeof(desc->bd_enc_iov[0])); + desc->bd_enc_iov = NULL; +} + #else /* !__KERNEL__ */ void ptlrpc_fill_bulk_md(lnet_md_t *md, struct ptlrpc_bulk_desc *desc) @@ -127,4 +182,12 @@ void ptl_rpc_wipe_bulk_pages(struct ptlrpc_bulk_desc *desc) memset(iov->iov_base, 0xab, iov->iov_len); } } + +int ptlrpc_bulk_alloc_enc_pages(struct ptlrpc_bulk_desc *desc) +{ + return 0; +} +void ptlrpc_bulk_free_enc_pages(struct ptlrpc_bulk_desc *desc) +{ +} #endif /* !__KERNEL__ */ diff --git a/lustre/ptlrpc/pinger.c b/lustre/ptlrpc/pinger.c index 787074c..9b00e1e 100644 --- a/lustre/ptlrpc/pinger.c +++ b/lustre/ptlrpc/pinger.c @@ -76,6 +76,50 @@ 
void ptlrpc_ping_import_soon(struct obd_import *imp) } #ifdef __KERNEL__ +static +int check_import_reconnect(struct obd_import *imp) +{ + spin_lock(&imp->imp_lock); + + /* next_reconnect == 0 mean never need reconnect. + */ + if (imp->imp_next_reconnect == 0 || + cfs_time_before(cfs_time_current_sec(), imp->imp_next_reconnect)) { + spin_unlock(&imp->imp_lock); + return 0; + } + + if (imp->imp_state != LUSTRE_IMP_FULL || + imp->imp_force_reconnect == 1) { + spin_unlock(&imp->imp_lock); + return 0; + } + + imp->imp_force_reconnect = 1; + + /* prevent concurrent reconnect. if this reconnect failed, import + * will be set to non-FULL; if success, next_reconnect value will + * will be updated by security module. + */ + imp->imp_next_reconnect = 0; + + spin_unlock(&imp->imp_lock); + + CWARN("issue a force reconnect on imp %p(%s) to %s\n", + imp, ptlrpc_import_state_name(imp->imp_state), + imp->imp_obd->u.cli.cl_target_uuid.uuid); + + /* usually the root context should be still valid, because import + * reconnect have a nice time advance, thus we have little chance + * that a newly created & refreshing context be wrongly flushed by us. + * but even that we are still fine. + */ + sptlrpc_import_flush_root_ctx(imp); + + ptlrpc_connect_import(imp, NULL); + return 1; +} + static int ptlrpc_pinger_main(void *arg) { struct ptlrpc_svc_data *data = (struct ptlrpc_svc_data *)arg; @@ -102,6 +146,15 @@ static int ptlrpc_pinger_main(void *arg) imp_pinger_chain); int force, level; + if (check_import_reconnect(imp)) { + /* if a forced reconnect was issued, we don't + * need additional ping at this time. 
+ */ + if (imp->imp_pingable) + ptlrpc_update_next_ping(imp); + continue; + } + spin_lock(&imp->imp_lock); level = imp->imp_state; force = imp->imp_force_verify; diff --git a/lustre/ptlrpc/ptlrpc_internal.h b/lustre/ptlrpc/ptlrpc_internal.h index 54abe7a..bd92a26 100644 --- a/lustre/ptlrpc/ptlrpc_internal.h +++ b/lustre/ptlrpc/ptlrpc_internal.h @@ -126,6 +126,10 @@ void ptlrpc_add_bulk_page(struct ptlrpc_bulk_desc *desc, cfs_page_t *page, int pageoffset, int len); void ptl_rpc_wipe_bulk_pages(struct ptlrpc_bulk_desc *desc); +/* pack_generic.c */ +struct ptlrpc_reply_state *lustre_get_emerg_rs(struct ptlrpc_service *svc); +void lustre_put_emerg_rs(struct ptlrpc_reply_state *rs); + /* pinger.c */ int ptlrpc_start_pinger(void); int ptlrpc_stop_pinger(void); @@ -138,4 +142,16 @@ int ping_evictor_wake(struct obd_export *exp); #define ping_evictor_wake(exp) 1 #endif +/* sec_null.c */ +int sptlrpc_null_init(void); +int sptlrpc_null_exit(void); + +/* sec_plain.c */ +int sptlrpc_plain_init(void); +int sptlrpc_plain_exit(void); + +/* sec.c */ +int sptlrpc_init(void); +int sptlrpc_exit(void); + #endif /* PTLRPC_INTERNAL_H */ diff --git a/lustre/ptlrpc/ptlrpc_module.c b/lustre/ptlrpc/ptlrpc_module.c index c809437..be470c9 100644 --- a/lustre/ptlrpc/ptlrpc_module.c +++ b/lustre/ptlrpc/ptlrpc_module.c @@ -85,10 +85,18 @@ __init int ptlrpc_init(void) rc = ldlm_init(); if (rc) GOTO(cleanup, rc); + cleanup_phase = 5; + + rc = sptlrpc_init(); + if (rc) + GOTO(cleanup, rc); + RETURN(0); cleanup: switch(cleanup_phase) { + case 5: + ldlm_exit(); case 4: ptlrpc_stop_pinger(); case 3: @@ -107,6 +115,7 @@ cleanup: #ifdef __KERNEL__ static void __exit ptlrpc_exit(void) { + sptlrpc_exit(); ldlm_exit(); ptlrpc_stop_pinger(); ptlrpc_exit_portals(); @@ -187,7 +196,7 @@ EXPORT_SYMBOL(lustre_msg_swabbed); EXPORT_SYMBOL(lustre_msg_check_version); EXPORT_SYMBOL(lustre_pack_request); EXPORT_SYMBOL(lustre_pack_reply); -EXPORT_SYMBOL(lustre_shrink_reply); +EXPORT_SYMBOL(lustre_shrink_msg); 
EXPORT_SYMBOL(lustre_free_reply_state); EXPORT_SYMBOL(lustre_msg_size); EXPORT_SYMBOL(lustre_unpack_msg); @@ -277,6 +286,10 @@ EXPORT_SYMBOL(ptlrpc_invalidate_import); EXPORT_SYMBOL(ptlrpc_fail_import); EXPORT_SYMBOL(ptlrpc_recover_import); +/* pers.c */ +EXPORT_SYMBOL(ptlrpc_bulk_alloc_enc_pages); +EXPORT_SYMBOL(ptlrpc_bulk_free_enc_pages); + /* pinger.c */ EXPORT_SYMBOL(ptlrpc_pinger_add_import); EXPORT_SYMBOL(ptlrpc_pinger_del_import); diff --git a/lustre/ptlrpc/sec.c b/lustre/ptlrpc/sec.c new file mode 100644 index 0000000..b9dddba --- /dev/null +++ b/lustre/ptlrpc/sec.c @@ -0,0 +1,2495 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2004-2006 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#ifndef EXPORT_SYMTAB +#define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_SEC + +#include +#ifndef __KERNEL__ +#include +#include +#else +#include +#endif + +#include +#include +#include +#include +#include +#include +#include + +#include "ptlrpc_internal.h" + +static void sptlrpc_sec_destroy(struct ptlrpc_sec *sec); +static int sptlrpc_sec_destroy_ctx(struct ptlrpc_sec *sec, + struct ptlrpc_cli_ctx *ctx); +static void sptlrpc_ctx_refresh(struct ptlrpc_cli_ctx *ctx); + +/*********************************************** + * policy registers * + ***********************************************/ + +static spinlock_t policy_lock = SPIN_LOCK_UNLOCKED; +static struct ptlrpc_sec_policy *policies[SPTLRPC_POLICY_MAX] = { + NULL, +}; + +int sptlrpc_register_policy(struct ptlrpc_sec_policy *policy) +{ + __u32 number = policy->sp_policy; + + LASSERT(policy->sp_name); + LASSERT(policy->sp_cops); + LASSERT(policy->sp_sops); + + if (number >= SPTLRPC_POLICY_MAX) + return -EINVAL; + + spin_lock(&policy_lock); + if (policies[number]) { + spin_unlock(&policy_lock); + return -EALREADY; + } + policies[number] = policy; + spin_unlock(&policy_lock); + + CDEBUG(D_SEC, "%s: registered\n", policy->sp_name); + return 0; +} +EXPORT_SYMBOL(sptlrpc_register_policy); + +int sptlrpc_unregister_policy(struct ptlrpc_sec_policy *policy) +{ + __u32 number = policy->sp_policy; + + LASSERT(number < SPTLRPC_POLICY_MAX); + + spin_lock(&policy_lock); + if (!policies[number]) { + spin_unlock(&policy_lock); + CERROR("%s: already unregistered\n", policy->sp_name); + return -EINVAL; + } + + LASSERT(policies[number] == policy); + policies[number] = NULL; + spin_unlock(&policy_lock); + + CDEBUG(D_SEC, "%s: unregistered\n", policy->sp_name); + return 0; +} +EXPORT_SYMBOL(sptlrpc_unregister_policy); + +static +struct ptlrpc_sec_policy * sptlrpc_flavor2policy(ptlrpc_flavor_t flavor) +{ + static int load_module = 0; + struct ptlrpc_sec_policy *policy; + __u32 number = SEC_FLAVOR_POLICY(flavor); + + if 
(number >= SPTLRPC_POLICY_MAX) + return NULL; + +again: + spin_lock(&policy_lock); + policy = policies[number]; + if (policy && !try_module_get(policy->sp_owner)) + policy = NULL; + spin_unlock(&policy_lock); + + /* if failure, try to load gss module, once */ + if (policy == NULL && load_module == 0 && + number == SPTLRPC_POLICY_GSS) { + load_module = 1; + if (request_module("ptlrpc_gss") == 0) + goto again; + } + + return policy; +} + +ptlrpc_flavor_t sptlrpc_name2flavor(const char *name) +{ + if (!strcmp(name, "null")) + return SPTLRPC_FLVR_NULL; + if (!strcmp(name, "plain")) + return SPTLRPC_FLVR_PLAIN; + if (!strcmp(name, "krb5")) + return SPTLRPC_FLVR_KRB5; + if (!strcmp(name, "krb5i")) + return SPTLRPC_FLVR_KRB5I; + if (!strcmp(name, "krb5p")) + return SPTLRPC_FLVR_KRB5P; + + return SPTLRPC_FLVR_INVALID; +} +EXPORT_SYMBOL(sptlrpc_name2flavor); + +char *sptlrpc_flavor2name(ptlrpc_flavor_t flavor) +{ + switch (flavor) { + case SPTLRPC_FLVR_NULL: + return "null"; + case SPTLRPC_FLVR_PLAIN: + return "plain"; + case SPTLRPC_FLVR_KRB5: + return "krb5"; + case SPTLRPC_FLVR_KRB5I: + return "krb5i"; + case SPTLRPC_FLVR_KRB5P: + return "krb5p"; + default: + CERROR("invalid flavor 0x%x(p%u,s%u,v%u)\n", flavor, + SEC_FLAVOR_POLICY(flavor), SEC_FLAVOR_SUBPOLICY(flavor), + SEC_FLAVOR_SVC(flavor)); + } + return "UNKNOWN"; +} +EXPORT_SYMBOL(sptlrpc_flavor2name); + +/*********************************************** + * context helpers * + * internal APIs * + * cache management * + ***********************************************/ + +static inline +unsigned long ctx_status(struct ptlrpc_cli_ctx *ctx) +{ + smp_mb(); + return (ctx->cc_flags & PTLRPC_CTX_STATUS_MASK); +} + +static inline +int ctx_is_uptodate(struct ptlrpc_cli_ctx *ctx) +{ + return (ctx_status(ctx) == PTLRPC_CTX_UPTODATE); +} + +static inline +int ctx_is_refreshed(struct ptlrpc_cli_ctx *ctx) +{ + return (ctx_status(ctx) != 0); +} + +static inline +int ctx_is_dead(struct ptlrpc_cli_ctx *ctx) +{ + smp_mb(); + return 
((ctx->cc_flags & (PTLRPC_CTX_DEAD | PTLRPC_CTX_ERROR)) != 0); +} + +static inline +int ctx_is_eternal(struct ptlrpc_cli_ctx *ctx) +{ + smp_mb(); + return ((ctx->cc_flags & PTLRPC_CTX_ETERNAL) != 0); +} + +static +int ctx_expire(struct ptlrpc_cli_ctx *ctx) +{ + LASSERT(atomic_read(&ctx->cc_refcount)); + + if (!test_and_set_bit(PTLRPC_CTX_DEAD_BIT, &ctx->cc_flags)) { + cfs_time_t now = cfs_time_current_sec(); + + smp_mb(); + clear_bit(PTLRPC_CTX_UPTODATE_BIT, &ctx->cc_flags); + + if (ctx->cc_expire && cfs_time_aftereq(now, ctx->cc_expire)) + CWARN("ctx %p(%u->%s): get expired (%lds exceeds)\n", + ctx, ctx->cc_vcred.vc_uid, + sec2target_str(ctx->cc_sec), + cfs_time_sub(now, ctx->cc_expire)); + else + CWARN("ctx %p(%u->%s): force to die (%lds remains)\n", + ctx, ctx->cc_vcred.vc_uid, + sec2target_str(ctx->cc_sec), + ctx->cc_expire == 0 ? 0 : + cfs_time_sub(ctx->cc_expire, now)); + + return 1; + } + return 0; +} + +static +void ctx_enhash(struct ptlrpc_cli_ctx *ctx, struct hlist_head *hash) +{ + set_bit(PTLRPC_CTX_HASHED_BIT, &ctx->cc_flags); + atomic_inc(&ctx->cc_refcount); + hlist_add_head(&ctx->cc_hash, hash); +} + +static +void ctx_unhash(struct ptlrpc_cli_ctx *ctx, struct hlist_head *freelist) +{ + LASSERT_SPIN_LOCKED(&ctx->cc_sec->ps_lock); + LASSERT(atomic_read(&ctx->cc_refcount) > 0); + LASSERT(test_bit(PTLRPC_CTX_HASHED_BIT, &ctx->cc_flags)); + LASSERT(!hlist_unhashed(&ctx->cc_hash)); + + clear_bit(PTLRPC_CTX_HASHED_BIT, &ctx->cc_flags); + + if (atomic_dec_and_test(&ctx->cc_refcount)) { + __hlist_del(&ctx->cc_hash); + hlist_add_head(&ctx->cc_hash, freelist); + } else + hlist_del_init(&ctx->cc_hash); +} + +/* + * return 1 if the context is dead. + */ +static +int ctx_check_death(struct ptlrpc_cli_ctx *ctx, struct hlist_head *freelist) +{ + if (unlikely(ctx_is_dead(ctx))) + goto unhash; + + /* expire is 0 means never expire. 
a newly created gss context + * which during upcall also has 0 expiration + */ + smp_mb(); + if (ctx->cc_expire == 0) + return 0; + + /* check real expiration */ + smp_mb(); + if (cfs_time_after(ctx->cc_expire, cfs_time_current_sec())) + return 0; + + ctx_expire(ctx); + +unhash: + if (freelist) + ctx_unhash(ctx, freelist); + + return 1; +} + +static inline +int ctx_check_death_locked(struct ptlrpc_cli_ctx *ctx, + struct hlist_head *freelist) +{ + LASSERT(ctx->cc_sec); + LASSERT(atomic_read(&ctx->cc_refcount) > 0); + LASSERT_SPIN_LOCKED(&ctx->cc_sec->ps_lock); + LASSERT(test_bit(PTLRPC_CTX_HASHED_BIT, &ctx->cc_flags)); + + return ctx_check_death(ctx, freelist); +} + +static +int ctx_check_uptodate(struct ptlrpc_cli_ctx *ctx) +{ + LASSERT(ctx->cc_sec); + LASSERT(atomic_read(&ctx->cc_refcount) > 0); + + if (!ctx_check_death(ctx, NULL) && ctx_is_uptodate(ctx)) + return 1; + return 0; +} + +static inline +int ctx_match(struct ptlrpc_cli_ctx *ctx, struct vfs_cred *vcred) +{ + /* a little bit optimization for null policy */ + if (!ctx->cc_ops->match) + return 1; + + return ctx->cc_ops->match(ctx, vcred); +} + +static +void ctx_list_destroy(struct hlist_head *head) +{ + struct ptlrpc_cli_ctx *ctx; + + while (!hlist_empty(head)) { + ctx = hlist_entry(head->first, struct ptlrpc_cli_ctx, cc_hash); + + LASSERT(atomic_read(&ctx->cc_refcount) == 0); + LASSERT(test_bit(PTLRPC_CTX_HASHED_BIT, &ctx->cc_flags) == 0); + + hlist_del_init(&ctx->cc_hash); + sptlrpc_sec_destroy_ctx(ctx->cc_sec, ctx); + } +} + +static +void ctx_cache_gc(struct ptlrpc_sec *sec, struct hlist_head *freelist) +{ + struct ptlrpc_cli_ctx *ctx; + struct hlist_node *pos, *next; + int i; + ENTRY; + + CDEBUG(D_SEC, "do gc on sec %s@%p\n", sec->ps_policy->sp_name, sec); + + for (i = 0; i < sec->ps_ccache_size; i++) { + hlist_for_each_entry_safe(ctx, pos, next, + &sec->ps_ccache[i], cc_hash) + ctx_check_death_locked(ctx, freelist); + } + + sec->ps_gc_next = cfs_time_current_sec() + sec->ps_gc_interval; + EXIT; +} + 
+/* + * @uid: which user. "-1" means flush all. + * @grace: mark context DEAD, allow graceful destroy like notify + * server side, etc. + * @force: also flush busy entries. + * + * return the number of busy context encountered. + * + * In any cases, never touch "eternal" contexts. + */ +static +int ctx_cache_flush(struct ptlrpc_sec *sec, uid_t uid, int grace, int force) +{ + struct ptlrpc_cli_ctx *ctx; + struct hlist_node *pos, *next; + HLIST_HEAD(freelist); + int i, busy = 0; + ENTRY; + + might_sleep_if(grace); + + spin_lock(&sec->ps_lock); + for (i = 0; i < sec->ps_ccache_size; i++) { + hlist_for_each_entry_safe(ctx, pos, next, + &sec->ps_ccache[i], cc_hash) { + LASSERT(atomic_read(&ctx->cc_refcount) > 0); + + if (ctx_is_eternal(ctx)) + continue; + if (uid != -1 && uid != ctx->cc_vcred.vc_uid) + continue; + + if (atomic_read(&ctx->cc_refcount) > 1) { + busy++; + if (!force) + continue; + + CWARN("flush busy(%d) ctx %p(%u->%s) by force, " + "grace %d\n", + atomic_read(&ctx->cc_refcount), + ctx, ctx->cc_vcred.vc_uid, + sec2target_str(ctx->cc_sec), grace); + } + ctx_unhash(ctx, &freelist); + + set_bit(PTLRPC_CTX_DEAD_BIT, &ctx->cc_flags); + if (!grace) + clear_bit(PTLRPC_CTX_UPTODATE_BIT, + &ctx->cc_flags); + } + } + spin_unlock(&sec->ps_lock); + + ctx_list_destroy(&freelist); + RETURN(busy); +} + +static inline +unsigned int ctx_hash_index(struct ptlrpc_sec *sec, __u64 key) +{ + return (unsigned int) (key & (sec->ps_ccache_size - 1)); +} + +/* + * return matched context. If it's a newly created one, we also give the + * first push to refresh. return NULL if error happens. 
+ */ +static +struct ptlrpc_cli_ctx * ctx_cache_lookup(struct ptlrpc_sec *sec, + struct vfs_cred *vcred, + int create, int remove_dead) +{ + struct ptlrpc_cli_ctx *ctx = NULL, *new = NULL; + struct hlist_head *hash_head; + struct hlist_node *pos, *next; + HLIST_HEAD(freelist); + unsigned int hash, gc = 0, found = 0; + ENTRY; + + might_sleep(); + + hash = ctx_hash_index(sec, (__u64) vcred->vc_uid); + LASSERT(hash < sec->ps_ccache_size); + hash_head = &sec->ps_ccache[hash]; + +retry: + spin_lock(&sec->ps_lock); + + /* gc_next == 0 means never do gc */ + if (remove_dead && sec->ps_gc_next && + cfs_time_after(cfs_time_current_sec(), sec->ps_gc_next)) { + ctx_cache_gc(sec, &freelist); + gc = 1; + } + + hlist_for_each_entry_safe(ctx, pos, next, hash_head, cc_hash) { + if (gc == 0 && + ctx_check_death_locked(ctx, remove_dead ? &freelist : NULL)) + continue; + + if (ctx_match(ctx, vcred)) { + found = 1; + break; + } + } + + if (found) { + if (new && new != ctx) { + /* lost the race, just free it */ + hlist_add_head(&new->cc_hash, &freelist); + new = NULL; + } + + /* hot node, move to head */ + if (hash_head->first != &ctx->cc_hash) { + __hlist_del(&ctx->cc_hash); + hlist_add_head(&ctx->cc_hash, hash_head); + } + } else { + /* don't allocate for reverse sec */ + if (sec->ps_flags & PTLRPC_SEC_FL_REVERSE) { + spin_unlock(&sec->ps_lock); + RETURN(NULL); + } + + if (new) { + ctx_enhash(new, hash_head); + ctx = new; + } else if (create) { + spin_unlock(&sec->ps_lock); + new = sec->ps_policy->sp_cops->create_ctx(sec, vcred); + if (new) { + atomic_inc(&sec->ps_busy); + goto retry; + } + } else + ctx = NULL; + } + + /* hold a ref */ + if (ctx) + atomic_inc(&ctx->cc_refcount); + + spin_unlock(&sec->ps_lock); + + /* the allocator of the context must give the first push to refresh */ + if (new) { + LASSERT(new == ctx); + sptlrpc_ctx_refresh(new); + } + + ctx_list_destroy(&freelist); + RETURN(ctx); +} + +static inline +struct ptlrpc_cli_ctx *get_my_ctx(struct ptlrpc_sec *sec) +{ + 
struct vfs_cred vcred = { cfs_current()->uid, cfs_current()->gid }; + int create = 1, remove_dead = 1; + + if (sec->ps_flags & PTLRPC_SEC_FL_REVERSE) { + vcred.vc_uid = 0; + create = 0; + remove_dead = 0; + } else if (sec->ps_flags & PTLRPC_SEC_FL_ROOTONLY) + vcred.vc_uid = 0; + + if (sec->ps_policy->sp_cops->lookup_ctx) + return sec->ps_policy->sp_cops->lookup_ctx(sec, &vcred); + else + return ctx_cache_lookup(sec, &vcred, create, remove_dead); +} + +/************************************************** + * client context APIs * + **************************************************/ + +static +void sptlrpc_ctx_refresh(struct ptlrpc_cli_ctx *ctx) +{ + LASSERT(atomic_read(&ctx->cc_refcount) > 0); + + if (!ctx_is_refreshed(ctx) && ctx->cc_ops->refresh) + ctx->cc_ops->refresh(ctx); +} + +struct ptlrpc_cli_ctx *sptlrpc_ctx_get(struct ptlrpc_cli_ctx *ctx) +{ + LASSERT(atomic_read(&ctx->cc_refcount) > 0); + atomic_inc(&ctx->cc_refcount); + return ctx; +} +EXPORT_SYMBOL(sptlrpc_ctx_get); + +void sptlrpc_ctx_put(struct ptlrpc_cli_ctx *ctx, int sync) +{ + struct ptlrpc_sec *sec = ctx->cc_sec; + + LASSERT(sec); + LASSERT(atomic_read(&ctx->cc_refcount)); + + if (!atomic_dec_and_test(&ctx->cc_refcount)) + return; + + LASSERT(test_bit(PTLRPC_CTX_HASHED_BIT, &ctx->cc_flags) == 0); + LASSERT(hlist_unhashed(&ctx->cc_hash)); + + /* if required async, we must clear the UPTODATE bit to prevent extra + * rpcs during destroy procedure. + */ + if (!sync) + clear_bit(PTLRPC_CTX_UPTODATE_BIT, &ctx->cc_flags); + + /* destroy this context */ + if (!sptlrpc_sec_destroy_ctx(sec, ctx)) + return; + + CWARN("%s@%p: put last ctx, also destroy the sec\n", + sec->ps_policy->sp_name, sec); + + sptlrpc_sec_destroy(sec); +} +EXPORT_SYMBOL(sptlrpc_ctx_put); + +/* + * mark a ctx as DEAD, and pull it out from hash table. + * + * NOTE: the caller must hold at least 1 ref on the ctx. 
+ */ +void sptlrpc_ctx_expire(struct ptlrpc_cli_ctx *ctx) +{ + LASSERT(ctx->cc_sec); + LASSERT(atomic_read(&ctx->cc_refcount) > 0); + + ctx_expire(ctx); + + spin_lock(&ctx->cc_sec->ps_lock); + + if (test_and_clear_bit(PTLRPC_CTX_HASHED_BIT, &ctx->cc_flags)) { + LASSERT(!hlist_unhashed(&ctx->cc_hash)); + LASSERT(atomic_read(&ctx->cc_refcount) > 1); + + hlist_del_init(&ctx->cc_hash); + if (atomic_dec_and_test(&ctx->cc_refcount)) + LBUG(); + } + + spin_unlock(&ctx->cc_sec->ps_lock); +} +EXPORT_SYMBOL(sptlrpc_ctx_expire); + +void sptlrpc_ctx_replace(struct ptlrpc_sec *sec, struct ptlrpc_cli_ctx *new) +{ + struct ptlrpc_cli_ctx *ctx; + struct hlist_node *pos, *next; + HLIST_HEAD(freelist); + unsigned int hash; + ENTRY; + + hash = ctx_hash_index(sec, (__u64) new->cc_vcred.vc_uid); + LASSERT(hash < sec->ps_ccache_size); + + spin_lock(&sec->ps_lock); + + hlist_for_each_entry_safe(ctx, pos, next, + &sec->ps_ccache[hash], cc_hash) { + if (!ctx_match(ctx, &new->cc_vcred)) + continue; + + ctx_expire(ctx); + ctx_unhash(ctx, &freelist); + break; + } + + ctx_enhash(new, &sec->ps_ccache[hash]); + atomic_inc(&sec->ps_busy); + + spin_unlock(&sec->ps_lock); + + ctx_list_destroy(&freelist); + EXIT; +} +EXPORT_SYMBOL(sptlrpc_ctx_replace); + +int sptlrpc_req_get_ctx(struct ptlrpc_request *req) +{ + struct obd_import *imp = req->rq_import; + ENTRY; + + LASSERT(!req->rq_cli_ctx); + LASSERT(imp); + + req->rq_cli_ctx = get_my_ctx(imp->imp_sec); + + if (!req->rq_cli_ctx) { + CERROR("req %p: fail to get context from cache\n", req); + RETURN(-ENOMEM); + } + + RETURN(0); +} + +void sptlrpc_ctx_wakeup(struct ptlrpc_cli_ctx *ctx) +{ + struct ptlrpc_request *req, *next; + + spin_lock(&ctx->cc_lock); + list_for_each_entry_safe(req, next, &ctx->cc_req_list, rq_ctx_chain) { + list_del_init(&req->rq_ctx_chain); + ptlrpc_wake_client_req(req); + } + spin_unlock(&ctx->cc_lock); +} +EXPORT_SYMBOL(sptlrpc_ctx_wakeup); + +void sptlrpc_req_put_ctx(struct ptlrpc_request *req) +{ + ENTRY; + + LASSERT(req); + 
LASSERT(req->rq_cli_ctx); + + /* request might be asked to release earlier while still + * in the context waiting list. + */ + if (!list_empty(&req->rq_ctx_chain)) { + spin_lock(&req->rq_cli_ctx->cc_lock); + list_del_init(&req->rq_ctx_chain); + spin_unlock(&req->rq_cli_ctx->cc_lock); + } + + /* this could be called with spinlock hold, use async mode */ + sptlrpc_ctx_put(req->rq_cli_ctx, 0); + req->rq_cli_ctx = NULL; + EXIT; +} + +/* + * request must have a context. if failed to get new context, + * just restore the old one + */ +int sptlrpc_req_replace_dead_ctx(struct ptlrpc_request *req) +{ + struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx; + int rc; + ENTRY; + + LASSERT(ctx); + LASSERT(test_bit(PTLRPC_CTX_DEAD_BIT, &ctx->cc_flags)); + + /* make sure not on context waiting list */ + spin_lock(&ctx->cc_lock); + list_del_init(&req->rq_ctx_chain); + spin_unlock(&ctx->cc_lock); + + sptlrpc_ctx_get(ctx); + sptlrpc_req_put_ctx(req); + rc = sptlrpc_req_get_ctx(req); + if (!rc) { + LASSERT(req->rq_cli_ctx); + LASSERT(req->rq_cli_ctx != ctx); + sptlrpc_ctx_put(ctx, 1); + } else { + LASSERT(!req->rq_cli_ctx); + req->rq_cli_ctx = ctx; + } + RETURN(rc); +} + +static +int ctx_check_refresh(struct ptlrpc_cli_ctx *ctx) +{ + smp_mb(); + if (ctx_is_refreshed(ctx)) + return 1; + return 0; +} + +static +int ctx_refresh_timeout(void *data) +{ + struct ptlrpc_request *req = data; + int rc; + + /* conn_cnt is needed in expire_one_request */ + lustre_msg_set_conn_cnt(req->rq_reqmsg, req->rq_import->imp_conn_cnt); + + rc = ptlrpc_expire_one_request(req); + /* if we started recovery, we should mark this ctx dead; otherwise + * in case of lgssd died nobody would retire this ctx, following + * connecting will still find the same ctx thus cause deadlock. + * there's an assumption that expire time of the request should be + * later than the context refresh expire time. 
+ */ + if (rc == 0) + ctx_expire(req->rq_cli_ctx); + return rc; +} + +static +void ctx_refresh_interrupt(void *data) +{ + /* do nothing */ +} + +/* + * the status of context could be subject to be changed by other threads at any + * time. we allow this race. but once we return with 0, the caller will + * suppose it's uptodated and keep using it until the affected rpc is done. + * + * @timeout: + * < 0 - don't wait + * = 0 - wait until success or fatal error occur + * > 0 - timeout value + * + * return 0 only if the context is uptodated. + */ +int sptlrpc_req_refresh_ctx(struct ptlrpc_request *req, long timeout) +{ + struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx; + struct l_wait_info lwi; + int rc; + ENTRY; + + LASSERT(ctx); + + /* special ctxs */ + if (ctx_is_eternal(ctx) || req->rq_ctx_init || req->rq_ctx_fini) + RETURN(0); + + /* reverse ctxs, don't refresh */ + if (ctx->cc_sec->ps_flags & PTLRPC_SEC_FL_REVERSE) + RETURN(0); + + spin_lock(&ctx->cc_lock); +again: + if (ctx_check_uptodate(ctx)) { + if (!list_empty(&req->rq_ctx_chain)) + list_del_init(&req->rq_ctx_chain); + spin_unlock(&ctx->cc_lock); + RETURN(0); + } + + if (test_bit(PTLRPC_CTX_ERROR_BIT, &ctx->cc_flags)) { + req->rq_err = 1; + if (!list_empty(&req->rq_ctx_chain)) + list_del_init(&req->rq_ctx_chain); + spin_unlock(&ctx->cc_lock); + RETURN(-EPERM); + } + + /* This is subtle. For resent message we have to keep original + * context to survive following situation: + * 1. the request sent to server + * 2. recovery was kick start + * 3. recovery finished, the request marked as resent + * 4. resend the request + * 5. old reply from server received (because xid is the same) + * 6. verify reply (has to be success) + * 7. new reply from server received, lnet drop it + * + * Note we can't simply change xid for resent request because + * server reply on it for reply reconstruction. 
+ * + * Commonly the original context should be uptodate because we + * have a expiry nice time; And server will keep their half part + * context because we at least hold a ref of old context which + * prevent the context detroy RPC be sent. So server still can + * accept the request and finish RPC. Two cases: + * 1. If server side context has been trimed, a NO_CONTEXT will + * be returned, gss_cli_ctx_verify/unseal will switch to new + * context by force. + * 2. Current context never be refreshed, then we are fine: we + * never really send request with old context before. + */ + if (test_bit(PTLRPC_CTX_UPTODATE, &ctx->cc_flags) && + req->rq_reqmsg && + lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT) { + if (!list_empty(&req->rq_ctx_chain)) + list_del_init(&req->rq_ctx_chain); + spin_unlock(&ctx->cc_lock); + RETURN(0); + } + + if (unlikely(test_bit(PTLRPC_CTX_DEAD_BIT, &ctx->cc_flags))) { + spin_unlock(&ctx->cc_lock); + + /* don't have to, but we don't want to release it too soon */ + sptlrpc_ctx_get(ctx); + + rc = sptlrpc_req_replace_dead_ctx(req); + if (rc) { + LASSERT(ctx == req->rq_cli_ctx); + CERROR("req %p: failed to replace dead ctx %p\n", + req, ctx); + req->rq_err = 1; + LASSERT(list_empty(&req->rq_ctx_chain)); + sptlrpc_ctx_put(ctx, 1); + RETURN(-ENOMEM); + } + + LASSERT(ctx != req->rq_cli_ctx); + CWARN("req %p: replace dead ctx %p(%u->%s) => %p\n", + req, ctx, ctx->cc_vcred.vc_uid, + sec2target_str(ctx->cc_sec), req->rq_cli_ctx); + + sptlrpc_ctx_put(ctx, 1); + ctx = req->rq_cli_ctx; + LASSERT(list_empty(&req->rq_ctx_chain)); + + spin_lock(&ctx->cc_lock); + goto again; + } + + /* Now we're sure this context is during upcall, add myself into + * waiting list + */ + if (list_empty(&req->rq_ctx_chain)) + list_add(&req->rq_ctx_chain, &ctx->cc_req_list); + + spin_unlock(&ctx->cc_lock); + + if (timeout < 0) { + RETURN(-EWOULDBLOCK); + } + + /* Clear any flags that may be present from previous sends */ + LASSERT(req->rq_receiving_reply == 0); + 
spin_lock(&req->rq_lock); + req->rq_err = 0; + req->rq_timedout = 0; + req->rq_resend = 0; + req->rq_restart = 0; + spin_unlock(&req->rq_lock); + + lwi = LWI_TIMEOUT_INTR(timeout == 0 ? LONG_MAX : timeout * HZ, + ctx_refresh_timeout, ctx_refresh_interrupt, req); + rc = l_wait_event(req->rq_reply_waitq, ctx_check_refresh(ctx), &lwi); + + spin_lock(&ctx->cc_lock); + /* five cases we are here: + * 1. successfully refreshed; + * 2. someone else mark this ctx dead by force; + * 3. interruptted; + * 4. timedout, and we don't want recover from the failure; + * 5. timedout, and waked up upon recovery finished; + */ + if (!ctx_is_refreshed(ctx)) { + /* timed out or interruptted */ + list_del_init(&req->rq_ctx_chain); + spin_unlock(&ctx->cc_lock); + + LASSERT(rc != 0); + RETURN(rc); + } + + goto again; +} + +void sptlrpc_req_set_flavor(struct ptlrpc_request *req, int opcode) +{ + struct sec_flavor_config *conf; + + LASSERT(req->rq_import); + LASSERT(req->rq_import->imp_sec); + LASSERT(req->rq_cli_ctx); + LASSERT(req->rq_cli_ctx->cc_sec); + LASSERT(req->rq_bulk_read == 0 || req->rq_bulk_write == 0); + + /* special security flags accoding to opcode */ + switch (opcode) { + case OST_READ: + case OST_SAN_READ: + req->rq_bulk_read = 1; + break; + case OST_WRITE: + case OST_SAN_WRITE: + req->rq_bulk_write = 1; + break; + case SEC_CTX_INIT: + req->rq_ctx_init = 1; + break; + case SEC_CTX_FINI: + req->rq_ctx_fini = 1; + break; + } + + req->rq_sec_flavor = req->rq_cli_ctx->cc_sec->ps_flavor; + + /* force SVC_NONE for context initiation rpc, SVC_AUTH for context + * destruction rpc + */ + if (unlikely(req->rq_ctx_init)) { + req->rq_sec_flavor = SEC_MAKE_RPC_FLAVOR( + SEC_FLAVOR_POLICY(req->rq_sec_flavor), + SEC_FLAVOR_SUBPOLICY(req->rq_sec_flavor), + SEC_FLAVOR_SVC(SPTLRPC_SVC_NONE)); + } else if (unlikely(req->rq_ctx_fini)) { + req->rq_sec_flavor = SEC_MAKE_RPC_FLAVOR( + SEC_FLAVOR_POLICY(req->rq_sec_flavor), + SEC_FLAVOR_SUBPOLICY(req->rq_sec_flavor), + 
SEC_FLAVOR_SVC(SPTLRPC_SVC_AUTH)); + } + + conf = &req->rq_import->imp_obd->u.cli.cl_sec_conf; + + /* user descriptor flag, except ROOTONLY which don't need, and + * null security which can't + */ + if ((conf->sfc_flags & PTLRPC_SEC_FL_ROOTONLY) == 0 && + req->rq_sec_flavor != SPTLRPC_FLVR_NULL) + req->rq_sec_flavor |= SEC_FLAVOR_FL_USER; + + /* bulk security flag */ + if ((req->rq_bulk_read || req->rq_bulk_write) && + (conf->sfc_bulk_priv != BULK_PRIV_ALG_NULL || + conf->sfc_bulk_csum != BULK_CSUM_ALG_NULL)) + req->rq_sec_flavor |= SEC_FLAVOR_FL_BULK; +} + +void sptlrpc_request_out_callback(struct ptlrpc_request *req) +{ + if (SEC_FLAVOR_SVC(req->rq_sec_flavor) != SPTLRPC_SVC_PRIV) + return; + + LASSERT(req->rq_clrbuf); + if (req->rq_pool || !req->rq_reqbuf) + return; + + OBD_FREE(req->rq_reqbuf, req->rq_reqbuf_len); + req->rq_reqbuf = NULL; + req->rq_reqbuf_len = 0; +} + +/* + * check whether current user have valid context for an import or not. + * might repeatedly try in case of non-fatal errors. 
+ * return 0 on success, < 0 on failure + */ +int sptlrpc_import_check_ctx(struct obd_import *imp) +{ + struct ptlrpc_cli_ctx *ctx; + struct ptlrpc_request *req = NULL; + int rc; + ENTRY; + + might_sleep(); + + ctx = get_my_ctx(imp->imp_sec); + if (!ctx) + RETURN(1); + + if (ctx_is_eternal(ctx)) { + sptlrpc_ctx_put(ctx, 1); + RETURN(0); + } + + OBD_ALLOC(req, sizeof(*req)); + if (!req) + RETURN(-ENOMEM); + + spin_lock_init(&req->rq_lock); + atomic_set(&req->rq_refcount, 10000); + INIT_LIST_HEAD(&req->rq_ctx_chain); + init_waitqueue_head(&req->rq_reply_waitq); + req->rq_import = imp; + req->rq_cli_ctx = ctx; + + rc = sptlrpc_req_refresh_ctx(req, 0); + LASSERT(list_empty(&req->rq_ctx_chain)); + sptlrpc_ctx_put(req->rq_cli_ctx, 1); + OBD_FREE(req, sizeof(*req)); + + RETURN(rc); +} + +int sptlrpc_cli_wrap_request(struct ptlrpc_request *req) +{ + struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx; + int rc = 0; + ENTRY; + + LASSERT(ctx); + LASSERT(ctx->cc_sec); + LASSERT(req->rq_reqbuf || req->rq_clrbuf); + + /* we wrap bulk request here because now we can be sure + * the context is uptodate. 
+ */ + if (req->rq_bulk) { + rc = sptlrpc_cli_wrap_bulk(req, req->rq_bulk); + if (rc) + RETURN(rc); + } + + switch (SEC_FLAVOR_SVC(req->rq_sec_flavor)) { + case SPTLRPC_SVC_NONE: + case SPTLRPC_SVC_AUTH: + LASSERT(ctx->cc_ops->sign); + rc = ctx->cc_ops->sign(ctx, req); + break; + case SPTLRPC_SVC_PRIV: + LASSERT(ctx->cc_ops->seal); + rc = ctx->cc_ops->seal(ctx, req); + break; + default: + LBUG(); + } + + if (rc == 0) { + LASSERT(req->rq_reqdata_len); + LASSERT(req->rq_reqdata_len % 8 == 0); + LASSERT(req->rq_reqdata_len <= req->rq_reqbuf_len); + } + + RETURN(rc); +} + +/* + * rq_nob_received is the actual received data length + */ +int sptlrpc_cli_unwrap_reply(struct ptlrpc_request *req) +{ + struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx; + int rc; + ENTRY; + + LASSERT(ctx); + LASSERT(ctx->cc_sec); + LASSERT(ctx->cc_ops); + LASSERT(req->rq_repbuf); + + req->rq_repdata_len = req->rq_nob_received; + + if (req->rq_nob_received < sizeof(struct lustre_msg)) { + CERROR("replied data length %d too small\n", + req->rq_nob_received); + RETURN(-EPROTO); + } + + if (req->rq_repbuf->lm_magic == LUSTRE_MSG_MAGIC_V1 || + req->rq_repbuf->lm_magic == LUSTRE_MSG_MAGIC_V1_SWABBED) { + /* it's must be null flavor, so our requets also should be + * in null flavor */ + if (SEC_FLAVOR_POLICY(req->rq_sec_flavor) != + SPTLRPC_POLICY_NULL) { + CERROR("request flavor is %x but reply with null\n", + req->rq_sec_flavor); + RETURN(-EPROTO); + } + } else { + /* v2 message... 
*/ + ptlrpc_flavor_t tmpf = req->rq_repbuf->lm_secflvr; + + if (req->rq_repbuf->lm_magic == LUSTRE_MSG_MAGIC_V2_SWABBED) + __swab32s(&tmpf); + + if (SEC_FLAVOR_POLICY(tmpf) != + SEC_FLAVOR_POLICY(req->rq_sec_flavor)) { + CERROR("request policy %u while reply with %d\n", + SEC_FLAVOR_POLICY(req->rq_sec_flavor), + SEC_FLAVOR_POLICY(tmpf)); + RETURN(-EPROTO); + } + + if ((SEC_FLAVOR_POLICY(req->rq_sec_flavor) != + SPTLRPC_POLICY_NULL) && + lustre_unpack_msg(req->rq_repbuf, req->rq_nob_received)) + RETURN(-EPROTO); + } + + switch (SEC_FLAVOR_SVC(req->rq_sec_flavor)) { + case SPTLRPC_SVC_NONE: + case SPTLRPC_SVC_AUTH: + LASSERT(ctx->cc_ops->verify); + rc = ctx->cc_ops->verify(ctx, req); + break; + case SPTLRPC_SVC_PRIV: + LASSERT(ctx->cc_ops->unseal); + rc = ctx->cc_ops->unseal(ctx, req); + break; + default: + LBUG(); + } + + LASSERT(rc || req->rq_repmsg); + RETURN(rc); +} + +/************************************************** + * security APIs * + **************************************************/ + +/* + * let policy module to determine whether take refrence of + * import or not. 
+ */ +static +struct ptlrpc_sec * sptlrpc_sec_create(struct obd_import *imp, + struct ptlrpc_svc_ctx *ctx, + __u32 flavor, + unsigned long flags) +{ + struct ptlrpc_sec_policy *policy; + struct ptlrpc_sec *sec; + ENTRY; + + flavor = SEC_FLAVOR_RPC(flavor); + + if (ctx) { + LASSERT(imp->imp_dlm_fake == 1); + + CDEBUG(D_SEC, "%s %s: reverse sec using flavor %s\n", + imp->imp_obd->obd_type->typ_name, + imp->imp_obd->obd_name, + sptlrpc_flavor2name(flavor)); + + policy = sptlrpc_policy_get(ctx->sc_policy); + flags |= PTLRPC_SEC_FL_REVERSE | PTLRPC_SEC_FL_ROOTONLY; + } else { + LASSERT(imp->imp_dlm_fake == 0); + + CDEBUG(D_SEC, "%s %s: select security flavor %s\n", + imp->imp_obd->obd_type->typ_name, + imp->imp_obd->obd_name, + sptlrpc_flavor2name(flavor)); + + policy = sptlrpc_flavor2policy(flavor); + if (!policy) { + CERROR("invalid flavor 0x%x\n", flavor); + RETURN(NULL); + } + } + + sec = policy->sp_cops->create_sec(imp, ctx, flavor, flags); + if (sec) { + atomic_inc(&sec->ps_refcount); + + /* take 1 busy count on behalf of sec itself, + * balanced in sptlrpc_set_put() + */ + atomic_inc(&sec->ps_busy); + } else + sptlrpc_policy_put(policy); + + RETURN(sec); +} + +static +void sptlrpc_sec_destroy(struct ptlrpc_sec *sec) +{ + struct ptlrpc_sec_policy *policy = sec->ps_policy; + + LASSERT(policy); + LASSERT(atomic_read(&sec->ps_refcount) == 0); + LASSERT(atomic_read(&sec->ps_busy) == 0); + LASSERT(policy->sp_cops->destroy_sec); + + policy->sp_cops->destroy_sec(sec); + sptlrpc_policy_put(policy); +} + +static +void sptlrpc_sec_put(struct ptlrpc_sec *sec) +{ + struct ptlrpc_sec_policy *policy = sec->ps_policy; + + if (!atomic_dec_and_test(&sec->ps_refcount)) { + sptlrpc_policy_put(policy); + return; + } + + ctx_cache_flush(sec, -1, 1, 1); + + if (atomic_dec_and_test(&sec->ps_busy)) + sptlrpc_sec_destroy(sec); + else + CWARN("delay to destroy %s@%p: busy contexts\n", + policy->sp_name, sec); +} + +/* + * return 1 means we should also destroy the sec structure. 
+ * normally return 0 + */ +static +int sptlrpc_sec_destroy_ctx(struct ptlrpc_sec *sec, + struct ptlrpc_cli_ctx *ctx) +{ + LASSERT(sec == ctx->cc_sec); + LASSERT(atomic_read(&sec->ps_busy)); + LASSERT(atomic_read(&ctx->cc_refcount) == 0); + LASSERT(hlist_unhashed(&ctx->cc_hash)); + LASSERT(list_empty(&ctx->cc_req_list)); + LASSERT(sec->ps_policy->sp_cops->destroy_ctx); + + sec->ps_policy->sp_cops->destroy_ctx(sec, ctx); + + if (atomic_dec_and_test(&sec->ps_busy)) { + LASSERT(atomic_read(&sec->ps_refcount) == 0); + return 1; + } + + return 0; +} + +/* + * when complete successfully, req->rq_reqmsg should point to the + * right place. + */ +int sptlrpc_cli_alloc_reqbuf(struct ptlrpc_request *req, int msgsize) +{ + struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx; + struct ptlrpc_sec_policy *policy; + int rc; + + LASSERT(ctx); + LASSERT(atomic_read(&ctx->cc_refcount)); + LASSERT(ctx->cc_sec); + LASSERT(ctx->cc_sec->ps_policy); + LASSERT(req->rq_reqmsg == NULL); + + policy = ctx->cc_sec->ps_policy; + rc = policy->sp_cops->alloc_reqbuf(ctx->cc_sec, req, msgsize); + if (!rc) { + LASSERT(req->rq_reqmsg); + LASSERT(req->rq_reqbuf || req->rq_clrbuf); + + /* zeroing preallocated buffer */ + if (req->rq_pool) + memset(req->rq_reqmsg, 0, msgsize); + } + + return rc; +} + +void sptlrpc_cli_free_reqbuf(struct ptlrpc_request *req) +{ + struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx; + struct ptlrpc_sec_policy *policy; + + LASSERT(ctx); + LASSERT(atomic_read(&ctx->cc_refcount)); + LASSERT(ctx->cc_sec); + LASSERT(ctx->cc_sec->ps_policy); + LASSERT(req->rq_reqbuf || req->rq_clrbuf); + + policy = ctx->cc_sec->ps_policy; + policy->sp_cops->free_reqbuf(ctx->cc_sec, req); +} + +int sptlrpc_cli_alloc_repbuf(struct ptlrpc_request *req, int msgsize) +{ + struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx; + struct ptlrpc_sec_policy *policy; + ENTRY; + + LASSERT(ctx); + LASSERT(atomic_read(&ctx->cc_refcount)); + LASSERT(ctx->cc_sec); + LASSERT(ctx->cc_sec->ps_policy); + + if (req->rq_repbuf) + 
RETURN(0); + + policy = ctx->cc_sec->ps_policy; + RETURN(policy->sp_cops->alloc_repbuf(ctx->cc_sec, req, msgsize)); +} + +void sptlrpc_cli_free_repbuf(struct ptlrpc_request *req) +{ + struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx; + struct ptlrpc_sec_policy *policy; + ENTRY; + + LASSERT(ctx); + LASSERT(atomic_read(&ctx->cc_refcount)); + LASSERT(ctx->cc_sec); + LASSERT(ctx->cc_sec->ps_policy); + LASSERT(req->rq_repbuf); + + policy = ctx->cc_sec->ps_policy; + policy->sp_cops->free_repbuf(ctx->cc_sec, req); + EXIT; +} + +int sptlrpc_import_get_sec(struct obd_import *imp, + struct ptlrpc_svc_ctx *ctx, + __u32 flavor, + unsigned long flags) +{ + struct obd_device *obd = imp->imp_obd; + ENTRY; + + LASSERT(obd); + LASSERT(obd->obd_type); + + /* old sec might be still there in reconnecting */ + if (imp->imp_sec) + RETURN(0); + + imp->imp_sec = sptlrpc_sec_create(imp, ctx, flavor, flags); + if (!imp->imp_sec) + RETURN(-EINVAL); + + RETURN(0); +} + +void sptlrpc_import_put_sec(struct obd_import *imp) +{ + if (imp->imp_sec == NULL) + return; + + sptlrpc_sec_put(imp->imp_sec); + imp->imp_sec = NULL; +} + +void sptlrpc_import_flush_root_ctx(struct obd_import *imp) +{ + if (imp == NULL || imp->imp_sec == NULL) + return; + + /* use 'grace' mode, it's crutial see explain in + * sptlrpc_req_refresh_ctx() + */ + ctx_cache_flush(imp->imp_sec, 0, 1, 1); +} + +void sptlrpc_import_flush_my_ctx(struct obd_import *imp) +{ + if (imp == NULL || imp->imp_sec == NULL) + return; + + ctx_cache_flush(imp->imp_sec, cfs_current()->uid, 1, 1); +} +EXPORT_SYMBOL(sptlrpc_import_flush_my_ctx); + +int sptlrpc_cli_install_rvs_ctx(struct obd_import *imp, + struct ptlrpc_cli_ctx *ctx) +{ + struct ptlrpc_sec_policy *policy = ctx->cc_sec->ps_policy; + + if (!policy->sp_cops->install_rctx) + return 0; + return policy->sp_cops->install_rctx(imp, ctx->cc_sec, ctx); +} + +int sptlrpc_svc_install_rvs_ctx(struct obd_import *imp, + struct ptlrpc_svc_ctx *ctx) +{ + struct ptlrpc_sec_policy *policy = ctx->sc_policy; 
+ + if (!policy->sp_sops->install_rctx) + return 0; + return policy->sp_sops->install_rctx(imp, ctx); +} + +/**************************************** + * server side security * + ****************************************/ + +int sptlrpc_svc_unwrap_request(struct ptlrpc_request *req) +{ + struct ptlrpc_sec_policy *policy; + struct lustre_msg *msg = req->rq_reqbuf; + int rc; + ENTRY; + + LASSERT(msg); + LASSERT(req->rq_reqmsg == NULL); + LASSERT(req->rq_repmsg == NULL); + + /* + * in any case we avoid to call unpack_msg() for request of null flavor + * which will later be done by ptlrpc_server_handle_request(). + */ + if (req->rq_reqdata_len < sizeof(struct lustre_msg)) { + CERROR("request size %d too small\n", req->rq_reqdata_len); + RETURN(SECSVC_DROP); + } + + if (msg->lm_magic == LUSTRE_MSG_MAGIC_V1 || + msg->lm_magic == LUSTRE_MSG_MAGIC_V1_SWABBED) { + req->rq_sec_flavor = SPTLRPC_FLVR_NULL; + } else { + req->rq_sec_flavor = msg->lm_secflvr; + + if (msg->lm_magic == LUSTRE_MSG_MAGIC_V2_SWABBED) + __swab32s(&req->rq_sec_flavor); + + if ((SEC_FLAVOR_POLICY(req->rq_sec_flavor) != + SPTLRPC_POLICY_NULL) && + lustre_unpack_msg(msg, req->rq_reqdata_len)) + RETURN(SECSVC_DROP); + } + + policy = sptlrpc_flavor2policy(req->rq_sec_flavor); + if (!policy) { + CERROR("unsupported security flavor %x\n", req->rq_sec_flavor); + RETURN(SECSVC_DROP); + } + + LASSERT(policy->sp_sops->accept); + rc = policy->sp_sops->accept(req); + + LASSERT(req->rq_reqmsg || rc != SECSVC_OK); + sptlrpc_policy_put(policy); + + /* FIXME move to proper place */ + if (rc == SECSVC_OK) { + __u32 opc = lustre_msg_get_opc(req->rq_reqmsg); + + if (opc == OST_WRITE || opc == OST_SAN_WRITE) + req->rq_bulk_write = 1; + else if (opc == OST_READ || opc == OST_SAN_READ) + req->rq_bulk_read = 1; + } + + RETURN(rc); +} + +int sptlrpc_svc_alloc_rs(struct ptlrpc_request *req, + int msglen) +{ + struct ptlrpc_sec_policy *policy; + struct ptlrpc_reply_state *rs; + int rc; + ENTRY; + + LASSERT(req->rq_svc_ctx); + 
LASSERT(req->rq_svc_ctx->sc_policy); + + policy = req->rq_svc_ctx->sc_policy; + LASSERT(policy->sp_sops->alloc_rs); + + rc = policy->sp_sops->alloc_rs(req, msglen); + if (unlikely(rc == -ENOMEM)) { + /* failed alloc, try emergency pool */ + rs = lustre_get_emerg_rs(req->rq_rqbd->rqbd_service); + if (rs == NULL) + RETURN(-ENOMEM); + + req->rq_reply_state = rs; + rc = policy->sp_sops->alloc_rs(req, msglen); + if (rc) { + lustre_put_emerg_rs(rs); + req->rq_reply_state = NULL; + } + } + + LASSERT(rc != 0 || + (req->rq_reply_state && req->rq_reply_state->rs_msg)); + + RETURN(rc); +} + +int sptlrpc_svc_wrap_reply(struct ptlrpc_request *req) +{ + struct ptlrpc_sec_policy *policy; + int rc; + ENTRY; + + LASSERT(req->rq_svc_ctx); + LASSERT(req->rq_svc_ctx->sc_policy); + + policy = req->rq_svc_ctx->sc_policy; + LASSERT(policy->sp_sops->authorize); + + rc = policy->sp_sops->authorize(req); + LASSERT(rc || req->rq_reply_state->rs_repdata_len); + + RETURN(rc); +} + +void sptlrpc_svc_free_rs(struct ptlrpc_reply_state *rs) +{ + struct ptlrpc_sec_policy *policy; + unsigned int prealloc; + ENTRY; + + LASSERT(rs->rs_svc_ctx); + LASSERT(rs->rs_svc_ctx->sc_policy); + + policy = rs->rs_svc_ctx->sc_policy; + LASSERT(policy->sp_sops->free_rs); + + prealloc = rs->rs_prealloc; + policy->sp_sops->free_rs(rs); + + if (prealloc) + lustre_put_emerg_rs(rs); + EXIT; +} + +void sptlrpc_svc_ctx_addref(struct ptlrpc_request *req) +{ + struct ptlrpc_svc_ctx *ctx = req->rq_svc_ctx; + + if (ctx == NULL) + return; + + LASSERT(atomic_read(&ctx->sc_refcount) > 0); + atomic_inc(&ctx->sc_refcount); +} + +void sptlrpc_svc_ctx_decref(struct ptlrpc_request *req) +{ + struct ptlrpc_svc_ctx *ctx = req->rq_svc_ctx; + + if (ctx == NULL) + return; + + LASSERT(atomic_read(&ctx->sc_refcount) > 0); + if (atomic_dec_and_test(&ctx->sc_refcount)) { + if (ctx->sc_policy->sp_sops->free_ctx) + ctx->sc_policy->sp_sops->free_ctx(ctx); + } + req->rq_svc_ctx = NULL; +} + +/**************************************** + * bulk 
security * + ****************************************/ + +int sptlrpc_cli_wrap_bulk(struct ptlrpc_request *req, + struct ptlrpc_bulk_desc *desc) +{ + struct ptlrpc_cli_ctx *ctx; + + if (!SEC_FLAVOR_HAS_BULK(req->rq_sec_flavor)) + return 0; + + LASSERT(req->rq_bulk_read || req->rq_bulk_write); + + ctx = req->rq_cli_ctx; + if (ctx->cc_ops->wrap_bulk) + return ctx->cc_ops->wrap_bulk(ctx, req, desc); + return 0; +} +EXPORT_SYMBOL(sptlrpc_cli_wrap_bulk); + +static +void pga_to_bulk_desc(int nob, obd_count pg_count, struct brw_page **pga, + struct ptlrpc_bulk_desc *desc) +{ + int i; + + LASSERT(pga); + LASSERT(*pga); + + for (i = 0; i < pg_count && nob > 0; i++) { +#ifdef __KERNEL__ + desc->bd_iov[i].kiov_page = pga[i]->pg; + desc->bd_iov[i].kiov_len = pga[i]->count > nob ? + nob : pga[i]->count; + desc->bd_iov[i].kiov_offset = pga[i]->off & ~CFS_PAGE_MASK; +#else +#warning FIXME for liblustre! + desc->bd_iov[i].iov_base = pga[i]->pg->addr; + desc->bd_iov[i].iov_len = pga[i]->count > nob ? + nob : pga[i]->count; +#endif + + desc->bd_iov_count++; + nob -= pga[i]->count; + } +} + +int sptlrpc_cli_unwrap_bulk_read(struct ptlrpc_request *req, + int nob, obd_count pg_count, + struct brw_page **pga) +{ + struct ptlrpc_bulk_desc *desc; + struct ptlrpc_cli_ctx *ctx; + int rc = 0; + + if (!SEC_FLAVOR_HAS_BULK(req->rq_sec_flavor)) + return 0; + + LASSERT(req->rq_bulk_read && !req->rq_bulk_write); + + OBD_ALLOC(desc, offsetof(struct ptlrpc_bulk_desc, bd_iov[pg_count])); + if (desc == NULL) { + CERROR("out of memory, can't verify bulk read data\n"); + return -ENOMEM; + } + + pga_to_bulk_desc(nob, pg_count, pga, desc); + + ctx = req->rq_cli_ctx; + if (ctx->cc_ops->unwrap_bulk) + rc = ctx->cc_ops->unwrap_bulk(ctx, req, desc); + + OBD_FREE(desc, offsetof(struct ptlrpc_bulk_desc, bd_iov[pg_count])); + + return rc; +} +EXPORT_SYMBOL(sptlrpc_cli_unwrap_bulk_read); + +int sptlrpc_cli_unwrap_bulk_write(struct ptlrpc_request *req, + struct ptlrpc_bulk_desc *desc) +{ + struct ptlrpc_cli_ctx 
*ctx;
+
+        if (!SEC_FLAVOR_HAS_BULK(req->rq_sec_flavor))
+                return 0;
+
+        LASSERT(!req->rq_bulk_read && req->rq_bulk_write);
+
+        ctx = req->rq_cli_ctx;
+        if (ctx->cc_ops->unwrap_bulk)
+                return ctx->cc_ops->unwrap_bulk(ctx, req, desc);
+
+        return 0;
+}
+EXPORT_SYMBOL(sptlrpc_cli_unwrap_bulk_write);
+
+int sptlrpc_svc_wrap_bulk(struct ptlrpc_request *req,
+                          struct ptlrpc_bulk_desc *desc)
+{
+        struct ptlrpc_svc_ctx *ctx;
+
+        if (!SEC_FLAVOR_HAS_BULK(req->rq_sec_flavor))
+                return 0;
+
+        LASSERT(req->rq_bulk_read || req->rq_bulk_write);
+
+        ctx = req->rq_svc_ctx;
+        if (ctx->sc_policy->sp_sops->wrap_bulk)
+                return ctx->sc_policy->sp_sops->wrap_bulk(req, desc);
+
+        return 0;
+}
+EXPORT_SYMBOL(sptlrpc_svc_wrap_bulk);
+
+int sptlrpc_svc_unwrap_bulk(struct ptlrpc_request *req,
+                            struct ptlrpc_bulk_desc *desc)
+{
+        struct ptlrpc_svc_ctx *ctx;
+
+        if (!SEC_FLAVOR_HAS_BULK(req->rq_sec_flavor))
+                return 0;
+
+        LASSERT(req->rq_bulk_read || req->rq_bulk_write);
+
+        ctx = req->rq_svc_ctx;
+        if (ctx->sc_policy->sp_sops->unwrap_bulk)
+                return ctx->sc_policy->sp_sops->unwrap_bulk(req, desc);
+
+        return 0;
+}
+EXPORT_SYMBOL(sptlrpc_svc_unwrap_bulk);
+
+
+/****************************************
+ * user descriptor helpers              *
+ ****************************************/
+
+int sptlrpc_user_desc_size(void)
+{
+#ifdef __KERNEL__
+        int ngroups = current_ngroups;
+
+        if (ngroups > LUSTRE_MAX_GROUPS)
+                ngroups = LUSTRE_MAX_GROUPS;
+
+        return sizeof(struct ptlrpc_user_desc) + ngroups * sizeof(__u32);
+#else
+        return sizeof(struct ptlrpc_user_desc);
+#endif
+}
+EXPORT_SYMBOL(sptlrpc_user_desc_size);
+
+int sptlrpc_pack_user_desc(struct lustre_msg *msg, int offset)
+{
+        struct ptlrpc_user_desc *pud;
+
+        pud = lustre_msg_buf(msg, offset, 0);
+
+        pud->pud_uid = cfs_current()->uid;
+        pud->pud_gid = cfs_current()->gid;
+        pud->pud_fsuid = cfs_current()->fsuid;
+        pud->pud_fsgid = cfs_current()->fsgid;
+        pud->pud_cap = cfs_current()->cap_effective;
+        pud->pud_ngroups = (msg->lm_buflens[offset] -
sizeof(*pud)) / 4; + +#ifdef __KERNEL__ + task_lock(current); + if (pud->pud_ngroups > current_ngroups) + pud->pud_ngroups = current_ngroups; + memcpy(pud->pud_groups, cfs_current()->group_info->blocks[0], + pud->pud_ngroups * sizeof(__u32)); + task_unlock(current); +#endif + + return 0; +} +EXPORT_SYMBOL(sptlrpc_pack_user_desc); + +int sptlrpc_unpack_user_desc(struct lustre_msg *msg, int offset) +{ + struct ptlrpc_user_desc *pud; + int i; + + pud = lustre_msg_buf(msg, offset, sizeof(*pud)); + if (!pud) + return -EINVAL; + + if (lustre_msg_swabbed(msg)) { + __swab32s(&pud->pud_uid); + __swab32s(&pud->pud_gid); + __swab32s(&pud->pud_fsuid); + __swab32s(&pud->pud_fsgid); + __swab32s(&pud->pud_cap); + __swab32s(&pud->pud_ngroups); + } + + if (pud->pud_ngroups > LUSTRE_MAX_GROUPS) { + CERROR("%u groups is too large\n", pud->pud_ngroups); + return -EINVAL; + } + + if (sizeof(*pud) + pud->pud_ngroups * sizeof(__u32) > + msg->lm_buflens[offset]) { + CERROR("%u groups are claimed but bufsize only %u\n", + pud->pud_ngroups, msg->lm_buflens[offset]); + return -EINVAL; + } + + if (lustre_msg_swabbed(msg)) { + for (i = 0; i < pud->pud_ngroups; i++) + __swab32s(&pud->pud_groups[i]); + } + + return 0; +} +EXPORT_SYMBOL(sptlrpc_unpack_user_desc); + +/**************************************** + * Helpers to assist policy modules to * + * implement checksum funcationality * + ****************************************/ + +struct { + char *name; + int size; +} csum_types[] = { + [BULK_CSUM_ALG_NULL] = { "null", 0 }, + [BULK_CSUM_ALG_CRC32] = { "crc32", 4 }, + [BULK_CSUM_ALG_MD5] = { "md5", 16 }, + [BULK_CSUM_ALG_SHA1] = { "sha1", 20 }, + [BULK_CSUM_ALG_SHA256] = { "sha256", 32 }, + [BULK_CSUM_ALG_SHA384] = { "sha384", 48 }, + [BULK_CSUM_ALG_SHA512] = { "sha512", 64 }, +}; + +int bulk_sec_desc_size(__u32 csum_alg, int request, int read) +{ + int size = sizeof(struct ptlrpc_bulk_sec_desc); + + LASSERT(csum_alg < BULK_CSUM_ALG_MAX); + + /* read request don't need extra data */ + if 
(!(read && request)) + size += csum_types[csum_alg].size; + + return size; +} +EXPORT_SYMBOL(bulk_sec_desc_size); + +int bulk_sec_desc_unpack(struct lustre_msg *msg, int offset) +{ + struct ptlrpc_bulk_sec_desc *bsd; + int size = msg->lm_buflens[offset]; + + bsd = lustre_msg_buf(msg, offset, sizeof(*bsd)); + if (bsd == NULL) { + CERROR("Invalid bulk sec desc: size %d\n", size); + return -EINVAL; + } + + if (lustre_msg_swabbed(msg)) { + __swab32s(&bsd->bsd_version); + __swab32s(&bsd->bsd_pad); + __swab32s(&bsd->bsd_csum_alg); + __swab32s(&bsd->bsd_priv_alg); + } + + if (bsd->bsd_version != 0) { + CERROR("Unexpected version %u\n", bsd->bsd_version); + return -EPROTO; + } + + if (bsd->bsd_csum_alg >= BULK_CSUM_ALG_MAX) { + CERROR("Unsupported checksum algorithm %u\n", + bsd->bsd_csum_alg); + return -EINVAL; + } + if (bsd->bsd_priv_alg >= BULK_PRIV_ALG_MAX) { + CERROR("Unsupported cipher algorithm %u\n", + bsd->bsd_priv_alg); + return -EINVAL; + } + + if (size > sizeof(*bsd) && + size < sizeof(*bsd) + csum_types[bsd->bsd_csum_alg].size) { + CERROR("Mal-formed checksum data: csum alg %u, size %d\n", + bsd->bsd_csum_alg, size); + return -EINVAL; + } + + return 0; +} +EXPORT_SYMBOL(bulk_sec_desc_unpack); + +#ifdef __KERNEL__ +static +int do_bulk_checksum_crc32(struct ptlrpc_bulk_desc *desc, void *buf) +{ + struct page *page; + int off; + char *ptr; + __u32 crc32 = ~0; + int len, i; + + for (i = 0; i < desc->bd_iov_count; i++) { + page = desc->bd_iov[i].kiov_page; + off = desc->bd_iov[i].kiov_offset & ~CFS_PAGE_MASK; + ptr = cfs_kmap(page) + off; + len = desc->bd_iov[i].kiov_len; + + crc32 = crc32_le(crc32, ptr, len); + + cfs_kunmap(page); + } + + *((__u32 *) buf) = crc32; + return 0; +} + +static +int do_bulk_checksum(struct ptlrpc_bulk_desc *desc, __u32 alg, void *buf) +{ + struct crypto_tfm *tfm; + struct scatterlist *sl; + int i, rc = 0; + + LASSERT(alg > BULK_CSUM_ALG_NULL && + alg < BULK_CSUM_ALG_MAX); + + if (alg == BULK_CSUM_ALG_CRC32) + return 
do_bulk_checksum_crc32(desc, buf); + + tfm = crypto_alloc_tfm(csum_types[alg].name, 0); + if (tfm == NULL) { + CERROR("Unable to allocate tfm %s\n", csum_types[alg].name); + return -ENOMEM; + } + + OBD_ALLOC(sl, sizeof(*sl) * desc->bd_iov_count); + if (sl == NULL) { + rc = -ENOMEM; + goto out_tfm; + } + + for (i = 0; i < desc->bd_iov_count; i++) { + sl[i].page = desc->bd_iov[i].kiov_page; + sl[i].offset = desc->bd_iov[i].kiov_offset & ~CFS_PAGE_MASK; + sl[i].length = desc->bd_iov[i].kiov_len; + } + + crypto_digest_init(tfm); + crypto_digest_update(tfm, sl, desc->bd_iov_count); + crypto_digest_final(tfm, buf); + + OBD_FREE(sl, sizeof(*sl) * desc->bd_iov_count); + +out_tfm: + crypto_free_tfm(tfm); + return rc; +} + +#else /* !__KERNEL__ */ +static +int do_bulk_checksum(struct ptlrpc_bulk_desc *desc, __u32 alg, void *buf) +{ + __u32 crc32 = ~0; + int i; + + LASSERT(alg == BULK_CSUM_ALG_CRC32); + + for (i = 0; i < desc->bd_iov_count; i++) { + char *ptr = desc->bd_iov[i].iov_base; + int len = desc->bd_iov[i].iov_len; + + crc32 = crc32_le(crc32, ptr, len); + } + + *((__u32 *) buf) = crc32; + return 0; +} +#endif + +/* + * perform algorithm @alg checksum on @desc, store result in @buf. + * if anything goes wrong, leave 'alg' be BULK_CSUM_ALG_NULL. 
+ */
+static
+int generate_bulk_csum(struct ptlrpc_bulk_desc *desc, __u32 alg,
+                       struct ptlrpc_bulk_sec_desc *bsd, int bsdsize)
+{
+        int rc;
+
+        LASSERT(bsd);
+        LASSERT(alg < BULK_CSUM_ALG_MAX);
+
+        bsd->bsd_csum_alg = BULK_CSUM_ALG_NULL;
+
+        if (alg == BULK_CSUM_ALG_NULL)
+                return 0;
+
+        LASSERT(bsdsize >= sizeof(*bsd) + csum_types[alg].size);
+
+        rc = do_bulk_checksum(desc, alg, bsd->bsd_csum);
+        if (rc == 0)
+                bsd->bsd_csum_alg = alg;
+
+        return rc;
+}
+
+static
+int verify_bulk_csum(struct ptlrpc_bulk_desc *desc, int read,
+                     struct ptlrpc_bulk_sec_desc *bsdv, int bsdvsize,
+                     struct ptlrpc_bulk_sec_desc *bsdr, int bsdrsize)
+{
+        char           *csum_p;
+        char           *buf = NULL;
+        int             csum_size, rc = 0;
+
+        LASSERT(bsdv);
+        LASSERT(bsdv->bsd_csum_alg < BULK_CSUM_ALG_MAX);
+
+        if (bsdr)
+                bsdr->bsd_csum_alg = BULK_CSUM_ALG_NULL;
+
+        if (bsdv->bsd_csum_alg == BULK_CSUM_ALG_NULL)
+                return 0;
+
+        /* for all supported algorithms */
+        csum_size = csum_types[bsdv->bsd_csum_alg].size;
+
+        if (bsdvsize < sizeof(*bsdv) + csum_size) {
+                CERROR("verifier size %d too small, require %d\n",
+                       bsdvsize, (int) (sizeof(*bsdv) + csum_size));
+                return -EINVAL;
+        }
+
+        if (bsdr) {
+                LASSERT(bsdrsize >= sizeof(*bsdr) + csum_size);
+                csum_p = (char *) bsdr->bsd_csum;
+        } else {
+                OBD_ALLOC(buf, csum_size);
+                if (buf == NULL)
+                        return -ENOMEM;
+                csum_p = buf;
+        }
+
+        rc = do_bulk_checksum(desc, bsdv->bsd_csum_alg, csum_p);
+
+        if (memcmp(bsdv->bsd_csum, csum_p, csum_size)) {
+                CERROR("BAD %s CHECKSUM (%s), data mutated during "
+                       "transfer!\n", read ? "READ" : "WRITE",
+                       csum_types[bsdv->bsd_csum_alg].name);
+                rc = -EINVAL;
+        } else {
+                CDEBUG(D_SEC, "bulk %s checksum (%s) verified\n",
+                       read ?
"read" : "write", + csum_types[bsdv->bsd_csum_alg].name); + } + + if (bsdr) { + bsdr->bsd_csum_alg = bsdv->bsd_csum_alg; + memcpy(bsdr->bsd_csum, csum_p, csum_size); + } else { + LASSERT(buf); + OBD_FREE(buf, csum_size); + } + + return rc; +} + +int bulk_csum_cli_request(struct ptlrpc_bulk_desc *desc, int read, + __u32 alg, struct lustre_msg *rmsg, int roff) +{ + struct ptlrpc_bulk_sec_desc *bsdr; + int rsize, rc = 0; + + rsize = rmsg->lm_buflens[roff]; + bsdr = lustre_msg_buf(rmsg, roff, sizeof(*bsdr)); + + LASSERT(bsdr); + LASSERT(rsize >= sizeof(*bsdr)); + LASSERT(alg < BULK_CSUM_ALG_MAX); + + if (read) + bsdr->bsd_csum_alg = alg; + else { + rc = generate_bulk_csum(desc, alg, bsdr, rsize); + if (rc) { + CERROR("client bulk write: failed to perform " + "checksum: %d\n", rc); + } + } + + return rc; +} +EXPORT_SYMBOL(bulk_csum_cli_request); + +int bulk_csum_cli_reply(struct ptlrpc_bulk_desc *desc, int read, + struct lustre_msg *rmsg, int roff, + struct lustre_msg *vmsg, int voff) +{ + struct ptlrpc_bulk_sec_desc *bsdv, *bsdr; + int rsize, vsize; + + rsize = rmsg->lm_buflens[roff]; + vsize = vmsg->lm_buflens[voff]; + bsdr = lustre_msg_buf(rmsg, roff, 0); + bsdv = lustre_msg_buf(vmsg, voff, 0); + + if (bsdv == NULL || vsize < sizeof(*bsdv)) { + CERROR("Invalid checksum verifier from server: size %d\n", + vsize); + return -EINVAL; + } + + LASSERT(bsdr); + LASSERT(rsize >= sizeof(*bsdr)); + LASSERT(vsize >= sizeof(*bsdv)); + + if (bsdr->bsd_csum_alg != bsdv->bsd_csum_alg) { + CERROR("bulk %s: checksum algorithm mismatch: client request " + "%s but server reply with %s. try to use the new one " + "for checksum verification\n", + read ? 
"read" : "write", + csum_types[bsdr->bsd_csum_alg].name, + csum_types[bsdv->bsd_csum_alg].name); + } + + if (read) + return verify_bulk_csum(desc, 1, bsdv, vsize, NULL, 0); + else { + char *cli, *srv, *new = NULL; + int csum_size = csum_types[bsdr->bsd_csum_alg].size; + + LASSERT(bsdr->bsd_csum_alg < BULK_CSUM_ALG_MAX); + if (bsdr->bsd_csum_alg == BULK_CSUM_ALG_NULL) + return 0; + + if (vsize < sizeof(*bsdv) + csum_size) { + CERROR("verifier size %d too small, require %d\n", + vsize, sizeof(*bsdv) + csum_size); + return -EINVAL; + } + + cli = (char *) (bsdr + 1); + srv = (char *) (bsdv + 1); + + if (!memcmp(cli, srv, csum_size)) { + /* checksum confirmed */ + CDEBUG(D_SEC, "bulk write checksum (%s) confirmed\n", + csum_types[bsdr->bsd_csum_alg].name); + return 0; + } + + /* checksum mismatch, re-compute a new one and compare with + * others, give out proper warnings. + */ + OBD_ALLOC(new, csum_size); + if (new == NULL) + return -ENOMEM; + + do_bulk_checksum(desc, bsdr->bsd_csum_alg, new); + + if (!memcmp(new, srv, csum_size)) { + CERROR("BAD WRITE CHECKSUM (%s): pages were mutated " + "on the client after we checksummed them\n", + csum_types[bsdr->bsd_csum_alg].name); + } else if (!memcmp(new, cli, csum_size)) { + CERROR("BAD WRITE CHECKSUM (%s): pages were mutated " + "in transit\n", + csum_types[bsdr->bsd_csum_alg].name); + } else { + CERROR("BAD WRITE CHECKSUM (%s): pages were mutated " + "in transit, and the current page contents " + "don't match the originals and what the server " + "received\n", + csum_types[bsdr->bsd_csum_alg].name); + } + OBD_FREE(new, csum_size); + + return -EINVAL; + } +} +EXPORT_SYMBOL(bulk_csum_cli_reply); + +int bulk_csum_svc(struct ptlrpc_bulk_desc *desc, int read, + struct lustre_msg *vmsg, int voff, + struct lustre_msg *rmsg, int roff) +{ + struct ptlrpc_bulk_sec_desc *bsdv, *bsdr; + int vsize, rsize, rc; + + vsize = vmsg->lm_buflens[voff]; + rsize = rmsg->lm_buflens[roff]; + bsdv = lustre_msg_buf(vmsg, voff, 0); + bsdr = 
lustre_msg_buf(rmsg, roff, 0); + + LASSERT(vsize >= sizeof(*bsdv)); + LASSERT(rsize >= sizeof(*bsdr)); + LASSERT(bsdv && bsdr); + + if (read) { + rc = generate_bulk_csum(desc, bsdv->bsd_csum_alg, bsdr, rsize); + if (rc) + CERROR("bulk read: server failed to generate %s " + "checksum: %d\n", + csum_types[bsdv->bsd_csum_alg].name, rc); + } else + rc = verify_bulk_csum(desc, 0, bsdv, vsize, bsdr, rsize); + + return rc; +} +EXPORT_SYMBOL(bulk_csum_svc); + +/**************************************** + * user supplied flavor string parsing * + ****************************************/ + +static +int get_default_flavor(enum lustre_part to_part, struct sec_flavor_config *conf) +{ + conf->sfc_bulk_priv = BULK_PRIV_ALG_NULL; + conf->sfc_bulk_csum = BULK_CSUM_ALG_NULL; + conf->sfc_flags = 0; + + switch (to_part) { + case LUSTRE_MDT: + conf->sfc_rpc_flavor = SPTLRPC_FLVR_PLAIN; + return 0; + case LUSTRE_OST: + conf->sfc_rpc_flavor = SPTLRPC_FLVR_NULL; + return 0; + default: + CERROR("Unknown to lustre part %d, apply defaults\n", to_part); + conf->sfc_rpc_flavor = SPTLRPC_FLVR_NULL; + return -EINVAL; + } +} + +static +void get_flavor_by_rpc(__u32 rpc_flavor, struct sec_flavor_config *conf) +{ + conf->sfc_rpc_flavor = rpc_flavor; + conf->sfc_bulk_priv = BULK_PRIV_ALG_NULL; + conf->sfc_bulk_csum = BULK_CSUM_ALG_NULL; + conf->sfc_flags = 0; + + switch (rpc_flavor) { + case SPTLRPC_FLVR_NULL: + case SPTLRPC_FLVR_PLAIN: + break; + case SPTLRPC_FLVR_KRB5P: + conf->sfc_bulk_priv = BULK_PRIV_ALG_ARC4; + /* fall through */ + case SPTLRPC_FLVR_KRB5I: + conf->sfc_bulk_csum = BULK_CSUM_ALG_SHA1; + break; + default: + LBUG(); + } +} + +static +void get_flavor_by_rpc_bulk(__u32 rpc_flavor, int bulk_priv, + struct sec_flavor_config *conf) +{ + if (bulk_priv) + conf->sfc_bulk_priv = BULK_PRIV_ALG_ARC4; + else + conf->sfc_bulk_priv = BULK_PRIV_ALG_NULL; + + switch (rpc_flavor) { + case SPTLRPC_FLVR_PLAIN: + conf->sfc_bulk_csum = BULK_CSUM_ALG_MD5; + break; + case SPTLRPC_FLVR_KRB5I: + case 
SPTLRPC_FLVR_KRB5P: + conf->sfc_bulk_csum = BULK_CSUM_ALG_SHA1; + break; + default: + LBUG(); + } +} + +static __u32 __flavors[] = { + SPTLRPC_FLVR_NULL, + SPTLRPC_FLVR_PLAIN, + SPTLRPC_FLVR_KRB5I, + SPTLRPC_FLVR_KRB5P, +}; + +#define __nflavors (sizeof(__flavors)/sizeof(__u32)) + +/* + * flavor string format: rpc[-bulk[:cksum/enc]] + * for examples: + * null + * plain-bulki + * krb5p-bulkn + * krb5i-bulkp + * krb5i-bulkp:sha512/arc4 + */ +int sptlrpc_parse_flavor(enum lustre_part from_part, enum lustre_part to_part, + char *str, struct sec_flavor_config *conf) +{ + char *f, *bulk, *alg, *enc; + char buf[64]; + int i, bulk_priv; + ENTRY; + + if (str == NULL) { + if (get_default_flavor(to_part, conf)) + return -EINVAL; + goto set_flags; + } + + for (i = 0; i < __nflavors; i++) { + f = sptlrpc_flavor2name(__flavors[i]); + if (strncmp(str, f, strlen(f)) == 0) + break; + } + + if (i >= __nflavors) + GOTO(invalid, -EINVAL); + + /* prepare local buffer thus we can modify it as we want */ + strncpy(buf, str, 64); + buf[64 - 1] = '\0'; + + /* find bulk string */ + bulk = strchr(buf, '-'); + if (bulk) + *bulk++ = '\0'; + + /* now the first part must equal to rpc flavor name */ + if (strcmp(buf, f) != 0) + GOTO(invalid, -EINVAL); + + get_flavor_by_rpc(__flavors[i], conf); + + if (bulk == NULL) + goto set_flags; + + /* null flavor should not have any suffix */ + if (__flavors[i] == SPTLRPC_FLVR_NULL) + GOTO(invalid, -EINVAL); + + /* find bulk algorithm string */ + alg = strchr(bulk, ':'); + if (alg) + *alg++ = '\0'; + + /* verify bulk section */ + if (strcmp(bulk, "bulkn") == 0) { + conf->sfc_bulk_csum = BULK_CSUM_ALG_NULL; + conf->sfc_bulk_priv = BULK_PRIV_ALG_NULL; + goto set_flags; + } + + if (strcmp(bulk, "bulki") == 0) + bulk_priv = 0; + else if (strcmp(bulk, "bulkp") == 0) + bulk_priv = 1; + else + GOTO(invalid, -EINVAL); + + /* plain policy dosen't support bulk encryption */ + if (bulk_priv && __flavors[i] == SPTLRPC_FLVR_PLAIN) + GOTO(invalid, -EINVAL); + + 
get_flavor_by_rpc_bulk(__flavors[i], bulk_priv, conf); + + if (alg == NULL) + goto set_flags; + + /* find encryption algorithm string */ + enc = strchr(alg, '/'); + if (enc) + *enc++ = '\0'; + + /* bulk combination sanity check */ + if ((bulk_priv && enc == NULL) || (bulk_priv == 0 && enc)) + GOTO(invalid, -EINVAL); + + /* checksum algorithm */ + for (i = 0; i < BULK_CSUM_ALG_MAX; i++) { + if (strcmp(alg, csum_types[i].name) == 0) { + conf->sfc_bulk_csum = i; + break; + } + } + if (i >= BULK_CSUM_ALG_MAX) + GOTO(invalid, -EINVAL); + + /* privacy algorithm */ + if (enc) { + if (strcmp(enc, "arc4") != 0) + GOTO(invalid, -EINVAL); + conf->sfc_bulk_priv = BULK_PRIV_ALG_ARC4; + } + +set_flags: + /* set ROOTONLY flag to: + * - to OST + * - from MDT to MDT + */ + if ((to_part == LUSTRE_MDT && from_part == LUSTRE_MDT) || + to_part == LUSTRE_OST) + conf->sfc_flags |= PTLRPC_SEC_FL_ROOTONLY; + +#ifdef __BIG_ENDIAN + __swab32s(&conf->sfc_rpc_flavor); + __swab32s(&conf->sfc_bulk_csum); + __swab32s(&conf->sfc_bulk_priv); + __swab32s(&conf->sfc_flags); +#endif + return 0; +invalid: + CERROR("invalid flavor string: %s\n", str); + return -EINVAL; +} +EXPORT_SYMBOL(sptlrpc_parse_flavor); + +/**************************************** + * misc helpers * + ****************************************/ + +const char * sec2target_str(struct ptlrpc_sec *sec) +{ + if (!sec || !sec->ps_import || !sec->ps_import->imp_obd) + return "*"; + if (sec->ps_flags & PTLRPC_SEC_FL_REVERSE) + return "c"; + return obd_uuid2str(&sec->ps_import->imp_obd->u.cli.cl_target_uuid); +} +EXPORT_SYMBOL(sec2target_str); + +int sptlrpc_lprocfs_rd(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + struct obd_device *obd = data; + struct sec_flavor_config *conf = &obd->u.cli.cl_sec_conf; + struct ptlrpc_sec *sec = NULL; + char flags_str[20]; + + if (obd == NULL) + return 0; + + LASSERT(strcmp(obd->obd_type->typ_name, LUSTRE_OSC_NAME) == 0 || + strcmp(obd->obd_type->typ_name, LUSTRE_MDC_NAME) == 
0 || + strcmp(obd->obd_type->typ_name, LUSTRE_MGC_NAME) == 0); + LASSERT(conf->sfc_bulk_csum < BULK_CSUM_ALG_MAX); + LASSERT(conf->sfc_bulk_priv < BULK_PRIV_ALG_MAX); + + if (obd->u.cli.cl_import) + sec = obd->u.cli.cl_import->imp_sec; + + flags_str[0] = '\0'; + if (conf->sfc_flags & PTLRPC_SEC_FL_REVERSE) + strncat(flags_str, "reverse,", sizeof(flags_str)); + if (conf->sfc_flags & PTLRPC_SEC_FL_ROOTONLY) + strncat(flags_str, "rootonly,", sizeof(flags_str)); + if (flags_str[0] != '\0') + flags_str[strlen(flags_str) - 1] = '\0'; + + return snprintf(page, count, + "rpc_flavor: %s\n" + "bulk_flavor: %s checksum, %s encryption\n" + "flags: %s\n" + "ctx_cache: size %u, busy %d\n" + "gc: interval %lus, next %lds\n", + sptlrpc_flavor2name(conf->sfc_rpc_flavor), + csum_types[conf->sfc_bulk_csum].name, + conf->sfc_bulk_priv == BULK_PRIV_ALG_NULL ? + "null" : "arc4", // XXX + flags_str, + sec ? sec->ps_ccache_size : 0, + sec ? atomic_read(&sec->ps_busy) : 0, + sec ? sec->ps_gc_interval: 0, + sec ? (sec->ps_gc_interval ? + sec->ps_gc_next - cfs_time_current_sec() : 0) + : 0); +} +EXPORT_SYMBOL(sptlrpc_lprocfs_rd); + + +int sptlrpc_init(void) +{ + int rc; + + rc = sptlrpc_null_init(); + if (rc) + goto out; + + rc = sptlrpc_plain_init(); + if (rc) + goto out_null; + return 0; + +out_null: + sptlrpc_null_exit(); +out: + return rc; +} + +int sptlrpc_exit(void) +{ + sptlrpc_plain_exit(); + sptlrpc_null_exit(); + return 0; +} diff --git a/lustre/ptlrpc/sec_null.c b/lustre/ptlrpc/sec_null.c new file mode 100644 index 0000000..7b1d391 --- /dev/null +++ b/lustre/ptlrpc/sec_null.c @@ -0,0 +1,305 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2004-2006 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. 
+ * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_SEC + +#ifndef __KERNEL__ +#include +#endif + +#include +#include +#include +#include + +static struct ptlrpc_sec_policy null_policy; +static struct ptlrpc_sec null_sec; +static struct ptlrpc_cli_ctx null_cli_ctx; +static struct ptlrpc_svc_ctx null_svc_ctx; + +static +int null_ctx_refresh(struct ptlrpc_cli_ctx *ctx) +{ + /* should never reach here */ + LBUG(); + return 0; +} + +static +int null_ctx_sign(struct ptlrpc_cli_ctx *ctx, struct ptlrpc_request *req) +{ + if (req->rq_reqbuf->lm_magic != LUSTRE_MSG_MAGIC_V1) + req->rq_reqbuf->lm_secflvr = SPTLRPC_FLVR_NULL; + req->rq_reqdata_len = req->rq_reqlen; + return 0; +} + +static +int null_ctx_verify(struct ptlrpc_cli_ctx *ctx, struct ptlrpc_request *req) +{ + req->rq_repmsg = req->rq_repbuf; + req->rq_replen = req->rq_repdata_len; + return 0; +} + +static struct ptlrpc_ctx_ops null_ctx_ops = { + .refresh = null_ctx_refresh, + .sign = null_ctx_sign, + .verify = null_ctx_verify, +}; + +static struct ptlrpc_svc_ctx null_svc_ctx = { + .sc_refcount = ATOMIC_INIT(1), + .sc_policy = &null_policy, +}; + +static +struct ptlrpc_sec* null_create_sec(struct obd_import *imp, + struct ptlrpc_svc_ctx *ctx, + __u32 flavor, + unsigned long flags) +{ + LASSERT(SEC_FLAVOR_POLICY(flavor) == SPTLRPC_POLICY_NULL); + return 
&null_sec; +} + +static +void null_destroy_sec(struct ptlrpc_sec *sec) +{ + LASSERT(sec == &null_sec); +} + +static +struct ptlrpc_cli_ctx *null_lookup_ctx(struct ptlrpc_sec *sec, + struct vfs_cred *vcred) +{ + atomic_inc(&null_cli_ctx.cc_refcount); + return &null_cli_ctx; +} + +static +int null_alloc_reqbuf(struct ptlrpc_sec *sec, + struct ptlrpc_request *req, + int msgsize) +{ + if (!req->rq_reqbuf) { + LASSERT(!req->rq_pool); + OBD_ALLOC(req->rq_reqbuf, msgsize); + if (!req->rq_reqbuf) + return -ENOMEM; + + req->rq_reqbuf_len = msgsize; + } else { + LASSERT(req->rq_pool); + LASSERT(req->rq_reqbuf_len >= msgsize); + memset(req->rq_reqbuf, 0, msgsize); + } + + req->rq_reqmsg = req->rq_reqbuf; + return 0; +} + +static +void null_free_reqbuf(struct ptlrpc_sec *sec, + struct ptlrpc_request *req) +{ + if (!req->rq_pool) { + OBD_FREE(req->rq_reqbuf, req->rq_reqbuf_len); + req->rq_reqbuf = NULL; + req->rq_reqbuf_len = 0; + } +} + +static +int null_alloc_repbuf(struct ptlrpc_sec *sec, + struct ptlrpc_request *req, + int msgsize) +{ + OBD_ALLOC(req->rq_repbuf, msgsize); + if (!req->rq_repbuf) + return -ENOMEM; + + req->rq_repbuf_len = msgsize; + return 0; +} + +static +void null_free_repbuf(struct ptlrpc_sec *sec, + struct ptlrpc_request *req) +{ + OBD_FREE(req->rq_repbuf, req->rq_repbuf_len); + req->rq_repbuf = NULL; + req->rq_repbuf_len = 0; +} + +static +int null_accept(struct ptlrpc_request *req) +{ + LASSERT(SEC_FLAVOR_POLICY(req->rq_sec_flavor) == SPTLRPC_POLICY_NULL); + + if (SEC_FLAVOR_RPC(req->rq_sec_flavor) != SPTLRPC_FLVR_NULL) { + CERROR("Invalid flavor 0x%x\n", req->rq_sec_flavor); + return SECSVC_DROP; + } + + req->rq_reqmsg = req->rq_reqbuf; + req->rq_reqlen = req->rq_reqdata_len; + + req->rq_svc_ctx = &null_svc_ctx; + atomic_inc(&req->rq_svc_ctx->sc_refcount); + + return SECSVC_OK; +} + +static +int null_alloc_rs(struct ptlrpc_request *req, int msgsize) +{ + struct ptlrpc_reply_state *rs; + int rs_size = sizeof(*rs) + msgsize; + + LASSERT(msgsize % 8 == 
0); + + rs = req->rq_reply_state; + + if (rs) { + /* pre-allocated */ + LASSERT(rs->rs_size >= rs_size); + } else { + OBD_ALLOC(rs, rs_size); + if (rs == NULL) + return -ENOMEM; + + rs->rs_size = rs_size; + } + + rs->rs_svc_ctx = req->rq_svc_ctx; + atomic_inc(&req->rq_svc_ctx->sc_refcount); + + rs->rs_repbuf = (struct lustre_msg *) (rs + 1); + rs->rs_repbuf_len = rs_size - sizeof(*rs); + rs->rs_msg = rs->rs_repbuf; + + req->rq_reply_state = rs; + return 0; +} + +static +void null_free_rs(struct ptlrpc_reply_state *rs) +{ + LASSERT(atomic_read(&rs->rs_svc_ctx->sc_refcount) > 1); + atomic_dec(&rs->rs_svc_ctx->sc_refcount); + + if (!rs->rs_prealloc) + OBD_FREE(rs, rs->rs_size); +} + +static +int null_authorize(struct ptlrpc_request *req) +{ + struct ptlrpc_reply_state *rs = req->rq_reply_state; + + LASSERT(rs); + if (rs->rs_repbuf->lm_magic != LUSTRE_MSG_MAGIC_V1) + rs->rs_repbuf->lm_secflvr = SPTLRPC_FLVR_NULL; + rs->rs_repdata_len = req->rq_replen; + return 0; +} + +static struct ptlrpc_sec_cops null_sec_cops = { + .create_sec = null_create_sec, + .destroy_sec = null_destroy_sec, + .lookup_ctx = null_lookup_ctx, + .alloc_reqbuf = null_alloc_reqbuf, + .alloc_repbuf = null_alloc_repbuf, + .free_reqbuf = null_free_reqbuf, + .free_repbuf = null_free_repbuf, +}; + +static struct ptlrpc_sec_sops null_sec_sops = { + .accept = null_accept, + .alloc_rs = null_alloc_rs, + .authorize = null_authorize, + .free_rs = null_free_rs, +}; + +static struct ptlrpc_sec_policy null_policy = { + .sp_owner = THIS_MODULE, + .sp_name = "sec.null", + .sp_policy = SPTLRPC_POLICY_NULL, + .sp_cops = &null_sec_cops, + .sp_sops = &null_sec_sops, +}; + +static +void null_init_internal(void) +{ + static HLIST_HEAD(__list); + + null_sec.ps_policy = &null_policy; + atomic_set(&null_sec.ps_refcount, 1); /* always busy */ + null_sec.ps_import = NULL; + null_sec.ps_flavor = SPTLRPC_FLVR_NULL; + null_sec.ps_flags = 0; + null_sec.ps_gc_interval = 0; + null_sec.ps_gc_next = 0; + 
spin_lock_init(&null_sec.ps_lock); + null_sec.ps_ccache_size = 1; + null_sec.ps_ccache = &__list; + atomic_set(&null_sec.ps_busy, 1); /* for "null_cli_ctx" */ + + hlist_add_head(&null_cli_ctx.cc_hash, &__list); + atomic_set(&null_cli_ctx.cc_refcount, 1); /* for hash */ + null_cli_ctx.cc_sec = &null_sec; + null_cli_ctx.cc_ops = &null_ctx_ops; + null_cli_ctx.cc_expire = 0; + null_cli_ctx.cc_flags = PTLRPC_CTX_HASHED | PTLRPC_CTX_ETERNAL | + PTLRPC_CTX_UPTODATE; + null_cli_ctx.cc_vcred.vc_uid = 0; + spin_lock_init(&null_cli_ctx.cc_lock); + INIT_LIST_HEAD(&null_cli_ctx.cc_req_list); +} + +int sptlrpc_null_init(void) +{ + int rc; + + null_init_internal(); + + rc = sptlrpc_register_policy(&null_policy); + if (rc) + CERROR("failed to register sec.null: %d\n", rc); + + return rc; +} + +int sptlrpc_null_exit(void) +{ + int rc; + + rc = sptlrpc_unregister_policy(&null_policy); + if (rc) + CERROR("cannot unregister sec.null: %d\n", rc); + + return rc; +} diff --git a/lustre/ptlrpc/sec_plain.c b/lustre/ptlrpc/sec_plain.c new file mode 100644 index 0000000..c2c7df4 --- /dev/null +++ b/lustre/ptlrpc/sec_plain.c @@ -0,0 +1,498 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2006 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_SEC + +#ifndef __KERNEL__ +#include +#endif + +#include +#include +#include +#include + +static struct ptlrpc_sec_policy plain_policy; +static struct ptlrpc_sec plain_sec; +static struct ptlrpc_cli_ctx plain_cli_ctx; +static struct ptlrpc_svc_ctx plain_svc_ctx; + +static +int plain_ctx_refresh(struct ptlrpc_cli_ctx *ctx) +{ + /* should never reach here */ + LBUG(); + return 0; +} + +static +int plain_ctx_sign(struct ptlrpc_cli_ctx *ctx, struct ptlrpc_request *req) +{ + struct lustre_msg_v2 *msg = req->rq_reqbuf; + ENTRY; + + msg->lm_secflvr = req->rq_sec_flavor; + req->rq_reqdata_len = lustre_msg_size_v2(msg->lm_bufcount, + msg->lm_buflens); + RETURN(0); +} + +static +int plain_ctx_verify(struct ptlrpc_cli_ctx *ctx, struct ptlrpc_request *req) +{ + struct lustre_msg *msg = req->rq_repbuf; + ENTRY; + + if (SEC_FLAVOR_HAS_BULK(req->rq_sec_flavor)) { + if (msg->lm_bufcount != 2) { + CERROR("Protocol error: invalid buf count %d\n", + msg->lm_bufcount); + RETURN(-EPROTO); + } + + if (bulk_sec_desc_unpack(msg, 1)) { + CERROR("Mal-formed bulk checksum reply\n"); + RETURN(-EINVAL); + } + } + + req->rq_repmsg = lustre_msg_buf(msg, 0, 0); + req->rq_replen = msg->lm_buflens[0]; + RETURN(0); +} + +static +int plain_cli_wrap_bulk(struct ptlrpc_cli_ctx *ctx, + struct ptlrpc_request *req, + struct ptlrpc_bulk_desc *desc) +{ + struct sec_flavor_config *conf; + + LASSERT(req->rq_import); + LASSERT(SEC_FLAVOR_HAS_BULK(req->rq_sec_flavor)); + LASSERT(req->rq_reqbuf->lm_bufcount >= 2); + + conf = &req->rq_import->imp_obd->u.cli.cl_sec_conf; + return bulk_csum_cli_request(desc, req->rq_bulk_read, + conf->sfc_bulk_csum, + req->rq_reqbuf, + req->rq_reqbuf->lm_bufcount - 1); +} + +static +int plain_cli_unwrap_bulk(struct ptlrpc_cli_ctx *ctx, + struct ptlrpc_request *req, + struct ptlrpc_bulk_desc *desc) +{ + LASSERT(SEC_FLAVOR_HAS_BULK(req->rq_sec_flavor)); + LASSERT(req->rq_reqbuf->lm_bufcount >= 2); 
+ LASSERT(req->rq_repbuf->lm_bufcount >= 2); + + return bulk_csum_cli_reply(desc, req->rq_bulk_read, + req->rq_reqbuf, + req->rq_reqbuf->lm_bufcount - 1, + req->rq_repbuf, + req->rq_repbuf->lm_bufcount - 1); +} + +static struct ptlrpc_ctx_ops plain_ctx_ops = { + .refresh = plain_ctx_refresh, + .sign = plain_ctx_sign, + .verify = plain_ctx_verify, + .wrap_bulk = plain_cli_wrap_bulk, + .unwrap_bulk = plain_cli_unwrap_bulk, +}; + +static struct ptlrpc_svc_ctx plain_svc_ctx = { + .sc_refcount = ATOMIC_INIT(1), + .sc_policy = &plain_policy, +}; + +static +struct ptlrpc_sec* plain_create_sec(struct obd_import *imp, + struct ptlrpc_svc_ctx *ctx, + __u32 flavor, + unsigned long flags) +{ + ENTRY; + LASSERT(SEC_FLAVOR_POLICY(flavor) == SPTLRPC_POLICY_PLAIN); + RETURN(&plain_sec); +} + +static +void plain_destroy_sec(struct ptlrpc_sec *sec) +{ + ENTRY; + LASSERT(sec == &plain_sec); + EXIT; +} + +static +struct ptlrpc_cli_ctx *plain_lookup_ctx(struct ptlrpc_sec *sec, + struct vfs_cred *vcred) +{ + ENTRY; + atomic_inc(&plain_cli_ctx.cc_refcount); + RETURN(&plain_cli_ctx); +} + +static +int plain_alloc_reqbuf(struct ptlrpc_sec *sec, + struct ptlrpc_request *req, + int msgsize) +{ + struct sec_flavor_config *conf; + int bufcnt = 1, buflens[2], alloc_len; + ENTRY; + + buflens[0] = msgsize; + + if (SEC_FLAVOR_HAS_USER(req->rq_sec_flavor)) + buflens[bufcnt++] = sptlrpc_user_desc_size(); + + if (SEC_FLAVOR_HAS_BULK(req->rq_sec_flavor)) { + LASSERT(req->rq_bulk_read || req->rq_bulk_write); + + conf = &req->rq_import->imp_obd->u.cli.cl_sec_conf; + buflens[bufcnt++] = bulk_sec_desc_size(conf->sfc_bulk_csum, 1, + req->rq_bulk_read); + } + + alloc_len = lustre_msg_size_v2(bufcnt, buflens); + + + if (!req->rq_reqbuf) { + LASSERT(!req->rq_pool); + OBD_ALLOC(req->rq_reqbuf, alloc_len); + if (!req->rq_reqbuf) + RETURN(-ENOMEM); + + req->rq_reqbuf_len = alloc_len; + } else { + LASSERT(req->rq_pool); + LASSERT(req->rq_reqbuf_len >= alloc_len); + memset(req->rq_reqbuf, 0, alloc_len); + } + + 
lustre_init_msg_v2(req->rq_reqbuf, bufcnt, buflens, NULL); + req->rq_reqmsg = lustre_msg_buf_v2(req->rq_reqbuf, 0, 0); + + if (SEC_FLAVOR_HAS_USER(req->rq_sec_flavor)) + sptlrpc_pack_user_desc(req->rq_reqbuf, 1); + + RETURN(0); +} + +static +void plain_free_reqbuf(struct ptlrpc_sec *sec, + struct ptlrpc_request *req) +{ + ENTRY; + if (!req->rq_pool) { + OBD_FREE(req->rq_reqbuf, req->rq_reqbuf_len); + req->rq_reqbuf = NULL; + req->rq_reqbuf_len = 0; + } + EXIT; +} + +static +int plain_alloc_repbuf(struct ptlrpc_sec *sec, + struct ptlrpc_request *req, + int msgsize) +{ + struct sec_flavor_config *conf; + int bufcnt = 1, buflens[2], alloc_len; + ENTRY; + + buflens[0] = msgsize; + + if (SEC_FLAVOR_HAS_BULK(req->rq_sec_flavor)) { + LASSERT(req->rq_bulk_read || req->rq_bulk_write); + + conf = &req->rq_import->imp_obd->u.cli.cl_sec_conf; + buflens[bufcnt++] = bulk_sec_desc_size(conf->sfc_bulk_csum, 0, + req->rq_bulk_read); + } + + alloc_len = lustre_msg_size_v2(bufcnt, buflens); + + OBD_ALLOC(req->rq_repbuf, alloc_len); + if (!req->rq_repbuf) + RETURN(-ENOMEM); + + req->rq_repbuf_len = alloc_len; + RETURN(0); +} + +static +void plain_free_repbuf(struct ptlrpc_sec *sec, + struct ptlrpc_request *req) +{ + ENTRY; + OBD_FREE(req->rq_repbuf, req->rq_repbuf_len); + req->rq_repbuf = NULL; + req->rq_repbuf_len = 0; + EXIT; +} + +static +int plain_accept(struct ptlrpc_request *req) +{ + struct lustre_msg *msg = req->rq_reqbuf; + int bufcnt = 1; + ENTRY; + + LASSERT(SEC_FLAVOR_POLICY(req->rq_sec_flavor) == SPTLRPC_POLICY_PLAIN); + + if (SEC_FLAVOR_RPC(req->rq_sec_flavor) != SPTLRPC_FLVR_PLAIN) { + CERROR("Invalid flavor 0x%x\n", req->rq_sec_flavor); + return SECSVC_DROP; + } + + if (SEC_FLAVOR_HAS_USER(req->rq_sec_flavor)) { + if (msg->lm_bufcount < ++bufcnt) { + CERROR("Protocal error: too small buf count %d\n", + msg->lm_bufcount); + RETURN(SECSVC_DROP); + } + + if (sptlrpc_unpack_user_desc(msg, bufcnt - 1)) { + CERROR("Mal-formed user descriptor\n"); + RETURN(SECSVC_DROP); + } + 
+ req->rq_user_desc = lustre_msg_buf(msg, bufcnt - 1, 0); + } + + if (SEC_FLAVOR_HAS_BULK(req->rq_sec_flavor)) { + if (msg->lm_bufcount != ++bufcnt) { + CERROR("Protocal error: invalid buf count %d\n", + msg->lm_bufcount); + RETURN(SECSVC_DROP); + } + + if (bulk_sec_desc_unpack(msg, bufcnt - 1)) { + CERROR("Mal-formed bulk checksum request\n"); + RETURN(SECSVC_DROP); + } + } + + req->rq_reqmsg = lustre_msg_buf(msg, 0, 0); + req->rq_reqlen = msg->lm_buflens[0]; + + req->rq_svc_ctx = &plain_svc_ctx; + atomic_inc(&req->rq_svc_ctx->sc_refcount); + + RETURN(SECSVC_OK); +} + +static +int plain_alloc_rs(struct ptlrpc_request *req, int msgsize) +{ + struct ptlrpc_reply_state *rs; + struct ptlrpc_bulk_sec_desc *bsd; + int bufcnt = 1, buflens[2]; + int rs_size = sizeof(*rs); + ENTRY; + + LASSERT(msgsize % 8 == 0); + + buflens[0] = msgsize; + if (SEC_FLAVOR_HAS_BULK(req->rq_sec_flavor) && + (req->rq_bulk_read || req->rq_bulk_write)) { + bsd = lustre_msg_buf(req->rq_reqbuf, + req->rq_reqbuf->lm_bufcount - 1, + sizeof(*bsd)); + LASSERT(bsd); + + buflens[bufcnt++] = bulk_sec_desc_size(bsd->bsd_csum_alg, 0, + req->rq_bulk_read); + } + rs_size += lustre_msg_size_v2(bufcnt, buflens); + + rs = req->rq_reply_state; + + if (rs) { + /* pre-allocated */ + LASSERT(rs->rs_size >= rs_size); + } else { + OBD_ALLOC(rs, rs_size); + if (rs == NULL) + RETURN(-ENOMEM); + + rs->rs_size = rs_size; + } + + rs->rs_svc_ctx = req->rq_svc_ctx; + atomic_inc(&req->rq_svc_ctx->sc_refcount); + rs->rs_repbuf = (struct lustre_msg *) (rs + 1); + rs->rs_repbuf_len = rs_size - sizeof(*rs); + + lustre_init_msg_v2(rs->rs_repbuf, bufcnt, buflens, NULL); + rs->rs_msg = lustre_msg_buf_v2(rs->rs_repbuf, 0, 0); + + req->rq_reply_state = rs; + RETURN(0); +} + +static +void plain_free_rs(struct ptlrpc_reply_state *rs) +{ + ENTRY; + + LASSERT(atomic_read(&rs->rs_svc_ctx->sc_refcount) > 1); + atomic_dec(&rs->rs_svc_ctx->sc_refcount); + + if (!rs->rs_prealloc) + OBD_FREE(rs, rs->rs_size); + EXIT; +} + +static +int 
plain_authorize(struct ptlrpc_request *req) +{ + struct ptlrpc_reply_state *rs = req->rq_reply_state; + struct lustre_msg_v2 *msg = rs->rs_repbuf; + int len; + ENTRY; + + LASSERT(rs); + LASSERT(msg); + + if (req->rq_replen != msg->lm_buflens[0]) + len = lustre_shrink_msg(msg, 0, req->rq_replen, 1); + else + len = lustre_msg_size_v2(msg->lm_bufcount, msg->lm_buflens); + + msg->lm_secflvr = req->rq_sec_flavor; + rs->rs_repdata_len = len; + RETURN(0); +} + +static +int plain_svc_unwrap_bulk(struct ptlrpc_request *req, + struct ptlrpc_bulk_desc *desc) +{ + struct ptlrpc_reply_state *rs = req->rq_reply_state; + + LASSERT(rs); + + return bulk_csum_svc(desc, req->rq_bulk_read, + req->rq_reqbuf, req->rq_reqbuf->lm_bufcount - 1, + rs->rs_repbuf, rs->rs_repbuf->lm_bufcount - 1); +} + +static +int plain_svc_wrap_bulk(struct ptlrpc_request *req, + struct ptlrpc_bulk_desc *desc) +{ + struct ptlrpc_reply_state *rs = req->rq_reply_state; + + LASSERT(rs); + + return bulk_csum_svc(desc, req->rq_bulk_read, + req->rq_reqbuf, req->rq_reqbuf->lm_bufcount - 1, + rs->rs_repbuf, rs->rs_repbuf->lm_bufcount - 1); +} + +static struct ptlrpc_sec_cops plain_sec_cops = { + .create_sec = plain_create_sec, + .destroy_sec = plain_destroy_sec, + .lookup_ctx = plain_lookup_ctx, + .alloc_reqbuf = plain_alloc_reqbuf, + .alloc_repbuf = plain_alloc_repbuf, + .free_reqbuf = plain_free_reqbuf, + .free_repbuf = plain_free_repbuf, +}; + +static struct ptlrpc_sec_sops plain_sec_sops = { + .accept = plain_accept, + .alloc_rs = plain_alloc_rs, + .authorize = plain_authorize, + .free_rs = plain_free_rs, + .unwrap_bulk = plain_svc_unwrap_bulk, + .wrap_bulk = plain_svc_wrap_bulk, +}; + +static struct ptlrpc_sec_policy plain_policy = { + .sp_owner = THIS_MODULE, + .sp_name = "sec.plain", + .sp_policy = SPTLRPC_POLICY_PLAIN, + .sp_cops = &plain_sec_cops, + .sp_sops = &plain_sec_sops, +}; + +static +void plain_init_internal(void) +{ + static HLIST_HEAD(__list); + + plain_sec.ps_policy = &plain_policy; + 
atomic_set(&plain_sec.ps_refcount, 1); /* always busy */ + plain_sec.ps_import = NULL; + plain_sec.ps_flavor = SPTLRPC_FLVR_PLAIN; + plain_sec.ps_flags = 0; + plain_sec.ps_gc_interval = 0; + plain_sec.ps_gc_next = 0; + spin_lock_init(&plain_sec.ps_lock); + plain_sec.ps_ccache_size = 1; + plain_sec.ps_ccache = &__list; + atomic_set(&plain_sec.ps_busy, 1); /* for "plain_cli_ctx" */ + + hlist_add_head(&plain_cli_ctx.cc_hash, &__list); + atomic_set(&plain_cli_ctx.cc_refcount, 1); /* for hash */ + plain_cli_ctx.cc_sec = &plain_sec; + plain_cli_ctx.cc_ops = &plain_ctx_ops; + plain_cli_ctx.cc_expire = 0; + plain_cli_ctx.cc_flags = PTLRPC_CTX_HASHED | PTLRPC_CTX_ETERNAL | + PTLRPC_CTX_UPTODATE; + plain_cli_ctx.cc_vcred.vc_uid = 0; + spin_lock_init(&plain_cli_ctx.cc_lock); + INIT_LIST_HEAD(&plain_cli_ctx.cc_req_list); +} + +int sptlrpc_plain_init(void) +{ + int rc; + + plain_init_internal(); + + rc = sptlrpc_register_policy(&plain_policy); + if (rc) + CERROR("failed to register sec.plain: %d\n", rc); + + return rc; +} + +int sptlrpc_plain_exit(void) +{ + int rc; + + rc = sptlrpc_unregister_policy(&plain_policy); + if (rc) + CERROR("cannot unregister sec.plain: %d\n", rc); + + return rc; +} diff --git a/lustre/ptlrpc/service.c b/lustre/ptlrpc/service.c index 370ee76..928f988 100644 --- a/lustre/ptlrpc/service.c +++ b/lustre/ptlrpc/service.c @@ -279,7 +279,7 @@ ptlrpc_init_svc(int nbufs, int bufsize, int max_req_size, int max_reply_size, ENTRY; LASSERT (nbufs > 0); - LASSERT (bufsize >= max_req_size); + LASSERT (bufsize >= max_req_size + SPTLRPC_MAX_PAYLOAD); LASSERT (ctx_tags != 0); OBD_ALLOC(service, sizeof(*service)); @@ -294,7 +294,7 @@ ptlrpc_init_svc(int nbufs, int bufsize, int max_req_size, int max_reply_size, cfs_waitq_init(&service->srv_waitq); service->srv_nbuf_per_group = test_req_buffer_pressure ? 
1 : nbufs; - service->srv_max_req_size = max_req_size; + service->srv_max_req_size = max_req_size + SPTLRPC_MAX_PAYLOAD; service->srv_buf_size = bufsize; service->srv_rep_portal = rep_portal; service->srv_req_portal = req_portal; @@ -333,7 +333,8 @@ ptlrpc_init_svc(int nbufs, int bufsize, int max_req_size, int max_reply_size, /* Now allocate pool of reply buffers */ /* Increase max reply size to next power of two */ service->srv_max_reply_size = 1; - while (service->srv_max_reply_size < max_reply_size) + while (service->srv_max_reply_size < + max_reply_size + SPTLRPC_MAX_PAYLOAD) service->srv_max_reply_size <<= 1; if (proc_entry != NULL) @@ -359,6 +360,8 @@ static void __ptlrpc_server_free_request(struct ptlrpc_request *req) req->rq_reply_state = NULL; } + sptlrpc_svc_ctx_decref(req); + if (req != &rqbd->rqbd_req) { /* NB request buffers use an embedded * req if the incoming req unlinked the @@ -560,6 +563,19 @@ ptlrpc_server_handle_request(struct ptlrpc_service *svc, svc->srv_n_active_reqs); } + rc = sptlrpc_svc_unwrap_request(request); + switch (rc) { + case SECSVC_OK: + break; + case SECSVC_COMPLETE: + target_send_reply(request, 0, OBD_FAIL_MDS_ALL_REPLY_NET); + goto put_conn; + case SECSVC_DROP: + goto out; + default: + LBUG(); + } + #if SWAB_PARANOIA /* Clear request swab mask; this is a new request */ request->rq_req_swab_mask = 0; @@ -667,7 +683,9 @@ put_conn: if (timediff / 1000000 > (long)obd_timeout) CERROR("request "LPU64" opc %u from %s processed in %lds " "trans "LPU64" rc %d/%d\n", - request->rq_xid, lustre_msg_get_opc(request->rq_reqmsg), + request->rq_xid, + request->rq_reqmsg ? 
+ lustre_msg_get_opc(request->rq_reqmsg) : 0, libcfs_id2str(request->rq_peer), cfs_timeval_sub(&work_end, &request->rq_arrival_time, NULL) / 1000000, @@ -680,7 +698,9 @@ put_conn: else CDEBUG(D_HA, "request "LPU64" opc %u from %s processed in " "%ldus (%ldus total) trans "LPU64" rc %d/%d\n", - request->rq_xid, lustre_msg_get_opc(request->rq_reqmsg), + request->rq_xid, + request->rq_reqmsg ? + lustre_msg_get_opc(request->rq_reqmsg) : 0, libcfs_id2str(request->rq_peer), timediff, cfs_timeval_sub(&work_end, &request->rq_arrival_time, NULL), @@ -689,7 +709,7 @@ put_conn: lustre_msg_get_status(request->rq_repmsg) : -999); - if (svc->srv_stats != NULL) { + if (svc->srv_stats != NULL && request->rq_reqmsg != NULL) { int opc = opcode_offset(lustre_msg_get_opc(request->rq_reqmsg)); if (opc > 0) { LASSERT(opc < LUSTRE_MAX_OPCODES); diff --git a/lustre/utils/lfs.c b/lustre/utils/lfs.c index c45581e..2e35469 100644 --- a/lustre/utils/lfs.c +++ b/lustre/utils/lfs.c @@ -67,6 +67,7 @@ static int lfs_quotaoff(int argc, char **argv); static int lfs_setquota(int argc, char **argv); static int lfs_quota(int argc, char **argv); #endif +static int lfs_flushctx(int argc, char **argv); static int lfs_join(int argc, char **argv); /* all avaialable commands */ @@ -131,6 +132,8 @@ command_t cmdlist[] = { {"quota", lfs_quota, 0, "Display disk usage and limits.\n" "usage: quota [ -o obd_uuid ] [ -u | -g ] [name] "}, #endif + {"flushctx", lfs_flushctx, 0, "Flush security context for current user.\n" + "usage: flushctx [-k] [mountpoint...]"}, {"help", Parser_help, 0, "help"}, {"exit", Parser_quit, 0, "quit"}, {"quit", Parser_quit, 0, "quit"}, @@ -1478,6 +1481,92 @@ static int lfs_quota(int argc, char **argv) } #endif /* HAVE_QUOTA_SUPPORT */ +static int flushctx_ioctl(char *mp) +{ + int fd, rc; + + fd = open(mp, O_RDONLY); + if (fd == -1) { + fprintf(stderr, "flushctx: error open %s: %s\n", + mp, strerror(errno)); + return -1; + } + + rc = ioctl(fd, LL_IOC_FLUSHCTX); + if (rc == -1) + 
fprintf(stderr, "flushctx: error ioctl %s: %s\n", + mp, strerror(errno)); + + close(fd); + return rc; +} + +static int lfs_flushctx(int argc, char **argv) +{ + int kdestroy = 0, c; + FILE *proc; + char procline[PATH_MAX], *line; + int rc = 0; + + optind = 0; + while ((c = getopt(argc, argv, "k")) != -1) { + switch (c) { + case 'k': + kdestroy = 1; + break; + default: + fprintf(stderr, "error: %s: option '-%c' " + "unrecognized\n", argv[0], c); + return CMD_HELP; + } + } + + if (kdestroy) + system("kdestroy > /dev/null"); + + if (optind >= argc) { + /* flush for all mounted lustre fs. */ + proc = fopen("/proc/mounts", "r"); + if (!proc) { + fprintf(stderr, "error: %s: can't open /proc/mounts\n", + argv[0]); + return -1; + } + + while ((line = fgets(procline, PATH_MAX, proc)) != NULL) { + char dev[PATH_MAX]; + char mp[PATH_MAX]; + char fs[PATH_MAX]; + + if (sscanf(line, "%s %s %s", dev, mp, fs) != 3) { + fprintf(stderr, "%s: unexpected format in " + "/proc/mounts\n", + argv[0]); + return -1; + } + + if (strcmp(fs, "lustre") != 0) + continue; + /* we use '@' to determine it's a client. are there + * any other better way? + */ + if (strchr(dev, '@') == NULL) + continue; + + if (flushctx_ioctl(mp)) + rc = -1; + } + } else { + /* flush fs as specified */ + while (optind < argc) { + if (flushctx_ioctl(argv[optind++])) + rc = -1; + } + } + + return rc; +} + int main(int argc, char **argv) { int rc; -- 1.8.3.1