From: ericm Date: Fri, 6 Oct 2006 21:49:27 +0000 (+0000) Subject: branch: b_new_cmd X-Git-Tag: v1_8_0_110~486^2~667 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=271e7c16362425fa4132255f9e0107ffe60af1f8;p=fs%2Flustre-release.git branch: b_new_cmd port gss fixes from b1_8_gss: - pre-alloc page pools for bulk write in encrypting mode. - improve procfs for sptlrpc/gss. - various other sptlrpc fixes. --- diff --git a/lustre/include/lustre_net.h b/lustre/include/lustre_net.h index 16f2e0e..dfa74ee 100644 --- a/lustre/include/lustre_net.h +++ b/lustre/include/lustre_net.h @@ -517,14 +517,13 @@ struct ptlrpc_bulk_desc { __u64 bd_last_xid; struct ptlrpc_cb_id bd_cbid; /* network callback info */ - lnet_handle_md_t bd_md_h; /* associated MD */ + lnet_handle_md_t bd_md_h; /* associated MD */ + cfs_page_t **bd_enc_pages; #if defined(__KERNEL__) - lnet_kiov_t *bd_enc_iov; /* used in privacy mode */ - lnet_kiov_t bd_iov[0]; + lnet_kiov_t bd_iov[0]; #else - lnet_md_iovec_t *bd_enc_iov; - lnet_md_iovec_t bd_iov[0]; + lnet_md_iovec_t bd_iov[0]; #endif }; @@ -936,10 +935,6 @@ int client_import_add_conn(struct obd_import *imp, struct obd_uuid *uuid, int client_import_del_conn(struct obd_import *imp, struct obd_uuid *uuid); int import_set_conn_priority(struct obd_import *imp, struct obd_uuid *uuid); -/* ptlrpc/pers.c */ -int ptlrpc_bulk_alloc_enc_pages(struct ptlrpc_bulk_desc *desc); -void ptlrpc_bulk_free_enc_pages(struct ptlrpc_bulk_desc *desc); - /* ptlrpc/pinger.c */ int ptlrpc_pinger_add_import(struct obd_import *imp); int ptlrpc_pinger_del_import(struct obd_import *imp); diff --git a/lustre/include/lustre_sec.h b/lustre/include/lustre_sec.h index 6554cb6..2065e2f 100644 --- a/lustre/include/lustre_sec.h +++ b/lustre/include/lustre_sec.h @@ -210,6 +210,8 @@ struct ptlrpc_ctx_ops { int (*match) (struct ptlrpc_cli_ctx *ctx, struct vfs_cred *vcred); int (*refresh) (struct ptlrpc_cli_ctx *ctx); + int (*display) (struct ptlrpc_cli_ctx *ctx, + char *buf, int bufsize); /* * rpc data transform */ @@ -330,6 +332,7 @@ struct ptlrpc_sec_policy { #define PTLRPC_SEC_FL_REVERSE 0x0001 /* reverse sec */ #define PTLRPC_SEC_FL_ROOTONLY 0x0002 /* treat everyone as root */ +#define PTLRPC_SEC_FL_BULK 0x0004 /* intensive bulk i/o expected */ struct ptlrpc_sec { struct ptlrpc_sec_policy *ps_policy; @@ -394,6 +397,15 @@ struct ptlrpc_bulk_sec_desc { __u8 bsd_csum[0]; }; +const char * sptlrpc_bulk_csum_alg2name(__u32 csum_alg); +const char * sptlrpc_bulk_priv_alg2name(__u32 priv_alg); + +/* + * lprocfs + */ +struct proc_dir_entry; +extern struct proc_dir_entry *sptlrpc_proc_root; + /* * security type */ @@ -424,6 +436,7 @@ void sptlrpc_ctx_put(struct ptlrpc_cli_ctx *ctx, int sync); void sptlrpc_ctx_expire(struct ptlrpc_cli_ctx *ctx); void sptlrpc_ctx_replace(struct ptlrpc_sec *sec, struct ptlrpc_cli_ctx *new); void sptlrpc_ctx_wakeup(struct ptlrpc_cli_ctx *ctx); +int sptlrpc_ctx_display(struct ptlrpc_cli_ctx *ctx, char *buf, int bufsize); /* * client wrap/buffers @@ -482,6 +495,11 @@ int sptlrpc_cli_install_rvs_ctx(struct obd_import *imp, struct ptlrpc_cli_ctx *ctx); /* bulk security api */ +int sptlrpc_enc_pool_add_user(void); +int sptlrpc_enc_pool_del_user(void); +int sptlrpc_enc_pool_get_pages(struct ptlrpc_bulk_desc *desc); +void sptlrpc_enc_pool_put_pages(struct ptlrpc_bulk_desc *desc); + int sptlrpc_cli_wrap_bulk(struct ptlrpc_request *req, struct ptlrpc_bulk_desc *desc); int sptlrpc_cli_unwrap_bulk_read(struct ptlrpc_request *req, diff --git a/lustre/ptlrpc/Makefile.in b/lustre/ptlrpc/Makefile.in index 791f24e..bc45564 100644 --- a/lustre/ptlrpc/Makefile.in +++ b/lustre/ptlrpc/Makefile.in @@ -13,7 +13,7 @@ ptlrpc_objs := client.o recover.o connection.o niobuf.o pack_generic.o ptlrpc_objs += events.o ptlrpc_module.o service.o pinger.o recov_thread.o ptlrpc_objs += llog_net.o llog_client.o llog_server.o import.o ptlrpcd.o ptlrpc_objs += pers.o lproc_ptlrpc.o wiretest.o layout.o -ptlrpc_objs += sec.o sec_null.o sec_plain.o +ptlrpc_objs += sec.o sec_bulk.o sec_null.o sec_plain.o sec_lproc.o ptlrpc-objs := $(ldlm_objs) $(ptlrpc_objs) diff --git a/lustre/ptlrpc/autoMakefile.am b/lustre/ptlrpc/autoMakefile.am index 6fc21c0..52f0931 100644 --- a/lustre/ptlrpc/autoMakefile.am +++ b/lustre/ptlrpc/autoMakefile.am @@ -18,8 +18,8 @@ LDLM_COMM_SOURCES= $(top_srcdir)/lustre/ldlm/l_lock.c \ COMMON_SOURCES = client.c recover.c connection.c niobuf.c pack_generic.c \ events.c ptlrpc_module.c service.c pinger.c recov_thread.c llog_net.c \ llog_client.c llog_server.c import.c ptlrpcd.c pers.c wiretest.c \ - ptlrpc_internal.h layout.c sec.c sec_null.c sec_plain.c \ - $(LDLM_COMM_SOURCES) + ptlrpc_internal.h layout.c sec.c sec_bulk.c sec_null.c sec_plain.c \ + sec_lproc.c $(LDLM_COMM_SOURCES) if LIBLUSTRE diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c index 371a48a..c0c1e35 100644 --- a/lustre/ptlrpc/client.c +++ b/lustre/ptlrpc/client.c @@ -180,7 +180,7 @@ void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *desc) LASSERT(!desc->bd_network_rw); /* network hands off or */ LASSERT((desc->bd_export != NULL) ^ (desc->bd_import != NULL)); - ptlrpc_bulk_free_enc_pages(desc); + sptlrpc_enc_pool_put_pages(desc); if (desc->bd_export) class_export_put(desc->bd_export); diff --git a/lustre/ptlrpc/events.c b/lustre/ptlrpc/events.c index e93e40b..e52ff11 100644 --- a/lustre/ptlrpc/events.c +++ b/lustre/ptlrpc/events.c @@ -139,7 +139,7 @@ void client_bulk_callback (lnet_event_t *ev) desc->bd_nob_transferred = ev->mlength; } - ptlrpc_bulk_free_enc_pages(desc); + sptlrpc_enc_pool_put_pages(desc); /* NB don't unlock till after wakeup; desc can disappear under us * otherwise */ diff --git a/lustre/ptlrpc/gss/gss_api.h b/lustre/ptlrpc/gss/gss_api.h index b222036..cf31747 100644 --- a/lustre/ptlrpc/gss/gss_api.h +++ b/lustre/ptlrpc/gss/gss_api.h @@ -39,39 +39,43 @@ struct gss_ctx { __u32 lgss_import_sec_context( rawobj_t *input_token, struct gss_api_mech *mech, - struct gss_ctx **ctx_id); + struct gss_ctx **ctx); __u32 lgss_copy_reverse_context( - struct gss_ctx *ctx_id, - struct gss_ctx **ctx_id_new); + struct gss_ctx *ctx, + struct gss_ctx **ctx_new); __u32 lgss_inquire_context( - struct gss_ctx *ctx_id, + struct gss_ctx *ctx, unsigned long *endtime); __u32 lgss_get_mic( - struct gss_ctx *ctx_id, + struct gss_ctx *ctx, int msgcnt, rawobj_t *msgs, rawobj_t *mic_token); __u32 lgss_verify_mic( - struct gss_ctx *ctx_id, + struct gss_ctx *ctx, int msgcnt, rawobj_t *msgs, rawobj_t *mic_token); __u32 lgss_wrap( - struct gss_ctx *ctx_id, + struct gss_ctx *ctx, rawobj_t *msg, int msg_buflen, rawobj_t *out_token); __u32 lgss_unwrap( - struct gss_ctx *ctx_id, + struct gss_ctx *ctx, rawobj_t *token, rawobj_t *out_msg); __u32 lgss_plain_encrypt( - struct gss_ctx *ctx_id, + struct gss_ctx *ctx, int length, void *in_buf, void *out_buf); __u32 lgss_delete_sec_context( - struct gss_ctx **ctx_id); + struct gss_ctx **ctx); +int lgss_display( + struct gss_ctx *ctx, + char *buf, + int bufsize); struct subflavor_desc { __u32 sf_subflavor; @@ -96,20 +100,20 @@ struct gss_api_mech { struct gss_api_ops { __u32 (*gss_import_sec_context)( rawobj_t *input_token, - struct gss_ctx *ctx_id); + struct gss_ctx *ctx); __u32 (*gss_copy_reverse_context)( - struct gss_ctx *ctx_id, - struct gss_ctx *ctx_id_new); + struct gss_ctx *ctx, + struct gss_ctx *ctx_new); __u32 (*gss_inquire_context)( - struct gss_ctx *ctx_id, + struct gss_ctx *ctx, unsigned long *endtime); __u32 (*gss_get_mic)( - struct gss_ctx *ctx_id, + struct gss_ctx *ctx, int msgcnt, rawobj_t *msgs, rawobj_t *mic_token); __u32 (*gss_verify_mic)( - struct gss_ctx *ctx_id, + struct gss_ctx *ctx, int msgcnt, rawobj_t *msgs, rawobj_t *mic_token); @@ -128,7 +132,11 @@ struct gss_api_ops { void *in_buf, void *out_buf); void (*gss_delete_sec_context)( - void *ctx_id); + void *ctx); + int (*gss_display)( + struct gss_ctx *ctx, + char *buf, + int bufsize); }; int lgss_mech_register(struct gss_api_mech *mech); diff --git a/lustre/ptlrpc/gss/gss_internal.h b/lustre/ptlrpc/gss/gss_internal.h index 4d97d52..9880588 100644 --- a/lustre/ptlrpc/gss/gss_internal.h +++ b/lustre/ptlrpc/gss/gss_internal.h @@ -316,11 +316,13 @@ struct gss_svc_ctx *gss_svc_upcall_get_ctx(struct ptlrpc_request *req, void gss_svc_upcall_put_ctx(struct gss_svc_ctx *ctx); void gss_svc_upcall_destroy_ctx(struct gss_svc_ctx *ctx); -int __init gss_svc_init_upcall(void); +int __init gss_svc_init_upcall(void); void __exit gss_svc_exit_upcall(void); /* lproc_gss.c */ -int gss_init_lproc(void); +void gss_stat_oos_record_cli(int behind); +void gss_stat_oos_record_svc(int phase, int replay); +int gss_init_lproc(void); void gss_exit_lproc(void); /* gss_krb5_mech.c */ diff --git a/lustre/ptlrpc/gss/gss_krb5_mech.c b/lustre/ptlrpc/gss/gss_krb5_mech.c index 1360fc4..7f80197 100644 --- a/lustre/ptlrpc/gss/gss_krb5_mech.c +++ b/lustre/ptlrpc/gss/gss_krb5_mech.c @@ -74,6 +74,7 @@ spinlock_t krb5_seq_lock = SPIN_LOCK_UNLOCKED; struct krb5_enctype { + char *ke_dispname; int ke_hash_size; char *ke_hash_name; char *ke_enc_name; @@ -88,6 +89,7 @@ struct krb5_enctype { */ static struct krb5_enctype enctypes[] = { [ENCTYPE_DES_CBC_RAW] = { /* des-cbc-md5 */ + "des-cbc-md5", 16, "md5", "des", @@ -95,6 +97,7 @@ static struct krb5_enctype enctypes[] = { 0, }, [ENCTYPE_DES3_CBC_RAW] = { /* des3-hmac-sha1 */ + "des-hmac-sha1", 20, "sha1", "des3_ede", @@ -102,6 +105,7 @@ static struct krb5_enctype enctypes[] = { 1, }, [ENCTYPE_AES128_CTS_HMAC_SHA1_96] = { /* aes128-cts */ + "aes128-cts-hmac-sha1-96", 12, "sha1", "aes", @@ -109,6 +113,7 @@ static struct krb5_enctype enctypes[] = { 1, }, [ENCTYPE_AES256_CTS_HMAC_SHA1_96] = { /* aes256-cts */ + "aes256-cts-hmac-sha1-96", 12, "sha1", "aes", @@ -119,6 +124,14 @@ static struct krb5_enctype enctypes[] = { #define MAX_ENCTYPES sizeof(enctypes)/sizeof(struct krb5_enctype) +static const char * enctype2str(__u32 enctype) +{ + if (enctype < MAX_ENCTYPES && enctypes[enctype].ke_dispname) + return enctypes[enctype].ke_dispname; + + return "unknown"; +} + static int keyblock_init(struct krb5_keyblock *kb, char *alg_name, int alg_mode) { @@ -1030,6 +1043,20 @@ __u32 gss_plain_encrypt_kerberos(struct gss_ctx *ctx, return rc; } +int gss_display_kerberos(struct gss_ctx *ctx, + char *buf, + int bufsize) +{ + struct krb5_ctx *kctx = ctx->internal_ctx_id; + int written; + + written = snprintf(buf, bufsize, + " mech: krb5\n" + " enctype: %s\n", + enctype2str(kctx->kc_enctype)); + return written; +} + static struct gss_api_ops gss_kerberos_ops = { .gss_import_sec_context = gss_import_sec_context_kerberos, .gss_copy_reverse_context = gss_copy_reverse_context_kerberos, @@ -1040,6 +1067,7 @@ static struct gss_api_ops gss_kerberos_ops = { .gss_unwrap = gss_unwrap_kerberos, .gss_plain_encrypt = gss_plain_encrypt_kerberos, .gss_delete_sec_context = gss_delete_sec_context_kerberos, + .gss_display = gss_display_kerberos, }; static struct subflavor_desc gss_kerberos_sfs[] = { diff --git a/lustre/ptlrpc/gss/gss_mech_switch.c b/lustre/ptlrpc/gss/gss_mech_switch.c index b09cdee..a6a001e 100644 --- a/lustre/ptlrpc/gss/gss_mech_switch.c +++ b/lustre/ptlrpc/gss/gss_mech_switch.c @@ -330,3 +330,15 @@ __u32 lgss_delete_sec_context(struct gss_ctx **context_handle) *context_handle=NULL; return GSS_S_COMPLETE; } + +int lgss_display(struct gss_ctx *ctx, + char *buf, + int bufsize) +{ + LASSERT(ctx); + LASSERT(ctx->mech_type); + LASSERT(ctx->mech_type->gm_ops); + LASSERT(ctx->mech_type->gm_ops->gss_display); + + return ctx->mech_type->gm_ops->gss_display(ctx, buf, bufsize); +} diff --git a/lustre/ptlrpc/gss/lproc_gss.c b/lustre/ptlrpc/gss/lproc_gss.c index b32a0bd..57ed81d 100644 --- a/lustre/ptlrpc/gss/lproc_gss.c +++ b/lustre/ptlrpc/gss/lproc_gss.c @@ -54,6 +54,72 @@ static struct proc_dir_entry *gss_proc_root = NULL; +/* + * statistic of "out-of-sequence-window" + */ +static struct { + spinlock_t oos_lock; + atomic_t oos_cli_count; /* client occurrence */ + int oos_cli_behind; /* client max seqs behind */ + atomic_t oos_svc_replay[3]; /* server replay detected */ + atomic_t oos_svc_pass[3]; /* server verified ok */ +} gss_stat_oos = { + .oos_lock = SPIN_LOCK_UNLOCKED, + .oos_cli_count = ATOMIC_INIT(0), + .oos_cli_behind = 0, + .oos_svc_replay = { ATOMIC_INIT(0), }, + .oos_svc_pass = { ATOMIC_INIT(0), }, +}; + +void gss_stat_oos_record_cli(int behind) +{ + atomic_inc(&gss_stat_oos.oos_cli_count); + + spin_lock(&gss_stat_oos.oos_lock); + if (behind > gss_stat_oos.oos_cli_behind) + gss_stat_oos.oos_cli_behind = behind; + spin_unlock(&gss_stat_oos.oos_lock); +} + +void gss_stat_oos_record_svc(int phase, int replay) +{ + LASSERT(phase >= 0 && phase <= 2); + + if (replay) + atomic_inc(&gss_stat_oos.oos_svc_replay[phase]); + else + atomic_inc(&gss_stat_oos.oos_svc_pass[phase]); +} + +static int gss_proc_read_oos(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + int written; + + written = snprintf(page, count, + "seqwin: %u\n" + "backwin: %u\n" + "client fall behind seqwin\n" + " occurrence: %d\n" + " max seq behind: %d\n" + "server replay detected:\n" + " phase 0: %d\n" + " phase 1: %d\n" + " phase 2: %d\n" + "server verify ok:\n" + " phase 2: %d\n", + GSS_SEQ_WIN_MAIN, + GSS_SEQ_WIN_BACK, + atomic_read(&gss_stat_oos.oos_cli_count), + gss_stat_oos.oos_cli_behind, + atomic_read(&gss_stat_oos.oos_svc_replay[0]), + atomic_read(&gss_stat_oos.oos_svc_replay[1]), + atomic_read(&gss_stat_oos.oos_svc_replay[2]), + atomic_read(&gss_stat_oos.oos_svc_pass[2])); + + return written; +} + static int gss_proc_write_secinit(struct file *file, const char *buffer, unsigned long count, void *data) { @@ -69,6 +135,7 @@ static int gss_proc_write_secinit(struct file *file, const char *buffer, } static struct lprocfs_vars gss_lprocfs_vars[] = { + { "replays", gss_proc_read_oos, NULL }, { "init_channel", NULL, gss_proc_write_secinit, NULL }, { NULL } }; @@ -76,7 +143,7 @@ static struct lprocfs_vars gss_lprocfs_vars[] = { int gss_init_lproc(void) { int rc; - gss_proc_root = lprocfs_register("gss", proc_lustre_root, + gss_proc_root = lprocfs_register("gss", sptlrpc_proc_root, gss_lprocfs_vars, NULL); if (IS_ERR(gss_proc_root)) { diff --git a/lustre/ptlrpc/gss/sec_gss.c b/lustre/ptlrpc/gss/sec_gss.c index 4cbf891..83025d1 100644 --- a/lustre/ptlrpc/gss/sec_gss.c +++ b/lustre/ptlrpc/gss/sec_gss.c @@ -367,6 +367,11 @@ void gss_cli_ctx_finalize(struct gss_cli_ctx *gctx) * 2 - check in back window. if it is high above the window or fit in the * window and the bit is 0, then set the bit and accept. otherwise reject. * + * return value: + * 1: looks like a replay + * 0: is ok + * -1: is a replay + * * note phase 0 is necessary, because otherwise replay attacking request of * sequence which between the 2 windows can't be detected. * @@ -431,7 +436,7 @@ replay: seq_num + win_size > *max_seq ? "in" : "behind", phase == 2 ? "backup " : "main", *max_seq, win_size); - return 1; + return -1; } /* @@ -447,15 +452,35 @@ int gss_check_seq_num(struct gss_svc_seq_data *ssd, __u32 seq_num, int set) spin_lock(&ssd->ssd_lock); if (set == 0) { + /* + * phase 0 testing + */ rc = gss_do_check_seq(ssd->ssd_win_main, GSS_SEQ_WIN_MAIN, &ssd->ssd_max_main, seq_num, 0); + if (unlikely(rc)) + gss_stat_oos_record_svc(0, 1); } else { + /* + * phase 1 checking main window + */ rc = gss_do_check_seq(ssd->ssd_win_main, GSS_SEQ_WIN_MAIN, &ssd->ssd_max_main, seq_num, 1); - if (rc == 0) + switch (rc) { + case -1: + gss_stat_oos_record_svc(1, 1); + /* fall through */ + case 0: goto exit; + } + /* + * phase 2 checking back window + */ rc = gss_do_check_seq(ssd->ssd_win_back, GSS_SEQ_WIN_BACK, &ssd->ssd_max_back, seq_num, 2); + if (rc) + gss_stat_oos_record_svc(2, 1); + else + gss_stat_oos_record_svc(2, 0); } exit: spin_unlock(&ssd->ssd_lock); @@ -490,13 +515,62 @@ int gss_cli_ctx_refresh(struct ptlrpc_cli_ctx *ctx) } static -int gss_cli_ctx_match(struct ptlrpc_cli_ctx *ctx, - struct vfs_cred *vcred) +int gss_cli_ctx_match(struct ptlrpc_cli_ctx *ctx, struct vfs_cred *vcred) { return (ctx->cc_vcred.vc_uid == vcred->vc_uid); } static +void gss_cli_ctx_flags2str(unsigned long flags, char *buf, int bufsize) +{ + buf[0] = '\0'; + + if (flags & PTLRPC_CTX_UPTODATE) + strncat(buf, "uptodate,", bufsize); + if (flags & PTLRPC_CTX_DEAD) + strncat(buf, "dead,", bufsize); + if (flags & PTLRPC_CTX_ERROR) + strncat(buf, "error,", bufsize); + if (flags & PTLRPC_CTX_HASHED) + strncat(buf, "hashed,", bufsize); + if (flags & PTLRPC_CTX_ETERNAL) + strncat(buf, "eternal,", bufsize); + if (buf[0] == '\0') + strncat(buf, "-,", bufsize); + + buf[strlen(buf) - 1] = '\0'; +} + +static +int gss_cli_ctx_display(struct ptlrpc_cli_ctx *ctx, char *buf, int bufsize) +{ + struct gss_cli_ctx *gctx; + char flags_str[40]; + int written; + + gctx = container_of(ctx, struct gss_cli_ctx, gc_base); + + gss_cli_ctx_flags2str(ctx->cc_flags, flags_str, sizeof(flags_str)); + + written = snprintf(buf, bufsize, + "UID %d:\n" + " flags: %s\n" + " seqwin: %d\n" + " sequence: %d\n", + ctx->cc_vcred.vc_uid, + flags_str, + gctx->gc_win, + atomic_read(&gctx->gc_seq)); + + if (gctx->gc_mechctx) { + written += lgss_display(gctx->gc_mechctx, + buf + written, bufsize - written); + } + + return written; +} + +static int gss_cli_ctx_sign(struct ptlrpc_cli_ctx *ctx, struct ptlrpc_request *req) { @@ -529,8 +603,10 @@ redo: * be dropped. also applies to gss_cli_ctx_seal(). */ if (atomic_read(&gctx->gc_seq) - seq > GSS_SEQ_REPACK_THRESHOLD) { - CWARN("req %p: %u behind, retry signing\n", - req, atomic_read(&gctx->gc_seq) - seq); + int behind = atomic_read(&gctx->gc_seq) - seq; + + gss_stat_oos_record_cli(behind); + CWARN("req %p: %u behind, retry signing\n", req, behind); goto redo; } @@ -774,8 +850,11 @@ redo: /* see explain in gss_cli_ctx_sign() */ if (atomic_read(&gctx->gc_seq) - ghdr->gh_seq > GSS_SEQ_REPACK_THRESHOLD) { - CWARN("req %p: %u behind, retry sealing\n", - req, atomic_read(&gctx->gc_seq) - ghdr->gh_seq); + int behind = atomic_read(&gctx->gc_seq) - ghdr->gh_seq; + + gss_stat_oos_record_cli(behind); + CWARN("req %p: %u behind, retry sealing\n", req, behind); + ghdr->gh_seq = atomic_inc_return(&gctx->gc_seq); goto redo; } @@ -878,6 +957,7 @@ int gss_cli_ctx_unseal(struct ptlrpc_cli_ctx *ctx, static struct ptlrpc_ctx_ops gss_ctxops = { .refresh = gss_cli_ctx_refresh, .match = gss_cli_ctx_match, + .display = gss_cli_ctx_display, .sign = gss_cli_ctx_sign, .verify = gss_cli_ctx_verify, .seal = gss_cli_ctx_seal, @@ -1071,6 +1151,10 @@ struct ptlrpc_sec* gss_sec_create(struct obd_import *imp, sec->ps_gc_next = 0; } + if (SEC_FLAVOR_SVC(flavor) == SPTLRPC_SVC_PRIV && + flags & PTLRPC_SEC_FL_BULK) + sptlrpc_enc_pool_add_user(); + CWARN("create %s%s@%p\n", (ctx ? "reverse " : ""), gss_policy.sp_name, gsec); RETURN(sec); @@ -1103,6 +1187,10 @@ void gss_sec_destroy(struct ptlrpc_sec *sec) class_import_put(sec->ps_import); + if (SEC_FLAVOR_SVC(sec->ps_flavor) == SPTLRPC_SVC_PRIV && + sec->ps_flags & PTLRPC_SEC_FL_BULK) + sptlrpc_enc_pool_del_user(); + OBD_FREE(gsec, sizeof(*gsec) + sizeof(struct list_head) * sec->ps_ccache_size); EXIT; @@ -1621,7 +1709,7 @@ int gss_svc_verify_request(struct ptlrpc_request *req, } if (gss_check_seq_num(&gctx->gsc_seqdata, gw->gw_seq, 0)) { - CERROR("phase 1: discard replayed req: seq %u\n", gw->gw_seq); + CERROR("phase 0: discard replayed req: seq %u\n", gw->gw_seq); *major = GSS_S_DUPLICATE_TOKEN; RETURN(-EACCES); } @@ -1631,7 +1719,7 @@ int gss_svc_verify_request(struct ptlrpc_request *req, RETURN(-EACCES); if (gss_check_seq_num(&gctx->gsc_seqdata, gw->gw_seq, 1)) { - CERROR("phase 2: discard replayed req: seq %u\n", gw->gw_seq); + CERROR("phase 1+: discard replayed req: seq %u\n", gw->gw_seq); *major = GSS_S_DUPLICATE_TOKEN; RETURN(-EACCES); } @@ -1679,7 +1767,7 @@ int gss_svc_unseal_request(struct ptlrpc_request *req, ENTRY; if (gss_check_seq_num(&gctx->gsc_seqdata, gw->gw_seq, 0)) { - CERROR("phase 1: discard replayed req: seq %u\n", gw->gw_seq); + CERROR("phase 0: discard replayed req: seq %u\n", gw->gw_seq); *major = GSS_S_DUPLICATE_TOKEN; RETURN(-EACCES); } @@ -1690,7 +1778,7 @@ int gss_svc_unseal_request(struct ptlrpc_request *req, RETURN(-EACCES); if (gss_check_seq_num(&gctx->gsc_seqdata, gw->gw_seq, 1)) { - CERROR("phase 2: discard replayed req: seq %u\n", gw->gw_seq); + CERROR("phase 1+: discard replayed req: seq %u\n", gw->gw_seq); *major = GSS_S_DUPLICATE_TOKEN; RETURN(-EACCES); } diff --git a/lustre/ptlrpc/pers.c b/lustre/ptlrpc/pers.c index 8e92509..865dcf0 100644 --- a/lustre/ptlrpc/pers.c +++ b/lustre/ptlrpc/pers.c @@ -45,7 +45,7 @@ void ptlrpc_fill_bulk_md (lnet_md_t *md, struct ptlrpc_bulk_desc *desc) LASSERT (!(md->options & (LNET_MD_IOVEC | LNET_MD_KIOV | LNET_MD_PHYS))); md->options |= LNET_MD_KIOV; - md->start = desc->bd_enc_iov ? desc->bd_enc_iov : &desc->bd_iov[0]; + md->start = &desc->bd_iov[0]; md->length = desc->bd_iov_count; } @@ -73,61 +73,6 @@ void ptl_rpc_wipe_bulk_pages(struct ptlrpc_bulk_desc *desc) } } -int ptlrpc_bulk_alloc_enc_pages(struct ptlrpc_bulk_desc *desc) -{ - int i, alloc_size; - - LASSERT(desc->bd_enc_iov == NULL); - - if (desc->bd_iov_count == 0) - return 0; - - alloc_size = desc->bd_iov_count * sizeof(desc->bd_enc_iov[0]); - - OBD_ALLOC(desc->bd_enc_iov, alloc_size); - if (desc->bd_enc_iov == NULL) - return -ENOMEM; - - memcpy(desc->bd_enc_iov, desc->bd_iov, alloc_size); - - for (i = 0; i < desc->bd_iov_count; i++) { - desc->bd_enc_iov[i].kiov_page = - cfs_alloc_page(CFS_ALLOC_IO | CFS_ALLOC_HIGH); - if (desc->bd_enc_iov[i].kiov_page == NULL) { - CERROR("Failed to alloc %d encryption pages\n", - desc->bd_iov_count); - break; - } - } - - if (i == desc->bd_iov_count) - return 0; - - /* error, cleanup */ - for (i = i - 1; i >= 0; i--) - __free_page(desc->bd_enc_iov[i].kiov_page); - OBD_FREE(desc->bd_enc_iov, alloc_size); - desc->bd_enc_iov = NULL; - return -ENOMEM; -} - -void ptlrpc_bulk_free_enc_pages(struct ptlrpc_bulk_desc *desc) -{ - int i; - - if (desc->bd_enc_iov == NULL) - return; - - for (i = 0; i < desc->bd_iov_count; i++) { - LASSERT(desc->bd_enc_iov[i].kiov_page); - __free_page(desc->bd_enc_iov[i].kiov_page); - } - - OBD_FREE(desc->bd_enc_iov, - desc->bd_iov_count * sizeof(desc->bd_enc_iov[0])); - desc->bd_enc_iov = NULL; -} - #else /* !__KERNEL__ */ void ptlrpc_fill_bulk_md(lnet_md_t *md, struct ptlrpc_bulk_desc *desc) @@ -182,12 +127,4 @@ void ptl_rpc_wipe_bulk_pages(struct ptlrpc_bulk_desc *desc) memset(iov->iov_base, 0xab, iov->iov_len); } } - -int ptlrpc_bulk_alloc_enc_pages(struct ptlrpc_bulk_desc *desc) -{ - return 0; -} -void ptlrpc_bulk_free_enc_pages(struct ptlrpc_bulk_desc *desc) -{ -} #endif /* !__KERNEL__ */ diff --git a/lustre/ptlrpc/ptlrpc_internal.h b/lustre/ptlrpc/ptlrpc_internal.h index bd92a26..8c2e352 100644 --- a/lustre/ptlrpc/ptlrpc_internal.h +++ b/lustre/ptlrpc/ptlrpc_internal.h @@ -143,15 +143,26 @@ int ping_evictor_wake(struct obd_export *exp); #endif /* sec_null.c */ -int sptlrpc_null_init(void); -int sptlrpc_null_exit(void); +int sptlrpc_null_init(void); +void sptlrpc_null_fini(void); /* sec_plain.c */ -int sptlrpc_plain_init(void); -int sptlrpc_plain_exit(void); +int sptlrpc_plain_init(void); +void sptlrpc_plain_fini(void); + +/* sec_bulk.c */ +int sptlrpc_enc_pool_init(void); +void sptlrpc_enc_pool_fini(void); +int sptlrpc_proc_read_enc_pool(char *page, char **start, off_t off, int count, + int *eof, void *data); +const char * sptlrpc_bulk_csum_alg2name(__u32 csum_alg); + +/* sec_lproc.c */ +int sptlrpc_lproc_init(void); +void sptlrpc_lproc_fini(void); /* sec.c */ -int sptlrpc_init(void); -int sptlrpc_exit(void); +int sptlrpc_init(void); +void sptlrpc_fini(void); #endif /* PTLRPC_INTERNAL_H */ diff --git a/lustre/ptlrpc/ptlrpc_module.c b/lustre/ptlrpc/ptlrpc_module.c index b625000..15b9dd8 100644 --- a/lustre/ptlrpc/ptlrpc_module.c +++ b/lustre/ptlrpc/ptlrpc_module.c @@ -115,7 +115,7 @@ cleanup: #ifdef __KERNEL__ static void __exit ptlrpc_exit(void) { - sptlrpc_exit(); + sptlrpc_fini(); ldlm_exit(); ptlrpc_stop_pinger(); ptlrpc_exit_portals(); @@ -290,10 +290,6 @@ EXPORT_SYMBOL(ptlrpc_invalidate_import); EXPORT_SYMBOL(ptlrpc_fail_import); EXPORT_SYMBOL(ptlrpc_recover_import); -/* pers.c */ -EXPORT_SYMBOL(ptlrpc_bulk_alloc_enc_pages); -EXPORT_SYMBOL(ptlrpc_bulk_free_enc_pages); - /* pinger.c */ EXPORT_SYMBOL(ptlrpc_pinger_add_import); EXPORT_SYMBOL(ptlrpc_pinger_del_import); diff --git a/lustre/ptlrpc/sec.c b/lustre/ptlrpc/sec.c index 15326f3..964d008 100644 --- a/lustre/ptlrpc/sec.c +++ b/lustre/ptlrpc/sec.c @@ -678,6 +678,16 @@ void sptlrpc_ctx_wakeup(struct ptlrpc_cli_ctx *ctx) } EXPORT_SYMBOL(sptlrpc_ctx_wakeup); +int sptlrpc_ctx_display(struct ptlrpc_cli_ctx *ctx, char *buf, int bufsize) +{ + LASSERT(ctx->cc_ops); + + if (ctx->cc_ops->display == NULL) + return 0; + + return ctx->cc_ops->display(ctx, buf, bufsize); +} + void sptlrpc_req_put_ctx(struct ptlrpc_request *req) { ENTRY; @@ -1800,403 +1810,6 @@ int sptlrpc_unpack_user_desc(struct lustre_msg *msg, int offset) EXPORT_SYMBOL(sptlrpc_unpack_user_desc); /**************************************** - * Helpers to assist policy modules to * - * implement checksum funcationality * - ****************************************/ - -struct { - char *name; - int size; -} csum_types[] = { - [BULK_CSUM_ALG_NULL] = { "null", 0 }, - [BULK_CSUM_ALG_CRC32] = { "crc32", 4 }, - [BULK_CSUM_ALG_MD5] = { "md5", 16 }, - [BULK_CSUM_ALG_SHA1] = { "sha1", 20 }, - [BULK_CSUM_ALG_SHA256] = { "sha256", 32 }, - [BULK_CSUM_ALG_SHA384] = { "sha384", 48 }, - [BULK_CSUM_ALG_SHA512] = { "sha512", 64 }, -}; - -int bulk_sec_desc_size(__u32 csum_alg, int request, int read) -{ - int size = sizeof(struct ptlrpc_bulk_sec_desc); - - LASSERT(csum_alg < BULK_CSUM_ALG_MAX); - - /* read request don't need extra data */ - if (!(read && request)) - size += csum_types[csum_alg].size; - - return size; -} -EXPORT_SYMBOL(bulk_sec_desc_size); - -int bulk_sec_desc_unpack(struct lustre_msg *msg, int offset) -{ - struct ptlrpc_bulk_sec_desc *bsd; - int size = msg->lm_buflens[offset]; - - bsd = lustre_msg_buf(msg, offset, sizeof(*bsd)); - if (bsd == NULL) { - CERROR("Invalid bulk sec desc: size %d\n", size); - return -EINVAL; - } - - if (lustre_msg_swabbed(msg)) { - __swab32s(&bsd->bsd_version); - __swab32s(&bsd->bsd_pad); - __swab32s(&bsd->bsd_csum_alg); - __swab32s(&bsd->bsd_priv_alg); - } - - if (bsd->bsd_version != 0) { - CERROR("Unexpected version %u\n", bsd->bsd_version); - return -EPROTO; - } - - if (bsd->bsd_csum_alg >= BULK_CSUM_ALG_MAX) { - CERROR("Unsupported checksum algorithm %u\n", - bsd->bsd_csum_alg); - return -EINVAL; - } - if (bsd->bsd_priv_alg >= BULK_PRIV_ALG_MAX) { - CERROR("Unsupported cipher algorithm %u\n", - bsd->bsd_priv_alg); - return -EINVAL; - } - - if (size > sizeof(*bsd) && - size < sizeof(*bsd) + csum_types[bsd->bsd_csum_alg].size) { - CERROR("Mal-formed checksum data: csum alg %u, size %d\n", - bsd->bsd_csum_alg, size); - return -EINVAL; - } - - return 0; -} -EXPORT_SYMBOL(bulk_sec_desc_unpack); - -#ifdef __KERNEL__ -static -int do_bulk_checksum_crc32(struct ptlrpc_bulk_desc *desc, void *buf) -{ - struct page *page; - int off; - char *ptr; - __u32 crc32 = ~0; - int len, i; - - for (i = 0; i < desc->bd_iov_count; i++) { - page = desc->bd_iov[i].kiov_page; - off = desc->bd_iov[i].kiov_offset & ~CFS_PAGE_MASK; - ptr = cfs_kmap(page) + off; - len = desc->bd_iov[i].kiov_len; - - crc32 = crc32_le(crc32, ptr, len); - - cfs_kunmap(page); - } - - *((__u32 *) buf) = crc32; - return 0; -} - -static -int do_bulk_checksum(struct ptlrpc_bulk_desc *desc, __u32 alg, void *buf) -{ - struct crypto_tfm *tfm; - struct scatterlist *sl; - int i, rc = 0; - - LASSERT(alg > BULK_CSUM_ALG_NULL && - alg < BULK_CSUM_ALG_MAX); - - if (alg == BULK_CSUM_ALG_CRC32) - return do_bulk_checksum_crc32(desc, buf); - - tfm = crypto_alloc_tfm(csum_types[alg].name, 0); - if (tfm == NULL) { - CERROR("Unable to allocate tfm %s\n", csum_types[alg].name); - return -ENOMEM; - } - - OBD_ALLOC(sl, sizeof(*sl) * desc->bd_iov_count); - if (sl == NULL) { - rc = -ENOMEM; - goto out_tfm; - } - - for (i = 0; i < desc->bd_iov_count; i++) { - sl[i].page = desc->bd_iov[i].kiov_page; - sl[i].offset = desc->bd_iov[i].kiov_offset & ~CFS_PAGE_MASK; - sl[i].length = desc->bd_iov[i].kiov_len; - } - - crypto_digest_init(tfm); - crypto_digest_update(tfm, sl, desc->bd_iov_count); - crypto_digest_final(tfm, buf); - - OBD_FREE(sl, sizeof(*sl) * desc->bd_iov_count); - -out_tfm: - crypto_free_tfm(tfm); - return rc; -} - -#else /* !__KERNEL__ */ -static -int do_bulk_checksum(struct ptlrpc_bulk_desc *desc, __u32 alg, void *buf) -{ - __u32 crc32 = ~0; - int i; - - LASSERT(alg == BULK_CSUM_ALG_CRC32); - - for (i = 0; i < desc->bd_iov_count; i++) { - char *ptr = desc->bd_iov[i].iov_base; - int len = desc->bd_iov[i].iov_len; - - crc32 = crc32_le(crc32, ptr, len); - } - - *((__u32 *) buf) = crc32; - return 0; -} -#endif - -/* - * perform algorithm @alg checksum on @desc, store result in @buf. - * if anything goes wrong, leave 'alg' be BULK_CSUM_ALG_NULL. - */ -static -int generate_bulk_csum(struct ptlrpc_bulk_desc *desc, __u32 alg, - struct ptlrpc_bulk_sec_desc *bsd, int bsdsize) -{ - int rc; - - LASSERT(bsd); - LASSERT(alg < BULK_CSUM_ALG_MAX); - - bsd->bsd_csum_alg = BULK_CSUM_ALG_NULL; - - if (alg == BULK_CSUM_ALG_NULL) - return 0; - - LASSERT(bsdsize >= sizeof(*bsd) + csum_types[alg].size); - - rc = do_bulk_checksum(desc, alg, bsd->bsd_csum); - if (rc == 0) - bsd->bsd_csum_alg = alg; - - return rc; -} - -static -int verify_bulk_csum(struct ptlrpc_bulk_desc *desc, int read, - struct ptlrpc_bulk_sec_desc *bsdv, int bsdvsize, - struct ptlrpc_bulk_sec_desc *bsdr, int bsdrsize) -{ - char *csum_p; - char *buf = NULL; - int csum_size, rc = 0; - - LASSERT(bsdv); - LASSERT(bsdv->bsd_csum_alg < BULK_CSUM_ALG_MAX); - - if (bsdr) - bsdr->bsd_csum_alg = BULK_CSUM_ALG_NULL; - - if (bsdv->bsd_csum_alg == BULK_CSUM_ALG_NULL) - return 0; - - /* for all supported algorithms */ - csum_size = csum_types[bsdv->bsd_csum_alg].size; - - if (bsdvsize < sizeof(*bsdv) + csum_size) { - CERROR("verifier size %d too small, require %d\n", - bsdvsize, sizeof(*bsdv) + csum_size); - return -EINVAL; - } - - if (bsdr) { - LASSERT(bsdrsize >= sizeof(*bsdr) + csum_size); - csum_p = (char *) bsdr->bsd_csum; - } else { - OBD_ALLOC(buf, csum_size); - if (buf == NULL) - return -EINVAL; - csum_p = buf; - } - - rc = do_bulk_checksum(desc, bsdv->bsd_csum_alg, csum_p); - - if (memcmp(bsdv->bsd_csum, csum_p, csum_size)) { - CERROR("BAD %s CHECKSUM (%s), data mutated during " - "transfer!\n", read ? "READ" : "WRITE", - csum_types[bsdv->bsd_csum_alg].name); - rc = -EINVAL; - } else { - CDEBUG(D_SEC, "bulk %s checksum (%s) verified\n", - read ? "read" : "write", - csum_types[bsdv->bsd_csum_alg].name); - } - - if (bsdr) { - bsdr->bsd_csum_alg = bsdv->bsd_csum_alg; - memcpy(bsdr->bsd_csum, csum_p, csum_size); - } else { - LASSERT(buf); - OBD_FREE(buf, csum_size); - } - - return rc; -} - -int bulk_csum_cli_request(struct ptlrpc_bulk_desc *desc, int read, - __u32 alg, struct lustre_msg *rmsg, int roff) -{ - struct ptlrpc_bulk_sec_desc *bsdr; - int rsize, rc = 0; - - rsize = rmsg->lm_buflens[roff]; - bsdr = lustre_msg_buf(rmsg, roff, sizeof(*bsdr)); - - LASSERT(bsdr); - LASSERT(rsize >= sizeof(*bsdr)); - LASSERT(alg < BULK_CSUM_ALG_MAX); - - if (read) - bsdr->bsd_csum_alg = alg; - else { - rc = generate_bulk_csum(desc, alg, bsdr, rsize); - if (rc) { - CERROR("client bulk write: failed to perform " - "checksum: %d\n", rc); - } - } - - return rc; -} -EXPORT_SYMBOL(bulk_csum_cli_request); - -int bulk_csum_cli_reply(struct ptlrpc_bulk_desc *desc, int read, - struct lustre_msg *rmsg, int roff, - struct lustre_msg *vmsg, int voff) -{ - struct ptlrpc_bulk_sec_desc *bsdv, *bsdr; - int rsize, vsize; - - rsize = rmsg->lm_buflens[roff]; - vsize = vmsg->lm_buflens[voff]; - bsdr = lustre_msg_buf(rmsg, roff, 0); - bsdv = lustre_msg_buf(vmsg, voff, 0); - - if (bsdv == NULL || vsize < sizeof(*bsdv)) { - CERROR("Invalid checksum verifier from server: size %d\n", - vsize); - return -EINVAL; - } - - LASSERT(bsdr); - LASSERT(rsize >= sizeof(*bsdr)); - LASSERT(vsize >= sizeof(*bsdv)); - - if (bsdr->bsd_csum_alg != bsdv->bsd_csum_alg) { - CERROR("bulk %s: checksum algorithm mismatch: client request " - "%s but server reply with %s. try to use the new one " - "for checksum verification\n", - read ? "read" : "write", - csum_types[bsdr->bsd_csum_alg].name, - csum_types[bsdv->bsd_csum_alg].name); - } - - if (read) - return verify_bulk_csum(desc, 1, bsdv, vsize, NULL, 0); - else { - char *cli, *srv, *new = NULL; - int csum_size = csum_types[bsdr->bsd_csum_alg].size; - - LASSERT(bsdr->bsd_csum_alg < BULK_CSUM_ALG_MAX); - if (bsdr->bsd_csum_alg == BULK_CSUM_ALG_NULL) - return 0; - - if (vsize < sizeof(*bsdv) + csum_size) { - CERROR("verifier size %d too small, require %d\n", - vsize, sizeof(*bsdv) + csum_size); - return -EINVAL; - } - - cli = (char *) (bsdr + 1); - srv = (char *) (bsdv + 1); - - if (!memcmp(cli, srv, csum_size)) { - /* checksum confirmed */ - CDEBUG(D_SEC, "bulk write checksum (%s) confirmed\n", - csum_types[bsdr->bsd_csum_alg].name); - return 0; - } - - /* checksum mismatch, re-compute a new one and compare with - * others, give out proper warnings. - */ - OBD_ALLOC(new, csum_size); - if (new == NULL) - return -ENOMEM; - - do_bulk_checksum(desc, bsdr->bsd_csum_alg, new); - - if (!memcmp(new, srv, csum_size)) { - CERROR("BAD WRITE CHECKSUM (%s): pages were mutated " - "on the client after we checksummed them\n", - csum_types[bsdr->bsd_csum_alg].name); - } else if (!memcmp(new, cli, csum_size)) { - CERROR("BAD WRITE CHECKSUM (%s): pages were mutated " - "in transit\n", - csum_types[bsdr->bsd_csum_alg].name); - } else { - CERROR("BAD WRITE CHECKSUM (%s): pages were mutated " - "in transit, and the current page contents " - "don't match the originals and what the server " - "received\n", - csum_types[bsdr->bsd_csum_alg].name); - } - OBD_FREE(new, csum_size); - - return -EINVAL; - } -} -EXPORT_SYMBOL(bulk_csum_cli_reply); - -int bulk_csum_svc(struct ptlrpc_bulk_desc *desc, int read, - struct lustre_msg *vmsg, int voff, - struct lustre_msg *rmsg, int roff) -{ - struct ptlrpc_bulk_sec_desc *bsdv, *bsdr; - int vsize, rsize, rc; - - vsize = vmsg->lm_buflens[voff]; - rsize = rmsg->lm_buflens[roff]; - bsdv = lustre_msg_buf(vmsg, voff, 0); - bsdr = lustre_msg_buf(rmsg, roff, 0); - - LASSERT(vsize >= sizeof(*bsdv)); - LASSERT(rsize >= sizeof(*bsdr)); - LASSERT(bsdv && bsdr); - - if (read) { - rc = generate_bulk_csum(desc, bsdv->bsd_csum_alg, bsdr, rsize); - if (rc) - CERROR("bulk read: server failed to generate %s " - "checksum: %d\n", - csum_types[bsdv->bsd_csum_alg].name, rc); - } else - rc = verify_bulk_csum(desc, 0, bsdv, vsize, bsdr, rsize); - - return rc; -} -EXPORT_SYMBOL(bulk_csum_svc); - -/**************************************** * user supplied flavor string parsing * ****************************************/ @@ -2368,7 +1981,7 @@ int sptlrpc_parse_flavor(enum lustre_part from_part, enum lustre_part to_part, /* checksum algorithm */ for (i = 0; i < BULK_CSUM_ALG_MAX; i++) { - if (strcmp(alg, csum_types[i].name) == 0) { + if (strcmp(alg, sptlrpc_bulk_csum_alg2name(i)) == 0) { conf->sfc_bulk_csum = i; break; } @@ -2384,13 +1997,17 @@ int sptlrpc_parse_flavor(enum lustre_part from_part, enum lustre_part to_part, } set_flags: - /* set ROOTONLY flag to: - * - to OST - * - from MDT to MDT + /* * set ROOTONLY flag: + * - to OST + * - from MDT to MDT + * * set BULK flag for: + * - from CLI to OST */ - if ((to_part == LUSTRE_MDT && from_part == LUSTRE_MDT) || - to_part == LUSTRE_OST) + if (to_part == LUSTRE_OST || + (from_part == LUSTRE_MDT && to_part == LUSTRE_MDT)) conf->sfc_flags |= PTLRPC_SEC_FL_ROOTONLY; + if (from_part == LUSTRE_CLI && to_part == LUSTRE_OST) + conf->sfc_flags |= PTLRPC_SEC_FL_BULK; #ifdef __BIG_ENDIAN __swab32s(&conf->sfc_rpc_flavor); @@ -2419,77 +2036,46 @@ const char * sec2target_str(struct ptlrpc_sec *sec) } EXPORT_SYMBOL(sec2target_str); -int sptlrpc_lprocfs_rd(char *page, char **start, off_t off, int count, - int *eof, void *data) -{ - struct obd_device *obd = data; - struct sec_flavor_config *conf = &obd->u.cli.cl_sec_conf; - struct ptlrpc_sec *sec = NULL; - char flags_str[20]; - - if (obd == NULL) - return 0; - - LASSERT(strcmp(obd->obd_type->typ_name, LUSTRE_OSC_NAME) == 0 || - strcmp(obd->obd_type->typ_name, LUSTRE_MDC_NAME) == 0 || - strcmp(obd->obd_type->typ_name, LUSTRE_MGC_NAME) == 0); - LASSERT(conf->sfc_bulk_csum < BULK_CSUM_ALG_MAX); - LASSERT(conf->sfc_bulk_priv < BULK_PRIV_ALG_MAX); - - if (obd->u.cli.cl_import) - sec = obd->u.cli.cl_import->imp_sec; - - flags_str[0] = '\0'; - if (conf->sfc_flags & PTLRPC_SEC_FL_REVERSE) - strncat(flags_str, "reverse,", sizeof(flags_str)); - if (conf->sfc_flags & PTLRPC_SEC_FL_ROOTONLY) - strncat(flags_str, "rootonly,", sizeof(flags_str)); - if (flags_str[0] != '\0') - flags_str[strlen(flags_str) - 1] = '\0'; - - return snprintf(page, count, - "rpc_flavor: %s\n" - "bulk_flavor: %s checksum, %s encryption\n" - "flags: %s\n" - "ctx_cache: size %u, busy %d\n" - "gc: interval %lus, next %lds\n", - sptlrpc_flavor2name(conf->sfc_rpc_flavor), - csum_types[conf->sfc_bulk_csum].name, - conf->sfc_bulk_priv == BULK_PRIV_ALG_NULL ? - "null" : "arc4", // XXX - flags_str, - sec ? sec->ps_ccache_size : 0, - sec ? atomic_read(&sec->ps_busy) : 0, - sec ? sec->ps_gc_interval: 0, - sec ? (sec->ps_gc_interval ? - sec->ps_gc_next - cfs_time_current_sec() : 0) - : 0); -} -EXPORT_SYMBOL(sptlrpc_lprocfs_rd); - +/**************************************** + * initialize/finalize * + ****************************************/ int sptlrpc_init(void) { int rc; - rc = sptlrpc_null_init(); + rc = sptlrpc_enc_pool_init(); if (rc) goto out; + rc = sptlrpc_null_init(); + if (rc) + goto out_pool; + rc = sptlrpc_plain_init(); if (rc) goto out_null; + + rc = sptlrpc_lproc_init(); + if (rc) + goto out_plain; + return 0; +out_plain: + sptlrpc_plain_fini(); out_null: - sptlrpc_null_exit(); + sptlrpc_null_fini(); +out_pool: + sptlrpc_enc_pool_fini(); out: return rc; } -int sptlrpc_exit(void) +void sptlrpc_fini(void) { - sptlrpc_plain_exit(); - sptlrpc_null_exit(); - return 0; + sptlrpc_lproc_fini(); + sptlrpc_plain_fini(); + sptlrpc_null_fini(); + sptlrpc_enc_pool_fini(); } diff --git a/lustre/ptlrpc/sec_bulk.c b/lustre/ptlrpc/sec_bulk.c new file mode 100644 index 0000000..ffdda1b --- /dev/null +++ b/lustre/ptlrpc/sec_bulk.c @@ -0,0 +1,1003 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2006 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef EXPORT_SYMTAB +#define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_SEC + +#include +#ifndef __KERNEL__ +#include +#include +#else +#include +#endif + +#include +#include +#include +#include +#include +#include +#include + +#include "ptlrpc_internal.h" + +/**************************************** + * bulk encryption page pools * + ****************************************/ + +#ifdef __KERNEL__ + +#define PTRS_PER_PAGE (CFS_PAGE_SIZE / sizeof(void *)) +#define PAGES_PER_POOL (PTRS_PER_PAGE) + +static struct ptlrpc_enc_page_pool { + /* + * constants + */ + unsigned long epp_max_pages; /* maximum pages can hold, const */ + unsigned int epp_max_pools; /* number of pools, const */ + /* + * users of the pools. the capacity grow as more user added, + * but doesn't shrink when users gone -- just current policy. + * during failover there might be user add/remove activities. + */ + atomic_t epp_users; /* shared by how many users (osc) */ + atomic_t epp_users_gone; /* users removed */ + /* + * wait queue in case of not enough free pages. + */ + cfs_waitq_t epp_waitq; /* waiting threads */ + unsigned int epp_waitqlen; /* wait queue length */ + unsigned long epp_pages_short; /* # of pages wanted of in-q users */ + unsigned long epp_adding:1, /* during adding pages */ + epp_full:1; /* pools are all full */ + /* + * in-pool pages bookkeeping + */ + spinlock_t epp_lock; /* protect following fields */ + unsigned long epp_total_pages; /* total pages in pools */ + unsigned long epp_free_pages; /* current pages available */ + /* + * statistics + */ + unsigned int epp_st_adds; + unsigned int epp_st_failadds; /* # of add pages failures */ + unsigned long epp_st_reqs; /* # of get_pages requests */ + unsigned long epp_st_missings; /* # of cache missing */ + unsigned long epp_st_lowfree; /* lowest free pages ever reached */ + unsigned long epp_st_max_wqlen;/* highest waitqueue length ever */ + cfs_time_t epp_st_max_wait; /* in jeffies */ + /* + * pointers to pools + */ + cfs_page_t ***epp_pools; +} page_pools; + +int sptlrpc_proc_read_enc_pool(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + int rc; + + spin_lock(&page_pools.epp_lock); + + rc = snprintf(page, count, + "physical pages: %lu\n" + "pages per pool: %lu\n" + "max pages: %lu\n" + "max pools: %u\n" + "users: %d - %d\n" + "current waitqueue len: %u\n" + "current pages in short: %lu\n" + "total pages: %lu\n" + "total free: %lu\n" + "add page times: %u\n" + "add page failed times: %u\n" + "total requests: %lu\n" + "cache missing: %lu\n" + "lowest free pages: %lu\n" + "max waitqueue depth: %lu\n" + "max wait time: "CFS_TIME_T"\n" + , + num_physpages, + PAGES_PER_POOL, + page_pools.epp_max_pages, + page_pools.epp_max_pools, + atomic_read(&page_pools.epp_users), + atomic_read(&page_pools.epp_users_gone), + page_pools.epp_waitqlen, + page_pools.epp_pages_short, + page_pools.epp_total_pages, + page_pools.epp_free_pages, + page_pools.epp_st_adds, + page_pools.epp_st_failadds, + page_pools.epp_st_reqs, + page_pools.epp_st_missings, + page_pools.epp_st_lowfree, + page_pools.epp_st_max_wqlen, + page_pools.epp_st_max_wait + ); + + spin_unlock(&page_pools.epp_lock); + return rc; +} + +static inline +int npages_to_npools(unsigned long npages) +{ + return (int) ((npages + PAGES_PER_POOL - 1) / PAGES_PER_POOL); +} + +/* + * return how many pages cleaned up. + */ +static unsigned long enc_cleanup_pools(cfs_page_t ***pools, int npools) +{ + unsigned long cleaned = 0; + int i, j; + + for (i = 0; i < npools; i++) { + if (pools[i]) { + for (j = 0; j < PAGES_PER_POOL; j++) { + if (pools[i][j]) { + cfs_free_page(pools[i][j]); + cleaned++; + } + } + OBD_FREE(pools[i], CFS_PAGE_SIZE); + pools[i] = NULL; + } + } + + return cleaned; +} + +/* + * merge @npools pointed by @pools which contains @npages new pages + * into current pools. + * + * we have options to avoid most memory copy with some tricks. but we choose + * the simplest way to avoid complexity. It's not frequently called. + */ +static void enc_insert_pool(cfs_page_t ***pools, int npools, int npages) +{ + int freeslot; + int op_idx, np_idx, og_idx, ng_idx; + int cur_npools, end_npools; + + LASSERT(npages > 0); + LASSERT(page_pools.epp_total_pages+npages <= page_pools.epp_max_pages); + LASSERT(npages_to_npools(npages) == npools); + + spin_lock(&page_pools.epp_lock); + + /* + * (1) fill all the free slots of current pools. + */ + /* + * free slots are those left by rent pages, and the extra ones with + * index >= eep_total_pages, locate at the tail of last pool. + */ + freeslot = page_pools.epp_total_pages % PAGES_PER_POOL; + if (freeslot != 0) + freeslot = PAGES_PER_POOL - freeslot; + freeslot += page_pools.epp_total_pages - page_pools.epp_free_pages; + + op_idx = page_pools.epp_free_pages / PAGES_PER_POOL; + og_idx = page_pools.epp_free_pages % PAGES_PER_POOL; + np_idx = npools - 1; + ng_idx = (npages - 1) % PAGES_PER_POOL; + + while (freeslot) { + LASSERT(page_pools.epp_pools[op_idx][og_idx] == NULL); + LASSERT(pools[np_idx][ng_idx] != NULL); + + page_pools.epp_pools[op_idx][og_idx] = pools[np_idx][ng_idx]; + pools[np_idx][ng_idx] = NULL; + + freeslot--; + + if (++og_idx == PAGES_PER_POOL) { + op_idx++; + og_idx = 0; + } + if (--ng_idx < 0) { + if (np_idx == 0) + break; + np_idx--; + ng_idx = PAGES_PER_POOL - 1; + } + } + + /* + * (2) add pools if needed. + */ + cur_npools = (page_pools.epp_total_pages + PAGES_PER_POOL - 1) / + PAGES_PER_POOL; + end_npools = (page_pools.epp_total_pages + npages + PAGES_PER_POOL -1) / + PAGES_PER_POOL; + LASSERT(end_npools <= page_pools.epp_max_pools); + + np_idx = 0; + while (cur_npools < end_npools) { + LASSERT(page_pools.epp_pools[cur_npools] == NULL); + LASSERT(np_idx < npools); + LASSERT(pools[np_idx] != NULL); + + page_pools.epp_pools[cur_npools++] = pools[np_idx]; + pools[np_idx++] = NULL; + } + + page_pools.epp_total_pages += npages; + page_pools.epp_free_pages += npages; + page_pools.epp_st_lowfree = page_pools.epp_free_pages; + + if (page_pools.epp_total_pages == page_pools.epp_max_pages) + page_pools.epp_full = 1; + + CDEBUG(D_SEC, "add %d pages to total %lu\n", npages, + page_pools.epp_total_pages); + + spin_unlock(&page_pools.epp_lock); +} + +static int enc_pools_add_pages(int npages) +{ + static DECLARE_MUTEX(sem_add_pages); + cfs_page_t ***pools; + int npools, alloced = 0; + int i, j, rc = -ENOMEM; + + down(&sem_add_pages); + + if (npages > page_pools.epp_max_pages - page_pools.epp_total_pages) + npages = page_pools.epp_max_pages - page_pools.epp_total_pages; + if (npages == 0) { + rc = 0; + goto out; + } + + page_pools.epp_st_adds++; + + npools = npages_to_npools(npages); + OBD_ALLOC(pools, npools * sizeof(*pools)); + if (pools == NULL) + goto out; + + for (i = 0; i < npools; i++) { + OBD_ALLOC(pools[i], CFS_PAGE_SIZE); + if (pools[i] == NULL) + goto out_pools; + + for (j = 0; j < PAGES_PER_POOL && alloced < npages; j++) { + pools[i][j] = cfs_alloc_page(CFS_ALLOC_IO | + CFS_ALLOC_HIGH); + if (pools[i][j] == NULL) + goto out_pools; + + alloced++; + } + } + + enc_insert_pool(pools, npools, npages); + CDEBUG(D_SEC, "add %d pages into enc page pools\n", npages); + rc = 0; + +out_pools: + enc_cleanup_pools(pools, npools); + OBD_FREE(pools, npools * sizeof(*pools)); +out: + if (rc) { + page_pools.epp_st_failadds++; + CERROR("Failed to pre-allocate %d enc pages\n", npages); + } + + up(&sem_add_pages); + return rc; +} + +/* + * both "max bulk rpcs inflight" and "lnet MTU" are tunable, we use the + * default fixed value initially. + */ +int sptlrpc_enc_pool_add_user(void) +{ + int page_plus = PTLRPC_MAX_BRW_PAGES * OSC_MAX_RIF_DEFAULT; + int users, users_gone, shift, rc; + + LASSERT(!in_interrupt()); + LASSERT(atomic_read(&page_pools.epp_users) >= 0); + + users_gone = atomic_dec_return(&page_pools.epp_users_gone); + if (users_gone >= 0) { + CWARN("%d users gone, skip\n", users_gone + 1); + return 0; + } + atomic_inc(&page_pools.epp_users_gone); + + /* + * prepare full pages for first 2 users; 1/2 for next 2 users; + * 1/4 for next 4 users; 1/8 for next 8 users; 1/16 for next 16 users; + * ... + */ + users = atomic_add_return(1, &page_pools.epp_users); + shift = fls(users - 1); + shift = shift > 1 ? shift - 1 : 0; + page_plus = page_plus >> shift; + page_plus = page_plus > 2 ? page_plus : 2; + + rc = enc_pools_add_pages(page_plus); + return 0; +} +EXPORT_SYMBOL(sptlrpc_enc_pool_add_user); + +int sptlrpc_enc_pool_del_user(void) +{ + atomic_inc(&page_pools.epp_users_gone); + return 0; +} +EXPORT_SYMBOL(sptlrpc_enc_pool_del_user); + +/* + * we allocate the requested pages atomically. + */ +int sptlrpc_enc_pool_get_pages(struct ptlrpc_bulk_desc *desc) +{ + cfs_waitlink_t waitlink; + cfs_time_t tick1 = 0, tick2; + int p_idx, g_idx; + int i; + + LASSERT(desc->bd_enc_pages == NULL); + LASSERT(desc->bd_max_iov > 0); + LASSERT(desc->bd_max_iov <= page_pools.epp_total_pages); + + OBD_ALLOC(desc->bd_enc_pages, + desc->bd_max_iov * sizeof(*desc->bd_enc_pages)); + if (desc->bd_enc_pages == NULL) + return -ENOMEM; + + spin_lock(&page_pools.epp_lock); +again: + page_pools.epp_st_reqs++; + + if (unlikely(page_pools.epp_free_pages < desc->bd_max_iov)) { + if (tick1 == 0) + tick1 = cfs_time_current(); + + page_pools.epp_st_missings++; + page_pools.epp_pages_short += desc->bd_max_iov; + + if (++page_pools.epp_waitqlen > page_pools.epp_st_max_wqlen) + page_pools.epp_st_max_wqlen = page_pools.epp_waitqlen; + /* + * we just wait if someone else is adding more pages, or + * wait queue length is not deep enough. otherwise try to + * add more pages in the pools. + * + * FIXME the policy of detecting resource tight & growing pool + * need to be reconsidered. + */ + if (page_pools.epp_adding || page_pools.epp_waitqlen < 2 || + page_pools.epp_full) { + set_current_state(TASK_UNINTERRUPTIBLE); + cfs_waitlink_init(&waitlink); + cfs_waitq_add(&page_pools.epp_waitq, &waitlink); + + spin_unlock(&page_pools.epp_lock); + cfs_schedule(); + spin_lock(&page_pools.epp_lock); + } else { + page_pools.epp_adding = 1; + + spin_unlock(&page_pools.epp_lock); + enc_pools_add_pages(page_pools.epp_pages_short / 2); + spin_lock(&page_pools.epp_lock); + + page_pools.epp_adding = 0; + } + + LASSERT(page_pools.epp_pages_short >= desc->bd_max_iov); + LASSERT(page_pools.epp_waitqlen > 0); + page_pools.epp_pages_short -= desc->bd_max_iov; + page_pools.epp_waitqlen--; + + goto again; + } + /* + * record max wait time + */ + if (unlikely(tick1 != 0)) { + tick2 = cfs_time_current(); + if (tick2 - tick1 > page_pools.epp_st_max_wait) + page_pools.epp_st_max_wait = tick2 - tick1; + } + /* + * proceed with rest of allocation + */ + page_pools.epp_free_pages -= desc->bd_max_iov; + + p_idx = page_pools.epp_free_pages / PAGES_PER_POOL; + g_idx = page_pools.epp_free_pages % PAGES_PER_POOL; + + for (i = 0; i < desc->bd_max_iov; i++) { + LASSERT(page_pools.epp_pools[p_idx][g_idx] != NULL); + desc->bd_enc_pages[i] = page_pools.epp_pools[p_idx][g_idx]; + page_pools.epp_pools[p_idx][g_idx] = NULL; + + if (++g_idx == PAGES_PER_POOL) { + p_idx++; + g_idx = 0; + } + } + + if (page_pools.epp_free_pages < page_pools.epp_st_lowfree) + page_pools.epp_st_lowfree = page_pools.epp_free_pages; + + spin_unlock(&page_pools.epp_lock); + return 0; +} +EXPORT_SYMBOL(sptlrpc_enc_pool_get_pages); + +void sptlrpc_enc_pool_put_pages(struct ptlrpc_bulk_desc *desc) +{ + int p_idx, g_idx; + int i; + + if (desc->bd_enc_pages == NULL) + return; + if (desc->bd_max_iov == 0) + return; + + spin_lock(&page_pools.epp_lock); + + p_idx = page_pools.epp_free_pages / PAGES_PER_POOL; + g_idx = page_pools.epp_free_pages % PAGES_PER_POOL; + + LASSERT(page_pools.epp_free_pages + desc->bd_max_iov <= + page_pools.epp_total_pages); + LASSERT(page_pools.epp_pools[p_idx]); + + for (i = 0; i < desc->bd_max_iov; i++) { + LASSERT(desc->bd_enc_pages[i] != NULL); + LASSERT(g_idx != 0 || page_pools.epp_pools[p_idx]); + LASSERT(page_pools.epp_pools[p_idx][g_idx] == NULL); + + page_pools.epp_pools[p_idx][g_idx] = desc->bd_enc_pages[i]; + + if (++g_idx == PAGES_PER_POOL) { + p_idx++; + g_idx = 0; + } + } + + page_pools.epp_free_pages += desc->bd_max_iov; + + spin_unlock(&page_pools.epp_lock); + + OBD_FREE(desc->bd_enc_pages, + desc->bd_max_iov * sizeof(*desc->bd_enc_pages)); + desc->bd_enc_pages = NULL; +} +EXPORT_SYMBOL(sptlrpc_enc_pool_put_pages); + +int sptlrpc_enc_pool_init(void) +{ + /* constants */ + page_pools.epp_max_pages = num_physpages / 4; + page_pools.epp_max_pools = npages_to_npools(page_pools.epp_max_pages); + + atomic_set(&page_pools.epp_users, 0); + atomic_set(&page_pools.epp_users_gone, 0); + + cfs_waitq_init(&page_pools.epp_waitq); + page_pools.epp_waitqlen = 0; + page_pools.epp_pages_short = 0; + + page_pools.epp_adding = 0; + page_pools.epp_full = 0; + + spin_lock_init(&page_pools.epp_lock); + page_pools.epp_total_pages = 0; + page_pools.epp_free_pages = 0; + + page_pools.epp_st_adds = 0; + page_pools.epp_st_failadds = 0; + page_pools.epp_st_reqs = 0; + page_pools.epp_st_missings = 0; + page_pools.epp_st_lowfree = 0; + page_pools.epp_st_max_wqlen = 0; + page_pools.epp_st_max_wait = 0; + + OBD_ALLOC(page_pools.epp_pools, + page_pools.epp_max_pools * sizeof(*page_pools.epp_pools)); + if (page_pools.epp_pools == NULL) + return -ENOMEM; + + return 0; +} + +void sptlrpc_enc_pool_fini(void) +{ + unsigned long cleaned, npools; + + LASSERT(page_pools.epp_pools); + LASSERT(page_pools.epp_total_pages == page_pools.epp_free_pages); + + npools = npages_to_npools(page_pools.epp_total_pages); + cleaned = enc_cleanup_pools(page_pools.epp_pools, npools); + LASSERT(cleaned == page_pools.epp_total_pages); + + OBD_FREE(page_pools.epp_pools, + page_pools.epp_max_pools * sizeof(*page_pools.epp_pools)); +} + +#else /* !__KERNEL__ */ + +int sptlrpc_enc_pool_get_pages(struct ptlrpc_bulk_desc *desc) +{ + return 0; +} + +void sptlrpc_enc_pool_put_pages(struct ptlrpc_bulk_desc *desc) +{ +} + +int sptlrpc_enc_pool_init(void) +{ + return 0; +} + +void sptlrpc_enc_pool_fini(void) +{ +} +#endif + +/**************************************** + * Helpers to assist policy modules to * + * implement checksum funcationality * + ****************************************/ + +static struct { + char *name; + int size; +} csum_types[] = { + [BULK_CSUM_ALG_NULL] = { "null", 0 }, + [BULK_CSUM_ALG_CRC32] = { "crc32", 4 }, + [BULK_CSUM_ALG_MD5] = { "md5", 16 }, + [BULK_CSUM_ALG_SHA1] = { "sha1", 20 }, + [BULK_CSUM_ALG_SHA256] = { "sha256", 32 }, + [BULK_CSUM_ALG_SHA384] = { "sha384", 48 }, + [BULK_CSUM_ALG_SHA512] = { "sha512", 64 }, +}; + +const char * sptlrpc_bulk_csum_alg2name(__u32 csum_alg) +{ + if (csum_alg < BULK_CSUM_ALG_MAX) + return csum_types[csum_alg].name; + return "unknown_cksum"; +} +EXPORT_SYMBOL(sptlrpc_bulk_csum_alg2name); + +int bulk_sec_desc_size(__u32 csum_alg, int request, int read) +{ + int size = sizeof(struct ptlrpc_bulk_sec_desc); + + LASSERT(csum_alg < BULK_CSUM_ALG_MAX); + + /* read request don't need extra data */ + if (!(read && request)) + size += csum_types[csum_alg].size; + + return size; +} +EXPORT_SYMBOL(bulk_sec_desc_size); + +int bulk_sec_desc_unpack(struct lustre_msg *msg, int offset) +{ + struct ptlrpc_bulk_sec_desc *bsd; + int size = msg->lm_buflens[offset]; + + bsd = lustre_msg_buf(msg, offset, sizeof(*bsd)); + if (bsd == NULL) { + CERROR("Invalid bulk sec desc: size %d\n", size); + return -EINVAL; + } + + if (lustre_msg_swabbed(msg)) { + __swab32s(&bsd->bsd_version); + __swab32s(&bsd->bsd_pad); + __swab32s(&bsd->bsd_csum_alg); + __swab32s(&bsd->bsd_priv_alg); + } + + if (bsd->bsd_version != 0) { + CERROR("Unexpected version %u\n", bsd->bsd_version); + return -EPROTO; + } + + if (bsd->bsd_csum_alg >= BULK_CSUM_ALG_MAX) { + CERROR("Unsupported checksum algorithm %u\n", + bsd->bsd_csum_alg); + return -EINVAL; + } + if (bsd->bsd_priv_alg >= BULK_PRIV_ALG_MAX) { + CERROR("Unsupported cipher algorithm %u\n", + bsd->bsd_priv_alg); + return -EINVAL; + } + + if (size > sizeof(*bsd) && + size < sizeof(*bsd) + csum_types[bsd->bsd_csum_alg].size) { + CERROR("Mal-formed checksum data: csum alg %u, size %d\n", + bsd->bsd_csum_alg, size); + return -EINVAL; + } + + return 0; +} +EXPORT_SYMBOL(bulk_sec_desc_unpack); + +#ifdef __KERNEL__ +static +int do_bulk_checksum_crc32(struct ptlrpc_bulk_desc *desc, void *buf) +{ + struct page *page; + int off; + char *ptr; + __u32 crc32 = ~0; + int len, i; + + for (i = 0; i < desc->bd_iov_count; i++) { + page = desc->bd_iov[i].kiov_page; + off = desc->bd_iov[i].kiov_offset & ~CFS_PAGE_MASK; + ptr = cfs_kmap(page) + off; + len = desc->bd_iov[i].kiov_len; + + crc32 = crc32_le(crc32, ptr, len); + + cfs_kunmap(page); + } + + *((__u32 *) buf) = crc32; + return 0; +} + +static +int do_bulk_checksum(struct ptlrpc_bulk_desc *desc, __u32 alg, void *buf) +{ + struct crypto_tfm *tfm; + struct scatterlist *sl; + int i, rc = 0; + + LASSERT(alg > BULK_CSUM_ALG_NULL && + alg < BULK_CSUM_ALG_MAX); + + if (alg == BULK_CSUM_ALG_CRC32) + return do_bulk_checksum_crc32(desc, buf); + + tfm = crypto_alloc_tfm(csum_types[alg].name, 0); + if (tfm == NULL) { + CERROR("Unable to allocate tfm %s\n", csum_types[alg].name); + return -ENOMEM; + } + + OBD_ALLOC(sl, sizeof(*sl) * desc->bd_iov_count); + if (sl == NULL) { + rc = -ENOMEM; + goto out_tfm; + } + + for (i = 0; i < desc->bd_iov_count; i++) { + sl[i].page = desc->bd_iov[i].kiov_page; + sl[i].offset = desc->bd_iov[i].kiov_offset & ~CFS_PAGE_MASK; + sl[i].length = desc->bd_iov[i].kiov_len; + } + + crypto_digest_init(tfm); + crypto_digest_update(tfm, sl, desc->bd_iov_count); + crypto_digest_final(tfm, buf); + + OBD_FREE(sl, sizeof(*sl) * desc->bd_iov_count); + +out_tfm: + crypto_free_tfm(tfm); + return rc; +} + +#else /* !__KERNEL__ */ +static +int do_bulk_checksum(struct ptlrpc_bulk_desc *desc, __u32 alg, void *buf) +{ + __u32 crc32 = ~0; + int i; + + LASSERT(alg == BULK_CSUM_ALG_CRC32); + + for (i = 0; i < desc->bd_iov_count; i++) { + char *ptr = desc->bd_iov[i].iov_base; + int len = desc->bd_iov[i].iov_len; + + crc32 = crc32_le(crc32, ptr, len); + } + + *((__u32 *) buf) = crc32; + return 0; +} +#endif + +/* + * perform algorithm @alg checksum on @desc, store result in @buf. + * if anything goes wrong, leave 'alg' be BULK_CSUM_ALG_NULL. + */ +static +int generate_bulk_csum(struct ptlrpc_bulk_desc *desc, __u32 alg, + struct ptlrpc_bulk_sec_desc *bsd, int bsdsize) +{ + int rc; + + LASSERT(bsd); + LASSERT(alg < BULK_CSUM_ALG_MAX); + + bsd->bsd_csum_alg = BULK_CSUM_ALG_NULL; + + if (alg == BULK_CSUM_ALG_NULL) + return 0; + + LASSERT(bsdsize >= sizeof(*bsd) + csum_types[alg].size); + + rc = do_bulk_checksum(desc, alg, bsd->bsd_csum); + if (rc == 0) + bsd->bsd_csum_alg = alg; + + return rc; +} + +static +int verify_bulk_csum(struct ptlrpc_bulk_desc *desc, int read, + struct ptlrpc_bulk_sec_desc *bsdv, int bsdvsize, + struct ptlrpc_bulk_sec_desc *bsdr, int bsdrsize) +{ + char *csum_p; + char *buf = NULL; + int csum_size, rc = 0; + + LASSERT(bsdv); + LASSERT(bsdv->bsd_csum_alg < BULK_CSUM_ALG_MAX); + + if (bsdr) + bsdr->bsd_csum_alg = BULK_CSUM_ALG_NULL; + + if (bsdv->bsd_csum_alg == BULK_CSUM_ALG_NULL) + return 0; + + /* for all supported algorithms */ + csum_size = csum_types[bsdv->bsd_csum_alg].size; + + if (bsdvsize < sizeof(*bsdv) + csum_size) { + CERROR("verifier size %d too small, require %d\n", + bsdvsize, sizeof(*bsdv) + csum_size); + return -EINVAL; + } + + if (bsdr) { + LASSERT(bsdrsize >= sizeof(*bsdr) + csum_size); + csum_p = (char *) bsdr->bsd_csum; + } else { + OBD_ALLOC(buf, csum_size); + if (buf == NULL) + return -EINVAL; + csum_p = buf; + } + + rc = do_bulk_checksum(desc, bsdv->bsd_csum_alg, csum_p); + + if (memcmp(bsdv->bsd_csum, csum_p, csum_size)) { + CERROR("BAD %s CHECKSUM (%s), data mutated during " + "transfer!\n", read ? "READ" : "WRITE", + csum_types[bsdv->bsd_csum_alg].name); + rc = -EINVAL; + } else { + CDEBUG(D_SEC, "bulk %s checksum (%s) verified\n", + read ? "read" : "write", + csum_types[bsdv->bsd_csum_alg].name); + } + + if (bsdr) { + bsdr->bsd_csum_alg = bsdv->bsd_csum_alg; + memcpy(bsdr->bsd_csum, csum_p, csum_size); + } else { + LASSERT(buf); + OBD_FREE(buf, csum_size); + } + + return rc; +} + +int bulk_csum_cli_request(struct ptlrpc_bulk_desc *desc, int read, + __u32 alg, struct lustre_msg *rmsg, int roff) +{ + struct ptlrpc_bulk_sec_desc *bsdr; + int rsize, rc = 0; + + rsize = rmsg->lm_buflens[roff]; + bsdr = lustre_msg_buf(rmsg, roff, sizeof(*bsdr)); + + LASSERT(bsdr); + LASSERT(rsize >= sizeof(*bsdr)); + LASSERT(alg < BULK_CSUM_ALG_MAX); + + if (read) + bsdr->bsd_csum_alg = alg; + else { + rc = generate_bulk_csum(desc, alg, bsdr, rsize); + if (rc) { + CERROR("client bulk write: failed to perform " + "checksum: %d\n", rc); + } + } + + return rc; +} +EXPORT_SYMBOL(bulk_csum_cli_request); + +int bulk_csum_cli_reply(struct ptlrpc_bulk_desc *desc, int read, + struct lustre_msg *rmsg, int roff, + struct lustre_msg *vmsg, int voff) +{ + struct ptlrpc_bulk_sec_desc *bsdv, *bsdr; + int rsize, vsize; + + rsize = rmsg->lm_buflens[roff]; + vsize = vmsg->lm_buflens[voff]; + bsdr = lustre_msg_buf(rmsg, roff, 0); + bsdv = lustre_msg_buf(vmsg, voff, 0); + + if (bsdv == NULL || vsize < sizeof(*bsdv)) { + CERROR("Invalid checksum verifier from server: size %d\n", + vsize); + return -EINVAL; + } + + LASSERT(bsdr); + LASSERT(rsize >= sizeof(*bsdr)); + LASSERT(vsize >= sizeof(*bsdv)); + + if (bsdr->bsd_csum_alg != bsdv->bsd_csum_alg) { + CERROR("bulk %s: checksum algorithm mismatch: client request " + "%s but server reply with %s. try to use the new one " + "for checksum verification\n", + read ? "read" : "write", + csum_types[bsdr->bsd_csum_alg].name, + csum_types[bsdv->bsd_csum_alg].name); + } + + if (read) + return verify_bulk_csum(desc, 1, bsdv, vsize, NULL, 0); + else { + char *cli, *srv, *new = NULL; + int csum_size = csum_types[bsdr->bsd_csum_alg].size; + + LASSERT(bsdr->bsd_csum_alg < BULK_CSUM_ALG_MAX); + if (bsdr->bsd_csum_alg == BULK_CSUM_ALG_NULL) + return 0; + + if (vsize < sizeof(*bsdv) + csum_size) { + CERROR("verifier size %d too small, require %d\n", + vsize, sizeof(*bsdv) + csum_size); + return -EINVAL; + } + + cli = (char *) (bsdr + 1); + srv = (char *) (bsdv + 1); + + if (!memcmp(cli, srv, csum_size)) { + /* checksum confirmed */ + CDEBUG(D_SEC, "bulk write checksum (%s) confirmed\n", + csum_types[bsdr->bsd_csum_alg].name); + return 0; + } + + /* checksum mismatch, re-compute a new one and compare with + * others, give out proper warnings. + */ + OBD_ALLOC(new, csum_size); + if (new == NULL) + return -ENOMEM; + + do_bulk_checksum(desc, bsdr->bsd_csum_alg, new); + + if (!memcmp(new, srv, csum_size)) { + CERROR("BAD WRITE CHECKSUM (%s): pages were mutated " + "on the client after we checksummed them\n", + csum_types[bsdr->bsd_csum_alg].name); + } else if (!memcmp(new, cli, csum_size)) { + CERROR("BAD WRITE CHECKSUM (%s): pages were mutated " + "in transit\n", + csum_types[bsdr->bsd_csum_alg].name); + } else { + CERROR("BAD WRITE CHECKSUM (%s): pages were mutated " + "in transit, and the current page contents " + "don't match the originals and what the server " + "received\n", + csum_types[bsdr->bsd_csum_alg].name); + } + OBD_FREE(new, csum_size); + + return -EINVAL; + } +} +EXPORT_SYMBOL(bulk_csum_cli_reply); + +int bulk_csum_svc(struct ptlrpc_bulk_desc *desc, int read, + struct lustre_msg *vmsg, int voff, + struct lustre_msg *rmsg, int roff) +{ + struct ptlrpc_bulk_sec_desc *bsdv, *bsdr; + int vsize, rsize, rc; + + vsize = vmsg->lm_buflens[voff]; + rsize = rmsg->lm_buflens[roff]; + bsdv = lustre_msg_buf(vmsg, voff, 0); + bsdr = lustre_msg_buf(rmsg, roff, 0); + + LASSERT(vsize >= sizeof(*bsdv)); + LASSERT(rsize >= sizeof(*bsdr)); + LASSERT(bsdv && bsdr); + + if (read) { + rc = generate_bulk_csum(desc, bsdv->bsd_csum_alg, bsdr, rsize); + if (rc) + CERROR("bulk read: server failed to generate %s " + "checksum: %d\n", + csum_types[bsdv->bsd_csum_alg].name, rc); + } else + rc = verify_bulk_csum(desc, 0, bsdv, vsize, bsdr, rsize); + + return rc; +} +EXPORT_SYMBOL(bulk_csum_svc); + +/**************************************** + * Helpers to assist policy modules to * + * implement encryption funcationality * + ****************************************/ + +/* + * NOTE: These algorithms must be stream cipher! + */ +static struct { + char *name; + __u32 flags; +} priv_types[] = { + [BULK_PRIV_ALG_NULL] = { "null", 0 }, + [BULK_PRIV_ALG_ARC4] = { "arc4", 0 }, +}; + +const char * sptlrpc_bulk_priv_alg2name(__u32 priv_alg) +{ + if (priv_alg < BULK_PRIV_ALG_MAX) + return priv_types[priv_alg].name; + return "unknown_priv"; +} +EXPORT_SYMBOL(sptlrpc_bulk_priv_alg2name); diff --git a/lustre/ptlrpc/sec_lproc.c b/lustre/ptlrpc/sec_lproc.c new file mode 100644 index 0000000..5a6ec02 --- /dev/null +++ b/lustre/ptlrpc/sec_lproc.c @@ -0,0 +1,182 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2006 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef EXPORT_SYMTAB +#define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_SEC + +#include +#ifndef __KERNEL__ +#include +#include +#else +#include +#endif + +#include +#include +#include +#include +#include +#include +#include + +#include "ptlrpc_internal.h" + +#ifdef __KERNEL__ + +struct proc_dir_entry *sptlrpc_proc_root = NULL; +EXPORT_SYMBOL(sptlrpc_proc_root); + +void sec_flags2str(unsigned long flags, char *buf, int bufsize) +{ + buf[0] = '\0'; + + if (flags & PTLRPC_SEC_FL_REVERSE) + strncat(buf, "reverse,", bufsize); + if (flags & PTLRPC_SEC_FL_ROOTONLY) + strncat(buf, "rootonly,", bufsize); + if (flags & PTLRPC_SEC_FL_BULK) + strncat(buf, "bulk,", bufsize); + if (buf[0] == '\0') + strncat(buf, "-,", bufsize); + + buf[strlen(buf) - 1] = '\0'; + +} + +int sptlrpc_lprocfs_rd(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + struct obd_device *obd = data; + struct sec_flavor_config *conf = &obd->u.cli.cl_sec_conf; + struct ptlrpc_sec *sec = NULL; + struct ptlrpc_cli_ctx *ctx; + struct hlist_node *pos, *next; + char flags_str[32]; + int written, i; + + if (obd == NULL) + return 0; + + LASSERT(strcmp(obd->obd_type->typ_name, LUSTRE_OSC_NAME) == 0 || + strcmp(obd->obd_type->typ_name, LUSTRE_MDC_NAME) == 0 || + strcmp(obd->obd_type->typ_name, LUSTRE_MGC_NAME) == 0); + LASSERT(conf->sfc_bulk_csum < BULK_CSUM_ALG_MAX); + LASSERT(conf->sfc_bulk_priv < BULK_PRIV_ALG_MAX); + + if (obd->u.cli.cl_import) + sec = obd->u.cli.cl_import->imp_sec; + + if (sec == NULL) { + written = snprintf(page, count, "\n"); + goto out; + } + + sec_flags2str(sec->ps_flags, flags_str, sizeof(flags_str)); + + written = snprintf(page, count, + "rpc msg flavor: %s\n" + "bulk checksum: %s\n" + "bulk encrypt: %s\n" + "flags: %s\n" + "ctx cache size %u\n" + "ctx cache busy %d\n" + "gc interval %lu\n" + "gc next %ld\n", + sptlrpc_flavor2name(sec->ps_flavor), + sptlrpc_bulk_csum_alg2name(conf->sfc_bulk_csum), + sptlrpc_bulk_priv_alg2name(conf->sfc_bulk_priv), + flags_str, + sec->ps_ccache_size, + atomic_read(&sec->ps_busy), + sec->ps_gc_interval, + sec->ps_gc_interval ? + sec->ps_gc_next - cfs_time_current_sec() : 0 + ); + /* + * list contexts + */ + if (sec->ps_policy->sp_policy != SPTLRPC_POLICY_GSS) + goto out; + + written += snprintf(page + written, count - written, + "GSS contexts ==>\n"); + + spin_lock(&sec->ps_lock); + for (i = 0; i < sec->ps_ccache_size; i++) { + hlist_for_each_entry_safe(ctx, pos, next, + &sec->ps_ccache[i], cc_hash) { + if (written >= count) + break; + written += sptlrpc_ctx_display(ctx, page + written, + count - written); + } + } + spin_unlock(&sec->ps_lock); + +out: + return written; +} +EXPORT_SYMBOL(sptlrpc_lprocfs_rd); + +static struct lprocfs_vars sptlrpc_lprocfs_vars[] = { + { "enc_pool", sptlrpc_proc_read_enc_pool, NULL, NULL }, + { NULL } +}; + +int sptlrpc_lproc_init(void) +{ + int rc; + + LASSERT(sptlrpc_proc_root == NULL); + + sptlrpc_proc_root = lprocfs_register("sptlrpc", proc_lustre_root, + sptlrpc_lprocfs_vars, NULL); + if (IS_ERR(sptlrpc_proc_root)) { + rc = PTR_ERR(sptlrpc_proc_root); + sptlrpc_proc_root = NULL; + return rc; + } + return 0; +} + +void sptlrpc_lproc_fini(void) +{ + if (sptlrpc_proc_root) { + lprocfs_remove(sptlrpc_proc_root); + sptlrpc_proc_root = NULL; + } +} + +#else /* !__KERNEL__ */ + +int sptlrpc_lproc_init(void) +{ + return 0; +} + +void sptlrpc_lproc_fini(void) +{ +} + +#endif diff --git a/lustre/ptlrpc/sec_null.c b/lustre/ptlrpc/sec_null.c index 7b1d391..9b82d51 100644 --- a/lustre/ptlrpc/sec_null.c +++ b/lustre/ptlrpc/sec_null.c @@ -293,13 +293,11 @@ int sptlrpc_null_init(void) return rc; } -int sptlrpc_null_exit(void) +void sptlrpc_null_fini(void) { int rc; rc = sptlrpc_unregister_policy(&null_policy); if (rc) CERROR("cannot unregister sec.null: %d\n", rc); - - return rc; } diff --git a/lustre/ptlrpc/sec_plain.c b/lustre/ptlrpc/sec_plain.c index c2c7df4..7eeaf14 100644 --- a/lustre/ptlrpc/sec_plain.c +++ b/lustre/ptlrpc/sec_plain.c @@ -486,13 +486,11 @@ int sptlrpc_plain_init(void) return rc; } -int sptlrpc_plain_exit(void) +void sptlrpc_plain_fini(void) { int rc; rc = sptlrpc_unregister_policy(&plain_policy); if (rc) CERROR("cannot unregister sec.plain: %d\n", rc); - - return rc; } diff --git a/lustre/utils/gss/gssd_proc.c b/lustre/utils/gss/gssd_proc.c index c88f14c..2aabf19 100644 --- a/lustre/utils/gss/gssd_proc.c +++ b/lustre/utils/gss/gssd_proc.c @@ -618,7 +618,7 @@ int do_negotiation(struct lustre_gss_data *lgd, struct lustre_gss_init_res *gr, int timeout) { - char *file = "/proc/fs/lustre/gss/init_channel"; + char *file = "/proc/fs/lustre/sptlrpc/gss/init_channel"; struct lgssd_ioctl_param param; struct passwd *pw; int fd, ret; diff --git a/lustre/utils/gss/nfs-utils-1.0.10-lustre.diff b/lustre/utils/gss/nfs-utils-1.0.10-lustre.diff index 1055fc5..262a554 100644 --- a/lustre/utils/gss/nfs-utils-1.0.10-lustre.diff +++ b/lustre/utils/gss/nfs-utils-1.0.10-lustre.diff @@ -634,7 +634,7 @@ + struct lustre_gss_init_res *gr, + int timeout) +{ -+ char *file = "/proc/fs/lustre/gss/init_channel"; ++ char *file = "/proc/fs/lustre/sptlrpc/gss/init_channel"; + struct lgssd_ioctl_param param; + struct passwd *pw; + int fd, ret;