__u64 bd_last_xid;
struct ptlrpc_cb_id bd_cbid; /* network callback info */
- lnet_handle_md_t bd_md_h; /* associated MD */
+ lnet_handle_md_t bd_md_h; /* associated MD */
+ cfs_page_t **bd_enc_pages;
#if defined(__KERNEL__)
- lnet_kiov_t *bd_enc_iov; /* used in privacy mode */
- lnet_kiov_t bd_iov[0];
+ lnet_kiov_t bd_iov[0];
#else
- lnet_md_iovec_t *bd_enc_iov;
- lnet_md_iovec_t bd_iov[0];
+ lnet_md_iovec_t bd_iov[0];
#endif
};
int client_import_del_conn(struct obd_import *imp, struct obd_uuid *uuid);
int import_set_conn_priority(struct obd_import *imp, struct obd_uuid *uuid);
-/* ptlrpc/pers.c */
-int ptlrpc_bulk_alloc_enc_pages(struct ptlrpc_bulk_desc *desc);
-void ptlrpc_bulk_free_enc_pages(struct ptlrpc_bulk_desc *desc);
-
/* ptlrpc/pinger.c */
int ptlrpc_pinger_add_import(struct obd_import *imp);
int ptlrpc_pinger_del_import(struct obd_import *imp);
int (*match) (struct ptlrpc_cli_ctx *ctx,
struct vfs_cred *vcred);
int (*refresh) (struct ptlrpc_cli_ctx *ctx);
+ int (*display) (struct ptlrpc_cli_ctx *ctx,
+ char *buf, int bufsize);
/*
* rpc data transform
*/
#define PTLRPC_SEC_FL_REVERSE 0x0001 /* reverse sec */
#define PTLRPC_SEC_FL_ROOTONLY 0x0002 /* treat everyone as root */
+#define PTLRPC_SEC_FL_BULK 0x0004 /* intensive bulk i/o expected */
struct ptlrpc_sec {
struct ptlrpc_sec_policy *ps_policy;
__u8 bsd_csum[0];
};
+const char * sptlrpc_bulk_csum_alg2name(__u32 csum_alg);
+const char * sptlrpc_bulk_priv_alg2name(__u32 priv_alg);
+
+/*
+ * lprocfs
+ */
+struct proc_dir_entry;
+extern struct proc_dir_entry *sptlrpc_proc_root;
+
/*
* security type
*/
void sptlrpc_ctx_expire(struct ptlrpc_cli_ctx *ctx);
void sptlrpc_ctx_replace(struct ptlrpc_sec *sec, struct ptlrpc_cli_ctx *new);
void sptlrpc_ctx_wakeup(struct ptlrpc_cli_ctx *ctx);
+int sptlrpc_ctx_display(struct ptlrpc_cli_ctx *ctx, char *buf, int bufsize);
/*
* client wrap/buffers
struct ptlrpc_cli_ctx *ctx);
/* bulk security api */
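+/*
+ * the enc pool rents encryption pages to bulk descriptors
+ * (desc->bd_enc_pages); each user (e.g. an OSC) registers via
+ * add_user() so the pool can be grown ahead of demand, and
+ * deregisters via del_user().
+ */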
+int sptlrpc_enc_pool_add_user(void);
+int sptlrpc_enc_pool_del_user(void);
+int sptlrpc_enc_pool_get_pages(struct ptlrpc_bulk_desc *desc);
+void sptlrpc_enc_pool_put_pages(struct ptlrpc_bulk_desc *desc);
+
int sptlrpc_cli_wrap_bulk(struct ptlrpc_request *req,
struct ptlrpc_bulk_desc *desc);
int sptlrpc_cli_unwrap_bulk_read(struct ptlrpc_request *req,
ptlrpc_objs += events.o ptlrpc_module.o service.o pinger.o recov_thread.o
ptlrpc_objs += llog_net.o llog_client.o llog_server.o import.o ptlrpcd.o
ptlrpc_objs += pers.o lproc_ptlrpc.o wiretest.o layout.o
-ptlrpc_objs += sec.o sec_null.o sec_plain.o
+ptlrpc_objs += sec.o sec_bulk.o sec_null.o sec_plain.o sec_lproc.o
ptlrpc-objs := $(ldlm_objs) $(ptlrpc_objs)
COMMON_SOURCES = client.c recover.c connection.c niobuf.c pack_generic.c \
events.c ptlrpc_module.c service.c pinger.c recov_thread.c llog_net.c \
llog_client.c llog_server.c import.c ptlrpcd.c pers.c wiretest.c \
- ptlrpc_internal.h layout.c sec.c sec_null.c sec_plain.c \
- $(LDLM_COMM_SOURCES)
+ ptlrpc_internal.h layout.c sec.c sec_bulk.c sec_null.c sec_plain.c \
+ sec_lproc.c $(LDLM_COMM_SOURCES)
if LIBLUSTRE
LASSERT(!desc->bd_network_rw); /* network hands off or */
LASSERT((desc->bd_export != NULL) ^ (desc->bd_import != NULL));
- ptlrpc_bulk_free_enc_pages(desc);
+ sptlrpc_enc_pool_put_pages(desc);
if (desc->bd_export)
class_export_put(desc->bd_export);
desc->bd_nob_transferred = ev->mlength;
}
- ptlrpc_bulk_free_enc_pages(desc);
+ sptlrpc_enc_pool_put_pages(desc);
/* NB don't unlock till after wakeup; desc can disappear under us
* otherwise */
__u32 lgss_import_sec_context(
rawobj_t *input_token,
struct gss_api_mech *mech,
- struct gss_ctx **ctx_id);
+ struct gss_ctx **ctx);
__u32 lgss_copy_reverse_context(
- struct gss_ctx *ctx_id,
- struct gss_ctx **ctx_id_new);
+ struct gss_ctx *ctx,
+ struct gss_ctx **ctx_new);
__u32 lgss_inquire_context(
- struct gss_ctx *ctx_id,
+ struct gss_ctx *ctx,
unsigned long *endtime);
__u32 lgss_get_mic(
- struct gss_ctx *ctx_id,
+ struct gss_ctx *ctx,
int msgcnt,
rawobj_t *msgs,
rawobj_t *mic_token);
__u32 lgss_verify_mic(
- struct gss_ctx *ctx_id,
+ struct gss_ctx *ctx,
int msgcnt,
rawobj_t *msgs,
rawobj_t *mic_token);
__u32 lgss_wrap(
- struct gss_ctx *ctx_id,
+ struct gss_ctx *ctx,
rawobj_t *msg,
int msg_buflen,
rawobj_t *out_token);
__u32 lgss_unwrap(
- struct gss_ctx *ctx_id,
+ struct gss_ctx *ctx,
rawobj_t *token,
rawobj_t *out_msg);
__u32 lgss_plain_encrypt(
- struct gss_ctx *ctx_id,
+ struct gss_ctx *ctx,
int length,
void *in_buf,
void *out_buf);
__u32 lgss_delete_sec_context(
- struct gss_ctx **ctx_id);
+ struct gss_ctx **ctx);
+int lgss_display(
+ struct gss_ctx *ctx,
+ char *buf,
+ int bufsize);
struct subflavor_desc {
__u32 sf_subflavor;
struct gss_api_ops {
__u32 (*gss_import_sec_context)(
rawobj_t *input_token,
- struct gss_ctx *ctx_id);
+ struct gss_ctx *ctx);
__u32 (*gss_copy_reverse_context)(
- struct gss_ctx *ctx_id,
- struct gss_ctx *ctx_id_new);
+ struct gss_ctx *ctx,
+ struct gss_ctx *ctx_new);
__u32 (*gss_inquire_context)(
- struct gss_ctx *ctx_id,
+ struct gss_ctx *ctx,
unsigned long *endtime);
__u32 (*gss_get_mic)(
- struct gss_ctx *ctx_id,
+ struct gss_ctx *ctx,
int msgcnt,
rawobj_t *msgs,
rawobj_t *mic_token);
__u32 (*gss_verify_mic)(
- struct gss_ctx *ctx_id,
+ struct gss_ctx *ctx,
int msgcnt,
rawobj_t *msgs,
rawobj_t *mic_token);
void *in_buf,
void *out_buf);
void (*gss_delete_sec_context)(
- void *ctx_id);
+ void *ctx);
+ int (*gss_display)(
+ struct gss_ctx *ctx,
+ char *buf,
+ int bufsize);
};
int lgss_mech_register(struct gss_api_mech *mech);
void gss_svc_upcall_put_ctx(struct gss_svc_ctx *ctx);
void gss_svc_upcall_destroy_ctx(struct gss_svc_ctx *ctx);
-int __init gss_svc_init_upcall(void);
+int __init gss_svc_init_upcall(void);
void __exit gss_svc_exit_upcall(void);
/* lproc_gss.c */
-int gss_init_lproc(void);
+void gss_stat_oos_record_cli(int behind);
+void gss_stat_oos_record_svc(int phase, int replay);
+int gss_init_lproc(void);
void gss_exit_lproc(void);
/* gss_krb5_mech.c */
spinlock_t krb5_seq_lock = SPIN_LOCK_UNLOCKED;
struct krb5_enctype {
+ char *ke_dispname;
int ke_hash_size;
char *ke_hash_name;
char *ke_enc_name;
*/
static struct krb5_enctype enctypes[] = {
[ENCTYPE_DES_CBC_RAW] = { /* des-cbc-md5 */
+ "des-cbc-md5",
16,
"md5",
"des",
0,
},
[ENCTYPE_DES3_CBC_RAW] = { /* des3-hmac-sha1 */
+ "des-hmac-sha1",
20,
"sha1",
"des3_ede",
1,
},
[ENCTYPE_AES128_CTS_HMAC_SHA1_96] = { /* aes128-cts */
+ "aes128-cts-hmac-sha1-96",
12,
"sha1",
"aes",
1,
},
[ENCTYPE_AES256_CTS_HMAC_SHA1_96] = { /* aes256-cts */
+ "aes256-cts-hmac-sha1-96",
12,
"sha1",
"aes",
#define MAX_ENCTYPES sizeof(enctypes)/sizeof(struct krb5_enctype)
+static const char * enctype2str(__u32 enctype)
+{
+ if (enctype < MAX_ENCTYPES && enctypes[enctype].ke_dispname)
+ return enctypes[enctype].ke_dispname;
+
+ return "unknown";
+}
+
static
int keyblock_init(struct krb5_keyblock *kb, char *alg_name, int alg_mode)
{
return rc;
}
+int gss_display_kerberos(struct gss_ctx *ctx,
+ char *buf,
+ int bufsize)
+{
+ struct krb5_ctx *kctx = ctx->internal_ctx_id;
+ int written;
+
+ written = snprintf(buf, bufsize,
+ " mech: krb5\n"
+ " enctype: %s\n",
+ enctype2str(kctx->kc_enctype));
+ return written;
+}
+
static struct gss_api_ops gss_kerberos_ops = {
.gss_import_sec_context = gss_import_sec_context_kerberos,
.gss_copy_reverse_context = gss_copy_reverse_context_kerberos,
.gss_unwrap = gss_unwrap_kerberos,
.gss_plain_encrypt = gss_plain_encrypt_kerberos,
.gss_delete_sec_context = gss_delete_sec_context_kerberos,
+ .gss_display = gss_display_kerberos,
};
static struct subflavor_desc gss_kerberos_sfs[] = {
*context_handle=NULL;
return GSS_S_COMPLETE;
}
+
+int lgss_display(struct gss_ctx *ctx,
+ char *buf,
+ int bufsize)
+{
+ LASSERT(ctx);
+ LASSERT(ctx->mech_type);
+ LASSERT(ctx->mech_type->gm_ops);
+ LASSERT(ctx->mech_type->gm_ops->gss_display);
+
+ return ctx->mech_type->gm_ops->gss_display(ctx, buf, bufsize);
+}
static struct proc_dir_entry *gss_proc_root = NULL;
+/*
+ * statistic of "out-of-sequence-window"
+ */
+static struct {
+ spinlock_t oos_lock;
+ atomic_t oos_cli_count; /* client occurrence */
+ int oos_cli_behind; /* client max seqs behind */
+ atomic_t oos_svc_replay[3]; /* server replay detected */
+ atomic_t oos_svc_pass[3]; /* server verified ok */
+} gss_stat_oos = {
+ .oos_lock = SPIN_LOCK_UNLOCKED,
+ .oos_cli_count = ATOMIC_INIT(0),
+ .oos_cli_behind = 0,
+ .oos_svc_replay = { ATOMIC_INIT(0), },
+ .oos_svc_pass = { ATOMIC_INIT(0), },
+};
+
+void gss_stat_oos_record_cli(int behind)
+{
+ atomic_inc(&gss_stat_oos.oos_cli_count);
+
+ spin_lock(&gss_stat_oos.oos_lock);
+ if (behind > gss_stat_oos.oos_cli_behind)
+ gss_stat_oos.oos_cli_behind = behind;
+ spin_unlock(&gss_stat_oos.oos_lock);
+}
+
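+/*
+ * @phase indexes the three checking phases of gss_check_seq_num():
+ * 0 - initial check, 1 - main window, 2 - back window.
+ */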
+void gss_stat_oos_record_svc(int phase, int replay)
+{
+ LASSERT(phase >= 0 && phase <= 2);
+
+ if (replay)
+ atomic_inc(&gss_stat_oos.oos_svc_replay[phase]);
+ else
+ atomic_inc(&gss_stat_oos.oos_svc_pass[phase]);
+}
+
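+/*
+ * note only phase 2 records "pass" events (see the callers of
+ * gss_stat_oos_record_svc()), hence the single "verify ok" line below.
+ */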
+static int gss_proc_read_oos(char *page, char **start, off_t off, int count,
+ int *eof, void *data)
+{
+ int written;
+
+ written = snprintf(page, count,
+ "seqwin: %u\n"
+ "backwin: %u\n"
+ "client fall behind seqwin\n"
+ " occurrence: %d\n"
+ " max seq behind: %d\n"
+ "server replay detected:\n"
+ " phase 0: %d\n"
+ " phase 1: %d\n"
+ " phase 2: %d\n"
+ "server verify ok:\n"
+ " phase 2: %d\n",
+ GSS_SEQ_WIN_MAIN,
+ GSS_SEQ_WIN_BACK,
+ atomic_read(&gss_stat_oos.oos_cli_count),
+ gss_stat_oos.oos_cli_behind,
+ atomic_read(&gss_stat_oos.oos_svc_replay[0]),
+ atomic_read(&gss_stat_oos.oos_svc_replay[1]),
+ atomic_read(&gss_stat_oos.oos_svc_replay[2]),
+ atomic_read(&gss_stat_oos.oos_svc_pass[2]));
+
+ return written;
+}
+
static int gss_proc_write_secinit(struct file *file, const char *buffer,
unsigned long count, void *data)
{
}
static struct lprocfs_vars gss_lprocfs_vars[] = {
+ { "replays", gss_proc_read_oos, NULL },
{ "init_channel", NULL, gss_proc_write_secinit, NULL },
{ NULL }
};
int gss_init_lproc(void)
{
int rc;
- gss_proc_root = lprocfs_register("gss", proc_lustre_root,
+ gss_proc_root = lprocfs_register("gss", sptlrpc_proc_root,
gss_lprocfs_vars, NULL);
if (IS_ERR(gss_proc_root)) {
* 2 - check in back window. if it is high above the window or fit in the
* window and the bit is 0, then set the bit and accept. otherwise reject.
*
+ * return value:
+ * 1: looks like a replay, but a later phase may still accept it
+ * 0: is ok
+ * -1: is a replay
+ *
* note phase 0 is necessary, because otherwise replay attacking request of
* sequence which between the 2 windows can't be detected.
*
seq_num + win_size > *max_seq ? "in" : "behind",
phase == 2 ? "backup " : "main",
*max_seq, win_size);
- return 1;
+ return -1;
}
/*
spin_lock(&ssd->ssd_lock);
if (set == 0) {
+ /*
+ * phase 0 testing
+ */
rc = gss_do_check_seq(ssd->ssd_win_main, GSS_SEQ_WIN_MAIN,
&ssd->ssd_max_main, seq_num, 0);
+ if (unlikely(rc))
+ gss_stat_oos_record_svc(0, 1);
} else {
+ /*
+ * phase 1 checking main window
+ */
rc = gss_do_check_seq(ssd->ssd_win_main, GSS_SEQ_WIN_MAIN,
&ssd->ssd_max_main, seq_num, 1);
- if (rc == 0)
+ switch (rc) {
+ case -1:
+ gss_stat_oos_record_svc(1, 1);
+ /* fall through */
+ case 0:
goto exit;
+ }
+ /*
+ * phase 2 checking back window
+ */
rc = gss_do_check_seq(ssd->ssd_win_back, GSS_SEQ_WIN_BACK,
&ssd->ssd_max_back, seq_num, 2);
+ if (rc)
+ gss_stat_oos_record_svc(2, 1);
+ else
+ gss_stat_oos_record_svc(2, 0);
}
exit:
spin_unlock(&ssd->ssd_lock);
}
static
-int gss_cli_ctx_match(struct ptlrpc_cli_ctx *ctx,
- struct vfs_cred *vcred)
+int gss_cli_ctx_match(struct ptlrpc_cli_ctx *ctx, struct vfs_cred *vcred)
{
return (ctx->cc_vcred.vc_uid == vcred->vc_uid);
}
static
+void gss_cli_ctx_flags2str(unsigned long flags, char *buf, int bufsize)
+{
+ buf[0] = '\0';
+
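+ /* NB strncat()'s bound limits what is appended, not the total
+ * buffer size; callers are expected to pass a buffer large enough
+ * for all flag names combined. */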
+ if (flags & PTLRPC_CTX_UPTODATE)
+ strncat(buf, "uptodate,", bufsize);
+ if (flags & PTLRPC_CTX_DEAD)
+ strncat(buf, "dead,", bufsize);
+ if (flags & PTLRPC_CTX_ERROR)
+ strncat(buf, "error,", bufsize);
+ if (flags & PTLRPC_CTX_HASHED)
+ strncat(buf, "hashed,", bufsize);
+ if (flags & PTLRPC_CTX_ETERNAL)
+ strncat(buf, "eternal,", bufsize);
+ if (buf[0] == '\0')
+ strncat(buf, "-,", bufsize);
+
+ buf[strlen(buf) - 1] = '\0';
+}
+
+static
+int gss_cli_ctx_display(struct ptlrpc_cli_ctx *ctx, char *buf, int bufsize)
+{
+ struct gss_cli_ctx *gctx;
+ char flags_str[40];
+ int written;
+
+ gctx = container_of(ctx, struct gss_cli_ctx, gc_base);
+
+ gss_cli_ctx_flags2str(ctx->cc_flags, flags_str, sizeof(flags_str));
+
+ written = snprintf(buf, bufsize,
+ "UID %d:\n"
+ " flags: %s\n"
+ " seqwin: %d\n"
+ " sequence: %d\n",
+ ctx->cc_vcred.vc_uid,
+ flags_str,
+ gctx->gc_win,
+ atomic_read(&gctx->gc_seq));
+
+ if (gctx->gc_mechctx) {
+ written += lgss_display(gctx->gc_mechctx,
+ buf + written, bufsize - written);
+ }
+
+ return written;
+}
+
+static
int gss_cli_ctx_sign(struct ptlrpc_cli_ctx *ctx,
struct ptlrpc_request *req)
{
* be dropped. also applies to gss_cli_ctx_seal().
*/
if (atomic_read(&gctx->gc_seq) - seq > GSS_SEQ_REPACK_THRESHOLD) {
- CWARN("req %p: %u behind, retry signing\n",
- req, atomic_read(&gctx->gc_seq) - seq);
+ int behind = atomic_read(&gctx->gc_seq) - seq;
+
+ gss_stat_oos_record_cli(behind);
+ CWARN("req %p: %u behind, retry signing\n", req, behind);
goto redo;
}
/* see explain in gss_cli_ctx_sign() */
if (atomic_read(&gctx->gc_seq) - ghdr->gh_seq >
GSS_SEQ_REPACK_THRESHOLD) {
- CWARN("req %p: %u behind, retry sealing\n",
- req, atomic_read(&gctx->gc_seq) - ghdr->gh_seq);
+ int behind = atomic_read(&gctx->gc_seq) - ghdr->gh_seq;
+
+ gss_stat_oos_record_cli(behind);
+ CWARN("req %p: %u behind, retry sealing\n", req, behind);
+
ghdr->gh_seq = atomic_inc_return(&gctx->gc_seq);
goto redo;
}
static struct ptlrpc_ctx_ops gss_ctxops = {
.refresh = gss_cli_ctx_refresh,
.match = gss_cli_ctx_match,
+ .display = gss_cli_ctx_display,
.sign = gss_cli_ctx_sign,
.verify = gss_cli_ctx_verify,
.seal = gss_cli_ctx_seal,
sec->ps_gc_next = 0;
}
+ if (SEC_FLAVOR_SVC(flavor) == SPTLRPC_SVC_PRIV &&
+ flags & PTLRPC_SEC_FL_BULK)
+ sptlrpc_enc_pool_add_user();
+
CWARN("create %s%s@%p\n", (ctx ? "reverse " : ""),
gss_policy.sp_name, gsec);
RETURN(sec);
class_import_put(sec->ps_import);
+ if (SEC_FLAVOR_SVC(sec->ps_flavor) == SPTLRPC_SVC_PRIV &&
+ sec->ps_flags & PTLRPC_SEC_FL_BULK)
+ sptlrpc_enc_pool_del_user();
+
OBD_FREE(gsec, sizeof(*gsec) +
sizeof(struct list_head) * sec->ps_ccache_size);
EXIT;
}
if (gss_check_seq_num(&gctx->gsc_seqdata, gw->gw_seq, 0)) {
- CERROR("phase 1: discard replayed req: seq %u\n", gw->gw_seq);
+ CERROR("phase 0: discard replayed req: seq %u\n", gw->gw_seq);
*major = GSS_S_DUPLICATE_TOKEN;
RETURN(-EACCES);
}
RETURN(-EACCES);
if (gss_check_seq_num(&gctx->gsc_seqdata, gw->gw_seq, 1)) {
- CERROR("phase 2: discard replayed req: seq %u\n", gw->gw_seq);
+ CERROR("phase 1+: discard replayed req: seq %u\n", gw->gw_seq);
*major = GSS_S_DUPLICATE_TOKEN;
RETURN(-EACCES);
}
ENTRY;
if (gss_check_seq_num(&gctx->gsc_seqdata, gw->gw_seq, 0)) {
- CERROR("phase 1: discard replayed req: seq %u\n", gw->gw_seq);
+ CERROR("phase 0: discard replayed req: seq %u\n", gw->gw_seq);
*major = GSS_S_DUPLICATE_TOKEN;
RETURN(-EACCES);
}
RETURN(-EACCES);
if (gss_check_seq_num(&gctx->gsc_seqdata, gw->gw_seq, 1)) {
- CERROR("phase 2: discard replayed req: seq %u\n", gw->gw_seq);
+ CERROR("phase 1+: discard replayed req: seq %u\n", gw->gw_seq);
*major = GSS_S_DUPLICATE_TOKEN;
RETURN(-EACCES);
}
LASSERT (!(md->options & (LNET_MD_IOVEC | LNET_MD_KIOV | LNET_MD_PHYS)));
md->options |= LNET_MD_KIOV;
- md->start = desc->bd_enc_iov ? desc->bd_enc_iov : &desc->bd_iov[0];
+ md->start = &desc->bd_iov[0];
md->length = desc->bd_iov_count;
}
}
}
-int ptlrpc_bulk_alloc_enc_pages(struct ptlrpc_bulk_desc *desc)
-{
- int i, alloc_size;
-
- LASSERT(desc->bd_enc_iov == NULL);
-
- if (desc->bd_iov_count == 0)
- return 0;
-
- alloc_size = desc->bd_iov_count * sizeof(desc->bd_enc_iov[0]);
-
- OBD_ALLOC(desc->bd_enc_iov, alloc_size);
- if (desc->bd_enc_iov == NULL)
- return -ENOMEM;
-
- memcpy(desc->bd_enc_iov, desc->bd_iov, alloc_size);
-
- for (i = 0; i < desc->bd_iov_count; i++) {
- desc->bd_enc_iov[i].kiov_page =
- cfs_alloc_page(CFS_ALLOC_IO | CFS_ALLOC_HIGH);
- if (desc->bd_enc_iov[i].kiov_page == NULL) {
- CERROR("Failed to alloc %d encryption pages\n",
- desc->bd_iov_count);
- break;
- }
- }
-
- if (i == desc->bd_iov_count)
- return 0;
-
- /* error, cleanup */
- for (i = i - 1; i >= 0; i--)
- __free_page(desc->bd_enc_iov[i].kiov_page);
- OBD_FREE(desc->bd_enc_iov, alloc_size);
- desc->bd_enc_iov = NULL;
- return -ENOMEM;
-}
-
-void ptlrpc_bulk_free_enc_pages(struct ptlrpc_bulk_desc *desc)
-{
- int i;
-
- if (desc->bd_enc_iov == NULL)
- return;
-
- for (i = 0; i < desc->bd_iov_count; i++) {
- LASSERT(desc->bd_enc_iov[i].kiov_page);
- __free_page(desc->bd_enc_iov[i].kiov_page);
- }
-
- OBD_FREE(desc->bd_enc_iov,
- desc->bd_iov_count * sizeof(desc->bd_enc_iov[0]));
- desc->bd_enc_iov = NULL;
-}
-
#else /* !__KERNEL__ */
void ptlrpc_fill_bulk_md(lnet_md_t *md, struct ptlrpc_bulk_desc *desc)
memset(iov->iov_base, 0xab, iov->iov_len);
}
}
-
-int ptlrpc_bulk_alloc_enc_pages(struct ptlrpc_bulk_desc *desc)
-{
- return 0;
-}
-void ptlrpc_bulk_free_enc_pages(struct ptlrpc_bulk_desc *desc)
-{
-}
#endif /* !__KERNEL__ */
#endif
/* sec_null.c */
-int sptlrpc_null_init(void);
-int sptlrpc_null_exit(void);
+int sptlrpc_null_init(void);
+void sptlrpc_null_fini(void);
/* sec_plain.c */
-int sptlrpc_plain_init(void);
-int sptlrpc_plain_exit(void);
+int sptlrpc_plain_init(void);
+void sptlrpc_plain_fini(void);
+
+/* sec_bulk.c */
+int sptlrpc_enc_pool_init(void);
+void sptlrpc_enc_pool_fini(void);
+int sptlrpc_proc_read_enc_pool(char *page, char **start, off_t off, int count,
+ int *eof, void *data);
+const char * sptlrpc_bulk_csum_alg2name(__u32 csum_alg);
+
+/* sec_lproc.c */
+int sptlrpc_lproc_init(void);
+void sptlrpc_lproc_fini(void);
/* sec.c */
-int sptlrpc_init(void);
-int sptlrpc_exit(void);
+int sptlrpc_init(void);
+void sptlrpc_fini(void);
#endif /* PTLRPC_INTERNAL_H */
#ifdef __KERNEL__
static void __exit ptlrpc_exit(void)
{
- sptlrpc_exit();
+ sptlrpc_fini();
ldlm_exit();
ptlrpc_stop_pinger();
ptlrpc_exit_portals();
EXPORT_SYMBOL(ptlrpc_fail_import);
EXPORT_SYMBOL(ptlrpc_recover_import);
-/* pers.c */
-EXPORT_SYMBOL(ptlrpc_bulk_alloc_enc_pages);
-EXPORT_SYMBOL(ptlrpc_bulk_free_enc_pages);
-
/* pinger.c */
EXPORT_SYMBOL(ptlrpc_pinger_add_import);
EXPORT_SYMBOL(ptlrpc_pinger_del_import);
}
EXPORT_SYMBOL(sptlrpc_ctx_wakeup);
+int sptlrpc_ctx_display(struct ptlrpc_cli_ctx *ctx, char *buf, int bufsize)
+{
+ LASSERT(ctx->cc_ops);
+
+ if (ctx->cc_ops->display == NULL)
+ return 0;
+
+ return ctx->cc_ops->display(ctx, buf, bufsize);
+}
+
void sptlrpc_req_put_ctx(struct ptlrpc_request *req)
{
ENTRY;
EXPORT_SYMBOL(sptlrpc_unpack_user_desc);
/****************************************
- * Helpers to assist policy modules to *
- * implement checksum funcationality *
- ****************************************/
-
-struct {
- char *name;
- int size;
-} csum_types[] = {
- [BULK_CSUM_ALG_NULL] = { "null", 0 },
- [BULK_CSUM_ALG_CRC32] = { "crc32", 4 },
- [BULK_CSUM_ALG_MD5] = { "md5", 16 },
- [BULK_CSUM_ALG_SHA1] = { "sha1", 20 },
- [BULK_CSUM_ALG_SHA256] = { "sha256", 32 },
- [BULK_CSUM_ALG_SHA384] = { "sha384", 48 },
- [BULK_CSUM_ALG_SHA512] = { "sha512", 64 },
-};
-
-int bulk_sec_desc_size(__u32 csum_alg, int request, int read)
-{
- int size = sizeof(struct ptlrpc_bulk_sec_desc);
-
- LASSERT(csum_alg < BULK_CSUM_ALG_MAX);
-
- /* read request don't need extra data */
- if (!(read && request))
- size += csum_types[csum_alg].size;
-
- return size;
-}
-EXPORT_SYMBOL(bulk_sec_desc_size);
-
-int bulk_sec_desc_unpack(struct lustre_msg *msg, int offset)
-{
- struct ptlrpc_bulk_sec_desc *bsd;
- int size = msg->lm_buflens[offset];
-
- bsd = lustre_msg_buf(msg, offset, sizeof(*bsd));
- if (bsd == NULL) {
- CERROR("Invalid bulk sec desc: size %d\n", size);
- return -EINVAL;
- }
-
- if (lustre_msg_swabbed(msg)) {
- __swab32s(&bsd->bsd_version);
- __swab32s(&bsd->bsd_pad);
- __swab32s(&bsd->bsd_csum_alg);
- __swab32s(&bsd->bsd_priv_alg);
- }
-
- if (bsd->bsd_version != 0) {
- CERROR("Unexpected version %u\n", bsd->bsd_version);
- return -EPROTO;
- }
-
- if (bsd->bsd_csum_alg >= BULK_CSUM_ALG_MAX) {
- CERROR("Unsupported checksum algorithm %u\n",
- bsd->bsd_csum_alg);
- return -EINVAL;
- }
- if (bsd->bsd_priv_alg >= BULK_PRIV_ALG_MAX) {
- CERROR("Unsupported cipher algorithm %u\n",
- bsd->bsd_priv_alg);
- return -EINVAL;
- }
-
- if (size > sizeof(*bsd) &&
- size < sizeof(*bsd) + csum_types[bsd->bsd_csum_alg].size) {
- CERROR("Mal-formed checksum data: csum alg %u, size %d\n",
- bsd->bsd_csum_alg, size);
- return -EINVAL;
- }
-
- return 0;
-}
-EXPORT_SYMBOL(bulk_sec_desc_unpack);
-
-#ifdef __KERNEL__
-static
-int do_bulk_checksum_crc32(struct ptlrpc_bulk_desc *desc, void *buf)
-{
- struct page *page;
- int off;
- char *ptr;
- __u32 crc32 = ~0;
- int len, i;
-
- for (i = 0; i < desc->bd_iov_count; i++) {
- page = desc->bd_iov[i].kiov_page;
- off = desc->bd_iov[i].kiov_offset & ~CFS_PAGE_MASK;
- ptr = cfs_kmap(page) + off;
- len = desc->bd_iov[i].kiov_len;
-
- crc32 = crc32_le(crc32, ptr, len);
-
- cfs_kunmap(page);
- }
-
- *((__u32 *) buf) = crc32;
- return 0;
-}
-
-static
-int do_bulk_checksum(struct ptlrpc_bulk_desc *desc, __u32 alg, void *buf)
-{
- struct crypto_tfm *tfm;
- struct scatterlist *sl;
- int i, rc = 0;
-
- LASSERT(alg > BULK_CSUM_ALG_NULL &&
- alg < BULK_CSUM_ALG_MAX);
-
- if (alg == BULK_CSUM_ALG_CRC32)
- return do_bulk_checksum_crc32(desc, buf);
-
- tfm = crypto_alloc_tfm(csum_types[alg].name, 0);
- if (tfm == NULL) {
- CERROR("Unable to allocate tfm %s\n", csum_types[alg].name);
- return -ENOMEM;
- }
-
- OBD_ALLOC(sl, sizeof(*sl) * desc->bd_iov_count);
- if (sl == NULL) {
- rc = -ENOMEM;
- goto out_tfm;
- }
-
- for (i = 0; i < desc->bd_iov_count; i++) {
- sl[i].page = desc->bd_iov[i].kiov_page;
- sl[i].offset = desc->bd_iov[i].kiov_offset & ~CFS_PAGE_MASK;
- sl[i].length = desc->bd_iov[i].kiov_len;
- }
-
- crypto_digest_init(tfm);
- crypto_digest_update(tfm, sl, desc->bd_iov_count);
- crypto_digest_final(tfm, buf);
-
- OBD_FREE(sl, sizeof(*sl) * desc->bd_iov_count);
-
-out_tfm:
- crypto_free_tfm(tfm);
- return rc;
-}
-
-#else /* !__KERNEL__ */
-static
-int do_bulk_checksum(struct ptlrpc_bulk_desc *desc, __u32 alg, void *buf)
-{
- __u32 crc32 = ~0;
- int i;
-
- LASSERT(alg == BULK_CSUM_ALG_CRC32);
-
- for (i = 0; i < desc->bd_iov_count; i++) {
- char *ptr = desc->bd_iov[i].iov_base;
- int len = desc->bd_iov[i].iov_len;
-
- crc32 = crc32_le(crc32, ptr, len);
- }
-
- *((__u32 *) buf) = crc32;
- return 0;
-}
-#endif
-
-/*
- * perform algorithm @alg checksum on @desc, store result in @buf.
- * if anything goes wrong, leave 'alg' be BULK_CSUM_ALG_NULL.
- */
-static
-int generate_bulk_csum(struct ptlrpc_bulk_desc *desc, __u32 alg,
- struct ptlrpc_bulk_sec_desc *bsd, int bsdsize)
-{
- int rc;
-
- LASSERT(bsd);
- LASSERT(alg < BULK_CSUM_ALG_MAX);
-
- bsd->bsd_csum_alg = BULK_CSUM_ALG_NULL;
-
- if (alg == BULK_CSUM_ALG_NULL)
- return 0;
-
- LASSERT(bsdsize >= sizeof(*bsd) + csum_types[alg].size);
-
- rc = do_bulk_checksum(desc, alg, bsd->bsd_csum);
- if (rc == 0)
- bsd->bsd_csum_alg = alg;
-
- return rc;
-}
-
-static
-int verify_bulk_csum(struct ptlrpc_bulk_desc *desc, int read,
- struct ptlrpc_bulk_sec_desc *bsdv, int bsdvsize,
- struct ptlrpc_bulk_sec_desc *bsdr, int bsdrsize)
-{
- char *csum_p;
- char *buf = NULL;
- int csum_size, rc = 0;
-
- LASSERT(bsdv);
- LASSERT(bsdv->bsd_csum_alg < BULK_CSUM_ALG_MAX);
-
- if (bsdr)
- bsdr->bsd_csum_alg = BULK_CSUM_ALG_NULL;
-
- if (bsdv->bsd_csum_alg == BULK_CSUM_ALG_NULL)
- return 0;
-
- /* for all supported algorithms */
- csum_size = csum_types[bsdv->bsd_csum_alg].size;
-
- if (bsdvsize < sizeof(*bsdv) + csum_size) {
- CERROR("verifier size %d too small, require %d\n",
- bsdvsize, sizeof(*bsdv) + csum_size);
- return -EINVAL;
- }
-
- if (bsdr) {
- LASSERT(bsdrsize >= sizeof(*bsdr) + csum_size);
- csum_p = (char *) bsdr->bsd_csum;
- } else {
- OBD_ALLOC(buf, csum_size);
- if (buf == NULL)
- return -EINVAL;
- csum_p = buf;
- }
-
- rc = do_bulk_checksum(desc, bsdv->bsd_csum_alg, csum_p);
-
- if (memcmp(bsdv->bsd_csum, csum_p, csum_size)) {
- CERROR("BAD %s CHECKSUM (%s), data mutated during "
- "transfer!\n", read ? "READ" : "WRITE",
- csum_types[bsdv->bsd_csum_alg].name);
- rc = -EINVAL;
- } else {
- CDEBUG(D_SEC, "bulk %s checksum (%s) verified\n",
- read ? "read" : "write",
- csum_types[bsdv->bsd_csum_alg].name);
- }
-
- if (bsdr) {
- bsdr->bsd_csum_alg = bsdv->bsd_csum_alg;
- memcpy(bsdr->bsd_csum, csum_p, csum_size);
- } else {
- LASSERT(buf);
- OBD_FREE(buf, csum_size);
- }
-
- return rc;
-}
-
-int bulk_csum_cli_request(struct ptlrpc_bulk_desc *desc, int read,
- __u32 alg, struct lustre_msg *rmsg, int roff)
-{
- struct ptlrpc_bulk_sec_desc *bsdr;
- int rsize, rc = 0;
-
- rsize = rmsg->lm_buflens[roff];
- bsdr = lustre_msg_buf(rmsg, roff, sizeof(*bsdr));
-
- LASSERT(bsdr);
- LASSERT(rsize >= sizeof(*bsdr));
- LASSERT(alg < BULK_CSUM_ALG_MAX);
-
- if (read)
- bsdr->bsd_csum_alg = alg;
- else {
- rc = generate_bulk_csum(desc, alg, bsdr, rsize);
- if (rc) {
- CERROR("client bulk write: failed to perform "
- "checksum: %d\n", rc);
- }
- }
-
- return rc;
-}
-EXPORT_SYMBOL(bulk_csum_cli_request);
-
-int bulk_csum_cli_reply(struct ptlrpc_bulk_desc *desc, int read,
- struct lustre_msg *rmsg, int roff,
- struct lustre_msg *vmsg, int voff)
-{
- struct ptlrpc_bulk_sec_desc *bsdv, *bsdr;
- int rsize, vsize;
-
- rsize = rmsg->lm_buflens[roff];
- vsize = vmsg->lm_buflens[voff];
- bsdr = lustre_msg_buf(rmsg, roff, 0);
- bsdv = lustre_msg_buf(vmsg, voff, 0);
-
- if (bsdv == NULL || vsize < sizeof(*bsdv)) {
- CERROR("Invalid checksum verifier from server: size %d\n",
- vsize);
- return -EINVAL;
- }
-
- LASSERT(bsdr);
- LASSERT(rsize >= sizeof(*bsdr));
- LASSERT(vsize >= sizeof(*bsdv));
-
- if (bsdr->bsd_csum_alg != bsdv->bsd_csum_alg) {
- CERROR("bulk %s: checksum algorithm mismatch: client request "
- "%s but server reply with %s. try to use the new one "
- "for checksum verification\n",
- read ? "read" : "write",
- csum_types[bsdr->bsd_csum_alg].name,
- csum_types[bsdv->bsd_csum_alg].name);
- }
-
- if (read)
- return verify_bulk_csum(desc, 1, bsdv, vsize, NULL, 0);
- else {
- char *cli, *srv, *new = NULL;
- int csum_size = csum_types[bsdr->bsd_csum_alg].size;
-
- LASSERT(bsdr->bsd_csum_alg < BULK_CSUM_ALG_MAX);
- if (bsdr->bsd_csum_alg == BULK_CSUM_ALG_NULL)
- return 0;
-
- if (vsize < sizeof(*bsdv) + csum_size) {
- CERROR("verifier size %d too small, require %d\n",
- vsize, sizeof(*bsdv) + csum_size);
- return -EINVAL;
- }
-
- cli = (char *) (bsdr + 1);
- srv = (char *) (bsdv + 1);
-
- if (!memcmp(cli, srv, csum_size)) {
- /* checksum confirmed */
- CDEBUG(D_SEC, "bulk write checksum (%s) confirmed\n",
- csum_types[bsdr->bsd_csum_alg].name);
- return 0;
- }
-
- /* checksum mismatch, re-compute a new one and compare with
- * others, give out proper warnings.
- */
- OBD_ALLOC(new, csum_size);
- if (new == NULL)
- return -ENOMEM;
-
- do_bulk_checksum(desc, bsdr->bsd_csum_alg, new);
-
- if (!memcmp(new, srv, csum_size)) {
- CERROR("BAD WRITE CHECKSUM (%s): pages were mutated "
- "on the client after we checksummed them\n",
- csum_types[bsdr->bsd_csum_alg].name);
- } else if (!memcmp(new, cli, csum_size)) {
- CERROR("BAD WRITE CHECKSUM (%s): pages were mutated "
- "in transit\n",
- csum_types[bsdr->bsd_csum_alg].name);
- } else {
- CERROR("BAD WRITE CHECKSUM (%s): pages were mutated "
- "in transit, and the current page contents "
- "don't match the originals and what the server "
- "received\n",
- csum_types[bsdr->bsd_csum_alg].name);
- }
- OBD_FREE(new, csum_size);
-
- return -EINVAL;
- }
-}
-EXPORT_SYMBOL(bulk_csum_cli_reply);
-
-int bulk_csum_svc(struct ptlrpc_bulk_desc *desc, int read,
- struct lustre_msg *vmsg, int voff,
- struct lustre_msg *rmsg, int roff)
-{
- struct ptlrpc_bulk_sec_desc *bsdv, *bsdr;
- int vsize, rsize, rc;
-
- vsize = vmsg->lm_buflens[voff];
- rsize = rmsg->lm_buflens[roff];
- bsdv = lustre_msg_buf(vmsg, voff, 0);
- bsdr = lustre_msg_buf(rmsg, roff, 0);
-
- LASSERT(vsize >= sizeof(*bsdv));
- LASSERT(rsize >= sizeof(*bsdr));
- LASSERT(bsdv && bsdr);
-
- if (read) {
- rc = generate_bulk_csum(desc, bsdv->bsd_csum_alg, bsdr, rsize);
- if (rc)
- CERROR("bulk read: server failed to generate %s "
- "checksum: %d\n",
- csum_types[bsdv->bsd_csum_alg].name, rc);
- } else
- rc = verify_bulk_csum(desc, 0, bsdv, vsize, bsdr, rsize);
-
- return rc;
-}
-EXPORT_SYMBOL(bulk_csum_svc);
-
-/****************************************
* user supplied flavor string parsing *
****************************************/
/* checksum algorithm */
for (i = 0; i < BULK_CSUM_ALG_MAX; i++) {
- if (strcmp(alg, csum_types[i].name) == 0) {
+ if (strcmp(alg, sptlrpc_bulk_csum_alg2name(i)) == 0) {
conf->sfc_bulk_csum = i;
break;
}
}
set_flags:
- /* set ROOTONLY flag to:
- * - to OST
- * - from MDT to MDT
+ /* * set ROOTONLY flag for:
+ * - to OST
+ * - from MDT to MDT
+ * * set BULK flag for:
+ * - from CLI to OST
*/
- if ((to_part == LUSTRE_MDT && from_part == LUSTRE_MDT) ||
- to_part == LUSTRE_OST)
+ if (to_part == LUSTRE_OST ||
+ (from_part == LUSTRE_MDT && to_part == LUSTRE_MDT))
conf->sfc_flags |= PTLRPC_SEC_FL_ROOTONLY;
+ if (from_part == LUSTRE_CLI && to_part == LUSTRE_OST)
+ conf->sfc_flags |= PTLRPC_SEC_FL_BULK;
#ifdef __BIG_ENDIAN
__swab32s(&conf->sfc_rpc_flavor);
}
EXPORT_SYMBOL(sec2target_str);
-int sptlrpc_lprocfs_rd(char *page, char **start, off_t off, int count,
- int *eof, void *data)
-{
- struct obd_device *obd = data;
- struct sec_flavor_config *conf = &obd->u.cli.cl_sec_conf;
- struct ptlrpc_sec *sec = NULL;
- char flags_str[20];
-
- if (obd == NULL)
- return 0;
-
- LASSERT(strcmp(obd->obd_type->typ_name, LUSTRE_OSC_NAME) == 0 ||
- strcmp(obd->obd_type->typ_name, LUSTRE_MDC_NAME) == 0 ||
- strcmp(obd->obd_type->typ_name, LUSTRE_MGC_NAME) == 0);
- LASSERT(conf->sfc_bulk_csum < BULK_CSUM_ALG_MAX);
- LASSERT(conf->sfc_bulk_priv < BULK_PRIV_ALG_MAX);
-
- if (obd->u.cli.cl_import)
- sec = obd->u.cli.cl_import->imp_sec;
-
- flags_str[0] = '\0';
- if (conf->sfc_flags & PTLRPC_SEC_FL_REVERSE)
- strncat(flags_str, "reverse,", sizeof(flags_str));
- if (conf->sfc_flags & PTLRPC_SEC_FL_ROOTONLY)
- strncat(flags_str, "rootonly,", sizeof(flags_str));
- if (flags_str[0] != '\0')
- flags_str[strlen(flags_str) - 1] = '\0';
-
- return snprintf(page, count,
- "rpc_flavor: %s\n"
- "bulk_flavor: %s checksum, %s encryption\n"
- "flags: %s\n"
- "ctx_cache: size %u, busy %d\n"
- "gc: interval %lus, next %lds\n",
- sptlrpc_flavor2name(conf->sfc_rpc_flavor),
- csum_types[conf->sfc_bulk_csum].name,
- conf->sfc_bulk_priv == BULK_PRIV_ALG_NULL ?
- "null" : "arc4", // XXX
- flags_str,
- sec ? sec->ps_ccache_size : 0,
- sec ? atomic_read(&sec->ps_busy) : 0,
- sec ? sec->ps_gc_interval: 0,
- sec ? (sec->ps_gc_interval ?
- sec->ps_gc_next - cfs_time_current_sec() : 0)
- : 0);
-}
-EXPORT_SYMBOL(sptlrpc_lprocfs_rd);
-
+/****************************************
+ * initialize/finalize *
+ ****************************************/
int sptlrpc_init(void)
{
int rc;
- rc = sptlrpc_null_init();
+ rc = sptlrpc_enc_pool_init();
if (rc)
goto out;
+ rc = sptlrpc_null_init();
+ if (rc)
+ goto out_pool;
+
rc = sptlrpc_plain_init();
if (rc)
goto out_null;
+
+ rc = sptlrpc_lproc_init();
+ if (rc)
+ goto out_plain;
+
return 0;
+out_plain:
+ sptlrpc_plain_fini();
out_null:
- sptlrpc_null_exit();
+ sptlrpc_null_fini();
+out_pool:
+ sptlrpc_enc_pool_fini();
out:
return rc;
}
-int sptlrpc_exit(void)
+void sptlrpc_fini(void)
{
- sptlrpc_plain_exit();
- sptlrpc_null_exit();
- return 0;
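+ /* tear down in the reverse order of sptlrpc_init() */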
+ sptlrpc_lproc_fini();
+ sptlrpc_plain_fini();
+ sptlrpc_null_fini();
+ sptlrpc_enc_pool_fini();
}
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2006 Cluster File Systems, Inc.
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef EXPORT_SYMTAB
+#define EXPORT_SYMTAB
+#endif
+#define DEBUG_SUBSYSTEM S_SEC
+
+#include <libcfs/libcfs.h>
+#ifndef __KERNEL__
+#include <liblustre.h>
+#include <libcfs/list.h>
+#else
+#include <linux/crypto.h>
+#endif
+
+#include <obd.h>
+#include <obd_class.h>
+#include <obd_support.h>
+#include <lustre_net.h>
+#include <lustre_import.h>
+#include <lustre_dlm.h>
+#include <lustre_sec.h>
+
+#include "ptlrpc_internal.h"
+
+/****************************************
+ * bulk encryption page pools *
+ ****************************************/
+
+#ifdef __KERNEL__
+
+#define PTRS_PER_PAGE (CFS_PAGE_SIZE / sizeof(void *))
+#define PAGES_PER_POOL (PTRS_PER_PAGE)
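+/*
+ * a "pool" is itself one page holding PTRS_PER_PAGE pointers to
+ * encryption pages, so a full pool tracks PAGES_PER_POOL pages.
+ */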
+
+static struct ptlrpc_enc_page_pool {
+ /*
+ * constants
+ */
+ unsigned long epp_max_pages; /* maximum pages the pools can hold, const */
+ unsigned int epp_max_pools; /* number of pools, const */
+ /*
+ * users of the pools. the capacity grows as more users are added,
+ * but doesn't shrink when users go away -- that's just the current
+ * policy. during failover there might be user add/remove activity.
+ */
+ atomic_t epp_users; /* shared by how many users (osc) */
+ atomic_t epp_users_gone; /* users removed */
+ /*
+ * wait queue in case of not enough free pages.
+ */
+ cfs_waitq_t epp_waitq; /* waiting threads */
+ unsigned int epp_waitqlen; /* wait queue length */
+ unsigned long epp_pages_short; /* # of pages wanted by queued users */
+ unsigned long epp_adding:1, /* during adding pages */
+ epp_full:1; /* pools are all full */
+ /*
+ * in-pool pages bookkeeping
+ */
+ spinlock_t epp_lock; /* protect following fields */
+ unsigned long epp_total_pages; /* total pages in pools */
+ unsigned long epp_free_pages; /* current pages available */
+ /*
+ * statistics
+ */
+ unsigned int epp_st_adds; /* # of add-pages attempts */
+ unsigned int epp_st_failadds; /* # of add-pages failures */
+ unsigned long epp_st_reqs; /* # of get_pages requests */
+ unsigned long epp_st_missings; /* # of cache misses */
+ unsigned long epp_st_lowfree; /* lowest free pages ever reached */
+ unsigned long epp_st_max_wqlen;/* highest waitqueue length ever */
+ cfs_time_t epp_st_max_wait; /* in jiffies */
+ /*
+ * pointers to pools
+ */
+ cfs_page_t ***epp_pools;
+} page_pools;
+
+int sptlrpc_proc_read_enc_pool(char *page, char **start, off_t off, int count,
+ int *eof, void *data)
+{
+ int rc;
+
+ spin_lock(&page_pools.epp_lock);
+
+ rc = snprintf(page, count,
+ "physical pages: %lu\n"
+ "pages per pool: %lu\n"
+ "max pages: %lu\n"
+ "max pools: %u\n"
+ "users: %d - %d\n"
+ "current waitqueue len: %u\n"
+ "current pages in short: %lu\n"
+ "total pages: %lu\n"
+ "total free: %lu\n"
+ "add page times: %u\n"
+ "add page failed times: %u\n"
+ "total requests: %lu\n"
+ "cache missing: %lu\n"
+ "lowest free pages: %lu\n"
+ "max waitqueue depth: %lu\n"
+ "max wait time: "CFS_TIME_T"\n"
+ ,
+ num_physpages,
+ PAGES_PER_POOL,
+ page_pools.epp_max_pages,
+ page_pools.epp_max_pools,
+ atomic_read(&page_pools.epp_users),
+ atomic_read(&page_pools.epp_users_gone),
+ page_pools.epp_waitqlen,
+ page_pools.epp_pages_short,
+ page_pools.epp_total_pages,
+ page_pools.epp_free_pages,
+ page_pools.epp_st_adds,
+ page_pools.epp_st_failadds,
+ page_pools.epp_st_reqs,
+ page_pools.epp_st_missings,
+ page_pools.epp_st_lowfree,
+ page_pools.epp_st_max_wqlen,
+ page_pools.epp_st_max_wait
+ );
+
+ spin_unlock(&page_pools.epp_lock);
+ return rc;
+}
+
+static inline
+int npages_to_npools(unsigned long npages)
+{
+ return (int) ((npages + PAGES_PER_POOL - 1) / PAGES_PER_POOL);
+}
+
+/*
+ * return how many pages cleaned up.
+ */
+static unsigned long enc_cleanup_pools(cfs_page_t ***pools, int npools)
+{
+ unsigned long cleaned = 0;
+ int i, j;
+
+ for (i = 0; i < npools; i++) {
+ if (pools[i]) {
+ for (j = 0; j < PAGES_PER_POOL; j++) {
+ if (pools[i][j]) {
+ cfs_free_page(pools[i][j]);
+ cleaned++;
+ }
+ }
+ OBD_FREE(pools[i], CFS_PAGE_SIZE);
+ pools[i] = NULL;
+ }
+ }
+
+ return cleaned;
+}
+
+/*
+ * merge @npools pointed by @pools which contains @npages new pages
+ * into current pools.
+ *
+ * we could avoid most of the memory copying with some tricks, but we
+ * choose the simplest way to avoid complexity -- this path is not
+ * called frequently.
+ */
+static void enc_insert_pool(cfs_page_t ***pools, int npools, int npages)
+{
+ int freeslot;
+ int op_idx, np_idx, og_idx, ng_idx;
+ int cur_npools, end_npools;
+
+ LASSERT(npages > 0);
+ LASSERT(page_pools.epp_total_pages+npages <= page_pools.epp_max_pages);
+ LASSERT(npages_to_npools(npages) == npools);
+
+ spin_lock(&page_pools.epp_lock);
+
+ /*
+ * (1) fill all the free slots of current pools.
+ */
+ /*
+ * free slots are those left by rented pages, plus the extra ones with
+ * index >= epp_total_pages, located at the tail of the last pool.
+ */
+ freeslot = page_pools.epp_total_pages % PAGES_PER_POOL;
+ if (freeslot != 0)
+ freeslot = PAGES_PER_POOL - freeslot;
+ freeslot += page_pools.epp_total_pages - page_pools.epp_free_pages;
+
+ op_idx = page_pools.epp_free_pages / PAGES_PER_POOL;
+ og_idx = page_pools.epp_free_pages % PAGES_PER_POOL;
+ np_idx = npools - 1;
+ ng_idx = (npages - 1) % PAGES_PER_POOL;
+
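+ /*
+ * fill the old pools' free slots forward while draining the new
+ * pools from the tail backward, so any leftover new pages stay
+ * contiguous at the head of @pools for step (2) below.
+ */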
+ while (freeslot) {
+ LASSERT(page_pools.epp_pools[op_idx][og_idx] == NULL);
+ LASSERT(pools[np_idx][ng_idx] != NULL);
+
+ page_pools.epp_pools[op_idx][og_idx] = pools[np_idx][ng_idx];
+ pools[np_idx][ng_idx] = NULL;
+
+ freeslot--;
+
+ if (++og_idx == PAGES_PER_POOL) {
+ op_idx++;
+ og_idx = 0;
+ }
+ if (--ng_idx < 0) {
+ if (np_idx == 0)
+ break;
+ np_idx--;
+ ng_idx = PAGES_PER_POOL - 1;
+ }
+ }
+
+ /*
+ * (2) add pools if needed.
+ */
+ cur_npools = (page_pools.epp_total_pages + PAGES_PER_POOL - 1) /
+ PAGES_PER_POOL;
+ end_npools = (page_pools.epp_total_pages + npages + PAGES_PER_POOL -1) /
+ PAGES_PER_POOL;
+ LASSERT(end_npools <= page_pools.epp_max_pools);
+
+ np_idx = 0;
+ while (cur_npools < end_npools) {
+ LASSERT(page_pools.epp_pools[cur_npools] == NULL);
+ LASSERT(np_idx < npools);
+ LASSERT(pools[np_idx] != NULL);
+
+ page_pools.epp_pools[cur_npools++] = pools[np_idx];
+ pools[np_idx++] = NULL;
+ }
+
+ page_pools.epp_total_pages += npages;
+ page_pools.epp_free_pages += npages;
+ page_pools.epp_st_lowfree = page_pools.epp_free_pages;
+
+ if (page_pools.epp_total_pages == page_pools.epp_max_pages)
+ page_pools.epp_full = 1;
+
+ CDEBUG(D_SEC, "add %d pages to total %lu\n", npages,
+ page_pools.epp_total_pages);
+
+ spin_unlock(&page_pools.epp_lock);
+}
+
+static int enc_pools_add_pages(int npages)
+{
+ static DECLARE_MUTEX(sem_add_pages);
+ cfs_page_t ***pools;
+ int npools, alloced = 0;
+ int i, j, rc = -ENOMEM;
+
+ down(&sem_add_pages);
+
+ if (npages > page_pools.epp_max_pages - page_pools.epp_total_pages)
+ npages = page_pools.epp_max_pages - page_pools.epp_total_pages;
+ if (npages == 0) {
+ rc = 0;
+ goto out;
+ }
+
+ page_pools.epp_st_adds++;
+
+ npools = npages_to_npools(npages);
+ OBD_ALLOC(pools, npools * sizeof(*pools));
+ if (pools == NULL)
+ goto out;
+
+ for (i = 0; i < npools; i++) {
+ OBD_ALLOC(pools[i], CFS_PAGE_SIZE);
+ if (pools[i] == NULL)
+ goto out_pools;
+
+ for (j = 0; j < PAGES_PER_POOL && alloced < npages; j++) {
+ pools[i][j] = cfs_alloc_page(CFS_ALLOC_IO |
+ CFS_ALLOC_HIGH);
+ if (pools[i][j] == NULL)
+ goto out_pools;
+
+ alloced++;
+ }
+ }
+
+ enc_insert_pool(pools, npools, npages);
+ CDEBUG(D_SEC, "add %d pages into enc page pools\n", npages);
+ rc = 0;
+
+out_pools:
+ enc_cleanup_pools(pools, npools);
+ OBD_FREE(pools, npools * sizeof(*pools));
+out:
+ if (rc) {
+ page_pools.epp_st_failadds++;
+ CERROR("Failed to pre-allocate %d enc pages\n", npages);
+ }
+
+ up(&sem_add_pages);
+ return rc;
+}
+
+/*
+ * both "max bulk rpcs inflight" and "lnet MTU" are tunable, we use the
+ * default fixed value initially.
+ */
+int sptlrpc_enc_pool_add_user(void)
+{
+ int page_plus = PTLRPC_MAX_BRW_PAGES * OSC_MAX_RIF_DEFAULT;
+ int users, users_gone, shift, rc;
+
+ LASSERT(!in_interrupt());
+ LASSERT(atomic_read(&page_pools.epp_users) >= 0);
+
+ users_gone = atomic_dec_return(&page_pools.epp_users_gone);
+ if (users_gone >= 0) {
+ CWARN("%d users gone, skip\n", users_gone + 1);
+ return 0;
+ }
+ atomic_inc(&page_pools.epp_users_gone);
+
+ /*
+ * prepare the full quota for the first 2 users; 1/2 for the next 2
+ * users; 1/4 for the next 4; 1/8 for the next 8; 1/16 for the next 16;
+ * ...
+ */
+ users = atomic_add_return(1, &page_pools.epp_users);
+ shift = fls(users - 1);
+ shift = shift > 1 ? shift - 1 : 0;
+ page_plus = page_plus >> shift;
+ page_plus = page_plus > 2 ? page_plus : 2;
+
+ rc = enc_pools_add_pages(page_plus);
+ return 0;
+}
+EXPORT_SYMBOL(sptlrpc_enc_pool_add_user);
+
+int sptlrpc_enc_pool_del_user(void)
+{
+ atomic_inc(&page_pools.epp_users_gone);
+ return 0;
+}
+EXPORT_SYMBOL(sptlrpc_enc_pool_del_user);
+
+/*
+ * take the requested pages from the pool in a single batch: either
+ * all of them are handed out, or the caller waits and retries.
+ */
+int sptlrpc_enc_pool_get_pages(struct ptlrpc_bulk_desc *desc)
+{
+ cfs_waitlink_t waitlink;
+ cfs_time_t tick1 = 0, tick2;
+ int p_idx, g_idx;
+ int i;
+
+ LASSERT(desc->bd_enc_pages == NULL);
+ LASSERT(desc->bd_max_iov > 0);
+ LASSERT(desc->bd_max_iov <= page_pools.epp_total_pages);
+
+ OBD_ALLOC(desc->bd_enc_pages,
+ desc->bd_max_iov * sizeof(*desc->bd_enc_pages));
+ if (desc->bd_enc_pages == NULL)
+ return -ENOMEM;
+
+ spin_lock(&page_pools.epp_lock);
+again:
+ page_pools.epp_st_reqs++;
+
+ if (unlikely(page_pools.epp_free_pages < desc->bd_max_iov)) {
+ if (tick1 == 0)
+ tick1 = cfs_time_current();
+
+ page_pools.epp_st_missings++;
+ page_pools.epp_pages_short += desc->bd_max_iov;
+
+ if (++page_pools.epp_waitqlen > page_pools.epp_st_max_wqlen)
+ page_pools.epp_st_max_wqlen = page_pools.epp_waitqlen;
+ /*
+ * we just wait if someone else is adding more pages, or if the
+ * wait queue is not deep enough yet. otherwise we try to add
+ * more pages to the pools ourselves.
+ *
+ * FIXME the policy for detecting resource tightness and growing
+ * the pool needs to be reconsidered.
+ */
+ if (page_pools.epp_adding || page_pools.epp_waitqlen < 2 ||
+ page_pools.epp_full) {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ cfs_waitlink_init(&waitlink);
+ cfs_waitq_add(&page_pools.epp_waitq, &waitlink);
+
+ spin_unlock(&page_pools.epp_lock);
+ cfs_schedule();
+ spin_lock(&page_pools.epp_lock);
+ } else {
+ page_pools.epp_adding = 1;
+
+ spin_unlock(&page_pools.epp_lock);
+ enc_pools_add_pages(page_pools.epp_pages_short / 2);
+ spin_lock(&page_pools.epp_lock);
+
+ page_pools.epp_adding = 0;
+ }
+
+ LASSERT(page_pools.epp_pages_short >= desc->bd_max_iov);
+ LASSERT(page_pools.epp_waitqlen > 0);
+ page_pools.epp_pages_short -= desc->bd_max_iov;
+ page_pools.epp_waitqlen--;
+
+ goto again;
+ }
+ /*
+ * record max wait time
+ */
+ if (unlikely(tick1 != 0)) {
+ tick2 = cfs_time_current();
+ if (tick2 - tick1 > page_pools.epp_st_max_wait)
+ page_pools.epp_st_max_wait = tick2 - tick1;
+ }
+ /*
+ * proceed with rest of allocation
+ */
+ page_pools.epp_free_pages -= desc->bd_max_iov;
+
+ p_idx = page_pools.epp_free_pages / PAGES_PER_POOL;
+ g_idx = page_pools.epp_free_pages % PAGES_PER_POOL;
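+ /*
+ * free pages always occupy slots [0, epp_free_pages) of the
+ * two-level array, so after the decrement above (p_idx, g_idx)
+ * points at the first page being handed out.
+ */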
+
+ for (i = 0; i < desc->bd_max_iov; i++) {
+ LASSERT(page_pools.epp_pools[p_idx][g_idx] != NULL);
+ desc->bd_enc_pages[i] = page_pools.epp_pools[p_idx][g_idx];
+ page_pools.epp_pools[p_idx][g_idx] = NULL;
+
+ if (++g_idx == PAGES_PER_POOL) {
+ p_idx++;
+ g_idx = 0;
+ }
+ }
+
+ if (page_pools.epp_free_pages < page_pools.epp_st_lowfree)
+ page_pools.epp_st_lowfree = page_pools.epp_free_pages;
+
+ spin_unlock(&page_pools.epp_lock);
+ return 0;
+}
+EXPORT_SYMBOL(sptlrpc_enc_pool_get_pages);
+
+void sptlrpc_enc_pool_put_pages(struct ptlrpc_bulk_desc *desc)
+{
+ int p_idx, g_idx;
+ int i;
+
+ if (desc->bd_enc_pages == NULL)
+ return;
+ if (desc->bd_max_iov == 0)
+ return;
+
+ spin_lock(&page_pools.epp_lock);
+
+ p_idx = page_pools.epp_free_pages / PAGES_PER_POOL;
+ g_idx = page_pools.epp_free_pages % PAGES_PER_POOL;
+
+ LASSERT(page_pools.epp_free_pages + desc->bd_max_iov <=
+ page_pools.epp_total_pages);
+ LASSERT(page_pools.epp_pools[p_idx]);
+
+ for (i = 0; i < desc->bd_max_iov; i++) {
+ LASSERT(desc->bd_enc_pages[i] != NULL);
+ LASSERT(g_idx != 0 || page_pools.epp_pools[p_idx]);
+ LASSERT(page_pools.epp_pools[p_idx][g_idx] == NULL);
+
+ page_pools.epp_pools[p_idx][g_idx] = desc->bd_enc_pages[i];
+
+ if (++g_idx == PAGES_PER_POOL) {
+ p_idx++;
+ g_idx = 0;
+ }
+ }
+
+ page_pools.epp_free_pages += desc->bd_max_iov;
+
+ spin_unlock(&page_pools.epp_lock);
+
+ OBD_FREE(desc->bd_enc_pages,
+ desc->bd_max_iov * sizeof(*desc->bd_enc_pages));
+ desc->bd_enc_pages = NULL;
+}
+EXPORT_SYMBOL(sptlrpc_enc_pool_put_pages);
+
+int sptlrpc_enc_pool_init(void)
+{
+ /* constants */
+ page_pools.epp_max_pages = num_physpages / 4; /* up to 1/4 of physical pages */
+ page_pools.epp_max_pools = npages_to_npools(page_pools.epp_max_pages);
+
+ atomic_set(&page_pools.epp_users, 0);
+ atomic_set(&page_pools.epp_users_gone, 0);
+
+ cfs_waitq_init(&page_pools.epp_waitq);
+ page_pools.epp_waitqlen = 0;
+ page_pools.epp_pages_short = 0;
+
+ page_pools.epp_adding = 0;
+ page_pools.epp_full = 0;
+
+ spin_lock_init(&page_pools.epp_lock);
+ page_pools.epp_total_pages = 0;
+ page_pools.epp_free_pages = 0;
+
+ page_pools.epp_st_adds = 0;
+ page_pools.epp_st_failadds = 0;
+ page_pools.epp_st_reqs = 0;
+ page_pools.epp_st_missings = 0;
+ page_pools.epp_st_lowfree = 0;
+ page_pools.epp_st_max_wqlen = 0;
+ page_pools.epp_st_max_wait = 0;
+
+ OBD_ALLOC(page_pools.epp_pools,
+ page_pools.epp_max_pools * sizeof(*page_pools.epp_pools));
+ if (page_pools.epp_pools == NULL)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void sptlrpc_enc_pool_fini(void)
+{
+ unsigned long cleaned, npools;
+
+ LASSERT(page_pools.epp_pools);
+ LASSERT(page_pools.epp_total_pages == page_pools.epp_free_pages);
+
+ npools = npages_to_npools(page_pools.epp_total_pages);
+ cleaned = enc_cleanup_pools(page_pools.epp_pools, npools);
+ LASSERT(cleaned == page_pools.epp_total_pages);
+
+ OBD_FREE(page_pools.epp_pools,
+ page_pools.epp_max_pools * sizeof(*page_pools.epp_pools));
+}
+
+#else /* !__KERNEL__ */
+
+int sptlrpc_enc_pool_get_pages(struct ptlrpc_bulk_desc *desc)
+{
+ return 0;
+}
+
+void sptlrpc_enc_pool_put_pages(struct ptlrpc_bulk_desc *desc)
+{
+}
+
+int sptlrpc_enc_pool_init(void)
+{
+ return 0;
+}
+
+void sptlrpc_enc_pool_fini(void)
+{
+}
+#endif
+
+/****************************************
+ * Helpers to assist policy modules to *
+ * implement checksum functionality *
+ ****************************************/
+
+static struct {
+ char *name;
+ int size;
+} csum_types[] = {
+ [BULK_CSUM_ALG_NULL] = { "null", 0 },
+ [BULK_CSUM_ALG_CRC32] = { "crc32", 4 },
+ [BULK_CSUM_ALG_MD5] = { "md5", 16 },
+ [BULK_CSUM_ALG_SHA1] = { "sha1", 20 },
+ [BULK_CSUM_ALG_SHA256] = { "sha256", 32 },
+ [BULK_CSUM_ALG_SHA384] = { "sha384", 48 },
+ [BULK_CSUM_ALG_SHA512] = { "sha512", 64 },
+};
+
+const char * sptlrpc_bulk_csum_alg2name(__u32 csum_alg)
+{
+ if (csum_alg < BULK_CSUM_ALG_MAX)
+ return csum_types[csum_alg].name;
+ return "unknown_cksum";
+}
+EXPORT_SYMBOL(sptlrpc_bulk_csum_alg2name);
+
+int bulk_sec_desc_size(__u32 csum_alg, int request, int read)
+{
+ int size = sizeof(struct ptlrpc_bulk_sec_desc);
+
+ LASSERT(csum_alg < BULK_CSUM_ALG_MAX);
+
+ /* read requests don't need extra data */
+ if (!(read && request))
+ size += csum_types[csum_alg].size;
+
+ return size;
+}
+EXPORT_SYMBOL(bulk_sec_desc_size);
+
+int bulk_sec_desc_unpack(struct lustre_msg *msg, int offset)
+{
+ struct ptlrpc_bulk_sec_desc *bsd;
+ int size = msg->lm_buflens[offset];
+
+ bsd = lustre_msg_buf(msg, offset, sizeof(*bsd));
+ if (bsd == NULL) {
+ CERROR("Invalid bulk sec desc: size %d\n", size);
+ return -EINVAL;
+ }
+
+ if (lustre_msg_swabbed(msg)) {
+ __swab32s(&bsd->bsd_version);
+ __swab32s(&bsd->bsd_pad);
+ __swab32s(&bsd->bsd_csum_alg);
+ __swab32s(&bsd->bsd_priv_alg);
+ }
+
+ if (bsd->bsd_version != 0) {
+ CERROR("Unexpected version %u\n", bsd->bsd_version);
+ return -EPROTO;
+ }
+
+ if (bsd->bsd_csum_alg >= BULK_CSUM_ALG_MAX) {
+ CERROR("Unsupported checksum algorithm %u\n",
+ bsd->bsd_csum_alg);
+ return -EINVAL;
+ }
+ if (bsd->bsd_priv_alg >= BULK_PRIV_ALG_MAX) {
+ CERROR("Unsupported cipher algorithm %u\n",
+ bsd->bsd_priv_alg);
+ return -EINVAL;
+ }
+
+ if (size > sizeof(*bsd) &&
+ size < sizeof(*bsd) + csum_types[bsd->bsd_csum_alg].size) {
+ CERROR("Mal-formed checksum data: csum alg %u, size %d\n",
+ bsd->bsd_csum_alg, size);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(bulk_sec_desc_unpack);
+
+#ifdef __KERNEL__
+static
+int do_bulk_checksum_crc32(struct ptlrpc_bulk_desc *desc, void *buf)
+{
+ struct page *page;
+ int off;
+ char *ptr;
+ __u32 crc32 = ~0;
+ int len, i;
+
+ for (i = 0; i < desc->bd_iov_count; i++) {
+ page = desc->bd_iov[i].kiov_page;
+ off = desc->bd_iov[i].kiov_offset & ~CFS_PAGE_MASK;
+ ptr = cfs_kmap(page) + off;
+ len = desc->bd_iov[i].kiov_len;
+
+ crc32 = crc32_le(crc32, ptr, len);
+
+ cfs_kunmap(page);
+ }
+
+ *((__u32 *) buf) = crc32;
+ return 0;
+}
+
+static
+int do_bulk_checksum(struct ptlrpc_bulk_desc *desc, __u32 alg, void *buf)
+{
+ struct crypto_tfm *tfm;
+ struct scatterlist *sl;
+ int i, rc = 0;
+
+ LASSERT(alg > BULK_CSUM_ALG_NULL &&
+ alg < BULK_CSUM_ALG_MAX);
+
+ if (alg == BULK_CSUM_ALG_CRC32)
+ return do_bulk_checksum_crc32(desc, buf);
+
+ tfm = crypto_alloc_tfm(csum_types[alg].name, 0);
+ if (tfm == NULL) {
+ CERROR("Unable to allocate tfm %s\n", csum_types[alg].name);
+ return -ENOMEM;
+ }
+
+ OBD_ALLOC(sl, sizeof(*sl) * desc->bd_iov_count);
+ if (sl == NULL) {
+ rc = -ENOMEM;
+ goto out_tfm;
+ }
+
+ for (i = 0; i < desc->bd_iov_count; i++) {
+ sl[i].page = desc->bd_iov[i].kiov_page;
+ sl[i].offset = desc->bd_iov[i].kiov_offset & ~CFS_PAGE_MASK;
+ sl[i].length = desc->bd_iov[i].kiov_len;
+ }
+
+ crypto_digest_init(tfm);
+ crypto_digest_update(tfm, sl, desc->bd_iov_count);
+ crypto_digest_final(tfm, buf);
+
+ OBD_FREE(sl, sizeof(*sl) * desc->bd_iov_count);
+
+out_tfm:
+ crypto_free_tfm(tfm);
+ return rc;
+}
+
+#else /* !__KERNEL__ */
+static
+int do_bulk_checksum(struct ptlrpc_bulk_desc *desc, __u32 alg, void *buf)
+{
+ __u32 crc32 = ~0;
+ int i;
+
+ LASSERT(alg == BULK_CSUM_ALG_CRC32);
+
+ for (i = 0; i < desc->bd_iov_count; i++) {
+ char *ptr = desc->bd_iov[i].iov_base;
+ int len = desc->bd_iov[i].iov_len;
+
+ crc32 = crc32_le(crc32, ptr, len);
+ }
+
+ *((__u32 *) buf) = crc32;
+ return 0;
+}
+#endif
+
+/*
+ * compute the @alg checksum over @desc and store the result in @bsd.
+ * if anything goes wrong, leave bsd_csum_alg as BULK_CSUM_ALG_NULL.
+ */
+static
+int generate_bulk_csum(struct ptlrpc_bulk_desc *desc, __u32 alg,
+ struct ptlrpc_bulk_sec_desc *bsd, int bsdsize)
+{
+ int rc;
+
+ LASSERT(bsd);
+ LASSERT(alg < BULK_CSUM_ALG_MAX);
+
+ bsd->bsd_csum_alg = BULK_CSUM_ALG_NULL;
+
+ if (alg == BULK_CSUM_ALG_NULL)
+ return 0;
+
+ LASSERT(bsdsize >= sizeof(*bsd) + csum_types[alg].size);
+
+ rc = do_bulk_checksum(desc, alg, bsd->bsd_csum);
+ if (rc == 0)
+ bsd->bsd_csum_alg = alg;
+
+ return rc;
+}
+
+static
+int verify_bulk_csum(struct ptlrpc_bulk_desc *desc, int read,
+ struct ptlrpc_bulk_sec_desc *bsdv, int bsdvsize,
+ struct ptlrpc_bulk_sec_desc *bsdr, int bsdrsize)
+{
+ char *csum_p;
+ char *buf = NULL;
+ int csum_size, rc = 0;
+
+ LASSERT(bsdv);
+ LASSERT(bsdv->bsd_csum_alg < BULK_CSUM_ALG_MAX);
+
+ if (bsdr)
+ bsdr->bsd_csum_alg = BULK_CSUM_ALG_NULL;
+
+ if (bsdv->bsd_csum_alg == BULK_CSUM_ALG_NULL)
+ return 0;
+
+ /* for all supported algorithms */
+ csum_size = csum_types[bsdv->bsd_csum_alg].size;
+
+ if (bsdvsize < sizeof(*bsdv) + csum_size) {
+ CERROR("verifier size %d too small, require %d\n",
+ bsdvsize, sizeof(*bsdv) + csum_size);
+ return -EINVAL;
+ }
+
+ if (bsdr) {
+ LASSERT(bsdrsize >= sizeof(*bsdr) + csum_size);
+ csum_p = (char *) bsdr->bsd_csum;
+ } else {
+ OBD_ALLOC(buf, csum_size);
+ if (buf == NULL)
+ return -EINVAL;
+ csum_p = buf;
+ }
+
+ rc = do_bulk_checksum(desc, bsdv->bsd_csum_alg, csum_p);
+
+ if (memcmp(bsdv->bsd_csum, csum_p, csum_size)) {
+ CERROR("BAD %s CHECKSUM (%s), data mutated during "
+ "transfer!\n", read ? "READ" : "WRITE",
+ csum_types[bsdv->bsd_csum_alg].name);
+ rc = -EINVAL;
+ } else {
+ CDEBUG(D_SEC, "bulk %s checksum (%s) verified\n",
+ read ? "read" : "write",
+ csum_types[bsdv->bsd_csum_alg].name);
+ }
+
+ if (bsdr) {
+ bsdr->bsd_csum_alg = bsdv->bsd_csum_alg;
+ memcpy(bsdr->bsd_csum, csum_p, csum_size);
+ } else {
+ LASSERT(buf);
+ OBD_FREE(buf, csum_size);
+ }
+
+ return rc;
+}
+
+int bulk_csum_cli_request(struct ptlrpc_bulk_desc *desc, int read,
+ __u32 alg, struct lustre_msg *rmsg, int roff)
+{
+ struct ptlrpc_bulk_sec_desc *bsdr;
+ int rsize, rc = 0;
+
+ rsize = rmsg->lm_buflens[roff];
+ bsdr = lustre_msg_buf(rmsg, roff, sizeof(*bsdr));
+
+ LASSERT(bsdr);
+ LASSERT(rsize >= sizeof(*bsdr));
+ LASSERT(alg < BULK_CSUM_ALG_MAX);
+
+ if (read)
+ bsdr->bsd_csum_alg = alg;
+ else {
+ rc = generate_bulk_csum(desc, alg, bsdr, rsize);
+ if (rc) {
+ CERROR("client bulk write: failed to perform "
+ "checksum: %d\n", rc);
+ }
+ }
+
+ return rc;
+}
+EXPORT_SYMBOL(bulk_csum_cli_request);
+
+int bulk_csum_cli_reply(struct ptlrpc_bulk_desc *desc, int read,
+ struct lustre_msg *rmsg, int roff,
+ struct lustre_msg *vmsg, int voff)
+{
+ struct ptlrpc_bulk_sec_desc *bsdv, *bsdr;
+ int rsize, vsize;
+
+ rsize = rmsg->lm_buflens[roff];
+ vsize = vmsg->lm_buflens[voff];
+ bsdr = lustre_msg_buf(rmsg, roff, 0);
+ bsdv = lustre_msg_buf(vmsg, voff, 0);
+
+ if (bsdv == NULL || vsize < sizeof(*bsdv)) {
+ CERROR("Invalid checksum verifier from server: size %d\n",
+ vsize);
+ return -EINVAL;
+ }
+
+ LASSERT(bsdr);
+ LASSERT(rsize >= sizeof(*bsdr));
+ LASSERT(vsize >= sizeof(*bsdv));
+
+ if (bsdr->bsd_csum_alg != bsdv->bsd_csum_alg) {
+ CERROR("bulk %s: checksum algorithm mismatch: client request "
+ "%s but server reply with %s. try to use the new one "
+ "for checksum verification\n",
+ read ? "read" : "write",
+ csum_types[bsdr->bsd_csum_alg].name,
+ csum_types[bsdv->bsd_csum_alg].name);
+ }
+
+ if (read)
+ return verify_bulk_csum(desc, 1, bsdv, vsize, NULL, 0);
+ else {
+ char *cli, *srv, *new = NULL;
+ int csum_size = csum_types[bsdr->bsd_csum_alg].size;
+
+ LASSERT(bsdr->bsd_csum_alg < BULK_CSUM_ALG_MAX);
+ if (bsdr->bsd_csum_alg == BULK_CSUM_ALG_NULL)
+ return 0;
+
+ if (vsize < sizeof(*bsdv) + csum_size) {
+ CERROR("verifier size %d too small, require %d\n",
+ vsize, sizeof(*bsdv) + csum_size);
+ return -EINVAL;
+ }
+
+ cli = (char *) (bsdr + 1);
+ srv = (char *) (bsdv + 1);
+
+ if (!memcmp(cli, srv, csum_size)) {
+ /* checksum confirmed */
+ CDEBUG(D_SEC, "bulk write checksum (%s) confirmed\n",
+ csum_types[bsdr->bsd_csum_alg].name);
+ return 0;
+ }
+
+ /* checksum mismatch: re-compute a fresh checksum, compare it
+ * against both copies, and give a proper warning.
+ */
+ OBD_ALLOC(new, csum_size);
+ if (new == NULL)
+ return -ENOMEM;
+
+ do_bulk_checksum(desc, bsdr->bsd_csum_alg, new);
+
+ if (!memcmp(new, srv, csum_size)) {
+ CERROR("BAD WRITE CHECKSUM (%s): pages were mutated "
+ "on the client after we checksummed them\n",
+ csum_types[bsdr->bsd_csum_alg].name);
+ } else if (!memcmp(new, cli, csum_size)) {
+ CERROR("BAD WRITE CHECKSUM (%s): pages were mutated "
+ "in transit\n",
+ csum_types[bsdr->bsd_csum_alg].name);
+ } else {
+ CERROR("BAD WRITE CHECKSUM (%s): pages were mutated "
+ "in transit, and the current page contents "
+ "don't match the originals and what the server "
+ "received\n",
+ csum_types[bsdr->bsd_csum_alg].name);
+ }
+ OBD_FREE(new, csum_size);
+
+ return -EINVAL;
+ }
+}
+EXPORT_SYMBOL(bulk_csum_cli_reply);
+
+int bulk_csum_svc(struct ptlrpc_bulk_desc *desc, int read,
+ struct lustre_msg *vmsg, int voff,
+ struct lustre_msg *rmsg, int roff)
+{
+ struct ptlrpc_bulk_sec_desc *bsdv, *bsdr;
+ int vsize, rsize, rc;
+
+ vsize = vmsg->lm_buflens[voff];
+ rsize = rmsg->lm_buflens[roff];
+ bsdv = lustre_msg_buf(vmsg, voff, 0);
+ bsdr = lustre_msg_buf(rmsg, roff, 0);
+
+ LASSERT(vsize >= sizeof(*bsdv));
+ LASSERT(rsize >= sizeof(*bsdr));
+ LASSERT(bsdv && bsdr);
+
+ if (read) {
+ rc = generate_bulk_csum(desc, bsdv->bsd_csum_alg, bsdr, rsize);
+ if (rc)
+ CERROR("bulk read: server failed to generate %s "
+ "checksum: %d\n",
+ csum_types[bsdv->bsd_csum_alg].name, rc);
+ } else
+ rc = verify_bulk_csum(desc, 0, bsdv, vsize, bsdr, rsize);
+
+ return rc;
+}
+EXPORT_SYMBOL(bulk_csum_svc);
+
+/****************************************
+ * Helpers to assist policy modules to *
+ * implement encryption functionality *
+ ****************************************/
+
+/*
+ * NOTE: These algorithms must be stream ciphers!
+ */
+static struct {
+ char *name;
+ __u32 flags;
+} priv_types[] = {
+ [BULK_PRIV_ALG_NULL] = { "null", 0 },
+ [BULK_PRIV_ALG_ARC4] = { "arc4", 0 },
+};
+
+const char * sptlrpc_bulk_priv_alg2name(__u32 priv_alg)
+{
+ if (priv_alg < BULK_PRIV_ALG_MAX)
+ return priv_types[priv_alg].name;
+ return "unknown_priv";
+}
+EXPORT_SYMBOL(sptlrpc_bulk_priv_alg2name);
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2006 Cluster File Systems, Inc.
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef EXPORT_SYMTAB
+#define EXPORT_SYMTAB
+#endif
+#define DEBUG_SUBSYSTEM S_SEC
+
+#include <libcfs/libcfs.h>
+#ifndef __KERNEL__
+#include <liblustre.h>
+#include <libcfs/list.h>
+#else
+#include <linux/crypto.h>
+#endif
+
+#include <obd.h>
+#include <obd_class.h>
+#include <obd_support.h>
+#include <lustre_net.h>
+#include <lustre_import.h>
+#include <lustre_dlm.h>
+#include <lustre_sec.h>
+
+#include "ptlrpc_internal.h"
+
+#ifdef __KERNEL__
+
+struct proc_dir_entry *sptlrpc_proc_root = NULL;
+EXPORT_SYMBOL(sptlrpc_proc_root);
+
+void sec_flags2str(unsigned long flags, char *buf, int bufsize)
+{
+ buf[0] = '\0';
+
+ if (flags & PTLRPC_SEC_FL_REVERSE)
+ strncat(buf, "reverse,", bufsize);
+ if (flags & PTLRPC_SEC_FL_ROOTONLY)
+ strncat(buf, "rootonly,", bufsize);
+ if (flags & PTLRPC_SEC_FL_BULK)
+ strncat(buf, "bulk,", bufsize);
+ if (buf[0] == '\0')
+ strncat(buf, "-,", bufsize);
+
+ buf[strlen(buf) - 1] = '\0';
+}
+
+int sptlrpc_lprocfs_rd(char *page, char **start, off_t off, int count,
+ int *eof, void *data)
+{
+ struct obd_device *obd = data;
+ struct sec_flavor_config *conf = &obd->u.cli.cl_sec_conf;
+ struct ptlrpc_sec *sec = NULL;
+ struct ptlrpc_cli_ctx *ctx;
+ struct hlist_node *pos, *next;
+ char flags_str[32];
+ int written, i;
+
+ if (obd == NULL)
+ return 0;
+
+ LASSERT(strcmp(obd->obd_type->typ_name, LUSTRE_OSC_NAME) == 0 ||
+ strcmp(obd->obd_type->typ_name, LUSTRE_MDC_NAME) == 0 ||
+ strcmp(obd->obd_type->typ_name, LUSTRE_MGC_NAME) == 0);
+ LASSERT(conf->sfc_bulk_csum < BULK_CSUM_ALG_MAX);
+ LASSERT(conf->sfc_bulk_priv < BULK_PRIV_ALG_MAX);
+
+ if (obd->u.cli.cl_import)
+ sec = obd->u.cli.cl_import->imp_sec;
+
+ if (sec == NULL) {
+ written = snprintf(page, count, "\n");
+ goto out;
+ }
+
+ sec_flags2str(sec->ps_flags, flags_str, sizeof(flags_str));
+
+ written = snprintf(page, count,
+ "rpc msg flavor: %s\n"
+ "bulk checksum: %s\n"
+ "bulk encrypt: %s\n"
+ "flags: %s\n"
+ "ctx cache size %u\n"
+ "ctx cache busy %d\n"
+ "gc interval %lu\n"
+ "gc next %ld\n",
+ sptlrpc_flavor2name(sec->ps_flavor),
+ sptlrpc_bulk_csum_alg2name(conf->sfc_bulk_csum),
+ sptlrpc_bulk_priv_alg2name(conf->sfc_bulk_priv),
+ flags_str,
+ sec->ps_ccache_size,
+ atomic_read(&sec->ps_busy),
+ sec->ps_gc_interval,
+ sec->ps_gc_interval ?
+ sec->ps_gc_next - cfs_time_current_sec() : 0
+ );
+ /*
+ * list contexts
+ */
+ if (sec->ps_policy->sp_policy != SPTLRPC_POLICY_GSS)
+ goto out;
+
+ written += snprintf(page + written, count - written,
+ "GSS contexts ==>\n");
+
+ spin_lock(&sec->ps_lock);
+ for (i = 0; i < sec->ps_ccache_size; i++) {
+ hlist_for_each_entry_safe(ctx, pos, next,
+ &sec->ps_ccache[i], cc_hash) {
+ if (written >= count)
+ break;
+ written += sptlrpc_ctx_display(ctx, page + written,
+ count - written);
+ }
+ }
+ spin_unlock(&sec->ps_lock);
+
+out:
+ return written;
+}
+EXPORT_SYMBOL(sptlrpc_lprocfs_rd);
+
+static struct lprocfs_vars sptlrpc_lprocfs_vars[] = {
+ { "enc_pool", sptlrpc_proc_read_enc_pool, NULL, NULL },
+ { NULL }
+};
+
+int sptlrpc_lproc_init(void)
+{
+ int rc;
+
+ LASSERT(sptlrpc_proc_root == NULL);
+
+ sptlrpc_proc_root = lprocfs_register("sptlrpc", proc_lustre_root,
+ sptlrpc_lprocfs_vars, NULL);
+ if (IS_ERR(sptlrpc_proc_root)) {
+ rc = PTR_ERR(sptlrpc_proc_root);
+ sptlrpc_proc_root = NULL;
+ return rc;
+ }
+ return 0;
+}
+
+void sptlrpc_lproc_fini(void)
+{
+ if (sptlrpc_proc_root) {
+ lprocfs_remove(sptlrpc_proc_root);
+ sptlrpc_proc_root = NULL;
+ }
+}
+
+#else /* !__KERNEL__ */
+
+int sptlrpc_lproc_init(void)
+{
+ return 0;
+}
+
+void sptlrpc_lproc_fini(void)
+{
+}
+
+#endif
return rc;
}
-int sptlrpc_null_exit(void)
+void sptlrpc_null_fini(void)
{
int rc;
rc = sptlrpc_unregister_policy(&null_policy);
if (rc)
CERROR("cannot unregister sec.null: %d\n", rc);
-
- return rc;
}
return rc;
}
-int sptlrpc_plain_exit(void)
+void sptlrpc_plain_fini(void)
{
int rc;
rc = sptlrpc_unregister_policy(&plain_policy);
if (rc)
CERROR("cannot unregister sec.plain: %d\n", rc);
-
- return rc;
}
struct lustre_gss_init_res *gr,
int timeout)
{
- char *file = "/proc/fs/lustre/gss/init_channel";
+ char *file = "/proc/fs/lustre/sptlrpc/gss/init_channel";
struct lgssd_ioctl_param param;
struct passwd *pw;
int fd, ret;
+ struct lustre_gss_init_res *gr,
+ int timeout)
+{
-+ char *file = "/proc/fs/lustre/gss/init_channel";
++ char *file = "/proc/fs/lustre/sptlrpc/gss/init_channel";
+ struct lgssd_ioctl_param param;
+ struct passwd *pw;
+ int fd, ret;