1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Modifications for Lustre
5 * Copyright 2004 - 2006, Cluster File Systems, Inc.
7 * Author: Eric Mei <ericm@clusterfs.com>
11 * Neil Brown <neilb@cse.unsw.edu.au>
12 * J. Bruce Fields <bfields@umich.edu>
13 * Andy Adamson <andros@umich.edu>
14 * Dug Song <dugsong@monkey.org>
16 * RPCSEC_GSS server authentication.
17 * This implements RPCSEC_GSS as defined in rfc2203 (rpcsec_gss) and rfc2078
20 * The RPCSEC_GSS involves three stages:
23 * 3/ context destruction
25 * Context creation is handled largely by upcalls to user-space.
26 * In particular, GSS_Accept_sec_context is handled by an upcall
27 * Data exchange is handled entirely within the kernel
28 * In particular, GSS_GetMIC, GSS_VerifyMIC, GSS_Seal, GSS_Unseal are in-kernel.
29 * Context destruction is handled in-kernel
30 * GSS_Delete_sec_context is in-kernel
32 * Context creation is initiated by a RPCSEC_GSS_INIT request arriving.
33 * The context handle and gss_token are used as a key into the rpcsec_init cache.
34 * The content of this cache includes some of the outputs of GSS_Accept_sec_context,
35 * being major_status, minor_status, context_handle, reply_token.
36 * These are sent back to the client.
37 * Sequence window management is handled by the kernel. The window size is currently
38 * a compile time constant.
40 * When user-space is happy that a context is established, it places an entry
41 * in the rpcsec_context cache. The key for this cache is the context_handle.
42 * The content includes:
43 * uid/gidlist - for determining access rights
45 * mechanism specific information, such as a key
49 #define DEBUG_SUBSYSTEM S_SEC
51 #include <linux/types.h>
52 #include <linux/init.h>
53 #include <linux/module.h>
54 #include <linux/slab.h>
55 #include <linux/hash.h>
56 #include <linux/mutex.h>
58 #include <liblustre.h>
61 #include <linux/sunrpc/cache.h>
64 #include <obd_class.h>
65 #include <obd_support.h>
66 #include <lustre/lustre_idl.h>
67 #include <lustre_net.h>
68 #include <lustre_import.h>
69 #include <lustre_sec.h>
72 #include "gss_internal.h"
/* Upper bound, in seconds, on how long gss_svc_upcall_handle_init() sleeps
 * waiting for the init upcall; it is multiplied by HZ for schedule_timeout(). */
75 #define GSS_SVC_UPCALL_TIMEOUT (20)
/* Lock taken by gss_get_next_ctx_index(); presumably it serializes access to
 * __ctx_index -- confirm against the full function body. */
77 static spinlock_t __ctx_index_lock = SPIN_LOCK_UNLOCKED;
/* Context index state; seeded with random bytes in gss_init_svc_upcall(). */
78 static __u64 __ctx_index;
/*
 * Produce a context index value while holding __ctx_index_lock.
 * rsi_request() uses the result as the kernel-suggested index when the
 * incoming handle is empty.
 *
 * NOTE(review): the statement executed between lock and unlock, and the
 * return statement, are missing from this copy of the file; presumably the
 * function advances __ctx_index and returns it -- confirm.
 */
80 __u64 gss_get_next_ctx_index(void)
84 spin_lock(&__ctx_index_lock);
86 spin_unlock(&__ctx_index_lock);
/*
 * Reduce a byte buffer to a hash value that is `bits` bits wide.
 *
 * buf/length: the data to hash.
 * bits:       width of the result; rsc_hash() passes RSC_HASHBITS.
 *
 * NOTE(review): the loop that consumes `buf` and maintains the locals `l`
 * and `len` is missing from this copy of the file; only the fold step and
 * the final reduction are visible.
 */
92 unsigned long hash_mem(char *buf, int length, int bits)
94 unsigned long hash = 0;
/* when `len` is a multiple of the word size in bytes, fold the word `l`
 * into the running hash */
109 if ((len & (BITS_PER_LONG/8-1)) == 0)
110 hash = hash_long(hash^l, BITS_PER_LONG);
/* keep only the top `bits` bits of the full-width hash */
113 return hash >> (BITS_PER_LONG - bits);
116 /****************************************
118 ****************************************/
/* rsi hash table geometry: 2^RSI_HASHBITS buckets (rsi_table is declared
 * with RSI_HASHMAX entries). */
120 #define RSI_HASHBITS (6)
121 #define RSI_HASHMAX (1 << RSI_HASHBITS)
/* mask covering the valid bucket indices 0 .. RSI_HASHMAX - 1 */
122 #define RSI_HASHMASK (RSI_HASHMAX - 1)
/* Members of struct rsi (an init upcall cache entry). The struct header and
 * its other members (e.g. h, lustre_svc, nid, which the functions below
 * reference) are missing from this copy of the file. */
/* waiters for the upcall result; woken by rsi_parse() */
128 wait_queue_head_t waitq;
/* lookup key: hashed by rsi_hash() and compared by rsi_match() */
129 rawobj_t in_handle, in_token;
/* upcall results: parsed by rsi_parse() and moved into the cached entry by
 * rsi_update() */
130 rawobj_t out_handle, out_token;
131 int major_status, minor_status;
/* bucket array used as rsi_cache.hash_table */
134 static struct cache_head *rsi_table[RSI_HASHMAX];
/* forward declaration; the initializer appears further down */
135 static struct cache_detail rsi_cache;
/* lookup routine, instantiated later by DefineSimpleCacheLookup(rsi, 0) */
136 static struct rsi *rsi_lookup(struct rsi *item, int set);
/* Release the four rawobj buffers held by an rsi entry (input handle/token
 * and output handle/token). The rsi structure itself is not freed by the
 * visible statements. */
139 void rsi_free(struct rsi *rsi)
141 rawobj_free(&rsi->in_handle);
142 rawobj_free(&rsi->in_token);
143 rawobj_free(&rsi->out_handle);
144 rawobj_free(&rsi->out_token);
/*
 * Drop one reference on an rsi cache entry (installed as
 * rsi_cache.cache_put). When cache_put() returns non-zero the entry is
 * asserted to be unhashed (item->next == NULL) and its memory is kfree'd.
 *
 * NOTE(review): a line between the LASSERT and kfree() is missing from this
 * copy of the file; presumably it calls rsi_free() to release the rawobj
 * buffers -- confirm.
 */
148 void rsi_put(struct cache_head *item, struct cache_detail *cd)
150 struct rsi *rsi = container_of(item, struct rsi, h);
/* caller must hold a reference */
152 LASSERT(atomic_read(&item->refcnt) > 0);
154 if (cache_put(item, cd)) {
155 LASSERT(item->next == NULL);
157 kfree(rsi); /* created by cache mgmt using kmalloc */
/*
 * Hash an rsi entry from its key fields: one hash_mem() over in_handle and
 * one over in_token.
 *
 * NOTE(review): the `bits` arguments and the operator combining the two
 * hashes are on lines missing from this copy of the file.
 */
162 int rsi_hash(struct rsi *item)
164 return hash_mem((char *)item->in_handle.data, item->in_handle.len,
166 hash_mem((char *)item->in_token.data, item->in_token.len,
/* Two rsi entries match when both the input handle and the input token are
 * equal according to rawobj_equal(). Returns non-zero on match. */
171 int rsi_match(struct rsi *item, struct rsi *tmp)
173 return (rawobj_equal(&item->in_handle, &tmp->in_handle) &&
174 rawobj_equal(&item->in_token, &tmp->in_token));
/*
 * Format the upcall request for an rsi entry (installed as
 * rsi_cache.cache_request). Appends, hex-encoded and in this order:
 * lustre_svc, nid, index, in_handle, in_token.
 *
 * bpp/blen: output cursor and remaining length, passed through to
 *           qword_addhex().
 *
 * NOTE(review): the declaration and initial value of `index` are missing
 * from this copy of the file.
 */
178 void rsi_request(struct cache_detail *cd,
179 struct cache_head *h,
180 char **bpp, int *blen)
182 struct rsi *rsi = container_of(h, struct rsi, h);
185 /* if in_handle is null, provide kernel suggestion */
186 if (rsi->in_handle.len == 0)
187 index = gss_get_next_ctx_index();
189 qword_addhex(bpp, blen, (char *) &rsi->lustre_svc,
190 sizeof(rsi->lustre_svc));
191 qword_addhex(bpp, blen, (char *) &rsi->nid, sizeof(rsi->nid));
192 qword_addhex(bpp, blen, (char *) &index, sizeof(index));
193 qword_addhex(bpp, blen, rsi->in_handle.data, rsi->in_handle.len);
194 qword_addhex(bpp, blen, rsi->in_token.data, rsi->in_token.len);
/*
 * Initialize a new rsi entry `new` from the lookup template `item`.
 * The key buffers (in_handle, in_token) are moved, not copied: `item` is
 * left with RAWOBJ_EMPTY so the buffers have a single owner. The output
 * fields start empty; lustre_svc and nid are copied by value.
 */
199 void rsi_init(struct rsi *new, struct rsi *item)
201 new->out_handle = RAWOBJ_EMPTY;
202 new->out_token = RAWOBJ_EMPTY;
/* transfer ownership of the key buffers to the new entry */
204 new->in_handle = item->in_handle;
205 item->in_handle = RAWOBJ_EMPTY;
206 new->in_token = item->in_token;
207 item->in_token = RAWOBJ_EMPTY;
209 new->lustre_svc = item->lustre_svc;
210 new->nid = item->nid;
211 init_waitqueue_head(&new->waitq);
/*
 * Fill in the result part of rsi entry `new` from `item`.
 * `new` must not already carry output buffers (asserted). The output
 * handle/token are moved out of `item`, which is left with RAWOBJ_EMPTY;
 * the major/minor status codes are copied by value.
 */
215 void rsi_update(struct rsi *new, struct rsi *item)
217 LASSERT(new->out_handle.len == 0);
218 LASSERT(new->out_token.len == 0);
/* transfer ownership of the result buffers */
220 new->out_handle = item->out_handle;
221 item->out_handle = RAWOBJ_EMPTY;
222 new->out_token = item->out_token;
223 item->out_token = RAWOBJ_EMPTY;
225 new->major_status = item->major_status;
226 new->minor_status = item->minor_status;
/*
 * Parse a message written to the rsi cache channel (installed as
 * rsi_cache.cache_parse) and store the result in the cache.
 *
 * Fields are read from `mesg` in this order: in_handle, in_token, expiry,
 * major_status, minor_status, out_handle, out_token. The parsed entry is
 * stored via rsi_lookup(&rsii, 1); waiters on the resulting entry are woken
 * and the reference is dropped.
 *
 * NOTE(review): the declarations of buf/len/ep/expiry, all error checks and
 * gotos, the cleanup of `rsii` and the return statement are missing from
 * this copy of the file; `status` starts as -EINVAL.
 */
230 int rsi_parse(struct cache_detail *cd, char *mesg, int mlen)
235 struct rsi rsii, *rsip = NULL;
237 int status = -EINVAL;
241 memset(&rsii, 0, sizeof(rsii));
/* key part 1: input context handle */
244 len = qword_get(&mesg, buf, mlen);
247 if (rawobj_alloc(&rsii.in_handle, buf, len)) {
/* key part 2: input token */
253 len = qword_get(&mesg, buf, mlen);
256 if (rawobj_alloc(&rsii.in_token, buf, len)) {
262 expiry = get_expiry(&mesg);
/* result: major status, parsed as a decimal number */
266 len = qword_get(&mesg, buf, mlen);
271 rsii.major_status = simple_strtol(buf, &ep, 10);
/* result: minor status, parsed as a decimal number */
276 len = qword_get(&mesg, buf, mlen);
279 rsii.minor_status = simple_strtol(buf, &ep, 10);
/* result: output context handle */
284 len = qword_get(&mesg, buf, mlen);
287 if (rawobj_alloc(&rsii.out_handle, buf, len)) {
/* result: output token */
293 len = qword_get(&mesg, buf, mlen);
296 if (rawobj_alloc(&rsii.out_token, buf, len)) {
301 rsii.h.expiry_time = expiry;
/* second argument 1: store/update the cache entry */
302 rsip = rsi_lookup(&rsii, 1);
/* wake threads sleeping on this entry's waitq (see
 * gss_svc_upcall_handle_init()) */
307 wake_up_all(&rsip->waitq);
308 rsi_put(&rsip->h, &rsi_cache);
312 CERROR("rsi parse error %d\n", status);
/* Cache descriptor for the init upcall cache ("auth.sptlrpc.init"),
 * registered in gss_init_svc_upcall(). Requests are formatted by
 * rsi_request() and channel input is parsed by rsi_parse(). */
316 static struct cache_detail rsi_cache = {
317 .hash_size = RSI_HASHMAX,
318 .hash_table = rsi_table,
319 .name = "auth.sptlrpc.init",
320 .cache_put = rsi_put,
321 .cache_request = rsi_request,
322 .cache_parse = rsi_parse,
/* Presumably expands to the definition of rsi_lookup() declared above; the
 * macro itself is defined outside this file -- confirm against
 * <linux/sunrpc/cache.h>. */
325 static DefineSimpleCacheLookup(rsi, 0)
327 /****************************************
329 ****************************************/
/* rsc hash table geometry: 2^RSC_HASHBITS buckets (rsc_table is declared
 * with RSC_HASHMAX entries; rsc_hash() hashes down to RSC_HASHBITS bits). */
331 #define RSC_HASHBITS (10)
332 #define RSC_HASHMAX (1 << RSC_HASHBITS)
/* mask covering the valid bucket indices 0 .. RSC_HASHMAX - 1 */
333 #define RSC_HASHMASK (RSC_HASHMAX - 1)
/* Members of struct rsc (a security context cache entry). The struct header
 * and its other members (e.g. h and handle, which the functions below
 * reference) are missing from this copy of the file. */
/* device this context belongs to; compared by match_target() when flushing */
337 struct obd_device *target;
/* embedded server-side gss context; gss_svc_upcall_put_ctx() and
 * gss_svc_upcall_destroy_ctx() recover the rsc from it via container_of() */
339 struct gss_svc_ctx ctx;
/* bucket array used as rsc_cache.hash_table */
342 static struct cache_head *rsc_table[RSC_HASHMAX];
/* forward declaration; the initializer appears further down */
343 static struct cache_detail rsc_cache;
/* lookup routine, instantiated later by DefineSimpleCacheLookup(rsc, 0) */
344 static struct rsc *rsc_lookup(struct rsc *item, int set);
/* Release the resources owned by an rsc entry: the context handle, the
 * reverse handle, and the mechanism context. The rsc structure itself is
 * not freed by the visible statements. */
347 void rsc_free(struct rsc *rsci)
349 rawobj_free(&rsci->handle);
350 rawobj_free(&rsci->ctx.gsc_rvs_hdl);
351 lgss_delete_sec_context(&rsci->ctx.gsc_mechctx);
/*
 * Drop one reference on an rsc cache entry (installed as
 * rsc_cache.cache_put). When cache_put() returns non-zero the entry is
 * asserted to be unhashed (item->next == NULL) and its memory is kfree'd.
 *
 * NOTE(review): a line between the LASSERT and kfree() is missing from this
 * copy of the file; presumably it calls rsc_free() -- confirm.
 */
355 void rsc_put(struct cache_head *item, struct cache_detail *cd)
357 struct rsc *rsci = container_of(item, struct rsc, h);
/* caller must hold a reference */
359 LASSERT(atomic_read(&item->refcnt) > 0);
361 if (cache_put(item, cd)) {
362 LASSERT(item->next == NULL);
364 kfree(rsci); /* created by cache mgmt using kmalloc */
/* Hash an rsc entry by the bytes of its context handle, reduced to
 * RSC_HASHBITS bits. */
369 int rsc_hash(struct rsc *rsci)
371 return hash_mem((char *)rsci->handle.data,
372 rsci->handle.len, RSC_HASHBITS);
/* Two rsc entries match when their context handles are equal according to
 * rawobj_equal(). Returns non-zero on match. */
376 int rsc_match(struct rsc *new, struct rsc *tmp)
378 return rawobj_equal(&new->handle, &tmp->handle);
/*
 * Initialize a new rsc entry `new` from the lookup template `tmp`.
 * The handle buffer is moved (tmp is left with RAWOBJ_EMPTY); the embedded
 * gss context is zeroed and its reverse handle set to RAWOBJ_EMPTY.
 *
 * NOTE(review): a line between the handle transfer and the memset is
 * missing from this copy of the file.
 */
382 void rsc_init(struct rsc *new, struct rsc *tmp)
/* transfer ownership of the key buffer to the new entry */
384 new->handle = tmp->handle;
385 tmp->handle = RAWOBJ_EMPTY;
388 memset(&new->ctx, 0, sizeof(new->ctx));
389 new->ctx.gsc_rvs_hdl = RAWOBJ_EMPTY;
/*
 * Fill in the context part of rsc entry `new` from `tmp`.
 *
 * NOTE(review): the statement that copies tmp's context into `new` is
 * missing from this copy of the file. The visible lines clear tmp's reverse
 * handle and mechanism context pointer -- presumably because ownership has
 * just moved to `new` -- and then reset new's sequence data and initialize
 * its lock.
 */
393 void rsc_update(struct rsc *new, struct rsc *tmp)
396 tmp->ctx.gsc_rvs_hdl = RAWOBJ_EMPTY;
397 tmp->ctx.gsc_mechctx = NULL;
/* start with zeroed sequence data and a freshly initialized lock */
399 memset(&new->ctx.gsc_seqdata, 0, sizeof(new->ctx.gsc_seqdata));
400 spin_lock_init(&new->ctx.gsc_seqdata.ssd_lock);
/*
 * Parse a message written to the rsc cache channel (installed as
 * rsc_cache.cache_parse) and store the resulting context in the cache.
 *
 * Fields are read from `mesg` in this order: context handle, expiry, remote
 * flag, a flag stored in gsc_usr_root, a flag stored in gsc_usr_mds, mapped
 * uid, uid. For a non-negative entry these follow: gid, mechanism name and
 * mechanism-specific context data. The entry is stored via
 * rsc_lookup(&rsci, 1) and the reference is dropped.
 *
 * NOTE(review): declarations of buf/expiry/tmp_buf, most `if (rv)` checks,
 * gotos, braces, cleanup and the return statement are missing from this copy
 * of the file; `status` starts as -EINVAL.
 */
404 int rsc_parse(struct cache_detail *cd, char *mesg, int mlen)
407 int len, rv, tmp_int;
408 struct rsc rsci, *rscp = NULL;
410 int status = -EINVAL;
412 memset(&rsci, 0, sizeof(rsci));
/* context handle: the cache key */
415 len = qword_get(&mesg, buf, mlen);
416 if (len < 0) goto out;
418 if (rawobj_alloc(&rsci.handle, buf, len))
423 expiry = get_expiry(&mesg);
/* remote flag: any non-zero value is stored as 1 */
429 rv = get_int(&mesg, &tmp_int);
431 CERROR("fail to get remote flag\n");
434 rsci.ctx.gsc_remote = (tmp_int != 0);
/* NOTE(review): the message below says "oss user flag" but the value is
 * stored in gsc_usr_root -- the message text looks stale; confirm. */
437 rv = get_int(&mesg, &tmp_int);
439 CERROR("fail to get oss user flag\n");
442 rsci.ctx.gsc_usr_root = (tmp_int != 0);
/* mds user flag */
445 rv = get_int(&mesg, &tmp_int);
447 CERROR("fail to get mds user flag\n");
450 rsci.ctx.gsc_usr_mds = (tmp_int != 0);
/* mapped uid, parsed directly into the context field through an int cast */
453 rv = get_int(&mesg, (int *) &rsci.ctx.gsc_mapped_uid);
455 CERROR("fail to get mapped uid\n");
459 /* uid, or NEGATIVE */
460 rv = get_int(&mesg, (int *) &rsci.ctx.gsc_uid);
/* negative entry: mark it and skip the mechanism data (the condition
 * selecting this branch is on a missing line) */
464 CERROR("NOENT? set rsc entry negative\n");
465 set_bit(CACHE_NEGATIVE, &rsci.h.flags);
467 struct gss_api_mech *gm;
469 unsigned long ctx_expiry;
472 if (get_int(&mesg, (int *) &rsci.ctx.gsc_gid))
/* mechanism name */
476 len = qword_get(&mesg, buf, mlen);
479 gm = lgss_name_to_mech(buf);
/* error code for the checks that follow; presumably used when the mechanism
 * lookup fails -- the check itself is on a missing line */
480 status = -EOPNOTSUPP;
485 /* mech-specific data: */
486 len = qword_get(&mesg, buf, mlen);
492 tmp_buf.data = (unsigned char *)buf;
493 if (lgss_import_sec_context(&tmp_buf, gm,
494 &rsci.ctx.gsc_mechctx)) {
499 /* currently the expiry time passed down from user-space
500 * is invalid, here we retrieve it from mech. */
501 if (lgss_inquire_context(rsci.ctx.gsc_mechctx, &ctx_expiry)) {
502 CERROR("unable to get expire time, drop it\n");
/* override the expiry parsed from the message with the mechanism's value */
506 expiry = (time_t) ctx_expiry;
511 rsci.h.expiry_time = expiry;
/* second argument 1: store/update the cache entry */
512 rscp = rsc_lookup(&rsci, 1);
517 rsc_put(&rscp->h, &rsc_cache);
520 CERROR("parse rsc error %d\n", status);
524 /****************************************
526 ****************************************/
/* Predicate type used by rsc_flush(): returns non-zero when `rscp` matches
 * the opaque selector `data` (a uid or a pointer cast to long, see
 * match_uid() and match_target()). */
528 typedef int rsc_entry_match(struct rsc *rscp, long data);
/*
 * Walk every bucket of the rsc cache under the hash write lock and
 * invalidate each entry for which match(entry, data) is non-zero: the entry
 * is marked CACHE_NEGATIVE and a reference is dropped with rsc_put().
 *
 * NOTE(review): the lines that advance `ch` for non-matching entries and
 * that unlink/adjust matching entries before the set_bit()/rsc_put() pair
 * are missing from this copy of the file, as are the declarations of `n`
 * and `rscp`.
 */
531 void rsc_flush(rsc_entry_match *match, long data)
533 struct cache_head **ch;
538 write_lock(&rsc_cache.hash_lock);
539 for (n = 0; n < RSC_HASHMAX; n++) {
/* no increment clause: `ch` is advanced inside the loop body */
540 for (ch = &rsc_cache.hash_table[n]; *ch;) {
541 rscp = container_of(*ch, struct rsc, h);
543 if (!match(rscp, data)) {
548 /* it seems simply set NEGATIVE doesn't work */
552 set_bit(CACHE_NEGATIVE, &rscp->h.flags);
553 rsc_put(&rscp->h, &rsc_cache);
557 write_unlock(&rsc_cache.hash_lock);
/*
 * rsc_flush() predicate: non-zero when the context's uid equals `uid`
 * (both compared as int).
 *
 * NOTE(review): lines before the return are missing from this copy of the
 * file; rsc_flush_uid() logs "flush all gss contexts", which suggests a
 * wildcard uid is handled there -- confirm.
 */
562 int match_uid(struct rsc *rscp, long uid)
566 return ((int) rscp->ctx.gsc_uid == (int) uid);
/* rsc_flush() predicate: non-zero when the context belongs to the obd
 * device whose pointer was passed, cast to long, as `target`. */
570 int match_target(struct rsc *rscp, long target)
572 return (rscp->target == (struct obd_device *) target);
/*
 * Flush cached contexts selected by uid, using match_uid() as the
 * predicate.
 *
 * NOTE(review): the condition guarding the CWARN is missing from this copy
 * of the file; presumably the warning is printed only for a wildcard uid --
 * confirm.
 */
576 void rsc_flush_uid(int uid)
579 CWARN("flush all gss contexts...\n");
581 rsc_flush(match_uid, (long) uid);
/* Flush every cached context whose target is `target`; the pointer is
 * passed through rsc_flush()'s long selector and cast back in
 * match_target(). */
585 void rsc_flush_target(struct obd_device *target)
587 rsc_flush(match_target, (long) target);
/* Exported entry point: flush all cached server contexts that belong to
 * `target`. Thin wrapper around rsc_flush_target(). */
590 void gss_secsvc_flush(struct obd_device *target)
592 rsc_flush_target(target);
594 EXPORT_SYMBOL(gss_secsvc_flush);
/* Cache descriptor for the context cache ("auth.sptlrpc.context"),
 * registered in gss_init_svc_upcall(). Channel input is parsed by
 * rsc_parse(); no .cache_request handler appears among the visible
 * initializers. */
596 static struct cache_detail rsc_cache = {
597 .hash_size = RSC_HASHMAX,
598 .hash_table = rsc_table,
599 .name = "auth.sptlrpc.context",
600 .cache_put = rsc_put,
601 .cache_parse = rsc_parse,
/* Presumably expands to the definition of rsc_lookup() declared above; the
 * macro itself is defined outside this file -- confirm against
 * <linux/sunrpc/cache.h>. */
604 static DefineSimpleCacheLookup(rsc, 0);
/*
 * Find the cached rsc entry whose handle equals `handle`.
 * A zeroed template is given a duplicate of the handle and passed to
 * rsc_lookup() with set == 0; the hit is then validated with
 * cache_check().
 *
 * NOTE(review): the declarations of `rsci` and `found`, the failure returns
 * (presumably NULL), the cleanup of the template and the final return are
 * missing from this copy of the file.
 */
607 struct rsc *gss_svc_searchbyctx(rawobj_t *handle)
612 memset(&rsci, 0, sizeof(rsci));
613 if (rawobj_dup(&rsci.handle, handle))
616 found = rsc_lookup(&rsci, 0);
/* no upcall request handle is supplied (third argument is NULL) */
620 if (cache_check(&rsc_cache, &found->h, NULL))
/*
 * Install a reverse service context into the rsc cache on behalf of a
 * client import.
 *
 * imp:  import whose obd device becomes the context's target.
 * gsec: supplies gs_rvs_hdl, whose raw bytes are used as the cache handle.
 * gctx: client context whose mechanism context is copied with
 *       lgss_copy_reverse_context().
 *
 * The entry's expiry time is taken from lgss_inquire_context().
 *
 * NOTE(review): the error-path gotos/returns, the cleanup of `rsci`, the
 * declaration of `major` and the return statement are missing from this
 * copy of the file.
 */
625 int gss_svc_upcall_install_rvs_ctx(struct obd_import *imp,
626 struct gss_sec *gsec,
627 struct gss_cli_ctx *gctx)
629 struct rsc rsci, *rscp;
630 unsigned long ctx_expiry;
634 memset(&rsci, 0, sizeof(rsci));
/* the handle is the in-memory representation of gs_rvs_hdl */
636 if (rawobj_alloc(&rsci.handle, (char *) &gsec->gs_rvs_hdl,
637 sizeof(gsec->gs_rvs_hdl))) {
638 CERROR("unable alloc handle\n");
642 major = lgss_copy_reverse_context(gctx->gc_mechctx,
643 &rsci.ctx.gsc_mechctx);
644 if (major != GSS_S_COMPLETE) {
645 CERROR("unable to copy reverse context\n");
650 if (lgss_inquire_context(rsci.ctx.gsc_mechctx, &ctx_expiry)) {
651 CERROR("unable to get expire time, drop it\n");
656 rsci.h.expiry_time = (time_t) ctx_expiry;
657 rsci.target = imp->imp_obd;
/* second argument 1: store/update the cache entry, then drop the
 * reference returned by the lookup */
659 rscp = rsc_lookup(&rsci, 1);
662 rsc_put(&rscp->h, &rsc_cache);
664 CDEBUG(D_SEC, "client installed reverse svc ctx to %s: idx "LPX64"\n",
665 imp->imp_obd->u.cli.cl_target_uuid.uuid, gsec->gs_rvs_hdl);
/* Defer callback stored in cache_upcall_chandle below.
 * NOTE(review): the body is missing from this copy of the file. */
671 struct cache_deferred_req* cache_upcall_defer(struct cache_req *req)
/* request handle passed to cache_check() by gss_svc_upcall_handle_init() */
675 static struct cache_req cache_upcall_chandle = { cache_upcall_defer };
/*
 * Handle a context INIT request: issue an upcall through the rsi cache,
 * wait for the reply, look up the resulting context in the rsc cache and
 * pack the reply for the client.
 *
 * Visible flow:
 *   1. build an rsi key from lustre_svc, the peer nid, the wire handle and
 *      the input token;
 *   2. look it up, take an extra reference and queue on the entry's waitq;
 *   3. call cache_check() and sleep up to GSS_SVC_UPCALL_TIMEOUT seconds
 *      for the entry to become valid;
 *   4. find the rsc entry for the returned out_handle, attach it to grctx,
 *      duplicate the reverse handle and set the target;
 *   5. pack the reply header, out_handle and out_token;
 *   6. drop the rsi and rsc references; on failure the rsc is marked
 *      CACHE_NEGATIVE.
 *
 * `rc` starts as SECSVC_DROP; SECSVC_COMPLETE is set where an error
 * notification was packed successfully.
 *
 * NOTE(review): the remaining parameters (lustre_svc, rvs_hdl and in_token
 * are used below), the declarations of `wait`/`valid`, most branch
 * conditions, labels, braces and the return statement are missing from this
 * copy of the file, so the comments below describe only the visible
 * statements.
 */
677 int gss_svc_upcall_handle_init(struct ptlrpc_request *req,
678 struct gss_svc_reqctx *grctx,
679 struct gss_wire_ctx *gw,
680 struct obd_device *target,
685 struct ptlrpc_reply_state *rs;
686 struct rsc *rsci = NULL;
687 struct rsi *rsip = NULL, rsikey;
689 int replen = sizeof(struct ptlrpc_body);
690 struct gss_rep_header *rephdr;
692 int rc = SECSVC_DROP;
695 memset(&rsikey, 0, sizeof(rsikey));
696 rsikey.lustre_svc = lustre_svc;
697 rsikey.nid = (__u64) req->rq_peer.nid;
699 /* duplicate context handle. for INIT it is always 0 */
700 if (rawobj_dup(&rsikey.in_handle, &gw->gw_handle)) {
701 CERROR("fail to dup context handle\n");
705 if (rawobj_dup(&rsikey.in_token, in_token)) {
706 CERROR("can't duplicate token\n");
/* undo the handle duplication done just above */
707 rawobj_free(&rsikey.in_handle);
/* set == 0: plain lookup; rsi_init() moves the key buffers out of rsikey
 * when a new entry is created */
711 rsip = rsi_lookup(&rsikey, 0);
714 CERROR("error in rsi_lookup.\n");
716 if (!gss_pack_err_notify(req, GSS_S_FAILURE, 0))
717 rc = SECSVC_COMPLETE;
722 cache_get(&rsip->h); /* take an extra ref */
/* NOTE(review): this re-initializes the wait queue head of an entry that
 * came from the cache; if another thread can already be queued on the same
 * entry this would lose it -- confirm. */
723 init_waitqueue_head(&rsip->waitq);
724 init_waitqueue_entry(&wait, current);
725 add_wait_queue(&rsip->waitq, &wait);
728 /* Note each time cache_check() will drop a reference if return
729 * non-zero. We hold an extra reference on initial rsip, but must
730 * take care of following calls. */
731 rc = cache_check(&rsi_cache, &rsip->h, &cache_upcall_chandle);
/* sample CACHE_VALID and set the task state under the hash read lock,
 * before sleeping; rsi_parse() wakes this waitq */
739 read_lock(&rsi_cache.hash_lock);
740 valid = test_bit(CACHE_VALID, &rsip->h.flags);
742 set_current_state(TASK_INTERRUPTIBLE);
743 read_unlock(&rsi_cache.hash_lock);
746 schedule_timeout(GSS_SVC_UPCALL_TIMEOUT * HZ);
751 CWARN("waited %ds timeout, drop\n", GSS_SVC_UPCALL_TIMEOUT);
755 CWARN("cache_check return ENOENT, drop\n");
758 /* if not the first check, we have to release the extra
759 * reference we just added on it. */
761 cache_put(&rsip->h, &rsi_cache);
762 CDEBUG(D_SEC, "cache_check is good\n");
/* leave the wait queue and drop a reference on the rsi entry */
766 remove_wait_queue(&rsip->waitq, &wait);
767 cache_put(&rsip->h, &rsi_cache);
770 GOTO(out, rc = SECSVC_DROP);
/* the upcall's output handle is the key of the rsc cache entry */
773 rsci = gss_svc_searchbyctx(&rsip->out_handle);
775 CERROR("authentication failed\n");
777 if (!gss_pack_err_notify(req, GSS_S_FAILURE, 0))
778 rc = SECSVC_COMPLETE;
783 grctx->src_ctx = &rsci->ctx;
786 if (rawobj_dup(&rsci->ctx.gsc_rvs_hdl, rvs_hdl)) {
787 CERROR("failed duplicate reverse handle\n");
791 rsci->target = target;
793 CDEBUG(D_SEC, "server create rsc %p(%u->%s)\n",
794 rsci, rsci->ctx.gsc_uid, libcfs_nid2str(req->rq_peer.nid));
/* the handle is copied into the reply header below, so bound its size
 * first */
796 if (rsip->out_handle.len > PTLRPC_GSS_MAX_HANDLE_SIZE) {
797 CERROR("handle size %u too large\n", rsip->out_handle.len);
798 GOTO(out, rc = SECSVC_DROP);
/* reserve room for the output token, rounded by size_round4() */
802 grctx->src_reserve_len = size_round4(rsip->out_token.len);
804 rc = lustre_pack_reply_v2(req, 1, &replen, NULL);
806 CERROR("failed to pack reply: %d\n", rc);
807 GOTO(out, rc = SECSVC_DROP);
/* expected reply layout: buffer 0 holds the gss header plus handle,
 * buffer 2 holds the output token */
810 rs = req->rq_reply_state;
811 LASSERT(rs->rs_repbuf->lm_bufcount == 3);
812 LASSERT(rs->rs_repbuf->lm_buflens[0] >=
813 sizeof(*rephdr) + rsip->out_handle.len);
814 LASSERT(rs->rs_repbuf->lm_buflens[2] >= rsip->out_token.len);
816 rephdr = lustre_msg_buf(rs->rs_repbuf, 0, 0);
817 rephdr->gh_version = PTLRPC_GSS_VERSION;
818 rephdr->gh_flags = 0;
/* NOTE(review): the header is tagged PTLRPC_GSS_PROC_ERR even though this
 * is the reply path that carries the upcall's status codes -- confirm this
 * is the intended wire convention. */
819 rephdr->gh_proc = PTLRPC_GSS_PROC_ERR;
820 rephdr->gh_major = rsip->major_status;
821 rephdr->gh_minor = rsip->minor_status;
822 rephdr->gh_seqwin = GSS_SEQ_WIN;
823 rephdr->gh_handle.len = rsip->out_handle.len;
824 memcpy(rephdr->gh_handle.data, rsip->out_handle.data,
825 rsip->out_handle.len);
827 memcpy(lustre_msg_buf(rs->rs_repbuf, 2, 0), rsip->out_token.data,
828 rsip->out_token.len);
/* shrink buffer 2 to the actual token length and record the resulting
 * reply data length */
830 rs->rs_repdata_len = lustre_shrink_msg(rs->rs_repbuf, 2,
831 rsip->out_token.len, 0);
836 /* it looks like here we should put rsip also, but this messes up
837 * with NFS cache mgmt code... FIXME */
840 rsi_put(&rsip->h, &rsi_cache);
844 /* if anything went wrong, we don't keep the context either */
846 set_bit(CACHE_NEGATIVE, &rsci->h.flags);
848 rsc_put(&rsci->h, &rsc_cache);
/*
 * Look up the server context for the handle carried in `gw`.
 * A warning naming the peer nid is logged for an unknown handle.
 *
 * NOTE(review): the declaration of `rsc`, the failure test and the return
 * statements are missing from this copy of the file; presumably the result
 * is &rsc->ctx on success and NULL otherwise -- confirm.
 */
853 struct gss_svc_ctx *gss_svc_upcall_get_ctx(struct ptlrpc_request *req,
854 struct gss_wire_ctx *gw)
858 rsc = gss_svc_searchbyctx(&gw->gw_handle);
860 CWARN("Invalid gss context handle from %s\n",
861 libcfs_nid2str(req->rq_peer.nid));
/* Drop one reference on the rsc cache entry that embeds `ctx`; the entry is
 * recovered from the embedded context with container_of(). */
868 void gss_svc_upcall_put_ctx(struct gss_svc_ctx *ctx)
870 struct rsc *rsc = container_of(ctx, struct rsc, ctx);
872 rsc_put(&rsc->h, &rsc_cache);
/* Invalidate the rsc cache entry that embeds `ctx` by setting its
 * CACHE_NEGATIVE flag. No reference is dropped by the visible statements. */
875 void gss_svc_upcall_destroy_ctx(struct gss_svc_ctx *ctx)
877 struct rsc *rsc = container_of(ctx, struct rsc, ctx);
879 set_bit(CACHE_NEGATIVE, &rsc->h.flags);
/*
 * Module-init helper: register the rsi and rsc caches, wait briefly for a
 * reader on the rsi channel, and seed the context index with random bytes.
 *
 * NOTE(review): the declaration of `i`, the loop's break statement and the
 * return statement are missing from this copy of the file.
 */
882 int __init gss_init_svc_upcall(void)
886 cache_register(&rsi_cache);
887 cache_register(&rsc_cache);
889 /* FIXME this looks stupid. We intend to give lsvcgssd a chance to open
890 * the init upcall channel; otherwise there is a big chance that the first
891 * upcall is issued before the channel is opened, so the nfsv4 cache code
892 * drops the request directly, leading to unnecessary recovery time.
893 * Here we wait at most 1.5 seconds (6 polls of HZ/4 each). */
894 for (i = 0; i < 6; i++) {
/* a reader has opened the channel */
895 if (atomic_read(&rsi_cache.readers) > 0)
897 set_current_state(TASK_UNINTERRUPTIBLE);
899 schedule_timeout(HZ / 4);
902 if (atomic_read(&rsi_cache.readers) == 0)
903 CWARN("Init channel is not opened by lsvcgssd, following "
904 "request might be dropped until lsvcgssd is active\n");
906 /* This helps reduce context index conflicts. After a server reboot,
907 * conflicting requests from clients might be filtered out by the initial
908 * sequence number checking, leaving no chance to send an error
909 * notification back to the clients. */
910 get_random_bytes(&__ctx_index, sizeof(__ctx_index));
/* Module-exit helper: purge and unregister the rsi cache, then the rsc
 * cache. An unregister failure is logged and otherwise ignored.
 * NOTE(review): the declaration of `rc` is missing from this copy of the
 * file. */
915 void __exit gss_exit_svc_upcall(void)
919 cache_purge(&rsi_cache);
920 if ((rc = cache_unregister(&rsi_cache)))
921 CERROR("unregister rsi cache: %d\n", rc);
923 cache_purge(&rsc_cache);
924 if ((rc = cache_unregister(&rsc_cache)))
925 CERROR("unregister rsc cache: %d\n", rc);