X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Fptlrpc%2Fsec.c;h=5c44f89cb8d622dd90cd035d47c5b8bec4966664;hp=2aab7495e1491fa72e752de4489b8ec6a94507b3;hb=135fea8fa986d7abf107953b5b9a57170a418eda;hpb=b11d0d89b22e8409cac12129a80044f84df0023c diff --git a/lustre/ptlrpc/sec.c b/lustre/ptlrpc/sec.c index 2aab749..5c44f89 100644 --- a/lustre/ptlrpc/sec.c +++ b/lustre/ptlrpc/sec.c @@ -1,37 +1,49 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: +/* + * GPL HEADER START * - * Copyright (C) 2004-2006 Cluster File Systems, Inc. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * - * This file is part of Lustre, http://www.lustre.org. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.gnu.org/licenses/gpl-2.0.html * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * GPL HEADER END + */ +/* + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2011, 2017, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. 
+ *
+ * lustre/ptlrpc/sec.c
+ *
+ * Author: Eric Mei <ericm@clusterfs.com>
 */

-#ifndef EXPORT_SYMTAB
-#define EXPORT_SYMTAB
-#endif
#define DEBUG_SUBSYSTEM S_SEC

-#include <libcfs/libcfs.h>
-#ifndef __KERNEL__
-#include <liblustre.h>
-#include <libcfs/list.h>
-#else
-#include <linux/crypto.h>
+#include <linux/user_namespace.h>
+#ifdef HAVE_UIDGID_HEADER
+# include <linux/uidgid.h>
#endif
+#include <linux/crypto.h>
+#include <linux/key.h>
+#include <libcfs/libcfs.h>
#include <obd.h>
#include <obd_class.h>
#include <obd_support.h>
@@ -42,23 +54,18 @@
#include "ptlrpc_internal.h"

-static void sptlrpc_sec_destroy(struct ptlrpc_sec *sec);
-static int sptlrpc_sec_destroy_ctx(struct ptlrpc_sec *sec,
- struct ptlrpc_cli_ctx *ctx);
-static void sptlrpc_ctx_refresh(struct ptlrpc_cli_ctx *ctx);
-
/***********************************************
 * policy registers                            *
 ***********************************************/

-static rwlock_t policy_lock = RW_LOCK_UNLOCKED;
+static rwlock_t policy_lock;
static struct ptlrpc_sec_policy *policies[SPTLRPC_POLICY_MAX] = {
 NULL,
};

int sptlrpc_register_policy(struct ptlrpc_sec_policy *policy)
{
- __u32 number = policy->sp_policy;
+ __u16 number = policy->sp_policy;

 LASSERT(policy->sp_name);
 LASSERT(policy->sp_cops);
@@ -67,13 +74,13 @@ int sptlrpc_register_policy(struct ptlrpc_sec_policy *policy)
 if (number >= SPTLRPC_POLICY_MAX)
 return -EINVAL;

- write_lock(&policy_lock);
+ write_lock(&policy_lock);
 if (unlikely(policies[number])) {
- write_unlock(&policy_lock);
+ write_unlock(&policy_lock);
 return -EALREADY;
 }
 policies[number] = policy;
- write_unlock(&policy_lock);
+ write_unlock(&policy_lock);

 CDEBUG(D_SEC, "%s: registered\n", policy->sp_name);
 return 0;
@@ -82,20 +89,20 @@ EXPORT_SYMBOL(sptlrpc_register_policy);

int sptlrpc_unregister_policy(struct ptlrpc_sec_policy *policy)
{
- __u32 number = policy->sp_policy;
+ __u16 number = policy->sp_policy;

 LASSERT(number < SPTLRPC_POLICY_MAX);

- write_lock(&policy_lock);
+ write_lock(&policy_lock);
 if (unlikely(policies[number] == NULL)) {
- write_unlock(&policy_lock);
+ write_unlock(&policy_lock);
 CERROR("%s: already unregistered\n", policy->sp_name);
 return -EINVAL;
 }

 LASSERT(policies[number] == policy);
 policies[number] = NULL;
- write_unlock(&policy_lock);
+ write_unlock(&policy_lock);

 CDEBUG(D_SEC, "%s: unregistered\n", policy->sp_name);
 return 0;
@@ -103,621 +110,356 @@ int sptlrpc_unregister_policy(struct ptlrpc_sec_policy *policy)
EXPORT_SYMBOL(sptlrpc_unregister_policy);

static
-struct ptlrpc_sec_policy * sptlrpc_flavor2policy(ptlrpc_sec_flavor_t flavor)
-{
-#ifdef CONFIG_KMOD
- static DECLARE_MUTEX(load_mutex);
-#endif
- static atomic_t loaded = ATOMIC_INIT(0);
- struct ptlrpc_sec_policy *policy;
- __u32 number = SEC_FLAVOR_POLICY(flavor), flag = 0;
-
- if (number >= SPTLRPC_POLICY_MAX)
- return NULL;
-
-#ifdef CONFIG_KMOD
-again:
-#endif
- read_lock(&policy_lock);
- policy = policies[number];
- if (policy && !try_module_get(policy->sp_owner))
- policy = NULL;
- if (policy == NULL)
- flag = atomic_read(&loaded);
- read_unlock(&policy_lock);
-
-#ifdef CONFIG_KMOD
- /* if failure, try to load gss module, once */
- if (unlikely(policy == NULL) &&
- number == SPTLRPC_POLICY_GSS && flag == 0) {
- mutex_down(&load_mutex);
- if (atomic_read(&loaded) == 0) {
- if (request_module("ptlrpc_gss") != 0)
- CERROR("Unable to load module ptlrpc_gss\n");
- else
- CWARN("module ptlrpc_gss loaded\n");
-
- atomic_set(&loaded, 1);
- }
- mutex_up(&load_mutex);
-
- goto again;
- }
-#endif
-
- return policy;
-}
-
-ptlrpc_sec_flavor_t sptlrpc_name2flavor(const char *name)
-{
- if (!strcmp(name, "null"))
- return SPTLRPC_FLVR_NULL;
- if (!strcmp(name, "plain"))
- return SPTLRPC_FLVR_PLAIN;
- if (!strcmp(name, "krb5"))
- return SPTLRPC_FLVR_KRB5;
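A note on how the registry above is consumed: a policy module installs itself into policies[] with sptlrpc_register_policy() and removes itself on unload. A minimal sketch, assuming a hypothetical "demo" policy with empty client/server operation tables (demo_cops, demo_sops and the sp_sops field are assumptions for illustration, not part of this patch):

	static struct ptlrpc_sec_cops demo_cops;   /* create_sec, destroy_sec, ... */
	static struct ptlrpc_sec_sops demo_sops;   /* server-side hooks, assumed */

	static struct ptlrpc_sec_policy demo_policy = {
		.sp_owner  = THIS_MODULE,
		.sp_name   = "demo",
		.sp_policy = SPTLRPC_POLICY_PLAIN, /* a real module owns its own slot */
		.sp_cops   = &demo_cops,
		.sp_sops   = &demo_sops,
	};

	static int __init demo_init(void)
	{
		/* -EINVAL if the slot is out of range, -EALREADY if already taken */
		return sptlrpc_register_policy(&demo_policy);
	}

	static void __exit demo_exit(void)
	{
		sptlrpc_unregister_policy(&demo_policy);
	}

The rewritten lookup path below, sptlrpc_wireflavor2policy(), is the consumer of that table: it resolves the policy number embedded in a wire flavor and, for the GSS slot only, pulls in ptlrpc_gss on demand via request_module().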
+struct ptlrpc_sec_policy * sptlrpc_wireflavor2policy(__u32 flavor) +{ + static DEFINE_MUTEX(load_mutex); + static atomic_t loaded = ATOMIC_INIT(0); + struct ptlrpc_sec_policy *policy; + __u16 number = SPTLRPC_FLVR_POLICY(flavor); + __u16 flag = 0; + + if (number >= SPTLRPC_POLICY_MAX) + return NULL; + + while (1) { + read_lock(&policy_lock); + policy = policies[number]; + if (policy && !try_module_get(policy->sp_owner)) + policy = NULL; + if (policy == NULL) + flag = atomic_read(&loaded); + read_unlock(&policy_lock); + + if (policy != NULL || flag != 0 || + number != SPTLRPC_POLICY_GSS) + break; + + /* try to load gss module, once */ + mutex_lock(&load_mutex); + if (atomic_read(&loaded) == 0) { + if (request_module("ptlrpc_gss") == 0) + CDEBUG(D_SEC, + "module ptlrpc_gss loaded on demand\n"); + else + CERROR("Unable to load module ptlrpc_gss\n"); + + atomic_set(&loaded, 1); + } + mutex_unlock(&load_mutex); + } + + return policy; +} + +__u32 sptlrpc_name2flavor_base(const char *name) +{ + if (!strcmp(name, "null")) + return SPTLRPC_FLVR_NULL; + if (!strcmp(name, "plain")) + return SPTLRPC_FLVR_PLAIN; + if (!strcmp(name, "gssnull")) + return SPTLRPC_FLVR_GSSNULL; + if (!strcmp(name, "krb5n")) + return SPTLRPC_FLVR_KRB5N; + if (!strcmp(name, "krb5a")) + return SPTLRPC_FLVR_KRB5A; if (!strcmp(name, "krb5i")) return SPTLRPC_FLVR_KRB5I; if (!strcmp(name, "krb5p")) return SPTLRPC_FLVR_KRB5P; + if (!strcmp(name, "skn")) + return SPTLRPC_FLVR_SKN; + if (!strcmp(name, "ska")) + return SPTLRPC_FLVR_SKA; + if (!strcmp(name, "ski")) + return SPTLRPC_FLVR_SKI; + if (!strcmp(name, "skpi")) + return SPTLRPC_FLVR_SKPI; - return SPTLRPC_FLVR_INVALID; + return SPTLRPC_FLVR_INVALID; } -EXPORT_SYMBOL(sptlrpc_name2flavor); +EXPORT_SYMBOL(sptlrpc_name2flavor_base); -char *sptlrpc_flavor2name(ptlrpc_sec_flavor_t flavor) +const char *sptlrpc_flavor2name_base(__u32 flvr) { - switch (flavor) { - case SPTLRPC_FLVR_NULL: + __u32 base = SPTLRPC_FLVR_BASE(flvr); + + if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_NULL)) return "null"; - case SPTLRPC_FLVR_PLAIN: + else if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_PLAIN)) return "plain"; - case SPTLRPC_FLVR_KRB5: - return "krb5"; - case SPTLRPC_FLVR_KRB5I: + else if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_GSSNULL)) + return "gssnull"; + else if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_KRB5N)) + return "krb5n"; + else if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_KRB5A)) + return "krb5a"; + else if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_KRB5I)) return "krb5i"; - case SPTLRPC_FLVR_KRB5P: + else if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_KRB5P)) return "krb5p"; - default: - CERROR("invalid flavor 0x%x(p%u,s%u,v%u)\n", flavor, - SEC_FLAVOR_POLICY(flavor), SEC_FLAVOR_SUBPOLICY(flavor), - SEC_FLAVOR_SVC(flavor)); - } - return "UNKNOWN"; -} -EXPORT_SYMBOL(sptlrpc_flavor2name); - -/*********************************************** - * context helpers * - * internal APIs * - * cache management * - ***********************************************/ - -static inline -unsigned long ctx_status(struct ptlrpc_cli_ctx *ctx) -{ - smp_mb(); - return (ctx->cc_flags & PTLRPC_CTX_STATUS_MASK); -} - -static inline -int ctx_is_uptodate(struct ptlrpc_cli_ctx *ctx) -{ - return (ctx_status(ctx) == PTLRPC_CTX_UPTODATE); -} - -static inline -int ctx_is_refreshed(struct ptlrpc_cli_ctx *ctx) -{ - return (ctx_status(ctx) != 0); -} - -static inline -int ctx_is_dead(struct ptlrpc_cli_ctx *ctx) -{ - smp_mb(); - return ((ctx->cc_flags & (PTLRPC_CTX_DEAD | PTLRPC_CTX_ERROR)) != 0); -} - -static inline -int 
ctx_is_eternal(struct ptlrpc_cli_ctx *ctx) -{ - smp_mb(); - return ((ctx->cc_flags & PTLRPC_CTX_ETERNAL) != 0); -} - -static -int ctx_expire(struct ptlrpc_cli_ctx *ctx) -{ - LASSERT(atomic_read(&ctx->cc_refcount)); - - if (!test_and_set_bit(PTLRPC_CTX_DEAD_BIT, &ctx->cc_flags)) { - cfs_time_t now = cfs_time_current_sec(); - - smp_mb(); - clear_bit(PTLRPC_CTX_UPTODATE_BIT, &ctx->cc_flags); - - if (ctx->cc_expire && cfs_time_aftereq(now, ctx->cc_expire)) - CWARN("ctx %p(%u->%s): get expired (%lds exceeds)\n", - ctx, ctx->cc_vcred.vc_uid, - sec2target_str(ctx->cc_sec), - cfs_time_sub(now, ctx->cc_expire)); - else - CWARN("ctx %p(%u->%s): force to die (%lds remains)\n", - ctx, ctx->cc_vcred.vc_uid, - sec2target_str(ctx->cc_sec), - ctx->cc_expire == 0 ? 0 : - cfs_time_sub(ctx->cc_expire, now)); - - return 1; - } - return 0; -} - -static -void ctx_enhash(struct ptlrpc_cli_ctx *ctx, struct hlist_head *hash) -{ - set_bit(PTLRPC_CTX_HASHED_BIT, &ctx->cc_flags); - atomic_inc(&ctx->cc_refcount); - hlist_add_head(&ctx->cc_hash, hash); -} - -static -void ctx_unhash(struct ptlrpc_cli_ctx *ctx, struct hlist_head *freelist) -{ - LASSERT_SPIN_LOCKED(&ctx->cc_sec->ps_lock); - LASSERT(atomic_read(&ctx->cc_refcount) > 0); - LASSERT(test_bit(PTLRPC_CTX_HASHED_BIT, &ctx->cc_flags)); - LASSERT(!hlist_unhashed(&ctx->cc_hash)); - - clear_bit(PTLRPC_CTX_HASHED_BIT, &ctx->cc_flags); + else if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_SKN)) + return "skn"; + else if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_SKA)) + return "ska"; + else if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_SKI)) + return "ski"; + else if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_SKPI)) + return "skpi"; + + CERROR("invalid wire flavor 0x%x\n", flvr); + return "invalid"; +} +EXPORT_SYMBOL(sptlrpc_flavor2name_base); + +char *sptlrpc_flavor2name_bulk(struct sptlrpc_flavor *sf, + char *buf, int bufsize) +{ + if (SPTLRPC_FLVR_POLICY(sf->sf_rpc) == SPTLRPC_POLICY_PLAIN) + snprintf(buf, bufsize, "hash:%s", + sptlrpc_get_hash_name(sf->u_bulk.hash.hash_alg)); + else + snprintf(buf, bufsize, "%s", + sptlrpc_flavor2name_base(sf->sf_rpc)); - if (atomic_dec_and_test(&ctx->cc_refcount)) { - __hlist_del(&ctx->cc_hash); - hlist_add_head(&ctx->cc_hash, freelist); - } else - hlist_del_init(&ctx->cc_hash); + buf[bufsize - 1] = '\0'; + return buf; } +EXPORT_SYMBOL(sptlrpc_flavor2name_bulk); -/* - * return 1 if the context is dead. - */ -static -int ctx_check_death(struct ptlrpc_cli_ctx *ctx, struct hlist_head *freelist) +char *sptlrpc_flavor2name(struct sptlrpc_flavor *sf, char *buf, int bufsize) { - if (unlikely(ctx_is_dead(ctx))) - goto unhash; + snprintf(buf, bufsize, "%s", sptlrpc_flavor2name_base(sf->sf_rpc)); - /* expire is 0 means never expire. 
a newly created gss context - * which during upcall also has 0 expiration + /* + * currently we don't support customized bulk specification for + * flavors other than plain */ - smp_mb(); - if (ctx->cc_expire == 0) - return 0; - - /* check real expiration */ - smp_mb(); - if (cfs_time_after(ctx->cc_expire, cfs_time_current_sec())) - return 0; - - ctx_expire(ctx); - -unhash: - if (freelist) - ctx_unhash(ctx, freelist); - - return 1; -} - -static inline -int ctx_check_death_locked(struct ptlrpc_cli_ctx *ctx, - struct hlist_head *freelist) -{ - LASSERT(ctx->cc_sec); - LASSERT(atomic_read(&ctx->cc_refcount) > 0); - LASSERT_SPIN_LOCKED(&ctx->cc_sec->ps_lock); - LASSERT(test_bit(PTLRPC_CTX_HASHED_BIT, &ctx->cc_flags)); - - return ctx_check_death(ctx, freelist); -} - -static -int ctx_check_uptodate(struct ptlrpc_cli_ctx *ctx) -{ - LASSERT(ctx->cc_sec); - LASSERT(atomic_read(&ctx->cc_refcount) > 0); - - if (!ctx_check_death(ctx, NULL) && ctx_is_uptodate(ctx)) - return 1; - return 0; -} - -static inline -int ctx_match(struct ptlrpc_cli_ctx *ctx, struct vfs_cred *vcred) -{ - /* a little bit optimization for null policy */ - if (!ctx->cc_ops->match) - return 1; - - return ctx->cc_ops->match(ctx, vcred); -} + if (SPTLRPC_FLVR_POLICY(sf->sf_rpc) == SPTLRPC_POLICY_PLAIN) { + char bspec[16]; -static -void ctx_list_destroy(struct hlist_head *head) -{ - struct ptlrpc_cli_ctx *ctx; - - while (!hlist_empty(head)) { - ctx = hlist_entry(head->first, struct ptlrpc_cli_ctx, cc_hash); - - LASSERT(atomic_read(&ctx->cc_refcount) == 0); - LASSERT(test_bit(PTLRPC_CTX_HASHED_BIT, &ctx->cc_flags) == 0); - - hlist_del_init(&ctx->cc_hash); - sptlrpc_sec_destroy_ctx(ctx->cc_sec, ctx); - } -} - -static -void ctx_cache_gc(struct ptlrpc_sec *sec, struct hlist_head *freelist) -{ - struct ptlrpc_cli_ctx *ctx; - struct hlist_node *pos, *next; - int i; - ENTRY; - - CDEBUG(D_SEC, "do gc on sec %s@%p\n", sec->ps_policy->sp_name, sec); - - for (i = 0; i < sec->ps_ccache_size; i++) { - hlist_for_each_entry_safe(ctx, pos, next, - &sec->ps_ccache[i], cc_hash) - ctx_check_death_locked(ctx, freelist); + bspec[0] = '-'; + sptlrpc_flavor2name_bulk(sf, &bspec[1], sizeof(bspec) - 1); + strncat(buf, bspec, bufsize); } - sec->ps_gc_next = cfs_time_current_sec() + sec->ps_gc_interval; - EXIT; + buf[bufsize - 1] = '\0'; + return buf; } +EXPORT_SYMBOL(sptlrpc_flavor2name); -/* - * @uid: which user. "-1" means flush all. - * @grace: mark context DEAD, allow graceful destroy like notify - * server side, etc. - * @force: also flush busy entries. - * - * return the number of busy context encountered. - * - * In any cases, never touch "eternal" contexts. 
- */ -static -int ctx_cache_flush(struct ptlrpc_sec *sec, uid_t uid, int grace, int force) +char *sptlrpc_secflags2str(__u32 flags, char *buf, int bufsize) { - struct ptlrpc_cli_ctx *ctx; - struct hlist_node *pos, *next; - HLIST_HEAD(freelist); - int i, busy = 0; - ENTRY; - - might_sleep_if(grace); - - spin_lock(&sec->ps_lock); - for (i = 0; i < sec->ps_ccache_size; i++) { - hlist_for_each_entry_safe(ctx, pos, next, - &sec->ps_ccache[i], cc_hash) { - LASSERT(atomic_read(&ctx->cc_refcount) > 0); - - if (ctx_is_eternal(ctx)) - continue; - if (uid != -1 && uid != ctx->cc_vcred.vc_uid) - continue; - - if (atomic_read(&ctx->cc_refcount) > 1) { - busy++; - if (!force) - continue; - - CWARN("flush busy(%d) ctx %p(%u->%s) by force, " - "grace %d\n", - atomic_read(&ctx->cc_refcount), - ctx, ctx->cc_vcred.vc_uid, - sec2target_str(ctx->cc_sec), grace); - } - ctx_unhash(ctx, &freelist); + buf[0] = '\0'; - set_bit(PTLRPC_CTX_DEAD_BIT, &ctx->cc_flags); - if (!grace) - clear_bit(PTLRPC_CTX_UPTODATE_BIT, - &ctx->cc_flags); - } - } - spin_unlock(&sec->ps_lock); + if (flags & PTLRPC_SEC_FL_REVERSE) + strlcat(buf, "reverse,", bufsize); + if (flags & PTLRPC_SEC_FL_ROOTONLY) + strlcat(buf, "rootonly,", bufsize); + if (flags & PTLRPC_SEC_FL_UDESC) + strlcat(buf, "udesc,", bufsize); + if (flags & PTLRPC_SEC_FL_BULK) + strlcat(buf, "bulk,", bufsize); + if (buf[0] == '\0') + strlcat(buf, "-,", bufsize); - ctx_list_destroy(&freelist); - RETURN(busy); + return buf; } +EXPORT_SYMBOL(sptlrpc_secflags2str); -static inline -unsigned int ctx_hash_index(struct ptlrpc_sec *sec, __u64 key) -{ - return (unsigned int) (key & (sec->ps_ccache_size - 1)); -} +/************************************************** + * client context APIs * + **************************************************/ -/* - * return matched context. If it's a newly created one, we also give the - * first push to refresh. return NULL if error happens. - */ static -struct ptlrpc_cli_ctx * ctx_cache_lookup(struct ptlrpc_sec *sec, - struct vfs_cred *vcred, - int create, int remove_dead) -{ - struct ptlrpc_cli_ctx *ctx = NULL, *new = NULL; - struct hlist_head *hash_head; - struct hlist_node *pos, *next; - HLIST_HEAD(freelist); - unsigned int hash, gc = 0, found = 0; - ENTRY; - - might_sleep(); - - hash = ctx_hash_index(sec, (__u64) vcred->vc_uid); - LASSERT(hash < sec->ps_ccache_size); - hash_head = &sec->ps_ccache[hash]; - -retry: - spin_lock(&sec->ps_lock); - - /* gc_next == 0 means never do gc */ - if (remove_dead && sec->ps_gc_next && - cfs_time_after(cfs_time_current_sec(), sec->ps_gc_next)) { - ctx_cache_gc(sec, &freelist); - gc = 1; - } - - hlist_for_each_entry_safe(ctx, pos, next, hash_head, cc_hash) { - if (gc == 0 && - ctx_check_death_locked(ctx, remove_dead ? 
&freelist : NULL)) - continue; - - if (ctx_match(ctx, vcred)) { - found = 1; - break; - } - } - - if (found) { - if (new && new != ctx) { - /* lost the race, just free it */ - hlist_add_head(&new->cc_hash, &freelist); - new = NULL; - } - - /* hot node, move to head */ - if (hash_head->first != &ctx->cc_hash) { - __hlist_del(&ctx->cc_hash); - hlist_add_head(&ctx->cc_hash, hash_head); - } - } else { - /* don't allocate for reverse sec */ - if (sec->ps_flags & PTLRPC_SEC_FL_REVERSE) { - spin_unlock(&sec->ps_lock); - RETURN(NULL); - } - - if (new) { - ctx_enhash(new, hash_head); - ctx = new; - } else if (create) { - spin_unlock(&sec->ps_lock); - new = sec->ps_policy->sp_cops->create_ctx(sec, vcred); - if (new) { - atomic_inc(&sec->ps_busy); - goto retry; - } - } else - ctx = NULL; - } - - /* hold a ref */ - if (ctx) - atomic_inc(&ctx->cc_refcount); - - spin_unlock(&sec->ps_lock); - - /* the allocator of the context must give the first push to refresh */ - if (new) { - LASSERT(new == ctx); - sptlrpc_ctx_refresh(new); - } - - ctx_list_destroy(&freelist); - RETURN(ctx); -} - -static inline struct ptlrpc_cli_ctx *get_my_ctx(struct ptlrpc_sec *sec) { struct vfs_cred vcred; int create = 1, remove_dead = 1; LASSERT(sec); + LASSERT(sec->ps_policy->sp_cops->lookup_ctx); - if (sec->ps_flags & (PTLRPC_SEC_FL_REVERSE | PTLRPC_SEC_FL_ROOTONLY)) { + if (sec->ps_flvr.sf_flags & (PTLRPC_SEC_FL_REVERSE | + PTLRPC_SEC_FL_ROOTONLY)) { vcred.vc_uid = 0; vcred.vc_gid = 0; - if (sec->ps_flags & PTLRPC_SEC_FL_REVERSE) { + if (sec->ps_flvr.sf_flags & PTLRPC_SEC_FL_REVERSE) { create = 0; remove_dead = 0; } - } else { - vcred.vc_uid = cfs_current()->uid; - vcred.vc_gid = cfs_current()->gid; - } - - if (sec->ps_policy->sp_cops->lookup_ctx) - return sec->ps_policy->sp_cops->lookup_ctx(sec, &vcred); - else - return ctx_cache_lookup(sec, &vcred, create, remove_dead); -} - -/************************************************** - * client context APIs * - **************************************************/ + } else { + vcred.vc_uid = from_kuid(&init_user_ns, current_uid()); + vcred.vc_gid = from_kgid(&init_user_ns, current_gid()); + } -static -void sptlrpc_ctx_refresh(struct ptlrpc_cli_ctx *ctx) -{ - LASSERT(atomic_read(&ctx->cc_refcount) > 0); - - if (!ctx_is_refreshed(ctx) && ctx->cc_ops->refresh) - ctx->cc_ops->refresh(ctx); + return sec->ps_policy->sp_cops->lookup_ctx(sec, &vcred, create, + remove_dead); } -struct ptlrpc_cli_ctx *sptlrpc_ctx_get(struct ptlrpc_cli_ctx *ctx) +struct ptlrpc_cli_ctx *sptlrpc_cli_ctx_get(struct ptlrpc_cli_ctx *ctx) { - LASSERT(atomic_read(&ctx->cc_refcount) > 0); - atomic_inc(&ctx->cc_refcount); - return ctx; + atomic_inc(&ctx->cc_refcount); + return ctx; } -EXPORT_SYMBOL(sptlrpc_ctx_get); +EXPORT_SYMBOL(sptlrpc_cli_ctx_get); -void sptlrpc_ctx_put(struct ptlrpc_cli_ctx *ctx, int sync) +void sptlrpc_cli_ctx_put(struct ptlrpc_cli_ctx *ctx, int sync) { - struct ptlrpc_sec *sec = ctx->cc_sec; - - LASSERT(sec); - LASSERT(atomic_read(&ctx->cc_refcount)); - - if (!atomic_dec_and_test(&ctx->cc_refcount)) - return; + struct ptlrpc_sec *sec = ctx->cc_sec; - LASSERT(test_bit(PTLRPC_CTX_HASHED_BIT, &ctx->cc_flags) == 0); - LASSERT(hlist_unhashed(&ctx->cc_hash)); + LASSERT(sec); + LASSERT_ATOMIC_POS(&ctx->cc_refcount); - /* if required async, we must clear the UPTODATE bit to prevent extra - * rpcs during destroy procedure. 
- */
- if (!sync)
- clear_bit(PTLRPC_CTX_UPTODATE_BIT, &ctx->cc_flags);
-
- /* destroy this context */
- if (!sptlrpc_sec_destroy_ctx(sec, ctx))
- return;
+ if (!atomic_dec_and_test(&ctx->cc_refcount))
+ return;

- CWARN("%s@%p: put last ctx, also destroy the sec\n",
- sec->ps_policy->sp_name, sec);
-
- sptlrpc_sec_destroy(sec);
+ sec->ps_policy->sp_cops->release_ctx(sec, ctx, sync);
}
-EXPORT_SYMBOL(sptlrpc_ctx_put);
+EXPORT_SYMBOL(sptlrpc_cli_ctx_put);

-/*
- * mark a ctx as DEAD, and pull it out from hash table.
+/**
+ * Expire the client context immediately.
 *
- * NOTE: the caller must hold at least 1 ref on the ctx.
+ * \pre Caller must hold at least 1 reference on the \a ctx.
 */
-void sptlrpc_ctx_expire(struct ptlrpc_cli_ctx *ctx)
+void sptlrpc_cli_ctx_expire(struct ptlrpc_cli_ctx *ctx)
{
- LASSERT(ctx->cc_sec);
- LASSERT(atomic_read(&ctx->cc_refcount) > 0);
+ LASSERT(ctx->cc_ops->die);
+ ctx->cc_ops->die(ctx, 0);
+}
+EXPORT_SYMBOL(sptlrpc_cli_ctx_expire);

- ctx_expire(ctx);
+/**
+ * Wake up the threads that are waiting for this client context. Called
+ * after some status change happened on \a ctx.
+ */
+void sptlrpc_cli_ctx_wakeup(struct ptlrpc_cli_ctx *ctx)
+{
+ struct ptlrpc_request *req, *next;

- spin_lock(&ctx->cc_sec->ps_lock);
+ spin_lock(&ctx->cc_lock);
+ list_for_each_entry_safe(req, next, &ctx->cc_req_list,
+ rq_ctx_chain) {
+ list_del_init(&req->rq_ctx_chain);
+ ptlrpc_client_wake_req(req);
+ }
+ spin_unlock(&ctx->cc_lock);
+}
+EXPORT_SYMBOL(sptlrpc_cli_ctx_wakeup);

- if (test_and_clear_bit(PTLRPC_CTX_HASHED_BIT, &ctx->cc_flags)) {
- LASSERT(!hlist_unhashed(&ctx->cc_hash));
- LASSERT(atomic_read(&ctx->cc_refcount) > 1);
+int sptlrpc_cli_ctx_display(struct ptlrpc_cli_ctx *ctx, char *buf, int bufsize)
+{
+ LASSERT(ctx->cc_ops);

- hlist_del_init(&ctx->cc_hash);
- if (atomic_dec_and_test(&ctx->cc_refcount))
- LBUG();
- }
+ if (ctx->cc_ops->display == NULL)
+ return 0;

- spin_unlock(&ctx->cc_sec->ps_lock);
+ return ctx->cc_ops->display(ctx, buf, bufsize);
}
-EXPORT_SYMBOL(sptlrpc_ctx_expire);

-void sptlrpc_ctx_replace(struct ptlrpc_sec *sec, struct ptlrpc_cli_ctx *new)
+static int import_sec_check_expire(struct obd_import *imp)
{
- struct ptlrpc_cli_ctx *ctx;
- struct hlist_node *pos, *next;
- HLIST_HEAD(freelist);
- unsigned int hash;
- ENTRY;
+ int adapt = 0;

- hash = ctx_hash_index(sec, (__u64) new->cc_vcred.vc_uid);
- LASSERT(hash < sec->ps_ccache_size);
+ spin_lock(&imp->imp_lock);
+ if (imp->imp_sec_expire &&
+ imp->imp_sec_expire < ktime_get_real_seconds()) {
+ adapt = 1;
+ imp->imp_sec_expire = 0;
+ }
+ spin_unlock(&imp->imp_lock);

- spin_lock(&sec->ps_lock);
+ if (!adapt)
+ return 0;

- hlist_for_each_entry_safe(ctx, pos, next,
- &sec->ps_ccache[hash], cc_hash) {
- if (!ctx_match(ctx, &new->cc_vcred))
- continue;
+ CDEBUG(D_SEC, "found delayed sec adapt expired, do it now\n");
+ return sptlrpc_import_sec_adapt(imp, NULL, NULL);
+}

- ctx_expire(ctx);
- ctx_unhash(ctx, &freelist);
- break;
- }
+/**
+ * Get and validate the client side ptlrpc security facilities from
+ * \a imp. There is a race condition on client reconnect when the import is
+ * being destroyed while there are outstanding client-bound requests. In
+ * this case do not output any error messages if import security is not
+ * found.
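+ *
+ * For illustration only, the typical call pattern (as in
+ * sptlrpc_req_get_ctx() below) is:
+ * \code
+ *	rc = import_sec_validate_get(imp, &sec);
+ *	if (rc)
+ *		RETURN(rc);
+ *	req->rq_cli_ctx = get_my_ctx(sec);
+ *	sptlrpc_sec_put(sec);
+ * \endcode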
+ * + * \param[in] imp obd import associated with client + * \param[out] sec client side ptlrpc security + * + * \retval 0 if security retrieved successfully + * \retval -ve errno if there was a problem + */ +static int import_sec_validate_get(struct obd_import *imp, + struct ptlrpc_sec **sec) +{ + int rc; - ctx_enhash(new, &sec->ps_ccache[hash]); - atomic_inc(&sec->ps_busy); + if (unlikely(imp->imp_sec_expire)) { + rc = import_sec_check_expire(imp); + if (rc) + return rc; + } - spin_unlock(&sec->ps_lock); + *sec = sptlrpc_import_sec_ref(imp); + if (*sec == NULL) { + CERROR("import %p (%s) with no sec\n", + imp, ptlrpc_import_state_name(imp->imp_state)); + return -EACCES; + } - ctx_list_destroy(&freelist); - EXIT; + if (unlikely((*sec)->ps_dying)) { + CERROR("attempt to use dying sec %p\n", sec); + sptlrpc_sec_put(*sec); + return -EACCES; + } + + return 0; } -EXPORT_SYMBOL(sptlrpc_ctx_replace); +/** + * Given a \a req, find or allocate an appropriate context for it. + * \pre req->rq_cli_ctx == NULL. + * + * \retval 0 succeed, and req->rq_cli_ctx is set. + * \retval -ev error number, and req->rq_cli_ctx == NULL. + */ int sptlrpc_req_get_ctx(struct ptlrpc_request *req) { struct obd_import *imp = req->rq_import; + struct ptlrpc_sec *sec; + int rc; ENTRY; LASSERT(!req->rq_cli_ctx); LASSERT(imp); - if (imp->imp_sec == NULL) { - CERROR("import %p (%s) with no sec pointer\n", - imp, ptlrpc_import_state_name(imp->imp_state)); - RETURN(-EACCES); - } - - req->rq_cli_ctx = get_my_ctx(imp->imp_sec); - - if (!req->rq_cli_ctx) { - CERROR("req %p: fail to get context from cache\n", req); - RETURN(-ENOMEM); - } - - RETURN(0); -} - -void sptlrpc_ctx_wakeup(struct ptlrpc_cli_ctx *ctx) -{ - struct ptlrpc_request *req, *next; + rc = import_sec_validate_get(imp, &sec); + if (rc) + RETURN(rc); - spin_lock(&ctx->cc_lock); - list_for_each_entry_safe(req, next, &ctx->cc_req_list, rq_ctx_chain) { - list_del_init(&req->rq_ctx_chain); - ptlrpc_wake_client_req(req); - } - spin_unlock(&ctx->cc_lock); -} -EXPORT_SYMBOL(sptlrpc_ctx_wakeup); + req->rq_cli_ctx = get_my_ctx(sec); -int sptlrpc_ctx_display(struct ptlrpc_cli_ctx *ctx, char *buf, int bufsize) -{ - LASSERT(ctx->cc_ops); + sptlrpc_sec_put(sec); - if (ctx->cc_ops->display == NULL) - return 0; + if (!req->rq_cli_ctx) { + CERROR("req %p: fail to get context\n", req); + RETURN(-ECONNREFUSED); + } - return ctx->cc_ops->display(ctx, buf, bufsize); + RETURN(0); } -void sptlrpc_req_put_ctx(struct ptlrpc_request *req) +/** + * Drop the context for \a req. + * \pre req->rq_cli_ctx != NULL. + * \post req->rq_cli_ctx == NULL. + * + * If \a sync == 0, this function should return quickly without sleep; + * otherwise it might trigger and wait for the whole process of sending + * an context-destroying rpc to server. + */ +void sptlrpc_req_put_ctx(struct ptlrpc_request *req, int sync) { ENTRY; @@ -727,56 +469,153 @@ void sptlrpc_req_put_ctx(struct ptlrpc_request *req) /* request might be asked to release earlier while still * in the context waiting list. 
*/ - if (!list_empty(&req->rq_ctx_chain)) { - spin_lock(&req->rq_cli_ctx->cc_lock); - list_del_init(&req->rq_ctx_chain); - spin_unlock(&req->rq_cli_ctx->cc_lock); + if (!list_empty(&req->rq_ctx_chain)) { + spin_lock(&req->rq_cli_ctx->cc_lock); + list_del_init(&req->rq_ctx_chain); + spin_unlock(&req->rq_cli_ctx->cc_lock); } - /* this could be called with spinlock hold, use async mode */ - sptlrpc_ctx_put(req->rq_cli_ctx, 0); + sptlrpc_cli_ctx_put(req->rq_cli_ctx, sync); req->rq_cli_ctx = NULL; EXIT; } -/* - * request must have a context. if failed to get new context, - * just restore the old one +static +int sptlrpc_req_ctx_switch(struct ptlrpc_request *req, + struct ptlrpc_cli_ctx *oldctx, + struct ptlrpc_cli_ctx *newctx) +{ + struct sptlrpc_flavor old_flvr; + char *reqmsg = NULL; /* to workaround old gcc */ + int reqmsg_size; + int rc = 0; + + LASSERT(req->rq_reqmsg); + LASSERT(req->rq_reqlen); + LASSERT(req->rq_replen); + + CDEBUG(D_SEC, "req %p: switch ctx %p(%u->%s) -> %p(%u->%s), " + "switch sec %p(%s) -> %p(%s)\n", req, + oldctx, oldctx->cc_vcred.vc_uid, sec2target_str(oldctx->cc_sec), + newctx, newctx->cc_vcred.vc_uid, sec2target_str(newctx->cc_sec), + oldctx->cc_sec, oldctx->cc_sec->ps_policy->sp_name, + newctx->cc_sec, newctx->cc_sec->ps_policy->sp_name); + + /* save flavor */ + old_flvr = req->rq_flvr; + + /* save request message */ + reqmsg_size = req->rq_reqlen; + if (reqmsg_size != 0) { + OBD_ALLOC_LARGE(reqmsg, reqmsg_size); + if (reqmsg == NULL) + return -ENOMEM; + memcpy(reqmsg, req->rq_reqmsg, reqmsg_size); + } + + /* release old req/rep buf */ + req->rq_cli_ctx = oldctx; + sptlrpc_cli_free_reqbuf(req); + sptlrpc_cli_free_repbuf(req); + req->rq_cli_ctx = newctx; + + /* recalculate the flavor */ + sptlrpc_req_set_flavor(req, 0); + + /* alloc new request buffer + * we don't need to alloc reply buffer here, leave it to the + * rest procedure of ptlrpc */ + if (reqmsg_size != 0) { + rc = sptlrpc_cli_alloc_reqbuf(req, reqmsg_size); + if (!rc) { + LASSERT(req->rq_reqmsg); + memcpy(req->rq_reqmsg, reqmsg, reqmsg_size); + } else { + CWARN("failed to alloc reqbuf: %d\n", rc); + req->rq_flvr = old_flvr; + } + + OBD_FREE_LARGE(reqmsg, reqmsg_size); + } + return rc; +} + +/** + * If current context of \a req is dead somehow, e.g. we just switched flavor + * thus marked original contexts dead, we'll find a new context for it. if + * no switch is needed, \a req will end up with the same context. + * + * \note a request must have a context, to keep other parts of code happy. + * In any case of failure during the switching, we must restore the old one. 
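+ *
+ * A sketch of the expected call pattern, mirroring the dead-context path
+ * in sptlrpc_req_refresh_ctx() below:
+ * \code
+ *	if (test_bit(PTLRPC_CTX_DEAD_BIT, &ctx->cc_flags)) {
+ *		rc = sptlrpc_req_replace_dead_ctx(req);
+ *		if (rc == 0)
+ *			ctx = req->rq_cli_ctx;	/* possibly a new context */
+ *	}
+ * \endcode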
*/ int sptlrpc_req_replace_dead_ctx(struct ptlrpc_request *req) { - struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx; - int rc; + struct ptlrpc_cli_ctx *oldctx = req->rq_cli_ctx; + struct ptlrpc_cli_ctx *newctx; + int rc; ENTRY; - LASSERT(ctx); - LASSERT(test_bit(PTLRPC_CTX_DEAD_BIT, &ctx->cc_flags)); + LASSERT(oldctx); - /* make sure not on context waiting list */ - spin_lock(&ctx->cc_lock); - list_del_init(&req->rq_ctx_chain); - spin_unlock(&ctx->cc_lock); + sptlrpc_cli_ctx_get(oldctx); + sptlrpc_req_put_ctx(req, 0); - sptlrpc_ctx_get(ctx); - sptlrpc_req_put_ctx(req); rc = sptlrpc_req_get_ctx(req); - if (!rc) { - LASSERT(req->rq_cli_ctx); - LASSERT(req->rq_cli_ctx != ctx); - sptlrpc_ctx_put(ctx, 1); - } else { + if (unlikely(rc)) { LASSERT(!req->rq_cli_ctx); - req->rq_cli_ctx = ctx; + + /* restore old ctx */ + req->rq_cli_ctx = oldctx; + RETURN(rc); } - RETURN(rc); + + newctx = req->rq_cli_ctx; + LASSERT(newctx); + + if (unlikely(newctx == oldctx && + test_bit(PTLRPC_CTX_DEAD_BIT, &oldctx->cc_flags))) { + /* + * still get the old dead ctx, usually means system too busy + */ + CDEBUG(D_SEC, + "ctx (%p, fl %lx) doesn't switch, relax a little bit\n", + newctx, newctx->cc_flags); + + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(msecs_to_jiffies(MSEC_PER_SEC)); + } else if (unlikely(test_bit(PTLRPC_CTX_UPTODATE_BIT, &newctx->cc_flags) + == 0)) { + /* + * new ctx not up to date yet + */ + CDEBUG(D_SEC, + "ctx (%p, fl %lx) doesn't switch, not up to date yet\n", + newctx, newctx->cc_flags); + } else { + /* + * it's possible newctx == oldctx if we're switching + * subflavor with the same sec. + */ + rc = sptlrpc_req_ctx_switch(req, oldctx, newctx); + if (rc) { + /* restore old ctx */ + sptlrpc_req_put_ctx(req, 0); + req->rq_cli_ctx = oldctx; + RETURN(rc); + } + + LASSERT(req->rq_cli_ctx == newctx); + } + + sptlrpc_cli_ctx_put(oldctx, 1); + RETURN(0); } EXPORT_SYMBOL(sptlrpc_req_replace_dead_ctx); static int ctx_check_refresh(struct ptlrpc_cli_ctx *ctx) { - smp_mb(); - if (ctx_is_refreshed(ctx)) + if (cli_ctx_is_refreshed(ctx)) return 1; return 0; } @@ -790,7 +629,7 @@ int ctx_refresh_timeout(void *data) /* conn_cnt is needed in expire_one_request */ lustre_msg_set_conn_cnt(req->rq_reqmsg, req->rq_import->imp_conn_cnt); - rc = ptlrpc_expire_one_request(req); + rc = ptlrpc_expire_one_request(req, 1); /* if we started recovery, we should mark this ctx dead; otherwise * in case of lgssd died nobody would retire this ctx, following * connecting will still find the same ctx thus cause deadlock. @@ -798,162 +637,198 @@ int ctx_refresh_timeout(void *data) * later than the context refresh expire time. */ if (rc == 0) - ctx_expire(req->rq_cli_ctx); + req->rq_cli_ctx->cc_ops->die(req->rq_cli_ctx, 0); return rc; } static void ctx_refresh_interrupt(void *data) { - /* do nothing */ + struct ptlrpc_request *req = data; + + spin_lock(&req->rq_lock); + req->rq_intr = 1; + spin_unlock(&req->rq_lock); } -/* - * the status of context could be subject to be changed by other threads at any - * time. we allow this race. but once we return with 0, the caller will - * suppose it's uptodated and keep using it until the affected rpc is done. +static +void req_off_ctx_list(struct ptlrpc_request *req, struct ptlrpc_cli_ctx *ctx) +{ + spin_lock(&ctx->cc_lock); + if (!list_empty(&req->rq_ctx_chain)) + list_del_init(&req->rq_ctx_chain); + spin_unlock(&ctx->cc_lock); +} + +/** + * To refresh the context of \req, if it's not up-to-date. 
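+ * A caller that must block until the context is usable passes timeout 0,
+ * as sptlrpc_import_check_ctx() below does:
+ * \code
+ *	rc = sptlrpc_req_refresh_ctx(req, 0);
+ * \endcode
+ *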
+ * \param timeout + * - < 0: don't wait + * - = 0: wait until success or fatal error occur + * - > 0: timeout value (in seconds) * - * @timeout: - * < 0 - don't wait - * = 0 - wait until success or fatal error occur - * > 0 - timeout value + * The status of the context could be subject to be changed by other threads + * at any time. We allow this race, but once we return with 0, the caller will + * suppose it's uptodated and keep using it until the owning rpc is done. * - * return 0 only if the context is uptodated. + * \retval 0 only if the context is uptodated. + * \retval -ev error number. */ int sptlrpc_req_refresh_ctx(struct ptlrpc_request *req, long timeout) { struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx; + struct ptlrpc_sec *sec; struct l_wait_info lwi; int rc; ENTRY; - LASSERT(ctx); + LASSERT(ctx); + + if (req->rq_ctx_init || req->rq_ctx_fini) + RETURN(0); + + /* + * during the process a request's context might change type even + * (e.g. from gss ctx to null ctx), so each loop we need to re-check + * everything + */ +again: + rc = import_sec_validate_get(req->rq_import, &sec); + if (rc) + RETURN(rc); + + if (sec->ps_flvr.sf_rpc != req->rq_flvr.sf_rpc) { + CDEBUG(D_SEC, "req %p: flavor has changed %x -> %x\n", + req, req->rq_flvr.sf_rpc, sec->ps_flvr.sf_rpc); + req_off_ctx_list(req, ctx); + sptlrpc_req_replace_dead_ctx(req); + ctx = req->rq_cli_ctx; + } + sptlrpc_sec_put(sec); - /* special ctxs */ - if (ctx_is_eternal(ctx) || req->rq_ctx_init || req->rq_ctx_fini) + if (cli_ctx_is_eternal(ctx)) RETURN(0); - /* reverse ctxs, don't refresh */ - if (ctx->cc_sec->ps_flags & PTLRPC_SEC_FL_REVERSE) - RETURN(0); + if (unlikely(test_bit(PTLRPC_CTX_NEW_BIT, &ctx->cc_flags))) { + LASSERT(ctx->cc_ops->refresh); + ctx->cc_ops->refresh(ctx); + } + LASSERT(test_bit(PTLRPC_CTX_NEW_BIT, &ctx->cc_flags) == 0); - spin_lock(&ctx->cc_lock); -again: - if (ctx_check_uptodate(ctx)) { - if (!list_empty(&req->rq_ctx_chain)) - list_del_init(&req->rq_ctx_chain); - spin_unlock(&ctx->cc_lock); + LASSERT(ctx->cc_ops->validate); + if (ctx->cc_ops->validate(ctx) == 0) { + req_off_ctx_list(req, ctx); RETURN(0); } - if (test_bit(PTLRPC_CTX_ERROR_BIT, &ctx->cc_flags)) { - req->rq_err = 1; - if (!list_empty(&req->rq_ctx_chain)) - list_del_init(&req->rq_ctx_chain); - spin_unlock(&ctx->cc_lock); + if (unlikely(test_bit(PTLRPC_CTX_ERROR_BIT, &ctx->cc_flags))) { + spin_lock(&req->rq_lock); + req->rq_err = 1; + spin_unlock(&req->rq_lock); + req_off_ctx_list(req, ctx); RETURN(-EPERM); } - /* This is subtle. For resent message we have to keep original - * context to survive following situation: - * 1. the request sent to server - * 2. recovery was kick start - * 3. recovery finished, the request marked as resent - * 4. resend the request - * 5. old reply from server received (because xid is the same) - * 6. verify reply (has to be success) - * 7. new reply from server received, lnet drop it + /* + * There's a subtle issue for resending RPCs, suppose following + * situation: + * 1. the request was sent to server. + * 2. recovery was kicked start, after finished the request was + * marked as resent. + * 3. resend the request. + * 4. old reply from server received, we accept and verify the reply. + * this has to be success, otherwise the error will be aware + * by application. + * 5. new reply from server received, dropped by LNet. * - * Note we can't simply change xid for resent request because - * server reply on it for reply reconstruction. + * Note the xid of old & new request is the same. 
We can't simply + * change xid for the resent request because the server replies on + * it for reply reconstruction. * * Commonly the original context should be uptodate because we - * have a expiry nice time; And server will keep their half part - * context because we at least hold a ref of old context which - * prevent the context detroy RPC be sent. So server still can - * accept the request and finish RPC. Two cases: - * 1. If server side context has been trimed, a NO_CONTEXT will + * have an expiry nice time; server will keep its context because + * we at least hold a ref of old context which prevent context + * from destroying RPC being sent. So server still can accept the + * request and finish the RPC. But if that's not the case: + * 1. If server side context has been trimmed, a NO_CONTEXT will * be returned, gss_cli_ctx_verify/unseal will switch to new * context by force. * 2. Current context never be refreshed, then we are fine: we * never really send request with old context before. */ - if (test_bit(PTLRPC_CTX_UPTODATE_BIT, &ctx->cc_flags) && - req->rq_reqmsg && - lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT) { - if (!list_empty(&req->rq_ctx_chain)) - list_del_init(&req->rq_ctx_chain); - spin_unlock(&ctx->cc_lock); - RETURN(0); - } - - if (unlikely(test_bit(PTLRPC_CTX_DEAD_BIT, &ctx->cc_flags))) { - spin_unlock(&ctx->cc_lock); - - /* don't have to, but we don't want to release it too soon */ - sptlrpc_ctx_get(ctx); - - rc = sptlrpc_req_replace_dead_ctx(req); - if (rc) { - LASSERT(ctx == req->rq_cli_ctx); - CERROR("req %p: failed to replace dead ctx %p\n", - req, ctx); - req->rq_err = 1; - LASSERT(list_empty(&req->rq_ctx_chain)); - sptlrpc_ctx_put(ctx, 1); - RETURN(-ENOMEM); + if (test_bit(PTLRPC_CTX_UPTODATE_BIT, &ctx->cc_flags) && + unlikely(req->rq_reqmsg) && + lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT) { + req_off_ctx_list(req, ctx); + RETURN(0); + } + + if (unlikely(test_bit(PTLRPC_CTX_DEAD_BIT, &ctx->cc_flags))) { + req_off_ctx_list(req, ctx); + /* + * don't switch ctx if import was deactivated + */ + if (req->rq_import->imp_deactive) { + spin_lock(&req->rq_lock); + req->rq_err = 1; + spin_unlock(&req->rq_lock); + RETURN(-EINTR); + } + + rc = sptlrpc_req_replace_dead_ctx(req); + if (rc) { + LASSERT(ctx == req->rq_cli_ctx); + CERROR("req %p: failed to replace dead ctx %p: %d\n", + req, ctx, rc); + spin_lock(&req->rq_lock); + req->rq_err = 1; + spin_unlock(&req->rq_lock); + RETURN(rc); } - LASSERT(ctx != req->rq_cli_ctx); - CWARN("req %p: replace dead ctx %p(%u->%s) => %p\n", - req, ctx, ctx->cc_vcred.vc_uid, - sec2target_str(ctx->cc_sec), req->rq_cli_ctx); - - sptlrpc_ctx_put(ctx, 1); ctx = req->rq_cli_ctx; - LASSERT(list_empty(&req->rq_ctx_chain)); - - spin_lock(&ctx->cc_lock); goto again; } - /* Now we're sure this context is during upcall, add myself into + /* + * Now we're sure this context is during upcall, add myself into * waiting list */ - if (list_empty(&req->rq_ctx_chain)) - list_add(&req->rq_ctx_chain, &ctx->cc_req_list); - - spin_unlock(&ctx->cc_lock); - - if (timeout < 0) { - RETURN(-EWOULDBLOCK); - } - - /* Clear any flags that may be present from previous sends */ - LASSERT(req->rq_receiving_reply == 0); - spin_lock(&req->rq_lock); - req->rq_err = 0; - req->rq_timedout = 0; - req->rq_resend = 0; - req->rq_restart = 0; - spin_unlock(&req->rq_lock); - - lwi = LWI_TIMEOUT_INTR(timeout == 0 ? 
LONG_MAX : timeout * HZ, - ctx_refresh_timeout, ctx_refresh_interrupt, req); - rc = l_wait_event(req->rq_reply_waitq, ctx_check_refresh(ctx), &lwi); - - spin_lock(&ctx->cc_lock); - /* five cases we are here: - * 1. successfully refreshed; - * 2. someone else mark this ctx dead by force; - * 3. interruptted; - * 4. timedout, and we don't want recover from the failure; - * 5. timedout, and waked up upon recovery finished; + spin_lock(&ctx->cc_lock); + if (list_empty(&req->rq_ctx_chain)) + list_add(&req->rq_ctx_chain, &ctx->cc_req_list); + spin_unlock(&ctx->cc_lock); + + if (timeout < 0) + RETURN(-EWOULDBLOCK); + + /* Clear any flags that may be present from previous sends */ + LASSERT(req->rq_receiving_reply == 0); + spin_lock(&req->rq_lock); + req->rq_err = 0; + req->rq_timedout = 0; + req->rq_resend = 0; + req->rq_restart = 0; + spin_unlock(&req->rq_lock); + + lwi = LWI_TIMEOUT_INTR(msecs_to_jiffies(timeout * MSEC_PER_SEC), + ctx_refresh_timeout, + ctx_refresh_interrupt, req); + rc = l_wait_event(req->rq_reply_waitq, ctx_check_refresh(ctx), &lwi); + + /* + * following cases could lead us here: + * - successfully refreshed; + * - interrupted; + * - timedout, and we don't want recover from the failure; + * - timedout, and waked up upon recovery finished; + * - someone else mark this ctx dead by force; + * - someone invalidate the req and call ptlrpc_client_wake_req(), + * e.g. ptlrpc_abort_inflight(); */ - if (!ctx_is_refreshed(ctx)) { + if (!cli_ctx_is_refreshed(ctx)) { /* timed out or interruptted */ - list_del_init(&req->rq_ctx_chain); - spin_unlock(&ctx->cc_lock); + req_off_ctx_list(req, ctx); LASSERT(rc != 0); RETURN(rc); @@ -962,22 +837,32 @@ again: goto again; } +/** + * Initialize flavor settings for \a req, according to \a opcode. + * + * \note this could be called in two situations: + * - new request from ptlrpc_pre_req(), with proper @opcode + * - old request which changed ctx in the middle, with @opcode == 0 + */ void sptlrpc_req_set_flavor(struct ptlrpc_request *req, int opcode) { - struct sec_flavor_config *conf; + struct ptlrpc_sec *sec; LASSERT(req->rq_import); - LASSERT(req->rq_import->imp_sec); LASSERT(req->rq_cli_ctx); LASSERT(req->rq_cli_ctx->cc_sec); LASSERT(req->rq_bulk_read == 0 || req->rq_bulk_write == 0); - /* special security flags accoding to opcode */ + /* special security flags according to opcode */ switch (opcode) { case OST_READ: + case MDS_READPAGE: + case MGS_CONFIG_READ: + case OBD_IDX_READ: req->rq_bulk_read = 1; break; case OST_WRITE: + case MDS_WRITEPAGE: req->rq_bulk_write = 1; break; case SEC_CTX_INIT: @@ -986,44 +871,44 @@ void sptlrpc_req_set_flavor(struct ptlrpc_request *req, int opcode) case SEC_CTX_FINI: req->rq_ctx_fini = 1; break; + case 0: + /* init/fini rpc won't be resend, so can't be here */ + LASSERT(req->rq_ctx_init == 0); + LASSERT(req->rq_ctx_fini == 0); + + /* cleanup flags, which should be recalculated */ + req->rq_pack_udesc = 0; + req->rq_pack_bulk = 0; + break; } - req->rq_sec_flavor = req->rq_cli_ctx->cc_sec->ps_flavor; + sec = req->rq_cli_ctx->cc_sec; - /* force SVC_NONE for context initiation rpc, SVC_AUTH for context - * destruction rpc - */ - if (unlikely(req->rq_ctx_init)) { - req->rq_sec_flavor = SEC_MAKE_RPC_FLAVOR( - SEC_FLAVOR_POLICY(req->rq_sec_flavor), - SEC_FLAVOR_SUBPOLICY(req->rq_sec_flavor), - SEC_FLAVOR_SVC(SPTLRPC_SVC_NONE)); - } else if (unlikely(req->rq_ctx_fini)) { - req->rq_sec_flavor = SEC_MAKE_RPC_FLAVOR( - SEC_FLAVOR_POLICY(req->rq_sec_flavor), - SEC_FLAVOR_SUBPOLICY(req->rq_sec_flavor), - 
SEC_FLAVOR_SVC(SPTLRPC_SVC_AUTH)); - } + spin_lock(&sec->ps_lock); + req->rq_flvr = sec->ps_flvr; + spin_unlock(&sec->ps_lock); - conf = &req->rq_import->imp_obd->u.cli.cl_sec_conf; + /* force SVC_NULL for context initiation rpc, SVC_INTG for context + * destruction rpc */ + if (unlikely(req->rq_ctx_init)) + flvr_set_svc(&req->rq_flvr.sf_rpc, SPTLRPC_SVC_NULL); + else if (unlikely(req->rq_ctx_fini)) + flvr_set_svc(&req->rq_flvr.sf_rpc, SPTLRPC_SVC_INTG); - /* user descriptor flag, except ROOTONLY which don't need, and - * null security which can't - */ - if ((conf->sfc_flags & PTLRPC_SEC_FL_ROOTONLY) == 0 && - req->rq_sec_flavor != SPTLRPC_FLVR_NULL) - req->rq_sec_flavor |= SEC_FLAVOR_FL_USER; + /* user descriptor flag, null security can't do it anyway */ + if ((sec->ps_flvr.sf_flags & PTLRPC_SEC_FL_UDESC) && + (req->rq_flvr.sf_rpc != SPTLRPC_FLVR_NULL)) + req->rq_pack_udesc = 1; /* bulk security flag */ if ((req->rq_bulk_read || req->rq_bulk_write) && - (conf->sfc_bulk_priv != BULK_PRIV_ALG_NULL || - conf->sfc_bulk_csum != BULK_CSUM_ALG_NULL)) - req->rq_sec_flavor |= SEC_FLAVOR_FL_BULK; + sptlrpc_flavor_has_bulk(&req->rq_flvr)) + req->rq_pack_bulk = 1; } void sptlrpc_request_out_callback(struct ptlrpc_request *req) { - if (SEC_FLAVOR_SVC(req->rq_sec_flavor) != SPTLRPC_SVC_PRIV) + if (SPTLRPC_FLVR_SVC(req->rq_flvr.sf_rpc) != SPTLRPC_SVC_PRIV) return; LASSERT(req->rq_clrbuf); @@ -1035,48 +920,63 @@ void sptlrpc_request_out_callback(struct ptlrpc_request *req) req->rq_reqbuf_len = 0; } -/* - * check whether current user have valid context for an import or not. - * might repeatedly try in case of non-fatal errors. - * return 0 on success, < 0 on failure +/** + * Given an import \a imp, check whether current user has a valid context + * or not. We may create a new context and try to refresh it, and try + * repeatedly try in case of non-fatal errors. Return 0 means success. */ int sptlrpc_import_check_ctx(struct obd_import *imp) { - struct ptlrpc_cli_ctx *ctx; - struct ptlrpc_request *req = NULL; - int rc; - ENTRY; + struct ptlrpc_sec *sec; + struct ptlrpc_cli_ctx *ctx; + struct ptlrpc_request *req = NULL; + int rc; + ENTRY; + + might_sleep(); - might_sleep(); + sec = sptlrpc_import_sec_ref(imp); + ctx = get_my_ctx(sec); + sptlrpc_sec_put(sec); - ctx = get_my_ctx(imp->imp_sec); if (!ctx) - RETURN(1); + RETURN(-ENOMEM); - if (ctx_is_eternal(ctx)) { - sptlrpc_ctx_put(ctx, 1); + if (cli_ctx_is_eternal(ctx) || + ctx->cc_ops->validate(ctx) == 0) { + sptlrpc_cli_ctx_put(ctx, 1); RETURN(0); } - OBD_ALLOC_PTR(req); - if (!req) - RETURN(-ENOMEM); + if (cli_ctx_is_error(ctx)) { + sptlrpc_cli_ctx_put(ctx, 1); + RETURN(-EACCES); + } + + req = ptlrpc_request_cache_alloc(GFP_NOFS); + if (!req) + RETURN(-ENOMEM); + + ptlrpc_cli_req_init(req); + atomic_set(&req->rq_refcount, 10000); - spin_lock_init(&req->rq_lock); - atomic_set(&req->rq_refcount, 10000); - INIT_LIST_HEAD(&req->rq_ctx_chain); - init_waitqueue_head(&req->rq_reply_waitq); - req->rq_import = imp; - req->rq_cli_ctx = ctx; + req->rq_import = imp; + req->rq_flvr = sec->ps_flvr; + req->rq_cli_ctx = ctx; rc = sptlrpc_req_refresh_ctx(req, 0); - LASSERT(list_empty(&req->rq_ctx_chain)); - sptlrpc_ctx_put(req->rq_cli_ctx, 1); - OBD_FREE_PTR(req); + LASSERT(list_empty(&req->rq_ctx_chain)); + sptlrpc_cli_ctx_put(req->rq_cli_ctx, 1); + ptlrpc_request_cache_free(req); - RETURN(rc); + RETURN(rc); } +/** + * Used by ptlrpc client, to perform the pre-defined security transformation + * upon the request message of \a req. 
After this function called, + * req->rq_reqmsg is still accessible as clear text. + */ int sptlrpc_cli_wrap_request(struct ptlrpc_request *req) { struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx; @@ -1096,9 +996,10 @@ int sptlrpc_cli_wrap_request(struct ptlrpc_request *req) RETURN(rc); } - switch (SEC_FLAVOR_SVC(req->rq_sec_flavor)) { - case SPTLRPC_SVC_NONE: + switch (SPTLRPC_FLVR_SVC(req->rq_flvr.sf_rpc)) { + case SPTLRPC_SVC_NULL: case SPTLRPC_SVC_AUTH: + case SPTLRPC_SVC_INTG: LASSERT(ctx->cc_ops->sign); rc = ctx->cc_ops->sign(ctx, req); break; @@ -1119,62 +1020,49 @@ int sptlrpc_cli_wrap_request(struct ptlrpc_request *req) RETURN(rc); } -/* - * rq_nob_received is the actual received data length - */ -int sptlrpc_cli_unwrap_reply(struct ptlrpc_request *req) +static int do_cli_unwrap_reply(struct ptlrpc_request *req) { struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx; - int rc; + int rc; ENTRY; LASSERT(ctx); LASSERT(ctx->cc_sec); - LASSERT(ctx->cc_ops); LASSERT(req->rq_repbuf); + LASSERT(req->rq_repdata); + LASSERT(req->rq_repmsg == NULL); - req->rq_repdata_len = req->rq_nob_received; + req->rq_rep_swab_mask = 0; - if (req->rq_nob_received < sizeof(struct lustre_msg)) { - CERROR("replied data length %d too small\n", - req->rq_nob_received); + rc = __lustre_unpack_msg(req->rq_repdata, req->rq_repdata_len); + switch (rc) { + case 1: + lustre_set_rep_swabbed(req, MSG_PTLRPC_HEADER_OFF); + case 0: + break; + default: + CERROR("failed unpack reply: x%llu\n", req->rq_xid); RETURN(-EPROTO); } - if (req->rq_repbuf->lm_magic == LUSTRE_MSG_MAGIC_V1 || - req->rq_repbuf->lm_magic == LUSTRE_MSG_MAGIC_V1_SWABBED) { - /* it's must be null flavor, so our requets also should be - * in null flavor */ - if (SEC_FLAVOR_POLICY(req->rq_sec_flavor) != - SPTLRPC_POLICY_NULL) { - CERROR("request flavor is %x but reply with null\n", - req->rq_sec_flavor); - RETURN(-EPROTO); - } - } else { - /* v2 message... */ - ptlrpc_sec_flavor_t tmpf = req->rq_repbuf->lm_secflvr; - - if (req->rq_repbuf->lm_magic == LUSTRE_MSG_MAGIC_V2_SWABBED) - __swab32s(&tmpf); - - if (SEC_FLAVOR_POLICY(tmpf) != - SEC_FLAVOR_POLICY(req->rq_sec_flavor)) { - CERROR("request policy %u while reply with %d\n", - SEC_FLAVOR_POLICY(req->rq_sec_flavor), - SEC_FLAVOR_POLICY(tmpf)); - RETURN(-EPROTO); - } + if (req->rq_repdata_len < sizeof(struct lustre_msg)) { + CERROR("replied data length %d too small\n", + req->rq_repdata_len); + RETURN(-EPROTO); + } - if ((SEC_FLAVOR_POLICY(req->rq_sec_flavor) != - SPTLRPC_POLICY_NULL) && - lustre_unpack_msg(req->rq_repbuf, req->rq_nob_received)) - RETURN(-EPROTO); + if (SPTLRPC_FLVR_POLICY(req->rq_repdata->lm_secflvr) != + SPTLRPC_FLVR_POLICY(req->rq_flvr.sf_rpc)) { + CERROR("reply policy %u doesn't match request policy %u\n", + SPTLRPC_FLVR_POLICY(req->rq_repdata->lm_secflvr), + SPTLRPC_FLVR_POLICY(req->rq_flvr.sf_rpc)); + RETURN(-EPROTO); } - switch (SEC_FLAVOR_SVC(req->rq_sec_flavor)) { - case SPTLRPC_SVC_NONE: + switch (SPTLRPC_FLVR_SVC(req->rq_flvr.sf_rpc)) { + case SPTLRPC_SVC_NULL: case SPTLRPC_SVC_AUTH: + case SPTLRPC_SVC_INTG: LASSERT(ctx->cc_ops->verify); rc = ctx->cc_ops->verify(ctx, req); break; @@ -1185,131 +1073,485 @@ int sptlrpc_cli_unwrap_reply(struct ptlrpc_request *req) default: LBUG(); } - LASSERT(rc || req->rq_repmsg || req->rq_resend); + + if (SPTLRPC_FLVR_POLICY(req->rq_flvr.sf_rpc) != SPTLRPC_POLICY_NULL && + !req->rq_ctx_init) + req->rq_rep_swab_mask = 0; RETURN(rc); } +/** + * Used by ptlrpc client, to perform security transformation upon the reply + * message of \a req. 
After return successfully, req->rq_repmsg points to + * the reply message in clear text. + * + * \pre the reply buffer should have been un-posted from LNet, so nothing is + * going to change. + */ +int sptlrpc_cli_unwrap_reply(struct ptlrpc_request *req) +{ + LASSERT(req->rq_repbuf); + LASSERT(req->rq_repdata == NULL); + LASSERT(req->rq_repmsg == NULL); + LASSERT(req->rq_reply_off + req->rq_nob_received <= req->rq_repbuf_len); + + if (req->rq_reply_off == 0 && + (lustre_msghdr_get_flags(req->rq_reqmsg) & MSGHDR_AT_SUPPORT)) { + CERROR("real reply with offset 0\n"); + return -EPROTO; + } + + if (req->rq_reply_off % 8 != 0) { + CERROR("reply at odd offset %u\n", req->rq_reply_off); + return -EPROTO; + } + + req->rq_repdata = (struct lustre_msg *) + (req->rq_repbuf + req->rq_reply_off); + req->rq_repdata_len = req->rq_nob_received; + + return do_cli_unwrap_reply(req); +} + +/** + * Used by ptlrpc client, to perform security transformation upon the early + * reply message of \a req. We expect the rq_reply_off is 0, and + * rq_nob_received is the early reply size. + * + * Because the receive buffer might be still posted, the reply data might be + * changed at any time, no matter we're holding rq_lock or not. For this reason + * we allocate a separate ptlrpc_request and reply buffer for early reply + * processing. + * + * \retval 0 success, \a req_ret is filled with a duplicated ptlrpc_request. + * Later the caller must call sptlrpc_cli_finish_early_reply() on the returned + * \a *req_ret to release it. + * \retval -ev error number, and \a req_ret will not be set. + */ +int sptlrpc_cli_unwrap_early_reply(struct ptlrpc_request *req, + struct ptlrpc_request **req_ret) +{ + struct ptlrpc_request *early_req; + char *early_buf; + int early_bufsz, early_size; + int rc; + ENTRY; + + early_req = ptlrpc_request_cache_alloc(GFP_NOFS); + if (early_req == NULL) + RETURN(-ENOMEM); + + ptlrpc_cli_req_init(early_req); + + early_size = req->rq_nob_received; + early_bufsz = size_roundup_power2(early_size); + OBD_ALLOC_LARGE(early_buf, early_bufsz); + if (early_buf == NULL) + GOTO(err_req, rc = -ENOMEM); + + /* sanity checkings and copy data out, do it inside spinlock */ + spin_lock(&req->rq_lock); + + if (req->rq_replied) { + spin_unlock(&req->rq_lock); + GOTO(err_buf, rc = -EALREADY); + } + + LASSERT(req->rq_repbuf); + LASSERT(req->rq_repdata == NULL); + LASSERT(req->rq_repmsg == NULL); + + if (req->rq_reply_off != 0) { + CERROR("early reply with offset %u\n", req->rq_reply_off); + spin_unlock(&req->rq_lock); + GOTO(err_buf, rc = -EPROTO); + } + + if (req->rq_nob_received != early_size) { + /* even another early arrived the size should be the same */ + CERROR("data size has changed from %u to %u\n", + early_size, req->rq_nob_received); + spin_unlock(&req->rq_lock); + GOTO(err_buf, rc = -EINVAL); + } + + if (req->rq_nob_received < sizeof(struct lustre_msg)) { + CERROR("early reply length %d too small\n", + req->rq_nob_received); + spin_unlock(&req->rq_lock); + GOTO(err_buf, rc = -EALREADY); + } + + memcpy(early_buf, req->rq_repbuf, early_size); + spin_unlock(&req->rq_lock); + + early_req->rq_cli_ctx = sptlrpc_cli_ctx_get(req->rq_cli_ctx); + early_req->rq_flvr = req->rq_flvr; + early_req->rq_repbuf = early_buf; + early_req->rq_repbuf_len = early_bufsz; + early_req->rq_repdata = (struct lustre_msg *) early_buf; + early_req->rq_repdata_len = early_size; + early_req->rq_early = 1; + early_req->rq_reqmsg = req->rq_reqmsg; + + rc = do_cli_unwrap_reply(early_req); + if (rc) { + DEBUG_REQ(D_ADAPTTO, early_req, + "error 
%d unwrap early reply", rc); + GOTO(err_ctx, rc); + } + + LASSERT(early_req->rq_repmsg); + *req_ret = early_req; + RETURN(0); + +err_ctx: + sptlrpc_cli_ctx_put(early_req->rq_cli_ctx, 1); +err_buf: + OBD_FREE_LARGE(early_buf, early_bufsz); +err_req: + ptlrpc_request_cache_free(early_req); + RETURN(rc); +} + +/** + * Used by ptlrpc client, to release a processed early reply \a early_req. + * + * \pre \a early_req was obtained from calling sptlrpc_cli_unwrap_early_reply(). + */ +void sptlrpc_cli_finish_early_reply(struct ptlrpc_request *early_req) +{ + LASSERT(early_req->rq_repbuf); + LASSERT(early_req->rq_repdata); + LASSERT(early_req->rq_repmsg); + + sptlrpc_cli_ctx_put(early_req->rq_cli_ctx, 1); + OBD_FREE_LARGE(early_req->rq_repbuf, early_req->rq_repbuf_len); + ptlrpc_request_cache_free(early_req); +} + +/************************************************** + * sec ID * + **************************************************/ + +/* + * "fixed" sec (e.g. null) use sec_id < 0 + */ +static atomic_t sptlrpc_sec_id = ATOMIC_INIT(1); + +int sptlrpc_get_next_secid(void) +{ + return atomic_inc_return(&sptlrpc_sec_id); +} +EXPORT_SYMBOL(sptlrpc_get_next_secid); + /************************************************** - * security APIs * + * client side high-level security APIs * **************************************************/ +static int sec_cop_flush_ctx_cache(struct ptlrpc_sec *sec, uid_t uid, + int grace, int force) +{ + struct ptlrpc_sec_policy *policy = sec->ps_policy; + + LASSERT(policy->sp_cops); + LASSERT(policy->sp_cops->flush_ctx_cache); + + return policy->sp_cops->flush_ctx_cache(sec, uid, grace, force); +} + +static void sec_cop_destroy_sec(struct ptlrpc_sec *sec) +{ + struct ptlrpc_sec_policy *policy = sec->ps_policy; + + LASSERT_ATOMIC_ZERO(&sec->ps_refcount); + LASSERT_ATOMIC_ZERO(&sec->ps_nctx); + LASSERT(policy->sp_cops->destroy_sec); + + CDEBUG(D_SEC, "%s@%p: being destroied\n", sec->ps_policy->sp_name, sec); + + policy->sp_cops->destroy_sec(sec); + sptlrpc_policy_put(policy); +} + +void sptlrpc_sec_destroy(struct ptlrpc_sec *sec) +{ + sec_cop_destroy_sec(sec); +} +EXPORT_SYMBOL(sptlrpc_sec_destroy); + +static void sptlrpc_sec_kill(struct ptlrpc_sec *sec) +{ + LASSERT_ATOMIC_POS(&sec->ps_refcount); + + if (sec->ps_policy->sp_cops->kill_sec) { + sec->ps_policy->sp_cops->kill_sec(sec); + + sec_cop_flush_ctx_cache(sec, -1, 1, 1); + } +} + +struct ptlrpc_sec *sptlrpc_sec_get(struct ptlrpc_sec *sec) +{ + if (sec) + atomic_inc(&sec->ps_refcount); + + return sec; +} +EXPORT_SYMBOL(sptlrpc_sec_get); + +void sptlrpc_sec_put(struct ptlrpc_sec *sec) +{ + if (sec) { + LASSERT_ATOMIC_POS(&sec->ps_refcount); + + if (atomic_dec_and_test(&sec->ps_refcount)) { + sptlrpc_gc_del_sec(sec); + sec_cop_destroy_sec(sec); + } + } +} +EXPORT_SYMBOL(sptlrpc_sec_put); + /* - * let policy module to determine whether take refrence of - * import or not. 

 /*
- * let policy module to determine whether take refrence of
- * import or not.
+ * the policy module is responsible for taking a reference on the import
 */
 static
 struct ptlrpc_sec * sptlrpc_sec_create(struct obd_import *imp,
-                                       struct ptlrpc_svc_ctx *ctx,
-                                       __u32 flavor,
-                                       unsigned long flags)
+                                       struct ptlrpc_svc_ctx *svc_ctx,
+                                       struct sptlrpc_flavor *sf,
+                                       enum lustre_sec_part sp)
 {
	struct ptlrpc_sec_policy *policy;
-	struct ptlrpc_sec *sec;
+	struct ptlrpc_sec *sec;
+	char str[32];
	ENTRY;

-	flavor = SEC_FLAVOR_RPC(flavor);
-
-	if (ctx) {
+	if (svc_ctx) {
		LASSERT(imp->imp_dlm_fake == 1);

		CDEBUG(D_SEC, "%s %s: reverse sec using flavor %s\n",
		       imp->imp_obd->obd_type->typ_name,
		       imp->imp_obd->obd_name,
-		       sptlrpc_flavor2name(flavor));
+		       sptlrpc_flavor2name(sf, str, sizeof(str)));

-		policy = sptlrpc_policy_get(ctx->sc_policy);
-		flags |= PTLRPC_SEC_FL_REVERSE | PTLRPC_SEC_FL_ROOTONLY;
+		policy = sptlrpc_policy_get(svc_ctx->sc_policy);
+		sf->sf_flags |= PTLRPC_SEC_FL_REVERSE | PTLRPC_SEC_FL_ROOTONLY;
	} else {
		LASSERT(imp->imp_dlm_fake == 0);

		CDEBUG(D_SEC, "%s %s: select security flavor %s\n",
		       imp->imp_obd->obd_type->typ_name,
		       imp->imp_obd->obd_name,
-		       sptlrpc_flavor2name(flavor));
+		       sptlrpc_flavor2name(sf, str, sizeof(str)));

-		policy = sptlrpc_flavor2policy(flavor);
+		policy = sptlrpc_wireflavor2policy(sf->sf_rpc);
		if (!policy) {
-			CERROR("invalid flavor 0x%x\n", flavor);
+			CERROR("invalid flavor 0x%x\n", sf->sf_rpc);
			RETURN(NULL);
		}
	}

-	sec = policy->sp_cops->create_sec(imp, ctx, flavor, flags);
-	if (sec) {
-		atomic_inc(&sec->ps_refcount);
+	sec = policy->sp_cops->create_sec(imp, svc_ctx, sf);
+	if (sec) {
+		atomic_inc(&sec->ps_refcount);

-		/* take 1 busy count on behalf of sec itself,
-		 * balanced in sptlrpc_set_put()
-		 */
-		atomic_inc(&sec->ps_busy);
-	} else
-		sptlrpc_policy_put(policy);
+		sec->ps_part = sp;
+
+		if (sec->ps_gc_interval && policy->sp_cops->gc_ctx)
+			sptlrpc_gc_add_sec(sec);
+	} else {
+		sptlrpc_policy_put(policy);
+	}

-	RETURN(sec);
+	RETURN(sec);
 }

-static
-void sptlrpc_sec_destroy(struct ptlrpc_sec *sec)
+struct ptlrpc_sec *sptlrpc_import_sec_ref(struct obd_import *imp)
 {
-	struct ptlrpc_sec_policy *policy = sec->ps_policy;
+	struct ptlrpc_sec *sec;

-	LASSERT(policy);
-	LASSERT(atomic_read(&sec->ps_refcount) == 0);
-	LASSERT(atomic_read(&sec->ps_busy) == 0);
-	LASSERT(policy->sp_cops->destroy_sec);
+	spin_lock(&imp->imp_lock);
+	sec = sptlrpc_sec_get(imp->imp_sec);
+	spin_unlock(&imp->imp_lock);

-	policy->sp_cops->destroy_sec(sec);
-	sptlrpc_policy_put(policy);
+	return sec;
 }
+EXPORT_SYMBOL(sptlrpc_import_sec_ref);

-static
-void sptlrpc_sec_put(struct ptlrpc_sec *sec)
+static void sptlrpc_import_sec_install(struct obd_import *imp,
+				       struct ptlrpc_sec *sec)
 {
-	struct ptlrpc_sec_policy *policy = sec->ps_policy;
+	struct ptlrpc_sec *old_sec;

-	if (!atomic_dec_and_test(&sec->ps_refcount)) {
-		sptlrpc_policy_put(policy);
-		return;
+	LASSERT_ATOMIC_POS(&sec->ps_refcount);
+
+	spin_lock(&imp->imp_lock);
+	old_sec = imp->imp_sec;
+	imp->imp_sec = sec;
+	spin_unlock(&imp->imp_lock);
+
+	if (old_sec) {
+		sptlrpc_sec_kill(old_sec);
+
+		/* balance the ref taken by this import */
+		sptlrpc_sec_put(old_sec);
	}
+}

-	ctx_cache_flush(sec, -1, 1, 1);
+static inline
+int flavor_equal(struct sptlrpc_flavor *sf1, struct sptlrpc_flavor *sf2)
+{
+	return (memcmp(sf1, sf2, sizeof(*sf1)) == 0);
+}

-	if (atomic_dec_and_test(&sec->ps_busy))
-		sptlrpc_sec_destroy(sec);
-	else
-		CWARN("delay to destroy %s@%p: busy contexts\n",
-		      policy->sp_name, sec);
+static inline
+void flavor_copy(struct sptlrpc_flavor *dst, struct sptlrpc_flavor *src)
+{
+	*dst = *src;
 }

-/*
- * return 1 means we should also destroy the sec structure.
- * normally return 0
+/**
+ * Get an appropriate ptlrpc_sec for the \a imp, according to the current
+ * configuration. When this is called, imp->imp_sec may or may not be NULL.
+ *
+ * - regular import: \a svc_ctx should be NULL and \a flvr is ignored;
+ * - reverse import: \a svc_ctx and \a flvr are obtained from the incoming
+ *   request.
 */
-static
-int sptlrpc_sec_destroy_ctx(struct ptlrpc_sec *sec,
-                            struct ptlrpc_cli_ctx *ctx)
-{
-	LASSERT(sec == ctx->cc_sec);
-	LASSERT(atomic_read(&sec->ps_busy));
-	LASSERT(atomic_read(&ctx->cc_refcount) == 0);
-	LASSERT(hlist_unhashed(&ctx->cc_hash));
-	LASSERT(list_empty(&ctx->cc_req_list));
-	LASSERT(sec->ps_policy->sp_cops->destroy_ctx);
+int sptlrpc_import_sec_adapt(struct obd_import *imp,
+			     struct ptlrpc_svc_ctx *svc_ctx,
+			     struct sptlrpc_flavor *flvr)
+{
+	struct ptlrpc_connection *conn;
+	struct sptlrpc_flavor sf;
+	struct ptlrpc_sec *sec, *newsec;
+	enum lustre_sec_part sp;
+	char str[24];
+	int rc = 0;
+	ENTRY;
+
+	might_sleep();
+
+	if (imp == NULL)
+		RETURN(0);
+
+	conn = imp->imp_connection;
+
+	if (svc_ctx == NULL) {
+		struct client_obd *cliobd = &imp->imp_obd->u.cli;
+		/*
+		 * normal import, determine flavor from rule set, except
+		 * for mgc the flavor is predetermined.
+		 */
+		if (cliobd->cl_sp_me == LUSTRE_SP_MGC)
+			sf = cliobd->cl_flvr_mgc;
+		else
+			sptlrpc_conf_choose_flavor(cliobd->cl_sp_me,
+						   cliobd->cl_sp_to,
+						   &cliobd->cl_target_uuid,
+						   conn->c_self, &sf);
+
+		sp = imp->imp_obd->u.cli.cl_sp_me;
+	} else {
+		/* reverse import, determine flavor from incoming request */
+		sf = *flvr;

-	sec->ps_policy->sp_cops->destroy_ctx(sec, ctx);
+		if (sf.sf_rpc != SPTLRPC_FLVR_NULL)
+			sf.sf_flags = PTLRPC_SEC_FL_REVERSE |
+				      PTLRPC_SEC_FL_ROOTONLY;

-	if (atomic_dec_and_test(&sec->ps_busy)) {
-		LASSERT(atomic_read(&sec->ps_refcount) == 0);
-		return 1;
+		sp = sptlrpc_target_sec_part(imp->imp_obd);
	}

-	return 0;
+	sec = sptlrpc_import_sec_ref(imp);
+	if (sec) {
+		char str2[24];
+
+		if (flavor_equal(&sf, &sec->ps_flvr))
+			GOTO(out, rc);
+
+		CDEBUG(D_SEC, "import %s->%s: changing flavor %s -> %s\n",
+		       imp->imp_obd->obd_name,
+		       obd_uuid2str(&conn->c_remote_uuid),
+		       sptlrpc_flavor2name(&sec->ps_flvr, str, sizeof(str)),
+		       sptlrpc_flavor2name(&sf, str2, sizeof(str2)));
+	} else if (SPTLRPC_FLVR_BASE(sf.sf_rpc) !=
+		   SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_NULL)) {
+		CDEBUG(D_SEC, "import %s->%s netid %x: select flavor %s\n",
+		       imp->imp_obd->obd_name,
+		       obd_uuid2str(&conn->c_remote_uuid),
+		       LNET_NIDNET(conn->c_self),
+		       sptlrpc_flavor2name(&sf, str, sizeof(str)));
+	}
+
+	mutex_lock(&imp->imp_sec_mutex);
+
+	newsec = sptlrpc_sec_create(imp, svc_ctx, &sf, sp);
+	if (newsec) {
+		sptlrpc_import_sec_install(imp, newsec);
+	} else {
+		CERROR("import %s->%s: failed to create new sec\n",
+		       imp->imp_obd->obd_name,
+		       obd_uuid2str(&conn->c_remote_uuid));
+		rc = -EPERM;
+	}
+
+	mutex_unlock(&imp->imp_sec_mutex);
+out:
+	sptlrpc_sec_put(sec);
+	RETURN(rc);
 }
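
A sketch of the two calling shapes described in the comment above (field names are taken from this file; the reverse-import form mirrors the call made from sptlrpc_target_export_check() later in this patch):

	/* regular import: flavor is chosen from the configured rule set */
	rc = sptlrpc_import_sec_adapt(imp, NULL, NULL);

	/* reverse import: flavor and svc_ctx come from the incoming request */
	rc = sptlrpc_import_sec_adapt(exp->exp_imp_reverse,
				      req->rq_svc_ctx, &req->rq_flvr);
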
-/*
- * when complete successfully, req->rq_reqmsg should point to the
- * right place.
+void sptlrpc_import_sec_put(struct obd_import *imp)
+{
+	if (imp->imp_sec) {
+		sptlrpc_sec_kill(imp->imp_sec);
+
+		sptlrpc_sec_put(imp->imp_sec);
+		imp->imp_sec = NULL;
+	}
+}
+
+static void import_flush_ctx_common(struct obd_import *imp,
+				    uid_t uid, int grace, int force)
+{
+	struct ptlrpc_sec *sec;
+
+	if (imp == NULL)
+		return;
+
+	sec = sptlrpc_import_sec_ref(imp);
+	if (sec == NULL)
+		return;
+
+	sec_cop_flush_ctx_cache(sec, uid, grace, force);
+	sptlrpc_sec_put(sec);
+}
+
+void sptlrpc_import_flush_root_ctx(struct obd_import *imp)
+{
+	/* it's important to use grace mode, see the explanation in
+	 * sptlrpc_req_refresh_ctx() */
+	import_flush_ctx_common(imp, 0, 1, 1);
+}
+
+void sptlrpc_import_flush_my_ctx(struct obd_import *imp)
+{
+	import_flush_ctx_common(imp, from_kuid(&init_user_ns, current_uid()),
+				1, 1);
+}
+EXPORT_SYMBOL(sptlrpc_import_flush_my_ctx);
+
+void sptlrpc_import_flush_all_ctx(struct obd_import *imp)
+{
+	import_flush_ctx_common(imp, -1, 1, 1);
+}
+EXPORT_SYMBOL(sptlrpc_import_flush_all_ctx);
+
+/**
+ * Used by ptlrpc client to allocate the request buffer of \a req. On
+ * successful return, req->rq_reqmsg points to a buffer of size \a msgsize.
 */
 int sptlrpc_cli_alloc_reqbuf(struct ptlrpc_request *req, int msgsize)
 {
@@ -1318,10 +1560,10 @@ int sptlrpc_cli_alloc_reqbuf(struct ptlrpc_request *req, int msgsize)
	int rc;

	LASSERT(ctx);
-	LASSERT(atomic_read(&ctx->cc_refcount));
	LASSERT(ctx->cc_sec);
	LASSERT(ctx->cc_sec->ps_policy);
	LASSERT(req->rq_reqmsg == NULL);
+	LASSERT_ATOMIC_POS(&ctx->cc_refcount);

	policy = ctx->cc_sec->ps_policy;
	rc = policy->sp_cops->alloc_reqbuf(ctx->cc_sec, req, msgsize);
@@ -1337,19 +1579,26 @@ int sptlrpc_cli_alloc_reqbuf(struct ptlrpc_request *req, int msgsize)
	return rc;
 }

+/**
+ * Used by ptlrpc client to free the request buffer of \a req. After this,
+ * req->rq_reqmsg is set to NULL and should not be accessed anymore.
+ */
 void sptlrpc_cli_free_reqbuf(struct ptlrpc_request *req)
 {
	struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
	struct ptlrpc_sec_policy *policy;

	LASSERT(ctx);
-	LASSERT(atomic_read(&ctx->cc_refcount));
	LASSERT(ctx->cc_sec);
	LASSERT(ctx->cc_sec->ps_policy);
-	LASSERT(req->rq_reqbuf || req->rq_clrbuf);
+	LASSERT_ATOMIC_POS(&ctx->cc_refcount);
+
+	if (req->rq_reqbuf == NULL && req->rq_clrbuf == NULL)
+		return;

	policy = ctx->cc_sec->ps_policy;
	policy->sp_cops->free_reqbuf(ctx->cc_sec, req);
+	req->rq_reqmsg = NULL;
 }

 /*
@@ -1396,20 +1645,24 @@ void _sptlrpc_enlarge_msg_inplace(struct lustre_msg *msg,
 }
 EXPORT_SYMBOL(_sptlrpc_enlarge_msg_inplace);

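As a usage sketch, the client-side request buffer helpers above pair as follows (hypothetical caller; msgsize and error handling elided):

	rc = sptlrpc_cli_alloc_reqbuf(req, msgsize);
	if (rc == 0) {
		/* fill in req->rq_reqmsg; while the request is still unsent,
		 * a segment may be grown with sptlrpc_cli_enlarge_reqbuf() */
		sptlrpc_cli_free_reqbuf(req);	/* rq_reqmsg becomes NULL */
	}
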
-/*
- * enlarge @segment of upper message req->rq_reqmsg to @newsize, all data
- * will be preserved after enlargement. this must be called after rq_reqmsg has
- * been intialized at least.
+/**
+ * Used by ptlrpc client to enlarge the \a segment of the request message
+ * pointed to by req->rq_reqmsg to size \a newsize; all previously filled-in
+ * data will be preserved after the enlargement. This must be called after the
+ * original request buffer has been allocated.
 *
- * caller's attention: upon return, rq_reqmsg and rq_reqlen might have
- * been changed.
+ * \note after this is called, rq_reqmsg and rq_reqlen might have been changed,
+ * so the caller should refresh its local pointers if needed.
 */
 int sptlrpc_cli_enlarge_reqbuf(struct ptlrpc_request *req,
-			       int segment, int newsize)
+			       const struct req_msg_field *field,
+			       int newsize)
 {
-	struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
-	struct ptlrpc_sec_cops *cops;
-	struct lustre_msg *msg = req->rq_reqmsg;
+	struct req_capsule *pill = &req->rq_pill;
+	struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
+	struct ptlrpc_sec_cops *cops;
+	struct lustre_msg *msg = req->rq_reqmsg;
+	int segment = __req_capsule_offset(pill, field, RCL_CLIENT);

	LASSERT(ctx);
	LASSERT(msg);
@@ -1425,6 +1678,11 @@ int sptlrpc_cli_enlarge_reqbuf(struct ptlrpc_request *req,
 }
 EXPORT_SYMBOL(sptlrpc_cli_enlarge_reqbuf);

+/**
+ * Used by ptlrpc client to allocate the reply buffer of \a req.
+ *
+ * \note After this, req->rq_repmsg is still not accessible.
+ */
 int sptlrpc_cli_alloc_repbuf(struct ptlrpc_request *req, int msgsize)
 {
	struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
@@ -1432,7 +1690,6 @@ int sptlrpc_cli_alloc_repbuf(struct ptlrpc_request *req, int msgsize)
	ENTRY;

	LASSERT(ctx);
-	LASSERT(atomic_read(&ctx->cc_refcount));
	LASSERT(ctx->cc_sec);
	LASSERT(ctx->cc_sec->ps_policy);

@@ -1443,82 +1700,31 @@ int sptlrpc_cli_alloc_repbuf(struct ptlrpc_request *req, int msgsize)
	RETURN(policy->sp_cops->alloc_repbuf(ctx->cc_sec, req, msgsize));
 }

+/**
+ * Used by ptlrpc client to free the reply buffer of \a req. After this,
+ * req->rq_repmsg is set to NULL and should not be accessed anymore.
+ */
 void sptlrpc_cli_free_repbuf(struct ptlrpc_request *req)
 {
	struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
	struct ptlrpc_sec_policy *policy;
	ENTRY;

-	LASSERT(ctx);
-	LASSERT(atomic_read(&ctx->cc_refcount));
-	LASSERT(ctx->cc_sec);
-	LASSERT(ctx->cc_sec->ps_policy);
-	LASSERT(req->rq_repbuf);
-
-	policy = ctx->cc_sec->ps_policy;
-	policy->sp_cops->free_repbuf(ctx->cc_sec, req);
-	EXIT;
-}
-
-int sptlrpc_import_get_sec(struct obd_import *imp,
-                           struct ptlrpc_svc_ctx *ctx,
-                           __u32 flavor,
-                           unsigned long flags)
-{
-	struct obd_device *obd = imp->imp_obd;
-	ENTRY;
-
-	LASSERT(obd);
-	LASSERT(obd->obd_type);
-
-	/* old sec might be still there in reconnecting */
-	if (imp->imp_sec)
-		RETURN(0);
-
-	imp->imp_sec = sptlrpc_sec_create(imp, ctx, flavor, flags);
-	if (!imp->imp_sec)
-		RETURN(-EINVAL);
-
-	RETURN(0);
-}
-
-void sptlrpc_import_put_sec(struct obd_import *imp)
-{
-	if (imp->imp_sec == NULL)
-		return;
-
-	sptlrpc_sec_put(imp->imp_sec);
-	imp->imp_sec = NULL;
-}
-
-void sptlrpc_import_flush_root_ctx(struct obd_import *imp)
-{
-	if (imp == NULL || imp->imp_sec == NULL)
-		return;
-
-	/* use 'grace' mode, it's crutial see explain in
-	 * sptlrpc_req_refresh_ctx()
-	 */
-	ctx_cache_flush(imp->imp_sec, 0, 1, 1);
-}
-
-void sptlrpc_import_flush_my_ctx(struct obd_import *imp)
-{
-	if (imp == NULL || imp->imp_sec == NULL)
-		return;
-
-	ctx_cache_flush(imp->imp_sec, cfs_current()->uid, 1, 1);
-}
-EXPORT_SYMBOL(sptlrpc_import_flush_my_ctx);
+	LASSERT(ctx);
+	LASSERT(ctx->cc_sec);
+	LASSERT(ctx->cc_sec->ps_policy);
+	LASSERT_ATOMIC_POS(&ctx->cc_refcount);

-void sptlrpc_import_flush_all_ctx(struct obd_import *imp)
-{
-	if (imp == NULL || imp->imp_sec == NULL)
+	if (req->rq_repbuf == NULL)
		return;
+	LASSERT(req->rq_repbuf_len);

-	ctx_cache_flush(imp->imp_sec, -1, 0, 1);
+	policy = ctx->cc_sec->ps_policy;
+	policy->sp_cops->free_repbuf(ctx->cc_sec, req);
+	req->rq_repmsg = NULL;
	EXIT;
 }
-EXPORT_SYMBOL(sptlrpc_import_flush_all_ctx);
+EXPORT_SYMBOL(sptlrpc_cli_free_repbuf);

 int sptlrpc_cli_install_rvs_ctx(struct obd_import *imp,
				struct ptlrpc_cli_ctx *ctx)
@@ -1544,69 +1750,360 @@ int sptlrpc_svc_install_rvs_ctx(struct obd_import *imp,
 * server side security                  *
 ****************************************/

+static int flavor_allowed(struct sptlrpc_flavor *exp,
+			  struct ptlrpc_request *req)
+{
+	struct sptlrpc_flavor *flvr = &req->rq_flvr;
+
+	if (exp->sf_rpc == SPTLRPC_FLVR_ANY || exp->sf_rpc == flvr->sf_rpc)
+		return 1;
+
+	if ((req->rq_ctx_init || req->rq_ctx_fini) &&
+	    SPTLRPC_FLVR_POLICY(exp->sf_rpc) ==
+	    SPTLRPC_FLVR_POLICY(flvr->sf_rpc) &&
+	    SPTLRPC_FLVR_MECH(exp->sf_rpc) == SPTLRPC_FLVR_MECH(flvr->sf_rpc))
+		return 1;
+
+	return 0;
+}
+
+#define EXP_FLVR_UPDATE_EXPIRE	(OBD_TIMEOUT_DEFAULT + 10)
+
+/**
+ * Given an export \a exp, check whether the flavor of the incoming \a req
+ * is allowed by the export \a exp. The main logic is about taking care of
+ * changing configurations. A return value of 0 means success.
+ */
+int sptlrpc_target_export_check(struct obd_export *exp,
+				struct ptlrpc_request *req)
+{
+	struct sptlrpc_flavor flavor;
+
+	if (exp == NULL)
+		return 0;
+
+	/* client side export has no imp_reverse, skip
+	 * FIXME: maybe we should check the flavor as well??? */
+	if (exp->exp_imp_reverse == NULL)
+		return 0;
+
+	/* don't care about ctx fini rpc */
+	if (req->rq_ctx_fini)
+		return 0;
+
+	spin_lock(&exp->exp_lock);
+
+	/* if the flavor just changed (exp->exp_flvr_changed != 0), we wait
+	 * for the first req with the new flavor, then treat it as the current
+	 * flavor and adapt the reverse sec according to it.
+	 * note the first rpc with the new flavor might not be with root ctx,
+	 * in which case delay the sec_adapt by leaving exp_flvr_adapt == 1. */
+	if (unlikely(exp->exp_flvr_changed) &&
+	    flavor_allowed(&exp->exp_flvr_old[1], req)) {
+		/* make the new flavor "current", and the old ones
+		 * about-to-expire */
+		CDEBUG(D_SEC, "exp %p: just changed: %x->%x\n", exp,
+		       exp->exp_flvr.sf_rpc, exp->exp_flvr_old[1].sf_rpc);
+		flavor = exp->exp_flvr_old[1];
+		exp->exp_flvr_old[1] = exp->exp_flvr_old[0];
+		exp->exp_flvr_expire[1] = exp->exp_flvr_expire[0];
+		exp->exp_flvr_old[0] = exp->exp_flvr;
+		exp->exp_flvr_expire[0] = ktime_get_real_seconds() +
+					  EXP_FLVR_UPDATE_EXPIRE;
+		exp->exp_flvr = flavor;
+
+		/* flavor change finished */
+		exp->exp_flvr_changed = 0;
+		LASSERT(exp->exp_flvr_adapt == 1);
+
+		/* if it's gss, we are only interested in root ctx init */
+		if (req->rq_auth_gss &&
+		    !(req->rq_ctx_init &&
+		      (req->rq_auth_usr_root || req->rq_auth_usr_mdt ||
+		       req->rq_auth_usr_ost))) {
+			spin_unlock(&exp->exp_lock);
+			CDEBUG(D_SEC, "is good but not root(%d:%d:%d:%d:%d)\n",
+			       req->rq_auth_gss, req->rq_ctx_init,
+			       req->rq_auth_usr_root, req->rq_auth_usr_mdt,
+			       req->rq_auth_usr_ost);
+			return 0;
+		}
+
+		exp->exp_flvr_adapt = 0;
+		spin_unlock(&exp->exp_lock);
+
+		return sptlrpc_import_sec_adapt(exp->exp_imp_reverse,
+						req->rq_svc_ctx, &flavor);
+	}
+
+	/* if it equals the current flavor, we accept it, but need to
+	 * deal with the reverse sec/ctx */
+	if (likely(flavor_allowed(&exp->exp_flvr, req))) {
+		/* most cases should return here; we are only interested in
+		 * gss root ctx init */
+		if (!req->rq_auth_gss || !req->rq_ctx_init ||
+		    (!req->rq_auth_usr_root && !req->rq_auth_usr_mdt &&
+		     !req->rq_auth_usr_ost)) {
+			spin_unlock(&exp->exp_lock);
+			return 0;
+		}
+
+		/* if the flavor just changed, we should not proceed; just
+		 * leave it, the current flavor will be discovered and
+		 * replaced shortly, and let _this_ rpc pass through */
+		if (exp->exp_flvr_changed) {
+			LASSERT(exp->exp_flvr_adapt);
+			spin_unlock(&exp->exp_lock);
+			return 0;
+		}
+
+		if (exp->exp_flvr_adapt) {
+			exp->exp_flvr_adapt = 0;
CDEBUG(D_SEC, "exp %p (%x|%x|%x): do delayed adapt\n", + exp, exp->exp_flvr.sf_rpc, + exp->exp_flvr_old[0].sf_rpc, + exp->exp_flvr_old[1].sf_rpc); + flavor = exp->exp_flvr; + spin_unlock(&exp->exp_lock); + + return sptlrpc_import_sec_adapt(exp->exp_imp_reverse, + req->rq_svc_ctx, + &flavor); + } else { + CDEBUG(D_SEC, "exp %p (%x|%x|%x): is current flavor, " + "install rvs ctx\n", exp, exp->exp_flvr.sf_rpc, + exp->exp_flvr_old[0].sf_rpc, + exp->exp_flvr_old[1].sf_rpc); + spin_unlock(&exp->exp_lock); + + return sptlrpc_svc_install_rvs_ctx(exp->exp_imp_reverse, + req->rq_svc_ctx); + } + } + + if (exp->exp_flvr_expire[0]) { + if (exp->exp_flvr_expire[0] >= ktime_get_real_seconds()) { + if (flavor_allowed(&exp->exp_flvr_old[0], req)) { + CDEBUG(D_SEC, "exp %p (%x|%x|%x): match the middle one (%lld)\n", exp, + exp->exp_flvr.sf_rpc, + exp->exp_flvr_old[0].sf_rpc, + exp->exp_flvr_old[1].sf_rpc, + (s64)(exp->exp_flvr_expire[0] - + ktime_get_real_seconds())); + spin_unlock(&exp->exp_lock); + return 0; + } + } else { + CDEBUG(D_SEC, "mark middle expired\n"); + exp->exp_flvr_expire[0] = 0; + } + CDEBUG(D_SEC, "exp %p (%x|%x|%x): %x not match middle\n", exp, + exp->exp_flvr.sf_rpc, + exp->exp_flvr_old[0].sf_rpc, exp->exp_flvr_old[1].sf_rpc, + req->rq_flvr.sf_rpc); + } + + /* now it doesn't match the current flavor, the only chance we can + * accept it is match the old flavors which is not expired. */ + if (exp->exp_flvr_changed == 0 && exp->exp_flvr_expire[1]) { + if (exp->exp_flvr_expire[1] >= ktime_get_real_seconds()) { + if (flavor_allowed(&exp->exp_flvr_old[1], req)) { + CDEBUG(D_SEC, "exp %p (%x|%x|%x): match the oldest one (%lld)\n", + exp, + exp->exp_flvr.sf_rpc, + exp->exp_flvr_old[0].sf_rpc, + exp->exp_flvr_old[1].sf_rpc, + (s64)(exp->exp_flvr_expire[1] - + ktime_get_real_seconds())); + spin_unlock(&exp->exp_lock); + return 0; + } + } else { + CDEBUG(D_SEC, "mark oldest expired\n"); + exp->exp_flvr_expire[1] = 0; + } + CDEBUG(D_SEC, "exp %p (%x|%x|%x): %x not match found\n", + exp, exp->exp_flvr.sf_rpc, + exp->exp_flvr_old[0].sf_rpc, exp->exp_flvr_old[1].sf_rpc, + req->rq_flvr.sf_rpc); + } else { + CDEBUG(D_SEC, "exp %p (%x|%x|%x): skip the last one\n", + exp, exp->exp_flvr.sf_rpc, exp->exp_flvr_old[0].sf_rpc, + exp->exp_flvr_old[1].sf_rpc); + } + + spin_unlock(&exp->exp_lock); + + CWARN("exp %p(%s): req %p (%u|%u|%u|%u|%u|%u) with unauthorized flavor %x, expect %x|%x(%+lld)|%x(%+lld)\n", + exp, exp->exp_obd->obd_name, + req, req->rq_auth_gss, req->rq_ctx_init, req->rq_ctx_fini, + req->rq_auth_usr_root, req->rq_auth_usr_mdt, req->rq_auth_usr_ost, + req->rq_flvr.sf_rpc, + exp->exp_flvr.sf_rpc, + exp->exp_flvr_old[0].sf_rpc, + exp->exp_flvr_expire[0] ? + (s64)(exp->exp_flvr_expire[0] - ktime_get_real_seconds()) : 0, + exp->exp_flvr_old[1].sf_rpc, + exp->exp_flvr_expire[1] ? + (s64)(exp->exp_flvr_expire[1] - ktime_get_real_seconds()) : 0); + return -EACCES; +} +EXPORT_SYMBOL(sptlrpc_target_export_check); + +void sptlrpc_target_update_exp_flavor(struct obd_device *obd, + struct sptlrpc_rule_set *rset) +{ + struct obd_export *exp; + struct sptlrpc_flavor new_flvr; + + LASSERT(obd); + + spin_lock(&obd->obd_dev_lock); + + list_for_each_entry(exp, &obd->obd_exports, exp_obd_chain) { + if (exp->exp_connection == NULL) + continue; + + /* note if this export had just been updated flavor + * (exp_flvr_changed == 1), this will override the + * previous one. 

+static int sptlrpc_svc_check_from(struct ptlrpc_request *req, int svc_rc)
+{
+	/* peer's claim is unreliable unless gss is being used */
+	if (!req->rq_auth_gss || svc_rc == SECSVC_DROP)
+		return svc_rc;
+
+	switch (req->rq_sp_from) {
+	case LUSTRE_SP_CLI:
+		if (req->rq_auth_usr_mdt || req->rq_auth_usr_ost) {
+			DEBUG_REQ(D_ERROR, req, "faked source CLI");
+			svc_rc = SECSVC_DROP;
+		}
+		break;
+	case LUSTRE_SP_MDT:
+		if (!req->rq_auth_usr_mdt) {
+			DEBUG_REQ(D_ERROR, req, "faked source MDT");
+			svc_rc = SECSVC_DROP;
+		}
+		break;
+	case LUSTRE_SP_OST:
+		if (!req->rq_auth_usr_ost) {
+			DEBUG_REQ(D_ERROR, req, "faked source OST");
+			svc_rc = SECSVC_DROP;
+		}
+		break;
+	case LUSTRE_SP_MGS:
+	case LUSTRE_SP_MGC:
+		if (!req->rq_auth_usr_root && !req->rq_auth_usr_mdt &&
+		    !req->rq_auth_usr_ost) {
+			DEBUG_REQ(D_ERROR, req, "faked source MGC/MGS");
+			svc_rc = SECSVC_DROP;
+		}
+		break;
+	case LUSTRE_SP_ANY:
+	default:
+		DEBUG_REQ(D_ERROR, req, "invalid source %u", req->rq_sp_from);
+		svc_rc = SECSVC_DROP;
+	}
+
+	return svc_rc;
+}
+
+/**
+ * Used by ptlrpc server, to perform transformation upon the request message
+ * of the incoming \a req. This must be the first thing done with an incoming
+ * request in the ptlrpc layer.
+ *
+ * \retval SECSVC_OK success; req->rq_reqmsg points to the request message in
+ * clear text, its size is req->rq_reqlen, and req->rq_svc_ctx is set.
+ * \retval SECSVC_COMPLETE success, the request has been fully processed, and
+ * the reply message has been prepared.
+ * \retval SECSVC_DROP failed, this request should be dropped.
+ */
 int sptlrpc_svc_unwrap_request(struct ptlrpc_request *req)
 {
	struct ptlrpc_sec_policy *policy;
-	struct lustre_msg *msg = req->rq_reqbuf;
-	int rc;
+	struct lustre_msg *msg = req->rq_reqbuf;
+	int rc;
	ENTRY;

	LASSERT(msg);
	LASSERT(req->rq_reqmsg == NULL);
	LASSERT(req->rq_repmsg == NULL);
+	LASSERT(req->rq_svc_ctx == NULL);

-	/*
-	 * in any case we avoid to call unpack_msg() for request of null flavor
-	 * which will later be done by ptlrpc_server_handle_request().
-	 */
-	if (req->rq_reqdata_len < sizeof(struct lustre_msg)) {
-		CERROR("request size %d too small\n", req->rq_reqdata_len);
+	req->rq_req_swab_mask = 0;
+
+	rc = __lustre_unpack_msg(msg, req->rq_reqdata_len);
+	switch (rc) {
+	case 1:
+		lustre_set_req_swabbed(req, MSG_PTLRPC_HEADER_OFF);
+	case 0:
+		break;
+	default:
+		CERROR("error unpacking request from %s x%llu\n",
+		       libcfs_id2str(req->rq_peer), req->rq_xid);
		RETURN(SECSVC_DROP);
	}

-	if (msg->lm_magic == LUSTRE_MSG_MAGIC_V1 ||
-	    msg->lm_magic == LUSTRE_MSG_MAGIC_V1_SWABBED) {
-		req->rq_sec_flavor = SPTLRPC_FLVR_NULL;
-	} else {
-		req->rq_sec_flavor = msg->lm_secflvr;
-
-		if (msg->lm_magic == LUSTRE_MSG_MAGIC_V2_SWABBED)
-			__swab32s(&req->rq_sec_flavor);
-
-		if ((SEC_FLAVOR_POLICY(req->rq_sec_flavor) !=
-		     SPTLRPC_POLICY_NULL) &&
-		    lustre_unpack_msg(msg, req->rq_reqdata_len))
-			RETURN(SECSVC_DROP);
-	}
+	req->rq_flvr.sf_rpc = WIRE_FLVR(msg->lm_secflvr);
+	req->rq_sp_from = LUSTRE_SP_ANY;
+	req->rq_auth_uid = -1;		/* set to INVALID_UID */
+	req->rq_auth_mapped_uid = -1;

-	policy = sptlrpc_flavor2policy(req->rq_sec_flavor);
+	policy = sptlrpc_wireflavor2policy(req->rq_flvr.sf_rpc);
	if (!policy) {
-		CERROR("unsupported security flavor %x\n", req->rq_sec_flavor);
+		CERROR("unsupported rpc flavor %x\n", req->rq_flvr.sf_rpc);
		RETURN(SECSVC_DROP);
	}

	LASSERT(policy->sp_sops->accept);
	rc = policy->sp_sops->accept(req);
-
-	LASSERT(req->rq_reqmsg || rc != SECSVC_OK);
	sptlrpc_policy_put(policy);
+	LASSERT(req->rq_reqmsg || rc != SECSVC_OK);
+	LASSERT(req->rq_svc_ctx || rc == SECSVC_DROP);

-	/* FIXME move to proper place */
-	if (rc == SECSVC_OK) {
-		__u32 opc = lustre_msg_get_opc(req->rq_reqmsg);
-
-		if (opc == OST_WRITE)
-			req->rq_bulk_write = 1;
-		else if (opc == OST_READ)
-			req->rq_bulk_read = 1;
-	}
+	/*
+	 * if it's not null flavor (which means an embedded packed msg),
+	 * reset the swab mask for the coming inner msg unpacking.
+	 */
+	if (SPTLRPC_FLVR_POLICY(req->rq_flvr.sf_rpc) != SPTLRPC_POLICY_NULL)
+		req->rq_req_swab_mask = 0;

-	LASSERT(req->rq_svc_ctx || rc == SECSVC_DROP);
+	/* sanity check for the request source */
+	rc = sptlrpc_svc_check_from(req, rc);
	RETURN(rc);
 }

-int sptlrpc_svc_alloc_rs(struct ptlrpc_request *req,
-			 int msglen)
+/**
+ * Used by ptlrpc server, to allocate the reply buffer for \a req. On success,
+ * req->rq_reply_state is set, and req->rq_reply_state->rs_msg points to
+ * a buffer of \a msglen size.
+ */
+int sptlrpc_svc_alloc_rs(struct ptlrpc_request *req, int msglen)
 {
	struct ptlrpc_sec_policy *policy;
	struct ptlrpc_reply_state *rs;
@@ -1621,8 +2118,18 @@ int sptlrpc_svc_alloc_rs(struct ptlrpc_request *req,
	rc = policy->sp_sops->alloc_rs(req, msglen);
	if (unlikely(rc == -ENOMEM)) {
+		struct ptlrpc_service_part *svcpt = req->rq_rqbd->rqbd_svcpt;
+
+		if (svcpt->scp_service->srv_max_reply_size <
+		    msglen + sizeof(struct ptlrpc_reply_state)) {
+			/* Just return failure if the size is too big */
+			CERROR("size of message is too big (%zd), %d allowed\n",
+			       msglen + sizeof(struct ptlrpc_reply_state),
+			       svcpt->scp_service->srv_max_reply_size);
+			RETURN(-ENOMEM);
+		}
+
		/* failed alloc, try emergency pool */
-		rs = lustre_get_emerg_rs(req->rq_rqbd->rqbd_service);
+		rs = lustre_get_emerg_rs(svcpt);
		if (rs == NULL)
			RETURN(-ENOMEM);

@@ -1640,6 +2147,12 @@ int sptlrpc_svc_alloc_rs(struct ptlrpc_request *req,
	RETURN(rc);
 }

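A sketch of where the server-side entry points above and below sit in request processing (ordering only; the real call sites live in the ptlrpc service code, and error handling is elided):

	rc = sptlrpc_svc_unwrap_request(req);		/* first, on arrival */
	if (rc == SECSVC_OK) {
		/* ... handle the request via req->rq_reqmsg ... */
		rc = sptlrpc_svc_alloc_rs(req, msglen);
		/* ... fill in req->rq_reply_state->rs_msg ... */
		rc = sptlrpc_svc_wrap_reply(req);	/* just before sending */
	}
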
+/**
+ * Used by ptlrpc server, to perform transformation upon the reply message.
+ *
+ * \post req->rq_reply_off is set to the appropriate server-controlled reply
+ * offset.
+ * \post req->rq_repmsg and req->rq_reply_state->rs_msg become inaccessible.
+ */
 int sptlrpc_svc_wrap_reply(struct ptlrpc_request *req)
 {
	struct ptlrpc_sec_policy *policy;
@@ -1658,6 +2171,9 @@ int sptlrpc_svc_wrap_reply(struct ptlrpc_request *req)
	RETURN(rc);
 }

+/**
+ * Used by ptlrpc server, to free the reply_state.
+ */
 void sptlrpc_svc_free_rs(struct ptlrpc_reply_state *rs)
 {
	struct ptlrpc_sec_policy *policy;
@@ -1680,28 +2196,25 @@ void sptlrpc_svc_free_rs(struct ptlrpc_reply_state *rs)

 void sptlrpc_svc_ctx_addref(struct ptlrpc_request *req)
 {
-	struct ptlrpc_svc_ctx *ctx = req->rq_svc_ctx;
-
-	if (ctx == NULL)
-		return;
+	struct ptlrpc_svc_ctx *ctx = req->rq_svc_ctx;

-	LASSERT(atomic_read(&ctx->sc_refcount) > 0);
-	atomic_inc(&ctx->sc_refcount);
+	if (ctx != NULL)
+		atomic_inc(&ctx->sc_refcount);
 }

 void sptlrpc_svc_ctx_decref(struct ptlrpc_request *req)
 {
-	struct ptlrpc_svc_ctx *ctx = req->rq_svc_ctx;
+	struct ptlrpc_svc_ctx *ctx = req->rq_svc_ctx;

-	if (ctx == NULL)
-		return;
+	if (ctx == NULL)
+		return;

-	LASSERT(atomic_read(&ctx->sc_refcount) > 0);
-	if (atomic_dec_and_test(&ctx->sc_refcount)) {
-		if (ctx->sc_policy->sp_sops->free_ctx)
-			ctx->sc_policy->sp_sops->free_ctx(ctx);
-	}
-	req->rq_svc_ctx = NULL;
+	LASSERT_ATOMIC_POS(&ctx->sc_refcount);
+	if (atomic_dec_and_test(&ctx->sc_refcount)) {
+		if (ctx->sc_policy->sp_sops->free_ctx)
+			ctx->sc_policy->sp_sops->free_ctx(ctx);
+	}
+	req->rq_svc_ctx = NULL;
 }

 void sptlrpc_svc_ctx_invalidate(struct ptlrpc_request *req)
@@ -1711,7 +2224,7 @@ void sptlrpc_svc_ctx_invalidate(struct ptlrpc_request *req)
	if (ctx == NULL)
		return;

-	LASSERT(atomic_read(&ctx->sc_refcount) > 0);
+	LASSERT_ATOMIC_POS(&ctx->sc_refcount);
	if (ctx->sc_policy->sp_sops->invalidate_ctx)
		ctx->sc_policy->sp_sops->invalidate_ctx(ctx);
 }
@@ -1721,16 +2234,20 @@ EXPORT_SYMBOL(sptlrpc_svc_ctx_invalidate);
 * bulk security                        *
 ****************************************/

+/**
+ * Perform transformation upon the bulk data pointed to by \a desc. This is
+ * called before transforming the request message.
+ */
 int sptlrpc_cli_wrap_bulk(struct ptlrpc_request *req,
			  struct ptlrpc_bulk_desc *desc)
 {
	struct ptlrpc_cli_ctx *ctx;

-	if (!SEC_FLAVOR_HAS_BULK(req->rq_sec_flavor))
-		return 0;
-
	LASSERT(req->rq_bulk_read || req->rq_bulk_write);

+	if (!req->rq_pack_bulk)
+		return 0;
+
	ctx = req->rq_cli_ctx;
	if (ctx->cc_ops->wrap_bulk)
		return ctx->cc_ops->wrap_bulk(ctx, req, desc);
@@ -1738,91 +2255,81 @@ int sptlrpc_cli_wrap_bulk(struct ptlrpc_request *req,
 }
 EXPORT_SYMBOL(sptlrpc_cli_wrap_bulk);

-static
-void pga_to_bulk_desc(int nob, obd_count pg_count, struct brw_page **pga,
-                      struct ptlrpc_bulk_desc *desc)
-{
-	int i;
-
-	LASSERT(pga);
-	LASSERT(*pga);
-
-	for (i = 0; i < pg_count && nob > 0; i++) {
-#ifdef __KERNEL__
-		desc->bd_iov[i].kiov_page = pga[i]->pg;
-		desc->bd_iov[i].kiov_len = pga[i]->count > nob ?
-					   nob : pga[i]->count;
-		desc->bd_iov[i].kiov_offset = pga[i]->off & ~CFS_PAGE_MASK;
-#else
-#warning FIXME for liblustre!
-		desc->bd_iov[i].iov_base = pga[i]->pg->addr;
-		desc->bd_iov[i].iov_len = pga[i]->count > nob ?
-					  nob : pga[i]->count;
-#endif
-
-		desc->bd_iov_count++;
-		nob -= pga[i]->count;
-	}
-}
-
+/**
+ * This is called after unwrapping the reply message.
+ * Return the number of bytes (nob) of actual plain text received, or an
+ * error code.
+ */
 int sptlrpc_cli_unwrap_bulk_read(struct ptlrpc_request *req,
-				 int nob, obd_count pg_count,
-				 struct brw_page **pga)
+				 struct ptlrpc_bulk_desc *desc,
+				 int nob)
 {
-	struct ptlrpc_bulk_desc *desc;
-	struct ptlrpc_cli_ctx *ctx;
-	int rc = 0;
-
-	if (!SEC_FLAVOR_HAS_BULK(req->rq_sec_flavor))
-		return 0;
+	struct ptlrpc_cli_ctx *ctx;
+	int rc;

	LASSERT(req->rq_bulk_read && !req->rq_bulk_write);

-	OBD_ALLOC(desc, offsetof(struct ptlrpc_bulk_desc, bd_iov[pg_count]));
-	if (desc == NULL) {
-		CERROR("out of memory, can't verify bulk read data\n");
-		return -ENOMEM;
-	}
-
-	pga_to_bulk_desc(nob, pg_count, pga, desc);
+	if (!req->rq_pack_bulk)
+		return desc->bd_nob_transferred;

	ctx = req->rq_cli_ctx;
-	if (ctx->cc_ops->unwrap_bulk)
+	if (ctx->cc_ops->unwrap_bulk) {
		rc = ctx->cc_ops->unwrap_bulk(ctx, req, desc);
-
-	OBD_FREE(desc, offsetof(struct ptlrpc_bulk_desc, bd_iov[pg_count]));
-
-	return rc;
+		if (rc < 0)
+			return rc;
+	}
+	return desc->bd_nob_transferred;
 }
 EXPORT_SYMBOL(sptlrpc_cli_unwrap_bulk_read);

+/**
+ * This is called after unwrapping the reply message.
+ * Return 0 for success, or an error code.
+ */
 int sptlrpc_cli_unwrap_bulk_write(struct ptlrpc_request *req,
				  struct ptlrpc_bulk_desc *desc)
 {
-	struct ptlrpc_cli_ctx *ctx;
-
-	if (!SEC_FLAVOR_HAS_BULK(req->rq_sec_flavor))
-		return 0;
+	struct ptlrpc_cli_ctx *ctx;
+	int rc;

	LASSERT(!req->rq_bulk_read && req->rq_bulk_write);

+	if (!req->rq_pack_bulk)
+		return 0;
+
	ctx = req->rq_cli_ctx;
-	if (ctx->cc_ops->unwrap_bulk)
-		return ctx->cc_ops->unwrap_bulk(ctx, req, desc);
+	if (ctx->cc_ops->unwrap_bulk) {
+		rc = ctx->cc_ops->unwrap_bulk(ctx, req, desc);
+		if (rc < 0)
+			return rc;
+	}

-	return 0;
+	/*
+	 * if everything is going right, nob should equal nob_transferred.
+	 * in case of privacy mode, nob_transferred needs to be adjusted.
+	 */
+	if (desc->bd_nob != desc->bd_nob_transferred) {
+		CERROR("nob %d doesn't match transferred nob %d\n",
+		       desc->bd_nob, desc->bd_nob_transferred);
+		return -EPROTO;
+	}
+
+	return 0;
 }
 EXPORT_SYMBOL(sptlrpc_cli_unwrap_bulk_write);

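A sketch of how a client I/O path might drive the bulk hooks above (hypothetical caller; the byte count passed to the read-side unwrap would come from the server's reply):

	rc = sptlrpc_cli_wrap_bulk(req, desc);	/* before wrapping the request */

	/* after the reply message itself has been unwrapped: */
	if (req->rq_bulk_read)
		nob = sptlrpc_cli_unwrap_bulk_read(req, desc, server_nob);
	else
		rc = sptlrpc_cli_unwrap_bulk_write(req, desc);
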
+#ifdef HAVE_SERVER_SUPPORT
+/**
+ * Perform transformation upon an outgoing bulk read.
+ */
 int sptlrpc_svc_wrap_bulk(struct ptlrpc_request *req,
			  struct ptlrpc_bulk_desc *desc)
 {
	struct ptlrpc_svc_ctx *ctx;

-	if (!SEC_FLAVOR_HAS_BULK(req->rq_sec_flavor))
-		return 0;
+	LASSERT(req->rq_bulk_read);

-	LASSERT(req->rq_bulk_read || req->rq_bulk_write);
+	if (!req->rq_pack_bulk)
+		return 0;

	ctx = req->rq_svc_ctx;
	if (ctx->sc_policy->sp_sops->wrap_bulk)
@@ -1832,24 +2339,67 @@ int sptlrpc_svc_wrap_bulk(struct ptlrpc_request *req,
 }
 EXPORT_SYMBOL(sptlrpc_svc_wrap_bulk);

+/**
+ * Perform transformation upon an incoming bulk write.
+ */
 int sptlrpc_svc_unwrap_bulk(struct ptlrpc_request *req,
			    struct ptlrpc_bulk_desc *desc)
 {
	struct ptlrpc_svc_ctx *ctx;
+	int rc;

-	if (!SEC_FLAVOR_HAS_BULK(req->rq_sec_flavor))
-		return 0;
+	LASSERT(req->rq_bulk_write);

-	LASSERT(req->rq_bulk_read || req->rq_bulk_write);
+	/*
+	 * if it's in privacy mode, transferred should be >= expected;
+	 * otherwise transferred should be == expected.
+	 */
+	if (desc->bd_nob_transferred < desc->bd_nob ||
+	    (desc->bd_nob_transferred > desc->bd_nob &&
+	     SPTLRPC_FLVR_BULK_SVC(req->rq_flvr.sf_rpc) !=
+	     SPTLRPC_BULK_SVC_PRIV)) {
+		DEBUG_REQ(D_ERROR, req, "truncated bulk GET %d(%d)",
+			  desc->bd_nob_transferred, desc->bd_nob);
+		return -ETIMEDOUT;
+	}
+
+	if (!req->rq_pack_bulk)
+		return 0;

	ctx = req->rq_svc_ctx;
-	if (ctx->sc_policy->sp_sops->unwrap_bulk);
-		return ctx->sc_policy->sp_sops->unwrap_bulk(req, desc);
+	if (ctx->sc_policy->sp_sops->unwrap_bulk) {
+		rc = ctx->sc_policy->sp_sops->unwrap_bulk(req, desc);
+		if (rc)
+			CERROR("error unwrapping bulk: %d\n", rc);
+	}

+	/* return 0 to allow the reply to be sent */
	return 0;
 }
 EXPORT_SYMBOL(sptlrpc_svc_unwrap_bulk);

+/**
+ * Prepare buffers for an incoming bulk write.
+ */
+int sptlrpc_svc_prep_bulk(struct ptlrpc_request *req,
+			  struct ptlrpc_bulk_desc *desc)
+{
+	struct ptlrpc_svc_ctx *ctx;
+
+	LASSERT(req->rq_bulk_write);
+
+	if (!req->rq_pack_bulk)
+		return 0;
+
+	ctx = req->rq_svc_ctx;
+	if (ctx->sc_policy->sp_sops->prep_bulk)
+		return ctx->sc_policy->sp_sops->prep_bulk(req, desc);
+
+	return 0;
+}
+EXPORT_SYMBOL(sptlrpc_svc_prep_bulk);
+
+#endif /* HAVE_SERVER_SUPPORT */

 /****************************************
 * user descriptor helpers              *
@@ -1859,45 +2409,44 @@ int sptlrpc_current_user_desc_size(void)
 {
	int ngroups;

-#ifdef __KERNEL__
	ngroups = current_ngroups;

	if (ngroups > LUSTRE_MAX_GROUPS)
		ngroups = LUSTRE_MAX_GROUPS;
-#else
-	ngroups = 0;
-#endif
	return sptlrpc_user_desc_size(ngroups);
 }
 EXPORT_SYMBOL(sptlrpc_current_user_desc_size);

 int sptlrpc_pack_user_desc(struct lustre_msg *msg, int offset)
 {
-	struct ptlrpc_user_desc *pud;
+	struct ptlrpc_user_desc *pud;

-	pud = lustre_msg_buf(msg, offset, 0);
-
-	pud->pud_uid = cfs_current()->uid;
-	pud->pud_gid = cfs_current()->gid;
-	pud->pud_fsuid = cfs_current()->fsuid;
-	pud->pud_fsgid = cfs_current()->fsgid;
-	pud->pud_cap = cfs_current()->cap_effective;
-	pud->pud_ngroups = (msg->lm_buflens[offset] - sizeof(*pud)) / 4;
-
-#ifdef __KERNEL__
-	task_lock(current);
-	if (pud->pud_ngroups > current_ngroups)
-		pud->pud_ngroups = current_ngroups;
-	memcpy(pud->pud_groups, cfs_current()->group_info->blocks[0],
-	       pud->pud_ngroups * sizeof(__u32));
-	task_unlock(current);
-#endif
+	pud = lustre_msg_buf(msg, offset, 0);

-	return 0;
+	pud->pud_uid = from_kuid(&init_user_ns, current_uid());
+	pud->pud_gid = from_kgid(&init_user_ns, current_gid());
+	pud->pud_fsuid = from_kuid(&init_user_ns, current_fsuid());
+	pud->pud_fsgid = from_kgid(&init_user_ns, current_fsgid());
+	pud->pud_cap = cfs_curproc_cap_pack();
+	pud->pud_ngroups = (msg->lm_buflens[offset] - sizeof(*pud)) / 4;
+
+	task_lock(current);
+	if (pud->pud_ngroups > current_ngroups)
+		pud->pud_ngroups = current_ngroups;
+#ifdef HAVE_GROUP_INFO_GID
+	memcpy(pud->pud_groups, current_cred()->group_info->gid,
+	       pud->pud_ngroups * sizeof(__u32));
+#else /* !HAVE_GROUP_INFO_GID */
+	memcpy(pud->pud_groups, current_cred()->group_info->blocks[0],
+	       pud->pud_ngroups * sizeof(__u32));
+#endif /* HAVE_GROUP_INFO_GID */
+	task_unlock(current);
+
+	return 0;
 }
 EXPORT_SYMBOL(sptlrpc_pack_user_desc);

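A sketch of the intended use of the user descriptor helpers above (hypothetical message and offset names; buffer sizing elided):

	int size = sptlrpc_current_user_desc_size();

	/* reserve `size` bytes for the descriptor segment when the request
	 * message is sized, then fill it in: */
	rc = sptlrpc_pack_user_desc(msg, offset);
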
-int sptlrpc_unpack_user_desc(struct lustre_msg *msg, int offset)
+int sptlrpc_unpack_user_desc(struct lustre_msg *msg, int offset, int swabbed)
 {
	struct ptlrpc_user_desc *pud;
	int i;
@@ -1906,7 +2455,7 @@ int sptlrpc_unpack_user_desc(struct lustre_msg *msg, int offset)
	if (!pud)
		return -EINVAL;

-	if (lustre_msg_swabbed(msg)) {
+	if (swabbed) {
		__swab32s(&pud->pud_uid);
		__swab32s(&pud->pud_gid);
		__swab32s(&pud->pud_fsuid);
@@ -1927,7 +2476,7 @@ int sptlrpc_unpack_user_desc(struct lustre_msg *msg, int offset)
		return -EINVAL;
	}

-	if (lustre_msg_swabbed(msg)) {
+	if (swabbed) {
		for (i = 0; i < pud->pud_ngroups; i++)
			__swab32s(&pud->pud_groups[i]);
	}
@@ -1937,232 +2486,38 @@ int sptlrpc_unpack_user_desc(struct lustre_msg *msg, int offset)
 EXPORT_SYMBOL(sptlrpc_unpack_user_desc);

 /****************************************
- * user supplied flavor string parsing  *
+ * misc helpers                         *
 ****************************************/

-static
-int get_default_flavor(enum lustre_part to_part, struct sec_flavor_config *conf)
-{
-	conf->sfc_bulk_priv = BULK_PRIV_ALG_NULL;
-	conf->sfc_bulk_csum = BULK_CSUM_ALG_NULL;
-	conf->sfc_flags = 0;
-
-	switch (to_part) {
-	case LUSTRE_MDT:
-		conf->sfc_rpc_flavor = SPTLRPC_FLVR_PLAIN;
-		return 0;
-	case LUSTRE_OST:
-		conf->sfc_rpc_flavor = SPTLRPC_FLVR_NULL;
-		return 0;
-	default:
-		CERROR("Unknown to lustre part %d, apply defaults\n", to_part);
-		conf->sfc_rpc_flavor = SPTLRPC_FLVR_NULL;
-		return -EINVAL;
-	}
-}
-
-static
-void get_flavor_by_rpc(__u32 rpc_flavor, struct sec_flavor_config *conf)
-{
-	conf->sfc_rpc_flavor = rpc_flavor;
-	conf->sfc_bulk_priv = BULK_PRIV_ALG_NULL;
-	conf->sfc_bulk_csum = BULK_CSUM_ALG_NULL;
-	conf->sfc_flags = 0;
-
-	switch (rpc_flavor) {
-	case SPTLRPC_FLVR_NULL:
-	case SPTLRPC_FLVR_PLAIN:
-		break;
-	case SPTLRPC_FLVR_KRB5P:
-		conf->sfc_bulk_priv = BULK_PRIV_ALG_ARC4;
-		/* fall through */
-	case SPTLRPC_FLVR_KRB5I:
-		conf->sfc_bulk_csum = BULK_CSUM_ALG_SHA1;
-		break;
-	default:
-		LBUG();
-	}
-}
-
-static
-void get_flavor_by_rpc_bulk(__u32 rpc_flavor, int bulk_priv,
-                            struct sec_flavor_config *conf)
+const char * sec2target_str(struct ptlrpc_sec *sec)
 {
-	if (bulk_priv)
-		conf->sfc_bulk_priv = BULK_PRIV_ALG_ARC4;
-	else
-		conf->sfc_bulk_priv = BULK_PRIV_ALG_NULL;
-
-	switch (rpc_flavor) {
-	case SPTLRPC_FLVR_PLAIN:
-		conf->sfc_bulk_csum = BULK_CSUM_ALG_MD5;
-		break;
-	case SPTLRPC_FLVR_KRB5I:
-	case SPTLRPC_FLVR_KRB5P:
-		conf->sfc_bulk_csum = BULK_CSUM_ALG_SHA1;
-		break;
-	default:
-		LBUG();
-	}
+	if (!sec || !sec->ps_import || !sec->ps_import->imp_obd)
+		return "*";
+	if (sec_is_reverse(sec))
+		return "c";
+	return obd_uuid2str(&sec->ps_import->imp_obd->u.cli.cl_target_uuid);
 }
-
-static __u32 __flavors[] = {
-	SPTLRPC_FLVR_NULL,
-	SPTLRPC_FLVR_PLAIN,
-	SPTLRPC_FLVR_KRB5I,
-	SPTLRPC_FLVR_KRB5P,
-};
-
-#define __nflavors	(sizeof(__flavors)/sizeof(__u32))
+EXPORT_SYMBOL(sec2target_str);

 /*
- * flavor string format: rpc[-bulk{n|i|p}[:cksum/enc]]
- * for examples:
- *  null
- *  plain-bulki
- *  krb5p-bulkn
- *  krb5i-bulkp
- *  krb5i-bulkp:sha512/arc4
+ * return true if the bulk data is protected
 */
-int sptlrpc_parse_flavor(enum lustre_part from_part, enum lustre_part to_part,
-                         char *str, struct sec_flavor_config *conf)
+int sptlrpc_flavor_has_bulk(struct sptlrpc_flavor *flvr)
 {
-	char *f, *bulk, *alg, *enc;
-	char buf[64];
-	int i, bulk_priv;
-	ENTRY;
-
-	if (str == NULL) {
-		if (get_default_flavor(to_part, conf))
-			return -EINVAL;
-		goto set_flags;
-	}
-
-	for (i = 0; i < __nflavors; i++) {
-		f = sptlrpc_flavor2name(__flavors[i]);
-		if (strncmp(str, f, strlen(f)) == 0)
-			break;
-	}
-
-	if (i >= __nflavors)
-		GOTO(invalid, -EINVAL);
-
-	/* prepare local buffer thus we can modify it as we want */
-	strncpy(buf, str, 64);
-	buf[64 - 1] = '\0';
-
-	/* find bulk string */
-	bulk = strchr(buf, '-');
-	if (bulk)
-		*bulk++ = '\0';
-
-	/* now the first part must equal to rpc flavor name */
-	if (strcmp(buf, f) != 0)
-		GOTO(invalid, -EINVAL);
-
-	get_flavor_by_rpc(__flavors[i], conf);
-
-	if (bulk == NULL)
-		goto set_flags;
-
-	/* null flavor should not have any suffix */
-	if (__flavors[i] == SPTLRPC_FLVR_NULL)
-		GOTO(invalid, -EINVAL);
-
-	/* find bulk algorithm string */
-	alg = strchr(bulk, ':');
-	if (alg)
-		*alg++ = '\0';
-
-	/* verify bulk section */
-	if (strcmp(bulk, "bulkn") == 0) {
-		conf->sfc_bulk_csum = BULK_CSUM_ALG_NULL;
-		conf->sfc_bulk_priv = BULK_PRIV_ALG_NULL;
-		goto set_flags;
-	}
-
-	if (strcmp(bulk, "bulki") == 0)
-		bulk_priv = 0;
-	else if (strcmp(bulk, "bulkp") == 0)
-		bulk_priv = 1;
-	else
-		GOTO(invalid, -EINVAL);
-
-	/* plain policy dosen't support bulk encryption */
-	if (bulk_priv && __flavors[i] == SPTLRPC_FLVR_PLAIN)
-		GOTO(invalid, -EINVAL);
-
-	get_flavor_by_rpc_bulk(__flavors[i], bulk_priv, conf);
-
-	if (alg == NULL)
-		goto set_flags;
-
-	/* find encryption algorithm string */
-	enc = strchr(alg, '/');
-	if (enc)
-		*enc++ = '\0';
-
-	/* bulk combination sanity check */
-	if ((bulk_priv && enc == NULL) || (bulk_priv == 0 && enc))
-		GOTO(invalid, -EINVAL);
-
-	/* checksum algorithm */
-	for (i = 0; i < BULK_CSUM_ALG_MAX; i++) {
-		if (strcmp(alg, sptlrpc_bulk_csum_alg2name(i)) == 0) {
-			conf->sfc_bulk_csum = i;
-			break;
-		}
-	}
-	if (i >= BULK_CSUM_ALG_MAX)
-		GOTO(invalid, -EINVAL);
-
-	/* privacy algorithm */
-	if (enc) {
-		if (strcmp(enc, "arc4") != 0)
-			GOTO(invalid, -EINVAL);
-		conf->sfc_bulk_priv = BULK_PRIV_ALG_ARC4;
+	switch (SPTLRPC_FLVR_BULK_SVC(flvr->sf_rpc)) {
+	case SPTLRPC_BULK_SVC_INTG:
+	case SPTLRPC_BULK_SVC_PRIV:
+		return 1;
+	default:
+		return 0;
	}
-
-set_flags:
-	/* * set ROOTONLY flag:
-	 *   - to OST
-	 *   - from MDT to MDT
-	 * * set BULK flag for:
-	 *   - from CLI to OST
-	 */
-	if (to_part == LUSTRE_OST ||
-	    (from_part == LUSTRE_MDT && to_part == LUSTRE_MDT))
-		conf->sfc_flags |= PTLRPC_SEC_FL_ROOTONLY;
-	if (from_part == LUSTRE_CLI && to_part == LUSTRE_OST)
-		conf->sfc_flags |= PTLRPC_SEC_FL_BULK;
-
-#ifdef __BIG_ENDIAN
-	__swab32s(&conf->sfc_rpc_flavor);
-	__swab32s(&conf->sfc_bulk_csum);
-	__swab32s(&conf->sfc_bulk_priv);
-	__swab32s(&conf->sfc_flags);
-#endif
-	return 0;
-invalid:
-	CERROR("invalid flavor string: %s\n", str);
-	return -EINVAL;
 }
-EXPORT_SYMBOL(sptlrpc_parse_flavor);
+EXPORT_SYMBOL(sptlrpc_flavor_has_bulk);

 /****************************************
- * misc helpers                         *
+ * crypto API helper/alloc blkcipher    *
 ****************************************/

-const char * sec2target_str(struct ptlrpc_sec *sec)
-{
-	if (!sec || !sec->ps_import || !sec->ps_import->imp_obd)
-		return "*";
-	if (sec->ps_flags & PTLRPC_SEC_FL_REVERSE)
-		return "c";
-	return obd_uuid2str(&sec->ps_import->imp_obd->u.cli.cl_target_uuid);
-}
-EXPORT_SYMBOL(sec2target_str);
-
 /****************************************
 * initialize/finalize                  *
 ****************************************/
@@ -2171,10 +2526,20 @@ int sptlrpc_init(void)
 {
	int rc;

-	rc = sptlrpc_enc_pool_init();
+	rwlock_init(&policy_lock);
+
+	rc = sptlrpc_gc_init();
	if (rc)
		goto out;

+	rc = sptlrpc_conf_init();
+	if (rc)
+		goto out_gc;
+
+	rc = sptlrpc_enc_pool_init();
+	if (rc)
+		goto out_conf;
+
	rc = sptlrpc_null_init();
	if (rc)
		goto out_pool;
@@ -2195,6 +2560,10 @@ out_null:
	sptlrpc_null_fini();
 out_pool:
	sptlrpc_enc_pool_fini();
+out_conf:
+	sptlrpc_conf_fini();
+out_gc:
+	sptlrpc_gc_fini();
 out:
	return rc;
 }
@@ -2205,4 +2574,6 @@ void sptlrpc_fini(void)
	sptlrpc_plain_fini();
	sptlrpc_null_fini();
	sptlrpc_enc_pool_fini();
+	sptlrpc_conf_fini();
+	sptlrpc_gc_fini();
 }