Whamcloud - gitweb
Branch HEAD
[fs/lustre-release.git] / lustre / ptlrpc / sec.c
index 5bd1768..977ac89 100644 (file)
@@ -1,23 +1,41 @@
 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
  * vim:expandtab:shiftwidth=8:tabstop=8:
  *
- * Copyright (C) 2004-2007 Cluster File Systems, Inc.
- *   Author: Eric Mei <ericm@clusterfs.com>
+ * GPL HEADER START
  *
- *   This file is part of Lustre, http://www.lustre.org.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
  *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
  *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright  2008 Sun Microsystems, Inc. All rights reserved
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/ptlrpc/sec.c
+ *
+ * Author: Eric Mei <ericm@clusterfs.com>
  */
 
 #ifndef EXPORT_SYMTAB
@@ -48,7 +66,7 @@
  * policy registers                            *
  ***********************************************/
 
-static rwlock_t policy_lock = RW_LOCK_UNLOCKED;
+static rwlock_t policy_lock;
 static struct ptlrpc_sec_policy *policies[SPTLRPC_POLICY_MAX] = {
         NULL,
 };
@@ -110,30 +128,30 @@ struct ptlrpc_sec_policy * sptlrpc_rpcflavor2policy(__u16 flavor)
         if (number >= SPTLRPC_POLICY_MAX)
                 return NULL;
 
-again:
-        read_lock(&policy_lock);
-        policy = policies[number];
-        if (policy && !try_module_get(policy->sp_owner))
-                policy = NULL;
-        if (policy == NULL)
-                flag = atomic_read(&loaded);
-        read_unlock(&policy_lock);
-
-        /* if failure, try to load gss module, once */
-        if (unlikely(policy == NULL) && flag == 0 &&
-            number == SPTLRPC_POLICY_GSS) {
+        while (1) {
+                read_lock(&policy_lock);
+                policy = policies[number];
+                if (policy && !try_module_get(policy->sp_owner))
+                        policy = NULL;
+                if (policy == NULL)
+                        flag = atomic_read(&loaded);
+                read_unlock(&policy_lock);
+
+                if (policy != NULL || flag != 0 ||
+                    number != SPTLRPC_POLICY_GSS)
+                        break;
+
+                /* try to load gss module, once */
                 mutex_down(&load_mutex);
                 if (atomic_read(&loaded) == 0) {
-                        if (request_module("ptlrpc_gss") != 0)
-                                CERROR("Unable to load module ptlrpc_gss\n");
-                        else
+                        if (request_module("ptlrpc_gss") == 0)
                                 CWARN("module ptlrpc_gss loaded on demand\n");
+                        else
+                                CERROR("Unable to load module ptlrpc_gss\n");
 
                         atomic_set(&loaded, 1);
                 }
                 mutex_up(&load_mutex);
-
-                goto again;
         }
 
         return policy;
@@ -147,6 +165,8 @@ __u16 sptlrpc_name2rpcflavor(const char *name)
                 return SPTLRPC_FLVR_PLAIN;
         if (!strcmp(name, "krb5n"))
                 return SPTLRPC_FLVR_KRB5N;
+        if (!strcmp(name, "krb5a"))
+                return SPTLRPC_FLVR_KRB5A;
         if (!strcmp(name, "krb5i"))
                 return SPTLRPC_FLVR_KRB5I;
         if (!strcmp(name, "krb5p"))
@@ -463,7 +483,7 @@ int sptlrpc_req_replace_dead_ctx(struct ptlrpc_request *req)
                 CWARN("ctx (%p, fl %lx) doesn't switch, relax a little bit\n",
                       newctx, newctx->cc_flags);
 
-                schedule_timeout(HZ);
+                cfs_schedule_timeout(CFS_TASK_INTERRUPTIBLE, HZ);
         } else {
                 rc = sptlrpc_req_ctx_switch(req, oldctx, newctx);
                 if (rc) {
@@ -789,7 +809,7 @@ int sptlrpc_import_check_ctx(struct obd_import *imp)
         spin_lock_init(&req->rq_lock);
         atomic_set(&req->rq_refcount, 10000);
         CFS_INIT_LIST_HEAD(&req->rq_ctx_chain);
-        init_waitqueue_head(&req->rq_reply_waitq);
+        cfs_waitq_init(&req->rq_reply_waitq);
         req->rq_import = imp;
         req->rq_cli_ctx = ctx;
 
@@ -844,66 +864,45 @@ int sptlrpc_cli_wrap_request(struct ptlrpc_request *req)
         RETURN(rc);
 }
 
-/*
- * rq_nob_received is the actual received data length
- */
-int sptlrpc_cli_unwrap_reply(struct ptlrpc_request *req)
+static int do_cli_unwrap_reply(struct ptlrpc_request *req)
 {
         struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
         int                    rc;
+        __u16                  rpc_flvr;
         ENTRY;
 
         LASSERT(ctx);
         LASSERT(ctx->cc_sec);
-        LASSERT(ctx->cc_ops);
         LASSERT(req->rq_repbuf);
+        LASSERT(req->rq_repdata);
+        LASSERT(req->rq_repmsg == NULL);
 
-        req->rq_repdata_len = req->rq_nob_received;
-
-        if (req->rq_nob_received < sizeof(struct lustre_msg)) {
+        if (req->rq_repdata_len < sizeof(struct lustre_msg)) {
                 CERROR("replied data length %d too small\n",
-                       req->rq_nob_received);
+                       req->rq_repdata_len);
                 RETURN(-EPROTO);
         }
 
+        /* v2 message, check request/reply policy match */
+        rpc_flvr = WIRE_FLVR_RPC(req->rq_repdata->lm_secflvr);
 
-        if (req->rq_repbuf->lm_magic == LUSTRE_MSG_MAGIC_V1 ||
-            req->rq_repbuf->lm_magic == LUSTRE_MSG_MAGIC_V1_SWABBED) {
-                /*
-                 * v1 message, it's must be null flavor, so our requets also
-                 * should be in null flavor
-                 */
-                if (RPC_FLVR_POLICY(req->rq_flvr.sf_rpc) !=
-                    SPTLRPC_POLICY_NULL) {
-                        CERROR("request was %s but reply with null\n",
-                               sptlrpc_rpcflavor2name(req->rq_flvr.sf_rpc));
-                        RETURN(-EPROTO);
-                }
-        } else {
-                /*
-                 * v2 message, check request/reply policy match
-                 */
-                __u16 rpc_flvr = WIRE_FLVR_RPC(req->rq_repbuf->lm_secflvr);
-
-                if (req->rq_repbuf->lm_magic == LUSTRE_MSG_MAGIC_V2_SWABBED)
-                        __swab16s(&rpc_flvr);
+        if (req->rq_repdata->lm_magic == LUSTRE_MSG_MAGIC_V2_SWABBED)
+                __swab16s(&rpc_flvr);
 
-                if (RPC_FLVR_POLICY(rpc_flvr) !=
-                    RPC_FLVR_POLICY(req->rq_flvr.sf_rpc)) {
-                        CERROR("request policy was %u while reply with %u\n",
-                               RPC_FLVR_POLICY(req->rq_flvr.sf_rpc),
-                               RPC_FLVR_POLICY(rpc_flvr));
-                        RETURN(-EPROTO);
-                }
-
-                /* do nothing if it's null policy; otherwise unpack the
-                 * wrapper message
-                 */
-                if (RPC_FLVR_POLICY(rpc_flvr) != SPTLRPC_POLICY_NULL &&
-                    lustre_unpack_msg(req->rq_repbuf, req->rq_nob_received))
-                        RETURN(-EPROTO);
+        if (RPC_FLVR_POLICY(rpc_flvr) !=
+            RPC_FLVR_POLICY(req->rq_flvr.sf_rpc)) {
+                CERROR("request policy was %u while reply with %u\n",
+                       RPC_FLVR_POLICY(req->rq_flvr.sf_rpc),
+                       RPC_FLVR_POLICY(rpc_flvr));
+                RETURN(-EPROTO);
         }
 
+        /* do nothing if it's null policy; otherwise unpack the
+         * wrapper message */
+        if (RPC_FLVR_POLICY(rpc_flvr) != SPTLRPC_POLICY_NULL &&
+            lustre_unpack_msg(req->rq_repdata, req->rq_repdata_len))
+                RETURN(-EPROTO);
+
         switch (RPC_FLVR_SVC(req->rq_flvr.sf_rpc)) {
         case SPTLRPC_SVC_NULL:
         case SPTLRPC_SVC_AUTH:
@@ -923,6 +922,139 @@ int sptlrpc_cli_unwrap_reply(struct ptlrpc_request *req)
         RETURN(rc);
 }
 
+/*
+ * By the time this is called the reply buffer should have been un-posted,
+ * so its contents can no longer change.
+ */
+int sptlrpc_cli_unwrap_reply(struct ptlrpc_request *req)
+{
+        LASSERT(req->rq_repbuf);
+        LASSERT(req->rq_repdata == NULL);
+        LASSERT(req->rq_repmsg == NULL);
+        LASSERT(req->rq_reply_off + req->rq_nob_received <= req->rq_repbuf_len);
+
+        if (req->rq_reply_off == 0 &&
+            (lustre_msghdr_get_flags(req->rq_reqmsg) & MSGHDR_AT_SUPPORT)) {
+                CERROR("real reply with offset 0\n");
+                return -EPROTO;
+        }
+
+        if (req->rq_reply_off % 8 != 0) {
+                CERROR("reply at odd offset %u\n", req->rq_reply_off);
+                return -EPROTO;
+        }
+
+        req->rq_repdata = (struct lustre_msg *)
+                                (req->rq_repbuf + req->rq_reply_off);
+        req->rq_repdata_len = req->rq_nob_received;
+
+        return do_cli_unwrap_reply(req);
+}
+
+/**
+ * When this is called the receive buffer might still be posted, so the reply
+ * data may change at any time, whether or not we are holding rq_lock. We
+ * expect rq_reply_off to be 0 and rq_nob_received to be the early reply size.
+ *
+ * We allocate a separate ptlrpc_request and reply buffer for early reply
+ * processing. On success return 0 and set @req_ret to the duplicated
+ * ptlrpc_request; the caller must then call sptlrpc_cli_finish_early_reply()
+ * on it to release it. If anything goes wrong @req_ret is not set.
+ */
+int sptlrpc_cli_unwrap_early_reply(struct ptlrpc_request *req,
+                                   struct ptlrpc_request **req_ret)
+{
+        struct ptlrpc_request  *early_req;
+        char                   *early_buf;
+        int                     early_bufsz, early_size;
+        int                     rc;
+        ENTRY;
+
+        OBD_ALLOC_PTR(early_req);
+        if (early_req == NULL)
+                RETURN(-ENOMEM);
+
+        early_size = req->rq_nob_received;
+        early_bufsz = size_roundup_power2(early_size);
+        OBD_ALLOC(early_buf, early_bufsz);
+        if (early_buf == NULL)
+                GOTO(err_req, rc = -ENOMEM);
+
+        /* sanity-check and copy the data out while holding the spinlock */
+        spin_lock(&req->rq_lock);
+
+        if (req->rq_replied) {
+                spin_unlock(&req->rq_lock);
+                GOTO(err_buf, rc = -EALREADY);
+        }
+
+        LASSERT(req->rq_repbuf);
+        LASSERT(req->rq_repdata == NULL);
+        LASSERT(req->rq_repmsg == NULL);
+
+        if (req->rq_reply_off != 0) {
+                CERROR("early reply with offset %u\n", req->rq_reply_off);
+                spin_unlock(&req->rq_lock);
+                GOTO(err_buf, rc = -EPROTO);
+        }
+
+        if (req->rq_nob_received != early_size) {
+                /* even if another early reply arrived the size should match */
+                CERROR("data size has changed from %u to %u\n",
+                       early_size, req->rq_nob_received);
+                spin_unlock(&req->rq_lock);
+                GOTO(err_buf, rc = -EINVAL);
+        }
+
+        if (req->rq_nob_received < sizeof(struct lustre_msg)) {
+                CERROR("early reply length %d too small\n",
+                       req->rq_nob_received);
+                spin_unlock(&req->rq_lock);
+                GOTO(err_buf, rc = -EALREADY);
+        }
+
+        memcpy(early_buf, req->rq_repbuf, early_size);
+        spin_unlock(&req->rq_lock);
+
+        early_req->rq_cli_ctx = sptlrpc_cli_ctx_get(req->rq_cli_ctx);
+        early_req->rq_flvr = req->rq_flvr;
+        early_req->rq_repbuf = early_buf;
+        early_req->rq_repbuf_len = early_bufsz;
+        early_req->rq_repdata = (struct lustre_msg *) early_buf;
+        early_req->rq_repdata_len = early_size;
+        early_req->rq_early = 1;
+
+        rc = do_cli_unwrap_reply(early_req);
+        if (rc) {
+                DEBUG_REQ(D_ADAPTTO, early_req,
+                          "error %d unwrap early reply", rc);
+                GOTO(err_ctx, rc);
+        }
+
+        LASSERT(early_req->rq_repmsg);
+        *req_ret = early_req;
+        RETURN(0);
+
+err_ctx:
+        sptlrpc_cli_ctx_put(early_req->rq_cli_ctx, 1);
+err_buf:
+        OBD_FREE(early_buf, early_bufsz);
+err_req:
+        OBD_FREE_PTR(early_req);
+        RETURN(rc);
+}
+
+void sptlrpc_cli_finish_early_reply(struct ptlrpc_request *early_req)
+{
+        LASSERT(early_req->rq_repbuf);
+        LASSERT(early_req->rq_repdata);
+        LASSERT(early_req->rq_repmsg);
+
+        sptlrpc_cli_ctx_put(early_req->rq_cli_ctx, 1);
+        OBD_FREE(early_req->rq_repbuf, early_req->rq_repbuf_len);
+        OBD_FREE_PTR(early_req);
+}
+
 /**************************************************
  * sec ID                                         *
  **************************************************/
@@ -1588,7 +1720,7 @@ int sptlrpc_target_export_check(struct obd_export *exp,
                 if (exp->exp_flvr_expire[0] >= cfs_time_current_sec()) {
                         if (flavor_allowed(&exp->exp_flvr_old[0], req)) {
                                 CDEBUG(D_SEC, "exp %p (%x|%x|%x): match the "
-                                       "middle one (%lu)\n", exp,
+                                       "middle one ("CFS_DURATION_T")\n", exp,
                                        exp->exp_flvr.sf_rpc,
                                        exp->exp_flvr_old[0].sf_rpc,
                                        exp->exp_flvr_old[1].sf_rpc,
@@ -1613,7 +1745,7 @@ int sptlrpc_target_export_check(struct obd_export *exp,
                 if (exp->exp_flvr_expire[1] >= cfs_time_current_sec()) {
                         if (flavor_allowed(&exp->exp_flvr_old[1], req)) {
                                 CDEBUG(D_SEC, "exp %p (%x|%x|%x): match the "
-                                       "oldest one (%lu)\n", exp,
+                                       "oldest one ("CFS_DURATION_T")\n", exp,
                                        exp->exp_flvr.sf_rpc,
                                        exp->exp_flvr_old[0].sf_rpc,
                                        exp->exp_flvr_old[1].sf_rpc,
@@ -1746,28 +1878,18 @@ int sptlrpc_svc_unwrap_request(struct ptlrpc_request *req)
                 RETURN(SECSVC_DROP);
         }
 
-        if (msg->lm_magic == LUSTRE_MSG_MAGIC_V1 ||
-            msg->lm_magic == LUSTRE_MSG_MAGIC_V1_SWABBED) {
-                /*
-                 * v1 message, treat as to be null
-                 */
-                req->rq_flvr.sf_rpc = SPTLRPC_FLVR_NULL;
-        } else {
-                /*
-                 * v2 message.
-                 */
-                if (msg->lm_magic == LUSTRE_MSG_MAGIC_V2)
-                        req->rq_flvr.sf_rpc = WIRE_FLVR_RPC(msg->lm_secflvr);
-                else
-                        req->rq_flvr.sf_rpc = WIRE_FLVR_RPC(
-                                                __swab32(msg->lm_secflvr));
+        /*
+         * v2 message.
+         */
+        if (msg->lm_magic == LUSTRE_MSG_MAGIC_V2)
+                req->rq_flvr.sf_rpc = WIRE_FLVR_RPC(msg->lm_secflvr);
+        else
+                req->rq_flvr.sf_rpc = WIRE_FLVR_RPC(__swab32(msg->lm_secflvr));
 
-                /* unpack the wrapper message if the policy is not null */
-                if ((RPC_FLVR_POLICY(req->rq_flvr.sf_rpc) !=
-                     SPTLRPC_POLICY_NULL) &&
-                    lustre_unpack_msg(msg, req->rq_reqdata_len))
-                        RETURN(SECSVC_DROP);
-        }
+        /* unpack the wrapper message if the policy is not null */
+        if ((RPC_FLVR_POLICY(req->rq_flvr.sf_rpc) != SPTLRPC_POLICY_NULL) &&
+             lustre_unpack_msg(msg, req->rq_reqdata_len))
+                RETURN(SECSVC_DROP);
 
         policy = sptlrpc_rpcflavor2policy(req->rq_flvr.sf_rpc);
         if (!policy) {
@@ -1947,7 +2069,9 @@ void pga_to_bulk_desc(int nob, obd_count pg_count, struct brw_page **pga,
                                            nob : pga[i]->count;
                 desc->bd_iov[i].kiov_offset = pga[i]->off & ~CFS_PAGE_MASK;
 #else
-#warning FIXME for liblustre!
+                /* FIXME currently liblustre doesn't support bulk encryption.
+                 * If it ever does, recheck the code below; it may be wrong. */
+                LASSERTF(0, "Bulk encryption not implemented for liblustre\n");
                 desc->bd_iov[i].iov_base = pga[i]->pg->addr;
                 desc->bd_iov[i].iov_len = pga[i]->count > nob ?
                                            nob : pga[i]->count;
@@ -2074,7 +2198,7 @@ int sptlrpc_pack_user_desc(struct lustre_msg *msg, int offset)
         pud->pud_gid = cfs_current()->gid;
         pud->pud_fsuid = cfs_current()->fsuid;
         pud->pud_fsgid = cfs_current()->fsgid;
-        pud->pud_cap = cfs_current()->cap_effective;
+        pud->pud_cap = cfs_curproc_cap_pack();
         pud->pud_ngroups = (msg->lm_buflens[offset] - sizeof(*pud)) / 4;
 
 #ifdef __KERNEL__
@@ -2144,6 +2268,39 @@ const char * sec2target_str(struct ptlrpc_sec *sec)
 EXPORT_SYMBOL(sec2target_str);
 
 /****************************************
+ * crypto API helper/alloc blkcipher    *
+ ****************************************/
+
+#ifdef __KERNEL__
+#ifndef HAVE_ASYNC_BLOCK_CIPHER
+struct ll_crypto_cipher *ll_crypto_alloc_blkcipher(const char * algname,
+                                                   u32 type, u32 mask)
+{
+        char        buf[CRYPTO_MAX_ALG_NAME + 1];
+        const char *pan = algname;
+        u32         flag = 0; 
+
+        if (strncmp("cbc(", algname, 4) == 0)
+                flag |= CRYPTO_TFM_MODE_CBC;
+        else if (strncmp("ecb(", algname, 4) == 0)
+                flag |= CRYPTO_TFM_MODE_ECB;
+        if (flag) {
+                char *vp = strnchr(algname, CRYPTO_MAX_ALG_NAME, ')');
+                if (vp) {
+                        memcpy(buf, algname + 4, vp - algname - 4);
+                        buf[vp - algname - 4] = '\0';
+                        pan = buf;
+                } else {
+                        flag = 0;
+                }
+        }
+        return crypto_alloc_tfm(pan, flag);
+}
+EXPORT_SYMBOL(ll_crypto_alloc_blkcipher);
+#endif
+#endif
+
+/****************************************
  * initialize/finalize                  *
  ****************************************/
 
@@ -2151,6 +2308,8 @@ int __init sptlrpc_init(void)
 {
         int rc;
 
+        rwlock_init(&policy_lock);
+
         rc = sptlrpc_gc_start_thread();
         if (rc)
                 goto out;