Whamcloud - gitweb
LU-7845 gss: support namespace in lgss_keyring 81/18781/35
author Sebastien Buisson <sbuisson@ddn.com>
Thu, 14 Jan 2016 16:57:10 +0000 (17:57 +0100)
committer Oleg Drokin <oleg.drokin@intel.com>
Sat, 17 Dec 2016 05:38:48 +0000 (05:38 +0000)
When Kerberos authentication is wanted for a Lustre client running in
a Docker container, request_key() must forward to the lgss_keyring
userland helper sufficient information about the namespace from which
the mount command was launched. That way, the helper can bind to the
caller's namespace and retrieve the Kerberos credentials of the
container.

To achieve this:
- add an lpi_reftask field to struct llog_process_info: this is the
  task_struct of the process initiating the mount;
- use lpi_reftask to put the llog_process_thread_daemonize thread in
  the same namespace as the caller of llog_process_or_fork();
- add a reference PID to struct obd_import: this is the PID of
  the init process of the namespace from which the import
  creation originated;
- use the reference PID from struct obd_import as a new 'pid' parameter
  in the call-out information sent from kernel space to userspace
  via request-key;
- in lgss_keyring, if necessary, perform credentials retrieval
  in the caller's namespace, thanks to a call to 'setns'.

For a user's credentials, use the current PID instead of the import's
reference PID to determine the reference namespace.

Change-Id: I934ed857fdf323e55a2a5acf06e8799c3a6421f0
Signed-off-by: Sebastien Buisson <sbuisson@ddn.com>
Reviewed-on: https://review.whamcloud.com/18781
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: James Simmons <uja.ornl@yahoo.com>
Reviewed-by: John L. Hammond <john.hammond@intel.com>
Reviewed-by: Li Xi <lixi@ddn.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/autoconf/lustre-core.m4
lustre/include/lustre_compat.h
lustre/include/lustre_import.h
lustre/obdclass/genops.c
lustre/obdclass/llog.c
lustre/obdclass/llog_internal.h
lustre/ptlrpc/gss/gss_keyring.c
lustre/utils/gss/Makefile.am
lustre/utils/gss/lgss_keyring.c

index 0578325..51847b1 100644 (file)
@@ -789,6 +789,18 @@ dirty_inode_super_operation_flag, [
 ]) # LC_DIRTY_INODE_WITH_FLAG
 
 #
+# LC_SETNS
+#
+# 3.0 introduced setns
+#
+AC_DEFUN([LC_SETNS], [
+AC_CHECK_HEADERS([sched.h], [], [],
+                [#define _GNU_SOURCE
+                ])
+AC_CHECK_FUNCS([setns])
+]) # LC_SETNS
+
+#
 # LC_GENERIC_PERMISSION
 #
 # 2.6.38 generic_permission taken 4 parameters.
@@ -1541,6 +1553,24 @@ d_count, [
 ]) # LC_HAVE_DCOUNT
 
 #
+# LC_PID_NS_FOR_CHILDREN
+#
+# 3.11 replaces pid_ns by pid_ns_for_children in struct nsproxy
+#
+AC_DEFUN([LC_PID_NS_FOR_CHILDREN], [
+LB_CHECK_COMPILE([if 'struct nsproxy' has 'pid_ns_for_children'],
+pid_ns_for_children, [
+       #include <linux/nsproxy.h>
+],[
+       struct nsproxy ns;
+       ns.pid_ns_for_children = NULL;
+],[
+       AC_DEFINE(HAVE_PID_NS_FOR_CHILDREN, 1,
+                 ['struct nsproxy' has 'pid_ns_for_children'])
+])
+]) # LC_PID_NS_FOR_CHILDREN
+
+#
 # LC_OLDSIZE_TRUNCATE_PAGECACHE
 #
 # 3.12 truncate_pagecache without oldsize parameter
@@ -2280,6 +2310,7 @@ AC_DEFUN([LC_PROG_LINUX], [
 
        # 3.0
        LC_DIRTY_INODE_WITH_FLAG
+       LC_SETNS
 
        # 3.1
        LC_LM_XXX_LOCK_MANAGER_OPS
@@ -2342,6 +2373,7 @@ AC_DEFUN([LC_PROG_LINUX], [
        LC_HAVE_DCOUNT
        LC_HAVE_DENTRY_D_U_D_ALIAS
        LC_HAVE_DENTRY_D_CHILD
+       LC_PID_NS_FOR_CHILDREN
 
        # 3.12
        LC_OLDSIZE_TRUNCATE_PAGECACHE
index 4229fa5..238e668 100644 (file)
@@ -413,4 +413,10 @@ static inline void truncate_inode_pages_final(struct address_space *map)
 #endif
 #endif
 
+/*
+ * ll_task_pid_ns() - PID namespace pointer read from a task's nsproxy.
+ * Kernel 3.11 replaced 'pid_ns' by 'pid_ns_for_children' in struct nsproxy;
+ * HAVE_PID_NS_FOR_CHILDREN is defined by the LC_PID_NS_FOR_CHILDREN
+ * configure check when the new field name is available.
+ */
+#ifdef HAVE_PID_NS_FOR_CHILDREN
+# define ll_task_pid_ns(task)  ((task)->nsproxy->pid_ns_for_children)
+#else
+# define ll_task_pid_ns(task)  ((task)->nsproxy->pid_ns)
+#endif
+
 #endif /* _LUSTRE_COMPAT_H */
index 40371ee..7a1b1f4 100644 (file)
@@ -204,9 +204,10 @@ struct obd_import {
          * some seciruty-related fields
          * @{
          */
-        struct ptlrpc_sec        *imp_sec;
+       struct ptlrpc_sec        *imp_sec;
        struct mutex              imp_sec_mutex;
-        cfs_time_t                imp_sec_expire;
+       cfs_time_t                imp_sec_expire;
+       pid_t                     imp_sec_refpid;
         /** @} */
 
        /** Wait queue for those who need to wait for recovery completion */
index a48f887..d5352b0 100644 (file)
@@ -37,6 +37,7 @@
 
 #define DEBUG_SUBSYSTEM S_CLASS
 
+#include <linux/pid_namespace.h>
 #include <linux/kthread.h>
 #include <obd_class.h>
 #include <lprocfs_status.h>
@@ -1045,6 +1046,7 @@ static void init_imp_at(struct imp_at *at) {
 struct obd_import *class_new_import(struct obd_device *obd)
 {
        struct obd_import *imp;
+       struct pid_namespace *curr_pid_ns = ll_task_pid_ns(current);
 
        OBD_ALLOC(imp, sizeof(*imp));
        if (imp == NULL)
@@ -1066,6 +1068,11 @@ struct obd_import *class_new_import(struct obd_device *obd)
        mutex_init(&imp->imp_sec_mutex);
        init_waitqueue_head(&imp->imp_recovery_waitq);
 
+       if (curr_pid_ns->child_reaper)
+               imp->imp_sec_refpid = curr_pid_ns->child_reaper->pid;
+       else
+               imp->imp_sec_refpid = 1;
+
        atomic_set(&imp->imp_refcount, 2);
        atomic_set(&imp->imp_unregistering, 0);
        atomic_set(&imp->imp_inflight, 0);
index f2f2fc6..addb1cf 100644 (file)
@@ -43,6 +43,7 @@
 
 #define DEBUG_SUBSYSTEM S_LOG
 
+#include <linux/pid_namespace.h>
 #include <linux/kthread.h>
 #include <llog_swab.h>
 #include <lustre_log.h>
@@ -630,6 +631,22 @@ static int llog_process_thread_daemonize(void *arg)
        struct llog_process_info        *lpi = arg;
        struct lu_env                    env;
        int                              rc;
+       struct nsproxy                  *new_ns, *curr_ns = current->nsproxy;
+
+       task_lock(lpi->lpi_reftask);
+       new_ns = lpi->lpi_reftask->nsproxy;
+       if (curr_ns != new_ns) {
+               get_nsproxy(new_ns);
+
+               current->nsproxy = new_ns;
+               /* XXX: we should call put_nsproxy() instead of
+                * atomic_dec(&ns->count) directly. But put_nsproxy() cannot be
+                * used outside of the kernel itself, because it calls
+                * free_nsproxy() which is not exported by the kernel
+                * (defined in kernel/nsproxy.c) */
+               atomic_dec(&curr_ns->count);
+       }
+       task_unlock(lpi->lpi_reftask);
 
        unshare_fs_struct();
 
@@ -656,15 +673,15 @@ int llog_process_or_fork(const struct lu_env *env,
 
         ENTRY;
 
-        OBD_ALLOC_PTR(lpi);
-        if (lpi == NULL) {
-                CERROR("cannot alloc pointer\n");
-                RETURN(-ENOMEM);
-        }
-        lpi->lpi_loghandle = loghandle;
-        lpi->lpi_cb        = cb;
-        lpi->lpi_cbdata    = data;
-        lpi->lpi_catdata   = catdata;
+       OBD_ALLOC_PTR(lpi);
+       if (lpi == NULL) {
+               CERROR("cannot alloc pointer\n");
+               RETURN(-ENOMEM);
+       }
+       lpi->lpi_loghandle = loghandle;
+       lpi->lpi_cb        = cb;
+       lpi->lpi_cbdata    = data;
+       lpi->lpi_catdata   = catdata;
 
        if (fork) {
                struct task_struct *task;
@@ -673,6 +690,10 @@ int llog_process_or_fork(const struct lu_env *env,
                 * init the new one in llog_process_thread_daemonize. */
                lpi->lpi_env = NULL;
                init_completion(&lpi->lpi_completion);
+               /* take reference to current, so that
+                * llog_process_thread_daemonize() can use it to switch to
+                * namespace associated with current  */
+               lpi->lpi_reftask = current;
                task = kthread_run(llog_process_thread_daemonize, lpi,
                                   "llog_process_thread");
                if (IS_ERR(task)) {
index 8d5d2ad..65752a0 100644 (file)
 #include <lustre_log.h>
 
+/*
+ * Arguments that llog_process_or_fork() fills in and, when forking, hands
+ * to the llog_process_thread_daemonize() kernel thread.
+ */
 struct llog_process_info {
-        struct llog_handle *lpi_loghandle;
-        llog_cb_t           lpi_cb;
-        void               *lpi_cbdata;
-        void               *lpi_catdata;
-        int                 lpi_rc;
-       struct completion       lpi_completion;
+       struct llog_handle      *lpi_loghandle;
+       llog_cb_t                lpi_cb;
+       void                    *lpi_cbdata;
+       void                    *lpi_catdata;
+       int                      lpi_rc;
+       struct completion        lpi_completion;
        const struct lu_env     *lpi_env;
+       /* task that called llog_process_or_fork() (set to 'current' there);
+        * llog_process_thread_daemonize() adopts this task's nsproxy so the
+        * thread runs in the same namespaces as the caller */
+       struct task_struct      *lpi_reftask;
 };
 
 struct llog_thread_info {
index 2ebaeca..6186c5f 100644 (file)
@@ -817,19 +817,27 @@ struct ptlrpc_cli_ctx * gss_sec_lookup_ctx_kr(struct ptlrpc_sec *sec,
        construct_key_desc(desc, sizeof(desc), sec, vcred->vc_uid);
 
        /* callout info format:
-        * secid:mech:uid:gid:sec_flags:svc_flag:svc_type:peer_nid:target_uuid
+        * secid:mech:uid:gid:sec_flags:svc_flag:svc_type:peer_nid:target_uuid:
+        * self_nid:pid
         */
-        coinfo_size = sizeof(struct obd_uuid) + MAX_OBD_NAME + 64;
-        OBD_ALLOC(coinfo, coinfo_size);
-        if (coinfo == NULL)
-                goto out;
-
-       snprintf(coinfo, coinfo_size, "%d:%s:%u:%u:%s:%c:%d:%#llx:%s:%#llx",
+       coinfo_size = sizeof(struct obd_uuid) + MAX_OBD_NAME + 64;
+       OBD_ALLOC(coinfo, coinfo_size);
+       if (coinfo == NULL)
+               goto out;
+
+       /* Last callout parameter is pid of process whose namespace will be used
+        * for credentials' retrieval.
+        * For user's credentials (in which case sec_part_flags is empty), use
+        * current PID instead of import's reference PID to get reference
+        * namespace. */
+       snprintf(coinfo, coinfo_size, "%d:%s:%u:%u:%s:%c:%d:%#llx:%s:%#llx:%d",
                 sec->ps_id, sec2gsec(sec)->gs_mech->gm_name,
                 vcred->vc_uid, vcred->vc_gid,
                 sec_part_flags, svc_flag, import_to_gss_svc(imp),
                 imp->imp_connection->c_peer.nid, imp->imp_obd->obd_name,
-                imp->imp_connection->c_self);
+                imp->imp_connection->c_self,
+                sec_part_flags[0] == '\0' ?
+                      current_pid() : imp->imp_sec_refpid);
 
         CDEBUG(D_SEC, "requesting key for %s\n", desc);
 
index e89ce96..5bac807 100644 (file)
@@ -1,7 +1,8 @@
 # GSS daemons & tools Makefile
 
 AM_CFLAGS := -fPIC \
-            -D_LARGEFILE64_SOURCE=1 -D_FILE_OFFSET_BITS=64 -DLUSTRE_UTILS=1
+            -D_LARGEFILE64_SOURCE=1 -D_FILE_OFFSET_BITS=64 -DLUSTRE_UTILS=1 \
+            -D_GNU_SOURCE
 
 LIBCFS := $(top_builddir)/libcfs/libcfs/libcfs.a
 
index f4d7435..bf936de 100644 (file)
@@ -36,6 +36,9 @@
  * Author: Eric Mei <ericm@clusterfs.com>
  */
 
+#include <sched.h>
+#include <sys/types.h>
+#include <sys/stat.h>
 #include <unistd.h>
 #include <stdlib.h>
 #include <stdio.h>
@@ -95,21 +98,22 @@ struct lgss_init_res {
 };
 
+/*
+ * Upcall parameters; the callout info string passed via request-key is
+ * decoded into this structure by parse_callout_info().
+ */
 struct keyring_upcall_param {
-       uint32_t        kup_ver;
-       uint32_t        kup_secid;
-       uint32_t        kup_uid;
-       uint32_t        kup_fsuid;
-       uint32_t        kup_gid;
-       uint32_t        kup_fsgid;
-       uint32_t        kup_svc;
-       uint64_t        kup_nid;
-       uint64_t        kup_selfnid;
+       uint32_t        kup_ver;
+       uint32_t        kup_secid;
+       uint32_t        kup_uid;
+       uint32_t        kup_fsuid;
+       uint32_t        kup_gid;
+       uint32_t        kup_fsgid;
+       uint32_t        kup_svc;
+       uint64_t        kup_nid;
+       uint64_t        kup_selfnid;
        char            kup_svc_type;
-       char            kup_tgt[64];
-       char            kup_mech[16];
-       unsigned int    kup_is_root:1,
+       char            kup_tgt[64];
+       char            kup_mech[16];
+       unsigned int    kup_is_root:1,
                        kup_is_mdt:1,
                        kup_is_ost:1;
+       /* callout field [10]: pid of the process whose namespace is used for
+        * credentials retrieval; main() compares /proc/<kup_pid>/ns/mnt with
+        * its own and calls setns() when they differ */
+       uint32_t        kup_pid;
 };
 
 /****************************************
@@ -719,11 +723,12 @@ static int lgssc_kr_negotiate(key_serial_t keyid, struct lgss_cred *cred,
  *  [7]: target_nid     (uint64)
  *  [8]: target_uuid    (string)
  *  [9]: self_nid        (uint64)
+ *  [10]: pid            (uint)
  */
 static int parse_callout_info(const char *coinfo,
                               struct keyring_upcall_param *uparam)
 {
-       const int       nargs = 10;
+       const int       nargs = 11;
        char            buf[1024];
        char           *string = buf;
        int             length, i;
@@ -750,9 +755,9 @@ static int parse_callout_info(const char *coinfo,
         }
         data[i] = string;
 
-       logmsg(LL_TRACE, "components: %s,%s,%s,%s,%s,%c,%s,%s,%s,%s\n",
+       logmsg(LL_TRACE, "components: %s,%s,%s,%s,%s,%c,%s,%s,%s,%s,%s\n",
               data[0], data[1], data[2], data[3], data[4], data[5][0],
-              data[6], data[7], data[8], data[9]);
+              data[6], data[7], data[8], data[9], data[10]);
 
        uparam->kup_secid = strtol(data[0], NULL, 0);
        strlcpy(uparam->kup_mech, data[1], sizeof(uparam->kup_mech));
@@ -769,15 +774,16 @@ static int parse_callout_info(const char *coinfo,
        uparam->kup_nid = strtoll(data[7], NULL, 0);
        strlcpy(uparam->kup_tgt, data[8], sizeof(uparam->kup_tgt));
        uparam->kup_selfnid = strtoll(data[9], NULL, 0);
+       uparam->kup_pid = strtol(data[10], NULL, 0);
 
        logmsg(LL_DEBUG, "parse call out info: secid %d, mech %s, ugid %u:%u, "
               "is_root %d, is_mdt %d, is_ost %d, svc type %c, svc %d, "
-              "nid 0x%"PRIx64", tgt %s, self nid 0x%"PRIx64"\n",
+              "nid 0x%"PRIx64", tgt %s, self nid 0x%"PRIx64", pid %d\n",
               uparam->kup_secid, uparam->kup_mech,
               uparam->kup_uid, uparam->kup_gid,
               uparam->kup_is_root, uparam->kup_is_mdt, uparam->kup_is_ost,
               uparam->kup_svc_type, uparam->kup_svc, uparam->kup_nid,
-              uparam->kup_tgt, uparam->kup_selfnid);
+              uparam->kup_tgt, uparam->kup_selfnid, uparam->kup_pid);
        return 0;
 }
 
@@ -808,6 +814,21 @@ out:
        fclose(file);
 }
 
+#ifdef HAVE_SETNS
+/*
+ * Reassociate the calling process with the namespace referred to by @path
+ * (a /proc/<pid>/ns/<type> entry), accepting any namespace type.
+ *
+ * Returns 0 on success; -1 if the file cannot be opened or setns() fails.
+ * On failure errno is the one set by open() or setns(), so the caller can
+ * report it with strerror(errno).
+ */
+static int associate_with_ns(char *path)
+{
+       int fd, rc, saved_errno;
+
+       fd = open(path, O_RDONLY);
+       if (fd == -1)
+               return -1;
+
+       rc = setns(fd, 0);
+       /* close() may overwrite errno; keep the value left by setns() so the
+        * caller's error message describes the real failure */
+       saved_errno = errno;
+       close(fd);
+       errno = saved_errno;
+
+       return rc;
+}
+#endif
+
 /****************************************
  * main process                         *
  ****************************************/
@@ -821,6 +842,10 @@ int main(int argc, char *argv[])
         pid_t                           child;
         struct lgss_mech_type          *mech;
         struct lgss_cred               *cred;
+#ifdef HAVE_SETNS
+       char                            path[PATH_MAX];
+       struct stat parent_ns = { .st_ino = 0 }, caller_ns = { .st_ino = 0 };
+#endif
 
         set_log_level();
 
@@ -914,11 +939,35 @@ int main(int argc, char *argv[])
        cred->lc_svc_type = uparam.kup_svc_type;
        cred->lc_self_nid = uparam.kup_selfnid;
 
-        if (lgss_prepare_cred(cred)) {
-                logmsg(LL_ERR, "key %08x: failed to prepare credentials "
-                       "for user %d\n", keyid, uparam.kup_uid);
-                return 1;
-        }
+#ifdef HAVE_SETNS
+       /* Is caller in different namespace? */
+       snprintf(path, sizeof(path), "/proc/%d/ns/mnt", getpid());
+       if (stat(path, &parent_ns))
+               logmsg(LL_ERR, "cannot stat %s: %s\n", path, strerror(errno));
+       snprintf(path, sizeof(path), "/proc/%d/ns/mnt", uparam.kup_pid);
+       if (stat(path, &caller_ns))
+               logmsg(LL_ERR, "cannot stat %s: %s\n", path, strerror(errno));
+       if (caller_ns.st_ino != parent_ns.st_ino) {
+               /*
+                * do credentials preparation in caller's namespace
+                */
+               if (associate_with_ns(path) != 0) {
+                       logmsg(LL_ERR, "failed to attach to pid %d namespace: "
+                              "%s\n", uparam.kup_pid, strerror(errno));
+                       return 1;
+               }
+               logmsg(LL_TRACE, "working in namespace of pid %d\n",
+                      uparam.kup_pid);
+       } else {
+               logmsg(LL_TRACE, "caller's namespace is the same\n");
+       }
+#endif /* HAVE_SETNS */
+
+       if (lgss_prepare_cred(cred)) {
+               logmsg(LL_ERR, "key %08x: failed to prepare credentials "
+                      "for user %d\n", keyid, uparam.kup_uid);
+               return 1;
+       }
 
         /* pre initialize the key. note the keyring linked to is actually of the
          * original requesting process, not _this_ upcall process. if it's for