Whamcloud - gitweb
LU-17662 osd-zfs: Support for ZFS 2.2.3
[fs/lustre-release.git] / lustre / ptlrpc / sec.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright (c) 2011, 2017, Intel Corporation.
27  */
28 /*
29  * This file is part of Lustre, http://www.lustre.org/
30  *
31  * lustre/ptlrpc/sec.c
32  *
33  * Author: Eric Mei <ericm@clusterfs.com>
34  */
35
36 #define DEBUG_SUBSYSTEM S_SEC
37
38 #include <linux/user_namespace.h>
39 #include <linux/uidgid.h>
40 #include <linux/crypto.h>
41 #include <linux/key.h>
42
43 #include <libcfs/libcfs.h>
44 #include <obd.h>
45 #include <obd_class.h>
46 #include <obd_support.h>
47 #include <lustre_net.h>
48 #include <lustre_import.h>
49 #include <lustre_dlm.h>
50 #include <lustre_sec.h>
51 #include <libcfs/libcfs_crypto.h>
52
53 #include "ptlrpc_internal.h"
54
/* When non-zero, client includes its SELinux policy status in requests */
static int send_sepol;
module_param(send_sepol, int, 0644);
MODULE_PARM_DESC(send_sepol, "Client sends SELinux policy status");

/*
 * policy registers
 */

/* Guards the policies[] registry below.
 * NOTE(review): no initializer visible in this chunk — presumably
 * rwlock_init() is called during module init; confirm.
 */
static rwlock_t policy_lock;
/* Registered security policies, indexed by SPTLRPC policy number */
static struct ptlrpc_sec_policy *policies[SPTLRPC_POLICY_MAX] = {
	NULL,
};
67
68 int sptlrpc_register_policy(struct ptlrpc_sec_policy *policy)
69 {
70         __u16 number = policy->sp_policy;
71
72         LASSERT(policy->sp_name);
73         LASSERT(policy->sp_cops);
74         LASSERT(policy->sp_sops);
75
76         if (number >= SPTLRPC_POLICY_MAX)
77                 return -EINVAL;
78
79         write_lock(&policy_lock);
80         if (unlikely(policies[number])) {
81                 write_unlock(&policy_lock);
82                 return -EALREADY;
83         }
84         policies[number] = policy;
85         write_unlock(&policy_lock);
86
87         CDEBUG(D_SEC, "%s: registered\n", policy->sp_name);
88         return 0;
89 }
90 EXPORT_SYMBOL(sptlrpc_register_policy);
91
92 int sptlrpc_unregister_policy(struct ptlrpc_sec_policy *policy)
93 {
94         __u16 number = policy->sp_policy;
95
96         LASSERT(number < SPTLRPC_POLICY_MAX);
97
98         write_lock(&policy_lock);
99         if (unlikely(policies[number] == NULL)) {
100                 write_unlock(&policy_lock);
101                 CERROR("%s: already unregistered\n", policy->sp_name);
102                 return -EINVAL;
103         }
104
105         LASSERT(policies[number] == policy);
106         policies[number] = NULL;
107         write_unlock(&policy_lock);
108
109         CDEBUG(D_SEC, "%s: unregistered\n", policy->sp_name);
110         return 0;
111 }
112 EXPORT_SYMBOL(sptlrpc_unregister_policy);
113
/**
 * Find the registered policy for wire \a flavor and take a module reference
 * on it. For the GSS policy only, if the policy is not yet registered, try
 * to load the ptlrpc_gss module on demand and look again.
 *
 * NOTE(review): if ptlrpc_gss never becomes available the loop keeps
 * retrying request_module() indefinitely — confirm this is intended.
 *
 * \retval the policy, with a module reference held, or NULL.
 */
static
struct ptlrpc_sec_policy *sptlrpc_wireflavor2policy(__u32 flavor)
{
	static DEFINE_MUTEX(load_mutex);
	struct ptlrpc_sec_policy *policy;
	__u16 number = SPTLRPC_FLVR_POLICY(flavor);
	int rc;

	if (number >= SPTLRPC_POLICY_MAX)
		return NULL;

	while (1) {
		read_lock(&policy_lock);
		policy = policies[number];
		/* a policy whose module is going away counts as absent */
		if (policy && !try_module_get(policy->sp_owner))
			policy = NULL;
		read_unlock(&policy_lock);

		if (policy != NULL || number != SPTLRPC_POLICY_GSS)
			break;

		/* try to load gss module, happens only if policy at index
		 * SPTLRPC_POLICY_GSS is not already referenced in
		 * global array policies[]
		 */
		mutex_lock(&load_mutex);
		/* The fact that request_module() returns 0 does not guarantee
		 * the module has done its job. So we must check that the
		 * requested policy is now available. This is done by checking
		 * again for policies[number] in the loop.
		 */
		rc = request_module("ptlrpc_gss");
		if (rc == 0)
			CDEBUG(D_SEC, "module ptlrpc_gss loaded on demand\n");
		else
			CERROR("Unable to load module ptlrpc_gss: rc %d\n", rc);
		mutex_unlock(&load_mutex);
	}

	return policy;
}
155
156 __u32 sptlrpc_name2flavor_base(const char *name)
157 {
158         if (!strcmp(name, "null"))
159                 return SPTLRPC_FLVR_NULL;
160         if (!strcmp(name, "plain"))
161                 return SPTLRPC_FLVR_PLAIN;
162         if (!strcmp(name, "gssnull"))
163                 return SPTLRPC_FLVR_GSSNULL;
164         if (!strcmp(name, "krb5n"))
165                 return SPTLRPC_FLVR_KRB5N;
166         if (!strcmp(name, "krb5a"))
167                 return SPTLRPC_FLVR_KRB5A;
168         if (!strcmp(name, "krb5i"))
169                 return SPTLRPC_FLVR_KRB5I;
170         if (!strcmp(name, "krb5p"))
171                 return SPTLRPC_FLVR_KRB5P;
172         if (!strcmp(name, "skn"))
173                 return SPTLRPC_FLVR_SKN;
174         if (!strcmp(name, "ska"))
175                 return SPTLRPC_FLVR_SKA;
176         if (!strcmp(name, "ski"))
177                 return SPTLRPC_FLVR_SKI;
178         if (!strcmp(name, "skpi"))
179                 return SPTLRPC_FLVR_SKPI;
180
181         return SPTLRPC_FLVR_INVALID;
182 }
183 EXPORT_SYMBOL(sptlrpc_name2flavor_base);
184
185 const char *sptlrpc_flavor2name_base(__u32 flvr)
186 {
187         __u32   base = SPTLRPC_FLVR_BASE(flvr);
188
189         if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_NULL))
190                 return "null";
191         else if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_PLAIN))
192                 return "plain";
193         else if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_GSSNULL))
194                 return "gssnull";
195         else if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_KRB5N))
196                 return "krb5n";
197         else if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_KRB5A))
198                 return "krb5a";
199         else if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_KRB5I))
200                 return "krb5i";
201         else if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_KRB5P))
202                 return "krb5p";
203         else if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_SKN))
204                 return "skn";
205         else if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_SKA))
206                 return "ska";
207         else if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_SKI))
208                 return "ski";
209         else if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_SKPI))
210                 return "skpi";
211
212         CERROR("invalid wire flavor 0x%x\n", flvr);
213         return "invalid";
214 }
215 EXPORT_SYMBOL(sptlrpc_flavor2name_base);
216
217 char *sptlrpc_flavor2name_bulk(struct sptlrpc_flavor *sf,
218                                char *buf, int bufsize)
219 {
220         if (SPTLRPC_FLVR_POLICY(sf->sf_rpc) == SPTLRPC_POLICY_PLAIN)
221                 snprintf(buf, bufsize, "hash:%s",
222                         sptlrpc_get_hash_name(sf->u_bulk.hash.hash_alg));
223         else
224                 snprintf(buf, bufsize, "%s",
225                         sptlrpc_flavor2name_base(sf->sf_rpc));
226
227         buf[bufsize - 1] = '\0';
228         return buf;
229 }
230 EXPORT_SYMBOL(sptlrpc_flavor2name_bulk);
231
232 char *sptlrpc_flavor2name(struct sptlrpc_flavor *sf, char *buf, int bufsize)
233 {
234         size_t ln;
235
236         ln = snprintf(buf, bufsize, "%s", sptlrpc_flavor2name_base(sf->sf_rpc));
237
238         /*
239          * currently we don't support customized bulk specification for
240          * flavors other than plain
241          */
242         if (SPTLRPC_FLVR_POLICY(sf->sf_rpc) == SPTLRPC_POLICY_PLAIN) {
243                 char bspec[16];
244
245                 bspec[0] = '-';
246                 sptlrpc_flavor2name_bulk(sf, bspec + 1, sizeof(bspec) - 1);
247                 strncat(buf, bspec, bufsize - ln);
248         }
249
250         buf[bufsize - 1] = '\0';
251         return buf;
252 }
253 EXPORT_SYMBOL(sptlrpc_flavor2name);
254
255 char *sptlrpc_secflags2str(__u32 flags, char *buf, int bufsize)
256 {
257         buf[0] = '\0';
258
259         if (flags & PTLRPC_SEC_FL_REVERSE)
260                 strlcat(buf, "reverse,", bufsize);
261         if (flags & PTLRPC_SEC_FL_ROOTONLY)
262                 strlcat(buf, "rootonly,", bufsize);
263         if (flags & PTLRPC_SEC_FL_UDESC)
264                 strlcat(buf, "udesc,", bufsize);
265         if (flags & PTLRPC_SEC_FL_BULK)
266                 strlcat(buf, "bulk,", bufsize);
267         if (buf[0] == '\0')
268                 strlcat(buf, "-,", bufsize);
269
270         return buf;
271 }
272 EXPORT_SYMBOL(sptlrpc_secflags2str);
273
274 /*
275  * client context APIs
276  */
277
/* existingroot to tell we only want to fetch an already existing root ctx */
static
struct ptlrpc_cli_ctx *get_my_ctx(struct ptlrpc_sec *sec, bool existingroot)
{
	struct vfs_cred vcred;
	int create = 1, remove_dead = 1;

	LASSERT(sec);
	LASSERT(sec->ps_policy->sp_cops->lookup_ctx);

	if (existingroot) {
		/* pure lookup: do not create a new ctx, do not prune dead ones */
		vcred.vc_uid = from_kuid(&init_user_ns, current_uid());
		vcred.vc_gid = from_kgid(&init_user_ns, current_gid());
		create = 0;
		remove_dead = 0;

		/* unless the sec is rootonly, only root may request an
		 * existing root context */
		if (!(sec->ps_flvr.sf_flags & PTLRPC_SEC_FL_ROOTONLY) &&
		    vcred.vc_uid != 0)
			return ERR_PTR(-EINVAL);
	} else if (sec->ps_flvr.sf_flags & (PTLRPC_SEC_FL_REVERSE |
					    PTLRPC_SEC_FL_ROOTONLY)) {
		/* reverse or root-only sec always uses root credentials */
		vcred.vc_uid = 0;
		vcred.vc_gid = 0;
		if (sec->ps_flvr.sf_flags & PTLRPC_SEC_FL_REVERSE) {
			/* reverse sec only looks up existing contexts */
			create = 0;
			remove_dead = 0;
		}
	} else {
		/* regular case: context keyed by the caller's credentials */
		vcred.vc_uid = from_kuid(&init_user_ns, current_uid());
		vcred.vc_gid = from_kgid(&init_user_ns, current_gid());
	}

	return sec->ps_policy->sp_cops->lookup_ctx(sec, &vcred, create,
						   remove_dead);
}
313
314 struct ptlrpc_cli_ctx *sptlrpc_cli_ctx_get(struct ptlrpc_cli_ctx *ctx)
315 {
316         atomic_inc(&ctx->cc_refcount);
317         return ctx;
318 }
319 EXPORT_SYMBOL(sptlrpc_cli_ctx_get);
320
321 void sptlrpc_cli_ctx_put(struct ptlrpc_cli_ctx *ctx, int sync)
322 {
323         struct ptlrpc_sec *sec = ctx->cc_sec;
324
325         LASSERT(sec);
326         LASSERT(atomic_read(&(ctx)->cc_refcount) > 0);
327
328         if (!atomic_dec_and_test(&ctx->cc_refcount))
329                 return;
330
331         sec->ps_policy->sp_cops->release_ctx(sec, ctx, sync);
332 }
333 EXPORT_SYMBOL(sptlrpc_cli_ctx_put);
334
335 /**
336  * Expire the client context immediately.
337  *
338  * \pre Caller must hold at least 1 reference on the \a ctx.
339  */
340 void sptlrpc_cli_ctx_expire(struct ptlrpc_cli_ctx *ctx)
341 {
342         LASSERT(ctx->cc_ops->die);
343         ctx->cc_ops->die(ctx, 0);
344 }
345 EXPORT_SYMBOL(sptlrpc_cli_ctx_expire);
346
347 /**
348  * To wake up the threads who are waiting for this client context. Called
349  * after some status change happened on \a ctx.
350  */
351 void sptlrpc_cli_ctx_wakeup(struct ptlrpc_cli_ctx *ctx)
352 {
353         struct ptlrpc_request *req, *next;
354
355         spin_lock(&ctx->cc_lock);
356         list_for_each_entry_safe(req, next, &ctx->cc_req_list,
357                                      rq_ctx_chain) {
358                 list_del_init(&req->rq_ctx_chain);
359                 ptlrpc_client_wake_req(req);
360         }
361         spin_unlock(&ctx->cc_lock);
362 }
363 EXPORT_SYMBOL(sptlrpc_cli_ctx_wakeup);
364
365 int sptlrpc_cli_ctx_display(struct ptlrpc_cli_ctx *ctx, char *buf, int bufsize)
366 {
367         LASSERT(ctx->cc_ops);
368
369         if (ctx->cc_ops->display == NULL)
370                 return 0;
371
372         return ctx->cc_ops->display(ctx, buf, bufsize);
373 }
374
375 static int import_sec_check_expire(struct obd_import *imp)
376 {
377         int adapt = 0;
378
379         write_lock(&imp->imp_sec_lock);
380         if (imp->imp_sec_expire &&
381             imp->imp_sec_expire < ktime_get_real_seconds()) {
382                 adapt = 1;
383                 imp->imp_sec_expire = 0;
384         }
385         write_unlock(&imp->imp_sec_lock);
386
387         if (!adapt)
388                 return 0;
389
390         CDEBUG(D_SEC, "found delayed sec adapt expired, do it now\n");
391         return sptlrpc_import_sec_adapt(imp, NULL, NULL);
392 }
393
394 /**
395  * Get and validate the client side ptlrpc security facilities from
396  * \a imp. There is a race condition on client reconnect when the import is
397  * being destroyed while there are outstanding client bound requests. In
398  * this case do not output any error messages if import secuity is not
399  * found.
400  *
401  * \param[in] imp obd import associated with client
402  * \param[out] sec client side ptlrpc security
403  *
404  * \retval 0 if security retrieved successfully
405  * \retval -ve errno if there was a problem
406  */
407 static int import_sec_validate_get(struct obd_import *imp,
408                                    struct ptlrpc_sec **sec)
409 {
410         int rc;
411
412         if (unlikely(imp->imp_sec_expire)) {
413                 rc = import_sec_check_expire(imp);
414                 if (rc)
415                         return rc;
416         }
417
418         *sec = sptlrpc_import_sec_ref(imp);
419         if (*sec == NULL) {
420                 /* Only output an error when the import is still active */
421                 if (!test_bit(WORK_STRUCT_PENDING_BIT,
422                               work_data_bits(&imp->imp_zombie_work)))
423                         CERROR("import %p (%s) with no sec\n",
424                                imp, ptlrpc_import_state_name(imp->imp_state));
425                 return -EACCES;
426         }
427
428         if (unlikely((*sec)->ps_dying)) {
429                 CERROR("attempt to use dying sec %p\n", sec);
430                 sptlrpc_sec_put(*sec);
431                 return -EACCES;
432         }
433
434         return 0;
435 }
436
437 /**
438  * Given a \a req, find or allocate an appropriate context for it.
439  * \pre req->rq_cli_ctx == NULL.
440  *
441  * \retval 0 succeed, and req->rq_cli_ctx is set.
442  * \retval -ev error number, and req->rq_cli_ctx == NULL.
443  */
444 int sptlrpc_req_get_ctx(struct ptlrpc_request *req)
445 {
446         struct obd_import *imp = req->rq_import;
447         struct ptlrpc_sec *sec;
448         int rc;
449
450         ENTRY;
451
452         LASSERT(!req->rq_cli_ctx);
453         LASSERT(imp);
454
455         rc = import_sec_validate_get(imp, &sec);
456         if (rc)
457                 RETURN(rc);
458
459         req->rq_cli_ctx = get_my_ctx(sec, false);
460
461         sptlrpc_sec_put(sec);
462
463         if (!req->rq_cli_ctx) {
464                 rc = -ECONNREFUSED;
465         } else if (IS_ERR(req->rq_cli_ctx)) {
466                 rc = PTR_ERR(req->rq_cli_ctx);
467                 req->rq_cli_ctx = NULL;
468         }
469
470         if (rc)
471                 CERROR("%s: fail to get context for req %p: rc = %d\n",
472                        imp->imp_obd->obd_name, req, rc);
473
474         RETURN(rc);
475 }
476
477 /**
478  * Drop the context for \a req.
479  * \pre req->rq_cli_ctx != NULL.
480  * \post req->rq_cli_ctx == NULL.
481  *
482  * If \a sync == 0, this function should return quickly without sleep;
483  * otherwise it might trigger and wait for the whole process of sending
484  * an context-destroying rpc to server.
485  */
486 void sptlrpc_req_put_ctx(struct ptlrpc_request *req, int sync)
487 {
488         ENTRY;
489
490         LASSERT(req);
491         LASSERT(req->rq_cli_ctx);
492
493         /*
494          * request might be asked to release earlier while still
495          * in the context waiting list.
496          */
497         if (!list_empty(&req->rq_ctx_chain)) {
498                 spin_lock(&req->rq_cli_ctx->cc_lock);
499                 list_del_init(&req->rq_ctx_chain);
500                 spin_unlock(&req->rq_cli_ctx->cc_lock);
501         }
502
503         sptlrpc_cli_ctx_put(req->rq_cli_ctx, sync);
504         req->rq_cli_ctx = NULL;
505         EXIT;
506 }
507
/**
 * Move \a req from \a oldctx to \a newctx: free the security request/reply
 * buffers allocated under the old context, recompute the flavor for the new
 * context, then re-allocate the request buffer and copy the saved request
 * message back in.
 *
 * \retval 0 on success; on allocation failure the saved flavor is restored
 * but \a req stays attached to \a newctx.
 */
static
int sptlrpc_req_ctx_switch(struct ptlrpc_request *req,
			   struct ptlrpc_cli_ctx *oldctx,
			   struct ptlrpc_cli_ctx *newctx)
{
	struct sptlrpc_flavor old_flvr;
	char *reqmsg = NULL; /* to workaround old gcc */
	int reqmsg_size;
	int rc = 0;

	CDEBUG(D_SEC,
	       "req %p: switch ctx %p(%u->%s) -> %p(%u->%s), switch sec %p(%s) -> %p(%s)\n",
	       req, oldctx, oldctx->cc_vcred.vc_uid,
	       sec2target_str(oldctx->cc_sec), newctx, newctx->cc_vcred.vc_uid,
	       sec2target_str(newctx->cc_sec), oldctx->cc_sec,
	       oldctx->cc_sec->ps_policy->sp_name, newctx->cc_sec,
	       newctx->cc_sec->ps_policy->sp_name);

	/* save flavor */
	old_flvr = req->rq_flvr;

	/* save request message */
	reqmsg_size = req->rq_reqlen;
	if (reqmsg_size != 0) {
		LASSERT(req->rq_reqmsg);
		OBD_ALLOC_LARGE(reqmsg, reqmsg_size);
		if (reqmsg == NULL)
			return -ENOMEM;
		memcpy(reqmsg, req->rq_reqmsg, reqmsg_size);
	}

	/* release old req/rep buf: temporarily point rq_cli_ctx at oldctx
	 * so the free routines operate on the owning context */
	req->rq_cli_ctx = oldctx;
	sptlrpc_cli_free_reqbuf(req);
	sptlrpc_cli_free_repbuf(req);
	req->rq_cli_ctx = newctx;

	/* recalculate the flavor */
	sptlrpc_req_set_flavor(req, 0);

	/*
	 * alloc new request buffer
	 * we don't need to alloc reply buffer here, leave it to the
	 * rest procedure of ptlrpc
	 */
	if (reqmsg_size != 0) {
		rc = sptlrpc_cli_alloc_reqbuf(req, reqmsg_size);
		if (!rc) {
			LASSERT(req->rq_reqmsg);
			memcpy(req->rq_reqmsg, reqmsg, reqmsg_size);
		} else {
			CWARN("failed to alloc reqbuf: %d\n", rc);
			req->rq_flvr = old_flvr;
		}

		OBD_FREE_LARGE(reqmsg, reqmsg_size);
	}
	return rc;
}
567
568 /**
569  * If current context of \a req is dead somehow, e.g. we just switched flavor
570  * thus marked original contexts dead, we'll find a new context for it. if
571  * no switch is needed, \a req will end up with the same context.
572  *
573  * \note a request must have a context, to keep other parts of code happy.
574  * In any case of failure during the switching, we must restore the old one.
575  */
576 int sptlrpc_req_replace_dead_ctx(struct ptlrpc_request *req,
577                                  struct ptlrpc_sec *sec)
578 {
579         struct ptlrpc_cli_ctx *oldctx = req->rq_cli_ctx;
580         struct ptlrpc_cli_ctx *newctx;
581         int rc;
582
583         ENTRY;
584
585         LASSERT(oldctx);
586
587         sptlrpc_cli_ctx_get(oldctx);
588         sptlrpc_req_put_ctx(req, 0);
589
590         /* If sec is provided, we must use the existing context for root that
591          * it references. If not root, or no existing context, or same context,
592          * just fail replacing the dead context.
593          */
594         if (sec) {
595                 newctx = get_my_ctx(sec, true);
596                 if (!newctx)
597                         GOTO(restore, rc = -EINVAL);
598                 if (IS_ERR(newctx))
599                         GOTO(restore, rc = PTR_ERR(newctx));
600                 if (newctx == oldctx) {
601                         sptlrpc_cli_ctx_put(newctx, 0);
602                         GOTO(restore, rc = -ENODATA);
603                 }
604                 /* Because we are replacing an erroneous ctx, new sec ctx is
605                  * expected to have higher imp generation or same imp generation
606                  * but higher imp connection count.
607                  */
608                 if (newctx->cc_impgen < oldctx->cc_impgen ||
609                     (newctx->cc_impgen == oldctx->cc_impgen &&
610                      newctx->cc_impconncnt <= oldctx->cc_impconncnt))
611                         CERROR("ctx (%p, fl %lx) will switch, but does not look more recent than old ctx: imp gen %d vs %d, imp conn cnt %d vs %d\n",
612                                newctx, newctx->cc_flags,
613                                newctx->cc_impgen, oldctx->cc_impgen,
614                                newctx->cc_impconncnt, oldctx->cc_impconncnt);
615                 req->rq_cli_ctx = newctx;
616         } else {
617                 rc = sptlrpc_req_get_ctx(req);
618                 if (unlikely(rc)) {
619                         LASSERT(!req->rq_cli_ctx);
620
621                         /* restore old ctx */
622                         GOTO(restore, rc);
623                 }
624                 newctx = req->rq_cli_ctx;
625         }
626
627         LASSERT(newctx);
628
629         if (unlikely(newctx == oldctx &&
630                      test_bit(PTLRPC_CTX_DEAD_BIT, &oldctx->cc_flags))) {
631                 /*
632                  * still get the old dead ctx, usually means system too busy
633                  */
634                 CDEBUG(D_SEC,
635                        "ctx (%p, fl %lx) doesn't switch, relax a little bit\n",
636                        newctx, newctx->cc_flags);
637
638                 schedule_timeout_interruptible(cfs_time_seconds(1));
639         } else if (unlikely(test_bit(PTLRPC_CTX_UPTODATE_BIT, &newctx->cc_flags)
640                             == 0)) {
641                 /*
642                  * new ctx not up to date yet
643                  */
644                 CDEBUG(D_SEC,
645                        "ctx (%p, fl %lx) doesn't switch, not up to date yet\n",
646                        newctx, newctx->cc_flags);
647         } else {
648                 /*
649                  * it's possible newctx == oldctx if we're switching
650                  * subflavor with the same sec.
651                  */
652                 rc = sptlrpc_req_ctx_switch(req, oldctx, newctx);
653                 if (rc) {
654                         /* restore old ctx */
655                         sptlrpc_req_put_ctx(req, 0);
656                         GOTO(restore, rc);
657                 }
658
659                 LASSERT(req->rq_cli_ctx == newctx);
660         }
661
662         sptlrpc_cli_ctx_put(oldctx, 1);
663         RETURN(0);
664
665 restore:
666         req->rq_cli_ctx = oldctx;
667         RETURN(rc);
668 }
669 EXPORT_SYMBOL(sptlrpc_req_replace_dead_ctx);
670
/* Wait-condition helper: non-zero once \a ctx has reached a refreshed
 * (terminal) state. */
static
int ctx_check_refresh(struct ptlrpc_cli_ctx *ctx)
{
	return cli_ctx_is_refreshed(ctx) ? 1 : 0;
}
678
/* Mark \a req interrupted after the wait for a ctx refresh was broken by
 * a signal. */
static
void ctx_refresh_interrupt(struct ptlrpc_request *req)
{

	spin_lock(&req->rq_lock);
	req->rq_intr = 1;
	spin_unlock(&req->rq_lock);
}
687
/* Unlink \a req from \a ctx's waiting list, if it is queued there. */
static
void req_off_ctx_list(struct ptlrpc_request *req, struct ptlrpc_cli_ctx *ctx)
{
	spin_lock(&ctx->cc_lock);
	if (!list_empty(&req->rq_ctx_chain))
		list_del_init(&req->rq_ctx_chain);
	spin_unlock(&ctx->cc_lock);
}
696
697 /**
698  * To refresh the context of \req, if it's not up-to-date.
699  * \param timeout
700  * - == 0: do not wait
701  * - == MAX_SCHEDULE_TIMEOUT: wait indefinitely
702  * - > 0: not supported
703  *
704  * The status of the context could be subject to be changed by other threads
705  * at any time. We allow this race, but once we return with 0, the caller will
706  * suppose it's uptodated and keep using it until the owning rpc is done.
707  *
708  * \retval 0 only if the context is uptodated.
709  * \retval -ev error number.
710  */
711 int sptlrpc_req_refresh_ctx(struct ptlrpc_request *req, long timeout)
712 {
713         struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
714         struct ptlrpc_sec *sec;
715         int rc;
716
717         ENTRY;
718
719         LASSERT(ctx);
720
721         if (req->rq_ctx_init || req->rq_ctx_fini)
722                 RETURN(0);
723
724         if (timeout != 0 && timeout != MAX_SCHEDULE_TIMEOUT) {
725                 CERROR("req %p: invalid timeout %lu\n", req, timeout);
726                 RETURN(-EINVAL);
727         }
728
729         /*
730          * during the process a request's context might change type even
731          * (e.g. from gss ctx to null ctx), so each loop we need to re-check
732          * everything
733          */
734 again:
735         rc = import_sec_validate_get(req->rq_import, &sec);
736         if (rc)
737                 RETURN(rc);
738
739         if (sec->ps_flvr.sf_rpc != req->rq_flvr.sf_rpc) {
740                 CDEBUG(D_SEC, "req %p: flavor has changed %x -> %x\n",
741                        req, req->rq_flvr.sf_rpc, sec->ps_flvr.sf_rpc);
742                 req_off_ctx_list(req, ctx);
743                 sptlrpc_req_replace_dead_ctx(req, NULL);
744                 ctx = req->rq_cli_ctx;
745         }
746
747         if (cli_ctx_is_eternal(ctx))
748                 GOTO(out_sec_put, rc = 0);
749
750         if (unlikely(test_bit(PTLRPC_CTX_NEW_BIT, &ctx->cc_flags))) {
751                 if (ctx->cc_ops->refresh)
752                         ctx->cc_ops->refresh(ctx);
753         }
754         LASSERT(test_bit(PTLRPC_CTX_NEW_BIT, &ctx->cc_flags) == 0);
755
756         LASSERT(ctx->cc_ops->validate);
757         if (ctx->cc_ops->validate(ctx) == 0) {
758                 req_off_ctx_list(req, ctx);
759                 GOTO(out_sec_put, rc = 0);
760         }
761
762         if (unlikely(test_bit(PTLRPC_CTX_ERROR_BIT, &ctx->cc_flags))) {
763                 if (unlikely(test_bit(PTLRPC_CTX_DEAD_BIT, &ctx->cc_flags)) &&
764                     sptlrpc_req_replace_dead_ctx(req, sec) == 0) {
765                         ctx = req->rq_cli_ctx;
766                         sptlrpc_sec_put(sec);
767                         goto again;
768                 }
769                 spin_lock(&req->rq_lock);
770                 req->rq_err = 1;
771                 spin_unlock(&req->rq_lock);
772                 req_off_ctx_list(req, ctx);
773                 GOTO(out_sec_put, rc = -EPERM);
774 out_sec_put:
775                 sptlrpc_sec_put(sec);
776                 RETURN(rc);
777         }
778         sptlrpc_sec_put(sec);
779
780         /*
781          * There's a subtle issue for resending RPCs, suppose following
782          * situation:
783          *  1. the request was sent to server.
784          *  2. recovery was kicked start, after finished the request was
785          *     marked as resent.
786          *  3. resend the request.
787          *  4. old reply from server received, we accept and verify the reply.
788          *     this has to be success, otherwise the error will be aware
789          *     by application.
790          *  5. new reply from server received, dropped by LNet.
791          *
792          * Note the xid of old & new request is the same. We can't simply
793          * change xid for the resent request because the server replies on
794          * it for reply reconstruction.
795          *
796          * Commonly the original context should be uptodate because we
797          * have an expiry nice time; server will keep its context because
798          * we at least hold a ref of old context which prevent context
799          * from destroying RPC being sent. So server still can accept the
800          * request and finish the RPC. But if that's not the case:
801          *  1. If server side context has been trimmed, a NO_CONTEXT will
802          *     be returned, gss_cli_ctx_verify/unseal will switch to new
803          *     context by force.
804          *  2. Current context never be refreshed, then we are fine: we
805          *     never really send request with old context before.
806          */
807         if (test_bit(PTLRPC_CTX_UPTODATE_BIT, &ctx->cc_flags) &&
808             unlikely(req->rq_reqmsg) &&
809             lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT) {
810                 req_off_ctx_list(req, ctx);
811                 RETURN(0);
812         }
813
814         if (unlikely(test_bit(PTLRPC_CTX_DEAD_BIT, &ctx->cc_flags))) {
815                 req_off_ctx_list(req, ctx);
816                 /*
817                  * don't switch ctx if import was deactivated
818                  */
819                 if (req->rq_import->imp_deactive) {
820                         spin_lock(&req->rq_lock);
821                         req->rq_err = 1;
822                         spin_unlock(&req->rq_lock);
823                         RETURN(-EINTR);
824                 }
825
826                 rc = sptlrpc_req_replace_dead_ctx(req, NULL);
827                 if (rc) {
828                         LASSERT(ctx == req->rq_cli_ctx);
829                         CERROR("req %p: failed to replace dead ctx %p: %d\n",
830                                req, ctx, rc);
831                         spin_lock(&req->rq_lock);
832                         req->rq_err = 1;
833                         spin_unlock(&req->rq_lock);
834                         RETURN(rc);
835                 }
836
837                 ctx = req->rq_cli_ctx;
838                 goto again;
839         }
840
841         /*
842          * Now we're sure this context is during upcall, add myself into
843          * waiting list
844          */
845         spin_lock(&ctx->cc_lock);
846         if (list_empty(&req->rq_ctx_chain))
847                 list_add(&req->rq_ctx_chain, &ctx->cc_req_list);
848         spin_unlock(&ctx->cc_lock);
849
850         if (timeout == 0)
851                 RETURN(-EAGAIN);
852
853         /* Clear any flags that may be present from previous sends */
854         LASSERT(req->rq_receiving_reply == 0);
855         spin_lock(&req->rq_lock);
856         req->rq_err = 0;
857         req->rq_timedout = 0;
858         req->rq_resend = 0;
859         req->rq_restart = 0;
860         spin_unlock(&req->rq_lock);
861
862         /* by now we know that timeout value is MAX_SCHEDULE_TIMEOUT,
863          * so wait indefinitely with non-fatal signals blocked
864          */
865         if (l_wait_event_abortable(req->rq_reply_waitq,
866                                    ctx_check_refresh(ctx)) == -ERESTARTSYS) {
867                 rc = -EINTR;
868                 ctx_refresh_interrupt(req);
869         }
870
871         /*
872          * following cases could lead us here:
873          * - successfully refreshed;
874          * - interrupted;
875          * - timedout, and we don't want recover from the failure;
876          * - timedout, and waked up upon recovery finished;
877          * - someone else mark this ctx dead by force;
878          * - someone invalidate the req and call ptlrpc_client_wake_req(),
879          *   e.g. ptlrpc_abort_inflight();
880          */
881         if (!cli_ctx_is_refreshed(ctx)) {
882                 /* timed out or interruptted */
883                 req_off_ctx_list(req, ctx);
884
885                 LASSERT(rc != 0);
886                 RETURN(rc);
887         }
888
889         goto again;
890 }
891
892 /* Bring ptlrpc_sec context up-to-date */
893 int sptlrpc_export_update_ctx(struct obd_export *exp)
894 {
895         struct obd_import *imp = exp ? exp->exp_imp_reverse : NULL;
896         struct ptlrpc_sec *sec = NULL;
897         struct ptlrpc_cli_ctx *ctx = NULL;
898         int rc = 0;
899
900         if (imp)
901                 sec = sptlrpc_import_sec_ref(imp);
902         if (sec) {
903                 ctx = get_my_ctx(sec, false);
904                 if (IS_ERR(ctx))
905                         ctx = NULL;
906                 sptlrpc_sec_put(sec);
907         }
908
909         if (ctx) {
910                 if (ctx->cc_ops->refresh)
911                         rc = ctx->cc_ops->refresh(ctx);
912                 sptlrpc_cli_ctx_put(ctx, 1);
913         }
914         return rc;
915 }
916
917 /**
918  * Initialize flavor settings for \a req, according to \a opcode.
919  *
920  * \note this could be called in two situations:
921  * - new request from ptlrpc_pre_req(), with proper @opcode
922  * - old request which changed ctx in the middle, with @opcode == 0
923  */
924 void sptlrpc_req_set_flavor(struct ptlrpc_request *req, int opcode)
925 {
926         struct ptlrpc_sec *sec;
927
928         LASSERT(req->rq_import);
929         LASSERT(req->rq_cli_ctx);
930         LASSERT(req->rq_cli_ctx->cc_sec);
931         LASSERT(req->rq_bulk_read == 0 || req->rq_bulk_write == 0);
932
933         /* special security flags according to opcode */
934         switch (opcode) {
935         case OST_READ:
936         case MDS_READPAGE:
937         case MGS_CONFIG_READ:
938         case OBD_IDX_READ:
939                 req->rq_bulk_read = 1;
940                 break;
941         case OST_WRITE:
942         case MDS_WRITEPAGE:
943                 req->rq_bulk_write = 1;
944                 break;
945         case SEC_CTX_INIT:
946                 req->rq_ctx_init = 1;
947                 break;
948         case SEC_CTX_FINI:
949                 req->rq_ctx_fini = 1;
950                 break;
951         case 0:
952                 /* init/fini rpc won't be resend, so can't be here */
953                 LASSERT(req->rq_ctx_init == 0);
954                 LASSERT(req->rq_ctx_fini == 0);
955
956                 /* cleanup flags, which should be recalculated */
957                 req->rq_pack_udesc = 0;
958                 req->rq_pack_bulk = 0;
959                 break;
960         }
961
962         sec = req->rq_cli_ctx->cc_sec;
963
964         spin_lock(&sec->ps_lock);
965         req->rq_flvr = sec->ps_flvr;
966         spin_unlock(&sec->ps_lock);
967
968         /*
969          * force SVC_NULL for context initiation rpc, SVC_INTG for context
970          * destruction rpc
971          */
972         if (unlikely(req->rq_ctx_init))
973                 flvr_set_svc(&req->rq_flvr.sf_rpc, SPTLRPC_SVC_NULL);
974         else if (unlikely(req->rq_ctx_fini))
975                 flvr_set_svc(&req->rq_flvr.sf_rpc, SPTLRPC_SVC_INTG);
976
977         /* user descriptor flag, null security can't do it anyway */
978         if ((sec->ps_flvr.sf_flags & PTLRPC_SEC_FL_UDESC) &&
979             (req->rq_flvr.sf_rpc != SPTLRPC_FLVR_NULL))
980                 req->rq_pack_udesc = 1;
981
982         /* bulk security flag */
983         if ((req->rq_bulk_read || req->rq_bulk_write) &&
984             sptlrpc_flavor_has_bulk(&req->rq_flvr))
985                 req->rq_pack_bulk = 1;
986 }
987
988 void sptlrpc_request_out_callback(struct ptlrpc_request *req)
989 {
990         if (SPTLRPC_FLVR_SVC(req->rq_flvr.sf_rpc) != SPTLRPC_SVC_PRIV)
991                 return;
992
993         LASSERT(req->rq_clrbuf);
994         if (req->rq_pool || !req->rq_reqbuf)
995                 return;
996
997         OBD_FREE(req->rq_reqbuf, req->rq_reqbuf_len);
998         req->rq_reqbuf = NULL;
999         req->rq_reqbuf_len = 0;
1000 }
1001
1002 /**
1003  * Given an import \a imp, check whether current user has a valid context
1004  * or not. We may create a new context and try to refresh it, and try
1005  * repeatedly try in case of non-fatal errors. Return 0 means success.
1006  */
1007 int sptlrpc_import_check_ctx(struct obd_import *imp)
1008 {
1009         struct ptlrpc_sec     *sec;
1010         struct ptlrpc_cli_ctx *ctx;
1011         struct ptlrpc_request *req = NULL;
1012         int rc;
1013
1014         ENTRY;
1015
1016         might_sleep();
1017
1018         sec = sptlrpc_import_sec_ref(imp);
1019         ctx = get_my_ctx(sec, false);
1020         sptlrpc_sec_put(sec);
1021
1022         if (IS_ERR(ctx))
1023                 RETURN(PTR_ERR(ctx));
1024         else if (!ctx)
1025                 RETURN(-ENOMEM);
1026
1027         if (cli_ctx_is_eternal(ctx) ||
1028             ctx->cc_ops->validate(ctx) == 0) {
1029                 sptlrpc_cli_ctx_put(ctx, 1);
1030                 RETURN(0);
1031         }
1032
1033         if (cli_ctx_is_error(ctx)) {
1034                 sptlrpc_cli_ctx_put(ctx, 1);
1035                 RETURN(-EACCES);
1036         }
1037
1038         req = ptlrpc_request_cache_alloc(GFP_NOFS);
1039         if (!req)
1040                 RETURN(-ENOMEM);
1041
1042         ptlrpc_cli_req_init(req);
1043         atomic_set(&req->rq_refcount, 10000);
1044
1045         req->rq_import = imp;
1046         req->rq_flvr = sec->ps_flvr;
1047         req->rq_cli_ctx = ctx;
1048
1049         rc = sptlrpc_req_refresh_ctx(req, MAX_SCHEDULE_TIMEOUT);
1050         LASSERT(list_empty(&req->rq_ctx_chain));
1051         sptlrpc_cli_ctx_put(req->rq_cli_ctx, 1);
1052         ptlrpc_request_cache_free(req);
1053
1054         RETURN(rc);
1055 }
1056
1057 /**
1058  * Used by ptlrpc client, to perform the pre-defined security transformation
1059  * upon the request message of \a req. After this function called,
1060  * req->rq_reqmsg is still accessible as clear text.
1061  */
1062 int sptlrpc_cli_wrap_request(struct ptlrpc_request *req)
1063 {
1064         struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
1065         int rc = 0;
1066
1067         ENTRY;
1068
1069         LASSERT(ctx);
1070         LASSERT(ctx->cc_sec);
1071         LASSERT(req->rq_reqbuf || req->rq_clrbuf);
1072
1073         /*
1074          * we wrap bulk request here because now we can be sure
1075          * the context is uptodate.
1076          */
1077         if (req->rq_bulk) {
1078                 rc = sptlrpc_cli_wrap_bulk(req, req->rq_bulk);
1079                 if (rc)
1080                         RETURN(rc);
1081         }
1082
1083         switch (SPTLRPC_FLVR_SVC(req->rq_flvr.sf_rpc)) {
1084         case SPTLRPC_SVC_NULL:
1085         case SPTLRPC_SVC_AUTH:
1086         case SPTLRPC_SVC_INTG:
1087                 LASSERT(ctx->cc_ops->sign);
1088                 rc = ctx->cc_ops->sign(ctx, req);
1089                 break;
1090         case SPTLRPC_SVC_PRIV:
1091                 LASSERT(ctx->cc_ops->seal);
1092                 rc = ctx->cc_ops->seal(ctx, req);
1093                 break;
1094         default:
1095                 LBUG();
1096         }
1097
1098         if (rc == 0) {
1099                 LASSERT(req->rq_reqdata_len);
1100                 LASSERT(req->rq_reqdata_len % 8 == 0);
1101                 LASSERT(req->rq_reqdata_len <= req->rq_reqbuf_len);
1102         }
1103
1104         RETURN(rc);
1105 }
1106
/*
 * Verify/unseal the reply data at req->rq_repdata according to the request
 * flavor.  On success the policy hook leaves req->rq_repmsg pointing at the
 * clear-text reply message.
 */
static int do_cli_unwrap_reply(struct ptlrpc_request *req)
{
	struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
	int rc;

	ENTRY;

	LASSERT(ctx);
	LASSERT(ctx->cc_sec);
	LASSERT(req->rq_repbuf);
	LASSERT(req->rq_repdata);
	LASSERT(req->rq_repmsg == NULL);

	req->rq_rep_swab_mask = 0;

	rc = __lustre_unpack_msg(req->rq_repdata, req->rq_repdata_len);
	switch (rc) {
	case 1:
		/* rc == 1: message arrived in foreign endianness and was
		 * byte-swapped during unpack; record that for the capsule */
		req_capsule_set_rep_swabbed(&req->rq_pill,
					    MSG_PTLRPC_HEADER_OFF);
		/* fallthrough */
	case 0:
		break;
	default:
		CERROR("failed unpack reply: x%llu\n", req->rq_xid);
		RETURN(-EPROTO);
	}

	if (req->rq_repdata_len < sizeof(struct lustre_msg)) {
		CERROR("replied data length %d too small\n",
		       req->rq_repdata_len);
		RETURN(-EPROTO);
	}

	/* the reply must be wrapped with the same policy the request used */
	if (SPTLRPC_FLVR_POLICY(req->rq_repdata->lm_secflvr) !=
	    SPTLRPC_FLVR_POLICY(req->rq_flvr.sf_rpc)) {
		CERROR("reply policy %u doesn't match request policy %u\n",
		       SPTLRPC_FLVR_POLICY(req->rq_repdata->lm_secflvr),
		       SPTLRPC_FLVR_POLICY(req->rq_flvr.sf_rpc));
		RETURN(-EPROTO);
	}

	/* privacy flavors unseal the message, all others verify a signature */
	switch (SPTLRPC_FLVR_SVC(req->rq_flvr.sf_rpc)) {
	case SPTLRPC_SVC_NULL:
	case SPTLRPC_SVC_AUTH:
	case SPTLRPC_SVC_INTG:
		LASSERT(ctx->cc_ops->verify);
		rc = ctx->cc_ops->verify(ctx, req);
		break;
	case SPTLRPC_SVC_PRIV:
		LASSERT(ctx->cc_ops->unseal);
		rc = ctx->cc_ops->unseal(ctx, req);
		break;
	default:
		LBUG();
	}
	LASSERT(rc || req->rq_repmsg || req->rq_resend);

	/* NOTE(review): presumably the swab state recorded above applies to
	 * the security wrapper only, the embedded message being handled by
	 * the policy's verify/unseal — confirm why the mask is cleared here
	 * for non-null policies (except during context init).
	 */
	if (SPTLRPC_FLVR_POLICY(req->rq_flvr.sf_rpc) != SPTLRPC_POLICY_NULL &&
	    !req->rq_ctx_init)
		req->rq_rep_swab_mask = 0;
	RETURN(rc);
}
1169
1170 /**
1171  * Used by ptlrpc client, to perform security transformation upon the reply
1172  * message of \a req. After return successfully, req->rq_repmsg points to
1173  * the reply message in clear text.
1174  *
1175  * \pre the reply buffer should have been un-posted from LNet, so nothing is
1176  * going to change.
1177  */
1178 int sptlrpc_cli_unwrap_reply(struct ptlrpc_request *req)
1179 {
1180         LASSERT(req->rq_repbuf);
1181         LASSERT(req->rq_repdata == NULL);
1182         LASSERT(req->rq_repmsg == NULL);
1183         LASSERT(req->rq_reply_off + req->rq_nob_received <= req->rq_repbuf_len);
1184
1185         if (req->rq_reply_off == 0 &&
1186             (lustre_msghdr_get_flags(req->rq_reqmsg) & MSGHDR_AT_SUPPORT)) {
1187                 CERROR("real reply with offset 0\n");
1188                 return -EPROTO;
1189         }
1190
1191         if (req->rq_reply_off % 8 != 0) {
1192                 CERROR("reply at odd offset %u\n", req->rq_reply_off);
1193                 return -EPROTO;
1194         }
1195
1196         req->rq_repdata = (struct lustre_msg *)
1197                                 (req->rq_repbuf + req->rq_reply_off);
1198         req->rq_repdata_len = req->rq_nob_received;
1199
1200         return do_cli_unwrap_reply(req);
1201 }
1202
1203 /**
1204  * Used by ptlrpc client, to perform security transformation upon the early
1205  * reply message of \a req. We expect the rq_reply_off is 0, and
1206  * rq_nob_received is the early reply size.
1207  *
1208  * Because the receive buffer might be still posted, the reply data might be
1209  * changed at any time, no matter we're holding rq_lock or not. For this reason
1210  * we allocate a separate ptlrpc_request and reply buffer for early reply
1211  * processing.
1212  *
1213  * \retval 0 success, \a req_ret is filled with a duplicated ptlrpc_request.
1214  * Later the caller must call sptlrpc_cli_finish_early_reply() on the returned
1215  * \a *req_ret to release it.
1216  * \retval -ev error number, and \a req_ret will not be set.
1217  */
1218 int sptlrpc_cli_unwrap_early_reply(struct ptlrpc_request *req,
1219                                    struct ptlrpc_request **req_ret)
1220 {
1221         struct ptlrpc_request *early_req;
1222         char *early_buf;
1223         int early_bufsz, early_size;
1224         int rc;
1225
1226         ENTRY;
1227
1228         early_req = ptlrpc_request_cache_alloc(GFP_NOFS);
1229         if (early_req == NULL)
1230                 RETURN(-ENOMEM);
1231
1232         ptlrpc_cli_req_init(early_req);
1233
1234         early_size = req->rq_nob_received;
1235         early_bufsz = size_roundup_power2(early_size);
1236         OBD_ALLOC_LARGE(early_buf, early_bufsz);
1237         if (early_buf == NULL)
1238                 GOTO(err_req, rc = -ENOMEM);
1239
1240         /* sanity checkings and copy data out, do it inside spinlock */
1241         spin_lock(&req->rq_lock);
1242
1243         if (req->rq_replied) {
1244                 spin_unlock(&req->rq_lock);
1245                 GOTO(err_buf, rc = -EALREADY);
1246         }
1247
1248         LASSERT(req->rq_repbuf);
1249         LASSERT(req->rq_repdata == NULL);
1250         LASSERT(req->rq_repmsg == NULL);
1251
1252         if (req->rq_reply_off != 0) {
1253                 CERROR("early reply with offset %u\n", req->rq_reply_off);
1254                 spin_unlock(&req->rq_lock);
1255                 GOTO(err_buf, rc = -EPROTO);
1256         }
1257
1258         if (req->rq_nob_received != early_size) {
1259                 /* even another early arrived the size should be the same */
1260                 CERROR("data size has changed from %u to %u\n",
1261                        early_size, req->rq_nob_received);
1262                 spin_unlock(&req->rq_lock);
1263                 GOTO(err_buf, rc = -EINVAL);
1264         }
1265
1266         if (req->rq_nob_received < sizeof(struct lustre_msg)) {
1267                 CERROR("early reply length %d too small\n",
1268                        req->rq_nob_received);
1269                 spin_unlock(&req->rq_lock);
1270                 GOTO(err_buf, rc = -EALREADY);
1271         }
1272
1273         memcpy(early_buf, req->rq_repbuf, early_size);
1274         spin_unlock(&req->rq_lock);
1275
1276         early_req->rq_cli_ctx = sptlrpc_cli_ctx_get(req->rq_cli_ctx);
1277         early_req->rq_flvr = req->rq_flvr;
1278         early_req->rq_repbuf = early_buf;
1279         early_req->rq_repbuf_len = early_bufsz;
1280         early_req->rq_repdata = (struct lustre_msg *) early_buf;
1281         early_req->rq_repdata_len = early_size;
1282         early_req->rq_early = 1;
1283         early_req->rq_reqmsg = req->rq_reqmsg;
1284
1285         rc = do_cli_unwrap_reply(early_req);
1286         if (rc) {
1287                 DEBUG_REQ(D_ADAPTTO, early_req,
1288                           "unwrap early reply: rc = %d", rc);
1289                 GOTO(err_ctx, rc);
1290         }
1291
1292         LASSERT(early_req->rq_repmsg);
1293         *req_ret = early_req;
1294         RETURN(0);
1295
1296 err_ctx:
1297         sptlrpc_cli_ctx_put(early_req->rq_cli_ctx, 1);
1298 err_buf:
1299         OBD_FREE_LARGE(early_buf, early_bufsz);
1300 err_req:
1301         ptlrpc_request_cache_free(early_req);
1302         RETURN(rc);
1303 }
1304
1305 /**
1306  * Used by ptlrpc client, to release a processed early reply \a early_req.
1307  *
1308  * \pre \a early_req was obtained from calling sptlrpc_cli_unwrap_early_reply().
1309  */
1310 void sptlrpc_cli_finish_early_reply(struct ptlrpc_request *early_req)
1311 {
1312         LASSERT(early_req->rq_repbuf);
1313         LASSERT(early_req->rq_repdata);
1314         LASSERT(early_req->rq_repmsg);
1315
1316         sptlrpc_cli_ctx_put(early_req->rq_cli_ctx, 1);
1317         OBD_FREE_LARGE(early_req->rq_repbuf, early_req->rq_repbuf_len);
1318         ptlrpc_request_cache_free(early_req);
1319 }
1320
1321 /**************************************************
1322  * sec ID                                         *
1323  **************************************************/
1324
1325 /*
1326  * "fixed" sec (e.g. null) use sec_id < 0
1327  */
1328 static atomic_t sptlrpc_sec_id = ATOMIC_INIT(1);
1329
1330 int sptlrpc_get_next_secid(void)
1331 {
1332         return atomic_inc_return(&sptlrpc_sec_id);
1333 }
1334 EXPORT_SYMBOL(sptlrpc_get_next_secid);
1335
1336 /*
1337  * client side high-level security APIs
1338  */
1339
1340 static int sec_cop_flush_ctx_cache(struct ptlrpc_sec *sec, uid_t uid,
1341                                    int grace, int force)
1342 {
1343         struct ptlrpc_sec_policy *policy = sec->ps_policy;
1344
1345         LASSERT(policy->sp_cops);
1346         LASSERT(policy->sp_cops->flush_ctx_cache);
1347
1348         return policy->sp_cops->flush_ctx_cache(sec, uid, grace, force);
1349 }
1350
/*
 * Final teardown of \a sec: detach and release the cached SELinux policy
 * state, then invoke the policy's destroy_sec hook and drop the policy
 * reference.  May only run once the refcount has reached zero.
 */
static void sec_cop_destroy_sec(struct ptlrpc_sec *sec)
{
	struct ptlrpc_sec_policy *policy = sec->ps_policy;
	struct sptlrpc_sepol *sepol;

	LASSERT(atomic_read(&sec->ps_refcount) == 0);
	LASSERT(policy->sp_cops->destroy_sec);

	CDEBUG(D_SEC, "%s@%p: being destroyed\n", sec->ps_policy->sp_name, sec);

	/* clear the sepol pointer under ps_lock; holding the lock justifies
	 * the rcu_dereference_protected() (and no readers can remain since
	 * the refcount is zero) */
	spin_lock(&sec->ps_lock);
	sec->ps_sepol_checknext = ktime_set(0, 0);
	sepol = rcu_dereference_protected(sec->ps_sepol, 1);
	rcu_assign_pointer(sec->ps_sepol, NULL);
	spin_unlock(&sec->ps_lock);

	sptlrpc_sepol_put(sepol);

	/* presumably destroy_sec frees sec itself — do not touch sec after
	 * this call; then balance the policy ref taken at creation */
	policy->sp_cops->destroy_sec(sec);
	sptlrpc_policy_put(policy);
}
1372
/*
 * Exported wrapper around the internal destroy helper.  The refcount of
 * \a sec must already be zero (asserted in sec_cop_destroy_sec()).
 */
void sptlrpc_sec_destroy(struct ptlrpc_sec *sec)
{
	sec_cop_destroy_sec(sec);
}
EXPORT_SYMBOL(sptlrpc_sec_destroy);
1378
1379 static void sptlrpc_sec_kill(struct ptlrpc_sec *sec)
1380 {
1381         LASSERT(atomic_read(&(sec)->ps_refcount) > 0);
1382
1383         if (sec->ps_policy->sp_cops->kill_sec) {
1384                 sec->ps_policy->sp_cops->kill_sec(sec);
1385
1386                 sec_cop_flush_ctx_cache(sec, -1, 1, 1);
1387         }
1388 }
1389
1390 struct ptlrpc_sec *sptlrpc_sec_get(struct ptlrpc_sec *sec)
1391 {
1392         if (sec)
1393                 atomic_inc(&sec->ps_refcount);
1394
1395         return sec;
1396 }
1397 EXPORT_SYMBOL(sptlrpc_sec_get);
1398
1399 void sptlrpc_sec_put(struct ptlrpc_sec *sec)
1400 {
1401         if (sec) {
1402                 LASSERT(atomic_read(&(sec)->ps_refcount) > 0);
1403
1404                 if (atomic_dec_and_test(&sec->ps_refcount)) {
1405                         sptlrpc_gc_del_sec(sec);
1406                         sec_cop_destroy_sec(sec);
1407                 }
1408         }
1409 }
1410 EXPORT_SYMBOL(sptlrpc_sec_put);
1411
1412 /*
1413  * policy module is responsible for taking refrence of import
1414  */
1415 static
1416 struct ptlrpc_sec * sptlrpc_sec_create(struct obd_import *imp,
1417                                        struct ptlrpc_svc_ctx *svc_ctx,
1418                                        struct sptlrpc_flavor *sf,
1419                                        enum lustre_sec_part sp)
1420 {
1421         struct ptlrpc_sec_policy *policy;
1422         struct ptlrpc_sec *sec;
1423         char str[32];
1424
1425         ENTRY;
1426
1427         if (svc_ctx) {
1428                 LASSERT(imp->imp_dlm_fake == 1);
1429
1430                 CDEBUG(D_SEC, "%s %s: reverse sec using flavor %s\n",
1431                        imp->imp_obd->obd_type->typ_name,
1432                        imp->imp_obd->obd_name,
1433                        sptlrpc_flavor2name(sf, str, sizeof(str)));
1434
1435                 policy = sptlrpc_policy_get(svc_ctx->sc_policy);
1436                 sf->sf_flags |= PTLRPC_SEC_FL_REVERSE | PTLRPC_SEC_FL_ROOTONLY;
1437         } else {
1438                 LASSERT(imp->imp_dlm_fake == 0);
1439
1440                 CDEBUG(D_SEC, "%s %s: select security flavor %s\n",
1441                        imp->imp_obd->obd_type->typ_name,
1442                        imp->imp_obd->obd_name,
1443                        sptlrpc_flavor2name(sf, str, sizeof(str)));
1444
1445                 policy = sptlrpc_wireflavor2policy(sf->sf_rpc);
1446                 if (!policy) {
1447                         CERROR("invalid flavor 0x%x\n", sf->sf_rpc);
1448                         RETURN(NULL);
1449                 }
1450         }
1451
1452         sec = policy->sp_cops->create_sec(imp, svc_ctx, sf);
1453         if (sec) {
1454                 atomic_inc(&sec->ps_refcount);
1455
1456                 sec->ps_part = sp;
1457
1458                 if (sec->ps_gc_interval && policy->sp_cops->gc_ctx)
1459                         sptlrpc_gc_add_sec(sec);
1460         } else {
1461                 sptlrpc_policy_put(policy);
1462         }
1463
1464         RETURN(sec);
1465 }
1466
1467 static int print_srpc_serverctx_seq(struct obd_export *exp, void *cb_data)
1468 {
1469         struct seq_file *m = cb_data;
1470         struct obd_import *imp = exp->exp_imp_reverse;
1471         struct ptlrpc_sec *sec = NULL;
1472
1473         if (imp)
1474                 sec = sptlrpc_import_sec_ref(imp);
1475         if (sec == NULL)
1476                 goto out;
1477
1478         if (sec->ps_policy->sp_cops->display)
1479                 sec->ps_policy->sp_cops->display(sec, m);
1480
1481         sptlrpc_sec_put(sec);
1482 out:
1483         return 0;
1484 }
1485
1486 int lprocfs_srpc_serverctx_seq_show(struct seq_file *m, void *data)
1487 {
1488         struct obd_device *obd = m->private;
1489         struct obd_export *exp, *n;
1490
1491         spin_lock(&obd->obd_dev_lock);
1492         list_for_each_entry_safe(exp, n, &obd->obd_exports, exp_obd_chain) {
1493                 print_srpc_serverctx_seq(exp, m);
1494         }
1495         spin_unlock(&obd->obd_dev_lock);
1496
1497         return 0;
1498 }
1499 EXPORT_SYMBOL(lprocfs_srpc_serverctx_seq_show);
1500
/*
 * Return the import's current sec with a fresh reference held, or NULL if
 * none is installed.  The caller must release it with sptlrpc_sec_put().
 */
struct ptlrpc_sec *sptlrpc_import_sec_ref(struct obd_import *imp)
{
	struct ptlrpc_sec *sec;

	/* read lock pairs with the write lock in sptlrpc_import_sec_install() */
	read_lock(&imp->imp_sec_lock);
	sec = sptlrpc_sec_get(imp->imp_sec);
	read_unlock(&imp->imp_sec_lock);

	return sec;
}
EXPORT_SYMBOL(sptlrpc_import_sec_ref);
1512
/*
 * Install \a sec as the import's current sec; \a sec's reference is donated
 * to the import.  Any previously installed sec is killed and its
 * import-held reference dropped.
 */
static void sptlrpc_import_sec_install(struct obd_import *imp,
				       struct ptlrpc_sec *sec)
{
	struct ptlrpc_sec *old_sec;

	LASSERT(atomic_read(&(sec)->ps_refcount) > 0);

	/* swap the pointer under the write lock so concurrent readers
	 * (sptlrpc_import_sec_ref) always see a consistent sec */
	write_lock(&imp->imp_sec_lock);
	old_sec = imp->imp_sec;
	imp->imp_sec = sec;
	write_unlock(&imp->imp_sec_lock);

	if (old_sec) {
		sptlrpc_sec_kill(old_sec);

		/* balance the ref taken by this import */
		sptlrpc_sec_put(old_sec);
	}
}
1532
1533 static inline
1534 int flavor_equal(struct sptlrpc_flavor *sf1, struct sptlrpc_flavor *sf2)
1535 {
1536         return (memcmp(sf1, sf2, sizeof(*sf1)) == 0);
1537 }
1538
1539 static inline
1540 void flavor_copy(struct sptlrpc_flavor *dst, struct sptlrpc_flavor *src)
1541 {
1542         *dst = *src;
1543 }
1544
1545 /**
1546  * To get an appropriate ptlrpc_sec for the \a imp, according to the current
1547  * configuration. Upon called, imp->imp_sec may or may not be NULL.
1548  *
1549  *  - regular import: \a svc_ctx should be NULL and \a flvr is ignored;
1550  *  - reverse import: \a svc_ctx and \a flvr are obtained from incoming request.
1551  */
1552 int sptlrpc_import_sec_adapt(struct obd_import *imp,
1553                              struct ptlrpc_svc_ctx *svc_ctx,
1554                              struct sptlrpc_flavor *flvr)
1555 {
1556         struct ptlrpc_connection *conn;
1557         struct sptlrpc_flavor sf;
1558         struct ptlrpc_sec *sec, *newsec;
1559         enum lustre_sec_part sp;
1560         char str[24];
1561         int rc = 0;
1562
1563         ENTRY;
1564
1565         might_sleep();
1566
1567         if (imp == NULL)
1568                 RETURN(0);
1569
1570         conn = imp->imp_connection;
1571
1572         if (svc_ctx == NULL) {
1573                 struct client_obd *cliobd = &imp->imp_obd->u.cli;
1574                 /*
1575                  * normal import, determine flavor from rule set, except
1576                  * for mgc the flavor is predetermined.
1577                  */
1578                 if (cliobd->cl_sp_me == LUSTRE_SP_MGC)
1579                         sf = cliobd->cl_flvr_mgc;
1580                 else
1581                         sptlrpc_conf_choose_flavor(cliobd->cl_sp_me,
1582                                                    cliobd->cl_sp_to,
1583                                                    &cliobd->cl_target_uuid,
1584                                                    &conn->c_self, &sf);
1585
1586                 sp = imp->imp_obd->u.cli.cl_sp_me;
1587         } else {
1588                 /* reverse import, determine flavor from incoming reqeust */
1589                 sf = *flvr;
1590
1591                 if (sf.sf_rpc != SPTLRPC_FLVR_NULL)
1592                         sf.sf_flags = PTLRPC_SEC_FL_REVERSE |
1593                                       PTLRPC_SEC_FL_ROOTONLY;
1594
1595                 sp = sptlrpc_target_sec_part(imp->imp_obd);
1596         }
1597
1598         sec = sptlrpc_import_sec_ref(imp);
1599         if (sec) {
1600                 char str2[24];
1601
1602                 if (flavor_equal(&sf, &sec->ps_flvr))
1603                         GOTO(out, rc);
1604
1605                 CDEBUG(D_SEC, "import %s->%s: changing flavor %s -> %s\n",
1606                        imp->imp_obd->obd_name,
1607                        obd_uuid2str(&conn->c_remote_uuid),
1608                        sptlrpc_flavor2name(&sec->ps_flvr, str, sizeof(str)),
1609                        sptlrpc_flavor2name(&sf, str2, sizeof(str2)));
1610         } else if (SPTLRPC_FLVR_BASE(sf.sf_rpc) !=
1611                    SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_NULL)) {
1612                 CDEBUG(D_SEC, "import %s->%s netid %x: select flavor %s\n",
1613                        imp->imp_obd->obd_name,
1614                        obd_uuid2str(&conn->c_remote_uuid),
1615                        LNET_NID_NET(&conn->c_self),
1616                        sptlrpc_flavor2name(&sf, str, sizeof(str)));
1617         }
1618
1619         newsec = sptlrpc_sec_create(imp, svc_ctx, &sf, sp);
1620         if (newsec) {
1621                 sptlrpc_import_sec_install(imp, newsec);
1622         } else {
1623                 CERROR("import %s->%s: failed to create new sec\n",
1624                        imp->imp_obd->obd_name,
1625                        obd_uuid2str(&conn->c_remote_uuid));
1626                 rc = -EPERM;
1627         }
1628
1629 out:
1630         sptlrpc_sec_put(sec);
1631         RETURN(rc);
1632 }
1633
1634 void sptlrpc_import_sec_put(struct obd_import *imp)
1635 {
1636         if (imp->imp_sec) {
1637                 sptlrpc_sec_kill(imp->imp_sec);
1638
1639                 sptlrpc_sec_put(imp->imp_sec);
1640                 imp->imp_sec = NULL;
1641         }
1642 }
1643
1644 static void import_flush_ctx_common(struct obd_import *imp,
1645                                     uid_t uid, int grace, int force)
1646 {
1647         struct ptlrpc_sec *sec;
1648
1649         if (imp == NULL)
1650                 return;
1651
1652         sec = sptlrpc_import_sec_ref(imp);
1653         if (sec == NULL)
1654                 return;
1655
1656         sec_cop_flush_ctx_cache(sec, uid, grace, force);
1657         sptlrpc_sec_put(sec);
1658 }
1659
/* Flush root's (uid 0) cached contexts on \a imp. */
void sptlrpc_import_flush_root_ctx(struct obd_import *imp)
{
	/*
	 * it's important to use grace mode, see explain in
	 * sptlrpc_req_refresh_ctx()
	 */
	import_flush_ctx_common(imp, 0, 1, 1);
}
1668
/* Flush the current user's cached contexts on \a imp. */
void sptlrpc_import_flush_my_ctx(struct obd_import *imp)
{
	import_flush_ctx_common(imp, from_kuid(&init_user_ns, current_uid()),
				1, 1);
}
EXPORT_SYMBOL(sptlrpc_import_flush_my_ctx);
1675
/* Flush every user's cached contexts on \a imp (uid -1 means all). */
void sptlrpc_import_flush_all_ctx(struct obd_import *imp)
{
	import_flush_ctx_common(imp, -1, 1, 1);
}
EXPORT_SYMBOL(sptlrpc_import_flush_all_ctx);
1681
1682 /**
1683  * Used by ptlrpc client to allocate request buffer of \a req. Upon return
1684  * successfully, req->rq_reqmsg points to a buffer with size \a msgsize.
1685  */
1686 int sptlrpc_cli_alloc_reqbuf(struct ptlrpc_request *req, int msgsize)
1687 {
1688         struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
1689         struct ptlrpc_sec_policy *policy;
1690         int rc;
1691
1692         LASSERT(ctx);
1693         LASSERT(ctx->cc_sec);
1694         LASSERT(ctx->cc_sec->ps_policy);
1695         LASSERT(req->rq_reqmsg == NULL);
1696         LASSERT(atomic_read(&(ctx)->cc_refcount) > 0);
1697
1698         policy = ctx->cc_sec->ps_policy;
1699         rc = policy->sp_cops->alloc_reqbuf(ctx->cc_sec, req, msgsize);
1700         if (!rc) {
1701                 LASSERT(req->rq_reqmsg);
1702                 LASSERT(req->rq_reqbuf || req->rq_clrbuf);
1703
1704                 /* zeroing preallocated buffer */
1705                 if (req->rq_pool)
1706                         memset(req->rq_reqmsg, 0, msgsize);
1707         }
1708
1709         return rc;
1710 }
1711
1712 /**
1713  * Used by ptlrpc client to free request buffer of \a req. After this
1714  * req->rq_reqmsg is set to NULL and should not be accessed anymore.
1715  */
1716 void sptlrpc_cli_free_reqbuf(struct ptlrpc_request *req)
1717 {
1718         struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
1719         struct ptlrpc_sec_policy *policy;
1720
1721         LASSERT(ctx);
1722         LASSERT(ctx->cc_sec);
1723         LASSERT(ctx->cc_sec->ps_policy);
1724         LASSERT(atomic_read(&(ctx)->cc_refcount) > 0);
1725
1726         if (req->rq_reqbuf == NULL && req->rq_clrbuf == NULL)
1727                 return;
1728
1729         policy = ctx->cc_sec->ps_policy;
1730         policy->sp_cops->free_reqbuf(ctx->cc_sec, req);
1731         req->rq_reqmsg = NULL;
1732 }
1733
1734 /*
1735  * NOTE caller must guarantee the buffer size is enough for the enlargement
1736  */
1737 void _sptlrpc_enlarge_msg_inplace(struct lustre_msg *msg,
1738                                   int segment, int newsize)
1739 {
1740         void *src, *dst;
1741         int oldsize, oldmsg_size, movesize;
1742
1743         LASSERT(segment < msg->lm_bufcount);
1744         LASSERT(msg->lm_buflens[segment] <= newsize);
1745
1746         if (msg->lm_buflens[segment] == newsize)
1747                 return;
1748
1749         /* nothing to do if we are enlarging the last segment */
1750         if (segment == msg->lm_bufcount - 1) {
1751                 msg->lm_buflens[segment] = newsize;
1752                 return;
1753         }
1754
1755         oldsize = msg->lm_buflens[segment];
1756
1757         src = lustre_msg_buf(msg, segment + 1, 0);
1758         msg->lm_buflens[segment] = newsize;
1759         dst = lustre_msg_buf(msg, segment + 1, 0);
1760         msg->lm_buflens[segment] = oldsize;
1761
1762         /* move from segment + 1 to end segment */
1763         LASSERT(msg->lm_magic == LUSTRE_MSG_MAGIC_V2);
1764         oldmsg_size = lustre_msg_size_v2(msg->lm_bufcount, msg->lm_buflens);
1765         movesize = oldmsg_size - ((unsigned long) src - (unsigned long) msg);
1766         LASSERT(movesize >= 0);
1767
1768         if (movesize)
1769                 memmove(dst, src, movesize);
1770
1771         /* note we don't clear the ares where old data live, not secret */
1772
1773         /* finally set new segment size */
1774         msg->lm_buflens[segment] = newsize;
1775 }
1776 EXPORT_SYMBOL(_sptlrpc_enlarge_msg_inplace);
1777
1778 /**
1779  * Used by ptlrpc client to enlarge the \a segment of request message pointed
1780  * by req->rq_reqmsg to size \a newsize, all previously filled-in data will be
1781  * preserved after the enlargement. this must be called after original request
1782  * buffer being allocated.
1783  *
1784  * \note after this be called, rq_reqmsg and rq_reqlen might have been changed,
1785  * so caller should refresh its local pointers if needed.
1786  */
1787 int sptlrpc_cli_enlarge_reqbuf(struct ptlrpc_request *req,
1788                                const struct req_msg_field *field,
1789                                int newsize)
1790 {
1791         struct req_capsule *pill = &req->rq_pill;
1792         struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
1793         struct ptlrpc_sec_cops *cops;
1794         struct lustre_msg *msg = req->rq_reqmsg;
1795         int segment = __req_capsule_offset(pill, field, RCL_CLIENT);
1796
1797         LASSERT(ctx);
1798         LASSERT(msg);
1799         LASSERT(msg->lm_bufcount > segment);
1800         LASSERT(msg->lm_buflens[segment] <= newsize);
1801
1802         if (msg->lm_buflens[segment] == newsize)
1803                 return 0;
1804
1805         cops = ctx->cc_sec->ps_policy->sp_cops;
1806         LASSERT(cops->enlarge_reqbuf);
1807         return cops->enlarge_reqbuf(ctx->cc_sec, req, segment, newsize);
1808 }
1809 EXPORT_SYMBOL(sptlrpc_cli_enlarge_reqbuf);
1810
1811 /**
1812  * Used by ptlrpc client to allocate reply buffer of \a req.
1813  *
1814  * \note After this, req->rq_repmsg is still not accessible.
1815  */
1816 int sptlrpc_cli_alloc_repbuf(struct ptlrpc_request *req, int msgsize)
1817 {
1818         struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
1819         struct ptlrpc_sec_policy *policy;
1820
1821         ENTRY;
1822
1823         LASSERT(ctx);
1824         LASSERT(ctx->cc_sec);
1825         LASSERT(ctx->cc_sec->ps_policy);
1826
1827         if (req->rq_repbuf)
1828                 RETURN(0);
1829
1830         policy = ctx->cc_sec->ps_policy;
1831         RETURN(policy->sp_cops->alloc_repbuf(ctx->cc_sec, req, msgsize));
1832 }
1833
1834 /**
1835  * Used by ptlrpc client to free reply buffer of \a req. After this
1836  * req->rq_repmsg is set to NULL and should not be accessed anymore.
1837  */
1838 void sptlrpc_cli_free_repbuf(struct ptlrpc_request *req)
1839 {
1840         struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
1841         struct ptlrpc_sec_policy *policy;
1842
1843         ENTRY;
1844
1845         LASSERT(ctx);
1846         LASSERT(ctx->cc_sec);
1847         LASSERT(ctx->cc_sec->ps_policy);
1848         LASSERT(atomic_read(&(ctx)->cc_refcount) > 0);
1849
1850         if (req->rq_repbuf == NULL)
1851                 return;
1852         LASSERT(req->rq_repbuf_len);
1853
1854         policy = ctx->cc_sec->ps_policy;
1855         policy->sp_cops->free_repbuf(ctx->cc_sec, req);
1856         req->rq_repmsg = NULL;
1857         EXIT;
1858 }
1859 EXPORT_SYMBOL(sptlrpc_cli_free_repbuf);
1860
1861 int sptlrpc_cli_install_rvs_ctx(struct obd_import *imp,
1862                                 struct ptlrpc_cli_ctx *ctx)
1863 {
1864         struct ptlrpc_sec_policy *policy = ctx->cc_sec->ps_policy;
1865
1866         if (!policy->sp_cops->install_rctx)
1867                 return 0;
1868         return policy->sp_cops->install_rctx(imp, ctx->cc_sec, ctx);
1869 }
1870
1871 int sptlrpc_svc_install_rvs_ctx(struct obd_import *imp,
1872                                 struct ptlrpc_svc_ctx *ctx)
1873 {
1874         struct ptlrpc_sec_policy *policy = ctx->sc_policy;
1875
1876         if (!policy->sp_sops->install_rctx)
1877                 return 0;
1878         return policy->sp_sops->install_rctx(imp, ctx);
1879 }
1880
1881
/* Get SELinux policy info from userspace by spawning the l_getsepol
 * helper for this import's obd device.  Returns 0 on success or a
 * negative errno.
 */
static int sepol_helper(struct obd_import *imp)
{
        char mtime_str[21] = { 0 }, mode_str[2] = { 0 };
        char *argv[] = {
                [0] = "/usr/sbin/l_getsepol",
                [1] = "-o",
                [2] = NULL,         /* obd type */
                [3] = "-n",
                [4] = NULL,         /* obd name */
                [5] = "-t",
                [6] = mtime_str,    /* policy mtime */
                [7] = "-m",
                [8] = mode_str,     /* enforcing mode */
                [9] = NULL
        };
        struct sptlrpc_sepol *sepol;
        char *envp[] = {
                [0] = "HOME=/",
                [1] = "PATH=/sbin:/usr/sbin",
                [2] = NULL
        };
        /* NOTE(review): 'signed short' looks deliberate, not a truncation
         * bug.  The exit status occupies bits 8..15 of the wait status, so
         * narrowing to a signed 16-bit value sign-extends exit codes > 127;
         * after the >>8 below a child exiting with a small negative errno
         * (e.g. -ENODEV) comes back as that negative value, which the
         * caller tests for -- confirm against l_getsepol's exit convention
         * before changing the type.
         */
        signed short ret;
        int rc = 0;

        if (imp == NULL || imp->imp_obd == NULL ||
            imp->imp_obd->obd_type == NULL)
                RETURN(-EINVAL);

        argv[2] = (char *)imp->imp_obd->obd_type->typ_name;
        argv[4] = imp->imp_obd->obd_name;

        /* sample the cached policy state under RCU to seed -t/-m arguments */
        rcu_read_lock();
        sepol = rcu_dereference(imp->imp_sec->ps_sepol);
        if (!sepol) {
                /* ps_sepol has not been initialized */
                argv[5] = NULL;
                argv[7] = NULL;
        } else {
                time64_t mtime_ms;

                mtime_ms = ktime_to_ms(sepol->ssp_mtime);
                snprintf(mtime_str, sizeof(mtime_str), "%lld",
                         mtime_ms / MSEC_PER_SEC);
                if (sepol->ssp_sepol_size > 1)
                        mode_str[0] = sepol->ssp_sepol[0];
        }
        rcu_read_unlock();

        /* UMH_WAIT_PROC: block until the helper exits, collect its status */
        ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
        rc = ret>>8;

        return rc;
}
1936
1937 static inline int sptlrpc_sepol_needs_check(struct ptlrpc_sec *imp_sec)
1938 {
1939         ktime_t checknext;
1940
1941         if (send_sepol == 0)
1942                 return 0;
1943
1944         if (send_sepol == -1)
1945                 /* send_sepol == -1 means fetch sepol status every time */
1946                 return 1;
1947
1948         spin_lock(&imp_sec->ps_lock);
1949         checknext = imp_sec->ps_sepol_checknext;
1950         spin_unlock(&imp_sec->ps_lock);
1951
1952         /* next check is too far in time, please update */
1953         if (ktime_after(checknext,
1954                         ktime_add(ktime_get(), ktime_set(send_sepol, 0))))
1955                 goto setnext;
1956
1957         if (ktime_before(ktime_get(), checknext))
1958                 /* too early to fetch sepol status */
1959                 return 0;
1960
1961 setnext:
1962         /* define new sepol_checknext time */
1963         spin_lock(&imp_sec->ps_lock);
1964         imp_sec->ps_sepol_checknext = ktime_add(ktime_get(),
1965                                                 ktime_set(send_sepol, 0));
1966         spin_unlock(&imp_sec->ps_lock);
1967
1968         return 1;
1969 }
1970
1971 static void sptlrpc_sepol_release(struct kref *ref)
1972 {
1973         struct sptlrpc_sepol *p = container_of(ref, struct sptlrpc_sepol,
1974                                               ssp_ref);
1975         kfree_rcu(p, ssp_rcu);
1976 }
1977
1978 void sptlrpc_sepol_put(struct sptlrpc_sepol *pol)
1979 {
1980         if (!pol)
1981                 return;
1982         kref_put(&pol->ssp_ref, sptlrpc_sepol_release);
1983 }
1984 EXPORT_SYMBOL(sptlrpc_sepol_put);
1985
1986 struct sptlrpc_sepol *sptlrpc_sepol_get_cached(struct ptlrpc_sec *imp_sec)
1987 {
1988         struct sptlrpc_sepol *p;
1989
1990 retry:
1991         rcu_read_lock();
1992         p = rcu_dereference(imp_sec->ps_sepol);
1993         if (p && !kref_get_unless_zero(&p->ssp_ref)) {
1994                 rcu_read_unlock();
1995                 goto retry;
1996         }
1997         rcu_read_unlock();
1998
1999         return p;
2000 }
2001 EXPORT_SYMBOL(sptlrpc_sepol_get_cached);
2002
/* Return the SELinux policy state to be packed in a request on \a req's
 * import, refreshing it via the userspace helper when a check is due.
 *
 * \retval NULL        sepol reporting disabled, unsupported, or SELinux off
 * \retval ERR_PTR(rc) on error
 * \retval pointer     referenced state; caller must sptlrpc_sepol_put() it
 */
struct sptlrpc_sepol *sptlrpc_sepol_get(struct ptlrpc_request *req)
{
        struct ptlrpc_sec *imp_sec = req->rq_import->imp_sec;
        struct sptlrpc_sepol *out;
        int rc = 0;

        ENTRY;

#ifndef HAVE_SELINUX
        /* without libselinux support everything below is unreachable */
        if (unlikely(send_sepol != 0))
                CDEBUG(D_SEC,
                       "Client cannot report SELinux status, it was not built against libselinux.\n");
        RETURN(NULL);
#endif

        if (send_sepol == 0)
                RETURN(NULL);

        if (imp_sec == NULL)
                RETURN(ERR_PTR(-EINVAL));

        /* Retrieve SELinux status info */
        if (sptlrpc_sepol_needs_check(imp_sec))
                rc = sepol_helper(req->rq_import);

        if (unlikely(rc == -ENODEV)) {
                CDEBUG(D_SEC,
                       "Client cannot report SELinux status, SELinux is disabled.\n");
                RETURN(NULL);
        }
        if (unlikely(rc))
                RETURN(ERR_PTR(rc));

        out = sptlrpc_sepol_get_cached(imp_sec);
        if (!out)
                RETURN(ERR_PTR(-ENODATA));

        RETURN(out);
}
EXPORT_SYMBOL(sptlrpc_sepol_get);
2043
2044 /*
2045  * server side security
2046  */
2047
2048 static int flavor_allowed(struct sptlrpc_flavor *exp,
2049                           struct ptlrpc_request *req)
2050 {
2051         struct sptlrpc_flavor *flvr = &req->rq_flvr;
2052
2053         if (exp->sf_rpc == SPTLRPC_FLVR_ANY || exp->sf_rpc == flvr->sf_rpc)
2054                 return 1;
2055
2056         if ((req->rq_ctx_init || req->rq_ctx_fini) &&
2057             SPTLRPC_FLVR_POLICY(exp->sf_rpc) ==
2058             SPTLRPC_FLVR_POLICY(flvr->sf_rpc) &&
2059             SPTLRPC_FLVR_MECH(exp->sf_rpc) == SPTLRPC_FLVR_MECH(flvr->sf_rpc))
2060                 return 1;
2061
2062         return 0;
2063 }
2064
2065 #define EXP_FLVR_UPDATE_EXPIRE      (OBD_TIMEOUT_DEFAULT + 10)
2066
/**
 * Given an export \a exp, check whether the flavor of incoming \a req
 * is allowed by the export \a exp. Main logic is about taking care of
 * changing configurations. Return 0 means success.
 *
 * \note takes and releases exp->exp_lock internally; may trigger a
 * reverse-import sec adapt or reverse context install on success.
 */
int sptlrpc_target_export_check(struct obd_export *exp,
				struct ptlrpc_request *req)
{
	struct sptlrpc_flavor   flavor;

	if (exp == NULL)
		return 0;

	/*
	 * client side export has no imp_reverse, skip
	 * FIXME maybe we should check flavor this as well???
	 */
	if (exp->exp_imp_reverse == NULL)
		return 0;

	/* don't care about ctx fini rpc */
	if (req->rq_ctx_fini)
		return 0;

	spin_lock(&exp->exp_lock);

	/*
	 * if flavor just changed (exp->exp_flvr_changed != 0), we wait for
	 * the first req with the new flavor, then treat it as current flavor,
	 * adapt reverse sec according to it.
	 * note the first rpc with new flavor might not be with root ctx, in
	 * which case delay the sec_adapt by leaving exp_flvr_adapt == 1.
	 */
	if (unlikely(exp->exp_flvr_changed) &&
	    flavor_allowed(&exp->exp_flvr_old[1], req)) {
		/*
		 * make the new flavor as "current", and old ones as
		 * about-to-expire
		 */
		CDEBUG(D_SEC, "exp %p: just changed: %x->%x\n", exp,
		       exp->exp_flvr.sf_rpc, exp->exp_flvr_old[1].sf_rpc);
		flavor = exp->exp_flvr_old[1];
		exp->exp_flvr_old[1] = exp->exp_flvr_old[0];
		exp->exp_flvr_expire[1] = exp->exp_flvr_expire[0];
		exp->exp_flvr_old[0] = exp->exp_flvr;
		exp->exp_flvr_expire[0] = ktime_get_real_seconds() +
					  EXP_FLVR_UPDATE_EXPIRE;
		exp->exp_flvr = flavor;

		/* flavor change finished */
		exp->exp_flvr_changed = 0;
		LASSERT(exp->exp_flvr_adapt == 1);

		/* if it's gss, we only interested in root ctx init */
		if (req->rq_auth_gss &&
		    !(req->rq_ctx_init &&
		    (req->rq_auth_usr_root || req->rq_auth_usr_mdt ||
		    req->rq_auth_usr_ost))) {
			spin_unlock(&exp->exp_lock);
			CDEBUG(D_SEC, "is good but not root(%d:%d:%d:%d:%d)\n",
			       req->rq_auth_gss, req->rq_ctx_init,
			       req->rq_auth_usr_root, req->rq_auth_usr_mdt,
			       req->rq_auth_usr_ost);
			return 0;
		}

		exp->exp_flvr_adapt = 0;
		spin_unlock(&exp->exp_lock);

		return sptlrpc_import_sec_adapt(exp->exp_imp_reverse,
						req->rq_svc_ctx, &flavor);
	}

	/*
	 * if it equals to the current flavor, we accept it, but need to
	 * dealing with reverse sec/ctx
	 */
	if (likely(flavor_allowed(&exp->exp_flvr, req))) {
		/*
		 * most cases should return here, we only interested in
		 * gss root ctx init
		 */
		if (!req->rq_auth_gss || !req->rq_ctx_init ||
		    (!req->rq_auth_usr_root && !req->rq_auth_usr_mdt &&
		     !req->rq_auth_usr_ost)) {
			spin_unlock(&exp->exp_lock);
			return 0;
		}

		/*
		 * if flavor just changed, we should not proceed, just leave
		 * it and current flavor will be discovered and replaced
		 * shortly, and let _this_ rpc pass through
		 */
		if (exp->exp_flvr_changed) {
			LASSERT(exp->exp_flvr_adapt);
			spin_unlock(&exp->exp_lock);
			return 0;
		}

		if (exp->exp_flvr_adapt) {
			exp->exp_flvr_adapt = 0;
			CDEBUG(D_SEC, "exp %p (%x|%x|%x): do delayed adapt\n",
			       exp, exp->exp_flvr.sf_rpc,
			       exp->exp_flvr_old[0].sf_rpc,
			       exp->exp_flvr_old[1].sf_rpc);
			flavor = exp->exp_flvr;
			spin_unlock(&exp->exp_lock);

			return sptlrpc_import_sec_adapt(exp->exp_imp_reverse,
							req->rq_svc_ctx,
							&flavor);
		} else {
			CDEBUG(D_SEC,
			       "exp %p (%x|%x|%x): is current flavor, install rvs ctx\n",
			       exp, exp->exp_flvr.sf_rpc,
			       exp->exp_flvr_old[0].sf_rpc,
			       exp->exp_flvr_old[1].sf_rpc);
			spin_unlock(&exp->exp_lock);

			return sptlrpc_svc_install_rvs_ctx(exp->exp_imp_reverse,
							   req->rq_svc_ctx);
		}
	}

	/* no match with the current flavor: try the "middle" (most recently
	 * superseded) flavor while its grace period has not expired
	 */
	if (exp->exp_flvr_expire[0]) {
		if (exp->exp_flvr_expire[0] >= ktime_get_real_seconds()) {
			if (flavor_allowed(&exp->exp_flvr_old[0], req)) {
				CDEBUG(D_SEC,
				       "exp %p (%x|%x|%x): match the middle one (%lld)\n",
				       exp, exp->exp_flvr.sf_rpc,
				       exp->exp_flvr_old[0].sf_rpc,
				       exp->exp_flvr_old[1].sf_rpc,
				       (s64)(exp->exp_flvr_expire[0] -
					     ktime_get_real_seconds()));
				spin_unlock(&exp->exp_lock);
				return 0;
			}
		} else {
			CDEBUG(D_SEC, "mark middle expired\n");
			exp->exp_flvr_expire[0] = 0;
		}
		CDEBUG(D_SEC, "exp %p (%x|%x|%x): %x not match middle\n", exp,
		       exp->exp_flvr.sf_rpc,
		       exp->exp_flvr_old[0].sf_rpc, exp->exp_flvr_old[1].sf_rpc,
		       req->rq_flvr.sf_rpc);
	}

	/*
	 * now it doesn't match the current flavor, the only chance we can
	 * accept it is match the old flavors which is not expired.
	 */
	if (exp->exp_flvr_changed == 0 && exp->exp_flvr_expire[1]) {
		if (exp->exp_flvr_expire[1] >= ktime_get_real_seconds()) {
			if (flavor_allowed(&exp->exp_flvr_old[1], req)) {
				CDEBUG(D_SEC, "exp %p (%x|%x|%x): match the oldest one (%lld)\n",
				       exp,
				       exp->exp_flvr.sf_rpc,
				       exp->exp_flvr_old[0].sf_rpc,
				       exp->exp_flvr_old[1].sf_rpc,
				       (s64)(exp->exp_flvr_expire[1] -
				       ktime_get_real_seconds()));
				spin_unlock(&exp->exp_lock);
				return 0;
			}
		} else {
			CDEBUG(D_SEC, "mark oldest expired\n");
			exp->exp_flvr_expire[1] = 0;
		}
		CDEBUG(D_SEC, "exp %p (%x|%x|%x): %x not match found\n",
		       exp, exp->exp_flvr.sf_rpc,
		       exp->exp_flvr_old[0].sf_rpc, exp->exp_flvr_old[1].sf_rpc,
		       req->rq_flvr.sf_rpc);
	} else {
		CDEBUG(D_SEC, "exp %p (%x|%x|%x): skip the last one\n",
		       exp, exp->exp_flvr.sf_rpc, exp->exp_flvr_old[0].sf_rpc,
		       exp->exp_flvr_old[1].sf_rpc);
	}

	spin_unlock(&exp->exp_lock);

	/* nothing matched: reject the request */
	CWARN("exp %p(%s): req %p (%u|%u|%u|%u|%u|%u) with unauthorized flavor %x, expect %x|%x(%+lld)|%x(%+lld)\n",
	      exp, exp->exp_obd->obd_name,
	      req, req->rq_auth_gss, req->rq_ctx_init, req->rq_ctx_fini,
	      req->rq_auth_usr_root, req->rq_auth_usr_mdt, req->rq_auth_usr_ost,
	      req->rq_flvr.sf_rpc,
	      exp->exp_flvr.sf_rpc,
	      exp->exp_flvr_old[0].sf_rpc,
	      exp->exp_flvr_expire[0] ?
	      (s64)(exp->exp_flvr_expire[0] - ktime_get_real_seconds()) : 0,
	      exp->exp_flvr_old[1].sf_rpc,
	      exp->exp_flvr_expire[1] ?
	      (s64)(exp->exp_flvr_expire[1] - ktime_get_real_seconds()) : 0);
	return -EACCES;
}
EXPORT_SYMBOL(sptlrpc_target_export_check);
2263
2264 void sptlrpc_target_update_exp_flavor(struct obd_device *obd,
2265                                       struct sptlrpc_rule_set *rset)
2266 {
2267         struct obd_export *exp;
2268         struct sptlrpc_flavor new_flvr;
2269
2270         LASSERT(obd);
2271
2272         spin_lock(&obd->obd_dev_lock);
2273
2274         list_for_each_entry(exp, &obd->obd_exports, exp_obd_chain) {
2275                 if (exp->exp_connection == NULL)
2276                         continue;
2277
2278                 /*
2279                  * note if this export had just been updated flavor
2280                  * (exp_flvr_changed == 1), this will override the
2281                  * previous one.
2282                  */
2283                 spin_lock(&exp->exp_lock);
2284                 sptlrpc_target_choose_flavor(rset, exp->exp_sp_peer,
2285                                              &exp->exp_connection->c_peer.nid,
2286                                              &new_flvr);
2287                 if (exp->exp_flvr_changed ||
2288                     !flavor_equal(&new_flvr, &exp->exp_flvr)) {
2289                         exp->exp_flvr_old[1] = new_flvr;
2290                         exp->exp_flvr_expire[1] = 0;
2291                         exp->exp_flvr_changed = 1;
2292                         exp->exp_flvr_adapt = 1;
2293
2294                         CDEBUG(D_SEC, "exp %p (%s): updated flavor %x->%x\n",
2295                                exp, sptlrpc_part2name(exp->exp_sp_peer),
2296                                exp->exp_flvr.sf_rpc,
2297                                exp->exp_flvr_old[1].sf_rpc);
2298                 }
2299                 spin_unlock(&exp->exp_lock);
2300         }
2301
2302         spin_unlock(&obd->obd_dev_lock);
2303 }
2304 EXPORT_SYMBOL(sptlrpc_target_update_exp_flavor);
2305
/* Cross-check the peer type claimed in \a req (rq_sp_from) against the
 * identity authenticated by GSS; a mismatch downgrades \a svc_rc to
 * SECSVC_DROP.  Without GSS the claim is unverifiable and passed through.
 */
static int sptlrpc_svc_check_from(struct ptlrpc_request *req, int svc_rc)
{
        /* peer's claim is unreliable unless gss is being used */
        if (!req->rq_auth_gss || svc_rc == SECSVC_DROP)
                return svc_rc;

        switch (req->rq_sp_from) {
        case LUSTRE_SP_CLI:
                if (req->rq_auth_usr_mdt || req->rq_auth_usr_ost) {
                        /* The below message is checked in sanity-sec test_33 */
                        DEBUG_REQ(D_ERROR, req, "faked source CLI");
                        svc_rc = SECSVC_DROP;
                }
                break;
        case LUSTRE_SP_MDT:
                if (!req->rq_auth_usr_mdt) {
                        /* The below message is checked in sanity-sec test_33 */
                        DEBUG_REQ(D_ERROR, req, "faked source MDT");
                        svc_rc = SECSVC_DROP;
                }
                break;
        case LUSTRE_SP_OST:
                if (!req->rq_auth_usr_ost) {
                        /* The below message is checked in sanity-sec test_33 */
                        DEBUG_REQ(D_ERROR, req, "faked source OST");
                        svc_rc = SECSVC_DROP;
                }
                break;
        case LUSTRE_SP_MGS:
        case LUSTRE_SP_MGC:
                if (!req->rq_auth_usr_root && !req->rq_auth_usr_mdt &&
                    !req->rq_auth_usr_ost) {
                        /* The below message is checked in sanity-sec test_33 */
                        DEBUG_REQ(D_ERROR, req, "faked source MGC/MGS");
                        svc_rc = SECSVC_DROP;
                }
                break;
        case LUSTRE_SP_ANY:
        default:
                DEBUG_REQ(D_ERROR, req, "invalid source %u", req->rq_sp_from);
                svc_rc = SECSVC_DROP;
        }

        return svc_rc;
}
2351
/**
 * Used by ptlrpc server, to perform transformation upon request message of
 * incoming \a req. This must be the first thing to do with an incoming
 * request in ptlrpc layer.
 *
 * \retval SECSVC_OK success, and req->rq_reqmsg point to request message in
 * clear text, size is req->rq_reqlen; also req->rq_svc_ctx is set.
 * \retval SECSVC_COMPLETE success, the request has been fully processed, and
 * reply message has been prepared.
 * \retval SECSVC_DROP failed, this request should be dropped.
 */
int sptlrpc_svc_unwrap_request(struct ptlrpc_request *req)
{
        struct ptlrpc_sec_policy *policy;
        struct lustre_msg *msg = req->rq_reqbuf;
        int rc;

        ENTRY;

        LASSERT(msg);
        LASSERT(req->rq_reqmsg == NULL);
        LASSERT(req->rq_repmsg == NULL);
        LASSERT(req->rq_svc_ctx == NULL);

        req->rq_req_swab_mask = 0;

        rc = __lustre_unpack_msg(msg, req->rq_reqdata_len);
        switch (rc) {
        case 1:
                req_capsule_set_req_swabbed(&req->rq_pill,
                                            MSG_PTLRPC_HEADER_OFF);
                /* fallthrough - 1 is also success, just with the header
                 * recorded as byte-swapped
                 */
        case 0:
                break;
        default:
                CERROR("error unpacking request from %s x%llu\n",
                       libcfs_idstr(&req->rq_peer), req->rq_xid);
                RETURN(SECSVC_DROP);
        }

        req->rq_flvr.sf_rpc = WIRE_FLVR(msg->lm_secflvr);
        req->rq_sp_from = LUSTRE_SP_ANY;
        req->rq_auth_uid = -1; /* set to INVALID_UID */
        req->rq_auth_mapped_uid = -1;

        policy = sptlrpc_wireflavor2policy(req->rq_flvr.sf_rpc);
        if (!policy) {
                CERROR("unsupported rpc flavor %x\n", req->rq_flvr.sf_rpc);
                RETURN(SECSVC_DROP);
        }

        LASSERT(policy->sp_sops->accept);
        rc = policy->sp_sops->accept(req);
        sptlrpc_policy_put(policy);
        LASSERT(req->rq_reqmsg || rc != SECSVC_OK);
        LASSERT(req->rq_svc_ctx || rc == SECSVC_DROP);

        /*
         * if it's not null flavor (which means embedded packing msg),
         * reset the swab mask for the coming inner msg unpacking.
         */
        if (SPTLRPC_FLVR_POLICY(req->rq_flvr.sf_rpc) != SPTLRPC_POLICY_NULL)
                req->rq_req_swab_mask = 0;

        /* sanity check for the request source */
        rc = sptlrpc_svc_check_from(req, rc);
        RETURN(rc);
}
2419
/**
 * Used by ptlrpc server, to allocate reply buffer for \a req. If succeed,
 * req->rq_reply_state is set, and req->rq_reply_state->rs_msg point to
 * a buffer of \a msglen size.
 *
 * On policy allocation failure with -ENOMEM, falls back to the service's
 * emergency reply-state pool when the message fits.
 */
int sptlrpc_svc_alloc_rs(struct ptlrpc_request *req, int msglen)
{
        struct ptlrpc_sec_policy *policy;
        struct ptlrpc_reply_state *rs;
        int rc;

        ENTRY;

        LASSERT(req->rq_svc_ctx);
        LASSERT(req->rq_svc_ctx->sc_policy);

        policy = req->rq_svc_ctx->sc_policy;
        LASSERT(policy->sp_sops->alloc_rs);

        rc = policy->sp_sops->alloc_rs(req, msglen);
        if (unlikely(rc == -ENOMEM)) {
                struct ptlrpc_service_part *svcpt = req->rq_rqbd->rqbd_svcpt;

                /* don't bother with the emergency pool if the reply could
                 * never fit within the service's maximum reply size
                 */
                if (svcpt->scp_service->srv_max_reply_size <
                   msglen + sizeof(struct ptlrpc_reply_state)) {
                        /* Just return failure if the size is too big */
                        CERROR("size of message is too big (%zd), %d allowed\n",
                                msglen + sizeof(struct ptlrpc_reply_state),
                                svcpt->scp_service->srv_max_reply_size);
                        RETURN(-ENOMEM);
                }

                /* failed alloc, try emergency pool */
                rs = lustre_get_emerg_rs(svcpt);
                if (rs == NULL)
                        RETURN(-ENOMEM);

                /* retry the policy allocation with the preallocated reply
                 * state installed; return it to the pool on failure
                 */
                req->rq_reply_state = rs;
                rc = policy->sp_sops->alloc_rs(req, msglen);
                if (rc) {
                        lustre_put_emerg_rs(rs);
                        req->rq_reply_state = NULL;
                }
        }

        LASSERT(rc != 0 ||
                (req->rq_reply_state && req->rq_reply_state->rs_msg));

        RETURN(rc);
}
2470
2471 /**
2472  * Used by ptlrpc server, to perform transformation upon reply message.
2473  *
2474  * \post req->rq_reply_off is set to approriate server-controlled reply offset.
2475  * \post req->rq_repmsg and req->rq_reply_state->rs_msg becomes inaccessible.
2476  */
2477 int sptlrpc_svc_wrap_reply(struct ptlrpc_request *req)
2478 {
2479         struct ptlrpc_sec_policy *policy;
2480         int rc;
2481
2482         ENTRY;
2483
2484         LASSERT(req->rq_svc_ctx);
2485         LASSERT(req->rq_svc_ctx->sc_policy);
2486
2487         policy = req->rq_svc_ctx->sc_policy;
2488         LASSERT(policy->sp_sops->authorize);
2489
2490         rc = policy->sp_sops->authorize(req);
2491         LASSERT(rc || req->rq_reply_state->rs_repdata_len);
2492
2493         RETURN(rc);
2494 }
2495
2496 /**
2497  * Used by ptlrpc server, to free reply_state.
2498  */
2499 void sptlrpc_svc_free_rs(struct ptlrpc_reply_state *rs)
2500 {
2501         struct ptlrpc_sec_policy *policy;
2502         unsigned int prealloc;
2503
2504         ENTRY;
2505
2506         LASSERT(rs->rs_svc_ctx);
2507         LASSERT(rs->rs_svc_ctx->sc_policy);
2508
2509         policy = rs->rs_svc_ctx->sc_policy;
2510         LASSERT(policy->sp_sops->free_rs);
2511
2512         prealloc = rs->rs_prealloc;
2513         policy->sp_sops->free_rs(rs);
2514
2515         if (prealloc)
2516                 lustre_put_emerg_rs(rs);
2517         EXIT;
2518 }
2519
2520 void sptlrpc_svc_ctx_addref(struct ptlrpc_request *req)
2521 {
2522         struct ptlrpc_svc_ctx *ctx = req->rq_svc_ctx;
2523
2524         if (ctx != NULL)
2525                 atomic_inc(&ctx->sc_refcount);
2526 }
2527
2528 void sptlrpc_svc_ctx_decref(struct ptlrpc_request *req)
2529 {
2530         struct ptlrpc_svc_ctx *ctx = req->rq_svc_ctx;
2531
2532         if (ctx == NULL)
2533                 return;
2534
2535         LASSERT(atomic_read(&(ctx)->sc_refcount) > 0);
2536         if (atomic_dec_and_test(&ctx->sc_refcount)) {
2537                 if (ctx->sc_policy->sp_sops->free_ctx)
2538                         ctx->sc_policy->sp_sops->free_ctx(ctx);
2539         }
2540         req->rq_svc_ctx = NULL;
2541 }
2542
2543 void sptlrpc_svc_ctx_invalidate(struct ptlrpc_request *req)
2544 {
2545         struct ptlrpc_svc_ctx *ctx = req->rq_svc_ctx;
2546
2547         if (ctx == NULL)
2548                 return;
2549
2550         LASSERT(atomic_read(&(ctx)->sc_refcount) > 0);
2551         if (ctx->sc_policy->sp_sops->invalidate_ctx)
2552                 ctx->sc_policy->sp_sops->invalidate_ctx(ctx);
2553 }
2554 EXPORT_SYMBOL(sptlrpc_svc_ctx_invalidate);
2555
2556 /*
2557  * bulk security
2558  */
2559
2560 /**
2561  * Perform transformation upon bulk data pointed by \a desc. This is called
2562  * before transforming the request message.
2563  */
2564 int sptlrpc_cli_wrap_bulk(struct ptlrpc_request *req,
2565                           struct ptlrpc_bulk_desc *desc)
2566 {
2567         struct ptlrpc_cli_ctx *ctx;
2568
2569         LASSERT(req->rq_bulk_read || req->rq_bulk_write);
2570
2571         if (!req->rq_pack_bulk)
2572                 return 0;
2573
2574         ctx = req->rq_cli_ctx;
2575         if (ctx->cc_ops->wrap_bulk)
2576                 return ctx->cc_ops->wrap_bulk(ctx, req, desc);
2577         return 0;
2578 }
2579 EXPORT_SYMBOL(sptlrpc_cli_wrap_bulk);
2580
2581 /**
2582  * This is called after unwrap the reply message.
2583  * return nob of actual plain text size received, or error code.
2584  */
2585 int sptlrpc_cli_unwrap_bulk_read(struct ptlrpc_request *req,
2586                                  struct ptlrpc_bulk_desc *desc,
2587                                  int nob)
2588 {
2589         struct ptlrpc_cli_ctx *ctx;
2590         int rc;
2591
2592         LASSERT(req->rq_bulk_read && !req->rq_bulk_write);
2593
2594         if (!req->rq_pack_bulk)
2595                 return desc->bd_nob_transferred;
2596
2597         ctx = req->rq_cli_ctx;
2598         if (ctx->cc_ops->unwrap_bulk) {
2599                 rc = ctx->cc_ops->unwrap_bulk(ctx, req, desc);
2600                 if (rc < 0)
2601                         return rc;
2602         }
2603         return desc->bd_nob_transferred;
2604 }
2605 EXPORT_SYMBOL(sptlrpc_cli_unwrap_bulk_read);
2606
2607 /**
2608  * This is called after unwrap the reply message.
2609  * return 0 for success or error code.
2610  */
2611 int sptlrpc_cli_unwrap_bulk_write(struct ptlrpc_request *req,
2612                                   struct ptlrpc_bulk_desc *desc)
2613 {
2614         struct ptlrpc_cli_ctx *ctx;
2615         int rc;
2616
2617         LASSERT(!req->rq_bulk_read && req->rq_bulk_write);
2618
2619         if (!req->rq_pack_bulk)
2620                 return 0;
2621
2622         ctx = req->rq_cli_ctx;
2623         if (ctx->cc_ops->unwrap_bulk) {
2624                 rc = ctx->cc_ops->unwrap_bulk(ctx, req, desc);
2625                 if (rc < 0)
2626                         return rc;
2627         }
2628
2629         /*
2630          * if everything is going right, nob should equals to nob_transferred.
2631          * in case of privacy mode, nob_transferred needs to be adjusted.
2632          */
2633         if (desc->bd_nob != desc->bd_nob_transferred) {
2634                 CERROR("nob %d doesn't match transferred nob %d\n",
2635                        desc->bd_nob, desc->bd_nob_transferred);
2636                 return -EPROTO;
2637         }
2638
2639         return 0;
2640 }
2641 EXPORT_SYMBOL(sptlrpc_cli_unwrap_bulk_write);
2642
2643 #ifdef HAVE_SERVER_SUPPORT
2644 /**
2645  * Performe transformation upon outgoing bulk read.
2646  */
2647 int sptlrpc_svc_wrap_bulk(struct ptlrpc_request *req,
2648                           struct ptlrpc_bulk_desc *desc)
2649 {
2650         struct ptlrpc_svc_ctx *ctx;
2651
2652         LASSERT(req->rq_bulk_read);
2653
2654         if (!req->rq_pack_bulk)
2655                 return 0;
2656
2657         ctx = req->rq_svc_ctx;
2658         if (ctx->sc_policy->sp_sops->wrap_bulk)
2659                 return ctx->sc_policy->sp_sops->wrap_bulk(req, desc);
2660
2661         return 0;
2662 }
2663 EXPORT_SYMBOL(sptlrpc_svc_wrap_bulk);
2664
2665 /**
2666  * Performe transformation upon incoming bulk write.
2667  */
2668 int sptlrpc_svc_unwrap_bulk(struct ptlrpc_request *req,
2669                             struct ptlrpc_bulk_desc *desc)
2670 {
2671         struct ptlrpc_svc_ctx *ctx;
2672         int rc;
2673
2674         LASSERT(req->rq_bulk_write);
2675
2676         /*
2677          * if it's in privacy mode, transferred should >= expected; otherwise
2678          * transferred should == expected.
2679          */
2680         if (desc->bd_nob_transferred < desc->bd_nob ||
2681             (desc->bd_nob_transferred > desc->bd_nob &&
2682              SPTLRPC_FLVR_BULK_SVC(req->rq_flvr.sf_rpc) !=
2683              SPTLRPC_BULK_SVC_PRIV)) {
2684                 DEBUG_REQ(D_ERROR, req, "truncated bulk GET %d(%d)",
2685                           desc->bd_nob_transferred, desc->bd_nob);
2686                 return -ETIMEDOUT;
2687         }
2688
2689         if (!req->rq_pack_bulk)
2690                 return 0;
2691
2692         ctx = req->rq_svc_ctx;
2693         if (ctx->sc_policy->sp_sops->unwrap_bulk) {
2694                 rc = ctx->sc_policy->sp_sops->unwrap_bulk(req, desc);
2695                 if (rc)
2696                         CERROR("error unwrap bulk: %d\n", rc);
2697         }
2698
2699         /* return 0 to allow reply be sent */
2700         return 0;
2701 }
2702 EXPORT_SYMBOL(sptlrpc_svc_unwrap_bulk);
2703
2704 /**
2705  * Prepare buffers for incoming bulk write.
2706  */
2707 int sptlrpc_svc_prep_bulk(struct ptlrpc_request *req,
2708                           struct ptlrpc_bulk_desc *desc)
2709 {
2710         struct ptlrpc_svc_ctx *ctx;
2711
2712         LASSERT(req->rq_bulk_write);
2713
2714         if (!req->rq_pack_bulk)
2715                 return 0;
2716
2717         ctx = req->rq_svc_ctx;
2718         if (ctx->sc_policy->sp_sops->prep_bulk)
2719                 return ctx->sc_policy->sp_sops->prep_bulk(req, desc);
2720
2721         return 0;
2722 }
2723 EXPORT_SYMBOL(sptlrpc_svc_prep_bulk);
2724
2725 #endif /* HAVE_SERVER_SUPPORT */
2726
2727 /*
2728  * user descriptor helpers
2729  */
2730
2731 int sptlrpc_current_user_desc_size(void)
2732 {
2733         int ngroups;
2734
2735         ngroups = current_cred()->group_info->ngroups;
2736
2737         if (ngroups > LUSTRE_MAX_GROUPS)
2738                 ngroups = LUSTRE_MAX_GROUPS;
2739         return sptlrpc_user_desc_size(ngroups);
2740 }
2741 EXPORT_SYMBOL(sptlrpc_current_user_desc_size);
2742
/**
 * Fill in a user descriptor (uid/gid/fsuid/fsgid/capabilities plus the
 * supplementary groups of the current task) in buffer \a offset of
 * message \a msg.  The buffer is expected to have been sized via
 * sptlrpc_current_user_desc_size().
 */
int sptlrpc_pack_user_desc(struct lustre_msg *msg, int offset)
{
	struct ptlrpc_user_desc *pud;
	int ngroups;

	pud = lustre_msg_buf(msg, offset, 0);

	pud->pud_uid = from_kuid(&init_user_ns, current_uid());
	pud->pud_gid = from_kgid(&init_user_ns, current_gid());
	pud->pud_fsuid = from_kuid(&init_user_ns, current_fsuid());
	pud->pud_fsgid = from_kgid(&init_user_ns, current_fsgid());
	pud->pud_cap = ll_capability_u32(current_cap());
	/* number of __u32 group slots available in the pre-sized buffer */
	pud->pud_ngroups = (msg->lm_buflens[offset] - sizeof(*pud)) / 4;

	/* lock the task so its group_info cannot change under us */
	task_lock(current);
	ngroups = current_cred()->group_info->ngroups;
	if (pud->pud_ngroups > ngroups)
		pud->pud_ngroups = ngroups;
#ifdef HAVE_GROUP_INFO_GID
	memcpy(pud->pud_groups, current_cred()->group_info->gid,
	       pud->pud_ngroups * sizeof(__u32));
#else /* !HAVE_GROUP_INFO_GID */
	/* older kernels keep the group array in indirect blocks */
	memcpy(pud->pud_groups, current_cred()->group_info->blocks[0],
	       pud->pud_ngroups * sizeof(__u32));
#endif /* HAVE_GROUP_INFO_GID */
	task_unlock(current);

	return 0;
}
EXPORT_SYMBOL(sptlrpc_pack_user_desc);
2773
2774 int sptlrpc_unpack_user_desc(struct lustre_msg *msg, int offset, int swabbed)
2775 {
2776         struct ptlrpc_user_desc *pud;
2777         int i;
2778
2779         pud = lustre_msg_buf(msg, offset, sizeof(*pud));
2780         if (!pud)
2781                 return -EINVAL;
2782
2783         if (swabbed) {
2784                 __swab32s(&pud->pud_uid);
2785                 __swab32s(&pud->pud_gid);
2786                 __swab32s(&pud->pud_fsuid);
2787                 __swab32s(&pud->pud_fsgid);
2788                 __swab32s(&pud->pud_cap);
2789                 __swab32s(&pud->pud_ngroups);
2790         }
2791
2792         if (pud->pud_ngroups > LUSTRE_MAX_GROUPS) {
2793                 CERROR("%u groups is too large\n", pud->pud_ngroups);
2794                 return -EINVAL;
2795         }
2796
2797         if (sizeof(*pud) + pud->pud_ngroups * sizeof(__u32) >
2798             msg->lm_buflens[offset]) {
2799                 CERROR("%u groups are claimed but bufsize only %u\n",
2800                        pud->pud_ngroups, msg->lm_buflens[offset]);
2801                 return -EINVAL;
2802         }
2803
2804         if (swabbed) {
2805                 for (i = 0; i < pud->pud_ngroups; i++)
2806                         __swab32s(&pud->pud_groups[i]);
2807         }
2808
2809         return 0;
2810 }
2811 EXPORT_SYMBOL(sptlrpc_unpack_user_desc);
2812
2813 /*
2814  * misc helpers
2815  */
2816
2817 const char *sec2target_str(struct ptlrpc_sec *sec)
2818 {
2819         if (!sec || !sec->ps_import || !sec->ps_import->imp_obd)
2820                 return "*";
2821         if (sec_is_reverse(sec))
2822                 return "c";
2823         return obd_uuid2str(&sec->ps_import->imp_obd->u.cli.cl_target_uuid);
2824 }
2825 EXPORT_SYMBOL(sec2target_str);
2826
2827 /*
2828  * return true if the bulk data is protected
2829  */
2830 int sptlrpc_flavor_has_bulk(struct sptlrpc_flavor *flvr)
2831 {
2832         switch (SPTLRPC_FLVR_BULK_SVC(flvr->sf_rpc)) {
2833         case SPTLRPC_BULK_SVC_INTG:
2834         case SPTLRPC_BULK_SVC_PRIV:
2835                 return 1;
2836         default:
2837                 return 0;
2838         }
2839 }
2840 EXPORT_SYMBOL(sptlrpc_flavor_has_bulk);
2841
2842
/*
 * Map sptlrpc bulk hash algorithm ids (BULK_HASH_ALG_*) to the
 * corresponding libcfs crypto algorithm ids (CFS_HASH_ALG_*).
 */
static int cfs_hash_alg_id[] = {
	[BULK_HASH_ALG_NULL]    = CFS_HASH_ALG_NULL,
	[BULK_HASH_ALG_ADLER32] = CFS_HASH_ALG_ADLER32,
	[BULK_HASH_ALG_CRC32]   = CFS_HASH_ALG_CRC32,
	[BULK_HASH_ALG_MD5]     = CFS_HASH_ALG_MD5,
	[BULK_HASH_ALG_SHA1]    = CFS_HASH_ALG_SHA1,
	[BULK_HASH_ALG_SHA256]  = CFS_HASH_ALG_SHA256,
	[BULK_HASH_ALG_SHA384]  = CFS_HASH_ALG_SHA384,
	[BULK_HASH_ALG_SHA512]  = CFS_HASH_ALG_SHA512,
};
/* Human-readable name of a bulk hash algorithm id (BULK_HASH_ALG_*). */
const char *sptlrpc_get_hash_name(__u8 hash_alg)
{
	return cfs_crypto_hash_name(cfs_hash_alg_id[hash_alg]);
}
2857
/* Look up a bulk hash algorithm id from its name via libcfs crypto. */
__u8 sptlrpc_get_hash_alg(const char *algname)
{
	return cfs_crypto_hash_alg(algname);
}
2862
2863 int bulk_sec_desc_unpack(struct lustre_msg *msg, int offset, int swabbed)
2864 {
2865         struct ptlrpc_bulk_sec_desc *bsd;
2866         int size = msg->lm_buflens[offset];
2867
2868         bsd = lustre_msg_buf(msg, offset, sizeof(*bsd));
2869         if (bsd == NULL) {
2870                 CERROR("Invalid bulk sec desc: size %d\n", size);
2871                 return -EINVAL;
2872         }
2873
2874         if (swabbed)
2875                 __swab32s(&bsd->bsd_nob);
2876
2877         if (unlikely(bsd->bsd_version != 0)) {
2878                 CERROR("Unexpected version %u\n", bsd->bsd_version);
2879                 return -EPROTO;
2880         }
2881
2882         if (unlikely(bsd->bsd_type >= SPTLRPC_BULK_MAX)) {
2883                 CERROR("Invalid type %u\n", bsd->bsd_type);
2884                 return -EPROTO;
2885         }
2886
2887         /* FIXME more sanity check here */
2888
2889         if (unlikely(bsd->bsd_svc != SPTLRPC_BULK_SVC_NULL &&
2890                      bsd->bsd_svc != SPTLRPC_BULK_SVC_INTG &&
2891                      bsd->bsd_svc != SPTLRPC_BULK_SVC_PRIV)) {
2892                 CERROR("Invalid svc %u\n", bsd->bsd_svc);
2893                 return -EPROTO;
2894         }
2895
2896         return 0;
2897 }
2898 EXPORT_SYMBOL(bulk_sec_desc_unpack);
2899
2900 /*
2901  * Compute the checksum of an RPC buffer payload.  If the return \a buflen
2902  * is not large enough, truncate the result to fit so that it is possible
2903  * to use a hash function with a large hash space, but only use a part of
2904  * the resulting hash.
2905  */
2906 int sptlrpc_get_bulk_checksum(struct ptlrpc_bulk_desc *desc, __u8 alg,
2907                               void *buf, int buflen)
2908 {
2909         struct ahash_request *req;
2910         int hashsize;
2911         unsigned int bufsize;
2912         int i, err;
2913
2914         LASSERT(alg > BULK_HASH_ALG_NULL && alg < BULK_HASH_ALG_MAX);
2915         LASSERT(buflen >= 4);
2916
2917         req = cfs_crypto_hash_init(cfs_hash_alg_id[alg], NULL, 0);
2918         if (IS_ERR(req)) {
2919                 CERROR("Unable to initialize checksum hash %s\n",
2920                        cfs_crypto_hash_name(cfs_hash_alg_id[alg]));
2921                 return PTR_ERR(req);
2922         }
2923
2924         hashsize = cfs_crypto_hash_digestsize(cfs_hash_alg_id[alg]);
2925
2926         for (i = 0; i < desc->bd_iov_count; i++) {
2927                 cfs_crypto_hash_update_page(req,
2928                                   desc->bd_vec[i].bv_page,
2929                                   desc->bd_vec[i].bv_offset &
2930                                               ~PAGE_MASK,
2931                                   desc->bd_vec[i].bv_len);
2932         }
2933
2934         if (hashsize > buflen) {
2935                 unsigned char hashbuf[CFS_CRYPTO_HASH_DIGESTSIZE_MAX];
2936
2937                 bufsize = sizeof(hashbuf);
2938                 LASSERTF(bufsize >= hashsize, "bufsize = %u < hashsize %u\n",
2939                          bufsize, hashsize);
2940                 err = cfs_crypto_hash_final(req, hashbuf, &bufsize);
2941                 memcpy(buf, hashbuf, buflen);
2942         } else {
2943                 bufsize = buflen;
2944                 err = cfs_crypto_hash_final(req, buf, &bufsize);
2945         }
2946
2947         return err;
2948 }
2949
2950 /*
2951  * crypto API helper/alloc blkciper
2952  */
2953
2954 /*
2955  * initialize/finalize
2956  */
2957
2958 int sptlrpc_init(void)
2959 {
2960         int rc;
2961
2962         rwlock_init(&policy_lock);
2963
2964         rc = sptlrpc_gc_init();
2965         if (rc)
2966                 goto out;
2967
2968         rc = sptlrpc_conf_init();
2969         if (rc)
2970                 goto out_gc;
2971
2972         rc = sptlrpc_null_init();
2973         if (rc)
2974                 goto out_conf;
2975
2976         rc = sptlrpc_plain_init();
2977         if (rc)
2978                 goto out_null;
2979
2980         rc = sptlrpc_lproc_init();
2981         if (rc)
2982                 goto out_plain;
2983
2984         return 0;
2985
2986 out_plain:
2987         sptlrpc_plain_fini();
2988 out_null:
2989         sptlrpc_null_fini();
2990 out_conf:
2991         sptlrpc_conf_fini();
2992 out_gc:
2993         sptlrpc_gc_fini();
2994 out:
2995         return rc;
2996 }
2997
/* Module cleanup: tear down in strict reverse order of sptlrpc_init(). */
void sptlrpc_fini(void)
{
	sptlrpc_lproc_fini();
	sptlrpc_plain_fini();
	sptlrpc_null_fini();
	sptlrpc_conf_fini();
	sptlrpc_gc_fini();
}