Whamcloud - gitweb
55bbdf255079df56b109a42d91888d4ebd0d6f91
[fs/lustre-release.git] / lustre / ptlrpc / sec.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * GPL HEADER START
5  *
6  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License version 2 only,
10  * as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope that it will be useful, but
13  * WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * General Public License version 2 for more details (a copy is included
16  * in the LICENSE file that accompanied this code).
17  *
18  * You should have received a copy of the GNU General Public License
19  * version 2 along with this program; If not, see
20  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
21  *
22  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23  * CA 95054 USA or visit www.sun.com if you need additional information or
24  * have any questions.
25  *
26  * GPL HEADER END
27  */
28 /*
29  * Copyright  2008 Sun Microsystems, Inc. All rights reserved
30  * Use is subject to license terms.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  *
36  * lustre/ptlrpc/sec.c
37  *
38  * Author: Eric Mei <ericm@clusterfs.com>
39  */
40
41 #ifndef EXPORT_SYMTAB
42 #define EXPORT_SYMTAB
43 #endif
44 #define DEBUG_SUBSYSTEM S_SEC
45
46 #include <libcfs/libcfs.h>
47 #ifndef __KERNEL__
48 #include <liblustre.h>
49 #include <libcfs/list.h>
50 #else
51 #include <linux/crypto.h>
52 #include <linux/key.h>
53 #endif
54
55 #include <obd.h>
56 #include <obd_class.h>
57 #include <obd_support.h>
58 #include <lustre_net.h>
59 #include <lustre_import.h>
60 #include <lustre_dlm.h>
61 #include <lustre_sec.h>
62
63 #include "ptlrpc_internal.h"
64
65 /***********************************************
66  * policy registers                            *
67  ***********************************************/
68
69 static rwlock_t policy_lock;
70 static struct ptlrpc_sec_policy *policies[SPTLRPC_POLICY_MAX] = {
71         NULL,
72 };
73
74 int sptlrpc_register_policy(struct ptlrpc_sec_policy *policy)
75 {
76         __u16 number = policy->sp_policy;
77
78         LASSERT(policy->sp_name);
79         LASSERT(policy->sp_cops);
80         LASSERT(policy->sp_sops);
81
82         if (number >= SPTLRPC_POLICY_MAX)
83                 return -EINVAL;
84
85         write_lock(&policy_lock);
86         if (unlikely(policies[number])) {
87                 write_unlock(&policy_lock);
88                 return -EALREADY;
89         }
90         policies[number] = policy;
91         write_unlock(&policy_lock);
92
93         CDEBUG(D_SEC, "%s: registered\n", policy->sp_name);
94         return 0;
95 }
96 EXPORT_SYMBOL(sptlrpc_register_policy);
97
98 int sptlrpc_unregister_policy(struct ptlrpc_sec_policy *policy)
99 {
100         __u16 number = policy->sp_policy;
101
102         LASSERT(number < SPTLRPC_POLICY_MAX);
103
104         write_lock(&policy_lock);
105         if (unlikely(policies[number] == NULL)) {
106                 write_unlock(&policy_lock);
107                 CERROR("%s: already unregistered\n", policy->sp_name);
108                 return -EINVAL;
109         }
110
111         LASSERT(policies[number] == policy);
112         policies[number] = NULL;
113         write_unlock(&policy_lock);
114
115         CDEBUG(D_SEC, "%s: unregistered\n", policy->sp_name);
116         return 0;
117 }
118 EXPORT_SYMBOL(sptlrpc_unregister_policy);
119
120 static
121 struct ptlrpc_sec_policy * sptlrpc_wireflavor2policy(__u32 flavor)
122 {
123         static DECLARE_MUTEX(load_mutex);
124         static atomic_t           loaded = ATOMIC_INIT(0);
125         struct ptlrpc_sec_policy *policy;
126         __u16                     number = SPTLRPC_FLVR_POLICY(flavor);
127         __u16                     flag = 0;
128
129         if (number >= SPTLRPC_POLICY_MAX)
130                 return NULL;
131
132         while (1) {
133                 read_lock(&policy_lock);
134                 policy = policies[number];
135                 if (policy && !try_module_get(policy->sp_owner))
136                         policy = NULL;
137                 if (policy == NULL)
138                         flag = atomic_read(&loaded);
139                 read_unlock(&policy_lock);
140
141                 if (policy != NULL || flag != 0 ||
142                     number != SPTLRPC_POLICY_GSS)
143                         break;
144
145                 /* try to load gss module, once */
146                 mutex_down(&load_mutex);
147                 if (atomic_read(&loaded) == 0) {
148                         if (request_module("ptlrpc_gss") == 0)
149                                 CWARN("module ptlrpc_gss loaded on demand\n");
150                         else
151                                 CERROR("Unable to load module ptlrpc_gss\n");
152
153                         atomic_set(&loaded, 1);
154                 }
155                 mutex_up(&load_mutex);
156         }
157
158         return policy;
159 }
160
161 __u32 sptlrpc_name2flavor_base(const char *name)
162 {
163         if (!strcmp(name, "null"))
164                 return SPTLRPC_FLVR_NULL;
165         if (!strcmp(name, "plain"))
166                 return SPTLRPC_FLVR_PLAIN;
167         if (!strcmp(name, "krb5n"))
168                 return SPTLRPC_FLVR_KRB5N;
169         if (!strcmp(name, "krb5a"))
170                 return SPTLRPC_FLVR_KRB5A;
171         if (!strcmp(name, "krb5i"))
172                 return SPTLRPC_FLVR_KRB5I;
173         if (!strcmp(name, "krb5p"))
174                 return SPTLRPC_FLVR_KRB5P;
175
176         return SPTLRPC_FLVR_INVALID;
177 }
178 EXPORT_SYMBOL(sptlrpc_name2flavor_base);
179
180 const char *sptlrpc_flavor2name_base(__u32 flvr)
181 {
182         __u32   base = SPTLRPC_FLVR_BASE(flvr);
183
184         if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_NULL))
185                 return "null";
186         else if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_PLAIN))
187                 return "plain";
188         else if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_KRB5N))
189                 return "krb5n";
190         else if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_KRB5A))
191                 return "krb5a";
192         else if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_KRB5I))
193                 return "krb5i";
194         else if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_KRB5P))
195                 return "krb5p";
196
197         CERROR("invalid wire flavor 0x%x\n", flvr);
198         return "invalid";
199 }
200 EXPORT_SYMBOL(sptlrpc_flavor2name_base);
201
202 char *sptlrpc_flavor2name_bulk(struct sptlrpc_flavor *sf,
203                                char *buf, int bufsize)
204 {
205         if (SPTLRPC_FLVR_POLICY(sf->sf_rpc) == SPTLRPC_POLICY_PLAIN)
206                 snprintf(buf, bufsize, "hash:%s",
207                          sptlrpc_get_hash_name(sf->u_bulk.hash.hash_alg));
208         else
209                 snprintf(buf, bufsize, "%s",
210                          sptlrpc_flavor2name_base(sf->sf_rpc));
211
212         buf[bufsize - 1] = '\0';
213         return buf;
214 }
215 EXPORT_SYMBOL(sptlrpc_flavor2name_bulk);
216
217 char *sptlrpc_flavor2name(struct sptlrpc_flavor *sf, char *buf, int bufsize)
218 {
219         snprintf(buf, bufsize, "%s", sptlrpc_flavor2name_base(sf->sf_rpc));
220
221         /*
222          * currently we don't support customized bulk specification for
223          * flavors other than plain
224          */
225         if (SPTLRPC_FLVR_POLICY(sf->sf_rpc) == SPTLRPC_POLICY_PLAIN) {
226                 char bspec[16];
227
228                 bspec[0] = '-';
229                 sptlrpc_flavor2name_bulk(sf, &bspec[1], sizeof(bspec) - 1);
230                 strncat(buf, bspec, bufsize);
231         }
232
233         buf[bufsize - 1] = '\0';
234         return buf;
235 }
236 EXPORT_SYMBOL(sptlrpc_flavor2name);
237
238 char *sptlrpc_secflags2str(__u32 flags, char *buf, int bufsize)
239 {
240         buf[0] = '\0';
241
242         if (flags & PTLRPC_SEC_FL_REVERSE)
243                 strncat(buf, "reverse,", bufsize);
244         if (flags & PTLRPC_SEC_FL_ROOTONLY)
245                 strncat(buf, "rootonly,", bufsize);
246         if (flags & PTLRPC_SEC_FL_UDESC)
247                 strncat(buf, "udesc,", bufsize);
248         if (flags & PTLRPC_SEC_FL_BULK)
249                 strncat(buf, "bulk,", bufsize);
250         if (buf[0] == '\0')
251                 strncat(buf, "-,", bufsize);
252
253         buf[bufsize - 1] = '\0';
254         return buf;
255 }
256 EXPORT_SYMBOL(sptlrpc_secflags2str);
257
258 /**************************************************
259  * client context APIs                            *
260  **************************************************/
261
262 static
263 struct ptlrpc_cli_ctx *get_my_ctx(struct ptlrpc_sec *sec)
264 {
265         struct vfs_cred vcred;
266         int create = 1, remove_dead = 1;
267
268         LASSERT(sec);
269         LASSERT(sec->ps_policy->sp_cops->lookup_ctx);
270
271         if (sec->ps_flvr.sf_flags & (PTLRPC_SEC_FL_REVERSE |
272                                      PTLRPC_SEC_FL_ROOTONLY)) {
273                 vcred.vc_uid = 0;
274                 vcred.vc_gid = 0;
275                 if (sec->ps_flvr.sf_flags & PTLRPC_SEC_FL_REVERSE) {
276                         create = 0;
277                         remove_dead = 0;
278                 }
279         } else {
280                 vcred.vc_uid = cfs_current()->uid;
281                 vcred.vc_gid = cfs_current()->gid;
282         }
283
284         return sec->ps_policy->sp_cops->lookup_ctx(sec, &vcred,
285                                                    create, remove_dead);
286 }
287
288 struct ptlrpc_cli_ctx *sptlrpc_cli_ctx_get(struct ptlrpc_cli_ctx *ctx)
289 {
290         LASSERT(atomic_read(&ctx->cc_refcount) > 0);
291         atomic_inc(&ctx->cc_refcount);
292         return ctx;
293 }
294 EXPORT_SYMBOL(sptlrpc_cli_ctx_get);
295
296 void sptlrpc_cli_ctx_put(struct ptlrpc_cli_ctx *ctx, int sync)
297 {
298         struct ptlrpc_sec *sec = ctx->cc_sec;
299
300         LASSERT(sec);
301         LASSERT(atomic_read(&ctx->cc_refcount));
302
303         if (!atomic_dec_and_test(&ctx->cc_refcount))
304                 return;
305
306         sec->ps_policy->sp_cops->release_ctx(sec, ctx, sync);
307 }
308 EXPORT_SYMBOL(sptlrpc_cli_ctx_put);
309
310 /*
311  * expire the context immediately.
312  * the caller must hold at least 1 ref on the ctx.
313  */
314 void sptlrpc_cli_ctx_expire(struct ptlrpc_cli_ctx *ctx)
315 {
316         LASSERT(ctx->cc_ops->die);
317         ctx->cc_ops->die(ctx, 0);
318 }
319 EXPORT_SYMBOL(sptlrpc_cli_ctx_expire);
320
321 void sptlrpc_cli_ctx_wakeup(struct ptlrpc_cli_ctx *ctx)
322 {
323         struct ptlrpc_request *req, *next;
324
325         spin_lock(&ctx->cc_lock);
326         list_for_each_entry_safe(req, next, &ctx->cc_req_list, rq_ctx_chain) {
327                 list_del_init(&req->rq_ctx_chain);
328                 ptlrpc_client_wake_req(req);
329         }
330         spin_unlock(&ctx->cc_lock);
331 }
332 EXPORT_SYMBOL(sptlrpc_cli_ctx_wakeup);
333
334 int sptlrpc_cli_ctx_display(struct ptlrpc_cli_ctx *ctx, char *buf, int bufsize)
335 {
336         LASSERT(ctx->cc_ops);
337
338         if (ctx->cc_ops->display == NULL)
339                 return 0;
340
341         return ctx->cc_ops->display(ctx, buf, bufsize);
342 }
343
344 static int import_sec_check_expire(struct obd_import *imp)
345 {
346         int     adapt = 0;
347
348         spin_lock(&imp->imp_lock);
349         if (imp->imp_sec_expire &&
350             imp->imp_sec_expire < cfs_time_current_sec()) {
351                 adapt = 1;
352                 imp->imp_sec_expire = 0;
353         }
354         spin_unlock(&imp->imp_lock);
355
356         if (!adapt)
357                 return 0;
358
359         CDEBUG(D_SEC, "found delayed sec adapt expired, do it now\n");
360         return sptlrpc_import_sec_adapt(imp, NULL, 0);
361 }
362
363 static int import_sec_validate_get(struct obd_import *imp,
364                                    struct ptlrpc_sec **sec)
365 {
366         int     rc;
367
368         if (unlikely(imp->imp_sec_expire)) {
369                 rc = import_sec_check_expire(imp);
370                 if (rc)
371                         return rc;
372         }
373
374         *sec = sptlrpc_import_sec_ref(imp);
375         if (*sec == NULL) {
376                 CERROR("import %p (%s) with no sec\n",
377                        imp, ptlrpc_import_state_name(imp->imp_state));
378                 return -EACCES;
379         }
380
381         if (unlikely((*sec)->ps_dying)) {
382                 CERROR("attempt to use dying sec %p\n", sec);
383                 sptlrpc_sec_put(*sec);
384                 return -EACCES;
385         }
386
387         return 0;
388 }
389
390 int sptlrpc_req_get_ctx(struct ptlrpc_request *req)
391 {
392         struct obd_import *imp = req->rq_import;
393         struct ptlrpc_sec *sec;
394         int                rc;
395         ENTRY;
396
397         LASSERT(!req->rq_cli_ctx);
398         LASSERT(imp);
399
400         rc = import_sec_validate_get(imp, &sec);
401         if (rc)
402                 RETURN(rc);
403
404         req->rq_cli_ctx = get_my_ctx(sec);
405
406         sptlrpc_sec_put(sec);
407
408         if (!req->rq_cli_ctx) {
409                 CERROR("req %p: fail to get context\n", req);
410                 RETURN(-ENOMEM);
411         }
412
413         RETURN(0);
414 }
415
416 /*
417  * if @sync == 0, this function should return quickly without sleep;
418  * otherwise might trigger ctx destroying rpc to server.
419  */
420 void sptlrpc_req_put_ctx(struct ptlrpc_request *req, int sync)
421 {
422         ENTRY;
423
424         LASSERT(req);
425         LASSERT(req->rq_cli_ctx);
426
427         /* request might be asked to release earlier while still
428          * in the context waiting list.
429          */
430         if (!list_empty(&req->rq_ctx_chain)) {
431                 spin_lock(&req->rq_cli_ctx->cc_lock);
432                 list_del_init(&req->rq_ctx_chain);
433                 spin_unlock(&req->rq_cli_ctx->cc_lock);
434         }
435
436         sptlrpc_cli_ctx_put(req->rq_cli_ctx, sync);
437         req->rq_cli_ctx = NULL;
438         EXIT;
439 }
440
441 static
442 int sptlrpc_req_ctx_switch(struct ptlrpc_request *req,
443                            struct ptlrpc_cli_ctx *oldctx,
444                            struct ptlrpc_cli_ctx *newctx)
445 {
446         struct sptlrpc_flavor   old_flvr;
447         char                   *reqmsg;
448         int                     reqmsg_size;
449         int                     rc;
450
451         if (likely(oldctx->cc_sec == newctx->cc_sec))
452                 return 0;
453
454         LASSERT(req->rq_reqmsg);
455         LASSERT(req->rq_reqlen);
456         LASSERT(req->rq_replen);
457
458         CWARN("req %p: switch ctx %p -> %p, switch sec %p(%s) -> %p(%s)\n",
459               req, oldctx, newctx,
460               oldctx->cc_sec, oldctx->cc_sec->ps_policy->sp_name,
461               newctx->cc_sec, newctx->cc_sec->ps_policy->sp_name);
462
463         /* save flavor */
464         old_flvr = req->rq_flvr;
465
466         /* save request message */
467         reqmsg_size = req->rq_reqlen;
468         OBD_ALLOC(reqmsg, reqmsg_size);
469         if (reqmsg == NULL)
470                 return -ENOMEM;
471         memcpy(reqmsg, req->rq_reqmsg, reqmsg_size);
472
473         /* release old req/rep buf */
474         req->rq_cli_ctx = oldctx;
475         sptlrpc_cli_free_reqbuf(req);
476         sptlrpc_cli_free_repbuf(req);
477         req->rq_cli_ctx = newctx;
478
479         /* recalculate the flavor */
480         sptlrpc_req_set_flavor(req, 0);
481
482         /* alloc new request buffer
483          * we don't need to alloc reply buffer here, leave it to the
484          * rest procedure of ptlrpc
485          */
486         rc = sptlrpc_cli_alloc_reqbuf(req, reqmsg_size);
487         if (!rc) {
488                 LASSERT(req->rq_reqmsg);
489                 memcpy(req->rq_reqmsg, reqmsg, reqmsg_size);
490         } else {
491                 CWARN("failed to alloc reqbuf: %d\n", rc);
492                 req->rq_flvr = old_flvr;
493         }
494
495         OBD_FREE(reqmsg, reqmsg_size);
496         return rc;
497 }
498
499 /**
500  * if current context has died, or if we resend after flavor switched,
501  * call this func to switch context. if no switch is needed, request
502  * will end up with the same context.
503  *
504  * request must have a context. in any case of failure, restore the
505  * restore the old one - a request must have a context.
506  */
507 int sptlrpc_req_replace_dead_ctx(struct ptlrpc_request *req)
508 {
509         struct ptlrpc_cli_ctx *oldctx = req->rq_cli_ctx;
510         struct ptlrpc_cli_ctx *newctx;
511         int                    rc;
512         ENTRY;
513
514         LASSERT(oldctx);
515
516         sptlrpc_cli_ctx_get(oldctx);
517         sptlrpc_req_put_ctx(req, 0);
518
519         rc = sptlrpc_req_get_ctx(req);
520         if (unlikely(rc)) {
521                 LASSERT(!req->rq_cli_ctx);
522
523                 /* restore old ctx */
524                 req->rq_cli_ctx = oldctx;
525                 RETURN(rc);
526         }
527
528         newctx = req->rq_cli_ctx;
529         LASSERT(newctx);
530
531         if (unlikely(newctx == oldctx)) {
532                 if (test_bit(PTLRPC_CTX_DEAD_BIT, &oldctx->cc_flags)) {
533                         /*
534                          * still get the old ctx, usually means system busy
535                          */
536                         CWARN("ctx (%p, fl %lx) doesn't switch, "
537                               "relax a little bit\n",
538                               newctx, newctx->cc_flags);
539
540                         cfs_schedule_timeout(CFS_TASK_INTERRUPTIBLE, HZ);
541                 }
542         } else {
543                 rc = sptlrpc_req_ctx_switch(req, oldctx, newctx);
544                 if (rc) {
545                         /* restore old ctx */
546                         sptlrpc_req_put_ctx(req, 0);
547                         req->rq_cli_ctx = oldctx;
548                         RETURN(rc);
549                 }
550
551                 LASSERT(req->rq_cli_ctx == newctx);
552         }
553
554         sptlrpc_cli_ctx_put(oldctx, 1);
555         RETURN(0);
556 }
557 EXPORT_SYMBOL(sptlrpc_req_replace_dead_ctx);
558
/*
 * Wait condition used while a context refresh is pending: yields 1 once
 * cli_ctx_is_refreshed() reports true for @ctx, 0 otherwise.
 */
static
int ctx_check_refresh(struct ptlrpc_cli_ctx *ctx)
{
        return cli_ctx_is_refreshed(ctx) ? 1 : 0;
}
566
567 static
568 int ctx_refresh_timeout(void *data)
569 {
570         struct ptlrpc_request *req = data;
571         int rc;
572
573         /* conn_cnt is needed in expire_one_request */
574         lustre_msg_set_conn_cnt(req->rq_reqmsg, req->rq_import->imp_conn_cnt);
575
576         rc = ptlrpc_expire_one_request(req, 1);
577         /* if we started recovery, we should mark this ctx dead; otherwise
578          * in case of lgssd died nobody would retire this ctx, following
579          * connecting will still find the same ctx thus cause deadlock.
580          * there's an assumption that expire time of the request should be
581          * later than the context refresh expire time.
582          */
583         if (rc == 0)
584                 req->rq_cli_ctx->cc_ops->die(req->rq_cli_ctx, 0);
585         return rc;
586 }
587
588 static
589 void ctx_refresh_interrupt(void *data)
590 {
591         struct ptlrpc_request *req = data;
592
593         spin_lock(&req->rq_lock);
594         req->rq_intr = 1;
595         spin_unlock(&req->rq_lock);
596 }
597
598 static
599 void req_off_ctx_list(struct ptlrpc_request *req, struct ptlrpc_cli_ctx *ctx)
600 {
601         spin_lock(&ctx->cc_lock);
602         if (!list_empty(&req->rq_ctx_chain))
603                 list_del_init(&req->rq_ctx_chain);
604         spin_unlock(&ctx->cc_lock);
605 }
606
607 /*
608  * the status of context could be subject to be changed by other threads at any
609  * time. we allow this race. but once we return with 0, the caller will
610  * suppose it's uptodated and keep using it until the owning rpc is done.
611  *
612  * @timeout:
613  *    < 0  - don't wait
614  *    = 0  - wait until success or fatal error occur
615  *    > 0  - timeout value
616  *
617  * return 0 only if the context is uptodated.
618  */
619 int sptlrpc_req_refresh_ctx(struct ptlrpc_request *req, long timeout)
620 {
621         struct ptlrpc_cli_ctx  *ctx = req->rq_cli_ctx;
622         struct ptlrpc_sec      *sec;
623         struct l_wait_info      lwi;
624         int                     rc;
625         ENTRY;
626
627         LASSERT(ctx);
628
629         if (req->rq_ctx_init || req->rq_ctx_fini)
630                 RETURN(0);
631
632         /*
633          * during the process a request's context might change type even
634          * (e.g. from gss ctx to plain ctx), so each loop we need to re-check
635          * everything
636          */
637 again:
638         rc = import_sec_validate_get(req->rq_import, &sec);
639         if (rc)
640                 RETURN(rc);
641
642         if (sec->ps_flvr.sf_rpc != req->rq_flvr.sf_rpc)
643                 sptlrpc_req_replace_dead_ctx(req);
644
645         sptlrpc_sec_put(sec);
646
647         if (cli_ctx_is_eternal(ctx))
648                 RETURN(0);
649
650         if (unlikely(test_bit(PTLRPC_CTX_NEW_BIT, &ctx->cc_flags))) {
651                 LASSERT(ctx->cc_ops->refresh);
652                 ctx->cc_ops->refresh(ctx);
653         }
654         LASSERT(test_bit(PTLRPC_CTX_NEW_BIT, &ctx->cc_flags) == 0);
655
656         LASSERT(ctx->cc_ops->validate);
657         if (ctx->cc_ops->validate(ctx) == 0) {
658                 req_off_ctx_list(req, ctx);
659                 RETURN(0);
660         }
661
662         if (unlikely(test_bit(PTLRPC_CTX_ERROR_BIT, &ctx->cc_flags))) {
663                 req->rq_err = 1;
664                 req_off_ctx_list(req, ctx);
665                 RETURN(-EPERM);
666         }
667
668         /* This is subtle. For resent message we have to keep original
669          * context to survive following situation:
670          *  1. the request sent to server
671          *  2. recovery was kick start
672          *  3. recovery finished, the request marked as resent
673          *  4. resend the request
674          *  5. old reply from server received (because xid is the same)
675          *  6. verify reply (has to be success)
676          *  7. new reply from server received, lnet drop it
677          *
678          * Note we can't simply change xid for resent request because
679          * server reply on it for reply reconstruction.
680          *
681          * Commonly the original context should be uptodate because we
682          * have a expiry nice time; And server will keep their half part
683          * context because we at least hold a ref of old context which
684          * prevent the context detroy RPC be sent. So server still can
685          * accept the request and finish RPC. Two cases:
686          *  1. If server side context has been trimed, a NO_CONTEXT will
687          *     be returned, gss_cli_ctx_verify/unseal will switch to new
688          *     context by force.
689          *  2. Current context never be refreshed, then we are fine: we
690          *     never really send request with old context before.
691          */
692         if (test_bit(PTLRPC_CTX_UPTODATE_BIT, &ctx->cc_flags) &&
693             unlikely(req->rq_reqmsg) &&
694             lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT) {
695                 req_off_ctx_list(req, ctx);
696                 RETURN(0);
697         }
698
699         if (unlikely(test_bit(PTLRPC_CTX_DEAD_BIT, &ctx->cc_flags))) {
700                 /*
701                  * don't switch ctx if import was deactivated
702                  */
703                 if (req->rq_import->imp_deactive) {
704                         req_off_ctx_list(req, ctx);
705                         req->rq_err = 1;
706                         RETURN(-EINTR);
707                 }
708
709                 rc = sptlrpc_req_replace_dead_ctx(req);
710                 if (rc) {
711                         LASSERT(ctx == req->rq_cli_ctx);
712                         CERROR("req %p: failed to replace dead ctx %p: %d\n",
713                                 req, ctx, rc);
714                         req->rq_err = 1;
715                         LASSERT(list_empty(&req->rq_ctx_chain));
716                         RETURN(rc);
717                 }
718
719                 CWARN("req %p: replace dead ctx %p => ctx %p (%u->%s)\n",
720                       req, ctx, req->rq_cli_ctx,
721                       req->rq_cli_ctx->cc_vcred.vc_uid,
722                       sec2target_str(req->rq_cli_ctx->cc_sec));
723
724                 ctx = req->rq_cli_ctx;
725                 LASSERT(list_empty(&req->rq_ctx_chain));
726
727                 goto again;
728         }
729
730         /* Now we're sure this context is during upcall, add myself into
731          * waiting list
732          */
733         spin_lock(&ctx->cc_lock);
734         if (list_empty(&req->rq_ctx_chain))
735                 list_add(&req->rq_ctx_chain, &ctx->cc_req_list);
736         spin_unlock(&ctx->cc_lock);
737
738         if (timeout < 0)
739                 RETURN(-EWOULDBLOCK);
740
741         /* Clear any flags that may be present from previous sends */
742         LASSERT(req->rq_receiving_reply == 0);
743         spin_lock(&req->rq_lock);
744         req->rq_err = 0;
745         req->rq_timedout = 0;
746         req->rq_resend = 0;
747         req->rq_restart = 0;
748         spin_unlock(&req->rq_lock);
749
750         lwi = LWI_TIMEOUT_INTR(timeout * HZ, ctx_refresh_timeout,
751                                ctx_refresh_interrupt, req);
752         rc = l_wait_event(req->rq_reply_waitq, ctx_check_refresh(ctx), &lwi);
753
754         /* following cases we could be here:
755          * - successfully refreshed;
756          * - interruptted;
757          * - timedout, and we don't want recover from the failure;
758          * - timedout, and waked up upon recovery finished;
759          * - someone else mark this ctx dead by force;
760          * - someone invalidate the req and call ptlrpc_client_wake_req(),
761          *   e.g. ptlrpc_abort_inflight();
762          */
763         if (!cli_ctx_is_refreshed(ctx)) {
764                 /* timed out or interruptted */
765                 req_off_ctx_list(req, ctx);
766
767                 LASSERT(rc != 0);
768                 RETURN(rc);
769         }
770
771         goto again;
772 }
773
774 /*
775  * Note this could be called in two situations:
776  * - new request from ptlrpc_pre_req(), with proper @opcode
777  * - old request which changed ctx in the middle, with @opcode == 0
778  */
779 void sptlrpc_req_set_flavor(struct ptlrpc_request *req, int opcode)
780 {
781         struct ptlrpc_sec *sec;
782
783         LASSERT(req->rq_import);
784         LASSERT(req->rq_cli_ctx);
785         LASSERT(req->rq_cli_ctx->cc_sec);
786         LASSERT(req->rq_bulk_read == 0 || req->rq_bulk_write == 0);
787
788         /* special security flags accoding to opcode */
789         switch (opcode) {
790         case OST_READ:
791         case MDS_READPAGE:
792                 req->rq_bulk_read = 1;
793                 break;
794         case OST_WRITE:
795         case MDS_WRITEPAGE:
796                 req->rq_bulk_write = 1;
797                 break;
798         case SEC_CTX_INIT:
799                 req->rq_ctx_init = 1;
800                 break;
801         case SEC_CTX_FINI:
802                 req->rq_ctx_fini = 1;
803                 break;
804         case 0:
805                 /* init/fini rpc won't be resend, so can't be here */
806                 LASSERT(req->rq_ctx_init == 0);
807                 LASSERT(req->rq_ctx_fini == 0);
808
809                 /* cleanup flags, which should be recalculated */
810                 req->rq_pack_udesc = 0;
811                 req->rq_pack_bulk = 0;
812                 break;
813         }
814
815         sec = req->rq_cli_ctx->cc_sec;
816
817         spin_lock(&sec->ps_lock);
818         req->rq_flvr = sec->ps_flvr;
819         spin_unlock(&sec->ps_lock);
820
821         /* force SVC_NULL for context initiation rpc, SVC_INTG for context
822          * destruction rpc */
823         if (unlikely(req->rq_ctx_init))
824                 flvr_set_svc(&req->rq_flvr.sf_rpc, SPTLRPC_SVC_NULL);
825         else if (unlikely(req->rq_ctx_fini))
826                 flvr_set_svc(&req->rq_flvr.sf_rpc, SPTLRPC_SVC_INTG);
827
828         /* user descriptor flag, null security can't do it anyway */
829         if ((sec->ps_flvr.sf_flags & PTLRPC_SEC_FL_UDESC) &&
830             (req->rq_flvr.sf_rpc != SPTLRPC_FLVR_NULL))
831                 req->rq_pack_udesc = 1;
832
833         /* bulk security flag */
834         if ((req->rq_bulk_read || req->rq_bulk_write) &&
835             sptlrpc_flavor_has_bulk(&req->rq_flvr))
836                 req->rq_pack_bulk = 1;
837 }
838
839 void sptlrpc_request_out_callback(struct ptlrpc_request *req)
840 {
841         if (SPTLRPC_FLVR_SVC(req->rq_flvr.sf_rpc) != SPTLRPC_SVC_PRIV)
842                 return;
843
844         LASSERT(req->rq_clrbuf);
845         if (req->rq_pool || !req->rq_reqbuf)
846                 return;
847
848         OBD_FREE(req->rq_reqbuf, req->rq_reqbuf_len);
849         req->rq_reqbuf = NULL;
850         req->rq_reqbuf_len = 0;
851 }
852
853 /*
854  * check whether current user have valid context for an import or not.
855  * might repeatedly try in case of non-fatal errors.
856  * return 0 on success, < 0 on failure
857  */
858 int sptlrpc_import_check_ctx(struct obd_import *imp)
859 {
860         struct ptlrpc_sec     *sec;
861         struct ptlrpc_cli_ctx *ctx;
862         struct ptlrpc_request *req = NULL;
863         int rc;
864         ENTRY;
865
866         might_sleep();
867
868         sec = sptlrpc_import_sec_ref(imp);
869         ctx = get_my_ctx(sec);
870         sptlrpc_sec_put(sec);
871
872         if (!ctx)
873                 RETURN(-ENOMEM);
874
875         if (cli_ctx_is_eternal(ctx) ||
876             ctx->cc_ops->validate(ctx) == 0) {
877                 sptlrpc_cli_ctx_put(ctx, 1);
878                 RETURN(0);
879         }
880
881         if (cli_ctx_is_error(ctx)) {
882                 sptlrpc_cli_ctx_put(ctx, 1);
883                 RETURN(-EACCES);
884         }
885
886         OBD_ALLOC_PTR(req);
887         if (!req)
888                 RETURN(-ENOMEM);
889
890         spin_lock_init(&req->rq_lock);
891         atomic_set(&req->rq_refcount, 10000);
892         CFS_INIT_LIST_HEAD(&req->rq_ctx_chain);
893         cfs_waitq_init(&req->rq_reply_waitq);
894         req->rq_import = imp;
895         req->rq_flvr = sec->ps_flvr;
896         req->rq_cli_ctx = ctx;
897
898         rc = sptlrpc_req_refresh_ctx(req, 0);
899         LASSERT(list_empty(&req->rq_ctx_chain));
900         sptlrpc_cli_ctx_put(req->rq_cli_ctx, 1);
901         OBD_FREE_PTR(req);
902
903         RETURN(rc);
904 }
905
906 int sptlrpc_cli_wrap_request(struct ptlrpc_request *req)
907 {
908         struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
909         int rc = 0;
910         ENTRY;
911
912         LASSERT(ctx);
913         LASSERT(ctx->cc_sec);
914         LASSERT(req->rq_reqbuf || req->rq_clrbuf);
915
916         /* we wrap bulk request here because now we can be sure
917          * the context is uptodate.
918          */
919         if (req->rq_bulk) {
920                 rc = sptlrpc_cli_wrap_bulk(req, req->rq_bulk);
921                 if (rc)
922                         RETURN(rc);
923         }
924
925         switch (SPTLRPC_FLVR_SVC(req->rq_flvr.sf_rpc)) {
926         case SPTLRPC_SVC_NULL:
927         case SPTLRPC_SVC_AUTH:
928         case SPTLRPC_SVC_INTG:
929                 LASSERT(ctx->cc_ops->sign);
930                 rc = ctx->cc_ops->sign(ctx, req);
931                 break;
932         case SPTLRPC_SVC_PRIV:
933                 LASSERT(ctx->cc_ops->seal);
934                 rc = ctx->cc_ops->seal(ctx, req);
935                 break;
936         default:
937                 LBUG();
938         }
939
940         if (rc == 0) {
941                 LASSERT(req->rq_reqdata_len);
942                 LASSERT(req->rq_reqdata_len % 8 == 0);
943                 LASSERT(req->rq_reqdata_len <= req->rq_reqbuf_len);
944         }
945
946         RETURN(rc);
947 }
948
949 static int do_cli_unwrap_reply(struct ptlrpc_request *req)
950 {
951         struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
952         int                    rc;
953         __u32                  flvr;
954         ENTRY;
955
956         LASSERT(ctx);
957         LASSERT(ctx->cc_sec);
958         LASSERT(req->rq_repbuf);
959         LASSERT(req->rq_repdata);
960         LASSERT(req->rq_repmsg == NULL);
961
962         if (req->rq_repdata_len < sizeof(struct lustre_msg)) {
963                 CERROR("replied data length %d too small\n",
964                        req->rq_repdata_len);
965                 RETURN(-EPROTO);
966         }
967
968         /* v2 message, check request/reply policy match */
969         flvr = WIRE_FLVR(req->rq_repdata->lm_secflvr);
970
971         if (req->rq_repdata->lm_magic == LUSTRE_MSG_MAGIC_V2_SWABBED)
972                 __swab32s(&flvr);
973
974         if (SPTLRPC_FLVR_POLICY(flvr) !=
975             SPTLRPC_FLVR_POLICY(req->rq_flvr.sf_rpc)) {
976                 CERROR("request policy was %u while reply with %u\n",
977                        SPTLRPC_FLVR_POLICY(req->rq_flvr.sf_rpc),
978                        SPTLRPC_FLVR_POLICY(flvr));
979                 RETURN(-EPROTO);
980         }
981
982         /* do nothing if it's null policy; otherwise unpack the
983          * wrapper message */
984         if (SPTLRPC_FLVR_POLICY(flvr) != SPTLRPC_POLICY_NULL &&
985             lustre_unpack_msg(req->rq_repdata, req->rq_repdata_len))
986                 RETURN(-EPROTO);
987
988         switch (SPTLRPC_FLVR_SVC(req->rq_flvr.sf_rpc)) {
989         case SPTLRPC_SVC_NULL:
990         case SPTLRPC_SVC_AUTH:
991         case SPTLRPC_SVC_INTG:
992                 LASSERT(ctx->cc_ops->verify);
993                 rc = ctx->cc_ops->verify(ctx, req);
994                 break;
995         case SPTLRPC_SVC_PRIV:
996                 LASSERT(ctx->cc_ops->unseal);
997                 rc = ctx->cc_ops->unseal(ctx, req);
998                 break;
999         default:
1000                 LBUG();
1001         }
1002
1003         LASSERT(rc || req->rq_repmsg || req->rq_resend);
1004         RETURN(rc);
1005 }
1006
1007 /*
1008  * upon this be called, the reply buffer should have been un-posted,
1009  * so nothing is going to change.
1010  */
1011 int sptlrpc_cli_unwrap_reply(struct ptlrpc_request *req)
1012 {
1013         LASSERT(req->rq_repbuf);
1014         LASSERT(req->rq_repdata == NULL);
1015         LASSERT(req->rq_repmsg == NULL);
1016         LASSERT(req->rq_reply_off + req->rq_nob_received <= req->rq_repbuf_len);
1017
1018         if (req->rq_reply_off == 0 &&
1019             (lustre_msghdr_get_flags(req->rq_reqmsg) & MSGHDR_AT_SUPPORT)) {
1020                 CERROR("real reply with offset 0\n");
1021                 return -EPROTO;
1022         }
1023
1024         if (req->rq_reply_off % 8 != 0) {
1025                 CERROR("reply at odd offset %u\n", req->rq_reply_off);
1026                 return -EPROTO;
1027         }
1028
1029         req->rq_repdata = (struct lustre_msg *)
1030                                 (req->rq_repbuf + req->rq_reply_off);
1031         req->rq_repdata_len = req->rq_nob_received;
1032
1033         return do_cli_unwrap_reply(req);
1034 }
1035
1036 /**
1037  * Upon called, the receive buffer might be still posted, so the reply data
1038  * might be changed at any time, no matter we're holding rq_lock or not. we
1039  * expect the rq_reply_off be 0, rq_nob_received is the early reply size.
1040  *
1041  * we allocate separate ptlrpc_request and reply buffer for early reply
1042  * processing, return 0 and @req_ret is a duplicated ptlrpc_request. caller
1043  * must call sptlrpc_cli_finish_early_reply() on the returned request to
1044  * release it. if anything goes wrong @req_ret will not be set.
1045  */
1046 int sptlrpc_cli_unwrap_early_reply(struct ptlrpc_request *req,
1047                                    struct ptlrpc_request **req_ret)
1048 {
1049         struct ptlrpc_request  *early_req;
1050         char                   *early_buf;
1051         int                     early_bufsz, early_size;
1052         int                     rc;
1053         ENTRY;
1054
1055         OBD_ALLOC_PTR(early_req);
1056         if (early_req == NULL)
1057                 RETURN(-ENOMEM);
1058
1059         early_size = req->rq_nob_received;
1060         early_bufsz = size_roundup_power2(early_size);
1061         OBD_ALLOC(early_buf, early_bufsz);
1062         if (early_buf == NULL)
1063                 GOTO(err_req, rc = -ENOMEM);
1064
1065         /* sanity checkings and copy data out, do it inside spinlock */
1066         spin_lock(&req->rq_lock);
1067
1068         if (req->rq_replied) {
1069                 spin_unlock(&req->rq_lock);
1070                 GOTO(err_buf, rc = -EALREADY);
1071         }
1072
1073         LASSERT(req->rq_repbuf);
1074         LASSERT(req->rq_repdata == NULL);
1075         LASSERT(req->rq_repmsg == NULL);
1076
1077         if (req->rq_reply_off != 0) {
1078                 CERROR("early reply with offset %u\n", req->rq_reply_off);
1079                 spin_unlock(&req->rq_lock);
1080                 GOTO(err_buf, rc = -EPROTO);
1081         }
1082
1083         if (req->rq_nob_received != early_size) {
1084                 /* even another early arrived the size should be the same */
1085                 CERROR("data size has changed from %u to %u\n",
1086                        early_size, req->rq_nob_received);
1087                 spin_unlock(&req->rq_lock);
1088                 GOTO(err_buf, rc = -EINVAL);
1089         }
1090
1091         if (req->rq_nob_received < sizeof(struct lustre_msg)) {
1092                 CERROR("early reply length %d too small\n",
1093                        req->rq_nob_received);
1094                 spin_unlock(&req->rq_lock);
1095                 GOTO(err_buf, rc = -EALREADY);
1096         }
1097
1098         memcpy(early_buf, req->rq_repbuf, early_size);
1099         spin_unlock(&req->rq_lock);
1100
1101         early_req->rq_cli_ctx = sptlrpc_cli_ctx_get(req->rq_cli_ctx);
1102         early_req->rq_flvr = req->rq_flvr;
1103         early_req->rq_repbuf = early_buf;
1104         early_req->rq_repbuf_len = early_bufsz;
1105         early_req->rq_repdata = (struct lustre_msg *) early_buf;
1106         early_req->rq_repdata_len = early_size;
1107         early_req->rq_early = 1;
1108
1109         rc = do_cli_unwrap_reply(early_req);
1110         if (rc) {
1111                 DEBUG_REQ(D_ADAPTTO, early_req,
1112                           "error %d unwrap early reply", rc);
1113                 GOTO(err_ctx, rc);
1114         }
1115
1116         LASSERT(early_req->rq_repmsg);
1117         *req_ret = early_req;
1118         RETURN(0);
1119
1120 err_ctx:
1121         sptlrpc_cli_ctx_put(early_req->rq_cli_ctx, 1);
1122 err_buf:
1123         OBD_FREE(early_buf, early_bufsz);
1124 err_req:
1125         OBD_FREE_PTR(early_req);
1126         RETURN(rc);
1127 }
1128
1129 void sptlrpc_cli_finish_early_reply(struct ptlrpc_request *early_req)
1130 {
1131         LASSERT(early_req->rq_repbuf);
1132         LASSERT(early_req->rq_repdata);
1133         LASSERT(early_req->rq_repmsg);
1134
1135         sptlrpc_cli_ctx_put(early_req->rq_cli_ctx, 1);
1136         OBD_FREE(early_req->rq_repbuf, early_req->rq_repbuf_len);
1137         OBD_FREE_PTR(early_req);
1138 }
1139
1140 /**************************************************
1141  * sec ID                                         *
1142  **************************************************/
1143
1144 /*
1145  * "fixed" sec (e.g. null) use sec_id < 0
1146  */
1147 static atomic_t sptlrpc_sec_id = ATOMIC_INIT(1);
1148
1149 int sptlrpc_get_next_secid(void)
1150 {
1151         return atomic_inc_return(&sptlrpc_sec_id);
1152 }
1153 EXPORT_SYMBOL(sptlrpc_get_next_secid);
1154
1155 /**************************************************
1156  * client side high-level security APIs           *
1157  **************************************************/
1158
1159 static int sec_cop_flush_ctx_cache(struct ptlrpc_sec *sec, uid_t uid,
1160                                    int grace, int force)
1161 {
1162         struct ptlrpc_sec_policy *policy = sec->ps_policy;
1163
1164         LASSERT(policy->sp_cops);
1165         LASSERT(policy->sp_cops->flush_ctx_cache);
1166
1167         return policy->sp_cops->flush_ctx_cache(sec, uid, grace, force);
1168 }
1169
1170 static void sec_cop_destroy_sec(struct ptlrpc_sec *sec)
1171 {
1172         struct ptlrpc_sec_policy *policy = sec->ps_policy;
1173
1174         LASSERT(atomic_read(&sec->ps_refcount) == 0);
1175         LASSERT(atomic_read(&sec->ps_nctx) == 0);
1176         LASSERT(policy->sp_cops->destroy_sec);
1177
1178         CDEBUG(D_SEC, "%s@%p: being destroied\n", sec->ps_policy->sp_name, sec);
1179
1180         policy->sp_cops->destroy_sec(sec);
1181         sptlrpc_policy_put(policy);
1182 }
1183
/*
 * exported entry point for destroying a sec; simply hands @sec over to
 * sec_cop_destroy_sec(), which asserts that no reference or context
 * remains.
 */
void sptlrpc_sec_destroy(struct ptlrpc_sec *sec)
{
        sec_cop_destroy_sec(sec);
}
EXPORT_SYMBOL(sptlrpc_sec_destroy);
1189
1190 static void sptlrpc_sec_kill(struct ptlrpc_sec *sec)
1191 {
1192         LASSERT(atomic_read(&sec->ps_refcount) > 0);
1193
1194         if (sec->ps_policy->sp_cops->kill_sec) {
1195                 sec->ps_policy->sp_cops->kill_sec(sec);
1196
1197                 sec_cop_flush_ctx_cache(sec, -1, 1, 1);
1198         }
1199 }
1200
1201 struct ptlrpc_sec *sptlrpc_sec_get(struct ptlrpc_sec *sec)
1202 {
1203         if (sec) {
1204                 LASSERT(atomic_read(&sec->ps_refcount) > 0);
1205                 atomic_inc(&sec->ps_refcount);
1206         }
1207
1208         return sec;
1209 }
1210 EXPORT_SYMBOL(sptlrpc_sec_get);
1211
1212 void sptlrpc_sec_put(struct ptlrpc_sec *sec)
1213 {
1214         if (sec) {
1215                 LASSERT(atomic_read(&sec->ps_refcount) > 0);
1216
1217                 if (atomic_dec_and_test(&sec->ps_refcount)) {
1218                         LASSERT(atomic_read(&sec->ps_nctx) == 0);
1219
1220                         sptlrpc_gc_del_sec(sec);
1221                         sec_cop_destroy_sec(sec);
1222                 }
1223         }
1224 }
1225 EXPORT_SYMBOL(sptlrpc_sec_put);
1226
1227 /*
1228  * policy module is responsible for taking refrence of import
1229  */
1230 static
1231 struct ptlrpc_sec * sptlrpc_sec_create(struct obd_import *imp,
1232                                        struct ptlrpc_svc_ctx *svc_ctx,
1233                                        struct sptlrpc_flavor *sf,
1234                                        enum lustre_sec_part sp)
1235 {
1236         struct ptlrpc_sec_policy *policy;
1237         struct ptlrpc_sec        *sec;
1238         char                      str[32];
1239         ENTRY;
1240
1241         if (svc_ctx) {
1242                 LASSERT(imp->imp_dlm_fake == 1);
1243
1244                 CDEBUG(D_SEC, "%s %s: reverse sec using flavor %s\n",
1245                        imp->imp_obd->obd_type->typ_name,
1246                        imp->imp_obd->obd_name,
1247                        sptlrpc_flavor2name(sf, str, sizeof(str)));
1248
1249                 policy = sptlrpc_policy_get(svc_ctx->sc_policy);
1250                 sf->sf_flags |= PTLRPC_SEC_FL_REVERSE | PTLRPC_SEC_FL_ROOTONLY;
1251         } else {
1252                 LASSERT(imp->imp_dlm_fake == 0);
1253
1254                 CDEBUG(D_SEC, "%s %s: select security flavor %s\n",
1255                        imp->imp_obd->obd_type->typ_name,
1256                        imp->imp_obd->obd_name,
1257                        sptlrpc_flavor2name(sf, str, sizeof(str)));
1258
1259                 policy = sptlrpc_wireflavor2policy(sf->sf_rpc);
1260                 if (!policy) {
1261                         CERROR("invalid flavor 0x%x\n", sf->sf_rpc);
1262                         RETURN(NULL);
1263                 }
1264         }
1265
1266         sec = policy->sp_cops->create_sec(imp, svc_ctx, sf);
1267         if (sec) {
1268                 atomic_inc(&sec->ps_refcount);
1269
1270                 sec->ps_part = sp;
1271
1272                 if (sec->ps_gc_interval && policy->sp_cops->gc_ctx)
1273                         sptlrpc_gc_add_sec(sec);
1274         } else {
1275                 sptlrpc_policy_put(policy);
1276         }
1277
1278         RETURN(sec);
1279 }
1280
1281 struct ptlrpc_sec *sptlrpc_import_sec_ref(struct obd_import *imp)
1282 {
1283         struct ptlrpc_sec *sec;
1284
1285         spin_lock(&imp->imp_lock);
1286         sec = sptlrpc_sec_get(imp->imp_sec);
1287         spin_unlock(&imp->imp_lock);
1288
1289         return sec;
1290 }
1291 EXPORT_SYMBOL(sptlrpc_import_sec_ref);
1292
1293 static void sptlrpc_import_sec_install(struct obd_import *imp,
1294                                        struct ptlrpc_sec *sec)
1295 {
1296         struct ptlrpc_sec *old_sec;
1297
1298         LASSERT(atomic_read(&sec->ps_refcount) > 0);
1299
1300         spin_lock(&imp->imp_lock);
1301         old_sec = imp->imp_sec;
1302         imp->imp_sec = sec;
1303         spin_unlock(&imp->imp_lock);
1304
1305         if (old_sec) {
1306                 sptlrpc_sec_kill(old_sec);
1307
1308                 /* balance the ref taken by this import */
1309                 sptlrpc_sec_put(old_sec);
1310         }
1311 }
1312
1313 static inline
1314 int flavor_equal(struct sptlrpc_flavor *sf1, struct sptlrpc_flavor *sf2)
1315 {
1316         return (memcmp(sf1, sf2, sizeof(*sf1)) == 0);
1317 }
1318
1319 static inline
1320 void flavor_copy(struct sptlrpc_flavor *dst, struct sptlrpc_flavor *src)
1321 {
1322         *dst = *src;
1323 }
1324
1325 static void sptlrpc_import_sec_adapt_inplace(struct obd_import *imp,
1326                                              struct ptlrpc_sec *sec,
1327                                              struct sptlrpc_flavor *sf)
1328 {
1329         char    str1[32], str2[32];
1330
1331         if (sec->ps_flvr.sf_flags != sf->sf_flags)
1332                 CWARN("changing sec flags: %s -> %s\n",
1333                       sptlrpc_secflags2str(sec->ps_flvr.sf_flags,
1334                                            str1, sizeof(str1)),
1335                       sptlrpc_secflags2str(sf->sf_flags,
1336                                            str2, sizeof(str2)));
1337
1338         spin_lock(&sec->ps_lock);
1339         flavor_copy(&sec->ps_flvr, sf);
1340         spin_unlock(&sec->ps_lock);
1341 }
1342
1343 /*
1344  * for normal import, @svc_ctx should be NULL and @flvr is ignored;
1345  * for reverse import, @svc_ctx and @flvr is from incoming request.
1346  */
1347 int sptlrpc_import_sec_adapt(struct obd_import *imp,
1348                              struct ptlrpc_svc_ctx *svc_ctx,
1349                              struct sptlrpc_flavor *flvr)
1350 {
1351         struct ptlrpc_connection   *conn;
1352         struct sptlrpc_flavor       sf;
1353         struct ptlrpc_sec          *sec, *newsec;
1354         enum lustre_sec_part        sp;
1355         char                        str[24];
1356         int                         rc = 0;
1357         ENTRY;
1358
1359         might_sleep();
1360
1361         if (imp == NULL)
1362                 RETURN(0);
1363
1364         conn = imp->imp_connection;
1365
1366         if (svc_ctx == NULL) {
1367                 struct client_obd *cliobd = &imp->imp_obd->u.cli;
1368                 /*
1369                  * normal import, determine flavor from rule set, except
1370                  * for mgc the flavor is predetermined.
1371                  */
1372                 if (cliobd->cl_sp_me == LUSTRE_SP_MGC)
1373                         sf = cliobd->cl_flvr_mgc;
1374                 else 
1375                         sptlrpc_conf_choose_flavor(cliobd->cl_sp_me,
1376                                                    cliobd->cl_sp_to,
1377                                                    &cliobd->cl_target_uuid,
1378                                                    conn->c_self, &sf);
1379
1380                 sp = imp->imp_obd->u.cli.cl_sp_me;
1381         } else {
1382                 /* reverse import, determine flavor from incoming reqeust */
1383                 sf = *flvr;
1384
1385                 if (sf.sf_rpc != SPTLRPC_FLVR_NULL)
1386                         sf.sf_flags = PTLRPC_SEC_FL_REVERSE |
1387                                       PTLRPC_SEC_FL_ROOTONLY;
1388
1389                 sp = sptlrpc_target_sec_part(imp->imp_obd);
1390         }
1391
1392         sec = sptlrpc_import_sec_ref(imp);
1393         if (sec) {
1394                 char    str2[24];
1395
1396                 if (flavor_equal(&sf, &sec->ps_flvr))
1397                         GOTO(out, rc);
1398
1399                 CWARN("import %s->%s: changing flavor %s -> %s\n",
1400                       imp->imp_obd->obd_name,
1401                       obd_uuid2str(&conn->c_remote_uuid),
1402                       sptlrpc_flavor2name(&sec->ps_flvr, str, sizeof(str)),
1403                       sptlrpc_flavor2name(&sf, str2, sizeof(str2)));
1404
1405                 if (SPTLRPC_FLVR_POLICY(sf.sf_rpc) ==
1406                     SPTLRPC_FLVR_POLICY(sec->ps_flvr.sf_rpc) &&
1407                     SPTLRPC_FLVR_MECH(sf.sf_rpc) ==
1408                     SPTLRPC_FLVR_MECH(sec->ps_flvr.sf_rpc)) {
1409                         sptlrpc_import_sec_adapt_inplace(imp, sec, &sf);
1410                         GOTO(out, rc);
1411                 }
1412         } else {
1413                 CWARN("import %s->%s netid %x: select flavor %s\n",
1414                       imp->imp_obd->obd_name,
1415                       obd_uuid2str(&conn->c_remote_uuid),
1416                       LNET_NIDNET(conn->c_self),
1417                       sptlrpc_flavor2name(&sf, str, sizeof(str)));
1418         }
1419
1420         mutex_down(&imp->imp_sec_mutex);
1421
1422         newsec = sptlrpc_sec_create(imp, svc_ctx, &sf, sp);
1423         if (newsec) {
1424                 sptlrpc_import_sec_install(imp, newsec);
1425         } else {
1426                 CERROR("import %s->%s: failed to create new sec\n",
1427                        imp->imp_obd->obd_name,
1428                        obd_uuid2str(&conn->c_remote_uuid));
1429                 rc = -EPERM;
1430         }
1431
1432         mutex_up(&imp->imp_sec_mutex);
1433 out:
1434         sptlrpc_sec_put(sec);
1435         RETURN(rc);
1436 }
1437
1438 void sptlrpc_import_sec_put(struct obd_import *imp)
1439 {
1440         if (imp->imp_sec) {
1441                 sptlrpc_sec_kill(imp->imp_sec);
1442
1443                 sptlrpc_sec_put(imp->imp_sec);
1444                 imp->imp_sec = NULL;
1445         }
1446 }
1447
1448 static void import_flush_ctx_common(struct obd_import *imp,
1449                                     uid_t uid, int grace, int force)
1450 {
1451         struct ptlrpc_sec *sec;
1452
1453         if (imp == NULL)
1454                 return;
1455
1456         sec = sptlrpc_import_sec_ref(imp);
1457         if (sec == NULL)
1458                 return;
1459
1460         sec_cop_flush_ctx_cache(sec, uid, grace, force);
1461         sptlrpc_sec_put(sec);
1462 }
1463
1464 void sptlrpc_import_inval_all_ctx(struct obd_import *imp)
1465 {
1466         /* use grace == 0 */
1467         import_flush_ctx_common(imp, -1, 0, 1);
1468 }
1469
1470 void sptlrpc_import_flush_root_ctx(struct obd_import *imp)
1471 {
1472         /* it's important to use grace mode, see explain in
1473          * sptlrpc_req_refresh_ctx() */
1474         import_flush_ctx_common(imp, 0, 1, 1);
1475 }
1476
1477 void sptlrpc_import_flush_my_ctx(struct obd_import *imp)
1478 {
1479         import_flush_ctx_common(imp, cfs_current()->uid, 1, 1);
1480 }
1481 EXPORT_SYMBOL(sptlrpc_import_flush_my_ctx);
1482
1483 void sptlrpc_import_flush_all_ctx(struct obd_import *imp)
1484 {
1485         import_flush_ctx_common(imp, -1, 1, 1);
1486 }
1487 EXPORT_SYMBOL(sptlrpc_import_flush_all_ctx);
1488
1489 /*
1490  * when complete successfully, req->rq_reqmsg should point to the
1491  * right place.
1492  */
1493 int sptlrpc_cli_alloc_reqbuf(struct ptlrpc_request *req, int msgsize)
1494 {
1495         struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
1496         struct ptlrpc_sec_policy *policy;
1497         int rc;
1498
1499         LASSERT(ctx);
1500         LASSERT(atomic_read(&ctx->cc_refcount));
1501         LASSERT(ctx->cc_sec);
1502         LASSERT(ctx->cc_sec->ps_policy);
1503         LASSERT(req->rq_reqmsg == NULL);
1504
1505         policy = ctx->cc_sec->ps_policy;
1506         rc = policy->sp_cops->alloc_reqbuf(ctx->cc_sec, req, msgsize);
1507         if (!rc) {
1508                 LASSERT(req->rq_reqmsg);
1509                 LASSERT(req->rq_reqbuf || req->rq_clrbuf);
1510
1511                 /* zeroing preallocated buffer */
1512                 if (req->rq_pool)
1513                         memset(req->rq_reqmsg, 0, msgsize);
1514         }
1515
1516         return rc;
1517 }
1518
1519 void sptlrpc_cli_free_reqbuf(struct ptlrpc_request *req)
1520 {
1521         struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
1522         struct ptlrpc_sec_policy *policy;
1523
1524         LASSERT(ctx);
1525         LASSERT(atomic_read(&ctx->cc_refcount));
1526         LASSERT(ctx->cc_sec);
1527         LASSERT(ctx->cc_sec->ps_policy);
1528
1529         if (req->rq_reqbuf == NULL && req->rq_clrbuf == NULL)
1530                 return;
1531
1532         policy = ctx->cc_sec->ps_policy;
1533         policy->sp_cops->free_reqbuf(ctx->cc_sec, req);
1534 }
1535
1536 /*
1537  * NOTE caller must guarantee the buffer size is enough for the enlargement
1538  */
1539 void _sptlrpc_enlarge_msg_inplace(struct lustre_msg *msg,
1540                                   int segment, int newsize)
1541 {
1542         void   *src, *dst;
1543         int     oldsize, oldmsg_size, movesize;
1544
1545         LASSERT(segment < msg->lm_bufcount);
1546         LASSERT(msg->lm_buflens[segment] <= newsize);
1547
1548         if (msg->lm_buflens[segment] == newsize)
1549                 return;
1550
1551         /* nothing to do if we are enlarging the last segment */
1552         if (segment == msg->lm_bufcount - 1) {
1553                 msg->lm_buflens[segment] = newsize;
1554                 return;
1555         }
1556
1557         oldsize = msg->lm_buflens[segment];
1558
1559         src = lustre_msg_buf(msg, segment + 1, 0);
1560         msg->lm_buflens[segment] = newsize;
1561         dst = lustre_msg_buf(msg, segment + 1, 0);
1562         msg->lm_buflens[segment] = oldsize;
1563
1564         /* move from segment + 1 to end segment */
1565         LASSERT(msg->lm_magic == LUSTRE_MSG_MAGIC_V2);
1566         oldmsg_size = lustre_msg_size_v2(msg->lm_bufcount, msg->lm_buflens);
1567         movesize = oldmsg_size - ((unsigned long) src - (unsigned long) msg);
1568         LASSERT(movesize >= 0);
1569
1570         if (movesize)
1571                 memmove(dst, src, movesize);
1572
1573         /* note we don't clear the ares where old data live, not secret */
1574
1575         /* finally set new segment size */
1576         msg->lm_buflens[segment] = newsize;
1577 }
1578 EXPORT_SYMBOL(_sptlrpc_enlarge_msg_inplace);
1579
1580 /*
1581  * enlarge @segment of upper message req->rq_reqmsg to @newsize, all data
1582  * will be preserved after enlargement. this must be called after rq_reqmsg has
1583  * been intialized at least.
1584  *
1585  * caller's attention: upon return, rq_reqmsg and rq_reqlen might have
1586  * been changed.
1587  */
1588 int sptlrpc_cli_enlarge_reqbuf(struct ptlrpc_request *req,
1589                                int segment, int newsize)
1590 {
1591         struct ptlrpc_cli_ctx    *ctx = req->rq_cli_ctx;
1592         struct ptlrpc_sec_cops   *cops;
1593         struct lustre_msg        *msg = req->rq_reqmsg;
1594
1595         LASSERT(ctx);
1596         LASSERT(msg);
1597         LASSERT(msg->lm_bufcount > segment);
1598         LASSERT(msg->lm_buflens[segment] <= newsize);
1599
1600         if (msg->lm_buflens[segment] == newsize)
1601                 return 0;
1602
1603         cops = ctx->cc_sec->ps_policy->sp_cops;
1604         LASSERT(cops->enlarge_reqbuf);
1605         return cops->enlarge_reqbuf(ctx->cc_sec, req, segment, newsize);
1606 }
1607 EXPORT_SYMBOL(sptlrpc_cli_enlarge_reqbuf);
1608
1609 int sptlrpc_cli_alloc_repbuf(struct ptlrpc_request *req, int msgsize)
1610 {
1611         struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
1612         struct ptlrpc_sec_policy *policy;
1613         ENTRY;
1614
1615         LASSERT(ctx);
1616         LASSERT(atomic_read(&ctx->cc_refcount));
1617         LASSERT(ctx->cc_sec);
1618         LASSERT(ctx->cc_sec->ps_policy);
1619
1620         if (req->rq_repbuf)
1621                 RETURN(0);
1622
1623         policy = ctx->cc_sec->ps_policy;
1624         RETURN(policy->sp_cops->alloc_repbuf(ctx->cc_sec, req, msgsize));
1625 }
1626
1627 void sptlrpc_cli_free_repbuf(struct ptlrpc_request *req)
1628 {
1629         struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
1630         struct ptlrpc_sec_policy *policy;
1631         ENTRY;
1632
1633         LASSERT(ctx);
1634         LASSERT(atomic_read(&ctx->cc_refcount));
1635         LASSERT(ctx->cc_sec);
1636         LASSERT(ctx->cc_sec->ps_policy);
1637
1638         if (req->rq_repbuf == NULL)
1639                 return;
1640         LASSERT(req->rq_repbuf_len);
1641
1642         policy = ctx->cc_sec->ps_policy;
1643         policy->sp_cops->free_repbuf(ctx->cc_sec, req);
1644         EXIT;
1645 }
1646
1647 int sptlrpc_cli_install_rvs_ctx(struct obd_import *imp,
1648                                 struct ptlrpc_cli_ctx *ctx)
1649 {
1650         struct ptlrpc_sec_policy *policy = ctx->cc_sec->ps_policy;
1651
1652         if (!policy->sp_cops->install_rctx)
1653                 return 0;
1654         return policy->sp_cops->install_rctx(imp, ctx->cc_sec, ctx);
1655 }
1656
1657 int sptlrpc_svc_install_rvs_ctx(struct obd_import *imp,
1658                                 struct ptlrpc_svc_ctx *ctx)
1659 {
1660         struct ptlrpc_sec_policy *policy = ctx->sc_policy;
1661
1662         if (!policy->sp_sops->install_rctx)
1663                 return 0;
1664         return policy->sp_sops->install_rctx(imp, ctx);
1665 }
1666
1667 /****************************************
1668  * server side security                 *
1669  ****************************************/
1670
1671 static int flavor_allowed(struct sptlrpc_flavor *exp,
1672                           struct ptlrpc_request *req)
1673 {
1674         struct sptlrpc_flavor *flvr = &req->rq_flvr;
1675
1676         if (exp->sf_rpc == SPTLRPC_FLVR_ANY || exp->sf_rpc == flvr->sf_rpc)
1677                 return 1;
1678
1679         if ((req->rq_ctx_init || req->rq_ctx_fini) &&
1680             SPTLRPC_FLVR_POLICY(exp->sf_rpc) ==
1681             SPTLRPC_FLVR_POLICY(flvr->sf_rpc) &&
1682             SPTLRPC_FLVR_MECH(exp->sf_rpc) == SPTLRPC_FLVR_MECH(flvr->sf_rpc))
1683                 return 1;
1684
1685         return 0;
1686 }
1687
/* added to cfs_time_current_sec() to compute the expiry time of the
 * previous export flavor once a flavor change has completed */
#define EXP_FLVR_UPDATE_EXPIRE      (OBD_TIMEOUT_DEFAULT + 10)
1689
1690 int sptlrpc_target_export_check(struct obd_export *exp,
1691                                 struct ptlrpc_request *req)
1692 {
1693         struct sptlrpc_flavor   flavor;
1694
1695         if (exp == NULL)
1696                 return 0;
1697
1698         /* client side export has no imp_reverse, skip
1699          * FIXME maybe we should check flavor this as well??? */
1700         if (exp->exp_imp_reverse == NULL)
1701                 return 0;
1702
1703         /* don't care about ctx fini rpc */
1704         if (req->rq_ctx_fini)
1705                 return 0;
1706
1707         spin_lock(&exp->exp_lock);
1708
1709         /* if flavor just changed (exp->exp_flvr_changed != 0), we wait for
1710          * the first req with the new flavor, then treat it as current flavor,
1711          * adapt reverse sec according to it.
1712          * note the first rpc with new flavor might not be with root ctx, in
1713          * which case delay the sec_adapt by leaving exp_flvr_adapt == 1. */
1714         if (unlikely(exp->exp_flvr_changed) &&
1715             flavor_allowed(&exp->exp_flvr_old[1], req)) {
1716                 /* make the new flavor as "current", and old ones as
1717                  * about-to-expire */
1718                 CDEBUG(D_SEC, "exp %p: just changed: %x->%x\n", exp,
1719                        exp->exp_flvr.sf_rpc, exp->exp_flvr_old[1].sf_rpc);
1720                 flavor = exp->exp_flvr_old[1];
1721                 exp->exp_flvr_old[1] = exp->exp_flvr_old[0];
1722                 exp->exp_flvr_expire[1] = exp->exp_flvr_expire[0];
1723                 exp->exp_flvr_old[0] = exp->exp_flvr;
1724                 exp->exp_flvr_expire[0] = cfs_time_current_sec() +
1725                                           EXP_FLVR_UPDATE_EXPIRE;
1726                 exp->exp_flvr = flavor;
1727
1728                 /* flavor change finished */
1729                 exp->exp_flvr_changed = 0;
1730                 LASSERT(exp->exp_flvr_adapt == 1);
1731
1732                 /* if it's gss, we only interested in root ctx init */
1733                 if (req->rq_auth_gss &&
1734                     !(req->rq_ctx_init && (req->rq_auth_usr_root ||
1735                                            req->rq_auth_usr_mdt))) {
1736                         spin_unlock(&exp->exp_lock);
1737                         CDEBUG(D_SEC, "is good but not root(%d:%d:%d:%d)\n",
1738                                req->rq_auth_gss, req->rq_ctx_init,
1739                                req->rq_auth_usr_root, req->rq_auth_usr_mdt);
1740                         return 0;
1741                 }
1742
1743                 exp->exp_flvr_adapt = 0;
1744                 spin_unlock(&exp->exp_lock);
1745
1746                 return sptlrpc_import_sec_adapt(exp->exp_imp_reverse,
1747                                                 req->rq_svc_ctx, &flavor);
1748         }
1749
1750         /* if it equals to the current flavor, we accept it, but need to
1751          * dealing with reverse sec/ctx */
1752         if (likely(flavor_allowed(&exp->exp_flvr, req))) {
1753                 /* most cases should return here, we only interested in
1754                  * gss root ctx init */
1755                 if (!req->rq_auth_gss || !req->rq_ctx_init ||
1756                     (!req->rq_auth_usr_root && !req->rq_auth_usr_mdt)) {
1757                         spin_unlock(&exp->exp_lock);
1758                         return 0;
1759                 }
1760
1761                 /* if flavor just changed, we should not proceed, just leave
1762                  * it and current flavor will be discovered and replaced
1763                  * shortly, and let _this_ rpc pass through */
1764                 if (exp->exp_flvr_changed) {
1765                         LASSERT(exp->exp_flvr_adapt);
1766                         spin_unlock(&exp->exp_lock);
1767                         return 0;
1768                 }
1769
1770                 if (exp->exp_flvr_adapt) {
1771                         exp->exp_flvr_adapt = 0;
1772                         CDEBUG(D_SEC, "exp %p (%x|%x|%x): do delayed adapt\n",
1773                                exp, exp->exp_flvr.sf_rpc,
1774                                exp->exp_flvr_old[0].sf_rpc,
1775                                exp->exp_flvr_old[1].sf_rpc);
1776                         flavor = exp->exp_flvr;
1777                         spin_unlock(&exp->exp_lock);
1778
1779                         return sptlrpc_import_sec_adapt(exp->exp_imp_reverse,
1780                                                         req->rq_svc_ctx,
1781                                                         &flavor);
1782                 } else {
1783                         CDEBUG(D_SEC, "exp %p (%x|%x|%x): is current flavor, "
1784                                "install rvs ctx\n", exp, exp->exp_flvr.sf_rpc,
1785                                exp->exp_flvr_old[0].sf_rpc,
1786                                exp->exp_flvr_old[1].sf_rpc);
1787                         spin_unlock(&exp->exp_lock);
1788
1789                         return sptlrpc_svc_install_rvs_ctx(exp->exp_imp_reverse,
1790                                                            req->rq_svc_ctx);
1791                 }
1792         }
1793
1794         if (exp->exp_flvr_expire[0]) {
1795                 if (exp->exp_flvr_expire[0] >= cfs_time_current_sec()) {
1796                         if (flavor_allowed(&exp->exp_flvr_old[0], req)) {
1797                                 CDEBUG(D_SEC, "exp %p (%x|%x|%x): match the "
1798                                        "middle one ("CFS_DURATION_T")\n", exp,
1799                                        exp->exp_flvr.sf_rpc,
1800                                        exp->exp_flvr_old[0].sf_rpc,
1801                                        exp->exp_flvr_old[1].sf_rpc,
1802                                        exp->exp_flvr_expire[0] -
1803                                                 cfs_time_current_sec());
1804                                 spin_unlock(&exp->exp_lock);
1805                                 return 0;
1806                         }
1807                 } else {
1808                         CDEBUG(D_SEC, "mark middle expired\n");
1809                         exp->exp_flvr_expire[0] = 0;
1810                 }
1811                 CDEBUG(D_SEC, "exp %p (%x|%x|%x): %x not match middle\n", exp,
1812                        exp->exp_flvr.sf_rpc,
1813                        exp->exp_flvr_old[0].sf_rpc, exp->exp_flvr_old[1].sf_rpc,
1814                        req->rq_flvr.sf_rpc);
1815         }
1816
1817         /* now it doesn't match the current flavor, the only chance we can
1818          * accept it is match the old flavors which is not expired. */
1819         if (exp->exp_flvr_changed == 0 && exp->exp_flvr_expire[1]) {
1820                 if (exp->exp_flvr_expire[1] >= cfs_time_current_sec()) {
1821                         if (flavor_allowed(&exp->exp_flvr_old[1], req)) {
1822                                 CDEBUG(D_SEC, "exp %p (%x|%x|%x): match the "
1823                                        "oldest one ("CFS_DURATION_T")\n", exp,
1824                                        exp->exp_flvr.sf_rpc,
1825                                        exp->exp_flvr_old[0].sf_rpc,
1826                                        exp->exp_flvr_old[1].sf_rpc,
1827                                        exp->exp_flvr_expire[1] -
1828                                                 cfs_time_current_sec());
1829                                 spin_unlock(&exp->exp_lock);
1830                                 return 0;
1831                         }
1832                 } else {
1833                         CDEBUG(D_SEC, "mark oldest expired\n");
1834                         exp->exp_flvr_expire[1] = 0;
1835                 }
1836                 CDEBUG(D_SEC, "exp %p (%x|%x|%x): %x not match found\n",
1837                        exp, exp->exp_flvr.sf_rpc,
1838                        exp->exp_flvr_old[0].sf_rpc, exp->exp_flvr_old[1].sf_rpc,
1839                        req->rq_flvr.sf_rpc);
1840         } else {
1841                 CDEBUG(D_SEC, "exp %p (%x|%x|%x): skip the last one\n",
1842                        exp, exp->exp_flvr.sf_rpc, exp->exp_flvr_old[0].sf_rpc,
1843                        exp->exp_flvr_old[1].sf_rpc);
1844         }
1845
1846         spin_unlock(&exp->exp_lock);
1847
1848         CWARN("exp %p(%s): req %p (%u|%u|%u|%u|%u) with "
1849               "unauthorized flavor %x, expect %x|%x(%+ld)|%x(%+ld)\n",
1850               exp, exp->exp_obd->obd_name,
1851               req, req->rq_auth_gss, req->rq_ctx_init, req->rq_ctx_fini,
1852               req->rq_auth_usr_root, req->rq_auth_usr_mdt, req->rq_flvr.sf_rpc,
1853               exp->exp_flvr.sf_rpc,
1854               exp->exp_flvr_old[0].sf_rpc,
1855               exp->exp_flvr_expire[0] ?
1856               (unsigned long) (exp->exp_flvr_expire[0] -
1857                                cfs_time_current_sec()) : 0,
1858               exp->exp_flvr_old[1].sf_rpc,
1859               exp->exp_flvr_expire[1] ?
1860               (unsigned long) (exp->exp_flvr_expire[1] -
1861                                cfs_time_current_sec()) : 0);
1862         return -EACCES;
1863 }
1864 EXPORT_SYMBOL(sptlrpc_target_export_check);
1865
1866 void sptlrpc_target_update_exp_flavor(struct obd_device *obd,
1867                                       struct sptlrpc_rule_set *rset)
1868 {
1869         struct obd_export       *exp;
1870         struct sptlrpc_flavor    new_flvr;
1871
1872         LASSERT(obd);
1873
1874         spin_lock(&obd->obd_dev_lock);
1875
1876         list_for_each_entry(exp, &obd->obd_exports, exp_obd_chain) {
1877                 if (exp->exp_connection == NULL)
1878                         continue;
1879
1880                 /* note if this export had just been updated flavor
1881                  * (exp_flvr_changed == 1), this will override the
1882                  * previous one. */
1883                 spin_lock(&exp->exp_lock);
1884                 sptlrpc_target_choose_flavor(rset, exp->exp_sp_peer,
1885                                              exp->exp_connection->c_peer.nid,
1886                                              &new_flvr);
1887                 if (exp->exp_flvr_changed ||
1888                     !flavor_equal(&new_flvr, &exp->exp_flvr)) {
1889                         exp->exp_flvr_old[1] = new_flvr;
1890                         exp->exp_flvr_expire[1] = 0;
1891                         exp->exp_flvr_changed = 1;
1892                         exp->exp_flvr_adapt = 1;
1893
1894                         CDEBUG(D_SEC, "exp %p (%s): updated flavor %x->%x\n",
1895                                exp, sptlrpc_part2name(exp->exp_sp_peer),
1896                                exp->exp_flvr.sf_rpc,
1897                                exp->exp_flvr_old[1].sf_rpc);
1898                 }
1899                 spin_unlock(&exp->exp_lock);
1900         }
1901
1902         spin_unlock(&obd->obd_dev_lock);
1903 }
1904 EXPORT_SYMBOL(sptlrpc_target_update_exp_flavor);
1905
1906 static int sptlrpc_svc_check_from(struct ptlrpc_request *req, int svc_rc)
1907 {
1908         if (svc_rc == SECSVC_DROP)
1909                 return SECSVC_DROP;
1910
1911         switch (req->rq_sp_from) {
1912         case LUSTRE_SP_CLI:
1913         case LUSTRE_SP_MDT:
1914         case LUSTRE_SP_OST:
1915         case LUSTRE_SP_MGC:
1916         case LUSTRE_SP_MGS:
1917         case LUSTRE_SP_ANY:
1918                 break;
1919         default:
1920                 DEBUG_REQ(D_ERROR, req, "invalid source %u", req->rq_sp_from);
1921                 return SECSVC_DROP;
1922         }
1923
1924         if (!req->rq_auth_gss)
1925                 return svc_rc;
1926
1927         if (unlikely(req->rq_sp_from == LUSTRE_SP_ANY)) {
1928                 CERROR("not specific part\n");
1929                 return SECSVC_DROP;
1930         }
1931
1932         /* from MDT, must be authenticated as MDT */
1933         if (unlikely(req->rq_sp_from == LUSTRE_SP_MDT &&
1934                      !req->rq_auth_usr_mdt)) {
1935                 DEBUG_REQ(D_ERROR, req, "fake source MDT");
1936                 return SECSVC_DROP;
1937         }
1938
1939         /* from OST, must be callback to MDT and CLI, the reverse sec
1940          * was from mdt/root keytab, so it should be MDT or root FIXME */
1941         if (unlikely(req->rq_sp_from == LUSTRE_SP_OST &&
1942                      !req->rq_auth_usr_mdt && !req->rq_auth_usr_root)) {
1943                 DEBUG_REQ(D_ERROR, req, "fake source OST");
1944                 return SECSVC_DROP;
1945         }
1946
1947         return svc_rc;
1948 }
1949
1950 int sptlrpc_svc_unwrap_request(struct ptlrpc_request *req)
1951 {
1952         struct ptlrpc_sec_policy *policy;
1953         struct lustre_msg        *msg = req->rq_reqbuf;
1954         int                       rc;
1955         ENTRY;
1956
1957         LASSERT(msg);
1958         LASSERT(req->rq_reqmsg == NULL);
1959         LASSERT(req->rq_repmsg == NULL);
1960         LASSERT(req->rq_svc_ctx == NULL);
1961
1962         req->rq_sp_from = LUSTRE_SP_ANY;
1963         req->rq_auth_uid = INVALID_UID;
1964         req->rq_auth_mapped_uid = INVALID_UID;
1965
1966         if (req->rq_reqdata_len < sizeof(struct lustre_msg)) {
1967                 CERROR("request size %d too small\n", req->rq_reqdata_len);
1968                 RETURN(SECSVC_DROP);
1969         }
1970
1971         /*
1972          * only expect v2 message.
1973          */
1974         switch (msg->lm_magic) {
1975         case LUSTRE_MSG_MAGIC_V2:
1976                 req->rq_flvr.sf_rpc = WIRE_FLVR(msg->lm_secflvr);
1977                 break;
1978         case LUSTRE_MSG_MAGIC_V2_SWABBED:
1979                 req->rq_flvr.sf_rpc = WIRE_FLVR(__swab32(msg->lm_secflvr));
1980                 break;
1981         default:
1982                 CERROR("invalid magic %x\n", msg->lm_magic);
1983                 RETURN(SECSVC_DROP);
1984         }
1985
1986         /* unpack the wrapper message if the policy is not null */
1987         if (SPTLRPC_FLVR_POLICY(req->rq_flvr.sf_rpc) != SPTLRPC_POLICY_NULL &&
1988             lustre_unpack_msg(msg, req->rq_reqdata_len)) {
1989                 CERROR("invalid wrapper msg format\n");
1990                 RETURN(SECSVC_DROP);
1991         }
1992
1993         policy = sptlrpc_wireflavor2policy(req->rq_flvr.sf_rpc);
1994         if (!policy) {
1995                 CERROR("unsupported rpc flavor %x\n", req->rq_flvr.sf_rpc);
1996                 RETURN(SECSVC_DROP);
1997         }
1998
1999         LASSERT(policy->sp_sops->accept);
2000         rc = policy->sp_sops->accept(req);
2001
2002         LASSERT(req->rq_reqmsg || rc != SECSVC_OK);
2003         LASSERT(req->rq_svc_ctx || rc == SECSVC_DROP);
2004         sptlrpc_policy_put(policy);
2005
2006         /* sanity check for the request source */
2007         rc = sptlrpc_svc_check_from(req, rc);
2008         RETURN(rc);
2009 }
2010
2011 int sptlrpc_svc_alloc_rs(struct ptlrpc_request *req,
2012                          int msglen)
2013 {
2014         struct ptlrpc_sec_policy *policy;
2015         struct ptlrpc_reply_state *rs;
2016         int rc;
2017         ENTRY;
2018
2019         LASSERT(req->rq_svc_ctx);
2020         LASSERT(req->rq_svc_ctx->sc_policy);
2021
2022         policy = req->rq_svc_ctx->sc_policy;
2023         LASSERT(policy->sp_sops->alloc_rs);
2024
2025         rc = policy->sp_sops->alloc_rs(req, msglen);
2026         if (unlikely(rc == -ENOMEM)) {
2027                 /* failed alloc, try emergency pool */
2028                 rs = lustre_get_emerg_rs(req->rq_rqbd->rqbd_service);
2029                 if (rs == NULL)
2030                         RETURN(-ENOMEM);
2031
2032                 req->rq_reply_state = rs;
2033                 rc = policy->sp_sops->alloc_rs(req, msglen);
2034                 if (rc) {
2035                         lustre_put_emerg_rs(rs);
2036                         req->rq_reply_state = NULL;
2037                 }
2038         }
2039
2040         LASSERT(rc != 0 ||
2041                 (req->rq_reply_state && req->rq_reply_state->rs_msg));
2042
2043         RETURN(rc);
2044 }
2045
2046 int sptlrpc_svc_wrap_reply(struct ptlrpc_request *req)
2047 {
2048         struct ptlrpc_sec_policy *policy;
2049         int rc;
2050         ENTRY;
2051
2052         LASSERT(req->rq_svc_ctx);
2053         LASSERT(req->rq_svc_ctx->sc_policy);
2054
2055         policy = req->rq_svc_ctx->sc_policy;
2056         LASSERT(policy->sp_sops->authorize);
2057
2058         rc = policy->sp_sops->authorize(req);
2059         LASSERT(rc || req->rq_reply_state->rs_repdata_len);
2060
2061         RETURN(rc);
2062 }
2063
2064 void sptlrpc_svc_free_rs(struct ptlrpc_reply_state *rs)
2065 {
2066         struct ptlrpc_sec_policy *policy;
2067         unsigned int prealloc;
2068         ENTRY;
2069
2070         LASSERT(rs->rs_svc_ctx);
2071         LASSERT(rs->rs_svc_ctx->sc_policy);
2072
2073         policy = rs->rs_svc_ctx->sc_policy;
2074         LASSERT(policy->sp_sops->free_rs);
2075
2076         prealloc = rs->rs_prealloc;
2077         policy->sp_sops->free_rs(rs);
2078
2079         if (prealloc)
2080                 lustre_put_emerg_rs(rs);
2081         EXIT;
2082 }
2083
2084 void sptlrpc_svc_ctx_addref(struct ptlrpc_request *req)
2085 {
2086         struct ptlrpc_svc_ctx *ctx = req->rq_svc_ctx;
2087
2088         if (ctx == NULL)
2089                 return;
2090
2091         LASSERT(atomic_read(&ctx->sc_refcount) > 0);
2092         atomic_inc(&ctx->sc_refcount);
2093 }
2094
2095 void sptlrpc_svc_ctx_decref(struct ptlrpc_request *req)
2096 {
2097         struct ptlrpc_svc_ctx *ctx = req->rq_svc_ctx;
2098
2099         if (ctx == NULL)
2100                 return;
2101
2102         LASSERT(atomic_read(&ctx->sc_refcount) > 0);
2103         if (atomic_dec_and_test(&ctx->sc_refcount)) {
2104                 if (ctx->sc_policy->sp_sops->free_ctx)
2105                         ctx->sc_policy->sp_sops->free_ctx(ctx);
2106         }
2107         req->rq_svc_ctx = NULL;
2108 }
2109
2110 void sptlrpc_svc_ctx_invalidate(struct ptlrpc_request *req)
2111 {
2112         struct ptlrpc_svc_ctx *ctx = req->rq_svc_ctx;
2113
2114         if (ctx == NULL)
2115                 return;
2116
2117         LASSERT(atomic_read(&ctx->sc_refcount) > 0);
2118         if (ctx->sc_policy->sp_sops->invalidate_ctx)
2119                 ctx->sc_policy->sp_sops->invalidate_ctx(ctx);
2120 }
2121 EXPORT_SYMBOL(sptlrpc_svc_ctx_invalidate);
2122
2123 /****************************************
2124  * bulk security                        *
2125  ****************************************/
2126
2127 int sptlrpc_cli_wrap_bulk(struct ptlrpc_request *req,
2128                           struct ptlrpc_bulk_desc *desc)
2129 {
2130         struct ptlrpc_cli_ctx *ctx;
2131
2132         LASSERT(req->rq_bulk_read || req->rq_bulk_write);
2133
2134         if (!req->rq_pack_bulk)
2135                 return 0;
2136
2137         ctx = req->rq_cli_ctx;
2138         if (ctx->cc_ops->wrap_bulk)
2139                 return ctx->cc_ops->wrap_bulk(ctx, req, desc);
2140         return 0;
2141 }
2142 EXPORT_SYMBOL(sptlrpc_cli_wrap_bulk);
2143
2144 /*
2145  * return nob of actual plain text size received, or error code.
2146  */
2147 int sptlrpc_cli_unwrap_bulk_read(struct ptlrpc_request *req,
2148                                  struct ptlrpc_bulk_desc *desc,
2149                                  int nob)
2150 {
2151         struct ptlrpc_cli_ctx  *ctx;
2152         int                     rc;
2153
2154         LASSERT(req->rq_bulk_read && !req->rq_bulk_write);
2155
2156         if (!req->rq_pack_bulk)
2157                 return desc->bd_nob_transferred;
2158
2159         ctx = req->rq_cli_ctx;
2160         if (ctx->cc_ops->unwrap_bulk) {
2161                 rc = ctx->cc_ops->unwrap_bulk(ctx, req, desc);
2162                 if (rc < 0)
2163                         return rc;
2164         }
2165         return desc->bd_nob_transferred;
2166 }
2167 EXPORT_SYMBOL(sptlrpc_cli_unwrap_bulk_read);
2168
2169 /*
2170  * return 0 for success or error code.
2171  */
2172 int sptlrpc_cli_unwrap_bulk_write(struct ptlrpc_request *req,
2173                                   struct ptlrpc_bulk_desc *desc)
2174 {
2175         struct ptlrpc_cli_ctx  *ctx;
2176         int                     rc;
2177
2178         LASSERT(!req->rq_bulk_read && req->rq_bulk_write);
2179
2180         if (!req->rq_pack_bulk)
2181                 return 0;
2182
2183         ctx = req->rq_cli_ctx;
2184         if (ctx->cc_ops->unwrap_bulk) {
2185                 rc = ctx->cc_ops->unwrap_bulk(ctx, req, desc);
2186                 if (rc < 0)
2187                         return rc;
2188         }
2189
2190         /*
2191          * if everything is going right, nob should equals to nob_transferred.
2192          * in case of privacy mode, nob_transferred needs to be adjusted.
2193          */
2194         if (desc->bd_nob != desc->bd_nob_transferred) {
2195                 CERROR("nob %d doesn't match transferred nob %d",
2196                        desc->bd_nob, desc->bd_nob_transferred);
2197                 return -EPROTO;
2198         }
2199
2200         return 0;
2201 }
2202 EXPORT_SYMBOL(sptlrpc_cli_unwrap_bulk_write);
2203
2204 int sptlrpc_svc_wrap_bulk(struct ptlrpc_request *req,
2205                           struct ptlrpc_bulk_desc *desc)
2206 {
2207         struct ptlrpc_svc_ctx *ctx;
2208
2209         LASSERT(req->rq_bulk_read);
2210
2211         if (!req->rq_pack_bulk)
2212                 return 0;
2213
2214         ctx = req->rq_svc_ctx;
2215         if (ctx->sc_policy->sp_sops->wrap_bulk)
2216                 return ctx->sc_policy->sp_sops->wrap_bulk(req, desc);
2217
2218         return 0;
2219 }
2220 EXPORT_SYMBOL(sptlrpc_svc_wrap_bulk);
2221
2222 int sptlrpc_svc_unwrap_bulk(struct ptlrpc_request *req,
2223                             struct ptlrpc_bulk_desc *desc)
2224 {
2225         struct ptlrpc_svc_ctx *ctx;
2226         int                    rc;
2227
2228         LASSERT(req->rq_bulk_write);
2229
2230         if (desc->bd_nob_transferred != desc->bd_nob &&
2231             SPTLRPC_FLVR_BULK_SVC(req->rq_flvr.sf_rpc) !=
2232             SPTLRPC_BULK_SVC_PRIV) {
2233                 DEBUG_REQ(D_ERROR, req, "truncated bulk GET %d(%d)",
2234                           desc->bd_nob_transferred, desc->bd_nob);
2235                 return -ETIMEDOUT;
2236         }
2237
2238         if (!req->rq_pack_bulk)
2239                 return 0;
2240
2241         ctx = req->rq_svc_ctx;
2242         if (ctx->sc_policy->sp_sops->unwrap_bulk) {
2243                 rc = ctx->sc_policy->sp_sops->unwrap_bulk(req, desc);
2244                 if (rc)
2245                         CERROR("error unwrap bulk: %d\n", rc);
2246         }
2247
2248         /* return 0 to allow reply be sent */
2249         return 0;
2250 }
2251 EXPORT_SYMBOL(sptlrpc_svc_unwrap_bulk);
2252
2253 int sptlrpc_svc_prep_bulk(struct ptlrpc_request *req,
2254                           struct ptlrpc_bulk_desc *desc)
2255 {
2256         struct ptlrpc_svc_ctx *ctx;
2257
2258         LASSERT(req->rq_bulk_write);
2259
2260         if (!req->rq_pack_bulk)
2261                 return 0;
2262
2263         ctx = req->rq_svc_ctx;
2264         if (ctx->sc_policy->sp_sops->prep_bulk)
2265                 return ctx->sc_policy->sp_sops->prep_bulk(req, desc);
2266
2267         return 0;
2268 }
2269 EXPORT_SYMBOL(sptlrpc_svc_prep_bulk);
2270
2271 /****************************************
2272  * user descriptor helpers              *
2273  ****************************************/
2274
/*
 * Size of the user descriptor needed for the current task.
 *
 * In kernel builds the task's supplementary group count, capped at
 * LUSTRE_MAX_GROUPS, is used; other builds use zero groups.  The count is
 * converted to a size by sptlrpc_user_desc_size().
 */
int sptlrpc_current_user_desc_size(void)
{
        int count = 0;

#ifdef __KERNEL__
        count = current_ngroups;
        if (count > LUSTRE_MAX_GROUPS)
                count = LUSTRE_MAX_GROUPS;
#endif

        return sptlrpc_user_desc_size(count);
}
EXPORT_SYMBOL(sptlrpc_current_user_desc_size);
2290
2291 int sptlrpc_pack_user_desc(struct lustre_msg *msg, int offset)
2292 {
2293         struct ptlrpc_user_desc *pud;
2294
2295         pud = lustre_msg_buf(msg, offset, 0);
2296
2297         pud->pud_uid = cfs_current()->uid;
2298         pud->pud_gid = cfs_current()->gid;
2299         pud->pud_fsuid = cfs_current()->fsuid;
2300         pud->pud_fsgid = cfs_current()->fsgid;
2301         pud->pud_cap = cfs_curproc_cap_pack();
2302         pud->pud_ngroups = (msg->lm_buflens[offset] - sizeof(*pud)) / 4;
2303
2304 #ifdef __KERNEL__
2305         task_lock(current);
2306         if (pud->pud_ngroups > current_ngroups)
2307                 pud->pud_ngroups = current_ngroups;
2308         memcpy(pud->pud_groups, cfs_current()->group_info->blocks[0],
2309                pud->pud_ngroups * sizeof(__u32));
2310         task_unlock(current);
2311 #endif
2312
2313         return 0;
2314 }
2315 EXPORT_SYMBOL(sptlrpc_pack_user_desc);
2316
2317 int sptlrpc_unpack_user_desc(struct lustre_msg *msg, int offset)
2318 {
2319         struct ptlrpc_user_desc *pud;
2320         int                      i;
2321
2322         pud = lustre_msg_buf(msg, offset, sizeof(*pud));
2323         if (!pud)
2324                 return -EINVAL;
2325
2326         if (lustre_msg_swabbed(msg)) {
2327                 __swab32s(&pud->pud_uid);
2328                 __swab32s(&pud->pud_gid);
2329                 __swab32s(&pud->pud_fsuid);
2330                 __swab32s(&pud->pud_fsgid);
2331                 __swab32s(&pud->pud_cap);
2332                 __swab32s(&pud->pud_ngroups);
2333         }
2334
2335         if (pud->pud_ngroups > LUSTRE_MAX_GROUPS) {
2336                 CERROR("%u groups is too large\n", pud->pud_ngroups);
2337                 return -EINVAL;
2338         }
2339
2340         if (sizeof(*pud) + pud->pud_ngroups * sizeof(__u32) >
2341             msg->lm_buflens[offset]) {
2342                 CERROR("%u groups are claimed but bufsize only %u\n",
2343                        pud->pud_ngroups, msg->lm_buflens[offset]);
2344                 return -EINVAL;
2345         }
2346
2347         if (lustre_msg_swabbed(msg)) {
2348                 for (i = 0; i < pud->pud_ngroups; i++)
2349                         __swab32s(&pud->pud_groups[i]);
2350         }
2351
2352         return 0;
2353 }
2354 EXPORT_SYMBOL(sptlrpc_unpack_user_desc);
2355
2356 /****************************************
2357  * misc helpers                         *
2358  ****************************************/
2359
2360 const char * sec2target_str(struct ptlrpc_sec *sec)
2361 {
2362         if (!sec || !sec->ps_import || !sec->ps_import->imp_obd)
2363                 return "*";
2364         if (sec_is_reverse(sec))
2365                 return "c";
2366         return obd_uuid2str(&sec->ps_import->imp_obd->u.cli.cl_target_uuid);
2367 }
2368 EXPORT_SYMBOL(sec2target_str);
2369
2370 /*
2371  * return true if the bulk data is protected
2372  */
2373 int sptlrpc_flavor_has_bulk(struct sptlrpc_flavor *flvr)
2374 {
2375         switch (SPTLRPC_FLVR_BULK_SVC(flvr->sf_rpc)) {
2376         case SPTLRPC_BULK_SVC_INTG:
2377         case SPTLRPC_BULK_SVC_PRIV:
2378                 return 1;
2379         default:
2380                 return 0;
2381         }
2382 }
2383 EXPORT_SYMBOL(sptlrpc_flavor_has_bulk);
2384
/****************************************
 * crypto API helper/alloc blkcipher    *
 ****************************************/
2388
2389 /****************************************
2390  * initialize/finalize                  *
2391  ****************************************/
2392
2393 int __init sptlrpc_init(void)
2394 {
2395         int rc;
2396
2397         rwlock_init(&policy_lock);
2398
2399         rc = sptlrpc_gc_init();
2400         if (rc)
2401                 goto out;
2402
2403         rc = sptlrpc_conf_init();
2404         if (rc)
2405                 goto out_gc;
2406
2407         rc = sptlrpc_enc_pool_init();
2408         if (rc)
2409                 goto out_conf;
2410
2411         rc = sptlrpc_null_init();
2412         if (rc)
2413                 goto out_pool;
2414
2415         rc = sptlrpc_plain_init();
2416         if (rc)
2417                 goto out_null;
2418
2419         rc = sptlrpc_lproc_init();
2420         if (rc)
2421                 goto out_plain;
2422
2423         return 0;
2424
2425 out_plain:
2426         sptlrpc_plain_fini();
2427 out_null:
2428         sptlrpc_null_fini();
2429 out_pool:
2430         sptlrpc_enc_pool_fini();
2431 out_conf:
2432         sptlrpc_conf_fini();
2433 out_gc:
2434         sptlrpc_gc_fini();
2435 out:
2436         return rc;
2437 }
2438
2439 void __exit sptlrpc_fini(void)
2440 {
2441         sptlrpc_lproc_fini();
2442         sptlrpc_plain_fini();
2443         sptlrpc_null_fini();
2444         sptlrpc_enc_pool_fini();
2445         sptlrpc_conf_fini();
2446         sptlrpc_gc_fini();
2447 }