1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * GPL HEADER START
5  *
6  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License version 2 only,
10  * as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope that it will be useful, but
13  * WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * General Public License version 2 for more details (a copy is included
16  * in the LICENSE file that accompanied this code).
17  *
18  * You should have received a copy of the GNU General Public License
19  * version 2 along with this program; If not, see
20  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
21  *
22  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23  * CA 95054 USA or visit www.sun.com if you need additional information or
24  * have any questions.
25  *
26  * GPL HEADER END
27  */
28 /*
29  * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved
30  * Use is subject to license terms.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  *
36  * lustre/ptlrpc/sec.c
37  *
38  * Author: Eric Mei <ericm@clusterfs.com>
39  */
40
41 #ifndef EXPORT_SYMTAB
42 #define EXPORT_SYMTAB
43 #endif
44 #define DEBUG_SUBSYSTEM S_SEC
45
46 #include <libcfs/libcfs.h>
47 #ifndef __KERNEL__
48 #include <liblustre.h>
49 #include <libcfs/list.h>
50 #else
51 #include <linux/crypto.h>
52 #include <linux/key.h>
53 #endif
54
55 #include <obd.h>
56 #include <obd_class.h>
57 #include <obd_support.h>
58 #include <lustre_net.h>
59 #include <lustre_import.h>
60 #include <lustre_dlm.h>
61 #include <lustre_sec.h>
62
63 #include "ptlrpc_internal.h"
64
65 /***********************************************
66  * policy registers                            *
67  ***********************************************/
68
69 static rwlock_t policy_lock;
70 static struct ptlrpc_sec_policy *policies[SPTLRPC_POLICY_MAX] = {
71         NULL,
72 };
73
74 int sptlrpc_register_policy(struct ptlrpc_sec_policy *policy)
75 {
76         __u16 number = policy->sp_policy;
77
78         LASSERT(policy->sp_name);
79         LASSERT(policy->sp_cops);
80         LASSERT(policy->sp_sops);
81
82         if (number >= SPTLRPC_POLICY_MAX)
83                 return -EINVAL;
84
85         write_lock(&policy_lock);
86         if (unlikely(policies[number])) {
87                 write_unlock(&policy_lock);
88                 return -EALREADY;
89         }
90         policies[number] = policy;
91         write_unlock(&policy_lock);
92
93         CDEBUG(D_SEC, "%s: registered\n", policy->sp_name);
94         return 0;
95 }
96 EXPORT_SYMBOL(sptlrpc_register_policy);
97
98 int sptlrpc_unregister_policy(struct ptlrpc_sec_policy *policy)
99 {
100         __u16 number = policy->sp_policy;
101
102         LASSERT(number < SPTLRPC_POLICY_MAX);
103
104         write_lock(&policy_lock);
105         if (unlikely(policies[number] == NULL)) {
106                 write_unlock(&policy_lock);
107                 CERROR("%s: already unregistered\n", policy->sp_name);
108                 return -EINVAL;
109         }
110
111         LASSERT(policies[number] == policy);
112         policies[number] = NULL;
113         write_unlock(&policy_lock);
114
115         CDEBUG(D_SEC, "%s: unregistered\n", policy->sp_name);
116         return 0;
117 }
118 EXPORT_SYMBOL(sptlrpc_unregister_policy);
119
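/*
 * Usage sketch (illustrative, not part of the original file): a security
 * policy module fills in a struct ptlrpc_sec_policy and registers it at
 * module load time.  The identifiers my_policy, my_cli_ops and my_svc_ops
 * below are hypothetical:
 *
 *      static struct ptlrpc_sec_policy my_policy = {
 *              .sp_owner  = THIS_MODULE,
 *              .sp_name   = "sec.myflavor",
 *              .sp_policy = SPTLRPC_POLICY_GSS,
 *              .sp_cops   = &my_cli_ops,
 *              .sp_sops   = &my_svc_ops,
 *      };
 *
 *      static int __init my_policy_init(void)
 *      {
 *              return sptlrpc_register_policy(&my_policy);
 *      }
 *
 *      static void __exit my_policy_fini(void)
 *      {
 *              sptlrpc_unregister_policy(&my_policy);
 *      }
 *
 * Registration fails with -EINVAL for an out-of-range sp_policy and with
 * -EALREADY if that policy number is already registered, matching the
 * checks above.
 */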
120 static
121 struct ptlrpc_sec_policy * sptlrpc_rpcflavor2policy(__u16 flavor)
122 {
123         static DECLARE_MUTEX(load_mutex);
124         static atomic_t           loaded = ATOMIC_INIT(0);
125         struct ptlrpc_sec_policy *policy;
126         __u16                     number = RPC_FLVR_POLICY(flavor), flag = 0;
127
128         if (number >= SPTLRPC_POLICY_MAX)
129                 return NULL;
130
131         while (1) {
132                 read_lock(&policy_lock);
133                 policy = policies[number];
134                 if (policy && !try_module_get(policy->sp_owner))
135                         policy = NULL;
136                 if (policy == NULL)
137                         flag = atomic_read(&loaded);
138                 read_unlock(&policy_lock);
139
140                 if (policy != NULL || flag != 0 ||
141                     number != SPTLRPC_POLICY_GSS)
142                         break;
143
144                 /* try to load gss module, once */
145                 mutex_down(&load_mutex);
146                 if (atomic_read(&loaded) == 0) {
147                         if (request_module("ptlrpc_gss") == 0)
148                                 CWARN("module ptlrpc_gss loaded on demand\n");
149                         else
150                                 CERROR("Unable to load module ptlrpc_gss\n");
151
152                         atomic_set(&loaded, 1);
153                 }
154                 mutex_up(&load_mutex);
155         }
156
157         return policy;
158 }
159
160 __u16 sptlrpc_name2rpcflavor(const char *name)
161 {
162         if (!strcmp(name, "null"))
163                 return SPTLRPC_FLVR_NULL;
164         if (!strcmp(name, "plain"))
165                 return SPTLRPC_FLVR_PLAIN;
166         if (!strcmp(name, "krb5n"))
167                 return SPTLRPC_FLVR_KRB5N;
168         if (!strcmp(name, "krb5a"))
169                 return SPTLRPC_FLVR_KRB5A;
170         if (!strcmp(name, "krb5i"))
171                 return SPTLRPC_FLVR_KRB5I;
172         if (!strcmp(name, "krb5p"))
173                 return SPTLRPC_FLVR_KRB5P;
174
175         return SPTLRPC_FLVR_INVALID;
176 }
177 EXPORT_SYMBOL(sptlrpc_name2rpcflavor);
178
179 const char *sptlrpc_rpcflavor2name(__u16 flavor)
180 {
181         switch (flavor) {
182         case SPTLRPC_FLVR_NULL:
183                 return "null";
184         case SPTLRPC_FLVR_PLAIN:
185                 return "plain";
186         case SPTLRPC_FLVR_KRB5N:
187                 return "krb5n";
188         case SPTLRPC_FLVR_KRB5A:
189                 return "krb5a";
190         case SPTLRPC_FLVR_KRB5I:
191                 return "krb5i";
192         case SPTLRPC_FLVR_KRB5P:
193                 return "krb5p";
194         default:
195                 CERROR("invalid rpc flavor 0x%x(p%u,s%u,v%u)\n", flavor,
196                        RPC_FLVR_POLICY(flavor), RPC_FLVR_MECH(flavor),
197                        RPC_FLVR_SVC(flavor));
198         }
199         return "unknown";
200 }
201 EXPORT_SYMBOL(sptlrpc_rpcflavor2name);
202
203 int sptlrpc_flavor2name(struct sptlrpc_flavor *sf, char *buf, int bufsize)
204 {
205         char           *bulk;
206
207         if (sf->sf_bulk_ciph != BULK_CIPH_ALG_NULL)
208                 bulk = "bulkp";
209         else if (sf->sf_bulk_hash != BULK_HASH_ALG_NULL)
210                 bulk = "bulki";
211         else
212                 bulk = "bulkn";
213
214         snprintf(buf, bufsize, "%s-%s:%s/%s",
215                  sptlrpc_rpcflavor2name(sf->sf_rpc), bulk,
216                  sptlrpc_get_hash_name(sf->sf_bulk_hash),
217                  sptlrpc_get_ciph_name(sf->sf_bulk_ciph));
218         return 0;
219 }
220 EXPORT_SYMBOL(sptlrpc_flavor2name);
221
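/*
 * For reference, the string built above has the form
 * "<rpc flavor>-<bulk kind>:<hash>/<cipher>"; e.g. a krb5p RPC flavor with
 * no bulk protection would render roughly as "krb5p-bulkn:null/null"
 * (the exact hash/cipher names come from sptlrpc_get_hash_name() and
 * sptlrpc_get_ciph_name()).
 */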
222 /**************************************************
223  * client context APIs                            *
224  **************************************************/
225
226 static
227 struct ptlrpc_cli_ctx *get_my_ctx(struct ptlrpc_sec *sec)
228 {
229         struct vfs_cred vcred;
230         int create = 1, remove_dead = 1;
231
232         LASSERT(sec);
233         LASSERT(sec->ps_policy->sp_cops->lookup_ctx);
234
235         if (sec->ps_flvr.sf_flags & (PTLRPC_SEC_FL_REVERSE |
236                                      PTLRPC_SEC_FL_ROOTONLY)) {
237                 vcred.vc_uid = 0;
238                 vcred.vc_gid = 0;
239                 if (sec->ps_flvr.sf_flags & PTLRPC_SEC_FL_REVERSE) {
240                         create = 0;
241                         remove_dead = 0;
242                 }
243         } else {
244                 vcred.vc_uid = cfs_current()->uid;
245                 vcred.vc_gid = cfs_current()->gid;
246         }
247
248         return sec->ps_policy->sp_cops->lookup_ctx(sec, &vcred,
249                                                    create, remove_dead);
250 }
251
252 struct ptlrpc_cli_ctx *sptlrpc_cli_ctx_get(struct ptlrpc_cli_ctx *ctx)
253 {
254         LASSERT(atomic_read(&ctx->cc_refcount) > 0);
255         atomic_inc(&ctx->cc_refcount);
256         return ctx;
257 }
258 EXPORT_SYMBOL(sptlrpc_cli_ctx_get);
259
260 void sptlrpc_cli_ctx_put(struct ptlrpc_cli_ctx *ctx, int sync)
261 {
262         struct ptlrpc_sec *sec = ctx->cc_sec;
263
264         LASSERT(sec);
265         LASSERT(atomic_read(&ctx->cc_refcount));
266
267         if (!atomic_dec_and_test(&ctx->cc_refcount))
268                 return;
269
270         sec->ps_policy->sp_cops->release_ctx(sec, ctx, sync);
271 }
272 EXPORT_SYMBOL(sptlrpc_cli_ctx_put);
273
274 /*
275  * expire the context immediately.
276  * the caller must hold at least 1 ref on the ctx.
277  */
278 void sptlrpc_cli_ctx_expire(struct ptlrpc_cli_ctx *ctx)
279 {
280         LASSERT(ctx->cc_ops->die);
281         ctx->cc_ops->die(ctx, 0);
282 }
283 EXPORT_SYMBOL(sptlrpc_cli_ctx_expire);
284
285 void sptlrpc_cli_ctx_wakeup(struct ptlrpc_cli_ctx *ctx)
286 {
287         struct ptlrpc_request *req, *next;
288
289         spin_lock(&ctx->cc_lock);
290         list_for_each_entry_safe(req, next, &ctx->cc_req_list, rq_ctx_chain) {
291                 list_del_init(&req->rq_ctx_chain);
292                 ptlrpc_client_wake_req(req);
293         }
294         spin_unlock(&ctx->cc_lock);
295 }
296 EXPORT_SYMBOL(sptlrpc_cli_ctx_wakeup);
297
298 int sptlrpc_cli_ctx_display(struct ptlrpc_cli_ctx *ctx, char *buf, int bufsize)
299 {
300         LASSERT(ctx->cc_ops);
301
302         if (ctx->cc_ops->display == NULL)
303                 return 0;
304
305         return ctx->cc_ops->display(ctx, buf, bufsize);
306 }
307
308 static int import_sec_check_expire(struct obd_import *imp)
309 {
310         int     adapt = 0;
311
312         spin_lock(&imp->imp_lock);
313         if (imp->imp_sec_expire &&
314             imp->imp_sec_expire < cfs_time_current_sec()) {
315                 adapt = 1;
316                 imp->imp_sec_expire = 0;
317         }
318         spin_unlock(&imp->imp_lock);
319
320         if (!adapt)
321                 return 0;
322
323         CDEBUG(D_SEC, "found delayed sec adapt expired, do it now\n");
324         return sptlrpc_import_sec_adapt(imp, NULL, 0);
325 }
326
327 static int import_sec_validate_get(struct obd_import *imp,
328                                    struct ptlrpc_sec **sec)
329 {
330         int     rc;
331
332         if (unlikely(imp->imp_sec_expire)) {
333                 rc = import_sec_check_expire(imp);
334                 if (rc)
335                         return rc;
336         }
337
338         *sec = sptlrpc_import_sec_ref(imp);
339         if (*sec == NULL) {
340                 CERROR("import %p (%s) with no sec\n",
341                        imp, ptlrpc_import_state_name(imp->imp_state));
342                 return -EACCES;
343         }
344
345         if (unlikely((*sec)->ps_dying)) {
346                 CERROR("attempt to use dying sec %p\n", *sec);
347                 sptlrpc_sec_put(*sec);
348                 return -EACCES;
349         }
350
351         return 0;
352 }
353
354 int sptlrpc_req_get_ctx(struct ptlrpc_request *req)
355 {
356         struct obd_import *imp = req->rq_import;
357         struct ptlrpc_sec *sec;
358         int                rc;
359         ENTRY;
360
361         LASSERT(!req->rq_cli_ctx);
362         LASSERT(imp);
363
364         rc = import_sec_validate_get(imp, &sec);
365         if (rc)
366                 RETURN(rc);
367
368         req->rq_cli_ctx = get_my_ctx(sec);
369
370         sptlrpc_sec_put(sec);
371
372         if (!req->rq_cli_ctx) {
373                 CERROR("req %p: fail to get context\n", req);
374                 RETURN(-ENOMEM);
375         }
376
377         RETURN(0);
378 }
379
380 /*
381  * if @sync == 0, this function should return quickly without sleeping;
382  * otherwise it might trigger a ctx-destroying rpc to the server.
383  */
384 void sptlrpc_req_put_ctx(struct ptlrpc_request *req, int sync)
385 {
386         ENTRY;
387
388         LASSERT(req);
389         LASSERT(req->rq_cli_ctx);
390
391         /* the request might be asked to release its ctx early, while it
392          * is still on the context waiting list.
393          */
394         if (!list_empty(&req->rq_ctx_chain)) {
395                 spin_lock(&req->rq_cli_ctx->cc_lock);
396                 list_del_init(&req->rq_ctx_chain);
397                 spin_unlock(&req->rq_cli_ctx->cc_lock);
398         }
399
400         sptlrpc_cli_ctx_put(req->rq_cli_ctx, sync);
401         req->rq_cli_ctx = NULL;
402         EXIT;
403 }
404
405 static
406 int sptlrpc_req_ctx_switch(struct ptlrpc_request *req,
407                            struct ptlrpc_cli_ctx *oldctx,
408                            struct ptlrpc_cli_ctx *newctx)
409 {
410         struct sptlrpc_flavor   old_flvr;
411         char                   *reqmsg;
412         int                     reqmsg_size;
413         int                     rc;
414
415         if (likely(oldctx->cc_sec == newctx->cc_sec))
416                 return 0;
417
418         LASSERT(req->rq_reqmsg);
419         LASSERT(req->rq_reqlen);
420         LASSERT(req->rq_replen);
421
422         CWARN("req %p: switch ctx %p -> %p, switch sec %p(%s) -> %p(%s)\n",
423               req, oldctx, newctx,
424               oldctx->cc_sec, oldctx->cc_sec->ps_policy->sp_name,
425               newctx->cc_sec, newctx->cc_sec->ps_policy->sp_name);
426
427         /* save flavor */
428         old_flvr = req->rq_flvr;
429
430         /* save request message */
431         reqmsg_size = req->rq_reqlen;
432         OBD_ALLOC(reqmsg, reqmsg_size);
433         if (reqmsg == NULL)
434                 return -ENOMEM;
435         memcpy(reqmsg, req->rq_reqmsg, reqmsg_size);
436
437         /* release old req/rep buf */
438         req->rq_cli_ctx = oldctx;
439         sptlrpc_cli_free_reqbuf(req);
440         sptlrpc_cli_free_repbuf(req);
441         req->rq_cli_ctx = newctx;
442
443         /* recalculate the flavor */
444         sptlrpc_req_set_flavor(req, 0);
445
446         /* alloc new request buffer
447          * we don't need to alloc reply buffer here, leave it to the
448          * rest procedure of ptlrpc
449          */
450         rc = sptlrpc_cli_alloc_reqbuf(req, reqmsg_size);
451         if (!rc) {
452                 LASSERT(req->rq_reqmsg);
453                 memcpy(req->rq_reqmsg, reqmsg, reqmsg_size);
454         } else {
455                 CWARN("failed to alloc reqbuf: %d\n", rc);
456                 req->rq_flvr = old_flvr;
457         }
458
459         OBD_FREE(reqmsg, reqmsg_size);
460         return rc;
461 }
462
463 /**
464  * if the current context has died, or if we resend after the flavor was
465  * switched, call this function to switch the context. if no switch is
466  * needed, the request will end up with the same context.
467  *
468  * the request must have a context. in any case of failure the old one is
469  * restored - a request must always have a context.
470  */
471 int sptlrpc_req_replace_dead_ctx(struct ptlrpc_request *req)
472 {
473         struct ptlrpc_cli_ctx *oldctx = req->rq_cli_ctx;
474         struct ptlrpc_cli_ctx *newctx;
475         int                    rc;
476         ENTRY;
477
478         LASSERT(oldctx);
479
480         sptlrpc_cli_ctx_get(oldctx);
481         sptlrpc_req_put_ctx(req, 0);
482
483         rc = sptlrpc_req_get_ctx(req);
484         if (unlikely(rc)) {
485                 LASSERT(!req->rq_cli_ctx);
486
487                 /* restore old ctx */
488                 req->rq_cli_ctx = oldctx;
489                 RETURN(rc);
490         }
491
492         newctx = req->rq_cli_ctx;
493         LASSERT(newctx);
494
495         if (unlikely(newctx == oldctx)) {
496                 if (test_bit(PTLRPC_CTX_DEAD_BIT, &oldctx->cc_flags)) {
497                         /*
498                          * still got the old ctx, which usually means the system is busy
499                          */
500                         CWARN("ctx (%p, fl %lx) doesn't switch, "
501                               "relax a little bit\n",
502                               newctx, newctx->cc_flags);
503
504                         cfs_schedule_timeout(CFS_TASK_INTERRUPTIBLE, HZ);
505                 }
506         } else {
507                 rc = sptlrpc_req_ctx_switch(req, oldctx, newctx);
508                 if (rc) {
509                         /* restore old ctx */
510                         sptlrpc_req_put_ctx(req, 0);
511                         req->rq_cli_ctx = oldctx;
512                         RETURN(rc);
513                 }
514
515                 LASSERT(req->rq_cli_ctx == newctx);
516         }
517
518         sptlrpc_cli_ctx_put(oldctx, 1);
519         RETURN(0);
520 }
521 EXPORT_SYMBOL(sptlrpc_req_replace_dead_ctx);
522
523 static
524 int ctx_check_refresh(struct ptlrpc_cli_ctx *ctx)
525 {
526         if (cli_ctx_is_refreshed(ctx))
527                 return 1;
528         return 0;
529 }
530
531 static
532 int ctx_refresh_timeout(void *data)
533 {
534         struct ptlrpc_request *req = data;
535         int rc;
536
537         /* conn_cnt is needed in expire_one_request */
538         lustre_msg_set_conn_cnt(req->rq_reqmsg, req->rq_import->imp_conn_cnt);
539
540         rc = ptlrpc_expire_one_request(req, 1);
541         /* if we started recovery, we should mark this ctx dead; otherwise,
542          * in case lgssd died, nobody would retire this ctx and subsequent
543          * connect attempts would still find the same ctx, causing a deadlock.
544          * there's an assumption that the expiry time of the request should be
545          * later than the context refresh expiry time.
546          */
547         if (rc == 0)
548                 req->rq_cli_ctx->cc_ops->die(req->rq_cli_ctx, 0);
549         return rc;
550 }
551
552 static
553 void ctx_refresh_interrupt(void *data)
554 {
555         struct ptlrpc_request *req = data;
556
557         spin_lock(&req->rq_lock);
558         req->rq_intr = 1;
559         spin_unlock(&req->rq_lock);
560 }
561
562 static
563 void req_off_ctx_list(struct ptlrpc_request *req, struct ptlrpc_cli_ctx *ctx)
564 {
565         spin_lock(&ctx->cc_lock);
566         if (!list_empty(&req->rq_ctx_chain))
567                 list_del_init(&req->rq_ctx_chain);
568         spin_unlock(&ctx->cc_lock);
569 }
570
571 /*
572  * the status of the context could be changed by other threads at any
573  * time; we allow this race. but once we return 0, the caller will
574  * assume it's up to date and keep using it until the owning rpc is done.
575  *
576  * @timeout:
577  *    < 0  - don't wait
578  *    = 0  - wait until success or a fatal error occurs
579  *    > 0  - timeout value (seconds)
580  *
581  * return 0 only if the context is up to date.
582  */
583 int sptlrpc_req_refresh_ctx(struct ptlrpc_request *req, long timeout)
584 {
585         struct ptlrpc_cli_ctx  *ctx = req->rq_cli_ctx;
586         struct ptlrpc_sec      *sec;
587         struct l_wait_info      lwi;
588         int                     rc;
589         ENTRY;
590
591         LASSERT(ctx);
592
593         if (req->rq_ctx_init || req->rq_ctx_fini)
594                 RETURN(0);
595
596         /*
597          * during the process a request's context might even change type
598          * (e.g. from a gss ctx to a plain ctx), so on each loop we need to
599          * re-check everything
600          */
601 again:
602         rc = import_sec_validate_get(req->rq_import, &sec);
603         if (rc)
604                 RETURN(rc);
605
606         if (sec->ps_flvr.sf_rpc != req->rq_flvr.sf_rpc)
607                 sptlrpc_req_replace_dead_ctx(req);
608
609         sptlrpc_sec_put(sec);
610
611         if (cli_ctx_is_eternal(ctx))
612                 RETURN(0);
613
614         if (unlikely(test_bit(PTLRPC_CTX_NEW_BIT, &ctx->cc_flags))) {
615                 LASSERT(ctx->cc_ops->refresh);
616                 ctx->cc_ops->refresh(ctx);
617         }
618         LASSERT(test_bit(PTLRPC_CTX_NEW_BIT, &ctx->cc_flags) == 0);
619
620         LASSERT(ctx->cc_ops->validate);
621         if (ctx->cc_ops->validate(ctx) == 0) {
622                 req_off_ctx_list(req, ctx);
623                 RETURN(0);
624         }
625
626         if (unlikely(test_bit(PTLRPC_CTX_ERROR_BIT, &ctx->cc_flags))) {
627                 req->rq_err = 1;
628                 req_off_ctx_list(req, ctx);
629                 RETURN(-EPERM);
630         }
631
632         /* This is subtle. For a resent message we have to keep the original
633          * context to survive the following situation:
634          *  1. the request is sent to the server
635          *  2. recovery is kick-started
636          *  3. recovery finishes, the request is marked as resent
637          *  4. the request is resent
638          *  5. the old reply from the server arrives (because the xid is the same)
639          *  6. the reply is verified (has to succeed)
640          *  7. a new reply from the server arrives, lnet drops it
641          *
642          * Note we can't simply change the xid for a resent request because
643          * the server relies on it for reply reconstruction.
644          *
645          * Commonly the original context should be up to date because we
646          * have a nice expiry time; and the server will keep its half of the
647          * context because we hold at least one ref on the old context, which
648          * prevents the context destroy RPC from being sent. So the server can
649          * still accept the request and finish the RPC. Two cases:
650          *  1. If the server side context has been trimmed, a NO_CONTEXT will
651          *     be returned, and gss_cli_ctx_verify/unseal will switch to the
652          *     new context by force.
653          *  2. The current context was never refreshed: then we are fine, as we
654          *     never really sent a request with the old context before.
655          */
656         if (test_bit(PTLRPC_CTX_UPTODATE_BIT, &ctx->cc_flags) &&
657             unlikely(req->rq_reqmsg) &&
658             lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT) {
659                 req_off_ctx_list(req, ctx);
660                 RETURN(0);
661         }
662
663         if (unlikely(test_bit(PTLRPC_CTX_DEAD_BIT, &ctx->cc_flags))) {
664                 /*
665                  * don't switch ctx if import was deactivated
666                  */
667                 if (req->rq_import->imp_deactive) {
668                         req_off_ctx_list(req, ctx);
669                         req->rq_err = 1;
670                         RETURN(-EINTR);
671                 }
672
673                 rc = sptlrpc_req_replace_dead_ctx(req);
674                 if (rc) {
675                         LASSERT(ctx == req->rq_cli_ctx);
676                         CERROR("req %p: failed to replace dead ctx %p: %d\n",
677                                 req, ctx, rc);
678                         req->rq_err = 1;
679                         LASSERT(list_empty(&req->rq_ctx_chain));
680                         RETURN(rc);
681                 }
682
683                 CWARN("req %p: replace dead ctx %p => ctx %p (%u->%s)\n",
684                       req, ctx, req->rq_cli_ctx,
685                       req->rq_cli_ctx->cc_vcred.vc_uid,
686                       sec2target_str(req->rq_cli_ctx->cc_sec));
687
688                 ctx = req->rq_cli_ctx;
689                 LASSERT(list_empty(&req->rq_ctx_chain));
690
691                 goto again;
692         }
693
694         /* Now we're sure this context is in the middle of an upcall; add
695          * ourselves to the waiting list
696          */
697         spin_lock(&ctx->cc_lock);
698         if (list_empty(&req->rq_ctx_chain))
699                 list_add(&req->rq_ctx_chain, &ctx->cc_req_list);
700         spin_unlock(&ctx->cc_lock);
701
702         if (timeout < 0)
703                 RETURN(-EWOULDBLOCK);
704
705         /* Clear any flags that may be present from previous sends */
706         LASSERT(req->rq_receiving_reply == 0);
707         spin_lock(&req->rq_lock);
708         req->rq_err = 0;
709         req->rq_timedout = 0;
710         req->rq_resend = 0;
711         req->rq_restart = 0;
712         spin_unlock(&req->rq_lock);
713
714         lwi = LWI_TIMEOUT_INTR(timeout * HZ, ctx_refresh_timeout,
715                                ctx_refresh_interrupt, req);
716         rc = l_wait_event(req->rq_reply_waitq, ctx_check_refresh(ctx), &lwi);
717
718         /* we could be here for any of the following reasons:
719          * - successfully refreshed;
720          * - interrupted;
721          * - timed out, and we don't want to recover from the failure;
722          * - timed out, and woken up when recovery finished;
723          * - someone else marked this ctx dead by force;
724          * - someone invalidated the req and called ptlrpc_client_wake_req(),
725          *   e.g. ptlrpc_abort_inflight();
726          */
727         if (!cli_ctx_is_refreshed(ctx)) {
728                 /* timed out or interrupted */
729                 req_off_ctx_list(req, ctx);
730
731                 LASSERT(rc != 0);
732                 RETURN(rc);
733         }
734
735         goto again;
736 }
737
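/*
 * Illustrative usage of the @timeout modes above (not part of the original
 * code): a caller that must not sleep polls with a negative timeout, while
 * a caller that may block passes 0 to wait until the context is refreshed
 * or a fatal error occurs:
 *
 *      rc = sptlrpc_req_refresh_ctx(req, -1);
 *      if (rc == -EWOULDBLOCK)
 *              return rc;
 *
 *      rc = sptlrpc_req_refresh_ctx(req, 0);
 *
 * sptlrpc_import_check_ctx() below uses the blocking form with timeout 0.
 */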
738 /*
739  * Note this could be called in two situations:
740  * - a new request from ptlrpc_prep_req(), with a proper @opcode
741  * - an old request which changed its ctx in the middle, with @opcode == 0
742  */
743 void sptlrpc_req_set_flavor(struct ptlrpc_request *req, int opcode)
744 {
745         struct ptlrpc_sec *sec;
746
747         LASSERT(req->rq_import);
748         LASSERT(req->rq_cli_ctx);
749         LASSERT(req->rq_cli_ctx->cc_sec);
750         LASSERT(req->rq_bulk_read == 0 || req->rq_bulk_write == 0);
751
752         /* special security flags according to the opcode */
753         switch (opcode) {
754         case OST_READ:
755                 req->rq_bulk_read = 1;
756                 break;
757         case OST_WRITE:
758                 req->rq_bulk_write = 1;
759                 break;
760         case SEC_CTX_INIT:
761                 req->rq_ctx_init = 1;
762                 break;
763         case SEC_CTX_FINI:
764                 req->rq_ctx_fini = 1;
765                 break;
766         case 0:
767                 /* init/fini rpcs won't be resent, so they can't be here */
768                 LASSERT(req->rq_ctx_init == 0);
769                 LASSERT(req->rq_ctx_fini == 0);
770
771                 /* cleanup flags, which should be recalculated */
772                 req->rq_pack_udesc = 0;
773                 req->rq_pack_bulk = 0;
774                 break;
775         }
776
777         sec = req->rq_cli_ctx->cc_sec;
778
779         spin_lock(&sec->ps_lock);
780         req->rq_flvr = sec->ps_flvr;
781         spin_unlock(&sec->ps_lock);
782
783         /* force SVC_NULL for context initiation rpc, SVC_INTG for context
784          * destruction rpc */
785         if (unlikely(req->rq_ctx_init))
786                 rpc_flvr_set_svc(&req->rq_flvr.sf_rpc, SPTLRPC_SVC_NULL);
787         else if (unlikely(req->rq_ctx_fini))
788                 rpc_flvr_set_svc(&req->rq_flvr.sf_rpc, SPTLRPC_SVC_INTG);
789
790         /* user descriptor flag, null security can't do it anyway */
791         if ((sec->ps_flvr.sf_flags & PTLRPC_SEC_FL_UDESC) &&
792             (req->rq_flvr.sf_rpc != SPTLRPC_FLVR_NULL))
793                 req->rq_pack_udesc = 1;
794
795         /* bulk security flag */
796         if ((req->rq_bulk_read || req->rq_bulk_write) &&
797             (req->rq_flvr.sf_bulk_ciph != BULK_CIPH_ALG_NULL ||
798              req->rq_flvr.sf_bulk_hash != BULK_HASH_ALG_NULL))
799                 req->rq_pack_bulk = 1;
800 }
801
802 void sptlrpc_request_out_callback(struct ptlrpc_request *req)
803 {
804         if (RPC_FLVR_SVC(req->rq_flvr.sf_rpc) != SPTLRPC_SVC_PRIV)
805                 return;
806
807         LASSERT(req->rq_clrbuf);
808         if (req->rq_pool || !req->rq_reqbuf)
809                 return;
810
811         OBD_FREE(req->rq_reqbuf, req->rq_reqbuf_len);
812         req->rq_reqbuf = NULL;
813         req->rq_reqbuf_len = 0;
814 }
815
816 /*
817  * check whether the current user has a valid context for an import or not.
818  * might try repeatedly in case of non-fatal errors.
819  * return 0 on success, < 0 on failure
820  */
821 int sptlrpc_import_check_ctx(struct obd_import *imp)
822 {
823         struct ptlrpc_sec     *sec;
824         struct ptlrpc_cli_ctx *ctx;
825         struct ptlrpc_request *req = NULL;
826         int rc;
827         ENTRY;
828
829         might_sleep();
830
831         sec = sptlrpc_import_sec_ref(imp);
832         ctx = get_my_ctx(sec);
833         sptlrpc_sec_put(sec);
834
835         if (!ctx)
836                 RETURN(-ENOMEM);
837
838         if (cli_ctx_is_eternal(ctx) ||
839             ctx->cc_ops->validate(ctx) == 0) {
840                 sptlrpc_cli_ctx_put(ctx, 1);
841                 RETURN(0);
842         }
843
844         if (cli_ctx_is_error(ctx)) {
845                 sptlrpc_cli_ctx_put(ctx, 1);
846                 RETURN(-EACCES);
847         }
848
849         OBD_ALLOC_PTR(req);
850         if (!req)
851                 RETURN(-ENOMEM);
852
853         spin_lock_init(&req->rq_lock);
854         atomic_set(&req->rq_refcount, 10000);
855         CFS_INIT_LIST_HEAD(&req->rq_ctx_chain);
856         cfs_waitq_init(&req->rq_reply_waitq);
857         req->rq_import = imp;
858         req->rq_flvr = sec->ps_flvr;
859         req->rq_cli_ctx = ctx;
860
861         rc = sptlrpc_req_refresh_ctx(req, 0);
862         LASSERT(list_empty(&req->rq_ctx_chain));
863         sptlrpc_cli_ctx_put(req->rq_cli_ctx, 1);
864         OBD_FREE_PTR(req);
865
866         RETURN(rc);
867 }
868
869 int sptlrpc_cli_wrap_request(struct ptlrpc_request *req)
870 {
871         struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
872         int rc = 0;
873         ENTRY;
874
875         LASSERT(ctx);
876         LASSERT(ctx->cc_sec);
877         LASSERT(req->rq_reqbuf || req->rq_clrbuf);
878
879         /* we wrap the bulk request here because now we can be sure
880          * the context is up to date.
881          */
882         if (req->rq_bulk) {
883                 rc = sptlrpc_cli_wrap_bulk(req, req->rq_bulk);
884                 if (rc)
885                         RETURN(rc);
886         }
887
888         switch (RPC_FLVR_SVC(req->rq_flvr.sf_rpc)) {
889         case SPTLRPC_SVC_NULL:
890         case SPTLRPC_SVC_AUTH:
891         case SPTLRPC_SVC_INTG:
892                 LASSERT(ctx->cc_ops->sign);
893                 rc = ctx->cc_ops->sign(ctx, req);
894                 break;
895         case SPTLRPC_SVC_PRIV:
896                 LASSERT(ctx->cc_ops->seal);
897                 rc = ctx->cc_ops->seal(ctx, req);
898                 break;
899         default:
900                 LBUG();
901         }
902
903         if (rc == 0) {
904                 LASSERT(req->rq_reqdata_len);
905                 LASSERT(req->rq_reqdata_len % 8 == 0);
906                 LASSERT(req->rq_reqdata_len <= req->rq_reqbuf_len);
907         }
908
909         RETURN(rc);
910 }
911
912 static int do_cli_unwrap_reply(struct ptlrpc_request *req)
913 {
914         struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
915         int                    rc;
916         __u16                  rpc_flvr;
917         ENTRY;
918
919         LASSERT(ctx);
920         LASSERT(ctx->cc_sec);
921         LASSERT(req->rq_repbuf);
922         LASSERT(req->rq_repdata);
923         LASSERT(req->rq_repmsg == NULL);
924
925         if (req->rq_repdata_len < sizeof(struct lustre_msg)) {
926                 CERROR("replied data length %d too small\n",
927                        req->rq_repdata_len);
928                 RETURN(-EPROTO);
929         }
930
931         /* v2 message, check request/reply policy match */
932         rpc_flvr = WIRE_FLVR_RPC(req->rq_repdata->lm_secflvr);
933
934         if (req->rq_repdata->lm_magic == LUSTRE_MSG_MAGIC_V2_SWABBED)
935                 __swab16s(&rpc_flvr);
936
937         if (RPC_FLVR_POLICY(rpc_flvr) !=
938             RPC_FLVR_POLICY(req->rq_flvr.sf_rpc)) {
939                 CERROR("request policy was %u while reply with %u\n",
940                        RPC_FLVR_POLICY(req->rq_flvr.sf_rpc),
941                        RPC_FLVR_POLICY(rpc_flvr));
942                 RETURN(-EPROTO);
943         }
944
945         /* do nothing if it's null policy; otherwise unpack the
946          * wrapper message */
947         if (RPC_FLVR_POLICY(rpc_flvr) != SPTLRPC_POLICY_NULL &&
948             lustre_unpack_msg(req->rq_repdata, req->rq_repdata_len))
949                 RETURN(-EPROTO);
950
951         switch (RPC_FLVR_SVC(req->rq_flvr.sf_rpc)) {
952         case SPTLRPC_SVC_NULL:
953         case SPTLRPC_SVC_AUTH:
954         case SPTLRPC_SVC_INTG:
955                 LASSERT(ctx->cc_ops->verify);
956                 rc = ctx->cc_ops->verify(ctx, req);
957                 break;
958         case SPTLRPC_SVC_PRIV:
959                 LASSERT(ctx->cc_ops->unseal);
960                 rc = ctx->cc_ops->unseal(ctx, req);
961                 break;
962         default:
963                 LBUG();
964         }
965
966         LASSERT(rc || req->rq_repmsg || req->rq_resend);
967         RETURN(rc);
968 }
969
970 /*
971  * by the time this is called, the reply buffer should have been un-posted,
972  * so nothing is going to change.
973  */
974 int sptlrpc_cli_unwrap_reply(struct ptlrpc_request *req)
975 {
976         LASSERT(req->rq_repbuf);
977         LASSERT(req->rq_repdata == NULL);
978         LASSERT(req->rq_repmsg == NULL);
979         LASSERT(req->rq_reply_off + req->rq_nob_received <= req->rq_repbuf_len);
980
981         if (req->rq_reply_off == 0 &&
982             (lustre_msghdr_get_flags(req->rq_reqmsg) & MSGHDR_AT_SUPPORT)) {
983                 CERROR("real reply with offset 0\n");
984                 return -EPROTO;
985         }
986
987         if (req->rq_reply_off % 8 != 0) {
988                 CERROR("reply at odd offset %u\n", req->rq_reply_off);
989                 return -EPROTO;
990         }
991
992         req->rq_repdata = (struct lustre_msg *)
993                                 (req->rq_repbuf + req->rq_reply_off);
994         req->rq_repdata_len = req->rq_nob_received;
995
996         return do_cli_unwrap_reply(req);
997 }
998
999 /**
1000  * When this is called, the receive buffer might still be posted, so the reply
1001  * data might change at any time, whether we're holding rq_lock or not. we
1002  * expect rq_reply_off to be 0 and rq_nob_received to be the early reply size.
1003  *
1004  * we allocate a separate ptlrpc_request and reply buffer for early reply
1005  * processing; on success return 0 and set @req_ret to a duplicated
1006  * ptlrpc_request. the caller must call sptlrpc_cli_finish_early_reply() on
1007  * the returned request to release it. if anything goes wrong @req_ret is not set.
1008  */
1009 int sptlrpc_cli_unwrap_early_reply(struct ptlrpc_request *req,
1010                                    struct ptlrpc_request **req_ret)
1011 {
1012         struct ptlrpc_request  *early_req;
1013         char                   *early_buf;
1014         int                     early_bufsz, early_size;
1015         int                     rc;
1016         ENTRY;
1017
1018         OBD_ALLOC_PTR(early_req);
1019         if (early_req == NULL)
1020                 RETURN(-ENOMEM);
1021
1022         early_size = req->rq_nob_received;
1023         early_bufsz = size_roundup_power2(early_size);
1024         OBD_ALLOC(early_buf, early_bufsz);
1025         if (early_buf == NULL)
1026                 GOTO(err_req, rc = -ENOMEM);
1027
1028         /* sanity checks and copy data out; do it inside the spinlock */
1029         spin_lock(&req->rq_lock);
1030
1031         if (req->rq_replied) {
1032                 spin_unlock(&req->rq_lock);
1033                 GOTO(err_buf, rc = -EALREADY);
1034         }
1035
1036         LASSERT(req->rq_repbuf);
1037         LASSERT(req->rq_repdata == NULL);
1038         LASSERT(req->rq_repmsg == NULL);
1039
1040         if (req->rq_reply_off != 0) {
1041                 CERROR("early reply with offset %u\n", req->rq_reply_off);
1042                 spin_unlock(&req->rq_lock);
1043                 GOTO(err_buf, rc = -EPROTO);
1044         }
1045
1046         if (req->rq_nob_received != early_size) {
1047                 /* even if another early reply arrived, the size should be the same */
1048                 CERROR("data size has changed from %u to %u\n",
1049                        early_size, req->rq_nob_received);
1050                 spin_unlock(&req->rq_lock);
1051                 GOTO(err_buf, rc = -EINVAL);
1052         }
1053
1054         if (req->rq_nob_received < sizeof(struct lustre_msg)) {
1055                 CERROR("early reply length %d too small\n",
1056                        req->rq_nob_received);
1057                 spin_unlock(&req->rq_lock);
1058                 GOTO(err_buf, rc = -EALREADY);
1059         }
1060
1061         memcpy(early_buf, req->rq_repbuf, early_size);
1062         spin_unlock(&req->rq_lock);
1063
1064         early_req->rq_cli_ctx = sptlrpc_cli_ctx_get(req->rq_cli_ctx);
1065         early_req->rq_flvr = req->rq_flvr;
1066         early_req->rq_repbuf = early_buf;
1067         early_req->rq_repbuf_len = early_bufsz;
1068         early_req->rq_repdata = (struct lustre_msg *) early_buf;
1069         early_req->rq_repdata_len = early_size;
1070         early_req->rq_early = 1;
1071
1072         rc = do_cli_unwrap_reply(early_req);
1073         if (rc) {
1074                 DEBUG_REQ(D_ADAPTTO, early_req,
1075                           "error %d unwrap early reply", rc);
1076                 GOTO(err_ctx, rc);
1077         }
1078
1079         LASSERT(early_req->rq_repmsg);
1080         *req_ret = early_req;
1081         RETURN(0);
1082
1083 err_ctx:
1084         sptlrpc_cli_ctx_put(early_req->rq_cli_ctx, 1);
1085 err_buf:
1086         OBD_FREE(early_buf, early_bufsz);
1087 err_req:
1088         OBD_FREE_PTR(early_req);
1089         RETURN(rc);
1090 }
1091
1092 void sptlrpc_cli_finish_early_reply(struct ptlrpc_request *early_req)
1093 {
1094         LASSERT(early_req->rq_repbuf);
1095         LASSERT(early_req->rq_repdata);
1096         LASSERT(early_req->rq_repmsg);
1097
1098         sptlrpc_cli_ctx_put(early_req->rq_cli_ctx, 1);
1099         OBD_FREE(early_req->rq_repbuf, early_req->rq_repbuf_len);
1100         OBD_FREE_PTR(early_req);
1101 }
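/*
 * Illustrative pairing of the two calls above (not part of the original
 * code): the early request returned by sptlrpc_cli_unwrap_early_reply()
 * must always be released with sptlrpc_cli_finish_early_reply():
 *
 *      struct ptlrpc_request *early_req;
 *      int rc;
 *
 *      rc = sptlrpc_cli_unwrap_early_reply(req, &early_req);
 *      if (rc == 0) {
 *              ... read early_req->rq_repmsg ...
 *              sptlrpc_cli_finish_early_reply(early_req);
 *      }
 */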
1102
1103 /**************************************************
1104  * sec ID                                         *
1105  **************************************************/
1106
1107 /*
1108  * a "fixed" sec (e.g. null) uses sec_id < 0
1109  */
1110 static atomic_t sptlrpc_sec_id = ATOMIC_INIT(1);
1111
1112 int sptlrpc_get_next_secid(void)
1113 {
1114         return atomic_inc_return(&sptlrpc_sec_id);
1115 }
1116 EXPORT_SYMBOL(sptlrpc_get_next_secid);
1117
1118 /**************************************************
1119  * client side high-level security APIs           *
1120  **************************************************/
1121
1122 static int sec_cop_flush_ctx_cache(struct ptlrpc_sec *sec, uid_t uid,
1123                                    int grace, int force)
1124 {
1125         struct ptlrpc_sec_policy *policy = sec->ps_policy;
1126
1127         LASSERT(policy->sp_cops);
1128         LASSERT(policy->sp_cops->flush_ctx_cache);
1129
1130         return policy->sp_cops->flush_ctx_cache(sec, uid, grace, force);
1131 }
1132
1133 static void sec_cop_destroy_sec(struct ptlrpc_sec *sec)
1134 {
1135         struct ptlrpc_sec_policy *policy = sec->ps_policy;
1136
1137         LASSERT(atomic_read(&sec->ps_refcount) == 0);
1138         LASSERT(atomic_read(&sec->ps_nctx) == 0);
1139         LASSERT(policy->sp_cops->destroy_sec);
1140
1141         CDEBUG(D_SEC, "%s@%p: being destroyed\n", sec->ps_policy->sp_name, sec);
1142
1143         policy->sp_cops->destroy_sec(sec);
1144         sptlrpc_policy_put(policy);
1145 }
1146
1147 void sptlrpc_sec_destroy(struct ptlrpc_sec *sec)
1148 {
1149         sec_cop_destroy_sec(sec);
1150 }
1151 EXPORT_SYMBOL(sptlrpc_sec_destroy);
1152
1153 static void sptlrpc_sec_kill(struct ptlrpc_sec *sec)
1154 {
1155         LASSERT(atomic_read(&sec->ps_refcount) > 0);
1156
1157         if (sec->ps_policy->sp_cops->kill_sec) {
1158                 sec->ps_policy->sp_cops->kill_sec(sec);
1159
1160                 sec_cop_flush_ctx_cache(sec, -1, 1, 1);
1161         }
1162 }
1163
1164 struct ptlrpc_sec *sptlrpc_sec_get(struct ptlrpc_sec *sec)
1165 {
1166         if (sec) {
1167                 LASSERT(atomic_read(&sec->ps_refcount) > 0);
1168                 atomic_inc(&sec->ps_refcount);
1169         }
1170
1171         return sec;
1172 }
1173 EXPORT_SYMBOL(sptlrpc_sec_get);
1174
1175 void sptlrpc_sec_put(struct ptlrpc_sec *sec)
1176 {
1177         if (sec) {
1178                 LASSERT(atomic_read(&sec->ps_refcount) > 0);
1179
1180                 if (atomic_dec_and_test(&sec->ps_refcount)) {
1181                         LASSERT(atomic_read(&sec->ps_nctx) == 0);
1182
1183                         sptlrpc_gc_del_sec(sec);
1184                         sec_cop_destroy_sec(sec);
1185                 }
1186         }
1187 }
1188 EXPORT_SYMBOL(sptlrpc_sec_put);
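/*
 * Typical reference pattern (illustrative): holders take a reference with
 * sptlrpc_import_sec_ref() and must drop it with sptlrpc_sec_put(), e.g.
 *
 *      struct ptlrpc_sec *sec;
 *
 *      sec = sptlrpc_import_sec_ref(imp);
 *      if (sec == NULL)
 *              return -EACCES;
 *      ... use sec ...
 *      sptlrpc_sec_put(sec);
 *
 * This mirrors what import_sec_validate_get() above does.
 */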
1189
1190 /*
1191  * it's the policy module's responsibility to take a reference on the import
1192  */
1193 static
1194 struct ptlrpc_sec * sptlrpc_sec_create(struct obd_import *imp,
1195                                        struct ptlrpc_svc_ctx *svc_ctx,
1196                                        struct sptlrpc_flavor *sf,
1197                                        enum lustre_sec_part sp)
1198 {
1199         struct ptlrpc_sec_policy *policy;
1200         struct ptlrpc_sec        *sec;
1201         ENTRY;
1202
1203         if (svc_ctx) {
1204                 LASSERT(imp->imp_dlm_fake == 1);
1205
1206                 CDEBUG(D_SEC, "%s %s: reverse sec using flavor %s\n",
1207                        imp->imp_obd->obd_type->typ_name,
1208                        imp->imp_obd->obd_name,
1209                        sptlrpc_rpcflavor2name(sf->sf_rpc));
1210
1211                 policy = sptlrpc_policy_get(svc_ctx->sc_policy);
1212                 sf->sf_flags |= PTLRPC_SEC_FL_REVERSE | PTLRPC_SEC_FL_ROOTONLY;
1213         } else {
1214                 LASSERT(imp->imp_dlm_fake == 0);
1215
1216                 CDEBUG(D_SEC, "%s %s: select security flavor %s\n",
1217                        imp->imp_obd->obd_type->typ_name,
1218                        imp->imp_obd->obd_name,
1219                        sptlrpc_rpcflavor2name(sf->sf_rpc));
1220
1221                 policy = sptlrpc_rpcflavor2policy(sf->sf_rpc);
1222                 if (!policy) {
1223                         CERROR("invalid flavor 0x%x\n", sf->sf_rpc);
1224                         RETURN(NULL);
1225                 }
1226         }
1227
1228         sec = policy->sp_cops->create_sec(imp, svc_ctx, sf);
1229         if (sec) {
1230                 atomic_inc(&sec->ps_refcount);
1231
1232                 sec->ps_part = sp;
1233
1234                 if (sec->ps_gc_interval && policy->sp_cops->gc_ctx)
1235                         sptlrpc_gc_add_sec(sec);
1236         } else {
1237                 sptlrpc_policy_put(policy);
1238         }
1239
1240         RETURN(sec);
1241 }
1242
1243 struct ptlrpc_sec *sptlrpc_import_sec_ref(struct obd_import *imp)
1244 {
1245         struct ptlrpc_sec *sec;
1246
1247         spin_lock(&imp->imp_lock);
1248         sec = sptlrpc_sec_get(imp->imp_sec);
1249         spin_unlock(&imp->imp_lock);
1250
1251         return sec;
1252 }
1253 EXPORT_SYMBOL(sptlrpc_import_sec_ref);
1254
1255 static void sptlrpc_import_sec_install(struct obd_import *imp,
1256                                        struct ptlrpc_sec *sec)
1257 {
1258         struct ptlrpc_sec *old_sec;
1259
1260         LASSERT(atomic_read(&sec->ps_refcount) > 0);
1261
1262         spin_lock(&imp->imp_lock);
1263         old_sec = imp->imp_sec;
1264         imp->imp_sec = sec;
1265         spin_unlock(&imp->imp_lock);
1266
1267         if (old_sec) {
1268                 sptlrpc_sec_kill(old_sec);
1269
1270                 /* balance the ref taken by this import */
1271                 sptlrpc_sec_put(old_sec);
1272         }
1273 }
1274
1275 static void sptlrpc_import_sec_adapt_inplace(struct obd_import *imp,
1276                                              struct ptlrpc_sec *sec,
1277                                              struct sptlrpc_flavor *sf)
1278 {
1279         if (sf->sf_bulk_ciph != sec->ps_flvr.sf_bulk_ciph ||
1280             sf->sf_bulk_hash != sec->ps_flvr.sf_bulk_hash) {
1281                 CWARN("imp %p (%s->%s): changing bulk flavor %s/%s -> %s/%s\n",
1282                       imp, imp->imp_obd->obd_name,
1283                       obd_uuid2str(&imp->imp_connection->c_remote_uuid),
1284                       sptlrpc_get_ciph_name(sec->ps_flvr.sf_bulk_ciph),
1285                       sptlrpc_get_hash_name(sec->ps_flvr.sf_bulk_hash),
1286                       sptlrpc_get_ciph_name(sf->sf_bulk_ciph),
1287                       sptlrpc_get_hash_name(sf->sf_bulk_hash));
1288
1289                 spin_lock(&sec->ps_lock);
1290                 sec->ps_flvr.sf_bulk_ciph = sf->sf_bulk_ciph;
1291                 sec->ps_flvr.sf_bulk_hash = sf->sf_bulk_hash;
1292                 spin_unlock(&sec->ps_lock);
1293         }
1294
1295         if (!equi(sf->sf_flags & PTLRPC_SEC_FL_UDESC,
1296                   sec->ps_flvr.sf_flags & PTLRPC_SEC_FL_UDESC)) {
1297                 CWARN("imp %p (%s->%s): %s shipping user descriptor\n",
1298                       imp, imp->imp_obd->obd_name,
1299                       obd_uuid2str(&imp->imp_connection->c_remote_uuid),
1300                       (sf->sf_flags & PTLRPC_SEC_FL_UDESC) ? "start" : "stop");
1301
1302                 spin_lock(&sec->ps_lock);
1303                 sec->ps_flvr.sf_flags &= ~PTLRPC_SEC_FL_UDESC;
1304                 sec->ps_flvr.sf_flags |= sf->sf_flags & PTLRPC_SEC_FL_UDESC;
1305                 spin_unlock(&sec->ps_lock);
1306         }
1307 }
1308
1309 /*
1310  * for a normal import, @svc_ctx should be NULL and @rpc_flavor is ignored;
1311  * for a reverse import, @svc_ctx and @rpc_flavor are from the incoming request.
1312  */
1313 int sptlrpc_import_sec_adapt(struct obd_import *imp,
1314                              struct ptlrpc_svc_ctx *svc_ctx,
1315                              __u16 rpc_flavor)
1316 {
1317         struct ptlrpc_connection   *conn;
1318         struct sptlrpc_flavor       sf;
1319         struct ptlrpc_sec          *sec, *newsec;
1320         enum lustre_sec_part        sp;
1321         int                         rc;
1322
1323         might_sleep();
1324
1325         if (imp == NULL)
1326                 return 0;
1327
1328         conn = imp->imp_connection;
1329
1330         if (svc_ctx == NULL) {
1331                 struct client_obd *cliobd = &imp->imp_obd->u.cli;
1332                 /*
1333                  * normal import: determine the flavor from the rule set,
1334                  * except for the mgc, whose flavor is predetermined.
1335                  */
1336                 if (cliobd->cl_sp_me == LUSTRE_SP_MGC)
1337                         sf = cliobd->cl_flvr_mgc;
1338                 else 
1339                         sptlrpc_conf_choose_flavor(cliobd->cl_sp_me,
1340                                                    cliobd->cl_sp_to,
1341                                                    &cliobd->cl_target_uuid,
1342                                                    conn->c_self, &sf);
1343
1344                 sp = imp->imp_obd->u.cli.cl_sp_me;
1345         } else {
1346                 /* reverse import, determine flavor from the incoming request */
1347                 sf.sf_rpc = rpc_flavor;
1348                 sf.sf_bulk_ciph = BULK_CIPH_ALG_NULL;
1349                 sf.sf_bulk_hash = BULK_HASH_ALG_NULL;
1350                 sf.sf_flags = PTLRPC_SEC_FL_REVERSE | PTLRPC_SEC_FL_ROOTONLY;
1351
1352                 sp = sptlrpc_target_sec_part(imp->imp_obd);
1353         }
1354
1355         sec = sptlrpc_import_sec_ref(imp);
1356         if (sec) {
1357                 if (svc_ctx == NULL) {
1358                         /* normal import, only check the rpc flavor; if just
1359                          * the bulk flavor or flags changed, we can handle it
1360                          * on the fly without switching sec. */
1361                         if (sf.sf_rpc == sec->ps_flvr.sf_rpc) {
1362                                 sptlrpc_import_sec_adapt_inplace(imp, sec, &sf);
1363
1364                                 rc = 0;
1365                                 goto out;
1366                         }
1367                 } else {
1368                         /* reverse import, do not compare bulk flavor */
1369                         if (sf.sf_rpc == sec->ps_flvr.sf_rpc) {
1370                                 rc = 0;
1371                                 goto out;
1372                         }
1373                 }
1374
1375                 CWARN("%simport %p (%s%s%s): changing flavor "
1376                       "(%s, %s/%s) -> (%s, %s/%s)\n",
1377                       svc_ctx ? "reverse " : "",
1378                       imp, imp->imp_obd->obd_name,
1379                       svc_ctx == NULL ? "->" : "<-",
1380                       obd_uuid2str(&conn->c_remote_uuid),
1381                       sptlrpc_rpcflavor2name(sec->ps_flvr.sf_rpc),
1382                       sptlrpc_get_hash_name(sec->ps_flvr.sf_bulk_hash),
1383                       sptlrpc_get_ciph_name(sec->ps_flvr.sf_bulk_ciph),
1384                       sptlrpc_rpcflavor2name(sf.sf_rpc),
1385                       sptlrpc_get_hash_name(sf.sf_bulk_hash),
1386                       sptlrpc_get_ciph_name(sf.sf_bulk_ciph));
1387         } else {
1388                 CWARN("%simport %p (%s%s%s) netid %x: "
1389                       "select initial flavor (%s, %s/%s)\n",
1390                       svc_ctx == NULL ? "" : "reverse ",
1391                       imp, imp->imp_obd->obd_name,
1392                       svc_ctx == NULL ? "->" : "<-",
1393                       obd_uuid2str(&conn->c_remote_uuid),
1394                       LNET_NIDNET(conn->c_self),
1395                       sptlrpc_rpcflavor2name(sf.sf_rpc),
1396                       sptlrpc_get_hash_name(sf.sf_bulk_hash),
1397                       sptlrpc_get_ciph_name(sf.sf_bulk_ciph));
1398         }
1399
1400         mutex_down(&imp->imp_sec_mutex);
1401
1402         newsec = sptlrpc_sec_create(imp, svc_ctx, &sf, sp);
1403         if (newsec) {
1404                 sptlrpc_import_sec_install(imp, newsec);
1405                 rc = 0;
1406         } else {
1407                 CERROR("%simport %p (%s): failed to create new sec\n",
1408                        svc_ctx == NULL ? "" : "reverse ",
1409                        imp, obd_uuid2str(&conn->c_remote_uuid));
1410                 rc = -EPERM;
1411         }
1412
1413         mutex_up(&imp->imp_sec_mutex);
1414
1415 out:
1416         sptlrpc_sec_put(sec);
1417         return rc;
1418 }
1419
1420 void sptlrpc_import_sec_put(struct obd_import *imp)
1421 {
1422         if (imp->imp_sec) {
1423                 sptlrpc_sec_kill(imp->imp_sec);
1424
1425                 sptlrpc_sec_put(imp->imp_sec);
1426                 imp->imp_sec = NULL;
1427         }
1428 }
1429
1430 static void import_flush_ctx_common(struct obd_import *imp,
1431                                     uid_t uid, int grace, int force)
1432 {
1433         struct ptlrpc_sec *sec;
1434
1435         if (imp == NULL)
1436                 return;
1437
1438         sec = sptlrpc_import_sec_ref(imp);
1439         if (sec == NULL)
1440                 return;
1441
1442         sec_cop_flush_ctx_cache(sec, uid, grace, force);
1443         sptlrpc_sec_put(sec);
1444 }
1445
1446 void sptlrpc_import_inval_all_ctx(struct obd_import *imp)
1447 {
1448         /* use grace == 0 */
1449         import_flush_ctx_common(imp, -1, 0, 1);
1450 }
1451
1452 void sptlrpc_import_flush_root_ctx(struct obd_import *imp)
1453 {
1454         /* it's important to use grace mode, see the explanation in
1455          * sptlrpc_req_refresh_ctx() */
1456         import_flush_ctx_common(imp, 0, 1, 1);
1457 }
1458
1459 void sptlrpc_import_flush_my_ctx(struct obd_import *imp)
1460 {
1461         import_flush_ctx_common(imp, cfs_current()->uid, 1, 1);
1462 }
1463 EXPORT_SYMBOL(sptlrpc_import_flush_my_ctx);
1464
1465 void sptlrpc_import_flush_all_ctx(struct obd_import *imp)
1466 {
1467         import_flush_ctx_common(imp, -1, 1, 1);
1468 }
1469 EXPORT_SYMBOL(sptlrpc_import_flush_all_ctx);
1470
1471 /*
1472  * when this completes successfully, req->rq_reqmsg should point to the
1473  * right place.
1474  */
1475 int sptlrpc_cli_alloc_reqbuf(struct ptlrpc_request *req, int msgsize)
1476 {
1477         struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
1478         struct ptlrpc_sec_policy *policy;
1479         int rc;
1480
1481         LASSERT(ctx);
1482         LASSERT(atomic_read(&ctx->cc_refcount));
1483         LASSERT(ctx->cc_sec);
1484         LASSERT(ctx->cc_sec->ps_policy);
1485         LASSERT(req->rq_reqmsg == NULL);
1486
1487         policy = ctx->cc_sec->ps_policy;
1488         rc = policy->sp_cops->alloc_reqbuf(ctx->cc_sec, req, msgsize);
1489         if (!rc) {
1490                 LASSERT(req->rq_reqmsg);
1491                 LASSERT(req->rq_reqbuf || req->rq_clrbuf);
1492
1493                 /* zeroing preallocated buffer */
1494                 if (req->rq_pool)
1495                         memset(req->rq_reqmsg, 0, msgsize);
1496         }
1497
1498         return rc;
1499 }
1500
1501 void sptlrpc_cli_free_reqbuf(struct ptlrpc_request *req)
1502 {
1503         struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
1504         struct ptlrpc_sec_policy *policy;
1505
1506         LASSERT(ctx);
1507         LASSERT(atomic_read(&ctx->cc_refcount));
1508         LASSERT(ctx->cc_sec);
1509         LASSERT(ctx->cc_sec->ps_policy);
1510
1511         if (req->rq_reqbuf == NULL && req->rq_clrbuf == NULL)
1512                 return;
1513
1514         policy = ctx->cc_sec->ps_policy;
1515         policy->sp_cops->free_reqbuf(ctx->cc_sec, req);
1516 }
1517
1518 /*
1519  * NOTE caller must guarantee the buffer size is enough for the enlargement
1520  */
1521 void _sptlrpc_enlarge_msg_inplace(struct lustre_msg *msg,
1522                                   int segment, int newsize)
1523 {
1524         void   *src, *dst;
1525         int     oldsize, oldmsg_size, movesize;
1526
1527         LASSERT(segment < msg->lm_bufcount);
1528         LASSERT(msg->lm_buflens[segment] <= newsize);
1529
1530         if (msg->lm_buflens[segment] == newsize)
1531                 return;
1532
1533         /* nothing to do if we are enlarging the last segment */
1534         if (segment == msg->lm_bufcount - 1) {
1535                 msg->lm_buflens[segment] = newsize;
1536                 return;
1537         }
1538
1539         oldsize = msg->lm_buflens[segment];
1540
1541         src = lustre_msg_buf(msg, segment + 1, 0);
1542         msg->lm_buflens[segment] = newsize;
1543         dst = lustre_msg_buf(msg, segment + 1, 0);
1544         msg->lm_buflens[segment] = oldsize;
1545
1546         /* move from segment + 1 to end segment */
1547         LASSERT(msg->lm_magic == LUSTRE_MSG_MAGIC_V2);
1548         oldmsg_size = lustre_msg_size_v2(msg->lm_bufcount, msg->lm_buflens);
1549         movesize = oldmsg_size - ((unsigned long) src - (unsigned long) msg);
1550         LASSERT(movesize >= 0);
1551
1552         if (movesize)
1553                 memmove(dst, src, movesize);
1554
1555         /* note we don't clear the area where the old data lived; it's not secret */
1556
1557         /* finally set new segment size */
1558         msg->lm_buflens[segment] = newsize;
1559 }
1560 EXPORT_SYMBOL(_sptlrpc_enlarge_msg_inplace);
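/*
 * Worked example (illustrative): with lm_buflens = {128, 40, 64}, enlarging
 * segment 1 to 45 shifts segment 2 forward by 8 bytes (segment starts are
 * 8-byte aligned, and 45 rounds up to 48 vs. 40), then sets
 * lm_buflens[1] = 45.  Enlarging the last segment just updates its length,
 * as handled above.
 */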
1561
1562 /*
1563  * enlarge @segment of the upper message req->rq_reqmsg to @newsize; all data
1564  * will be preserved after the enlargement. this must be called after rq_reqmsg
1565  * has been initialized, at least.
1566  *
1567  * caller beware: upon return, rq_reqmsg and rq_reqlen might have
1568  * been changed.
1569  */
1570 int sptlrpc_cli_enlarge_reqbuf(struct ptlrpc_request *req,
1571                                int segment, int newsize)
1572 {
1573         struct ptlrpc_cli_ctx    *ctx = req->rq_cli_ctx;
1574         struct ptlrpc_sec_cops   *cops;
1575         struct lustre_msg        *msg = req->rq_reqmsg;
1576
1577         LASSERT(ctx);
1578         LASSERT(msg);
1579         LASSERT(msg->lm_bufcount > segment);
1580         LASSERT(msg->lm_buflens[segment] <= newsize);
1581
1582         if (msg->lm_buflens[segment] == newsize)
1583                 return 0;
1584
1585         cops = ctx->cc_sec->ps_policy->sp_cops;
1586         LASSERT(cops->enlarge_reqbuf);
1587         return cops->enlarge_reqbuf(ctx->cc_sec, req, segment, newsize);
1588 }
1589 EXPORT_SYMBOL(sptlrpc_cli_enlarge_reqbuf);
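
/*
 * Illustrative sketch of a caller (the segment index and size are made up);
 * the important point is reloading rq_reqmsg afterwards, since the policy
 * is allowed to reallocate and move the underlying buffer:
 *
 *      rc = sptlrpc_cli_enlarge_reqbuf(req, 2, newsize);
 *      if (rc)
 *              return rc;
 *      msg = req->rq_reqmsg;   (re-read it, the buffer may have moved)
 */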
1590
1591 int sptlrpc_cli_alloc_repbuf(struct ptlrpc_request *req, int msgsize)
1592 {
1593         struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
1594         struct ptlrpc_sec_policy *policy;
1595         ENTRY;
1596
1597         LASSERT(ctx);
1598         LASSERT(atomic_read(&ctx->cc_refcount));
1599         LASSERT(ctx->cc_sec);
1600         LASSERT(ctx->cc_sec->ps_policy);
1601
1602         if (req->rq_repbuf)
1603                 RETURN(0);
1604
1605         policy = ctx->cc_sec->ps_policy;
1606         RETURN(policy->sp_cops->alloc_repbuf(ctx->cc_sec, req, msgsize));
1607 }
1608
1609 void sptlrpc_cli_free_repbuf(struct ptlrpc_request *req)
1610 {
1611         struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
1612         struct ptlrpc_sec_policy *policy;
1613         ENTRY;
1614
1615         LASSERT(ctx);
1616         LASSERT(atomic_read(&ctx->cc_refcount));
1617         LASSERT(ctx->cc_sec);
1618         LASSERT(ctx->cc_sec->ps_policy);
1619
1620         if (req->rq_repbuf == NULL)
1621                 return;
1622         LASSERT(req->rq_repbuf_len);
1623
1624         policy = ctx->cc_sec->ps_policy;
1625         policy->sp_cops->free_repbuf(ctx->cc_sec, req);
1626         EXIT;
1627 }
1628
1629 int sptlrpc_cli_install_rvs_ctx(struct obd_import *imp,
1630                                 struct ptlrpc_cli_ctx *ctx)
1631 {
1632         struct ptlrpc_sec_policy *policy = ctx->cc_sec->ps_policy;
1633
1634         if (!policy->sp_cops->install_rctx)
1635                 return 0;
1636         return policy->sp_cops->install_rctx(imp, ctx->cc_sec, ctx);
1637 }
1638
1639 int sptlrpc_svc_install_rvs_ctx(struct obd_import *imp,
1640                                 struct ptlrpc_svc_ctx *ctx)
1641 {
1642         struct ptlrpc_sec_policy *policy = ctx->sc_policy;
1643
1644         if (!policy->sp_sops->install_rctx)
1645                 return 0;
1646         return policy->sp_sops->install_rctx(imp, ctx);
1647 }
1648
1649 /****************************************
1650  * server side security                 *
1651  ****************************************/
1652
1653 static int flavor_allowed(struct sptlrpc_flavor *exp,
1654                           struct ptlrpc_request *req)
1655 {
1656         struct sptlrpc_flavor *flvr = &req->rq_flvr;
1657
1658         if (exp->sf_rpc == SPTLRPC_FLVR_ANY || exp->sf_rpc == flvr->sf_rpc)
1659                 return 1;
1660
1661         if ((req->rq_ctx_init || req->rq_ctx_fini) &&
1662             RPC_FLVR_POLICY(exp->sf_rpc) == RPC_FLVR_POLICY(flvr->sf_rpc) &&
1663             RPC_FLVR_MECH(exp->sf_rpc) == RPC_FLVR_MECH(flvr->sf_rpc))
1664                 return 1;
1665
1666         return 0;
1667 }
1668
1669 #define EXP_FLVR_UPDATE_EXPIRE      (OBD_TIMEOUT_DEFAULT + 10)
1670
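/*
 * Summary of the flavor bookkeeping used below (added for readability, it
 * only restates the existing logic): exp_flvr is the flavor currently in
 * force; exp_flvr_old[0]/[1] hold previously used flavors together with
 * their expiry times in exp_flvr_expire[0]/[1], so requests still carrying
 * an old flavor are accepted for up to EXP_FLVR_UPDATE_EXPIRE seconds after
 * a switch.  While exp_flvr_changed is set, exp_flvr_old[1] holds the newly
 * configured flavor waiting to be promoted by the first matching rpc.
 */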
1671 int sptlrpc_target_export_check(struct obd_export *exp,
1672                                 struct ptlrpc_request *req)
1673 {
1674         struct sptlrpc_flavor   flavor;
1675
1676         if (exp == NULL)
1677                 return 0;
1678
1679         /* a client side export has no imp_reverse, skip it.
1680          * FIXME maybe we should check the flavor here as well? */
1681         if (exp->exp_imp_reverse == NULL)
1682                 return 0;
1683
1684         /* don't care about ctx fini rpc */
1685         if (req->rq_ctx_fini)
1686                 return 0;
1687
1688         spin_lock(&exp->exp_lock);
1689
1690         /* if the flavor just changed (exp->exp_flvr_changed != 0), we wait for
1691          * the first req carrying the new flavor, then treat it as the current
1692          * flavor and adapt the reverse sec accordingly.
1693          * note the first rpc with the new flavor might not carry a root ctx, in
1694          * which case the sec_adapt is delayed by leaving exp_flvr_adapt == 1. */
1695         if (unlikely(exp->exp_flvr_changed) &&
1696             flavor_allowed(&exp->exp_flvr_old[1], req)) {
1697                 /* make the new flavor the "current" one, and mark the old
1698                  * ones as about-to-expire */
1699                 CDEBUG(D_SEC, "exp %p: just changed: %x->%x\n", exp,
1700                        exp->exp_flvr.sf_rpc, exp->exp_flvr_old[1].sf_rpc);
1701                 flavor = exp->exp_flvr_old[1];
1702                 exp->exp_flvr_old[1] = exp->exp_flvr_old[0];
1703                 exp->exp_flvr_expire[1] = exp->exp_flvr_expire[0];
1704                 exp->exp_flvr_old[0] = exp->exp_flvr;
1705                 exp->exp_flvr_expire[0] = cfs_time_current_sec() +
1706                                           EXP_FLVR_UPDATE_EXPIRE;
1707                 exp->exp_flvr = flavor;
1708
1709                 /* flavor change finished */
1710                 exp->exp_flvr_changed = 0;
1711                 LASSERT(exp->exp_flvr_adapt == 1);
1712
1713                 /* if it's gss, we are only interested in root ctx init */
1714                 if (req->rq_auth_gss &&
1715                     !(req->rq_ctx_init && (req->rq_auth_usr_root ||
1716                                            req->rq_auth_usr_mdt))) {
1717                         spin_unlock(&exp->exp_lock);
1718                         CDEBUG(D_SEC, "is good but not root(%d:%d:%d:%d)\n",
1719                                req->rq_auth_gss, req->rq_ctx_init,
1720                                req->rq_auth_usr_root, req->rq_auth_usr_mdt);
1721                         return 0;
1722                 }
1723
1724                 exp->exp_flvr_adapt = 0;
1725                 spin_unlock(&exp->exp_lock);
1726
1727                 return sptlrpc_import_sec_adapt(exp->exp_imp_reverse,
1728                                                 req->rq_svc_ctx, flavor.sf_rpc);
1729         }
1730
1731         /* if it matches the current flavor, we accept it, but still need to
1732          * deal with the reverse sec/ctx */
1733         if (likely(flavor_allowed(&exp->exp_flvr, req))) {
1734                 /* most cases should return here; we are only interested in
1735                  * gss root ctx init */
1736                 if (!req->rq_auth_gss || !req->rq_ctx_init ||
1737                     (!req->rq_auth_usr_root && !req->rq_auth_usr_mdt)) {
1738                         spin_unlock(&exp->exp_lock);
1739                         return 0;
1740                 }
1741
1742                 /* if the flavor just changed, we should not proceed; just
1743                  * leave it alone, the current flavor will be discovered and
1744                  * replaced shortly, and let _this_ rpc pass through */
1745                 if (exp->exp_flvr_changed) {
1746                         LASSERT(exp->exp_flvr_adapt);
1747                         spin_unlock(&exp->exp_lock);
1748                         return 0;
1749                 }
1750
1751                 if (exp->exp_flvr_adapt) {
1752                         exp->exp_flvr_adapt = 0;
1753                         CDEBUG(D_SEC, "exp %p (%x|%x|%x): do delayed adapt\n",
1754                                exp, exp->exp_flvr.sf_rpc,
1755                                exp->exp_flvr_old[0].sf_rpc,
1756                                exp->exp_flvr_old[1].sf_rpc);
1757                         flavor = exp->exp_flvr;
1758                         spin_unlock(&exp->exp_lock);
1759
1760                         return sptlrpc_import_sec_adapt(exp->exp_imp_reverse,
1761                                                         req->rq_svc_ctx,
1762                                                         flavor.sf_rpc);
1763                 } else {
1764                         CDEBUG(D_SEC, "exp %p (%x|%x|%x): is current flavor, "
1765                                "install rvs ctx\n", exp, exp->exp_flvr.sf_rpc,
1766                                exp->exp_flvr_old[0].sf_rpc,
1767                                exp->exp_flvr_old[1].sf_rpc);
1768                         spin_unlock(&exp->exp_lock);
1769
1770                         return sptlrpc_svc_install_rvs_ctx(exp->exp_imp_reverse,
1771                                                            req->rq_svc_ctx);
1772                 }
1773         }
1774
1775         if (exp->exp_flvr_expire[0]) {
1776                 if (exp->exp_flvr_expire[0] >= cfs_time_current_sec()) {
1777                         if (flavor_allowed(&exp->exp_flvr_old[0], req)) {
1778                                 CDEBUG(D_SEC, "exp %p (%x|%x|%x): match the "
1779                                        "middle one ("CFS_DURATION_T")\n", exp,
1780                                        exp->exp_flvr.sf_rpc,
1781                                        exp->exp_flvr_old[0].sf_rpc,
1782                                        exp->exp_flvr_old[1].sf_rpc,
1783                                        exp->exp_flvr_expire[0] -
1784                                                 cfs_time_current_sec());
1785                                 spin_unlock(&exp->exp_lock);
1786                                 return 0;
1787                         }
1788                 } else {
1789                         CDEBUG(D_SEC, "mark middle expired\n");
1790                         exp->exp_flvr_expire[0] = 0;
1791                 }
1792                 CDEBUG(D_SEC, "exp %p (%x|%x|%x): %x does not match middle\n", exp,
1793                        exp->exp_flvr.sf_rpc,
1794                        exp->exp_flvr_old[0].sf_rpc, exp->exp_flvr_old[1].sf_rpc,
1795                        req->rq_flvr.sf_rpc);
1796         }
1797
1798         /* it doesn't match the current flavor; the only remaining chance to
1799          * accept it is matching an old flavor which has not yet expired. */
1800         if (exp->exp_flvr_changed == 0 && exp->exp_flvr_expire[1]) {
1801                 if (exp->exp_flvr_expire[1] >= cfs_time_current_sec()) {
1802                         if (flavor_allowed(&exp->exp_flvr_old[1], req)) {
1803                                 CDEBUG(D_SEC, "exp %p (%x|%x|%x): match the "
1804                                        "oldest one ("CFS_DURATION_T")\n", exp,
1805                                        exp->exp_flvr.sf_rpc,
1806                                        exp->exp_flvr_old[0].sf_rpc,
1807                                        exp->exp_flvr_old[1].sf_rpc,
1808                                        exp->exp_flvr_expire[1] -
1809                                                 cfs_time_current_sec());
1810                                 spin_unlock(&exp->exp_lock);
1811                                 return 0;
1812                         }
1813                 } else {
1814                         CDEBUG(D_SEC, "mark oldest expired\n");
1815                         exp->exp_flvr_expire[1] = 0;
1816                 }
1817                 CDEBUG(D_SEC, "exp %p (%x|%x|%x): %x has no match\n",
1818                        exp, exp->exp_flvr.sf_rpc,
1819                        exp->exp_flvr_old[0].sf_rpc, exp->exp_flvr_old[1].sf_rpc,
1820                        req->rq_flvr.sf_rpc);
1821         } else {
1822                 CDEBUG(D_SEC, "exp %p (%x|%x|%x): skip the last one\n",
1823                        exp, exp->exp_flvr.sf_rpc, exp->exp_flvr_old[0].sf_rpc,
1824                        exp->exp_flvr_old[1].sf_rpc);
1825         }
1826
1827         spin_unlock(&exp->exp_lock);
1828
1829         CWARN("exp %p(%s): req %p (%u|%u|%u|%u|%u) with "
1830               "unauthorized flavor %x, expect %x|%x(%+ld)|%x(%+ld)\n",
1831               exp, exp->exp_obd->obd_name,
1832               req, req->rq_auth_gss, req->rq_ctx_init, req->rq_ctx_fini,
1833               req->rq_auth_usr_root, req->rq_auth_usr_mdt, req->rq_flvr.sf_rpc,
1834               exp->exp_flvr.sf_rpc,
1835               exp->exp_flvr_old[0].sf_rpc,
1836               exp->exp_flvr_expire[0] ?
1837               (unsigned long) (exp->exp_flvr_expire[0] -
1838                                cfs_time_current_sec()) : 0,
1839               exp->exp_flvr_old[1].sf_rpc,
1840               exp->exp_flvr_expire[1] ?
1841               (unsigned long) (exp->exp_flvr_expire[1] -
1842                                cfs_time_current_sec()) : 0);
1843         return -EACCES;
1844 }
1845 EXPORT_SYMBOL(sptlrpc_target_export_check);
1846
1847 void sptlrpc_target_update_exp_flavor(struct obd_device *obd,
1848                                       struct sptlrpc_rule_set *rset)
1849 {
1850         struct obd_export       *exp;
1851         struct sptlrpc_flavor    new_flvr;
1852
1853         LASSERT(obd);
1854
1855         spin_lock(&obd->obd_dev_lock);
1856
1857         list_for_each_entry(exp, &obd->obd_exports, exp_obd_chain) {
1858                 if (exp->exp_connection == NULL)
1859                         continue;
1860
1861                 /* note if this export's flavor had just been updated
1862                  * (exp_flvr_changed == 1), this will override the
1863                  * previous update. */
1864                 spin_lock(&exp->exp_lock);
1865                 sptlrpc_target_choose_flavor(rset, exp->exp_sp_peer,
1866                                              exp->exp_connection->c_peer.nid,
1867                                              &new_flvr);
1868                 if (exp->exp_flvr_changed ||
1869                     memcmp(&new_flvr, &exp->exp_flvr, sizeof(new_flvr))) {
1870                         exp->exp_flvr_old[1] = new_flvr;
1871                         exp->exp_flvr_expire[1] = 0;
1872                         exp->exp_flvr_changed = 1;
1873                         exp->exp_flvr_adapt = 1;
1874
1875                         CDEBUG(D_SEC, "exp %p (%s): updated flavor %x->%x\n",
1876                                exp, sptlrpc_part2name(exp->exp_sp_peer),
1877                                exp->exp_flvr.sf_rpc,
1878                                exp->exp_flvr_old[1].sf_rpc);
1879                 }
1880                 spin_unlock(&exp->exp_lock);
1881         }
1882
1883         spin_unlock(&obd->obd_dev_lock);
1884 }
1885 EXPORT_SYMBOL(sptlrpc_target_update_exp_flavor);
1886
1887 static int sptlrpc_svc_check_from(struct ptlrpc_request *req, int svc_rc)
1888 {
1889         if (svc_rc == SECSVC_DROP)
1890                 return SECSVC_DROP;
1891
1892         switch (req->rq_sp_from) {
1893         case LUSTRE_SP_CLI:
1894         case LUSTRE_SP_MDT:
1895         case LUSTRE_SP_OST:
1896         case LUSTRE_SP_MGC:
1897         case LUSTRE_SP_MGS:
1898         case LUSTRE_SP_ANY:
1899                 break;
1900         default:
1901                 DEBUG_REQ(D_ERROR, req, "invalid source %u", req->rq_sp_from);
1902                 return SECSVC_DROP;
1903         }
1904
1905         if (!req->rq_auth_gss)
1906                 return svc_rc;
1907
1908         if (unlikely(req->rq_sp_from == LUSTRE_SP_ANY)) {
1909                 CERROR("source part not specified\n");
1910                 return SECSVC_DROP;
1911         }
1912
1913         /* from MDT, must be authenticated as MDT */
1914         if (unlikely(req->rq_sp_from == LUSTRE_SP_MDT &&
1915                      !req->rq_auth_usr_mdt)) {
1916                 DEBUG_REQ(D_ERROR, req, "fake source MDT");
1917                 return SECSVC_DROP;
1918         }
1919
1920         /* from OST, it must be a callback to MDT or CLI; the reverse sec
1921          * was set up from the mdt/root keytab, so it should be MDT or root. FIXME */
1922         if (unlikely(req->rq_sp_from == LUSTRE_SP_OST &&
1923                      !req->rq_auth_usr_mdt && !req->rq_auth_usr_root)) {
1924                 DEBUG_REQ(D_ERROR, req, "fake source OST");
1925                 return SECSVC_DROP;
1926         }
1927
1928         return svc_rc;
1929 }
1930
1931 int sptlrpc_svc_unwrap_request(struct ptlrpc_request *req)
1932 {
1933         struct ptlrpc_sec_policy *policy;
1934         struct lustre_msg *msg = req->rq_reqbuf;
1935         int rc;
1936         ENTRY;
1937
1938         LASSERT(msg);
1939         LASSERT(req->rq_reqmsg == NULL);
1940         LASSERT(req->rq_repmsg == NULL);
1941
1942         req->rq_sp_from = LUSTRE_SP_ANY;
1943         req->rq_auth_uid = INVALID_UID;
1944         req->rq_auth_mapped_uid = INVALID_UID;
1945
1946         if (req->rq_reqdata_len < sizeof(struct lustre_msg)) {
1947                 CERROR("request size %d too small\n", req->rq_reqdata_len);
1948                 RETURN(SECSVC_DROP);
1949         }
1950
1951         /*
1952          * v2 message; a mismatched magic means it arrived byte-swapped.
1953          */
1954         if (msg->lm_magic == LUSTRE_MSG_MAGIC_V2)
1955                 req->rq_flvr.sf_rpc = WIRE_FLVR_RPC(msg->lm_secflvr);
1956         else
1957                 req->rq_flvr.sf_rpc = WIRE_FLVR_RPC(__swab32(msg->lm_secflvr));
1958
1959         /* unpack the wrapper message if the policy is not null */
1960         if ((RPC_FLVR_POLICY(req->rq_flvr.sf_rpc) != SPTLRPC_POLICY_NULL) &&
1961              lustre_unpack_msg(msg, req->rq_reqdata_len))
1962                 RETURN(SECSVC_DROP);
1963
1964         policy = sptlrpc_rpcflavor2policy(req->rq_flvr.sf_rpc);
1965         if (!policy) {
1966                 CERROR("unsupported rpc flavor %x\n", req->rq_flvr.sf_rpc);
1967                 RETURN(SECSVC_DROP);
1968         }
1969
1970         LASSERT(policy->sp_sops->accept);
1971         rc = policy->sp_sops->accept(req);
1972
1973         LASSERT(req->rq_reqmsg || rc != SECSVC_OK);
1974         sptlrpc_policy_put(policy);
1975
1976         /* sanity check for the request source */
1977         rc = sptlrpc_svc_check_from(req, rc);
1978
1979         /* FIXME move to proper place */
1980         if (rc == SECSVC_OK) {
1981                 __u32 opc = lustre_msg_get_opc(req->rq_reqmsg);
1982
1983                 if (opc == OST_WRITE)
1984                         req->rq_bulk_write = 1;
1985                 else if (opc == OST_READ)
1986                         req->rq_bulk_read = 1;
1987         }
1988
1989         LASSERT(req->rq_svc_ctx || rc == SECSVC_DROP);
1990         RETURN(rc);
1991 }
1992
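/*
 * Rough server-side sequence (a sketch of how the helpers around here are
 * expected to combine, not a verbatim copy of the service code):
 *
 *      rc = sptlrpc_svc_unwrap_request(req);
 *      if (rc != SECSVC_OK)
 *              ... drop or otherwise reject the request ...
 *      rc = sptlrpc_svc_alloc_rs(req, msglen);
 *      ... fill the reply in req->rq_reply_state->rs_msg ...
 *      rc = sptlrpc_svc_wrap_reply(req);
 *      ... sptlrpc_svc_free_rs() eventually releases the reply state ...
 */
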
1993 int sptlrpc_svc_alloc_rs(struct ptlrpc_request *req,
1994                          int msglen)
1995 {
1996         struct ptlrpc_sec_policy *policy;
1997         struct ptlrpc_reply_state *rs;
1998         int rc;
1999         ENTRY;
2000
2001         LASSERT(req->rq_svc_ctx);
2002         LASSERT(req->rq_svc_ctx->sc_policy);
2003
2004         policy = req->rq_svc_ctx->sc_policy;
2005         LASSERT(policy->sp_sops->alloc_rs);
2006
2007         rc = policy->sp_sops->alloc_rs(req, msglen);
2008         if (unlikely(rc == -ENOMEM)) {
2009                 /* failed alloc, try emergency pool */
2010                 rs = lustre_get_emerg_rs(req->rq_rqbd->rqbd_service);
2011                 if (rs == NULL)
2012                         RETURN(-ENOMEM);
2013
2014                 req->rq_reply_state = rs;
2015                 rc = policy->sp_sops->alloc_rs(req, msglen);
2016                 if (rc) {
2017                         lustre_put_emerg_rs(rs);
2018                         req->rq_reply_state = NULL;
2019                 }
2020         }
2021
2022         LASSERT(rc != 0 ||
2023                 (req->rq_reply_state && req->rq_reply_state->rs_msg));
2024
2025         RETURN(rc);
2026 }
2027
2028 int sptlrpc_svc_wrap_reply(struct ptlrpc_request *req)
2029 {
2030         struct ptlrpc_sec_policy *policy;
2031         int rc;
2032         ENTRY;
2033
2034         LASSERT(req->rq_svc_ctx);
2035         LASSERT(req->rq_svc_ctx->sc_policy);
2036
2037         policy = req->rq_svc_ctx->sc_policy;
2038         LASSERT(policy->sp_sops->authorize);
2039
2040         rc = policy->sp_sops->authorize(req);
2041         LASSERT(rc || req->rq_reply_state->rs_repdata_len);
2042
2043         RETURN(rc);
2044 }
2045
2046 void sptlrpc_svc_free_rs(struct ptlrpc_reply_state *rs)
2047 {
2048         struct ptlrpc_sec_policy *policy;
2049         unsigned int prealloc;
2050         ENTRY;
2051
2052         LASSERT(rs->rs_svc_ctx);
2053         LASSERT(rs->rs_svc_ctx->sc_policy);
2054
2055         policy = rs->rs_svc_ctx->sc_policy;
2056         LASSERT(policy->sp_sops->free_rs);
2057
2058         prealloc = rs->rs_prealloc;
2059         policy->sp_sops->free_rs(rs);
2060
2061         if (prealloc)
2062                 lustre_put_emerg_rs(rs);
2063         EXIT;
2064 }
2065
2066 void sptlrpc_svc_ctx_addref(struct ptlrpc_request *req)
2067 {
2068         struct ptlrpc_svc_ctx *ctx = req->rq_svc_ctx;
2069
2070         if (ctx == NULL)
2071                 return;
2072
2073         LASSERT(atomic_read(&ctx->sc_refcount) > 0);
2074         atomic_inc(&ctx->sc_refcount);
2075 }
2076
2077 void sptlrpc_svc_ctx_decref(struct ptlrpc_request *req)
2078 {
2079         struct ptlrpc_svc_ctx *ctx = req->rq_svc_ctx;
2080
2081         if (ctx == NULL)
2082                 return;
2083
2084         LASSERT(atomic_read(&ctx->sc_refcount) > 0);
2085         if (atomic_dec_and_test(&ctx->sc_refcount)) {
2086                 if (ctx->sc_policy->sp_sops->free_ctx)
2087                         ctx->sc_policy->sp_sops->free_ctx(ctx);
2088         }
2089         req->rq_svc_ctx = NULL;
2090 }
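
/*
 * Note on the two helpers above (describes their visible behaviour, not a
 * specific caller): sptlrpc_svc_ctx_addref() and sptlrpc_svc_ctx_decref()
 * are expected to be paired; the final decref frees the context through the
 * policy's free_ctx hook, and decref always clears req->rq_svc_ctx.
 */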
2091
2092 void sptlrpc_svc_ctx_invalidate(struct ptlrpc_request *req)
2093 {
2094         struct ptlrpc_svc_ctx *ctx = req->rq_svc_ctx;
2095
2096         if (ctx == NULL)
2097                 return;
2098
2099         LASSERT(atomic_read(&ctx->sc_refcount) > 0);
2100         if (ctx->sc_policy->sp_sops->invalidate_ctx)
2101                 ctx->sc_policy->sp_sops->invalidate_ctx(ctx);
2102 }
2103 EXPORT_SYMBOL(sptlrpc_svc_ctx_invalidate);
2104
2105 /****************************************
2106  * bulk security                        *
2107  ****************************************/
2108
2109 int sptlrpc_cli_wrap_bulk(struct ptlrpc_request *req,
2110                           struct ptlrpc_bulk_desc *desc)
2111 {
2112         struct ptlrpc_cli_ctx *ctx;
2113
2114         if (!req->rq_pack_bulk)
2115                 return 0;
2116
2117         LASSERT(req->rq_bulk_read || req->rq_bulk_write);
2118
2119         ctx = req->rq_cli_ctx;
2120         if (ctx->cc_ops->wrap_bulk)
2121                 return ctx->cc_ops->wrap_bulk(ctx, req, desc);
2122         return 0;
2123 }
2124 EXPORT_SYMBOL(sptlrpc_cli_wrap_bulk);
2125
2126 static
2127 void pga_to_bulk_desc(int nob, obd_count pg_count, struct brw_page **pga,
2128                       struct ptlrpc_bulk_desc *desc)
2129 {
2130         int i;
2131
2132         LASSERT(pga);
2133         LASSERT(*pga);
2134
2135         for (i = 0; i < pg_count && nob > 0; i++) {
2136 #ifdef __KERNEL__
2137                 desc->bd_iov[i].kiov_page = pga[i]->pg;
2138                 desc->bd_iov[i].kiov_len = pga[i]->count > nob ?
2139                                            nob : pga[i]->count;
2140                 desc->bd_iov[i].kiov_offset = pga[i]->off & ~CFS_PAGE_MASK;
2141 #else
2142                 /* FIXME currently liblustre doesn't support bulk encryption.
2143                  * if we ever add it, re-check this: the following may not be right. */
2144                 LASSERTF(0, "Bulk encryption not implemented for liblustre\n");
2145                 desc->bd_iov[i].iov_base = pga[i]->pg->addr;
2146                 desc->bd_iov[i].iov_len = pga[i]->count > nob ?
2147                                            nob : pga[i]->count;
2148 #endif
2149
2150                 desc->bd_iov_count++;
2151                 nob -= pga[i]->count;
2152         }
2153 }
2154
2155 int sptlrpc_cli_unwrap_bulk_read(struct ptlrpc_request *req,
2156                                  int nob, obd_count pg_count,
2157                                  struct brw_page **pga)
2158 {
2159         struct ptlrpc_bulk_desc *desc;
2160         struct ptlrpc_cli_ctx *ctx;
2161         int rc = 0;
2162
2163         if (!req->rq_pack_bulk)
2164                 return 0;
2165
2166         LASSERT(req->rq_bulk_read && !req->rq_bulk_write);
2167
2168         OBD_ALLOC(desc, offsetof(struct ptlrpc_bulk_desc, bd_iov[pg_count]));
2169         if (desc == NULL) {
2170                 CERROR("out of memory, can't verify bulk read data\n");
2171                 return -ENOMEM;
2172         }
2173
2174         pga_to_bulk_desc(nob, pg_count, pga, desc);
2175
2176         ctx = req->rq_cli_ctx;
2177         if (ctx->cc_ops->unwrap_bulk)
2178                 rc = ctx->cc_ops->unwrap_bulk(ctx, req, desc);
2179
2180         OBD_FREE(desc, offsetof(struct ptlrpc_bulk_desc, bd_iov[pg_count]));
2181
2182         return rc;
2183 }
2184 EXPORT_SYMBOL(sptlrpc_cli_unwrap_bulk_read);
2185
2186 int sptlrpc_cli_unwrap_bulk_write(struct ptlrpc_request *req,
2187                                   struct ptlrpc_bulk_desc *desc)
2188 {
2189         struct ptlrpc_cli_ctx *ctx;
2190
2191         if (!req->rq_pack_bulk)
2192                 return 0;
2193
2194         LASSERT(!req->rq_bulk_read && req->rq_bulk_write);
2195
2196         ctx = req->rq_cli_ctx;
2197         if (ctx->cc_ops->unwrap_bulk)
2198                 return ctx->cc_ops->unwrap_bulk(ctx, req, desc);
2199
2200         return 0;
2201 }
2202 EXPORT_SYMBOL(sptlrpc_cli_unwrap_bulk_write);
2203
2204 int sptlrpc_svc_wrap_bulk(struct ptlrpc_request *req,
2205                           struct ptlrpc_bulk_desc *desc)
2206 {
2207         struct ptlrpc_svc_ctx *ctx;
2208
2209         if (!req->rq_pack_bulk)
2210                 return 0;
2211
2212         LASSERT(req->rq_bulk_read || req->rq_bulk_write);
2213
2214         ctx = req->rq_svc_ctx;
2215         if (ctx->sc_policy->sp_sops->wrap_bulk)
2216                 return ctx->sc_policy->sp_sops->wrap_bulk(req, desc);
2217
2218         return 0;
2219 }
2220 EXPORT_SYMBOL(sptlrpc_svc_wrap_bulk);
2221
2222 int sptlrpc_svc_unwrap_bulk(struct ptlrpc_request *req,
2223                             struct ptlrpc_bulk_desc *desc)
2224 {
2225         struct ptlrpc_svc_ctx *ctx;
2226
2227         if (!req->rq_pack_bulk)
2228                 return 0;
2229
2230         LASSERT(req->rq_bulk_read || req->rq_bulk_write);
2231
2232         ctx = req->rq_svc_ctx;
2233         if (ctx->sc_policy->sp_sops->unwrap_bulk)
2234                 return ctx->sc_policy->sp_sops->unwrap_bulk(req, desc);
2235
2236         return 0;
2237 }
2238 EXPORT_SYMBOL(sptlrpc_svc_unwrap_bulk);
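
/*
 * Rough map of the bulk helpers above (an interpretation of the
 * rq_bulk_read/rq_bulk_write checks, not a list of every call site): for a
 * bulk write the client wraps with sptlrpc_cli_wrap_bulk() and the server
 * verifies with sptlrpc_svc_unwrap_bulk(); for a bulk read the server wraps
 * with sptlrpc_svc_wrap_bulk() and the client verifies with
 * sptlrpc_cli_unwrap_bulk_read().  sptlrpc_cli_unwrap_bulk_write() is the
 * client-side check on the write path.  All of them are no-ops unless
 * req->rq_pack_bulk is set.
 */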
2239
2240
2241 /****************************************
2242  * user descriptor helpers              *
2243  ****************************************/
2244
2245 int sptlrpc_current_user_desc_size(void)
2246 {
2247         int ngroups;
2248
2249 #ifdef __KERNEL__
2250         ngroups = current_ngroups;
2251
2252         if (ngroups > LUSTRE_MAX_GROUPS)
2253                 ngroups = LUSTRE_MAX_GROUPS;
2254 #else
2255         ngroups = 0;
2256 #endif
2257         return sptlrpc_user_desc_size(ngroups);
2258 }
2259 EXPORT_SYMBOL(sptlrpc_current_user_desc_size);
2260
2261 int sptlrpc_pack_user_desc(struct lustre_msg *msg, int offset)
2262 {
2263         struct ptlrpc_user_desc *pud;
2264
2265         pud = lustre_msg_buf(msg, offset, 0);
2266
2267         pud->pud_uid = cfs_current()->uid;
2268         pud->pud_gid = cfs_current()->gid;
2269         pud->pud_fsuid = cfs_current()->fsuid;
2270         pud->pud_fsgid = cfs_current()->fsgid;
2271         pud->pud_cap = cfs_curproc_cap_pack();
2272         pud->pud_ngroups = (msg->lm_buflens[offset] - sizeof(*pud)) / 4;
2273
2274 #ifdef __KERNEL__
2275         task_lock(current);
2276         if (pud->pud_ngroups > current_ngroups)
2277                 pud->pud_ngroups = current_ngroups;
2278         memcpy(pud->pud_groups, cfs_current()->group_info->blocks[0],
2279                pud->pud_ngroups * sizeof(__u32));
2280         task_unlock(current);
2281 #endif
2282
2283         return 0;
2284 }
2285 EXPORT_SYMBOL(sptlrpc_pack_user_desc);
2286
2287 int sptlrpc_unpack_user_desc(struct lustre_msg *msg, int offset)
2288 {
2289         struct ptlrpc_user_desc *pud;
2290         int                      i;
2291
2292         pud = lustre_msg_buf(msg, offset, sizeof(*pud));
2293         if (!pud)
2294                 return -EINVAL;
2295
2296         if (lustre_msg_swabbed(msg)) {
2297                 __swab32s(&pud->pud_uid);
2298                 __swab32s(&pud->pud_gid);
2299                 __swab32s(&pud->pud_fsuid);
2300                 __swab32s(&pud->pud_fsgid);
2301                 __swab32s(&pud->pud_cap);
2302                 __swab32s(&pud->pud_ngroups);
2303         }
2304
2305         if (pud->pud_ngroups > LUSTRE_MAX_GROUPS) {
2306                 CERROR("%u groups are too many\n", pud->pud_ngroups);
2307                 return -EINVAL;
2308         }
2309
2310         if (sizeof(*pud) + pud->pud_ngroups * sizeof(__u32) >
2311             msg->lm_buflens[offset]) {
2312                 CERROR("%u groups claimed but bufsize is only %u\n",
2313                        pud->pud_ngroups, msg->lm_buflens[offset]);
2314                 return -EINVAL;
2315         }
2316
2317         if (lustre_msg_swabbed(msg)) {
2318                 for (i = 0; i < pud->pud_ngroups; i++)
2319                         __swab32s(&pud->pud_groups[i]);
2320         }
2321
2322         return 0;
2323 }
2324 EXPORT_SYMBOL(sptlrpc_unpack_user_desc);
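
/*
 * Illustrative round trip (a sketch; the segment index below is made up and
 * the caller is assumed to reserve a dedicated message segment for the
 * descriptor):
 *
 *   client:  buflens[2] = sptlrpc_current_user_desc_size();
 *            ... allocate and pack the request message ...
 *            sptlrpc_pack_user_desc(msg, 2);
 *
 *   server:  rc = sptlrpc_unpack_user_desc(msg, 2);
 *            if (rc)
 *                    ... reject: malformed or oversized descriptor ...
 */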
2325
2326 /****************************************
2327  * misc helpers                         *
2328  ****************************************/
2329
2330 const char * sec2target_str(struct ptlrpc_sec *sec)
2331 {
2332         if (!sec || !sec->ps_import || !sec->ps_import->imp_obd)
2333                 return "*";
2334         if (sec_is_reverse(sec))
2335                 return "c";
2336         return obd_uuid2str(&sec->ps_import->imp_obd->u.cli.cl_target_uuid);
2337 }
2338 EXPORT_SYMBOL(sec2target_str);
2339
2340 /****************************************
2341  * crypto API helper/alloc blkcipher    *
2342  ****************************************/
2343
2344 /****************************************
2345  * initialize/finalize                  *
2346  ****************************************/
2347
2348 int __init sptlrpc_init(void)
2349 {
2350         int rc;
2351
2352         rwlock_init(&policy_lock);
2353
2354         rc = sptlrpc_gc_init();
2355         if (rc)
2356                 goto out;
2357
2358         rc = sptlrpc_conf_init();
2359         if (rc)
2360                 goto out_gc;
2361
2362         rc = sptlrpc_enc_pool_init();
2363         if (rc)
2364                 goto out_conf;
2365
2366         rc = sptlrpc_null_init();
2367         if (rc)
2368                 goto out_pool;
2369
2370         rc = sptlrpc_plain_init();
2371         if (rc)
2372                 goto out_null;
2373
2374         rc = sptlrpc_lproc_init();
2375         if (rc)
2376                 goto out_plain;
2377
2378         return 0;
2379
2380 out_plain:
2381         sptlrpc_plain_fini();
2382 out_null:
2383         sptlrpc_null_fini();
2384 out_pool:
2385         sptlrpc_enc_pool_fini();
2386 out_conf:
2387         sptlrpc_conf_fini();
2388 out_gc:
2389         sptlrpc_gc_fini();
2390 out:
2391         return rc;
2392 }
2393
2394 void __exit sptlrpc_fini(void)
2395 {
2396         sptlrpc_lproc_fini();
2397         sptlrpc_plain_fini();
2398         sptlrpc_null_fini();
2399         sptlrpc_enc_pool_fini();
2400         sptlrpc_conf_fini();
2401         sptlrpc_gc_fini();
2402 }