fs/lustre-release.git, branch b_new_cmd: lustre/ptlrpc/sec.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * Copyright (C) 2004-2006 Cluster File Systems, Inc.
5  *
6  *   This file is part of Lustre, http://www.lustre.org.
7  *
8  *   Lustre is free software; you can redistribute it and/or
9  *   modify it under the terms of version 2 of the GNU General Public
10  *   License as published by the Free Software Foundation.
11  *
12  *   Lustre is distributed in the hope that it will be useful,
13  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  *   GNU General Public License for more details.
16  *
17  *   You should have received a copy of the GNU General Public License
18  *   along with Lustre; if not, write to the Free Software
19  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20  */
21
22 #ifndef EXPORT_SYMTAB
23 #define EXPORT_SYMTAB
24 #endif
25 #define DEBUG_SUBSYSTEM S_SEC
26
27 #include <libcfs/libcfs.h>
28 #ifndef __KERNEL__
29 #include <liblustre.h>
30 #include <libcfs/list.h>
31 #else
32 #include <linux/crypto.h>
33 #endif
34
35 #include <obd.h>
36 #include <obd_class.h>
37 #include <obd_support.h>
38 #include <lustre_net.h>
39 #include <lustre_import.h>
40 #include <lustre_dlm.h>
41 #include <lustre_sec.h>
42
43 #include "ptlrpc_internal.h"
44
45 static void sptlrpc_sec_destroy(struct ptlrpc_sec *sec);
46 static int sptlrpc_sec_destroy_ctx(struct ptlrpc_sec *sec,
47                                    struct ptlrpc_cli_ctx *ctx);
48 static void sptlrpc_ctx_refresh(struct ptlrpc_cli_ctx *ctx);
49
50 /***********************************************
51  * policy registration                        *
52  ***********************************************/
53
54 static spinlock_t policy_lock = SPIN_LOCK_UNLOCKED;
55 static struct ptlrpc_sec_policy *policies[SPTLRPC_POLICY_MAX] = {
56         NULL,
57 };
58
59 int sptlrpc_register_policy(struct ptlrpc_sec_policy *policy)
60 {
61         __u32 number = policy->sp_policy;
62
63         LASSERT(policy->sp_name);
64         LASSERT(policy->sp_cops);
65         LASSERT(policy->sp_sops);
66
67         if (number >= SPTLRPC_POLICY_MAX)
68                 return -EINVAL;
69
70         spin_lock(&policy_lock);
71         if (policies[number]) {
72                 spin_unlock(&policy_lock);
73                 return -EALREADY;
74         }
75         policies[number] = policy;
76         spin_unlock(&policy_lock);
77
78         CDEBUG(D_SEC, "%s: registered\n", policy->sp_name);
79         return 0;
80 }
81 EXPORT_SYMBOL(sptlrpc_register_policy);
82
83 int sptlrpc_unregister_policy(struct ptlrpc_sec_policy *policy)
84 {
85         __u32 number = policy->sp_policy;
86
87         LASSERT(number < SPTLRPC_POLICY_MAX);
88
89         spin_lock(&policy_lock);
90         if (!policies[number]) {
91                 spin_unlock(&policy_lock);
92                 CERROR("%s: already unregistered\n", policy->sp_name);
93                 return -EINVAL;
94         }
95
96         LASSERT(policies[number] == policy);
97         policies[number] = NULL;
98         spin_unlock(&policy_lock);
99
100         CDEBUG(D_SEC, "%s: unregistered\n", policy->sp_name);
101         return 0;
102 }
103 EXPORT_SYMBOL(sptlrpc_unregister_policy);
104
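/*
 * Usage sketch (hedged, not from this file): a policy module registers
 * itself on load and unregisters on unload.  The field names follow the
 * LASSERT()s above; "null_policy" and its two ops tables are hypothetical
 * stand-ins for a real policy's tables.
 *
 *      static struct ptlrpc_sec_policy null_policy = {
 *              .sp_name   = "sec.null",
 *              .sp_owner  = THIS_MODULE,
 *              .sp_policy = SPTLRPC_POLICY_NULL,
 *              .sp_cops   = &null_cli_ops,
 *              .sp_sops   = &null_svc_ops,
 *      };
 *
 *      static int __init null_sec_init(void)
 *      {
 *              return sptlrpc_register_policy(&null_policy);
 *      }
 *
 *      static void __exit null_sec_exit(void)
 *      {
 *              sptlrpc_unregister_policy(&null_policy);
 *      }
 */
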
105 static
106 struct ptlrpc_sec_policy * sptlrpc_flavor2policy(ptlrpc_flavor_t flavor)
107 {
108         static int load_module = 0;
109         struct ptlrpc_sec_policy *policy;
110         __u32 number = SEC_FLAVOR_POLICY(flavor);
111
112         if (number >= SPTLRPC_POLICY_MAX)
113                 return NULL;
114
115 again:
116         spin_lock(&policy_lock);
117         policy = policies[number];
118         if (policy && !try_module_get(policy->sp_owner))
119                 policy = NULL;
120         spin_unlock(&policy_lock);
121
122         /* on failure, try to load the gss module, but only once */
123         if (policy == NULL && load_module == 0 &&
124             number == SPTLRPC_POLICY_GSS) {
125                 load_module = 1;
126                 if (request_module("ptlrpc_gss") == 0)
127                         goto again;
128         }
129
130         return policy;
131 }
132
133 ptlrpc_flavor_t sptlrpc_name2flavor(const char *name)
134 {
135         if (!strcmp(name, "null"))
136                 return SPTLRPC_FLVR_NULL;
137         if (!strcmp(name, "plain"))
138                 return SPTLRPC_FLVR_PLAIN;
139         if (!strcmp(name, "krb5"))
140                 return SPTLRPC_FLVR_KRB5;
141         if (!strcmp(name, "krb5i"))
142                 return SPTLRPC_FLVR_KRB5I;
143         if (!strcmp(name, "krb5p"))
144                 return SPTLRPC_FLVR_KRB5P;
145
146         return SPTLRPC_FLVR_INVALID;
147 }
148 EXPORT_SYMBOL(sptlrpc_name2flavor);
149
150 char *sptlrpc_flavor2name(ptlrpc_flavor_t flavor)
151 {
152         switch (flavor) {
153         case SPTLRPC_FLVR_NULL:
154                 return "null";
155         case SPTLRPC_FLVR_PLAIN:
156                 return "plain";
157         case SPTLRPC_FLVR_KRB5:
158                 return "krb5";
159         case SPTLRPC_FLVR_KRB5I:
160                 return "krb5i";
161         case SPTLRPC_FLVR_KRB5P:
162                 return "krb5p";
163         default:
164                 CERROR("invalid flavor 0x%x(p%u,s%u,v%u)\n", flavor,
165                        SEC_FLAVOR_POLICY(flavor), SEC_FLAVOR_SUBPOLICY(flavor),
166                        SEC_FLAVOR_SVC(flavor));
167         }
168         return "UNKNOWN";
169 }
170 EXPORT_SYMBOL(sptlrpc_flavor2name);
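
/*
 * Example (hedged, added note): for the names handled above, the two
 * converters are inverses, e.g.
 * sptlrpc_flavor2name(sptlrpc_name2flavor("krb5i")) yields "krb5i";
 * an unknown name maps to SPTLRPC_FLVR_INVALID and an unknown flavor
 * maps to "UNKNOWN".
 */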
171
172 /***********************************************
173  * context helpers                             *
174  * internal APIs                               *
175  * cache management                            *
176  ***********************************************/
177
178 static inline
179 unsigned long ctx_status(struct ptlrpc_cli_ctx *ctx)
180 {
181         smp_mb();
182         return (ctx->cc_flags & PTLRPC_CTX_STATUS_MASK);
183 }
184
185 static inline
186 int ctx_is_uptodate(struct ptlrpc_cli_ctx *ctx)
187 {
188         return (ctx_status(ctx) == PTLRPC_CTX_UPTODATE);
189 }
190
191 static inline
192 int ctx_is_refreshed(struct ptlrpc_cli_ctx *ctx)
193 {
194         return (ctx_status(ctx) != 0);
195 }
196
197 static inline
198 int ctx_is_dead(struct ptlrpc_cli_ctx *ctx)
199 {
200         smp_mb();
201         return ((ctx->cc_flags & (PTLRPC_CTX_DEAD | PTLRPC_CTX_ERROR)) != 0);
202 }
203
204 static inline
205 int ctx_is_eternal(struct ptlrpc_cli_ctx *ctx)
206 {
207         smp_mb();
208         return ((ctx->cc_flags & PTLRPC_CTX_ETERNAL) != 0);
209 }
210
211 static
212 int ctx_expire(struct ptlrpc_cli_ctx *ctx)
213 {
214         LASSERT(atomic_read(&ctx->cc_refcount));
215
216         if (!test_and_set_bit(PTLRPC_CTX_DEAD_BIT, &ctx->cc_flags)) {
217                 cfs_time_t now = cfs_time_current_sec();
218
219                 smp_mb();
220                 clear_bit(PTLRPC_CTX_UPTODATE_BIT, &ctx->cc_flags);
221
222                 if (ctx->cc_expire && cfs_time_aftereq(now, ctx->cc_expire))
223                         CWARN("ctx %p(%u->%s): expired (%lds ago)\n",
224                               ctx, ctx->cc_vcred.vc_uid,
225                               sec2target_str(ctx->cc_sec),
226                               cfs_time_sub(now, ctx->cc_expire));
227                 else
228                         CWARN("ctx %p(%u->%s): forced to die (%lds remaining)\n",
229                               ctx, ctx->cc_vcred.vc_uid,
230                               sec2target_str(ctx->cc_sec),
231                               ctx->cc_expire == 0 ? 0 :
232                               cfs_time_sub(ctx->cc_expire, now));
233
234                 return 1;
235         }
236         return 0;
237 }
238
239 static
240 void ctx_enhash(struct ptlrpc_cli_ctx *ctx, struct hlist_head *hash)
241 {
242         set_bit(PTLRPC_CTX_HASHED_BIT, &ctx->cc_flags);
243         atomic_inc(&ctx->cc_refcount);
244         hlist_add_head(&ctx->cc_hash, hash);
245 }
246
247 static
248 void ctx_unhash(struct ptlrpc_cli_ctx *ctx, struct hlist_head *freelist)
249 {
250         LASSERT_SPIN_LOCKED(&ctx->cc_sec->ps_lock);
251         LASSERT(atomic_read(&ctx->cc_refcount) > 0);
252         LASSERT(test_bit(PTLRPC_CTX_HASHED_BIT, &ctx->cc_flags));
253         LASSERT(!hlist_unhashed(&ctx->cc_hash));
254
255         clear_bit(PTLRPC_CTX_HASHED_BIT, &ctx->cc_flags);
256
257         if (atomic_dec_and_test(&ctx->cc_refcount)) {
258                 __hlist_del(&ctx->cc_hash);
259                 hlist_add_head(&ctx->cc_hash, freelist);
260         } else
261                 hlist_del_init(&ctx->cc_hash);
262 }
263
264 /*
265  * return 1 if the context is dead.
266  */
267 static
268 int ctx_check_death(struct ptlrpc_cli_ctx *ctx, struct hlist_head *freelist)
269 {
270         if (unlikely(ctx_is_dead(ctx)))
271                 goto unhash;
272
273         /* cc_expire == 0 means never expire. a newly created gss context
274          * also has 0 expiration while its upcall is still in progress.
275          */
276         smp_mb();
277         if (ctx->cc_expire == 0)
278                 return 0;
279
280         /* check real expiration */
281         smp_mb();
282         if (cfs_time_after(ctx->cc_expire, cfs_time_current_sec()))
283                 return 0;
284
285         ctx_expire(ctx);
286
287 unhash:
288         if (freelist)
289                 ctx_unhash(ctx, freelist);
290
291         return 1;
292 }
293
294 static inline
295 int ctx_check_death_locked(struct ptlrpc_cli_ctx *ctx,
296                            struct hlist_head *freelist)
297 {
298         LASSERT(ctx->cc_sec);
299         LASSERT(atomic_read(&ctx->cc_refcount) > 0);
300         LASSERT_SPIN_LOCKED(&ctx->cc_sec->ps_lock);
301         LASSERT(test_bit(PTLRPC_CTX_HASHED_BIT, &ctx->cc_flags));
302
303         return ctx_check_death(ctx, freelist);
304 }
305
306 static
307 int ctx_check_uptodate(struct ptlrpc_cli_ctx *ctx)
308 {
309         LASSERT(ctx->cc_sec);
310         LASSERT(atomic_read(&ctx->cc_refcount) > 0);
311
312         if (!ctx_check_death(ctx, NULL) && ctx_is_uptodate(ctx))
313                 return 1;
314         return 0;
315 }
316
317 static inline
318 int ctx_match(struct ptlrpc_cli_ctx *ctx, struct vfs_cred *vcred)
319 {
320         /* a small optimization for the null policy */
321         if (!ctx->cc_ops->match)
322                 return 1;
323
324         return ctx->cc_ops->match(ctx, vcred);
325 }
326
327 static
328 void ctx_list_destroy(struct hlist_head *head)
329 {
330         struct ptlrpc_cli_ctx *ctx;
331
332         while (!hlist_empty(head)) {
333                 ctx = hlist_entry(head->first, struct ptlrpc_cli_ctx, cc_hash);
334
335                 LASSERT(atomic_read(&ctx->cc_refcount) == 0);
336                 LASSERT(test_bit(PTLRPC_CTX_HASHED_BIT, &ctx->cc_flags) == 0);
337
338                 hlist_del_init(&ctx->cc_hash);
339                 sptlrpc_sec_destroy_ctx(ctx->cc_sec, ctx);
340         }
341 }
342
343 static
344 void ctx_cache_gc(struct ptlrpc_sec *sec, struct hlist_head *freelist)
345 {
346         struct ptlrpc_cli_ctx *ctx;
347         struct hlist_node *pos, *next;
348         int i;
349         ENTRY;
350
351         CDEBUG(D_SEC, "do gc on sec %s@%p\n", sec->ps_policy->sp_name, sec);
352
353         for (i = 0; i < sec->ps_ccache_size; i++) {
354                 hlist_for_each_entry_safe(ctx, pos, next,
355                                           &sec->ps_ccache[i], cc_hash)
356                         ctx_check_death_locked(ctx, freelist);
357         }
358
359         sec->ps_gc_next = cfs_time_current_sec() + sec->ps_gc_interval;
360         EXIT;
361 }
362
363 /*
364  * @uid: which user to flush; -1 means flush all users.
365  * @grace: mark contexts DEAD and allow a graceful destroy, e.g. notify
366  *         the server side, etc.
367  * @force: also flush busy entries.
368  *
369  * return the number of busy contexts encountered.
370  *
371  * In any case, never touch "eternal" contexts.
372  */
373 static
374 int ctx_cache_flush(struct ptlrpc_sec *sec, uid_t uid, int grace, int force)
375 {
376         struct ptlrpc_cli_ctx *ctx;
377         struct hlist_node *pos, *next;
378         HLIST_HEAD(freelist);
379         int i, busy = 0;
380         ENTRY;
381
382         might_sleep_if(grace);
383
384         spin_lock(&sec->ps_lock);
385         for (i = 0; i < sec->ps_ccache_size; i++) {
386                 hlist_for_each_entry_safe(ctx, pos, next,
387                                           &sec->ps_ccache[i], cc_hash) {
388                         LASSERT(atomic_read(&ctx->cc_refcount) > 0);
389
390                         if (ctx_is_eternal(ctx))
391                                 continue;
392                         if (uid != -1 && uid != ctx->cc_vcred.vc_uid)
393                                 continue;
394
395                         if (atomic_read(&ctx->cc_refcount) > 1) {
396                                 busy++;
397                                 if (!force)
398                                         continue;
399
400                                 CWARN("flush busy(%d) ctx %p(%u->%s) by force, "
401                                       "grace %d\n",
402                                       atomic_read(&ctx->cc_refcount),
403                                       ctx, ctx->cc_vcred.vc_uid,
404                                       sec2target_str(ctx->cc_sec), grace);
405                         }
406                         ctx_unhash(ctx, &freelist);
407
408                         set_bit(PTLRPC_CTX_DEAD_BIT, &ctx->cc_flags);
409                         if (!grace)
410                                 clear_bit(PTLRPC_CTX_UPTODATE_BIT,
411                                           &ctx->cc_flags);
412                 }
413         }
414         spin_unlock(&sec->ps_lock);
415
416         ctx_list_destroy(&freelist);
417         RETURN(busy);
418 }
419
420 static inline
421 unsigned int ctx_hash_index(struct ptlrpc_sec *sec, __u64 key)
422 {
423         return (unsigned int) (key & (sec->ps_ccache_size - 1));
424 }
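
/*
 * Note (added, hedged): the mask above only works if ps_ccache_size is a
 * power of two.  Worked example with a hypothetical size of 64 buckets:
 * uid 1000 hashes to bucket 1000 & 63 == 40.
 */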
425
426 /*
427  * return the matched context. If it's newly created, we also give it the
428  * first push to refresh. return NULL on error.
429  */
430 static
431 struct ptlrpc_cli_ctx * ctx_cache_lookup(struct ptlrpc_sec *sec,
432                                          struct vfs_cred *vcred,
433                                          int create, int remove_dead)
434 {
435         struct ptlrpc_cli_ctx *ctx = NULL, *new = NULL;
436         struct hlist_head *hash_head;
437         struct hlist_node *pos, *next;
438         HLIST_HEAD(freelist);
439         unsigned int hash, gc = 0, found = 0;
440         ENTRY;
441
442         might_sleep();
443
444         hash = ctx_hash_index(sec, (__u64) vcred->vc_uid);
445         LASSERT(hash < sec->ps_ccache_size);
446         hash_head = &sec->ps_ccache[hash];
447
448 retry:
449         spin_lock(&sec->ps_lock);
450
451         /* gc_next == 0 means never do gc */
452         if (remove_dead && sec->ps_gc_next &&
453             cfs_time_after(cfs_time_current_sec(), sec->ps_gc_next)) {
454                 ctx_cache_gc(sec, &freelist);
455                 gc = 1;
456         }
457
458         hlist_for_each_entry_safe(ctx, pos, next, hash_head, cc_hash) {
459                 if (gc == 0 &&
460                     ctx_check_death_locked(ctx, remove_dead ? &freelist : NULL))
461                         continue;
462
463                 if (ctx_match(ctx, vcred)) {
464                         found = 1;
465                         break;
466                 }
467         }
468
469         if (found) {
470                 if (new && new != ctx) {
471                         /* lost the race, just free it */
472                         hlist_add_head(&new->cc_hash, &freelist);
473                         new = NULL;
474                 }
475
476                 /* hot node, move to head */
477                 if (hash_head->first != &ctx->cc_hash) {
478                         __hlist_del(&ctx->cc_hash);
479                         hlist_add_head(&ctx->cc_hash, hash_head);
480                 }
481         } else {
482                 /* don't allocate for reverse sec */
483                 if (sec->ps_flags & PTLRPC_SEC_FL_REVERSE) {
484                         spin_unlock(&sec->ps_lock);
485                         RETURN(NULL);
486                 }
487
488                 if (new) {
489                         ctx_enhash(new, hash_head);
490                         ctx = new;
491                 } else if (create) {
492                         spin_unlock(&sec->ps_lock);
493                         new = sec->ps_policy->sp_cops->create_ctx(sec, vcred);
494                         if (new) {
495                                 atomic_inc(&sec->ps_busy);
496                                 goto retry;
497                         }
498                 } else
499                         ctx = NULL;
500         }
501
502         /* hold a ref */
503         if (ctx)
504                 atomic_inc(&ctx->cc_refcount);
505
506         spin_unlock(&sec->ps_lock);
507
508         /* the allocator of the context must give the first push to refresh */
509         if (new) {
510                 LASSERT(new == ctx);
511                 sptlrpc_ctx_refresh(new);
512         }
513
514         ctx_list_destroy(&freelist);
515         RETURN(ctx);
516 }
517
518 static inline
519 struct ptlrpc_cli_ctx *get_my_ctx(struct ptlrpc_sec *sec)
520 {
521         struct vfs_cred vcred = { cfs_current()->uid, cfs_current()->gid };
522         int create = 1, remove_dead = 1;
523
524         if (sec->ps_flags & PTLRPC_SEC_FL_REVERSE) {
525                 vcred.vc_uid = 0;
526                 create = 0;
527                 remove_dead = 0;
528         } else if (sec->ps_flags & PTLRPC_SEC_FL_ROOTONLY)
529                 vcred.vc_uid = 0;
530
531         if (sec->ps_policy->sp_cops->lookup_ctx)
532                 return sec->ps_policy->sp_cops->lookup_ctx(sec, &vcred);
533         else
534                 return ctx_cache_lookup(sec, &vcred, create, remove_dead);
535 }
536
537 /**************************************************
538  * client context APIs                            *
539  **************************************************/
540
541 static
542 void sptlrpc_ctx_refresh(struct ptlrpc_cli_ctx *ctx)
543 {
544         LASSERT(atomic_read(&ctx->cc_refcount) > 0);
545
546         if (!ctx_is_refreshed(ctx) && ctx->cc_ops->refresh)
547                 ctx->cc_ops->refresh(ctx);
548 }
549
550 struct ptlrpc_cli_ctx *sptlrpc_ctx_get(struct ptlrpc_cli_ctx *ctx)
551 {
552         LASSERT(atomic_read(&ctx->cc_refcount) > 0);
553         atomic_inc(&ctx->cc_refcount);
554         return ctx;
555 }
556 EXPORT_SYMBOL(sptlrpc_ctx_get);
557
558 void sptlrpc_ctx_put(struct ptlrpc_cli_ctx *ctx, int sync)
559 {
560         struct ptlrpc_sec *sec = ctx->cc_sec;
561
562         LASSERT(sec);
563         LASSERT(atomic_read(&ctx->cc_refcount));
564
565         if (!atomic_dec_and_test(&ctx->cc_refcount))
566                 return;
567
568         LASSERT(test_bit(PTLRPC_CTX_HASHED_BIT, &ctx->cc_flags) == 0);
569         LASSERT(hlist_unhashed(&ctx->cc_hash));
570
571         /* if async release was requested, we must clear the UPTODATE bit
572          * to prevent extra rpcs during the destroy procedure.
573          */
574         if (!sync)
575                 clear_bit(PTLRPC_CTX_UPTODATE_BIT, &ctx->cc_flags);
576
577         /* destroy this context */
578         if (!sptlrpc_sec_destroy_ctx(sec, ctx))
579                 return;
580
581         CWARN("%s@%p: put last ctx, also destroy the sec\n",
582               sec->ps_policy->sp_name, sec);
583
584         sptlrpc_sec_destroy(sec);
585 }
586 EXPORT_SYMBOL(sptlrpc_ctx_put);
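
/*
 * Usage sketch (hedged, not from this file): callers pair sptlrpc_ctx_get()
 * with sptlrpc_ctx_put().  The second argument selects synchronous vs.
 * asynchronous release, as described above:
 *
 *      struct ptlrpc_cli_ctx *ctx = sptlrpc_ctx_get(req->rq_cli_ctx);
 *
 *      ... use ctx while holding the extra reference ...
 *
 *      sptlrpc_ctx_put(ctx, 1);    pass 0 instead of 1 in atomic context
 */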
587
588 /*
589  * mark a ctx as DEAD, and pull it out from hash table.
590  *
591  * NOTE: the caller must hold at least 1 ref on the ctx.
592  */
593 void sptlrpc_ctx_expire(struct ptlrpc_cli_ctx *ctx)
594 {
595         LASSERT(ctx->cc_sec);
596         LASSERT(atomic_read(&ctx->cc_refcount) > 0);
597
598         ctx_expire(ctx);
599
600         spin_lock(&ctx->cc_sec->ps_lock);
601
602         if (test_and_clear_bit(PTLRPC_CTX_HASHED_BIT, &ctx->cc_flags)) {
603                 LASSERT(!hlist_unhashed(&ctx->cc_hash));
604                 LASSERT(atomic_read(&ctx->cc_refcount) > 1);
605
606                 hlist_del_init(&ctx->cc_hash);
607                 if (atomic_dec_and_test(&ctx->cc_refcount))
608                         LBUG();
609         }
610
611         spin_unlock(&ctx->cc_sec->ps_lock);
612 }
613 EXPORT_SYMBOL(sptlrpc_ctx_expire);
614
615 void sptlrpc_ctx_replace(struct ptlrpc_sec *sec, struct ptlrpc_cli_ctx *new)
616 {
617         struct ptlrpc_cli_ctx *ctx;
618         struct hlist_node *pos, *next;
619         HLIST_HEAD(freelist);
620         unsigned int hash;
621         ENTRY;
622
623         hash = ctx_hash_index(sec, (__u64) new->cc_vcred.vc_uid);
624         LASSERT(hash < sec->ps_ccache_size);
625
626         spin_lock(&sec->ps_lock);
627
628         hlist_for_each_entry_safe(ctx, pos, next,
629                                   &sec->ps_ccache[hash], cc_hash) {
630                 if (!ctx_match(ctx, &new->cc_vcred))
631                         continue;
632
633                 ctx_expire(ctx);
634                 ctx_unhash(ctx, &freelist);
635                 break;
636         }
637
638         ctx_enhash(new, &sec->ps_ccache[hash]);
639         atomic_inc(&sec->ps_busy);
640
641         spin_unlock(&sec->ps_lock);
642
643         ctx_list_destroy(&freelist);
644         EXIT;
645 }
646 EXPORT_SYMBOL(sptlrpc_ctx_replace);
647
648 int sptlrpc_req_get_ctx(struct ptlrpc_request *req)
649 {
650         struct obd_import *imp = req->rq_import;
651         ENTRY;
652
653         LASSERT(!req->rq_cli_ctx);
654         LASSERT(imp);
655
656         req->rq_cli_ctx = get_my_ctx(imp->imp_sec);
657
658         if (!req->rq_cli_ctx) {
659                 CERROR("req %p: failed to get context from cache\n", req);
660                 RETURN(-ENOMEM);
661         }
662
663         RETURN(0);
664 }
665
666 void sptlrpc_ctx_wakeup(struct ptlrpc_cli_ctx *ctx)
667 {
668         struct ptlrpc_request *req, *next;
669
670         spin_lock(&ctx->cc_lock);
671         list_for_each_entry_safe(req, next, &ctx->cc_req_list, rq_ctx_chain) {
672                 list_del_init(&req->rq_ctx_chain);
673                 ptlrpc_wake_client_req(req);
674         }
675         spin_unlock(&ctx->cc_lock);
676 }
677 EXPORT_SYMBOL(sptlrpc_ctx_wakeup);
678
679 void sptlrpc_req_put_ctx(struct ptlrpc_request *req)
680 {
681         ENTRY;
682
683         LASSERT(req);
684         LASSERT(req->rq_cli_ctx);
685
686         /* the request might be asked to release its context early while
687          * still on the context waiting list.
688          */
689         if (!list_empty(&req->rq_ctx_chain)) {
690                 spin_lock(&req->rq_cli_ctx->cc_lock);
691                 list_del_init(&req->rq_ctx_chain);
692                 spin_unlock(&req->rq_cli_ctx->cc_lock);
693         }
694
695         /* this could be called with a spinlock held, use async mode */
696         sptlrpc_ctx_put(req->rq_cli_ctx, 0);
697         req->rq_cli_ctx = NULL;
698         EXIT;
699 }
700
701 /*
702  * the request must have a context. if getting a new context fails,
703  * just restore the old one
704  */
705 int sptlrpc_req_replace_dead_ctx(struct ptlrpc_request *req)
706 {
707         struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
708         int rc;
709         ENTRY;
710
711         LASSERT(ctx);
712         LASSERT(test_bit(PTLRPC_CTX_DEAD_BIT, &ctx->cc_flags));
713
714         /* make sure not on context waiting list */
715         spin_lock(&ctx->cc_lock);
716         list_del_init(&req->rq_ctx_chain);
717         spin_unlock(&ctx->cc_lock);
718
719         sptlrpc_ctx_get(ctx);
720         sptlrpc_req_put_ctx(req);
721         rc = sptlrpc_req_get_ctx(req);
722         if (!rc) {
723                 LASSERT(req->rq_cli_ctx);
724                 LASSERT(req->rq_cli_ctx != ctx);
725                 sptlrpc_ctx_put(ctx, 1);
726         } else {
727                 LASSERT(!req->rq_cli_ctx);
728                 req->rq_cli_ctx = ctx;
729         }
730         RETURN(rc);
731 }
732
733 static
734 int ctx_check_refresh(struct ptlrpc_cli_ctx *ctx)
735 {
736         smp_mb();
737         if (ctx_is_refreshed(ctx))
738                 return 1;
739         return 0;
740 }
741
742 static
743 int ctx_refresh_timeout(void *data)
744 {
745         struct ptlrpc_request *req = data;
746         int rc;
747
748         /* conn_cnt is needed in expire_one_request */
749         lustre_msg_set_conn_cnt(req->rq_reqmsg, req->rq_import->imp_conn_cnt);
750
751         rc = ptlrpc_expire_one_request(req);
752         /* if we started recovery, we should mark this ctx dead; otherwise,
753          * if lgssd died, nobody would retire this ctx and a subsequent
754          * connect would still find the same ctx, causing a deadlock.
755          * this assumes that the expire time of the request is later
756          * than the context refresh expire time.
757          */
758         if (rc == 0)
759                 ctx_expire(req->rq_cli_ctx);
760         return rc;
761 }
762
763 static
764 void ctx_refresh_interrupt(void *data)
765 {
766         /* do nothing */
767 }
768
769 /*
770  * the status of the context may be changed by other threads at any time;
771  * we allow this race. but once we return 0, the caller will assume the
772  * context is up to date and keep using it until the affected rpc is done.
773  *
774  * @timeout:
775  *    < 0  - don't wait
776  *    = 0  - wait until success or a fatal error occurs
777  *    > 0  - timeout value (in seconds)
778  *
779  * return 0 only if the context is up to date.
780  */
781 int sptlrpc_req_refresh_ctx(struct ptlrpc_request *req, long timeout)
782 {
783         struct ptlrpc_cli_ctx  *ctx = req->rq_cli_ctx;
784         struct l_wait_info      lwi;
785         int                     rc;
786         ENTRY;
787
788         LASSERT(ctx);
789
790         /* special ctxs */
791         if (ctx_is_eternal(ctx) || req->rq_ctx_init || req->rq_ctx_fini)
792                 RETURN(0);
793
794         /* reverse ctxs, don't refresh */
795         if (ctx->cc_sec->ps_flags & PTLRPC_SEC_FL_REVERSE)
796                 RETURN(0);
797
798         spin_lock(&ctx->cc_lock);
799 again:
800         if (ctx_check_uptodate(ctx)) {
801                 if (!list_empty(&req->rq_ctx_chain))
802                         list_del_init(&req->rq_ctx_chain);
803                 spin_unlock(&ctx->cc_lock);
804                 RETURN(0);
805         }
806
807         if (test_bit(PTLRPC_CTX_ERROR_BIT, &ctx->cc_flags)) {
808                 req->rq_err = 1;
809                 if (!list_empty(&req->rq_ctx_chain))
810                         list_del_init(&req->rq_ctx_chain);
811                 spin_unlock(&ctx->cc_lock);
812                 RETURN(-EPERM);
813         }
814
815         /* This is subtle. For a resent message we have to keep the original
816          * context to survive the following situation:
817          *  1. the request is sent to the server
818          *  2. recovery is kick started
819          *  3. recovery finishes, the request is marked as resent
820          *  4. the request is resent
821          *  5. the old reply from the server is received (the xid is the same)
822          *  6. verify the reply (this has to succeed)
823          *  7. a new reply from the server is received, lnet drops it
824          *
825          * Note we can't simply change the xid for a resent request because
826          * the server relies on it for reply reconstruction.
827          *
828          * Commonly the original context should be uptodate because we
829          * have a nice expiry time; and the server will keep its half of
830          * the context because we hold at least one ref on the old context,
831          * which prevents the context destroy RPC from being sent. So the
832          * server can still accept the request and finish the RPC. Two cases:
833          *  1. If the server side context has been trimmed, NO_CONTEXT will
834          *     be returned, and gss_cli_ctx_verify/unseal will switch to the
835          *     new context by force.
836          *  2. If the current context was never refreshed, we are fine: we
837          *     never really sent a request with the old context before.
838          */
839         if (test_bit(PTLRPC_CTX_UPTODATE_BIT, &ctx->cc_flags) &&
840             req->rq_reqmsg &&
841             lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT) {
842                 if (!list_empty(&req->rq_ctx_chain))
843                         list_del_init(&req->rq_ctx_chain);
844                 spin_unlock(&ctx->cc_lock);
845                 RETURN(0);
846         }
847
848         if (unlikely(test_bit(PTLRPC_CTX_DEAD_BIT, &ctx->cc_flags))) {
849                 spin_unlock(&ctx->cc_lock);
850
851                 /* don't have to, but we don't want to release it too soon */
852                 sptlrpc_ctx_get(ctx);
853
854                 rc = sptlrpc_req_replace_dead_ctx(req);
855                 if (rc) {
856                         LASSERT(ctx == req->rq_cli_ctx);
857                         CERROR("req %p: failed to replace dead ctx %p\n",
858                                 req, ctx);
859                         req->rq_err = 1;
860                         LASSERT(list_empty(&req->rq_ctx_chain));
861                         sptlrpc_ctx_put(ctx, 1);
862                         RETURN(-ENOMEM);
863                 }
864
865                 LASSERT(ctx != req->rq_cli_ctx);
866                 CWARN("req %p: replace dead ctx %p(%u->%s) => %p\n",
867                       req, ctx, ctx->cc_vcred.vc_uid,
868                       sec2target_str(ctx->cc_sec), req->rq_cli_ctx);
869
870                 sptlrpc_ctx_put(ctx, 1);
871                 ctx = req->rq_cli_ctx;
872                 LASSERT(list_empty(&req->rq_ctx_chain));
873
874                 spin_lock(&ctx->cc_lock);
875                 goto again;
876         }
877
878         /* Now we're sure this context is undergoing an upcall; add
879          * ourselves to the waiting list
880          */
881         if (list_empty(&req->rq_ctx_chain))
882                 list_add(&req->rq_ctx_chain, &ctx->cc_req_list);
883
884         spin_unlock(&ctx->cc_lock);
885
886         if (timeout < 0) {
887                 RETURN(-EWOULDBLOCK);
888         }
889
890         /* Clear any flags that may be present from previous sends */
891         LASSERT(req->rq_receiving_reply == 0);
892         spin_lock(&req->rq_lock);
893         req->rq_err = 0;
894         req->rq_timedout = 0;
895         req->rq_resend = 0;
896         req->rq_restart = 0;
897         spin_unlock(&req->rq_lock);
898
899         lwi = LWI_TIMEOUT_INTR(timeout == 0 ? LONG_MAX : timeout * HZ,
900                                ctx_refresh_timeout, ctx_refresh_interrupt, req);
901         rc = l_wait_event(req->rq_reply_waitq, ctx_check_refresh(ctx), &lwi);
902
903         spin_lock(&ctx->cc_lock);
904         /* five cases bring us here:
905          * 1. successfully refreshed;
906          * 2. someone else marked this ctx dead by force;
907          * 3. interrupted;
908          * 4. timed out, and we don't want to recover from the failure;
909          * 5. timed out, and woken up when recovery finished;
910          */
911         if (!ctx_is_refreshed(ctx)) {
912                 /* timed out or interrupted */
913                 list_del_init(&req->rq_ctx_chain);
914                 spin_unlock(&ctx->cc_lock);
915
916                 LASSERT(rc != 0);
917                 RETURN(rc);
918         }
919
920         goto again;
921 }
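
/*
 * Usage sketch (hedged): a typical caller acquires a context for the
 * request and then blocks until it is refreshed, mirroring what
 * sptlrpc_import_check_ctx() below does with timeout == 0:
 *
 *      rc = sptlrpc_req_get_ctx(req);
 *      if (rc == 0)
 *              rc = sptlrpc_req_refresh_ctx(req, 0);
 */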
922
923 void sptlrpc_req_set_flavor(struct ptlrpc_request *req, int opcode)
924 {
925         struct sec_flavor_config *conf;
926
927         LASSERT(req->rq_import);
928         LASSERT(req->rq_import->imp_sec);
929         LASSERT(req->rq_cli_ctx);
930         LASSERT(req->rq_cli_ctx->cc_sec);
931         LASSERT(req->rq_bulk_read == 0 || req->rq_bulk_write == 0);
932
933         /* special security flags according to opcode */
934         switch (opcode) {
935         case OST_READ:
936         case OST_SAN_READ:
937                 req->rq_bulk_read = 1;
938                 break;
939         case OST_WRITE:
940         case OST_SAN_WRITE:
941                 req->rq_bulk_write = 1;
942                 break;
943         case SEC_CTX_INIT:
944                 req->rq_ctx_init = 1;
945                 break;
946         case SEC_CTX_FINI:
947                 req->rq_ctx_fini = 1;
948                 break;
949         }
950
951         req->rq_sec_flavor = req->rq_cli_ctx->cc_sec->ps_flavor;
952
953         /* force SVC_NONE for context initiation rpc, SVC_AUTH for context
954          * destruction rpc
955          */
956         if (unlikely(req->rq_ctx_init)) {
957                 req->rq_sec_flavor = SEC_MAKE_RPC_FLAVOR(
958                                 SEC_FLAVOR_POLICY(req->rq_sec_flavor),
959                                 SEC_FLAVOR_SUBPOLICY(req->rq_sec_flavor),
960                                 SEC_FLAVOR_SVC(SPTLRPC_SVC_NONE));
961         } else if (unlikely(req->rq_ctx_fini)) {
962                 req->rq_sec_flavor = SEC_MAKE_RPC_FLAVOR(
963                                 SEC_FLAVOR_POLICY(req->rq_sec_flavor),
964                                 SEC_FLAVOR_SUBPOLICY(req->rq_sec_flavor),
965                                 SEC_FLAVOR_SVC(SPTLRPC_SVC_AUTH));
966         }
967
968         conf = &req->rq_import->imp_obd->u.cli.cl_sec_conf;
969
970         /* set the user descriptor flag, except for ROOTONLY which doesn't
971          * need it, and null security which can't carry it
972          */
973         if ((conf->sfc_flags & PTLRPC_SEC_FL_ROOTONLY) == 0 &&
974             req->rq_sec_flavor != SPTLRPC_FLVR_NULL)
975                 req->rq_sec_flavor |= SEC_FLAVOR_FL_USER;
976
977         /* bulk security flag */
978         if ((req->rq_bulk_read || req->rq_bulk_write) &&
979             (conf->sfc_bulk_priv != BULK_PRIV_ALG_NULL ||
980              conf->sfc_bulk_csum != BULK_CSUM_ALG_NULL))
981                 req->rq_sec_flavor |= SEC_FLAVOR_FL_BULK;
982 }
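
/*
 * Worked example (hedged, added note): for an OST_WRITE on an import whose
 * config has sfc_bulk_csum != BULK_CSUM_ALG_NULL and no ROOTONLY flag, the
 * code above sets rq_bulk_write, copies ps_flavor into rq_sec_flavor, then
 * ORs in SEC_FLAVOR_FL_USER and SEC_FLAVOR_FL_BULK.
 */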
983
984 void sptlrpc_request_out_callback(struct ptlrpc_request *req)
985 {
986         if (SEC_FLAVOR_SVC(req->rq_sec_flavor) != SPTLRPC_SVC_PRIV)
987                 return;
988
989         LASSERT(req->rq_clrbuf);
990         if (req->rq_pool || !req->rq_reqbuf)
991                 return;
992
993         OBD_FREE(req->rq_reqbuf, req->rq_reqbuf_len);
994         req->rq_reqbuf = NULL;
995         req->rq_reqbuf_len = 0;
996 }
997
998 /*
999  * check whether the current user has a valid context for an import.
1000  * might retry repeatedly in case of non-fatal errors.
1001  * return 0 on success, < 0 on failure
1002  */
1003 int sptlrpc_import_check_ctx(struct obd_import *imp)
1004 {
1005         struct ptlrpc_cli_ctx *ctx;
1006         struct ptlrpc_request *req = NULL;
1007         int rc;
1008         ENTRY;
1009
1010         might_sleep();
1011
1012         ctx = get_my_ctx(imp->imp_sec);
1013         if (!ctx)
1014                 RETURN(1);
1015
1016         if (ctx_is_eternal(ctx)) {
1017                 sptlrpc_ctx_put(ctx, 1);
1018                 RETURN(0);
1019         }
1020
1021         OBD_ALLOC(req, sizeof(*req));
1022         if (!req)
1023                 RETURN(-ENOMEM);
1024
1025         spin_lock_init(&req->rq_lock);
1026         atomic_set(&req->rq_refcount, 10000);
1027         INIT_LIST_HEAD(&req->rq_ctx_chain);
1028         init_waitqueue_head(&req->rq_reply_waitq);
1029         req->rq_import = imp;
1030         req->rq_cli_ctx = ctx;
1031
1032         rc = sptlrpc_req_refresh_ctx(req, 0);
1033         LASSERT(list_empty(&req->rq_ctx_chain));
1034         sptlrpc_ctx_put(req->rq_cli_ctx, 1);
1035         OBD_FREE(req, sizeof(*req));
1036
1037         RETURN(rc);
1038 }
1039
1040 int sptlrpc_cli_wrap_request(struct ptlrpc_request *req)
1041 {
1042         struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
1043         int rc = 0;
1044         ENTRY;
1045
1046         LASSERT(ctx);
1047         LASSERT(ctx->cc_sec);
1048         LASSERT(req->rq_reqbuf || req->rq_clrbuf);
1049
1050         /* we wrap bulk request here because now we can be sure
1051          * the context is uptodate.
1052          */
1053         if (req->rq_bulk) {
1054                 rc = sptlrpc_cli_wrap_bulk(req, req->rq_bulk);
1055                 if (rc)
1056                         RETURN(rc);
1057         }
1058
1059         switch (SEC_FLAVOR_SVC(req->rq_sec_flavor)) {
1060         case SPTLRPC_SVC_NONE:
1061         case SPTLRPC_SVC_AUTH:
1062                 LASSERT(ctx->cc_ops->sign);
1063                 rc = ctx->cc_ops->sign(ctx, req);
1064                 break;
1065         case SPTLRPC_SVC_PRIV:
1066                 LASSERT(ctx->cc_ops->seal);
1067                 rc = ctx->cc_ops->seal(ctx, req);
1068                 break;
1069         default:
1070                 LBUG();
1071         }
1072
1073         if (rc == 0) {
1074                 LASSERT(req->rq_reqdata_len);
1075                 LASSERT(req->rq_reqdata_len % 8 == 0);
1076                 LASSERT(req->rq_reqdata_len <= req->rq_reqbuf_len);
1077         }
1078
1079         RETURN(rc);
1080 }
1081
1082 /*
1083  * rq_nob_received is the actual received data length
1084  */
1085 int sptlrpc_cli_unwrap_reply(struct ptlrpc_request *req)
1086 {
1087         struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
1088         int rc;
1089         ENTRY;
1090
1091         LASSERT(ctx);
1092         LASSERT(ctx->cc_sec);
1093         LASSERT(ctx->cc_ops);
1094         LASSERT(req->rq_repbuf);
1095
1096         req->rq_repdata_len = req->rq_nob_received;
1097
1098         if (req->rq_nob_received < sizeof(struct lustre_msg)) {
1099                 CERROR("reply data length %d too small\n",
1100                        req->rq_nob_received);
1101                 RETURN(-EPROTO);
1102         }
1103
1104         if (req->rq_repbuf->lm_magic == LUSTRE_MSG_MAGIC_V1 ||
1105             req->rq_repbuf->lm_magic == LUSTRE_MSG_MAGIC_V1_SWABBED) {
1106                 /* it must be null flavor, so our request should also be
1107                  * in null flavor */
1108                 if (SEC_FLAVOR_POLICY(req->rq_sec_flavor) !=
1109                     SPTLRPC_POLICY_NULL) {
1110                         CERROR("request flavor is %x but reply is null\n",
1111                                req->rq_sec_flavor);
1112                         RETURN(-EPROTO);
1113                 }
1114         } else {
1115                 /* v2 message... */
1116                 ptlrpc_flavor_t tmpf = req->rq_repbuf->lm_secflvr;
1117
1118                 if (req->rq_repbuf->lm_magic == LUSTRE_MSG_MAGIC_V2_SWABBED)
1119                         __swab32s(&tmpf);
1120
1121                 if (SEC_FLAVOR_POLICY(tmpf) !=
1122                     SEC_FLAVOR_POLICY(req->rq_sec_flavor)) {
1123                         CERROR("request policy %u but reply policy %u\n",
1124                                SEC_FLAVOR_POLICY(req->rq_sec_flavor),
1125                                SEC_FLAVOR_POLICY(tmpf));
1126                         RETURN(-EPROTO);
1127                 }
1128
1129                 if ((SEC_FLAVOR_POLICY(req->rq_sec_flavor) !=
1130                      SPTLRPC_POLICY_NULL) &&
1131                     lustre_unpack_msg(req->rq_repbuf, req->rq_nob_received))
1132                         RETURN(-EPROTO);
1133         }
1134
1135         switch (SEC_FLAVOR_SVC(req->rq_sec_flavor)) {
1136         case SPTLRPC_SVC_NONE:
1137         case SPTLRPC_SVC_AUTH:
1138                 LASSERT(ctx->cc_ops->verify);
1139                 rc = ctx->cc_ops->verify(ctx, req);
1140                 break;
1141         case SPTLRPC_SVC_PRIV:
1142                 LASSERT(ctx->cc_ops->unseal);
1143                 rc = ctx->cc_ops->unseal(ctx, req);
1144                 break;
1145         default:
1146                 LBUG();
1147         }
1148
1149         LASSERT(rc || req->rq_repmsg);
1150         RETURN(rc);
1151 }
1152
1153 /**************************************************
1154  * security APIs                                  *
1155  **************************************************/
1156
1157 /*
1158  * let the policy module determine whether to take a reference
1159  * on the import or not.
1160  */
1161 static
1162 struct ptlrpc_sec * sptlrpc_sec_create(struct obd_import *imp,
1163                                        struct ptlrpc_svc_ctx *ctx,
1164                                        __u32 flavor,
1165                                        unsigned long flags)
1166 {
1167         struct ptlrpc_sec_policy *policy;
1168         struct ptlrpc_sec *sec;
1169         ENTRY;
1170
1171         flavor = SEC_FLAVOR_RPC(flavor);
1172
1173         if (ctx) {
1174                 LASSERT(imp->imp_dlm_fake == 1);
1175
1176                 CDEBUG(D_SEC, "%s %s: reverse sec using flavor %s\n",
1177                        imp->imp_obd->obd_type->typ_name,
1178                        imp->imp_obd->obd_name,
1179                        sptlrpc_flavor2name(flavor));
1180
1181                 policy = sptlrpc_policy_get(ctx->sc_policy);
1182                 flags |= PTLRPC_SEC_FL_REVERSE | PTLRPC_SEC_FL_ROOTONLY;
1183         } else {
1184                 LASSERT(imp->imp_dlm_fake == 0);
1185
1186                 CDEBUG(D_SEC, "%s %s: select security flavor %s\n",
1187                        imp->imp_obd->obd_type->typ_name,
1188                        imp->imp_obd->obd_name,
1189                        sptlrpc_flavor2name(flavor));
1190
1191                 policy = sptlrpc_flavor2policy(flavor);
1192                 if (!policy) {
1193                         CERROR("invalid flavor 0x%x\n", flavor);
1194                         RETURN(NULL);
1195                 }
1196         }
1197
1198         sec = policy->sp_cops->create_sec(imp, ctx, flavor, flags);
1199         if (sec) {
1200                 atomic_inc(&sec->ps_refcount);
1201
1202                 /* take 1 busy count on behalf of the sec itself,
1203                  * balanced in sptlrpc_sec_put()
1204                  */
1205                 atomic_inc(&sec->ps_busy);
1206         } else
1207                 sptlrpc_policy_put(policy);
1208
1209         RETURN(sec);
1210 }
1211
1212 static
1213 void sptlrpc_sec_destroy(struct ptlrpc_sec *sec)
1214 {
1215         struct ptlrpc_sec_policy *policy = sec->ps_policy;
1216
1217         LASSERT(policy);
1218         LASSERT(atomic_read(&sec->ps_refcount) == 0);
1219         LASSERT(atomic_read(&sec->ps_busy) == 0);
1220         LASSERT(policy->sp_cops->destroy_sec);
1221
1222         policy->sp_cops->destroy_sec(sec);
1223         sptlrpc_policy_put(policy);
1224 }
1225
1226 static
1227 void sptlrpc_sec_put(struct ptlrpc_sec *sec)
1228 {
1229         struct ptlrpc_sec_policy *policy = sec->ps_policy;
1230
1231         if (!atomic_dec_and_test(&sec->ps_refcount)) {
1232                 sptlrpc_policy_put(policy);
1233                 return;
1234         }
1235
1236         ctx_cache_flush(sec, -1, 1, 1);
1237
1238         if (atomic_dec_and_test(&sec->ps_busy))
1239                 sptlrpc_sec_destroy(sec);
1240         else
1241                 CWARN("delaying destroy of %s@%p: busy contexts\n",
1242                       policy->sp_name, sec);
1243 }
1244
1245 /*
1246  * return 1 means the caller should also destroy the sec structure;
1247  * normally returns 0
1248  */
1249 static
1250 int sptlrpc_sec_destroy_ctx(struct ptlrpc_sec *sec,
1251                             struct ptlrpc_cli_ctx *ctx)
1252 {
1253         LASSERT(sec == ctx->cc_sec);
1254         LASSERT(atomic_read(&sec->ps_busy));
1255         LASSERT(atomic_read(&ctx->cc_refcount) == 0);
1256         LASSERT(hlist_unhashed(&ctx->cc_hash));
1257         LASSERT(list_empty(&ctx->cc_req_list));
1258         LASSERT(sec->ps_policy->sp_cops->destroy_ctx);
1259
1260         sec->ps_policy->sp_cops->destroy_ctx(sec, ctx);
1261
1262         if (atomic_dec_and_test(&sec->ps_busy)) {
1263                 LASSERT(atomic_read(&sec->ps_refcount) == 0);
1264                 return 1;
1265         }
1266
1267         return 0;
1268 }
1269
1270 /*
1271  * on successful completion, req->rq_reqmsg points to the
1272  * right place.
1273  */
1274 int sptlrpc_cli_alloc_reqbuf(struct ptlrpc_request *req, int msgsize)
1275 {
1276         struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
1277         struct ptlrpc_sec_policy *policy;
1278         int rc;
1279
1280         LASSERT(ctx);
1281         LASSERT(atomic_read(&ctx->cc_refcount));
1282         LASSERT(ctx->cc_sec);
1283         LASSERT(ctx->cc_sec->ps_policy);
1284         LASSERT(req->rq_reqmsg == NULL);
1285
1286         policy = ctx->cc_sec->ps_policy;
1287         rc = policy->sp_cops->alloc_reqbuf(ctx->cc_sec, req, msgsize);
1288         if (!rc) {
1289                 LASSERT(req->rq_reqmsg);
1290                 LASSERT(req->rq_reqbuf || req->rq_clrbuf);
1291
1292                 /* zeroing preallocated buffer */
1293                 if (req->rq_pool)
1294                         memset(req->rq_reqmsg, 0, msgsize);
1295         }
1296
1297         return rc;
1298 }
1299
1300 void sptlrpc_cli_free_reqbuf(struct ptlrpc_request *req)
1301 {
1302         struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
1303         struct ptlrpc_sec_policy *policy;
1304
1305         LASSERT(ctx);
1306         LASSERT(atomic_read(&ctx->cc_refcount));
1307         LASSERT(ctx->cc_sec);
1308         LASSERT(ctx->cc_sec->ps_policy);
1309         LASSERT(req->rq_reqbuf || req->rq_clrbuf);
1310
1311         policy = ctx->cc_sec->ps_policy;
1312         policy->sp_cops->free_reqbuf(ctx->cc_sec, req);
1313 }
1314
1315 int sptlrpc_cli_alloc_repbuf(struct ptlrpc_request *req, int msgsize)
1316 {
1317         struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
1318         struct ptlrpc_sec_policy *policy;
1319         ENTRY;
1320
1321         LASSERT(ctx);
1322         LASSERT(atomic_read(&ctx->cc_refcount));
1323         LASSERT(ctx->cc_sec);
1324         LASSERT(ctx->cc_sec->ps_policy);
1325
1326         if (req->rq_repbuf)
1327                 RETURN(0);
1328
1329         policy = ctx->cc_sec->ps_policy;
1330         RETURN(policy->sp_cops->alloc_repbuf(ctx->cc_sec, req, msgsize));
1331 }
1332
1333 void sptlrpc_cli_free_repbuf(struct ptlrpc_request *req)
1334 {
1335         struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
1336         struct ptlrpc_sec_policy *policy;
1337         ENTRY;
1338
1339         LASSERT(ctx);
1340         LASSERT(atomic_read(&ctx->cc_refcount));
1341         LASSERT(ctx->cc_sec);
1342         LASSERT(ctx->cc_sec->ps_policy);
1343         LASSERT(req->rq_repbuf);
1344
1345         policy = ctx->cc_sec->ps_policy;
1346         policy->sp_cops->free_repbuf(ctx->cc_sec, req);
1347         EXIT;
1348 }
1349
1350 int sptlrpc_import_get_sec(struct obd_import *imp,
1351                            struct ptlrpc_svc_ctx *ctx,
1352                            __u32 flavor,
1353                            unsigned long flags)
1354 {
1355         struct obd_device *obd = imp->imp_obd;
1356         ENTRY;
1357
1358         LASSERT(obd);
1359         LASSERT(obd->obd_type);
1360
1361         /* the old sec might still be there when reconnecting */
1362         if (imp->imp_sec)
1363                 RETURN(0);
1364
1365         imp->imp_sec = sptlrpc_sec_create(imp, ctx, flavor, flags);
1366         if (!imp->imp_sec)
1367                 RETURN(-EINVAL);
1368
1369         RETURN(0);
1370 }
1371
1372 void sptlrpc_import_put_sec(struct obd_import *imp)
1373 {
1374         if (imp->imp_sec == NULL)
1375                 return;
1376
1377         sptlrpc_sec_put(imp->imp_sec);
1378         imp->imp_sec = NULL;
1379 }
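
/*
 * Usage sketch (hedged; the actual call sites live outside this file):
 * import setup and teardown pair the two functions above:
 *
 *      rc = sptlrpc_import_get_sec(imp, NULL, flavor, flags);
 *      ...
 *      sptlrpc_import_put_sec(imp);
 */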
1380
1381 void sptlrpc_import_flush_root_ctx(struct obd_import *imp)
1382 {
1383         if (imp == NULL || imp->imp_sec == NULL)
1384                 return;
1385
1386         /* use 'grace' mode; it's crucial, see the explanation in
1387          * sptlrpc_req_refresh_ctx()
1388          */
1389         ctx_cache_flush(imp->imp_sec, 0, 1, 1);
1390 }
1391
1392 void sptlrpc_import_flush_my_ctx(struct obd_import *imp)
1393 {
1394         if (imp == NULL || imp->imp_sec == NULL)
1395                 return;
1396
1397         ctx_cache_flush(imp->imp_sec, cfs_current()->uid, 1, 1);
1398 }
1399 EXPORT_SYMBOL(sptlrpc_import_flush_my_ctx);
1400
1401 int sptlrpc_cli_install_rvs_ctx(struct obd_import *imp,
1402                                 struct ptlrpc_cli_ctx *ctx)
1403 {
1404         struct ptlrpc_sec_policy *policy = ctx->cc_sec->ps_policy;
1405
1406         if (!policy->sp_cops->install_rctx)
1407                 return 0;
1408         return policy->sp_cops->install_rctx(imp, ctx->cc_sec, ctx);
1409 }
1410
1411 int sptlrpc_svc_install_rvs_ctx(struct obd_import *imp,
1412                                 struct ptlrpc_svc_ctx *ctx)
1413 {
1414         struct ptlrpc_sec_policy *policy = ctx->sc_policy;
1415
1416         if (!policy->sp_sops->install_rctx)
1417                 return 0;
1418         return policy->sp_sops->install_rctx(imp, ctx);
1419 }
1420
1421 /****************************************
1422  * server side security                 *
1423  ****************************************/
1424
1425 int sptlrpc_svc_unwrap_request(struct ptlrpc_request *req)
1426 {
1427         struct ptlrpc_sec_policy *policy;
1428         struct lustre_msg *msg = req->rq_reqbuf;
1429         int rc;
1430         ENTRY;
1431
1432         LASSERT(msg);
1433         LASSERT(req->rq_reqmsg == NULL);
1434         LASSERT(req->rq_repmsg == NULL);
1435
1436         /*
1437          * in any case we avoid calling unpack_msg() for requests of null
1438          * flavor; that will be done later by ptlrpc_server_handle_request().
1439          */
1440         if (req->rq_reqdata_len < sizeof(struct lustre_msg)) {
1441                 CERROR("request size %d too small\n", req->rq_reqdata_len);
1442                 RETURN(SECSVC_DROP);
1443         }
1444
1445         if (msg->lm_magic == LUSTRE_MSG_MAGIC_V1 ||
1446             msg->lm_magic == LUSTRE_MSG_MAGIC_V1_SWABBED) {
1447                 req->rq_sec_flavor = SPTLRPC_FLVR_NULL;
1448         } else {
1449                 req->rq_sec_flavor = msg->lm_secflvr;
1450
1451                 if (msg->lm_magic == LUSTRE_MSG_MAGIC_V2_SWABBED)
1452                         __swab32s(&req->rq_sec_flavor);
1453
1454                 if ((SEC_FLAVOR_POLICY(req->rq_sec_flavor) !=
1455                      SPTLRPC_POLICY_NULL) &&
1456                     lustre_unpack_msg(msg, req->rq_reqdata_len))
1457                         RETURN(SECSVC_DROP);
1458         }
1459
1460         policy = sptlrpc_flavor2policy(req->rq_sec_flavor);
1461         if (!policy) {
1462                 CERROR("unsupported security flavor %x\n", req->rq_sec_flavor);
1463                 RETURN(SECSVC_DROP);
1464         }
1465
1466         LASSERT(policy->sp_sops->accept);
1467         rc = policy->sp_sops->accept(req);
1468
1469         LASSERT(req->rq_reqmsg || rc != SECSVC_OK);
1470         sptlrpc_policy_put(policy);
1471
1472         /* FIXME move to proper place */
1473         if (rc == SECSVC_OK) {
1474                 __u32 opc = lustre_msg_get_opc(req->rq_reqmsg);
1475
1476                 if (opc == OST_WRITE || opc == OST_SAN_WRITE)
1477                         req->rq_bulk_write = 1;
1478                 else if (opc == OST_READ || opc == OST_SAN_READ)
1479                         req->rq_bulk_read = 1;
1480         }
1481
1482         RETURN(rc);
1483 }
1484
1485 int sptlrpc_svc_alloc_rs(struct ptlrpc_request *req,
1486                          int msglen)
1487 {
1488         struct ptlrpc_sec_policy *policy;
1489         struct ptlrpc_reply_state *rs;
1490         int rc;
1491         ENTRY;
1492
1493         LASSERT(req->rq_svc_ctx);
1494         LASSERT(req->rq_svc_ctx->sc_policy);
1495
1496         policy = req->rq_svc_ctx->sc_policy;
1497         LASSERT(policy->sp_sops->alloc_rs);
1498
1499         rc = policy->sp_sops->alloc_rs(req, msglen);
1500         if (unlikely(rc == -ENOMEM)) {
1501                 /* failed alloc, try emergency pool */
1502                 rs = lustre_get_emerg_rs(req->rq_rqbd->rqbd_service);
1503                 if (rs == NULL)
1504                         RETURN(-ENOMEM);
1505
1506                 req->rq_reply_state = rs;
1507                 rc = policy->sp_sops->alloc_rs(req, msglen);
1508                 if (rc) {
1509                         lustre_put_emerg_rs(rs);
1510                         req->rq_reply_state = NULL;
1511                 }
1512         }
1513
1514         LASSERT(rc != 0 ||
1515                 (req->rq_reply_state && req->rq_reply_state->rs_msg));
1516
1517         RETURN(rc);
1518 }
1519
1520 int sptlrpc_svc_wrap_reply(struct ptlrpc_request *req)
1521 {
1522         struct ptlrpc_sec_policy *policy;
1523         int rc;
1524         ENTRY;
1525
1526         LASSERT(req->rq_svc_ctx);
1527         LASSERT(req->rq_svc_ctx->sc_policy);
1528
1529         policy = req->rq_svc_ctx->sc_policy;
1530         LASSERT(policy->sp_sops->authorize);
1531
1532         rc = policy->sp_sops->authorize(req);
1533         LASSERT(rc || req->rq_reply_state->rs_repdata_len);
1534
1535         RETURN(rc);
1536 }
1537
1538 void sptlrpc_svc_free_rs(struct ptlrpc_reply_state *rs)
1539 {
1540         struct ptlrpc_sec_policy *policy;
1541         unsigned int prealloc;
1542         ENTRY;
1543
1544         LASSERT(rs->rs_svc_ctx);
1545         LASSERT(rs->rs_svc_ctx->sc_policy);
1546
1547         policy = rs->rs_svc_ctx->sc_policy;
1548         LASSERT(policy->sp_sops->free_rs);
1549
1550         prealloc = rs->rs_prealloc;
1551         policy->sp_sops->free_rs(rs);
1552
1553         if (prealloc)
1554                 lustre_put_emerg_rs(rs);
1555         EXIT;
1556 }
1557
1558 void sptlrpc_svc_ctx_addref(struct ptlrpc_request *req)
1559 {
1560         struct ptlrpc_svc_ctx *ctx = req->rq_svc_ctx;
1561
1562         if (ctx == NULL)
1563                 return;
1564
1565         LASSERT(atomic_read(&ctx->sc_refcount) > 0);
1566         atomic_inc(&ctx->sc_refcount);
1567 }
1568
1569 void sptlrpc_svc_ctx_decref(struct ptlrpc_request *req)
1570 {
1571         struct ptlrpc_svc_ctx *ctx = req->rq_svc_ctx;
1572
1573         if (ctx == NULL)
1574                 return;
1575
1576         LASSERT(atomic_read(&ctx->sc_refcount) > 0);
1577         if (atomic_dec_and_test(&ctx->sc_refcount)) {
1578                 if (ctx->sc_policy->sp_sops->free_ctx)
1579                         ctx->sc_policy->sp_sops->free_ctx(ctx);
1580         }
1581         req->rq_svc_ctx = NULL;
1582 }
1583
1584 /****************************************
1585  * bulk security                        *
1586  ****************************************/
1587
1588 int sptlrpc_cli_wrap_bulk(struct ptlrpc_request *req,
1589                           struct ptlrpc_bulk_desc *desc)
1590 {
1591         struct ptlrpc_cli_ctx *ctx;
1592
1593         if (!SEC_FLAVOR_HAS_BULK(req->rq_sec_flavor))
1594                 return 0;
1595
1596         LASSERT(req->rq_bulk_read || req->rq_bulk_write);
1597
1598         ctx = req->rq_cli_ctx;
1599         if (ctx->cc_ops->wrap_bulk)
1600                 return ctx->cc_ops->wrap_bulk(ctx, req, desc);
1601         return 0;
1602 }
1603 EXPORT_SYMBOL(sptlrpc_cli_wrap_bulk);
1604
1605 static
1606 void pga_to_bulk_desc(int nob, obd_count pg_count, struct brw_page **pga,
1607                       struct ptlrpc_bulk_desc *desc)
1608 {
1609         int i;
1610
1611         LASSERT(pga);
1612         LASSERT(*pga);
1613
1614         for (i = 0; i < pg_count && nob > 0; i++) {
1615 #ifdef __KERNEL__
1616                 desc->bd_iov[i].kiov_page = pga[i]->pg;
1617                 desc->bd_iov[i].kiov_len = pga[i]->count > nob ?
1618                                            nob : pga[i]->count;
1619                 desc->bd_iov[i].kiov_offset = pga[i]->off & ~CFS_PAGE_MASK;
1620 #else
1621 #warning FIXME for liblustre!
1622                 desc->bd_iov[i].iov_base = pga[i]->pg->addr;
1623                 desc->bd_iov[i].iov_len = pga[i]->count > nob ?
1624                                            nob : pga[i]->count;
1625 #endif
1626
1627                 desc->bd_iov_count++;
1628                 nob -= pga[i]->count;
1629         }
1630 }
1631
1632 int sptlrpc_cli_unwrap_bulk_read(struct ptlrpc_request *req,
1633                                  int nob, obd_count pg_count,
1634                                  struct brw_page **pga)
1635 {
1636         struct ptlrpc_bulk_desc *desc;
1637         struct ptlrpc_cli_ctx *ctx;
1638         int rc = 0;
1639
1640         if (!SEC_FLAVOR_HAS_BULK(req->rq_sec_flavor))
1641                 return 0;
1642
1643         LASSERT(req->rq_bulk_read && !req->rq_bulk_write);
1644
1645         OBD_ALLOC(desc, offsetof(struct ptlrpc_bulk_desc, bd_iov[pg_count]));
1646         if (desc == NULL) {
1647                 CERROR("out of memory, can't verify bulk read data\n");
1648                 return -ENOMEM;
1649         }
1650
1651         pga_to_bulk_desc(nob, pg_count, pga, desc);
1652
1653         ctx = req->rq_cli_ctx;
1654         if (ctx->cc_ops->unwrap_bulk)
1655                 rc = ctx->cc_ops->unwrap_bulk(ctx, req, desc);
1656
1657         OBD_FREE(desc, offsetof(struct ptlrpc_bulk_desc, bd_iov[pg_count]));
1658
1659         return rc;
1660 }
1661 EXPORT_SYMBOL(sptlrpc_cli_unwrap_bulk_read);
1662
1663 int sptlrpc_cli_unwrap_bulk_write(struct ptlrpc_request *req,
1664                                   struct ptlrpc_bulk_desc *desc)
1665 {
1666         struct ptlrpc_cli_ctx *ctx;
1667
1668         if (!SEC_FLAVOR_HAS_BULK(req->rq_sec_flavor))
1669                 return 0;
1670
1671         LASSERT(!req->rq_bulk_read && req->rq_bulk_write);
1672
1673         ctx = req->rq_cli_ctx;
1674         if (ctx->cc_ops->unwrap_bulk)
1675                 return ctx->cc_ops->unwrap_bulk(ctx, req, desc);
1676
1677         return 0;
1678 }
1679 EXPORT_SYMBOL(sptlrpc_cli_unwrap_bulk_write);
1680
1681 int sptlrpc_svc_wrap_bulk(struct ptlrpc_request *req,
1682                           struct ptlrpc_bulk_desc *desc)
1683 {
1684         struct ptlrpc_svc_ctx *ctx;
1685
1686         if (!SEC_FLAVOR_HAS_BULK(req->rq_sec_flavor))
1687                 return 0;
1688
1689         LASSERT(req->rq_bulk_read || req->rq_bulk_write);
1690
1691         ctx = req->rq_svc_ctx;
1692         if (ctx->sc_policy->sp_sops->wrap_bulk)
1693                 return ctx->sc_policy->sp_sops->wrap_bulk(req, desc);
1694
1695         return 0;
1696 }
1697 EXPORT_SYMBOL(sptlrpc_svc_wrap_bulk);
1698
1699 int sptlrpc_svc_unwrap_bulk(struct ptlrpc_request *req,
1700                             struct ptlrpc_bulk_desc *desc)
1701 {
1702         struct ptlrpc_svc_ctx *ctx;
1703
1704         if (!SEC_FLAVOR_HAS_BULK(req->rq_sec_flavor))
1705                 return 0;
1706
1707         LASSERT(req->rq_bulk_read || req->rq_bulk_write);
1708
1709         ctx = req->rq_svc_ctx;
1710         if (ctx->sc_policy->sp_sops->unwrap_bulk)
1711                 return ctx->sc_policy->sp_sops->unwrap_bulk(req, desc);
1712
1713         return 0;
1714 }
1715 EXPORT_SYMBOL(sptlrpc_svc_unwrap_bulk);
1716
1717
1718 /****************************************
1719  * user descriptor helpers              *
1720  ****************************************/
1721
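/*
 * a user descriptor carries the identity of the client process
 * (uid/gid, filesystem ids, capabilities and supplementary groups)
 * inside the request message.  in the kernel its size varies with
 * the current task's group count, capped at LUSTRE_MAX_GROUPS.
 */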
1722 int sptlrpc_user_desc_size(void)
1723 {
1724 #ifdef __KERNEL__
1725         int ngroups = current_ngroups;
1726
1727         if (ngroups > LUSTRE_MAX_GROUPS)
1728                 ngroups = LUSTRE_MAX_GROUPS;
1729
1730         return sizeof(struct ptlrpc_user_desc) + ngroups * sizeof(__u32);
1731 #else
1732         return sizeof(struct ptlrpc_user_desc);
1733 #endif
1734 }
1735 EXPORT_SYMBOL(sptlrpc_user_desc_size);
1736
1737 int sptlrpc_pack_user_desc(struct lustre_msg *msg, int offset)
1738 {
1739         struct ptlrpc_user_desc *pud;
1740
1741         pud = lustre_msg_buf(msg, offset, 0);
1742         LASSERT(pud);
1743         pud->pud_uid = cfs_current()->uid;
1744         pud->pud_gid = cfs_current()->gid;
1745         pud->pud_fsuid = cfs_current()->fsuid;
1746         pud->pud_fsgid = cfs_current()->fsgid;
1747         pud->pud_cap = cfs_current()->cap_effective;
1748         pud->pud_ngroups = (msg->lm_buflens[offset] - sizeof(*pud)) / 4;
1749
1750 #ifdef __KERNEL__
1751         task_lock(current);
1752         if (pud->pud_ngroups > current_ngroups)
1753                 pud->pud_ngroups = current_ngroups;
1754         memcpy(pud->pud_groups, cfs_current()->group_info->blocks[0],
1755                pud->pud_ngroups * sizeof(__u32));
1756         task_unlock(current);
1757 #endif
1758
1759         return 0;
1760 }
1761 EXPORT_SYMBOL(sptlrpc_pack_user_desc);
1762
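/*
 * unpack and sanity-check a user descriptor, byte-swapping if the
 * message came from a peer of the opposite endianness.  note the
 * group array is only swabbed after its claimed length has been
 * validated against the buffer size.
 */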
1763 int sptlrpc_unpack_user_desc(struct lustre_msg *msg, int offset)
1764 {
1765         struct ptlrpc_user_desc *pud;
1766         int                      i;
1767
1768         pud = lustre_msg_buf(msg, offset, sizeof(*pud));
1769         if (!pud)
1770                 return -EINVAL;
1771
1772         if (lustre_msg_swabbed(msg)) {
1773                 __swab32s(&pud->pud_uid);
1774                 __swab32s(&pud->pud_gid);
1775                 __swab32s(&pud->pud_fsuid);
1776                 __swab32s(&pud->pud_fsgid);
1777                 __swab32s(&pud->pud_cap);
1778                 __swab32s(&pud->pud_ngroups);
1779         }
1780
1781         if (pud->pud_ngroups > LUSTRE_MAX_GROUPS) {
1782                 CERROR("%u groups is too large\n", pud->pud_ngroups);
1783                 return -EINVAL;
1784         }
1785
1786         if (sizeof(*pud) + pud->pud_ngroups * sizeof(__u32) >
1787             msg->lm_buflens[offset]) {
1788                 CERROR("%u groups are claimed but bufsize only %u\n",
1789                        pud->pud_ngroups, msg->lm_buflens[offset]);
1790                 return -EINVAL;
1791         }
1792
1793         if (lustre_msg_swabbed(msg)) {
1794                 for (i = 0; i < pud->pud_ngroups; i++)
1795                         __swab32s(&pud->pud_groups[i]);
1796         }
1797
1798         return 0;
1799 }
1800 EXPORT_SYMBOL(sptlrpc_unpack_user_desc);
1801
1802 /****************************************
1803  * Helpers to assist policy modules to  *
1804  * implement checksum functionality     *
1805  ****************************************/
1806
1807 struct {
1808         char    *name;
1809         int      size;
1810 } csum_types[] = {
1811         [BULK_CSUM_ALG_NULL]    = { "null",     0 },
1812         [BULK_CSUM_ALG_CRC32]   = { "crc32",    4 },
1813         [BULK_CSUM_ALG_MD5]     = { "md5",     16 },
1814         [BULK_CSUM_ALG_SHA1]    = { "sha1",    20 },
1815         [BULK_CSUM_ALG_SHA256]  = { "sha256",  32 },
1816         [BULK_CSUM_ALG_SHA384]  = { "sha384",  48 },
1817         [BULK_CSUM_ALG_SHA512]  = { "sha512",  64 },
1818 };
1819
1820 int bulk_sec_desc_size(__u32 csum_alg, int request, int read)
1821 {
1822         int size = sizeof(struct ptlrpc_bulk_sec_desc);
1823
1824         LASSERT(csum_alg < BULK_CSUM_ALG_MAX);
1825
1826         /* a read request needs no extra checksum data */
1827         if (!(read && request))
1828                 size += csum_types[csum_alg].size;
1829
1830         return size;
1831 }
1832 EXPORT_SYMBOL(bulk_sec_desc_size);
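
/*
 * illustrative only: given the table above, a sha1-protected bulk
 * descriptor costs sizeof(struct ptlrpc_bulk_sec_desc) + 20 bytes,
 * except in a read request, which carries no checksum payload:
 *
 *   bulk_sec_desc_size(BULK_CSUM_ALG_SHA1, 1, 1)
 *          == sizeof(struct ptlrpc_bulk_sec_desc);
 *   bulk_sec_desc_size(BULK_CSUM_ALG_SHA1, 0, 1)
 *          == sizeof(struct ptlrpc_bulk_sec_desc) + 20;
 */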
1833
1834 int bulk_sec_desc_unpack(struct lustre_msg *msg, int offset)
1835 {
1836         struct ptlrpc_bulk_sec_desc *bsd;
1837         int    size = msg->lm_buflens[offset];
1838
1839         bsd = lustre_msg_buf(msg, offset, sizeof(*bsd));
1840         if (bsd == NULL) {
1841                 CERROR("Invalid bulk sec desc: size %d\n", size);
1842                 return -EINVAL;
1843         }
1844
1845         if (lustre_msg_swabbed(msg)) {
1846                 __swab32s(&bsd->bsd_version);
1847                 __swab32s(&bsd->bsd_pad);
1848                 __swab32s(&bsd->bsd_csum_alg);
1849                 __swab32s(&bsd->bsd_priv_alg);
1850         }
1851
1852         if (bsd->bsd_version != 0) {
1853                 CERROR("Unexpected version %u\n", bsd->bsd_version);
1854                 return -EPROTO;
1855         }
1856
1857         if (bsd->bsd_csum_alg >= BULK_CSUM_ALG_MAX) {
1858                 CERROR("Unsupported checksum algorithm %u\n",
1859                        bsd->bsd_csum_alg);
1860                 return -EINVAL;
1861         }
1862         if (bsd->bsd_priv_alg >= BULK_PRIV_ALG_MAX) {
1863                 CERROR("Unsupported cipher algorithm %u\n",
1864                        bsd->bsd_priv_alg);
1865                 return -EINVAL;
1866         }
1867
1868         if (size > sizeof(*bsd) &&
1869             size < sizeof(*bsd) + csum_types[bsd->bsd_csum_alg].size) {
1870                 CERROR("Mal-formed checksum data: csum alg %u, size %d\n",
1871                        bsd->bsd_csum_alg, size);
1872                 return -EINVAL;
1873         }
1874
1875         return 0;
1876 }
1877 EXPORT_SYMBOL(bulk_sec_desc_unpack);
1878
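/*
 * checksum engines.  in the kernel, crc32 is computed by hand over
 * kmapped pages, while the other algorithms go through the crypto
 * digest API with a scatterlist built from the bulk iovs.  liblustre
 * only supports crc32.
 */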
1879 #ifdef __KERNEL__
1880 static
1881 int do_bulk_checksum_crc32(struct ptlrpc_bulk_desc *desc, void *buf)
1882 {
1883         struct page *page;
1884         int off;
1885         char *ptr;
1886         __u32 crc32 = ~0;
1887         int len, i;
1888
1889         for (i = 0; i < desc->bd_iov_count; i++) {
1890                 page = desc->bd_iov[i].kiov_page;
1891                 off = desc->bd_iov[i].kiov_offset & ~CFS_PAGE_MASK;
1892                 ptr = cfs_kmap(page) + off;
1893                 len = desc->bd_iov[i].kiov_len;
1894
1895                 crc32 = crc32_le(crc32, ptr, len);
1896
1897                 cfs_kunmap(page);
1898         }
1899
1900         *((__u32 *) buf) = crc32;
1901         return 0;
1902 }
1903
1904 static
1905 int do_bulk_checksum(struct ptlrpc_bulk_desc *desc, __u32 alg, void *buf)
1906 {
1907         struct crypto_tfm *tfm;
1908         struct scatterlist *sl;
1909         int i, rc = 0;
1910
1911         LASSERT(alg > BULK_CSUM_ALG_NULL &&
1912                 alg < BULK_CSUM_ALG_MAX);
1913
1914         if (alg == BULK_CSUM_ALG_CRC32)
1915                 return do_bulk_checksum_crc32(desc, buf);
1916
1917         tfm = crypto_alloc_tfm(csum_types[alg].name, 0);
1918         if (tfm == NULL) {
1919                 CERROR("Unable to allocate tfm %s\n", csum_types[alg].name);
1920                 return -ENOMEM;
1921         }
1922
1923         OBD_ALLOC(sl, sizeof(*sl) * desc->bd_iov_count);
1924         if (sl == NULL) {
1925                 rc = -ENOMEM;
1926                 goto out_tfm;
1927         }
1928
1929         for (i = 0; i < desc->bd_iov_count; i++) {
1930                 sl[i].page = desc->bd_iov[i].kiov_page;
1931                 sl[i].offset = desc->bd_iov[i].kiov_offset & ~CFS_PAGE_MASK;
1932                 sl[i].length = desc->bd_iov[i].kiov_len;
1933         }
1934
1935         crypto_digest_init(tfm);
1936         crypto_digest_update(tfm, sl, desc->bd_iov_count);
1937         crypto_digest_final(tfm, buf);
1938
1939         OBD_FREE(sl, sizeof(*sl) * desc->bd_iov_count);
1940
1941 out_tfm:
1942         crypto_free_tfm(tfm);
1943         return rc;
1944 }
1945
1946 #else /* !__KERNEL__ */
1947 static
1948 int do_bulk_checksum(struct ptlrpc_bulk_desc *desc, __u32 alg, void *buf)
1949 {
1950         __u32 crc32 = ~0;
1951         int i;
1952
1953         LASSERT(alg == BULK_CSUM_ALG_CRC32);
1954
1955         for (i = 0; i < desc->bd_iov_count; i++) {
1956                 char *ptr = desc->bd_iov[i].iov_base;
1957                 int len = desc->bd_iov[i].iov_len;
1958
1959                 crc32 = crc32_le(crc32, ptr, len);
1960         }
1961
1962         *((__u32 *) buf) = crc32;
1963         return 0;
1964 }
1965 #endif
1966
1967 /*
1968  * compute the @alg checksum of @desc and store it in @bsd->bsd_csum.
1969  * on failure, @bsd->bsd_csum_alg is left as BULK_CSUM_ALG_NULL.
1970  */
1971 static
1972 int generate_bulk_csum(struct ptlrpc_bulk_desc *desc, __u32 alg,
1973                        struct ptlrpc_bulk_sec_desc *bsd, int bsdsize)
1974 {
1975         int rc;
1976
1977         LASSERT(bsd);
1978         LASSERT(alg < BULK_CSUM_ALG_MAX);
1979
1980         bsd->bsd_csum_alg = BULK_CSUM_ALG_NULL;
1981
1982         if (alg == BULK_CSUM_ALG_NULL)
1983                 return 0;
1984
1985         LASSERT(bsdsize >= sizeof(*bsd) + csum_types[alg].size);
1986
1987         rc = do_bulk_checksum(desc, alg, bsd->bsd_csum);
1988         if (rc == 0)
1989                 bsd->bsd_csum_alg = alg;
1990
1991         return rc;
1992 }
1993
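/*
 * verify the checksum carried in verifier @bsdv against one freshly
 * computed over @desc.  if @bsdr is non-NULL, the locally computed
 * checksum and algorithm are copied into it as well, so a server can
 * echo what it actually saw back to the client.
 */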
1994 static
1995 int verify_bulk_csum(struct ptlrpc_bulk_desc *desc, int read,
1996                      struct ptlrpc_bulk_sec_desc *bsdv, int bsdvsize,
1997                      struct ptlrpc_bulk_sec_desc *bsdr, int bsdrsize)
1998 {
1999         char *csum_p;
2000         char *buf = NULL;
2001         int   csum_size, rc = 0;
2002
2003         LASSERT(bsdv);
2004         LASSERT(bsdv->bsd_csum_alg < BULK_CSUM_ALG_MAX);
2005
2006         if (bsdr)
2007                 bsdr->bsd_csum_alg = BULK_CSUM_ALG_NULL;
2008
2009         if (bsdv->bsd_csum_alg == BULK_CSUM_ALG_NULL)
2010                 return 0;
2011
2012         /* look up the checksum size for the verifier's algorithm */
2013         csum_size = csum_types[bsdv->bsd_csum_alg].size;
2014
2015         if (bsdvsize < sizeof(*bsdv) + csum_size) {
2016                 CERROR("verifier size %d too small, require %d\n",
2017                        bsdvsize, sizeof(*bsdv) + csum_size);
2018                 return -EINVAL;
2019         }
2020
2021         if (bsdr) {
2022                 LASSERT(bsdrsize >= sizeof(*bsdr) + csum_size);
2023                 csum_p = (char *) bsdr->bsd_csum;
2024         } else {
2025                 OBD_ALLOC(buf, csum_size);
2026                 if (buf == NULL)
2027                         return -ENOMEM;
2028                 csum_p = buf;
2029         }
2030
2031         rc = do_bulk_checksum(desc, bsdv->bsd_csum_alg, csum_p);
2032
2033         if (rc == 0 && memcmp(bsdv->bsd_csum, csum_p, csum_size)) {
2034                 CERROR("BAD %s CHECKSUM (%s), data mutated during "
2035                        "transfer!\n", read ? "READ" : "WRITE",
2036                        csum_types[bsdv->bsd_csum_alg].name);
2037                 rc = -EINVAL;
2038         } else if (rc == 0) {
2039                 CDEBUG(D_SEC, "bulk %s checksum (%s) verified\n",
2040                       read ? "read" : "write",
2041                       csum_types[bsdv->bsd_csum_alg].name);
2042         }
2043
2044         if (bsdr) {
2045                 bsdr->bsd_csum_alg = bsdv->bsd_csum_alg;
2046                 memcpy(bsdr->bsd_csum, csum_p, csum_size);
2047         } else {
2048                 LASSERT(buf);
2049                 OBD_FREE(buf, csum_size);
2050         }
2051
2052         return rc;
2053 }
2054
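/*
 * how the pieces below fit together:
 *
 *   bulk read:  the client puts only the algorithm in its request;
 *               the server checksums the pages it sends and returns
 *               the result in the reply for the client to verify.
 *   bulk write: the client checksums its pages into the request; the
 *               server verifies on receipt and echoes the checksum it
 *               computed in the reply, letting the client distinguish
 *               in-transit corruption from local page mutation.
 */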
2055 int bulk_csum_cli_request(struct ptlrpc_bulk_desc *desc, int read,
2056                           __u32 alg, struct lustre_msg *rmsg, int roff)
2057 {
2058         struct ptlrpc_bulk_sec_desc *bsdr;
2059         int    rsize, rc = 0;
2060
2061         rsize = rmsg->lm_buflens[roff];
2062         bsdr = lustre_msg_buf(rmsg, roff, sizeof(*bsdr));
2063
2064         LASSERT(bsdr);
2065         LASSERT(rsize >= sizeof(*bsdr));
2066         LASSERT(alg < BULK_CSUM_ALG_MAX);
2067
2068         if (read)
2069                 bsdr->bsd_csum_alg = alg;
2070         else {
2071                 rc = generate_bulk_csum(desc, alg, bsdr, rsize);
2072                 if (rc) {
2073                         CERROR("client bulk write: failed to perform "
2074                                "checksum: %d\n", rc);
2075                 }
2076         }
2077
2078         return rc;
2079 }
2080 EXPORT_SYMBOL(bulk_csum_cli_request);
2081
2082 int bulk_csum_cli_reply(struct ptlrpc_bulk_desc *desc, int read,
2083                         struct lustre_msg *rmsg, int roff,
2084                         struct lustre_msg *vmsg, int voff)
2085 {
2086         struct ptlrpc_bulk_sec_desc *bsdv, *bsdr;
2087         int    rsize, vsize;
2088
2089         rsize = rmsg->lm_buflens[roff];
2090         vsize = vmsg->lm_buflens[voff];
2091         bsdr = lustre_msg_buf(rmsg, roff, 0);
2092         bsdv = lustre_msg_buf(vmsg, voff, 0);
2093
2094         if (bsdv == NULL || vsize < sizeof(*bsdv)) {
2095                 CERROR("Invalid checksum verifier from server: size %d\n",
2096                        vsize);
2097                 return -EINVAL;
2098         }
2099
2100         LASSERT(bsdr);
2101         LASSERT(rsize >= sizeof(*bsdr));
2102         LASSERT(vsize >= sizeof(*bsdv));
2103
2104         if (bsdr->bsd_csum_alg != bsdv->bsd_csum_alg) {
2105                 CERROR("bulk %s: checksum algorithm mismatch: client request "
2106                        "%s but server reply with %s. try to use the new one "
2107                        "for checksum verification\n",
2108                        read ? "read" : "write",
2109                        csum_types[bsdr->bsd_csum_alg].name,
2110                        csum_types[bsdv->bsd_csum_alg].name);
2111         }
2112
2113         if (read)
2114                 return verify_bulk_csum(desc, 1, bsdv, vsize, NULL, 0);
2115         else {
2116                 char *cli, *srv, *new = NULL;
2117                 int csum_size = csum_types[bsdr->bsd_csum_alg].size;
2118
2119                 LASSERT(bsdr->bsd_csum_alg < BULK_CSUM_ALG_MAX);
2120                 if (bsdr->bsd_csum_alg == BULK_CSUM_ALG_NULL)
2121                         return 0;
2122
2123                 if (vsize < sizeof(*bsdv) + csum_size) {
2124                         CERROR("verifier size %d too small, require %d\n",
2125                                vsize, sizeof(*bsdv) + csum_size);
2126                         return -EINVAL;
2127                 }
2128
2129                 cli = (char *) (bsdr + 1);
2130                 srv = (char *) (bsdv + 1);
2131
2132                 if (!memcmp(cli, srv, csum_size)) {
2133                         /* checksum confirmed */
2134                         CDEBUG(D_SEC, "bulk write checksum (%s) confirmed\n",
2135                               csum_types[bsdr->bsd_csum_alg].name);
2136                         return 0;
2137                 }
2138
2139                 /* checksum mismatch: recompute locally and compare with
2140                  * both copies to pinpoint where the corruption happened.
2141                  */
2142                 OBD_ALLOC(new, csum_size);
2143                 if (new == NULL)
2144                         return -ENOMEM;
2145
2146                 do_bulk_checksum(desc, bsdr->bsd_csum_alg, new);
2147
2148                 if (!memcmp(new, srv, csum_size)) {
2149                         CERROR("BAD WRITE CHECKSUM (%s): pages were mutated "
2150                                "on the client after we checksummed them\n",
2151                                csum_types[bsdr->bsd_csum_alg].name);
2152                 } else if (!memcmp(new, cli, csum_size)) {
2153                         CERROR("BAD WRITE CHECKSUM (%s): pages were mutated "
2154                                "in transit\n",
2155                                csum_types[bsdr->bsd_csum_alg].name);
2156                 } else {
2157                         CERROR("BAD WRITE CHECKSUM (%s): pages were mutated "
2158                                "in transit, and the current page contents "
2159                                "don't match the originals and what the server "
2160                                "received\n",
2161                                csum_types[bsdr->bsd_csum_alg].name);
2162                 }
2163                 OBD_FREE(new, csum_size);
2164
2165                 return -EINVAL;
2166         }
2167 }
2168 EXPORT_SYMBOL(bulk_csum_cli_reply);
2169
2170 int bulk_csum_svc(struct ptlrpc_bulk_desc *desc, int read,
2171                   struct lustre_msg *vmsg, int voff,
2172                   struct lustre_msg *rmsg, int roff)
2173 {
2174         struct ptlrpc_bulk_sec_desc *bsdv, *bsdr;
2175         int    vsize, rsize, rc;
2176
2177         vsize = vmsg->lm_buflens[voff];
2178         rsize = rmsg->lm_buflens[roff];
2179         bsdv = lustre_msg_buf(vmsg, voff, 0);
2180         bsdr = lustre_msg_buf(rmsg, roff, 0);
2181
2182         LASSERT(vsize >= sizeof(*bsdv));
2183         LASSERT(rsize >= sizeof(*bsdr));
2184         LASSERT(bsdv && bsdr);
2185
2186         if (read) {
2187                 rc = generate_bulk_csum(desc, bsdv->bsd_csum_alg, bsdr, rsize);
2188                 if (rc)
2189                         CERROR("bulk read: server failed to generate %s "
2190                                "checksum: %d\n",
2191                                csum_types[bsdv->bsd_csum_alg].name, rc);
2192         } else
2193                 rc = verify_bulk_csum(desc, 0, bsdv, vsize, bsdr, rsize);
2194
2195         return rc;
2196 }
2197 EXPORT_SYMBOL(bulk_csum_svc);
2198
2199 /****************************************
2200  * user supplied flavor string parsing  *
2201  ****************************************/
2202
2203 static
2204 int get_default_flavor(enum lustre_part to_part, struct sec_flavor_config *conf)
2205 {
2206         conf->sfc_bulk_priv = BULK_PRIV_ALG_NULL;
2207         conf->sfc_bulk_csum = BULK_CSUM_ALG_NULL;
2208         conf->sfc_flags = 0;
2209
2210         switch (to_part) {
2211         case LUSTRE_MDT:
2212                 conf->sfc_rpc_flavor = SPTLRPC_FLVR_PLAIN;
2213                 return 0;
2214         case LUSTRE_OST:
2215                 conf->sfc_rpc_flavor = SPTLRPC_FLVR_NULL;
2216                 return 0;
2217         default:
2218                 CERROR("Unknown to lustre part %d, apply defaults\n", to_part);
2219                 conf->sfc_rpc_flavor = SPTLRPC_FLVR_NULL;
2220                 return -EINVAL;
2221         }
2222 }
2223
2224 static
2225 void get_flavor_by_rpc(__u32 rpc_flavor, struct sec_flavor_config *conf)
2226 {
2227         conf->sfc_rpc_flavor = rpc_flavor;
2228         conf->sfc_bulk_priv = BULK_PRIV_ALG_NULL;
2229         conf->sfc_bulk_csum = BULK_CSUM_ALG_NULL;
2230         conf->sfc_flags = 0;
2231
2232         switch (rpc_flavor) {
2233         case SPTLRPC_FLVR_NULL:
2234         case SPTLRPC_FLVR_PLAIN:
2235                 break;
2236         case SPTLRPC_FLVR_KRB5P:
2237                 conf->sfc_bulk_priv = BULK_PRIV_ALG_ARC4;
2238                 /* fall through */
2239         case SPTLRPC_FLVR_KRB5I:
2240                 conf->sfc_bulk_csum = BULK_CSUM_ALG_SHA1;
2241                 break;
2242         default:
2243                 LBUG();
2244         }
2245 }
2246
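/*
 * an explicit "bulk" suffix in the flavor string overrides the
 * per-flavor defaults chosen by get_flavor_by_rpc() above.
 */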
2247 static
2248 void get_flavor_by_rpc_bulk(__u32 rpc_flavor, int bulk_priv,
2249                             struct sec_flavor_config *conf)
2250 {
2251         if (bulk_priv)
2252                 conf->sfc_bulk_priv = BULK_PRIV_ALG_ARC4;
2253         else
2254                 conf->sfc_bulk_priv = BULK_PRIV_ALG_NULL;
2255
2256         switch (rpc_flavor) {
2257         case SPTLRPC_FLVR_PLAIN:
2258                 conf->sfc_bulk_csum = BULK_CSUM_ALG_MD5;
2259                 break;
2260         case SPTLRPC_FLVR_KRB5I:
2261         case SPTLRPC_FLVR_KRB5P:
2262                 conf->sfc_bulk_csum = BULK_CSUM_ALG_SHA1;
2263                 break;
2264         default:
2265                 LBUG();
2266         }
2267 }
2268
2269 static __u32 __flavors[] = {
2270         SPTLRPC_FLVR_NULL,
2271         SPTLRPC_FLVR_PLAIN,
2272         SPTLRPC_FLVR_KRB5I,
2273         SPTLRPC_FLVR_KRB5P,
2274 };
2275
2276 #define __nflavors      (sizeof(__flavors)/sizeof(__u32))
2277
2278 /*
2279  * flavor string format: rpc[-bulk{n|i|p}[:cksum[/enc]]]
2280  * for example:
2281  *  null
2282  *  plain-bulki
2283  *  krb5p-bulkn
2284  *  krb5i-bulkp
2285  *  krb5i-bulkp:sha512/arc4
2286  */
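/*
 * a minimal usage sketch (hypothetical caller, little-endian host):
 *
 *   struct sec_flavor_config conf;
 *
 *   rc = sptlrpc_parse_flavor(LUSTRE_MDT, LUSTRE_OST,
 *                             "krb5i-bulkp:sha512/arc4", &conf);
 *
 * on success conf.sfc_rpc_flavor is SPTLRPC_FLVR_KRB5I, sfc_bulk_csum
 * is BULK_CSUM_ALG_SHA512, sfc_bulk_priv is BULK_PRIV_ALG_ARC4, and
 * PTLRPC_SEC_FL_ROOTONLY is set because the target is an OST.
 */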
2287 int sptlrpc_parse_flavor(enum lustre_part from_part, enum lustre_part to_part,
2288                          char *str, struct sec_flavor_config *conf)
2289 {
2290         char   *f, *bulk, *alg, *enc;
2291         char    buf[64];
2292         int     i, bulk_priv;
2293         ENTRY;
2294
2295         if (str == NULL) {
2296                 if (get_default_flavor(to_part, conf))
2297                         return -EINVAL;
2298                 goto set_flags;
2299         }
2300
2301         for (i = 0; i < __nflavors; i++) {
2302                 f = sptlrpc_flavor2name(__flavors[i]);
2303                 if (strncmp(str, f, strlen(f)) == 0)
2304                         break;
2305         }
2306
2307         if (i >= __nflavors)
2308                 GOTO(invalid, -EINVAL);
2309
2310         /* copy into a local buffer so we can modify it */
2311         strncpy(buf, str, sizeof(buf));
2312         buf[sizeof(buf) - 1] = '\0';
2313
2314         /* find bulk string */
2315         bulk = strchr(buf, '-');
2316         if (bulk)
2317                 *bulk++ = '\0';
2318
2319         /* the first token must match the rpc flavor name exactly */
2320         if (strcmp(buf, f) != 0)
2321                 GOTO(invalid, -EINVAL);
2322
2323         get_flavor_by_rpc(__flavors[i], conf);
2324
2325         if (bulk == NULL)
2326                 goto set_flags;
2327
2328         /* null flavor should not have any suffix */
2329         if (__flavors[i] == SPTLRPC_FLVR_NULL)
2330                 GOTO(invalid, -EINVAL);
2331
2332         /* find bulk algorithm string */
2333         alg = strchr(bulk, ':');
2334         if (alg)
2335                 *alg++ = '\0';
2336
2337         /* verify bulk section */
2338         if (strcmp(bulk, "bulkn") == 0) {
2339                 conf->sfc_bulk_csum = BULK_CSUM_ALG_NULL;
2340                 conf->sfc_bulk_priv = BULK_PRIV_ALG_NULL;
2341                 goto set_flags;
2342         }
2343
2344         if (strcmp(bulk, "bulki") == 0)
2345                 bulk_priv = 0;
2346         else if (strcmp(bulk, "bulkp") == 0)
2347                 bulk_priv = 1;
2348         else
2349                 GOTO(invalid, -EINVAL);
2350
2351         /* the plain policy doesn't support bulk encryption */
2352         if (bulk_priv && __flavors[i] == SPTLRPC_FLVR_PLAIN)
2353                 GOTO(invalid, -EINVAL);
2354
2355         get_flavor_by_rpc_bulk(__flavors[i], bulk_priv, conf);
2356
2357         if (alg == NULL)
2358                 goto set_flags;
2359
2360         /* find encryption algorithm string */
2361         enc = strchr(alg, '/');
2362         if (enc)
2363                 *enc++ = '\0';
2364
2365         /* bulk combination sanity check */
2366         if ((bulk_priv && enc == NULL) || (bulk_priv == 0 && enc))
2367                 GOTO(invalid, -EINVAL);
2368
2369         /* checksum algorithm */
2370         for (i = 0; i < BULK_CSUM_ALG_MAX; i++) {
2371                 if (strcmp(alg, csum_types[i].name) == 0) {
2372                         conf->sfc_bulk_csum = i;
2373                         break;
2374                 }
2375         }
2376         if (i >= BULK_CSUM_ALG_MAX)
2377                 GOTO(invalid, -EINVAL);
2378
2379         /* privacy algorithm */
2380         if (enc) {
2381                 if (strcmp(enc, "arc4") != 0)
2382                         GOTO(invalid, -EINVAL);
2383                 conf->sfc_bulk_priv = BULK_PRIV_ALG_ARC4;
2384         }
2385
2386 set_flags:
2387         /* set the ROOTONLY flag for connections:
2388          *  - to an OST
2389          *  - from one MDT to another MDT
2390          */
2391         if ((to_part == LUSTRE_MDT && from_part == LUSTRE_MDT) ||
2392             to_part == LUSTRE_OST)
2393                 conf->sfc_flags |= PTLRPC_SEC_FL_ROOTONLY;
2394
2395 #ifdef __BIG_ENDIAN
2396         __swab32s(&conf->sfc_rpc_flavor);
2397         __swab32s(&conf->sfc_bulk_csum);
2398         __swab32s(&conf->sfc_bulk_priv);
2399         __swab32s(&conf->sfc_flags);
2400 #endif
2401         return 0;
2402 invalid:
2403         CERROR("invalid flavor string: %s\n", str);
2404         return -EINVAL;
2405 }
2406 EXPORT_SYMBOL(sptlrpc_parse_flavor);
2407
2408 /****************************************
2409  * misc helpers                         *
2410  ****************************************/
2411
2412 const char * sec2target_str(struct ptlrpc_sec *sec)
2413 {
2414         if (!sec || !sec->ps_import || !sec->ps_import->imp_obd)
2415                 return "*";
2416         if (sec->ps_flags & PTLRPC_SEC_FL_REVERSE)
2417                 return "c";
2418         return obd_uuid2str(&sec->ps_import->imp_obd->u.cli.cl_target_uuid);
2419 }
2420 EXPORT_SYMBOL(sec2target_str);
2421
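/*
 * /proc read handler: report the configured flavor, bulk algorithms
 * and flags of a client obd, plus live context-cache and gc state
 * from its import's security instance, if one is attached.
 */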
2422 int sptlrpc_lprocfs_rd(char *page, char **start, off_t off, int count,
2423                        int *eof, void *data)
2424 {
2425         struct obd_device        *obd = data;
2426         struct sec_flavor_config *conf = &obd->u.cli.cl_sec_conf;
2427         struct ptlrpc_sec        *sec = NULL;
2428         char                      flags_str[20];
2429
2430         if (obd == NULL)
2431                 return 0;
2432
2433         LASSERT(strcmp(obd->obd_type->typ_name, LUSTRE_OSC_NAME) == 0 ||
2434                 strcmp(obd->obd_type->typ_name, LUSTRE_MDC_NAME) == 0 ||
2435                 strcmp(obd->obd_type->typ_name, LUSTRE_MGC_NAME) == 0);
2436         LASSERT(conf->sfc_bulk_csum < BULK_CSUM_ALG_MAX);
2437         LASSERT(conf->sfc_bulk_priv < BULK_PRIV_ALG_MAX);
2438
2439         if (obd->u.cli.cl_import)
2440                 sec = obd->u.cli.cl_import->imp_sec;
2441
2442         flags_str[0] = '\0';    /* big enough for "reverse,rootonly," */
2443         if (conf->sfc_flags & PTLRPC_SEC_FL_REVERSE)
2444                 strcat(flags_str, "reverse,");
2445         if (conf->sfc_flags & PTLRPC_SEC_FL_ROOTONLY)
2446                 strcat(flags_str, "rootonly,");
2447         if (flags_str[0] != '\0')
2448                 flags_str[strlen(flags_str) - 1] = '\0';
2449
2450         return snprintf(page, count,
2451                         "rpc_flavor:  %s\n"
2452                         "bulk_flavor: %s checksum, %s encryption\n"
2453                         "flags:       %s\n"
2454                         "ctx_cache:   size %u, busy %d\n"
2455                         "gc:          interval %lus, next %lds\n",
2456                         sptlrpc_flavor2name(conf->sfc_rpc_flavor),
2457                         csum_types[conf->sfc_bulk_csum].name,
2458                         conf->sfc_bulk_priv == BULK_PRIV_ALG_NULL ?
2459                         "null" : "arc4", /* XXX arc4 is the only cipher */
2460                         flags_str,
2461                         sec ? sec->ps_ccache_size : 0,
2462                         sec ? atomic_read(&sec->ps_busy) : 0,
2463                         sec ? sec->ps_gc_interval : 0,
2464                         sec ? (sec->ps_gc_interval ?
2465                                sec->ps_gc_next - cfs_time_current_sec() : 0)
2466                               : 0);
2467 }
2468 EXPORT_SYMBOL(sptlrpc_lprocfs_rd);
2469
2470
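/*
 * bring up the two built-in policies; if "plain" fails to initialize,
 * "null" is torn down again so the module loads all-or-nothing.
 */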
2471 int sptlrpc_init(void)
2472 {
2473         int rc;
2474
2475         rc = sptlrpc_null_init();
2476         if (rc)
2477                 goto out;
2478
2479         rc = sptlrpc_plain_init();
2480         if (rc)
2481                 goto out_null;
2482         return 0;
2483
2484 out_null:
2485         sptlrpc_null_exit();
2486 out:
2487         return rc;
2488 }
2489
2490 int sptlrpc_exit(void)
2491 {
2492         sptlrpc_plain_exit();
2493         sptlrpc_null_exit();
2494         return 0;
2495 }