lustre/lvfs/upcall_cache.c
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
 * vim:expandtab:shiftwidth=8:tabstop=8:
 *
 *  Supplementary groups cache.
 *
 *  Copyright (c) 2004 Cluster File Systems, Inc.
 *
 *   This file is part of Lustre, http://www.lustre.org.
 *
 *   Lustre is free software; you can redistribute it and/or
 *   modify it under the terms of version 2 of the GNU General Public
 *   License as published by the Free Software Foundation.
 *
 *   Lustre is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with Lustre; if not, write to the Free Software
 *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#define DEBUG_SUBSYSTEM S_SEC

#ifdef HAVE_KERNEL_CONFIG_H
#include <linux/config.h>
#endif
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/kmod.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/errno.h>
#include <linux/version.h>
#include <linux/unistd.h>

#include <asm/system.h>
#include <asm/uaccess.h>

#include <linux/fs.h>
#include <linux/slab.h>
#include <asm/segment.h>

#include <obd_support.h>
#include <lustre_lib.h>

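/*
 * Kernels older than 2.6.4 have no group_info allocation helpers, so
 * provide minimal replacements backed by the single small_block array.
 */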
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4)
struct group_info *groups_alloc(int ngroups)
{
        struct group_info *ginfo;

        LASSERT(ngroups <= NGROUPS_SMALL);

        OBD_ALLOC(ginfo, sizeof(*ginfo) + 1 * sizeof(gid_t *));
        if (!ginfo)
                return NULL;
        ginfo->ngroups = ngroups;
        ginfo->nblocks = 1;
        ginfo->blocks[0] = ginfo->small_block;
        atomic_set(&ginfo->usage, 1);

        return ginfo;
}

void groups_free(struct group_info *ginfo)
{
        LASSERT(ginfo->ngroups <= NGROUPS_SMALL);
        LASSERT(ginfo->nblocks == 1);
        LASSERT(ginfo->blocks[0] == ginfo->small_block);

        OBD_FREE(ginfo, sizeof(*ginfo) + 1 * sizeof(gid_t *));
}
#endif

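/* Allocate a new, unhashed entry for @key in the NEW state. */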
static struct upcall_cache_entry *alloc_entry(__u64 key)
{
        struct upcall_cache_entry *entry;

        OBD_ALLOC(entry, sizeof(*entry));
        if (!entry)
                return NULL;

        UC_CACHE_SET_NEW(entry);
        INIT_LIST_HEAD(&entry->ue_hash);
        entry->ue_key = key;
        atomic_set(&entry->ue_refcount, 0);
        init_waitqueue_head(&entry->ue_waitq);
        return entry;
}

/* protected by hash lock */
static void free_entry(struct upcall_cache_entry *entry)
{
        if (entry->ue_group_info)
                groups_free(entry->ue_group_info);
        list_del(&entry->ue_hash);
        CDEBUG(D_OTHER, "destroy cache entry %p for key "LPU64"\n",
               entry, entry->ue_key);
        OBD_FREE(entry, sizeof(*entry));
}

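/* Take a reference on @entry. */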
static void get_entry(struct upcall_cache_entry *entry)
{
        atomic_inc(&entry->ue_refcount);
}

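/* Drop a reference; an invalid or expired entry is freed on its last put. */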
static void put_entry(struct upcall_cache_entry *entry)
{
        if (atomic_dec_and_test(&entry->ue_refcount) &&
            (UC_CACHE_IS_INVALID(entry) || UC_CACHE_IS_EXPIRED(entry))) {
                free_entry(entry);
        }
}

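/*
 * Unhash @entry if it is no longer usable (invalid, expired, or its upcall
 * timed out).  Returns 1 if the entry was unlinked and possibly freed,
 * 0 if it is still valid.  Called with the hash lock held.
 */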
static int check_unlink_entry(struct upcall_cache_entry *entry)
{
        if (UC_CACHE_IS_VALID(entry) &&
            time_before(jiffies, entry->ue_expire))
                return 0;

        if (UC_CACHE_IS_ACQUIRING(entry)) {
                if (time_before(jiffies, entry->ue_acquire_expire))
                        return 0;

                UC_CACHE_SET_EXPIRED(entry);
                wake_up_all(&entry->ue_waitq);
        } else if (!UC_CACHE_IS_INVALID(entry)) {
                UC_CACHE_SET_EXPIRED(entry);
        }

        list_del_init(&entry->ue_hash);
        if (!atomic_read(&entry->ue_refcount))
                free_entry(entry);
        return 1;
}

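/*
 * Invoke the user space upcall (uc_upcall) for @entry's key; the helper
 * is expected to answer through upcall_cache_downcall().
 */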
static int refresh_entry(struct upcall_cache *hash,
                         struct upcall_cache_entry *entry)
{
        char *argv[4];
        char *envp[3];
        char keystr[16];
        int rc;
        ENTRY;

        snprintf(keystr, 16, LPU64, entry->ue_key);

        CDEBUG(D_INFO, "The groups upcall is: %s\n", hash->uc_upcall);
        argv[0] = hash->uc_upcall;
        argv[1] = hash->uc_name;
        argv[2] = keystr;
        argv[3] = NULL;

        envp[0] = "HOME=/";
        envp[1] = "PATH=/sbin:/usr/sbin";
        envp[2] = NULL;

        rc = USERMODEHELPER(argv[0], argv, envp);
        if (rc < 0) {
                CERROR("%s: error invoking getgroups upcall %s %s %s: rc %d; "
                       "check /proc/fs/lustre/mds/%s/group_upcall\n",
                       hash->uc_name, argv[0], argv[1], argv[2], rc, argv[1]);
        } else {
                CDEBUG(D_HA, "%s: invoked upcall %s %s %s\n", hash->uc_name,
                       argv[0], argv[1], argv[2]);
                rc = 0;
        }
        RETURN(rc);
}

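/*
 * Fill entry->ue_group_info from @groups (capped at NGROUPS_MAX) and
 * record the primary gid.
 */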
static int entry_set_group_info(struct upcall_cache_entry *entry, __u32 primary,
                                __u32 ngroups, __u32 *groups)
{
        struct group_info *ginfo;
        int i, j;
        ENTRY;

#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4)
        if (ngroups > NGROUPS)
                ngroups = NGROUPS;
#endif

        if (ngroups > NGROUPS_MAX) {
                CERROR("using first %d supplementary groups for uid "LPU64"\n",
                       NGROUPS_MAX, entry->ue_key);
                ngroups = NGROUPS_MAX;
        }

        ginfo = groups_alloc(ngroups);
        if (!ginfo) {
                CERROR("uid "LPU64" update can't alloc ginfo for %d groups\n",
                       entry->ue_key, ngroups);
                RETURN(-ENOMEM);
        }
        entry->ue_group_info = ginfo;
        entry->ue_primary = primary;

        for (i = 0; i < ginfo->nblocks; i++) {
                int cp_count = min(NGROUPS_PER_BLOCK, (int)ngroups);
                int off = i * NGROUPS_PER_BLOCK;

                for (j = 0; j < cp_count; j++)
                        ginfo->blocks[i][j] = groups[off + j];

                ngroups -= cp_count;
        }
        RETURN(0);
}

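/*
 * Look up (or create) the cache entry for @key and return it with a
 * reference held.  A new entry triggers the upcall, and the caller sleeps
 * until the downcall arrives or uc_acquire_expire elapses.  When the
 * upcall is "NONE", a throw-away entry is built directly from the groups
 * passed in and is never cached.
 */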
struct upcall_cache_entry *upcall_cache_get_entry(struct upcall_cache *hash,
                                                  __u64 key, __u32 primary,
                                                  __u32 ngroups, __u32 *groups)
{
        struct upcall_cache_entry *entry = NULL, *new = NULL, *next;
        struct list_head *head;
        wait_queue_t wait;
        int rc, found;
        ENTRY;

        LASSERT(hash);

        if (strcmp(hash->uc_upcall, "NONE") == 0) {
                new = alloc_entry(key);
                if (!new) {
                        CERROR("failed to alloc entry\n");
                        RETURN(NULL);
                }
                get_entry(new);

                /* We have to sort the groups for 2.6 kernels */
                LASSERT(ngroups <= 2);
                if (ngroups == 2 && groups[1] == -1)
                        ngroups--;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,4)
                /* 2.6 needs groups array sorted */
                if (ngroups == 2 && groups[0] > groups[1]) {
                        __u32 tmp = groups[1];
                        groups[1] = groups[0];
                        groups[0] = tmp;
                }
#endif
                if (ngroups > 0 && groups[0] == -1) {
                        groups[0] = groups[1];
                        ngroups--;
                }

                rc = entry_set_group_info(new, primary, ngroups, groups);

                /* We can't cache this entry as it only has a subset of
                 * the user's groups, as sent in suppgid1, suppgid2. */
                UC_CACHE_SET_EXPIRED(new);
                RETURN(new);
        }
        head = &hash->uc_hashtable[UC_CACHE_HASH_INDEX(key)];
find_again:
        found = 0;
        spin_lock(&hash->uc_lock);
        list_for_each_entry_safe(entry, next, head, ue_hash) {
                /* check invalid & expired items */
                if (check_unlink_entry(entry))
                        continue;
                if (entry->ue_key == key) {
                        found = 1;
                        break;
                }
        }

        if (!found) { /* didn't find it */
                if (!new) {
                        spin_unlock(&hash->uc_lock);
                        new = alloc_entry(key);
                        if (!new) {
                                CERROR("failed to alloc entry\n");
                                RETURN(ERR_PTR(-ENOMEM));
                        }
                        goto find_again;
                } else {
                        list_add(&new->ue_hash, head);
                        entry = new;
                }
        } else {
                if (new) {
                        free_entry(new);
                        new = NULL;
                }
                list_move(&entry->ue_hash, head);
        }
        get_entry(entry);

        /* acquire for new one */
        if (UC_CACHE_IS_NEW(entry)) {
                UC_CACHE_SET_ACQUIRING(entry);
                UC_CACHE_CLEAR_NEW(entry);
                entry->ue_acquire_expire = jiffies + hash->uc_acquire_expire;
                spin_unlock(&hash->uc_lock);
                rc = refresh_entry(hash, entry);
                spin_lock(&hash->uc_lock);
                if (rc < 0) {
                        UC_CACHE_CLEAR_ACQUIRING(entry);
                        UC_CACHE_SET_INVALID(entry);
                }
                /* fall through */
        }
        /* someone (and only one) is doing the upcall on this item,
         * just wait for it to complete
         */
        if (UC_CACHE_IS_ACQUIRING(entry)) {
                init_waitqueue_entry(&wait, current);
                add_wait_queue(&entry->ue_waitq, &wait);
                set_current_state(TASK_INTERRUPTIBLE);
                spin_unlock(&hash->uc_lock);

                schedule_timeout(hash->uc_acquire_expire);

                spin_lock(&hash->uc_lock);
                remove_wait_queue(&entry->ue_waitq, &wait);
                if (UC_CACHE_IS_ACQUIRING(entry)) {
                        static unsigned long next;
                        /* we were interrupted or the upcall failed midway */
                        if (time_after(jiffies, next)) {
                                CERROR("acquire timeout exceeded for key "LPU64
                                       "\n", entry->ue_key);
                                next = jiffies + 1800;
                        }
                        put_entry(entry);
                        GOTO(out, entry = ERR_PTR(-EIDRM));
                }
                /* fall through */
        }

        /* invalid means error, no need to try again */
        if (UC_CACHE_IS_INVALID(entry)) {
                put_entry(entry);
                GOTO(out, entry = ERR_PTR(-EIDRM));
        }

        /* check expired
         * We can't refresh the existing one because some
         * memory might be shared by multiple processes.
         */
        if (check_unlink_entry(entry)) {
                /* If expired, try again.  But if the entry was just created
                 * by us and expired immediately without any error, it should
                 * at least get one chance to be used.
                 */
                if (entry != new) {
                        put_entry(entry);
                        spin_unlock(&hash->uc_lock);
                        new = NULL;
                        goto find_again;
                }
        }

        /* Now we know it's good */
out:
        spin_unlock(&hash->uc_lock);
        RETURN(entry);
}
EXPORT_SYMBOL(upcall_cache_get_entry);

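/* Release a reference obtained from upcall_cache_get_entry(). */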
void upcall_cache_put_entry(struct upcall_cache *hash,
                            struct upcall_cache_entry *entry)
{
        ENTRY;

        if (!entry) {
                EXIT;
                return;
        }

        LASSERT(atomic_read(&entry->ue_refcount) > 0);
        spin_lock(&hash->uc_lock);
        put_entry(entry);
        spin_unlock(&hash->uc_lock);
        EXIT;
}
EXPORT_SYMBOL(upcall_cache_put_entry);

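/*
 * Downcall handler: the user space helper reports the groups for @key here.
 * On success the matching ACQUIRING entry is filled in, marked VALID and
 * given uc_entry_expire jiffies to live; any waiters are woken up.
 */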
int upcall_cache_downcall(struct upcall_cache *hash, __u32 err, __u64 key,
                          __u32 primary, __u32 ngroups, __u32 *groups)
{
        struct upcall_cache_entry *entry = NULL;
        struct list_head *head;
        int found = 0, rc = 0;
        ENTRY;

        LASSERT(hash);

        head = &hash->uc_hashtable[UC_CACHE_HASH_INDEX(key)];

        spin_lock(&hash->uc_lock);
        list_for_each_entry(entry, head, ue_hash) {
                if (entry->ue_key == key) {
                        found = 1;
                        get_entry(entry);
                        break;
                }
        }

        if (!found) {
                CDEBUG(D_OTHER, "%s: upcall for key "LPU64" not expected\n",
                       hash->uc_name, key);
                /* not finding the entry here is possible and not an error */
                spin_unlock(&hash->uc_lock);
                RETURN(-EINVAL);
        }

        if (err) {
                CDEBUG(D_OTHER, "%s: upcall for key "LPU64" returned %d\n",
                       hash->uc_name, entry->ue_key, err);
                GOTO(out, rc = -EINVAL);
        }

        if (!UC_CACHE_IS_ACQUIRING(entry)) {
                CDEBUG(D_HA, "%s: found uptodate entry %p (key "LPU64")\n",
                       hash->uc_name, entry, entry->ue_key);
                GOTO(out, rc = 0);
        }

        if (UC_CACHE_IS_INVALID(entry) || UC_CACHE_IS_EXPIRED(entry)) {
                CERROR("%s: found a stale entry %p (key "LPU64") in ioctl\n",
                       hash->uc_name, entry, entry->ue_key);
                GOTO(out, rc = -EINVAL);
        }

        spin_unlock(&hash->uc_lock);
        rc = entry_set_group_info(entry, primary, ngroups, groups);
        spin_lock(&hash->uc_lock);
        if (rc)
                GOTO(out, rc);

        entry->ue_expire = jiffies + hash->uc_entry_expire;
        UC_CACHE_SET_VALID(entry);
        CDEBUG(D_OTHER, "%s: created upcall cache entry %p for key "LPU64"\n",
               hash->uc_name, entry, entry->ue_key);
out:
        if (rc) {
                UC_CACHE_SET_INVALID(entry);
                list_del_init(&entry->ue_hash);
        }
        UC_CACHE_CLEAR_ACQUIRING(entry);
        spin_unlock(&hash->uc_lock);
        wake_up_all(&entry->ue_waitq);
        put_entry(entry);

        RETURN(rc);
}
EXPORT_SYMBOL(upcall_cache_downcall);

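/*
 * Walk every hash chain and free the entries.  Without @force, busy
 * entries are only marked EXPIRED so they are reclaimed on their final put.
 */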
static void cache_flush(struct upcall_cache *hash, int force)
{
        struct upcall_cache_entry *entry, *next;
        int i;
        ENTRY;

        spin_lock(&hash->uc_lock);
        for (i = 0; i < UC_CACHE_HASH_SIZE; i++) {
                list_for_each_entry_safe(entry, next,
                                         &hash->uc_hashtable[i], ue_hash) {
                        if (!force && atomic_read(&entry->ue_refcount)) {
                                UC_CACHE_SET_EXPIRED(entry);
                                continue;
                        }
                        LASSERT(!atomic_read(&entry->ue_refcount));
                        free_entry(entry);
                }
        }
        spin_unlock(&hash->uc_lock);
        EXIT;
}

void upcall_cache_flush_idle(struct upcall_cache *cache)
{
        cache_flush(cache, 0);
}
EXPORT_SYMBOL(upcall_cache_flush_idle);

void upcall_cache_flush_all(struct upcall_cache *cache)
{
        cache_flush(cache, 1);
}
EXPORT_SYMBOL(upcall_cache_flush_all);

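/*
 * Allocate and initialize a cache named @name.  The upcall defaults to
 * "NONE", entries live for 10 minutes and pending upcalls for 15 seconds;
 * all of these are proc tunable.
 */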
struct upcall_cache *upcall_cache_init(const char *name)
{
        struct upcall_cache *hash;
        int i;
        ENTRY;

        OBD_ALLOC(hash, sizeof(*hash));
        if (!hash)
                RETURN(ERR_PTR(-ENOMEM));

        spin_lock_init(&hash->uc_lock);
        for (i = 0; i < UC_CACHE_HASH_SIZE; i++)
                INIT_LIST_HEAD(&hash->uc_hashtable[i]);
        strncpy(hash->uc_name, name, sizeof(hash->uc_name) - 1);
        /* set default values, proc tunable */
        strcpy(hash->uc_upcall, "NONE");
        hash->uc_entry_expire = 10 * 60 * HZ;
        hash->uc_acquire_expire = 15 * HZ;

        RETURN(hash);
}
EXPORT_SYMBOL(upcall_cache_init);

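/* Flush all entries and free the cache itself. */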
void upcall_cache_cleanup(struct upcall_cache *hash)
{
        if (!hash)
                return;
        upcall_cache_flush_all(hash);
        OBD_FREE(hash, sizeof(*hash));
}
EXPORT_SYMBOL(upcall_cache_cleanup);