lustre/lvfs/upcall_cache.c
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
 * vim:expandtab:shiftwidth=8:tabstop=8:
 *
 *  Supplementary groups cache.
 *
 *  Copyright (c) 2004 Cluster File Systems, Inc.
 *
 *   This file is part of Lustre, http://www.lustre.org.
 *
 *   Lustre is free software; you can redistribute it and/or
 *   modify it under the terms of version 2 of the GNU General Public
 *   License as published by the Free Software Foundation.
 *
 *   Lustre is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with Lustre; if not, write to the Free Software
 *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#define DEBUG_SUBSYSTEM S_SEC

#include <linux/config.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/kmod.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/errno.h>
#include <linux/version.h>
#include <linux/unistd.h>

#include <asm/system.h>
#include <asm/uaccess.h>

#include <linux/fs.h>
#include <linux/slab.h>
#include <asm/segment.h>

#include <obd_support.h>
#include <lustre_lib.h>

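/*
 * Compatibility helpers for kernels older than 2.6.4, which do not provide
 * groups_alloc()/groups_free().  Only the single small_block case
 * (ngroups <= NGROUPS_SMALL) is supported here.
 */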
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4)
struct group_info *groups_alloc(int ngroups)
{
        struct group_info *ginfo;

        LASSERT(ngroups <= NGROUPS_SMALL);

        OBD_ALLOC(ginfo, sizeof(*ginfo) + 1 * sizeof(gid_t *));
        if (!ginfo)
                return NULL;
        ginfo->ngroups = ngroups;
        ginfo->nblocks = 1;
        ginfo->blocks[0] = ginfo->small_block;
        atomic_set(&ginfo->usage, 1);

        return ginfo;
}

void groups_free(struct group_info *ginfo)
{
        LASSERT(ginfo->ngroups <= NGROUPS_SMALL);
        LASSERT(ginfo->nblocks == 1);
        LASSERT(ginfo->blocks[0] == ginfo->small_block);

        OBD_FREE(ginfo, sizeof(*ginfo) + 1 * sizeof(gid_t *));
}
#endif

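/* Allocate a cache entry for @key in the NEW state with no references held;
 * hash insertion, if any, is left to the caller (done under uc_lock). */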
static struct upcall_cache_entry *alloc_entry(__u64 key)
{
        struct upcall_cache_entry *entry;

        OBD_ALLOC(entry, sizeof(*entry));
        if (!entry)
                return NULL;

        UC_CACHE_SET_NEW(entry);
        INIT_LIST_HEAD(&entry->ue_hash);
        entry->ue_key = key;
        atomic_set(&entry->ue_refcount, 0);
        init_waitqueue_head(&entry->ue_waitq);
        return entry;
}

/* protected by hash lock */
static void free_entry(struct upcall_cache_entry *entry)
{
        if (entry->ue_group_info)
                groups_free(entry->ue_group_info);
        list_del(&entry->ue_hash);
        CDEBUG(D_OTHER, "destroy cache entry %p for key "LPU64"\n",
               entry, entry->ue_key);
        OBD_FREE(entry, sizeof(*entry));
}

static void get_entry(struct upcall_cache_entry *entry)
{
        atomic_inc(&entry->ue_refcount);
}

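/* Drop a reference; once the count hits zero the entry is freed, provided it
 * has already been marked invalid or expired. */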
static void put_entry(struct upcall_cache_entry *entry)
{
        if (atomic_dec_and_test(&entry->ue_refcount) &&
            (UC_CACHE_IS_INVALID(entry) || UC_CACHE_IS_EXPIRED(entry))) {
                free_entry(entry);
        }
}

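/* Unlink @entry from the hash unless it is still valid or its upcall is
 * still within the acquire window; acquiring entries that timed out have
 * their waiters woken.  Returns 1 if the entry was unlinked (and freed when
 * idle), 0 if it is still usable.  Caller holds uc_lock. */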
static int check_unlink_entry(struct upcall_cache_entry *entry)
{
        if (UC_CACHE_IS_VALID(entry) &&
            time_before(jiffies, entry->ue_expire))
                return 0;

        if (UC_CACHE_IS_ACQUIRING(entry)) {
                if (time_before(jiffies, entry->ue_acquire_expire))
                        return 0;

                UC_CACHE_SET_EXPIRED(entry);
                wake_up_all(&entry->ue_waitq);
        } else if (!UC_CACHE_IS_INVALID(entry)) {
                UC_CACHE_SET_EXPIRED(entry);
        }

        list_del_init(&entry->ue_hash);
        if (!atomic_read(&entry->ue_refcount))
                free_entry(entry);
        return 1;
}

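/* Kick off the user-space upcall "<uc_upcall> <uc_name> <key>" to resolve
 * @entry; the result arrives later through upcall_cache_downcall(). */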
static int refresh_entry(struct upcall_cache *hash,
                         struct upcall_cache_entry *entry)
{
        char *argv[4];
        char *envp[3];
        char keystr[16];
        int rc;
        ENTRY;

        snprintf(keystr, sizeof(keystr), LPU64, entry->ue_key);

        CDEBUG(D_INFO, "The groups upcall is: %s\n", hash->uc_upcall);
        argv[0] = hash->uc_upcall;
        argv[1] = hash->uc_name;
        argv[2] = keystr;
        argv[3] = NULL;

        envp[0] = "HOME=/";
        envp[1] = "PATH=/sbin:/usr/sbin";
        envp[2] = NULL;

        rc = USERMODEHELPER(argv[0], argv, envp);
        if (rc < 0) {
                CERROR("%s: error invoking getgroups upcall %s %s %s: rc %d; "
                       "check /proc/fs/lustre/mds/%s/group_upcall\n",
                       hash->uc_name, argv[0], argv[1], argv[2], rc, argv[1]);
        } else {
                CDEBUG(D_HA, "%s: invoked upcall %s %s %s\n", hash->uc_name,
                       argv[0], argv[1], argv[2]);
                rc = 0;
        }
        RETURN(rc);
}

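/* Attach the resolved groups to @entry: record the primary GID and copy at
 * most NGROUPS_MAX (NGROUPS on pre-2.6.4 kernels) supplementary groups into
 * a freshly allocated group_info. */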
static int entry_set_group_info(struct upcall_cache_entry *entry, __u32 primary,
                                __u32 ngroups, __u32 *groups)
{
        struct group_info *ginfo;
        int i, j;
        ENTRY;

#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4)
        if (ngroups > NGROUPS)
                ngroups = NGROUPS;
#endif

        if (ngroups > NGROUPS_MAX) {
                CERROR("using first %d supplementary groups for uid "LPU64"\n",
                       NGROUPS_MAX, entry->ue_key);
                ngroups = NGROUPS_MAX;
        }

        ginfo = groups_alloc(ngroups);
        if (!ginfo) {
                CERROR("uid "LPU64" update can't alloc ginfo for %d groups\n",
                       entry->ue_key, ngroups);
                RETURN(-ENOMEM);
        }
        entry->ue_group_info = ginfo;
        entry->ue_primary = primary;

        for (i = 0; i < ginfo->nblocks; i++) {
                int cp_count = min(NGROUPS_PER_BLOCK, (int)ngroups);
                int off = i * NGROUPS_PER_BLOCK;

                for (j = 0; j < cp_count; j++)
                        ginfo->blocks[i][j] = groups[off + j];

                ngroups -= cp_count;
        }
        RETURN(0);
}

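/* Look up (or create) the cache entry for @key.  With the upcall set to
 * "NONE" a one-shot, uncached entry is built directly from the supplementary
 * GIDs passed in; otherwise a missing entry triggers the upcall and the
 * caller sleeps until the downcall fills it in or the acquire timeout
 * expires.  Returns the entry with a reference held, or an error on failure. */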
struct upcall_cache_entry *upcall_cache_get_entry(struct upcall_cache *hash,
                                                  __u64 key, __u32 primary,
                                                  __u32 ngroups, __u32 *groups)
{
        struct upcall_cache_entry *entry = NULL, *new = NULL, *next;
        struct list_head *head;
        wait_queue_t wait;
        int rc, found;
        ENTRY;

        LASSERT(hash);

        if (strcmp(hash->uc_upcall, "NONE") == 0) {
                new = alloc_entry(key);
                if (!new) {
                        CERROR("failed to alloc entry\n");
                        RETURN(NULL);
                }
                get_entry(new);

                /* We have to sort the groups for 2.6 kernels */
                LASSERT(ngroups <= 2);
                if (ngroups == 2 && groups[1] == -1)
                        ngroups--;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,4)
                /* 2.6 needs the groups array sorted */
                if (ngroups == 2 && groups[0] > groups[1]) {
                        __u32 tmp = groups[1];
                        groups[1] = groups[0];
                        groups[0] = tmp;
                }
#endif
                if (ngroups > 0 && groups[0] == -1) {
                        groups[0] = groups[1];
                        ngroups--;
                }

                rc = entry_set_group_info(new, primary, ngroups, groups);

                /* We can't cache this entry as it only has a subset of
                 * the user's groups, as sent in suppgid1, suppgid2. */
                UC_CACHE_SET_EXPIRED(new);
                RETURN(new);
        }
        head = &hash->uc_hashtable[UC_CACHE_HASH_INDEX(key)];
find_again:
        found = 0;
        spin_lock(&hash->uc_lock);
        list_for_each_entry_safe(entry, next, head, ue_hash) {
                /* check invalid & expired items */
                if (check_unlink_entry(entry))
                        continue;
                if (entry->ue_key == key) {
                        found = 1;
                        break;
                }
        }

        if (!found) { /* didn't find it */
                if (!new) {
                        spin_unlock(&hash->uc_lock);
                        new = alloc_entry(key);
                        if (!new) {
                                CERROR("failed to alloc entry\n");
                                RETURN(ERR_PTR(-ENOMEM));
                        }
                        goto find_again;
                } else {
                        list_add(&new->ue_hash, head);
                        entry = new;
                }
        } else {
                if (new) {
                        free_entry(new);
                        new = NULL;
                }
                list_move(&entry->ue_hash, head);
        }
        get_entry(entry);

        /* acquire for new one */
        if (UC_CACHE_IS_NEW(entry)) {
                UC_CACHE_SET_ACQUIRING(entry);
                UC_CACHE_CLEAR_NEW(entry);
                entry->ue_acquire_expire = jiffies + hash->uc_acquire_expire;
                spin_unlock(&hash->uc_lock);
                rc = refresh_entry(hash, entry);
                spin_lock(&hash->uc_lock);
                if (rc < 0) {
                        UC_CACHE_CLEAR_ACQUIRING(entry);
                        UC_CACHE_SET_INVALID(entry);
                }
                /* fall through */
        }
        /* someone (and only one) is doing the upcall on this
         * item, just wait for it to complete
         */
        if (UC_CACHE_IS_ACQUIRING(entry)) {
                init_waitqueue_entry(&wait, current);
                add_wait_queue(&entry->ue_waitq, &wait);
                set_current_state(TASK_INTERRUPTIBLE);
                spin_unlock(&hash->uc_lock);

                schedule_timeout(hash->uc_acquire_expire);

                spin_lock(&hash->uc_lock);
                remove_wait_queue(&entry->ue_waitq, &wait);
                if (UC_CACHE_IS_ACQUIRING(entry)) {
                        static unsigned long next;
                        /* we were interrupted or the upcall failed midway */
                        if (time_after(jiffies, next)) {
                                CERROR("key "LPU64" update failed: check %s\n",
                                       entry->ue_key, hash->uc_upcall);
                                next = jiffies + 1800;
                        }
                        put_entry(entry);
                        GOTO(out, entry = ERR_PTR(-EIDRM));
                }
                /* fall through */
        }

        /* invalid means error, no need to try again */
        if (UC_CACHE_IS_INVALID(entry)) {
                put_entry(entry);
                GOTO(out, entry = ERR_PTR(-EIDRM));
        }

        /* Check for expiry.  We can't refresh the existing entry in place
         * because its memory might be shared by multiple processes.
         */
        if (check_unlink_entry(entry)) {
                /* If expired, try again.  But if this entry was created
                 * by us and turned expired this quickly without any error,
                 * give it at least one chance to be used.
                 */
                if (entry != new) {
                        put_entry(entry);
                        spin_unlock(&hash->uc_lock);
                        new = NULL;
                        goto find_again;
                }
        }

        /* Now we know it's good */
out:
        spin_unlock(&hash->uc_lock);
        RETURN(entry);
}
EXPORT_SYMBOL(upcall_cache_get_entry);

void upcall_cache_put_entry(struct upcall_cache *hash,
                            struct upcall_cache_entry *entry)
{
        ENTRY;

        if (!entry) {
                EXIT;
                return;
        }

        LASSERT(atomic_read(&entry->ue_refcount) > 0);
        spin_lock(&hash->uc_lock);
        put_entry(entry);
        spin_unlock(&hash->uc_lock);
        EXIT;
}
EXPORT_SYMBOL(upcall_cache_put_entry);

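/* Downcall path: invoked (via ioctl) when the user-space upcall reports its
 * result for @key.  Find the ACQUIRING entry, install the group list, mark
 * it VALID (or INVALID on any error) and wake up all waiters. */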
int upcall_cache_downcall(struct upcall_cache *hash, __u32 err, __u64 key,
                          __u32 primary, __u32 ngroups, __u32 *groups)
{
        struct upcall_cache_entry *entry = NULL;
        struct list_head *head;
        int found = 0, rc = 0;
        ENTRY;

        LASSERT(hash);

        head = &hash->uc_hashtable[UC_CACHE_HASH_INDEX(key)];

        spin_lock(&hash->uc_lock);
        list_for_each_entry(entry, head, ue_hash) {
                if (entry->ue_key == key) {
                        found = 1;
                        get_entry(entry);
                        break;
                }
        }

        if (!found) {
                /* not finding the entry is possible, e.g. if it has already
                 * expired; don't dereference the loop cursor here */
                CDEBUG(D_OTHER, "%s: upcall for key "LPU64" not expected\n",
                       hash->uc_name, key);
                spin_unlock(&hash->uc_lock);
                RETURN(-EINVAL);
        }

        if (err) {
                CDEBUG(D_OTHER, "%s: upcall for key "LPU64" returned %d\n",
                       hash->uc_name, entry->ue_key, err);
                GOTO(out, rc = -EINVAL);
        }

        if (!UC_CACHE_IS_ACQUIRING(entry)) {
                CERROR("%s: found uptodate entry %p (key "LPU64") in ioctl\n",
                       hash->uc_name, entry, entry->ue_key);
                GOTO(out, rc = 0);
        }

        if (UC_CACHE_IS_INVALID(entry) || UC_CACHE_IS_EXPIRED(entry)) {
                CERROR("%s: found a stale entry %p (key "LPU64") in ioctl\n",
                       hash->uc_name, entry, entry->ue_key);
                GOTO(out, rc = -EINVAL);
        }

        spin_unlock(&hash->uc_lock);
        rc = entry_set_group_info(entry, primary, ngroups, groups);
        spin_lock(&hash->uc_lock);
        if (rc)
                GOTO(out, rc);

        entry->ue_expire = jiffies + hash->uc_entry_expire;
        UC_CACHE_SET_VALID(entry);
        CDEBUG(D_OTHER, "%s: created upcall cache entry %p for key "LPU64"\n",
               hash->uc_name, entry, entry->ue_key);
out:
        if (rc) {
                UC_CACHE_SET_INVALID(entry);
                list_del_init(&entry->ue_hash);
        }
        UC_CACHE_CLEAR_ACQUIRING(entry);
        spin_unlock(&hash->uc_lock);
        wake_up_all(&entry->ue_waitq);
        put_entry(entry);

        RETURN(rc);
}
EXPORT_SYMBOL(upcall_cache_downcall);

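/* Walk every hash bucket and free cache entries.  Without @force, entries
 * that are still referenced are only marked EXPIRED; with @force the caller
 * guarantees there are no users left (asserted) and everything is freed. */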
static void cache_flush(struct upcall_cache *hash, int force)
{
        struct upcall_cache_entry *entry, *next;
        int i;
        ENTRY;

        spin_lock(&hash->uc_lock);
        for (i = 0; i < UC_CACHE_HASH_SIZE; i++) {
                list_for_each_entry_safe(entry, next,
                                         &hash->uc_hashtable[i], ue_hash) {
                        if (!force && atomic_read(&entry->ue_refcount)) {
                                UC_CACHE_SET_EXPIRED(entry);
                                continue;
                        }
                        LASSERT(!atomic_read(&entry->ue_refcount));
                        free_entry(entry);
                }
        }
        spin_unlock(&hash->uc_lock);
        EXIT;
}

void upcall_cache_flush_idle(struct upcall_cache *cache)
{
        cache_flush(cache, 0);
}
EXPORT_SYMBOL(upcall_cache_flush_idle);

void upcall_cache_flush_all(struct upcall_cache *cache)
{
        cache_flush(cache, 1);
}
EXPORT_SYMBOL(upcall_cache_flush_all);

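/* Allocate and initialise a cache.  Defaults (tunable through /proc): no
 * upcall ("NONE"), entries valid for 5 minutes, upcalls given 5 seconds to
 * complete. */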
struct upcall_cache *upcall_cache_init(const char *name)
{
        struct upcall_cache *hash;
        int i;
        ENTRY;

        OBD_ALLOC(hash, sizeof(*hash));
        if (!hash)
                RETURN(ERR_PTR(-ENOMEM));

        spin_lock_init(&hash->uc_lock);
        for (i = 0; i < UC_CACHE_HASH_SIZE; i++)
                INIT_LIST_HEAD(&hash->uc_hashtable[i]);
        strncpy(hash->uc_name, name, sizeof(hash->uc_name) - 1);
        /* set default values; tunable via /proc */
        strcpy(hash->uc_upcall, "NONE");
        hash->uc_entry_expire = 5 * 60 * HZ;
        hash->uc_acquire_expire = 5 * HZ;

        RETURN(hash);
}
EXPORT_SYMBOL(upcall_cache_init);

void upcall_cache_cleanup(struct upcall_cache *hash)
{
        if (!hash)
                return;
        upcall_cache_flush_all(hash);
        OBD_FREE(hash, sizeof(*hash));
}
EXPORT_SYMBOL(upcall_cache_cleanup);