/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
 * vim:expandtab:shiftwidth=8:tabstop=8:
 *
 *  Supplementary groups cache.
 *
 *  Copyright (c) 2004 Cluster File Systems, Inc.
 *
 *   This file is part of Lustre, http://www.lustre.org.
 *
 *   Lustre is free software; you can redistribute it and/or
 *   modify it under the terms of version 2 of the GNU General Public
 *   License as published by the Free Software Foundation.
 *
 *   Lustre is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with Lustre; if not, write to the Free Software
 *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#define DEBUG_SUBSYSTEM S_SEC

#ifndef AUTOCONF_INCLUDED
#include <linux/config.h>
#endif
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/kmod.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/errno.h>
#include <linux/version.h>
#include <linux/unistd.h>

#include <asm/system.h>
#include <asm/uaccess.h>

#include <linux/fs.h>
#include <linux/slab.h>
#ifdef HAVE_SEGMENT_H
# include <asm/segment.h>
#endif
#include <obd_support.h>
#include <lustre_lib.h>

#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4)
struct group_info *groups_alloc(int ngroups)
{
        struct group_info *ginfo;

        LASSERT(ngroups <= NGROUPS_SMALL);

        OBD_ALLOC(ginfo, sizeof(*ginfo) + 1 * sizeof(gid_t *));
        if (!ginfo)
                return NULL;
        ginfo->ngroups = ngroups;
        ginfo->nblocks = 1;
        ginfo->blocks[0] = ginfo->small_block;
        atomic_set(&ginfo->usage, 1);

        return ginfo;
}

void groups_free(struct group_info *ginfo)
{
        LASSERT(ginfo->ngroups <= NGROUPS_SMALL);
        LASSERT(ginfo->nblocks == 1);
        LASSERT(ginfo->blocks[0] == ginfo->small_block);

        OBD_FREE(ginfo, sizeof(*ginfo) + 1 * sizeof(gid_t *));
}
#endif

static struct upcall_cache_entry *alloc_entry(__u64 key)
{
        struct upcall_cache_entry *entry;

        OBD_ALLOC(entry, sizeof(*entry));
        if (!entry)
                return NULL;

        UC_CACHE_SET_NEW(entry);
        INIT_LIST_HEAD(&entry->ue_hash);
        entry->ue_key = key;
        atomic_set(&entry->ue_refcount, 0);
        init_waitqueue_head(&entry->ue_waitq);
        return entry;
}

/* protected by hash lock */
static void free_entry(struct upcall_cache_entry *entry)
{
        if (entry->ue_group_info)
                groups_free(entry->ue_group_info);
        list_del(&entry->ue_hash);
        CDEBUG(D_OTHER, "destroy cache entry %p for key "LPU64"\n",
               entry, entry->ue_key);
        OBD_FREE(entry, sizeof(*entry));
}

static void get_entry(struct upcall_cache_entry *entry)
{
        atomic_inc(&entry->ue_refcount);
}

static void put_entry(struct upcall_cache_entry *entry)
{
        if (atomic_dec_and_test(&entry->ue_refcount) &&
            (UC_CACHE_IS_INVALID(entry) || UC_CACHE_IS_EXPIRED(entry))) {
                free_entry(entry);
        }
}

static int check_unlink_entry(struct upcall_cache_entry *entry)
{
        if (UC_CACHE_IS_VALID(entry) &&
            time_before(jiffies, entry->ue_expire))
                return 0;

        if (UC_CACHE_IS_ACQUIRING(entry)) {
                if (time_before(jiffies, entry->ue_acquire_expire))
                        return 0;

                UC_CACHE_SET_EXPIRED(entry);
                wake_up_all(&entry->ue_waitq);
        } else if (!UC_CACHE_IS_INVALID(entry)) {
                UC_CACHE_SET_EXPIRED(entry);
        }

        list_del_init(&entry->ue_hash);
        if (!atomic_read(&entry->ue_refcount))
                free_entry(entry);
        return 1;
}

static int refresh_entry(struct upcall_cache *hash,
                         struct upcall_cache_entry *entry)
{
        char *argv[4];
        char *envp[3];
        char keystr[16];
        int rc;
        ENTRY;

        snprintf(keystr, sizeof(keystr), LPU64, entry->ue_key);

        CDEBUG(D_INFO, "The groups upcall is: %s\n", hash->uc_upcall);
        argv[0] = hash->uc_upcall;
        argv[1] = hash->uc_name;
        argv[2] = keystr;
        argv[3] = NULL;

        envp[0] = "HOME=/";
        envp[1] = "PATH=/sbin:/usr/sbin";
        envp[2] = NULL;

        rc = USERMODEHELPER(argv[0], argv, envp);
        if (rc < 0) {
                CERROR("%s: error invoking getgroups upcall %s %s %s: rc %d; "
                       "check /proc/fs/lustre/mds/%s/group_upcall\n",
                       hash->uc_name, argv[0], argv[1], argv[2], rc, argv[1]);
        } else {
                CDEBUG(D_HA, "%s: invoked upcall %s %s %s\n", hash->uc_name,
                       argv[0], argv[1], argv[2]);
                rc = 0;
        }
        RETURN(rc);
}
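
/*
 * For reference: refresh_entry() execs the helper named by uc_upcall as
 *
 *         <uc_upcall> <uc_name> <key>
 *
 * A userspace helper would parse its arguments along these lines.  This
 * is only an illustrative sketch, kept under #if 0 so it is never built
 * here; the real helper and its downcall delivery channel live outside
 * this file.
 */
#if 0
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char *argv[])
{
        const char *cache_name;
        unsigned long long key;

        if (argc != 3) {
                fprintf(stderr, "usage: %s <cache-name> <key>\n", argv[0]);
                return 1;
        }
        cache_name = argv[1];               /* selects the cache instance */
        key = strtoull(argv[2], NULL, 10);  /* uid whose groups are wanted */
        (void)cache_name;

        /* ... resolve the supplementary groups for uid 'key' and hand
         * them back to the kernel, ending up in upcall_cache_downcall()
         * below ... */
        return 0;
}
#endif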

static int entry_set_group_info(struct upcall_cache_entry *entry, __u32 primary,
                                __u32 ngroups, __u32 *groups)
{
        struct group_info *ginfo;
        int i, j;
        ENTRY;

#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4)
        if (ngroups > NGROUPS)
                ngroups = NGROUPS;
#endif

        if (ngroups > NGROUPS_MAX) {
                CERROR("using first %d supplementary groups for uid "LPU64"\n",
                       NGROUPS_MAX, entry->ue_key);
                ngroups = NGROUPS_MAX;
        }

        ginfo = groups_alloc(ngroups);
        if (!ginfo) {
                CERROR("uid "LPU64" update can't alloc ginfo for %d groups\n",
                       entry->ue_key, ngroups);
                RETURN(-ENOMEM);
        }
        entry->ue_group_info = ginfo;
        entry->ue_primary = primary;

        for (i = 0; i < ginfo->nblocks; i++) {
                int cp_count = min(NGROUPS_PER_BLOCK, (int)ngroups);
                int off = i * NGROUPS_PER_BLOCK;

                for (j = 0; j < cp_count; j++)
                        ginfo->blocks[i][j] = groups[off + j];

                ngroups -= cp_count;
        }
        RETURN(0);
}

struct upcall_cache_entry *upcall_cache_get_entry(struct upcall_cache *hash,
                                                  __u64 key, __u32 primary,
                                                  __u32 ngroups, __u32 *groups)
{
        struct upcall_cache_entry *entry = NULL, *new = NULL, *next;
        struct list_head *head;
        wait_queue_t wait;
        int rc, found;
        ENTRY;

        LASSERT(hash);

        if (strcmp(hash->uc_upcall, "NONE") == 0) {
                new = alloc_entry(key);
                if (!new) {
                        CERROR("failed to alloc entry\n");
                        RETURN(NULL);
                }
                get_entry(new);

                /* We have to sort the groups for 2.6 kernels */
                LASSERT(ngroups <= 2);
                if (ngroups == 2 && groups[1] == -1)
                        ngroups--;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,4)
                /* 2.6 needs the groups array sorted */
                if (ngroups == 2 && groups[0] > groups[1]) {
                        __u32 tmp = groups[1];
                        groups[1] = groups[0];
                        groups[0] = tmp;
                }
#endif
                if (ngroups > 0 && groups[0] == -1) {
                        groups[0] = groups[1];
                        ngroups--;
                }

                rc = entry_set_group_info(new, primary, ngroups, groups);
                if (rc) {
                        /* not yet hashed, safe to free without uc_lock */
                        free_entry(new);
                        RETURN(ERR_PTR(rc));
                }

                /* We can't cache this entry as it only has a subset of
                 * the user's groups, as sent in suppgid1, suppgid2. */
                UC_CACHE_SET_EXPIRED(new);
                RETURN(new);
        }
        head = &hash->uc_hashtable[UC_CACHE_HASH_INDEX(key)];
find_again:
        found = 0;
        spin_lock(&hash->uc_lock);
        list_for_each_entry_safe(entry, next, head, ue_hash) {
                /* check for invalid and expired items */
                if (check_unlink_entry(entry))
                        continue;
                if (entry->ue_key == key) {
                        found = 1;
                        break;
                }
        }

        if (!found) { /* didn't find it */
                if (!new) {
                        spin_unlock(&hash->uc_lock);
                        new = alloc_entry(key);
                        if (!new) {
                                CERROR("failed to alloc entry\n");
                                RETURN(ERR_PTR(-ENOMEM));
                        }
                        goto find_again;
                } else {
                        list_add(&new->ue_hash, head);
                        entry = new;
                }
        } else {
                if (new) {
                        free_entry(new);
                        new = NULL;
                }
                list_move(&entry->ue_hash, head);
        }
        get_entry(entry);

        /* acquire for a new one */
        if (UC_CACHE_IS_NEW(entry)) {
                UC_CACHE_SET_ACQUIRING(entry);
                UC_CACHE_CLEAR_NEW(entry);
                entry->ue_acquire_expire = jiffies + hash->uc_acquire_expire;
                spin_unlock(&hash->uc_lock);
                rc = refresh_entry(hash, entry);
                spin_lock(&hash->uc_lock);
                if (rc < 0) {
                        UC_CACHE_CLEAR_ACQUIRING(entry);
                        UC_CACHE_SET_INVALID(entry);
                }
                /* fall through */
        }
        /* someone (and only one) is doing the upcall for
         * this item; just wait for it to complete
         */
        if (UC_CACHE_IS_ACQUIRING(entry)) {
                init_waitqueue_entry(&wait, current);
                add_wait_queue(&entry->ue_waitq, &wait);
                set_current_state(TASK_INTERRUPTIBLE);
                spin_unlock(&hash->uc_lock);

                schedule_timeout(hash->uc_acquire_expire);

                spin_lock(&hash->uc_lock);
                remove_wait_queue(&entry->ue_waitq, &wait);
                if (UC_CACHE_IS_ACQUIRING(entry)) {
                        static unsigned long next;
                        /* we were interrupted, or the upcall failed midway */
                        if (time_after(jiffies, next)) {
                                CERROR("acquire timeout exceeded for key "LPU64
                                       "\n", entry->ue_key);
                                next = jiffies + 1800;
                        }
                        put_entry(entry);
                        GOTO(out, entry = ERR_PTR(-EIDRM));
                }
                /* fall through */
        }

        /* invalid means error, no need to try again */
        if (UC_CACHE_IS_INVALID(entry)) {
                put_entry(entry);
                GOTO(out, entry = ERR_PTR(-EIDRM));
        }

        /* check expiry.
         * We can't refresh the existing entry in place because its
         * memory might be shared by multiple processes.
         */
        if (check_unlink_entry(entry)) {
                /* if expired, try again.  But if this entry was created
                 * by us and expired too quickly without any error, it
                 * should at least get one chance to be used.
                 */
                if (entry != new) {
                        put_entry(entry);
                        spin_unlock(&hash->uc_lock);
                        new = NULL;
                        goto find_again;
                }
        }

        /* by now we know the entry is good */
out:
        spin_unlock(&hash->uc_lock);
        RETURN(entry);
}
EXPORT_SYMBOL(upcall_cache_get_entry);

void upcall_cache_put_entry(struct upcall_cache *hash,
                            struct upcall_cache_entry *entry)
{
        ENTRY;

        if (!entry) {
                EXIT;
                return;
        }

        LASSERT(atomic_read(&entry->ue_refcount) > 0);
        spin_lock(&hash->uc_lock);
        put_entry(entry);
        spin_unlock(&hash->uc_lock);
        EXIT;
}
EXPORT_SYMBOL(upcall_cache_put_entry);
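
/*
 * A minimal caller sketch (illustrative only, kept under #if 0 so it is
 * never built): the uid/gid/suppgids naming is an assumption about the
 * caller, not something defined in this file.
 */
#if 0
static int example_lookup_groups(struct upcall_cache *hash, __u64 uid,
                                 __u32 gid, __u32 *suppgids)
{
        struct upcall_cache_entry *entry;

        /* may block until the upcall completes or times out */
        entry = upcall_cache_get_entry(hash, uid, gid, 2, suppgids);
        if (IS_ERR(entry))
                return PTR_ERR(entry);  /* e.g. -EIDRM on upcall failure */
        if (entry == NULL)
                return -ENOMEM;         /* "NONE" upcall allocation failure */

        /* ... use entry->ue_primary and entry->ue_group_info here ... */

        upcall_cache_put_entry(hash, entry);
        return 0;
}
#endif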

int upcall_cache_downcall(struct upcall_cache *hash, __u32 err, __u64 key,
                          __u32 primary, __u32 ngroups, __u32 *groups)
{
        struct upcall_cache_entry *entry = NULL;
        struct list_head *head;
        int found = 0, rc = 0;
        ENTRY;

        LASSERT(hash);

        head = &hash->uc_hashtable[UC_CACHE_HASH_INDEX(key)];

        spin_lock(&hash->uc_lock);
        list_for_each_entry(entry, head, ue_hash) {
                if (entry->ue_key == key) {
                        found = 1;
                        get_entry(entry);
                        break;
                }
        }

        if (!found) {
                /* 'entry' is only the loop cursor here, so report the
                 * key we were given, not entry->ue_key */
                CDEBUG(D_OTHER, "%s: upcall for key "LPU64" not expected\n",
                       hash->uc_name, key);
                /* not finding it is possible, e.g. after a flush */
                spin_unlock(&hash->uc_lock);
                RETURN(-EINVAL);
        }

        if (err) {
                CDEBUG(D_OTHER, "%s: upcall for key "LPU64" returned %d\n",
                       hash->uc_name, entry->ue_key, err);
                GOTO(out, rc = -EINVAL);
        }

        if (!UC_CACHE_IS_ACQUIRING(entry)) {
                CDEBUG(D_RPCTRACE,"%s: found uptodate entry %p (key "LPU64")\n",
                       hash->uc_name, entry, entry->ue_key);
                GOTO(out, rc = 0);
        }

        if (UC_CACHE_IS_INVALID(entry) || UC_CACHE_IS_EXPIRED(entry)) {
                CERROR("%s: found a stale entry %p (key "LPU64") in ioctl\n",
                       hash->uc_name, entry, entry->ue_key);
                GOTO(out, rc = -EINVAL);
        }

        spin_unlock(&hash->uc_lock);
        rc = entry_set_group_info(entry, primary, ngroups, groups);
        spin_lock(&hash->uc_lock);
        if (rc)
                GOTO(out, rc);

        entry->ue_expire = jiffies + hash->uc_entry_expire;
        UC_CACHE_SET_VALID(entry);
        CDEBUG(D_OTHER, "%s: created upcall cache entry %p for key "LPU64"\n",
               hash->uc_name, entry, entry->ue_key);
out:
        if (rc) {
                UC_CACHE_SET_INVALID(entry);
                list_del_init(&entry->ue_hash);
        }
        UC_CACHE_CLEAR_ACQUIRING(entry);
        spin_unlock(&hash->uc_lock);
        wake_up_all(&entry->ue_waitq);
        put_entry(entry);

        RETURN(rc);
}
EXPORT_SYMBOL(upcall_cache_downcall);
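
/*
 * A minimal downcall-side sketch (illustrative only, under #if 0): the
 * payload struct and the channel by which the helper's result reaches
 * the kernel (e.g. a write to a /proc file) are assumptions here, not
 * definitions from this file.
 */
#if 0
struct example_downcall_data {          /* hypothetical payload layout */
        __u32 edd_err;
        __u64 edd_uid;
        __u32 edd_gid;
        __u32 edd_ngroups;
        __u32 edd_groups[NGROUPS_MAX];
};

static int example_handle_downcall(struct upcall_cache *hash,
                                   struct example_downcall_data *edd)
{
        /* completes the request started by refresh_entry() above */
        return upcall_cache_downcall(hash, edd->edd_err, edd->edd_uid,
                                     edd->edd_gid, edd->edd_ngroups,
                                     edd->edd_groups);
}
#endif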

static void cache_flush(struct upcall_cache *hash, int force)
{
        struct upcall_cache_entry *entry, *next;
        int i;
        ENTRY;

        spin_lock(&hash->uc_lock);
        for (i = 0; i < UC_CACHE_HASH_SIZE; i++) {
                list_for_each_entry_safe(entry, next,
                                         &hash->uc_hashtable[i], ue_hash) {
                        if (!force && atomic_read(&entry->ue_refcount)) {
                                UC_CACHE_SET_EXPIRED(entry);
                                continue;
                        }
                        LASSERT(!atomic_read(&entry->ue_refcount));
                        free_entry(entry);
                }
        }
        spin_unlock(&hash->uc_lock);
        EXIT;
}

void upcall_cache_flush_idle(struct upcall_cache *cache)
{
        cache_flush(cache, 0);
}
EXPORT_SYMBOL(upcall_cache_flush_idle);

void upcall_cache_flush_all(struct upcall_cache *cache)
{
        cache_flush(cache, 1);
}
EXPORT_SYMBOL(upcall_cache_flush_all);

struct upcall_cache *upcall_cache_init(const char *name)
{
        struct upcall_cache *hash;
        int i;
        ENTRY;

        OBD_ALLOC(hash, sizeof(*hash));
        if (!hash)
                RETURN(ERR_PTR(-ENOMEM));

        spin_lock_init(&hash->uc_lock);
        for (i = 0; i < UC_CACHE_HASH_SIZE; i++)
                INIT_LIST_HEAD(&hash->uc_hashtable[i]);
        strncpy(hash->uc_name, name, sizeof(hash->uc_name) - 1);
        /* set default values, proc tunables */
        strcpy(hash->uc_upcall, "NONE");
        hash->uc_entry_expire = 10 * 60 * HZ;
        hash->uc_acquire_expire = 15 * HZ;

        RETURN(hash);
}
EXPORT_SYMBOL(upcall_cache_init);

void upcall_cache_cleanup(struct upcall_cache *hash)
{
        if (!hash)
                return;
        upcall_cache_flush_all(hash);
        OBD_FREE(hash, sizeof(*hash));
}
EXPORT_SYMBOL(upcall_cache_cleanup);
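
/*
 * Lifecycle sketch (illustrative only, under #if 0): the cache name and
 * the upcall path are example values; in practice uc_upcall is set
 * through the group_upcall proc tunable rather than assigned directly.
 */
#if 0
static struct upcall_cache *example_cache;

static int example_setup(void)
{
        example_cache = upcall_cache_init("mds1");
        if (IS_ERR(example_cache))
                return PTR_ERR(example_cache);

        /* example value; normally written via the proc tunable */
        snprintf(example_cache->uc_upcall, sizeof(example_cache->uc_upcall),
                 "%s", "/usr/sbin/l_getgroups");
        return 0;
}

static void example_teardown(void)
{
        upcall_cache_flush_idle(example_cache);  /* drop only idle entries */
        upcall_cache_cleanup(example_cache);     /* flush everything, free */
        example_cache = NULL;
}
#endif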