/*
 * GPL HEADER START
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 only,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License version 2 for more details (a copy is included
 * in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with this program; If not, see
 * http://www.gnu.org/licenses/gpl-2.0.html
 *
 * GPL HEADER END
 */
/*
 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
 * Use is subject to license terms.
 *
 * Copyright (c) 2012, 2016, Intel Corporation.
 */
/*
 * This file is part of Lustre, http://www.lustre.org/
 *
 * lustre/obdclass/upcall_cache.c
 *
 * Supplementary groups cache.
 */
#define DEBUG_SUBSYSTEM S_SEC

#include <libcfs/libcfs.h>
#include <uapi/linux/lnet/lnet-types.h>
#include <upcall_cache.h>

static struct upcall_cache_entry *alloc_entry(struct upcall_cache *cache,
					      __u64 key, void *args)
{
	struct upcall_cache_entry *entry;

	LIBCFS_ALLOC(entry, sizeof(*entry));
	if (!entry)
		return NULL;

	UC_CACHE_SET_NEW(entry);
	INIT_LIST_HEAD(&entry->ue_hash);
	entry->ue_key = key;
	atomic_set(&entry->ue_refcount, 0);
	init_waitqueue_head(&entry->ue_waitq);
	entry->ue_acquire_expire = 0;
	entry->ue_expire = 0;
	if (cache->uc_ops->init_entry)
		cache->uc_ops->init_entry(entry, args);
	return entry;
}

/* protected by cache lock */
static void free_entry(struct upcall_cache *cache,
		       struct upcall_cache_entry *entry)
{
	if (cache->uc_ops->free_entry)
		cache->uc_ops->free_entry(cache, entry);

	list_del(&entry->ue_hash);
	CDEBUG(D_OTHER, "destroy cache entry %p for key %llu\n",
	       entry, entry->ue_key);
	LIBCFS_FREE(entry, sizeof(*entry));
}

static inline int upcall_compare(struct upcall_cache *cache,
				 struct upcall_cache_entry *entry,
				 __u64 key, void *args)
{
	if (entry->ue_key != key)
		return -1;

	if (cache->uc_ops->upcall_compare)
		return cache->uc_ops->upcall_compare(cache, entry, key, args);

	return 0;
}

static inline int downcall_compare(struct upcall_cache *cache,
				   struct upcall_cache_entry *entry,
				   __u64 key, void *args)
{
	if (entry->ue_key != key)
		return -1;

	if (cache->uc_ops->downcall_compare)
		return cache->uc_ops->downcall_compare(cache, entry, key, args);

	return 0;
}

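/* Reference counting helpers. An entry with a positive refcount is
 * pinned; put_entry() frees it on the last reference, but only once it
 * has been marked invalid or expired. Callers hold the cache write
 * lock, since put_entry() may end up in free_entry().
 */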
static inline void get_entry(struct upcall_cache_entry *entry)
{
	atomic_inc(&entry->ue_refcount);
}

static inline void put_entry(struct upcall_cache *cache,
			     struct upcall_cache_entry *entry)
{
	if (atomic_dec_and_test(&entry->ue_refcount) &&
	    (UC_CACHE_IS_INVALID(entry) || UC_CACHE_IS_EXPIRED(entry))) {
		free_entry(cache, entry);
	}
}

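/* Upgrade a read lock to a write lock. The upgrade is not atomic: the
 * read lock is dropped before the write lock is taken, so any state
 * observed under the read lock must be re-validated by the caller.
 */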
static inline void write_lock_from_read(rwlock_t *lock, bool *writelock)
{
	if (!*writelock) {
		read_unlock(lock);
		write_lock(lock);
		*writelock = true;
	}
}

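/* Check whether an entry is stale. Returns 0 if the entry is still
 * usable (valid and unexpired, or with an acquire still in flight) and
 * 1 otherwise. With the write lock held, a stale entry is also marked
 * expired, unhashed, and freed if nobody holds a reference.
 */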
static int check_unlink_entry(struct upcall_cache *cache,
			      struct upcall_cache_entry *entry,
			      bool writelock)
{
	time64_t now = ktime_get_seconds();

	if (UC_CACHE_IS_VALID(entry) && now < entry->ue_expire)
		return 0;

	if (UC_CACHE_IS_ACQUIRING(entry)) {
		if (entry->ue_acquire_expire == 0 ||
		    now < entry->ue_acquire_expire)
			return 0;

		if (writelock) {
			UC_CACHE_SET_EXPIRED(entry);
			wake_up(&entry->ue_waitq);
		}
	} else if (!UC_CACHE_IS_INVALID(entry) && writelock) {
		UC_CACHE_SET_EXPIRED(entry);
	}

	if (writelock) {
		list_del_init(&entry->ue_hash);
		if (!atomic_read(&entry->ue_refcount))
			free_entry(cache, entry);
	}
	return 1;
}

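/* Set the cache's upcall. Accepted values are an absolute path to an
 * executable, or, when @path_only is false, "none" (case insensitive).
 * The string is copied under uc_upcall_rwsem so that readers always
 * see a complete value.
 */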
int upcall_cache_set_upcall(struct upcall_cache *cache, const char *buffer,
			    size_t count, bool path_only)
{
	char *upcall;
	int rc = 0;

	if (count >= UC_CACHE_UPCALL_MAXPATH)
		return -E2BIG;

	OBD_ALLOC(upcall, count + 1);
	if (upcall == NULL)
		return -ENOMEM;

	/* Remove any extraneous bits from the upcall (e.g. linefeeds) */
	if (sscanf(buffer, "%s", upcall) != 1)
		GOTO(out, rc = -EINVAL);

	/* Accepted values are:
	 * - an absolute path to an executable
	 * - if path_only is false: "none", case insensitive
	 */
	if (upcall[0] != '/') {
		if (!path_only && strcasecmp(upcall, "NONE") == 0)
			snprintf(upcall, count + 1, "NONE");
		else
			GOTO(out, rc = -EINVAL);
	}

	down_write(&cache->uc_upcall_rwsem);
	strncpy(cache->uc_upcall, upcall, count + 1);
	up_write(&cache->uc_upcall_rwsem);

out:
	OBD_FREE(upcall, count + 1);
	return rc;
}
EXPORT_SYMBOL(upcall_cache_set_upcall);

static inline int refresh_entry(struct upcall_cache *cache,
				struct upcall_cache_entry *entry)
{
	LASSERT(cache->uc_ops->do_upcall);
	return cache->uc_ops->do_upcall(cache, entry);
}

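/* Find or create the cache entry for @key.
 *
 * The lookup first runs under the read lock; if nothing is found, an
 * entry is allocated outside the lock and the search is redone under
 * the write lock before inserting it. A NEW entry is switched to
 * ACQUIRING and triggers the upcall via refresh_entry(); threads that
 * find an ACQUIRING entry sleep on ue_waitq until the downcall
 * completes (the creator waits at most uc_acquire_expire seconds).
 *
 * Returns the entry with a reference held, or an ERR_PTR() on failure;
 * the reference is dropped with upcall_cache_put_entry().
 */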
struct upcall_cache_entry *upcall_cache_get_entry(struct upcall_cache *cache,
						  __u64 key, void *args)
{
	struct upcall_cache_entry *entry = NULL, *new = NULL, *next;
	bool failedacquiring = false;
	struct list_head *head;
	wait_queue_entry_t wait;
	bool writelock;
	int rc, found;

	ENTRY;

	LASSERT(cache);

	head = &cache->uc_hashtable[UC_CACHE_HASH_INDEX(key,
							cache->uc_hashsize)];
find_again:
	found = 0;
	if (new) {
		write_lock(&cache->uc_lock);
		writelock = true;
	} else {
		read_lock(&cache->uc_lock);
		writelock = false;
	}
find_with_lock:
	list_for_each_entry_safe(entry, next, head, ue_hash) {
		/* check invalid & expired items */
		if (check_unlink_entry(cache, entry, writelock))
			continue;
		if (upcall_compare(cache, entry, key, args) == 0) {
			found = 1;
			break;
		}
	}

	if (!found) {
		if (!new) {
			if (writelock)
				write_unlock(&cache->uc_lock);
			else
				read_unlock(&cache->uc_lock);
			new = alloc_entry(cache, key, args);
			if (!new) {
				CERROR("%s: fail to alloc entry: rc = %d\n",
				       cache->uc_name, -ENOMEM);
				RETURN(ERR_PTR(-ENOMEM));
			}
			goto find_again;
		} else {
			list_add(&new->ue_hash, head);
			entry = new;
		}
	} else {
		if (new) {
			free_entry(cache, new);
			new = NULL;
		} else if (!writelock) {
			/* We found an entry while holding the read lock, so
			 * convert it to a write lock and find again, to check
			 * that entry was not modified/freed in between.
			 */
			write_lock_from_read(&cache->uc_lock, &writelock);
			found = 0;
			goto find_with_lock;
		}
		list_move(&entry->ue_hash, head);
	}
	/* now we hold a write lock */
	get_entry(entry);

	/* acquire for new one */
	if (UC_CACHE_IS_NEW(entry)) {
		UC_CACHE_SET_ACQUIRING(entry);
		UC_CACHE_CLEAR_NEW(entry);
		write_unlock(&cache->uc_lock);
		rc = refresh_entry(cache, entry);
		write_lock(&cache->uc_lock);
		entry->ue_acquire_expire = ktime_get_seconds() +
					   cache->uc_acquire_expire;
		if (rc < 0) {
			UC_CACHE_CLEAR_ACQUIRING(entry);
			UC_CACHE_SET_INVALID(entry);
			wake_up(&entry->ue_waitq);
			if (unlikely(rc == -EREMCHG)) {
				put_entry(cache, entry);
				GOTO(out, entry = ERR_PTR(rc));
			}
		}
	}
	/* someone (and only one) is doing upcall upon this item,
	 * wait for it to complete
	 */
	if (UC_CACHE_IS_ACQUIRING(entry)) {
		long expiry = (entry == new) ?
			      cfs_time_seconds(cache->uc_acquire_expire) :
			      MAX_SCHEDULE_TIMEOUT;
		long left;

		init_wait(&wait);
		add_wait_queue(&entry->ue_waitq, &wait);
		set_current_state(TASK_INTERRUPTIBLE);
		write_unlock(&cache->uc_lock);

		left = schedule_timeout(expiry);

		write_lock(&cache->uc_lock);
		remove_wait_queue(&entry->ue_waitq, &wait);
		if (UC_CACHE_IS_ACQUIRING(entry)) {
			/* we're interrupted or upcall failed in the middle */
			rc = left > 0 ? -EINTR : -ETIMEDOUT;
			/* if we waited uc_acquire_expire, we can try again
			 * with same data, but only if acquire is replayable
			 */
			if (left <= 0 && !cache->uc_acquire_replay)
				failedacquiring = true;
			put_entry(cache, entry);
			if (!failedacquiring) {
				write_unlock(&cache->uc_lock);
				failedacquiring = true;
				new = NULL;
				CDEBUG(D_OTHER,
				       "retry acquire for key %llu (got %d)\n",
				       entry->ue_key, rc);
				goto find_again;
			}
			wake_up_all(&entry->ue_waitq);
			CERROR("%s: acquire for key %llu after %llu: rc = %d\n",
			       cache->uc_name, entry->ue_key,
			       cache->uc_acquire_expire, rc);
			GOTO(out, entry = ERR_PTR(rc));
		}
	}

	/* invalid means error, don't need to try again */
	if (UC_CACHE_IS_INVALID(entry)) {
		put_entry(cache, entry);
		GOTO(out, entry = ERR_PTR(-EIDRM));
	}

	/* check expired
	 * We can't refresh the existing one because some
	 * memory might be shared by multiple processes.
	 */
	if (check_unlink_entry(cache, entry, writelock)) {
		/* If expired, try again. But if this entry was created by
		 * me and turned expired too quickly without any error, it
		 * should at least get a chance to be used once.
		 */
		if (entry != new) {
			/* as stated above, we already hold a write lock */
			put_entry(cache, entry);
			write_unlock(&cache->uc_lock);
			new = NULL;
			goto find_again;
		}
	}

	/* Now we know it's good */
out:
	if (writelock)
		write_unlock(&cache->uc_lock);
	else
		read_unlock(&cache->uc_lock);
	RETURN(entry);
}
EXPORT_SYMBOL(upcall_cache_get_entry);

void upcall_cache_get_entry_raw(struct upcall_cache_entry *entry)
{
	get_entry(entry);
}
EXPORT_SYMBOL(upcall_cache_get_entry_raw);

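/* Update an entry's expiry and state, typically from the downcall
 * path: a zero @state marks the entry VALID, otherwise the given flags
 * are ORed into ue_flags.
 */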
void upcall_cache_update_entry(struct upcall_cache *cache,
			       struct upcall_cache_entry *entry,
			       time64_t expire, int state)
{
	write_lock(&cache->uc_lock);
	entry->ue_expire = expire;
	if (!state)
		UC_CACHE_SET_VALID(entry);
	else
		entry->ue_flags |= state;
	write_unlock(&cache->uc_lock);
}
EXPORT_SYMBOL(upcall_cache_update_entry);

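/* Release a reference obtained from upcall_cache_get_entry(). The
 * entry is freed once the last reference is dropped and it has been
 * marked invalid or expired.
 */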
void upcall_cache_put_entry(struct upcall_cache *cache,
			    struct upcall_cache_entry *entry)
{
	ENTRY;

	if (!entry) {
		EXIT;
		return;
	}

	LASSERT(atomic_read(&entry->ue_refcount) > 0);
	write_lock(&cache->uc_lock);
	put_entry(cache, entry);
	write_unlock(&cache->uc_lock);
	EXIT;
}
EXPORT_SYMBOL(upcall_cache_put_entry);

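/* Complete an upcall: called when the userspace helper delivers the
 * result for @key. On success the entry is filled in via the
 * parse_downcall() hook, given an expiry time and marked VALID; on
 * failure it is marked INVALID and unhashed. Either way ACQUIRING is
 * cleared and waiters on ue_waitq are woken.
 */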
int upcall_cache_downcall(struct upcall_cache *cache, __u32 err, __u64 key,
			  void *args)
{
	struct upcall_cache_entry *entry = NULL;
	struct list_head *head;
	int found = 0, rc = 0;
	bool writelock = false;
	ENTRY;

	LASSERT(cache);

	head = &cache->uc_hashtable[UC_CACHE_HASH_INDEX(key,
							cache->uc_hashsize)];

	read_lock(&cache->uc_lock);
	list_for_each_entry(entry, head, ue_hash) {
		if (downcall_compare(cache, entry, key, args) == 0) {
			found = 1;
			get_entry(entry);
			break;
		}
	}

	if (!found) {
		CDEBUG(D_OTHER, "%s: upcall for key %llu not expected\n",
		       cache->uc_name, key);
		/* not found, which is possible */
		read_unlock(&cache->uc_lock);
		RETURN(-EINVAL);
	}

	if (err) {
		CDEBUG(D_OTHER, "%s: upcall for key %llu returned %d\n",
		       cache->uc_name, entry->ue_key, err);
		write_lock_from_read(&cache->uc_lock, &writelock);
		GOTO(out, rc = err);
	}

	if (!UC_CACHE_IS_ACQUIRING(entry)) {
		CDEBUG(D_RPCTRACE, "%s: found uptodate entry %p (key %llu)\n",
		       cache->uc_name, entry, entry->ue_key);
		write_lock_from_read(&cache->uc_lock, &writelock);
		GOTO(out, rc = 0);
	}

	if (UC_CACHE_IS_INVALID(entry) || UC_CACHE_IS_EXPIRED(entry)) {
		CERROR("%s: found a stale entry %p (key %llu) in ioctl\n",
		       cache->uc_name, entry, entry->ue_key);
		write_lock_from_read(&cache->uc_lock, &writelock);
		GOTO(out, rc = -EINVAL);
	}

	read_unlock(&cache->uc_lock);
	if (cache->uc_ops->parse_downcall)
		rc = cache->uc_ops->parse_downcall(cache, entry, args);
	write_lock(&cache->uc_lock);
	if (rc)
		GOTO(out, rc);

	if (!entry->ue_expire)
		entry->ue_expire = ktime_get_seconds() + cache->uc_entry_expire;
	UC_CACHE_SET_VALID(entry);
	CDEBUG(D_OTHER, "%s: created upcall cache entry %p for key %llu\n",
	       cache->uc_name, entry, entry->ue_key);
out:
	/* 'goto out' needs to make sure to take a write lock first */
	if (rc) {
		UC_CACHE_SET_INVALID(entry);
		list_del_init(&entry->ue_hash);
	}
	UC_CACHE_CLEAR_ACQUIRING(entry);
	wake_up(&entry->ue_waitq);
	put_entry(cache, entry);
	write_unlock(&cache->uc_lock);

	RETURN(rc);
}
EXPORT_SYMBOL(upcall_cache_downcall);

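/* Flush the cache. With @force set, every entry is freed regardless of
 * its refcount; otherwise entries still referenced are only marked
 * expired and get freed when the last reference is dropped.
 */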
void upcall_cache_flush(struct upcall_cache *cache, int force)
{
	struct upcall_cache_entry *entry, *next;
	int i;
	ENTRY;

	write_lock(&cache->uc_lock);
	for (i = 0; i < cache->uc_hashsize; i++) {
		list_for_each_entry_safe(entry, next,
					 &cache->uc_hashtable[i], ue_hash) {
			if (!force && atomic_read(&entry->ue_refcount)) {
				UC_CACHE_SET_EXPIRED(entry);
				continue;
			}
			LASSERT(!atomic_read(&entry->ue_refcount));
			free_entry(cache, entry);
		}
	}
	write_unlock(&cache->uc_lock);
	EXIT;
}
EXPORT_SYMBOL(upcall_cache_flush);

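/* Expire the entry matching @key, if any, so that the next lookup
 * triggers a fresh upcall.
 */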
void upcall_cache_flush_one(struct upcall_cache *cache, __u64 key, void *args)
{
	struct list_head *head;
	struct upcall_cache_entry *entry;
	int found = 0;
	ENTRY;

	head = &cache->uc_hashtable[UC_CACHE_HASH_INDEX(key,
							cache->uc_hashsize)];

	write_lock(&cache->uc_lock);
	list_for_each_entry(entry, head, ue_hash) {
		if (upcall_compare(cache, entry, key, args) == 0) {
			found = 1;
			break;
		}
	}

	if (found) {
		CWARN("%s: flush entry %p: key %llu, ref %d, fl %x, cur %lld, ex %lld/%lld\n",
		      cache->uc_name, entry, entry->ue_key,
		      atomic_read(&entry->ue_refcount), entry->ue_flags,
		      ktime_get_real_seconds(), entry->ue_acquire_expire,
		      entry->ue_expire);
		get_entry(entry);
		UC_CACHE_SET_EXPIRED(entry);
		put_entry(cache, entry);
	}
	write_unlock(&cache->uc_lock);
}
EXPORT_SYMBOL(upcall_cache_flush_one);

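/* Allocate and initialize a cache: @hashsz hash buckets, entries valid
 * for @entry_expire seconds, upcalls given @acquire_expire seconds to
 * complete, and, if @replayable, timed-out acquires may be retried.
 */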
struct upcall_cache *upcall_cache_init(const char *name, const char *upcall,
				       int hashsz, time64_t entry_expire,
				       time64_t acquire_expire, bool replayable,
				       struct upcall_cache_ops *ops)
{
	struct upcall_cache *cache;
	int i;
	ENTRY;

	LIBCFS_ALLOC(cache, sizeof(*cache));
	if (!cache)
		RETURN(ERR_PTR(-ENOMEM));

	rwlock_init(&cache->uc_lock);
	init_rwsem(&cache->uc_upcall_rwsem);
	cache->uc_hashsize = hashsz;
	LIBCFS_ALLOC(cache->uc_hashtable,
		     sizeof(*cache->uc_hashtable) * cache->uc_hashsize);
	if (!cache->uc_hashtable) {
		/* don't leak the cache struct when the hash table
		 * allocation fails
		 */
		LIBCFS_FREE(cache, sizeof(*cache));
		RETURN(ERR_PTR(-ENOMEM));
	}
	for (i = 0; i < cache->uc_hashsize; i++)
		INIT_LIST_HEAD(&cache->uc_hashtable[i]);
	strscpy(cache->uc_name, name, sizeof(cache->uc_name));
	/* upcall pathname proc tunable */
	strscpy(cache->uc_upcall, upcall, sizeof(cache->uc_upcall));
	cache->uc_entry_expire = entry_expire;
	cache->uc_acquire_expire = acquire_expire;
	cache->uc_acquire_replay = replayable;
	cache->uc_ops = ops;

	RETURN(cache);
}
EXPORT_SYMBOL(upcall_cache_init);

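/* Tear down a cache: force-flush all remaining entries, then free the
 * hash table and the cache itself.
 */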
void upcall_cache_cleanup(struct upcall_cache *cache)
{
	if (!cache)
		return;
	upcall_cache_flush_all(cache);
	LIBCFS_FREE(cache->uc_hashtable,
		    sizeof(*cache->uc_hashtable) * cache->uc_hashsize);
	LIBCFS_FREE(cache, sizeof(*cache));
}
EXPORT_SYMBOL(upcall_cache_cleanup);
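
/*
 * Example usage (an illustrative sketch only, not part of this module;
 * "my_ops", "key", "args" and the expiry values below are hypothetical):
 *
 *	struct upcall_cache *cache;
 *	struct upcall_cache_entry *entry;
 *
 *	cache = upcall_cache_init("my_cache", "/usr/sbin/my_upcall",
 *				  128, 300, 30, false, &my_ops);
 *	if (IS_ERR(cache))
 *		return PTR_ERR(cache);
 *
 *	entry = upcall_cache_get_entry(cache, key, args);
 *	if (IS_ERR(entry))
 *		return PTR_ERR(entry);
 *	... use the entry's cached data ...
 *	upcall_cache_put_entry(cache, entry);
 *
 *	upcall_cache_cleanup(cache);
 */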