Whamcloud - gitweb
LU-3105 osc: remove capa support
[fs/lustre-release.git] / lustre / llite / llite_capa.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
19  *
20  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21  * CA 95054 USA or visit www.sun.com if you need additional information or
22  * have any questions.
23  *
24  * GPL HEADER END
25  */
26 /*
27  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
28  * Use is subject to license terms.
29  *
30  * Copyright (c) 2012, 2014, Intel Corporation.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  *
36  * lustre/llite/llite_capa.c
37  *
38  * Author: Lai Siyao <lsy@clusterfs.com>
39  */
40
41 #define DEBUG_SUBSYSTEM S_LLITE
42
43 #include <linux/fs.h>
44 #include <linux/version.h>
45 #include <linux/kthread.h>
46 #include <asm/uaccess.h>
47 #include <linux/file.h>
48 #include <linux/kmod.h>
49
50 #include "llite_internal.h"
51
52 /* for obd_capa.c_list, client capa might stay in three places:
53  * 1. ll_capa_list.
54  * 2. ll_idle_capas.
55  * 3. stand alone: just allocated.
56  */
57
58 /* capas for oss writeback and those failed to renew */
59 static struct list_head ll_idle_capas = LIST_HEAD_INIT(ll_idle_capas);
60 static struct ptlrpc_thread ll_capa_thread;
61 static struct list_head *ll_capa_list = &capa_list[CAPA_SITE_CLIENT];
62
63 /* llite capa renewal timer */
64 struct timer_list ll_capa_timer;
65 /* for debug: indicate whether capa on llite is enabled or not */
66 static atomic_t ll_capa_debug = ATOMIC_INIT(0);
67 static unsigned long long ll_capa_renewed = 0;
68 static unsigned long long ll_capa_renewal_noent = 0;
69 static unsigned long long ll_capa_renewal_failed = 0;
70 static unsigned long long ll_capa_renewal_retries = 0;
71
72 static int ll_update_capa(struct obd_capa *ocapa, struct lustre_capa *capa);
73
74 static inline void update_capa_timer(struct obd_capa *ocapa, cfs_time_t expiry)
75 {
76         if (cfs_time_before(expiry, ll_capa_timer.expires) ||
77             !timer_pending(&ll_capa_timer)) {
78                 mod_timer(&ll_capa_timer, expiry);
79                 DEBUG_CAPA(D_SEC, &ocapa->c_capa,
80                            "ll_capa_timer update: %lu/%lu by", expiry, jiffies);
81         }
82 }
83
84 static inline cfs_time_t capa_renewal_time(struct obd_capa *ocapa)
85 {
86         return cfs_time_sub(ocapa->c_expiry,
87                             cfs_time_seconds(ocapa->c_capa.lc_timeout) / 2);
88 }
89
90 static inline int capa_is_to_expire(struct obd_capa *ocapa)
91 {
92         return cfs_time_beforeq(capa_renewal_time(ocapa), cfs_time_current());
93 }
94
95 static inline int have_expired_capa(void)
96 {
97         struct obd_capa *ocapa = NULL;
98         int expired = 0;
99
100         /* if ll_capa_list has client capa to expire or ll_idle_capas has
101          * expired capa, return 1.
102          */
103         spin_lock(&capa_lock);
104         if (!list_empty(ll_capa_list)) {
105                 ocapa = list_entry(ll_capa_list->next, struct obd_capa,
106                                        c_list);
107                 expired = capa_is_to_expire(ocapa);
108                 if (!expired)
109                         update_capa_timer(ocapa, capa_renewal_time(ocapa));
110         } else if (!list_empty(&ll_idle_capas)) {
111                 ocapa = list_entry(ll_idle_capas.next, struct obd_capa,
112                                        c_list);
113                 expired = capa_is_expired(ocapa);
114                 if (!expired)
115                         update_capa_timer(ocapa, ocapa->c_expiry);
116         }
117         spin_unlock(&capa_lock);
118
119         if (expired)
120                 DEBUG_CAPA(D_SEC, &ocapa->c_capa, "expired");
121         return expired;
122 }
123
124 static void sort_add_capa(struct obd_capa *ocapa, struct list_head *head)
125 {
126         struct obd_capa *tmp;
127         struct list_head *before = NULL;
128
129         /* TODO: client capa is sorted by expiry, this could be optimized */
130         list_for_each_entry_reverse(tmp, head, c_list) {
131                 if (cfs_time_aftereq(ocapa->c_expiry, tmp->c_expiry)) {
132                         before = &tmp->c_list;
133                         break;
134                 }
135         }
136
137         LASSERT(&ocapa->c_list != before);
138         list_add(&ocapa->c_list, before ?: head);
139 }
140
141 static inline int obd_capa_open_count(struct obd_capa *oc)
142 {
143         struct ll_inode_info *lli = ll_i2info(oc->u.cli.inode);
144         return atomic_read(&lli->lli_open_count);
145 }
146
147 static void ll_delete_capa(struct obd_capa *ocapa)
148 {
149         struct ll_inode_info *lli = ll_i2info(ocapa->u.cli.inode);
150
151         if (capa_for_mds(&ocapa->c_capa)) {
152                 LASSERT(lli->lli_mds_capa == ocapa);
153                 lli->lli_mds_capa = NULL;
154         } else if (capa_for_oss(&ocapa->c_capa)) {
155                 list_del_init(&ocapa->u.cli.lli_list);
156         }
157
158         DEBUG_CAPA(D_SEC, &ocapa->c_capa, "free client");
159         list_del_init(&ocapa->c_list);
160         capa_count[CAPA_SITE_CLIENT]--;
161         /* release the ref when alloc */
162         capa_put(ocapa);
163 }
164
165 /* three places where client capa is deleted:
166  * 1. capa_thread_main(), main place to delete expired capa.
167  * 2. ll_clear_inode_capas() in ll_clear_inode().
168  * 3. ll_truncate_free_capa() delete truncate capa explicitly in ll_setattr_ost().
169  */
170 static int capa_thread_main(void *unused)
171 {
172         struct obd_capa *ocapa, *tmp, *next;
173         struct inode *inode = NULL;
174         struct l_wait_info lwi = { 0 };
175         int rc;
176         ENTRY;
177
178         thread_set_flags(&ll_capa_thread, SVC_RUNNING);
179         wake_up(&ll_capa_thread.t_ctl_waitq);
180
181         while (1) {
182                 l_wait_event(ll_capa_thread.t_ctl_waitq,
183                              !thread_is_running(&ll_capa_thread) ||
184                              have_expired_capa(),
185                              &lwi);
186
187                 if (!thread_is_running(&ll_capa_thread))
188                         break;
189
190                 next = NULL;
191
192                 spin_lock(&capa_lock);
193                 list_for_each_entry_safe(ocapa, tmp, ll_capa_list, c_list) {
194                         __u64 ibits;
195
196                         LASSERT(ocapa->c_capa.lc_opc != CAPA_OPC_OSS_TRUNC);
197
198                         if (!capa_is_to_expire(ocapa)) {
199                                 next = ocapa;
200                                 break;
201                         }
202
203                         list_del_init(&ocapa->c_list);
204
205                         /* for MDS capability, only renew those which belong to
206                          * dir, or its inode is opened, or client holds LOOKUP
207                          * lock.
208                          */
209                         /* ibits may be changed by ll_have_md_lock() so we have
210                          * to set it each time */
211                         ibits = MDS_INODELOCK_LOOKUP;
212                         if (capa_for_mds(&ocapa->c_capa) &&
213                             !S_ISDIR(ocapa->u.cli.inode->i_mode) &&
214                             obd_capa_open_count(ocapa) == 0 &&
215                             !ll_have_md_lock(ocapa->u.cli.inode,
216                                              &ibits, LCK_MINMODE)) {
217                                 DEBUG_CAPA(D_SEC, &ocapa->c_capa,
218                                            "skip renewal for");
219                                 sort_add_capa(ocapa, &ll_idle_capas);
220                                 continue;
221                         }
222
223                         /* for OSS capability, only renew those whose inode is
224                          * opened.
225                          */
226                         if (capa_for_oss(&ocapa->c_capa) &&
227                             obd_capa_open_count(ocapa) == 0) {
228                                 /* oss capa with open count == 0 won't renew,
229                                  * move to idle list */
230                                 sort_add_capa(ocapa, &ll_idle_capas);
231                                 continue;
232                         }
233
234                         /* NB iput() is in ll_update_capa() */
235                         inode = igrab(ocapa->u.cli.inode);
236                         if (inode == NULL) {
237                                 DEBUG_CAPA(D_ERROR, &ocapa->c_capa,
238                                            "igrab failed for");
239                                 continue;
240                         }
241
242                         capa_get(ocapa);
243                         ll_capa_renewed++;
244                         spin_unlock(&capa_lock);
245                         rc = md_renew_capa(ll_i2mdexp(inode), ocapa,
246                                            ll_update_capa);
247                         spin_lock(&capa_lock);
248                         if (rc) {
249                                 DEBUG_CAPA(D_ERROR, &ocapa->c_capa,
250                                            "renew failed: %d", rc);
251                                 ll_capa_renewal_failed++;
252                         }
253                 }
254
255                 if (next)
256                         update_capa_timer(next, capa_renewal_time(next));
257
258                 list_for_each_entry_safe(ocapa, tmp, &ll_idle_capas,
259                                              c_list) {
260                         if (!capa_is_expired(ocapa)) {
261                                 if (!next)
262                                         update_capa_timer(ocapa,
263                                                           ocapa->c_expiry);
264                                 break;
265                         }
266
267                         if (atomic_read(&ocapa->c_refc) > 1) {
268                                 DEBUG_CAPA(D_SEC, &ocapa->c_capa,
269                                            "expired(c_refc %d), don't release",
270                                            atomic_read(&ocapa->c_refc));
271                                 /* don't try to renew any more */
272                                 list_del_init(&ocapa->c_list);
273                                 continue;
274                         }
275
276                         /* expired capa is released. */
277                         DEBUG_CAPA(D_SEC, &ocapa->c_capa, "release expired");
278                         ll_delete_capa(ocapa);
279                 }
280
281                 spin_unlock(&capa_lock);
282         }
283
284         thread_set_flags(&ll_capa_thread, SVC_STOPPED);
285         wake_up(&ll_capa_thread.t_ctl_waitq);
286         RETURN(0);
287 }
288
289 void ll_capa_timer_callback(unsigned long unused)
290 {
291         wake_up(&ll_capa_thread.t_ctl_waitq);
292 }
293
294 int ll_capa_thread_start(void)
295 {
296         struct task_struct *task;
297         ENTRY;
298
299         init_waitqueue_head(&ll_capa_thread.t_ctl_waitq);
300
301         task = kthread_run(capa_thread_main, NULL, "ll_capa");
302         if (IS_ERR(task)) {
303                 CERROR("cannot start expired capa thread: rc %ld\n",
304                         PTR_ERR(task));
305                 RETURN(PTR_ERR(task));
306         }
307         wait_event(ll_capa_thread.t_ctl_waitq,
308                        thread_is_running(&ll_capa_thread));
309
310         RETURN(0);
311 }
312
313 void ll_capa_thread_stop(void)
314 {
315         thread_set_flags(&ll_capa_thread, SVC_STOPPING);
316         wake_up(&ll_capa_thread.t_ctl_waitq);
317         wait_event(ll_capa_thread.t_ctl_waitq,
318                        thread_is_stopped(&ll_capa_thread));
319 }
320
321 struct obd_capa *ll_mdscapa_get(struct inode *inode)
322 {
323         struct ll_inode_info *lli = ll_i2info(inode);
324         struct obd_capa *ocapa;
325         ENTRY;
326
327         LASSERT(inode != NULL);
328
329         if ((ll_i2sbi(inode)->ll_flags & LL_SBI_MDS_CAPA) == 0)
330                 RETURN(NULL);
331
332         spin_lock(&capa_lock);
333         ocapa = capa_get(lli->lli_mds_capa);
334         spin_unlock(&capa_lock);
335         if (!ocapa && atomic_read(&ll_capa_debug)) {
336                 CERROR("no mds capability for "DFID"\n", PFID(&lli->lli_fid));
337                 atomic_set(&ll_capa_debug, 0);
338         }
339
340         RETURN(ocapa);
341 }
342
343 static struct obd_capa *do_add_mds_capa(struct inode *inode,
344                                         struct obd_capa *ocapa)
345 {
346         struct ll_inode_info *lli = ll_i2info(inode);
347         struct obd_capa *old = lli->lli_mds_capa;
348         struct lustre_capa *capa = &ocapa->c_capa;
349
350         if (!old) {
351                 ocapa->u.cli.inode = inode;
352                 lli->lli_mds_capa = ocapa;
353                 capa_count[CAPA_SITE_CLIENT]++;
354
355                 DEBUG_CAPA(D_SEC, capa, "add MDS");
356         } else {
357                 spin_lock(&old->c_lock);
358                 old->c_capa = *capa;
359                 spin_unlock(&old->c_lock);
360
361                 DEBUG_CAPA(D_SEC, capa, "update MDS");
362
363                 capa_put(ocapa);
364                 ocapa = old;
365         }
366         return ocapa;
367 }
368
369 static struct obd_capa *do_lookup_oss_capa(struct inode *inode, int opc)
370 {
371         struct ll_inode_info *lli = ll_i2info(inode);
372         struct obd_capa *ocapa;
373
374         /* inside capa_lock */
375         list_for_each_entry(ocapa, &lli->lli_oss_capas, u.cli.lli_list) {
376                 if ((capa_opc(&ocapa->c_capa) & opc) != opc)
377                         continue;
378
379                 LASSERT(lu_fid_eq(capa_fid(&ocapa->c_capa),
380                                   ll_inode2fid(inode)));
381                 LASSERT(ocapa->c_site == CAPA_SITE_CLIENT);
382
383                 DEBUG_CAPA(D_SEC, &ocapa->c_capa, "found client");
384                 return ocapa;
385         }
386
387         return NULL;
388 }
389
390 static inline void inode_add_oss_capa(struct inode *inode,
391                                       struct obd_capa *ocapa)
392 {
393         struct ll_inode_info *lli = ll_i2info(inode);
394         struct obd_capa *tmp;
395         struct list_head *next = NULL;
396
397         /* capa is sorted in lli_oss_capas so lookup can always find the
398          * latest one */
399         list_for_each_entry(tmp, &lli->lli_oss_capas, u.cli.lli_list) {
400                 if (cfs_time_after(ocapa->c_expiry, tmp->c_expiry)) {
401                         next = &tmp->u.cli.lli_list;
402                         break;
403                 }
404         }
405         LASSERT(&ocapa->u.cli.lli_list != next);
406         list_move_tail(&ocapa->u.cli.lli_list, next ?: &lli->lli_oss_capas);
407 }
408
409 static struct obd_capa *do_add_oss_capa(struct inode *inode,
410                                         struct obd_capa *ocapa)
411 {
412         struct obd_capa *old;
413         struct lustre_capa *capa = &ocapa->c_capa;
414
415         LASSERTF(S_ISREG(inode->i_mode),
416                  "inode has oss capa, but not regular file, mode: %d\n",
417                  inode->i_mode);
418
419         /* FIXME: can't replace it so easily with fine-grained opc */
420         old = do_lookup_oss_capa(inode, capa_opc(capa) & CAPA_OPC_OSS_ONLY);
421         if (!old) {
422                 ocapa->u.cli.inode = inode;
423                 INIT_LIST_HEAD(&ocapa->u.cli.lli_list);
424                 capa_count[CAPA_SITE_CLIENT]++;
425
426                 DEBUG_CAPA(D_SEC, capa, "add OSS");
427         } else {
428                 spin_lock(&old->c_lock);
429                 old->c_capa = *capa;
430                 spin_unlock(&old->c_lock);
431
432                 DEBUG_CAPA(D_SEC, capa, "update OSS");
433
434                 capa_put(ocapa);
435                 ocapa = old;
436         }
437
438         inode_add_oss_capa(inode, ocapa);
439         return ocapa;
440 }
441
442 struct obd_capa *ll_add_capa(struct inode *inode, struct obd_capa *ocapa)
443 {
444         spin_lock(&capa_lock);
445         ocapa = capa_for_mds(&ocapa->c_capa) ? do_add_mds_capa(inode, ocapa) :
446                                                do_add_oss_capa(inode, ocapa);
447
448         /* truncate capa won't renew */
449         if (ocapa->c_capa.lc_opc != CAPA_OPC_OSS_TRUNC) {
450                 set_capa_expiry(ocapa);
451                 list_del_init(&ocapa->c_list);
452                 sort_add_capa(ocapa, ll_capa_list);
453
454                 update_capa_timer(ocapa, capa_renewal_time(ocapa));
455         }
456
457         spin_unlock(&capa_lock);
458
459         atomic_set(&ll_capa_debug, 1);
460         return ocapa;
461 }
462
463 static inline void delay_capa_renew(struct obd_capa *oc, cfs_time_t delay)
464 {
465         /* NB: set a fake expiry for this capa to prevent it renew too soon */
466         oc->c_expiry = cfs_time_add(oc->c_expiry, cfs_time_seconds(delay));
467 }
468
469 static int ll_update_capa(struct obd_capa *ocapa, struct lustre_capa *capa)
470 {
471         struct inode *inode = ocapa->u.cli.inode;
472         int rc = 0;
473         ENTRY;
474
475         LASSERT(ocapa);
476
477         if (IS_ERR(capa)) {
478                 /* set error code */
479                 rc = PTR_ERR(capa);
480                 spin_lock(&capa_lock);
481                 if (rc == -ENOENT) {
482                         DEBUG_CAPA(D_SEC, &ocapa->c_capa,
483                                    "renewal canceled because object removed");
484                         ll_capa_renewal_noent++;
485                 } else {
486                         ll_capa_renewal_failed++;
487
488                         /* failed capa won't be renewed any longer, but if -EIO,
489                          * client might be doing recovery, retry in 2 min. */
490                         if (rc == -EIO && !capa_is_expired(ocapa)) {
491                                 delay_capa_renew(ocapa, 120);
492                                 DEBUG_CAPA(D_ERROR, &ocapa->c_capa,
493                                            "renewal failed: -EIO, "
494                                            "retry in 2 mins");
495                                 ll_capa_renewal_retries++;
496                                 GOTO(retry, rc);
497                         } else {
498                                 DEBUG_CAPA(D_ERROR, &ocapa->c_capa,
499                                            "renewal failed(rc: %d) for", rc);
500                         }
501                 }
502
503                 list_del_init(&ocapa->c_list);
504                 sort_add_capa(ocapa, &ll_idle_capas);
505                 spin_unlock(&capa_lock);
506
507                 capa_put(ocapa);
508                 iput(inode);
509                 RETURN(rc);
510         }
511
512         spin_lock(&ocapa->c_lock);
513         LASSERT(!memcmp(&ocapa->c_capa, capa,
514                         offsetof(struct lustre_capa, lc_opc)));
515         ocapa->c_capa = *capa;
516         set_capa_expiry(ocapa);
517         spin_unlock(&ocapa->c_lock);
518
519         spin_lock(&capa_lock);
520         if (capa_for_oss(capa))
521                 inode_add_oss_capa(inode, ocapa);
522         DEBUG_CAPA(D_SEC, capa, "renew");
523         EXIT;
524 retry:
525         list_del_init(&ocapa->c_list);
526         sort_add_capa(ocapa, ll_capa_list);
527         update_capa_timer(ocapa, capa_renewal_time(ocapa));
528         spin_unlock(&capa_lock);
529
530         capa_put(ocapa);
531         iput(inode);
532         return rc;
533 }
534
535 void ll_capa_open(struct inode *inode)
536 {
537         struct ll_inode_info *lli = ll_i2info(inode);
538
539         if ((ll_i2sbi(inode)->ll_flags & (LL_SBI_MDS_CAPA | LL_SBI_OSS_CAPA))
540             == 0)
541                 return;
542
543         if (!S_ISREG(inode->i_mode))
544                 return;
545
546         atomic_inc(&lli->lli_open_count);
547 }
548
549 void ll_capa_close(struct inode *inode)
550 {
551         struct ll_inode_info *lli = ll_i2info(inode);
552
553         if ((ll_i2sbi(inode)->ll_flags & (LL_SBI_MDS_CAPA | LL_SBI_OSS_CAPA))
554             == 0)
555                 return;
556
557         if (!S_ISREG(inode->i_mode))
558                 return;
559
560         atomic_dec(&lli->lli_open_count);
561 }
562
563 /* delete CAPA_OPC_OSS_TRUNC only */
564 void ll_truncate_free_capa(struct obd_capa *ocapa)
565 {
566         if (!ocapa)
567                 return;
568
569         LASSERT(ocapa->c_capa.lc_opc & CAPA_OPC_OSS_TRUNC);
570         DEBUG_CAPA(D_SEC, &ocapa->c_capa, "free truncate");
571
572         /* release ref when find */
573         capa_put(ocapa);
574         if (likely(ocapa->c_capa.lc_opc == CAPA_OPC_OSS_TRUNC)) {
575                 spin_lock(&capa_lock);
576                 ll_delete_capa(ocapa);
577                 spin_unlock(&capa_lock);
578         }
579 }
580
581 void ll_clear_inode_capas(struct inode *inode)
582 {
583         struct ll_inode_info *lli = ll_i2info(inode);
584         struct obd_capa *ocapa, *tmp;
585
586         spin_lock(&capa_lock);
587         ocapa = lli->lli_mds_capa;
588         if (ocapa)
589                 ll_delete_capa(ocapa);
590
591         list_for_each_entry_safe(ocapa, tmp, &lli->lli_oss_capas,
592                                      u.cli.lli_list)
593                 ll_delete_capa(ocapa);
594         spin_unlock(&capa_lock);
595 }
596
597 void ll_print_capa_stat(struct ll_sb_info *sbi)
598 {
599         if (sbi->ll_flags & (LL_SBI_MDS_CAPA | LL_SBI_OSS_CAPA))
600                 LCONSOLE_INFO("Fid capabilities renewed: %llu\n"
601                               "Fid capabilities renewal ENOENT: %llu\n"
602                               "Fid capabilities failed to renew: %llu\n"
603                               "Fid capabilities renewal retries: %llu\n",
604                               ll_capa_renewed, ll_capa_renewal_noent,
605                               ll_capa_renewal_failed, ll_capa_renewal_retries);
606 }