4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2012, 2014, Intel Corporation.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
36 * lustre/llite/llite_capa.c
38 * Author: Lai Siyao <lsy@clusterfs.com>
41 #define DEBUG_SUBSYSTEM S_LLITE
44 #include <linux/version.h>
45 #include <asm/uaccess.h>
46 #include <linux/file.h>
47 #include <linux/kmod.h>
49 #include "llite_internal.h"
51 /* for obd_capa.c_list, client capa might stay in three places:
54 * 3. stand alone: just allocated.
57 /* capas for oss writeback and those failed to renew */
58 static struct list_head ll_idle_capas = LIST_HEAD_INIT(ll_idle_capas);
/* state of the capa thread: its flags and t_ctl_waitq are used by
 * capa_thread_main(), ll_capa_thread_start() and ll_capa_thread_stop() */
59 static struct ptlrpc_thread ll_capa_thread;
/* client slot of capa_list: the list capa_thread_main() scans for renewal
 * and into which ll_add_capa() and ll_update_capa() insert capas */
60 static struct list_head *ll_capa_list = &capa_list[CAPA_SITE_CLIENT];
62 /* llite capa renewal timer */
63 struct timer_list ll_capa_timer;
64 /* for debug: indicate whether capa on llite is enabled or not */
65 static atomic_t ll_capa_debug = ATOMIC_INIT(0);
/* renewal statistics below are printed by ll_print_capa_stat() */
66 static unsigned long long ll_capa_renewed = 0;
/* counted in ll_update_capa() next to the "object removed" message */
67 static unsigned long long ll_capa_renewal_noent = 0;
/* counted in capa_thread_main() and ll_update_capa() on renewal failure */
68 static unsigned long long ll_capa_renewal_failed = 0;
/* counted in ll_update_capa() when an -EIO failure is scheduled for retry */
69 static unsigned long long ll_capa_renewal_retries = 0;
/* forward declaration; the definition appears later in this file */
71 static int ll_update_capa(struct obd_capa *ocapa, struct lustre_capa *capa);
/* Re-arm ll_capa_timer to fire at \a expiry when \a expiry is earlier than
 * the currently programmed ll_capa_timer.expires, or when the timer is not
 * pending at all, and log the change.
 *
 * \param ocapa   capa on whose behalf the timer is updated; only used for
 *                the debug message
 * \param expiry  new timer expiry (printed with %lu next to jiffies)
 */
73 static inline void update_capa_timer(struct obd_capa *ocapa, cfs_time_t expiry)
75 if (cfs_time_before(expiry, ll_capa_timer.expires) ||
76 !timer_pending(&ll_capa_timer)) {
77 mod_timer(&ll_capa_timer, expiry);
78 DEBUG_CAPA(D_SEC, &ocapa->c_capa,
79 "ll_capa_timer update: %lu/%lu by", expiry, jiffies);
/* Return the time at which \a ocapa is due for renewal: its c_expiry minus
 * half of its c_capa.lc_timeout, the latter converted with
 * cfs_time_seconds().
 */
83 static inline cfs_time_t capa_renewal_time(struct obd_capa *ocapa)
85 return cfs_time_sub(ocapa->c_expiry,
86 cfs_time_seconds(ocapa->c_capa.lc_timeout) / 2);
/* Return the result of comparing the renewal time of \a ocapa (see
 * capa_renewal_time()) against the current time with cfs_time_beforeq(),
 * i.e. whether the capa has reached its renewal point.
 */
89 static inline int capa_is_to_expire(struct obd_capa *ocapa)
91 return cfs_time_beforeq(capa_renewal_time(ocapa), cfs_time_current());
/* Check, under capa_lock, whether the capa thread has work to do.
 *
 * Only list heads are examined: the first entry of ll_capa_list is tested
 * with capa_is_to_expire(); if that list is empty, the first entry of
 * ll_idle_capas is tested with capa_is_expired(). update_capa_timer() is
 * called with the renewal time (first case) or the c_expiry (second case)
 * of the examined capa.
 *
 * NOTE(review): the declaration and return of "expired", and the conditions
 * guarding the update_capa_timer() and DEBUG_CAPA() calls, are not visible
 * in this listing - confirm against the complete source.
 */
94 static inline int have_expired_capa(void)
96 struct obd_capa *ocapa = NULL;
99 /* if ll_capa_list has client capa to expire or ll_idle_capas has
100 * expired capa, return 1.
102 spin_lock(&capa_lock);
103 if (!list_empty(ll_capa_list)) {
104 ocapa = list_entry(ll_capa_list->next, struct obd_capa,
106 expired = capa_is_to_expire(ocapa);
108 update_capa_timer(ocapa, capa_renewal_time(ocapa));
109 } else if (!list_empty(&ll_idle_capas)) {
110 ocapa = list_entry(ll_idle_capas.next, struct obd_capa,
112 expired = capa_is_expired(ocapa);
114 update_capa_timer(ocapa, ocapa->c_expiry);
116 spin_unlock(&capa_lock);
119 DEBUG_CAPA(D_SEC, &ocapa->c_capa, "expired");
/* Insert \a ocapa into \a head according to c_expiry.
 *
 * The list is walked from the tail; an entry whose c_expiry is not later
 * than that of \a ocapa is remembered and \a ocapa is linked right after it
 * with list_add(). When no such entry is found, \a ocapa is linked right
 * after \a head, i.e. at the front of the list.
 *
 * NOTE(review): the statement that ends the walk on the first match is not
 * visible in this listing - presumably a break; confirm.
 */
123 static void sort_add_capa(struct obd_capa *ocapa, struct list_head *head)
125 struct obd_capa *tmp;
126 struct list_head *before = NULL;
128 /* TODO: client capa is sorted by expiry, this could be optimized */
129 list_for_each_entry_reverse(tmp, head, c_list) {
130 if (cfs_time_aftereq(ocapa->c_expiry, tmp->c_expiry)) {
131 before = &tmp->c_list;
136 LASSERT(&ocapa->c_list != before);
137 list_add(&ocapa->c_list, before ?: head);
/* Return the current lli_open_count of the inode that \a oc is attached to
 * (oc->u.cli.inode).
 */
140 static inline int obd_capa_open_count(struct obd_capa *oc)
142 struct ll_inode_info *lli = ll_i2info(oc->u.cli.inode);
143 return atomic_read(&lli->lli_open_count);
/* Detach client capa \a ocapa from its inode and from the capa lists.
 *
 * An MDS capa must be the inode's lli_mds_capa (asserted) and that pointer
 * is cleared; an OSS capa is unlinked from the inode's OSS capa list through
 * u.cli.lli_list. In both cases the capa is then unlinked from c_list and
 * capa_count[CAPA_SITE_CLIENT] is decremented.
 *
 * All callers visible in this file hold capa_lock around the call.
 *
 * NOTE(review): the statement following the "release the ref when alloc"
 * comment is not visible in this listing.
 */
146 static void ll_delete_capa(struct obd_capa *ocapa)
148 struct ll_inode_info *lli = ll_i2info(ocapa->u.cli.inode);
150 if (capa_for_mds(&ocapa->c_capa)) {
151 LASSERT(lli->lli_mds_capa == ocapa);
152 lli->lli_mds_capa = NULL;
153 } else if (capa_for_oss(&ocapa->c_capa)) {
154 list_del_init(&ocapa->u.cli.lli_list);
157 DEBUG_CAPA(D_SEC, &ocapa->c_capa, "free client");
158 list_del_init(&ocapa->c_list);
159 capa_count[CAPA_SITE_CLIENT]--;
160 /* release the ref when alloc */
/* Body of the capa thread started by ll_capa_thread_start().
 *
 * Visible behaviour:
 * - marks ll_capa_thread SVC_RUNNING and wakes t_ctl_waitq, then sleeps on
 *   that wait queue until woken with work or no longer running;
 * - with capa_lock held, walks ll_capa_list. Truncate capas must not be on
 *   this list (asserted). A capa that has reached its renewal time is
 *   unlinked from c_list and then either
 *     . moved to ll_idle_capas when it is an MDS capa of a non-directory
 *       inode with open count 0 and no LOOKUP md lock, or an OSS capa with
 *       open count 0, or
 *     . renewed: the inode is pinned with igrab(), capa_lock is dropped
 *       around md_renew_capa() and re-taken afterwards; a failure is logged
 *       and counted in ll_capa_renewal_failed;
 * - re-arms the timer for the capa recorded in "next";
 * - walks ll_idle_capas: an expired capa that is still referenced
 *   (c_refc > 1) is only unlinked from c_list, otherwise it is freed via
 *   ll_delete_capa(); for a capa that is not yet expired the timer is
 *   updated;
 * - on exit marks the thread SVC_STOPPED and wakes t_ctl_waitq.
 *
 * \param unused  not referenced by the visible code
 *
 * NOTE(review): the loop construct, the remaining wait condition, the
 * break/continue statements, the callback argument of md_renew_capa() and
 * the return value are not visible in this listing - confirm against the
 * complete source.
 */
164 /* three places where client capa is deleted:
165 * 1. capa_thread_main(), main place to delete expired capa.
166 * 2. ll_clear_inode_capas() in ll_clear_inode().
167 * 3. ll_truncate_free_capa() delete truncate capa explicitly in ll_setattr_ost().
169 static int capa_thread_main(void *unused)
171 struct obd_capa *ocapa, *tmp, *next;
172 struct inode *inode = NULL;
173 struct l_wait_info lwi = { 0 };
177 thread_set_flags(&ll_capa_thread, SVC_RUNNING);
178 wake_up(&ll_capa_thread.t_ctl_waitq);
181 l_wait_event(ll_capa_thread.t_ctl_waitq,
182 !thread_is_running(&ll_capa_thread) ||
186 if (!thread_is_running(&ll_capa_thread))
191 spin_lock(&capa_lock);
192 list_for_each_entry_safe(ocapa, tmp, ll_capa_list, c_list) {
195 LASSERT(ocapa->c_capa.lc_opc != CAPA_OPC_OSS_TRUNC);
197 if (!capa_is_to_expire(ocapa)) {
202 list_del_init(&ocapa->c_list);
204 /* for MDS capability, only renew those which belong to
205 * dir, or its inode is opened, or client holds LOOKUP
208 /* ibits may be changed by ll_have_md_lock() so we have
209 * to set it each time */
210 ibits = MDS_INODELOCK_LOOKUP;
211 if (capa_for_mds(&ocapa->c_capa) &&
212 !S_ISDIR(ocapa->u.cli.inode->i_mode) &&
213 obd_capa_open_count(ocapa) == 0 &&
214 !ll_have_md_lock(ocapa->u.cli.inode,
215 &ibits, LCK_MINMODE)) {
216 DEBUG_CAPA(D_SEC, &ocapa->c_capa,
218 sort_add_capa(ocapa, &ll_idle_capas);
222 /* for OSS capability, only renew those whose inode is
225 if (capa_for_oss(&ocapa->c_capa) &&
226 obd_capa_open_count(ocapa) == 0) {
227 /* oss capa with open count == 0 won't renew,
228 * move to idle list */
229 sort_add_capa(ocapa, &ll_idle_capas);
233 /* NB iput() is in ll_update_capa() */
234 inode = igrab(ocapa->u.cli.inode);
236 DEBUG_CAPA(D_ERROR, &ocapa->c_capa,
243 spin_unlock(&capa_lock);
244 rc = md_renew_capa(ll_i2mdexp(inode), ocapa,
246 spin_lock(&capa_lock);
248 DEBUG_CAPA(D_ERROR, &ocapa->c_capa,
249 "renew failed: %d", rc);
250 ll_capa_renewal_failed++;
255 update_capa_timer(next, capa_renewal_time(next));
257 list_for_each_entry_safe(ocapa, tmp, &ll_idle_capas,
259 if (!capa_is_expired(ocapa)) {
261 update_capa_timer(ocapa,
266 if (atomic_read(&ocapa->c_refc) > 1) {
267 DEBUG_CAPA(D_SEC, &ocapa->c_capa,
268 "expired(c_refc %d), don't release",
269 atomic_read(&ocapa->c_refc));
270 /* don't try to renew any more */
271 list_del_init(&ocapa->c_list);
275 /* expired capa is released. */
276 DEBUG_CAPA(D_SEC, &ocapa->c_capa, "release expired");
277 ll_delete_capa(ocapa);
280 spin_unlock(&capa_lock);
283 thread_set_flags(&ll_capa_thread, SVC_STOPPED);
284 wake_up(&ll_capa_thread.t_ctl_waitq);
/* Wake up whoever sleeps on ll_capa_thread.t_ctl_waitq (capa_thread_main()
 * waits there). \a unused is ignored.
 *
 * NOTE(review): presumably installed as the handler of ll_capa_timer; the
 * timer setup is not part of this file section - confirm.
 */
288 void ll_capa_timer_callback(unsigned long unused)
290 wake_up(&ll_capa_thread.t_ctl_waitq);
/* Start the capa thread.
 *
 * Initialises ll_capa_thread.t_ctl_waitq, launches capa_thread_main() as a
 * kernel thread named "ll_capa" and then blocks until that thread reports
 * itself as running.
 *
 * \retval PTR_ERR(task)  on the error path that logs "cannot start expired
 *                        capa thread"
 *
 * NOTE(review): the error test on "task" and the success return value are
 * not visible in this listing.
 */
293 int ll_capa_thread_start(void)
295 struct task_struct *task;
298 init_waitqueue_head(&ll_capa_thread.t_ctl_waitq);
300 task = kthread_run(capa_thread_main, NULL, "ll_capa");
302 CERROR("cannot start expired capa thread: rc %ld\n",
304 RETURN(PTR_ERR(task));
306 wait_event(ll_capa_thread.t_ctl_waitq,
307 thread_is_running(&ll_capa_thread));
/* Ask the capa thread to stop and wait for it: flag ll_capa_thread as
 * SVC_STOPPING, wake its wait queue, then block until the thread has marked
 * itself stopped (capa_thread_main() sets SVC_STOPPED on exit).
 */
312 void ll_capa_thread_stop(void)
314 thread_set_flags(&ll_capa_thread, SVC_STOPPING);
315 wake_up(&ll_capa_thread.t_ctl_waitq);
316 wait_event(ll_capa_thread.t_ctl_waitq,
317 thread_is_stopped(&ll_capa_thread));
/* Look up an OSS capa of \a inode that is usable for \a opc.
 *
 * \param inode  inode whose lli_oss_capas list is searched
 * \param opc    must be CAPA_OPC_OSS_WRITE, CAPA_OPC_OSS_RW or
 *               CAPA_OPC_OSS_TRUNC (asserted)
 *
 * The LL_SBI_OSS_CAPA superblock flag is tested first. The search runs
 * under capa_lock and tests each capa with capa_is_expired(). A capa matches
 * when it supports WRITE and \a opc has the WRITE bit, or supports READ and
 * \a opc has the READ bit, or \a opc has the TRUNC bit and the capa supports
 * exactly \a opc. A match must carry the inode's fid and belong to
 * CAPA_SITE_CLIENT (both asserted).
 *
 * When ll_capa_debug is set, "no capability for ..." is logged and
 * ll_capa_debug is reset to 0.
 *
 * NOTE(review): the early return for a disabled flag, the handling of
 * expired entries, the "found" bookkeeping, reference acquisition and the
 * return statement are not visible in this listing.
 */
320 struct obd_capa *ll_osscapa_get(struct inode *inode, __u64 opc)
322 struct ll_inode_info *lli = ll_i2info(inode);
323 struct obd_capa *ocapa;
328 if ((ll_i2sbi(inode)->ll_flags & LL_SBI_OSS_CAPA) == 0)
331 LASSERT(opc == CAPA_OPC_OSS_WRITE || opc == CAPA_OPC_OSS_RW ||
332 opc == CAPA_OPC_OSS_TRUNC);
334 spin_lock(&capa_lock);
335 list_for_each_entry(ocapa, &lli->lli_oss_capas, u.cli.lli_list) {
336 if (capa_is_expired(ocapa))
338 if ((opc & CAPA_OPC_OSS_WRITE) &&
339 capa_opc_supported(&ocapa->c_capa, CAPA_OPC_OSS_WRITE)) {
342 } else if ((opc & CAPA_OPC_OSS_READ) &&
343 capa_opc_supported(&ocapa->c_capa,
344 CAPA_OPC_OSS_READ)) {
347 } else if ((opc & CAPA_OPC_OSS_TRUNC) &&
348 capa_opc_supported(&ocapa->c_capa, opc)) {
355 LASSERT(lu_fid_eq(capa_fid(&ocapa->c_capa),
356 ll_inode2fid(inode)));
357 LASSERT(ocapa->c_site == CAPA_SITE_CLIENT);
361 DEBUG_CAPA(D_SEC, &ocapa->c_capa, "found client");
365 if (atomic_read(&ll_capa_debug)) {
366 CERROR("no capability for "DFID" opc "LPX64"\n",
367 PFID(&lli->lli_fid), opc);
368 atomic_set(&ll_capa_debug, 0);
371 spin_unlock(&capa_lock);
/* Get the MDS capa attached to \a inode.
 *
 * \param inode  must not be NULL (asserted)
 *
 * The LL_SBI_MDS_CAPA superblock flag is tested first. The inode's
 * lli_mds_capa is then passed through capa_get() under capa_lock. If no
 * capa is obtained while ll_capa_debug is set, "no mds capability for ..."
 * is logged and ll_capa_debug is reset to 0.
 *
 * NOTE(review): the early return for a disabled flag and the final return
 * statement are not visible in this listing.
 */
376 struct obd_capa *ll_mdscapa_get(struct inode *inode)
378 struct ll_inode_info *lli = ll_i2info(inode);
379 struct obd_capa *ocapa;
382 LASSERT(inode != NULL);
384 if ((ll_i2sbi(inode)->ll_flags & LL_SBI_MDS_CAPA) == 0)
387 spin_lock(&capa_lock);
388 ocapa = capa_get(lli->lli_mds_capa);
389 spin_unlock(&capa_lock);
390 if (!ocapa && atomic_read(&ll_capa_debug)) {
391 CERROR("no mds capability for "DFID"\n", PFID(&lli->lli_fid));
392 atomic_set(&ll_capa_debug, 0);
/* Attach MDS capa \a ocapa to \a inode, or refresh the one already there.
 *
 * Two paths are visible:
 * - "add MDS": \a ocapa is bound to \a inode, stored in lli_mds_capa and
 *   capa_count[CAPA_SITE_CLIENT] is incremented;
 * - "update MDS": the previously attached capa ("old") is modified under
 *   its own c_lock.
 *
 * Called from ll_add_capa() with capa_lock held.
 *
 * NOTE(review): the condition selecting between the two paths, the
 * statement executed under old->c_lock and the return value are not visible
 * in this listing.
 */
398 static struct obd_capa *do_add_mds_capa(struct inode *inode,
399 struct obd_capa *ocapa)
401 struct ll_inode_info *lli = ll_i2info(inode);
402 struct obd_capa *old = lli->lli_mds_capa;
403 struct lustre_capa *capa = &ocapa->c_capa;
406 ocapa->u.cli.inode = inode;
407 lli->lli_mds_capa = ocapa;
408 capa_count[CAPA_SITE_CLIENT]++;
410 DEBUG_CAPA(D_SEC, capa, "add MDS");
412 spin_lock(&old->c_lock);
414 spin_unlock(&old->c_lock);
416 DEBUG_CAPA(D_SEC, capa, "update MDS");
/* Search the OSS capas of \a inode for one whose opc contains every bit of
 * \a opc; entries failing that test are rejected by the first check in the
 * loop. A candidate must carry the inode's fid and belong to
 * CAPA_SITE_CLIENT (both asserted).
 *
 * NOTE(review): the statements that skip a non-matching entry and return
 * the result are not visible in this listing.
 */
424 static struct obd_capa *do_lookup_oss_capa(struct inode *inode, int opc)
426 struct ll_inode_info *lli = ll_i2info(inode);
427 struct obd_capa *ocapa;
429 /* inside capa_lock */
430 list_for_each_entry(ocapa, &lli->lli_oss_capas, u.cli.lli_list) {
431 if ((capa_opc(&ocapa->c_capa) & opc) != opc)
434 LASSERT(lu_fid_eq(capa_fid(&ocapa->c_capa),
435 ll_inode2fid(inode)));
436 LASSERT(ocapa->c_site == CAPA_SITE_CLIENT);
438 DEBUG_CAPA(D_SEC, &ocapa->c_capa, "found client");
/* Place \a ocapa in the lli_oss_capas list of \a inode by expiry time.
 *
 * The list is scanned from the front for an entry that expires earlier than
 * \a ocapa; list_move_tail() then puts \a ocapa directly in front of that
 * entry, or at the tail of the list when there is none. list_move_tail()
 * first unlinks \a ocapa from wherever its u.cli.lli_list currently is, so
 * the function serves both for a new capa and for re-sorting a renewed one
 * (see ll_update_capa()).
 *
 * NOTE(review): the statement that ends the scan on the first match is not
 * visible in this listing - presumably a break; confirm.
 */
445 static inline void inode_add_oss_capa(struct inode *inode,
446 struct obd_capa *ocapa)
448 struct ll_inode_info *lli = ll_i2info(inode);
449 struct obd_capa *tmp;
450 struct list_head *next = NULL;
452 /* capa is sorted in lli_oss_capas so lookup can always find the
454 list_for_each_entry(tmp, &lli->lli_oss_capas, u.cli.lli_list) {
455 if (cfs_time_after(ocapa->c_expiry, tmp->c_expiry)) {
456 next = &tmp->u.cli.lli_list;
460 LASSERT(&ocapa->u.cli.lli_list != next);
461 list_move_tail(&ocapa->u.cli.lli_list, next ?: &lli->lli_oss_capas);
/* Attach OSS capa \a ocapa to \a inode, or refresh a matching one.
 *
 * \a inode must be a regular file (asserted). An existing capa is looked up
 * with the CAPA_OPC_OSS_ONLY part of the new capa's opc. Two paths are
 * visible:
 * - "add OSS": \a ocapa is bound to \a inode, its u.cli.lli_list is
 *   initialised and capa_count[CAPA_SITE_CLIENT] is incremented;
 * - "update OSS": the existing capa ("old") is modified under its c_lock.
 * inode_add_oss_capa() then places \a ocapa in the inode's OSS capa list.
 *
 * Called from ll_add_capa() with capa_lock held.
 *
 * NOTE(review): the condition selecting between the two paths, the
 * statement executed under old->c_lock, any statements between the update
 * path and inode_add_oss_capa(), and the return value are not visible in
 * this listing.
 */
464 static struct obd_capa *do_add_oss_capa(struct inode *inode,
465 struct obd_capa *ocapa)
467 struct obd_capa *old;
468 struct lustre_capa *capa = &ocapa->c_capa;
470 LASSERTF(S_ISREG(inode->i_mode),
471 "inode has oss capa, but not regular file, mode: %d\n",
474 /* FIXME: can't replace it so easily with fine-grained opc */
475 old = do_lookup_oss_capa(inode, capa_opc(capa) & CAPA_OPC_OSS_ONLY);
477 ocapa->u.cli.inode = inode;
478 INIT_LIST_HEAD(&ocapa->u.cli.lli_list);
479 capa_count[CAPA_SITE_CLIENT]++;
481 DEBUG_CAPA(D_SEC, capa, "add OSS");
483 spin_lock(&old->c_lock);
485 spin_unlock(&old->c_lock);
487 DEBUG_CAPA(D_SEC, capa, "update OSS");
493 inode_add_oss_capa(inode, ocapa);
/* Register capa \a ocapa for \a inode on the client.
 *
 * Under capa_lock the capa is handed to do_add_mds_capa() or
 * do_add_oss_capa() depending on capa_for_mds(); the capa returned by that
 * helper is the one used from then on. Unless it is a pure truncate capa
 * (lc_opc == CAPA_OPC_OSS_TRUNC), its expiry is set, it is (re)inserted
 * into ll_capa_list in expiry order and the renewal timer is updated.
 * Finally ll_capa_debug is set to 1.
 *
 * NOTE(review): the return statement is not visible in this listing.
 */
497 struct obd_capa *ll_add_capa(struct inode *inode, struct obd_capa *ocapa)
499 spin_lock(&capa_lock);
500 ocapa = capa_for_mds(&ocapa->c_capa) ? do_add_mds_capa(inode, ocapa) :
501 do_add_oss_capa(inode, ocapa);
503 /* truncate capa won't renew */
504 if (ocapa->c_capa.lc_opc != CAPA_OPC_OSS_TRUNC) {
505 set_capa_expiry(ocapa);
/* unlink from any list it is on before the sorted insert */
506 list_del_init(&ocapa->c_list);
507 sort_add_capa(ocapa, ll_capa_list);
509 update_capa_timer(ocapa, capa_renewal_time(ocapa));
512 spin_unlock(&capa_lock);
514 atomic_set(&ll_capa_debug, 1);
/* Push the c_expiry of \a oc forward by \a delay, which is converted with
 * cfs_time_seconds() before being added (ll_update_capa() passes 120).
 */
518 static inline void delay_capa_renew(struct obd_capa *oc, cfs_time_t delay)
520 /* NB: set a fake expiry for this capa to prevent it renew too soon */
521 oc->c_expiry = cfs_time_add(oc->c_expiry, cfs_time_seconds(delay));
/* Process the outcome of a capa renewal for \a ocapa.
 *
 * \param ocapa  client capa that was being renewed
 * \param capa   renewed capability data
 *
 * Failure path (under capa_lock): the failure is counted in
 * ll_capa_renewal_noent ("object removed") or ll_capa_renewal_failed. For
 * rc == -EIO on a capa that has not expired yet, the expiry is pushed back
 * by 120 seconds via delay_capa_renew() and ll_capa_renewal_retries is
 * incremented. The visible statements then move the capa to ll_idle_capas.
 *
 * Success path: \a capa is copied into ocapa->c_capa under ocapa->c_lock
 * and the expiry is recomputed; then, under capa_lock, an OSS capa is
 * re-sorted in its inode's list, and the capa is re-inserted into
 * ll_capa_list and the renewal timer updated.
 *
 * NOTE(review): how rc is derived, the branch conditions, what happens
 * between the retry bookkeeping and the move to ll_idle_capas, the iput()
 * referred to in capa_thread_main() and the return statements are not
 * visible in this listing.
 */
524 static int ll_update_capa(struct obd_capa *ocapa, struct lustre_capa *capa)
526 struct inode *inode = ocapa->u.cli.inode;
535 spin_lock(&capa_lock);
537 DEBUG_CAPA(D_SEC, &ocapa->c_capa,
538 "renewal canceled because object removed");
539 ll_capa_renewal_noent++;
541 ll_capa_renewal_failed++;
543 /* failed capa won't be renewed any longer, but if -EIO,
544 * client might be doing recovery, retry in 2 min. */
545 if (rc == -EIO && !capa_is_expired(ocapa)) {
546 delay_capa_renew(ocapa, 120);
547 DEBUG_CAPA(D_ERROR, &ocapa->c_capa,
548 "renewal failed: -EIO, "
550 ll_capa_renewal_retries++;
553 DEBUG_CAPA(D_ERROR, &ocapa->c_capa,
554 "renewal failed(rc: %d) for", rc);
558 list_del_init(&ocapa->c_list);
559 sort_add_capa(ocapa, &ll_idle_capas);
560 spin_unlock(&capa_lock);
567 spin_lock(&ocapa->c_lock);
/* everything laid out before lc_opc must be unchanged by the renewal */
568 LASSERT(!memcmp(&ocapa->c_capa, capa,
569 offsetof(struct lustre_capa, lc_opc)));
570 ocapa->c_capa = *capa;
571 set_capa_expiry(ocapa);
572 spin_unlock(&ocapa->c_lock);
574 spin_lock(&capa_lock);
575 if (capa_for_oss(capa))
576 inode_add_oss_capa(inode, ocapa);
577 DEBUG_CAPA(D_SEC, capa, "renew");
580 list_del_init(&ocapa->c_list);
581 sort_add_capa(ocapa, ll_capa_list);
582 update_capa_timer(ocapa, capa_renewal_time(ocapa));
583 spin_unlock(&capa_lock);
/* Account an open of \a inode for capa purposes by incrementing its
 * lli_open_count. The superblock capa flags (LL_SBI_MDS_CAPA,
 * LL_SBI_OSS_CAPA) and S_ISREG() are tested beforehand.
 *
 * NOTE(review): the comparison completing the flag test and the statements
 * taken when a test fails are not visible in this listing - presumably
 * early returns; confirm.
 */
590 void ll_capa_open(struct inode *inode)
592 struct ll_inode_info *lli = ll_i2info(inode);
594 if ((ll_i2sbi(inode)->ll_flags & (LL_SBI_MDS_CAPA | LL_SBI_OSS_CAPA))
598 if (!S_ISREG(inode->i_mode))
601 atomic_inc(&lli->lli_open_count);
/* Counterpart of ll_capa_open(): decrement the lli_open_count of \a inode.
 * The superblock capa flags (LL_SBI_MDS_CAPA, LL_SBI_OSS_CAPA) and
 * S_ISREG() are tested beforehand.
 *
 * NOTE(review): the comparison completing the flag test and the statements
 * taken when a test fails are not visible in this listing - presumably
 * early returns; confirm.
 */
604 void ll_capa_close(struct inode *inode)
606 struct ll_inode_info *lli = ll_i2info(inode);
608 if ((ll_i2sbi(inode)->ll_flags & (LL_SBI_MDS_CAPA | LL_SBI_OSS_CAPA))
612 if (!S_ISREG(inode->i_mode))
615 atomic_dec(&lli->lli_open_count);
618 /* delete CAPA_OPC_OSS_TRUNC only */
/* \a ocapa must have the CAPA_OPC_OSS_TRUNC bit set (asserted). Only when
 * its opc is exactly CAPA_OPC_OSS_TRUNC is it removed with ll_delete_capa()
 * under capa_lock; a capa carrying further opc bits is left in place.
 *
 * NOTE(review): any guard on \a ocapa before the assertion and the
 * statement following the "release ref when find" comment are not visible
 * in this listing.
 */
619 void ll_truncate_free_capa(struct obd_capa *ocapa)
624 LASSERT(ocapa->c_capa.lc_opc & CAPA_OPC_OSS_TRUNC);
625 DEBUG_CAPA(D_SEC, &ocapa->c_capa, "free truncate");
627 /* release ref when find */
629 if (likely(ocapa->c_capa.lc_opc == CAPA_OPC_OSS_TRUNC)) {
630 spin_lock(&capa_lock);
631 ll_delete_capa(ocapa);
632 spin_unlock(&capa_lock);
/* Drop all client capas attached to \a inode: with capa_lock held, the MDS
 * capa in lli_mds_capa and every entry of lli_oss_capas are passed to
 * ll_delete_capa(). The _safe list iterator is used because
 * ll_delete_capa() unlinks the entry being visited.
 *
 * NOTE(review): the check guarding the MDS capa deletion is not visible in
 * this listing - presumably a NULL test; confirm.
 */
636 void ll_clear_inode_capas(struct inode *inode)
638 struct ll_inode_info *lli = ll_i2info(inode);
639 struct obd_capa *ocapa, *tmp;
641 spin_lock(&capa_lock);
642 ocapa = lli->lli_mds_capa;
644 ll_delete_capa(ocapa);
646 list_for_each_entry_safe(ocapa, tmp, &lli->lli_oss_capas,
648 ll_delete_capa(ocapa);
649 spin_unlock(&capa_lock);
/* Print the capa renewal counters (renewed, ENOENT, failed, retries) to the
 * console, but only when \a sbi has LL_SBI_MDS_CAPA or LL_SBI_OSS_CAPA set.
 * The counters are file-scope, so the values are not specific to \a sbi.
 */
652 void ll_print_capa_stat(struct ll_sb_info *sbi)
654 if (sbi->ll_flags & (LL_SBI_MDS_CAPA | LL_SBI_OSS_CAPA))
655 LCONSOLE_INFO("Fid capabilities renewed: %llu\n"
656 "Fid capabilities renewal ENOENT: %llu\n"
657 "Fid capabilities failed to renew: %llu\n"
658 "Fid capabilities renewal retries: %llu\n",
659 ll_capa_renewed, ll_capa_renewal_noent,
660 ll_capa_renewal_failed, ll_capa_renewal_retries);