1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
6 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 only,
10 * as published by the Free Software Foundation.
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License version 2 for more details (a copy is included
16 * in the LICENSE file that accompanied this code).
18 * You should have received a copy of the GNU General Public License
19 * version 2 along with this program; If not, see
20 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
22 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23 * CA 95054 USA or visit www.sun.com if you need additional information or
29 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
30 * Use is subject to license terms.
33 * Copyright (c) 2011 Whamcloud, Inc.
36 * This file is part of Lustre, http://www.lustre.org/
37 * Lustre is a trademark of Sun Microsystems, Inc.
39 * lustre/llite/llite_capa.c
41 * Author: Lai Siyao <lsy@clusterfs.com>
44 #define DEBUG_SUBSYSTEM S_LLITE
47 #include <linux/version.h>
48 #include <asm/uaccess.h>
49 #include <linux/file.h>
50 #include <linux/kmod.h>
52 #include <lustre_lite.h>
53 #include "llite_internal.h"
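/* for obd_capa.c_list, client capa might stay in three places:
 * 1. ll_capa_list: capas waiting for their renewal time.
 * 2. ll_idle_capas: capas for oss writeback and those failed to renew.
 * 3. stand alone: just allocated.
 */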
61 /* capas for oss writeback and those failed to renew */
62 static CFS_LIST_HEAD(ll_idle_capas);
63 static struct ptlrpc_thread ll_capa_thread;
64 static cfs_list_t *ll_capa_list = &capa_list[CAPA_SITE_CLIENT];
66 /* llite capa renewal timer */
67 struct timer_list ll_capa_timer;
68 /* for debug: indicate whether capa on llite is enabled or not */
69 static cfs_atomic_t ll_capa_debug = CFS_ATOMIC_INIT(0);
70 static unsigned long long ll_capa_renewed = 0;
71 static unsigned long long ll_capa_renewal_noent = 0;
72 static unsigned long long ll_capa_renewal_failed = 0;
73 static unsigned long long ll_capa_renewal_retries = 0;
75 static inline void update_capa_timer(struct obd_capa *ocapa, cfs_time_t expiry)
77 if (cfs_time_before(expiry, ll_capa_timer.expires) ||
78 !timer_pending(&ll_capa_timer)) {
79 mod_timer(&ll_capa_timer, expiry);
80 DEBUG_CAPA(D_SEC, &ocapa->c_capa,
81 "ll_capa_timer update: %lu/%lu by", expiry, jiffies);
85 static inline cfs_time_t capa_renewal_time(struct obd_capa *ocapa)
87 return cfs_time_sub(ocapa->c_expiry,
88 cfs_time_seconds(ocapa->c_capa.lc_timeout) / 2);
91 static inline int capa_is_to_expire(struct obd_capa *ocapa)
93 return cfs_time_beforeq(capa_renewal_time(ocapa), cfs_time_current());
/*
 * Check, under capa_lock, whether a client capa needs attention.
 *
 * Only list heads are examined.  If ll_capa_list is not empty its first entry
 * is tested with capa_is_to_expire(); ll_idle_capas is looked at only when
 * ll_capa_list is empty, and its first entry is tested with
 * capa_is_expired().  The renewal timer is updated through
 * update_capa_timer() with the head entry's renewal time (ll_capa_list) or
 * its expiry (ll_idle_capas).
 *
 * NOTE(review): the declaration of `expired`, the conditions guarding the
 * update_capa_timer() calls and the final DEBUG_CAPA(), and the return
 * statement are not visible in this listing; presumably `expired` is
 * returned -- confirm against the full source.
 */
96 static inline int have_expired_capa(void)
98 struct obd_capa *ocapa = NULL;
101 /* if ll_capa_list has client capa to expire or ll_idle_capas has
102 * expired capa, return 1.
104 cfs_spin_lock(&capa_lock);
105 if (!cfs_list_empty(ll_capa_list)) {
106 ocapa = cfs_list_entry(ll_capa_list->next, struct obd_capa,
108 expired = capa_is_to_expire(ocapa);
110 update_capa_timer(ocapa, capa_renewal_time(ocapa));
111 } else if (!cfs_list_empty(&ll_idle_capas)) {
112 ocapa = cfs_list_entry(ll_idle_capas.next, struct obd_capa,
114 expired = capa_is_expired(ocapa);
116 update_capa_timer(ocapa, ocapa->c_expiry);
118 cfs_spin_unlock(&capa_lock);
121 DEBUG_CAPA(D_SEC, &ocapa->c_capa, "expired");
/*
 * Link @ocapa (through c_list) into the list @head, which is kept ordered by
 * c_expiry.
 *
 * @head is scanned from its tail for an entry whose expiry is not later than
 * that of @ocapa; @ocapa is then added right after that entry, or right
 * after @head itself when no such entry was found.
 *
 * NOTE(review): the statement that ends the scan on the first match is not
 * visible in this listing (presumably a break) -- confirm.  The callers
 * visible in this file hold capa_lock and unlink @ocapa from its previous
 * list before calling.
 */
125 static void sort_add_capa(struct obd_capa *ocapa, cfs_list_t *head)
127 struct obd_capa *tmp;
128 cfs_list_t *before = NULL;
130 /* TODO: client capa is sorted by expiry, this could be optimized */
131 cfs_list_for_each_entry_reverse(tmp, head, c_list) {
132 if (cfs_time_aftereq(ocapa->c_expiry, tmp->c_expiry)) {
133 before = &tmp->c_list;
/* a capa can never be its own insertion point */
138 LASSERT(&ocapa->c_list != before);
139 cfs_list_add(&ocapa->c_list, before ?: head);
142 static inline int obd_capa_open_count(struct obd_capa *oc)
144 struct ll_inode_info *lli = ll_i2info(oc->u.cli.inode);
145 return cfs_atomic_read(&lli->lli_open_count);
/*
 * Detach a client capa from its inode and from whatever c_list it is on.
 *
 * An MDS capa must be the inode's lli_mds_capa (asserted) and that pointer is
 * cleared; an OSS capa is unlinked from the inode's OSS capa list through
 * u.cli.lli_list.  In both cases the capa is then unlinked from c_list and
 * the client capa counter is decremented.
 *
 * The callers visible in this file invoke it with capa_lock held.
 *
 * NOTE(review): the statement that drops the allocation reference (announced
 * by the last comment) is not visible in this listing -- confirm.
 */
148 static void ll_delete_capa(struct obd_capa *ocapa)
150 struct ll_inode_info *lli = ll_i2info(ocapa->u.cli.inode);
152 if (capa_for_mds(&ocapa->c_capa)) {
153 LASSERT(lli->lli_mds_capa == ocapa);
154 lli->lli_mds_capa = NULL;
155 } else if (capa_for_oss(&ocapa->c_capa)) {
156 cfs_list_del_init(&ocapa->u.cli.lli_list);
159 DEBUG_CAPA(D_SEC, &ocapa->c_capa, "free client");
160 cfs_list_del_init(&ocapa->c_list);
161 capa_count[CAPA_SITE_CLIENT]--;
162 /* release the ref when alloc */
/* three places where client capa is deleted:
 * 1. capa_thread_main(), main place to delete expired capa.
 * 2. ll_clear_inode_capas() in ll_clear_inode().
 * 3. ll_truncate_free_capa(), which deletes the truncate capa explicitly in
 *    ll_truncate().
 */
/*
 * Body of the capa thread ("ll_capa").
 *
 * After flagging itself SVC_RUNNING and signalling the waitqueue, the thread
 * sleeps on ll_capa_thread.t_ctl_waitq and stops working once it is no
 * longer flagged as running.  Each pass works under capa_lock:
 *
 *  - ll_capa_list: a capa that is due for renewal (capa_is_to_expire()) is
 *    unlinked from c_list.  An MDS capa of a non-directory inode with open
 *    count 0 and no LOOKUP lock, or an OSS capa with open count 0, is moved
 *    to ll_idle_capas instead of being renewed.  For the others the inode is
 *    pinned with igrab() and md_renew_capa() is called; a failed renewal is
 *    logged and counted in ll_capa_renewal_failed.
 *  - ll_idle_capas: a capa that has not expired yet only updates the timer;
 *    an expired capa that is still referenced (c_refc > 1) is unlinked from
 *    c_list but not released; any other expired capa goes through
 *    ll_delete_capa().
 *
 * On the way out the thread sets SVC_STOPPED and signals the waitqueue,
 * which is the condition ll_capa_thread_stop() waits for.
 *
 * NOTE(review): several lines of this function (local declarations, loop
 * headers, break/continue statements, the rest of the wait condition, the
 * return value) are not visible in this listing; the summary above covers
 * the visible statements only.
 */
171 static int capa_thread_main(void *unused)
173 struct obd_capa *ocapa, *tmp, *next;
174 struct inode *inode = NULL;
175 struct l_wait_info lwi = { 0 };
179 cfs_daemonize("ll_capa");
181 thread_set_flags(&ll_capa_thread, SVC_RUNNING);
182 cfs_waitq_signal(&ll_capa_thread.t_ctl_waitq);
185 l_wait_event(ll_capa_thread.t_ctl_waitq,
186 !thread_is_running(&ll_capa_thread) ||
190 if (!thread_is_running(&ll_capa_thread))
195 cfs_spin_lock(&capa_lock);
196 cfs_list_for_each_entry_safe(ocapa, tmp, ll_capa_list, c_list) {
199 LASSERT(ocapa->c_capa.lc_opc != CAPA_OPC_OSS_TRUNC);
201 if (!capa_is_to_expire(ocapa)) {
206 cfs_list_del_init(&ocapa->c_list);
208 /* for MDS capability, only renew those which belong to
209 * dir, or its inode is opened, or client holds LOOKUP
212 /* ibits may be changed by ll_have_md_lock() so we have
213 * to set it each time */
214 ibits = MDS_INODELOCK_LOOKUP;
215 if (capa_for_mds(&ocapa->c_capa) &&
216 !S_ISDIR(ocapa->u.cli.inode->i_mode) &&
217 obd_capa_open_count(ocapa) == 0 &&
218 !ll_have_md_lock(ocapa->u.cli.inode,
219 &ibits, LCK_MINMODE)) {
220 DEBUG_CAPA(D_SEC, &ocapa->c_capa,
222 sort_add_capa(ocapa, &ll_idle_capas);
226 /* for OSS capability, only renew those whose inode is
229 if (capa_for_oss(&ocapa->c_capa) &&
230 obd_capa_open_count(ocapa) == 0) {
231 /* oss capa with open count == 0 won't renew,
232 * move to idle list */
233 sort_add_capa(ocapa, &ll_idle_capas);
237 /* NB iput() is in ll_update_capa() */
238 inode = igrab(ocapa->u.cli.inode);
240 DEBUG_CAPA(D_ERROR, &ocapa->c_capa,
/* capa_lock is released across md_renew_capa() and re-taken afterwards */
247 cfs_spin_unlock(&capa_lock);
248 rc = md_renew_capa(ll_i2mdexp(inode), ocapa,
250 cfs_spin_lock(&capa_lock);
252 DEBUG_CAPA(D_ERROR, &ocapa->c_capa,
253 "renew failed: %d", rc);
254 ll_capa_renewal_failed++;
259 update_capa_timer(next, capa_renewal_time(next));
261 cfs_list_for_each_entry_safe(ocapa, tmp, &ll_idle_capas,
263 if (!capa_is_expired(ocapa)) {
265 update_capa_timer(ocapa,
270 if (cfs_atomic_read(&ocapa->c_refc) > 1) {
271 DEBUG_CAPA(D_SEC, &ocapa->c_capa,
272 "expired(c_refc %d), don't release",
273 cfs_atomic_read(&ocapa->c_refc));
274 /* don't try to renew any more */
275 cfs_list_del_init(&ocapa->c_list);
279 /* expired capa is released. */
280 DEBUG_CAPA(D_SEC, &ocapa->c_capa, "release expired");
281 ll_delete_capa(ocapa);
284 cfs_spin_unlock(&capa_lock);
287 thread_set_flags(&ll_capa_thread, SVC_STOPPED);
288 cfs_waitq_signal(&ll_capa_thread.t_ctl_waitq);
292 void ll_capa_timer_callback(unsigned long unused)
294 cfs_waitq_signal(&ll_capa_thread.t_ctl_waitq);
/*
 * Start the capa thread.
 *
 * Initialises the thread's control waitqueue, creates a thread running
 * capa_thread_main() and then blocks until that thread reports itself as
 * running.  A creation failure is logged with CERROR().
 *
 * NOTE(review): the declaration of `rc`, the failure test and the return
 * statements are not visible in this listing; presumably the error code from
 * cfs_create_thread() is returned on failure and 0 on success -- confirm.
 */
297 int ll_capa_thread_start(void)
302 cfs_waitq_init(&ll_capa_thread.t_ctl_waitq);
304 rc = cfs_create_thread(capa_thread_main, NULL, 0);
306 CERROR("cannot start expired capa thread: rc %d\n", rc);
309 cfs_wait_event(ll_capa_thread.t_ctl_waitq,
310 thread_is_running(&ll_capa_thread));
315 void ll_capa_thread_stop(void)
317 thread_set_flags(&ll_capa_thread, SVC_STOPPING);
318 cfs_waitq_signal(&ll_capa_thread.t_ctl_waitq);
319 cfs_wait_event(ll_capa_thread.t_ctl_waitq,
320 thread_is_stopped(&ll_capa_thread));
/*
 * Look up an OSS capa of @inode that is usable for operation @opc.
 *
 * @opc must be CAPA_OPC_OSS_WRITE, CAPA_OPC_OSS_RW or CAPA_OPC_OSS_TRUNC
 * (asserted).  The inode's lli_oss_capas list is walked under capa_lock and
 * expired capas are tested for first.  A capa is tested for a match in this
 * order:
 *   - @opc has the WRITE bit and the capa supports CAPA_OPC_OSS_WRITE;
 *   - otherwise @opc has the READ bit and the capa supports
 *     CAPA_OPC_OSS_READ;
 *   - otherwise @opc has the TRUNC bit and the capa supports @opc.
 * A capa that is found must carry the inode's fid and belong to the client
 * site (both asserted).
 *
 * While ll_capa_debug is set, a missing capability is reported with CERROR()
 * and the flag is cleared, so the message is not repeated until
 * ll_add_capa() sets the flag again.
 *
 * NOTE(review): the early exit taken when LL_SBI_OSS_CAPA is not set, the
 * statements inside the matching branches and the return statement are not
 * visible in this listing; presumably NULL or the matching capa is
 * returned -- confirm.
 */
323 struct obd_capa *ll_osscapa_get(struct inode *inode, __u64 opc)
325 struct ll_inode_info *lli = ll_i2info(inode);
326 struct obd_capa *ocapa;
331 if ((ll_i2sbi(inode)->ll_flags & LL_SBI_OSS_CAPA) == 0)
334 LASSERT(opc == CAPA_OPC_OSS_WRITE || opc == CAPA_OPC_OSS_RW ||
335 opc == CAPA_OPC_OSS_TRUNC);
337 cfs_spin_lock(&capa_lock);
338 cfs_list_for_each_entry(ocapa, &lli->lli_oss_capas, u.cli.lli_list) {
339 if (capa_is_expired(ocapa))
341 if ((opc & CAPA_OPC_OSS_WRITE) &&
342 capa_opc_supported(&ocapa->c_capa, CAPA_OPC_OSS_WRITE)) {
345 } else if ((opc & CAPA_OPC_OSS_READ) &&
346 capa_opc_supported(&ocapa->c_capa,
347 CAPA_OPC_OSS_READ)) {
350 } else if ((opc & CAPA_OPC_OSS_TRUNC) &&
351 capa_opc_supported(&ocapa->c_capa, opc)) {
358 LASSERT(lu_fid_eq(capa_fid(&ocapa->c_capa),
359 ll_inode2fid(inode)));
360 LASSERT(ocapa->c_site == CAPA_SITE_CLIENT);
364 DEBUG_CAPA(D_SEC, &ocapa->c_capa, "found client");
368 if (cfs_atomic_read(&ll_capa_debug)) {
369 CERROR("no capability for "DFID" opc "LPX64"\n",
370 PFID(&lli->lli_fid), opc);
371 cfs_atomic_set(&ll_capa_debug, 0);
374 cfs_spin_unlock(&capa_lock);
378 EXPORT_SYMBOL(ll_osscapa_get);
/*
 * Get the MDS capa attached to @inode.
 *
 * @inode must not be NULL (asserted; note that ll_i2info(inode) has already
 * been evaluated by then).  The capa is obtained with
 * capa_get(lli->lli_mds_capa) under capa_lock.  While ll_capa_debug is set,
 * a missing capa is reported with CERROR() and the flag is cleared.
 *
 * NOTE(review): the early exit taken when LL_SBI_MDS_CAPA is not set and the
 * return statement are not visible in this listing; presumably NULL or
 * `ocapa` is returned -- confirm.
 */
380 struct obd_capa *ll_mdscapa_get(struct inode *inode)
382 struct ll_inode_info *lli = ll_i2info(inode);
383 struct obd_capa *ocapa;
386 LASSERT(inode != NULL);
388 if ((ll_i2sbi(inode)->ll_flags & LL_SBI_MDS_CAPA) == 0)
391 cfs_spin_lock(&capa_lock);
392 ocapa = capa_get(lli->lli_mds_capa);
393 cfs_spin_unlock(&capa_lock);
394 if (!ocapa && cfs_atomic_read(&ll_capa_debug)) {
395 CERROR("no mds capability for "DFID"\n", PFID(&lli->lli_fid));
396 cfs_atomic_set(&ll_capa_debug, 0);
/*
 * Install or refresh the MDS capa of @inode; called from ll_add_capa() with
 * capa_lock held.
 *
 * Two paths are visible: the "add MDS" path binds @ocapa to @inode, stores
 * it in lli_mds_capa and bumps the client capa counter; the "update MDS"
 * path works on the capa already attached to the inode (`old`) under its
 * c_lock.
 *
 * NOTE(review): the test that selects between the two paths, the statement
 * executed under old->c_lock and the return statement are not visible in
 * this listing -- confirm against the full source.
 */
402 static struct obd_capa *do_add_mds_capa(struct inode *inode,
403 struct obd_capa *ocapa)
405 struct ll_inode_info *lli = ll_i2info(inode);
406 struct obd_capa *old = lli->lli_mds_capa;
407 struct lustre_capa *capa = &ocapa->c_capa;
410 ocapa->u.cli.inode = inode;
411 lli->lli_mds_capa = ocapa;
412 capa_count[CAPA_SITE_CLIENT]++;
414 DEBUG_CAPA(D_SEC, capa, "add MDS");
416 cfs_spin_lock(&old->c_lock);
418 cfs_spin_unlock(&old->c_lock);
420 DEBUG_CAPA(D_SEC, capa, "update MDS");
/*
 * Search the OSS capas of @inode for one whose opcode mask contains every
 * bit of @opc.  Must be called with capa_lock held.
 *
 * A capa that passes the mask test must carry the inode's fid and belong to
 * the client site (both asserted).
 *
 * NOTE(review): the statement following the mask test and the return
 * statements are not visible in this listing; presumably non-matching
 * entries are skipped and the first match, or NULL, is returned -- confirm.
 */
428 static struct obd_capa *do_lookup_oss_capa(struct inode *inode, int opc)
430 struct ll_inode_info *lli = ll_i2info(inode);
431 struct obd_capa *ocapa;
433 /* inside capa_lock */
434 cfs_list_for_each_entry(ocapa, &lli->lli_oss_capas, u.cli.lli_list) {
435 if ((capa_opc(&ocapa->c_capa) & opc) != opc)
438 LASSERT(lu_fid_eq(capa_fid(&ocapa->c_capa),
439 ll_inode2fid(inode)));
440 LASSERT(ocapa->c_site == CAPA_SITE_CLIENT);
442 DEBUG_CAPA(D_SEC, &ocapa->c_capa, "found client");
/*
 * (Re)position @ocapa in the lli_oss_capas list of @inode according to its
 * expiry.
 *
 * The list is scanned from the front for an entry that expires earlier than
 * @ocapa; cfs_list_move_tail() then places @ocapa immediately before that
 * entry, or at the tail of the list when there is none.  Because a move is
 * used, @ocapa may already be on the list.
 *
 * NOTE(review): the statement that ends the scan on the first match is not
 * visible in this listing (presumably a break) -- confirm.
 */
449 static inline void inode_add_oss_capa(struct inode *inode,
450 struct obd_capa *ocapa)
452 struct ll_inode_info *lli = ll_i2info(inode);
453 struct obd_capa *tmp;
454 cfs_list_t *next = NULL;
456 /* capa is sorted in lli_oss_capas so lookup can always find the
458 cfs_list_for_each_entry(tmp, &lli->lli_oss_capas, u.cli.lli_list) {
459 if (cfs_time_after(ocapa->c_expiry, tmp->c_expiry)) {
460 next = &tmp->u.cli.lli_list;
464 LASSERT(&ocapa->u.cli.lli_list != next);
465 cfs_list_move_tail(&ocapa->u.cli.lli_list, next ?: &lli->lli_oss_capas);
/*
 * Install or refresh an OSS capa of @inode; called from ll_add_capa() with
 * capa_lock held.
 *
 * @inode must be a regular file (asserted).  An existing capa is looked up
 * with the opcode of @ocapa masked by CAPA_OPC_OSS_ONLY.  Two paths are
 * visible: the "add OSS" path binds @ocapa to @inode, initialises its
 * lli_list linkage and bumps the client capa counter; the "update OSS" path
 * works on the existing capa (`old`) under its c_lock.  The capa is then
 * sorted into the inode's list with inode_add_oss_capa().
 *
 * NOTE(review): the test that selects between the two paths, the statement
 * executed under old->c_lock and the return statement are not visible in
 * this listing -- confirm against the full source.
 */
468 static struct obd_capa *do_add_oss_capa(struct inode *inode,
469 struct obd_capa *ocapa)
471 struct obd_capa *old;
472 struct lustre_capa *capa = &ocapa->c_capa;
474 LASSERTF(S_ISREG(inode->i_mode),
475 "inode has oss capa, but not regular file, mode: %d\n",
478 /* FIXME: can't replace it so easily with fine-grained opc */
479 old = do_lookup_oss_capa(inode, capa_opc(capa) & CAPA_OPC_OSS_ONLY);
481 ocapa->u.cli.inode = inode;
482 CFS_INIT_LIST_HEAD(&ocapa->u.cli.lli_list);
483 capa_count[CAPA_SITE_CLIENT]++;
485 DEBUG_CAPA(D_SEC, capa, "add OSS");
487 cfs_spin_lock(&old->c_lock);
489 cfs_spin_unlock(&old->c_lock);
491 DEBUG_CAPA(D_SEC, capa, "update OSS");
497 inode_add_oss_capa(inode, ocapa);
/*
 * Attach @ocapa to @inode and schedule it for renewal.
 *
 * Under capa_lock the capa is handed to do_add_mds_capa() or
 * do_add_oss_capa(), depending on capa_for_mds(); `ocapa` is replaced by
 * whatever that helper returns.  Unless the resulting capa is a pure
 * truncate capa (lc_opc == CAPA_OPC_OSS_TRUNC), its expiry is set, it is
 * re-linked into ll_capa_list in expiry order and the renewal timer is
 * updated.  After the lock is dropped ll_capa_debug is set to 1, which
 * re-enables the "no capability" reports of the lookup functions.
 *
 * NOTE(review): the return statement is not visible in this listing;
 * presumably the resulting `ocapa` is returned -- confirm.
 */
501 struct obd_capa *ll_add_capa(struct inode *inode, struct obd_capa *ocapa)
503 cfs_spin_lock(&capa_lock);
504 ocapa = capa_for_mds(&ocapa->c_capa) ? do_add_mds_capa(inode, ocapa) :
505 do_add_oss_capa(inode, ocapa);
507 /* truncate capa won't renew */
508 if (ocapa->c_capa.lc_opc != CAPA_OPC_OSS_TRUNC) {
509 set_capa_expiry(ocapa);
510 cfs_list_del_init(&ocapa->c_list);
511 sort_add_capa(ocapa, ll_capa_list);
513 update_capa_timer(ocapa, capa_renewal_time(ocapa));
516 cfs_spin_unlock(&capa_lock);
518 cfs_atomic_set(&ll_capa_debug, 1);
522 static inline void delay_capa_renew(struct obd_capa *oc, cfs_time_t delay)
524 /* NB: set a fake expiry for this capa to prevent it renew too soon */
525 oc->c_expiry = cfs_time_add(oc->c_expiry, cfs_time_seconds(delay));
/*
 * Apply the outcome of a renewal attempt to @ocapa.
 *
 * Failure path (under capa_lock): the failure is counted in
 * ll_capa_renewal_noent ("object removed") or ll_capa_renewal_failed.  When
 * rc is -EIO and the capa has not expired yet, its expiry is pushed back by
 * 120 seconds with delay_capa_renew() and ll_capa_renewal_retries is
 * bumped.  A capa that will not be retried is moved from its current c_list
 * to ll_idle_capas.
 *
 * Success path: under ocapa->c_lock the new capability @capa is copied over
 * the old one -- the fields located before lc_opc must be unchanged
 * (asserted) -- and the expiry is recomputed.  Then, under capa_lock, an OSS
 * capa is re-sorted in its inode's list, and the capa is re-linked into
 * ll_capa_list in expiry order and the renewal timer is updated.
 *
 * NOTE(review): how `rc` is derived, the tests selecting the branches, any
 * jump between the -EIO branch and the final re-queueing, the inode release
 * mentioned in capa_thread_main() and the return value are not visible in
 * this listing -- confirm against the full source.
 */
528 int ll_update_capa(struct obd_capa *ocapa, struct lustre_capa *capa)
530 struct inode *inode = ocapa->u.cli.inode;
539 cfs_spin_lock(&capa_lock);
541 DEBUG_CAPA(D_SEC, &ocapa->c_capa,
542 "renewal canceled because object removed");
543 ll_capa_renewal_noent++;
545 ll_capa_renewal_failed++;
547 /* failed capa won't be renewed any longer, but if -EIO,
548 * client might be doing recovery, retry in 2 min. */
549 if (rc == -EIO && !capa_is_expired(ocapa)) {
550 delay_capa_renew(ocapa, 120);
551 DEBUG_CAPA(D_ERROR, &ocapa->c_capa,
552 "renewal failed: -EIO, "
554 ll_capa_renewal_retries++;
557 DEBUG_CAPA(D_ERROR, &ocapa->c_capa,
558 "renewal failed(rc: %d) for", rc);
562 cfs_list_del_init(&ocapa->c_list);
563 sort_add_capa(ocapa, &ll_idle_capas);
564 cfs_spin_unlock(&capa_lock);
571 cfs_spin_lock(&ocapa->c_lock);
572 LASSERT(!memcmp(&ocapa->c_capa, capa,
573 offsetof(struct lustre_capa, lc_opc)));
574 ocapa->c_capa = *capa;
575 set_capa_expiry(ocapa);
576 cfs_spin_unlock(&ocapa->c_lock);
578 cfs_spin_lock(&capa_lock);
579 if (capa_for_oss(capa))
580 inode_add_oss_capa(inode, ocapa);
581 DEBUG_CAPA(D_SEC, capa, "renew");
584 cfs_list_del_init(&ocapa->c_list);
585 sort_add_capa(ocapa, ll_capa_list);
586 update_capa_timer(ocapa, capa_renewal_time(ocapa));
587 cfs_spin_unlock(&capa_lock);
/*
 * Account for an open of @inode: increment lli_open_count, the counter that
 * obd_capa_open_count() reads when the capa thread decides whether a capa is
 * worth renewing.
 *
 * NOTE(review): the remainder of the capa-flag test and the statements
 * guarded by both tests are not visible in this listing; presumably the
 * function returns early when neither LL_SBI_MDS_CAPA nor LL_SBI_OSS_CAPA is
 * set, or when @inode is not a regular file -- confirm.
 */
594 void ll_capa_open(struct inode *inode)
596 struct ll_inode_info *lli = ll_i2info(inode);
598 if ((ll_i2sbi(inode)->ll_flags & (LL_SBI_MDS_CAPA | LL_SBI_OSS_CAPA))
602 if (!S_ISREG(inode->i_mode))
605 cfs_atomic_inc(&lli->lli_open_count);
/*
 * Account for a close of @inode: decrement lli_open_count, the counterpart
 * of the increment done in ll_capa_open().
 *
 * NOTE(review): the remainder of the capa-flag test and the statements
 * guarded by both tests are not visible in this listing; presumably the
 * function returns early when neither LL_SBI_MDS_CAPA nor LL_SBI_OSS_CAPA is
 * set, or when @inode is not a regular file -- confirm.
 */
608 void ll_capa_close(struct inode *inode)
610 struct ll_inode_info *lli = ll_i2info(inode);
612 if ((ll_i2sbi(inode)->ll_flags & (LL_SBI_MDS_CAPA | LL_SBI_OSS_CAPA))
616 if (!S_ISREG(inode->i_mode))
619 cfs_atomic_dec(&lli->lli_open_count);
622 /* delete CAPA_OPC_OSS_TRUNC only */
/*
 * Give up a capa that was obtained for a truncate.
 *
 * The opcode of @ocapa must include CAPA_OPC_OSS_TRUNC (asserted).  Only a
 * capa whose opcode is exactly CAPA_OPC_OSS_TRUNC is deleted, through
 * ll_delete_capa() under capa_lock.
 *
 * NOTE(review): the statements before the assertion and the one that drops
 * the lookup reference (announced by the "release ref when find" comment)
 * are not visible in this listing -- confirm against the full source.
 */
623 void ll_truncate_free_capa(struct obd_capa *ocapa)
628 LASSERT(ocapa->c_capa.lc_opc & CAPA_OPC_OSS_TRUNC);
629 DEBUG_CAPA(D_SEC, &ocapa->c_capa, "free truncate");
631 /* release ref when find */
633 if (likely(ocapa->c_capa.lc_opc == CAPA_OPC_OSS_TRUNC)) {
634 cfs_spin_lock(&capa_lock);
635 ll_delete_capa(ocapa);
636 cfs_spin_unlock(&capa_lock);
/*
 * Drop every capa attached to @inode: the MDS capa in lli_mds_capa and each
 * entry of lli_oss_capas are passed to ll_delete_capa() under capa_lock.
 * The _safe list iterator is used because ll_delete_capa() unlinks the
 * entry being visited.
 *
 * NOTE(review): the test guarding the first ll_delete_capa() call and the
 * tail of the list-iteration macro are not visible in this listing;
 * presumably the MDS capa is deleted only when it is non-NULL -- confirm.
 */
640 void ll_clear_inode_capas(struct inode *inode)
642 struct ll_inode_info *lli = ll_i2info(inode);
643 struct obd_capa *ocapa, *tmp;
645 cfs_spin_lock(&capa_lock);
646 ocapa = lli->lli_mds_capa;
648 ll_delete_capa(ocapa);
650 cfs_list_for_each_entry_safe(ocapa, tmp, &lli->lli_oss_capas,
652 ll_delete_capa(ocapa);
653 cfs_spin_unlock(&capa_lock);
/*
 * Print the capa renewal counters (renewed, renewal ENOENT, failed to renew,
 * renewal retries) with LCONSOLE_INFO().  Nothing is printed unless @sbi has
 * LL_SBI_MDS_CAPA or LL_SBI_OSS_CAPA set.  The counters are file-scope
 * variables, not per-@sbi values.
 */
656 void ll_print_capa_stat(struct ll_sb_info *sbi)
658 if (sbi->ll_flags & (LL_SBI_MDS_CAPA | LL_SBI_OSS_CAPA))
659 LCONSOLE_INFO("Fid capabilities renewed: %llu\n"
660 "Fid capabilities renewal ENOENT: %llu\n"
661 "Fid capabilities failed to renew: %llu\n"
662 "Fid capabilities renewal retries: %llu\n",
663 ll_capa_renewed, ll_capa_renewal_noent,
664 ll_capa_renewal_failed, ll_capa_renewal_retries);