1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
6 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 only,
10 * as published by the Free Software Foundation.
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License version 2 for more details (a copy is included
16 * in the LICENSE file that accompanied this code).
18 * You should have received a copy of the GNU General Public License
19 * version 2 along with this program; If not, see [sun.com URL with a
22 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23 * CA 95054 USA or visit www.sun.com if you need additional information or
29 * Copyright 2008 Sun Microsystems, Inc. All rights reserved
30 * Use is subject to license terms.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
36 * lustre/llite/llite_capa.c
38 * Author: Lai Siyao <lsy@clusterfs.com>
41 #define DEBUG_SUBSYSTEM S_LLITE
44 #include <linux/version.h>
45 #include <asm/uaccess.h>
46 #include <linux/file.h>
47 #include <linux/kmod.h>
49 #include <lustre_lite.h>
50 #include "llite_internal.h"
/* for obd_capa.c_list, client capa might stay in three places:
 * 1. ll_capa_list.
 * 2. ll_idle_capas.
 * 3. stand alone: just allocated.
 */
58 /* capas for oss writeback and those failed to renew */
59 static LIST_HEAD(ll_idle_capas);
/* control block of the capa thread: t_flags carries the SVC_RUNNING /
 * SVC_STOPPING / SVC_STOPPED state and t_ctl_waitq is the wait queue used
 * to start, wake and stop capa_thread_main() */
60 static struct ptlrpc_thread ll_capa_thread;
/* the CAPA_SITE_CLIENT slot of capa_list; ll_add_capa() and ll_update_capa()
 * insert into it through sort_add_capa() */
61 static struct list_head *ll_capa_list = &capa_list[CAPA_SITE_CLIENT];
63 /* llite capa renewal timer */
64 struct timer_list ll_capa_timer;
65 /* for debug: indicate whether capa on llite is enabled or not */
66 static atomic_t ll_capa_debug = ATOMIC_INIT(0);
/* renewal statistics, reported by ll_print_capa_stat() */
67 static unsigned long long ll_capa_renewed = 0;
68 static unsigned long long ll_capa_renewal_noent = 0;
69 static unsigned long long ll_capa_renewal_failed = 0;
70 static unsigned long long ll_capa_renewal_retries = 0;
/*
 * Re-arm ll_capa_timer so that it fires at "expiry".
 *
 * The timer is only modified when "expiry" is earlier than the currently
 * programmed ll_capa_timer.expires, or when the timer is not pending at all;
 * a later expiry never pushes an already pending timer back.
 * "ocapa" is used for the debug message only.
 */
72 static inline void update_capa_timer(struct obd_capa *ocapa, cfs_time_t expiry)
74 if (time_before(expiry, ll_capa_timer.expires) ||
75 !timer_pending(&ll_capa_timer)) {
76 mod_timer(&ll_capa_timer, expiry);
77 DEBUG_CAPA(D_SEC, &ocapa->c_capa,
78 "ll_capa_timer update: %lu/%lu by", expiry, jiffies);
/*
 * Time at which "ocapa" is due for renewal: its c_expiry minus half of
 * cfs_time_seconds(lc_timeout).
 */
82 static inline cfs_time_t capa_renewal_time(struct obd_capa *ocapa)
84 return cfs_time_sub(ocapa->c_expiry,
85 cfs_time_seconds(ocapa->c_capa.lc_timeout) / 2);
/*
 * Non-zero when the renewal time of "ocapa" (see capa_renewal_time()) has
 * been reached, i.e. cfs_time_beforeq() holds against cfs_time_current().
 */
88 static inline int capa_is_to_expire(struct obd_capa *ocapa)
90 return cfs_time_beforeq(capa_renewal_time(ocapa), cfs_time_current());
/*
 * Wake-up condition of the capa thread (used in the l_wait_event() call of
 * capa_thread_main()).
 *
 * Under capa_lock only the first entry of a list is examined: the head of
 * ll_capa_list is tested with capa_is_to_expire(); if that list is empty the
 * head of ll_idle_capas is tested with capa_is_expired(). For the entry
 * examined, ll_capa_timer may be re-armed through update_capa_timer() with
 * the renewal time (ll_capa_list) or the expiry (ll_idle_capas).
 *
 * NOTE(review): presumably the timer is only re-armed when the entry is not
 * yet due, and the debug message is only printed when it is - confirm
 * against the guarding conditions.
 */
93 static inline int have_expired_capa(void)
95 struct obd_capa *ocapa = NULL;
98 /* if ll_capa_list has client capa to expire or ll_idle_capas has
99 * expired capa, return 1.
101 spin_lock(&capa_lock);
102 if (!list_empty(ll_capa_list)) {
103 ocapa = list_entry(ll_capa_list->next, struct obd_capa, c_list);
104 expired = capa_is_to_expire(ocapa);
106 update_capa_timer(ocapa, capa_renewal_time(ocapa));
107 } else if (!list_empty(&ll_idle_capas)) {
108 ocapa = list_entry(ll_idle_capas.next, struct obd_capa, c_list);
109 expired = capa_is_expired(ocapa);
111 update_capa_timer(ocapa, ocapa->c_expiry);
113 spin_unlock(&capa_lock);
116 DEBUG_CAPA(D_SEC, &ocapa->c_capa, "expired");
/* Returns 1 when SVC_STOPPING is set in ll_capa_thread.t_flags, 0 otherwise. */
120 static inline int ll_capa_check_stop(void)
122 return (ll_capa_thread.t_flags & SVC_STOPPING) ? 1: 0;
/*
 * Link ocapa->c_list into the list "head", positioned by c_expiry.
 *
 * The list is walked from its tail towards its head. "before" records an
 * entry whose c_expiry is not after that of "ocapa"; "ocapa" is then added
 * with list_add() directly after that entry, or directly after "head" when
 * no such entry was recorded.
 *
 * NOTE(review): presumably the walk stops at the first matching entry, which
 * keeps the list in ascending expiry order - confirm.
 */
125 static void sort_add_capa(struct obd_capa *ocapa, struct list_head *head)
127 struct obd_capa *tmp;
128 struct list_head *before = NULL;
130 /* TODO: client capa is sorted by expiry, this could be optimized */
131 list_for_each_entry_reverse(tmp, head, c_list) {
132 if (cfs_time_aftereq(ocapa->c_expiry, tmp->c_expiry)) {
133 before = &tmp->c_list;
/* ocapa must not already be the entry it is about to be linked after */
138 LASSERT(&ocapa->c_list != before);
139 list_add(&ocapa->c_list, before ?: head);
/* Current lli_open_count of the inode that "oc" is attached to. */
142 static inline int obd_capa_open_count(struct obd_capa *oc)
144 struct ll_inode_info *lli = ll_i2info(oc->u.cli.inode);
145 return atomic_read(&lli->lli_open_count);
/*
 * Detach a client capa from its inode and from the list it is linked on.
 *
 * An MDS capa must be the inode's lli_mds_capa, which is reset to NULL; an
 * OSS capa is unlinked from the inode's OSS capa list (u.cli.lli_list).
 * The capa is then removed from its c_list and the client capa counter is
 * decremented.
 *
 * The callers in this file (capa_thread_main(), ll_truncate_free_capa(),
 * ll_clear_inode_capas()) hold capa_lock around the call.
 * NOTE(review): the debug text says "free client"; presumably the capa is
 * also freed here - confirm.
 */
148 static void ll_delete_capa(struct obd_capa *ocapa)
150 struct ll_inode_info *lli = ll_i2info(ocapa->u.cli.inode);
152 if (capa_for_mds(&ocapa->c_capa)) {
153 LASSERT(lli->lli_mds_capa == ocapa);
154 lli->lli_mds_capa = NULL;
155 } else if (capa_for_oss(&ocapa->c_capa)) {
156 list_del_init(&ocapa->u.cli.lli_list);
159 DEBUG_CAPA(D_SEC, &ocapa->c_capa, "free client");
160 list_del(&ocapa->c_list);
161 capa_count[CAPA_SITE_CLIENT]--;
/* three places where client capa is deleted:
 * 1. capa_thread_main(), main place to delete expired capa.
 * 2. ll_clear_inode_capas() in ll_clear_inode().
 * 3. ll_truncate_free_capa() delete truncate capa explicitly in ll_truncate().
 */
/*
 * Body of the "ll_capa" thread: renews client capas that are about to expire
 * and releases idle capas that have expired.
 *
 * After daemonizing, the thread sets SVC_RUNNING and wakes t_ctl_waitq (which
 * ll_capa_thread_start() waits on), then sleeps until ll_capa_check_stop()
 * or have_expired_capa() holds.
 *
 * Each pass, under capa_lock:
 *  - ll_capa_list is walked; a capa that is due (capa_is_to_expire()) is
 *    unlinked from the list. An MDS capa of a non-directory inode that is not
 *    open and has no LOOKUP lock, and an OSS capa of an inode that is not
 *    open, are moved to ll_idle_capas instead of being renewed. For the
 *    others the inode is pinned with igrab() and md_renew_capa() is called
 *    with capa_lock released; failures are counted in
 *    ll_capa_renewal_failed.
 *  - ll_idle_capas is walked; an expired capa that still has references
 *    (c_refc != 0) is only unlinked, one without references is released
 *    through ll_delete_capa().
 *
 * Before returning, the thread sets SVC_STOPPED and wakes t_ctl_waitq (which
 * ll_capa_thread_stop() waits on).
 *
 * NOTE(review): presumably both walks stop at the first entry that is not yet
 * due and re-arm ll_capa_timer for it - confirm against the loop exits.
 */
170 static int capa_thread_main(void *unused)
172 struct obd_capa *ocapa, *tmp, *next;
173 struct inode *inode = NULL;
174 struct l_wait_info lwi = { 0 };
178 cfs_daemonize("ll_capa");
180 ll_capa_thread.t_flags = SVC_RUNNING;
181 wake_up(&ll_capa_thread.t_ctl_waitq);
184 l_wait_event(ll_capa_thread.t_ctl_waitq,
185 (ll_capa_check_stop() || have_expired_capa()),
188 if (ll_capa_check_stop())
193 spin_lock(&capa_lock);
194 list_for_each_entry_safe(ocapa, tmp, ll_capa_list, c_list) {
195 LASSERT(ocapa->c_capa.lc_opc != CAPA_OPC_OSS_TRUNC);
197 if (!capa_is_to_expire(ocapa)) {
202 list_del_init(&ocapa->c_list);
204 /* for MDS capability, only renew those which belong to
205 * dir, or its inode is opened, or client holds LOOKUP
208 if (capa_for_mds(&ocapa->c_capa) &&
209 !S_ISDIR(ocapa->u.cli.inode->i_mode) &&
210 obd_capa_open_count(ocapa) == 0 &&
211 !ll_have_md_lock(ocapa->u.cli.inode,
212 MDS_INODELOCK_LOOKUP)) {
213 DEBUG_CAPA(D_SEC, &ocapa->c_capa,
215 sort_add_capa(ocapa, &ll_idle_capas);
219 /* for OSS capability, only renew those whose inode is
222 if (capa_for_oss(&ocapa->c_capa) &&
223 obd_capa_open_count(ocapa) == 0) {
224 /* oss capa with open count == 0 won't renew,
225 * move to idle list */
226 sort_add_capa(ocapa, &ll_idle_capas);
230 /* NB iput() is in ll_update_capa() */
231 inode = igrab(ocapa->u.cli.inode);
233 DEBUG_CAPA(D_ERROR, &ocapa->c_capa,
/* capa_lock is not held across md_renew_capa() */
240 spin_unlock(&capa_lock);
242 rc = md_renew_capa(ll_i2mdexp(inode), ocapa,
244 spin_lock(&capa_lock);
246 DEBUG_CAPA(D_ERROR, &ocapa->c_capa,
247 "renew failed: %d", rc);
248 ll_capa_renewal_failed++;
253 update_capa_timer(next, capa_renewal_time(next));
255 list_for_each_entry_safe(ocapa, tmp, &ll_idle_capas, c_list) {
256 if (!capa_is_expired(ocapa)) {
258 update_capa_timer(ocapa, ocapa->c_expiry);
262 if (atomic_read(&ocapa->c_refc)) {
263 DEBUG_CAPA(D_SEC, &ocapa->c_capa,
264 "expired(c_refc %d), don't release",
265 atomic_read(&ocapa->c_refc));
266 /* don't try to renew any more */
267 list_del_init(&ocapa->c_list);
271 /* expired capa is released. */
272 DEBUG_CAPA(D_SEC, &ocapa->c_capa, "release expired");
273 ll_delete_capa(ocapa);
276 spin_unlock(&capa_lock);
279 ll_capa_thread.t_flags = SVC_STOPPED;
280 wake_up(&ll_capa_thread.t_ctl_waitq);
/*
 * Timer handler: wakes the capa thread sleeping on t_ctl_waitq so that it
 * re-evaluates its wait condition. The argument is not used.
 * NOTE(review): presumably installed as the handler of ll_capa_timer by the
 * code that sets the timer up - confirm.
 */
284 void ll_capa_timer_callback(unsigned long unused)
286 wake_up(&ll_capa_thread.t_ctl_waitq);
/*
 * Start the capa thread.
 *
 * Initializes the control wait queue, spawns capa_thread_main() with
 * kernel_thread() and then blocks until the new thread has set SVC_RUNNING.
 * A failure to spawn the thread is reported with CERROR().
 * NOTE(review): presumably returns the kernel_thread() error on failure and
 * 0 on success - confirm.
 */
289 int ll_capa_thread_start(void)
294 init_waitqueue_head(&ll_capa_thread.t_ctl_waitq);
296 rc = kernel_thread(capa_thread_main, NULL, 0);
298 CERROR("cannot start expired capa thread: rc %d\n", rc);
301 wait_event(ll_capa_thread.t_ctl_waitq,
302 ll_capa_thread.t_flags & SVC_RUNNING);
/*
 * Stop the capa thread: set SVC_STOPPING, wake the thread and block until it
 * has acknowledged by setting SVC_STOPPED.
 */
307 void ll_capa_thread_stop(void)
309 ll_capa_thread.t_flags = SVC_STOPPING;
310 wake_up(&ll_capa_thread.t_ctl_waitq);
311 wait_event(ll_capa_thread.t_ctl_waitq,
312 ll_capa_thread.t_flags & SVC_STOPPED);
/*
 * Search the OSS capas of "inode" (lli_oss_capas) for one whose opc contains
 * every bit of "opc". A match is asserted to carry the inode's fid and to
 * belong to CAPA_SITE_CLIENT.
 *
 * Must be called with capa_lock held.
 * NOTE(review): presumably returns the first match, or NULL when there is
 * none - confirm.
 */
315 static struct obd_capa *do_lookup_oss_capa(struct inode *inode, int opc)
317 struct ll_inode_info *lli = ll_i2info(inode);
318 struct obd_capa *ocapa;
320 /* inside capa_lock */
321 list_for_each_entry(ocapa, &lli->lli_oss_capas, u.cli.lli_list) {
322 if ((capa_opc(&ocapa->c_capa) & opc) != opc)
325 LASSERT(lu_fid_eq(capa_fid(&ocapa->c_capa),
326 ll_inode2fid(inode)));
327 LASSERT(ocapa->c_site == CAPA_SITE_CLIENT);
329 DEBUG_CAPA(D_SEC, &ocapa->c_capa, "found client");
/*
 * Look up an OSS capa of "inode" that can be used for operation "opc".
 *
 * "opc" must be CAPA_OPC_OSS_WRITE, CAPA_OPC_OSS_RW or CAPA_OPC_OSS_TRUNC.
 * The inode's lli_oss_capas list is walked under capa_lock; expired capas are
 * tested with capa_is_expired(). A capa is considered when it supports
 * WRITE and WRITE was requested, else when it supports READ and READ was
 * requested, else when TRUNC was requested and the capa supports "opc".
 * A hit is asserted to carry the inode's fid and to belong to
 * CAPA_SITE_CLIENT.
 *
 * If ll_capa_debug is set, a "no capability" error is logged and the flag is
 * cleared again, so the message is printed once per arming.
 *
 * NOTE(review): presumably returns NULL when LL_SBI_OSS_CAPA is not set on
 * the superblock or nothing matches, skips expired entries, and returns the
 * hit with a reference taken - confirm.
 */
336 struct obd_capa *ll_osscapa_get(struct inode *inode, __u64 opc)
338 struct ll_inode_info *lli = ll_i2info(inode);
339 struct obd_capa *ocapa;
342 if ((ll_i2sbi(inode)->ll_flags & LL_SBI_OSS_CAPA) == 0)
346 LASSERT(opc == CAPA_OPC_OSS_WRITE || opc == CAPA_OPC_OSS_RW ||
347 opc == CAPA_OPC_OSS_TRUNC);
349 spin_lock(&capa_lock);
350 list_for_each_entry(ocapa, &lli->lli_oss_capas, u.cli.lli_list) {
351 if (capa_is_expired(ocapa))
353 if ((opc & CAPA_OPC_OSS_WRITE) &&
354 capa_opc_supported(&ocapa->c_capa, CAPA_OPC_OSS_WRITE)) {
356 } else if ((opc & CAPA_OPC_OSS_READ) &&
357 capa_opc_supported(&ocapa->c_capa,
358 CAPA_OPC_OSS_READ)) {
360 } else if ((opc & CAPA_OPC_OSS_TRUNC) &&
361 capa_opc_supported(&ocapa->c_capa, opc)) {
367 LASSERT(lu_fid_eq(capa_fid(&ocapa->c_capa),
368 ll_inode2fid(inode)));
369 LASSERT(ocapa->c_site == CAPA_SITE_CLIENT);
373 DEBUG_CAPA(D_SEC, &ocapa->c_capa, "found client");
377 if (atomic_read(&ll_capa_debug)) {
378 CERROR("no capability for "DFID" opc "LPX64"\n",
379 PFID(&lli->lli_fid), opc);
380 atomic_set(&ll_capa_debug, 0);
383 spin_unlock(&capa_lock);
387 EXPORT_SYMBOL(ll_osscapa_get);
/*
 * Get the MDS capa of "inode": the result of capa_get() on lli_mds_capa,
 * taken under capa_lock. "inode" must not be NULL.
 *
 * When no capa is found and ll_capa_debug is set, an error is logged and the
 * flag is cleared again, so the message is printed once per arming.
 *
 * NOTE(review): presumably returns NULL when LL_SBI_MDS_CAPA is not set on
 * the superblock - confirm.
 */
389 struct obd_capa *ll_mdscapa_get(struct inode *inode)
391 struct ll_inode_info *lli = ll_i2info(inode);
392 struct obd_capa *ocapa;
395 LASSERT(inode != NULL);
397 if ((ll_i2sbi(inode)->ll_flags & LL_SBI_MDS_CAPA) == 0)
400 spin_lock(&capa_lock);
401 ocapa = capa_get(lli->lli_mds_capa);
402 spin_unlock(&capa_lock);
403 if (!ocapa && atomic_read(&ll_capa_debug)) {
404 CERROR("no mds capability for "DFID"\n", PFID(&lli->lli_fid));
405 atomic_set(&ll_capa_debug, 0);
/*
 * Install "ocapa" as the MDS capa of "inode", or refresh the existing one.
 *
 * Two paths are distinguished by the debug messages:
 *  - "add MDS": "ocapa" is bound to the inode, stored in lli_mds_capa and
 *    counted in capa_count[CAPA_SITE_CLIENT];
 *  - "update MDS": the capa already attached to the inode ("old") is modified
 *    under its own c_lock.
 *
 * Called from ll_add_capa() with capa_lock held.
 * NOTE(review): presumably the add path is taken when the inode has no MDS
 * capa yet, and the update path copies the new capability into "old", frees
 * "ocapa" and returns "old" - confirm.
 */
411 static struct obd_capa *do_add_mds_capa(struct inode *inode,
412 struct obd_capa *ocapa)
414 struct ll_inode_info *lli = ll_i2info(inode);
415 struct obd_capa *old = lli->lli_mds_capa;
416 struct lustre_capa *capa = &ocapa->c_capa;
419 ocapa->u.cli.inode = inode;
420 lli->lli_mds_capa = ocapa;
421 capa_count[CAPA_SITE_CLIENT]++;
423 DEBUG_CAPA(D_SEC, capa, "add MDS");
425 spin_lock(&old->c_lock);
427 spin_unlock(&old->c_lock);
429 DEBUG_CAPA(D_SEC, capa, "update MDS");
/*
 * Position "ocapa" in the OSS capa list of "inode" (lli_oss_capas) according
 * to its c_expiry.
 *
 * The list is walked from the head; "next" records an entry whose c_expiry is
 * earlier than that of "ocapa", and "ocapa" is placed directly before it with
 * list_move_tail(), or at the tail of the list when no entry was recorded.
 * Because list_move_tail() is used, "ocapa" may already be on the list, as
 * in the renewal path of ll_update_capa().
 *
 * NOTE(review): presumably the walk stops at the first such entry, so that
 * capas with a later expiry come first - confirm.
 */
437 static inline void inode_add_oss_capa(struct inode *inode,
438 struct obd_capa *ocapa)
440 struct ll_inode_info *lli = ll_i2info(inode);
441 struct obd_capa *tmp;
442 struct list_head *next = NULL;
444 /* capa is sorted in lli_oss_capas so lookup can always find the
446 list_for_each_entry(tmp, &lli->lli_oss_capas, u.cli.lli_list) {
447 if (cfs_time_after(ocapa->c_expiry, tmp->c_expiry)) {
448 next = &tmp->u.cli.lli_list;
452 LASSERT(&ocapa->u.cli.lli_list != next);
453 list_move_tail(&ocapa->u.cli.lli_list, next ?: &lli->lli_oss_capas);
/*
 * Add "ocapa" to the OSS capas of "inode", or refresh a matching one.
 *
 * "inode" must be a regular file. An existing capa is looked up with
 * do_lookup_oss_capa() using the OSS-only bits of the new capa's opc.
 * Two paths are distinguished by the debug messages:
 *  - "add OSS": "ocapa" is bound to the inode, its lli_list is initialized
 *    and it is counted in capa_count[CAPA_SITE_CLIENT];
 *  - "update OSS": the existing capa ("old") is modified under its own
 *    c_lock.
 * The capa is then (re)positioned with inode_add_oss_capa().
 *
 * Called from ll_add_capa() with capa_lock held.
 * NOTE(review): presumably the update path copies the new capability into
 * "old", frees "ocapa" and continues with "old" - confirm.
 */
456 static struct obd_capa *do_add_oss_capa(struct inode *inode,
457 struct obd_capa *ocapa)
459 struct obd_capa *old;
460 struct lustre_capa *capa = &ocapa->c_capa;
462 LASSERTF(S_ISREG(inode->i_mode),
463 "inode has oss capa, but not regular file, mode: %d\n",
466 /* FIXME: can't replace it so easily with fine-grained opc */
467 old = do_lookup_oss_capa(inode, capa_opc(capa) & CAPA_OPC_OSS_ONLY);
469 ocapa->u.cli.inode = inode;
470 INIT_LIST_HEAD(&ocapa->u.cli.lli_list);
471 capa_count[CAPA_SITE_CLIENT]++;
473 DEBUG_CAPA(D_SEC, capa, "add OSS");
475 spin_lock(&old->c_lock);
477 spin_unlock(&old->c_lock);
479 DEBUG_CAPA(D_SEC, capa, "update OSS");
485 inode_add_oss_capa(inode, ocapa);
/*
 * Attach a capa to "inode".
 *
 * Under capa_lock the capa is handed to do_add_mds_capa() or
 * do_add_oss_capa(), depending on capa_for_mds(); "ocapa" is replaced by the
 * capa that helper returns. Unless its opc is exactly CAPA_OPC_OSS_TRUNC, the
 * capa gets its expiry set, is (re)inserted into ll_capa_list in expiry
 * order and ll_capa_timer is updated for its renewal time.
 * Finally ll_capa_debug is set to 1, which arms the one-shot "no capability"
 * messages of ll_osscapa_get() and ll_mdscapa_get().
 *
 * NOTE(review): presumably returns the resulting capa - confirm.
 */
489 struct obd_capa *ll_add_capa(struct inode *inode, struct obd_capa *ocapa)
491 spin_lock(&capa_lock);
492 ocapa = capa_for_mds(&ocapa->c_capa) ? do_add_mds_capa(inode, ocapa) :
493 do_add_oss_capa(inode, ocapa);
495 /* truncate capa won't renew */
496 if (ocapa->c_capa.lc_opc != CAPA_OPC_OSS_TRUNC) {
497 set_capa_expiry(ocapa);
498 list_del(&ocapa->c_list);
499 sort_add_capa(ocapa, ll_capa_list);
501 update_capa_timer(ocapa, capa_renewal_time(ocapa));
504 spin_unlock(&capa_lock);
506 atomic_set(&ll_capa_debug, 1);
/*
 * Move c_expiry of "oc" forward by cfs_time_seconds(delay); ll_update_capa()
 * uses this to postpone the next renewal attempt after an -EIO failure.
 */
510 static inline void delay_capa_renew(struct obd_capa *oc, cfs_time_t delay)
512 /* NB: set a fake expiry for this capa to prevent it renew too soon */
513 oc->c_expiry = cfs_time_add(oc->c_expiry, cfs_time_seconds(delay));
/*
 * Finish a renewal of "ocapa"; "capa" is the capability to install.
 *
 * Failure handling, under capa_lock: a renewal canceled because the object
 * was removed is counted in ll_capa_renewal_noent, other failures in
 * ll_capa_renewal_failed. On -EIO with a capa that has not expired yet, the
 * expiry is pushed back by 120 seconds with delay_capa_renew() and
 * ll_capa_renewal_retries is incremented. The capa is moved to
 * ll_idle_capas.
 *
 * Success handling: under ocapa->c_lock the leading part of the capability
 * (everything before lc_flags) is asserted to be unchanged, the new
 * capability is copied in and the expiry is recomputed. Under capa_lock an
 * OSS capa is re-sorted in its inode's list, and the capa is moved back into
 * ll_capa_list with ll_capa_timer updated for its renewal time.
 *
 * NOTE(review): presumably the failure path is selected by a non-zero result
 * code, a retried -EIO capa stays on ll_capa_list rather than going idle,
 * and the inode reference taken by the capa thread is dropped here with
 * iput() - confirm.
 */
516 int ll_update_capa(struct obd_capa *ocapa, struct lustre_capa *capa)
518 struct inode *inode = ocapa->u.cli.inode;
527 spin_lock(&capa_lock);
529 DEBUG_CAPA(D_SEC, &ocapa->c_capa,
530 "renewal canceled because object removed");
531 ll_capa_renewal_noent++;
533 ll_capa_renewal_failed++;
535 /* failed capa won't be renewed any longer, but if -EIO,
536 * client might be doing recovery, retry in 2 min. */
537 if (rc == -EIO && !capa_is_expired(ocapa)) {
538 delay_capa_renew(ocapa, 120);
539 DEBUG_CAPA(D_ERROR, &ocapa->c_capa,
540 "renewal failed: -EIO, retry in 2 mins");
541 ll_capa_renewal_retries++;
544 DEBUG_CAPA(D_ERROR, &ocapa->c_capa,
545 "renewal failed(rc: %d) for", rc);
549 list_del(&ocapa->c_list);
550 sort_add_capa(ocapa, &ll_idle_capas);
551 spin_unlock(&capa_lock);
558 spin_lock(&ocapa->c_lock);
/* only the fields from lc_flags onwards may differ in the renewed capa */
559 LASSERT(!memcmp(&ocapa->c_capa, capa,
560 offsetof(struct lustre_capa, lc_flags)));
561 ocapa->c_capa = *capa;
562 set_capa_expiry(ocapa);
563 spin_unlock(&ocapa->c_lock);
565 spin_lock(&capa_lock);
566 if (capa_for_oss(capa))
567 inode_add_oss_capa(inode, ocapa);
568 DEBUG_CAPA(D_SEC, capa, "renew");
571 list_del_init(&ocapa->c_list);
572 sort_add_capa(ocapa, ll_capa_list);
573 update_capa_timer(ocapa, capa_renewal_time(ocapa));
574 spin_unlock(&capa_lock);
/*
 * Account an open of "inode" by incrementing lli_open_count, the counter
 * read by obd_capa_open_count().
 * NOTE(review): presumably a no-op when neither LL_SBI_MDS_CAPA nor
 * LL_SBI_OSS_CAPA is set on the superblock, or when the inode is not a
 * regular file - confirm.
 */
581 void ll_capa_open(struct inode *inode)
583 struct ll_inode_info *lli = ll_i2info(inode);
585 if ((ll_i2sbi(inode)->ll_flags & (LL_SBI_MDS_CAPA | LL_SBI_OSS_CAPA))
589 if (!S_ISREG(inode->i_mode))
592 atomic_inc(&lli->lli_open_count);
/*
 * Account a close of "inode" by decrementing lli_open_count; counterpart of
 * ll_capa_open().
 * NOTE(review): presumably a no-op when neither LL_SBI_MDS_CAPA nor
 * LL_SBI_OSS_CAPA is set on the superblock, or when the inode is not a
 * regular file - confirm.
 */
595 void ll_capa_close(struct inode *inode)
597 struct ll_inode_info *lli = ll_i2info(inode);
599 if ((ll_i2sbi(inode)->ll_flags & (LL_SBI_MDS_CAPA | LL_SBI_OSS_CAPA))
603 if (!S_ISREG(inode->i_mode))
606 atomic_dec(&lli->lli_open_count);
609 /* delete CAPA_OPC_OSS_TRUNC only */
/*
 * Release a truncate capa: the capa's opc must have CAPA_OPC_OSS_TRUNC set;
 * it is then removed through ll_delete_capa() under capa_lock.
 * NOTE(review): presumably a NULL "ocapa" is tolerated and a reference is
 * dropped before the deletion - confirm.
 */
610 void ll_truncate_free_capa(struct obd_capa *ocapa)
615 LASSERT(ocapa->c_capa.lc_opc & CAPA_OPC_OSS_TRUNC);
616 DEBUG_CAPA(D_SEC, &ocapa->c_capa, "free truncate");
619 spin_lock(&capa_lock);
620 ll_delete_capa(ocapa);
621 spin_unlock(&capa_lock);
/*
 * Drop every capa attached to "inode": under capa_lock the MDS capa
 * (lli_mds_capa) and each entry of lli_oss_capas are removed through
 * ll_delete_capa(). The safe list iterator is used because ll_delete_capa()
 * unlinks the entry being visited.
 * NOTE(review): presumably the MDS capa is only deleted when it is not NULL
 * - confirm.
 */
624 void ll_clear_inode_capas(struct inode *inode)
626 struct ll_inode_info *lli = ll_i2info(inode);
627 struct obd_capa *ocapa, *tmp;
629 spin_lock(&capa_lock);
630 ocapa = lli->lli_mds_capa;
632 ll_delete_capa(ocapa);
634 list_for_each_entry_safe(ocapa, tmp, &lli->lli_oss_capas,
636 ll_delete_capa(ocapa);
637 spin_unlock(&capa_lock);
/*
 * Print the capa renewal counters (renewed, ENOENT, failed, retries) to the
 * console when LL_SBI_MDS_CAPA or LL_SBI_OSS_CAPA is set in sbi->ll_flags.
 */
640 void ll_print_capa_stat(struct ll_sb_info *sbi)
642 if (sbi->ll_flags & (LL_SBI_MDS_CAPA | LL_SBI_OSS_CAPA))
643 LCONSOLE_INFO("Fid capabilities renewed: %llu\n"
644 "Fid capabilities renewal ENOENT: %llu\n"
645 "Fid capabilities failed to renew: %llu\n"
646 "Fid capabilities renewal retries: %llu\n",
647 ll_capa_renewed, ll_capa_renewal_noent,
648 ll_capa_renewal_failed, ll_capa_renewal_retries);