1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (C) 2005 Cluster File Systems, Inc.
6 * Author: Lai Siyao <lsy@clusterfs.com>
8 * This file is part of Lustre, http://www.lustre.org.
10 * Lustre is free software; you can redistribute it and/or
11 * modify it under the terms of version 2 of the GNU General Public
12 * License as published by the Free Software Foundation.
14 * Lustre is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with Lustre; if not, write to the Free Software
21 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 #define DEBUG_SUBSYSTEM S_LLITE
27 #include <linux/version.h>
28 #include <asm/uaccess.h>
29 #include <linux/file.h>
30 #include <linux/kmod.h>
32 #include <lustre_lite.h>
33 #include "llite_internal.h"
35 /* for obd_capa.c_list, client capa might stay in three places:
38 * 3. stand alone: just allocated.
41 /* capas for oss writeback and those failed to renew */
42 static LIST_HEAD(ll_idle_capas);
/* control block of the capa thread: t_flags holds the SVC_RUNNING /
 * SVC_STOPPING / SVC_STOPPED state and t_ctl_waitq is the wait queue shared
 * by the thread, its starter/stopper and the timer callback */
43 static struct ptlrpc_thread ll_capa_thread;
/* client-site slot of the global capa_list[] array; entries are inserted
 * through sort_add_capa(), i.e. ordered by c_expiry */
44 static struct list_head *ll_capa_list = &capa_list[CAPA_SITE_CLIENT];
46 /* llite capa renewal timer */
47 struct timer_list ll_capa_timer;
48 /* for debug: indicate whether capa on llite is enabled or not */
/* set to 1 by ll_add_capa(); cleared again after ll_osscapa_get() or
 * ll_mdscapa_get() has reported a missing capability once */
49 static atomic_t ll_capa_debug = ATOMIC_INIT(0);
/* renewal statistics, printed by ll_print_capa_stat() */
50 static unsigned long long ll_capa_renewed = 0;
51 static unsigned long long ll_capa_renewal_noent = 0;
52 static unsigned long long ll_capa_renewal_failed = 0;
53 static unsigned long long ll_capa_renewal_retries = 0;
/*
 * Re-arm ll_capa_timer so that it fires at @expiry, but only when @expiry is
 * earlier than the currently programmed expiration or the timer is not
 * pending at all. @ocapa is used solely to tag the debug message.
 */
55 static inline void update_capa_timer(struct obd_capa *ocapa, cfs_time_t expiry)
57 if (time_before(expiry, ll_capa_timer.expires) ||
58 !timer_pending(&ll_capa_timer)) {
/* pull the wake-up forward, or start the timer if it was idle */
59 mod_timer(&ll_capa_timer, expiry);
60 DEBUG_CAPA(D_SEC, &ocapa->c_capa,
61 "ll_capa_timer update: %lu/%lu by", expiry, jiffies);
/*
 * Return the point in time at which @ocapa should be renewed: its expiry
 * (c_expiry) minus half of its timeout (lc_timeout, converted with
 * cfs_time_seconds()).
 */
65 static inline cfs_time_t capa_renewal_time(struct obd_capa *ocapa)
67 return cfs_time_sub(ocapa->c_expiry,
68 cfs_time_seconds(ocapa->c_capa.lc_timeout) / 2);
/*
 * Return non-zero when the renewal time of @ocapa (see capa_renewal_time())
 * is not later than the current time, i.e. the capa is due for renewal.
 */
71 static inline int capa_is_to_expire(struct obd_capa *ocapa)
73 return cfs_time_beforeq(capa_renewal_time(ocapa), cfs_time_current());
/*
 * Wake-up condition of the capa thread (used in capa_thread_main()).
 *
 * Under capa_lock only the FIRST entry of a list is examined:
 *  - if ll_capa_list is non-empty, its head is tested with
 *    capa_is_to_expire() and the timer may be re-armed to that entry's
 *    renewal time;
 *  - otherwise, if ll_idle_capas is non-empty, its head is tested with
 *    capa_is_expired() and the timer may be re-armed to that entry's expiry.
 * ll_idle_capas is therefore only consulted when ll_capa_list is empty.
 *
 * NOTE(review): the declaration of `expired`, the conditions guarding the
 * update_capa_timer() calls and the return statement are not visible in this
 * excerpt - confirm against the full file.
 */
76 static inline int have_expired_capa(void)
78 struct obd_capa *ocapa = NULL;
81 /* if ll_capa_list has client capa to expire or ll_idle_capas has
82 * expired capa, return 1.
84 spin_lock(&capa_lock);
85 if (!list_empty(ll_capa_list)) {
86 ocapa = list_entry(ll_capa_list->next, struct obd_capa, c_list);
87 expired = capa_is_to_expire(ocapa);
89 update_capa_timer(ocapa, capa_renewal_time(ocapa));
90 } else if (!list_empty(&ll_idle_capas)) {
91 ocapa = list_entry(ll_idle_capas.next, struct obd_capa, c_list);
92 expired = capa_is_expired(ocapa);
94 update_capa_timer(ocapa, ocapa->c_expiry);
96 spin_unlock(&capa_lock);
99 DEBUG_CAPA(D_SEC, &ocapa->c_capa, "expired");
/*
 * Return 1 when the SVC_STOPPING bit is set in ll_capa_thread.t_flags
 * (ll_capa_thread_stop() sets it), 0 otherwise.
 */
103 static inline int ll_capa_check_stop(void)
105 return (ll_capa_thread.t_flags & SVC_STOPPING) ? 1: 0;
/*
 * Insert @ocapa into the c_list-linked list @head, ordered by c_expiry.
 *
 * The list is scanned from the tail towards the head looking for an entry
 * whose expiry is not later than @ocapa's; @ocapa is then linked right after
 * that entry with list_add(). If no such entry is recorded, @ocapa is linked
 * right after @head, i.e. it becomes the first element.
 *
 * Callers visible in this file hold capa_lock around the call.
 */
108 static void sort_add_capa(struct obd_capa *ocapa, struct list_head *head)
110 struct obd_capa *tmp;
111 struct list_head *before = NULL;
113 /* TODO: client capa is sorted by expiry, this could be optimized */
114 list_for_each_entry_reverse(tmp, head, c_list) {
/* tmp expires no later than ocapa: ocapa belongs behind it */
115 if (cfs_time_aftereq(ocapa->c_expiry, tmp->c_expiry)) {
116 before = &tmp->c_list;
/* the capa being inserted must not already be the chosen anchor */
121 LASSERT(&ocapa->c_list != before);
/* GNU "?:" extension: fall back to the list head when no anchor was found */
122 list_add(&ocapa->c_list, before ?: head);
/*
 * Return the current lli_open_count of the inode that client capa @oc is
 * attached to (oc->u.cli.inode). The counter is maintained by
 * ll_capa_open() / ll_capa_close().
 */
125 static inline int obd_capa_open_count(struct obd_capa *oc)
127 struct ll_inode_info *lli = ll_i2info(oc->u.cli.inode);
128 return atomic_read(&lli->lli_open_count);
/*
 * Detach client capa @ocapa from its inode and from the global c_list.
 *
 * An MDS capa must be the inode's lli_mds_capa (asserted) and that pointer
 * is cleared; an OSS capa is unlinked from the inode's OSS capa list via
 * u.cli.lli_list. In both cases the capa is then removed from whichever
 * c_list it is on and the client capa counter is decremented.
 *
 * All callers visible in this file invoke it with capa_lock held.
 * NOTE(review): the "free client" message suggests the capa is released
 * after the counter update, but that statement is not visible here.
 */
131 static void ll_delete_capa(struct obd_capa *ocapa)
133 struct ll_inode_info *lli = ll_i2info(ocapa->u.cli.inode);
135 if (capa_for_mds(&ocapa->c_capa)) {
136 LASSERT(lli->lli_mds_capa == ocapa);
137 lli->lli_mds_capa = NULL;
138 } else if (capa_for_oss(&ocapa->c_capa)) {
139 list_del_init(&ocapa->u.cli.lli_list);
142 DEBUG_CAPA(D_SEC, &ocapa->c_capa, "free client");
143 list_del(&ocapa->c_list);
144 capa_count[CAPA_SITE_CLIENT]--;
148 /* three places where client capa is deleted:
149 * 1. capa_thread_main(), main place to delete expired capa.
150 * 2. ll_clear_inode_capas() in ll_clear_inode().
151 * 3. ll_truncate_free_capa() delete truncate capa explicitly in ll_truncate().
/*
 * Body of the "ll_capa" kernel thread: renews client capabilities that are
 * about to expire and releases idle ones that have expired.
 *
 * Outline, as far as it is visible here:
 *  1. daemonize, publish SVC_RUNNING and wake ll_capa_thread_start();
 *  2. sleep until a stop is requested or have_expired_capa() is true;
 *  3. under capa_lock, walk ll_capa_list: each capa that is due for renewal
 *     is unlinked from c_list; MDS capas of non-directories that are not
 *     open and hold no LOOKUP lock, and OSS capas of unopened inodes, are
 *     parked on ll_idle_capas instead of being renewed; for the rest the
 *     inode is pinned with igrab(), capa_lock is dropped around
 *     md_renew_capa() and retaken afterwards;
 *  4. still under capa_lock, walk ll_idle_capas: expired capas that are
 *     still referenced (c_refc != 0) are only unlinked, unreferenced ones
 *     are destroyed with ll_delete_capa();
 *  5. on exit publish SVC_STOPPED and wake ll_capa_thread_stop().
 *
 * NOTE(review): loop framing, break/continue statements, the declaration of
 * `rc` and the assignment of `next` are not visible in this excerpt; verify
 * the exact control flow against the full file before changing anything.
 */
153 static int capa_thread_main(void *unused)
155 struct obd_capa *ocapa, *tmp, *next;
156 struct inode *inode = NULL;
157 struct l_wait_info lwi = { 0 };
161 cfs_daemonize("ll_capa");
/* tell ll_capa_thread_start() that the thread is up */
163 ll_capa_thread.t_flags = SVC_RUNNING;
164 wake_up(&ll_capa_thread.t_ctl_waitq);
/* woken by ll_capa_timer_callback() or ll_capa_thread_stop() */
167 l_wait_event(ll_capa_thread.t_ctl_waitq,
168 (ll_capa_check_stop() || have_expired_capa()),
171 if (ll_capa_check_stop())
176 spin_lock(&capa_lock);
177 list_for_each_entry_safe(ocapa, tmp, ll_capa_list, c_list) {
/* truncate capas are never put on ll_capa_list (see ll_add_capa()) */
178 LASSERT(ocapa->c_capa.lc_opc != CAPA_OPC_OSS_TRUNC);
180 if (!capa_is_to_expire(ocapa)) {
/* due for renewal: take it off ll_capa_list before deciding its fate */
185 list_del_init(&ocapa->c_list);
187 /* for MDS capability, only renew those which belong to
188 * dir, or its inode is opened, or client holds LOOKUP
191 if (capa_for_mds(&ocapa->c_capa) &&
192 !S_ISDIR(ocapa->u.cli.inode->i_mode) &&
193 obd_capa_open_count(ocapa) == 0 &&
194 !ll_have_md_lock(ocapa->u.cli.inode,
195 MDS_INODELOCK_LOOKUP)) {
196 DEBUG_CAPA(D_SEC, &ocapa->c_capa,
198 sort_add_capa(ocapa, &ll_idle_capas);
202 /* for OSS capability, only renew those whose inode is
205 if (capa_for_oss(&ocapa->c_capa) &&
206 obd_capa_open_count(ocapa) == 0) {
207 /* oss capa with open count == 0 won't renew,
208 * move to idle list */
209 sort_add_capa(ocapa, &ll_idle_capas);
213 /* NB iput() is in ll_update_capa() */
214 inode = igrab(ocapa->u.cli.inode);
216 DEBUG_CAPA(D_ERROR, &ocapa->c_capa,
/* capa_lock is a spinlock: drop it around the renewal RPC */
223 spin_unlock(&capa_lock);
225 rc = md_renew_capa(ll_i2mdexp(inode), ocapa,
227 spin_lock(&capa_lock);
229 DEBUG_CAPA(D_ERROR, &ocapa->c_capa,
230 "renew failed: %d", rc);
231 ll_capa_renewal_failed++;
/* schedule the next wake-up from the capa recorded in `next` */
236 update_capa_timer(next, capa_renewal_time(next));
/* second pass: reap expired capas from the idle list */
238 list_for_each_entry_safe(ocapa, tmp, &ll_idle_capas, c_list) {
239 if (!capa_is_expired(ocapa)) {
241 update_capa_timer(ocapa, ocapa->c_expiry);
/* still referenced by someone: unlink only, do not free */
245 if (atomic_read(&ocapa->c_refc)) {
246 DEBUG_CAPA(D_SEC, &ocapa->c_capa,
247 "expired(c_refc %d), don't release",
248 atomic_read(&ocapa->c_refc));
249 /* don't try to renew any more */
250 list_del_init(&ocapa->c_list);
254 /* expired capa is released. */
255 DEBUG_CAPA(D_SEC, &ocapa->c_capa, "release expired");
256 ll_delete_capa(ocapa);
259 spin_unlock(&capa_lock);
/* tell ll_capa_thread_stop() that the thread is done */
262 ll_capa_thread.t_flags = SVC_STOPPED;
263 wake_up(&ll_capa_thread.t_ctl_waitq);
/*
 * Timer callback: wakes the capa thread sleeping on
 * ll_capa_thread.t_ctl_waitq so that it re-evaluates its wait condition.
 * The @unused argument is ignored.
 */
267 void ll_capa_timer_callback(unsigned long unused)
269 wake_up(&ll_capa_thread.t_ctl_waitq);
/*
 * Start the capa thread: initialise its control wait queue, spawn
 * capa_thread_main() with kernel_thread() and block until the thread has
 * set SVC_RUNNING in ll_capa_thread.t_flags.
 *
 * NOTE(review): the declaration of `rc`, the failure check around the
 * CERROR() and the return statements are not visible in this excerpt;
 * presumably a negative kernel_thread() result is returned to the caller.
 */
272 int ll_capa_thread_start(void)
277 init_waitqueue_head(&ll_capa_thread.t_ctl_waitq);
279 rc = kernel_thread(capa_thread_main, NULL, 0);
281 CERROR("cannot start expired capa thread: rc %d\n", rc);
/* capa_thread_main() sets SVC_RUNNING and wakes this queue */
284 wait_event(ll_capa_thread.t_ctl_waitq,
285 ll_capa_thread.t_flags & SVC_RUNNING);
/*
 * Ask the capa thread to exit and wait for it: t_flags is overwritten with
 * SVC_STOPPING, the thread is woken, and the caller blocks until the thread
 * has set SVC_STOPPED (done at the end of capa_thread_main()).
 */
290 void ll_capa_thread_stop(void)
292 ll_capa_thread.t_flags = SVC_STOPPING;
293 wake_up(&ll_capa_thread.t_ctl_waitq);
294 wait_event(ll_capa_thread.t_ctl_waitq,
295 ll_capa_thread.t_flags & SVC_STOPPED);
/*
 * Search the OSS capas attached to @inode (lli_oss_capas) for one that
 * belongs to @uid and whose opcode mask contains every bit of the requested
 * opc. A candidate is additionally asserted to carry the inode's FID and to
 * be a client-site capa.
 *
 * Must be called with capa_lock held (see the comment in the body).
 * NOTE(review): the rest of the parameter list, the continue statements and
 * the return statements are not visible in this excerpt.
 */
298 static struct obd_capa *do_lookup_oss_capa(struct inode *inode, uid_t uid,
301 struct ll_inode_info *lli = ll_i2info(inode);
302 struct obd_capa *ocapa;
304 /* inside capa_lock */
305 list_for_each_entry(ocapa, &lli->lli_oss_capas, u.cli.lli_list) {
/* owner mismatch */
306 if (uid != capa_uid(&ocapa->c_capa))
/* capa does not cover all requested operation bits */
308 if ((capa_opc(&ocapa->c_capa) & opc) != opc)
311 LASSERT(lu_fid_eq(capa_fid(&ocapa->c_capa),
312 ll_inode2fid(inode)));
313 LASSERT(ocapa->c_site == CAPA_SITE_CLIENT);
315 DEBUG_CAPA(D_SEC, &ocapa->c_capa, "found client");
322 /* FIXME: when uid is 0, the request comes from mmapped IO, fsync or truncate. */
/*
 * Look up an OSS capability of @inode that is usable by @uid for operation
 * @opc.
 *
 * @opc must be CAPA_OPC_OSS_WRITE, CAPA_OPC_OSS_RW or CAPA_OPC_OSS_TRUNC
 * (asserted). Under capa_lock the inode's lli_oss_capas list is scanned;
 * expired capas are passed over, and so are capas owned by another uid
 * unless @uid is 0, which matches any owner. A capa qualifies when it
 * supports the write, read or truncate operation requested by @opc, tested
 * in that order.
 *
 * When ll_capa_debug is set, a missing capability is reported once with
 * CERROR() and the flag is cleared.
 *
 * NOTE(review): the early exit for file systems without LL_SBI_OSS_CAPA,
 * the statements executed on a match (presumably taking a reference) and
 * the return statement are not visible in this excerpt.
 */
323 struct obd_capa *ll_osscapa_get(struct inode *inode, uid_t uid, __u64 opc)
325 struct ll_inode_info *lli = ll_i2info(inode);
326 struct obd_capa *ocapa;
/* OSS capabilities are disabled for this super block */
329 if ((ll_i2sbi(inode)->ll_flags & LL_SBI_OSS_CAPA) == 0)
333 LASSERT(opc == CAPA_OPC_OSS_WRITE || opc == CAPA_OPC_OSS_RW ||
334 opc == CAPA_OPC_OSS_TRUNC);
336 spin_lock(&capa_lock);
337 list_for_each_entry(ocapa, &lli->lli_oss_capas, u.cli.lli_list) {
338 if (capa_is_expired(ocapa))
/* uid 0 is allowed to use a capa of any owner */
340 if (uid != 0 && uid != capa_uid(&ocapa->c_capa))
342 if ((opc & CAPA_OPC_OSS_WRITE) &&
343 capa_opc_supported(&ocapa->c_capa, CAPA_OPC_OSS_WRITE)) {
345 } else if ((opc & CAPA_OPC_OSS_READ) &&
346 capa_opc_supported(&ocapa->c_capa,
347 CAPA_OPC_OSS_READ)) {
349 } else if ((opc & CAPA_OPC_OSS_TRUNC) &&
350 capa_opc_supported(&ocapa->c_capa, opc)) {
356 LASSERT(lu_fid_eq(capa_fid(&ocapa->c_capa),
357 ll_inode2fid(inode)));
358 LASSERT(ocapa->c_site == CAPA_SITE_CLIENT);
362 DEBUG_CAPA(D_SEC, &ocapa->c_capa, "found client");
/* report the miss only once per ll_add_capa() */
366 if (atomic_read(&ll_capa_debug)) {
367 CERROR("no capability for "DFID" opc "LPX64"\n",
368 PFID(&lli->lli_fid), opc);
369 atomic_set(&ll_capa_debug, 0);
372 spin_unlock(&capa_lock);
/*
 * Get the MDS capability attached to @inode.
 *
 * @inode must not be NULL (asserted). Under capa_lock the inode's
 * lli_mds_capa is passed through capa_get(), whose result is kept in
 * `ocapa`. When no capa is obtained and ll_capa_debug is set, the miss is
 * reported once with CERROR() and the flag is cleared.
 *
 * NOTE(review): the early exit for file systems without LL_SBI_MDS_CAPA and
 * the return statement are not visible in this excerpt.
 */
377 struct obd_capa *ll_mdscapa_get(struct inode *inode)
379 struct ll_inode_info *lli = ll_i2info(inode);
380 struct obd_capa *ocapa;
383 LASSERT(inode != NULL);
/* MDS capabilities are disabled for this super block */
385 if ((ll_i2sbi(inode)->ll_flags & LL_SBI_MDS_CAPA) == 0)
388 spin_lock(&capa_lock);
389 ocapa = capa_get(lli->lli_mds_capa);
390 spin_unlock(&capa_lock);
391 if (!ocapa && atomic_read(&ll_capa_debug)) {
392 CERROR("no mds capability for "DFID"\n", PFID(&lli->lli_fid));
393 atomic_set(&ll_capa_debug, 0);
/*
 * Attach MDS capa @ocapa to @inode, or refresh the one already attached.
 *
 * Two paths are visible: the "add MDS" path binds @ocapa to the inode,
 * installs it as lli_mds_capa and bumps the client capa counter; the
 * "update MDS" path works on the previously installed capa (`old`) under
 * its c_lock.
 *
 * Called from ll_add_capa() with capa_lock held.
 * NOTE(review): the branch condition selecting between the two paths, the
 * statement executed under old->c_lock and the return value are not visible
 * in this excerpt.
 */
399 static struct obd_capa *do_add_mds_capa(struct inode *inode,
400 struct obd_capa *ocapa)
402 struct ll_inode_info *lli = ll_i2info(inode);
403 struct obd_capa *old = lli->lli_mds_capa;
404 struct lustre_capa *capa = &ocapa->c_capa;
407 ocapa->u.cli.inode = inode;
408 lli->lli_mds_capa = ocapa;
409 capa_count[CAPA_SITE_CLIENT]++;
411 DEBUG_CAPA(D_SEC, capa, "add MDS");
413 spin_lock(&old->c_lock);
415 spin_unlock(&old->c_lock);
417 DEBUG_CAPA(D_SEC, capa, "update MDS");
/*
 * (Re)position OSS capa @ocapa in the per-inode list lli_oss_capas.
 *
 * The list is scanned from the front for an entry that expires earlier than
 * @ocapa; list_move_tail() then unlinks @ocapa from wherever it currently
 * is and links it immediately before that entry, or at the tail of the list
 * when no such entry is recorded.
 *
 * Callers visible in this file hold capa_lock around the call.
 */
425 static inline void inode_add_oss_capa(struct inode *inode,
426 struct obd_capa *ocapa)
428 struct ll_inode_info *lli = ll_i2info(inode);
429 struct obd_capa *tmp;
430 struct list_head *next = NULL;
432 /* capa is sorted in lli_oss_capas so lookup can always find the
434 list_for_each_entry(tmp, &lli->lli_oss_capas, u.cli.lli_list) {
435 if (cfs_time_after(ocapa->c_expiry, tmp->c_expiry)) {
436 next = &tmp->u.cli.lli_list;
440 LASSERT(&ocapa->u.cli.lli_list != next);
441 list_move_tail(&ocapa->u.cli.lli_list, next ?: &lli->lli_oss_capas);
/*
 * Attach OSS capa @ocapa to @inode, or refresh a matching one already
 * attached.
 *
 * @inode must be a regular file (asserted). An existing capa is looked up
 * with do_lookup_oss_capa() using the new capa's uid and its opcode bits
 * masked with CAPA_OPC_OSS_ONLY. Two paths are visible: the "add OSS" path
 * binds @ocapa to the inode, initialises its lli_list link and bumps the
 * client capa counter; the "update OSS" path works on the existing capa
 * under its c_lock. inode_add_oss_capa() then places a capa in the inode's
 * sorted list.
 *
 * Called from ll_add_capa() with capa_lock held.
 * NOTE(review): the branch condition, the statement executed under
 * old->c_lock and the return value are not visible in this excerpt.
 */
444 static struct obd_capa *do_add_oss_capa(struct inode *inode,
445 struct obd_capa *ocapa)
447 struct obd_capa *old;
448 struct lustre_capa *capa = &ocapa->c_capa;
450 LASSERTF(S_ISREG(inode->i_mode),
451 "inode has oss capa, but not regular file, mode: %d\n",
454 /* FIXME: can't replace it so easily with fine-grained opc */
455 old = do_lookup_oss_capa(inode, capa_uid(capa),
456 capa_opc(capa) & CAPA_OPC_OSS_ONLY);
458 ocapa->u.cli.inode = inode;
459 INIT_LIST_HEAD(&ocapa->u.cli.lli_list);
460 capa_count[CAPA_SITE_CLIENT]++;
462 DEBUG_CAPA(D_SEC, capa, "add OSS");
464 spin_lock(&old->c_lock);
466 spin_unlock(&old->c_lock);
468 DEBUG_CAPA(D_SEC, capa, "update OSS");
474 inode_add_oss_capa(inode, ocapa);
/*
 * Register capability @ocapa for @inode on the client.
 *
 * Under capa_lock the capa is handed to do_add_mds_capa() or
 * do_add_oss_capa() depending on capa_for_mds(); `ocapa` is replaced by
 * whatever that helper returns. Unless the resulting capa is a pure
 * truncate capa (lc_opc == CAPA_OPC_OSS_TRUNC), its expiry is set, it is
 * re-inserted into ll_capa_list in expiry order and the renewal timer is
 * updated to its renewal time. Finally ll_capa_debug is set to 1, which
 * re-enables the one-shot "no capability" messages.
 *
 * NOTE(review): the return statement is not visible in this excerpt.
 */
478 struct obd_capa *ll_add_capa(struct inode *inode, struct obd_capa *ocapa)
480 spin_lock(&capa_lock);
481 ocapa = capa_for_mds(&ocapa->c_capa) ? do_add_mds_capa(inode, ocapa) :
482 do_add_oss_capa(inode, ocapa);
484 /* truncate capa won't renew */
485 if (ocapa->c_capa.lc_opc != CAPA_OPC_OSS_TRUNC) {
486 set_capa_expiry(ocapa);
/* unlink first so sort_add_capa() can place it at its new position */
487 list_del(&ocapa->c_list);
488 sort_add_capa(ocapa, ll_capa_list);
490 update_capa_timer(ocapa, capa_renewal_time(ocapa));
493 spin_unlock(&capa_lock);
495 atomic_set(&ll_capa_debug, 1);
/*
 * Push the expiry of @oc @delay seconds into the future. Because the
 * renewal time is derived from c_expiry (see capa_renewal_time()), this
 * postpones the next renewal attempt by the same amount.
 */
499 static inline void delay_capa_renew(struct obd_capa *oc, cfs_time_t delay)
501 /* NB: set a fake expiry for this capa to prevent it renew too soon */
502 oc->c_expiry = cfs_time_add(oc->c_expiry, cfs_time_seconds(delay));
/*
 * Process the outcome of a renewal request for client capa @ocapa; @capa is
 * the renewed capability on success.
 *
 * Failure path (under capa_lock): the ll_capa_renewal_noent or
 * ll_capa_renewal_failed counter is bumped; for -EIO on a capa that has not
 * expired yet, the expiry is pushed back by 120 seconds and
 * ll_capa_renewal_retries is bumped. The capa is unlinked from its c_list
 * and parked on ll_idle_capas.
 *
 * Success path: under ocapa->c_lock the leading part of the capa (every
 * field before lc_flags) is asserted to be unchanged, the new capability is
 * copied in and the expiry is recomputed. Then, under capa_lock, an OSS
 * capa is re-sorted in its inode's list, the capa is re-inserted into
 * ll_capa_list in expiry order and the renewal timer is updated.
 *
 * NOTE(review): `rc` is used but neither declared nor assigned in the
 * visible lines, and the branch conditions, the iput() announced in
 * capa_thread_main() and the return statements are not visible either -
 * confirm against the full file.
 */
505 int ll_update_capa(struct obd_capa *ocapa, struct lustre_capa *capa)
507 struct inode *inode = ocapa->u.cli.inode;
516 spin_lock(&capa_lock);
518 DEBUG_CAPA(D_SEC, &ocapa->c_capa,
519 "renewal canceled because object removed");
520 ll_capa_renewal_noent++;
522 ll_capa_renewal_failed++;
524 /* failed capa won't be renewed any longer, but if -EIO,
525 * client might be doing recovery, retry in 2 min. */
526 if (rc == -EIO && !capa_is_expired(ocapa)) {
/* 120 is in seconds, see delay_capa_renew() */
527 delay_capa_renew(ocapa, 120);
528 DEBUG_CAPA(D_ERROR, &ocapa->c_capa,
529 "renewal failed: -EIO, retry in 2 mins");
530 ll_capa_renewal_retries++;
533 DEBUG_CAPA(D_ERROR, &ocapa->c_capa,
534 "renewal failed(rc: %d) for", rc);
/* park the capa on the idle list */
538 list_del(&ocapa->c_list);
539 sort_add_capa(ocapa, &ll_idle_capas);
540 spin_unlock(&capa_lock);
547 spin_lock(&ocapa->c_lock);
/* everything in front of lc_flags must be identical in old and new capa */
548 LASSERT(!memcmp(&ocapa->c_capa, capa,
549 offsetof(struct lustre_capa, lc_flags)));
550 ocapa->c_capa = *capa;
551 set_capa_expiry(ocapa);
552 spin_unlock(&ocapa->c_lock);
554 spin_lock(&capa_lock);
/* expiry changed: restore the ordering of the per-inode OSS list */
555 if (capa_for_oss(capa))
556 inode_add_oss_capa(inode, ocapa);
557 DEBUG_CAPA(D_SEC, capa, "renew");
560 list_del_init(&ocapa->c_list);
561 sort_add_capa(ocapa, ll_capa_list);
562 update_capa_timer(ocapa, capa_renewal_time(ocapa));
563 spin_unlock(&capa_lock);
/*
 * Account for an open of @inode by incrementing its lli_open_count, which
 * capa_thread_main() consults (via obd_capa_open_count()) when deciding
 * whether a capa is worth renewing.
 *
 * NOTE(review): the tests on the super block capa flags and on S_ISREG()
 * presumably guard early returns, so that only regular files on capa-enabled
 * mounts are counted - the statements following them are not visible here.
 */
570 void ll_capa_open(struct inode *inode)
572 struct ll_inode_info *lli = ll_i2info(inode);
574 if ((ll_i2sbi(inode)->ll_flags & (LL_SBI_MDS_CAPA | LL_SBI_OSS_CAPA))
578 if (!S_ISREG(inode->i_mode))
581 atomic_inc(&lli->lli_open_count);
/*
 * Counterpart of ll_capa_open(): decrements lli_open_count of @inode.
 *
 * NOTE(review): the tests on the super block capa flags and on S_ISREG()
 * presumably guard early returns, mirroring ll_capa_open() - the statements
 * following them are not visible here.
 */
584 void ll_capa_close(struct inode *inode)
586 struct ll_inode_info *lli = ll_i2info(inode);
588 if ((ll_i2sbi(inode)->ll_flags & (LL_SBI_MDS_CAPA | LL_SBI_OSS_CAPA))
592 if (!S_ISREG(inode->i_mode))
595 atomic_dec(&lli->lli_open_count);
598 /* delete CAPA_OPC_OSS_TRUNC only */
/*
 * Destroy a truncate capability once the truncate is done.
 *
 * @ocapa must have the CAPA_OPC_OSS_TRUNC bit set in lc_opc (asserted); it
 * is then removed with ll_delete_capa() under capa_lock.
 *
 * NOTE(review): statements between the visible lines (e.g. a NULL check or
 * a reference drop) are not shown in this excerpt.
 */
599 void ll_truncate_free_capa(struct obd_capa *ocapa)
604 LASSERT(ocapa->c_capa.lc_opc & CAPA_OPC_OSS_TRUNC);
605 DEBUG_CAPA(D_SEC, &ocapa->c_capa, "free truncate");
608 spin_lock(&capa_lock);
609 ll_delete_capa(ocapa);
610 spin_unlock(&capa_lock);
/*
 * Drop every capability attached to @inode: the MDS capa (lli_mds_capa) and
 * all OSS capas on lli_oss_capas are removed with ll_delete_capa() while
 * capa_lock is held. The OSS list is walked with the _safe iterator because
 * ll_delete_capa() unlinks the current entry.
 *
 * NOTE(review): the guard in front of the first ll_delete_capa() call
 * (presumably a NULL check on the MDS capa) is not visible in this excerpt.
 */
613 void ll_clear_inode_capas(struct inode *inode)
615 struct ll_inode_info *lli = ll_i2info(inode);
616 struct obd_capa *ocapa, *tmp;
618 spin_lock(&capa_lock);
619 ocapa = lli->lli_mds_capa;
621 ll_delete_capa(ocapa);
623 list_for_each_entry_safe(ocapa, tmp, &lli->lli_oss_capas,
625 ll_delete_capa(ocapa);
626 spin_unlock(&capa_lock);
/*
 * Print the capability renewal counters (renewed, ENOENT, failed, retries)
 * to the console, but only when MDS or OSS capabilities are enabled in
 * sbi->ll_flags. The counters are file-scope, not per super block.
 */
629 void ll_print_capa_stat(struct ll_sb_info *sbi)
631 if (sbi->ll_flags & (LL_SBI_MDS_CAPA | LL_SBI_OSS_CAPA))
632 LCONSOLE_INFO("Fid capabilities renewed: %llu\n"
633 "Fid capabilities renewal ENOENT: %llu\n"
634 "Fid capabilities failed to renew: %llu\n"
635 "Fid capabilities renewal retries: %llu\n",
636 ll_capa_renewed, ll_capa_renewal_noent,
637 ll_capa_renewal_failed, ll_capa_renewal_retries);