1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (C) 2005 Cluster File Systems, Inc.
6 * Author: Lai Siyao <lsy@clusterfs.com>
8 * This file is part of Lustre, http://www.lustre.org.
10 * Lustre is free software; you can redistribute it and/or
11 * modify it under the terms of version 2 of the GNU General Public
12 * License as published by the Free Software Foundation.
14 * Lustre is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with Lustre; if not, write to the Free Software
21 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 #define DEBUG_SUBSYSTEM S_LLITE
27 #include <linux/version.h>
28 #include <asm/uaccess.h>
29 #include <linux/file.h>
30 #include <linux/kmod.h>
32 #include <lustre_lite.h>
33 #include "llite_internal.h"
35 /* for obd_capa.c_list, client capa might stay in three places:
38 * 3. stand alone: just allocated.
41 /* capas for oss writeback and those failed to renew */
// Kept ordered by c_expiry through sort_add_capa(); entries are added by the
// capa thread and by ll_update_capa().
42 static LIST_HEAD(ll_idle_capas);
// Flags (t_flags) and wait queue (t_ctl_waitq) of the capa thread.
43 static struct ptlrpc_thread ll_capa_thread;
// Client-site slot of the capa_list array; ll_add_capa() and
// ll_update_capa() sort capas into it and the capa thread walks it.
44 static struct list_head *ll_capa_list = &capa_list[CAPA_SITE_CLIENT];
46 /* llite capa renewal timer */
47 cfs_timer_t ll_capa_timer;
48 /* for debug: indicate whether capa on llite is enabled or not */
// Set to 1 by ll_add_capa(); ll_osscapa_get() and ll_mdscapa_get() test it
// and reset it to 0.
49 static atomic_t ll_capa_debug = ATOMIC_INIT(0);
// Arm ll_capa_timer with expiry when cfs_time_before() places expiry ahead of
// the current timer deadline, or when the timer is not armed at all.
// In the visible lines ocapa is used only as context for the debug message.
// NOTE(review): the remaining DEBUG_CAPA arguments are not visible in this
// listing.
51 static inline void update_capa_timer(struct obd_capa *ocapa, cfs_time_t expiry)
53 if (cfs_time_before(expiry, cfs_timer_deadline(&ll_capa_timer)) ||
54 !cfs_timer_is_armed(&ll_capa_timer)) {
55 cfs_timer_arm(&ll_capa_timer, expiry);
56 DEBUG_CAPA(D_SEC, &ocapa->c_capa,
57 "ll_capa_timer update: %lu/%lu by",
// Wake-up condition used by capa_thread_main() in its l_wait_event() call.
// Under capa_lock it inspects the first entry of ll_capa_list with
// capa_is_to_expire(), or, only when ll_capa_list is empty, the first entry
// of ll_idle_capas with capa_is_expired(), and calls update_capa_timer() for
// that entry.
// NOTE(review): the declaration of expired, the lines guarding the two
// update_capa_timer() calls and the final debug message, and the return
// statement are not visible in this listing.
// NOTE(review): because of the else-if, ll_idle_capas is not examined while
// ll_capa_list is non-empty -- confirm this matches the intent stated in the
// comment inside the function.
62 static inline int have_expired_capa(void)
64 struct obd_capa *ocapa = NULL;
67 /* if ll_capa_list has client capa to expire or ll_idle_capas has
68 * expired capa, return 1.
70 spin_lock(&capa_lock);
71 if (!list_empty(ll_capa_list)) {
72 ocapa = list_entry(ll_capa_list->next, struct obd_capa, c_list);
73 expired = capa_is_to_expire(ocapa);
75 update_capa_timer(ocapa, capa_renewal_time(ocapa));
76 } else if (!list_empty(&ll_idle_capas)) {
77 ocapa = list_entry(ll_idle_capas.next, struct obd_capa, c_list);
78 expired = capa_is_expired(ocapa);
80 update_capa_timer(ocapa, ocapa->c_expiry);
82 spin_unlock(&capa_lock);
85 DEBUG_CAPA(D_SEC, &ocapa->c_capa, "expired");
// Return 1 when SVC_STOPPING is set in ll_capa_thread.t_flags, 0 otherwise.
89 static inline int ll_capa_check_stop(void)
91 return (ll_capa_thread.t_flags & SVC_STOPPING) ? 1: 0;
// Link ocapa (through c_list) into head, positioned by c_expiry.
// The list is scanned from the tail for an entry whose c_expiry is earlier
// than that of ocapa (cfs_time_after); list_add() then links ocapa right
// after that entry, or right after head itself when no entry was selected.
// NOTE(review): the declaration of tmp and the loop exit after a match
// (presumably a break) are not visible in this listing.
94 static void sort_add_capa(struct obd_capa *ocapa, struct list_head *head)
97 struct list_head *before = NULL;
99 /* TODO: client capa is sorted by expiry, this could be optimized */
100 list_for_each_entry_reverse(tmp, head, c_list) {
101 if (cfs_time_after(ocapa->c_expiry, tmp->c_expiry)) {
102 before = &tmp->c_list;
// ocapa must not already be the selected insertion point
107 LASSERT(&ocapa->c_list != before);
108 list_add(&ocapa->c_list, before ?: head);
// Return the current lli_open_count of the inode that oc points to through
// oc->u.cli.inode.
111 static inline int obd_capa_open_count(struct obd_capa *oc)
113 struct ll_inode_info *lli = ll_i2info(oc->u.cli.inode);
114 return atomic_read(&lli->lli_open_count);
// Detach ocapa from its inode and from the list it is linked on via c_list.
// An MDS capa must be the lli_mds_capa of its inode (asserted), and that
// pointer is reset to NULL; an OSS capa is unlinked from the per-inode list
// through u.cli.lli_list.
// Every caller visible in this file invokes it with capa_lock held.
// NOTE(review): the lines after list_del() are not visible in this listing;
// the debug message suggests the capa is released there -- confirm.
117 static void ll_delete_capa(struct obd_capa *ocapa)
119 struct ll_inode_info *lli = ll_i2info(ocapa->u.cli.inode);
121 if (capa_for_mds(&ocapa->c_capa)) {
122 LASSERT(lli->lli_mds_capa == ocapa);
123 lli->lli_mds_capa = NULL;
124 } else if (capa_for_oss(&ocapa->c_capa)) {
125 list_del_init(&ocapa->u.cli.lli_list);
128 DEBUG_CAPA(D_SEC, &ocapa->c_capa, "free client");
129 list_del(&ocapa->c_list);
133 /* three places where client capa is deleted:
134 * 1. capa_thread_main(), main place to delete expired capa.
135 * 2. ll_clear_inode_capas() in ll_clear_inode().
136 * 3. ll_truncate_free_capa() delete truncate capa explicitly in ll_truncate().
// Body of the capa thread spawned by ll_capa_thread_start().
// It sets SVC_RUNNING and wakes the starter, then waits on t_ctl_waitq until
// ll_capa_check_stop() or have_expired_capa() is true. After a wake-up it
// takes capa_lock and makes two passes: first over ll_capa_list (capas that
// are about to expire), then over ll_idle_capas (capas that have expired).
// Before leaving it sets SVC_STOPPED and wakes the waiter in
// ll_capa_thread_stop().
// NOTE(review): the enclosing loop, several branch bodies (continue, break,
// conditions around igrab and rc) and the return statement are not visible
// in this listing; comments below describe only what the visible lines do.
138 static int capa_thread_main(void *unused)
140 struct obd_capa *ocapa, *tmp, *next;
141 struct inode *inode = NULL;
142 struct l_wait_info lwi = { 0 };
146 cfs_daemonize("ll_capa");
// announce start-up to the waiter in ll_capa_thread_start()
148 ll_capa_thread.t_flags = SVC_RUNNING;
149 wake_up(&ll_capa_thread.t_ctl_waitq);
// sleep until a stop request arrives or have_expired_capa() reports work
152 l_wait_event(ll_capa_thread.t_ctl_waitq,
153 (ll_capa_check_stop() || have_expired_capa()),
156 if (ll_capa_check_stop())
159 spin_lock(&capa_lock);
// pass 1: capas on the renewal list; truncate capas are never expected here
161 list_for_each_entry_safe(ocapa, tmp, ll_capa_list, c_list) {
162 LASSERT(ocapa->c_capa.lc_opc != CAPA_OPC_OSS_TRUNC);
// NOTE(review): body of this branch is not visible; the later
// update_capa_timer(next, ...) call suggests next is recorded here
164 if (!capa_is_to_expire(ocapa)) {
169 if (capa_for_mds(&ocapa->c_capa) &&
170 !S_ISDIR(ocapa->u.cli.inode->i_mode) &&
171 obd_capa_open_count(ocapa) == 0 &&
172 !obd_capa_is_root(ocapa) &&
173 !ll_have_md_lock(ocapa->u.cli.inode,
174 MDS_INODELOCK_LOOKUP)) {
175 /* MDS capa without LOOKUP lock, and the related
176 * inode is not opened, it won't renew,
177 * move to idle list (except root fid) */
178 DEBUG_CAPA(D_SEC, &ocapa->c_capa,
180 list_del_init(&ocapa->c_list);
181 sort_add_capa(ocapa, &ll_idle_capas);
185 if (capa_for_oss(&ocapa->c_capa) &&
186 obd_capa_open_count(ocapa) == 0) {
187 /* oss capa with open count == 0 won't renew,
188 * move to idle list */
189 list_del_init(&ocapa->c_list);
190 sort_add_capa(ocapa, &ll_idle_capas);
194 /* NB iput() is in ll_update_capa() */
195 inode = igrab(ocapa->u.cli.inode);
// NOTE(review): presumably reached only when igrab() failed; the guarding
// condition is not visible in this listing
197 DEBUG_CAPA(D_SEC, &ocapa->c_capa,
199 ll_delete_capa(ocapa);
// take the capa off ll_capa_list and drop capa_lock around the renewal call
// NOTE(review): capa_lock is released while the safe-iteration cursor tmp is
// still in use; if another path (for example ll_clear_inode_capas()) unlinks
// tmp in the meantime the walk would resume from a stale node -- confirm
203 list_del_init(&ocapa->c_list);
205 spin_unlock(&capa_lock);
207 rc = md_renew_capa(ll_i2mdexp(inode), ocapa,
209 spin_lock(&capa_lock);
// a capa whose renewal request failed is parked on the idle list
211 DEBUG_CAPA(D_ERROR, &ocapa->c_capa,
212 "renew failed: %d", rc);
213 sort_add_capa(ocapa, &ll_idle_capas);
// re-arm the timer for the next capa due for renewal
218 update_capa_timer(next, capa_renewal_time(next));
// pass 2: idle capas; the first one not yet expired re-arms the timer
220 list_for_each_entry_safe(ocapa, tmp, &ll_idle_capas, c_list) {
221 if (!capa_is_expired(ocapa)) {
223 update_capa_timer(ocapa, ocapa->c_expiry);
// an expired capa with a non-zero c_refc is only flagged as expired and
// unlinked from the idle list, not deleted
227 if (atomic_read(&ocapa->c_refc)) {
228 DEBUG_CAPA(D_SEC, &ocapa->c_capa,
229 "expired(c_refc %d), don't release",
230 atomic_read(&ocapa->c_refc));
231 obd_capa_set_expired(ocapa);
232 /* don't try to renew any more */
233 list_del_init(&ocapa->c_list);
237 /* expired capa is released. */
238 DEBUG_CAPA(D_SEC, &ocapa->c_capa, "release expired");
239 ll_delete_capa(ocapa);
242 spin_unlock(&capa_lock);
// announce termination to the waiter in ll_capa_thread_stop()
245 ll_capa_thread.t_flags = SVC_STOPPED;
246 wake_up(&ll_capa_thread.t_ctl_waitq);
// Wake the capa thread wait queue so the thread re-evaluates its wake-up
// condition. The argument is not used.
// NOTE(review): presumably installed as the handler of ll_capa_timer; the
// timer setup is not visible in this listing.
250 void ll_capa_timer_callback(unsigned long unused)
252 wake_up(&ll_capa_thread.t_ctl_waitq);
// Start the capa thread: initialise its wait queue, spawn capa_thread_main()
// with kernel_thread(), and block until the thread has set SVC_RUNNING.
// A kernel_thread() failure is reported with CERROR.
// NOTE(review): the declaration of rc, the error check around CERROR and the
// return statements are not visible in this listing.
255 int ll_capa_thread_start(void)
260 init_waitqueue_head(&ll_capa_thread.t_ctl_waitq);
262 rc = kernel_thread(capa_thread_main, NULL, 0);
264 CERROR("cannot start expired capa thread: rc %d\n", rc);
// wait for the thread to report that it is running
267 wait_event(ll_capa_thread.t_ctl_waitq,
268 ll_capa_thread.t_flags & SVC_RUNNING);
// Stop the capa thread: set t_flags to SVC_STOPPING, wake the thread, and
// block until the thread has set SVC_STOPPED.
273 void ll_capa_thread_stop(void)
275 ll_capa_thread.t_flags = SVC_STOPPING;
276 wake_up(&ll_capa_thread.t_ctl_waitq);
277 wait_event(ll_capa_thread.t_ctl_waitq,
278 ll_capa_thread.t_flags & SVC_STOPPED);
// Scan the lli_oss_capas list of inode for a capa that passes
// obd_capa_is_valid() and whose capa_opc() bits include every bit of opc.
// A matching capa is asserted to carry the fid of inode and to belong to the
// client site. The existing comment states that the caller holds capa_lock.
// NOTE(review): the continue lines after the two tests and the return
// statements are not visible in this listing.
281 static struct obd_capa *do_lookup_oss_capa(struct inode *inode, int opc)
283 struct ll_inode_info *lli = ll_i2info(inode);
284 struct obd_capa *ocapa;
286 /* inside capa_lock */
287 list_for_each_entry(ocapa, &lli->lli_oss_capas, u.cli.lli_list) {
288 if (!obd_capa_is_valid(ocapa))
290 if ((capa_opc(&ocapa->c_capa) & opc) != opc)
293 LASSERT(lu_fid_eq(capa_fid(&ocapa->c_capa),
294 ll_inode2fid(inode)));
295 LASSERT(ocapa->c_site == CAPA_SITE_CLIENT);
297 DEBUG_CAPA(D_SEC, &ocapa->c_capa, "found client");
// Look up an OSS capa of inode that supports the requested opc.
// Nothing is searched when LL_SBI_OSS_CAPA is not set in the superblock
// flags. opc must be CAPA_OPC_OSS_WRITE, WRITE|READ, or CAPA_OPC_OSS_TRUNC
// (asserted). Under capa_lock the lli_oss_capas list is walked; an entry is
// tested for WRITE support first, then READ, then TRUNC, depending on which
// bits opc carries. If ll_capa_debug is set on the reporting path, a single
// CERROR is emitted and the flag is cleared.
// NOTE(review): the handling of invalid entries, the bodies of the three
// match branches, the condition guarding the CERROR path, any reference
// counting, and the return statements are not visible in this listing.
304 struct obd_capa *ll_osscapa_get(struct inode *inode, __u64 opc)
306 struct ll_inode_info *lli = ll_i2info(inode);
307 struct obd_capa *ocapa;
310 if ((ll_i2sbi(inode)->ll_flags & LL_SBI_OSS_CAPA) == 0)
314 LASSERT(opc == CAPA_OPC_OSS_WRITE ||
315 opc == (CAPA_OPC_OSS_WRITE | CAPA_OPC_OSS_READ) ||
316 opc == CAPA_OPC_OSS_TRUNC);
318 spin_lock(&capa_lock);
319 list_for_each_entry(ocapa, &lli->lli_oss_capas, u.cli.lli_list) {
320 if (!obd_capa_is_valid(ocapa))
322 if ((opc & CAPA_OPC_OSS_WRITE) &&
323 capa_opc_supported(&ocapa->c_capa, CAPA_OPC_OSS_WRITE)) {
325 } else if ((opc & CAPA_OPC_OSS_READ) &&
326 capa_opc_supported(&ocapa->c_capa,
327 CAPA_OPC_OSS_READ)) {
329 } else if ((opc & CAPA_OPC_OSS_TRUNC) &&
330 capa_opc_supported(&ocapa->c_capa, opc)) {
// sanity checks on the selected capa: same fid as inode, client site
336 LASSERT(lu_fid_eq(capa_fid(&ocapa->c_capa),
337 ll_inode2fid(inode)));
338 LASSERT(ocapa->c_site == CAPA_SITE_CLIENT);
342 DEBUG_CAPA(D_SEC, &ocapa->c_capa, "found client");
// report a missing capability once, then silence further reports
346 if (atomic_read(&ll_capa_debug)) {
347 CERROR("no capability for "DFID" opc "LPX64"\n",
348 PFID(&lli->lli_fid), opc);
349 atomic_set(&ll_capa_debug, 0);
352 spin_unlock(&capa_lock);
// Fetch the MDS capa of inode.
// Nothing is fetched when LL_SBI_MDS_CAPA is not set in the superblock
// flags. Otherwise capa_get() is applied to lli_mds_capa under capa_lock.
// A capa that fails obd_capa_is_valid() is reported with a D_ERROR message.
// When no capa is held and ll_capa_debug is set, the debug branch runs and
// the flag is cleared.
// NOTE(review): the lines following the invalid-capa message, the first line
// of the debug branch, and the return statements are not visible in this
// listing.
357 struct obd_capa *ll_mdscapa_get(struct inode *inode)
359 struct ll_inode_info *lli = ll_i2info(inode);
360 struct obd_capa *ocapa;
363 LASSERT(inode != NULL);
365 if ((ll_i2sbi(inode)->ll_flags & LL_SBI_MDS_CAPA) == 0)
368 spin_lock(&capa_lock);
369 ocapa = capa_get(lli->lli_mds_capa);
370 spin_unlock(&capa_lock);
372 if (ocapa && !obd_capa_is_valid(ocapa)) {
373 DEBUG_CAPA(D_ERROR, &ocapa->c_capa, "invalid (flags %d)",
// NOTE(review): this branch is entered only when ocapa is NULL, yet the
// second and third assertions below pass ocapa to obd_capa_open_count() and
// read ocapa->u.cli.inode, both of which dereference it. This looks like a
// NULL pointer dereference whenever ll_capa_debug is set -- confirm against
// the complete source; using inode directly may be what was intended.
379 if (!ocapa && atomic_read(&ll_capa_debug)) {
381 LASSERT(!S_ISDIR(inode->i_mode));
382 LASSERT(!obd_capa_open_count(ocapa));
383 LASSERT(!ll_have_md_lock(ocapa->u.cli.inode,
384 MDS_INODELOCK_LOOKUP));
386 atomic_set(&ll_capa_debug, 0);
// Install or refresh the MDS capa of inode; *pcapa is the incoming capa.
// Install path: the incoming capa is bound to inode, stored in lli_mds_capa,
// its new flag is cleared and it is marked valid.
// Refresh path: the incoming c_capa is compared with the one already held
// (old) using memcmp(); when they differ, old->c_capa is overwritten and old
// is marked valid under old->c_lock.
// The only caller visible in this file, ll_add_capa(), holds capa_lock.
// NOTE(review): the condition selecting between the two paths (presumably a
// test of old), what happens when memcmp() reports equality, any update of
// *pcapa or release of the incoming capa, and the return values are not
// visible in this listing.
392 static inline int do_add_mds_capa(struct inode *inode, struct obd_capa **pcapa)
394 struct ll_inode_info *lli = ll_i2info(inode);
395 struct obd_capa *old = lli->lli_mds_capa;
396 struct obd_capa *ocapa = *pcapa;
400 ocapa->u.cli.inode = inode;
401 lli->lli_mds_capa = ocapa;
402 obd_capa_clear_new(ocapa);
403 obd_capa_set_valid(ocapa);
405 DEBUG_CAPA(D_SEC, &ocapa->c_capa, "add MDS");
407 if (!memcmp(&old->c_capa, &ocapa->c_capa, sizeof(old->c_capa)))
411 spin_lock(&old->c_lock);
412 old->c_capa = ocapa->c_capa;
413 obd_capa_set_valid(old);
414 spin_unlock(&old->c_lock);
416 DEBUG_CAPA(D_SEC, &old->c_capa, "update MDS");
// Place ocapa on the lli_oss_capas list of inode, positioned by c_expiry.
// The list is walked from the front for an entry whose c_expiry is earlier
// than that of ocapa (cfs_time_after); list_move_tail() then unlinks ocapa
// from wherever u.cli.lli_list currently is and links it immediately before
// that entry, or at the tail of the list when no entry was selected.
// NOTE(review): the rest of the in-function comment and the loop exit after
// a match (presumably a break) are not visible in this listing.
426 static inline void inode_add_oss_capa(struct inode *inode,
427 struct obd_capa *ocapa)
429 struct ll_inode_info *lli = ll_i2info(inode);
430 struct obd_capa *tmp;
431 struct list_head *next = NULL;
433 /* capa is sorted in lli_oss_capas so lookup can always find the
435 list_for_each_entry(tmp, &lli->lli_oss_capas, u.cli.lli_list) {
436 if (cfs_time_after(ocapa->c_expiry, tmp->c_expiry)) {
437 next = &tmp->u.cli.lli_list;
441 list_move_tail(&ocapa->u.cli.lli_list, next ?: &lli->lli_oss_capas);
// Install or refresh an OSS capa of inode; *pcapa is the incoming capa.
// inode must be a regular file (asserted). An existing capa is looked up
// with do_lookup_oss_capa() using the OSS-only bits of the incoming lc_opc.
// Install path: the incoming capa is bound to inode, its per-inode list node
// is initialised and it is marked valid.
// Refresh path: lc_expiry of the existing capa (old) is compared with the
// incoming one; on the update path old is marked valid under old->c_lock.
// Finally *pcapa is linked into the per-inode list by inode_add_oss_capa().
// The only caller visible in this file, ll_add_capa(), holds capa_lock.
// NOTE(review): the condition selecting between the paths, the statement
// executed under old->c_lock before obd_capa_set_valid(), any update of
// *pcapa, and the return values are not visible in this listing.
444 static inline int do_add_oss_capa(struct inode *inode, struct obd_capa **pcapa)
446 struct obd_capa *old, *ocapa = *pcapa;
447 struct lustre_capa *capa = &ocapa->c_capa;
450 LASSERTF(S_ISREG(inode->i_mode),
451 "inode has oss capa, but not regular file, mode: %d\n",
454 /* FIXME: can't replace it so easily with fine-grained opc */
455 old = do_lookup_oss_capa(inode, capa->lc_opc & CAPA_OPC_OSS_ONLY);
457 ocapa->u.cli.inode = inode;
458 INIT_LIST_HEAD(&ocapa->u.cli.lli_list);
459 obd_capa_set_valid(ocapa);
461 DEBUG_CAPA(D_SEC, capa, "add OSS");
463 if (old->c_capa.lc_expiry == capa->lc_expiry) {
466 spin_lock(&old->c_lock);
468 obd_capa_set_valid(old);
469 spin_unlock(&old->c_lock);
471 DEBUG_CAPA(D_SEC, capa, "update OSS");
479 inode_add_oss_capa(inode, *pcapa);
// Attach the capa ocapa to inode.
// Under capa_lock it dispatches to do_add_mds_capa() for an MDS capa and to
// do_add_oss_capa() otherwise. Both helpers receive the address of the local
// ocapa pointer, so they are able to redirect it. When the helper returns 0
// and the capa is not a pure truncate capa, the expiry is recomputed under
// c_lock, the capa is re-sorted into ll_capa_list and the renewal timer is
// updated. ll_capa_debug is set to 1 before capa_lock is released.
// NOTE(review): the declaration of rc, the end of the in-function comment
// and the return statement are not visible in this listing.
483 struct obd_capa *ll_add_capa(struct inode *inode, struct obd_capa *ocapa)
485 struct obd_capa **pcapa = &ocapa;
488 spin_lock(&capa_lock);
489 rc = capa_for_mds(&ocapa->c_capa) ? do_add_mds_capa(inode, pcapa) :
490 do_add_oss_capa(inode, pcapa);
493 /* truncate capa won't renew, or no existed capa changed, don't update
495 if (!rc && ocapa->c_capa.lc_opc != CAPA_OPC_OSS_TRUNC) {
496 spin_lock(&ocapa->c_lock);
497 set_capa_expiry(ocapa);
498 spin_unlock(&ocapa->c_lock);
500 list_del_init(&ocapa->c_list);
501 sort_add_capa(ocapa, ll_capa_list);
503 update_capa_timer(ocapa, capa_renewal_time(ocapa));
506 atomic_set(&ll_capa_debug, 1);
507 spin_unlock(&capa_lock);
// Apply the outcome of a renewal request to ocapa; capa is the renewed
// capability.
// Failure handling (under capa_lock): for -EIO on a capa that has not yet
// expired a retry time of one minute from now is computed; for -ENOENT on a
// capa that is not yet due the capa is asserted to be on a list; on the
// remaining visible path the capa is asserted to be off any list and is
// parked on ll_idle_capas. The failure is then logged at D_SEC for -ENOENT
// and D_ERROR otherwise.
// Success handling: the fields of ocapa->c_capa that precede lc_flags must
// equal those of capa (asserted); the new capability is copied in and the
// expiry recomputed under c_lock; then, under capa_lock, a capa with READ or
// WRITE opc bits is re-linked into the per-inode list, and ocapa is sorted
// into ll_capa_list and the renewal timer updated.
// NOTE(review): the declarations of rc and expiry, how rc is derived, the
// conditions separating the paths, the use made of the retry expiry, the
// iput() that a comment in capa_thread_main() attributes to this function,
// and the return statement are not visible in this listing.
513 int ll_update_capa(struct obd_capa *ocapa, struct lustre_capa *capa)
515 struct inode *inode = ocapa->u.cli.inode;
524 /* failed capa won't be renewed any longer, but if -EIO, client
525 * might be doing recovery, retry in 1 min. */
526 spin_lock(&capa_lock);
527 if (rc == -EIO && !capa_is_expired(ocapa)) {
528 expiry = jiffies + 60 * HZ;
529 DEBUG_CAPA(D_SEC, &ocapa->c_capa,
530 "renewal failed: -EIO, retry in 1 min");
533 if (rc == -ENOENT && !capa_is_to_expire(ocapa)) {
534 /* NB: in period of renewal, inode might be
535 * deleted and then created, so actually ocapa
536 * is a completely new one! */
537 LASSERT(!list_empty(&ocapa->c_list));
539 LASSERT(list_empty(&ocapa->c_list));
540 sort_add_capa(ocapa, &ll_idle_capas);
543 spin_unlock(&capa_lock);
545 DEBUG_CAPA(rc == -ENOENT ? D_SEC : D_ERROR, &ocapa->c_capa,
546 "renewal failed(rc: %d) for", rc);
// only the part of the capability before lc_flags has to stay unchanged
550 LASSERT(!memcmp(&ocapa->c_capa, capa,
551 offsetof(struct lustre_capa, lc_flags)));
553 spin_lock(&ocapa->c_lock);
554 ocapa->c_capa = *capa;
555 set_capa_expiry(ocapa);
556 spin_unlock(&ocapa->c_lock);
558 spin_lock(&capa_lock);
559 if (capa->lc_opc & (CAPA_OPC_OSS_READ | CAPA_OPC_OSS_WRITE))
560 inode_add_oss_capa(inode, ocapa);
561 DEBUG_CAPA(D_SEC, capa, "renew");
563 expiry = capa_renewal_time(ocapa);
565 sort_add_capa(ocapa, ll_capa_list);
566 update_capa_timer(ocapa, expiry);
567 spin_unlock(&capa_lock);
// Account for an open of inode by incrementing lli_open_count.
// The superblock flags LL_SBI_MDS_CAPA and LL_SBI_OSS_CAPA and the file type
// (regular file) are tested first.
// NOTE(review): the end of the flag test and the statements taken when the
// tests succeed (presumably early returns) are not visible in this listing.
575 void ll_capa_open(struct inode *inode)
577 struct ll_inode_info *lli = ll_i2info(inode);
579 if ((ll_i2sbi(inode)->ll_flags & (LL_SBI_MDS_CAPA | LL_SBI_OSS_CAPA))
583 if (!S_ISREG(inode->i_mode))
586 atomic_inc(&lli->lli_open_count);
// Account for a close of inode by decrementing lli_open_count; the
// counterpart of ll_capa_open() with the same flag and file type tests.
// NOTE(review): the end of the flag test and the statements taken when the
// tests succeed (presumably early returns) are not visible in this listing.
589 void ll_capa_close(struct inode *inode)
591 struct ll_inode_info *lli = ll_i2info(inode);
593 if ((ll_i2sbi(inode)->ll_flags & (LL_SBI_MDS_CAPA | LL_SBI_OSS_CAPA))
597 if (!S_ISREG(inode->i_mode))
600 atomic_dec(&lli->lli_open_count);
603 /* delete CAPA_OPC_OSS_TRUNC only */
// Delete a truncate capa: ocapa must carry the CAPA_OPC_OSS_TRUNC bit
// (asserted); it is removed with ll_delete_capa() under capa_lock.
// NOTE(review): the declaration of inode, any check made before the
// assertion, the statement between spin_lock() and ll_delete_capa(), and the
// use made of inode are not visible in this listing.
604 void ll_truncate_free_capa(struct obd_capa *ocapa)
611 LASSERT(ocapa->c_capa.lc_opc & CAPA_OPC_OSS_TRUNC);
612 DEBUG_CAPA(D_SEC, &ocapa->c_capa, "release truncate");
614 inode = ocapa->u.cli.inode;
616 spin_lock(&capa_lock);
618 ll_delete_capa(ocapa);
619 spin_unlock(&capa_lock);
// Drop the capas attached to inode: under capa_lock, ll_delete_capa() is
// applied to lli_mds_capa and to each entry of the lli_oss_capas list. The
// safe list iterator is used because ll_delete_capa() unlinks the entry
// being visited.
// NOTE(review): the test guarding the MDS capa deletion (presumably a NULL
// check) and the member argument of the list iterator are not visible in
// this listing.
622 void ll_clear_inode_capas(struct inode *inode)
624 struct ll_inode_info *lli = ll_i2info(inode);
625 struct obd_capa *ocapa, *tmp;
627 spin_lock(&capa_lock);
628 ocapa = lli->lli_mds_capa;
630 ll_delete_capa(ocapa);
632 list_for_each_entry_safe(ocapa, tmp, &lli->lli_oss_capas,
634 ll_delete_capa(ocapa);
635 spin_unlock(&capa_lock);