1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (C) 2005 Cluster File Systems, Inc.
6 * Author: Lai Siyao <lsy@clusterfs.com>
8 * This file is part of Lustre, http://www.lustre.org.
10 * Lustre is free software; you can redistribute it and/or
11 * modify it under the terms of version 2 of the GNU General Public
12 * License as published by the Free Software Foundation.
14 * Lustre is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with Lustre; if not, write to the Free Software
21 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 #define DEBUG_SUBSYSTEM S_LLITE
27 #include <linux/version.h>
28 #include <asm/uaccess.h>
29 #include <linux/file.h>
30 #include <linux/kmod.h>
32 #include <lustre_lite.h>
33 #include "llite_internal.h"
35 /* for obd_capa.c_list, client capa might stay in three places:
38 * 3. stand alone: just allocated.
// File-scope state shared by the capability helpers in this file.
// ll_idle_capas: list that sort_add_capa() fills with capas that are no
// longer renewed; capa_thread_main() walks it to release expired entries.
41 /* capas for oss writeback and those failed to renew */
42 static LIST_HEAD(ll_idle_capas);
// Control block of the renewal thread: t_flags carries the SVC_RUNNING,
// SVC_STOPPING and SVC_STOPPED states and t_ctl_waitq is the wait queue
// used by the start and stop handshakes and by the timer callback.
43 static struct ptlrpc_thread ll_capa_thread;
// Points at the CAPA_SITE_CLIENT slot of capa_list; entries are linked
// through obd_capa.c_list and inserted by sort_add_capa().
44 static struct list_head *ll_capa_list = &capa_list[CAPA_SITE_CLIENT];
46 /* llite capa renewal timer */
// Not static, so the symbol is visible outside this file.
// It is re-armed by update_capa_timer().
47 struct timer_list ll_capa_timer;
48 /* for debug: indicate whether capa on llite is enabled or not */
// Set to 1 by ll_add_capa(); ll_osscapa_get() and ll_mdscapa_get() read it
// before reporting a missing capability and then reset it to 0.
49 static atomic_t ll_capa_debug = ATOMIC_INIT(0);
// Re-arm ll_capa_timer so that it fires at expiry.
// The timer is modified only when expiry is earlier than the currently
// programmed expiration, or when the timer is not pending at all.
// ocapa is used solely to tag the debug message; expiry is passed straight
// to mod_timer() and is printed next to jiffies.
// Callers visible in this file invoke it while holding capa_lock.
51 static inline void update_capa_timer(struct obd_capa *ocapa, cfs_time_t expiry)
53 if (time_before(expiry, ll_capa_timer.expires) ||
54 !timer_pending(&ll_capa_timer)) {
55 mod_timer(&ll_capa_timer, expiry);
56 DEBUG_CAPA(D_SEC, &ocapa->c_capa,
57 "ll_capa_timer update: %lu/%lu by", expiry, jiffies);
// Wake-up predicate of the renewal thread (used as the wait condition in
// capa_thread_main together with ll_capa_check_stop).
// Under capa_lock it inspects only the FIRST entry of ll_capa_list, or,
// when that list is empty, the first entry of ll_idle_capas.
// NOTE(review): the declaration of expired, the conditions guarding the two
// update_capa_timer() calls and the return statement are not visible here;
// confirm them against the complete file.
61 static inline int have_expired_capa(void)
63 struct obd_capa *ocapa = NULL;
66 /* if ll_capa_list has client capa to expire or ll_idle_capas has
67 * expired capa, return 1.
69 spin_lock(&capa_lock);
70 if (!list_empty(ll_capa_list)) {
// Active list: test the head entry with capa_is_to_expire().
71 ocapa = list_entry(ll_capa_list->next, struct obd_capa, c_list);
72 expired = capa_is_to_expire(ocapa);
// The timer is re-armed for the renewal time of that head entry.
74 update_capa_timer(ocapa, capa_renewal_time(ocapa));
75 } else if (!list_empty(&ll_idle_capas)) {
// Idle list is consulted only when the active list is empty; here the test
// is capa_is_expired() and the timer target is the raw c_expiry.
76 ocapa = list_entry(ll_idle_capas.next, struct obd_capa, c_list);
77 expired = capa_is_expired(ocapa);
79 update_capa_timer(ocapa, ocapa->c_expiry);
81 spin_unlock(&capa_lock);
// The debug message dereferences ocapa after the lock is dropped.
// NOTE(review): ocapa stays NULL when both lists are empty; presumably this
// call is guarded by expired being set - confirm.
84 DEBUG_CAPA(D_SEC, &ocapa->c_capa, "expired");
// Report whether the renewal thread has been asked to stop.
// Returns 1 when the SVC_STOPPING bit is set in ll_capa_thread.t_flags and
// 0 otherwise. The flag word is read without taking any lock.
88 static inline int ll_capa_check_stop(void)
90 return (ll_capa_thread.t_flags & SVC_STOPPING) ? 1: 0;
// Link ocapa into the list at head, positioned by c_expiry.
// The list is scanned from its tail toward the front; an entry that ocapa
// expires after is recorded in before. ocapa is then added right after the
// recorded entry, or right after head itself when no entry was recorded.
// The expression with the omitted middle operand is the GNU conditional
// extension: it yields before when non-NULL, else head.
// NOTE(review): the declaration of tmp and the loop exit after a match are
// not visible here; confirm the scan stops at the first match.
93 static void sort_add_capa(struct obd_capa *ocapa, struct list_head *head)
96 struct list_head *before = NULL;
98 /* TODO: client capa is sorted by expiry, this could be optimized */
99 list_for_each_entry_reverse(tmp, head, c_list) {
100 if (cfs_time_after(ocapa->c_expiry, tmp->c_expiry)) {
101 before = &tmp->c_list;
// Guards against inserting the node after itself.
106 LASSERT(&ocapa->c_list != before);
107 list_add(&ocapa->c_list, before ?: head);
// Return the current value of lli_open_count for the inode referenced by
// oc->u.cli.inode. oc and its inode pointer are dereferenced
// unconditionally, so both must be valid.
110 static inline int obd_capa_open_count(struct obd_capa *oc)
112 struct ll_inode_info *lli = ll_i2info(oc->u.cli.inode);
113 return atomic_read(&lli->lli_open_count);
// Detach a client capa from its inode and from the list it is linked on.
// For an MDS capa the inode slot lli_mds_capa must point at ocapa and is
// cleared; for an OSS capa the node is removed from the per-inode list via
// u.cli.lli_list. Finally the capa is unlinked from c_list.
// Every caller visible in this file holds capa_lock around the call.
// NOTE(review): whatever releases the object after unlinking is not visible
// here; confirm against the complete file.
116 static void ll_delete_capa(struct obd_capa *ocapa)
118 struct ll_inode_info *lli = ll_i2info(ocapa->u.cli.inode);
120 if (capa_for_mds(&ocapa->c_capa)) {
121 LASSERT(lli->lli_mds_capa == ocapa);
122 lli->lli_mds_capa = NULL;
123 } else if (capa_for_oss(&ocapa->c_capa)) {
124 list_del_init(&ocapa->u.cli.lli_list);
127 DEBUG_CAPA(D_SEC, &ocapa->c_capa, "free client");
128 list_del(&ocapa->c_list);
// Body of the capability renewal thread started by ll_capa_thread_start().
// Outline of what the visible lines do:
//   1. daemonize, publish SVC_RUNNING and wake the starter;
//   2. sleep until a stop is requested or have_expired_capa() is true;
//   3. under capa_lock walk ll_capa_list, moving capas that should not be
//      renewed to ll_idle_capas and issuing md_renew_capa() for the rest;
//   4. walk ll_idle_capas and release or flag the expired entries;
//   5. on exit publish SVC_STOPPED and wake the stopper.
// NOTE(review): loop framing, continue and break statements, the igrab()
// failure test and the assignment of next are not visible here; the notes
// below describe only the statements that are.
132 /* three places where client capa is deleted:
133 * 1. capa_thread_main(), main place to delete expired capa.
134 * 2. ll_clear_inode_capas() in ll_clear_inode().
135 * 3. ll_truncate_free_capa() delete truncate capa explicitly in ll_truncate().
137 static int capa_thread_main(void *unused)
139 struct obd_capa *ocapa, *tmp, *next;
140 struct inode *inode = NULL;
141 struct l_wait_info lwi = { 0 };
145 cfs_daemonize("ll_capa");
// Start handshake: ll_capa_thread_start() waits for SVC_RUNNING.
147 ll_capa_thread.t_flags = SVC_RUNNING;
148 wake_up(&ll_capa_thread.t_ctl_waitq);
// Sleep until either a stop request or a capa that needs attention.
151 l_wait_event(ll_capa_thread.t_ctl_waitq,
152 (ll_capa_check_stop() || have_expired_capa()),
155 if (ll_capa_check_stop())
158 spin_lock(&capa_lock);
// Pass 1: active client capas. Truncate capas must never be on this list.
160 list_for_each_entry_safe(ocapa, tmp, ll_capa_list, c_list) {
161 LASSERT(ocapa->c_capa.lc_opc != CAPA_OPC_OSS_TRUNC);
163 if (!capa_is_to_expire(ocapa)) {
168 /* for MDS capability, only renew those which belong to
169 * dir, or its inode is opened, or client holds LOOKUP
172 if (capa_for_mds(&ocapa->c_capa) &&
173 !S_ISDIR(ocapa->u.cli.inode->i_mode) &&
174 obd_capa_open_count(ocapa) == 0 &&
175 !ll_have_md_lock(ocapa->u.cli.inode,
176 MDS_INODELOCK_LOOKUP)) {
177 DEBUG_CAPA(D_SEC, &ocapa->c_capa,
179 list_del_init(&ocapa->c_list);
180 sort_add_capa(ocapa, &ll_idle_capas);
184 /* for OSS capability, only renew those whose inode is
187 if (capa_for_oss(&ocapa->c_capa) &&
188 obd_capa_open_count(ocapa) == 0) {
189 /* oss capa with open count == 0 won't renew,
190 * move to idle list */
191 list_del_init(&ocapa->c_list);
192 sort_add_capa(ocapa, &ll_idle_capas);
196 /* NB iput() is in ll_update_capa() */
// Take an inode reference for the duration of the renewal request.
197 inode = igrab(ocapa->u.cli.inode);
// NOTE(review): presumably the delete below runs only when igrab() returned
// NULL - the guarding test is not visible; confirm.
199 DEBUG_CAPA(D_SEC, &ocapa->c_capa,
201 ll_delete_capa(ocapa);
// The capa is taken off the active list before the request is sent.
205 list_del_init(&ocapa->c_list);
// capa_lock is dropped around md_renew_capa() and re-taken afterwards.
207 spin_unlock(&capa_lock);
209 rc = md_renew_capa(ll_i2mdexp(inode), ocapa,
211 spin_lock(&capa_lock);
// A capa whose renewal request failed is parked on the idle list.
213 DEBUG_CAPA(D_ERROR, &ocapa->c_capa,
214 "renew failed: %d", rc);
215 sort_add_capa(ocapa, &ll_idle_capas);
220 update_capa_timer(next, capa_renewal_time(next));
// Pass 2: idle capas. Unexpired entries only re-arm the timer.
222 list_for_each_entry_safe(ocapa, tmp, &ll_idle_capas, c_list) {
223 if (!capa_is_expired(ocapa)) {
225 update_capa_timer(ocapa, ocapa->c_expiry);
// Expired but still referenced: mark it expired and unlink it from the
// idle list instead of deleting it.
229 if (atomic_read(&ocapa->c_refc)) {
230 DEBUG_CAPA(D_SEC, &ocapa->c_capa,
231 "expired(c_refc %d), don't release",
232 atomic_read(&ocapa->c_refc));
233 obd_capa_set_expired(ocapa);
234 /* don't try to renew any more */
235 list_del_init(&ocapa->c_list);
239 /* expired capa is released. */
240 DEBUG_CAPA(D_SEC, &ocapa->c_capa, "release expired");
241 ll_delete_capa(ocapa);
244 spin_unlock(&capa_lock);
// Stop handshake: ll_capa_thread_stop() waits for SVC_STOPPED.
247 ll_capa_thread.t_flags = SVC_STOPPED;
248 wake_up(&ll_capa_thread.t_ctl_waitq);
// Timer handler: wakes whoever sleeps on ll_capa_thread.t_ctl_waitq so that
// the renewal thread re-evaluates its wait condition.
// The argument is ignored.
// NOTE(review): the place where this handler is attached to ll_capa_timer
// is not in the visible part of the file.
252 void ll_capa_timer_callback(unsigned long unused)
254 wake_up(&ll_capa_thread.t_ctl_waitq);
// Start the capability renewal thread.
// Initialises the control wait queue, spawns capa_thread_main() through
// kernel_thread() and then blocks until the new thread has set SVC_RUNNING
// in ll_capa_thread.t_flags.
// NOTE(review): the declaration of rc, the test that guards the CERROR and
// the return statements are not visible here; presumably a negative rc from
// kernel_thread() is reported and returned - confirm.
257 int ll_capa_thread_start(void)
262 init_waitqueue_head(&ll_capa_thread.t_ctl_waitq);
264 rc = kernel_thread(capa_thread_main, NULL, 0);
266 CERROR("cannot start expired capa thread: rc %d\n", rc);
269 wait_event(ll_capa_thread.t_ctl_waitq,
270 ll_capa_thread.t_flags & SVC_RUNNING);
// Ask the renewal thread to exit and wait until it has done so.
// t_flags is overwritten with SVC_STOPPING (not OR-ed), the thread is woken
// and the caller then sleeps until the thread publishes SVC_STOPPED.
275 void ll_capa_thread_stop(void)
277 ll_capa_thread.t_flags = SVC_STOPPING;
278 wake_up(&ll_capa_thread.t_ctl_waitq);
279 wait_event(ll_capa_thread.t_ctl_waitq,
280 ll_capa_thread.t_flags & SVC_STOPPED);
// Search the per-inode OSS capa list (lli_oss_capas) for an entry that is
// valid and whose opcode mask contains every bit of opc.
// A candidate must also carry the fid of inode and belong to the client
// site; both are asserted rather than tested.
// Per the existing comment the caller must already hold capa_lock.
// NOTE(review): the statements that skip a non-matching entry and the
// return statements are not visible here; presumably the match is returned
// and NULL is returned when nothing matches - confirm.
283 static struct obd_capa *do_lookup_oss_capa(struct inode *inode, int opc)
285 struct ll_inode_info *lli = ll_i2info(inode);
286 struct obd_capa *ocapa;
288 /* inside capa_lock */
289 list_for_each_entry(ocapa, &lli->lli_oss_capas, u.cli.lli_list) {
290 if (!obd_capa_is_valid(ocapa))
// All requested opcode bits must be present in the stored capa.
292 if ((capa_opc(&ocapa->c_capa) & opc) != opc)
295 LASSERT(lu_fid_eq(capa_fid(&ocapa->c_capa),
296 ll_inode2fid(inode)));
297 LASSERT(ocapa->c_site == CAPA_SITE_CLIENT);
299 DEBUG_CAPA(D_SEC, &ocapa->c_capa, "found client");
// Look up an OSS capability of inode that supports the operation opc.
// opc must be exactly CAPA_OPC_OSS_WRITE, CAPA_OPC_OSS_RW or
// CAPA_OPC_OSS_TRUNC (asserted). The search runs under capa_lock over the
// valid entries of lli_oss_capas.
// When ll_capa_debug is set, a missing capability is reported once through
// CERROR and the flag is cleared.
// NOTE(review): the early return taken when LL_SBI_OSS_CAPA is off, the
// bookkeeping inside the three match branches, any reference taken on the
// result and the final return are not visible here; confirm them.
306 struct obd_capa *ll_osscapa_get(struct inode *inode, __u64 opc)
308 struct ll_inode_info *lli = ll_i2info(inode);
309 struct obd_capa *ocapa;
// OSS capabilities are disabled for this superblock.
312 if ((ll_i2sbi(inode)->ll_flags & LL_SBI_OSS_CAPA) == 0)
316 LASSERT(opc == CAPA_OPC_OSS_WRITE || opc == CAPA_OPC_OSS_RW ||
317 opc == CAPA_OPC_OSS_TRUNC);
319 spin_lock(&capa_lock);
320 list_for_each_entry(ocapa, &lli->lli_oss_capas, u.cli.lli_list) {
321 if (!obd_capa_is_valid(ocapa))
// Match order: WRITE support first, then READ support, then the exact
// opc when the TRUNC bit was requested.
323 if ((opc & CAPA_OPC_OSS_WRITE) &&
324 capa_opc_supported(&ocapa->c_capa, CAPA_OPC_OSS_WRITE)) {
326 } else if ((opc & CAPA_OPC_OSS_READ) &&
327 capa_opc_supported(&ocapa->c_capa,
328 CAPA_OPC_OSS_READ)) {
330 } else if ((opc & CAPA_OPC_OSS_TRUNC) &&
331 capa_opc_supported(&ocapa->c_capa, opc)) {
337 LASSERT(lu_fid_eq(capa_fid(&ocapa->c_capa),
338 ll_inode2fid(inode)));
339 LASSERT(ocapa->c_site == CAPA_SITE_CLIENT);
343 DEBUG_CAPA(D_SEC, &ocapa->c_capa, "found client");
// One-shot diagnostic, re-armed by ll_add_capa().
347 if (atomic_read(&ll_capa_debug)) {
348 CERROR("no capability for "DFID" opc "LPX64"\n",
349 PFID(&lli->lli_fid), opc);
350 atomic_set(&ll_capa_debug, 0);
353 spin_unlock(&capa_lock);
// Fetch the MDS capability cached on inode.
// The pointer is obtained with capa_get(lli->lli_mds_capa) under capa_lock
// and validated afterwards, outside the lock, with obd_capa_is_valid().
// NOTE(review): the early return taken when LL_SBI_MDS_CAPA is off, the
// handling of an invalid capa and the final return are not visible here.
358 struct obd_capa *ll_mdscapa_get(struct inode *inode)
// ll_i2info(inode) is evaluated before the NULL assertion on inode below.
360 struct ll_inode_info *lli = ll_i2info(inode);
361 struct obd_capa *ocapa;
364 LASSERT(inode != NULL);
366 if ((ll_i2sbi(inode)->ll_flags & LL_SBI_MDS_CAPA) == 0)
369 spin_lock(&capa_lock);
370 ocapa = capa_get(lli->lli_mds_capa);
371 spin_unlock(&capa_lock);
373 if (ocapa && !obd_capa_is_valid(ocapa)) {
374 DEBUG_CAPA(D_ERROR, &ocapa->c_capa, "invalid (flags %d)",
// One-shot diagnostic branch, entered only when no capa was obtained.
380 if (!ocapa && atomic_read(&ll_capa_debug)) {
382 LASSERT(!S_ISDIR(inode->i_mode));
// NOTE(review): the two assertions below pass ocapa to
// obd_capa_open_count() and read ocapa->u.cli.inode, yet this branch is
// entered only when ocapa is NULL. Unless a statement that is not visible
// here reassigns ocapa first, this is a NULL dereference - confirm, and
// consider using inode and lli directly instead.
383 LASSERT(!obd_capa_open_count(ocapa));
384 LASSERT(!ll_have_md_lock(ocapa->u.cli.inode,
385 MDS_INODELOCK_LOOKUP));
387 atomic_set(&ll_capa_debug, 0);
// Attach an MDS capability to inode, or refresh the one already attached.
// Two paths are visible:
//   install - the new capa records inode, becomes lli_mds_capa, has its
//             new flag cleared and is marked valid;
//   update  - the payload of the new capa is copied into the existing one
//             under its c_lock, and the existing one is marked valid.
// Called from ll_add_capa() with capa_lock held.
// NOTE(review): the test choosing between the paths (presumably whether old
// is NULL), what happens when the memcmp finds both payloads equal, any
// update of the pointer behind pcapa and the return value are not visible
// here; ll_add_capa() treats a zero return as a request to re-sort.
393 static inline int do_add_mds_capa(struct inode *inode, struct obd_capa **pcapa)
395 struct ll_inode_info *lli = ll_i2info(inode);
396 struct obd_capa *old = lli->lli_mds_capa;
397 struct obd_capa *ocapa = *pcapa;
401 ocapa->u.cli.inode = inode;
402 lli->lli_mds_capa = ocapa;
403 obd_capa_clear_new(ocapa);
404 obd_capa_set_valid(ocapa);
406 DEBUG_CAPA(D_SEC, &ocapa->c_capa, "add MDS");
// Byte-wise comparison of the whole capability payload.
408 if (!memcmp(&old->c_capa, &ocapa->c_capa, sizeof(old->c_capa)))
412 spin_lock(&old->c_lock);
413 old->c_capa = ocapa->c_capa;
414 obd_capa_set_valid(old);
415 spin_unlock(&old->c_lock);
417 DEBUG_CAPA(D_SEC, &old->c_capa, "update MDS");
// Place ocapa on the per-inode OSS capa list (lli_oss_capas), positioned by
// c_expiry. The list is scanned from the front; an entry that ocapa expires
// after is recorded in next. list_move_tail() then unlinks ocapa from
// wherever its lli_list node currently is and inserts it immediately before
// the recorded entry, or at the tail of the list when none was recorded.
// The expression with the omitted middle operand is the GNU conditional
// extension.
// NOTE(review): the loop exit after a match is not visible here; confirm
// the scan stops at the first match.
427 static inline void inode_add_oss_capa(struct inode *inode,
428 struct obd_capa *ocapa)
430 struct ll_inode_info *lli = ll_i2info(inode);
431 struct obd_capa *tmp;
432 struct list_head *next = NULL;
434 /* capa is sorted in lli_oss_capas so lookup can always find the
436 list_for_each_entry(tmp, &lli->lli_oss_capas, u.cli.lli_list) {
437 if (cfs_time_after(ocapa->c_expiry, tmp->c_expiry)) {
438 next = &tmp->u.cli.lli_list;
442 list_move_tail(&ocapa->u.cli.lli_list, next ?: &lli->lli_oss_capas);
// Attach an OSS capability to inode, or refresh a matching existing one.
// inode must be a regular file (asserted). An existing capa is looked up
// with do_lookup_oss_capa() using the OSS-only bits of the new opcode.
// Two paths are visible:
//   install - the new capa records inode, gets a fresh lli_list node and
//             is marked valid;
//   update  - an existing capa is refreshed under its c_lock and marked
//             valid.
// Finally the capa behind pcapa is sorted into the per-inode list.
// Called from ll_add_capa() with capa_lock held.
// NOTE(review): the test choosing between the paths, the action taken when
// both expiry values are equal, the payload copy inside the locked region,
// any update of the pointer behind pcapa and the return value are not
// visible here; confirm them.
445 static inline int do_add_oss_capa(struct inode *inode, struct obd_capa **pcapa)
447 struct obd_capa *old, *ocapa = *pcapa;
448 struct lustre_capa *capa = &ocapa->c_capa;
451 LASSERTF(S_ISREG(inode->i_mode),
452 "inode has oss capa, but not regular file, mode: %d\n",
455 /* FIXME: can't replace it so easily with fine-grained opc */
456 old = do_lookup_oss_capa(inode, capa->lc_opc & CAPA_OPC_OSS_ONLY);
458 ocapa->u.cli.inode = inode;
459 INIT_LIST_HEAD(&ocapa->u.cli.lli_list);
460 obd_capa_set_valid(ocapa);
462 DEBUG_CAPA(D_SEC, capa, "add OSS");
// Same expiry on both sides is used as the sign that nothing changed.
464 if (old->c_capa.lc_expiry == capa->lc_expiry) {
467 spin_lock(&old->c_lock);
469 obd_capa_set_valid(old);
470 spin_unlock(&old->c_lock);
472 DEBUG_CAPA(D_SEC, capa, "update OSS");
480 inode_add_oss_capa(inode, *pcapa);
// Register a capability received for inode.
// Under capa_lock the capa is handed to do_add_mds_capa() or
// do_add_oss_capa() depending on capa_for_mds(). When that helper returns 0
// and the capa is not a truncate capa, its expiry is recomputed under
// c_lock, it is re-sorted into ll_capa_list and the renewal timer is
// updated. ll_capa_debug is set to 1 on every call, which re-arms the
// one-shot diagnostics in the getter functions.
// NOTE(review): the declaration of rc, any reload of ocapa from pcapa after
// the helper call and the return statement are not visible here; confirm
// which object is returned to the caller.
484 struct obd_capa *ll_add_capa(struct inode *inode, struct obd_capa *ocapa)
// pcapa lets the helper hand back a different capa through the pointer.
486 struct obd_capa **pcapa = &ocapa;
489 spin_lock(&capa_lock);
490 rc = capa_for_mds(&ocapa->c_capa) ? do_add_mds_capa(inode, pcapa) :
491 do_add_oss_capa(inode, pcapa);
494 /* truncate capa won't renew, or no existed capa changed, don't update
496 if (!rc && ocapa->c_capa.lc_opc != CAPA_OPC_OSS_TRUNC) {
497 spin_lock(&ocapa->c_lock);
498 set_capa_expiry(ocapa);
499 spin_unlock(&ocapa->c_lock);
// Unlink first so the sorted insert starts from a detached node.
501 list_del_init(&ocapa->c_list);
502 sort_add_capa(ocapa, ll_capa_list);
504 update_capa_timer(ocapa, capa_renewal_time(ocapa));
507 atomic_set(&ll_capa_debug, 1);
508 spin_unlock(&capa_lock);
// Completion handler for a capability renewal of ocapa; capa carries the
// renewed payload on success.
// Failure path (visible part): under capa_lock, an EIO failure on a capa
// that is not yet expired schedules a retry 60 seconds from now; otherwise
// the capa is parked on ll_idle_capas, except for the ENOENT case on a capa
// that is not about to expire, which the existing comment explains as a
// re-created inode.
// Success path: the leading part of the payload, up to lc_flags, must be
// unchanged (asserted); the payload is replaced and the expiry recomputed
// under c_lock; then, under capa_lock, OSS read-write capas are re-sorted
// in the per-inode list and the capa goes back onto ll_capa_list with the
// timer updated.
// NOTE(review): rc and expiry are assigned on lines that are not visible
// here, as are the branch framing, the iput() promised by the comment in
// capa_thread_main and the return value; confirm them.
514 int ll_update_capa(struct obd_capa *ocapa, struct lustre_capa *capa)
516 struct inode *inode = ocapa->u.cli.inode;
525 /* failed capa won't be renewed any longer, but if -EIO, client
526 * might be doing recovery, retry in 1 min. */
527 spin_lock(&capa_lock);
528 if (rc == -EIO && !capa_is_expired(ocapa)) {
529 expiry = jiffies + 60 * HZ;
530 DEBUG_CAPA(D_SEC, &ocapa->c_capa,
531 "renewal failed: -EIO, retry in 1 min");
534 if (rc == -ENOENT && !capa_is_to_expire(ocapa)) {
535 /* NB: in period of renewal, inode might be
536 * deleted and then created, so actually ocapa
537 * is a completely new one! */
538 LASSERT(!list_empty(&ocapa->c_list));
// In the remaining failure case the capa must be unlinked already.
540 LASSERT(list_empty(&ocapa->c_list));
541 sort_add_capa(ocapa, &ll_idle_capas);
544 spin_unlock(&capa_lock);
// ENOENT is logged at D_SEC, every other failure at D_ERROR.
546 DEBUG_CAPA(rc == -ENOENT ? D_SEC : D_ERROR, &ocapa->c_capa,
547 "renewal failed(rc: %d) for", rc);
551 LASSERT(!memcmp(&ocapa->c_capa, capa,
552 offsetof(struct lustre_capa, lc_flags)));
554 spin_lock(&ocapa->c_lock);
555 ocapa->c_capa = *capa;
556 set_capa_expiry(ocapa);
557 spin_unlock(&ocapa->c_lock);
559 spin_lock(&capa_lock);
560 if (capa->lc_opc & CAPA_OPC_OSS_RW)
561 inode_add_oss_capa(inode, ocapa);
562 DEBUG_CAPA(D_SEC, capa, "renew");
564 expiry = capa_renewal_time(ocapa);
566 sort_add_capa(ocapa, ll_capa_list);
567 update_capa_timer(ocapa, expiry);
568 spin_unlock(&capa_lock);
// Account for an open of inode: increments lli_open_count.
// The visible tests look at the MDS and OSS capability flags of the
// superblock and at whether inode is a regular file.
// NOTE(review): the right-hand side of the flag comparison and the early
// returns are not visible here; presumably the counter is touched only for
// regular files when at least one capability flag is set - confirm.
576 void ll_capa_open(struct inode *inode)
578 struct ll_inode_info *lli = ll_i2info(inode);
580 if ((ll_i2sbi(inode)->ll_flags & (LL_SBI_MDS_CAPA | LL_SBI_OSS_CAPA))
584 if (!S_ISREG(inode->i_mode))
587 atomic_inc(&lli->lli_open_count);
// Account for a close of inode: decrements lli_open_count.
// Mirrors ll_capa_open(): the same superblock flag test and regular-file
// test precede the counter update.
// NOTE(review): the right-hand side of the flag comparison and the early
// returns are not visible here; confirm they match ll_capa_open() so the
// counter stays balanced.
590 void ll_capa_close(struct inode *inode)
592 struct ll_inode_info *lli = ll_i2info(inode);
594 if ((ll_i2sbi(inode)->ll_flags & (LL_SBI_MDS_CAPA | LL_SBI_OSS_CAPA))
598 if (!S_ISREG(inode->i_mode))
601 atomic_dec(&lli->lli_open_count);
// Release a truncate capability explicitly.
// The capa must carry the CAPA_OPC_OSS_TRUNC bit (asserted); it is removed
// through ll_delete_capa() while capa_lock is held.
// NOTE(review): the declaration of inode, any NULL check on ocapa ahead of
// the assertion and the statements between taking the lock and the delete
// are not visible here; the local inode is assigned but its use is not
// shown - confirm.
604 /* delete CAPA_OPC_OSS_TRUNC only */
605 void ll_truncate_free_capa(struct obd_capa *ocapa)
612 LASSERT(ocapa->c_capa.lc_opc & CAPA_OPC_OSS_TRUNC);
613 DEBUG_CAPA(D_SEC, &ocapa->c_capa, "release truncate");
615 inode = ocapa->u.cli.inode;
617 spin_lock(&capa_lock);
619 ll_delete_capa(ocapa);
620 spin_unlock(&capa_lock);
// Drop every capability attached to inode.
// Under capa_lock the MDS capa stored in lli_mds_capa is deleted, then all
// entries of lli_oss_capas are deleted using the safe list iterator, since
// ll_delete_capa() unlinks the entry being visited.
// NOTE(review): the guard around the first ll_delete_capa() call is not
// visible here; presumably it runs only when lli_mds_capa is non-NULL,
// because ll_delete_capa() dereferences its argument - confirm.
623 void ll_clear_inode_capas(struct inode *inode)
625 struct ll_inode_info *lli = ll_i2info(inode);
626 struct obd_capa *ocapa, *tmp;
628 spin_lock(&capa_lock);
629 ocapa = lli->lli_mds_capa;
631 ll_delete_capa(ocapa);
633 list_for_each_entry_safe(ocapa, tmp, &lli->lli_oss_capas,
635 ll_delete_capa(ocapa);
636 spin_unlock(&capa_lock);