X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Fllite%2Fllite_capa.c;h=7b6302df199769b7f4fd52d5baeb96963a14d51b;hp=4f34a807176fc3d58632b2b59dc0d4cb954a8685;hb=2b23ad0d183141dc25377f2d37de6e6e36ba1169;hpb=8a8d1fbac00a362d0204899b747b9f773bffdd7b diff --git a/lustre/llite/llite_capa.c b/lustre/llite/llite_capa.c index 4f34a80..7b6302d 100644 --- a/lustre/llite/llite_capa.c +++ b/lustre/llite/llite_capa.c @@ -1,24 +1,41 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: +/* + * GPL HEADER START * - * Copyright (C) 2004, 2005 Cluster File Systems, Inc. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * - * Author: Lai Siyao + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf * - * This file is part of Lustre, http://www.lustre.org. + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. + * Copyright (c) 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. + * lustre/llite/llite_capa.c * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * Author: Lai Siyao */ #define DEBUG_SUBSYSTEM S_LLITE @@ -29,305 +46,616 @@ #include #include -#include +#include #include "llite_internal.h" -static struct list_head *ll_capa_list = &capa_list[CLIENT_CAPA]; -static struct ptlrpc_thread capa_thread; +/* for obd_capa.c_list, client capa might stay in three places: + * 1. ll_capa_list. + * 2. ll_idle_capas. + * 3. stand alone: just allocated. + */ -static struct thread_ctl { - struct completion ctl_starting; - struct completion ctl_finishing; -} ll_capa_ctl; +/* capas for oss writeback and those failed to renew */ +static CFS_LIST_HEAD(ll_idle_capas); +static struct ptlrpc_thread ll_capa_thread; +static cfs_list_t *ll_capa_list = &capa_list[CAPA_SITE_CLIENT]; + +/* llite capa renewal timer */ +struct timer_list ll_capa_timer; +/* for debug: indicate whether capa on llite is enabled or not */ +static cfs_atomic_t ll_capa_debug = CFS_ATOMIC_INIT(0); +static unsigned long long ll_capa_renewed = 0; +static unsigned long long ll_capa_renewal_noent = 0; +static unsigned long long ll_capa_renewal_failed = 0; +static unsigned long long ll_capa_renewal_retries = 0; + +static inline void update_capa_timer(struct obd_capa *ocapa, cfs_time_t expiry) +{ + if (cfs_time_before(expiry, ll_capa_timer.expires) || + !timer_pending(&ll_capa_timer)) { + mod_timer(&ll_capa_timer, expiry); + DEBUG_CAPA(D_SEC, &ocapa->c_capa, + "ll_capa_timer update: %lu/%lu by", expiry, jiffies); + } +} + +static inline cfs_time_t capa_renewal_time(struct obd_capa *ocapa) +{ + return cfs_time_sub(ocapa->c_expiry, + cfs_time_seconds(ocapa->c_capa.lc_timeout) / 2); +} + +static inline int capa_is_to_expire(struct obd_capa *ocapa) +{ + return cfs_time_beforeq(capa_renewal_time(ocapa), cfs_time_current()); +} static inline int have_expired_capa(void) { - struct obd_capa *ocapa; - struct lustre_capa *capa; - int expired = 0; - unsigned long expiry; - ENTRY; + struct obd_capa *ocapa = NULL; + int expired = 0; + + /* if ll_capa_list has client capa to expire or ll_idle_capas has + * expired capa, return 1. + */ + spin_lock(&capa_lock); + if (!cfs_list_empty(ll_capa_list)) { + ocapa = cfs_list_entry(ll_capa_list->next, struct obd_capa, + c_list); + expired = capa_is_to_expire(ocapa); + if (!expired) + update_capa_timer(ocapa, capa_renewal_time(ocapa)); + } else if (!cfs_list_empty(&ll_idle_capas)) { + ocapa = cfs_list_entry(ll_idle_capas.next, struct obd_capa, + c_list); + expired = capa_is_expired(ocapa); + if (!expired) + update_capa_timer(ocapa, ocapa->c_expiry); + } + spin_unlock(&capa_lock); - spin_lock(&capa_lock); - if (!list_empty(ll_capa_list)) { - ocapa = list_entry(ll_capa_list->next, struct obd_capa, c_list); - expired = __capa_is_to_expire(ocapa); - if (!expired) { - capa = &ocapa->c_capa; - expiry = expiry_to_jiffies(capa->lc_expiry - - capa_pre_expiry(capa)); - if (time_before(expiry, ll_capa_timer.expires) || - !timer_pending(&ll_capa_timer)) { - mod_timer(&ll_capa_timer, expiry); - CDEBUG(D_INFO,"ll_capa_timer new expiry: %lu\n", - expiry); - } + if (expired) + DEBUG_CAPA(D_SEC, &ocapa->c_capa, "expired"); + return expired; +} + +static void sort_add_capa(struct obd_capa *ocapa, cfs_list_t *head) +{ + struct obd_capa *tmp; + cfs_list_t *before = NULL; + + /* TODO: client capa is sorted by expiry, this could be optimized */ + cfs_list_for_each_entry_reverse(tmp, head, c_list) { + if (cfs_time_aftereq(ocapa->c_expiry, tmp->c_expiry)) { + before = &tmp->c_list; + break; } } - spin_unlock(&capa_lock); - RETURN(expired); + LASSERT(&ocapa->c_list != before); + cfs_list_add(&ocapa->c_list, before ?: head); } -static int inline ll_capa_check_stop(void) +static inline int obd_capa_open_count(struct obd_capa *oc) { - return (capa_thread.t_flags & SVC_STOPPING) ? 1: 0; + struct ll_inode_info *lli = ll_i2info(oc->u.cli.inode); + return cfs_atomic_read(&lli->lli_open_count); } -static int ll_renew_capa(struct obd_capa *ocapa) +static void ll_delete_capa(struct obd_capa *ocapa) { - struct ptlrpc_request *req = NULL; - /* no need to lock, no one else will touch it */ - struct inode *inode = ocapa->c_inode; - struct obd_export *md_exp = ll_i2mdexp(inode); - struct ll_inode_info *lli = ll_i2info(inode); - __u64 valid = OBD_MD_CAPA; - int rc; - ENTRY; + struct ll_inode_info *lli = ll_i2info(ocapa->u.cli.inode); - if (capa_expired(&ocapa->c_capa)) - RETURN(-ESTALE); + if (capa_for_mds(&ocapa->c_capa)) { + LASSERT(lli->lli_mds_capa == ocapa); + lli->lli_mds_capa = NULL; + } else if (capa_for_oss(&ocapa->c_capa)) { + cfs_list_del_init(&ocapa->u.cli.lli_list); + } - rc = md_getattr(md_exp, &lli->lli_id, valid, NULL, NULL, 0, - 0, ocapa, &req); - RETURN(rc); + DEBUG_CAPA(D_SEC, &ocapa->c_capa, "free client"); + cfs_list_del_init(&ocapa->c_list); + capa_count[CAPA_SITE_CLIENT]--; + /* release the ref when alloc */ + capa_put(ocapa); } -static int ll_capa_thread(void *arg) +/* three places where client capa is deleted: + * 1. capa_thread_main(), main place to delete expired capa. + * 2. ll_clear_inode_capas() in ll_clear_inode(). + * 3. ll_truncate_free_capa() delete truncate capa explicitly in ll_setattr_ost(). + */ +static int capa_thread_main(void *unused) { - struct thread_ctl *ctl = arg; - unsigned long flags; - int rc; - ENTRY; - - { - char name[sizeof(current->comm)]; - snprintf(name, sizeof(name) - 1, "ll_capa"); - kportal_daemonize(name); - } + struct obd_capa *ocapa, *tmp, *next; + struct inode *inode = NULL; + struct l_wait_info lwi = { 0 }; + int rc; + ENTRY; + + thread_set_flags(&ll_capa_thread, SVC_RUNNING); + wake_up(&ll_capa_thread.t_ctl_waitq); + + while (1) { + l_wait_event(ll_capa_thread.t_ctl_waitq, + !thread_is_running(&ll_capa_thread) || + have_expired_capa(), + &lwi); + + if (!thread_is_running(&ll_capa_thread)) + break; - SIGNAL_MASK_LOCK(current, flags); - sigfillset(¤t->blocked); - RECALC_SIGPENDING; - SIGNAL_MASK_UNLOCK(current, flags); + next = NULL; - /* - * letting starting function know, that we are ready and control may be - * returned. - */ - capa_thread.t_flags = SVC_RUNNING; - complete(&ctl->ctl_starting); + spin_lock(&capa_lock); + cfs_list_for_each_entry_safe(ocapa, tmp, ll_capa_list, c_list) { + __u64 ibits; - while (1) { - struct l_wait_info lwi = { 0 }; - struct obd_capa *ocapa, *tmp, *next = NULL, tcapa; - unsigned long expiry, sleep = CAPA_PRE_EXPIRY; + LASSERT(ocapa->c_capa.lc_opc != CAPA_OPC_OSS_TRUNC); - l_wait_event(capa_thread.t_ctl_waitq, - (have_expired_capa() || ll_capa_check_stop()), - &lwi); + if (!capa_is_to_expire(ocapa)) { + next = ocapa; + break; + } - if (ll_capa_check_stop()) - break; + cfs_list_del_init(&ocapa->c_list); + + /* for MDS capability, only renew those which belong to + * dir, or its inode is opened, or client holds LOOKUP + * lock. + */ + /* ibits may be changed by ll_have_md_lock() so we have + * to set it each time */ + ibits = MDS_INODELOCK_LOOKUP; + if (capa_for_mds(&ocapa->c_capa) && + !S_ISDIR(ocapa->u.cli.inode->i_mode) && + obd_capa_open_count(ocapa) == 0 && + !ll_have_md_lock(ocapa->u.cli.inode, + &ibits, LCK_MINMODE)) { + DEBUG_CAPA(D_SEC, &ocapa->c_capa, + "skip renewal for"); + sort_add_capa(ocapa, &ll_idle_capas); + continue; + } - spin_lock(&capa_lock); - list_for_each_entry_safe(ocapa, tmp, ll_capa_list, c_list) { - if (ocapa->c_capa.lc_flags & CAPA_FL_SHORT) - sleep = CAPA_PRE_EXPIRY_SHORT; + /* for OSS capability, only renew those whose inode is + * opened. + */ + if (capa_for_oss(&ocapa->c_capa) && + obd_capa_open_count(ocapa) == 0) { + /* oss capa with open count == 0 won't renew, + * move to idle list */ + sort_add_capa(ocapa, &ll_idle_capas); + continue; + } - if (ocapa->c_capa.lc_op == CAPA_TRUNC) + /* NB iput() is in ll_update_capa() */ + inode = igrab(ocapa->u.cli.inode); + if (inode == NULL) { + DEBUG_CAPA(D_ERROR, &ocapa->c_capa, + "igrab failed for"); continue; + } - if (__capa_is_to_expire(ocapa)) { - /* copy capa in case it's deleted */ - tcapa = *ocapa; - spin_unlock(&capa_lock); + capa_get(ocapa); + ll_capa_renewed++; + spin_unlock(&capa_lock); + rc = md_renew_capa(ll_i2mdexp(inode), ocapa, + ll_update_capa); + spin_lock(&capa_lock); + if (rc) { + DEBUG_CAPA(D_ERROR, &ocapa->c_capa, + "renew failed: %d", rc); + ll_capa_renewal_failed++; + } + } - rc = ll_renew_capa(&tcapa); - if (rc) - capa_put(ocapa); + if (next) + update_capa_timer(next, capa_renewal_time(next)); - spin_lock(&capa_lock); - } else { - next = ocapa; + cfs_list_for_each_entry_safe(ocapa, tmp, &ll_idle_capas, + c_list) { + if (!capa_is_expired(ocapa)) { + if (!next) + update_capa_timer(ocapa, + ocapa->c_expiry); break; } - } - if (next) { - struct lustre_capa *capa = &next->c_capa; - - expiry = expiry_to_jiffies(capa->lc_expiry - - capa_pre_expiry(capa)); - if (time_before(expiry, ll_capa_timer.expires) || - !timer_pending(&ll_capa_timer)) { - mod_timer(&ll_capa_timer, expiry); - CDEBUG(D_INFO,"ll_capa_timer new expiry: %lu\n", - expiry); + if (cfs_atomic_read(&ocapa->c_refc) > 1) { + DEBUG_CAPA(D_SEC, &ocapa->c_capa, + "expired(c_refc %d), don't release", + cfs_atomic_read(&ocapa->c_refc)); + /* don't try to renew any more */ + cfs_list_del_init(&ocapa->c_list); + continue; } - } - spin_unlock(&capa_lock); - /* wait ll_renew_capa finish */ - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(sleep * HZ); - } + /* expired capa is released. */ + DEBUG_CAPA(D_SEC, &ocapa->c_capa, "release expired"); + ll_delete_capa(ocapa); + } - capa_thread.t_flags = SVC_STOPPED; + spin_unlock(&capa_lock); + } - /* this is SMP-safe way to finish thread. */ - complete_and_exit(&ctl->ctl_finishing, 0); - EXIT; + thread_set_flags(&ll_capa_thread, SVC_STOPPED); + wake_up(&ll_capa_thread.t_ctl_waitq); + RETURN(0); } -/* just wake up, others are handled by ll_capa_thread */ void ll_capa_timer_callback(unsigned long unused) { - ENTRY; - wake_up(&capa_thread.t_ctl_waitq); - EXIT; + wake_up(&ll_capa_thread.t_ctl_waitq); } int ll_capa_thread_start(void) { - int rc; - ENTRY; + struct task_struct *task; + ENTRY; - LASSERT(capa_thread.t_flags == 0); - init_completion(&ll_capa_ctl.ctl_starting); - init_completion(&ll_capa_ctl.ctl_finishing); - init_waitqueue_head(&capa_thread.t_ctl_waitq); - - rc = kernel_thread(ll_capa_thread, &ll_capa_ctl, - (CLONE_VM | CLONE_FILES)); - if (rc < 0) { - CERROR("cannot start expired capa thread, " - "err = %d\n", rc); - RETURN(rc); - } - wait_for_completion(&ll_capa_ctl.ctl_starting); - LASSERT(capa_thread.t_flags == SVC_RUNNING); - RETURN(0); + init_waitqueue_head(&ll_capa_thread.t_ctl_waitq); + + task = kthread_run(capa_thread_main, NULL, "ll_capa"); + if (IS_ERR(task)) { + CERROR("cannot start expired capa thread: rc %ld\n", + PTR_ERR(task)); + RETURN(PTR_ERR(task)); + } + wait_event(ll_capa_thread.t_ctl_waitq, + thread_is_running(&ll_capa_thread)); + + RETURN(0); } void ll_capa_thread_stop(void) { + thread_set_flags(&ll_capa_thread, SVC_STOPPING); + wake_up(&ll_capa_thread.t_ctl_waitq); + wait_event(ll_capa_thread.t_ctl_waitq, + thread_is_stopped(&ll_capa_thread)); +} + +struct obd_capa *ll_osscapa_get(struct inode *inode, __u64 opc) +{ + struct ll_inode_info *lli = ll_i2info(inode); + struct obd_capa *ocapa; + int found = 0; + ENTRY; - capa_thread.t_flags = SVC_STOPPING; - wake_up(&capa_thread.t_ctl_waitq); - wait_for_completion(&ll_capa_ctl.ctl_finishing); - LASSERT(capa_thread.t_flags == SVC_STOPPED); - capa_thread.t_flags = 0; + if ((ll_i2sbi(inode)->ll_flags & LL_SBI_OSS_CAPA) == 0) + RETURN(NULL); + + LASSERT(opc == CAPA_OPC_OSS_WRITE || opc == CAPA_OPC_OSS_RW || + opc == CAPA_OPC_OSS_TRUNC); + + spin_lock(&capa_lock); + cfs_list_for_each_entry(ocapa, &lli->lli_oss_capas, u.cli.lli_list) { + if (capa_is_expired(ocapa)) + continue; + if ((opc & CAPA_OPC_OSS_WRITE) && + capa_opc_supported(&ocapa->c_capa, CAPA_OPC_OSS_WRITE)) { + found = 1; + break; + } else if ((opc & CAPA_OPC_OSS_READ) && + capa_opc_supported(&ocapa->c_capa, + CAPA_OPC_OSS_READ)) { + found = 1; + break; + } else if ((opc & CAPA_OPC_OSS_TRUNC) && + capa_opc_supported(&ocapa->c_capa, opc)) { + found = 1; + break; + } + } - EXIT; + if (found) { + LASSERT(lu_fid_eq(capa_fid(&ocapa->c_capa), + ll_inode2fid(inode))); + LASSERT(ocapa->c_site == CAPA_SITE_CLIENT); + + capa_get(ocapa); + + DEBUG_CAPA(D_SEC, &ocapa->c_capa, "found client"); + } else { + ocapa = NULL; + + if (cfs_atomic_read(&ll_capa_debug)) { + CERROR("no capability for "DFID" opc "LPX64"\n", + PFID(&lli->lli_fid), opc); + cfs_atomic_set(&ll_capa_debug, 0); + } + } + spin_unlock(&capa_lock); + + RETURN(ocapa); } +EXPORT_SYMBOL(ll_osscapa_get); -int ll_set_capa(struct inode *inode, struct lookup_intent *it, - struct obd_client_handle *och) +struct obd_capa *ll_mdscapa_get(struct inode *inode) { - struct ptlrpc_request *req = LUSTRE_IT(it)->it_data; - struct mds_body *body; - struct lustre_capa *capa; - struct obd_capa *ocapa; struct ll_inode_info *lli = ll_i2info(inode); - unsigned long expiry; + struct obd_capa *ocapa; + ENTRY; - if (!S_ISREG(inode->i_mode)) - return 0; + LASSERT(inode != NULL); - /* GNS code path will have no req */ - if (!req) - return 0; + if ((ll_i2sbi(inode)->ll_flags & LL_SBI_MDS_CAPA) == 0) + RETURN(NULL); - body = lustre_msg_buf(req->rq_repmsg, 1, sizeof (*body)); - LASSERT(body != NULL); /* reply already checked out */ - LASSERT_REPSWABBED(req, 1); /* and swabbed down */ + spin_lock(&capa_lock); + ocapa = capa_get(lli->lli_mds_capa); + spin_unlock(&capa_lock); + if (!ocapa && cfs_atomic_read(&ll_capa_debug)) { + CERROR("no mds capability for "DFID"\n", PFID(&lli->lli_fid)); + cfs_atomic_set(&ll_capa_debug, 0); + } - if (!(body->valid & OBD_MD_CAPA)) - return 0; + RETURN(ocapa); +} - ENTRY; +static struct obd_capa *do_add_mds_capa(struct inode *inode, + struct obd_capa *ocapa) +{ + struct ll_inode_info *lli = ll_i2info(inode); + struct obd_capa *old = lli->lli_mds_capa; + struct lustre_capa *capa = &ocapa->c_capa; - capa = lustre_msg_buf(req->rq_repmsg, 7, sizeof (*capa)); - LASSERT(capa != NULL); /* reply already checked out */ - LASSERT_REPSWABBED(req, 7); /* and swabbed down */ + if (!old) { + ocapa->u.cli.inode = inode; + lli->lli_mds_capa = ocapa; + capa_count[CAPA_SITE_CLIENT]++; - ocapa = capa_renew(capa, CLIENT_CAPA); - if (!ocapa) - RETURN(-ENOMEM); + DEBUG_CAPA(D_SEC, capa, "add MDS"); + } else { + spin_lock(&old->c_lock); + old->c_capa = *capa; + spin_unlock(&old->c_lock); - spin_lock(&capa_lock); - ocapa->c_inode = inode; - ocapa->c_handle = och->och_fh; - spin_unlock(&capa_lock); + DEBUG_CAPA(D_SEC, capa, "update MDS"); - spin_lock(&lli->lli_lock); - /* in case it was linked to lli_capas already */ - if (list_empty(&ocapa->c_lli_list)) - list_add(&ocapa->c_lli_list, &lli->lli_capas); - spin_unlock(&lli->lli_lock); + capa_put(ocapa); + ocapa = old; + } + return ocapa; +} - expiry = expiry_to_jiffies(capa->lc_expiry - capa_pre_expiry(capa)); +static struct obd_capa *do_lookup_oss_capa(struct inode *inode, int opc) +{ + struct ll_inode_info *lli = ll_i2info(inode); + struct obd_capa *ocapa; - spin_lock(&capa_lock); - if (time_before(expiry, ll_capa_timer.expires) || - !timer_pending(&ll_capa_timer)) { - mod_timer(&ll_capa_timer, expiry); - CDEBUG(D_INFO, "ll_capa_timer new expiry: %lu\n", expiry); + /* inside capa_lock */ + cfs_list_for_each_entry(ocapa, &lli->lli_oss_capas, u.cli.lli_list) { + if ((capa_opc(&ocapa->c_capa) & opc) != opc) + continue; + + LASSERT(lu_fid_eq(capa_fid(&ocapa->c_capa), + ll_inode2fid(inode))); + LASSERT(ocapa->c_site == CAPA_SITE_CLIENT); + + DEBUG_CAPA(D_SEC, &ocapa->c_capa, "found client"); + return ocapa; } - spin_unlock(&capa_lock); - RETURN(0); + return NULL; } -int ll_set_trunc_capa(struct ptlrpc_request *req, int offset, struct inode *inode) +static inline void inode_add_oss_capa(struct inode *inode, + struct obd_capa *ocapa) { - struct mds_body *body; - struct obd_capa *ocapa; - struct lustre_capa *capa; struct ll_inode_info *lli = ll_i2info(inode); + struct obd_capa *tmp; + cfs_list_t *next = NULL; + + /* capa is sorted in lli_oss_capas so lookup can always find the + * latest one */ + cfs_list_for_each_entry(tmp, &lli->lli_oss_capas, u.cli.lli_list) { + if (cfs_time_after(ocapa->c_expiry, tmp->c_expiry)) { + next = &tmp->u.cli.lli_list; + break; + } + } + LASSERT(&ocapa->u.cli.lli_list != next); + cfs_list_move_tail(&ocapa->u.cli.lli_list, next ?: &lli->lli_oss_capas); +} - body = lustre_msg_buf(req->rq_repmsg, offset, sizeof(*body)); - if (!body) - return -ENOMEM; +static struct obd_capa *do_add_oss_capa(struct inode *inode, + struct obd_capa *ocapa) +{ + struct obd_capa *old; + struct lustre_capa *capa = &ocapa->c_capa; + + LASSERTF(S_ISREG(inode->i_mode), + "inode has oss capa, but not regular file, mode: %d\n", + inode->i_mode); + + /* FIXME: can't replace it so easily with fine-grained opc */ + old = do_lookup_oss_capa(inode, capa_opc(capa) & CAPA_OPC_OSS_ONLY); + if (!old) { + ocapa->u.cli.inode = inode; + CFS_INIT_LIST_HEAD(&ocapa->u.cli.lli_list); + capa_count[CAPA_SITE_CLIENT]++; + + DEBUG_CAPA(D_SEC, capa, "add OSS"); + } else { + spin_lock(&old->c_lock); + old->c_capa = *capa; + spin_unlock(&old->c_lock); + + DEBUG_CAPA(D_SEC, capa, "update OSS"); + + capa_put(ocapa); + ocapa = old; + } + + inode_add_oss_capa(inode, ocapa); + return ocapa; +} + +struct obd_capa *ll_add_capa(struct inode *inode, struct obd_capa *ocapa) +{ + spin_lock(&capa_lock); + ocapa = capa_for_mds(&ocapa->c_capa) ? do_add_mds_capa(inode, ocapa) : + do_add_oss_capa(inode, ocapa); - if (!(body->valid & OBD_MD_CAPA)) - return 0; + /* truncate capa won't renew */ + if (ocapa->c_capa.lc_opc != CAPA_OPC_OSS_TRUNC) { + set_capa_expiry(ocapa); + cfs_list_del_init(&ocapa->c_list); + sort_add_capa(ocapa, ll_capa_list); + update_capa_timer(ocapa, capa_renewal_time(ocapa)); + } + + spin_unlock(&capa_lock); + + cfs_atomic_set(&ll_capa_debug, 1); + return ocapa; +} + +static inline void delay_capa_renew(struct obd_capa *oc, cfs_time_t delay) +{ + /* NB: set a fake expiry for this capa to prevent it renew too soon */ + oc->c_expiry = cfs_time_add(oc->c_expiry, cfs_time_seconds(delay)); +} + +int ll_update_capa(struct obd_capa *ocapa, struct lustre_capa *capa) +{ + struct inode *inode = ocapa->u.cli.inode; + int rc = 0; ENTRY; - capa = (struct lustre_capa *)lustre_swab_repbuf(req, offset + 1, - sizeof(*capa), lustre_swab_lustre_capa); - if (!capa) - RETURN(-ENOMEM); - ocapa = capa_renew(capa, CLIENT_CAPA); - if (!ocapa) - RETURN(-ENOMEM); + LASSERT(ocapa); + + if (IS_ERR(capa)) { + /* set error code */ + rc = PTR_ERR(capa); + spin_lock(&capa_lock); + if (rc == -ENOENT) { + DEBUG_CAPA(D_SEC, &ocapa->c_capa, + "renewal canceled because object removed"); + ll_capa_renewal_noent++; + } else { + ll_capa_renewal_failed++; + + /* failed capa won't be renewed any longer, but if -EIO, + * client might be doing recovery, retry in 2 min. */ + if (rc == -EIO && !capa_is_expired(ocapa)) { + delay_capa_renew(ocapa, 120); + DEBUG_CAPA(D_ERROR, &ocapa->c_capa, + "renewal failed: -EIO, " + "retry in 2 mins"); + ll_capa_renewal_retries++; + GOTO(retry, rc); + } else { + DEBUG_CAPA(D_ERROR, &ocapa->c_capa, + "renewal failed(rc: %d) for", rc); + } + } + + cfs_list_del_init(&ocapa->c_list); + sort_add_capa(ocapa, &ll_idle_capas); + spin_unlock(&capa_lock); + + capa_put(ocapa); + iput(inode); + RETURN(rc); + } + + spin_lock(&ocapa->c_lock); + LASSERT(!memcmp(&ocapa->c_capa, capa, + offsetof(struct lustre_capa, lc_opc))); + ocapa->c_capa = *capa; + set_capa_expiry(ocapa); + spin_unlock(&ocapa->c_lock); + + spin_lock(&capa_lock); + if (capa_for_oss(capa)) + inode_add_oss_capa(inode, ocapa); + DEBUG_CAPA(D_SEC, capa, "renew"); + EXIT; +retry: + cfs_list_del_init(&ocapa->c_list); + sort_add_capa(ocapa, ll_capa_list); + update_capa_timer(ocapa, capa_renewal_time(ocapa)); + spin_unlock(&capa_lock); + + capa_put(ocapa); + iput(inode); + return rc; +} + +void ll_capa_open(struct inode *inode) +{ + struct ll_inode_info *lli = ll_i2info(inode); - spin_lock(&lli->lli_lock); - /* in case it was linked to lli_capas already */ - if (list_empty(&ocapa->c_lli_list)) - list_add(&ocapa->c_lli_list, &lli->lli_capas); - spin_unlock(&lli->lli_lock); + if ((ll_i2sbi(inode)->ll_flags & (LL_SBI_MDS_CAPA | LL_SBI_OSS_CAPA)) + == 0) + return; - RETURN(0); + if (!S_ISREG(inode->i_mode)) + return; + + cfs_atomic_inc(&lli->lli_open_count); } -struct obd_capa *ll_get_capa(struct inode *inode, uid_t uid, int op) +void ll_capa_close(struct inode *inode) { struct ll_inode_info *lli = ll_i2info(inode); - struct obd_capa *ocapa, *tmp; - ENTRY; - list_for_each_entry_safe(ocapa, tmp, &lli->lli_capas, c_lli_list) { - if (ocapa->c_capa.lc_ruid != uid) - continue; - if (ocapa->c_capa.lc_op != op) - continue; + if ((ll_i2sbi(inode)->ll_flags & (LL_SBI_MDS_CAPA | LL_SBI_OSS_CAPA)) + == 0) + return; - RETURN(ocapa); - } - - RETURN(NULL); + if (!S_ISREG(inode->i_mode)) + return; + + cfs_atomic_dec(&lli->lli_open_count); +} + +/* delete CAPA_OPC_OSS_TRUNC only */ +void ll_truncate_free_capa(struct obd_capa *ocapa) +{ + if (!ocapa) + return; + + LASSERT(ocapa->c_capa.lc_opc & CAPA_OPC_OSS_TRUNC); + DEBUG_CAPA(D_SEC, &ocapa->c_capa, "free truncate"); + + /* release ref when find */ + capa_put(ocapa); + if (likely(ocapa->c_capa.lc_opc == CAPA_OPC_OSS_TRUNC)) { + spin_lock(&capa_lock); + ll_delete_capa(ocapa); + spin_unlock(&capa_lock); + } +} + +void ll_clear_inode_capas(struct inode *inode) +{ + struct ll_inode_info *lli = ll_i2info(inode); + struct obd_capa *ocapa, *tmp; + + spin_lock(&capa_lock); + ocapa = lli->lli_mds_capa; + if (ocapa) + ll_delete_capa(ocapa); + + cfs_list_for_each_entry_safe(ocapa, tmp, &lli->lli_oss_capas, + u.cli.lli_list) + ll_delete_capa(ocapa); + spin_unlock(&capa_lock); +} + +void ll_print_capa_stat(struct ll_sb_info *sbi) +{ + if (sbi->ll_flags & (LL_SBI_MDS_CAPA | LL_SBI_OSS_CAPA)) + LCONSOLE_INFO("Fid capabilities renewed: %llu\n" + "Fid capabilities renewal ENOENT: %llu\n" + "Fid capabilities failed to renew: %llu\n" + "Fid capabilities renewal retries: %llu\n", + ll_capa_renewed, ll_capa_renewal_noent, + ll_capa_renewal_failed, ll_capa_renewal_retries); }