-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
+/*
+ * GPL HEADER START
*
- * Copyright (C) 2004, 2005 Cluster File Systems, Inc.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
- * Author: Lai Siyao <lsy@clusterfs.com>
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
*
- * This file is part of Lustre, http://www.lustre.org.
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
*
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
*
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
*
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/llite/llite_capa.c
+ *
+ * Author: Lai Siyao <lsy@clusterfs.com>
*/
#define DEBUG_SUBSYSTEM S_LLITE
#include <linux/file.h>
#include <linux/kmod.h>
-#include <linux/lustre_lite.h>
#include "llite_internal.h"
-static struct list_head *ll_capa_list = &capa_list[CLIENT_CAPA];
-static struct ptlrpc_thread capa_thread;
+/* for obd_capa.c_list, client capa might stay in three places:
+ * 1. ll_capa_list.
+ * 2. ll_idle_capas.
+ * 3. stand alone: just allocated.
+ */
-static struct thread_ctl {
- struct completion ctl_starting;
- struct completion ctl_finishing;
-} ll_capa_ctl;
+/* capas for oss writeback and those failed to renew */
+static struct list_head ll_idle_capas = LIST_HEAD_INIT(ll_idle_capas);
+/* state of the renewal thread; its t_ctl_waitq is woken by the timer
+ * callback and by the start/stop routines below */
+static struct ptlrpc_thread ll_capa_thread;
+/* the CAPA_SITE_CLIENT slot of capa_list[]; sort_add_capa() keeps it ordered
+ * by c_expiry */
+static struct list_head *ll_capa_list = &capa_list[CAPA_SITE_CLIENT];
-static inline int have_expired_capa(void)
-{
- struct obd_capa *ocapa;
- struct lustre_capa *capa;
- int expired = 0;
- unsigned long expiry;
- ENTRY;
+/* llite capa renewal timer */
+struct timer_list ll_capa_timer;
+/* for debug: indicate whether capa on llite is enabled or not */
+/* set to 1 by ll_add_capa(); the getters log one error and clear it when a
+ * lookup finds no capability */
+static atomic_t ll_capa_debug = ATOMIC_INIT(0);
+/* statistics reported by ll_print_capa_stat() */
+/* renewal requests issued by the renewal thread */
+static unsigned long long ll_capa_renewed = 0;
+/* renewals answered with -ENOENT */
+static unsigned long long ll_capa_renewal_noent = 0;
+/* renewals that failed with any other error */
+static unsigned long long ll_capa_renewal_failed = 0;
+/* -EIO failures that were rescheduled for a later retry */
+static unsigned long long ll_capa_renewal_retries = 0;
- spin_lock(&capa_lock);
- if (!list_empty(ll_capa_list)) {
- ocapa = list_entry(ll_capa_list->next, struct obd_capa, c_list);
- expired = __capa_is_to_expire(ocapa);
-
- if (!expired) {
- capa = &ocapa->c_capa;
- expiry = expiry_to_jiffies(capa->lc_expiry -
- capa_pre_expiry(capa));
- if (time_before(expiry, ll_capa_timer.expires) ||
- !timer_pending(&ll_capa_timer)) {
- mod_timer(&ll_capa_timer, expiry);
- CDEBUG(D_INFO,"ll_capa_timer new expiry: %lu\n",
- expiry);
- }
- }
- }
- spin_unlock(&capa_lock);
+static int ll_update_capa(struct obd_capa *ocapa, struct lustre_capa *capa);
- RETURN(expired);
+/*
+ * Pull ll_capa_timer forward to @expiry.
+ *
+ * The timer is re-armed when it is idle or when @expiry is earlier than the
+ * deadline it currently holds; otherwise it is left alone.  @ocapa is only
+ * used for the debug message.
+ */
+static inline void update_capa_timer(struct obd_capa *ocapa, cfs_time_t expiry)
+{
+        /* armed already and not later than @expiry: nothing to do */
+        if (timer_pending(&ll_capa_timer) &&
+            !cfs_time_before(expiry, ll_capa_timer.expires))
+                return;
+
+        mod_timer(&ll_capa_timer, expiry);
+        DEBUG_CAPA(D_SEC, &ocapa->c_capa,
+                   "ll_capa_timer update: %lu/%lu by", expiry, jiffies);
}
-static int inline ll_capa_check_stop(void)
+/*
+ * Time at which @ocapa should be renewed: its c_expiry minus half of the
+ * capability's lc_timeout (converted with cfs_time_seconds()).
+ */
+static inline cfs_time_t capa_renewal_time(struct obd_capa *ocapa)
{
- return (capa_thread.t_flags & SVC_STOPPING) ? 1: 0;
+        cfs_time_t deadline = ocapa->c_expiry;
+
+        return cfs_time_sub(deadline,
+                            cfs_time_seconds(ocapa->c_capa.lc_timeout) / 2);
}
-static int ll_renew_capa(struct obd_capa *ocapa)
+/*
+ * Non-zero once the renewal time of @ocapa (see capa_renewal_time()) is at
+ * or before the current time.
+ */
+static inline int capa_is_to_expire(struct obd_capa *ocapa)
{
- struct ptlrpc_request *req = NULL;
- /* no need to lock, no one else will touch it */
- struct inode *inode = ocapa->c_inode;
- struct obd_export *md_exp = ll_i2mdexp(inode);
- struct ll_inode_info *lli = ll_i2info(inode);
- __u64 valid = OBD_MD_CAPA;
- int rc;
- ENTRY;
+        cfs_time_t due = capa_renewal_time(ocapa);
+
+        return cfs_time_beforeq(due, cfs_time_current());
+}
- if (capa_expired(&ocapa->c_capa))
- RETURN(-ESTALE);
+/*
+ * Wait condition of the renewal thread.
+ *
+ * Looks at the head of ll_capa_list, or at the head of ll_idle_capas when
+ * ll_capa_list is empty.  Returns 1 when that entry is due (renewal time
+ * reached for ll_capa_list, fully expired for ll_idle_capas).  Otherwise the
+ * timer is adjusted to that entry's deadline and 0 is returned.
+ */
+static inline int have_expired_capa(void)
+{
+        struct obd_capa *ocapa = NULL;
+        int expired = 0;
+
+        /* if ll_capa_list has client capa to expire or ll_idle_capas has
+         * expired capa, return 1.
+         */
+        spin_lock(&capa_lock);
+        if (!list_empty(ll_capa_list)) {
+                ocapa = list_entry(ll_capa_list->next, struct obd_capa,
+                                   c_list);
+                expired = capa_is_to_expire(ocapa);
+                if (!expired)
+                        update_capa_timer(ocapa, capa_renewal_time(ocapa));
+        } else if (!list_empty(&ll_idle_capas)) {
+                ocapa = list_entry(ll_idle_capas.next, struct obd_capa,
+                                   c_list);
+                expired = capa_is_expired(ocapa);
+                if (!expired)
+                        update_capa_timer(ocapa, ocapa->c_expiry);
+        }
+
+        /* Log before dropping capa_lock: every ll_delete_capa() caller in
+         * this file holds capa_lock, so the entry cannot be released while
+         * it is dereferenced here. */
+        if (expired)
+                DEBUG_CAPA(D_SEC, &ocapa->c_capa, "expired");
+        spin_unlock(&capa_lock);
- rc = md_getattr(md_exp, &lli->lli_id, valid, NULL, NULL, 0,
- 0, ocapa, &req);
- RETURN(rc);
+        return expired;
}
-static int ll_capa_thread(void *arg)
+/*
+ * Insert @ocapa into @head, keeping the list ordered by c_expiry.
+ *
+ * The list is scanned from its tail for the last entry whose expiry is not
+ * later than @ocapa's; @ocapa is linked right after that entry, or at the
+ * front of the list when no such entry exists.  Despite its name, "before"
+ * is the node that ends up immediately before @ocapa.
+ */
+static void sort_add_capa(struct obd_capa *ocapa, struct list_head *head)
{
- struct thread_ctl *ctl = arg;
- unsigned long flags;
- int rc;
- ENTRY;
+        struct obd_capa *tmp;
+        struct list_head *before = NULL;
- {
- char name[sizeof(current->comm)];
- snprintf(name, sizeof(name) - 1, "ll_capa");
- kportal_daemonize(name);
+        /* TODO: client capa is sorted by expiry, this could be optimized */
+        list_for_each_entry_reverse(tmp, head, c_list) {
+                if (cfs_time_aftereq(ocapa->c_expiry, tmp->c_expiry)) {
+                        before = &tmp->c_list;
+                        break;
+                }
}
- SIGNAL_MASK_LOCK(current, flags);
- sigfillset(&current->blocked);
- RECALC_SIGPENDING;
- SIGNAL_MASK_UNLOCK(current, flags);
+        /* @ocapa must not already be linked at the insertion point */
+        LASSERT(&ocapa->c_list != before);
+        list_add(&ocapa->c_list, before ?: head);
+}
- /*
- * letting starting function know, that we are ready and control may be
- * returned.
- */
- capa_thread.t_flags = SVC_RUNNING;
- complete(&ctl->ctl_starting);
+/* Current lli_open_count of the inode that client capa @oc is attached to. */
+static inline int obd_capa_open_count(struct obd_capa *oc)
+{
+        return atomic_read(&ll_i2info(oc->u.cli.inode)->lli_open_count);
+}
- while (1) {
- struct l_wait_info lwi = { 0 };
- struct obd_capa *ocapa, *tmp, *next = NULL, tcapa;
- unsigned long expiry, sleep = CAPA_PRE_EXPIRY;
+/*
+ * Detach client capa @ocapa from its inode and from whichever c_list it is
+ * on, decrement the client capa counter and drop the allocation reference.
+ * All callers in this file hold capa_lock.
+ */
+static void ll_delete_capa(struct obd_capa *ocapa)
+{
+        struct ll_inode_info *lli = ll_i2info(ocapa->u.cli.inode);
+        struct lustre_capa *capa = &ocapa->c_capa;
- l_wait_event(capa_thread.t_ctl_waitq,
- (have_expired_capa() || ll_capa_check_stop()),
- &lwi);
+        if (capa_for_mds(capa)) {
+                /* an MDS capa is referenced directly from the inode info */
+                LASSERT(lli->lli_mds_capa == ocapa);
+                lli->lli_mds_capa = NULL;
+        } else if (capa_for_oss(capa)) {
+                /* an OSS capa sits on the per-inode list */
+                list_del_init(&ocapa->u.cli.lli_list);
+        }
- if (ll_capa_check_stop())
- break;
+        DEBUG_CAPA(D_SEC, capa, "free client");
+        list_del_init(&ocapa->c_list);
+        capa_count[CAPA_SITE_CLIENT]--;
+
+        /* release the ref when alloc */
+        capa_put(ocapa);
+}
- spin_lock(&capa_lock);
- list_for_each_entry_safe(ocapa, tmp, ll_capa_list, c_list) {
- if (ocapa->c_capa.lc_flags & CAPA_FL_SHORT)
- sleep = CAPA_PRE_EXPIRY_SHORT;
+/* three places where client capa is deleted:
+ * 1. capa_thread_main(), main place to delete expired capa.
+ * 2. ll_clear_inode_capas() in ll_clear_inode().
+ * 3. ll_truncate_free_capa() delete truncate capa explicitly in ll_setattr_ost().
+ */
+/* Body of the capa renewal thread.
+ *
+ * Marks itself SVC_RUNNING, then loops until the running flag is cleared.
+ * Each pass waits for have_expired_capa(), then under capa_lock:
+ * 1. walks ll_capa_list; entries that are due are either moved to
+ * ll_idle_capas (nothing keeps them in use) or renewed through
+ * md_renew_capa() with ll_update_capa() as the completion handler;
+ * 2. walks ll_idle_capas and releases expired entries nobody else holds.
+ * On exit it marks itself SVC_STOPPED and wakes the waiter in
+ * ll_capa_thread_stop(). Always returns 0; @unused is ignored.
+ */
+static int capa_thread_main(void *unused)
+{
+ struct obd_capa *ocapa, *tmp, *next;
+ struct inode *inode = NULL;
+ struct l_wait_info lwi = { 0 };
+ int rc;
+ ENTRY;
+
+ thread_set_flags(&ll_capa_thread, SVC_RUNNING);
+ wake_up(&ll_capa_thread.t_ctl_waitq);
+
+ while (1) {
+ l_wait_event(ll_capa_thread.t_ctl_waitq,
+ !thread_is_running(&ll_capa_thread) ||
+ have_expired_capa(),
+ &lwi);
+
+ if (!thread_is_running(&ll_capa_thread))
+ break;
- if (ocapa->c_capa.lc_op == CAPA_TRUNC)
- continue;
+ /* first entry of ll_capa_list that is not due yet, if any */
+ next = NULL;
- if (__capa_is_to_expire(ocapa)) {
- /* copy capa in case it's deleted */
- tcapa = *ocapa;
- spin_unlock(&capa_lock);
+ spin_lock(&capa_lock);
+ list_for_each_entry_safe(ocapa, tmp, ll_capa_list, c_list) {
+ __u64 ibits;
- rc = ll_renew_capa(&tcapa);
- if (rc)
- capa_put(ocapa);
+ /* ll_add_capa() never queues truncate capas here */
+ LASSERT(ocapa->c_capa.lc_opc != CAPA_OPC_OSS_TRUNC);
- spin_lock(&capa_lock);
- } else {
+ /* the list is sorted, so the first entry that is not due
+ * ends the scan */
+ if (!capa_is_to_expire(ocapa)) {
next = ocapa;
break;
}
- }
- if (next) {
- struct lustre_capa *capa = &next->c_capa;
+ list_del_init(&ocapa->c_list);
+
+ /* for MDS capability, only renew those which belong to
+ * dir, or its inode is opened, or client holds LOOKUP
+ * lock.
+ */
+ /* ibits may be changed by ll_have_md_lock() so we have
+ * to set it each time */
+ ibits = MDS_INODELOCK_LOOKUP;
+ if (capa_for_mds(&ocapa->c_capa) &&
+ !S_ISDIR(ocapa->u.cli.inode->i_mode) &&
+ obd_capa_open_count(ocapa) == 0 &&
+ !ll_have_md_lock(ocapa->u.cli.inode,
+ &ibits, LCK_MINMODE)) {
+ DEBUG_CAPA(D_SEC, &ocapa->c_capa,
+ "skip renewal for");
+ sort_add_capa(ocapa, &ll_idle_capas);
+ continue;
+ }
- expiry = expiry_to_jiffies(capa->lc_expiry -
- capa_pre_expiry(capa));
- if (time_before(expiry, ll_capa_timer.expires) ||
- !timer_pending(&ll_capa_timer)) {
- mod_timer(&ll_capa_timer, expiry);
- CDEBUG(D_INFO,"ll_capa_timer new expiry: %lu\n",
- expiry);
+ /* for OSS capability, only renew those whose inode is
+ * opened.
+ */
+ if (capa_for_oss(&ocapa->c_capa) &&
+ obd_capa_open_count(ocapa) == 0) {
+ /* oss capa with open count == 0 won't renew,
+ * move to idle list */
+ sort_add_capa(ocapa, &ll_idle_capas);
+ continue;
+ }
+
+ /* NB iput() is in ll_update_capa() */
+ inode = igrab(ocapa->u.cli.inode);
+ if (inode == NULL) {
+ /* NOTE(review): the entry was unlinked by
+ * list_del_init() above and is not put back on any
+ * c_list on this path - confirm that is intended */
+ DEBUG_CAPA(D_ERROR, &ocapa->c_capa,
+ "igrab failed for");
+ continue;
+ }
+
+ /* reference for the renewal; dropped in ll_update_capa() */
+ capa_get(ocapa);
+ ll_capa_renewed++;
+ /* NOTE(review): capa_lock is released around the renewal
+ * call while the _safe cursor "tmp" is still held - confirm
+ * tmp cannot be deleted or re-queued in the meantime */
+ spin_unlock(&capa_lock);
+ rc = md_renew_capa(ll_i2mdexp(inode), ocapa,
+ ll_update_capa);
+ spin_lock(&capa_lock);
+ if (rc) {
+ DEBUG_CAPA(D_ERROR, &ocapa->c_capa,
+ "renew failed: %d", rc);
+ ll_capa_renewal_failed++;
}
}
- spin_unlock(&capa_lock);
- /* wait ll_renew_capa finish */
- set_current_state(TASK_INTERRUPTIBLE);
- schedule_timeout(sleep * HZ);
- }
+ /* re-arm the timer for the earliest capa still to be renewed */
+ if (next)
+ update_capa_timer(next, capa_renewal_time(next));
+
+ list_for_each_entry_safe(ocapa, tmp, &ll_idle_capas,
+ c_list) {
+ if (!capa_is_expired(ocapa)) {
+ /* only when no renewal deadline was set above */
+ if (!next)
+ update_capa_timer(ocapa,
+ ocapa->c_expiry);
+ break;
+ }
+
+ if (atomic_read(&ocapa->c_refc) > 1) {
+ DEBUG_CAPA(D_SEC, &ocapa->c_capa,
+ "expired(c_refc %d), don't release",
+ atomic_read(&ocapa->c_refc));
+ /* don't try to renew any more */
+ list_del_init(&ocapa->c_list);
+ continue;
+ }
+
+ /* expired capa is released. */
+ DEBUG_CAPA(D_SEC, &ocapa->c_capa, "release expired");
+ ll_delete_capa(ocapa);
+ }
- capa_thread.t_flags = SVC_STOPPED;
+ spin_unlock(&capa_lock);
+ }
- /* this is SMP-safe way to finish thread. */
- complete_and_exit(&ctl->ctl_finishing, 0);
- EXIT;
+ thread_set_flags(&ll_capa_thread, SVC_STOPPED);
+ wake_up(&ll_capa_thread.t_ctl_waitq);
+ RETURN(0);
}
-/* just wake up, others are handled by ll_capa_thread */
+/* Wake the renewal thread's wait queue so it re-evaluates its wait condition.
+ * Presumably installed as the ll_capa_timer handler - the timer setup is not
+ * in this file. @unused is ignored. */
void ll_capa_timer_callback(unsigned long unused)
{
- ENTRY;
- wake_up(&capa_thread.t_ctl_waitq);
- EXIT;
+ wake_up(&ll_capa_thread.t_ctl_waitq);
}
+/* Start the capa renewal thread and wait until it reports itself running.
+ * Returns 0 on success, or the error carried by kthread_run() on failure. */
int ll_capa_thread_start(void)
{
- int rc;
- ENTRY;
+ struct task_struct *task;
+ ENTRY;
- LASSERT(capa_thread.t_flags == 0);
- init_completion(&ll_capa_ctl.ctl_starting);
- init_completion(&ll_capa_ctl.ctl_finishing);
- init_waitqueue_head(&capa_thread.t_ctl_waitq);
-
- rc = kernel_thread(ll_capa_thread, &ll_capa_ctl,
- (CLONE_VM | CLONE_FILES));
- if (rc < 0) {
- CERROR("cannot start expired capa thread, "
- "err = %d\n", rc);
- RETURN(rc);
- }
- wait_for_completion(&ll_capa_ctl.ctl_starting);
- LASSERT(capa_thread.t_flags == SVC_RUNNING);
- RETURN(0);
+ init_waitqueue_head(&ll_capa_thread.t_ctl_waitq);
+
+ task = kthread_run(capa_thread_main, NULL, "ll_capa");
+ if (IS_ERR(task)) {
+ CERROR("cannot start expired capa thread: rc %ld\n",
+ PTR_ERR(task));
+ RETURN(PTR_ERR(task));
+ }
+ /* capa_thread_main() sets SVC_RUNNING and wakes this queue */
+ wait_event(ll_capa_thread.t_ctl_waitq,
+ thread_is_running(&ll_capa_thread));
+
+ RETURN(0);
}
+/* Ask the renewal thread to stop (SVC_STOPPING), wake it, and block until it
+ * has marked itself stopped. */
void ll_capa_thread_stop(void)
{
+ thread_set_flags(&ll_capa_thread, SVC_STOPPING);
+ wake_up(&ll_capa_thread.t_ctl_waitq);
+ wait_event(ll_capa_thread.t_ctl_waitq,
+ thread_is_stopped(&ll_capa_thread));
+}
+
+/* Look up an unexpired OSS capa of @inode that supports @opc.
+ *
+ * Returns NULL when LL_SBI_OSS_CAPA is not set for the mount or when no
+ * matching capa is on lli_oss_capas; otherwise returns the capa with an extra
+ * reference taken by capa_get(). @opc must be CAPA_OPC_OSS_WRITE,
+ * CAPA_OPC_OSS_RW or CAPA_OPC_OSS_TRUNC. A failed lookup is logged once while
+ * ll_capa_debug is set.
+ */
+struct obd_capa *ll_osscapa_get(struct inode *inode, __u64 opc)
+{
+ struct ll_inode_info *lli = ll_i2info(inode);
+ struct obd_capa *ocapa;
+ int found = 0;
+
ENTRY;
- capa_thread.t_flags = SVC_STOPPING;
- wake_up(&capa_thread.t_ctl_waitq);
- wait_for_completion(&ll_capa_ctl.ctl_finishing);
- LASSERT(capa_thread.t_flags == SVC_STOPPED);
- capa_thread.t_flags = 0;
+ if ((ll_i2sbi(inode)->ll_flags & LL_SBI_OSS_CAPA) == 0)
+ RETURN(NULL);
+
+ LASSERT(opc == CAPA_OPC_OSS_WRITE || opc == CAPA_OPC_OSS_RW ||
+ opc == CAPA_OPC_OSS_TRUNC);
+
+ spin_lock(&capa_lock);
+ list_for_each_entry(ocapa, &lli->lli_oss_capas, u.cli.lli_list) {
+ if (capa_is_expired(ocapa))
+ continue;
+ /* the first capa covering the write, read or truncate bit of
+ * @opc is taken */
+ if ((opc & CAPA_OPC_OSS_WRITE) &&
+ capa_opc_supported(&ocapa->c_capa, CAPA_OPC_OSS_WRITE)) {
+ found = 1;
+ break;
+ } else if ((opc & CAPA_OPC_OSS_READ) &&
+ capa_opc_supported(&ocapa->c_capa,
+ CAPA_OPC_OSS_READ)) {
+ found = 1;
+ break;
+ } else if ((opc & CAPA_OPC_OSS_TRUNC) &&
+ capa_opc_supported(&ocapa->c_capa, opc)) {
+ found = 1;
+ break;
+ }
+ }
+
+ if (found) {
+ LASSERT(lu_fid_eq(capa_fid(&ocapa->c_capa),
+ ll_inode2fid(inode)));
+ LASSERT(ocapa->c_site == CAPA_SITE_CLIENT);
+
+ capa_get(ocapa);
+
+ DEBUG_CAPA(D_SEC, &ocapa->c_capa, "found client");
+ } else {
+ /* after a full scan the cursor is not a valid entry */
+ ocapa = NULL;
- EXIT;
+ if (atomic_read(&ll_capa_debug)) {
+ CERROR("no capability for "DFID" opc "LPX64"\n",
+ PFID(&lli->lli_fid), opc);
+ atomic_set(&ll_capa_debug, 0);
+ }
+ }
+ spin_unlock(&capa_lock);
+
+ RETURN(ocapa);
}
+EXPORT_SYMBOL(ll_osscapa_get);
-int ll_set_capa(struct inode *inode, struct lookup_intent *it)
+/* Return the MDS capa attached to @inode, passed through capa_get() under
+ * capa_lock. Returns NULL when LL_SBI_MDS_CAPA is not set for the mount; a
+ * NULL result from capa_get() is logged once while ll_capa_debug is set.
+ */
+struct obd_capa *ll_mdscapa_get(struct inode *inode)
{
- struct ptlrpc_request *req = LUSTRE_IT(it)->it_data;
- struct mds_body *body;
- struct lustre_capa *capa;
- struct obd_capa *ocapa;
- struct ll_inode_info *lli = ll_i2info(inode);
- unsigned long expiry;
+ struct ll_inode_info *lli = ll_i2info(inode);
+ struct obd_capa *ocapa;
+ ENTRY;
- if (!S_ISREG(inode->i_mode))
- return 0;
+ LASSERT(inode != NULL);
- body = lustre_msg_buf(req->rq_repmsg, 1, sizeof (*body));
- LASSERT(body != NULL); /* reply already checked out */
- LASSERT_REPSWABBED(req, 1); /* and swabbed down */
+ if ((ll_i2sbi(inode)->ll_flags & LL_SBI_MDS_CAPA) == 0)
+ RETURN(NULL);
- if (!(body->valid & OBD_MD_CAPA))
- return 0;
+ spin_lock(&capa_lock);
+ ocapa = capa_get(lli->lli_mds_capa);
+ spin_unlock(&capa_lock);
+ if (!ocapa && atomic_read(&ll_capa_debug)) {
+ CERROR("no mds capability for "DFID"\n", PFID(&lli->lli_fid));
+ atomic_set(&ll_capa_debug, 0);
+ }
- ENTRY;
+ RETURN(ocapa);
+}
- capa = lustre_msg_buf(req->rq_repmsg, 7, sizeof (*capa));
- LASSERT(capa != NULL); /* reply already checked out */
- LASSERT_REPSWABBED(req, 7); /* and swabbed down */
+/* Attach MDS capa @ocapa to @inode; called from ll_add_capa() with capa_lock
+ * held.
+ *
+ * With no MDS capa on the inode yet, @ocapa becomes lli_mds_capa and is
+ * returned. Otherwise the existing capa takes over @ocapa's c_capa contents,
+ * @ocapa is released with capa_put() and the existing capa is returned.
+ */
+static struct obd_capa *do_add_mds_capa(struct inode *inode,
+ struct obd_capa *ocapa)
+{
+ struct ll_inode_info *lli = ll_i2info(inode);
+ struct obd_capa *old = lli->lli_mds_capa;
+ struct lustre_capa *capa = &ocapa->c_capa;
- ocapa = capa_renew(capa, CLIENT_CAPA);
- if (!ocapa)
- RETURN(-ENOMEM);
+ if (!old) {
+ ocapa->u.cli.inode = inode;
+ lli->lli_mds_capa = ocapa;
+ capa_count[CAPA_SITE_CLIENT]++;
- spin_lock(&capa_lock);
- ocapa->c_inode = inode;
- ocapa->c_handle = body->handle;
- spin_unlock(&capa_lock);
+ DEBUG_CAPA(D_SEC, capa, "add MDS");
+ } else {
+ /* c_lock guards the copy into the capa that is kept */
+ spin_lock(&old->c_lock);
+ old->c_capa = *capa;
+ spin_unlock(&old->c_lock);
- spin_lock(&lli->lli_lock);
- /* in case it was linked to lli_capas already */
- if (list_empty(&ocapa->c_lli_list))
- list_add(&ocapa->c_lli_list, &lli->lli_capas);
- spin_unlock(&lli->lli_lock);
+ DEBUG_CAPA(D_SEC, capa, "update MDS");
- expiry = expiry_to_jiffies(capa->lc_expiry - capa_pre_expiry(capa));
+ capa_put(ocapa);
+ ocapa = old;
+ }
+ return ocapa;
+}
- spin_lock(&capa_lock);
- if (time_before(expiry, ll_capa_timer.expires) ||
- !timer_pending(&ll_capa_timer)) {
- mod_timer(&ll_capa_timer, expiry);
- CDEBUG(D_INFO, "ll_capa_timer new expiry: %lu\n", expiry);
+/* Return the first capa on @inode's lli_oss_capas whose opc contains every
+ * bit of @opc, or NULL when there is none. No reference is taken. */
+static struct obd_capa *do_lookup_oss_capa(struct inode *inode, int opc)
+{
+ struct ll_inode_info *lli = ll_i2info(inode);
+ struct obd_capa *ocapa;
+
+ /* inside capa_lock */
+ list_for_each_entry(ocapa, &lli->lli_oss_capas, u.cli.lli_list) {
+ if ((capa_opc(&ocapa->c_capa) & opc) != opc)
+ continue;
+
+ LASSERT(lu_fid_eq(capa_fid(&ocapa->c_capa),
+ ll_inode2fid(inode)));
+ LASSERT(ocapa->c_site == CAPA_SITE_CLIENT);
+
+ DEBUG_CAPA(D_SEC, &ocapa->c_capa, "found client");
+ return ocapa;
}
- spin_unlock(&capa_lock);
- RETURN(0);
+ return NULL;
}
-int ll_set_trunc_capa(struct ptlrpc_request *req, int offset, struct inode *inode)
+/* Place (or re-place) @ocapa on @inode's lli_oss_capas: it is moved in front
+ * of the first entry whose c_expiry is earlier than its own, or to the tail
+ * when no entry is earlier. */
+static inline void inode_add_oss_capa(struct inode *inode,
+ struct obd_capa *ocapa)
{
- struct mds_body *body;
- struct obd_capa *ocapa;
- struct lustre_capa *capa;
struct ll_inode_info *lli = ll_i2info(inode);
+ struct obd_capa *tmp;
+ struct list_head *next = NULL;
+
+ /* capa is sorted in lli_oss_capas so lookup can always find the
+ * latest one */
+ list_for_each_entry(tmp, &lli->lli_oss_capas, u.cli.lli_list) {
+ if (cfs_time_after(ocapa->c_expiry, tmp->c_expiry)) {
+ next = &tmp->u.cli.lli_list;
+ break;
+ }
+ }
+ LASSERT(&ocapa->u.cli.lli_list != next);
+ /* list_move_tail() links the entry just before "next" */
+ list_move_tail(&ocapa->u.cli.lli_list, next ?: &lli->lli_oss_capas);
+}
- body = lustre_msg_buf(req->rq_repmsg, offset, sizeof(*body));
- if (!body)
- return -ENOMEM;
+/* Attach OSS capa @ocapa to @inode; called from ll_add_capa() with capa_lock
+ * held. @inode must be a regular file.
+ *
+ * If the inode already has a capa covering the same OSS opc bits, that capa
+ * takes over @ocapa's c_capa contents, @ocapa is released with capa_put() and
+ * the existing capa is used. Either way the resulting capa is (re)sorted into
+ * lli_oss_capas and returned.
+ */
+static struct obd_capa *do_add_oss_capa(struct inode *inode,
+ struct obd_capa *ocapa)
+{
+ struct obd_capa *old;
+ struct lustre_capa *capa = &ocapa->c_capa;
+
+ LASSERTF(S_ISREG(inode->i_mode),
+ "inode has oss capa, but not regular file, mode: %d\n",
+ inode->i_mode);
+
+ /* FIXME: can't replace it so easily with fine-grained opc */
+ old = do_lookup_oss_capa(inode, capa_opc(capa) & CAPA_OPC_OSS_ONLY);
+ if (!old) {
+ ocapa->u.cli.inode = inode;
+ INIT_LIST_HEAD(&ocapa->u.cli.lli_list);
+ capa_count[CAPA_SITE_CLIENT]++;
+
+ DEBUG_CAPA(D_SEC, capa, "add OSS");
+ } else {
+ spin_lock(&old->c_lock);
+ old->c_capa = *capa;
+ spin_unlock(&old->c_lock);
+
+ DEBUG_CAPA(D_SEC, capa, "update OSS");
+
+ capa_put(ocapa);
+ ocapa = old;
+ }
- if (!(body->valid & OBD_MD_CAPA))
- return 0;
+ /* NOTE(review): this sorts by c_expiry, but ll_add_capa() calls
+ * set_capa_expiry() only after this function returns - confirm
+ * c_expiry is already meaningful at this point */
+ inode_add_oss_capa(inode, ocapa);
+ return ocapa;
+}
- ENTRY;
- capa = (struct lustre_capa *)lustre_swab_repbuf(req, offset + 1,
- sizeof(*capa), lustre_swab_lustre_capa);
- if (!capa)
- RETURN(-ENOMEM);
+/* Register capa @ocapa for @inode.
+ *
+ * Dispatches to do_add_mds_capa() or do_add_oss_capa() under capa_lock. Except
+ * for truncate capas, the resulting capa gets its expiry set, is sorted into
+ * ll_capa_list and the renewal timer is adjusted. Sets ll_capa_debug to 1.
+ * Returns the capa now attached to the inode, which may be a pre-existing
+ * one rather than @ocapa.
+ */
+struct obd_capa *ll_add_capa(struct inode *inode, struct obd_capa *ocapa)
+{
+ spin_lock(&capa_lock);
+ ocapa = capa_for_mds(&ocapa->c_capa) ? do_add_mds_capa(inode, ocapa) :
+ do_add_oss_capa(inode, ocapa);
- ocapa = capa_renew(capa, CLIENT_CAPA);
- if (!ocapa)
- RETURN(-ENOMEM);
+ /* truncate capa won't renew */
+ if (ocapa->c_capa.lc_opc != CAPA_OPC_OSS_TRUNC) {
+ set_capa_expiry(ocapa);
+ /* unlink first: an updated capa may already be on a c_list */
+ list_del_init(&ocapa->c_list);
+ sort_add_capa(ocapa, ll_capa_list);
+
+ update_capa_timer(ocapa, capa_renewal_time(ocapa));
+ }
- spin_lock(&lli->lli_lock);
- /* in case it was linked to lli_capas already */
- if (list_empty(&ocapa->c_lli_list))
- list_add(&ocapa->c_lli_list, &lli->lli_capas);
- spin_unlock(&lli->lli_lock);
+ spin_unlock(&capa_lock);
- RETURN(0);
+ atomic_set(&ll_capa_debug, 1);
+ return ocapa;
}
-struct obd_capa *ll_get_capa(struct inode *inode, uid_t uid, int op)
+/* Push c_expiry of @oc later by @delay, which is passed through
+ * cfs_time_seconds() (the only caller in this file passes 120). */
+static inline void delay_capa_renew(struct obd_capa *oc, cfs_time_t delay)
{
- struct ll_inode_info *lli = ll_i2info(inode);
- struct obd_capa *ocapa, *tmp;
+ /* NB: set a fake expiry for this capa to prevent it renew too soon */
+ oc->c_expiry = cfs_time_add(oc->c_expiry, cfs_time_seconds(delay));
+}
+
+/*
+ * Completion handler handed to md_renew_capa() by the renewal thread.
+ *
+ * @ocapa is the capa being renewed; @capa is the renewed capability or an
+ * ERR_PTR() value.
+ *  - Error other than a retriable -EIO: @ocapa is moved to ll_idle_capas.
+ *  - -EIO while @ocapa has not expired: its expiry is pushed back by 120s
+ *    and it is re-queued on ll_capa_list for another attempt.
+ *  - Success: @capa is copied into @ocapa, the expiry is recomputed and
+ *    @ocapa is re-queued on ll_capa_list (OSS capas are also re-sorted in
+ *    the per-inode list).
+ * Every path drops the capa reference and the inode reference taken by the
+ * renewal thread.  Returns 0 on success or the error carried by @capa.
+ */
+static int ll_update_capa(struct obd_capa *ocapa, struct lustre_capa *capa)
+{
+        struct inode *inode;
+        int rc = 0;
ENTRY;
- list_for_each_entry_safe(ocapa, tmp, &lli->lli_capas, c_lli_list) {
- if (ocapa->c_capa.lc_ruid != uid)
- continue;
- if (ocapa->c_capa.lc_op != op)
- continue;
+        /* assert before the first dereference of @ocapa */
+        LASSERT(ocapa);
+        inode = ocapa->u.cli.inode;
+
+        if (IS_ERR(capa)) {
+                /* set error code */
+                rc = PTR_ERR(capa);
+                spin_lock(&capa_lock);
+                if (rc == -ENOENT) {
+                        DEBUG_CAPA(D_SEC, &ocapa->c_capa,
+                                   "renewal canceled because object removed");
+                        ll_capa_renewal_noent++;
+                } else {
+                        ll_capa_renewal_failed++;
+
+                        /* failed capa won't be renewed any longer, but if -EIO,
+                         * client might be doing recovery, retry in 2 min. */
+                        if (rc == -EIO && !capa_is_expired(ocapa)) {
+                                delay_capa_renew(ocapa, 120);
+                                DEBUG_CAPA(D_ERROR, &ocapa->c_capa,
+                                           "renewal failed: -EIO, "
+                                           "retry in 2 mins");
+                                ll_capa_renewal_retries++;
+                                /* capa_lock stays held across the jump */
+                                GOTO(retry, rc);
+                        } else {
+                                DEBUG_CAPA(D_ERROR, &ocapa->c_capa,
+                                           "renewal failed(rc: %d) for", rc);
+                        }
+                }
- RETURN(ocapa);
- }
-
- RETURN(NULL);
+                list_del_init(&ocapa->c_list);
+                sort_add_capa(ocapa, &ll_idle_capas);
+                spin_unlock(&capa_lock);
+
+                capa_put(ocapa);
+                iput(inode);
+                RETURN(rc);
+        }
+
+        spin_lock(&ocapa->c_lock);
+        /* the fields laid out before lc_opc must be unchanged by a renewal */
+        LASSERT(!memcmp(&ocapa->c_capa, capa,
+                        offsetof(struct lustre_capa, lc_opc)));
+        ocapa->c_capa = *capa;
+        set_capa_expiry(ocapa);
+        spin_unlock(&ocapa->c_lock);
+
+        spin_lock(&capa_lock);
+        if (capa_for_oss(capa))
+                inode_add_oss_capa(inode, ocapa);
+        DEBUG_CAPA(D_SEC, capa, "renew");
+        EXIT;
+retry:
+        /* reached with capa_lock held, on success and on -EIO retry */
+        list_del_init(&ocapa->c_list);
+        sort_add_capa(ocapa, ll_capa_list);
+        update_capa_timer(ocapa, capa_renewal_time(ocapa));
+        spin_unlock(&capa_lock);
+
+        capa_put(ocapa);
+        iput(inode);
+        return rc;
+}
+
+/*
+ * Count an open of @inode for capa renewal purposes.  Does nothing unless
+ * the mount has MDS or OSS capabilities enabled and @inode is a regular
+ * file.
+ */
+void ll_capa_open(struct inode *inode)
+{
+        if ((ll_i2sbi(inode)->ll_flags &
+             (LL_SBI_MDS_CAPA | LL_SBI_OSS_CAPA)) != 0 &&
+            S_ISREG(inode->i_mode))
+                atomic_inc(&ll_i2info(inode)->lli_open_count);
+}
+
+/*
+ * Undo one ll_capa_open() on @inode.  Does nothing unless the mount has MDS
+ * or OSS capabilities enabled and @inode is a regular file.
+ */
+void ll_capa_close(struct inode *inode)
+{
+        if ((ll_i2sbi(inode)->ll_flags &
+             (LL_SBI_MDS_CAPA | LL_SBI_OSS_CAPA)) != 0 &&
+            S_ISREG(inode->i_mode))
+                atomic_dec(&ll_i2info(inode)->lli_open_count);
+}
+
+/* delete CAPA_OPC_OSS_TRUNC only */
+/* Drops the caller's lookup reference on @ocapa and, when its opc is exactly
+ * CAPA_OPC_OSS_TRUNC, deletes the capa under capa_lock. NULL is accepted and
+ * ignored. */
+void ll_truncate_free_capa(struct obd_capa *ocapa)
+{
+ if (!ocapa)
+ return;
+
+ LASSERT(ocapa->c_capa.lc_opc & CAPA_OPC_OSS_TRUNC);
+ DEBUG_CAPA(D_SEC, &ocapa->c_capa, "free truncate");
+
+ /* release ref when find */
+ capa_put(ocapa);
+ /* NOTE(review): lc_opc is read after the reference above was dropped;
+ * this presumably relies on the allocation reference still being held
+ * - confirm no concurrent ll_delete_capa() can free it first */
+ if (likely(ocapa->c_capa.lc_opc == CAPA_OPC_OSS_TRUNC)) {
+ spin_lock(&capa_lock);
+ ll_delete_capa(ocapa);
+ spin_unlock(&capa_lock);
+ }
+}
+
+/*
+ * Delete every client capa attached to @inode: the MDS capa, if any, and
+ * all entries of lli_oss_capas.  The whole sweep runs under capa_lock.
+ */
+void ll_clear_inode_capas(struct inode *inode)
+{
+        struct ll_inode_info *lli = ll_i2info(inode);
+        struct obd_capa *oc, *next;
+
+        spin_lock(&capa_lock);
+
+        if (lli->lli_mds_capa)
+                ll_delete_capa(lli->lli_mds_capa);
+
+        /* ll_delete_capa() unlinks the entry, hence the _safe iterator */
+        list_for_each_entry_safe(oc, next, &lli->lli_oss_capas,
+                                 u.cli.lli_list)
+                ll_delete_capa(oc);
+
+        spin_unlock(&capa_lock);
+}
+
+/*
+ * Print the capa renewal counters to the console.  Nothing is printed when
+ * neither MDS nor OSS capabilities are enabled in @sbi.
+ */
+void ll_print_capa_stat(struct ll_sb_info *sbi)
+{
+        if (!(sbi->ll_flags & (LL_SBI_MDS_CAPA | LL_SBI_OSS_CAPA)))
+                return;
+
+        LCONSOLE_INFO("Fid capabilities renewed: %llu\n"
+                      "Fid capabilities renewal ENOENT: %llu\n"
+                      "Fid capabilities failed to renew: %llu\n"
+                      "Fid capabilities renewal retries: %llu\n",
+                      ll_capa_renewed, ll_capa_renewal_noent,
+                      ll_capa_renewal_failed, ll_capa_renewal_retries);
}