/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
- * lustre/mds/mds_orphan.c
+ * GPL HEADER START
*
- * Copyright (c) 2001-2003 Cluster File Systems, Inc.
- * Author: Peter Braam <braam@clusterfs.com>
- * Author: Andreas Dilger <adilger@clusterfs.com>
- * Author: Phil Schwan <phil@clusterfs.com>
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
- * This file is part of the Lustre file system, http://www.lustre.org
- * Lustre is a trademark of Cluster File Systems, Inc.
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
*
- * You may have signed or agreed to another license before downloading
- * this software. If so, you are bound by the terms and conditions
- * of that agreement, and the following does not apply to you. See the
- * LICENSE file included with this distribution for more information.
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
*
- * If you did not agree to a different license, then this copy of Lustre
- * is open source software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
*
- * In either case, Lustre is distributed in the hope that it will be
- * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * license text for more details.
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/mds/mds_unlink_open.c
+ *
+ * Author: Peter Braam <braam@clusterfs.com>
+ * Author: Andreas Dilger <adilger@clusterfs.com>
+ * Author: Phil Schwan <phil@clusterfs.com>
*/
/* code for handling open unlinked files */
#include <obd_class.h>
#include <lustre_fsfilt.h>
#include <lustre_mds.h>
-#include <lustre_commit_confd.h>
#include <lvfs.h>
#include "mds_internal.h"
ENTRY;
LASSERT(mds->mds_osc_obd != NULL);
-
+
/* We don't need to do any of these other things for orhpan dirs,
* especially not mds_get_md (may get a default LOV EA, bug 4554) */
mode = inode->i_mode;
if (S_ISDIR(mode)) {
- rc = vfs_rmdir(pending_dir, dchild);
+ rc = ll_vfs_rmdir(pending_dir, dchild, mds->mds_vfsmnt);
if (rc)
CERROR("error %d unlinking dir %*s from PENDING\n",
rc, dchild->d_name.len, dchild->d_name.name);
if (lmm == NULL)
RETURN(-ENOMEM);
- rc = mds_get_md(obd, inode, lmm, &lmm_size, 1, 0);
+ rc = mds_get_md(obd, inode, lmm, &lmm_size, 1, 0, 0);
if (rc < 0)
GOTO(out_free_lmm, rc);
GOTO(out_free_lmm, rc);
}
- rc = vfs_unlink(pending_dir, dchild);
+ rc = ll_vfs_unlink(pending_dir, dchild, mds->mds_vfsmnt);
if (rc) {
CERROR("error %d unlinking orphan %.*s from PENDING\n",
rc, dchild->d_name.len, dchild->d_name.name);
RETURN(rc);
}
+static __u64 mds_orphans_max_version(struct obd_device *obd)
+{
+ struct obd_export *exp;
+ __u32 epoch = lr_epoch(obd->u.mds.mds_last_transno);
+ spin_lock(&obd->obd_dev_lock);
+ list_for_each_entry(exp, &obd->obd_delayed_exports, exp_obd_chain) {
+ struct lu_export_data *led = &exp->exp_target_data;
+ epoch = min(epoch, le32_to_cpu(led->led_lcd->lcd_first_epoch));
+ }
+ spin_unlock(&obd->obd_dev_lock);
+ return (__u64)epoch << LR_EPOCH_BITS;
+}
+
/* Delete inodes which were previously open-unlinked but were not reopened
* during MDS recovery for whatever reason (e.g. client also failed, recovery
* aborted, etc). */
struct list_head dentry_list;
char d_name[LL_FID_NAMELEN];
unsigned long inum;
+ __u64 max_version;
int i = 0, rc = 0, item = 0, namlen;
ENTRY;
if (IS_ERR(file))
GOTO(err_pop, rc = PTR_ERR(file));
- INIT_LIST_HEAD(&dentry_list);
+ CFS_INIT_LIST_HEAD(&dentry_list);
rc = l_readdir(file, &dentry_list);
filp_close(file, 0);
if (rc < 0)
GOTO(err_out, rc);
+ /** Get maximum version for orphans to delete. All other orphans may be
+ * needed for delayed clients */
+ max_version = mds_orphans_max_version(obd);
+
list_for_each_entry_safe(dirent, n, &dentry_list, lld_list) {
+ __u64 version;
+
i++;
list_del(&dirent->lld_list);
LASSERT(sizeof(d_name) >= namlen + 1);
strcpy(d_name, dirent->lld_name);
inum = dirent->lld_ino;
- OBD_FREE(dirent, sizeof(*dirent));
+ OBD_FREE_PTR(dirent);
CDEBUG(D_INODE, "entry %d of PENDING DIR: %s\n", i, d_name);
obd->obd_name, d_name);
GOTO(next, rc = 0);
}
+ /** Keep orphans for possible use by delayed exports. Remove
+ * orphans with version lower than minimal one of all exports */
+ version = fsfilt_get_version(obd, child_inode);
+ if ((__s64)version != -EOPNOTSUPP &&
+ version >= max_version) {
+ MDS_UP_READ_ORPHAN_SEM(child_inode);
+ CDEBUG(D_INFO,
+ "%s: orphan %s is needed for delayed exports\n",
+ obd->obd_name, d_name);
+ GOTO(next, rc = 0);
+ }
MDS_UP_READ_ORPHAN_SEM(child_inode);
rc = mds_unlink_orphan(obd, dchild, child_inode, pending_dir);
l_dput(mds->mds_pending_dir);
goto err_pop;
}
+
+/**
+ * Determine there is no orphan with the same inode number. That may happens
+ * since unlink replay don't delete inode but keep orphan for delayed clients.
+ * Therefore replays like 'create, unlink, create' will fail due to inode can't
+ * be reused.
+ */
+int mds_check_stale_orphan(struct obd_device *obd, struct ll_fid *fid)
+{
+ struct mds_obd *mds = &obd->u.mds;
+ char fidname[32];
+ struct dentry *result;
+ struct inode *inode, *pending_dir = mds->mds_pending_dir->d_inode;
+ int fidlen = 0, rc = 0;
+
+ /* no need in checks*/
+ if (fid->id == 0 || obd->obd_recovering == 0)
+ RETURN(0);
+
+ /** open by fid like mds_fid2dentry does */
+ snprintf(fidname, sizeof(fidname), "0x%lx", (unsigned long)(fid->id));
+ fidlen = strlen(fidname);
+ result = mds_lookup(obd, fidname, mds->mds_fid_de, fidlen);
+ if (IS_ERR(result))
+ RETURN(0);
+ inode = result->d_inode;
+ if (!inode)
+ GOTO(out, rc = 0);
+
+ LOCK_INODE_MUTEX(pending_dir);
+ MDS_DOWN_READ_ORPHAN_SEM(inode);
+ if (mds_inode_is_orphan(inode)) {
+ struct dentry *orphan;
+
+ /* The exactly same inode can't be orphan */
+ LASSERT(inode->i_generation != fid->generation);
+
+ if (mds_orphan_open_count(inode) > 0) {
+ CERROR("Orphan "LPU64"/%u is in use!\n",
+ fid->id, fid->generation);
+ GOTO(unlock_child, rc = -EFAULT);
+ }
+
+ /** Found orphan in pending dir and delete it */
+ fidlen = ll_fid2str(fidname, fid->id, inode->i_generation);
+ orphan = lookup_one_len(fidname, mds->mds_pending_dir, fidlen);
+ if (IS_ERR(orphan)) {
+ rc = PTR_ERR(orphan);
+ CERROR("error looking up %s in PENDING: rc = %d\n",
+ fidname, rc);
+ GOTO(unlock_child, rc);
+ }
+ if (orphan->d_inode != inode) {
+ l_dput(orphan);
+ CWARN("%s: Found wrong orphan %s %p/%p\n",
+ obd->obd_name, fidname, orphan->d_inode, inode);
+ GOTO(unlock_child, rc = -EFAULT);
+ }
+ MDS_UP_READ_ORPHAN_SEM(inode);
+
+ rc = mds_unlink_orphan(obd, orphan, inode, pending_dir);
+ CDEBUG(D_INODE, "%s: removed orphan %s: rc %d\n",
+ obd->obd_name, fidname, rc);
+ l_dput(orphan);
+ GOTO(unlock, rc);
+ }
+unlock_child:
+ MDS_UP_READ_ORPHAN_SEM(inode);
+unlock:
+ UNLOCK_INODE_MUTEX(pending_dir);
+out:
+ l_dput(result);
+ RETURN(0);
+}