* Author: Andreas Dilger <adilger@clusterfs.com>
* Author: Phil Schwan <phil@clusterfs.com>
*
- * This file is part of Lustre, http://www.lustre.org.
+ * This file is part of the Lustre file system, http://www.lustre.org
+ * Lustre is a trademark of Cluster File Systems, Inc.
*
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
+ * You may have signed or agreed to another license before downloading
+ * this software. If so, you are bound by the terms and conditions
+ * of that agreement, and the following does not apply to you. See the
+ * LICENSE file included with this distribution for more information.
*
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
+ * If you did not agree to a different license, then this copy of Lustre
+ * is open source software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
*
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * In either case, Lustre is distributed in the hope that it will be
+ * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * license text for more details.
*/
/* code for handling open unlinked files */
#define DEBUG_SUBSYSTEM S_MDS
+#ifndef AUTOCONF_INCLUDED
#include <linux/config.h>
+#endif
#include <linux/module.h>
#include <linux/version.h>
-#include <portals/list.h>
-#include <linux/obd_class.h>
-#include <linux/lustre_fsfilt.h>
-#include <linux/lustre_commit_confd.h>
-#include <linux/lvfs.h>
+#include <libcfs/list.h>
+#include <obd_class.h>
+#include <lustre_fsfilt.h>
+#include <lustre_mds.h>
+#include <lustre_commit_confd.h>
+#include <lvfs.h>
#include "mds_internal.h"
-
-/* If we are unlinking an open file/dir (i.e. creating an orphan) then
- * we instead link the inode into the PENDING directory until it is
- * finally released. We can't simply call mds_reint_rename() or some
- * part thereof, because we don't have the inode to check for link
- * count/open status until after it is locked.
- *
- * For lock ordering, we always get the PENDING, then pending_child lock
- * last to avoid deadlocks.
- */
-
-int mds_open_unlink_rename(struct mds_update_record *rec,
- struct obd_device *obd, struct dentry *dparent,
- struct dentry *dchild, void **handle)
+int mds_osc_destroy_orphan(struct obd_device *obd,
+ umode_t mode,
+ struct lov_mds_md *lmm,
+ int lmm_size,
+ struct llog_cookie *logcookies,
+ int log_unlink)
{
struct mds_obd *mds = &obd->u.mds;
- struct inode *pending_dir = mds->mds_pending_dir->d_inode;
- struct dentry *pending_child;
- char fidname[LL_FID_NAMELEN];
- int fidlen = 0, rc;
- ENTRY;
-
- LASSERT(!mds_inode_is_orphan(dchild->d_inode));
-
- down(&pending_dir->i_sem);
- fidlen = ll_fid2str(fidname, dchild->d_inode->i_ino,
- dchild->d_inode->i_generation);
-
- CWARN("pending destroy of %dx open file %s = %s\n",
- mds_open_orphan_count(dchild->d_inode),
- rec->ur_name, fidname);
-
- pending_child = lookup_one_len(fidname, mds->mds_pending_dir, fidlen);
- if (IS_ERR(pending_child))
- GOTO(out_lock, rc = PTR_ERR(pending_child));
-
- if (pending_child->d_inode != NULL) {
- CERROR("re-destroying orphan file %s?\n", rec->ur_name);
- LASSERT(pending_child->d_inode == dchild->d_inode);
- GOTO(out_dput, rc = 0);
- }
-
- *handle = fsfilt_start(obd, pending_dir, FSFILT_OP_RENAME, NULL);
- if (IS_ERR(*handle))
- GOTO(out_dput, rc = PTR_ERR(*handle));
-
- lock_kernel();
- rc = vfs_rename(dparent->d_inode, dchild, pending_dir, pending_child);
- unlock_kernel();
- if (rc)
- CERROR("error renaming orphan %lu/%s to PENDING: rc = %d\n",
- dparent->d_inode->i_ino, rec->ur_name, rc);
- else
- mds_inode_set_orphan(dchild->d_inode);
-out_dput:
- dput(pending_child);
-out_lock:
- up(&pending_dir->i_sem);
- RETURN(rc);
-}
-
-static int mds_osc_destroy_orphan(struct mds_obd *mds,
- struct ptlrpc_request *request)
-{
- struct mds_body *body;
- struct lov_mds_md *lmm = NULL;
struct lov_stripe_md *lsm = NULL;
struct obd_trans_info oti = { 0 };
struct obdo *oa;
int rc;
ENTRY;
- body = lustre_msg_buf(request->rq_repmsg, 0, sizeof(*body));
- if (!(body->valid & OBD_MD_FLEASIZE))
+ if (lmm_size == 0)
RETURN(0);
- if (body->eadatasize == 0) {
- CERROR("OBD_MD_FLEASIZE set but eadatasize zero\n");
- RETURN(rc = -EPROTO);
- }
- lmm = lustre_msg_buf(request->rq_repmsg, 1, body->eadatasize);
- LASSERT(lmm != NULL);
-
- rc = obd_unpackmd(mds->mds_osc_exp, &lsm, lmm, body->eadatasize);
+ rc = obd_unpackmd(mds->mds_osc_exp, &lsm, lmm, lmm_size);
if (rc < 0) {
CERROR("Error unpack md %p\n", lmm);
RETURN(rc);
rc = 0;
}
- oa = obdo_alloc();
+ rc = obd_checkmd(mds->mds_osc_exp, obd->obd_self_export, lsm);
+ if (rc)
+ GOTO(out_free_memmd, rc);
+
+ OBDO_ALLOC(oa);
if (oa == NULL)
GOTO(out_free_memmd, rc = -ENOMEM);
oa->o_id = lsm->lsm_object_id;
- oa->o_mode = body->mode & S_IFMT;
- oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE;
+ oa->o_gr = lsm->lsm_object_gr;
+ oa->o_mode = mode & S_IFMT;
+ oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLGROUP;
-#ifdef ENABLE_ORPHANS
- if (body->valid & OBD_MD_FLCOOKIE) {
+ if (log_unlink && logcookies) {
oa->o_valid |= OBD_MD_FLCOOKIE;
- oti.oti_logcookies =
- lustre_msg_buf(request->rq_repmsg, 2,
- sizeof(struct llog_cookie) *
- lsm->lsm_stripe_count);
- if (oti.oti_logcookies == NULL)
- oa->o_valid &= ~OBD_MD_FLCOOKIE;
- body->valid &= ~OBD_MD_FLCOOKIE;
+ oti.oti_logcookies = logcookies;
}
-#endif
-
- rc = obd_destroy(mds->mds_osc_exp, oa, lsm, &oti);
- obdo_free(oa);
- if (rc)
- CERROR("destroy orphan objid 0x"LPX64" on ost error "
+ rc = obd_destroy(mds->mds_osc_exp, oa, lsm, &oti, obd->obd_self_export);
+ OBDO_FREE(oa);
+ if (rc)
+ CDEBUG(D_INODE, "destroy orphan objid 0x"LPX64" on ost error "
"%d\n", lsm->lsm_object_id, rc);
out_free_memmd:
obd_free_memmd(mds->mds_osc_exp, &lsm);
}
static int mds_unlink_orphan(struct obd_device *obd, struct dentry *dchild,
- struct inode *inode, struct inode *pending_dir)
+ struct inode *inode, struct inode *pending_dir)
{
struct mds_obd *mds = &obd->u.mds;
- struct mds_body *body;
+ struct lov_mds_md *lmm = NULL;
+ struct llog_cookie *logcookies = NULL;
+ int lmm_size, log_unlink = 0, cookie_size = 0;
void *handle = NULL;
- struct ptlrpc_request *req;
- int lengths[3] = {sizeof(struct mds_body),
- mds->mds_max_mdsize,
- mds->mds_max_cookiesize};
- int rc;
+ umode_t mode;
+ int rc, err;
ENTRY;
LASSERT(mds->mds_osc_obd != NULL);
- OBD_ALLOC(req, sizeof(*req));
- if (!req) {
- CERROR("request allocation out of memory\n");
- GOTO(err_alloc_req, rc = -ENOMEM);
- }
- rc = lustre_pack_reply(req, 3, lengths, NULL);
- if (rc) {
- CERROR("cannot pack request %d\n", rc);
- GOTO(out_free_req, rc);
+
+ /* We don't need to do any of these other things for orphan dirs,
+ * especially not mds_get_md (may get a default LOV EA, bug 4554) */
+ mode = inode->i_mode;
+ if (S_ISDIR(mode)) {
+ rc = vfs_rmdir(pending_dir, dchild);
+ if (rc)
+ CERROR("error %d unlinking dir %*s from PENDING\n",
+ rc, dchild->d_name.len, dchild->d_name.name);
+ RETURN(rc);
}
- body = lustre_msg_buf(req->rq_repmsg, 0, sizeof(*body));
- LASSERT(body != NULL);
- mds_pack_inode2body(body, inode);
- mds_pack_md(obd, req->rq_repmsg, 1, body, inode, 1);
+ lmm_size = mds->mds_max_mdsize;
+ OBD_ALLOC(lmm, lmm_size);
+ if (lmm == NULL)
+ RETURN(-ENOMEM);
+
+ rc = mds_get_md(obd, inode, lmm, &lmm_size, 1);
+ if (rc < 0)
+ GOTO(out_free_lmm, rc);
- handle = fsfilt_start(obd, pending_dir, FSFILT_OP_UNLINK_LOG, NULL);
+ handle = fsfilt_start_log(obd, pending_dir, FSFILT_OP_UNLINK, NULL,
+ le32_to_cpu(lmm->lmm_stripe_count));
if (IS_ERR(handle)) {
rc = PTR_ERR(handle);
CERROR("error fsfilt_start: %d\n", rc);
handle = NULL;
- GOTO(out_free_msg, rc);
+ GOTO(out_free_lmm, rc);
}
- rc = vfs_unlink(pending_dir, dchild);
- if (rc)
- CERROR("error unlinking orphan from PENDING directory");
-#ifdef ENABLE_ORPHANS
- if ((body->valid & OBD_MD_FLEASIZE)) {
- if (mds_log_op_unlink(obd, inode, req->rq_repmsg, 1) > 0)
- body->valid |= OBD_MD_FLCOOKIE;
+ rc = vfs_unlink(pending_dir, dchild);
+ if (rc) {
+ CERROR("error %d unlinking orphan %.*s from PENDING\n",
+ rc, dchild->d_name.len, dchild->d_name.name);
+ } else if (lmm_size) {
+ cookie_size = mds_get_cookie_size(obd, lmm);
+ OBD_ALLOC(logcookies, cookie_size);
+ if (logcookies == NULL)
+ rc = -ENOMEM;
+ else if (mds_log_op_unlink(obd, lmm,lmm_size,logcookies,
+ cookie_size) > 0)
+ log_unlink = 1;
}
-#endif
- if (handle) {
- int err = fsfilt_commit(obd, pending_dir, handle, 0);
- if (err) {
- CERROR("error committing orphan unlink: %d\n",
- err);
+
+ err = fsfilt_commit(obd, pending_dir, handle, 0);
+ if (err) {
+ CERROR("error committing orphan unlink: %d\n", err);
+ if (!rc)
rc = err;
- GOTO(out_free_msg, rc);
- }
+ } else if (!rc) {
+ rc = mds_osc_destroy_orphan(obd, mode, lmm, lmm_size,
+ logcookies, log_unlink);
}
- rc = mds_osc_destroy_orphan(mds, req);
-out_free_msg:
- OBD_FREE(req->rq_repmsg, req->rq_replen);
- req->rq_repmsg = NULL;
-out_free_req:
- OBD_FREE(req, sizeof(*req));
-err_alloc_req:
+
+ if (logcookies != NULL)
+ OBD_FREE(logcookies, cookie_size);
+out_free_lmm:
+ OBD_FREE(lmm, mds->mds_max_mdsize);
RETURN(rc);
}
-int mds_cleanup_orphans(struct obd_device *obd)
+/* Delete inodes which were previously open-unlinked but were not reopened
+ * during MDS recovery for whatever reason (e.g. client also failed, recovery
+ * aborted, etc). */
+int mds_cleanup_pending(struct obd_device *obd)
{
struct mds_obd *mds = &obd->u.mds;
- struct obd_run_ctxt saved;
+ struct lvfs_run_ctxt saved;
struct file *file;
- struct dentry *dchild;
+ struct dentry *dchild, *dentry;
+ struct vfsmount *mnt;
struct inode *child_inode, *pending_dir = mds->mds_pending_dir->d_inode;
- struct l_linux_dirent *dirent, *ptr;
- unsigned int count = pending_dir->i_size;
- int rc = 0, rc2 = 0, item = 0;
+ struct l_linux_dirent *dirent, *n;
+ struct list_head dentry_list;
+ char d_name[LL_FID_NAMELEN];
+ unsigned long inum;
+ int i = 0, rc = 0, item = 0, namlen;
ENTRY;
- push_ctxt(&saved, &obd->obd_ctxt, NULL);
- dget(mds->mds_pending_dir);
- mntget(mds->mds_vfsmnt);
+ push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+ /* The dentry and mnt references are dropped by dentry_open() on error,
+ * or by filp_close() if dentry_open() succeeds */
+ dentry = dget(mds->mds_pending_dir);
+ if (IS_ERR(dentry))
+ GOTO(err_pop, rc = PTR_ERR(dentry));
+ mnt = mntget(mds->mds_vfsmnt);
+ if (IS_ERR(mnt))
+ GOTO(err_mntget, rc = PTR_ERR(mnt));
+
file = dentry_open(mds->mds_pending_dir, mds->mds_vfsmnt,
O_RDONLY | O_LARGEFILE);
if (IS_ERR(file))
- GOTO(err_open, rc2 = PTR_ERR(file));
-
- OBD_ALLOC(dirent, count);
- if (dirent == NULL)
- GOTO(err_alloc_dirent, rc2 = -ENOMEM);
+ GOTO(err_pop, rc = PTR_ERR(file));
- rc = l_readdir(file, dirent, count);
+ INIT_LIST_HEAD(&dentry_list);
+ rc = l_readdir(file, &dentry_list);
filp_close(file, 0);
if (rc < 0)
- GOTO(err_out, rc2 = rc);
+ GOTO(err_out, rc);
+
+ list_for_each_entry_safe(dirent, n, &dentry_list, lld_list) {
+ i++;
+ list_del(&dirent->lld_list);
+
+ namlen = strlen(dirent->lld_name);
+ LASSERT(sizeof(d_name) >= namlen + 1);
+ strcpy(d_name, dirent->lld_name);
+ inum = dirent->lld_ino;
+ OBD_FREE(dirent, sizeof(*dirent));
- for (ptr = dirent; (char *)ptr < (char *)dirent + rc;
- (char *)ptr += ptr->d_reclen) {
- int namlen = strlen(ptr->d_name);
+ CDEBUG(D_INODE, "entry %d of PENDING DIR: %s\n", i, d_name);
- if (((namlen == 1) && !strcmp(ptr->d_name, ".")) ||
- ((namlen == 2) && !strcmp(ptr->d_name, "..")))
+ if (((namlen == 1) && !strcmp(d_name, ".")) ||
+ ((namlen == 2) && !strcmp(d_name, "..")) || inum == 0)
continue;
- down(&pending_dir->i_sem);
- dchild = lookup_one_len(ptr->d_name, mds->mds_pending_dir,
- namlen);
+ LOCK_INODE_MUTEX(pending_dir);
+ dchild = lookup_one_len(d_name, mds->mds_pending_dir, namlen);
if (IS_ERR(dchild)) {
- up(&pending_dir->i_sem);
- GOTO(err_out, rc2 = PTR_ERR(dchild));
+ UNLOCK_INODE_MUTEX(pending_dir);
+ GOTO(err_out, rc = PTR_ERR(dchild));
}
if (!dchild->d_inode) {
- CDEBUG(D_ERROR, "orphan %s has been removed\n",
- ptr->d_name);
- GOTO(next, rc2 = 0);
+ CWARN("%s: orphan %s has already been removed\n",
+ obd->obd_name, d_name);
+ GOTO(next, rc = 0);
}
- child_inode = dchild->d_inode;
- if (mds_inode_is_orphan(child_inode) &&
- mds_open_orphan_count(child_inode)) {
- CWARN("orphan %s was re-opened during recovery\n",
- ptr->d_name);
- GOTO(next, rc2 = 0);
+ if (is_bad_inode(dchild->d_inode)) {
+ CERROR("%s: bad orphan inode found %lu/%u\n",
+ obd->obd_name, dchild->d_inode->i_ino,
+ dchild->d_inode->i_generation);
+ GOTO(next, rc = -ENOENT);
}
- rc2 = mds_unlink_orphan(obd, dchild, child_inode, pending_dir);
- if (rc2 == 0) {
- item ++;
- CWARN("removed orphan %s from MDS and OST\n",
- ptr->d_name);
- } else {
- l_dput(dchild);
- up(&pending_dir->i_sem);
- GOTO(err_out, rc2);
+ child_inode = dchild->d_inode;
+ MDS_DOWN_READ_ORPHAN_SEM(child_inode);
+ if (mds_inode_is_orphan(child_inode) &&
+ mds_orphan_open_count(child_inode)) {
+ MDS_UP_READ_ORPHAN_SEM(child_inode);
+ CWARN("%s: orphan %s re-opened during recovery\n",
+ obd->obd_name, d_name);
+ GOTO(next, rc = 0);
}
+ MDS_UP_READ_ORPHAN_SEM(child_inode);
+
+ rc = mds_unlink_orphan(obd, dchild, child_inode, pending_dir);
+ CDEBUG(D_INODE, "%s: removed orphan %s: rc %d\n",
+ obd->obd_name, d_name, rc);
+ if (rc == 0)
+ item++;
+ else
+ rc = 0;
next:
l_dput(dchild);
- up(&pending_dir->i_sem);
+ UNLOCK_INODE_MUTEX(pending_dir);
}
+ rc = 0;
err_out:
- OBD_FREE(dirent, count);
+ list_for_each_entry_safe(dirent, n, &dentry_list, lld_list) {
+ list_del(&dirent->lld_list);
+ OBD_FREE(dirent, sizeof(*dirent));
+ }
err_pop:
- pop_ctxt(&saved, &obd->obd_ctxt, NULL);
- if (rc2 == 0)
- rc2 = item;
-
- RETURN(rc2);
+ pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+ if (item > 0)
+ CWARN("%s: removed %d pending open-unlinked files\n",
+ obd->obd_name, item);
+ RETURN(rc);
-err_open:
- mntput(mds->mds_vfsmnt);
+err_mntget:
l_dput(mds->mds_pending_dir);
goto err_pop;
-err_alloc_dirent:
- filp_close(file, 0);
- goto err_pop;
}