1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
6 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 only,
10 * as published by the Free Software Foundation.
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License version 2 for more details (a copy is included
16 * in the LICENSE file that accompanied this code).
18 * You should have received a copy of the GNU General Public License
19 * version 2 along with this program; If not, see
20 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
22 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23 * CA 95054 USA or visit www.sun.com if you need additional information or
29 * Copyright 2008 Sun Microsystems, Inc. All rights reserved
30 * Use is subject to license terms.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
36 * lustre/mds/mds_unlink_open.c
38 * Author: Peter Braam <braam@clusterfs.com>
39 * Author: Andreas Dilger <adilger@clusterfs.com>
40 * Author: Phil Schwan <phil@clusterfs.com>
43 /* code for handling open unlinked files */
45 #define DEBUG_SUBSYSTEM S_MDS
47 #ifndef AUTOCONF_INCLUDED
48 #include <linux/config.h>
50 #include <linux/module.h>
51 #include <linux/version.h>
53 #include <libcfs/list.h>
54 #include <obd_class.h>
55 #include <lustre_fsfilt.h>
56 #include <lustre_mds.h>
59 #include "mds_internal.h"
/*
 * Destroy the OST objects that back an orphan, as described by its LOV EA.
 *
 * The packed striping data (lmm, lmm_size) is unpacked into an in-memory
 * lsm through the MDS's OSC export, checked with obd_checkmd(), and then
 * passed to obd_destroy() together with an obdo that carries the object id
 * and the file-type bits of mode.  When both log_unlink and logcookies are
 * set, the llog cookies are attached to the request through oti and the
 * OBD_MD_FLCOOKIE valid bit.
 *
 * The unpacked lsm is released with obd_free_memmd().
 */
61 int mds_osc_destroy_orphan(struct obd_device *obd,
63 struct lov_mds_md *lmm,
65 struct llog_cookie *logcookies,
68 struct mds_obd *mds = &obd->u.mds;
69 struct lov_stripe_md *lsm = NULL;
70 struct obd_trans_info oti = { 0 };
/* Unpack the LOV EA into an in-memory stripe descriptor (lsm). */
78 rc = obd_unpackmd(mds->mds_osc_exp, &lsm, lmm, lmm_size);
80 CERROR("Error unpack md %p\n", lmm);
/* The unpack result is asserted to be at least sizeof(*lsm). */
83 LASSERT(rc >= sizeof(*lsm));
87 rc = obd_checkmd(mds->mds_osc_exp, obd->obd_self_export, lsm);
89 GOTO(out_free_memmd, rc);
93 GOTO(out_free_memmd, rc = -ENOMEM);
/* Describe the object to destroy; id, type and group are marked valid. */
94 oa->o_id = lsm->lsm_object_id;
96 oa->o_mode = mode & S_IFMT;
97 oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLGROUP;
/* Hand over the unlink llog cookies only when logging was requested and
 * a cookie buffer was supplied. */
99 if (log_unlink && logcookies) {
100 oa->o_valid |= OBD_MD_FLCOOKIE;
101 oti.oti_logcookies = logcookies;
103 rc = obd_destroy(mds->mds_osc_exp, oa, lsm, &oti, obd->obd_self_export);
106 CDEBUG(D_INODE, "destroy orphan objid 0x"LPX64" on ost error "
107 "%d\n", lsm->lsm_object_id, rc);
109 obd_free_memmd(mds->mds_osc_exp, &lsm);
/*
 * Remove one orphan entry (dchild / inode) from the PENDING directory and
 * destroy its OST objects.
 *
 * Directory orphans are removed with ll_vfs_rmdir().  For other inodes the
 * LOV EA is read with mds_get_md(), the PENDING entry is unlinked inside an
 * fsfilt transaction, the unlink is logged with mds_log_op_unlink() when
 * striping data is present (lmm_size != 0), and mds_osc_destroy_orphan() is
 * then called for the objects.  The cookie buffer and the lmm buffer are
 * freed at the end.
 */
113 static int mds_unlink_orphan(struct obd_device *obd, struct dentry *dchild,
114 struct inode *inode, struct inode *pending_dir)
116 struct mds_obd *mds = &obd->u.mds;
117 struct lov_mds_md *lmm = NULL;
118 struct llog_cookie *logcookies = NULL;
119 int lmm_size, log_unlink = 0, cookie_size = 0;
125 LASSERT(mds->mds_osc_obd != NULL);
127 /* We don't need to do any of these other things for orphan dirs,
128 * especially not mds_get_md (may get a default LOV EA, bug 4554) */
129 mode = inode->i_mode;
131 rc = ll_vfs_rmdir(pending_dir, dchild, mds->mds_vfsmnt);
133 CERROR("error %d unlinking dir %*s from PENDING\n",
134 rc, dchild->d_name.len, dchild->d_name.name);
/* Allocate an EA buffer of mds_max_mdsize bytes; lmm_size is passed to
 * mds_get_md() by pointer. */
138 lmm_size = mds->mds_max_mdsize;
139 OBD_ALLOC(lmm, lmm_size);
143 rc = mds_get_md(obd, inode, lmm, &lmm_size, 1, 0, 0);
145 GOTO(out_free_lmm, rc);
/* Start the unlink transaction; the EA's stripe count is passed as the
 * last argument. */
147 handle = fsfilt_start_log(obd, pending_dir, FSFILT_OP_UNLINK, NULL,
148 le32_to_cpu(lmm->lmm_stripe_count));
149 if (IS_ERR(handle)) {
150 rc = PTR_ERR(handle);
151 CERROR("error fsfilt_start: %d\n", rc);
153 GOTO(out_free_lmm, rc);
156 rc = ll_vfs_unlink(pending_dir, dchild, mds->mds_vfsmnt);
158 CERROR("error %d unlinking orphan %.*s from PENDING\n",
159 rc, dchild->d_name.len, dchild->d_name.name);
160 } else if (lmm_size) {
/* Striping data is present: allocate llog cookies sized by
 * mds_get_cookie_size() and log the unlink. */
161 cookie_size = mds_get_cookie_size(obd, lmm);
162 OBD_ALLOC(logcookies, cookie_size);
163 if (logcookies == NULL)
165 else if (mds_log_op_unlink(obd, lmm,lmm_size,logcookies,
170 err = fsfilt_commit(obd, pending_dir, handle, 0);
172 CERROR("error committing orphan unlink: %d\n", err);
176 rc = mds_osc_destroy_orphan(obd, mode, lmm, lmm_size,
177 logcookies, log_unlink);
180 if (logcookies != NULL)
181 OBD_FREE(logcookies, cookie_size);
/* lmm was allocated with mds_max_mdsize bytes, so it is freed with that
 * size rather than with lmm_size. */
183 OBD_FREE(lmm, mds->mds_max_mdsize);
/*
 * Compute the version threshold used when deciding which orphans to delete.
 *
 * Starts from the epoch of the MDS's last transno and lowers it to the
 * smallest lcd_first_epoch found among the exports on obd_delayed_exports.
 * The list is walked with obd_dev_lock held.  The resulting epoch is
 * returned shifted left by LR_EPOCH_BITS.
 */
187 static __u64 mds_orphans_max_version(struct obd_device *obd)
189 struct obd_export *exp;
190 __u32 epoch = lr_epoch(obd->u.mds.mds_last_transno);
191 spin_lock(&obd->obd_dev_lock);
192 list_for_each_entry(exp, &obd->obd_delayed_exports, exp_obd_chain) {
193 struct lu_export_data *led = &exp->exp_target_data;
/* lcd_first_epoch is converted from little-endian before comparing. */
194 epoch = min(epoch, le32_to_cpu(led->led_lcd->lcd_first_epoch));
196 spin_unlock(&obd->obd_dev_lock);
197 return (__u64)epoch << LR_EPOCH_BITS;
200 /* Delete inodes which were previously open-unlinked but were not reopened
201 * during MDS recovery for whatever reason (e.g. client also failed, recovery
/*
 * Walk the PENDING directory and remove orphans that are no longer needed.
 *
 * The PENDING directory is opened read-only and its entries are collected
 * with l_readdir() into dentry_list.  Each entry is copied out of its
 * dirent, the dirent is freed, and the name is looked up with the PENDING
 * inode mutex held.  An entry is checked for "." / ".." / inode number 0
 * before the lookup (presumably to skip it).  An orphan is reported, and
 * not passed to mds_unlink_orphan(), when:
 *   - the dentry has no inode ("has already been removed"),
 *   - the inode is bad (rc is set to -ENOENT),
 *   - it is an orphan with a non-zero open count ("re-opened during
 *     recovery"),
 *   - its version is supported and is >= the value returned by
 *     mds_orphans_max_version() ("needed for delayed exports").
 * Everything else is handed to mds_unlink_orphan().
 *
 * Afterwards any dirents still on the list are freed, the lvfs context is
 * popped, and the count held in item is reported.
 */
203 int mds_cleanup_pending(struct obd_device *obd)
205 struct mds_obd *mds = &obd->u.mds;
206 struct lvfs_run_ctxt saved;
208 struct dentry *dchild, *dentry;
209 struct vfsmount *mnt;
210 struct inode *child_inode, *pending_dir = mds->mds_pending_dir->d_inode;
211 struct l_linux_dirent *dirent, *n;
212 struct list_head dentry_list;
213 char d_name[LL_FID_NAMELEN];
216 int i = 0, rc = 0, item = 0, namlen;
/* Switch to the obd's lvfs context for the operations below; it is
 * restored by the pop_ctxt() call further down. */
219 push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
220 /* dentry and mnt ref dropped in dentry_open() on error, or
221 * in filp_close() if dentry_open() succeeds */
222 dentry = dget(mds->mds_pending_dir);
224 GOTO(err_pop, rc = PTR_ERR(dentry));
225 mnt = mntget(mds->mds_vfsmnt);
227 GOTO(err_mntget, rc = PTR_ERR(mnt));
229 file = dentry_open(mds->mds_pending_dir, mds->mds_vfsmnt,
230 O_RDONLY | O_LARGEFILE);
232 GOTO(err_pop, rc = PTR_ERR(file));
/* Collect the directory entries of PENDING into dentry_list. */
234 CFS_INIT_LIST_HEAD(&dentry_list);
235 rc = l_readdir(file, &dentry_list);
240 /** Get maximum version for orphans to delete. All other orphans may be
241 * needed for delayed clients */
242 max_version = mds_orphans_max_version(obd);
244 list_for_each_entry_safe(dirent, n, &dentry_list, lld_list) {
248 list_del(&dirent->lld_list);
/* Copy the name and inode number out of the dirent so that it can be
 * freed right away; d_name must have room for the name and its NUL. */
250 namlen = strlen(dirent->lld_name);
251 LASSERT(sizeof(d_name) >= namlen + 1);
252 strcpy(d_name, dirent->lld_name);
253 inum = dirent->lld_ino;
254 OBD_FREE_PTR(dirent);
256 CDEBUG(D_INODE, "entry %d of PENDING DIR: %s\n", i, d_name);
258 if (((namlen == 1) && !strcmp(d_name, ".")) ||
259 ((namlen == 2) && !strcmp(d_name, "..")) || inum == 0)
/* The lookup in PENDING is done with the directory's inode mutex held. */
262 LOCK_INODE_MUTEX(pending_dir);
263 dchild = lookup_one_len(d_name, mds->mds_pending_dir, namlen);
264 if (IS_ERR(dchild)) {
265 UNLOCK_INODE_MUTEX(pending_dir);
266 GOTO(err_out, rc = PTR_ERR(dchild));
268 if (!dchild->d_inode) {
269 CWARN("%s: orphan %s has already been removed\n",
270 obd->obd_name, d_name);
274 if (is_bad_inode(dchild->d_inode)) {
275 CERROR("%s: bad orphan inode found %lu/%u\n",
276 obd->obd_name, dchild->d_inode->i_ino,
277 dchild->d_inode->i_generation);
278 GOTO(next, rc = -ENOENT);
/* The orphan state and version are examined with the inode's orphan
 * semaphore held for reading; it is released on every branch below. */
281 child_inode = dchild->d_inode;
282 MDS_DOWN_READ_ORPHAN_SEM(child_inode);
283 if (mds_inode_is_orphan(child_inode) &&
284 mds_orphan_open_count(child_inode)) {
285 MDS_UP_READ_ORPHAN_SEM(child_inode);
286 CWARN("%s: orphan %s re-opened during recovery\n",
287 obd->obd_name, d_name);
290 /** Keep orphans for possible use by delayed exports. Remove
291 * orphans with version lower than minimal one of all exports */
292 version = fsfilt_get_version(obd, child_inode);
293 if ((__s64)version != -EOPNOTSUPP &&
294 version >= max_version) {
295 MDS_UP_READ_ORPHAN_SEM(child_inode);
297 "%s: orphan %s is needed for delayed exports\n",
298 obd->obd_name, d_name);
301 MDS_UP_READ_ORPHAN_SEM(child_inode);
303 rc = mds_unlink_orphan(obd, dchild, child_inode, pending_dir);
304 CDEBUG(D_INODE, "%s: removed orphan %s: rc %d\n",
305 obd->obd_name, d_name, rc);
312 UNLOCK_INODE_MUTEX(pending_dir);
/* Free any directory entries that are still on the list. */
316 list_for_each_entry_safe(dirent, n, &dentry_list, lld_list) {
317 list_del(&dirent->lld_list);
318 OBD_FREE(dirent, sizeof(*dirent));
321 pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
323 CWARN("%s: removed %d pending open-unlinked files\n",
324 obd->obd_name, item);
328 l_dput(mds->mds_pending_dir);
333 * Determine that there is no orphan with the same inode number. That may happen
334 * since unlink replay doesn't delete the inode but keeps the orphan for delayed
335 * clients. Therefore replays like 'create, unlink, create' will fail because the inode can't
/*
 * Look for an orphan that occupies the inode number given in fid and, if
 * one is found, remove it from the PENDING directory.
 *
 * The check is only relevant when fid->id is non-zero and the obd is
 * recovering.  The inode is looked up by its number (formatted as "0x%lx")
 * under mds_fid_de.  If it is an orphan:
 *   - its generation is asserted to differ from fid->generation,
 *   - an orphan with a positive open count yields rc = -EFAULT,
 *   - a failed lookup of its PENDING entry yields that lookup's error,
 *   - a PENDING entry that resolves to a different inode yields
 *     rc = -EFAULT,
 *   - otherwise the entry is removed with mds_unlink_orphan().
 */
338 int mds_check_stale_orphan(struct obd_device *obd, struct ll_fid *fid)
340 struct mds_obd *mds = &obd->u.mds;
342 struct dentry *result;
343 struct inode *inode, *pending_dir = mds->mds_pending_dir->d_inode;
344 int fidlen = 0, rc = 0;
346 /* no checks are needed */
347 if (fid->id == 0 || obd->obd_recovering == 0)
350 /** open by fid like mds_fid2dentry does */
351 snprintf(fidname, sizeof(fidname), "0x%lx", (unsigned long)(fid->id));
352 fidlen = strlen(fidname);
353 result = mds_lookup(obd, fidname, mds->mds_fid_de, fidlen);
356 inode = result->d_inode;
/* Take the PENDING directory mutex first, then the inode's orphan
 * semaphore for reading. */
360 LOCK_INODE_MUTEX(pending_dir);
361 MDS_DOWN_READ_ORPHAN_SEM(inode);
362 if (mds_inode_is_orphan(inode)) {
363 struct dentry *orphan;
365 /* The exact same inode can't be an orphan */
366 LASSERT(inode->i_generation != fid->generation);
368 if (mds_orphan_open_count(inode) > 0) {
369 CERROR("Orphan "LPU64"/%u is in use!\n",
370 fid->id, fid->generation);
371 GOTO(unlock_child, rc = -EFAULT);
374 /** Find the orphan in the PENDING dir and delete it */
/* The PENDING entry name is built from the inode number and the
 * orphan's own generation, not the generation carried in fid. */
375 fidlen = ll_fid2str(fidname, fid->id, inode->i_generation);
376 orphan = lookup_one_len(fidname, mds->mds_pending_dir, fidlen);
377 if (IS_ERR(orphan)) {
378 rc = PTR_ERR(orphan);
379 CERROR("error looking up %s in PENDING: rc = %d\n",
381 GOTO(unlock_child, rc);
383 if (orphan->d_inode != inode) {
385 CWARN("%s: Found wrong orphan %s %p/%p\n",
386 obd->obd_name, fidname, orphan->d_inode, inode);
387 GOTO(unlock_child, rc = -EFAULT);
/* The orphan semaphore is released before the entry is unlinked. */
389 MDS_UP_READ_ORPHAN_SEM(inode);
391 rc = mds_unlink_orphan(obd, orphan, inode, pending_dir);
392 CDEBUG(D_INODE, "%s: removed orphan %s: rc %d\n",
393 obd->obd_name, fidname, rc);
398 MDS_UP_READ_ORPHAN_SEM(inode);
400 UNLOCK_INODE_MUTEX(pending_dir);