1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (c) 2001-2003 Cluster File Systems, Inc.
6 * This file is part of Lustre, http://www.lustre.org.
8 * Lustre is free software; you can redistribute it and/or
9 * modify it under the terms of version 2 of the GNU General Public
10 * License as published by the Free Software Foundation.
12 * Lustre is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Lustre; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 #include <linux/sched.h>
24 #include <linux/smp_lock.h>
25 #include <linux/quotaops.h>
27 #define DEBUG_SUBSYSTEM S_LLITE
29 #include <obd_support.h>
30 #include <lustre_lite.h>
31 #include <lustre/lustre_idl.h>
32 #include <lustre_dlm.h>
33 #include <lustre_mdc.h>
34 #include <lustre_ver.h>
36 #include "llite_internal.h"
38 /* should NOT be called with the dcache lock, see fs/dcache.c */
/* d_release hook (installed through ll_d_ops): frees the Lustre private data
 * hanging off de->d_fsdata.  Both pin counters (lld_cwd_count and
 * lld_mnt_count) are asserted to be zero before the memory is released.
 * NOTE(review): the statement that assigns lld and the body of the
 * lld == NULL branch are not visible in this listing; presumably lld comes
 * from ll_d2d(de), as in ll_pin(), and the NULL case returns early without
 * freeing anything -- confirm against the full source. */
39 static void ll_release(struct dentry *de)
41 struct ll_dentry_data *lld;
45 if (lld == NULL) { /* NFS copies the de->d_op methods (bug 4655) */
49 LASSERT(lld->lld_cwd_count == 0);
50 LASSERT(lld->lld_mnt_count == 0);
51 OBD_FREE(de->d_fsdata, sizeof(*lld));
56 /* Compare if two dentries are the same. Don't match if the existing dentry
57 * is marked DCACHE_LUSTRE_INVALID. Returns 1 if different, 0 if the same.
59 * This avoids a race where ll_lookup_it() instantiates a dentry, but we get
60 * an AST before calling d_revalidate_it(). The dentry still exists (marked
61 * INVALID) so d_lookup() matches it, but we have no lock on it (so
62 * lock_match() fails) and we spin around real_lookup(). */
/* d_compare hook (installed through ll_d_ops).  The two names are compared
 * by length first and then bytewise with memcmp().  The candidate dentry is
 * recovered from d_name with container_of(), which is only valid when
 * d_name is the qstr embedded in a struct dentry; the parent argument is not
 * used in the visible code.
 * NOTE(review): the return statements and the remaining CDEBUG arguments are
 * not visible in this listing; the contract stated above (1 = different,
 * 0 = same) is taken from the original comment. */
63 int ll_dcompare(struct dentry *parent, struct qstr *d_name, struct qstr *name)
65 struct dentry *dchild;
68 if (d_name->len != name->len)
71 if (memcmp(d_name->name, name->name, name->len))
74 /* XXX: d_name must be in-dentry structure */
75 dchild = container_of(d_name, struct dentry, d_name); /* ugh */
76 if (dchild->d_flags & DCACHE_LUSTRE_INVALID) {
77 CDEBUG(D_DENTRY,"INVALID dentry %p not matched, was bug 3784\n",
85 /* should NOT be called with the dcache lock, see fs/dcache.c */
/* d_delete hook (installed through ll_d_ops).  The visible part only emits a
 * D_DENTRY debug line: it prints "deleting" when DCACHE_LUSTRE_INVALID is
 * set on the dentry and "keeping" otherwise, followed by the name, the
 * dentry/parent/inode pointers and whether the dentry is still hashed or
 * has entries on d_subdirs.
 * NOTE(review): the return statement is not visible in this listing;
 * presumably the result follows the DCACHE_LUSTRE_INVALID flag -- confirm. */
86 static int ll_ddelete(struct dentry *de)
90 CDEBUG(D_DENTRY, "%s dentry %.*s (%p, parent %p, inode %p) %s%s\n",
91 (de->d_flags & DCACHE_LUSTRE_INVALID ? "deleting" : "keeping"),
92 de->d_name.len, de->d_name.name, de, de->d_parent, de->d_inode,
93 d_unhashed(de) ? "" : "hashed,",
94 list_empty(&de->d_subdirs) ? "" : "subdirs");
/* Attach Lustre private data to a dentry.  After logging the dentry, the
 * visible code allocates a struct ll_dentry_data with OBD_ALLOC and stores
 * it in de->d_fsdata, but only when d_fsdata is still NULL.
 * NOTE(review): no locking around the NULL test and the allocation is
 * visible here, and the allocation result is not checked in the visible
 * lines -- verify whether callers serialize this call and whether users of
 * d_fsdata tolerate it staying NULL after a failed allocation. */
98 void ll_set_dd(struct dentry *de)
103 CDEBUG(D_DENTRY, "ldd on dentry %.*s (%p) parent %p inode %p refc %d\n",
104 de->d_name.len, de->d_name.name, de, de->d_parent, de->d_inode,
105 atomic_read(&de->d_count));
107 if (de->d_fsdata == NULL) {
108 OBD_ALLOC(de->d_fsdata, sizeof(struct ll_dentry_data));
/* Drop the DLM lock reference recorded in an intent, if there is one.
 * When it->it_op is non-zero and a lock mode is stored in
 * it->d.lustre.it_lock_mode, the lock handle kept in the intent is passed to
 * ldlm_lock_decref() with that mode, and the stored mode is then cleared.
 * Clearing the mode is what keeps a second call on the same intent from
 * decrementing the lock again (see the bug 494 note below). */
115 void ll_intent_drop_lock(struct lookup_intent *it)
117 struct lustre_handle *handle;
119 if (it->it_op && it->d.lustre.it_lock_mode) {
120 handle = (struct lustre_handle *)&it->d.lustre.it_lock_handle;
121 CDEBUG(D_DLMTRACE, "releasing lock with cookie "LPX64
122 " from it %p\n", handle->cookie, it);
123 ldlm_lock_decref(handle, it->d.lustre.it_lock_mode);
125 /* bug 494: intent_release may be called multiple times, from
126 * this thread and we don't want to double-decref this lock */
127 it->d.lustre.it_lock_mode = 0;
/* Release an intent: drop its lock reference through ll_intent_drop_lock()
 * and reset the it_op_release callback, the disposition bits and the
 * it_data pointer.  ll_frob_intent() installs this function as the
 * it_op_release callback of the intents it prepares.
 * NOTE(review): one source line between the lock drop and the resets is
 * missing from this listing. */
131 void ll_intent_release(struct lookup_intent *it)
135 CDEBUG(D_INFO, "intent %p released\n", it);
136 ll_intent_drop_lock(it);
138 it->it_op_release = 0;
139 it->d.lustre.it_disposition = 0;
140 it->d.lustre.it_data = NULL;
144 /* Drop dentry if it is not used already, unhash otherwise.
145 Should be called with dcache lock held!
146 Returns: 1 if dentry was dropped, 0 if unhashed. */
/* Visible behaviour:
 *  - d_count == 0: the dentry is logged as being deleted, then
 *    unlock_dentry() is called and dcache_lock is released and re-taken
 *    around a statement that is not visible in this listing
 *    (NOTE(review): presumably the dentry is dropped there -- confirm).
 *  - otherwise, a dentry that is still hashed is flagged
 *    DCACHE_LUSTRE_INVALID instead of being removed from the hash, so that
 *    name comparison in ll_dcompare() stops matching it while the dentry
 *    itself stays reachable (see the getcwd note below).  On kernels older
 *    than 2.5.0 the visible code also links the dentry onto the per
 *    superblock ll_orphan_dentry_list.
 * NOTE(review): the matching lock_dentry() calls, several preprocessor
 * branches (#else / #endif) and all return statements are missing from this
 * listing; do not rely on the conditional structure exactly as shown. */
147 int ll_drop_dentry(struct dentry *dentry)
150 CDEBUG(D_DENTRY, "dentry in drop %.*s (%p) parent %p "
151 "inode %p flags %d\n", dentry->d_name.len,
152 dentry->d_name.name, dentry, dentry->d_parent,
153 dentry->d_inode, dentry->d_flags);
155 if (atomic_read(&dentry->d_count) == 0) {
156 CDEBUG(D_DENTRY, "deleting dentry %.*s (%p) parent %p "
157 "inode %p\n", dentry->d_name.len,
158 dentry->d_name.name, dentry, dentry->d_parent,
162 unlock_dentry(dentry);
163 spin_unlock(&dcache_lock);
165 spin_lock(&dcache_lock);
169 #ifdef LUSTRE_KERNEL_VERSION
170 if (!(dentry->d_flags & DCACHE_LUSTRE_INVALID)) {
172 if (!d_unhashed(dentry)) {
173 struct inode *inode = dentry->d_inode;
175 CDEBUG(D_DENTRY, "unhashing dentry %.*s (%p) parent %p "
176 "inode %p refc %d\n", dentry->d_name.len,
177 dentry->d_name.name, dentry, dentry->d_parent,
178 dentry->d_inode, atomic_read(&dentry->d_count));
179 /* actually we don't unhash the dentry, rather just
180 * mark it inaccessible to __d_lookup(). otherwise
181 * sys_getcwd() could return -ENOENT -bzzz */
182 #ifdef LUSTRE_KERNEL_VERSION
183 dentry->d_flags |= DCACHE_LUSTRE_INVALID;
185 if (!inode || !S_ISDIR(inode->i_mode))
189 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
192 /* Put positive dentries to orphan list */
193 hlist_add_head(&dentry->d_hash,
194 &ll_i2sbi(inode)->ll_orphan_dentry_list);
198 unlock_dentry(dentry);
/* Invalidate every dentry alias of an inode.  The alias list
 * inode->i_dentry is walked with dcache_lock held and each alias is handed
 * to ll_drop_dentry().  Two cases get extra treatment inside the loop:
 *  - an alias whose name is the single character "/" is reported with
 *    CERROR together with a dentry dump and a stack dump;
 *  - an alias that is a mountpoint is logged and, according to the comment
 *    in the loop, not removed.
 * A NULL inode is reported with CERROR.
 * NOTE(review): the NULL-inode test itself, the initialisation of tmp, the
 * statement that skips mountpoints and the statement executed when
 * ll_drop_dentry() returns non-zero are not visible in this listing.  Since
 * ll_drop_dentry() may release dcache_lock, the walk presumably has to be
 * restarted in that case -- confirm against the full source. */
202 void ll_unhash_aliases(struct inode *inode)
204 struct list_head *tmp, *head;
208 CERROR("unexpected NULL inode, tell phil\n");
212 CDEBUG(D_INODE, "marking dentries for 111 ino %lu/%u(%p) invalid\n",
213 inode->i_ino, inode->i_generation, inode);
215 head = &inode->i_dentry;
216 spin_lock(&dcache_lock);
219 while ((tmp = tmp->next) != head) {
220 struct dentry *dentry = list_entry(tmp, struct dentry, d_alias);
222 CDEBUG(D_DENTRY, "dentry in drop %.*s (%p) parent %p "
223 "inode %p flags %d\n", dentry->d_name.len,
224 dentry->d_name.name, dentry, dentry->d_parent,
225 dentry->d_inode, dentry->d_flags);
227 if (dentry->d_name.len == 1 && dentry->d_name.name[0] == '/') {
228 CERROR("called on root (?) dentry=%p, inode=%p "
229 "ino=%lu\n", dentry, inode, inode->i_ino);
230 lustre_dump_dentry(dentry, 1);
231 libcfs_debug_dumpstack(NULL);
232 } else if (d_mountpoint(dentry)) {
233 /* For mountpoints we skip removal of the dentry
234 which happens solely because we have a lock on it
235 obtained when this dentry was not a mountpoint yet */
236 CDEBUG(D_DENTRY, "Skippind mountpoint dentry removal "
237 "%.*s (%p) parent %p\n",
240 dentry, dentry->d_parent);
245 if (ll_drop_dentry(dentry))
248 spin_unlock(&dcache_lock);
/* Digest the reply of a revalidation request.
 *
 * request  reply-carrying request obtained from the intent lock call
 * offset   forwarded unchanged to ll_prep_inode()
 * it       intent whose disposition bits describe what the server did
 *
 * When the server reported a negative lookup (DISP_LOOKUP_NEG) but the
 * intent shows a successful create, DISP_ENQ_COMPLETE is set on the intent
 * and ptlrpc_req_finished() is called once for the create and once for a
 * successful open, balancing references taken elsewhere (see the inner
 * comments).  The visible tail of the function refreshes de->d_inode from
 * the reply through ll_prep_inode().
 * NOTE(review): the last parameter line (the dentry, used below as de) and
 * all return statements are missing from this listing; the error codes
 * (-ENOENT / -ESTALE) are known only from the inner comment.  The nested
 * DISP_OPEN_CREATE test repeats the enclosing condition and looks
 * redundant. */
252 int ll_revalidate_it_finish(struct ptlrpc_request *request,
253 int offset, struct lookup_intent *it,
262 if (it_disposition(it, DISP_LOOKUP_NEG)) {
263 /*Sometimes, revalidate_it might also create node in MDS,
264 *So we need check whether it is really created, if not,
265 *just return the -ENOENT, if it is, return -ESTALE anyway.
266 *which is original done in mdc_intent_lock,
267 *but no chance for new fid allocation in client.
269 if (it_disposition(it, DISP_OPEN_CREATE) &&
270 !it_open_error(DISP_OPEN_CREATE, it)) {
271 /*These 2 req finished is for balancing 2 add ref
272 *in mdc_intent_lock, since there are no create_node
273 *and file_open following revalidate_it*/
274 it_set_disposition(it, DISP_ENQ_COMPLETE);
275 if (it_disposition(it, DISP_OPEN_CREATE) &&
276 !it_open_error(DISP_OPEN_CREATE, it))
277 ptlrpc_req_finished(request);
278 if (it_disposition(it, DISP_OPEN_OPEN) &&
279 !it_open_error(DISP_OPEN_OPEN, it))
280 ptlrpc_req_finished(request);
287 rc = ll_prep_inode(&de->d_inode,
288 request, offset, NULL);
/* Lock bookkeeping after a lookup or revalidation.  If the intent holds a
 * lock (it_lock_mode is non-zero) and the dentry is positive, the lock is
 * associated with the inode through md_set_lock_data() on the MD export of
 * the superblock.  For IT_LOOKUP and IT_GETATTR intents the lock is then
 * given up right away: on kernels newer than 2.5.0 only the lock reference
 * is dropped (ll_intent_drop_lock), on older kernels the whole intent is
 * released (ll_intent_release).
 * NOTE(review): the #else / #endif lines and the last md_set_lock_data()
 * argument are missing from this listing; the split between the two calls
 * is inferred from the #if line and the comment that follows it. */
293 void ll_lookup_finish_locks(struct lookup_intent *it, struct dentry *dentry)
296 LASSERT(dentry != NULL);
298 if (it->d.lustre.it_lock_mode && dentry->d_inode != NULL) {
299 struct inode *inode = dentry->d_inode;
300 struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
302 CDEBUG(D_DLMTRACE, "setting l_data to inode %p (%lu/%u)\n",
303 inode, inode->i_ino, inode->i_generation);
304 md_set_lock_data(sbi->ll_md_exp, &it->d.lustre.it_lock_handle,
308 /* drop lookup or getattr locks immediately */
309 if (it->it_op & (IT_LOOKUP | IT_GETATTR)) {
310 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
311 /* on 2.6 there are situations when several lookups and
312 * revalidations may be requested during single operation.
313 * therefore, we don't release intent here -bzzz */
314 ll_intent_drop_lock(it);
316 ll_intent_release(it);
/* Prepare the intent handed in by the VFS for use by llite.
 *
 * itp   in/out pointer to the intent pointer
 * deft  caller-provided fallback intent
 *
 * On kernels newer than 2.5.0 the magic of the intent is asserted to be
 * INTENT_MAGIC.  A missing intent or an IT_GETXATTR intent is special-cased,
 * and ll_intent_release is installed as the release callback of the intent.
 * NOTE(review): the guard around the magic assertion and the statement
 * executed for the missing / IT_GETXATTR case are not visible in this
 * listing; presumably the intent is replaced by deft there -- confirm
 * against the full source. */
321 void ll_frob_intent(struct lookup_intent **itp, struct lookup_intent *deft)
323 struct lookup_intent *it = *itp;
324 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
326 LASSERTF(it->it_magic == INTENT_MAGIC,
327 "%p has bad intent magic: %x\n",
332 if (!it || it->it_op == IT_GETXATTR)
335 it->it_op_release = ll_intent_release;
/* Revalidate a cached dentry against the metadata server.
 *
 * de            dentry to revalidate
 * lookup_flags  forwarded unchanged to md_intent_lock()
 * it            intent supplied by the VFS; it is first passed through
 *               ll_frob_intent() together with a local IT_LOOKUP intent
 *
 * Outline of the visible code:
 *  1. negative dentries and mountpoints are special-cased up front;
 *  2. an md_op_data is allocated and filled in by ll_prepare_md_op_data(),
 *     with a fid allocated through ll_fid_md_alloc() for IT_CREAT intents;
 *  3. md_intent_lock() is called with ll_md_blocking_ast as blocking
 *     callback and the op data is freed again;
 *  4. when a request came back, ll_revalidate_it_finish() digests the reply
 *     and the intent is released unless the result is -ESTALE or -ENOENT;
 *  5. an open intent on something that is neither a regular file nor a
 *     directory has its open handle released;
 *  6. a request that did not reach DISP_ENQ_COMPLETE is freed here;
 *  7. the function ends either by invalidating all aliases of the inode
 *     (ll_unhash_aliases) or by finishing the locks and clearing
 *     DCACHE_LUSTRE_INVALID on the dentry.
 * NOTE(review): labels, gotos, return statements and several conditions are
 * missing from this listing, so which of the steps above run on which path
 * -- in particular the choice in step 7 -- must be checked against the full
 * source.  The debug line near the top prints de->d_name.name with a plain
 * string conversion, unlike the length-limited form used elsewhere in this
 * file. */
338 int ll_revalidate_it(struct dentry *de, int lookup_flags,
339 struct lookup_intent *it)
342 struct it_cb_data icbd;
343 struct md_op_data *op_data;
344 struct ptlrpc_request *req = NULL;
345 struct lookup_intent lookup_it = { .it_op = IT_LOOKUP };
346 struct obd_export *exp;
347 struct inode *parent;
350 CDEBUG(D_VFSTRACE, "VFS Op:name=%s,intent=%s\n", de->d_name.name,
353 /* Cached negative dentries are unsafe for now - look them up again */
354 if (de->d_inode == NULL)
357 exp = ll_i2mdexp(de->d_inode);
358 icbd.icbd_parent = de->d_parent->d_inode;
359 icbd.icbd_childp = &de;
361 /* Never execute intents for mount points.
362 * Attributes will be fixed up in ll_inode_revalidate_it */
363 if (d_mountpoint(de))
366 OBD_FAIL_TIMEOUT(OBD_FAIL_MDC_REVALIDATE_PAUSE, 5);
367 ll_frob_intent(&it, &lookup_it);
370 parent = de->d_parent->d_inode;
372 OBD_ALLOC_PTR(op_data);
376 if (it->it_op & IT_CREAT) {
377 struct lu_placement_hint hint = { .ph_pname = NULL,
378 .ph_cname = &de->d_name,
379 .ph_opc = LUSTRE_OPC_CREATE };
381 ll_prepare_md_op_data(op_data, parent, NULL,
382 de->d_name.name, de->d_name.len, 0);
383 rc = ll_fid_md_alloc(ll_i2sbi(parent), &op_data->fid2,
386 CERROR("can't allocate new fid, rc %d\n", rc);
390 ll_prepare_md_op_data(op_data, parent, de->d_inode,
391 de->d_name.name, de->d_name.len, 0);
394 rc = md_intent_lock(exp, op_data, NULL, 0, it, lookup_flags,
395 &req, ll_md_blocking_ast, 0);
397 OBD_FREE_PTR(op_data);
398 /* If req is NULL, then md_intent_lock only tried to do a lock match;
399 * if all was well, it will return 1 if it found locks, 0 otherwise. */
400 if (req == NULL && rc >= 0) {
406 CDEBUG(D_INFO, "ll_intent_lock: rc %d : it->it_status "
407 "%d\n", rc, it->d.lustre.it_status);
412 rc = ll_revalidate_it_finish(req, 1, it, de);
414 if (rc != -ESTALE && rc != -ENOENT)
415 ll_intent_release(it);
418 if ((it->it_op & IT_OPEN) && de->d_inode &&
419 !S_ISREG(de->d_inode->i_mode) &&
420 !S_ISDIR(de->d_inode->i_mode)) {
421 ll_release_openhandle(de, it);
425 /* unfortunately ll_intent_lock may cause a callback and revoke our
427 spin_lock(&dcache_lock);
432 spin_unlock(&dcache_lock);
435 /* We do not free request as it may be reused during following lookup
436 (see comment in mdc/mdc_locks.c::mdc_intent_lock()), request will
437 be freed in ll_lookup_it or in ll_intent_release. But if
438 request was not completed, we need to free it. (bug 5154) */
439 if (req != NULL && !it_disposition(it, DISP_ENQ_COMPLETE))
440 ptlrpc_req_finished(req);
442 ll_unhash_aliases(de->d_inode);
443 /* done in ll_unhash_aliases()
444 dentry->d_flags |= DCACHE_LUSTRE_INVALID; */
446 CDEBUG(D_DENTRY, "revalidated dentry %.*s (%p) parent %p "
447 "inode %p refc %d\n", de->d_name.len,
448 de->d_name.name, de, de->d_parent, de->d_inode,
449 atomic_read(&de->d_count));
450 ll_lookup_finish_locks(it, de);
452 de->d_flags &= ~DCACHE_LUSTRE_INVALID;
/* Pin a dentry on the metadata server through obd_pin().
 *
 * de    dentry to pin; its inode supplies the fid and the superblock info
 * mnt   not used in the visible code
 * flag  selects the bookkeeping slot: the mount counter and handle
 *       (lld_mnt_count / lld_mnt_och) or the cwd counter and handle
 *       (lld_cwd_count / lld_cwd_och)
 *
 * The matching counter is incremented first; when it was already non-zero
 * the visible condition (count > 1) takes a separate branch.  After
 * obd_pin() the visible code can clear the handle and decrement a counter
 * again.
 * NOTE(review): the bodies of the count > 1 branches, the remaining
 * obd_pin() arguments, the failure test and all locking are missing from
 * this listing; presumably a repeated pin returns without another RPC and
 * the handle reset is the error path -- confirm.  Also note that the
 * counter tests compare flag with 1 and 0 exactly while the handle is
 * chosen by the truth value of flag, so any other non-zero flag would skip
 * both counter increments. */
458 /*static*/ void ll_pin(struct dentry *de, struct vfsmount *mnt, int flag)
460 struct inode *inode= de->d_inode;
461 struct ll_sb_info *sbi = ll_i2sbi(inode);
462 struct ll_dentry_data *ldd = ll_d2d(de);
463 struct obd_client_handle *handle;
469 /* Strictly speaking this introduces an additional race: the
470 * increments should wait until the rpc has returned.
471 * However, given that at present the function is void, this
473 if (flag == 1 && (++ldd->lld_mnt_count) > 1) {
479 if (flag == 0 && (++ldd->lld_cwd_count) > 1) {
486 handle = (flag) ? &ldd->lld_mnt_och : &ldd->lld_cwd_och;
487 rc = obd_pin(sbi->ll_md_exp, &ll_i2info(inode)->lli_fid,
492 memset(handle, 0, sizeof(*handle));
494 ldd->lld_cwd_count--;
496 ldd->lld_mnt_count--;
504 /*static*/ void ll_unpin(struct dentry *de, struct vfsmount *mnt, int flag)
/* Counterpart of ll_pin().  A by-value copy of the mount or cwd client
 * handle is taken according to flag; a handle whose och_magic is not
 * OBD_CLIENT_HANDLE_MAGIC is treated as belonging to a pin that failed.
 * The matching counter (lld_mnt_count or lld_cwd_count) is decremented and
 * obd_unpin() is called on the MD export with the copied handle.  The mnt
 * argument is not used in the visible code.
 * NOTE(review): the body of the failed-pin branch, the test on the
 * decremented count and all locking are missing from this listing;
 * presumably obd_unpin() is only reached when the count drops to zero --
 * confirm against the full source. */
506 struct ll_sb_info *sbi = ll_i2sbi(de->d_inode);
507 struct ll_dentry_data *ldd = ll_d2d(de);
508 struct obd_client_handle handle;
514 /* Strictly speaking this introduces an additional race: the
515 * increments should wait until the rpc has returned.
516 * However, given that at present the function is void, this
518 handle = (flag) ? ldd->lld_mnt_och : ldd->lld_cwd_och;
519 if (handle.och_magic != OBD_CLIENT_HANDLE_MAGIC) {
520 /* the "pin" failed */
527 count = --ldd->lld_mnt_count;
529 count = --ldd->lld_cwd_count;
537 rc = obd_unpin(sbi->ll_md_exp, &handle, flag);
542 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
/* d_revalidate entry point used on kernels newer than 2.5.0 (see ll_d_ops).
 * When nd is non-NULL and describes the final path component (LOOKUP_LAST
 * set, LOOKUP_LINK_NOTLAST clear), the lookup flags and the intent embedded
 * in the nameidata are forwarded to ll_revalidate_it(); the other visible
 * call revalidates with no flags and no intent.
 * NOTE(review): the else line between the two calls, the return statement
 * and the closing #endif are missing from this listing. */
543 static int ll_revalidate_nd(struct dentry *dentry, struct nameidata *nd)
548 if (nd && nd->flags & LOOKUP_LAST && !(nd->flags & LOOKUP_LINK_NOTLAST))
549 rc = ll_revalidate_it(dentry, nd->flags, &nd->intent);
551 rc = ll_revalidate_it(dentry, 0, NULL);
/* Dentry operations installed on Lustre dentries.  The revalidate hook
 * depends on the kernel version: ll_revalidate_nd through .d_revalidate on
 * kernels newer than 2.5.0, and the intent-aware ll_revalidate_it through
 * .d_revalidate_it in the other visible initializer.  Release, delete and
 * name comparison are handled by ll_release, ll_ddelete and ll_dcompare.
 * NOTE(review): the #else / #endif lines and the end of the initializer are
 * missing from this listing, so further members may follow. */
557 struct dentry_operations ll_d_ops = {
558 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
559 .d_revalidate = ll_revalidate_nd,
561 .d_revalidate_it = ll_revalidate_it,
563 .d_release = ll_release,
564 .d_delete = ll_ddelete,
565 .d_compare = ll_dcompare,