1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (c) 2001-2003 Cluster File Systems, Inc.
6 * This file is part of Lustre, http://www.lustre.org.
8 * Lustre is free software; you can redistribute it and/or
9 * modify it under the terms of version 2 of the GNU General Public
10 * License as published by the Free Software Foundation.
12 * Lustre is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Lustre; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 #include <linux/sched.h>
24 #include <linux/smp_lock.h>
25 #include <linux/quotaops.h>
27 #define DEBUG_SUBSYSTEM S_LLITE
29 #include <linux/obd_support.h>
30 #include <linux/lustre_lite.h>
31 #include <linux/lustre_idl.h>
32 #include <linux/lustre_dlm.h>
33 #include <linux/lustre_version.h>
34 #include "llite_internal.h"
36 /* should NOT be called with the dcache lock, see fs/dcache.c */
/*
 * Dentry release hook; ll_d_ops installs it as .d_release.
 *
 * Logs the dentry and, when lld is non-NULL, asserts that both
 * lld_cwd_count and lld_mnt_count are zero before freeing the
 * struct ll_dentry_data held in de->d_fsdata.
 */
37 static void ll_release(struct dentry *de)
39 struct ll_dentry_data *lld;
43 CDEBUG(D_DENTRY, "releasing dentry %p\n", de);
46 if (lld) { /* Root dentry does not have ll_dentry_data */
47 LASSERT(lld->lld_cwd_count == 0);
48 LASSERT(lld->lld_mnt_count == 0);
49 OBD_FREE(de->d_fsdata, sizeof(struct ll_dentry_data));
55 /* Compare if two dentries are the same. Don't match if the existing dentry
56 * is marked DCACHE_LUSTRE_INVALID. Returns 1 if different, 0 if the same.
58 * This avoids a race where ll_lookup_it() instantiates a dentry, but we get
59 * an AST before calling d_revalidate_it(). The dentry still exists (marked
60 * INVALID) so d_lookup() matches it, but we have no lock on it (so
61 * lock_match() fails) and we spin around real_lookup(). */
/*
 * Name comparison hook; ll_d_ops installs it as .d_compare.
 *
 * Checks, in order: the two lengths, then memcmp() over name->len bytes,
 * then the DCACHE_LUSTRE_INVALID flag of the dentry that embeds d_name.
 * That dentry is recovered with container_of(), so d_name has to be the
 * qstr stored inside a struct dentry. An INVALID candidate is logged and,
 * going by the description above this function, reported as different.
 */
62 static int ll_dcompare(struct dentry *parent, struct qstr *d_name,
64 struct dentry *dchild;
67 if (d_name->len != name->len)
70 if (memcmp(d_name->name, name->name, name->len))
73 dchild = container_of(d_name, struct dentry, d_name); /* ugh */
74 if (dchild->d_flags & DCACHE_LUSTRE_INVALID) {
75 CDEBUG(D_DENTRY,"INVALID dentry %p not matched, was bug 3784\n",
83 /* should NOT be called with the dcache lock, see fs/dcache.c */
/*
 * Dentry delete hook; ll_d_ops installs it as .d_delete.
 *
 * The debug line reports deleting when DCACHE_LUSTRE_INVALID is set and
 * keeping otherwise, together with the name, the dentry, parent and inode
 * pointers, whether the dentry is still hashed and whether it has subdirs.
 * NOTE(review): presumably the return value follows the same flag test as
 * the log text - confirm against the return statement.
 */
84 static int ll_ddelete(struct dentry *de)
88 CDEBUG(D_DENTRY, "%s dentry %*s (%p, parent %p, inode %p) %s%s\n",
89 (de->d_flags & DCACHE_LUSTRE_INVALID ? "deleting" : "keeping"),
90 de->d_name.len, de->d_name.name, de, de->d_parent, de->d_inode,
91 d_unhashed(de) ? "" : "hashed,",
92 list_empty(&de->d_subdirs) ? "" : "subdirs");
/*
 * Attach Lustre private data to a dentry.
 *
 * Logs the dentry (name, parent, inode and reference count) and allocates
 * a struct ll_dentry_data into de->d_fsdata with OBD_ALLOC() only when
 * d_fsdata is still NULL, so an existing attachment is left untouched.
 */
96 void ll_set_dd(struct dentry *de)
101 CDEBUG(D_DENTRY, "ldd on dentry %.*s (%p) parent %p inode %p refc %d\n",
102 de->d_name.len, de->d_name.name, de, de->d_parent, de->d_inode,
103 atomic_read(&de->d_count));
105 if (de->d_fsdata == NULL) {
106 OBD_ALLOC(de->d_fsdata, sizeof(struct ll_dentry_data));
/*
 * Drop the DLM lock reference held by an intent, if any.
 *
 * Acts only when the intent has an operation set, has Lustre intent data
 * (LUSTRE_IT(it) is non-NULL) and records a non-zero it_lock_mode. The lock
 * named by it_lock_handle is then released with ldlm_lock_decref() and
 * it_lock_mode is reset to 0, which turns a repeated call into a no-op.
 */
113 void ll_intent_drop_lock(struct lookup_intent *it)
115 struct lustre_handle *handle;
116 struct lustre_intent_data *itdata = LUSTRE_IT(it);
118 if (it->it_op && itdata && itdata->it_lock_mode) {
119 handle = (struct lustre_handle *)&itdata->it_lock_handle;
120 CDEBUG(D_DLMTRACE, "releasing lock with cookie "LPX64
121 " from it %p\n", handle->cookie, it);
122 ldlm_lock_decref(handle, itdata->it_lock_mode);
124 /* bug 494: intent_release may be called multiple times, from
125 * this thread and we don't want to double-decref this lock */
126 itdata->it_lock_mode = 0;
/*
 * Release hook for an intent; ll_intent_alloc() stores this function in
 * it->it_op_release.
 *
 * Drops the intent lock through ll_intent_drop_lock() and clears
 * it_op_release.
 */
130 void ll_intent_release(struct lookup_intent *it)
134 ll_intent_drop_lock(it);
136 it->it_op_release = 0;
/*
 * Invalidate every dentry alias of an inode.
 *
 * A NULL inode is only reported through CERROR(). Otherwise the i_dentry
 * alias list is walked with dcache_lock held:
 *  - an alias whose d_count is 0 is logged as being deleted; on kernels
 *    newer than 2.5.0 its d_hash node is re-initialised, and dcache_lock
 *    is released inside this branch;
 *  - an alias with a non-zero d_count that is not yet flagged
 *    DCACHE_LUSTRE_INVALID is removed from the hash, flagged INVALID and
 *    put on sbi->ll_orphan_dentry_list.
 * NOTE(review): since the lock is dropped in the first branch, the walk
 * presumably restarts from the list head afterwards - confirm.
 */
141 void ll_unhash_aliases(struct inode *inode)
143 struct list_head *tmp, *head;
144 struct ll_sb_info *sbi;
148 CERROR("unexpected NULL inode, tell phil\n");
153 CDEBUG(D_INODE, "marking dentries for ino %lu/%u(%p) invalid\n",
154 inode->i_ino, inode->i_generation, inode);
156 sbi = ll_i2sbi(inode);
157 head = &inode->i_dentry;
159 spin_lock(&dcache_lock);
161 while ((tmp = tmp->next) != head) {
162 struct dentry *dentry = list_entry(tmp, struct dentry, d_alias);
163 if (atomic_read(&dentry->d_count) == 0) {
164 CDEBUG(D_DENTRY, "deleting dentry %.*s (%p) parent %p "
165 "inode %p\n", dentry->d_name.len,
166 dentry->d_name.name, dentry, dentry->d_parent,
170 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
171 INIT_HLIST_NODE(&dentry->d_hash);
173 spin_unlock(&dcache_lock);
176 } else if (!(dentry->d_flags & DCACHE_LUSTRE_INVALID)) {
177 CDEBUG(D_DENTRY, "unhashing dentry %.*s (%p) parent %p "
178 "inode %p refc %d\n", dentry->d_name.len,
179 dentry->d_name.name, dentry, dentry->d_parent,
180 dentry->d_inode, atomic_read(&dentry->d_count));
181 hlist_del_init(&dentry->d_hash);
182 dentry->d_flags |= DCACHE_LUSTRE_INVALID;
183 hlist_add_head(&dentry->d_hash,
184 &sbi->ll_orphan_dentry_list);
187 spin_unlock(&dcache_lock);
191 extern struct dentry *ll_find_alias(struct inode *, struct dentry *);
/*
 * Finish revalidating a dentry from the reply to an intent request.
 *
 * A reply carrying the DISP_LOOKUP_NEG disposition is handled separately
 * from the main path. Otherwise ll_prep_inode() is called with the data
 * and metadata exports of the superblock, the address of de->d_inode, the
 * request and the reply offset, and its result is kept in rc.
 */
193 int revalidate_it_finish(struct ptlrpc_request *request, int offset,
194 struct lookup_intent *it, struct dentry *de)
196 struct ll_sb_info *sbi;
203 if (it_disposition(it, DISP_LOOKUP_NEG))
206 sbi = ll_i2sbi(de->d_inode);
207 rc = ll_prep_inode(sbi->ll_dt_exp, sbi->ll_md_exp,
208 &de->d_inode, request, offset, NULL);
/*
 * Tidy up the lock state of an intent once a lookup has finished.
 *
 * dentry must not be NULL. When the intent holds a lock (it_lock_mode is
 * non-zero) and the dentry has an inode, that inode is attached to the
 * lock with mdc_set_lock_data(). For IT_LOOKUP, IT_GETATTR and IT_CHDIR
 * intents the lock is not kept: kernels newer than 2.5.0 call
 * ll_intent_drop_lock(), and ll_intent_release() is the alternative call
 * (presumably the branch for older kernels - TODO confirm).
 */
213 void ll_lookup_finish_locks(struct lookup_intent *it, struct dentry *dentry)
216 LASSERT(dentry != NULL);
218 if (LUSTRE_IT(it)->it_lock_mode && dentry->d_inode != NULL) {
219 struct inode *inode = dentry->d_inode;
220 CDEBUG(D_DLMTRACE, "setting l_data to inode %p (%lu/%u)\n",
221 inode, inode->i_ino, inode->i_generation);
222 mdc_set_lock_data(NULL, &LUSTRE_IT(it)->it_lock_handle, inode);
225 /* drop lookup or getattr locks immediately */
226 if (it->it_op == IT_LOOKUP || it->it_op == IT_GETATTR ||
227 it->it_op == IT_CHDIR) {
228 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
230 * on 2.6 there are situations when several lookups and
231 * revalidations may be requested during single operation.
232 * Therefore, we don't release intent here -bzzz
234 ll_intent_drop_lock(it);
236 ll_intent_release(it);
/*
 * Prepare the intent referenced by *itp for use.
 *
 * On kernels newer than 2.5.0 the intent magic is asserted to equal
 * INTENT_MAGIC. A NULL intent or an IT_GETXATTR intent takes a special
 * path (presumably deft is used in its place - TODO confirm). Lustre intent
 * data is then allocated with ll_intent_alloc(); a failure is only logged
 * because this function has no way to return a status.
 */
241 void ll_frob_intent(struct lookup_intent **itp, struct lookup_intent *deft)
243 struct lookup_intent *it = *itp;
245 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
247 LASSERTF(it->it_magic == INTENT_MAGIC, "bad intent magic: %x\n",
252 if (!it || it->it_op == IT_GETXATTR)
258 if (ll_intent_alloc(it)) {
259 CERROR("Failed to allocate memory for lustre specific intent "
261 /* XXX: we cannot return status just yet */
/*
 * Allocate the Lustre private data of an intent.
 *
 * Complains through CERROR() when the intent is already allocated.
 * Otherwise a struct lustre_intent_data is taken from ll_intent_slab
 * (SLAB_KERNEL) and stored in it->d.fs_data; a failed allocation is logged.
 * On the success path it_op_release is pointed at ll_intent_release().
 * Callers in this file treat a non-zero return value as failure.
 */
266 int ll_intent_alloc(struct lookup_intent *it)
269 CERROR("Intent alloc on already allocated intent\n");
272 OBD_SLAB_ALLOC(it->d.fs_data, ll_intent_slab, SLAB_KERNEL,
273 sizeof(struct lustre_intent_data));
274 if (!it->d.fs_data) {
275 CERROR("Failed to allocate memory for lustre specific intent "
280 it->it_op_release = ll_intent_release;
/*
 * Free the Lustre private data of an intent.
 *
 * A key buffer hanging off the data (it_key, it_key_size bytes) is freed
 * first and both fields are reset. The data itself then goes back to
 * ll_intent_slab and it->d.fs_data is set to NULL.
 */
284 void ll_intent_free(struct lookup_intent *it)
287 struct lustre_intent_data *lustre_data =
288 (struct lustre_intent_data *)it->d.fs_data;
289 if (lustre_data->it_key) {
290 OBD_FREE(lustre_data->it_key,
291 lustre_data->it_key_size);
292 lustre_data->it_key = NULL;
293 lustre_data->it_key_size = 0;
295 OBD_SLAB_FREE(it->d.fs_data, ll_intent_slab,
296 sizeof(struct lustre_intent_data));
297 it->d.fs_data = NULL;
/*
 * Name test for dentries whose name starts with a dot.
 *
 * Only names whose first character is a dot are examined; the switch on
 * the name length then looks at the second character as well.
 * NOTE(review): presumably this recognises the dot and dot-dot entries -
 * confirm against the return statements.
 */
302 ll_special_name(struct dentry *de)
304 if (de->d_name.name[0] == '.') switch (de->d_name.len) {
306 if (de->d_name.name[1] == '.')
/*
 * Revalidate a dentry against the MDS, optionally on behalf of an intent.
 *
 * de    - dentry to check
 * flags - lookup flags, forwarded to the first md_intent_lock() call
 * nd    - nameidata of the walk; tested against NULL before the GNS values
 *         are read from it
 * it    - intent of the caller, or NULL
 *
 * Steps visible in the body:
 *  - negative dentries and the root dentry are decided up front;
 *  - mount points never execute intents;
 *  - an IT_GETATTR intent is replaced by NULL so that the on-stack lookup
 *    intent is used;
 *  - for IT_OPEN a granted OPEN inodebits lock is looked up with
 *    ldlm_lock_match(); if an open handle for the requested mode is cached
 *    the lock is recorded in the intent, otherwise the reference is dropped;
 *  - md_intent_lock() is issued with both parent and child ids and the
 *    reply is passed to revalidate_it_finish();
 *  - a possible GNS mount point makes the function drop the lock and
 *    unhash the aliases so that a fresh lookup is forced;
 *  - a second md_intent_lock() call by name only is followed by a
 *    comparison of the returned id with the cached child id.
 */
316 int ll_revalidate_it(struct dentry *de, int flags, struct nameidata *nd,
317 struct lookup_intent *it)
319 struct lookup_intent lookup_it = { .it_op = IT_LOOKUP };
320 struct ptlrpc_request *req = NULL;
321 int gns_it, gns_flags, rc = 0;
322 struct obd_export *exp;
323 struct it_cb_data icbd;
324 struct lustre_id pid;
325 struct lustre_id cid;
328 CDEBUG(D_VFSTRACE, "VFS Op:name=%s (%p), intent=%s\n", de->d_name.name,
331 /* Cached negative dentries are unsafe for now - look them up again */
332 if (de->d_inode == NULL)
336 * root of the tree is always valid, attributes would be fixed in
337 * ll_inode_revalidate_it()
339 if (de->d_sb->s_root == de)
342 CDEBUG(D_INODE, "revalidate 0x%p: %*s -> %lu/%lu\n",
343 de, de->d_name.len, de->d_name.name,
344 (unsigned long) de->d_inode->i_ino,
345 (unsigned long) de->d_inode->i_generation);
/* Collect the MD export and the parent and child ids used by the intent
 * lock calls further down. */
347 exp = ll_i2mdexp(de->d_inode);
348 ll_inode2id(&pid, de->d_parent->d_inode);
349 ll_inode2id(&cid, de->d_inode);
350 LASSERT(id_fid(&cid) != 0);
352 icbd.icbd_parent = de->d_parent->d_inode;
353 icbd.icbd_childp = &de;
356 * never execute intents for mount points. Attributes will be fixed up
357 * in ll_inode_revalidate_it().
359 if (d_mountpoint(de))
363 nd->mnt->mnt_last_used = jiffies;
365 OBD_FAIL_TIMEOUT(OBD_FAIL_MDC_REVALIDATE_PAUSE, 5);
366 gns_it = nd ? nd->intent.open.it_op : IT_OPEN;
367 gns_flags = nd ? nd->flags : LOOKUP_CONTINUE;
369 if (it && it->it_op == IT_GETATTR)
370 it = NULL; /* will use it_lookup */
371 else if (it && (it->it_op == IT_OPEN) && de->d_inode) {
372 /* open lock stuff */
373 struct inode *inode = de->d_inode;
374 struct ll_inode_info *lli = ll_i2info(inode);
375 struct obd_client_handle **och_p;
377 struct obd_device *obddev;
378 struct lustre_handle lockh;
/* NOTE(review): this local shadows the flags parameter inside the
 * open-lock branch; here it carries the ldlm_lock_match() flags. */
379 int flags = LDLM_FL_BLOCK_GRANTED;
380 ldlm_policy_data_t policy = {.l_inodebits = {MDS_INODELOCK_OPEN}};
381 struct ldlm_res_id file_res_id = {.name = {id_fid(&lli->lli_id),
382 id_group(&lli->lli_id)}};
/* Pick the cached open handle slot and use counter that belong to the
 * requested open mode: write, exec, or read as the fallback. */
385 if (it->it_flags & FMODE_WRITE) {
386 och_p = &lli->lli_mds_write_och;
387 och_usecount = &lli->lli_open_fd_write_count;
389 } else if (it->it_flags & FMODE_EXEC) {
390 och_p = &lli->lli_mds_exec_och;
391 och_usecount = &lli->lli_open_fd_exec_count;
394 och_p = &lli->lli_mds_read_och;
395 och_usecount = &lli->lli_open_fd_read_count;
399 /* Check for the proper lock */
400 obddev = md_get_real_obd(exp, &lli->lli_id);
401 if (!ldlm_lock_match(obddev->obd_namespace, flags, &file_res_id,
402 LDLM_IBITS, &policy, lockmode, &lockh))
404 down(&lli->lli_och_sem);
405 if (*och_p) { /* Everything is open already, do nothing */
406 /*(*och_usecount)++; Do not let them steal our open
407 handle from under us */
408 /* XXX The code above was my original idea, but in case
409 we have the handle, but we cannot use it due to later
410 checks (e.g. O_CREAT|O_EXCL flags set), nobody
411 would decrement counter increased here. So we just
412 hope the lock won't be invalidated in between. But
413 if it would be, we'll reopen the open request to
414 MDS later during file open path */
415 up(&lli->lli_och_sem);
416 if (ll_intent_alloc(it))
418 memcpy(&LUSTRE_IT(it)->it_lock_handle, &lockh,
420 LUSTRE_IT(it)->it_lock_mode = lockmode;
423 * we do not check here for possible GNS dentry as if
424 * file is opened on it, it is mounted already and we do
425 * not need do anything. --umka
429 /* Hm, interesting. Lock is present, but no open
431 up(&lli->lli_och_sem);
432 ldlm_lock_decref(&lockh, lockmode);
437 ll_frob_intent(&it, &lookup_it);
440 rc = ll_crypto_init_it_key(de->d_inode, it);
444 rc = md_intent_lock(exp, &pid, (char *)de->d_name.name, de->d_name.len,
445 NULL, 0, &cid, it, flags, &req, ll_mdc_blocking_ast);
446 /* If req is NULL, then md_intent_lock() only tried to do a lock match;
447 * if all was well, it will return 1 if it found locks, 0 otherwise. */
448 if (req == NULL && rc >= 0) {
456 CDEBUG(D_INFO, "ll_intent_lock(): rc %d : it->it_status "
457 "%d\n", rc, LUSTRE_IT(it)->it_status);
462 rc = revalidate_it_finish(req, 1, it, de);
464 ll_intent_release(it);
469 /* unfortunately ll_intent_lock may cause a callback and revoke our
471 spin_lock(&dcache_lock);
472 hlist_del_init(&de->d_hash);
474 spin_unlock(&dcache_lock);
478 /* If we had succesful it lookup on mds, but it happened to be negative,
479 we do not free request as it will be reused during lookup (see
480 comment in mdc/mdc_locks.c::mdc_intent_lock(). But if
481 request was not completed, we need to free it. (bug 5154) */
482 if (req != NULL && (rc == 1 || !it_disposition(it, DISP_ENQ_COMPLETE))) {
483 ptlrpc_req_finished(req);
488 if (it == &lookup_it)
489 ll_intent_release(it);
491 ll_unhash_aliases(de->d_inode);
496 * if we found that this is possible GNS mount and dentry is still valid
497 * and may be used by system, we drop the lock and return 0, that means
498 * that re-lookup is needed. Such a way we cause real mounting only in
499 * lookup control path, which is always made with parent's i_sem taken.
502 if (nd && atomic_read(&ll_i2sbi(de->d_inode)->ll_gns_enabled) &&
503 (de->d_inode->i_mode & S_ISUID) && S_ISDIR(de->d_inode->i_mode) &&
504 (gns_flags & LOOKUP_CONTINUE || (gns_it & (IT_CHDIR | IT_OPEN)))) {
506 * special "." and ".." has to be always revalidated because
507 * they never should be passed to lookup()
509 if (!ll_special_name(de)) {
510 CDEBUG(D_DENTRY, "possible GNS dentry %*s %p found, "
511 "causing mounting\n", (int)de->d_name.len,
512 de->d_name.name, de);
514 LASSERT(req == NULL);
515 if (it == &lookup_it) {
516 ll_intent_release(it);
518 ll_intent_drop_lock(it);
520 ll_unhash_aliases(de->d_inode);
525 CDEBUG(D_DENTRY, "revalidated dentry %*s (%p) parent %p "
526 "inode %p refc %d\n", de->d_name.len,
527 de->d_name.name, de, de->d_parent, de->d_inode,
528 atomic_read(&de->d_count));
530 if (it == &lookup_it)
531 ll_intent_release(it);
533 ll_lookup_finish_locks(it, de);
535 de->d_flags &= ~DCACHE_LUSTRE_INVALID;
539 if (it != &lookup_it) {
540 ll_lookup_finish_locks(it, de);
542 if (ll_intent_alloc(it))
545 rc = ll_crypto_init_it_key(de->d_inode, it);
/* Lookup by name only: no child id is handed to md_intent_lock() here, and
 * the id in the reply is compared with cid afterwards. */
548 rc = md_intent_lock(exp, &pid, (char *)de->d_name.name, de->d_name.len,
549 NULL, 0, NULL, it, 0, &req, ll_mdc_blocking_ast);
551 struct mds_body *mds_body = lustre_msg_buf(req->rq_repmsg, 1,
554 /* see if we got same inode, if not - return error */
555 if (id_equal_stc(&cid, &mds_body->id1))
556 goto revalidate_finish;
/*
 * Pin a dentry on the MDS, accounted under the mount counter (flag 1) or
 * the cwd counter (flag 0).
 *
 * The matching counter, lld_mnt_count or lld_cwd_count, is incremented
 * first; a result above 1 takes a separate path (presumably an early
 * return because a pin is already held - TODO confirm). The handle slot is
 * chosen by flag (lld_mnt_och or lld_cwd_och) and obd_pin() is called with
 * the inode number, generation and file type bits. The statements after
 * the call clear the handle and decrement a counter again, presumably only
 * when obd_pin() failed - TODO confirm.
 */
562 /*static*/ void ll_pin(struct dentry *de, struct vfsmount *mnt, int flag)
564 struct inode *inode= de->d_inode;
565 struct ll_sb_info *sbi = ll_i2sbi(inode);
566 struct ll_dentry_data *ldd = ll_d2d(de);
567 struct obd_client_handle *handle;
573 /* Strictly speaking this introduces an additional race: the
574 * increments should wait until the rpc has returned.
575 * However, given that at present the function is void, this
577 if (flag == 1 && (++ldd->lld_mnt_count) > 1) {
583 if (flag == 0 && (++ldd->lld_cwd_count) > 1) {
590 handle = (flag) ? &ldd->lld_mnt_och : &ldd->lld_cwd_och;
591 rc = obd_pin(sbi->ll_md_exp, inode->i_ino, inode->i_generation,
592 inode->i_mode & S_IFMT, handle, flag);
596 memset(handle, 0, sizeof(*handle));
598 ldd->lld_cwd_count--;
600 ldd->lld_mnt_count--;
608 /*static*/ void ll_unpin(struct dentry *de, struct vfsmount *mnt, int flag)
/*
 * Counterpart of ll_pin(): undo a pin taken for the mount counter (flag
 * non-zero) or the cwd counter (flag zero).
 *
 * The handle is copied by value from lld_mnt_och or lld_cwd_och. A copy
 * whose och_magic is not OBD_CLIENT_HANDLE_MAGIC means the pin had failed.
 * Otherwise the matching counter is decremented into count and obd_unpin()
 * is called on the copied handle (presumably only once count reaches
 * zero - TODO confirm).
 */
610 struct ll_sb_info *sbi = ll_i2sbi(de->d_inode);
611 struct ll_dentry_data *ldd = ll_d2d(de);
612 struct obd_client_handle handle;
618 /* Strictly speaking this introduces an additional race: the
619 * increments should wait until the rpc has returned.
620 * However, given that at present the function is void, this
622 handle = (flag) ? ldd->lld_mnt_och : ldd->lld_cwd_och;
623 if (handle.och_magic != OBD_CLIENT_HANDLE_MAGIC) {
624 /* the "pin" failed */
631 count = --ldd->lld_mnt_count;
633 count = --ldd->lld_cwd_count;
641 rc = obd_unpin(sbi->ll_md_exp, &handle, flag);
645 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
/*
 * d_revalidate entry point for kernels newer than 2.5.0.
 *
 * When nd is set, LOOKUP_LAST is set and LOOKUP_LINK_NOTLAST is clear, the
 * open intent embedded in nd is passed to ll_revalidate_it() together with
 * nd->flags; the other call uses flags 0 and no intent.
 */
646 static int ll_revalidate_nd(struct dentry *dentry, struct nameidata *nd)
651 if (nd && nd->flags & LOOKUP_LAST && !(nd->flags & LOOKUP_LINK_NOTLAST))
652 rc = ll_revalidate_it(dentry, nd->flags, nd, &nd->intent.open);
654 rc = ll_revalidate_it(dentry, 0, nd, NULL);
661 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
/*
 * d_iput hook, variant compiled for kernels older than 2.5.0.
 *
 * Asserts that the dentry has a parent with an inode, builds the Lustre
 * ids of the parent inode and of the inode being dropped, and calls
 * md_change_cbdata_name() for this name with null_if_equal as the callback
 * and the inode as its argument.
 */
662 static void ll_dentry_iput(struct dentry *dentry, struct inode *inode)
664 struct ll_sb_info *sbi = ll_i2sbi(inode);
665 struct lustre_id parent, child;
667 LASSERT(dentry->d_parent && dentry->d_parent->d_inode);
668 ll_inode2id(&parent, dentry->d_parent->d_inode);
669 ll_inode2id(&child, inode);
670 md_change_cbdata_name(sbi->ll_md_exp, &parent,
671 (char *)dentry->d_name.name,
672 dentry->d_name.len, &child,
673 null_if_equal, inode);
/*
 * d_iput hook, second variant (presumably the one for kernels 2.5.0 and
 * newer - TODO confirm against the preprocessor guard).
 *
 * Same md_change_cbdata_name() call as the other variant, except that it
 * is skipped for the root of the tree, that is when the dentry is its own
 * parent.
 */
677 static void ll_dentry_iput(struct dentry *dentry, struct inode *inode)
679 struct ll_sb_info *sbi = ll_i2sbi(inode);
680 struct lustre_id parent, child;
682 if (dentry->d_parent != dentry) {
683 /* Do not do this for root of the tree */
684 LASSERT(dentry->d_parent && dentry->d_parent->d_inode);
685 ll_inode2id(&parent, dentry->d_parent->d_inode);
686 ll_inode2id(&child, inode);
687 md_change_cbdata_name(sbi->ll_md_exp, &parent,
688 (char *)dentry->d_name.name,
689 dentry->d_name.len, &child,
690 null_if_equal, inode);
/*
 * Dentry operations table built from the hooks defined in this file.
 * Kernels newer than 2.5.0 use ll_revalidate_nd() as .d_revalidate;
 * .d_revalidate_it is the other variant (presumably for older kernels -
 * TODO confirm). Release, iput, delete and compare map to ll_release(),
 * ll_dentry_iput(), ll_ddelete() and ll_dcompare().
 */
698 struct dentry_operations ll_d_ops = {
699 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
700 .d_revalidate = ll_revalidate_nd,
702 .d_revalidate_it = ll_revalidate_it,
704 .d_release = ll_release,
706 .d_iput = ll_dentry_iput,
708 .d_delete = ll_ddelete,
709 .d_compare = ll_dcompare,