1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (c) 2002, 2003 Cluster File Systems, Inc.
6 * This file is part of Lustre, http://www.lustre.org.
8 * Lustre is free software; you can redistribute it and/or
9 * modify it under the terms of version 2 of the GNU General Public
10 * License as published by the Free Software Foundation.
12 * Lustre is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Lustre; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21 * derived in small part from linux/fs/ext2/namei.c
23 * Copyright (C) 1991, 1992 Linus Torvalds
25 * Big-endian to little-endian byte-swapping/bitmaps by
26 * David S. Miller (davem@caip.rutgers.edu), 1995
27 * Directory entry file type support and forward compatibility hooks
28 * for B-tree directories by Theodore Ts'o (tytso@mit.edu), 1998
32 #include <linux/sched.h>
34 #include <linux/smp_lock.h>
35 #include <linux/quotaops.h>
36 #include <linux/highmem.h>
37 #include <linux/pagemap.h>
39 #define DEBUG_SUBSYSTEM S_LLITE
41 #include <linux/obd_support.h>
42 #include <linux/lustre_lite.h>
43 #include <linux/lustre_dlm.h>
44 #include <linux/lustre_version.h>
45 #include "llite_internal.h"
/*
 * ll_test_inode - match callback that ll_iget() passes to iget4() or
 * iget5_locked(); opaque is the struct lustre_md holding the MDS reply body.
 *
 * Kernels older than 2.5.0 get the three-argument prototype (with ino),
 * newer kernels the two-argument one.
 *
 * Steps visible in this function:
 *  - CERROR when body->valid has neither OBD_MD_FLGENER nor OBD_MD_FLID set;
 *  - bookkeeping in the static last_ino, last_gen and last_count variables,
 *    which only feed the D_VFSTRACE message about repeated comparisons;
 *  - comparison of i_ino (under a kernel > 2.5.0 conditional), i_generation
 *    and the id_group of lli_id against body->id1;
 *  - ll_update_inode() to apply the attributes carried by md.
 *
 * NOTE(review): the statics are shared by all callers and no locking is
 * visible here; presumably tolerated because they are debug-only - confirm.
 */
49 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
50 static int ll_test_inode(struct inode *inode, unsigned long ino, void *opaque)
52 static int ll_test_inode(struct inode *inode, void *opaque)
55 static int last_ino, last_gen, last_count;
56 struct lustre_md *md = opaque;
58 if (!(md->body->valid & (OBD_MD_FLGENER | OBD_MD_FLID))) {
59 CERROR("MDS body missing inum or generation\n");
63 if (last_ino == id_ino(&md->body->id1) &&
64 last_gen == id_gen(&md->body->id1) &&
69 CDEBUG(D_VFSTRACE, "compared %u/%u %u times\n",
70 last_ino, last_gen, last_count);
72 last_ino = id_ino(&md->body->id1);
73 last_gen = id_gen(&md->body->id1);
75 "comparing inode %p ino "DLID4" to body "DLID4"\n",
76 inode, OLID4(&ll_i2info(inode)->lli_id),
77 OLID4(&md->body->id1));
80 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
81 if (inode->i_ino != id_ino(&md->body->id1))
84 if (inode->i_generation != id_gen(&md->body->id1))
87 if (id_group(&ll_i2info(inode)->lli_id) != id_group(&md->body->id1))
90 /* apply the attributes in 'opaque' to this inode. */
91 ll_update_inode(inode, md);
95 extern struct dentry_operations ll_d_ops;
/*
 * ll_unlock - release a reference on a DLM lock.
 *
 * Calls ldlm_lock_decref() on the lock handle lockh for the given lock mode.
 */
97 int ll_unlock(__u32 mode, struct lustre_handle *lockh)
101 ldlm_lock_decref(lockh, mode);
107 * get an inode by inode number (already instantiated by the intent lookup).
108 * Returns inode or NULL.
/*
 * ll_set_inode - set callback given to iget5_locked() by ll_iget() on
 * kernels >= 2.5.0.
 *
 * Forwards the inode and the opaque argument (the struct lustre_md supplied
 * by ll_iget) to ll_read_inode2().
 */
110 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
111 int ll_set_inode(struct inode *inode, void *opaque)
113 ll_read_inode2(inode, opaque);
/*
 * ll_iget - obtain the inode for an MDS reply (iget5_locked flavour).
 *
 * sb:   superblock to search
 * hash: hash value handed to iget5_locked()
 * md:   MDS metadata, passed as the opaque argument to ll_test_inode
 *       (match) and ll_set_inode (initialisation)
 *
 * An inode that comes back with I_NEW set is released with
 * unlock_new_inode(); the inode number, generation and pointer are traced
 * at D_VFSTRACE.
 */
117 struct inode *ll_iget(struct super_block *sb, ino_t hash,
118 struct lustre_md *md)
123 inode = iget5_locked(sb, hash, ll_test_inode, ll_set_inode, md);
126 if (inode->i_state & I_NEW)
127 unlock_new_inode(inode);
128 CDEBUG(D_VFSTRACE, "inode: %lu/%u(%p)\n", inode->i_ino,
129 inode->i_generation, inode);
/*
 * ll_iget - obtain the inode for an MDS reply (iget4 flavour).
 *
 * Same parameters as the iget5_locked variant; here the lookup goes through
 * iget4() with ll_test_inode as the match callback and md as its opaque
 * argument.  The resulting inode is traced at D_VFSTRACE.
 */
135 struct inode *ll_iget(struct super_block *sb, ino_t hash,
136 struct lustre_md *md)
140 inode = iget4(sb, hash, ll_test_inode, md);
142 CDEBUG(D_VFSTRACE, "inode: %lu/%u(%p)\n", inode->i_ino,
143 inode->i_generation, inode);
/*
 * ll_mdc_blocking_ast - DLM callback for metadata (MDC) locks; it is passed
 * to md_intent_lock() by ll_lookup_it().
 *
 * flag selects the action:
 *
 * LDLM_CB_BLOCKING: turn the lock into a handle and cancel it with
 * ldlm_cli_cancel(); the result is logged at D_INODE.
 *
 * LDLM_CB_CANCELING: look up the inode attached to the lock and, driven by
 * the inodebits in the lock policy data:
 *  - report via LDLM_ERROR when the resource name does not match the fid and
 *    group stored in lli_id;
 *  - MDS_INODELOCK_OPEN: choose flags from l_req_mode (an unexpected mode
 *    is reported with CERROR) and call ll_md_real_close();
 *  - MDS_INODELOCK_UPDATE: clear LLI_F_HAVE_MDS_SIZE_LOCK, and for
 *    directories drop cached pages with truncate_inode_pages();
 *  - MDS_INODELOCK_LOOKUP: unhash the dentry aliases, except for the
 *    filesystem root inode.
 *
 * NOTE(review): confirm that the inode returned by ll_inode_from_lock() is
 * checked for NULL before it is used.
 */
148 int ll_mdc_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
149 void *data, int flag)
152 struct lustre_handle lockh;
156 case LDLM_CB_BLOCKING:
157 ldlm_lock2handle(lock, &lockh);
158 rc = ldlm_cli_cancel(&lockh);
160 CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc);
164 case LDLM_CB_CANCELING: {
165 struct inode *inode = ll_inode_from_lock(lock);
166 struct ll_inode_info *li = ll_i2info(inode);
167 __u64 bits = lock->l_policy_data.l_inodebits.bits;
169 /* For lookup locks: Invalidate all dentries associated with
170 this inode, for UPDATE locks - invalidate directory pages */
174 if (lock->l_resource->lr_name.name[0] != id_fid(&li->lli_id) ||
175 lock->l_resource->lr_name.name[1] != id_group(&li->lli_id)) {
176 LDLM_ERROR(lock, "data mismatch with object %lu/%lu",
177 (unsigned long)id_fid(&li->lli_id),
178 (unsigned long)id_group(&li->lli_id));
181 if (bits & MDS_INODELOCK_OPEN) {
183 switch (lock->l_req_mode) {
194 CERROR("Unexpected lock mode for OPEN lock "
195 "%d, inode %ld\n", lock->l_req_mode,
198 ll_md_real_close(ll_i2mdexp(inode), inode, flags);
201 if (bits & MDS_INODELOCK_UPDATE)
202 clear_bit(LLI_F_HAVE_MDS_SIZE_LOCK,
203 &(ll_i2info(inode)->lli_flags));
206 /* If lookup lock is cancelled, we just drop the dentry and
207 this will cause us to reget data from MDS when we'd want to
208 access this dentry/inode again. If this is lock on
209 other parts of inode that is cancelled, we do not need to do
210 much (but need to discard data from readdir, if any), since
211 absence of lock will cause ll_revalidate_it (called from
212 stat() and similar functions) to renew the data anyway */
213 if (S_ISDIR(inode->i_mode) &&
214 (bits & MDS_INODELOCK_UPDATE)) {
215 CDEBUG(D_INODE, "invalidating inode %lu/%u(%p)\n",
216 inode->i_ino, inode->i_generation, inode);
217 truncate_inode_pages(inode->i_mapping, 0);
220 if (inode->i_sb->s_root &&
221 inode != inode->i_sb->s_root->d_inode &&
222 (bits & MDS_INODELOCK_LOOKUP))
223 ll_unhash_aliases(inode);
/*
 * ll_mdc_cancel_unused - cancel unused DLM locks on the resource of inode.
 *
 * The resource id is built from the fid and group stored in the inode
 * lli_id, the namespace is taken from the obd device behind conn, and the
 * call is delegated to ldlm_cli_cancel_unused(), whose result is returned.
 * flags is forwarded unchanged.
 */
234 int ll_mdc_cancel_unused(struct lustre_handle *conn, struct inode *inode,
235 int flags, void *opaque)
237 struct ll_inode_info *li = ll_i2info(inode);
238 struct ldlm_res_id res_id =
239 { .name = {id_fid(&li->lli_id), id_group(&li->lli_id)} };
240 struct obd_device *obddev = class_conn2obd(conn);
243 RETURN(ldlm_cli_cancel_unused(obddev->obd_namespace, &res_id, flags,
247 /* Search "inode"'s alias list for a dentry that has the same name and parent as
248 * de. If found, return it. If not found, return de. */
/*
 * ll_find_alias - reuse an existing alias of inode that matches de.
 *
 * Walks inode->i_dentry under dcache_lock.  A candidate must have the same
 * parent, the same name length and the same name bytes as de.  The match is
 * taken off the LRU list if it is on one, removed from and re-added to the
 * dentry hash with __d_rehash(), given an extra d_count reference, and has
 * DCACHE_LUSTRE_INVALID cleared.  dcache_lock is dropped on both the match
 * path and the no-match path.
 *
 * NOTE(review): the debug message prints the name with %*s (field width)
 * whereas other messages in this file use %.*s (precision); presumably
 * %.*s was intended - confirm.
 */
249 struct dentry *ll_find_alias(struct inode *inode, struct dentry *de)
251 struct list_head *tmp;
253 spin_lock(&dcache_lock);
254 list_for_each(tmp, &inode->i_dentry) {
255 struct dentry *dentry = list_entry(tmp, struct dentry, d_alias);
257 /* We are called here with 'de' already on the aliases list. */
263 if (dentry->d_parent != de->d_parent)
266 if (dentry->d_name.len != de->d_name.len)
269 if (memcmp(dentry->d_name.name, de->d_name.name,
270 de->d_name.len) != 0)
273 if (!list_empty(&dentry->d_lru))
274 list_del_init(&dentry->d_lru);
276 hlist_del_init(&dentry->d_hash);
277 __d_rehash(dentry); /* avoid taking dcache_lock inside */
278 spin_unlock(&dcache_lock);
279 atomic_inc(&dentry->d_count);
281 dentry->d_flags &= ~DCACHE_LUSTRE_INVALID;
282 CDEBUG(D_DENTRY, "alias dentry %*s (%p) parent %p inode %p "
283 "refc %d\n", de->d_name.len, de->d_name.name, de,
284 de->d_parent, de->d_inode, atomic_read(&de->d_count));
288 spin_unlock(&dcache_lock);
/*
 * lookup_it_finish - turn the reply of an intent lookup into dcache state.
 *
 * request: reply that carries the MDS body
 * offset:  reply buffer index handed to ll_prep_inode()
 * it:      the lookup intent
 * data:    struct it_cb_data with the parent inode and a pointer to the
 *          child dentry pointer (which may be replaced)
 *
 * Unless the intent carries DISP_LOOKUP_NEG, the inode is prepared with
 * ll_prep_inode() and stored as the lock data of the intent lock.  For an
 * IT_GETATTR on a regular file that has a stripe md, the MDS lock is dropped
 * and the size is fetched with ll_glimpse_size().  The child dentry is
 * swapped for a matching alias via ll_find_alias(), gets ll_d_ops as its
 * dentry operations, and is attached with d_add().
 */
293 static int lookup_it_finish(struct ptlrpc_request *request, int offset,
294 struct lookup_intent *it, void *data)
296 struct it_cb_data *icbd = data;
297 struct dentry **de = icbd->icbd_childp;
298 struct inode *parent = icbd->icbd_parent;
299 struct ll_sb_info *sbi = ll_i2sbi(parent);
300 struct dentry *dentry = *de, *saved = *de;
301 struct inode *inode = NULL;
304 /* NB 1 request reference will be taken away by ll_intent_lock()
306 if (!it_disposition(it, DISP_LOOKUP_NEG)) {
309 rc = ll_prep_inode(sbi->ll_dt_exp, sbi->ll_md_exp,
310 &inode, request, offset, dentry->d_sb);
314 CDEBUG(D_DLMTRACE, "setting l_data to inode %p (%lu/%u)\n",
315 inode, inode->i_ino, inode->i_generation);
317 mdc_set_lock_data(NULL, &LUSTRE_IT(it)->it_lock_handle, inode);
319 /* If this is a stat, get the authoritative file size */
320 if (it->it_op == IT_GETATTR && S_ISREG(inode->i_mode) &&
321 ll_i2info(inode)->lli_smd != NULL) {
322 struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
325 LASSERT(lsm->lsm_object_id != 0);
327 /* bug 2334: drop MDS lock before acquiring OST lock */
328 ll_intent_drop_lock(it);
330 rc = ll_glimpse_size(inode);
337 dentry = *de = ll_find_alias(inode, dentry);
342 dentry->d_op = &ll_d_ops;
346 d_add(dentry, inode);
/*
 * ll_lookup_it - intent-based directory lookup.
 *
 * parent: directory inode being searched
 * dentry: negative dentry naming the child
 * nd:     nameidata of the walk; its open intent is used when flags is
 *         non-zero
 * flags:  lookup flags, forwarded to md_intent_lock()
 *
 * Flow visible here:
 *  - names longer than EXT3_NAME_LEN fail with -ENAMETOOLONG;
 *  - ll_frob_intent() is given the caller intent and a local IT_LOOKUP
 *    intent (presumably the local one is substituted when no intent was
 *    supplied; the out path releases it when it was the one used);
 *  - md_intent_lock() is issued on the parent id and child name, with
 *    ll_mdc_blocking_ast as the lock callback;
 *  - lookup_it_finish() instantiates the dentry, then
 *    ll_lookup_finish_locks() runs;
 *  - GNS: when the result is a directory with S_ISUID set and either
 *    LOOKUP_CONTINUE is in flags or the intent op has IT_CHDIR or IT_OPEN,
 *    ll_gns_mount_object() is tried; -ERESTARTSYS is returned to the caller,
 *    other failures are only logged;
 *  - the out path finishes the request and logs the outcome.
 *
 * Returns an ERR_PTR on failure, otherwise NULL or a dentry.
 *
 * NOTE(review): several debug messages print the name with %*s (field
 * width) while the VFS trace message uses %.*s (precision); presumably
 * %.*s was intended everywhere - confirm.
 */
351 static struct dentry *ll_lookup_it(struct inode *parent, struct dentry *dentry,
352 struct nameidata *nd, int flags)
354 struct dentry *save = dentry, *retval;
355 struct lookup_intent *it = flags ? &nd->intent.open : NULL;
356 struct lustre_id pid;
357 struct it_cb_data icbd;
358 struct ptlrpc_request *req = NULL;
359 struct lookup_intent lookup_it = { .it_op = IT_LOOKUP };
363 if (dentry->d_name.len > EXT3_NAME_LEN)
364 RETURN(ERR_PTR(-ENAMETOOLONG));
366 CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%u(%p),intent=%s\n",
367 dentry->d_name.len, dentry->d_name.name, parent->i_ino,
368 parent->i_generation, parent, LL_IT2STR(it));
370 if (d_mountpoint(dentry))
371 CERROR("Tell Peter, lookup on mtpt, it %s\n", LL_IT2STR(it));
374 nd->mnt->mnt_last_used = jiffies;
376 gns_it = nd ? nd->intent.open.it_op : IT_OPEN;
377 ll_frob_intent(&it, &lookup_it);
379 icbd.icbd_childp = &dentry;
380 icbd.icbd_parent = parent;
381 ll_inode2id(&pid, parent);
383 rc = md_intent_lock(ll_i2mdexp(parent), &pid,
384 dentry->d_name.name, dentry->d_name.len, NULL, 0,
385 NULL, it, flags, &req, ll_mdc_blocking_ast);
387 GOTO(out, retval = ERR_PTR(rc));
389 rc = lookup_it_finish(req, 1, it, &icbd);
391 ll_intent_release(it);
392 GOTO(out, retval = ERR_PTR(rc));
395 ll_lookup_finish_locks(it, dentry);
397 if (nd && dentry->d_inode != NULL &&
398 dentry->d_inode->i_mode & S_ISUID && S_ISDIR(dentry->d_inode->i_mode) &&
399 ((flags & LOOKUP_CONTINUE) || (gns_it & (IT_CHDIR | IT_OPEN))))
401 rc = ll_gns_mount_object(dentry, nd->mnt);
402 if (rc == -ERESTARTSYS)
403 GOTO(out, retval = ERR_PTR(-ERESTARTSYS));
407 * just reporting about GNS failures, lookup() is
408 * successful, do not stop it.
410 * GNS failure may be that object is found in SUID bit
411 * marked dir but it is not regular file and we should
412 * lookup further until we find correct mount
413 * object. This will allow to perform GNS mount is the
414 * following case for instance:
416 * /mnt/lustre/gns_mount/.mntinfo/.mntinfo/..../.mntinfo
417 * where all ".mntinfo" are dirs and only last one is
420 CDEBUG(D_INODE, "failed to mount %*s, err %d\n",
421 (int)dentry->d_name.len, dentry->d_name.name, rc);
426 GOTO(out, retval = NULL);
428 GOTO(out, retval = dentry);
431 ptlrpc_req_finished(req);
432 if (it == &lookup_it)
433 ll_intent_release(it);
435 CDEBUG(D_INODE, "lookup 0x%p in %lu/%lu: %*s -> %lu/%lu\n",
437 (unsigned long) parent->i_ino,
438 (unsigned long) parent->i_generation,
439 dentry->d_name.len, dentry->d_name.name,
440 (unsigned long) dentry->d_inode->i_ino,
441 (unsigned long) dentry->d_inode->i_generation);
443 CDEBUG(D_INODE, "lookup 0x%p in %lu/%lu: %*s -> ??\n",
445 (unsigned long) parent->i_ino,
446 (unsigned long) parent->i_generation,
447 dentry->d_name.len, dentry->d_name.name);
/*
 * ll_lookup_nd - lookup entry point for kernels > 2.5.0 (installed as
 * .lookup in ll_dir_inode_operations).
 *
 * When nd is set, LOOKUP_LAST is in nd->flags and LOOKUP_LINK_NOTLAST is
 * not, the nameidata flags are passed on to ll_lookup_it() so the intent is
 * used; in every other case ll_lookup_it() is called with flags 0.
 */
451 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
452 static struct dentry *ll_lookup_nd(struct inode *parent, struct dentry *dentry,
453 struct nameidata *nd)
458 if (nd && nd->flags & LOOKUP_LAST && !(nd->flags & LOOKUP_LINK_NOTLAST))
459 de = ll_lookup_it(parent, dentry, nd, nd->flags);
461 de = ll_lookup_it(parent, dentry, nd, 0);
467 /* We depend on "mode" being set with the proper file type/umask by now */
/*
 * ll_create_node - build the inode for an object created through an intent.
 *
 * dir: parent directory; supplies the superblock and the ll_sb_info exports
 * it:  intent that must already carry a disposition; its it_data is the
 *      request whose reply describes the new object
 *
 * The inode is prepared from reply buffer 1 with ll_prep_inode(), asserted
 * to have no dentry aliases yet, and stored as the lock data of the intent
 * lock.  The request is finished before returning.
 *
 * Returns the inode, or ERR_PTR(rc) when ll_prep_inode() fails.
 *
 * NOTE(review): name, namelen, data, datalen, mode and extra are not
 * referenced by any statement visible here - confirm whether they are used.
 */
468 static struct inode *ll_create_node(struct inode *dir, const char *name,
469 int namelen, const void *data, int datalen,
470 int mode, __u64 extra,
471 struct lookup_intent *it)
473 struct inode *inode = NULL;
474 struct ptlrpc_request *request = NULL;
475 struct ll_sb_info *sbi = ll_i2sbi(dir);
480 LASSERT(it && LUSTRE_IT(it)->it_disposition);
482 request = LUSTRE_IT(it)->it_data;
483 rc = ll_prep_inode(sbi->ll_dt_exp, sbi->ll_md_exp,
484 &inode, request, 1, dir->i_sb);
486 GOTO(out, inode = ERR_PTR(rc));
488 LASSERT(list_empty(&inode->i_dentry));
490 /* We asked for a lock on the directory, but were granted a
491 * lock on the inode. Since we finally have an inode pointer,
492 * stuff it in the lock. */
493 CDEBUG(D_DLMTRACE, "setting l_ast_data to inode %p (%lu/%u)\n",
494 inode, inode->i_ino, inode->i_generation);
495 mdc_set_lock_data(NULL, &LUSTRE_IT(it)->it_lock_handle, inode);
498 ptlrpc_req_finished(request);
503 * By the time this is called, we already have created the directory cache
504 * entry for the new file, but it is so far negative - it has no inode.
506 * We defer creating the OBD object(s) until open, to keep the intent and
507 * non-intent code paths similar, and also because we do not have the MDS
508 * inode number before calling ll_create_node() (which is needed for LOV),
509 * so we would need to do yet another RPC to the MDS to store the LOV EA
510 * data on the MDS. If needed, we would pass the PACKED lmm as data and
511 * lmm_size in datalen (the MDS still has code which will handle that).
513 * If the create succeeds, we fill in the inode information
514 * with d_instantiate().
/*
 * ll_create_it - finish an intent-driven create for a negative dentry.
 *
 * dir:    parent directory
 * dentry: dentry to instantiate
 * mode:   file mode, forwarded to ll_create_node()
 * it:     open/create intent; its it_data holds the request
 *
 * Checks the create disposition with it_open_error(), records the inode
 * generation from the request via mdc_store_inode_generation(), builds the
 * inode with ll_create_node() and binds it with d_instantiate().
 * Returns PTR_ERR(inode) when ll_create_node() fails.
 */
516 static int ll_create_it(struct inode *dir, struct dentry *dentry, int mode,
517 struct lookup_intent *it)
520 struct ptlrpc_request *request = LUSTRE_IT(it)->it_data;
521 struct obd_export *md_exp = ll_i2mdexp(dir);
525 CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%u(%p),intent=%s\n",
526 dentry->d_name.len, dentry->d_name.name, dir->i_ino,
527 dir->i_generation, dir, LL_IT2STR(it));
529 rc = it_open_error(DISP_OPEN_CREATE, it);
533 mdc_store_inode_generation(md_exp, request, MDS_REQ_INTENT_REC_OFF, 1);
534 inode = ll_create_node(dir, dentry->d_name.name, dentry->d_name.len,
535 NULL, 0, mode, 0, it);
537 RETURN(PTR_ERR(inode));
539 d_instantiate(dentry, inode);
/*
 * ll_create_nd - create entry point for kernels > 2.5.0 (installed as
 * .create in ll_dir_inode_operations).
 *
 * Thin adapter: passes the open intent embedded in nd to ll_create_it().
 */
543 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
544 static int ll_create_nd(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd)
546 return ll_create_it(dir, dentry, mode, &nd->intent.open);
/*
 * ll_update_times - advance an inode mtime and ctime from an MDS reply.
 *
 * The mds_body is read from the reply message of request at buffer index
 * offset.  A timestamp is copied only when its valid bit (OBD_MD_FLMTIME or
 * OBD_MD_FLCTIME) is set and the value in the body is greater than the one
 * currently stored in the inode, so times never move backwards here.
 */
550 static void ll_update_times(struct ptlrpc_request *request, int offset,
553 struct mds_body *body = lustre_msg_buf(request->rq_repmsg, offset,
557 if (body->valid & OBD_MD_FLMTIME &&
558 body->mtime > LTIME_S(inode->i_mtime)) {
559 CDEBUG(D_INODE, "setting ino %lu mtime from %lu to %u\n",
560 inode->i_ino, LTIME_S(inode->i_mtime), body->mtime);
561 LTIME_S(inode->i_mtime) = body->mtime;
563 if (body->valid & OBD_MD_FLCTIME &&
564 body->ctime > LTIME_S(inode->i_ctime))
565 LTIME_S(inode->i_ctime) = body->ctime;
/*
 * ll_mknod_raw - raw (nameidata-based) mknod, dispatched from
 * ll_rawop_from_intent().
 *
 * nd:   names the parent directory (nd->dentry) and the new entry (nd->last)
 * mode: requested mode; the process umask is applied, and a mode without a
 *       file type is treated as S_IFREG
 * rdev: device number forwarded to md_create()
 *
 * Allocates an mdc_op_data, fills it with ll_prepare_mdc_data(), issues
 * md_create() with the caller fsuid and fsgid, frees the op data, refreshes
 * the directory times from the reply and finishes the request.
 */
568 static int ll_mknod_raw(struct nameidata *nd, int mode, dev_t rdev)
570 struct ptlrpc_request *request = NULL;
571 struct inode *dir = nd->dentry->d_inode;
572 struct ll_sb_info *sbi = ll_i2sbi(dir);
573 struct mdc_op_data *op_data;
577 CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%u(%p)\n",
578 nd->last.len, nd->last.name, dir->i_ino, dir->i_generation, dir);
580 mode &= ~current->fs->umask;
582 switch (mode & S_IFMT) {
585 mode |= S_IFREG; /* for mode = 0 case, fallthrough */
590 OBD_ALLOC(op_data, sizeof(*op_data));
593 ll_prepare_mdc_data(op_data, dir, NULL, nd->last.name,
595 err = md_create(sbi->ll_md_exp, op_data, NULL, 0, mode,
596 current->fsuid, current->fsgid, rdev,
598 OBD_FREE(op_data, sizeof(*op_data));
600 ll_update_times(request, 0, dir);
601 ptlrpc_req_finished(request);
/*
 * ll_mknod - dentry-based mknod.
 *
 * dir:    parent directory
 * dchild: dentry for the new node
 * mode:   requested mode; the process umask is applied, and a mode without
 *         a file type is treated as S_IFREG
 * rdev:   device number forwarded to md_create()
 *
 * Same create sequence as ll_mknod_raw (allocate and fill mdc_op_data,
 * md_create, free op data, refresh directory times), followed by
 * ll_prep_inode() on reply buffer 0 and d_instantiate() of the new inode.
 * The request is finished before returning.
 */
612 static int ll_mknod(struct inode *dir, struct dentry *dchild,
613 int mode, ll_dev_t rdev)
615 struct ptlrpc_request *request = NULL;
616 struct inode *inode = NULL;
617 struct ll_sb_info *sbi = ll_i2sbi(dir);
618 struct mdc_op_data *op_data;
622 CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%u(%p)\n",
623 dchild->d_name.len, dchild->d_name.name,
624 dir->i_ino, dir->i_generation, dir);
626 mode &= ~current->fs->umask;
628 switch (mode & S_IFMT) {
631 mode |= S_IFREG; /* for mode = 0 case, fallthrough */
636 OBD_ALLOC(op_data, sizeof(*op_data));
639 ll_prepare_mdc_data(op_data, dir, NULL, dchild->d_name.name,
640 dchild->d_name.len, 0);
641 err = md_create(sbi->ll_md_exp, op_data, NULL, 0, mode,
642 current->fsuid, current->fsgid, rdev,
644 OBD_FREE(op_data, sizeof(*op_data));
648 ll_update_times(request, 0, dir);
649 err = ll_prep_inode(sbi->ll_dt_exp, sbi->ll_md_exp,
650 &inode, request, 0, dchild->d_sb);
661 d_instantiate(dchild, inode);
664 ptlrpc_req_finished(request);
/*
 * ll_symlink_raw - raw (nameidata-based) symlink creation, dispatched from
 * ll_rawop_from_intent().
 *
 * nd:  names the parent directory (nd->dentry) and the link name (nd->last)
 * tgt: NUL-terminated link target; it is sent as the create data with
 *      length strlen(tgt) + 1
 *
 * The parent link count is checked against EXT3_LINK_MAX first.  The create
 * goes through md_create() with mode S_IFLNK | S_IRWXUGO and the caller
 * fsuid and fsgid; afterwards the op data is freed, the directory times are
 * refreshed from the reply and the request is finished.
 *
 * NOTE(review): the trace message prints the name with %*s (field width)
 * while sibling functions use %.*s (precision) - presumably a typo, confirm.
 */
668 static int ll_symlink_raw(struct nameidata *nd, const char *tgt)
670 struct inode *dir = nd->dentry->d_inode;
671 struct ptlrpc_request *request = NULL;
672 struct ll_sb_info *sbi = ll_i2sbi(dir);
673 const char *name = nd->last.name;
674 struct mdc_op_data *op_data;
675 int len = nd->last.len;
679 CDEBUG(D_VFSTRACE, "VFS Op:name=%*s,dir=%lu/%u(%p),target=%s\n",
680 nd->last.len, nd->last.name, dir->i_ino, dir->i_generation,
683 if (dir->i_nlink >= EXT3_LINK_MAX)
686 OBD_ALLOC(op_data, sizeof(*op_data));
689 ll_prepare_mdc_data(op_data, dir, NULL, name, len, 0);
691 err = md_create(sbi->ll_md_exp, op_data,
692 tgt, strlen(tgt) + 1, S_IFLNK | S_IRWXUGO,
693 current->fsuid, current->fsgid, 0, &request);
694 OBD_FREE(op_data, sizeof(*op_data));
696 ll_update_times(request, 0, dir);
698 ptlrpc_req_finished(request);
/*
 * ll_link_raw - raw (nameidata-based) hard link, dispatched from
 * ll_rawop_from_intent().
 *
 * srcnd: nameidata whose dentry is the existing inode to link
 * tgtnd: nameidata whose dentry is the target directory and whose last
 *        component is the new name
 *
 * Fills an mdc_op_data with the source inode, the target directory and the
 * new name, calls md_link(), frees the op data, refreshes the target
 * directory times from the reply and finishes the request.
 */
702 static int ll_link_raw(struct nameidata *srcnd, struct nameidata *tgtnd)
704 struct inode *src = srcnd->dentry->d_inode;
705 struct inode *dir = tgtnd->dentry->d_inode;
706 struct ptlrpc_request *request = NULL;
707 struct mdc_op_data *op_data;
709 struct ll_sb_info *sbi = ll_i2sbi(dir);
713 "VFS Op: inode=%lu/%u(%p), dir=%lu/%u(%p), target=%.*s\n",
714 src->i_ino, src->i_generation, src, dir->i_ino,
715 dir->i_generation, dir, tgtnd->last.len, tgtnd->last.name);
717 OBD_ALLOC(op_data, sizeof(*op_data));
720 ll_prepare_mdc_data(op_data, src, dir, tgtnd->last.name,
722 err = md_link(sbi->ll_md_exp, op_data, &request);
723 OBD_FREE(op_data, sizeof(*op_data));
725 ll_update_times(request, 0, dir);
726 ptlrpc_req_finished(request);
/*
 * ll_mkdir_raw - raw (nameidata-based) mkdir, dispatched from
 * ll_rawop_from_intent().
 *
 * nd:   names the parent directory (nd->dentry) and the new name (nd->last)
 * mode: requested permissions; only the S_IRWXUGO and S_ISVTX bits are kept,
 *       the process umask is applied and S_IFDIR is added
 *
 * Issues md_create() with the caller fsuid and fsgid, frees the op data,
 * refreshes the parent times from the reply and finishes the request.
 */
731 static int ll_mkdir_raw(struct nameidata *nd, int mode)
733 struct inode *dir = nd->dentry->d_inode;
734 struct ptlrpc_request *request = NULL;
735 struct ll_sb_info *sbi = ll_i2sbi(dir);
736 struct mdc_op_data *op_data;
739 CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%u(%p)\n",
740 nd->last.len, nd->last.name, dir->i_ino, dir->i_generation, dir);
742 mode = (mode & (S_IRWXUGO|S_ISVTX) & ~current->fs->umask) | S_IFDIR;
743 OBD_ALLOC(op_data, sizeof(*op_data));
746 ll_prepare_mdc_data(op_data, dir, NULL, nd->last.name,
748 err = md_create(sbi->ll_md_exp, op_data, NULL, 0, mode,
749 current->fsuid, current->fsgid, 0, &request);
750 OBD_FREE(op_data, sizeof(*op_data));
752 ll_update_times(request, 0, dir);
753 ptlrpc_req_finished(request);
/*
 * ll_rmdir_raw - raw (nameidata-based) rmdir, dispatched from
 * ll_rawop_from_intent().
 *
 * The op data is prepared with S_IFDIR as its last argument, which is what
 * distinguishes this call from ll_unlink_raw (which passes 0); the removal
 * itself goes through md_unlink().  Afterwards the op data is freed, the
 * parent times are refreshed from the reply and the request is finished.
 */
757 static int ll_rmdir_raw(struct nameidata *nd)
759 struct inode *dir = nd->dentry->d_inode;
760 struct ptlrpc_request *request = NULL;
761 struct mdc_op_data *op_data;
765 CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%u(%p)\n",
766 nd->last.len, nd->last.name, dir->i_ino, dir->i_generation, dir);
768 OBD_ALLOC(op_data, sizeof(*op_data));
771 ll_prepare_mdc_data(op_data, dir, NULL, nd->last.name,
772 nd->last.len, S_IFDIR);
773 rc = md_unlink(ll_i2sbi(dir)->ll_md_exp, op_data, &request);
774 OBD_FREE(op_data, sizeof(*op_data));
776 ll_update_times(request, 0, dir);
777 ptlrpc_req_finished(request);
/*
 * ll_objects_destroy - destroy the data objects of a file whose striping EA
 * came back in an MDS reply.
 *
 * request: request whose reply holds the mds_body (buffer 0), the EA
 *          (buffer 1) and optionally llog cookies (buffer 2)
 * dir:     inode used only to reach the data export via ll_i2dtexp()
 * offset:  index of the request-message buffer that receives a copy of the
 *          llog cookies
 *
 * Steps:
 *  - inspect OBD_MD_FLEASIZE in body->valid; a set bit with eadatasize 0
 *    is a protocol error (-EPROTO);
 *  - unpack the EA into a lov_stripe_md with obd_unpackmd();
 *  - fill an obdo with object id, group and file type taken from the lsm
 *    and the body;
 *  - when OBD_MD_FLCOOKIE is valid, take one llog_cookie per stripe from
 *    reply buffer 2; if that buffer is missing the cookie flag is cleared
 *    in both the obdo and the body, otherwise the cookies are also copied
 *    into the request message;
 *  - call obd_destroy() and log a failure with the object id;
 *  - release the lsm with obd_free_memmd().
 *
 * NOTE(review): the eadata NULL test directly follows
 * LASSERT(eadata != NULL), so it can only fire when assertions are compiled
 * out - confirm which of the two is intended.
 * NOTE(review): the destination of the cookie memcpy comes from
 * lustre_msg_buf() and no NULL check on it is visible here - confirm.
 */
781 int ll_objects_destroy(struct ptlrpc_request *request,
782 struct inode *dir, int offset)
784 struct mds_body *body;
785 struct lov_mds_md *eadata;
786 struct lov_stripe_md *lsm = NULL;
787 struct obd_trans_info oti = { 0 };
792 /* req is swabbed so this is safe */
793 body = lustre_msg_buf(request->rq_repmsg, 0, sizeof(*body));
795 if (!(body->valid & OBD_MD_FLEASIZE))
798 if (body->eadatasize == 0) {
799 CERROR("OBD_MD_FLEASIZE set but eadatasize zero\n");
800 GOTO(out, rc = -EPROTO);
804 * the MDS sent back the EA because we unlinked the last reference to
805 * this file. Use this EA to unlink the objects on the OST. It's opaque
806 * so we don't swab here; we leave it to obd_unpackmd() to check it is
807 * complete and sensible.
809 eadata = lustre_swab_repbuf(request, 1, body->eadatasize, NULL);
810 LASSERT(eadata != NULL);
811 if (eadata == NULL) {
812 CERROR("Can't unpack MDS EA data\n");
813 GOTO(out, rc = -EPROTO);
816 rc = obd_unpackmd(ll_i2dtexp(dir), &lsm, eadata, body->eadatasize);
818 CERROR("obd_unpackmd: %d\n", rc);
821 LASSERT(rc >= sizeof(*lsm));
825 GOTO(out_free_memmd, rc = -ENOMEM);
827 oa->o_id = lsm->lsm_object_id;
828 oa->o_gr = lsm->lsm_object_gr;
829 oa->o_mode = body->mode & S_IFMT;
830 oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLGROUP;
832 if (body->valid & OBD_MD_FLCOOKIE) {
833 int length = sizeof(struct llog_cookie) *
834 lsm->lsm_stripe_count;
835 oa->o_valid |= OBD_MD_FLCOOKIE;
837 lustre_msg_buf(request->rq_repmsg, 2, length);
838 if (oti.oti_logcookies == NULL) {
839 oa->o_valid &= ~OBD_MD_FLCOOKIE;
840 body->valid &= ~OBD_MD_FLCOOKIE;
842 /* copy llog cookies to request to replay unlink
843 * so that the same llog file and records as those created
844 * during fail can be re-created while doing replay
847 memcpy(lustre_msg_buf(request->rq_reqmsg, offset, 0),
848 oti.oti_logcookies, length);
852 rc = obd_destroy(ll_i2dtexp(dir), oa, lsm, &oti);
855 CERROR("obd destroy objid "LPX64" error %d\n",
856 lsm->lsm_object_id, rc);
859 obd_free_memmd(ll_i2dtexp(dir), &lsm);
/*
 * ll_unlink_raw - raw (nameidata-based) unlink, dispatched from
 * ll_rawop_from_intent().
 *
 * Prepares the op data for the name in nd->last (mode argument 0), calls
 * md_unlink(), frees the op data, refreshes the parent times from the reply
 * and then hands the reply to ll_objects_destroy() with request buffer
 * offset 2 so that any data objects returned by the MDS are destroyed.
 * The request is finished before returning.
 */
864 static int ll_unlink_raw(struct nameidata *nd)
866 struct inode *dir = nd->dentry->d_inode;
867 struct ptlrpc_request *request = NULL;
868 struct mdc_op_data *op_data;
871 CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%u(%p)\n",
872 nd->last.len, nd->last.name, dir->i_ino, dir->i_generation, dir);
874 OBD_ALLOC(op_data, sizeof(*op_data));
877 ll_prepare_mdc_data(op_data, dir, NULL, nd->last.name, nd->last.len, 0);
878 rc = md_unlink(ll_i2sbi(dir)->ll_md_exp, op_data, &request);
879 OBD_FREE(op_data, sizeof(*op_data));
882 ll_update_times(request, 0, dir);
884 rc = ll_objects_destroy(request, dir, 2);
887 ptlrpc_req_finished(request);
/*
 * ll_rename_raw - raw (nameidata-based) rename, dispatched from
 * ll_rawop_from_intent().
 *
 * srcnd: source directory (srcnd->dentry) and old name (srcnd->last)
 * tgtnd: target directory (tgtnd->dentry) and new name (tgtnd->last)
 *
 * The two nameidata are first compared by vfsmount.  The op data carries
 * only the two directory inodes; the names are passed to md_rename()
 * directly.  After the call the op data is freed, the times of both
 * directories are refreshed from the reply, and ll_objects_destroy() is run
 * with request buffer offset 3 to remove the data objects of an overwritten
 * target, if the MDS returned any.  The request is finished before
 * returning.
 */
891 static int ll_rename_raw(struct nameidata *srcnd, struct nameidata *tgtnd)
893 struct inode *src = srcnd->dentry->d_inode;
894 struct inode *tgt = tgtnd->dentry->d_inode;
895 struct ptlrpc_request *request = NULL;
896 struct ll_sb_info *sbi = ll_i2sbi(src);
897 struct mdc_op_data *op_data;
901 if (srcnd->mnt != tgtnd->mnt)
904 CDEBUG(D_VFSTRACE,"VFS Op:oldname=%.*s,src_dir=%lu/%u(%p),newname=%.*s,"
905 "tgt_dir=%lu/%u(%p)\n", srcnd->last.len, srcnd->last.name,
906 src->i_ino, src->i_generation, src, tgtnd->last.len,
907 tgtnd->last.name, tgt->i_ino, tgt->i_generation, tgt);
909 OBD_ALLOC(op_data, sizeof(*op_data));
912 ll_prepare_mdc_data(op_data, src, tgt, NULL, 0, 0);
913 err = md_rename(sbi->ll_md_exp, op_data, srcnd->last.name,
914 srcnd->last.len, tgtnd->last.name, tgtnd->last.len,
916 OBD_FREE(op_data, sizeof(*op_data));
918 ll_update_times(request, 0, src);
919 ll_update_times(request, 0, tgt);
920 err = ll_objects_destroy(request, src, 3);
923 ptlrpc_req_finished(request);
/*
 * LLITE_IT_RAWOPS is the mask of intent operations that have a raw handler
 * in this file.
 *
 * ll_rawop_from_intent - dispatch a raw directory operation from the intent
 * stored in nd (installed as .endparentlookup in ll_dir_inode_operations on
 * kernels > 2.5.0).
 *
 * A missing nd, or an intent op outside LLITE_IT_RAWOPS, is filtered out
 * first.  Otherwise the op selects one of ll_mknod_raw, ll_mkdir_raw,
 * ll_rmdir_raw, ll_unlink_raw, ll_symlink_raw, ll_link_raw or
 * ll_rename_raw, with arguments taken from nd->intent.open.  The symlink
 * target and the rename source nameidata are asserted to be set.
 *
 * Unless the handler returned -EOPNOTSUPP, IT_STATUS_RAW is set in the
 * intent flags.
 */
927 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
928 #define LLITE_IT_RAWOPS (IT_MKNOD|IT_MKDIR|IT_SYMLINK|IT_LINK|IT_UNLINK|IT_RMDIR|IT_RENAME)
929 static int ll_rawop_from_intent(struct nameidata *nd)
933 if (!nd || !(nd->intent.open.op & LLITE_IT_RAWOPS))
936 switch (nd->intent.open.op) {
938 error = ll_mknod_raw(nd, nd->intent.open.create_mode,
939 nd->intent.open.create.dev);
942 error = ll_mkdir_raw(nd, nd->intent.open.create_mode);
945 error = ll_rmdir_raw(nd);
948 error = ll_unlink_raw(nd);
951 LASSERT(nd->intent.open.create.link);
952 error = ll_symlink_raw(nd, nd->intent.open.create.link);
955 error = ll_link_raw(nd->intent.open.create.source_nd, nd);
958 LASSERT(nd->intent.open.create.source_nd);
959 error = ll_rename_raw(nd->intent.open.create.source_nd, nd);
964 if (error != -EOPNOTSUPP)
965 nd->intent.open.flags |= IT_STATUS_RAW;
971 struct inode_operations ll_dir_inode_operations = {
973 .setattr = ll_setattr,
974 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
975 .create_it = ll_create_it,
976 .lookup_it = ll_lookup_it,
977 .revalidate_it = ll_inode_revalidate_it,
979 .lookup = ll_lookup_nd,
980 .create = ll_create_nd,
981 .getattr = ll_getattr,
982 .endparentlookup = ll_rawop_from_intent,
984 .setxattr = ll_setxattr,
985 .getxattr = ll_getxattr,
986 .listxattr = ll_listxattr,
987 .removexattr = ll_removexattr,
988 .permission = ll_inode_permission,