1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (c) 2002, 2003 Cluster File Systems, Inc.
6 * This file is part of Lustre, http://www.lustre.org.
8 * Lustre is free software; you can redistribute it and/or
9 * modify it under the terms of version 2 of the GNU General Public
10 * License as published by the Free Software Foundation.
12 * Lustre is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Lustre; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21 * derived in small part from linux/fs/ext2/namei.c
23 * Copyright (C) 1991, 1992 Linus Torvalds
25 * Big-endian to little-endian byte-swapping/bitmaps by
26 * David S. Miller (davem@caip.rutgers.edu), 1995
27 * Directory entry file type support and forward compatibility hooks
28 * for B-tree directories by Theodore Ts'o (tytso@mit.edu), 1998
32 #include <linux/sched.h>
34 #include <linux/smp_lock.h>
35 #include <linux/quotaops.h>
36 #include <linux/highmem.h>
37 #include <linux/pagemap.h>
39 #define DEBUG_SUBSYSTEM S_LLITE
41 #include <linux/obd_support.h>
42 #include <linux/lustre_lite.h>
43 #include <linux/lustre_dlm.h>
44 #include <linux/lustre_version.h>
45 #include "llite_internal.h"
49 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
/* Inode-match callback that ll_iget() hands to iget4()/iget5_locked().
 * opaque is the struct lustre_md built from the MDS reply. The candidate
 * inode is compared against md->body->id1: the inode number (only on
 * kernels newer than 2.5.0), the generation and the group. After those
 * comparisons ll_update_inode() applies the reply attributes to the inode.
 * Kernels older than 2.5.0 use the three-argument form with an extra ino
 * parameter. */
50 static int ll_test_inode(struct inode *inode, unsigned long ino, void *opaque)
52 static int ll_test_inode(struct inode *inode, void *opaque)
/* Function-static bookkeeping: it is only used to trace how many times in a
 * row the same ino/generation pair has been compared.
 * NOTE(review): no locking around these statics is visible - confirm that
 * concurrent callers are harmless for what is only debug output. */
55 static int last_ino, last_gen, last_count;
56 struct lustre_md *md = opaque;
/* NOTE(review): this condition is true only when NEITHER OBD_MD_FLGENER nor
 * OBD_MD_FLID is set, while the message reads as if either one missing is
 * an error - confirm which of the two is intended. */
58 if (!(md->body->valid & (OBD_MD_FLGENER | OBD_MD_FLID))) {
59 CERROR("MDS body missing inum or generation\n");
63 if (last_ino == id_ino(&md->body->id1) &&
64 last_gen == id_gen(&md->body->id1) &&
69 CDEBUG(D_VFSTRACE, "compared %u/%u %u times\n",
70 last_ino, last_gen, last_count);
/* Remember the id of this comparison for the next call. */
72 last_ino = id_ino(&md->body->id1);
73 last_gen = id_gen(&md->body->id1);
75 "comparing inode %p ino "DLID4" to body "DLID4"\n",
76 inode, OLID4(&ll_i2info(inode)->lli_id),
77 OLID4(&md->body->id1));
/* The inode number is compared explicitly only on newer kernels. */
80 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
81 if (inode->i_ino != id_ino(&md->body->id1))
84 if (inode->i_generation != id_gen(&md->body->id1))
87 if (id_group(&ll_i2info(inode)->lli_id) != id_group(&md->body->id1))
90 /* apply the attributes in 'opaque' to this inode. */
91 ll_update_inode(inode, md);
95 extern struct dentry_operations ll_d_ops;
/* Calls ldlm_lock_decref() on the DLM lock handle lockh for the given lock
 * mode. */
97 int ll_unlock(__u32 mode, struct lustre_handle *lockh)
101 ldlm_lock_decref(lockh, mode);
107 * get an inode by inode number (already instantiated by the intent lookup).
108 * Returns inode or NULL.
110 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
/* Set callback that ll_iget() hands to iget5_locked(): the inode is filled
 * in from opaque through ll_read_inode2(). */
111 int ll_set_inode(struct inode *inode, void *opaque)
113 ll_read_inode2(inode, opaque);
/* ll_iget() for kernels 2.5.0 and newer.
 * Obtains the inode for hash from iget5_locked(), with ll_test_inode() as
 * the match callback, ll_set_inode() as the set callback and md as their
 * opaque argument. An inode that comes back with I_NEW set is released
 * with unlock_new_inode() before the inode is traced. */
117 struct inode *ll_iget(struct super_block *sb, ino_t hash,
118 struct lustre_md *md)
123 inode = iget5_locked(sb, hash, ll_test_inode, ll_set_inode, md);
126 if (inode->i_state & I_NEW)
127 unlock_new_inode(inode);
128 CDEBUG(D_VFSTRACE, "inode: %lu/%u(%p)\n", inode->i_ino,
129 inode->i_generation, inode);
/* ll_iget() variant built on iget4(): the inode for hash is obtained with
 * ll_test_inode() as the match callback and md as its opaque argument, and
 * is then traced. */
135 struct inode *ll_iget(struct super_block *sb, ino_t hash,
136 struct lustre_md *md)
140 inode = iget4(sb, hash, ll_test_inode, md);
142 CDEBUG(D_VFSTRACE, "inode: %lu/%u(%p)\n", inode->i_ino,
143 inode->i_generation, inode);
/* Lock callback that ll_lookup_it() passes to md_intent_lock().
 * Two callback kinds are handled:
 *  - LDLM_CB_BLOCKING: the lock is turned into a handle and cancelled with
 *    ldlm_cli_cancel().
 *  - LDLM_CB_CANCELING: the inodebits of the lock decide what is cleaned up
 *    on the inode attached to the lock: a close through ll_md_real_close()
 *    for the OPEN bit, clearing LLI_F_HAVE_MDS_SIZE_LOCK for the UPDATE
 *    bit, dropping cached pages of a directory for the UPDATE bit, and
 *    unhashing the dentry aliases for the LOOKUP bit (never for the root
 *    inode of the superblock). */
148 int ll_mdc_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
149 void *data, int flag)
152 struct lustre_handle lockh;
156 case LDLM_CB_BLOCKING:
157 ldlm_lock2handle(lock, &lockh);
158 rc = ldlm_cli_cancel(&lockh);
160 CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc);
164 case LDLM_CB_CANCELING: {
165 struct inode *inode = ll_inode_from_lock(lock);
/* NOTE(review): ll_i2info(inode) is evaluated in this initialiser, before
 * any check of the ll_inode_from_lock() result - confirm that either the
 * result cannot be NULL here or ll_i2info() does not dereference it. */
166 struct ll_inode_info *li = ll_i2info(inode);
167 __u64 bits = lock->l_policy_data.l_inodebits.bits;
169 /* For lookup locks: Invalidate all dentries associated with
170 this inode, for UPDATE locks - invalidate directory pages */
/* Sanity check: the resource name of the lock must carry the fid and group
 * stored in the inode. */
174 if (lock->l_resource->lr_name.name[0] != id_fid(&li->lli_id) ||
175 lock->l_resource->lr_name.name[1] != id_group(&li->lli_id)) {
176 LDLM_ERROR(lock, "data mismatch with object %lu/%lu",
177 (unsigned long)id_fid(&li->lli_id),
178 (unsigned long)id_group(&li->lli_id));
/* OPEN bit: the requested lock mode selects the flags that are passed to
 * ll_md_real_close(). */
181 if (bits & MDS_INODELOCK_OPEN) {
183 switch (lock->l_req_mode) {
194 CERROR("Unexpected lock mode for OPEN lock "
195 "%d, inode %ld\n", lock->l_req_mode,
198 ll_md_real_close(ll_i2mdexp(inode), inode, flags);
201 if (bits & MDS_INODELOCK_UPDATE)
202 clear_bit(LLI_F_HAVE_MDS_SIZE_LOCK,
203 &(ll_i2info(inode)->lli_flags));
206 /* If lookup lock is cancelled, we just drop the dentry and
207 this will cause us to reget data from MDS when we'd want to
208 access this dentry/inode again. If this is lock on
209 other parts of inode that is cancelled, we do not need to do
210 much (but need to discard data from readdir, if any), since
211 absence of lock will cause ll_revalidate_it (called from
212 stat() and similar functions) to renew the data anyway */
213 if (S_ISDIR(inode->i_mode) &&
214 (bits & MDS_INODELOCK_UPDATE)) {
215 CDEBUG(D_INODE, "invalidating inode %lu/%u(%p)\n",
216 inode->i_ino, inode->i_generation, inode);
217 truncate_inode_pages(inode->i_mapping, 0);
/* The aliases of the root inode of the superblock are never unhashed. */
220 if (inode->i_sb->s_root &&
221 inode != inode->i_sb->s_root->d_inode &&
222 (bits & MDS_INODELOCK_LOOKUP))
223 ll_unhash_aliases(inode);
/* Calls ldlm_cli_cancel_unused() for the resource of the given inode.
 * The resource id is built from the fid and the group stored in the
 * lli_id of the inode; the namespace is the one of the obd device behind
 * conn, and flags is forwarded to the cancel call. */
234 int ll_mdc_cancel_unused(struct lustre_handle *conn, struct inode *inode,
235 int flags, void *opaque)
237 struct ll_inode_info *li = ll_i2info(inode);
238 struct ldlm_res_id res_id =
239 { .name = {id_fid(&li->lli_id), id_group(&li->lli_id)} };
240 struct obd_device *obddev = class_conn2obd(conn);
243 RETURN(ldlm_cli_cancel_unused(obddev->obd_namespace, &res_id, flags,
247 /* Search "inode"'s alias list for a dentry that has the same name and parent as
248 * de. If found, return it. If not found, return de. */
249 struct dentry *ll_find_alias(struct inode *inode, struct dentry *de)
251 struct list_head *tmp;
/* The alias list is walked with dcache_lock held. */
253 spin_lock(&dcache_lock);
254 list_for_each(tmp, &inode->i_dentry) {
255 struct dentry *dentry = list_entry(tmp, struct dentry, d_alias);
257 /* We are called here with 'de' already on the aliases list. */
/* A candidate has to agree with de on parent, name length and name bytes. */
263 if (dentry->d_parent != de->d_parent)
266 if (dentry->d_name.len != de->d_name.len)
269 if (memcmp(dentry->d_name.name, de->d_name.name,
270 de->d_name.len) != 0)
/* Match: take the alias off the LRU list and put it back on the hash. */
273 if (!list_empty(&dentry->d_lru))
274 list_del_init(&dentry->d_lru);
276 hlist_del_init(&dentry->d_hash);
277 __d_rehash(dentry); /* avoid taking dcache_lock inside */
/* NOTE(review): the reference on the alias is taken only after dcache_lock
 * has been dropped - confirm nothing can release the dentry in between. */
278 spin_unlock(&dcache_lock);
279 atomic_inc(&dentry->d_count);
281 dentry->d_flags &= ~DCACHE_LUSTRE_INVALID;
/* NOTE(review): this trace prints the fields of de rather than those of the
 * alias that was just matched, and it uses "%*s" (minimum field width)
 * where other traces in this file use "%.*s" (maximum length) - confirm
 * that both are intended. */
282 CDEBUG(D_DENTRY, "alias dentry %*s (%p) parent %p inode %p "
283 "refc %d\n", de->d_name.len, de->d_name.name, de,
284 de->d_parent, de->d_inode, atomic_read(&de->d_count));
/* No alias matched: the lock is dropped here as well. */
288 spin_unlock(&dcache_lock);
/* Finishes a lookup from the reply of an intent request.
 * data is a struct it_cb_data that carries the address of the child dentry
 * pointer and the parent inode. When the intent does not have
 * DISP_LOOKUP_NEG set, the inode is built from the reply buffer at offset
 * with ll_prep_inode() and stored as the data of the intent lock through
 * mdc_set_lock_data(). For an IT_GETATTR intent on a regular file that has
 * striping metadata, the intent lock is dropped and ll_glimpse_size() is
 * called for the size. The child dentry pointer is then replaced by the
 * result of ll_find_alias(). The code also installs ll_d_ops as d_op and
 * calls d_add().
 * NOTE(review): which of the final steps belong to the negative-lookup
 * path should be confirmed against the complete branch structure. */
293 static int lookup_it_finish(struct ptlrpc_request *request, int offset,
294 struct lookup_intent *it, void *data)
296 struct it_cb_data *icbd = data;
297 struct dentry **de = icbd->icbd_childp;
298 struct inode *parent = icbd->icbd_parent;
299 struct ll_sb_info *sbi = ll_i2sbi(parent);
300 struct dentry *dentry = *de, *saved = *de;
301 struct inode *inode = NULL;
304 /* NB 1 request reference will be taken away by ll_intent_lock()
306 if (!it_disposition(it, DISP_LOOKUP_NEG)) {
309 rc = ll_prep_inode(sbi->ll_dt_exp, sbi->ll_md_exp,
310 &inode, request, offset, dentry->d_sb);
314 CDEBUG(D_DLMTRACE, "setting l_data to inode %p (%lu/%u)\n",
315 inode, inode->i_ino, inode->i_generation);
317 mdc_set_lock_data(NULL, &LUSTRE_IT(it)->it_lock_handle, inode);
319 /* If this is a stat, get the authoritative file size */
320 if (it->it_op == IT_GETATTR && S_ISREG(inode->i_mode) &&
321 ll_i2info(inode)->lli_smd != NULL) {
322 struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
325 LASSERT(lsm->lsm_object_id != 0);
327 /* bug 2334: drop MDS lock before acquiring OST lock */
328 ll_intent_drop_lock(it);
330 rc = ll_glimpse_size(inode);
/* Both the local dentry and the pointer of the caller are switched to the
 * dentry returned by ll_find_alias(). */
337 dentry = *de = ll_find_alias(inode, dentry);
342 dentry->d_op = &ll_d_ops;
346 d_add(dentry, inode);
/* Intent-based lookup of dentry inside the directory parent.
 * A name longer than EXT3_NAME_LEN is refused with -ENAMETOOLONG. The
 * intent comes from nd when flags is non-zero; ll_frob_intent() is given
 * the local IT_LOOKUP intent as well, presumably as the fallback. The name
 * is resolved on the MDS through md_intent_lock() with
 * ll_mdc_blocking_ast() as the lock callback, lookup_it_finish() attaches
 * the result to the dentry and ll_lookup_finish_locks() is called
 * afterwards. If the result is a directory with the S_ISUID bit set, and
 * either LOOKUP_CONTINUE is set or the intent is IT_CHDIR/IT_OPEN, a GNS
 * mount is attempted with ll_gns_mount_object(); -ERESTARTSYS from that
 * attempt is returned. On the way out the request is finished, and the
 * intent is released when it is the local one. */
351 static struct dentry *ll_lookup_it(struct inode *parent, struct dentry *dentry,
352 struct nameidata *nd, int flags)
354 struct dentry *save = dentry, *retval;
355 struct lookup_intent *it = flags ? &nd->intent.open : NULL;
356 struct lustre_id pid;
357 struct it_cb_data icbd;
358 struct ptlrpc_request *req = NULL;
359 struct lookup_intent lookup_it = { .it_op = IT_LOOKUP };
363 if (dentry->d_name.len > EXT3_NAME_LEN)
364 RETURN(ERR_PTR(-ENAMETOOLONG));
366 CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%u(%p),intent=%s\n",
367 dentry->d_name.len, dentry->d_name.name, parent->i_ino,
368 parent->i_generation, parent, LL_IT2STR(it));
370 if (d_mountpoint(dentry))
371 CERROR("Tell Peter, lookup on mtpt, it %s\n", LL_IT2STR(it));
374 nd->mnt->mnt_last_used = jiffies;
/* Without a nameidata the GNS check further down treats the operation as
 * IT_OPEN. */
376 gns_it = nd ? nd->intent.open.it_op : IT_OPEN;
377 ll_frob_intent(&it, &lookup_it);
/* Callback data for lookup_it_finish(): the address of the dentry pointer
 * (so it can be swapped for an alias) and the parent inode. */
379 icbd.icbd_childp = &dentry;
380 icbd.icbd_parent = parent;
381 ll_inode2id(&pid, parent);
383 rc = md_intent_lock(ll_i2mdexp(parent), &pid,
384 dentry->d_name.name, dentry->d_name.len, NULL, 0,
385 NULL, it, flags, &req, ll_mdc_blocking_ast);
387 GOTO(out, retval = ERR_PTR(rc));
389 rc = lookup_it_finish(req, 1, it, &icbd);
391 ll_intent_release(it);
392 GOTO(out, retval = ERR_PTR(rc));
395 ll_lookup_finish_locks(it, dentry);
/* GNS: only a directory with the S_ISUID bit set is a mount candidate. */
397 if (nd && dentry->d_inode != NULL &&
398 dentry->d_inode->i_mode & S_ISUID && S_ISDIR(dentry->d_inode->i_mode) &&
399 ((flags & LOOKUP_CONTINUE) || (gns_it & (IT_CHDIR | IT_OPEN))))
401 rc = ll_gns_mount_object(dentry, nd->mnt);
402 if (rc == -ERESTARTSYS) {
403 /* causing syscall restart */
404 GOTO(out, retval = ERR_PTR(-ERESTARTSYS));
409 * just reporting about GNS failures, lookup() is
410 * successful, do not stop it.
412 * GNS failure may be that object is found in SUID bit
413 * marked dir but it is not regular file and we should
414 * lookup further until we find correct mount
415 * object. This will allow to perform GNS mount is the
416 * following case for instance:
418 * /mnt/lustre/gns_mount/.mntinfo/.mntinfo/..../.mntinfo
419 * where all ".mntinfo" are dirs and only last one is
/* NOTE(review): "%*s" sets a minimum field width; the VFS trace at the top
 * of this function uses "%.*s" to bound the name length - confirm which
 * form is wanted here and in the two traces near the end. */
422 CDEBUG(D_INODE, "failed to mount %*s, err %d\n",
423 (int)dentry->d_name.len, dentry->d_name.name, rc);
428 GOTO(out, retval = NULL);
430 GOTO(out, retval = dentry);
/* Common exit: finish the request and release the intent if it is the
 * local lookup_it. */
433 ptlrpc_req_finished(req);
434 if (it == &lookup_it)
435 ll_intent_release(it);
/* NOTE(review): "0x%p" may print a doubled 0x prefix, depending on how %p
 * is formatted - confirm. */
437 CDEBUG(D_INODE, "lookup 0x%p in %lu/%lu: %*s -> %lu/%lu\n",
439 (unsigned long) parent->i_ino,
440 (unsigned long) parent->i_generation,
441 dentry->d_name.len, dentry->d_name.name,
442 (unsigned long) dentry->d_inode->i_ino,
443 (unsigned long) dentry->d_inode->i_generation);
445 CDEBUG(D_INODE, "lookup 0x%p in %lu/%lu: %*s -> ??\n",
447 (unsigned long) parent->i_ino,
448 (unsigned long) parent->i_generation,
449 dentry->d_name.len, dentry->d_name.name);
453 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
/* Lookup entry point installed as .lookup in ll_dir_inode_operations.
 * The flags of nd are forwarded to ll_lookup_it() only when nd is present,
 * LOOKUP_LAST is set and LOOKUP_LINK_NOTLAST is clear; the other call
 * (presumably the else branch) passes flags 0. */
454 static struct dentry *ll_lookup_nd(struct inode *parent, struct dentry *dentry,
455 struct nameidata *nd)
460 if (nd && nd->flags & LOOKUP_LAST && !(nd->flags & LOOKUP_LINK_NOTLAST))
461 de = ll_lookup_it(parent, dentry, nd, nd->flags);
463 de = ll_lookup_it(parent, dentry, nd, 0);
469 /* We depend on "mode" being set with the proper file type/umask by now */
/* Builds the inode for an object that an intent has just created.
 * The intent must be set and must carry a disposition. The reply stored in
 * LUSTRE_IT(it)->it_data is turned into an inode by ll_prep_inode() (reply
 * buffer 1, superblock of dir); a failure code is handed back as
 * ERR_PTR(rc). The new inode must not have any dentry alias yet. It is
 * stored as the data of the intent lock, and the request is finished. */
470 static struct inode *ll_create_node(struct inode *dir, const char *name,
471 int namelen, const void *data, int datalen,
472 int mode, __u64 extra,
473 struct lookup_intent *it)
475 struct inode *inode = NULL;
476 struct ptlrpc_request *request = NULL;
477 struct ll_sb_info *sbi = ll_i2sbi(dir);
482 LASSERT(it && LUSTRE_IT(it)->it_disposition);
484 request = LUSTRE_IT(it)->it_data;
485 rc = ll_prep_inode(sbi->ll_dt_exp, sbi->ll_md_exp,
486 &inode, request, 1, dir->i_sb);
488 GOTO(out, inode = ERR_PTR(rc));
490 LASSERT(list_empty(&inode->i_dentry));
492 /* We asked for a lock on the directory, but were granted a
493 * lock on the inode. Since we finally have an inode pointer,
494 * stuff it in the lock. */
495 CDEBUG(D_DLMTRACE, "setting l_ast_data to inode %p (%lu/%u)\n",
496 inode, inode->i_ino, inode->i_generation);
497 mdc_set_lock_data(NULL, &LUSTRE_IT(it)->it_lock_handle, inode);
500 ptlrpc_req_finished(request);
505 * By the time this is called, we already have created the directory cache
506 * entry for the new file, but it is so far negative - it has no inode.
508 * We defer creating the OBD object(s) until open, to keep the intent and
509 * non-intent code paths similar, and also because we do not have the MDS
510 * inode number before calling ll_create_node() (which is needed for LOV),
511 * so we would need to do yet another RPC to the MDS to store the LOV EA
512 * data on the MDS. If needed, we would pass the PACKED lmm as data and
513 * lmm_size in datalen (the MDS still has code which will handle that).
515 * If the create succeeds, we fill in the inode information
516 * with d_instantiate().
/* Create through an intent: the MDS has already handled the create as part
 * of the intent, so this only checks the outcome and instantiates the
 * dentry. it_open_error(DISP_OPEN_CREATE, it) supplies the status of the
 * create, mdc_store_inode_generation() is called on the intent request,
 * ll_create_node() builds the inode (an error pointer is returned as
 * PTR_ERR), and d_instantiate() binds the inode to the dentry. */
518 static int ll_create_it(struct inode *dir, struct dentry *dentry, int mode,
519 struct lookup_intent *it)
522 struct ptlrpc_request *request = LUSTRE_IT(it)->it_data;
523 struct obd_export *md_exp = ll_i2mdexp(dir);
527 CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%u(%p),intent=%s\n",
528 dentry->d_name.len, dentry->d_name.name, dir->i_ino,
529 dir->i_generation, dir, LL_IT2STR(it));
531 rc = it_open_error(DISP_OPEN_CREATE, it);
535 mdc_store_inode_generation(md_exp, request, MDS_REQ_INTENT_REC_OFF, 1);
536 inode = ll_create_node(dir, dentry->d_name.name, dentry->d_name.len,
537 NULL, 0, mode, 0, it);
539 RETURN(PTR_ERR(inode));
541 d_instantiate(dentry, inode);
545 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
/* Create entry point installed as .create in ll_dir_inode_operations; it
 * forwards to ll_create_it() with the open intent embedded in nd.
 * NOTE(review): nd is used without a visible NULL check - confirm callers
 * always supply it. */
546 static int ll_create_nd(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd)
548 return ll_create_it(dir, dentry, mode, &nd->intent.open);
/* Moves the mtime and ctime of an inode forward from an MDS reply.
 * The mds_body is read from the reply message of request at buffer index
 * offset. Each timestamp is taken over only when the body marks it valid
 * (OBD_MD_FLMTIME / OBD_MD_FLCTIME) and the reply value is greater than
 * the value currently stored in the inode.
 * NOTE(review): no NULL check on the lustre_msg_buf() result is visible
 * before body is dereferenced - confirm. */
552 static void ll_update_times(struct ptlrpc_request *request, int offset,
555 struct mds_body *body = lustre_msg_buf(request->rq_repmsg, offset,
559 if (body->valid & OBD_MD_FLMTIME &&
560 body->mtime > LTIME_S(inode->i_mtime)) {
/* NOTE(review): body->mtime is printed with "%u" - confirm that this
 * matches the declared type of the field. */
561 CDEBUG(D_INODE, "setting ino %lu mtime from %lu to %u\n",
562 inode->i_ino, LTIME_S(inode->i_mtime), body->mtime);
563 LTIME_S(inode->i_mtime) = body->mtime;
565 if (body->valid & OBD_MD_FLCTIME &&
566 body->ctime > LTIME_S(inode->i_ctime))
567 LTIME_S(inode->i_ctime) = body->ctime;
/* Raw mknod on the parent directory nd->dentry with the name nd->last.
 * The umask of the current process is applied to mode, the file type in
 * mode is examined (a mode without a type is turned into S_IFREG), and the
 * node is created on the MDS with md_create() using the fsuid/fsgid of the
 * current process and rdev. The temporary mdc_op_data is freed right after
 * the call. ll_update_times() refreshes the directory times from reply
 * buffer 0 and the request is finished. */
570 static int ll_mknod_raw(struct nameidata *nd, int mode, dev_t rdev)
572 struct ptlrpc_request *request = NULL;
573 struct inode *dir = nd->dentry->d_inode;
574 struct ll_sb_info *sbi = ll_i2sbi(dir);
575 struct mdc_op_data *op_data;
579 CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%u(%p)\n",
580 nd->last.len, nd->last.name, dir->i_ino, dir->i_generation, dir);
582 mode &= ~current->fs->umask;
584 switch (mode & S_IFMT) {
587 mode |= S_IFREG; /* for mode = 0 case, fallthrough */
592 OBD_ALLOC(op_data, sizeof(*op_data));
595 ll_prepare_mdc_data(op_data, dir, NULL, nd->last.name,
597 err = md_create(sbi->ll_md_exp, op_data, NULL, 0, mode,
598 current->fsuid, current->fsgid, rdev,
600 OBD_FREE(op_data, sizeof(*op_data));
602 ll_update_times(request, 0, dir);
603 ptlrpc_req_finished(request);
/* Dentry-based mknod: creates the node dchild in directory dir.
 * It follows the same sequence as ll_mknod_raw() (umask applied, a mode
 * without a file type becomes S_IFREG, md_create() with the fsuid/fsgid of
 * the current process and rdev) and then additionally builds the inode
 * from reply buffer 0 with ll_prep_inode() and binds it to dchild with
 * d_instantiate(). The request is finished at the end. */
614 static int ll_mknod(struct inode *dir, struct dentry *dchild,
615 int mode, ll_dev_t rdev)
617 struct ptlrpc_request *request = NULL;
618 struct inode *inode = NULL;
619 struct ll_sb_info *sbi = ll_i2sbi(dir);
620 struct mdc_op_data *op_data;
624 CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%u(%p)\n",
625 dchild->d_name.len, dchild->d_name.name,
626 dir->i_ino, dir->i_generation, dir);
628 mode &= ~current->fs->umask;
630 switch (mode & S_IFMT) {
633 mode |= S_IFREG; /* for mode = 0 case, fallthrough */
638 OBD_ALLOC(op_data, sizeof(*op_data));
641 ll_prepare_mdc_data(op_data, dir, NULL, dchild->d_name.name,
642 dchild->d_name.len, 0);
643 err = md_create(sbi->ll_md_exp, op_data, NULL, 0, mode,
644 current->fsuid, current->fsgid, rdev,
646 OBD_FREE(op_data, sizeof(*op_data));
650 ll_update_times(request, 0, dir);
651 err = ll_prep_inode(sbi->ll_dt_exp, sbi->ll_md_exp,
652 &inode, request, 0, dchild->d_sb);
663 d_instantiate(dchild, inode);
666 ptlrpc_req_finished(request);
/* Raw symlink: creates nd->last in the directory nd->dentry, pointing at
 * tgt. The target string is sent to md_create() as the create data,
 * including its terminating NUL (strlen(tgt) + 1), with mode
 * S_IFLNK | S_IRWXUGO and the fsuid/fsgid of the current process. The
 * directory times are then refreshed from reply buffer 0 and the request
 * is finished.
 * NOTE(review): the link count of the directory is tested against
 * EXT3_LINK_MAX although a symlink does not obviously add a link to its
 * parent - confirm the check is wanted. */
670 static int ll_symlink_raw(struct nameidata *nd, const char *tgt)
672 struct inode *dir = nd->dentry->d_inode;
673 struct ptlrpc_request *request = NULL;
674 struct ll_sb_info *sbi = ll_i2sbi(dir);
675 const char *name = nd->last.name;
676 struct mdc_op_data *op_data;
677 int len = nd->last.len;
/* NOTE(review): "%*s" sets a minimum field width; the sibling raw
 * operations trace the name with "%.*s" - confirm which is intended. */
681 CDEBUG(D_VFSTRACE, "VFS Op:name=%*s,dir=%lu/%u(%p),target=%s\n",
682 nd->last.len, nd->last.name, dir->i_ino, dir->i_generation,
685 if (dir->i_nlink >= EXT3_LINK_MAX)
688 OBD_ALLOC(op_data, sizeof(*op_data));
691 ll_prepare_mdc_data(op_data, dir, NULL, name, len, 0);
693 err = md_create(sbi->ll_md_exp, op_data,
694 tgt, strlen(tgt) + 1, S_IFLNK | S_IRWXUGO,
695 current->fsuid, current->fsgid, 0, &request);
696 OBD_FREE(op_data, sizeof(*op_data));
698 ll_update_times(request, 0, dir);
700 ptlrpc_req_finished(request);
/* Raw hard link: links the inode behind srcnd->dentry under the name
 * tgtnd->last in the directory tgtnd->dentry. The operation data is
 * prepared from the source inode and the target directory and sent with
 * md_link(); afterwards the times of the target directory are refreshed
 * from reply buffer 0 and the request is finished. */
704 static int ll_link_raw(struct nameidata *srcnd, struct nameidata *tgtnd)
706 struct inode *src = srcnd->dentry->d_inode;
707 struct inode *dir = tgtnd->dentry->d_inode;
708 struct ptlrpc_request *request = NULL;
709 struct mdc_op_data *op_data;
711 struct ll_sb_info *sbi = ll_i2sbi(dir);
715 "VFS Op: inode=%lu/%u(%p), dir=%lu/%u(%p), target=%.*s\n",
716 src->i_ino, src->i_generation, src, dir->i_ino,
717 dir->i_generation, dir, tgtnd->last.len, tgtnd->last.name);
719 OBD_ALLOC(op_data, sizeof(*op_data));
722 ll_prepare_mdc_data(op_data, src, dir, tgtnd->last.name,
724 err = md_link(sbi->ll_md_exp, op_data, &request);
725 OBD_FREE(op_data, sizeof(*op_data));
727 ll_update_times(request, 0, dir);
728 ptlrpc_req_finished(request);
/* Raw mkdir: creates the directory nd->last inside nd->dentry.
 * Only the permission bits and the sticky bit of mode are kept, the umask
 * of the current process is applied and S_IFDIR is added. The directory is
 * created with md_create() under the fsuid/fsgid of the current process;
 * the parent times are then refreshed from reply buffer 0 and the request
 * is finished. */
733 static int ll_mkdir_raw(struct nameidata *nd, int mode)
735 struct inode *dir = nd->dentry->d_inode;
736 struct ptlrpc_request *request = NULL;
737 struct ll_sb_info *sbi = ll_i2sbi(dir);
738 struct mdc_op_data *op_data;
741 CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%u(%p)\n",
742 nd->last.len, nd->last.name, dir->i_ino, dir->i_generation, dir);
744 mode = (mode & (S_IRWXUGO|S_ISVTX) & ~current->fs->umask) | S_IFDIR;
745 OBD_ALLOC(op_data, sizeof(*op_data));
748 ll_prepare_mdc_data(op_data, dir, NULL, nd->last.name,
750 err = md_create(sbi->ll_md_exp, op_data, NULL, 0, mode,
751 current->fsuid, current->fsgid, 0, &request);
752 OBD_FREE(op_data, sizeof(*op_data));
754 ll_update_times(request, 0, dir);
755 ptlrpc_req_finished(request);
/* Raw rmdir: removes the directory nd->last from nd->dentry.
 * The operation data is prepared with S_IFDIR as its last argument and
 * sent with md_unlink(); afterwards the parent times are refreshed from
 * reply buffer 0 and the request is finished. */
759 static int ll_rmdir_raw(struct nameidata *nd)
761 struct inode *dir = nd->dentry->d_inode;
762 struct ptlrpc_request *request = NULL;
763 struct mdc_op_data *op_data;
767 CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%u(%p)\n",
768 nd->last.len, nd->last.name, dir->i_ino, dir->i_generation, dir);
770 OBD_ALLOC(op_data, sizeof(*op_data));
773 ll_prepare_mdc_data(op_data, dir, NULL, nd->last.name,
774 nd->last.len, S_IFDIR);
775 rc = md_unlink(ll_i2sbi(dir)->ll_md_exp, op_data, &request);
776 OBD_FREE(op_data, sizeof(*op_data));
778 ll_update_times(request, 0, dir);
779 ptlrpc_req_finished(request);
/* Destroys the OST objects of a file after the MDS has removed its last
 * name; called by ll_unlink_raw() and ll_rename_raw() with their reply.
 *
 * request: MDS request whose reply is examined.
 * dir:     inode used to reach the data export (ll_i2dtexp).
 * offset:  index of the buffer in the REQUEST message that receives a copy
 *          of the llog cookies (2 from unlink, 3 from rename).
 *
 * The mds_body is read from reply buffer 0. Work is done only when
 * OBD_MD_FLEASIZE is set; a zero eadatasize in that case is reported as
 * -EPROTO. The striping EA is taken from reply buffer 1 and unpacked into
 * a lov_stripe_md with obd_unpackmd(). An obdo is filled with the object
 * id, the object group and the file type bits of body->mode. When the
 * body carries OBD_MD_FLCOOKIE the llog cookies (one struct llog_cookie
 * per stripe) are looked up in reply buffer 2; if they are missing the
 * cookie flag is cleared again, otherwise they are copied into the request
 * message. obd_destroy() is then called and the unpacked striping data is
 * released with obd_free_memmd().
 *
 * NOTE(review): LASSERT(eadata != NULL) sits directly in front of the
 * eadata == NULL check, so with assertions active the -EPROTO path for a
 * missing EA buffer cannot be reached and a bad reply trips the assertion
 * instead - confirm which of the two is wanted.
 * NOTE(review): the error message of obd_destroy() reads lsm after the
 * call - confirm lsm is still valid at that point. */
783 int ll_objects_destroy(struct ptlrpc_request *request,
784 struct inode *dir, int offset)
786 struct mds_body *body;
787 struct lov_mds_md *eadata;
788 struct lov_stripe_md *lsm = NULL;
789 struct obd_trans_info oti = { 0 };
794 /* req is swabbed so this is safe */
795 body = lustre_msg_buf(request->rq_repmsg, 0, sizeof(*body));
797 if (!(body->valid & OBD_MD_FLEASIZE))
800 if (body->eadatasize == 0) {
801 CERROR("OBD_MD_FLEASIZE set but eadatasize zero\n");
802 GOTO(out, rc = -EPROTO);
806 * the MDS sent back the EA because we unlinked the last reference to
807 * this file. Use this EA to unlink the objects on the OST. It's opaque
808 * so we don't swab here; we leave it to obd_unpackmd() to check it is
809 * complete and sensible.
811 eadata = lustre_swab_repbuf(request, 1, body->eadatasize, NULL);
812 LASSERT(eadata != NULL);
813 if (eadata == NULL) {
814 CERROR("Can't unpack MDS EA data\n");
815 GOTO(out, rc = -EPROTO);
818 rc = obd_unpackmd(ll_i2dtexp(dir), &lsm, eadata, body->eadatasize);
820 CERROR("obd_unpackmd: %d\n", rc);
823 LASSERT(rc >= sizeof(*lsm));
827 GOTO(out_free_memmd, rc = -ENOMEM);
829 oa->o_id = lsm->lsm_object_id;
830 oa->o_gr = lsm->lsm_object_gr;
831 oa->o_mode = body->mode & S_IFMT;
832 oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLGROUP;
834 if (body->valid & OBD_MD_FLCOOKIE) {
835 int length = sizeof(struct llog_cookie) *
836 lsm->lsm_stripe_count;
837 oa->o_valid |= OBD_MD_FLCOOKIE;
839 lustre_msg_buf(request->rq_repmsg, 2, length);
840 if (oti.oti_logcookies == NULL) {
841 oa->o_valid &= ~OBD_MD_FLCOOKIE;
842 body->valid &= ~OBD_MD_FLCOOKIE;
844 /* copy llog cookies to request to replay unlink
845 * so that the same llog file and records as those created
846 * during fail can be re-created while doing replay
849 memcpy(lustre_msg_buf(request->rq_reqmsg, offset, 0),
850 oti.oti_logcookies, length);
854 rc = obd_destroy(ll_i2dtexp(dir), oa, lsm, &oti);
857 CERROR("obd destroy objid "LPX64" error %d\n",
858 lsm->lsm_object_id, rc);
861 obd_free_memmd(ll_i2dtexp(dir), &lsm);
/* Raw unlink: removes the name nd->last from the directory nd->dentry.
 * The name is removed on the MDS with md_unlink(), the directory times are
 * refreshed from reply buffer 0, and ll_objects_destroy() is called with
 * request buffer index 2 as the destination for the llog cookies. The
 * request is finished at the end. */
866 static int ll_unlink_raw(struct nameidata *nd)
868 struct inode *dir = nd->dentry->d_inode;
869 struct ptlrpc_request *request = NULL;
870 struct mdc_op_data *op_data;
873 CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%u(%p)\n",
874 nd->last.len, nd->last.name, dir->i_ino, dir->i_generation, dir);
876 OBD_ALLOC(op_data, sizeof(*op_data));
879 ll_prepare_mdc_data(op_data, dir, NULL, nd->last.name, nd->last.len, 0);
880 rc = md_unlink(ll_i2sbi(dir)->ll_md_exp, op_data, &request);
881 OBD_FREE(op_data, sizeof(*op_data));
884 ll_update_times(request, 0, dir);
886 rc = ll_objects_destroy(request, dir, 2);
889 ptlrpc_req_finished(request);
/* Raw rename of srcnd->last in the directory srcnd->dentry to tgtnd->last
 * in the directory tgtnd->dentry. Here src and tgt are the two DIRECTORY
 * inodes. The two nameidata are first compared by mount. The operation
 * data is prepared from both directories without a name, and the old and
 * new names are handed to md_rename() directly. Afterwards the times of
 * both directories are refreshed from reply buffer 0, and
 * ll_objects_destroy() is called with request buffer index 3 for the llog
 * cookies. The request is finished at the end. */
893 static int ll_rename_raw(struct nameidata *srcnd, struct nameidata *tgtnd)
895 struct inode *src = srcnd->dentry->d_inode;
896 struct inode *tgt = tgtnd->dentry->d_inode;
897 struct ptlrpc_request *request = NULL;
898 struct ll_sb_info *sbi = ll_i2sbi(src);
899 struct mdc_op_data *op_data;
903 if (srcnd->mnt != tgtnd->mnt)
906 CDEBUG(D_VFSTRACE,"VFS Op:oldname=%.*s,src_dir=%lu/%u(%p),newname=%.*s,"
907 "tgt_dir=%lu/%u(%p)\n", srcnd->last.len, srcnd->last.name,
908 src->i_ino, src->i_generation, src, tgtnd->last.len,
909 tgtnd->last.name, tgt->i_ino, tgt->i_generation, tgt);
911 OBD_ALLOC(op_data, sizeof(*op_data));
914 ll_prepare_mdc_data(op_data, src, tgt, NULL, 0, 0);
915 err = md_rename(sbi->ll_md_exp, op_data, srcnd->last.name,
916 srcnd->last.len, tgtnd->last.name, tgtnd->last.len,
918 OBD_FREE(op_data, sizeof(*op_data));
920 ll_update_times(request, 0, src);
921 ll_update_times(request, 0, tgt);
922 err = ll_objects_destroy(request, src, 3);
925 ptlrpc_req_finished(request);
929 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
/* Intent operations that this file serves with a raw handler. */
930 #define LLITE_IT_RAWOPS (IT_MKNOD|IT_MKDIR|IT_SYMLINK|IT_LINK|IT_UNLINK|IT_RMDIR|IT_RENAME)
/* Installed as .endparentlookup in ll_dir_inode_operations.
 * When nd is present and its intent operation is one of LLITE_IT_RAWOPS,
 * the operation is dispatched to the matching raw handler (mknod, mkdir,
 * rmdir, unlink, symlink, link or rename), with the arguments taken from
 * nd->intent.open. Unless the handler reports -EOPNOTSUPP, IT_STATUS_RAW
 * is set in the intent flags.
 * NOTE(review): the symlink target and the rename source are asserted to
 * be set, but no such assertion is visible for the source_nd that is
 * passed to ll_link_raw() - confirm. */
931 static int ll_rawop_from_intent(struct nameidata *nd)
935 if (!nd || !(nd->intent.open.op & LLITE_IT_RAWOPS))
938 switch (nd->intent.open.op) {
940 error = ll_mknod_raw(nd, nd->intent.open.create_mode,
941 nd->intent.open.create.dev);
944 error = ll_mkdir_raw(nd, nd->intent.open.create_mode);
947 error = ll_rmdir_raw(nd);
950 error = ll_unlink_raw(nd);
953 LASSERT(nd->intent.open.create.link);
954 error = ll_symlink_raw(nd, nd->intent.open.create.link);
957 error = ll_link_raw(nd->intent.open.create.source_nd, nd);
960 LASSERT(nd->intent.open.create.source_nd);
961 error = ll_rename_raw(nd->intent.open.create.source_nd, nd);
966 if (error != -EOPNOTSUPP)
967 nd->intent.open.flags |= IT_STATUS_RAW;
/* Inode operations table for directories. The intent-aware entry points
 * depend on the kernel version: before 2.5.0 the *_it methods are
 * installed; the remaining group (presumably the alternative branch)
 * installs the nameidata-based lookup/create, getattr and the raw
 * operation hook ll_rawop_from_intent(). */
973 struct inode_operations ll_dir_inode_operations = {
975 .setattr = ll_setattr,
976 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
977 .create_it = ll_create_it,
978 .lookup_it = ll_lookup_it,
979 .revalidate_it = ll_inode_revalidate_it,
981 .lookup = ll_lookup_nd,
982 .create = ll_create_nd,
983 .getattr = ll_getattr,
984 .endparentlookup = ll_rawop_from_intent,
/* Extended attribute and permission handlers. */
986 .setxattr = ll_setxattr,
987 .getxattr = ll_getxattr,
988 .listxattr = ll_listxattr,
989 .removexattr = ll_removexattr,
990 .permission = ll_inode_permission,