1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (c) 2002, 2003 Cluster File Systems, Inc.
6 * This file is part of Lustre, http://www.lustre.org.
8 * Lustre is free software; you can redistribute it and/or
9 * modify it under the terms of version 2 of the GNU General Public
10 * License as published by the Free Software Foundation.
12 * Lustre is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Lustre; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21 * derived in small part from linux/fs/ext2/namei.c
23 * Copyright (C) 1991, 1992 Linus Torvalds
25 * Big-endian to little-endian byte-swapping/bitmaps by
26 * David S. Miller (davem@caip.rutgers.edu), 1995
27 * Directory entry file type support and forward compatibility hooks
28 * for B-tree directories by Theodore Ts'o (tytso@mit.edu), 1998
32 #include <linux/sched.h>
34 #include <linux/smp_lock.h>
35 #include <linux/quotaops.h>
36 #include <linux/highmem.h>
37 #include <linux/pagemap.h>
39 #define DEBUG_SUBSYSTEM S_LLITE
41 #include <linux/obd_support.h>
42 #include <linux/lustre_lite.h>
43 #include <linux/lustre_dlm.h>
46 extern void ll_set_dd(struct dentry *de);
49 extern void ll_change_inode(struct inode *inode);
50 extern int ll_setattr(struct dentry *de, struct iattr *attr);
53 extern int ll_add_link (struct dentry *dentry, struct inode *inode);
54 obd_id ll_inode_by_name(struct inode * dir, struct dentry *dentry, int *typ);
55 int ext2_make_empty(struct inode *inode, struct inode *parent);
56 struct ext2_dir_entry_2 * ext2_find_entry (struct inode * dir,
57 struct dentry *dentry, struct page ** res_page);
58 int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page );
59 int ext2_empty_dir (struct inode * inode);
60 struct ext2_dir_entry_2 * ext2_dotdot (struct inode *dir, struct page **p);
61 void ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de,
62 struct page *page, struct inode *inode);
65 * Couple of helper functions - make the code slightly cleaner.
67 static inline void ext2_inc_count(struct inode *inode)
72 /* postpone the disk update until the inode really goes away */
73 static inline void ext2_dec_count(struct inode *inode)
// Attach a newly obtained non-directory inode to its dentry.
// Visible steps: ll_add_link(dentry, inode) is called and its result kept in
// err; d_instantiate() binds the inode to the dentry; ext2_dec_count() is
// applied to the inode.
// NOTE(review): the branch that selects between d_instantiate() and
// ext2_dec_count() is not visible in this listing. Presumably it is success
// versus failure of ll_add_link(); confirm against the full source.
77 static inline int ext2_add_nondir(struct dentry *dentry, struct inode *inode)
80 err = ll_add_link(dentry, inode);
82 d_instantiate(dentry, inode);
85 ext2_dec_count(inode);
// Inode match callback used by ll_iget(): ll_find_inode is handed to iget4()
// and ll_test_inode to iget5_locked(). The opaque argument is the
// struct ll_read_inode2_cookie supplied by the ll_iget() caller.
// NOTE(review): the return statements are not visible in this listing.
// Presumably a generation mismatch rejects the candidate inode; confirm.
92 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
93 static int ll_find_inode(struct inode *inode, unsigned long ino, void *opaque)
95 static int ll_test_inode(struct inode *inode, void *opaque)
98 struct ll_read_inode2_cookie *lic = opaque;
99 struct mds_body *body = lic->lic_body;
// The error is logged only when neither OBD_MD_FLGENER nor OBD_MD_FLID is
// set in the valid mask of the body.
101 if (!(lic->lic_body->valid & (OBD_MD_FLGENER | OBD_MD_FLID)))
102 CERROR("invalid generation\n");
103 CDEBUG(D_VFSTRACE, "comparing inode %p ino %lu/%u to body %lu/%u\n",
104 inode, inode->i_ino, inode->i_generation, ino,
105 lic->lic_body->generation);
// Compare the generation of the cached inode with the one in the MDS body.
107 if (inode->i_generation != lic->lic_body->generation)
110 /* Apply the attributes in 'opaque' to this inode */
111 ll_update_inode(inode, body, lic->lic_lsm);
115 extern struct dentry_operations ll_d_ops;
// Thin wrapper that calls ldlm_lock_decref(lockh, mode).
// NOTE(review): the value returned to the caller is not visible in this
// listing.
117 int ll_unlock(__u32 mode, struct lustre_handle *lockh)
121 ldlm_lock_decref(lockh, mode);
126 /* Get an inode by inode number (already instantiated by the intent lookup).
127 * Returns inode or NULL
// Two build variants of ll_iget(), selected by kernel version.
// 2.5.0 and later: iget5_locked() with ll_test_inode as the test callback
// and ll_read_inode2 as the set callback; the visible failure path returns
// NULL, and an inode still flagged I_NEW is released with
// unlock_new_inode().
// Older kernels: iget4() with ll_find_inode as the match callback.
// In both variants lic is passed through as the opaque callback argument.
// NOTE(review): the condition guarding the NULL return and the final return
// statements are not visible in this listing.
129 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
130 extern int ll_read_inode2(struct inode *inode, void *opaque);
131 struct inode *ll_iget(struct super_block *sb, ino_t hash,
132 struct ll_read_inode2_cookie *lic)
137 inode = iget5_locked(sb, hash, ll_test_inode, ll_read_inode2, lic);
139 return NULL; /* removed ERR_PTR(-ENOMEM) -eeb */
141 if (inode->i_state & I_NEW)
142 unlock_new_inode(inode);
144 // XXX Coda always fills inodes, should Lustre?
// Variant for kernels older than 2.5.0.
148 struct inode *ll_iget(struct super_block *sb, ino_t hash,
149 struct ll_read_inode2_cookie *lic)
153 inode = iget4(sb, hash, ll_find_inode, lic);
// NOTE(review): the debug line below dereferences inode; whether a NULL
// check precedes it is not visible in this listing.
154 CDEBUG(D_VFSTRACE, "inode: %lu/%u(%p)\n", inode->i_ino,
155 inode->i_generation, inode);
// Choose the lock mode that ll_intent_lock() passes to mdc_enqueue() for an
// intent. IT_CREAT is tested first; otherwise any of IT_READDIR, IT_GETATTR,
// IT_OPEN or IT_LOOKUP selects the second branch.
// NOTE(review): the mode returned by each branch and the fallback for other
// ops are not visible in this listing.
160 static int ll_intent_to_lock_mode(struct lookup_intent *it)
162 /* CREAT needs to be tested before open (both could be set) */
163 if (it->it_op & IT_CREAT)
165 else if (it->it_op & (IT_READDIR | IT_GETATTR | IT_OPEN | IT_LOOKUP))
// Report the status of one phase of an intent open.
// phase is one of IT_OPEN_OPEN, IT_OPEN_CREATE or IT_OPEN_LOOKUP. The
// disposition bits are examined in that order (open, create, lookup); when a
// set bit matches phase, it->it_status is returned.
// NOTE(review): the value returned when a bit is set but does not match
// phase, and when no bit is set, is not visible in this listing.
172 int ll_it_open_error(int phase, struct lookup_intent *it)
174 if (it->it_disposition & IT_OPEN_OPEN) {
175 if (phase == IT_OPEN_OPEN)
176 return it->it_status;
181 if (it->it_disposition & IT_OPEN_CREATE) {
182 if (phase == IT_OPEN_CREATE)
183 return it->it_status;
188 if (it->it_disposition & IT_OPEN_LOOKUP) {
189 if (phase == IT_OPEN_LOOKUP)
190 return it->it_status;
// Blocking callback that this file passes to mdc_enqueue() for MDC locks.
// LDLM_CB_BLOCKING:  the lock is converted to a handle and cancelled with
//                    ldlm_cli_cancel(); the result is logged.
// LDLM_CB_CANCELING: the inode stored in lock->l_data is fetched (asserted
//                    non-NULL). For a directory its cached pages are
//                    invalidated. When the superblock has a root dentry and
//                    the inode is not the root inode, its dentry aliases are
//                    unhashed.
// NOTE(review): the switch statement, the break and return lines, and any
// use of desc and data are not visible in this listing.
198 int ll_mdc_blocking_ast(struct ldlm_lock *lock,
199 struct ldlm_lock_desc *desc,
200 void *data, int flag)
203 struct lustre_handle lockh;
207 case LDLM_CB_BLOCKING:
208 ldlm_lock2handle(lock, &lockh);
209 rc = ldlm_cli_cancel(&lockh);
211 CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc);
215 case LDLM_CB_CANCELING: {
216 /* Invalidate all dentries associated with this inode */
// l_data is filled in by ll_mdc_lock_set_inode().
217 struct inode *inode = lock->l_data;
218 LASSERT(inode != NULL);
220 if (S_ISDIR(inode->i_mode)) {
221 CDEBUG(D_INODE, "invalidating inode %lu\n",
224 ll_invalidate_inode_pages(inode);
227 #warning FIXME: we should probably free this inode if there are no aliases
// The root inode of the filesystem keeps its aliases hashed.
228 if (inode->i_sb->s_root &&
229 inode != inode->i_sb->s_root->d_inode)
230 d_unhash_aliases(inode);
// Record inode in the l_data field of the lock named by lockh.
// ll_mdc_blocking_ast() reads l_data back in its LDLM_CB_CANCELING case.
// The handle must resolve to a lock (asserted).
// NOTE(review): ldlm_handle2lock() presumably returns a referenced lock; the
// matching release is not visible in this listing. Confirm.
240 void ll_mdc_lock_set_inode(struct lustre_handle *lockh, struct inode *inode)
242 struct ldlm_lock *lock = ldlm_handle2lock(lockh);
245 LASSERT(lock != NULL);
246 lock->l_data = inode;
// Call ldlm_cli_cancel_unused() for the lock resource of this inode.
// The resource id is built from the inode number and generation, and the
// namespace is taken from the obd device that conn resolves to.
// Returns whatever ldlm_cli_cancel_unused() returns.
// NOTE(review): the trailing arguments of the call are not visible in this
// listing; opaque is presumably forwarded there.
251 int ll_mdc_cancel_unused(struct lustre_handle *conn, struct inode *inode,
252 int flags, void *opaque)
254 struct ldlm_res_id res_id =
255 { .name = {inode->i_ino, inode->i_generation} };
256 struct obd_device *obddev = class_conn2obd(conn);
258 RETURN(ldlm_cli_cancel_unused(obddev->obd_namespace, &res_id, flags,
// Fill an mdc_op_data descriptor from up to two inodes and a name length.
// Inode number, generation, file type (the S_IFMT part of i_mode) and gid of
// i1 go to the fields suffixed 1; the same attributes of i2 go to the fields
// suffixed 2. namelen is stored as given.
// NOTE(review): most of the parameter list, the handling of a NULL i2
// (several callers in this file pass NULL) and the remaining field
// assignments are not visible in this listing.
262 void ll_prepare_mdc_op_data(struct mdc_op_data *data,
271 data->ino1 = i1->i_ino;
272 data->gen1 = i1->i_generation;
273 data->typ1 = i1->i_mode & S_IFMT;
274 data->gid1 = i1->i_gid;
277 data->ino2 = i2->i_ino;
278 data->gen2 = i2->i_generation;
279 data->typ2 = i2->i_mode & S_IFMT;
280 data->gid2 = i2->i_gid;
286 data->namelen = namelen;
// Disposition bit used by ll_intent_lock(): it is set there when the reply
// names a different inode (number or generation) than the dentry already
// holds, and the mdc_enqueue() call is skipped when the bit is already set.
290 #define IT_ENQ_COMPLETE (1<<16)
// Enqueue an intent lock for the name in *de under parent, fix up local
// state from the reply, and hand the reply to intent_finish.
//
// parent        - directory inode the name lives in
// de            - in/out dentry pointer; re-read after intent_finish()
//                 because the callback may replace it
// it            - lookup intent; it_disposition, it_status, it_data and
//                 it_lock_handle are read and updated here
// intent_finish - optional callback; receives the computed flag
//                 (LL_LOOKUP_POSITIVE or LL_LOOKUP_NEGATIVE), the request,
//                 the reply buffer offset and the inode number
//
// Visible error results: -ENAMETOOLONG when the name is longer than
// EXT2_NAME_LEN, -ENOENT on the path that logs that the inode was not found
// by name, and -abs(rc) on the path that logs an mdc_getattr() failure.
// NOTE(review): many lines of this function (declarations of ino, mode,
// valid, fid, eadatalen and flags, braces, labels such as drop_lock, and
// several conditions) are not visible in this listing.
292 int ll_intent_lock(struct inode *parent, struct dentry **de,
293 struct lookup_intent *it, intent_finish_cb intent_finish)
295 struct dentry *dentry = *de;
296 struct inode *inode = dentry->d_inode;
297 struct ll_sb_info *sbi = ll_i2sbi(parent);
298 struct lustre_handle lockh;
299 struct lookup_intent lookup_it = { .it_op = IT_LOOKUP };
300 struct ptlrpc_request *request = NULL;
301 int rc = 0, offset, flag = 0;
305 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
306 if (it && it->it_op == 0)
312 CDEBUG(D_DLMTRACE, "name: %*s, intent: %s\n", dentry->d_name.len,
313 dentry->d_name.name, ldlm_it2str(it->it_op));
315 if (dentry->d_name.len > EXT2_NAME_LEN)
316 RETURN(-ENAMETOOLONG);
// The enqueue is skipped when an earlier pass already marked this intent
// with IT_ENQ_COMPLETE.
318 if (!(it->it_disposition & IT_ENQ_COMPLETE)) {
319 struct mdc_op_data op_data;
321 ll_prepare_mdc_op_data(&op_data, parent, dentry->d_inode,
322 dentry->d_name.name, dentry->d_name.len,
325 rc = mdc_enqueue(&sbi->ll_mdc_conn, LDLM_PLAIN, it,
326 ll_intent_to_lock_mode(it), &op_data,
327 &lockh, NULL, 0, ldlm_completion_ast,
328 ll_mdc_blocking_ast, parent);
// Keep the granted lock handle in the intent for later users.
331 memcpy(it->it_lock_handle, &lockh, sizeof(lockh));
334 request = (struct ptlrpc_request *)it->it_data;
336 /* non-zero it_disposition indicates that the server performed the
337 * intent on our behalf. */
338 if (it->it_disposition) {
339 struct mds_body *mds_body;
342 /* This long block is all about fixing up the local
343 * state so that it is correct as of the moment
344 * _before_ the operation was applied; that way, the
345 * VFS will think that everything is normal and call
346 * Lustre's regular FS function.
348 * If we're performing a creation, that means that unless the
349 * creation failed with EEXIST, we should fake up a negative
350 * dentry. Likewise for the target of a hard link.
352 * For everything else, we want to lookup to succeed. */
354 /* One additional note: if CREATE/MKDIR/etc succeeded,
355 * we add an extra reference to the request because we
356 * need to keep it around until ll_create gets called.
357 * For anything else which results in
358 * LL_LOOKUP_POSITIVE, we can do the iget()
359 * immediately with the contents of the reply (in the
360 * intent_finish callback). In the create case,
361 * however, we need to wait until ll_create_node to do
362 * the iget() or the VFS will abort with -EEXISTS.
366 mds_body = lustre_msg_buf(request->rq_repmsg, offset,
368 LASSERT (mds_body != NULL); /* mdc_enqueue checked */
369 LASSERT_REPSWABBED (request, offset); /* mdc_enqueue swabbed */
371 ino = mds_body->fid1.id;
372 mode = mds_body->mode;
374 /*We were called from revalidate2: did we find the same inode?*/
375 if (inode && (ino != inode->i_ino ||
376 mds_body->fid1.generation != inode->i_generation)) {
377 it->it_disposition |= IT_ENQ_COMPLETE;
381 /* If we're doing an IT_OPEN which did not result in an actual
382 * successful open, then we need to remove the bit which saves
383 * this request for unconditional replay. */
384 if (it->it_op & IT_OPEN &&
385 (!(it->it_disposition & IT_OPEN_OPEN) ||
386 it->it_status != 0)) {
// rq_replay is cleared under the request spinlock.
389 spin_lock_irqsave (&request->rq_lock, flags);
390 request->rq_replay = 0;
391 spin_unlock_irqrestore (&request->rq_lock, flags);
394 if (it->it_op & IT_CREAT) {
395 mdc_store_inode_generation(request, 2, 1);
396 /* The server will return to us, in it_disposition, an
397 * indication of exactly what it_status refers to.
399 * If IT_OPEN_OPEN is set, then it_status refers to the
400 * open() call, otherwise if IT_OPEN_CREATE is set, then
401 * it status is the creation failure mode. In either
402 * case, one of IT_OPEN_NEG or IT_OPEN_POS will be set,
403 * indicating whether the child lookup was successful.
405 * Else, if IT_OPEN_LOOKUP then it_status is the rc
406 * of the child lookup.
408 * Finally, if none of the bits are set, then the
409 * failure occurred while looking up the parent. */
410 rc = ll_it_open_error(IT_OPEN_LOOKUP, it);
// One extra request reference is taken for each of the create and open
// dispositions that is set.
414 if (it->it_disposition & IT_OPEN_CREATE)
415 ptlrpc_request_addref(request);
416 if (it->it_disposition & IT_OPEN_OPEN)
417 ptlrpc_request_addref(request);
419 if (it->it_disposition & IT_OPEN_NEG)
420 flag = LL_LOOKUP_NEGATIVE;
422 flag = LL_LOOKUP_POSITIVE;
423 } else if (it->it_op == IT_OPEN) {
424 LASSERT(!(it->it_disposition & IT_OPEN_CREATE));
426 rc = ll_it_open_error(IT_OPEN_LOOKUP, it);
430 if (it->it_disposition & IT_OPEN_OPEN)
431 ptlrpc_request_addref(request);
433 if (it->it_disposition & IT_OPEN_NEG)
434 flag = LL_LOOKUP_NEGATIVE;
436 flag = LL_LOOKUP_POSITIVE;
437 } else if (it->it_op & (IT_GETATTR | IT_LOOKUP)) {
438 /* For check ops, we want the lookup to succeed */
441 flag = LL_LOOKUP_NEGATIVE;
443 flag = LL_LOOKUP_POSITIVE;
// NOTE(review): the LBUG() below sits ahead of the local lookup path that
// follows it; if LBUG() does not return, that path is effectively dead
// code. Confirm against the full source.
452 LBUG(); /* For the moment, no non-intent locks */
454 /* it_disposition == 0 indicates that it just did a simple lock
455 * request, for which we are very thankful. move along with
456 * the local lookup then. */
458 //memcpy(&lli->lli_intent_lock_handle, &lockh, sizeof(lockh));
461 ino = ll_inode_by_name(parent, dentry, &mode);
463 CERROR("inode %*s not found by name\n",
464 dentry->d_name.len, dentry->d_name.name);
465 GOTO(drop_lock, rc = -ENOENT);
468 valid = OBD_MD_FLNOTOBD;
471 eadatalen = obd_size_diskmd(&sbi->ll_osc_conn, NULL),
472 valid |= OBD_MD_FLEASIZE;
475 valid |= OBD_MD_FLBLOCKS;
481 rc = mdc_getattr(&sbi->ll_mdc_conn, &fid, valid,
482 eadatalen, &request);
484 CERROR("failure %d inode "LPX64"\n", rc, ino);
485 GOTO(drop_lock, rc = -abs(rc));
489 LASSERT (request != NULL);
491 if (intent_finish != NULL) {
492 struct lustre_handle old_lock;
493 struct ldlm_lock *lock;
495 rc = intent_finish(flag, request, parent, de, it, offset, ino);
496 dentry = *de; /* intent_finish may change *de */
497 inode = dentry->d_inode;
501 /* The intent processing may well have given us a lock different
502 * from the one we requested. If we already have a matching
503 * lock, then cancel the new one. (We have to do this here,
504 * instead of in mdc_enqueue, because we need to use the child's
505 * inode as the l_data to match, and that's not available until
506 * intent_finish has performed the iget().) */
507 lock = ldlm_handle2lock(&lockh);
509 LDLM_DEBUG(lock, "matching against this");
// The new handle is saved in old_lock before ldlm_lock_match() is called
// with lockh; on the branch shown the saved handle is dropped and cancelled
// and the handle now in lockh is stored in the intent.
// NOTE(review): the arguments of these calls are only partly visible.
511 memcpy(&old_lock, &lockh, sizeof(lockh));
512 if (ldlm_lock_match(NULL,
513 LDLM_FL_BLOCK_GRANTED |
515 NULL, LDLM_PLAIN, NULL, 0, LCK_NL,
517 ldlm_lock_decref_and_cancel(&lockh,
519 memcpy(&lockh, &old_lock, sizeof(old_lock));
520 memcpy(it->it_lock_handle, &lockh,
526 ptlrpc_req_finished(request);
528 /* This places the intent in the dentry so that the vfs_xxx
529 * operation can lay its hands on it; but that is not always
530 * needed... (we need to save it in the GETATTR case for the
531 * benefit of ll_inode_revalidate -phil) */
532 /* Ignore trying to save the intent for "special" inodes as
533 * they have special semantics that can cause deadlocks on
534 * the intent semaphore. -mmex */
// The intent is saved only for IT_OPEN or IT_GETATTR, and only when the
// dentry is negative or refers to a directory, regular file or symlink.
535 if ((!inode || S_ISDIR(inode->i_mode) || S_ISREG(inode->i_mode) ||
536 S_ISLNK(inode->i_mode)) && (it->it_op & (IT_OPEN | IT_GETATTR)))
537 LL_SAVE_INTENT(dentry, it);
540 "D_IT dentry %p fsdata %p intent: %s status %d\n",
541 dentry, ll_d2d(dentry), ldlm_it2str(it->it_op),
// A pure lookup intent is released right away.
544 if (it->it_op == IT_LOOKUP)
545 ll_intent_release(dentry, it);
// NOTE(review): the two calls below presumably belong to the drop_lock
// error exit; the label itself is not visible in this listing.
550 ll_intent_release(dentry, it);
552 ptlrpc_req_finished(request);
556 /* Search "inode"'s alias list for a dentry that has the same name and parent as
557 * de. If found, return it. If not found, return de. */
// The alias list of the inode is walked with dcache_lock held. A candidate is
// compared with de on parent, then name length, then name bytes. In the
// matching sequence shown, the dentry is taken off the LRU list if it is on
// one, removed from its hash list and rehashed, the lock is dropped, d_count
// is incremented and DCACHE_LUSTRE_INVALID is cleared in d_flags.
// NOTE(review): the continue and return lines, and the check that goes with
// the comment about de already being on the list, are not visible in this
// listing.
558 struct dentry *ll_find_alias(struct inode *inode, struct dentry *de)
560 struct list_head *tmp;
562 spin_lock(&dcache_lock);
563 list_for_each(tmp, &inode->i_dentry) {
564 struct dentry *dentry = list_entry(tmp, struct dentry, d_alias);
566 /* We are called here with 'de' already on the aliases list. */
572 if (dentry->d_parent != de->d_parent)
575 if (dentry->d_name.len != de->d_name.len)
578 if (memcmp(dentry->d_name.name, de->d_name.name,
579 de->d_name.len) != 0)
582 if (!list_empty(&dentry->d_lru))
583 list_del_init(&dentry->d_lru);
585 list_del_init(&dentry->d_hash);
586 __d_rehash(dentry, 0); /* avoid taking dcache_lock inside */
587 spin_unlock(&dcache_lock);
// The reference is taken after dcache_lock has been released.
588 atomic_inc(&dentry->d_count);
590 dentry->d_flags &= ~DCACHE_LUSTRE_INVALID;
// Unlock on the path where the walk ends without the sequence above.
594 spin_unlock(&dcache_lock);
// intent_finish callback that ll_lookup2() passes to ll_intent_lock().
// For a positive lookup (LL_LOOKUP_NEGATIVE not set in flag) it:
//   - takes the mds_body from the reply buffer at offset;
//   - for a regular file with OBD_MD_FLEASIZE set, unpacks the EA found in
//     reply buffer offset + 1 into lic.lic_lsm with obd_unpackmd();
//   - obtains the inode with ll_iget(), and frees lic.lic_lsm both on the
//     first path shown after ll_iget() and when lli_smd of the inode is a
//     different lsm;
//   - for IT_GETATTR on a regular file that has an lsm, takes and then drops
//     a PR extent lock covering 0 to OBD_OBJECT_EOF;
//   - replaces the dentry with the result of ll_find_alias() and records
//     the inode in the intent lock via ll_mdc_lock_set_inode().
// The dentry is given ll_d_ops and added with d_add().
// NOTE(review): the return type, the declarations of rc and lmm_size, the
// error returns and the negative-lookup branch are not visible in this
// listing; whether d_op and d_add apply to every path cannot be told here.
600 lookup2_finish(int flag, struct ptlrpc_request *request,
601 struct inode *parent, struct dentry **de,
602 struct lookup_intent *it, int offset, obd_id ino)
604 struct ll_sb_info *sbi = ll_i2sbi(parent);
605 struct dentry *dentry = *de, *saved = *de;
606 struct inode *inode = NULL;
607 struct ll_read_inode2_cookie lic = {.lic_body = NULL, .lic_lsm = NULL};
609 /* NB 1 request reference will be taken away by ll_intent_lock()
612 if (!(flag & LL_LOOKUP_NEGATIVE)) {
615 /* We only get called if the mdc_enqueue() called from
616 * ll_intent_lock() was successful. Therefore the mds_body
617 * is present and correct, and the eadata is present if
618 * body->eadatasize != 0 (but still opaque, so only
619 * obd_unpackmd() can check the size) */
620 lic.lic_body = lustre_msg_buf(request->rq_repmsg, offset,
621 sizeof (*lic.lic_body));
622 LASSERT(lic.lic_body != NULL);
623 LASSERT_REPSWABBED(request, offset);
625 if (S_ISREG(lic.lic_body->mode) &&
626 (lic.lic_body->valid & OBD_MD_FLEASIZE)) {
627 struct lov_mds_md *lmm;
631 lmm_size = lic.lic_body->eadatasize;
633 CERROR("OBD_MD_FLEASIZE set but "
637 lmm = lustre_msg_buf(request->rq_repmsg, offset + 1,
639 LASSERT(lmm != NULL);
640 LASSERT_REPSWABBED(request, offset + 1);
642 rc = obd_unpackmd(&sbi->ll_osc_conn,
643 &lic.lic_lsm, lmm, lmm_size);
645 CERROR("Error %d unpacking eadata\n", rc);
// A successful unpack is expected to report at least the size of the
// in-memory lsm structure.
648 LASSERT(rc >= sizeof(*lic.lic_lsm));
651 /* Both ENOMEM and an RPC timeout are possible in ll_iget; which
652 * to pick? A more generic EIO? -phik */
653 inode = ll_iget(dentry->d_sb, ino, &lic);
655 /* free the lsm if we allocated one above */
656 if (lic.lic_lsm != NULL)
657 obd_free_memmd(&sbi->ll_osc_conn, &lic.lic_lsm);
// The unpacked lsm is also freed when the inode ended up with another one.
659 } else if (lic.lic_lsm != NULL &&
660 ll_i2info(inode)->lli_smd != lic.lic_lsm) {
661 obd_free_memmd(&sbi->ll_osc_conn, &lic.lic_lsm);
664 /* If this is a stat, get the authoritative file size */
665 if (it->it_op == IT_GETATTR && S_ISREG(inode->i_mode) &&
666 ll_i2info(inode)->lli_smd != NULL) {
667 struct ldlm_extent extent = {0, OBD_OBJECT_EOF};
668 struct lustre_handle lockh = {0};
669 struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
672 LASSERT(lsm->lsm_object_id != 0);
674 rc = ll_extent_lock(NULL, inode, lsm, LCK_PR, &extent,
676 if (rc != ELDLM_OK) {
680 ll_extent_unlock(NULL, inode, lsm, LCK_PR, &lockh);
683 dentry = *de = ll_find_alias(inode, dentry);
685 /* We asked for a lock on the directory, and may have been
686 * granted a lock on the inode. Just in case, fixup the data
688 ll_mdc_lock_set_inode((struct lustre_handle*)it->it_lock_handle,
694 dentry->d_op = &ll_d_ops;
698 d_add(dentry, inode);
// lookup2 entry of ll_dir_inode_operations: resolves dentry under parent by
// running ll_intent_lock() with lookup2_finish as the finish callback.
// Three results are visible: ERR_PTR(rc) on the path that logs the
// ll_intent_lock() result, NULL, and the dentry pointer as it stands after
// ll_intent_lock() (which may have replaced it).
// NOTE(review): the conditions that choose between these results are not
// visible in this listing. Presumably NULL is returned when the dentry was
// not replaced (compare with save); confirm against the full source.
703 static struct dentry *ll_lookup2(struct inode *parent, struct dentry *dentry,
704 struct lookup_intent *it)
706 struct dentry *save = dentry, *retval;
710 CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p),intent=%s\n",
711 dentry->d_name.name, parent->i_ino, parent->i_generation,
712 parent, LL_IT2STR(it));
714 rc = ll_intent_lock(parent, &dentry, it, lookup2_finish);
716 CDEBUG(D_INFO, "ll_intent_lock: %d\n", rc);
717 GOTO(out, retval = ERR_PTR(rc));
721 GOTO(out, retval = NULL);
723 GOTO(out, retval = dentry);
728 /* We depend on "mode" being set with the proper file type/umask by now */
// Obtain the inode for a newly created name in dir.
// Intent path (it is set and it_disposition is non-zero): the cached pages of
// dir are invalidated and the mds_body is taken from reply buffer 1 of the
// request saved in it->it_data.
// Other path: mdc_create() is called with data, datalen, mode, the current
// fsuid, a gid that starts as current->fsgid, the current time and extra;
// the mds_body is then unpacked from reply buffer 0, and a failed unpack
// yields ERR_PTR(-EPROTO).
// The inode is obtained with ll_iget() using body->ino. An inode that
// already has entries on its i_dentry list is logged as fatal and the result
// becomes ERR_PTR(-EIO). On the intent path the inode is recorded in the
// intent lock. The request is passed to ptlrpc_req_finished() at the end.
// NOTE(review): the else keyword, the S_ISGID adjustment of gid, the setup
// of lic, the handling of a missing or bad inode and the return statement
// are not visible in this listing.
729 static struct inode *ll_create_node(struct inode *dir, const char *name,
730 int namelen, const void *data, int datalen,
731 int mode, __u64 extra,
732 struct lookup_intent *it)
735 struct ptlrpc_request *request = NULL;
736 struct mds_body *body;
737 time_t time = LTIME_S(CURRENT_TIME);
738 struct ll_sb_info *sbi = ll_i2sbi(dir);
739 struct ll_read_inode2_cookie lic;
742 if (it && it->it_disposition) {
743 ll_invalidate_inode_pages(dir);
744 request = it->it_data;
745 body = lustre_msg_buf(request->rq_repmsg, 1, sizeof (*body));
746 LASSERT (body != NULL); /* checked already */
747 LASSERT_REPSWABBED (request, 1); /* swabbed already */
749 struct mdc_op_data op_data;
750 int gid = current->fsgid;
753 if (dir->i_mode & S_ISGID) {
759 ll_prepare_mdc_op_data(&op_data, dir, NULL, name, namelen, 0);
760 rc = mdc_create(&sbi->ll_mdc_conn, &op_data,
761 data, datalen, mode, current->fsuid, gid,
762 time, extra, &request);
767 body = lustre_swab_repbuf(request, 0, sizeof (*body),
768 lustre_swab_mds_body);
770 CERROR ("Can't unpack mds_body\n");
771 GOTO (out, inode = ERR_PTR(-EPROTO));
778 inode = ll_iget(dir->i_sb, body->ino, &lic);
779 if (!inode || is_bad_inode(inode)) {
780 /* XXX might need iput() for bad inode */
782 CERROR("new_inode -fatal: rc %d\n", rc);
// A brand new inode is not expected to have any dentry attached yet.
787 if (!list_empty(&inode->i_dentry)) {
788 CERROR("new_inode -fatal: inode %d, ct %d lnk %d\n",
789 body->ino, atomic_read(&inode->i_count),
793 inode = ERR_PTR(-EIO);
797 if (it && it->it_disposition) {
798 /* We asked for a lock on the directory, but were
799 * granted a lock on the inode. Since we finally have
800 * an inode pointer, stuff it in the lock. */
801 ll_mdc_lock_set_inode((struct lustre_handle*)it->it_lock_handle,
807 ptlrpc_req_finished(request);
// Unlink name from dir through an IT_UNLINK intent enqueue taken in LCK_EX
// mode, then destroy the OST object when the MDS reply carries EA data.
// Called by ll_rmdir2() with S_IFDIR and by ll_unlink2() with S_IFREG as
// mode; both pass NULL for child.
// Visible flow:
//   - mdc_enqueue() is issued and the request is taken from it.it_data;
//   - one exit returns it.it_status;
//   - the mds_body is read from reply buffer 1; OBD_MD_FLEASIZE with a zero
//     eadatasize is reported as -EPROTO;
//   - the EA in reply buffer 2 is unpacked with obd_unpackmd(), an obdo is
//     filled with the object id and file type, and obd_destroy() is called;
//   - cleanup frees the lsm, drops and cancels the EX lock and finishes the
//     request.
// NOTE(review): the declarations of err and oa, the allocation and release
// of oa, the conditions on each GOTO and the cleanup labels are not visible
// in this listing, so which cleanup steps each exit runs cannot be told
// from here.
811 static int ll_mdc_unlink(struct inode *dir, struct inode *child, __u32 mode,
812 const char *name, int len)
814 struct ptlrpc_request *request = NULL;
815 struct ll_sb_info *sbi = ll_i2sbi(dir);
816 struct mds_body *body;
817 struct lov_mds_md *eadata;
818 struct lov_stripe_md *lsm = NULL;
819 struct lustre_handle lockh;
820 struct lookup_intent it = { .it_op = IT_UNLINK };
823 struct mdc_op_data op_data;
826 ll_prepare_mdc_op_data(&op_data, dir, child, name, len, mode);
828 err = mdc_enqueue(&sbi->ll_mdc_conn, LDLM_PLAIN, &it, LCK_EX,
829 &op_data, &lockh, NULL, 0,
830 ldlm_completion_ast, ll_mdc_blocking_ast,
832 request = (struct ptlrpc_request *)it.it_data;
836 GOTO(out, err = it.it_status);
839 body = lustre_msg_buf (request->rq_repmsg, 1, sizeof (*body));
840 LASSERT (body != NULL); /* checked by mdc_enqueue() */
841 LASSERT_REPSWABBED (request, 1); /* swabbed by mdc_enqueue() */
// Without OBD_MD_FLEASIZE there is no EA in the reply to act on.
843 if (!(body->valid & OBD_MD_FLEASIZE))
846 if (body->eadatasize == 0) {
847 CERROR ("OBD_MD_FLEASIZE set but eadatasize zero\n");
848 GOTO (out, err = -EPROTO);
851 /* The MDS sent back the EA because we unlinked the last reference
852 * to this file. Use this EA to unlink the objects on the OST.
853 * Note that mdc_enqueue() has already checked there _is_ some EA
854 * data, but this data is opaque to both mdc_enqueue() and the MDS.
855 * We have to leave it to obd_unpackmd() to check it is complete
857 eadata = lustre_msg_buf (request->rq_repmsg, 2, body->eadatasize);
858 LASSERT (eadata != NULL);
859 LASSERT_REPSWABBED (request, 2);
861 err = obd_unpackmd(ll_i2obdconn(dir), &lsm, eadata,
864 CERROR("obd_unpackmd: %d\n", err);
865 GOTO (out_unlock, err);
867 LASSERT (err >= sizeof (*lsm));
871 GOTO(out_free_memmd, err = -ENOMEM);
873 oa->o_id = lsm->lsm_object_id;
874 oa->o_mode = body->mode & S_IFMT;
875 oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE;
877 err = obd_destroy(ll_i2obdconn(dir), oa, lsm, NULL);
880 CERROR("obd destroy objid 0x"LPX64" error %d\n",
881 lsm->lsm_object_id, err);
883 obd_free_memmd(ll_i2obdconn(dir), &lsm);
885 ldlm_lock_decref_and_cancel(&lockh, LCK_EX);
887 ptlrpc_req_finished(request);
892 * By the time this is called, we already have created the directory cache
893 * entry for the new file, but it is so far negative - it has no inode.
895 * We defer creating the OBD object(s) until open, to keep the intent and
896 * non-intent code paths similar, and also because we do not have the MDS
897 * inode number before calling ll_create_node() (which is needed for LOV),
898 * so we would need to do yet another RPC to the MDS to store the LOV EA
899 * data on the MDS. If needed, we would pass the PACKED lmm as data and
900 * lmm_size in datalen (the MDS still has code which will handle that).
902 * If the create succeeds, we fill in the inode information
903 * with d_instantiate().
// Visible flow:
//   - ll_it_open_error(IT_OPEN_CREATE, it) is consulted; one path then takes
//     the intent back from the dentry with LL_GET_INTENT() and finishes the
//     request held in it->it_data;
//   - ll_create_node() is called with no data (NULL, 0) and extra 0; on the
//     path that returns PTR_ERR(inode) the intent is taken back first;
//   - with an intent whose it_disposition is non-zero the dentry is simply
//     instantiated; the other visible path links the inode through
//     ext2_add_nondir().
// NOTE(review): how it is obtained before its first use, the conditions on
// the error paths, the else keyword and the final return are not visible in
// this listing.
905 static int ll_create(struct inode *dir, struct dentry *dentry, int mode)
907 struct lookup_intent *it;
912 CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p),intent=%s\n",
913 dentry->d_name.name, dir->i_ino, dir->i_generation, dir,
914 LL_IT2STR(dentry->d_it));
918 rc = ll_it_open_error(IT_OPEN_CREATE, it);
920 LL_GET_INTENT(dentry, it);
921 ptlrpc_req_finished(it->it_data);
925 inode = ll_create_node(dir, dentry->d_name.name, dentry->d_name.len,
926 NULL, 0, mode, 0, it);
929 LL_GET_INTENT(dentry, it);
930 RETURN(PTR_ERR(inode));
933 /* no directory data updates when intents rule */
934 if (it && it->it_disposition) {
935 d_instantiate(dentry, inode);
939 rc = ext2_add_nondir(dentry, inode);
943 static int ll_mknod(struct inode *dir, struct dentry *dentry, int mode,
// Create a non-directory node called name in dir with a single mdc_create()
// request. The process umask is cleared from mode first. A mode with no
// file type, or S_IFREG, gets S_IFREG set and shares the create path with
// S_IFCHR, S_IFBLK, S_IFIFO and S_IFSOCK. The request is finished right
// after the call; no inode is instantiated in the lines shown.
// NOTE(review): the rest of the parameter list, the result of the
// EXT2_LINK_MAX check, the last mdc_create() arguments, the handling of
// other file types and the return value are not visible in this listing.
950 static int ll_mknod2(struct inode *dir, const char *name, int len, int mode,
953 struct ptlrpc_request *request = NULL;
954 time_t time = LTIME_S(CURRENT_TIME);
955 struct ll_sb_info *sbi = ll_i2sbi(dir);
956 struct mdc_op_data op_data;
960 CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p)\n",
961 name, dir->i_ino, dir->i_generation, dir);
963 if (dir->i_nlink >= EXT2_LINK_MAX)
966 mode &= ~current->fs->umask;
968 switch (mode & S_IFMT) {
969 case 0: case S_IFREG:
970 mode |= S_IFREG; /* for mode = 0 case, fallthrough */
971 case S_IFCHR: case S_IFBLK:
972 case S_IFIFO: case S_IFSOCK:
973 ll_prepare_mdc_op_data(&op_data, dir, NULL, name, len, 0);
974 err = mdc_create(&sbi->ll_mdc_conn, &op_data, NULL, 0, mode,
975 current->fsuid, current->fsgid, time,
977 ptlrpc_req_finished(request);
988 static int ll_symlink(struct inode *dir, struct dentry *dentry,
// symlink2 entry of ll_dir_inode_operations: create a symbolic link called
// name in dir with a single mdc_create() request. The link target tgt is
// sent as the create data, including its terminating NUL (strlen + 1), and
// the mode is S_IFLNK | S_IRWXUGO. The request is finished right after the
// call.
// NOTE(review): the rest of the parameter list (where tgt is declared), the
// result of the EXT2_LINK_MAX check and the return value are not visible in
// this listing.
995 static int ll_symlink2(struct inode *dir, const char *name, int len,
998 struct ptlrpc_request *request = NULL;
999 time_t time = LTIME_S(CURRENT_TIME);
1000 struct ll_sb_info *sbi = ll_i2sbi(dir);
1001 struct mdc_op_data op_data;
1005 CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p),target=%s\n",
1006 name, dir->i_ino, dir->i_generation, dir, tgt);
1008 if (dir->i_nlink >= EXT2_LINK_MAX)
1011 ll_prepare_mdc_op_data(&op_data, dir, NULL, name, len, 0);
1012 err = mdc_create(&sbi->ll_mdc_conn, &op_data,
1013 tgt, strlen(tgt) + 1, S_IFLNK | S_IRWXUGO,
1014 current->fsuid, current->fsgid, time, 0, &request);
1015 ptlrpc_req_finished(request);
1019 static int ll_link(struct dentry *old_dentry, struct inode * dir,
1020 struct dentry *dentry)
// Create a hard link to src under the given name in dir with a single
// mdc_link() request. In the op data src is the first inode and dir the
// second. The request is finished right after the call.
// NOTE(review): the declaration of err and the return value are not visible
// in this listing.
1026 static int ll_link2(struct inode *src, struct inode *dir,
1027 const char *name, int len)
1029 struct ptlrpc_request *request = NULL;
1030 struct mdc_op_data op_data;
1032 struct ll_sb_info *sbi = ll_i2sbi(dir);
1035 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),dir=%lu/%u(%p),target=%s\n",
1036 src->i_ino, src->i_generation, src,
1037 dir->i_ino, dir->i_generation, dir, name);
1039 ll_prepare_mdc_op_data(&op_data, src, dir, name, len, 0);
1040 err = mdc_link(&sbi->ll_mdc_conn, &op_data, &request);
1041 ptlrpc_req_finished(request);
1046 static int ll_mkdir(struct inode *dir, struct dentry *dentry, int mode)
// Create a directory called name in dir with a single mdc_create() request.
// The mode keeps only the permission and sticky bits, has the process umask
// cleared, and gets S_IFDIR set. The request is finished right after the
// call.
// NOTE(review): the result of the EXT2_LINK_MAX check, the last
// mdc_create() arguments and the return value are not visible in this
// listing.
1052 static int ll_mkdir2(struct inode *dir, const char *name, int len, int mode)
1054 struct ptlrpc_request *request = NULL;
1055 time_t time = LTIME_S(CURRENT_TIME);
1056 struct ll_sb_info *sbi = ll_i2sbi(dir);
1057 struct mdc_op_data op_data;
1060 CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p)\n",
1061 name, dir->i_ino, dir->i_generation, dir);
1063 if (dir->i_nlink >= EXT2_LINK_MAX)
1066 mode = (mode & (S_IRWXUGO|S_ISVTX) & ~current->fs->umask) | S_IFDIR;
1067 ll_prepare_mdc_op_data(&op_data, dir, NULL, name, len, 0);
1068 err = mdc_create(&sbi->ll_mdc_conn, &op_data, NULL, 0, mode,
1069 current->fsuid, current->fsgid,
1071 ptlrpc_req_finished(request);
// Remove the directory called name from dir by calling ll_mdc_unlink() with
// no child inode and S_IFDIR as the mode.
// NOTE(review): the declaration of rc and the return statement are not
// visible in this listing; presumably rc is returned.
1075 static int ll_rmdir2(struct inode *dir, const char *name, int len)
1079 CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p)\n",
1080 name, dir->i_ino, dir->i_generation, dir);
1082 rc = ll_mdc_unlink(dir, NULL, S_IFDIR, name, len);
// unlink2 entry of ll_dir_inode_operations: remove the file called name from
// dir by calling ll_mdc_unlink() with no child inode and S_IFREG as the mode.
// NOTE(review): the declaration of rc and the return statement are not
// visible in this listing; presumably rc is returned.
1086 static int ll_unlink2(struct inode *dir, const char *name, int len)
1090 CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p)\n",
1091 name, dir->i_ino, dir->i_generation, dir);
1093 rc = ll_mdc_unlink(dir, NULL, S_IFREG, name, len);
1097 static int ll_unlink(struct inode *dir, struct dentry *dentry)
1103 static int ll_rmdir(struct inode *dir, struct dentry *dentry)
1109 static int ll_rename(struct inode * old_dir, struct dentry * old_dentry,
1110 struct inode * new_dir, struct dentry * new_dentry)
// rename2 entry of ll_dir_inode_operations: rename oldname in directory src
// to newname in directory tgt with a single mdc_rename() request. The op
// data carries only the two directory inodes (no name); both names and
// their lengths are passed to mdc_rename() directly. The request is
// finished right after the call.
// NOTE(review): the declaration of err and the return value are not visible
// in this listing.
1116 static int ll_rename2(struct inode *src, struct inode *tgt,
1117 const char *oldname, int oldlen,
1118 const char *newname, int newlen)
1120 struct ptlrpc_request *request = NULL;
1121 struct ll_sb_info *sbi = ll_i2sbi(src);
1122 struct mdc_op_data op_data;
1125 CDEBUG(D_VFSTRACE, "VFS Op:oldname=%s,src_dir=%lu/%u(%p),newname=%s,"
1126 "tgt_dir=%lu/%u(%p)\n", oldname, src->i_ino, src->i_generation,
1127 src, newname, tgt->i_ino, tgt->i_generation, tgt);
1129 ll_prepare_mdc_op_data(&op_data, src, tgt, NULL, 0, 0);
1130 err = mdc_rename(&sbi->ll_mdc_conn, &op_data,
1131 oldname, oldlen, newname, newlen, &request);
1132 ptlrpc_req_finished(request);
1137 extern int ll_inode_revalidate(struct dentry *dentry);
1138 struct inode_operations ll_dir_inode_operations = {
1140 lookup2: ll_lookup2,
1141 link: ll_link, /* LBUG() */
1143 unlink: ll_unlink, /* LBUG() */
1144 unlink2: ll_unlink2,
1145 symlink: ll_symlink, /* LBUG() */
1146 symlink2: ll_symlink2,
1147 mkdir: ll_mkdir, /* LBUG() */
1149 rmdir: ll_rmdir, /* LBUG() */
1151 mknod: ll_mknod, /* LBUG() */
1153 rename: ll_rename, /* LBUG() */
1154 rename2: ll_rename2,
1155 setattr: ll_setattr,
1156 setattr_raw: ll_setattr_raw,
1157 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
1158 revalidate: ll_inode_revalidate,