1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (c) 2002, 2003 Cluster File Systems, Inc.
5 * Author: Peter Braam <braam@clusterfs.com>
6 * Author: Phil Schwan <phil@clusterfs.com>
7 * Author: Andreas Dilger <adilger@clusterfs.com>
9 * This file is part of Lustre, http://www.lustre.org.
11 * Lustre is free software; you can redistribute it and/or
12 * modify it under the terms of version 2 of the GNU General Public
13 * License as published by the Free Software Foundation.
15 * Lustre is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with Lustre; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25 #define DEBUG_SUBSYSTEM S_LLITE
27 #include <linux/lustre_dlm.h>
28 #include <linux/lustre_lite.h>
29 #include <linux/obd_lov.h> /* for lov_mds_md_size() in lov_setstripe() */
30 #include <linux/random.h>
// Forward declarations for setattr helpers whose definitions are not in the
// visible part of this file.  ll_inode_setattr() is called further down by
// ll_update_atime(); ll_setattr() is not referenced in the visible lines.
32 int ll_inode_setattr(struct inode *inode, struct iattr *attr, int do_trunc);
33 extern int ll_setattr(struct dentry *de, struct iattr *attr);
// ll_mdc_close - close an open file on the MDS and free its ll_file_data.
//
// Visible sequence:
//   1. mdc_close() is sent using the MDS handle saved in fd->fd_mdshandle;
//      the close request comes back in req.
//   2. Under imp->imp_lock the saved open request (fd->fd_req) loses its
//      PTL_RPC_FL_REPLAY flag.  When the open has a transno and the close
//      has none, the close copies that transno and is handed to
//      ptlrpc_retain_replayable_request().
//   3. ptlrpc_req_finished() is called on fd->fd_req and on req, the handle
//      cookie is overwritten with DEAD_HANDLE_MAGIC, file->private_data is
//      cleared and fd goes back to ll_file_data_slab.
//
// NOTE(review): mdc_conn does not appear in the visible body; the connection
// passed to mdc_close() is taken from ll_i2sbi(inode) instead.
// NOTE(review): the embedded line numbers skip (e.g. 36-37, 48, 61, 90-91,
// 94, 103-106), so the rest of the signature, the error test after
// mdc_close(), the closing braces and the return statement are not visible
// in this listing - confirm the exact branch structure against the full file.
35 static int ll_mdc_close(struct lustre_handle *mdc_conn, struct inode *inode,
38 struct ll_file_data *fd = file->private_data;
39 struct ptlrpc_request *req = NULL;
41 struct obd_import *imp;
45 /* Complete the open request and remove it from replay list */
46 rc = mdc_close(&ll_i2sbi(inode)->ll_mdc_conn, inode->i_ino,
47 inode->i_mode, &fd->fd_mdshandle, &req);
49 CERROR("inode %lu close failed: rc = %d\n", inode->i_ino, rc);
// The import is read from the saved open request; its imp_lock is held
// across the flag and transno updates that follow.
51 imp = fd->fd_req->rq_import;
53 spin_lock_irqsave(&imp->imp_lock, flags);
55 DEBUG_REQ(D_HA, fd->fd_req, "matched open req %p", fd->fd_req);
57 /* We held on to the request for replay until we saw a close for that
58 * file. Now that we've closed it, it gets replayed on the basis of
59 * its transno only. */
60 fd->fd_req->rq_flags &= ~PTL_RPC_FL_REPLAY;
62 if (fd->fd_req->rq_transno) {
63 /* This open created a file, so it needs replay as a
64 * normal transaction now. Our reference to it now
65 * effectively owned by the imp_replay_list, and it'll
66 * be committed just like other transno-having
67 * requests from here on out. */
69 /* We now retain this close request, so that it is
70 * replayed if the open is replayed. We duplicate the
71 * transno, so that we get freed at the right time,
72 * and rely on the difference in xid to keep
73 * everything ordered correctly.
75 * But! If this close was already given a transno
76 * (because it caused real unlinking of an
77 * open-unlinked file, f.e.), then we'll be ordered on
78 * the basis of that and we don't need to do anything
80 if (!req->rq_transno) {
81 req->rq_transno = fd->fd_req->rq_transno;
82 ptlrpc_retain_replayable_request(req, imp);
84 spin_unlock_irqrestore(&imp->imp_lock, flags);
86 /* Should we free_committed now? we always free before
87 * replay, so it's probably a wash. We could check to
88 * see if the fd_req should already be committed, in
89 * which case we can avoid the whole retain_replayable
92 /* No transno means that we can just drop our ref. */
93 spin_unlock_irqrestore(&imp->imp_lock, flags);
95 ptlrpc_req_finished(fd->fd_req);
97 /* Do this after the fd_req->rq_transno check, because we don't want
98 * to bounce off zero references. */
99 ptlrpc_req_finished(req);
// Poison the handle and detach fd from the file before freeing it, so the
// freed structure is no longer reachable through file->private_data.
100 fd->fd_mdshandle.cookie = DEAD_HANDLE_MAGIC;
101 file->private_data = NULL;
102 kmem_cache_free(ll_file_data_slab, fd);
107 /* While this returns an error code, its caller fput() does not, so we need
108 * to make every effort to clean up all of our state here. Also, applications
109 * rarely check close errors and even if an error is returned they will not
110 * retry the close call.
// ll_file_release - `release` hook of ll_file_operations.
//
// Visible sequence: look up the per-open ll_file_data, flush and wait for the
// inode's dirty pages, close the OST object with obd_close() (passing the
// OST handle saved in fd->fd_ostdata), then close the MDS open with
// ll_mdc_close().
//
// NOTE(review): the embedded line numbers skip (e.g. 125, 135, 137, 142,
// 148-152), so the action taken when fd is NULL, the conditions around the
// handle copy and the error log, and the way rc and rc2 are combined into the
// return value are not visible in this listing.
112 static int ll_file_release(struct inode *inode, struct file *file)
114 struct ll_file_data *fd;
116 struct ll_sb_info *sbi = ll_i2sbi(inode);
117 struct ll_inode_info *lli = ll_i2info(inode);
118 struct lov_stripe_md *lsm = lli->lli_smd;
123 fd = (struct ll_file_data *)file->private_data;
124 if (!fd) /* no process opened the file after an mcreate */
127 /* we might not be able to get a valid handle on this file
128 * again so we really want to flush our write cache.. */
129 filemap_fdatasync(inode->i_mapping);
130 filemap_fdatawait(inode->i_mapping);
// Describe the object to close: id from the stripe MD, plus the OST handle
// that was stored in fd->fd_ostdata.
133 memset(&oa, 0, sizeof(oa));
134 oa.o_id = lsm->lsm_object_id;
136 oa.o_valid = OBD_MD_FLTYPE | OBD_MD_FLID;
138 memcpy(&oa.o_inline, fd->fd_ostdata, FD_OSTDATA_SIZE);
139 oa.o_valid |= OBD_MD_FLHANDLE;
141 rc = obd_close(&sbi->ll_osc_conn, &oa, lsm, NULL);
143 CERROR("inode %lu object close failed: rc = %d\n",
// The MDS close result is kept in a separate variable (rc2) from the OST
// close result (rc).
147 rc2 = ll_mdc_close(&sbi->ll_mdc_conn, inode, file);
// ll_local_open - build the per-open ll_file_data from a completed open
// intent.
//
// The open request is taken from it->it_data.  A zeroed ll_file_data is
// allocated from ll_file_data_slab, the MDS handle is copied out of reply
// buffer 1 (a struct mds_body), the request pointer is kept in fd->fd_req
// and the structure is attached to file->private_data, which is asserted to
// be NULL on entry.
//
// NOTE(review): the embedded line numbers skip 166-167 right after the
// allocation comment, so what happens when kmem_cache_alloc() fails is not
// visible here; the existing comment says it is not handled well.
154 static int ll_local_open(struct file *file, struct lookup_intent *it)
156 struct ptlrpc_request *req = it->it_data;
157 struct ll_file_data *fd;
158 struct mds_body *body = lustre_msg_buf(req->rq_repmsg, 1);
161 LASSERT(!file->private_data);
163 fd = kmem_cache_alloc(ll_file_data_slab, SLAB_KERNEL);
164 /* We can't handle this well without reorganizing ll_file_open and
165 * ll_mdc_close, so don't even try right now. */
168 memset(fd, 0, sizeof(*fd));
170 memcpy(&fd->fd_mdshandle, &body->handle, sizeof(body->handle));
// Same pointer as the local `req`; ll_mdc_close() later reads it back from
// fd->fd_req.
171 fd->fd_req = it->it_data;
172 file->private_data = fd;
// ll_osc_open - open the file's object(s) on the OST side.
//
// Fills an obdo with the object id from lsm, mode S_IFREG and the listed
// valid bits, calls obd_open(), clears O_LOV_DELAY_CREATE from the file
// flags, copies attributes from the obdo into the inode with
// obdo_to_inode(), and, if the reply carries OBD_MD_FLHANDLE, saves the OST
// handle in fd->fd_ostdata for later use.
//
// NOTE(review): the embedded line numbers skip (e.g. 181-187, 193-195,
// 198-199, 202-207), so how `oa` is obtained and released, the error test
// after obd_open(), the tail of the obdo_to_inode() flag list and the return
// value are not visible in this listing.
177 static int ll_osc_open(struct lustre_handle *conn, struct inode *inode,
178 struct file *file, struct lov_stripe_md *lsm)
180 struct ll_file_data *fd = file->private_data;
188 oa->o_id = lsm->lsm_object_id;
189 oa->o_mode = S_IFREG;
190 oa->o_valid = (OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLBLOCKS |
191 OBD_MD_FLMTIME | OBD_MD_FLCTIME);
192 rc = obd_open(conn, oa, lsm, NULL);
// The OST open has been issued, so the delayed-create marker is dropped.
196 file->f_flags &= ~O_LOV_DELAY_CREATE;
197 obdo_to_inode(inode, oa, OBD_MD_FLBLOCKS | OBD_MD_FLMTIME |
200 if (oa->o_valid & OBD_MD_FLHANDLE)
201 memcpy(fd->fd_ostdata, obdo_handle(oa), FD_OSTDATA_SIZE);
209 /* Caller must hold lli_open_sem to protect lli->lli_smd from changing and
210 * duplicate objects from being created. We only install lsm to lli_smd if
211 * the mdc open was successful (hence stored stripe MD on MDS), otherwise
212 * other nodes could try to create different objects for the same file.
// ll_create_obj - create the OST object(s) for a file and record the
// resulting stripe MD on the MDS.
//
// Visible sequence: fill an obdo (mode S_IFREG | 0600, id taken from
// inode->i_ino), call obd_create() which may update lsm, pack the stripe MD
// into wire form with obd_packmd(), send it to the MDS through
// mdc_setattr() with ia_valid = ATTR_FROM_OPEN, then free the wire MD.  The
// GOTO(out_destroy, rc) failure paths lead to obd_destroy() on the new
// object and obd_free_memmd() on lsm.
//
// NOTE(review): `file` is not used in the visible body.
// NOTE(review): the declaration `lmm_size = 0;;` carries a stray second
// semicolon.
// NOTE(review): the embedded line numbers skip (e.g. 220-221, 223-228,
// 235-236, 241, 243-244, 246-250, 253, 255-257, 264, 266, 269-270, 272,
// 274-282, 288, 290-292), so the obdo allocation, the uid/gid assignments,
// the success path that installs lsm, the out_destroy label itself and the
// return value are not visible in this listing.
214 static int ll_create_obj(struct lustre_handle *conn, struct inode *inode,
215 struct file *file, struct lov_stripe_md *lsm)
217 struct ptlrpc_request *req = NULL;
218 struct ll_inode_info *lli = ll_i2info(inode);
219 struct lov_mds_md *lmm = NULL;
222 int rc, err, lmm_size = 0;;
229 oa->o_mode = S_IFREG | 0600;
230 oa->o_id = inode->i_ino;
231 /* Keep these 0 for now, because chown/chgrp does not change the
232 * ownership on the OST, and we don't want to allow BA OST NFS
233 * users to access these objects by mistake.
237 oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLMODE |
238 OBD_MD_FLUID | OBD_MD_FLGID;
240 rc = obd_create(conn, oa, &lsm, NULL);
242 CERROR("error creating objects for inode %lu: rc = %d\n",
245 CERROR("obd_create returned invalid rc %d\n", rc);
251 LASSERT(lsm && lsm->lsm_object_id);
252 rc = obd_packmd(conn, &lmm, lsm);
254 GOTO(out_destroy, rc);
258 /* Save the stripe MD with this file on the MDS */
259 memset(&iattr, 0, sizeof(iattr));
260 iattr.ia_valid = ATTR_FROM_OPEN;
261 rc = mdc_setattr(&ll_i2sbi(inode)->ll_mdc_conn, inode, &iattr,
262 lmm, lmm_size, &req);
// The setattr request is released right after the call; only rc is used
// afterwards in the visible lines.
263 ptlrpc_req_finished(req);
265 obd_free_wiremd(conn, &lmm);
267 /* If we couldn't complete mdc_open() and store the stripe MD on the
268 * MDS, we need to destroy the objects now or they will be leaked.
271 CERROR("error: storing stripe MD for %lu: rc %d\n",
273 GOTO(out_destroy, rc);
// Cleanup: identify the just-created object by id and destroy it.  The
// destroy result goes to `err`, separate from rc.
283 obdo_from_inode(oa, inode, OBD_MD_FLTYPE);
284 oa->o_id = lsm->lsm_object_id;
285 oa->o_valid |= OBD_MD_FLID;
286 err = obd_destroy(conn, oa, lsm, NULL);
287 obd_free_memmd(conn, &lsm);
289 CERROR("error uncreating inode %lu objects: rc %d\n",
294 /* Open a file, and (for the very first open) create objects on the OSTs at
295 * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
296 * creation or open until ll_lov_setstripe() ioctl is called. We grab
297 * lli_open_sem to ensure no other process will create objects, send the
298 * stripe MD to the MDS, or try to destroy the objects if that fails.
300 * If we already have the stripe MD locally then we don't request it in
301 * mdc_open(), by passing a lmm_size = 0.
303 * It is up to the application to ensure no other processes open this file
304 * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
305 * used. We might be able to avoid races of that sort by getting lli_open_sem
306 * before returning in the O_LOV_DELAY_CREATE case and dropping it here
307 * or in ll_file_release(), but I'm not sure that is desirable/necessary.
// Defined outside the visible part of this file; ll_file_open() calls it with
// IT_OPEN_OPEN to check the result carried by the lookup intent.
309 extern int ll_it_open_error(int phase, struct lookup_intent *it);
// ll_file_open - open hook for regular files.
//
// Visible sequence: fetch the lookup intent attached to the dentry
// (LL_GET_INTENT), check it with ll_it_open_error(IT_OPEN_OPEN), build the
// per-open state with ll_local_open(), register it for open replay with
// mdc_set_open_replay_data(), then deal with OST objects: when
// O_LOV_DELAY_CREATE is set only a debug message is visible; otherwise
// lli_open_sem is taken, ll_create_obj() is called with a NULL lsm, and the
// object is opened through ll_osc_open().  ll_mdc_close() appears at the end
// as cleanup.
//
// NOTE(review): the embedded line numbers skip (e.g. 312, 318-320, 324-326,
// 328-330, 332-334, 337-338, 340, 343-345, 347, 349-352, 354-358, 360-362),
// so the conditions guarding each step, the assignment of lsm and the return
// paths are not visible in this listing.
311 static int ll_file_open(struct inode *inode, struct file *file)
313 struct ll_sb_info *sbi = ll_i2sbi(inode);
314 struct ll_inode_info *lli = ll_i2info(inode);
315 struct lustre_handle *conn = ll_i2obdconn(inode);
316 struct lookup_intent *it;
317 struct lov_stripe_md *lsm;
321 CDEBUG(D_VFSTRACE, "VFS Op\n");
322 LL_GET_INTENT(file->f_dentry, it);
323 rc = ll_it_open_error(IT_OPEN_OPEN, it);
327 rc = ll_local_open(file, it);
// ll_local_open() stored the ll_file_data in file->private_data.
331 mdc_set_open_replay_data((struct ll_file_data *)file->private_data);
335 if (file->f_flags & O_LOV_DELAY_CREATE) {
336 CDEBUG(D_INODE, "delaying object creation\n");
// lli_open_sem is held around object creation; both visible paths below
// release it with up().
339 down(&lli->lli_open_sem);
341 rc = ll_create_obj(conn, inode, file, NULL);
342 up(&lli->lli_open_sem);
346 CERROR("warning: stripe already set on ino %lu\n",
348 up(&lli->lli_open_sem);
353 rc = ll_osc_open(conn, inode, file, lsm);
359 ll_mdc_close(&sbi->ll_mdc_conn, inode, file);
364 * really does the getattr on the inode and updates its fields
// ll_inode_getattr - fetch object attributes from the OST and copy them into
// the inode.
//
// Builds a zeroed obdo with the object id from lsm and a valid mask asking
// for size, blocks, mtime and ctime.  When ostdata is non-NULL it is copied
// into oa.o_inline and OBD_MD_FLHANDLE is set.  After obd_getattr() the
// size, blocks, mtime and ctime fields are applied with obdo_to_inode().
//
// NOTE(review): the debug message prints inode->i_size for both values of
// "size %Lu/%Lu"; a different second value may have been intended.
// NOTE(review): the embedded line numbers skip (e.g. 367-368, 370-376, 379,
// 386-387, 389-391, 394, 397-399), so the remaining parameter list, the
// error test after obd_getattr() and the return value are not visible here.
366 int ll_inode_getattr(struct inode *inode, struct lov_stripe_md *lsm,
369 struct ll_sb_info *sbi = ll_i2sbi(inode);
377 memset(&oa, 0, sizeof oa);
378 oa.o_id = lsm->lsm_object_id;
380 oa.o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLSIZE |
381 OBD_MD_FLBLOCKS | OBD_MD_FLMTIME | OBD_MD_FLCTIME;
383 if (ostdata != NULL) {
384 memcpy(&oa.o_inline, ostdata, FD_OSTDATA_SIZE);
385 oa.o_valid |= OBD_MD_FLHANDLE;
388 rc = obd_getattr(&sbi->ll_osc_conn, &oa, lsm);
392 obdo_to_inode(inode, &oa, OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
393 OBD_MD_FLMTIME | OBD_MD_FLCTIME);
395 CDEBUG(D_INODE, "objid "LPX64" size %Lu/%Lu\n", lsm->lsm_object_id,
396 inode->i_size, inode->i_size);
401 * we've acquired a lock and need to see if we should perform a getattr
402 * to update the file size that may have been updated by others that had
403 * their locks canceled.
// ll_size_validate - refresh the inode attributes from the OST once per
// validity period, tracked by the LLI_F_DID_GETATTR bit.
//
// The bit is tested first without the semaphore, then lli_getattr_sem is
// taken and the bit is tested again before ll_inode_getattr() is called, so
// a caller that waited on the semaphore does not repeat a getattr that
// another caller has already completed.  The bit is set after the getattr.
// ll_lock_callback() clears it again on LDLM_CB_CANCELING.
//
// NOTE(review): `extent` is not used in the visible body.
// NOTE(review): the embedded line numbers skip (e.g. 407, 409-411, 413-414,
// 416, 419, 421-422, 424-426), so the early return, the check of rc before
// set_bit() and the return value are not visible in this listing.
405 static int ll_size_validate(struct inode *inode, struct lov_stripe_md *lsm,
406 char *ostdata, struct ldlm_extent *extent)
408 struct ll_inode_info *lli = ll_i2info(inode);
412 if (test_bit(LLI_F_DID_GETATTR, &lli->lli_flags))
415 down(&lli->lli_getattr_sem);
417 if (!test_bit(LLI_F_DID_GETATTR, &lli->lli_flags)) {
418 rc = ll_inode_getattr(inode, lsm, ostdata);
420 set_bit(LLI_F_DID_GETATTR, &lli->lli_flags);
423 up(&lli->lli_getattr_sem);
428 * some callers, notably truncate, really don't want i_size set based
429 * on the size returned by the getattr, or lock acquisition in
// ll_extent_lock_no_validate - enqueue an extent lock on the file's objects
// without refreshing the inode size afterwards.
//
// lockh must be zeroed on entry (asserted).  Locking is bypassed when the
// open file has LL_FILE_IGNORE_LOCK set or the superblock has LL_SBI_NOLCK
// set; fd may be NULL, in which case only the superblock flag is consulted.
// Otherwise obd_enqueue() is called for an LDLM_EXTENT lock of the given
// mode, with ll_lock_callback as the callback and the inode as its data.
//
// NOTE(review): `sizeof(extent)` is the size of a pointer, because extent is
// declared as `struct ldlm_extent *`.  `sizeof(*extent)` looks like what was
// meant - confirm against what obd_enqueue() does with this length.
// NOTE(review): the embedded line numbers skip (e.g. 436, 438-440, 446,
// 454-456), so the declaration of rc and flags, the value returned on the
// bypass path and the final return are not visible in this listing.
432 int ll_extent_lock_no_validate(struct ll_file_data *fd, struct inode *inode,
433 struct lov_stripe_md *lsm,
434 int mode, struct ldlm_extent *extent,
435 struct lustre_handle *lockh)
437 struct ll_sb_info *sbi = ll_i2sbi(inode);
441 LASSERT(lockh->addr == 0 && lockh->cookie == 0);
443 /* XXX phil: can we do this? won't it screw the file size up? */
444 if ((fd && (fd->fd_flags & LL_FILE_IGNORE_LOCK)) ||
445 (sbi->ll_flags & LL_SBI_NOLCK))
448 CDEBUG(D_INFO, "Locking inode %lu, start "LPU64" end "LPU64"\n",
449 inode->i_ino, extent->start, extent->end);
451 rc = obd_enqueue(&sbi->ll_osc_conn, lsm, NULL, LDLM_EXTENT, extent,
452 sizeof(extent), mode, &flags, ll_lock_callback,
453 inode, sizeof(*inode), lockh);
458 * this grabs a lock and manually implements behaviour that makes it look
459 * like the OST is returning the file size with each lock acquisition
// ll_extent_lock - take an extent lock and then make sure the inode size is
// current.
//
// Calls ll_extent_lock_no_validate(); when that returns ELDLM_OK it runs
// ll_size_validate(), passing fd->fd_ostdata or NULL when fd is NULL.  The
// visible failure handling drops the lock again with ll_extent_unlock() and
// sets rc to ELDLM_GETATTR_ERROR.
//
// NOTE(review): the embedded line numbers skip (e.g. 465-468, 470, 473-474,
// 477-481), so the last argument of ll_size_validate(), the test that
// selects the unlock path and the return statement are not visible here.
461 int ll_extent_lock(struct ll_file_data *fd, struct inode *inode,
462 struct lov_stripe_md *lsm,
463 int mode, struct ldlm_extent *extent,
464 struct lustre_handle *lockh)
469 rc = ll_extent_lock_no_validate(fd, inode, lsm, mode, extent, lockh);
471 if (rc == ELDLM_OK) {
472 rc = ll_size_validate(inode, lsm, fd ? fd->fd_ostdata : NULL,
475 ll_extent_unlock(fd, inode, lsm, mode, lockh);
476 rc = ELDLM_GETATTR_ERROR;
// ll_extent_unlock - release an extent lock taken through ll_extent_lock()
// or ll_extent_lock_no_validate().
//
// Uses the same bypass test as the lock side (LL_FILE_IGNORE_LOCK on the
// open file, LL_SBI_NOLCK on the superblock; fd may be NULL).  Otherwise the
// lock is cancelled with obd_cancel().
//
// NOTE(review): the embedded line numbers skip (e.g. 486, 488-490, 494-495,
// 497-499), so the value returned on the bypass path and the final return
// are not visible in this listing.
483 int ll_extent_unlock(struct ll_file_data *fd, struct inode *inode,
484 struct lov_stripe_md *lsm, int mode,
485 struct lustre_handle *lockh)
487 struct ll_sb_info *sbi = ll_i2sbi(inode);
491 /* XXX phil: can we do this? won't it screw the file size up? */
492 if ((fd && (fd->fd_flags & LL_FILE_IGNORE_LOCK)) ||
493 (sbi->ll_flags & LL_SBI_NOLCK))
496 rc = obd_cancel(&sbi->ll_osc_conn, lsm, mode, lockh);
// ll_remove_suid - strip the set-id bits from inode->i_mode unless the
// caller has CAP_FSETID.
//
// Only i_mode in memory is changed by the visible lines.
// NOTE(review): the embedded line numbers skip (e.g. 502-504, 507,
// 513-514), so the declaration of `mode` and anything following the final
// comment are not visible in this listing.
501 static inline void ll_remove_suid(struct inode *inode)
505 /* set S_ISGID if S_IXGRP is set, and always set S_ISUID */
// (i_mode & S_IXGRP) is either 0 or S_IXGRP; multiplying by
// S_ISGID / S_IXGRP turns that into 0 or S_ISGID without a branch.
506 mode = (inode->i_mode & S_IXGRP)*(S_ISGID/S_IXGRP) | S_ISUID;
508 /* was any of the uid bits set? */
509 mode &= inode->i_mode;
510 if (mode && !capable(CAP_FSETID)) {
511 inode->i_mode &= ~mode;
512 // XXX careful here - we cannot change the size
// ll_update_atime - bring inode->i_atime up to the current time after a
// read.
//
// Two strategies are visible.  The first fills a struct iattr with the
// current time (CURRENT_TIME before kernel 2.5.0, CURRENT_TIME.tv_sec
// otherwise) and ATTR_ATIME, returns early when the atime is unchanged or
// the inode is read-only or noatime, and otherwise calls
// ll_inode_setattr().  The second simply assigns CURRENT_TIME to
// inode->i_atime.
//
// NOTE(review): the embedded line numbers skip (e.g. 517-520, 523, 525, 527,
// 531, 534, 537-538), so the preprocessor lines are missing from this
// listing.  Presumably an #ifdef/#else selects between the two strategies
// above - confirm against the full file.
516 static void ll_update_atime(struct inode *inode)
521 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
522 attr.ia_atime = CURRENT_TIME;
524 attr.ia_atime = CURRENT_TIME.tv_sec;
526 attr.ia_valid = ATTR_ATIME;
528 if (inode->i_atime == attr.ia_atime) return;
529 if (IS_RDONLY(inode)) return;
530 if (IS_NOATIME(inode)) return;
532 /* ll_inode_setattr() sets inode->i_atime from attr.ia_atime */
533 ll_inode_setattr(inode, &attr, 0);
535 /* update atime, but don't explicitly write it out just this change */
536 inode->i_atime = CURRENT_TIME;
// ll_lock_callback - DLM callback passed to obd_enqueue() by
// ll_extent_lock_no_validate(); `data` is the inode given at enqueue time.
//
// Two cases are visible:
//   LDLM_CB_BLOCKING  - convert the lock to a handle and cancel it with
//                       ldlm_cli_cancel(); a failure is logged.
//   LDLM_CB_CANCELING - write back and wait for the inode's dirty pages,
//                       clear LLI_F_DID_GETATTR so the next lock holder
//                       re-fetches attributes, and drop every cached page.
//
// NOTE(review): `new` is not used in the visible body.
// NOTE(review): the embedded line numbers skip (e.g. 542, 546-547, 549-553,
// 557, 559, 570-576), so the switch statement itself, the break statements,
// any default case and the return value are not visible in this listing.
540 int ll_lock_callback(struct ldlm_lock *lock, struct ldlm_lock_desc *new,
541 void *data, int flag)
543 struct inode *inode = data;
544 struct ll_inode_info *lli = ll_i2info(inode);
545 struct lustre_handle lockh = { 0, 0 };
548 CDEBUG(D_VFSTRACE, "VFS Op\n");
554 case LDLM_CB_BLOCKING:
555 ldlm_lock2handle(lock, &lockh);
556 rc = ldlm_cli_cancel(&lockh);
558 CERROR("ldlm_cli_cancel failed: %d\n", rc);
560 case LDLM_CB_CANCELING:
561 /* FIXME: we could be given 'canceling intents' so that we
562 * could know to write-back or simply throw away the pages
563 * based on if the cancel comes from a desire to, say,
564 * read or truncate.. */
565 CDEBUG(D_INODE, "invalidating obdo/inode %lu\n", inode->i_ino);
// Order matters: dirty data is written out before the pages are discarded.
566 filemap_fdatasync(inode->i_mapping);
567 filemap_fdatawait(inode->i_mapping);
568 clear_bit(LLI_F_DID_GETATTR, &lli->lli_flags);
569 truncate_inode_pages(inode->i_mapping, 0);
// ll_file_read - read from a file through the page cache under a PR extent
// lock.
//
// Visible sequence: lock the byte range [*ppos, *ppos + count - 1] in LCK_PR
// mode with ll_extent_lock(), publish the range and the current task on
// lli->lli_read_extents (under lli_read_extent_lock) for the duration of
// generic_file_read(), remove it again, call ll_update_atime(), and release
// the lock.
//
// rextent lives on this function's stack, so it has to be unlinked from the
// list before returning; the visible list_del() does that.
//
// NOTE(review): the embedded line numbers skip (e.g. 579-580, 587-589, 591,
// 594-596, 599, 603-606, 612, 616, 620-621, 623-624, 626-628), so the ppos
// parameter declaration, the count == 0 shortcut, the lock-failure path, the
// condition before ll_update_atime() and the return value are not visible.
578 static ssize_t ll_file_read(struct file *filp, char *buf, size_t count,
581 struct ll_file_data *fd = filp->private_data;
582 struct inode *inode = filp->f_dentry->d_inode;
583 struct ll_inode_info *lli = ll_i2info(inode);
584 struct lov_stripe_md *lsm = lli->lli_smd;
585 struct lustre_handle lockh = { 0, 0 };
586 struct ll_read_extent rextent;
590 CDEBUG(D_VFSTRACE, "VFS Op\n");
592 /* "If nbyte is 0, read() will return 0 and have no other results."
593 * -- Single Unix Spec */
// Inclusive byte range covered by this read.
597 rextent.re_extent.start = *ppos;
598 rextent.re_extent.end = *ppos + count - 1;
600 err = ll_extent_lock(fd, inode, lsm,
601 LCK_PR, &rextent.re_extent, &lockh);
602 if (err != ELDLM_OK && err != ELDLM_LOCK_MATCHED) {
607 /* XXX tell ll_readpage what pages have a PR lock.. */
608 rextent.re_task = current;
609 spin_lock(&lli->lli_read_extent_lock);
610 list_add(&rextent.re_lli_item, &lli->lli_read_extents);
611 spin_unlock(&lli->lli_read_extent_lock);
613 CDEBUG(D_INFO, "Reading inode %lu, "LPSZ" bytes, offset %Ld\n",
614 inode->i_ino, count, *ppos);
615 retval = generic_file_read(filp, buf, count, ppos);
617 spin_lock(&lli->lli_read_extent_lock);
618 list_del(&rextent.re_lli_item);
619 spin_unlock(&lli->lli_read_extent_lock);
622 ll_update_atime(inode);
625 ll_extent_unlock(fd, inode, lsm, LCK_PR, &lockh);
630 * Write to a file (through the page cache).
// ll_file_write - write to a file through the page cache under a PW extent
// lock.
//
// For O_APPEND on a non-block-device inode the lock extends to
// OBD_OBJECT_EOF and, once the lock is held, *ppos is moved to
// inode->i_size.  Otherwise the lock covers [*ppos, *ppos + count - 1].
// The data is written with generic_file_write() and the lock is released
// afterwards.
//
// NOTE(review): the embedded line numbers skip (e.g. 632, 634, 640-643,
// 645-647, 650, 652, 655-656, 659-662, 665, 668, 670-671, 673-674), so the
// return type line, the check announced by the POSIX comment, the append
// extent start, the lock-failure path and the return value are not visible.
633 ll_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos)
635 struct ll_file_data *fd = file->private_data;
636 struct inode *inode = file->f_dentry->d_inode;
637 struct lustre_handle lockh = { 0, 0 };
638 struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
639 struct ldlm_extent extent;
644 /* POSIX, but surprised the VFS doesn't check this already */
648 CDEBUG(D_VFSTRACE, "VFS Op\n");
649 if (!S_ISBLK(inode->i_mode) && file->f_flags & O_APPEND) {
651 extent.end = OBD_OBJECT_EOF;
653 extent.start = *ppos;
654 extent.end = *ppos + count - 1;
657 err = ll_extent_lock(fd, inode, lsm, LCK_PW, &extent, &lockh);
658 if (err != ELDLM_OK && err != ELDLM_LOCK_MATCHED) {
// The append position is read only after the lock is held; ll_extent_lock()
// runs ll_size_validate() before returning ELDLM_OK.
663 if (!S_ISBLK(inode->i_mode) && file->f_flags & O_APPEND)
664 *ppos = inode->i_size;
666 CDEBUG(D_INFO, "Writing inode %lu, "LPSZ" bytes, offset %Lu\n",
667 inode->i_ino, count, *ppos);
669 retval = generic_file_write(file, buf, count, ppos);
672 ll_extent_unlock(fd, inode, lsm, LCK_PW, &lockh);
// ll_lov_setstripe - handler for the LL_IOC_LOV_SETSTRIPE ioctl.
//
// Runs under lli_open_sem.  Visible paths:
//   - stripe already set: release the semaphore, log an error and, if the
//     file was opened with O_LOV_DELAY_CREATE, perform the deferred
//     ll_osc_open().
//   - otherwise: obd_iocontrol(LL_IOC_LOV_SETSTRIPE) is called with &lsm and
//     the user argument, ll_create_obj() is called with that lsm, and the
//     object is opened with ll_osc_open() using lli->lli_smd.
//
// NOTE(review): the embedded line numbers skip (e.g. 677-678, 682-684,
// 686-687, 693-699, 701, 703-704, 707-708, 710-711, 713-714), so the `arg`
// parameter declaration, the test that detects an existing stripe, the
// conditions around each up() and obd_free_memmd(), and all return values
// are not visible in this listing.
676 static int ll_lov_setstripe(struct inode *inode, struct file *file,
679 struct ll_inode_info *lli = ll_i2info(inode);
680 struct lustre_handle *conn = ll_i2obdconn(inode);
681 struct lov_stripe_md *lsm;
685 down(&lli->lli_open_sem);
688 up(&lli->lli_open_sem);
689 CERROR("stripe already set for ino %lu\n", inode->i_ino);
690 /* If we haven't already done the open, do so now */
691 if (file->f_flags & O_LOV_DELAY_CREATE) {
692 int rc2 = ll_osc_open(conn, inode, file, lsm);
700 rc = obd_iocontrol(LL_IOC_LOV_SETSTRIPE, conn, 0, &lsm, (void *)arg);
702 up(&lli->lli_open_sem);
705 rc = ll_create_obj(conn, inode, file, lsm);
706 up(&lli->lli_open_sem);
709 obd_free_memmd(conn, &lsm);
712 rc = ll_osc_open(conn, inode, file, lli->lli_smd);
// ll_lov_getstripe - handler for the LL_IOC_LOV_GETSTRIPE ioctl.
//
// Passes the inode's current stripe MD (lli_smd) and the user argument to
// obd_iocontrol(LL_IOC_LOV_GETSTRIPE) and returns its result.
//
// NOTE(review): the embedded line numbers skip 720-723 between the
// declarations and the return, so any check on lsm before the call is not
// visible in this listing.
716 static int ll_lov_getstripe(struct inode *inode, unsigned long arg)
718 struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
719 struct lustre_handle *conn = ll_i2obdconn(inode);
724 return obd_iocontrol(LL_IOC_LOV_GETSTRIPE, conn, 0, lsm, (void *)arg);
// ll_file_ioctl - ioctl hook of ll_file_operations.
//
// Visible handling:
//   - commands whose upper bits equal 'T' << 8 are recognised as tty ioctls;
//   - LL_IOC_GETFLAGS copies fd->fd_flags to user space with put_user();
//   - LL_IOC_SETFLAGS / LL_IOC_CLRFLAGS read an int from user space and set
//     or clear those bits in fd->fd_flags;
//   - LL_IOC_LOV_SETSTRIPE / LL_IOC_LOV_GETSTRIPE go to ll_lov_setstripe()
//     and ll_lov_getstripe();
//   - the last visible statement forwards the command to obd_iocontrol() on
//     the inode's OBD connection.
//
// NOTE(review): the embedded line numbers skip (e.g. 728-729, 732-733, 735,
// 737-739, 748, 750-751, 754, 756, 761, 771-772, 775-776), so the `arg`
// parameter declaration, the result returned for tty ioctls, the switch
// statement, the get_user() failure return and the placement of the EXT2_IOC
// case labels cannot be confirmed here.  Comment delimiters are among the
// missing lines, so those labels may sit inside a comment.
727 int ll_file_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
730 struct ll_file_data *fd = file->private_data;
731 struct lustre_handle *conn;
734 CDEBUG(D_VFSTRACE, "VFS Op\n");
736 if ((cmd & 0xffffff00) == ((int)'T') << 8) /* tty ioctls */
740 case LL_IOC_GETFLAGS:
741 /* Get the current value of the file flags */
742 return put_user(fd->fd_flags, (int *)arg);
743 case LL_IOC_SETFLAGS:
744 case LL_IOC_CLRFLAGS:
745 /* Set or clear specific file flags */
746 /* XXX This probably needs checks to ensure the flags are
747 * not abused, and to handle any flag side effects.
749 if (get_user(flags, (int *) arg))
752 if (cmd == LL_IOC_SETFLAGS)
753 fd->fd_flags |= flags;
755 fd->fd_flags &= ~flags;
757 case LL_IOC_LOV_SETSTRIPE:
758 return ll_lov_setstripe(inode, file, arg);
759 case LL_IOC_LOV_GETSTRIPE:
760 return ll_lov_getstripe(inode, arg);
762 /* We need to special case any other ioctls we want to handle,
763 * to send them to the MDS/OST as appropriate and to properly
764 * network encode the arg field.
765 case EXT2_IOC_GETFLAGS:
766 case EXT2_IOC_SETFLAGS:
767 case EXT2_IOC_GETVERSION_OLD:
768 case EXT2_IOC_GETVERSION_NEW:
769 case EXT2_IOC_SETVERSION_OLD:
770 case EXT2_IOC_SETVERSION_NEW:
773 conn = ll_i2obdconn(inode);
774 return obd_iocontrol(cmd, conn, 0, NULL, (void *)arg);
// ll_file_seek - llseek hook of ll_file_operations.
//
// origin 2 (SEEK_END): a PR extent lock over [0, OBD_OBJECT_EOF] is taken
// with ll_extent_lock(), which also validates the inode size, and
// inode->i_size is added to offset.
// origin 1 (SEEK_CUR): file->f_pos is added to offset.
// The resulting offset is accepted when it lies in [0, s_maxbytes]; f_pos is
// updated only when it actually changes, and on kernels before 2.5.0
// f_version is bumped from the global `event` counter.  The PR lock is
// released with ll_extent_unlock() at the end.
//
// NOTE(review): the embedded line numbers skip (e.g. 779, 784-786, 789,
// 793-796, 800-802, 807, 809-814, 816-817), so the lock-failure path, the
// value returned for an out-of-range offset, the condition guarding the
// final unlock and the return statement are not visible in this listing.
778 loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
780 struct inode *inode = file->f_dentry->d_inode;
781 struct ll_file_data *fd = file->private_data;
782 struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
783 struct lustre_handle lockh = {0, 0};
787 CDEBUG(D_VFSTRACE, "VFS Op\n");
788 if (origin == 2) { /* SEEK_END */
790 struct ldlm_extent extent = {0, OBD_OBJECT_EOF};
791 err = ll_extent_lock(fd, inode, lsm, LCK_PR, &extent, &lockh);
792 if (err != ELDLM_OK && err != ELDLM_LOCK_MATCHED) {
797 offset += inode->i_size;
798 } else if (origin == 1) { /* SEEK_CUR */
799 offset += file->f_pos;
803 if (offset >= 0 && offset <= inode->i_sb->s_maxbytes) {
804 if (offset != file->f_pos) {
805 file->f_pos = offset;
806 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
808 file->f_version = ++event;
815 ll_extent_unlock(fd, inode, lsm, LCK_PR, &lockh);
// ll_fsync - write back the inode's dirty pages and wait for the writes to
// complete, using filemap_fdatasync() followed by filemap_fdatawait() on
// dentry->d_inode->i_mapping.
//
// NOTE(review): `file` and `data` are not used in the visible body.
// NOTE(review): the embedded line numbers skip (e.g. 820-824, 829, 831,
// 833-835), so the declaration of ret, any test between the two calls and
// the return statement are not visible in this listing.
819 int ll_fsync(struct file *file, struct dentry *dentry, int data)
825 * filemap_fdata{sync,wait} are also called at PW lock cancelation so
826 * we know that they can only find data to writeback here if we are
827 * still holding the PW lock that covered the dirty pages. XXX we
828 * should probably get a reference on it, though, just to be clear.
830 ret = filemap_fdatasync(dentry->d_inode->i_mapping);
832 ret = filemap_fdatawait(dentry->d_inode->i_mapping);
// ll_inode_revalidate - refresh an inode's attributes from the MDS and its
// size from the OSTs.  Installed as `revalidate` in the inode_operations
// tables of this file and called directly by ll_getattr().
//
// Part 1 (MDS): unless the dentry carries an intent with a lock mode, or
// ll_have_md_lock() reports that a metadata lock is held, an mdc_getattr()
// RPC is sent.  For regular files the stripe EA is requested as well
// (OBD_MD_FLEASIZE, with the reply buffer size from obd_size_wiremd()).
// Size and blocks bits returned by the MDS for a regular file are logged and
// masked off before ll_update_inode() is applied.
//
// Part 2 (OST): when the inode has stripe MD, a PR extent lock over
// [0, OBD_OBJECT_EOF] is taken with a NULL fd and released again; taking it
// goes through ll_extent_lock(), which validates the inode size.
//
// NOTE(review): the embedded line numbers skip (e.g. 838, 841-842, 844,
// 846-848, 853, 860-861, 867, 870, 873-875, 877, 882-883, 887, 890-891,
// 894-896, 901-902, 905-906, 910, 912-914), so the condition before the
// "REPORT THIS LINE" message, the declarations of rc, datalen and err, the
// error returns and the final return value are not visible in this listing.
837 int ll_inode_revalidate(struct dentry *dentry)
839 struct inode *inode = dentry->d_inode;
840 struct lov_stripe_md *lsm;
843 CDEBUG(D_VFSTRACE, "VFS Op\n");
845 CERROR("REPORT THIS LINE TO PETER\n");
849 /* this is very tricky. it is unsafe to call ll_have_md_lock
850 when we have a referenced lock: because it may cause an RPC
851 below when the lock is marked CB_PENDING. That RPC may not
852 go out because someone else may be in another RPC waiting for
854 if (!(dentry->d_it && dentry->d_it->it_lock_mode) &&
855 !ll_have_md_lock(dentry)) {
856 struct ptlrpc_request *req = NULL;
857 struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
858 struct mds_body *body;
859 unsigned long valid = 0;
862 /* Why don't we update all valid MDS fields here, if we're
863 * doing an RPC anyways? -phil */
864 if (S_ISREG(inode->i_mode)) {
865 datalen = obd_size_wiremd(&sbi->ll_osc_conn, NULL);
866 valid |= OBD_MD_FLEASIZE;
868 rc = mdc_getattr(&sbi->ll_mdc_conn, inode->i_ino,
869 inode->i_mode, valid, datalen, &req);
871 CERROR("failure %d inode %lu\n", rc, inode->i_ino);
872 ptlrpc_req_finished(req);
876 body = lustre_msg_buf(req->rq_repmsg, 0);
// Size and blocks are not taken from the MDS for regular files; those bits
// are cleared so ll_update_inode() does not apply them.
878 if (S_ISREG(inode->i_mode) &&
879 body->valid & (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS)) {
880 CERROR("MDS sent back size for regular file\n");
881 body->valid &= ~(OBD_MD_FLSIZE | OBD_MD_FLBLOCKS);
// Reply buffer 1 is passed along only when the MDS marked the EA as valid.
884 if (body->valid & OBD_MD_FLEASIZE)
885 ll_update_inode(inode, body,
886 lustre_msg_buf(req->rq_repmsg, 1));
888 ll_update_inode(inode, body, NULL);
889 ptlrpc_req_finished(req);
892 lsm = ll_i2info(inode)->lli_smd;
893 if (!lsm) /* object not yet allocated, don't validate size */
897 * unfortunately stat comes in through revalidate and we don't
898 * differentiate this use from initial instantiation. we're
899 * also being wildly conservative and flushing write caches
900 * so that stat really returns the proper size.
903 struct ldlm_extent extent = {0, OBD_OBJECT_EOF};
904 struct lustre_handle lockh = {0, 0};
907 err = ll_extent_lock(NULL, inode, lsm, LCK_PR, &extent, &lockh);
908 if (err != ELDLM_OK && err != ELDLM_LOCK_MATCHED )
909 RETURN(-abs(err)); /* XXX can't be right */
911 ll_extent_unlock(NULL, inode, lsm, LCK_PR, &lockh);
// ll_getattr - getattr hook compiled only for kernels newer than 2.5.0.
//
// Revalidates the inode with ll_inode_revalidate() and then copies the inode
// fields (ino, mode, nlink, uid, gid, rdev, atime, mtime, ctime, size) into
// the stat structure.
//
// NOTE(review): as far as the visible directives show, the inner
// "< KERNEL_VERSION(2,5,0)" test sits inside the outer
// "> KERNEL_VERSION(2,5,0)" guard, which would mean the stat->dev assignment
// from inode->i_dev can never be selected.  Confirm against the #else and
// #endif lines.
// NOTE(review): the embedded line numbers skip (e.g. 918-920, 922, 924-925,
// 928, 939-941), so the stat parameter declaration, the test of `res` and
// the return statement are not visible in this listing.
916 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
917 static int ll_getattr(struct vfsmount *mnt, struct dentry *de,
921 struct inode *inode = de->d_inode;
923 res = ll_inode_revalidate(de);
926 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
927 stat->dev = inode->i_dev;
929 stat->ino = inode->i_ino;
930 stat->mode = inode->i_mode;
931 stat->nlink = inode->i_nlink;
932 stat->uid = inode->i_uid;
933 stat->gid = inode->i_gid;
934 stat->rdev = kdev_t_to_nr(inode->i_rdev);
935 stat->atime = inode->i_atime;
936 stat->mtime = inode->i_mtime;
937 stat->ctime = inode->i_ctime;
938 stat->size = inode->i_size;
// File operations table for regular files.  The visible entries wire up
// write, ioctl, release and llseek to functions in this file and use the
// kernel's generic_file_mmap for mmap.  Entries use the old GNU
// "field: value" initializer syntax.
// NOTE(review): the embedded line numbers skip 944, 947 and 951-952, so
// other entries (for example read and open) and the closing brace are not
// visible in this listing.
943 struct file_operations ll_file_operations = {
945 write: ll_file_write,
946 ioctl: ll_file_ioctl,
948 release: ll_file_release,
949 mmap: generic_file_mmap,
950 llseek: ll_file_seek,
// Inode operations table for regular files.  ll_setattr_raw and ll_truncate
// are not defined in the visible part of this file; ll_inode_revalidate is
// defined further up in this file.
// NOTE(review): the embedded line numbers skip 956, 959-960 and 962-963, so
// the entry selected for kernels newer than 2.5.0, the #else/#endif lines
// and the closing brace are not visible in this listing.
954 struct inode_operations ll_file_inode_operations = {
955 setattr_raw: ll_setattr_raw,
957 truncate: ll_truncate,
958 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
961 revalidate: ll_inode_revalidate,
// Inode operations table named for special inodes.  The visible entries
// match ll_file_inode_operations except that no truncate entry is visible.
// NOTE(review): the embedded line numbers skip 967 and 969-970, and the
// listing ends before the closing brace, so the full set of entries cannot
// be confirmed from here.
965 struct inode_operations ll_special_inode_operations = {
966 setattr_raw: ll_setattr_raw,
968 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
971 revalidate: ll_inode_revalidate,