1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Lustre Light Super operations
6 * Copyright (c) 2002, 2003 Cluster File Systems, Inc.
8 * This file is part of Lustre, http://www.lustre.org.
10 * Lustre is free software; you can redistribute it and/or
11 * modify it under the terms of version 2 of the GNU General Public
12 * License as published by the Free Software Foundation.
14 * Lustre is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with Lustre; if not, write to the Free Software
21 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 #define DEBUG_SUBSYSTEM S_LLITE
31 #include <sys/types.h>
32 #include <sys/queue.h>
40 #include "llite_lib.h"
/*
 * llu_iop_iodone: only the signature is visible in this listing.  The ioctx
 * argument carries the __IS_UNUSED tag, presumably an unused-parameter
 * annotation, so the function is not expected to read it.
 * NOTE(review): the body is elided here -- confirm the return value against
 * the full source before relying on it.
 */
42 int llu_iop_iodone(struct ioctx *ioctxp __IS_UNUSED)
/*
 * llu_extent_lock: take an extent lock on an inode and make sure a getattr
 * has been issued for that inode once.
 *
 * The lock itself comes from ll_extent_lock_no_validate(), which receives
 * fd, inode, lsm, mode, extent and lockh unchanged.  Afterwards the
 * LLI_F_DID_GETATTR bit in lli->lli_flags is tested once without the
 * semaphore and once more after down() on lli->lli_getattr_sem; when the
 * second test finds the bit clear, ll_inode_getattr() is called and the bit
 * is set.  The semaphore is released with up() at the end.
 *
 * fd may be NULL: in that case NULL is handed to ll_inode_getattr() instead
 * of &fd->fd_ost_och.
 *
 * NOTE(review): the statements between the visible lines (declarations,
 * error checks, returns) are elided in this listing.  The ll_extent_unlock()
 * call near the end looks like the cleanup for a failed getattr -- confirm
 * against the full source.
 */
48 * this grabs a lock and manually implements behaviour that makes it look
49 * like the OST is returning the file size with each lock acquisition
51 int llu_extent_lock(struct ll_file_data *fd, struct inode *inode,
52 struct lov_stripe_md *lsm,
53 int mode, struct ldlm_extent *extent,
54 struct lustre_handle *lockh)
57 struct ll_inode_info *lli = ll_i2info(inode);
/* plain lock acquisition; the getattr handling is layered on below */
61 rc = ll_extent_lock_no_validate(fd, inode, lsm, mode, extent, lockh);
65 /* always do a getattr for the first person to pop out of lock
66 * acquisition.. the DID_GETATTR flag and semaphore serialize
67 * this initial race. we used to make a decision based on whether
68 * the lock was matched or acquired, but the matcher could win the
69 * waking race with the first issuer so that was no good..
71 if (test_bit(LLI_F_DID_GETATTR, &lli->lli_flags))
74 down(&lli->lli_getattr_sem);
76 if (!test_bit(LLI_F_DID_GETATTR, &lli->lli_flags)) {
77 rc = ll_inode_getattr(inode, lsm, fd ? &fd->fd_ost_och : NULL);
79 set_bit(LLI_F_DID_GETATTR, &lli->lli_flags);
81 /* XXX can this fail? */
82 ll_extent_unlock(fd, inode, lsm, mode, lockh);
86 up(&lli->lli_getattr_sem);
/*
 * ll_extent_unlock: release an extent lock by calling obd_cancel() on the
 * OSC connection of the inode superblock info (sbi->ll_osc_conn), passing
 * lsm, mode and lockh through.
 *
 * Before the cancel, two flags are checked: LL_FILE_IGNORE_LOCK in
 * fd->fd_flags (fd may be NULL and is tested first) and LL_SBI_NOLCK in
 * sbi->ll_flags.
 * NOTE(review): the statement executed when either flag is set is elided in
 * this listing -- presumably an early return that skips the cancel; confirm.
 */
93 int ll_extent_unlock(struct ll_file_data *fd, struct inode *inode,
94 struct lov_stripe_md *lsm, int mode,
95 struct lustre_handle *lockh)
98 struct ll_sb_info *sbi = ll_i2sbi(inode);
102 /* XXX phil: can we do this? won't it screw the file size up? */
103 if ((fd && (fd->fd_flags & LL_FILE_IGNORE_LOCK)) ||
104 (sbi->ll_flags & LL_SBI_NOLCK))
107 rc = obd_cancel(&sbi->ll_osc_conn, lsm, mode, lockh);
/*
 * llu_brw: transfer a single page between the page cache and the object
 * store through obd_brw().
 *
 * cmd is compared against OBD_BRW_WRITE and also passed to obd_brw()
 * unchanged.  The page descriptor pg gets its offset from page->index
 * shifted by PAGE_SHIFT and its byte count from the logic below; obd_brw()
 * is then called with a page count of 1.  A failure is reported with
 * CERROR.
 *
 * NOTE(review): the declarations of pg, set and rc, the remaining pg
 * fields, the use of the flags argument and the return statement are
 * elided in this listing.
 */
115 static int llu_brw(int cmd, struct inode *inode, struct page *page, int flags)
117 struct llu_inode_info *lli = llu_i2info(inode);
118 struct lov_stripe_md *lsm = lli->lli_smd;
124 pg.off = ((obd_off)page->index) << PAGE_SHIFT;
126 /* FIXME FIXME FIXME FIXME FIXME FIXME FIXME FIXME FIXME */
/* NOTE(review): for a write whose page ends past lli_st_size the count
 * becomes lli_st_size % PAGE_SIZE.  That is 0 when the size is page aligned
 * and is unrelated to this page when pg.off already lies at or beyond the
 * size.  llu_commit_write() calls this before it grows lli_st_size, so
 * extending writes take this path -- confirm the intended count. */
128 if (cmd == OBD_BRW_WRITE && (pg.off + PAGE_SIZE > lli->lli_st_size))
129 pg.count = lli->lli_st_size % PAGE_SIZE;
132 pg.count = PAGE_SIZE;
134 CDEBUG(D_PAGE, "%s %d bytes ino %lu at "LPU64"/"LPX64"\n",
135 cmd & OBD_BRW_WRITE ? "write" : "read", pg.count, lli->lli_st_ino,
143 rc = obd_brw(cmd, llu_i2obdconn(inode), lsm, 1, &pg, set, NULL);
145 CERROR("error from obd_brw: rc = %d\n", rc);
/*
 * llu_prepare_write: get a cache page ready for a write covering the byte
 * range [from, to) within the page.
 *
 * Visible decision sequence:
 *   - PageLocked() and PageUptodate() are tested first (their actions are
 *     elided in this listing);
 *   - a write spanning the whole page (from == 0, to == PAGE_SIZE) needs no
 *     old contents;
 *   - a page whose file offset is at or beyond lli_st_size is zero-filled
 *     and the function jumps to prepare_done with rc = 0;
 *   - otherwise the existing contents are read in with
 *     llu_brw(OBD_BRW_READ, ...).
 *
 * NOTE(review): the declaration of rc, the bodies of the early tests and
 * the prepare_done label are not visible here.
 */
151 static int llu_prepare_write(struct inode *inode, struct page *page,
152 unsigned from, unsigned to)
154 struct llu_inode_info *lli = llu_i2info(inode);
155 obd_off offset = ((obd_off)page->index) << PAGE_SHIFT;
160 if (!PageLocked(page))
163 if (PageUptodate(page))
166 //POISON(addr + from, 0xca, to - from);
168 /* We're completely overwriting an existing page, so _don't_ set it up
169 * to date until commit_write */
170 if (from == 0 && to == PAGE_SIZE)
173 /* If we are writing to a new page, no need to read old data.
174 * the extent locking and getattr procedures in ll_file_write have
175 * guaranteed that i_size is stable enough for our zeroing needs */
176 if (lli->lli_st_size <= offset) {
177 memset(kmap(page), 0, PAGE_SIZE);
179 GOTO(prepare_done, rc = 0);
182 rc = llu_brw(OBD_BRW_READ, inode, page, 0);
/*
 * llu_commit_write: finish a write of the byte range [from, to) in a cache
 * page.
 *
 * The page is asserted to be locked, marked up to date and dirty, and then
 * written out with llu_brw(OBD_BRW_WRITE, ...).  Afterwards the end offset
 * of the write, (page->index << PAGE_SHIFT) + to, is compared with
 * lli->lli_st_size and the stored size is raised when the write extended
 * the file.
 *
 * NOTE(review): the first LASSERT references a variable named file that is
 * not a parameter of this function and whose declaration is not visible in
 * this listing -- it may sit in a region that is compiled out; confirm.
 * The declarations of rc and size, the check of the llu_brw() result and
 * the return statement are elided as well.
 */
190 static int llu_commit_write(struct inode *inode, struct page *page,
191 unsigned from, unsigned to)
193 struct llu_inode_info *lli = llu_i2info(inode);
198 LASSERT(inode == file->f_dentry->d_inode);
199 LASSERT(PageLocked(page));
201 CDEBUG(D_INODE, "inode %p is writing page %p from %d to %d at %lu\n",
202 inode, page, from, to, page->index);
203 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu,from=%d,to=%d\n",
204 inode->i_ino, from, to);
205 /* to match full page case in prepare_write */
206 SetPageUptodate(page);
207 /* mark the page dirty, put it on mapping->dirty,
208 * mark the inode PAGES_DIRTY, put it on sb->dirty */
209 set_page_dirty(page);
/* the size update further down only happens after this write-out call */
211 rc = llu_brw(OBD_BRW_WRITE, inode, page, 0);
215 /* this is matched by a hack in obdo_to_inode at the moment */
216 size = (((obd_off)page->index) << PAGE_SHIFT) + to;
217 if (size > lli->lli_st_size)
218 lli->lli_st_size = size;
221 } /* llu_commit_write */
/*
 * llu_generic_file_write: copy count bytes from buf into cache pages of
 * inode, starting at file offset pos, pushing each page through
 * llu_prepare_write() and llu_commit_write().
 *
 * Visible steps:
 *   - a count that is negative when viewed as ssize_t is checked up front
 *     (the action taken is elided);
 *   - update_inode_times(inode) is called before any data is copied;
 *   - pos is split into a page index and an in-page offset, and bytes
 *     starts out as the room left in that page;
 *   - status is preset to -ENOMEM before __grab_cache_page(index);
 *   - the page is prepared, filled with memcpy() and committed, then
 *     released with page_cache_release();
 *   - the result is written when that is non-zero, otherwise status.
 *
 * NOTE(review): the return-type line, the loop header, any clamping of
 * bytes to count, the advancing of pos/buf/written and the error labels are
 * elided in this listing.  The second page_cache_release() at the end is
 * presumably part of a failure path -- confirm against the full source.
 */
224 llu_generic_file_write(struct inode *inode, const char *buf,
225 size_t count, loff_t pos)
233 if ((ssize_t) count < 0)
245 update_inode_times(inode);
248 unsigned long index, offset;
252 * Try to find the page in the cache. If it isn't there,
253 * allocate a free page.
255 offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
256 index = pos >> PAGE_CACHE_SHIFT;
257 bytes = PAGE_CACHE_SIZE - offset;
262 status = -ENOMEM; /* we'll assign it later anyway */
263 page = __grab_cache_page(index);
268 status = llu_prepare_write(inode, page, offset, offset+bytes);
272 memcpy(kaddr+offset, buf, bytes);
274 status = llu_commit_write(inode, page, offset, offset+bytes);
286 page_cache_release(page);
/* a non-zero byte count takes precedence over the last status value */
292 err = written ? written : status;
304 * If blocksize < pagesize, prepare_write() may have instantiated a
305 * few blocks outside i_size. Trim these off again.
308 page_cache_release(page);
/*
 * llu_file_write: write the buffers described by iovec[] to a regular file
 * at offset pos.
 *
 * Visible steps:
 *   - anything that is not S_ISREG according to lli_st_mode is rejected
 *     (the action taken is elided);
 *   - ll_check_dirty() is called on the superblock before writing;
 *   - buf and count are taken from iovec[iovlen], which suggests a loop
 *     that counts iovlen down (the loop header is elided);
 *   - an LCK_PW extent lock is taken with llu_extent_lock(), the data is
 *     written by llu_generic_file_write() and its result is added to
 *     retval, then the lock is dropped with ll_extent_unlock().
 *
 * NOTE(review): several lines reference file and *ppos, neither of which is
 * a parameter of this function, and the first CDEBUG uses count before the
 * visible declaration of count.  They may belong to regions that are
 * compiled out in the full source; the relevant preprocessor lines are not
 * visible in this listing.  Declarations of err and retval and the return
 * statement are elided as well.
 */
312 ssize_t llu_file_write(struct inode *inode, const struct iovec *iovec,
313 size_t iovlen, loff_t pos)
315 struct llu_inode_info *lli = llu_i2info(inode);
316 struct ll_file_data *fd = lli->lli_file_data; /* XXX not ready don't use it now */
317 struct lustre_handle lockh = { 0 };
318 struct lov_stripe_md *lsm = lli->lli_smd;
319 struct ldlm_extent extent;
324 /* XXX consider other types later */
325 if (!S_ISREG(lli->lli_st_mode))
328 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu,size="LPSZ",offset=%Ld\n",
329 inode->i_ino, count, *ppos);
332 * sleep doing some writeback work of this mount's dirty data
333 * if the VM thinks we're low on memory.. other dirtying code
334 * paths should think about doing this, too, but they should be
335 * careful not to hold locked pages while they do so. like
336 * ll_prepare_write. *cough*
338 ll_check_dirty(inode->i_sb);
341 const char *buf = iovec[iovlen].iov_base;
342 size_t count = iovec[iovlen].iov_len;
344 /* POSIX, but surprised the VFS doesn't check this already */
349 if (!S_ISBLK(lli->lli_st_mode) && file->f_flags & O_APPEND) {
351 extent.end = OBD_OBJECT_EOF;
353 extent.start = *ppos;
354 extent.end = *ppos + count - 1;
358 extent.end = pos + count - 1;
361 err = llu_extent_lock(fd, inode, lsm, LCK_PW, &extent, &lockh);
366 if (!S_ISBLK(inode->i_mode) && file->f_flags & O_APPEND)
367 *ppos = inode->i_size;
369 CDEBUG(D_INFO, "Writing inode %lu, "LPSZ" bytes, offset %Lu\n",
370 inode->i_ino, count, *ppos);
/* NOTE(review): no update of pos between iovec entries is visible in this
 * listing -- confirm that each segment is written at the right offset. */
372 retval += llu_generic_file_write(inode, buf, count, pos);
376 ll_extent_unlock(fd, inode, lsm, LCK_PW, &lockh);
/*
 * llu_update_atime: refresh the access time of inode.
 *
 * attr.ia_atime is set from LTIME_S(CURRENT_TIME) and attr.ia_valid to
 * ATTR_ATIME.  The function returns early when the stored lli_st_atime
 * already equals the new value, when the inode is read-only, or when it is
 * marked noatime.  Otherwise llu_inode_setattr() is called with the
 * prepared attr.
 *
 * NOTE(review): the declaration of attr is elided.  The setattr call and
 * the direct assignment to inode->i_atime at the end look like two
 * alternative tails whose selecting conditional or preprocessor lines are
 * not visible in this listing -- confirm which one is compiled.
 */
380 static void llu_update_atime(struct inode *inode)
383 struct llu_inode_info *lli = llu_i2info(inode);
388 attr.ia_atime = LTIME_S(CURRENT_TIME);
389 attr.ia_valid = ATTR_ATIME;
391 if (lli->lli_st_atime == attr.ia_atime) return;
392 if (IS_RDONLY(inode)) return;
393 if (IS_NOATIME(inode)) return;
395 /* ll_inode_setattr() sets inode->i_atime from attr.ia_atime */
396 llu_inode_setattr(inode, &attr, 0);
398 /* update atime, but don't explicitly write it out just this change */
399 inode->i_atime = CURRENT_TIME;
/*
 * llu_generic_file_read: copy file data for inode, starting at offset pos,
 * into buf one cache page at a time.
 *
 * Visible steps:
 *   - pos is split into a page index and an in-page offset;
 *   - end_index is the page that holds lli_st_size; a page index beyond it
 *     is tested for (the action taken is elided), and on the last page nr
 *     is limited to the bytes of the file that fall into that page;
 *   - the page comes from grab_cache_page(index) and is filled with
 *     llu_brw(OBD_BRW_READ, ...);
 *   - data is copied out with memcpy() from kmap(page) + offset;
 *   - index and offset are then normalised so that offset stays below the
 *     page size.
 *
 * NOTE(review): the loop header, the adjustment of nr by offset and count,
 * the advancing of buf and offset, the error handling around llu_brw() and
 * the return statement are elided in this listing.  The first
 * page_cache_release() presumably belongs to the read-error path.
 */
404 static size_t llu_generic_file_read(struct inode *inode, char *buf,
405 size_t count, loff_t pos)
407 struct llu_inode_info *lli = llu_i2info(inode);
408 unsigned long index, offset;
412 index = pos >> PAGE_CACHE_SHIFT;
413 offset = pos & ~PAGE_CACHE_MASK;
417 unsigned long end_index, nr;
419 end_index = lli->lli_st_size >> PAGE_CACHE_SHIFT;
421 if (index > end_index)
423 nr = PAGE_CACHE_SIZE;
424 if (index == end_index) {
425 nr = lli->lli_st_size & ~PAGE_CACHE_MASK;
434 page = grab_cache_page(index);
440 error = llu_brw(OBD_BRW_READ, inode, page, 0);
442 page_cache_release(page);
446 memcpy(buf, kmap(page)+offset, nr);
/* carry any whole pages accumulated in offset over into index */
448 index += offset >> PAGE_CACHE_SHIFT;
449 offset &= ~PAGE_CACHE_MASK;
453 page_cache_release(page);
/*
 * llu_file_read: read from inode at offset pos into the buffers described
 * by iovec[].
 *
 * Visible steps:
 *   - buf and count are taken from iovec[iovlen], which suggests a loop
 *     that counts iovlen down (the loop header is elided);
 *   - the byte range [pos, pos + count - 1] is recorded in rextent and used
 *     as the end of the lock extent;
 *   - an LCK_PR extent lock is taken with llu_extent_lock();
 *   - rextent is linked onto lli->lli_read_extents under
 *     lli_read_extent_lock for the duration of the read and unlinked
 *     afterwards;
 *   - the data is read by llu_generic_file_read();
 *   - llu_update_atime() is called and the lock is dropped with
 *     ll_extent_unlock().
 *
 * NOTE(review): declarations of err and retval, the zero-length check that
 * the quoted specification text refers to, the assignment of extent.start
 * and the end of the function are not visible in this listing.
 */
461 ssize_t llu_file_read(struct inode *inode, const struct iovec *iovec,
462 size_t iovlen, loff_t pos)
464 struct llu_inode_info *lli = llu_i2info(inode);
465 struct ll_file_data *fd = lli->lli_file_data;
466 struct lov_stripe_md *lsm = lli->lli_smd;
467 struct lustre_handle lockh = { 0 };
469 struct ll_read_extent rextent;
471 struct ldlm_extent extent;
478 char *buf = iovec[iovlen].iov_base;
479 size_t count = iovec[iovlen].iov_len;
481 /* "If nbyte is 0, read() will return 0 and have no other results."
482 * -- Single Unix Spec */
487 rextent.re_extent.start = pos;
488 rextent.re_extent.end = pos + count - 1;
491 extent.end = pos + count - 1;
493 err = llu_extent_lock(fd, inode, lsm, LCK_PR, &extent, &lockh);
497 rextent.re_task = current;
498 spin_lock(&lli->lli_read_extent_lock);
499 list_add(&rextent.re_lli_item, &lli->lli_read_extents);
500 spin_unlock(&lli->lli_read_extent_lock);
502 CDEBUG(D_INFO, "Reading inode %lu, "LPSZ" bytes, offset %Ld\n",
503 lli->lli_st_ino, count, pos);
/* NOTE(review): retval is assigned here, whereas llu_file_write()
 * accumulates with +=, and no update of pos is visible.  If this body runs
 * once per iovec entry, only the last segment would be reflected in the
 * result -- confirm against the full source. */
504 retval = llu_generic_file_read(inode, buf, count, pos);
506 spin_lock(&lli->lli_read_extent_lock);
507 list_del(&rextent.re_lli_item);
508 spin_unlock(&lli->lli_read_extent_lock);
513 llu_update_atime(inode);
516 ll_extent_unlock(fd, inode, lsm, LCK_PR, &lockh);