/*
 * Lustre Light I/O Page Cache
 *
 * Copyright (C) 2002, Cluster File Systems, Inc.
 */
7 #include <linux/config.h>
8 #include <linux/kernel.h>
10 #include <linux/string.h>
11 #include <linux/stat.h>
12 #include <linux/errno.h>
13 #include <linux/locks.h>
14 #include <linux/unistd.h>
15 #include <linux/version.h>
16 #include <asm/system.h>
17 #include <asm/uaccess.h>
20 #include <linux/stat.h>
21 #include <asm/uaccess.h>
22 #include <linux/vmalloc.h>
23 #include <asm/segment.h>
25 #include <linux/pagemap.h>
26 #include <linux/smp_lock.h>
28 #include <linux/obd_support.h>
29 #include <linux/lustre_lib.h>
30 #include <linux/lustre_idl.h>
31 #include <linux/lustre_mds.h>
32 #include <linux/lustre_light.h>
34 int ll_inode_setattr(struct inode *inode, struct iattr *attr);
36 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,10))
/*
 * Add a page to the dirty page list.
 *
 * Relinks the page onto its mapping's dirty_pages list and marks the
 * owning inode as having dirty pages so writeback will pick it up.
 * Compiled only for kernels < 2.4.10 (see the LINUX_VERSION_CODE guard
 * above).
 *
 * NOTE(review): this excerpt elides some original lines (opening/closing
 * braces and the declaration of pg_lock); only visible statements are
 * annotated below.
 */
void __set_page_dirty(struct page *page)
        struct address_space *mapping;

        pg_lock = PAGECACHE_LOCK(page);

        mapping = page->mapping;
        spin_lock(&mapping->page_lock);

        /* move the page from whatever per-mapping list it is on
         * onto the dirty list, under the mapping's page_lock */
        list_del(&page->list);
        list_add(&page->list, &mapping->dirty_pages);

        spin_unlock(&mapping->page_lock);

        /* tell the VFS this inode now has dirty pages needing writeback */
        mark_inode_dirty_pages(mapping->host);
/*
 * Add a page to the dirty page list.
 *
 * Only moves the page if PG_dirty was not already set (test_and_set_bit
 * makes the transition atomic), relinking it onto the mapping's
 * dirty_pages list under the global pagecache_lock.
 *
 * NOTE(review): this excerpt elides some original lines (e.g. closing
 * braces and any NULL-mapping check between the bit test and the lock).
 */
void set_page_dirty(struct page *page)
        if (!test_and_set_bit(PG_dirty, &page->flags)) {
                struct address_space *mapping = page->mapping;

                spin_lock(&pagecache_lock);
                list_del(&page->list);
                list_add(&page->list, &mapping->dirty_pages);
                spin_unlock(&pagecache_lock);

                /* mark the owning inode as having dirty pages */
                mark_inode_dirty_pages(mapping->host);
/*
 * Copy inode attributes into an obdo, gated by oa->o_valid.
 *
 * Each OBD_MD_FL* bit set in oa->o_valid requests one attribute; only
 * the requested fields are copied from @inode into @oa.  After the
 * field-by-field copy it also calls the generic obdo_from_inode(), and
 * finally transfers any inline data held in the lustre inode info.
 *
 * NOTE(review): the excerpt elides the function's opening brace and the
 * closing brace of the inline-data block.
 */
static void inline ll_oa_from_inode(struct obdo *oa, struct inode *inode)
        struct ll_inode_info *oinfo = ll_i2info(inode);

        if ( oa->o_valid & OBD_MD_FLID )
                oa->o_id = oinfo->lli_objid;
        if ( oa->o_valid & OBD_MD_FLATIME )
                oa->o_atime = inode->i_atime;
        if ( oa->o_valid & OBD_MD_FLMTIME )
                oa->o_mtime = inode->i_mtime;
        if ( oa->o_valid & OBD_MD_FLCTIME )
                oa->o_ctime = inode->i_ctime;
        if ( oa->o_valid & OBD_MD_FLSIZE )
                oa->o_size = inode->i_size;
        if ( oa->o_valid & OBD_MD_FLBLOCKS )   /* allocation of space */
                oa->o_blocks = inode->i_blocks;
        if ( oa->o_valid & OBD_MD_FLBLKSZ )
                oa->o_blksize = inode->i_blksize;
        if ( oa->o_valid & OBD_MD_FLMODE )
                oa->o_mode = inode->i_mode;
        if ( oa->o_valid & OBD_MD_FLUID )
                oa->o_uid = inode->i_uid;
        if ( oa->o_valid & OBD_MD_FLGID )
                oa->o_gid = inode->i_gid;
        if ( oa->o_valid & OBD_MD_FLFLAGS )
                oa->o_flags = inode->i_flags;
        if ( oa->o_valid & OBD_MD_FLNLINK )
                oa->o_nlink = inode->i_nlink;
        if ( oa->o_valid & OBD_MD_FLGENER )
                oa->o_generation = inode->i_generation;

        CDEBUG(D_INFO, "src inode %ld, dst obdo %ld valid 0x%08x\n",
               inode->i_ino, (long)oa->o_id, oa->o_valid);
        /* generic inode -> obdo attribute copy on top of the above */
        obdo_from_inode(oa, inode);

        /* this will transfer metadata for the logical object to
           the oa: that metadata could contain the constituent objects */
        if (ll_has_inline(inode)) {
                CDEBUG(D_INODE, "copying inline data from inode to obdo\n");
                memcpy(oa->o_inline, oinfo->lli_inline, OBD_INLINESZ);
                oa->o_obdflags |= OBD_FL_INLINEDATA;
                oa->o_valid |= OBD_MD_FLINLINE;
} /* ll_oa_from_inode */
/*
 * Remove page from dirty list.
 *
 * Relinks the page onto its mapping's clean_pages list; if the dirty
 * list becomes empty, clears I_DIRTY_PAGES on the owning inode.
 *
 * NOTE(review): the excerpt elides the declaration of 'inode' and any
 * locking around the list manipulation — confirm against the full file.
 */
void __set_page_clean(struct page *page)
        struct address_space *mapping = page->mapping;

        /* move the page to the clean list */
        list_del(&page->list);
        list_add(&page->list, &mapping->clean_pages);

        inode = mapping->host;
        if (list_empty(&mapping->dirty_pages)) {
                CDEBUG(D_INODE, "inode clean\n");
                inode->i_state &= ~I_DIRTY_PAGES;
/* SYNCHRONOUS I/O to object storage for an inode -- object attr will be updated too */
/*
 * Read or write one whole page (@rw is the OBD_BRW_* direction) at the
 * file offset implied by page->index, via a single obdo.  @create asks
 * the object storage to allocate backing blocks (OBD_BRW_CREATE).
 *
 * NOTE(review): the excerpt elides the obdo allocation, the 'err'
 * declaration, error handling and the return statement.
 */
static int ll_brw(int rw, struct inode *inode, struct page *page, int create)
        obd_count num_obdo = 1;
        obd_count bufs_per_obdo = 1;       /* one buffer (this page) per obdo */
        obd_size count = PAGE_SIZE;
        obd_off offset = ((obd_off)page->index) << PAGE_SHIFT;
        obd_flag flags = create ? OBD_BRW_CREATE : 0;

        /* populate the obdo from the inode before the transfer */
        oa->o_valid = OBD_MD_FLNOTOBD;
        ll_oa_from_inode(oa, inode);

        err = obd_brw(rw, IID(inode), num_obdo, &oa, &bufs_per_obdo,
                      &page, &count, &offset, &flags);

        // ll_to_inode(inode, oa); /* copy o_blocks to i_blocks */
183 extern void set_page_clean(struct page *);
/* SYNCHRONOUS I/O to object storage for an inode -- object attr will be updated too */
/*
 * Write the dirty bytes [from, to) of @page to object storage and mark
 * the page up to date and clean.  If the write extends the file
 * (end offset beyond i_size), push the new size to the MDS via
 * ll_inode_setattr().
 *
 * NOTE(review): the excerpt elides declarations of 'oa', 'err', 'count'
 * and 'iattr', the continuation of the first CDEBUG's argument list,
 * error handling and the return path.
 */
static int ll_commit_page(struct page *page, int create, int from, int to)
        struct inode *inode = page->mapping->host;
        obd_count num_obdo = 1;
        obd_count bufs_per_obdo = 1;
        /* absolute file offset of the end of the write */
        obd_off offset = (((obd_off)page->index) << PAGE_SHIFT) + to;
        obd_flag flags = create ? OBD_BRW_CREATE : 0;

        oa->o_valid = OBD_MD_FLNOTOBD;
        ll_oa_from_inode(oa, inode);

        CDEBUG(D_INODE, "commit_page writing (at %d) to %d, count %Ld\n",

        err = obd_brw(OBD_BRW_WRITE, IID(inode), num_obdo, &oa, &bufs_per_obdo,
                      &page, &count, &offset, &flags);

        SetPageUptodate(page);
        set_page_clean(page);

        /* write extended the file: record the new size on the MDS */
        if (offset > inode->i_size) {
                iattr.ia_valid = ATTR_SIZE;
                iattr.ia_size = offset;
                err = ll_inode_setattr(inode, &iattr);
                printk("mds_inode_setattr failed; do something dramatic.\n");

        // ll_to_inode(inode, oa); /* copy o_blocks to i_blocks */
/* returns the page unlocked, but with a reference */
/*
 * address_space readpage for regular files: zero-fill pages past EOF,
 * short-circuit pages already up to date, otherwise do a synchronous
 * page read from object storage via ll_brw().
 *
 * NOTE(review): the excerpt elides the continuation of the EOF
 * condition, the 'rc' declaration, error handling and returns.
 */
int ll_readpage(struct file *file, struct page *page)
        struct inode *inode = page->mapping->host;

        /* page lies at/past EOF: supply zeroes instead of reading */
        if ( ((inode->i_size + PAGE_CACHE_SIZE -1)>>PAGE_SHIFT)
        memset(kmap(page), 0, PAGE_CACHE_SIZE);

        /* already cached and valid: nothing to read */
        if (Page_Uptodate(page)) {

        rc = ll_brw(READ, inode, page, 0);

        /* PDEBUG(page, "READ"); */

        SetPageUptodate(page);
        obd_unlock_page(page);
/* Directory prepare_write hook for ll_dir_aops below.
 * NOTE(review): the body is elided from this excerpt. */
int ll_dir_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to)
/* returns the page unlocked, but with a reference */
/*
 * Directory readpage: directory content is not read from object storage
 * but fetched from the metadata server with mdc_readpage().  Pages past
 * EOF are zero-filled, already-up-to-date pages short-circuit.
 *
 * NOTE(review): the excerpt elides declarations ('offset', 'buf', 'rc'),
 * the continuation of the mdc_readpage() argument list, error paths and
 * returns.  Also note 'offset = page->index << PAGE_SHIFT' has no
 * (obd_off) widening cast like the other call sites — on 32-bit this
 * could truncate large directory offsets; verify against the full file.
 */
int ll_dir_readpage(struct file *file, struct page *page)
        struct inode *inode = page->mapping->host;
        /* superblock private data holds the MDS connection */
        struct ll_sb_info *sbi =
                (struct ll_sb_info *)(&inode->i_sb->u.generic_sbp);
        struct ptlrep_hdr *hdr;

        /* page at/past EOF: zero-fill instead of fetching */
        if ( ((inode->i_size + PAGE_CACHE_SIZE -1)>>PAGE_SHIFT)
        memset(kmap(page), 0, PAGE_CACHE_SIZE);

        if (Page_Uptodate(page)) {

        offset = page->index << PAGE_SHIFT;

        /* ask the MDS for this chunk of directory data */
        rc = mdc_readpage(sbi->ll_peer_ptr, inode->i_ino, S_IFDIR, offset, buf,

        /* propagate the status code from the reply header */
        if ((rc = hdr->status)) {

        /* PDEBUG(page, "READ"); */

        SetPageUptodate(page);

        obd_unlock_page(page);

} /* ll_dir_readpage */
/*
 * address_space prepare_write: make sure the page holds valid data
 * before the caller copies [from, to) into it.  If the page is already
 * up to date, or the write covers the whole page, no read is needed;
 * otherwise read the page in first with ll_brw().
 *
 * NOTE(review): the excerpt elides 'rc', braces and returns.  Also
 * note 'from'/'to' are page-relative in the 2.4 prepare_write contract,
 * while 'offset' is an absolute file offset — the full-page-overwrite
 * test below looks wrong for page->index != 0; verify against the full
 * file before changing.
 */
int ll_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to)
        struct inode *inode = page->mapping->host;
        obd_off offset = ((obd_off)page->index) << PAGE_SHIFT;

        /* page already valid: nothing to pre-read */
        if (Page_Uptodate(page)) {

        /* whole page will be overwritten: skip the read */
        if ( (from <= offset) && (to >= offset + PAGE_SIZE) ) {

        rc = ll_brw(READ, inode, page, 0);

        SetPageUptodate(page);

        set_page_dirty(page);
        //SetPageDirty(page);
/* select between SYNC and ASYNC I/O methods */
/*
 * Write one page to object storage.  Despite the @sync argument, the
 * visible code always performs a synchronous ll_brw() write (see the
 * XXX below); on success the page is marked up to date and clean.
 *
 * NOTE(review): the excerpt elides the 'err' declaration, error
 * handling and the return statement.
 */
int ll_do_writepage(struct page *page, int sync)
        struct inode *inode = page->mapping->host;

        /* PDEBUG(page, "WRITEPAGE"); */
        /* XXX everything is synchronous now */
        err = ll_brw(OBD_BRW_WRITE, inode, page, 1);

        SetPageUptodate(page);
        set_page_clean(page);

        /* PDEBUG(page,"WRITEPAGE"); */
} /* ll_do_writepage */
/* returns the page unlocked, but with a reference */
/*
 * address_space writepage hook: delegates to ll_do_writepage() in
 * synchronous mode and marks the page clean on success.
 *
 * NOTE(review): the excerpt elides the 'rc' declaration, braces and
 * the return path; the printk below looks like leftover debug output.
 */
int ll_writepage(struct page *page)
        struct inode *inode = page->mapping->host;

        printk("---> writepage called ino %ld!\n", inode->i_ino);
        rc = ll_do_writepage(page, 1);

        set_page_clean(page);

        CDEBUG(D_INODE, "--> GRR %d\n", rc);
/*
 * Walk the inode's dirty_pages list (presumably to write each page
 * back — the loop body is elided from this excerpt).
 *
 * NOTE(review): if the elided body writes pages, writeback moves them
 * off dirty_pages (set_page_clean), which would invalidate 'tmp'
 * mid-walk — confirm how the full loop restarts/advances.
 */
void write_inode_pages(struct inode *inode)
        struct list_head *tmp = &inode->i_mapping->dirty_pages;

        while ( (tmp = tmp->next) != &inode->i_mapping->dirty_pages) {
                page = list_entry(tmp, struct page, list);
/*
 * address_space commit_write: flush the just-copied range [from, to)
 * of the page via ll_commit_page(), and if the write extended the file,
 * record the new size with ll_set_size().
 *
 * NOTE(review): the excerpt elides the 'rc' declaration, braces and
 * the return statement.
 */
int ll_commit_write(struct file *file, struct page *page, unsigned from, unsigned to)
        struct inode *inode = page->mapping->host;
        /* absolute end-of-write offset in the file */
        loff_t len = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;

        CDEBUG(D_INODE, "commit write ino %ld (end at %Ld) from %d to %d ,ind %ld\n",
               inode->i_ino, len, from, to, page->index);

        rc = ll_commit_page(page, 1, from, to);

        /* write went past EOF: grow the file */
        if (len > inode->i_size) {
                ll_set_size(inode, len);
/*
 * This does the "real" work of the write. The generic routine has
 * allocated the page, locked it, done all the page alignment stuff
 * calculations etc. Now we should just copy the data from user
 * space and write it back to the real medium..
 *
 * If the writer ends up delaying the write, the writer needs to
 * increment the page use counts until he is done with the page.
 *
 * Return value is the number of bytes written.
 *
 * NOTE(review): the excerpt elides the rest of the parameter list
 * (the user buffer 'buf'), the 'err' declaration, braces and some
 * error handling.
 */
int ll_write_one_page(struct file *file, struct page *page,
                      unsigned long offset, unsigned long bytes,
        struct inode *inode = file->f_dentry->d_inode;

        /* We check for complete page writes here, as we then don't have to
         * get the page before writing over everything anyways.
         */
        /* partial write into a non-valid page: read it in first */
        if ( !Page_Uptodate(page) && (offset != 0 || bytes != PAGE_SIZE) ) {
                err = ll_brw(READ, inode, page, 0);

        SetPageUptodate(page);

        /* copy the user data into the (kernel-mapped) page */
        if (copy_from_user((u8*)page_address(page) + offset, buf, bytes))

        /* push the page out synchronously */
        err = ll_writepage(page);

        return (err < 0 ? err : bytes);
} /* ll_write_one_page */
/*
 * return an up to date page:
 *  - if locked is true then is returned locked
 *  - if create is true the corresponding disk blocks are created
 *  - page is held, i.e. caller must release the page
 *
 * modeled on NFS code.
 *
 * NOTE(review): the excerpt elides declarations ('page', 'index',
 * 'err'), several braces, error paths and returns; the annotations
 * below cover only the visible statements.
 */
struct page *ll_getpage(struct inode *inode, unsigned long offset,
                        int create, int locked)
        /* round the offset down to a page boundary */
        offset = offset & PAGE_CACHE_MASK;
        CDEBUG(D_INFO, "ino: %ld, offset %ld, create %d, locked %d\n",
               inode->i_ino, offset, create, locked);
        index = offset >> PAGE_CACHE_SHIFT;

        /* find-or-allocate the page; returned locked with a reference */
        page = grab_cache_page(&inode->i_data, index);

        printk(KERN_WARNING " grab_cache_page says no dice ...\n");

        /* PDEBUG(page, "GETPAGE: got page - before reading\n"); */
        /* now check if the data in the page is up to date */
        if ( Page_Uptodate(page)) {
                /* caller did not ask for a locked page: drop the lock */
                if (PageLocked(page))
                        obd_unlock_page(page);
                printk("file %s, line %d: expecting locked page\n",

        /* page not valid: read it synchronously from object storage */
        err = ll_brw(READ, inode, page, create);
        obd_unlock_page(page);

        obd_unlock_page(page);
        SetPageUptodate(page);
        /* PDEBUG(page,"GETPAGE - after reading"); */
/*
 * Inode truncate: punch the object's data on object storage down to
 * the new size.  Two paths are visible: a fallback using an obdo on
 * the stack when obdo_alloc() fails, and the normal allocated-obdo
 * path.
 *
 * NOTE(review): the excerpt elides the 'oa'/'obdo'/'err' declarations,
 * the obdo_alloc() call, braces and cleanup.  Also note the two
 * obd_punch() calls pass (0, o_size) vs (o_size, 0) for the last two
 * arguments — one of the orderings is likely wrong; check obd_punch's
 * signature in the full tree.
 */
void ll_truncate(struct inode *inode)
        //ll_dequeue_pages(inode);

        /* XXX This would give an inconsistent FS, so deal with it as
         * best we can for now - an obdo on the stack is not pretty.
         */
        /* fallback path: obdo_alloc() failed, use the stack obdo */
        printk(__FUNCTION__ ": obdo_alloc failed - using stack!\n");

        obdo.o_valid = OBD_MD_FLNOTOBD;
        ll_oa_from_inode(&obdo, inode);

        err = obd_punch(IID(inode), &obdo, 0, obdo.o_size);

        /* normal path: allocated obdo */
        oa->o_valid = OBD_MD_FLNOTOBD;
        ll_oa_from_inode(oa, inode);

        CDEBUG(D_INFO, "calling punch for %ld (%Lu bytes at 0)\n",
               (long)oa->o_id, oa->o_size);
        err = obd_punch(IID(inode), oa, oa->o_size, 0);

        printk(__FUNCTION__ ": obd_truncate fails (%d)\n", err);
/* Address-space operations for regular Lustre Light files.
 * NOTE(review): the closing of this initializer is elided from this
 * excerpt. */
struct address_space_operations ll_aops = {
        readpage: ll_readpage,
        writepage: ll_writepage,
        sync_page: block_sync_page,
        prepare_write: ll_prepare_write,
        commit_write: ll_commit_write,
581 struct address_space_operations ll_dir_aops = {
582 readpage: ll_dir_readpage,
583 prepare_write: ll_dir_prepare_write