2 * Lustre Light I/O Page Cache
4 * Copyright (C) 2002, Cluster File Systems, Inc.
8 #include <linux/config.h>
9 #include <linux/kernel.h>
11 #include <linux/string.h>
12 #include <linux/stat.h>
13 #include <linux/errno.h>
14 #include <linux/locks.h>
15 #include <linux/unistd.h>
16 #include <linux/version.h>
17 #include <asm/system.h>
18 #include <asm/uaccess.h>
21 #include <linux/stat.h>
22 #include <asm/uaccess.h>
23 #include <linux/vmalloc.h>
24 #include <asm/segment.h>
26 #include <linux/pagemap.h>
27 #include <linux/smp_lock.h>
29 #include <linux/obd_support.h>
30 #include <linux/lustre_lib.h>
31 #include <linux/lustre_idl.h>
32 #include <linux/lustre_mds.h>
33 #include <linux/lustre_light.h>
35 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,10))
37 * Add a page to the dirty page list.
/*
 * __set_page_dirty - move a page onto the dirty list of its mapping.
 *
 * Visible steps: with mapping->page_lock held, the page is unlinked from
 * the list it is currently on and linked onto mapping->dirty_pages; after
 * the lock is dropped, mark_inode_dirty_pages() is called on mapping->host.
 *
 * NOTE(review): pg_lock is assigned from PAGECACHE_LOCK(page), but neither
 * its declaration nor any lock/unlock of it is visible in this excerpt, and
 * the function braces are missing too - confirm against the full file.
 */
39 void __set_page_dirty(struct page *page)
41 struct address_space *mapping;
44 pg_lock = PAGECACHE_LOCK(page);
47 mapping = page->mapping;
48 spin_lock(&mapping->page_lock);
/* Re-home the page: off its current list, onto the dirty list. */
50 list_del(&page->list);
51 list_add(&page->list, &mapping->dirty_pages);
53 spin_unlock(&mapping->page_lock);
57 mark_inode_dirty_pages(mapping->host);
61 * Add a page to the dirty page list.
/*
 * set_page_dirty - flag a page dirty and queue it on the dirty list.
 *
 * The PG_dirty bit is set with test_and_set_bit(); the list move and the
 * inode marking below are only reached when the bit was previously clear,
 * so an already-dirty page is left where it is.
 *
 * Unlike __set_page_dirty, the list move here is done under the global
 * pagecache_lock rather than mapping->page_lock.
 *
 * NOTE(review): lines between the visible statements are missing from this
 * excerpt (for example any check of mapping before it is dereferenced) -
 * confirm against the full file.
 */
63 void set_page_dirty(struct page *page)
65 if (!test_and_set_bit(PG_dirty, &page->flags)) {
66 struct address_space *mapping = page->mapping;
69 spin_lock(&pagecache_lock);
70 list_del(&page->list);
71 list_add(&page->list, &mapping->dirty_pages);
72 spin_unlock(&pagecache_lock);
75 mark_inode_dirty_pages(mapping->host);
/*
 * ll_oa_from_inode - fill an obdo from an inode.
 *
 * @oa:    destination; oa->o_valid must already hold the OBD_MD_FL* bits
 *         naming the fields the caller wants copied.
 * @inode: source inode.
 *
 * For every OBD_MD_FL* bit set in oa->o_valid the matching inode field is
 * copied into the obdo (the object id comes from the ll_inode_info rather
 * than from the inode itself).  The function then logs the copy, calls
 * obdo_from_inode(), and, when ll_has_inline(inode) is true, copies
 * OBD_INLINESZ bytes of inline data and sets OBD_FL_INLINEDATA in
 * o_obdflags and OBD_MD_FLINLINE in o_valid.
 *
 * NOTE(review): obdo_from_inode() is called after the per-flag copies;
 * whether it repeats them cannot be determined from this excerpt.
 */
81 static void inline ll_oa_from_inode(struct obdo *oa, struct inode *inode)
83 struct ll_inode_info *oinfo = ll_i2info(inode);
/* One guarded copy per attribute: only fields requested in o_valid are written. */
85 if ( oa->o_valid & OBD_MD_FLID )
86 oa->o_id = oinfo->lli_objid;
87 if ( oa->o_valid & OBD_MD_FLATIME )
88 oa->o_atime = inode->i_atime;
89 if ( oa->o_valid & OBD_MD_FLMTIME )
90 oa->o_mtime = inode->i_mtime;
91 if ( oa->o_valid & OBD_MD_FLCTIME )
92 oa->o_ctime = inode->i_ctime;
93 if ( oa->o_valid & OBD_MD_FLSIZE )
94 oa->o_size = inode->i_size;
95 if ( oa->o_valid & OBD_MD_FLBLOCKS ) /* allocation of space */
96 oa->o_blocks = inode->i_blocks;
97 if ( oa->o_valid & OBD_MD_FLBLKSZ )
98 oa->o_blksize = inode->i_blksize;
99 if ( oa->o_valid & OBD_MD_FLMODE )
100 oa->o_mode = inode->i_mode;
101 if ( oa->o_valid & OBD_MD_FLUID )
102 oa->o_uid = inode->i_uid;
103 if ( oa->o_valid & OBD_MD_FLGID )
104 oa->o_gid = inode->i_gid;
105 if ( oa->o_valid & OBD_MD_FLFLAGS )
106 oa->o_flags = inode->i_flags;
107 if ( oa->o_valid & OBD_MD_FLNLINK )
108 oa->o_nlink = inode->i_nlink;
109 if ( oa->o_valid & OBD_MD_FLGENER )
110 oa->o_generation = inode->i_generation;
/* Trace the copy; o_id is cast to long to match the %ld conversion. */
112 CDEBUG(D_INFO, "src inode %ld, dst obdo %ld valid 0x%08x\n",
113 inode->i_ino, (long)oa->o_id, oa->o_valid);
114 obdo_from_inode(oa, inode);
116 /* this will transfer metadata for the logical object to
117 the oa: that metadata could contain the constituent objects
119 if (ll_has_inline(inode)) {
120 CDEBUG(D_INODE, "copying inline data from inode to obdo\n");
121 memcpy(oa->o_inline, oinfo->lli_inline, OBD_INLINESZ);
122 oa->o_obdflags |= OBD_FL_INLINEDATA;
123 oa->o_valid |= OBD_MD_FLINLINE;
125 } /* ll_oa_from_inode */
131 * Remove page from dirty list
/*
 * __set_page_clean - move a page onto the clean list of its mapping.
 *
 * The page is unlinked from the list it is on and linked onto
 * mapping->clean_pages.  If the mapping's dirty_pages list is empty
 * afterwards, I_DIRTY_PAGES is cleared from the host inode's i_state.
 *
 * This function does not touch the PG_dirty bit; set_page_clean() clears
 * it before calling here.
 *
 * NOTE(review): no locking is visible around the list manipulation, and
 * the declaration of inode is not visible either; several lines are missing
 * from this excerpt - confirm against the full file.
 */
133 void __set_page_clean(struct page *page)
135 struct address_space *mapping = page->mapping;
141 list_del(&page->list);
142 list_add(&page->list, &mapping->clean_pages);
144 inode = mapping->host;
/* Last dirty page gone: the inode no longer has dirty pages. */
145 if (list_empty(&mapping->dirty_pages)) {
146 CDEBUG(D_INODE, "inode clean\n");
147 inode->i_state &= ~I_DIRTY_PAGES;
/*
 * set_page_clean - clear the dirty state of a page.
 *
 * Does nothing for a page that is not dirty.  For a dirty page the dirty
 * flag is cleared first and __set_page_clean() then moves the page to the
 * clean list.
 *
 * NOTE(review): this definition is marked inline, while an extern
 * declaration of the same function appears later in this file - confirm
 * the intended linkage.
 */
152 inline void set_page_clean(struct page *page)
154 if (PageDirty(page)) {
155 ClearPageDirty(page);
156 __set_page_clean(page);
160 /* SYNCHRONOUS I/O to object storage for an inode -- object attr will be updated too */
/*
 * ll_brw - transfer one page between the page cache and object storage.
 *
 * @rw:     transfer direction, handed unchanged to obd_brw(); callers in
 *          this file pass READ or WRITE.
 * @inode:  inode the page belongs to; its attributes are packed into the
 *          obdo via ll_oa_from_inode().
 * @page:   the page to transfer.
 * @create: non-zero requests OBD_BRW_CREATE in the per-buffer flags.
 *
 * The request always describes one obdo with one buffer of PAGE_SIZE bytes
 * at byte offset page->index << PAGE_SHIFT.  The index is cast to obd_off
 * before the shift.
 *
 * NOTE(review): the declaration, allocation and release of oa, the
 * declaration of err, and the return statement are not visible in this
 * excerpt - confirm against the full file.
 */
161 static int ll_brw(int rw, struct inode *inode, struct page *page, int create)
163 obd_count num_obdo = 1;
164 obd_count bufs_per_obdo = 1;
166 obd_size count = PAGE_SIZE;
167 obd_off offset = ((obd_off)page->index) << PAGE_SHIFT;
168 obd_flag flags = create ? OBD_BRW_CREATE : 0;
/* Ask for every attribute covered by OBD_MD_FLNOTOBD to be packed. */
178 oa->o_valid = OBD_MD_FLNOTOBD;
179 ll_oa_from_inode(oa, inode);
181 err = obd_brw(rw, IID(inode), num_obdo, &oa, &bufs_per_obdo,
182 &page, &count, &offset, &flags);
184 // ll_to_inode(inode, oa); /* copy o_blocks to i_blocks */
191 extern void set_page_clean(struct page *);
193 /* SYNCHRONOUS I/O to object storage for an inode -- object attr will be updated too */
/*
 * ll_commit_page - write one page to object storage and mark it clean.
 *
 * @page:   page to write; the inode is taken from page->mapping->host.
 * @create: non-zero requests OBD_BRW_CREATE in the per-buffer flags.
 * @from:   passed in by ll_commit_write(); its use is not visible in this
 *          excerpt.
 * @to:     as for @from.
 *
 * Packs the inode attributes into an obdo, issues obd_brw(WRITE, ...) for
 * one obdo with one buffer at byte offset page->index << PAGE_SHIFT, then
 * marks the page up to date and clean.
 *
 * NOTE(review): the declarations of oa, err and count, the arguments of
 * the CDEBUG call, any error check between obd_brw() and the page-state
 * updates, and the return statement are not visible in this excerpt -
 * confirm against the full file.
 */
194 static int ll_commit_page(struct page *page, int create, int from, int to)
196 struct inode *inode = page->mapping->host;
197 obd_count num_obdo = 1;
198 obd_count bufs_per_obdo = 1;
201 obd_off offset = (((obd_off)page->index) << PAGE_SHIFT);
202 obd_flag flags = create ? OBD_BRW_CREATE : 0;
211 oa->o_valid = OBD_MD_FLNOTOBD;
212 ll_oa_from_inode(oa, inode);
214 CDEBUG(D_INODE, "commit_page writing (at %d) to %d, count %Ld\n",
217 err = obd_brw(WRITE, IID(inode), num_obdo, &oa, &bufs_per_obdo,
218 &page, &count, &offset, &flags);
220 SetPageUptodate(page);
221 set_page_clean(page);
225 // ll_to_inode(inode, oa); /* copy o_blocks to i_blocks */
233 /* returns the page unlocked, but with a reference */
/*
 * ll_readpage - readpage operation installed in ll_aops.
 *
 * @file: not referenced in the visible lines.
 * @page: page to fill; the inode is taken from page->mapping->host.
 *
 * Visible steps: a page count derived from i_size (rounded up to whole
 * pages) is compared with a value that is not visible here, and the page
 * is zero-filled through kmap(); a page that is already up to date takes a
 * separate branch; otherwise the page is read with ll_brw(READ, ...,
 * create = 0), marked up to date and unlocked.
 *
 * NOTE(review): the second half of the size comparison, the bodies of the
 * branches, the error handling after ll_brw() and the return statement are
 * not visible in this excerpt.  No kunmap() matching the kmap() is visible
 * either - confirm against the full file.
 */
234 int ll_readpage(struct file *file, struct page *page)
236 struct inode *inode = page->mapping->host;
241 if ( ((inode->i_size + PAGE_CACHE_SIZE -1)>>PAGE_SHIFT)
243 memset(kmap(page), 0, PAGE_CACHE_SIZE);
248 if (Page_Uptodate(page)) {
253 rc = ll_brw(READ, inode, page, 0);
258 /* PDEBUG(page, "READ"); */
261 SetPageUptodate(page);
262 obd_unlock_page(page);
269 /* returns the page unlocked, but with a reference */
/*
 * ll_dir_readpage - readpage operation installed in ll_dir_aops.
 *
 * @file: not referenced in the visible lines.
 * @page: page to fill; the inode is taken from page->mapping->host.
 *
 * Follows the same outline as ll_readpage, but the data is fetched with
 * mdc_readpage() (passing S_IFDIR and the byte offset of the page) and the
 * status field of the reply header is then taken as the result code.  On
 * the visible success path the page is marked up to date and unlocked.
 *
 * The superblock info pointer is formed by taking the address of
 * inode->i_sb->u.generic_sbp and casting it.
 *
 * NOTE(review): offset is computed as page->index << PAGE_SHIFT without
 * the obd_off cast used in ll_brw; the declared type of offset is not
 * visible, so whether the shift can truncate is unknown.  The declarations
 * of buf, offset and rc, the remaining mdc_readpage() arguments, the
 * origin of hdr, and the return statement are also not visible - confirm
 * against the full file.
 */
270 int ll_dir_readpage(struct file *file, struct page *page)
272 struct inode *inode = page->mapping->host;
273 struct ll_sb_info *sbi =
274 (struct ll_sb_info *)(&inode->i_sb->u.generic_sbp);
278 struct ptlrep_hdr *hdr;
282 if ( ((inode->i_size + PAGE_CACHE_SIZE -1)>>PAGE_SHIFT)
284 memset(kmap(page), 0, PAGE_CACHE_SIZE);
289 if (Page_Uptodate(page)) {
294 offset = page->index << PAGE_SHIFT;
296 rc = mdc_readpage(sbi->ll_peer_ptr, inode->i_ino, S_IFDIR, offset, buf,
/* The reply status becomes the return code; non-zero takes the error branch. */
304 if ((rc = hdr->status)) {
309 /* PDEBUG(page, "READ"); */
311 SetPageUptodate(page);
313 obd_unlock_page(page);
316 } /* ll_dir_readpage */
/*
 * ll_prepare_write - prepare_write operation installed in ll_aops.
 *
 * @file: not referenced in the visible lines.
 * @page: page about to be written.
 * @from: start of the range to be written.
 * @to:   end of the range to be written.
 *
 * Visible steps: a page that is already up to date takes one branch; a
 * write judged to cover the whole page takes another; otherwise the page
 * is first read with ll_brw(READ, ..., create = 0) and marked up to date.
 * The page is also marked dirty via set_page_dirty().
 *
 * NOTE(review): from and to are compared against offset, which is the
 * absolute byte offset of the page in the file.  ll_commit_write() in this
 * file adds to to (page->index << PAGE_CACHE_SHIFT), i.e. treats it as an
 * offset inside the page.  If from and to are in-page offsets here as
 * well, the whole-page test can only succeed for page index 0 - confirm
 * the intent.  Branch bodies, error handling and the return statement are
 * not visible in this excerpt.
 */
318 int ll_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to)
320 struct inode *inode = page->mapping->host;
321 obd_off offset = ((obd_off)page->index) << PAGE_SHIFT;
326 if (Page_Uptodate(page)) {
331 if ( (from <= offset) && (to >= offset + PAGE_SIZE) ) {
336 rc = ll_brw(READ, inode, page, 0);
338 SetPageUptodate(page);
342 set_page_dirty(page);
343 //SetPageDirty(page);
349 /* select between SYNC and ASYNC I/O methods */
/*
 * ll_do_writepage - write one page back to object storage.
 *
 * @page: page to write; the inode is taken from page->mapping->host.
 * @sync: requested I/O mode.  Not referenced in the visible lines; per the
 *        comment in the body, all I/O is currently synchronous.
 *
 * Issues ll_brw(WRITE, ..., create = 1), then marks the page up to date
 * and clean.
 *
 * NOTE(review): the declaration of err, any check of err before the
 * page-state updates, and the return statement are not visible in this
 * excerpt - confirm against the full file.
 */
350 int ll_do_writepage(struct page *page, int sync)
352 struct inode *inode = page->mapping->host;
356 /* PDEBUG(page, "WRITEPAGE"); */
357 /* XXX everything is synchronous now */
358 err = ll_brw(WRITE, inode, page, 1);
361 SetPageUptodate(page);
362 set_page_clean(page);
364 /* PDEBUG(page,"WRITEPAGE"); */
367 } /* ll_do_writepage */
371 /* returns the page unlocked, but with a reference */
/*
 * ll_writepage - writepage operation installed in ll_aops.
 *
 * @page: page to write back.
 *
 * Logs the call with printk (no log level given), writes the page with
 * ll_do_writepage(page, 1), calls set_page_clean() and logs the result
 * code through CDEBUG.
 *
 * The visible lines of ll_do_writepage() already call set_page_clean();
 * set_page_clean() does nothing for a page that is no longer dirty, so
 * the call here has no further effect in that case.
 *
 * NOTE(review): the declaration of rc, any condition guarding the
 * set_page_clean() call, and the return statement are not visible in this
 * excerpt - confirm against the full file.
 */
372 int ll_writepage(struct page *page)
375 struct inode *inode = page->mapping->host;
377 printk("---> writepage called ino %ld!\n", inode->i_ino);
379 rc = ll_do_writepage(page, 1);
381 set_page_clean(page);
383 CDEBUG(D_INODE, "--> GRR %d\n", rc);
/*
 * write_inode_pages - walk the dirty pages of an inode.
 *
 * @inode: inode whose i_mapping->dirty_pages list is traversed.
 *
 * The loop starts at the list head and follows next pointers until it
 * arrives back at the head, converting each node to its struct page with
 * list_entry().
 *
 * NOTE(review): what is done with each page is not visible in this
 * excerpt.  The cursor is advanced through tmp->next at the top of each
 * iteration, so if the missing body moves the current page off the dirty
 * list the walk would continue on another list - confirm against the full
 * file.
 */
389 void write_inode_pages(struct inode *inode)
391 struct list_head *tmp = &inode->i_mapping->dirty_pages;
393 while ( (tmp = tmp->next) != &inode->i_mapping->dirty_pages) {
395 page = list_entry(tmp, struct page, list);
/*
 * ll_commit_write - commit_write operation installed in ll_aops.
 *
 * @file: not referenced in the visible lines.
 * @page: page that was written into.
 * @from: start of the written range; forwarded to ll_commit_page().
 * @to:   end of the written range within the page.
 *
 * len is the file offset at which the written range ends: the byte offset
 * of the page plus to.  The page is written out with
 * ll_commit_page(page, 1, from, to), and when len lies beyond the current
 * i_size the size is raised with ll_set_size().
 *
 * NOTE(review): from and to are unsigned but are logged with the signed
 * d conversion.  The declaration of rc, any check of rc before the size
 * update, and the return statement are not visible in this excerpt -
 * confirm against the full file.
 */
401 int ll_commit_write(struct file *file, struct page *page, unsigned from, unsigned to)
403 struct inode *inode = page->mapping->host;
/* Widen the index before shifting so the byte offset is computed in loff_t. */
405 loff_t len = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
407 CDEBUG(D_INODE, "commit write ino %ld (end at %Ld) from %d to %d ,ind %ld\n",
408 inode->i_ino, len, from, to, page->index);
410 rc = ll_commit_page(page, 1, from, to);
412 if (len > inode->i_size) {
413 ll_set_size(inode, len);
423 * This does the "real" work of the write. The generic routine has
424 * allocated the page, locked it, done all the page alignment stuff
425 * calculations etc. Now we should just copy the data from user
426 * space and write it back to the real medium..
428 * If the writer ends up delaying the write, the writer needs to
429 * increment the page use counts until he is done with the page.
431 * Return value is the number of bytes written.
/*
 * ll_write_one_page - copy user data into a page and write the page out.
 *
 * @file:   file being written; the inode is taken from
 *          file->f_dentry->d_inode.
 * @page:   destination page.
 * @offset: byte offset inside the page at which the copy starts.
 * @bytes:  number of bytes to copy.
 *
 * A page that is not up to date is read first with ll_brw(READ, ...)
 * unless the write covers the entire page (offset 0 and PAGE_SIZE bytes).
 * The user data is then copied in with copy_from_user() and the page is
 * written with ll_writepage().
 *
 * Returns err when it is negative, otherwise bytes.
 *
 * NOTE(review): the final parameter (the source buffer named buf in the
 * body) is declared on a line missing from this excerpt, as are the
 * handling of a failed ll_brw() or copy_from_user() and the declaration of
 * err - confirm against the full file.
 */
433 int ll_write_one_page(struct file *file, struct page *page,
434 unsigned long offset, unsigned long bytes,
437 struct inode *inode = file->f_dentry->d_inode;
441 /* We check for complete page writes here, as we then don't have to
442 * get the page before writing over everything anyways.
444 if ( !Page_Uptodate(page) && (offset != 0 || bytes != PAGE_SIZE) ) {
445 err = ll_brw(READ, inode, page, 0);
448 SetPageUptodate(page);
451 if (copy_from_user((u8*)page_address(page) + offset, buf, bytes))
455 err = ll_writepage(page);
458 return (err < 0 ? err : bytes);
459 } /* ll_write_one_page */
462 * return an up to date page:
463 * - if locked is true then the page is returned locked
464 * - if create is true the corresponding disk blocks are created
465 * - page is held, i.e. caller must release the page
467 * modeled on NFS code.
/*
 * ll_getpage - look up (or create) the cached page covering a file offset
 * and make sure its contents are current.
 *
 * @inode:  inode whose i_data mapping is searched.
 * @offset: byte offset in the file; rounded down with PAGE_CACHE_MASK.
 * @create: forwarded to ll_brw() as its create argument.
 * @locked: logged, and presumably selects whether the page is handed back
 *          locked - the lines that test it are not visible; confirm.
 *
 * Visible steps: the page is obtained with grab_cache_page(); a warning is
 * printed on the failure path; a page that is already up to date takes a
 * branch in which it may be unlocked; otherwise the page is read with
 * ll_brw(READ, ...), unlocked and marked up to date.
 *
 * NOTE(review): offset is unsigned long but is logged with the signed ld
 * conversion.  The declarations of page, index and err, the conditions
 * guarding the individual obd_unlock_page() calls, the remaining printk
 * arguments, and all return statements are not visible in this excerpt -
 * confirm against the full file.
 */
469 struct page *ll_getpage(struct inode *inode, unsigned long offset,
470 int create, int locked)
478 offset = offset & PAGE_CACHE_MASK;
479 CDEBUG(D_INFO, "ino: %ld, offset %ld, create %d, locked %d\n",
480 inode->i_ino, offset, create, locked);
481 index = offset >> PAGE_CACHE_SHIFT;
483 page = grab_cache_page(&inode->i_data, index);
487 printk(KERN_WARNING " grab_cache_page says no dice ...\n");
492 /* PDEBUG(page, "GETPAGE: got page - before reading\n"); */
493 /* now check if the data in the page is up to date */
494 if ( Page_Uptodate(page)) {
496 if (PageLocked(page))
497 obd_unlock_page(page);
499 printk("file %s, line %d: expecting locked page\n",
506 err = ll_brw(READ, inode, page, create);
510 obd_unlock_page(page);
516 obd_unlock_page(page);
517 SetPageUptodate(page);
518 /* PDEBUG(page,"GETPAGE - after reading"); */
/*
 * ll_truncate - ask object storage to punch the object backing an inode.
 *
 * @inode: inode being truncated.
 *
 * Two paths are visible.  In the fallback path (reached, per the printk
 * text, when obdo_alloc fails) an obdo named obdo is filled from the inode
 * and passed to obd_punch() by address.  In the normal path an obdo
 * reached through the pointer oa is filled the same way, the request is
 * logged, and obd_punch() is called.  A failure is reported with printk.
 *
 * NOTE(review): the two obd_punch() calls pass their last two arguments in
 * opposite order: (0, o_size) in the fallback path and (o_size, 0) in the
 * normal path.  At most one of these can match the parameter order of
 * obd_punch - check its prototype.  The declarations of oa, obdo and err,
 * the allocation and release of oa, and the conditions selecting each path
 * are not visible in this excerpt.
 */
524 void ll_truncate(struct inode *inode)
530 //ll_dequeue_pages(inode);
534 /* XXX This would give an inconsistent FS, so deal with it as
535 * best we can for now - an obdo on the stack is not pretty.
539 printk(__FUNCTION__ ": obdo_alloc failed - using stack!\n");
541 obdo.o_valid = OBD_MD_FLNOTOBD;
542 ll_oa_from_inode(&obdo, inode);
544 err = obd_punch(IID(inode), &obdo, 0, obdo.o_size);
546 oa->o_valid = OBD_MD_FLNOTOBD;
547 ll_oa_from_inode(oa, inode);
549 CDEBUG(D_INFO, "calling punch for %ld (%Lu bytes at 0)\n",
550 (long)oa->o_id, oa->o_size);
551 err = obd_punch(IID(inode), oa, oa->o_size, 0);
557 printk(__FUNCTION__ ": obd_truncate fails (%d)\n", err);
// ll_aops - address-space operation table built from the routines in this
// file: readpage, writepage, prepare_write and commit_write point at the
// matching ll_* functions, while sync_page uses block_sync_page.
// Presumably installed on non-directory inodes, since a separate
// ll_dir_aops table exists - confirm where a_ops is assigned.
// NOTE(review): the end of this initializer is not visible in this excerpt.
564 struct address_space_operations ll_aops = {
565 readpage: ll_readpage,
566 writepage: ll_writepage,
567 sync_page: block_sync_page,
568 prepare_write: ll_prepare_write,
569 commit_write: ll_commit_write,
574 struct address_space_operations ll_dir_aops = {
575 readpage: ll_dir_readpage