2 * Lustre Light I/O Page Cache
4 * Copyright (C) 2002, Cluster File Systems, Inc.
8 #include <linux/config.h>
9 #include <linux/kernel.h>
11 #include <linux/string.h>
12 #include <linux/stat.h>
13 #include <linux/errno.h>
14 #include <linux/locks.h>
15 #include <linux/unistd.h>
17 #include <asm/system.h>
18 #include <asm/uaccess.h>
21 #include <linux/stat.h>
22 #include <asm/uaccess.h>
23 #include <linux/vmalloc.h>
24 #include <asm/segment.h>
26 #include <linux/pagemap.h>
27 #include <linux/smp_lock.h>
29 #include <linux/obd_support.h>
30 #include <linux/lustre_lib.h>
31 #include <linux/lustre_idl.h>
32 #include <linux/lustre_mds.h>
33 #include <linux/lustre_light.h>
/*
 * ll_oa_from_inode - populate an obdo from an inode.
 *
 * The caller selects fields by setting OBD_MD_FL* bits in oa->o_valid
 * before the call. For every bit that is set, the matching inode field is
 * copied into the obdo; the object id is taken from
 * ll_i2info(inode)->lli_objid rather than from the inode itself.
 * obdo_from_inode() is then called on the same pair.
 *
 * If ll_has_inline(inode) is true, OBD_INLINESZ bytes of inline data are
 * copied from the inode info into oa->o_inline, and OBD_FL_INLINEDATA and
 * OBD_MD_FLINLINE are set on the obdo.
 *
 * NOTE(review): this listing omits some original lines (braces, blank
 * lines, and the terminator of the "this will transfer metadata" comment).
 * NOTE(review): the per-field copies look like they repeat what
 * obdo_from_inode() does -- confirm against its definition before removing
 * either one.
 * NOTE(review): "static void inline" is accepted by gcc, but the
 * conventional specifier order is "static inline void".
 */
36 static void inline ll_oa_from_inode(struct obdo *oa, struct inode *inode)
38 struct ll_inode_info *oinfo = ll_i2info(inode);
40 if ( oa->o_valid & OBD_MD_FLID )
41 oa->o_id = oinfo->lli_objid;
42 if ( oa->o_valid & OBD_MD_FLATIME )
43 oa->o_atime = inode->i_atime;
44 if ( oa->o_valid & OBD_MD_FLMTIME )
45 oa->o_mtime = inode->i_mtime;
46 if ( oa->o_valid & OBD_MD_FLCTIME )
47 oa->o_ctime = inode->i_ctime;
48 if ( oa->o_valid & OBD_MD_FLSIZE )
49 oa->o_size = inode->i_size;
50 if ( oa->o_valid & OBD_MD_FLBLOCKS ) /* allocation of space */
51 oa->o_blocks = inode->i_blocks;
52 if ( oa->o_valid & OBD_MD_FLBLKSZ )
53 oa->o_blksize = inode->i_blksize;
54 if ( oa->o_valid & OBD_MD_FLMODE )
55 oa->o_mode = inode->i_mode;
56 if ( oa->o_valid & OBD_MD_FLUID )
57 oa->o_uid = inode->i_uid;
58 if ( oa->o_valid & OBD_MD_FLGID )
59 oa->o_gid = inode->i_gid;
60 if ( oa->o_valid & OBD_MD_FLFLAGS )
61 oa->o_flags = inode->i_flags;
62 if ( oa->o_valid & OBD_MD_FLNLINK )
63 oa->o_nlink = inode->i_nlink;
64 if ( oa->o_valid & OBD_MD_FLGENER )
65 oa->o_generation = inode->i_generation;
67 CDEBUG(D_INFO, "src inode %ld, dst obdo %ld valid 0x%08x\n",
68 inode->i_ino, (long)oa->o_id, oa->o_valid);
69 obdo_from_inode(oa, inode);
71 /* this will transfer metadata for the logical object to
72 the oa: that metadata could contain the constituent objects
74 if (ll_has_inline(inode)) {
75 CDEBUG(D_INODE, "copying inline data from inode to obdo\n");
76 memcpy(oa->o_inline, oinfo->lli_inline, OBD_INLINESZ);
77 oa->o_obdflags |= OBD_FL_INLINEDATA;
78 oa->o_valid |= OBD_MD_FLINLINE;
80 } /* ll_oa_from_inode */
/*
 * set_page_dirty - mark a page dirty and queue it on its mapping's
 * dirty_pages list.
 *
 * Acts only when PG_dirty was previously clear (test_and_set_bit), so a
 * page that is already dirty is not re-queued. The list move is done under
 * pagecache_lock; after the lock is dropped the owning inode is flagged via
 * mark_inode_dirty_pages(mapping->host).
 *
 * NOTE(review): several original lines are missing from this listing, so
 * any NULL guard on mapping or mapping->host cannot be confirmed from here.
 */
83 * Add a page to the dirty page list.
85 void set_page_dirty(struct page *page)
87 if (!test_and_set_bit(PG_dirty, &page->flags)) {
88 struct address_space *mapping = page->mapping;
91 spin_lock(&pagecache_lock);
92 list_del(&page->list);
93 list_add(&page->list, &mapping->dirty_pages);
94 spin_unlock(&pagecache_lock);
97 mark_inode_dirty_pages(mapping->host);
/*
 * __set_page_clean - move a page from the dirty list to its mapping's
 * clean_pages list.
 *
 * The move happens under pagecache_lock. If the mapping's dirty_pages list
 * is empty afterwards, I_DIRTY_PAGES is cleared in the host inode's
 * i_state while the lock is still held. The visible lines do not touch the
 * page's own dirty bit; set_page_clean() clears it before calling here.
 *
 * NOTE(review): the declaration of the local "inode" and any guards on
 * mapping are on lines not shown in this listing.
 */
103 * Remove page from dirty list
105 void __set_page_clean(struct page *page)
107 struct address_space *mapping = page->mapping;
113 spin_lock(&pagecache_lock);
114 list_del(&page->list);
115 list_add(&page->list, &mapping->clean_pages);
117 inode = mapping->host;
118 if (list_empty(&mapping->dirty_pages)) {
119 CDEBUG(D_INODE, "inode clean\n");
120 inode->i_state &= ~I_DIRTY_PAGES;
122 spin_unlock(&pagecache_lock);
/*
 * set_page_clean - clear a page's dirty state if it is currently dirty.
 *
 * When PageDirty(page) is true, clears the dirty bit and calls
 * __set_page_clean() to move the page to the clean list; otherwise does
 * nothing.
 *
 * NOTE(review): defined as a non-static "inline void" and also declared
 * "extern" further down in this file -- confirm the intended linkage.
 */
126 inline void set_page_clean(struct page *page)
128 if (PageDirty(page)) {
129 ClearPageDirty(page);
130 __set_page_clean(page);
134 /* SYNCHRONOUS I/O to object storage for an inode -- object attr will be updated too */
/*
 * ll_brw - transfer a single page between the page cache and the object
 * store through obd_brw().
 *
 * rw:     passed straight to obd_brw(); callers in this file use READ or
 *         WRITE.
 * inode:  supplies the object attributes (via ll_oa_from_inode) and the
 *         IID(inode) handle given to obd_brw().
 * page:   the one page transferred; the byte offset is
 *         page->index << PAGE_SHIFT and the length is PAGE_SIZE.
 * create: non-zero sets OBD_BRW_CREATE in the flags handed to obd_brw().
 *
 * NOTE(review): allocation and release of "oa", the declaration of "err"
 * and the return statement are on lines not shown in this listing.
 * NOTE(review): the copy back from the obdo to the inode (ll_to_inode) is
 * commented out, so the visible code does not update inode attributes
 * despite what the one-line comment above says.
 */
135 static int ll_brw(int rw, struct inode *inode, struct page *page, int create)
137 obd_count num_obdo = 1;
138 obd_count bufs_per_obdo = 1;
140 obd_size count = PAGE_SIZE;
141 obd_off offset = ((obd_off)page->index) << PAGE_SHIFT;
142 obd_flag flags = create ? OBD_BRW_CREATE : 0;
152 oa->o_valid = OBD_MD_FLNOTOBD;
153 ll_oa_from_inode(oa, inode);
155 err = obd_brw(rw, IID(inode), num_obdo, &oa, &bufs_per_obdo,
156 &page, &count, &offset, &flags);
158 // ll_to_inode(inode, oa); /* copy o_blocks to i_blocks */
/*
 * Declaration of set_page_clean(), which is defined earlier in this file.
 * NOTE(review): the definition is "inline void" without "static"; confirm
 * that this extern declaration matches the intended linkage.
 */
165 extern void set_page_clean(struct page *);
167 /* SYNCHRONOUS I/O to object storage for an inode -- object attr will be updated too */
/*
 * ll_commit_page - write one page-cache page to the object store.
 *
 * page:     page to write; the inode is taken from page->mapping->host and
 *           the byte offset is page->index << PAGE_SHIFT.
 * create:   non-zero sets OBD_BRW_CREATE in the flags handed to obd_brw().
 * from, to: byte range supplied by the caller. How it maps onto the
 *           "count" passed to obd_brw() is on lines not shown here --
 *           TODO confirm.
 *
 * The write is issued with obd_brw(WRITE, ...). The visible lines then mark
 * the page up to date and clean; whether that depends on the obd_brw()
 * result cannot be told from this listing.
 *
 * NOTE(review): the CDEBUG argument list, the declarations of "oa", "err"
 * and "count", and the return statement are not shown in this listing.
 * NOTE(review): the copy back to the inode (ll_to_inode) is commented out.
 */
168 static int ll_commit_page(struct page *page, int create, int from, int to)
170 struct inode *inode = page->mapping->host;
171 obd_count num_obdo = 1;
172 obd_count bufs_per_obdo = 1;
175 obd_off offset = (((obd_off)page->index) << PAGE_SHIFT);
176 obd_flag flags = create ? OBD_BRW_CREATE : 0;
185 oa->o_valid = OBD_MD_FLNOTOBD;
186 ll_oa_from_inode(oa, inode);
188 CDEBUG(D_INODE, "commit_page writing (at %d) to %d, count %Ld\n",
191 err = obd_brw(WRITE, IID(inode), num_obdo, &oa, &bufs_per_obdo,
192 &page, &count, &offset, &flags);
194 SetPageUptodate(page);
195 set_page_clean(page);
199 // ll_to_inode(inode, oa); /* copy o_blocks to i_blocks */
207 /* returns the page unlocked, but with a reference */
/*
 * ll_readpage - readpage method for file data (installed in ll_aops).
 *
 * file: not referenced in the visible lines.
 * page: page to fill; the inode is taken from page->mapping->host.
 *
 * Visible steps: a test on the number of pages covered by i_size (its
 * right-hand side is not shown) that leads to zero-filling the page, a
 * check for an already up-to-date page, a synchronous read through
 * ll_brw(READ, ...), and finally SetPageUptodate() followed by
 * obd_unlock_page().
 *
 * NOTE(review): kmap(page) is called for the zero-fill; a matching kunmap
 * is not visible in this listing -- TODO confirm.
 * NOTE(review): error handling around "rc" and the return value are on
 * lines not shown here.
 */
208 int ll_readpage(struct file *file, struct page *page)
210 struct inode *inode = page->mapping->host;
215 if ( ((inode->i_size + PAGE_CACHE_SIZE -1)>>PAGE_SHIFT)
217 memset(kmap(page), 0, PAGE_CACHE_SIZE);
222 if (Page_Uptodate(page)) {
227 rc = ll_brw(READ, inode, page, 0);
232 /* PDEBUG(page, "READ"); */
235 SetPageUptodate(page);
236 obd_unlock_page(page);
243 /* returns the page unlocked, but with a reference */
/*
 * ll_dir_readpage - readpage method for directories (installed in
 * ll_dir_aops).
 *
 * file: not referenced in the visible lines.
 * page: page to fill; the inode is taken from page->mapping->host.
 *
 * Unlike ll_readpage(), the data is fetched with mdc_readpage() using the
 * peer stored in the superblock info (sbi->ll_peer_ptr), the inode number
 * and S_IFDIR. A non-zero hdr->status from the reply is taken as the
 * result code. The size test, zero-fill and up-to-date check mirror the
 * visible lines of ll_readpage().
 *
 * NOTE(review): sbi is formed from the address of i_sb->u.generic_sbp, not
 * its value -- presumably the ll_sb_info is stored in place there; verify.
 * NOTE(review): "offset = page->index << PAGE_SHIFT" has no widening cast,
 * whereas ll_brw() casts to obd_off first; the type of "offset" is declared
 * on a line not shown -- TODO confirm there is no truncation.
 * NOTE(review): the remaining mdc_readpage() arguments, the origin of
 * "buf" and "hdr", kmap/kunmap pairing and page unlocking are not visible
 * in this listing.
 */
244 int ll_dir_readpage(struct file *file, struct page *page)
246 struct inode *inode = page->mapping->host;
247 struct ll_sb_info *sbi =
248 (struct ll_sb_info *)(&inode->i_sb->u.generic_sbp);
252 struct ptlrep_hdr *hdr;
256 if ( ((inode->i_size + PAGE_CACHE_SIZE -1)>>PAGE_SHIFT)
258 memset(kmap(page), 0, PAGE_CACHE_SIZE);
263 if (Page_Uptodate(page)) {
268 offset = page->index << PAGE_SHIFT;
270 rc = mdc_readpage(sbi->ll_peer_ptr, inode->i_ino, S_IFDIR, offset, buf,
278 if ((rc = hdr->status)) {
283 /* PDEBUG(page, "READ"); */
285 SetPageUptodate(page);
290 } /* ll_dir_readpage */
/*
 * ll_prepare_write - prepare_write method (installed in ll_aops).
 *
 * file:     not referenced in the visible lines.
 * page:     page about to be written; inode is page->mapping->host.
 * from, to: byte range that is about to be written.
 *
 * Visible steps: check whether the page is already up to date, test
 * whether the write covers the page, otherwise read the page with
 * ll_brw(READ, ...) and mark it up to date, then queue it as dirty with
 * set_page_dirty().
 *
 * NOTE(review): "offset" is the absolute file offset of the page
 * (page->index << PAGE_SHIFT). ll_commit_write() in this file treats "to"
 * as page-relative; if "from"/"to" are page-relative here as well, the
 * comparison against offset and offset + PAGE_SIZE only works as a
 * whole-page test for page index 0 -- TODO confirm and compare against
 * 0 / PAGE_SIZE instead if so.
 * NOTE(review): return paths and error handling for "rc" are on lines not
 * shown in this listing.
 */
292 int ll_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to)
294 struct inode *inode = page->mapping->host;
295 obd_off offset = ((obd_off)page->index) << PAGE_SHIFT;
300 if (Page_Uptodate(page)) {
305 if ( (from <= offset) && (to >= offset + PAGE_SIZE) ) {
310 rc = ll_brw(READ, inode, page, 0);
312 SetPageUptodate(page);
316 set_page_dirty(page);
317 //SetPageDirty(page);
323 /* select between SYNC and ASYNC I/O methods */
/*
 * ll_do_writepage - write one page to the object store.
 *
 * page: page to write; inode is page->mapping->host.
 * sync: not referenced in the visible lines; per the XXX note below, every
 *       write is currently synchronous.
 *
 * Calls ll_brw(WRITE, inode, page, 1), i.e. always with the create flag
 * set, then marks the page up to date and clean.
 *
 * NOTE(review): whether the up-to-date/clean marking depends on "err", and
 * the return statement, are on lines not shown in this listing.
 */
324 int ll_do_writepage(struct page *page, int sync)
326 struct inode *inode = page->mapping->host;
330 /* PDEBUG(page, "WRITEPAGE"); */
331 /* XXX everything is synchronous now */
332 err = ll_brw(WRITE, inode, page, 1);
335 SetPageUptodate(page);
336 set_page_clean(page);
338 /* PDEBUG(page,"WRITEPAGE"); */
341 } /* ll_do_writepage */
345 /* returns the page unlocked, but with a reference */
/*
 * ll_writepage - writepage method (installed in ll_aops).
 *
 * Logs the inode number with printk, writes the page with
 * ll_do_writepage(page, 1), calls set_page_clean() and logs the result
 * code through CDEBUG.
 *
 * NOTE(review): ll_do_writepage() already calls set_page_clean() in its
 * visible lines, so the call here looks redundant -- the condition around
 * it (if any) is on a line not shown; TODO confirm.
 * NOTE(review): the printk has no KERN_* level and is not wrapped in a
 * debug macro, so it appears to fire on every call.
 * NOTE(review): page unlocking and the return statement are not visible in
 * this listing.
 */
346 int ll_writepage(struct page *page)
349 struct inode *inode = page->mapping->host;
351 printk("---> writepage called ino %ld!\n", inode->i_ino);
353 rc = ll_do_writepage(page, 1);
355 set_page_clean(page);
357 CDEBUG(D_INODE, "--> GRR %d\n", rc);
/*
 * write_inode_pages - walk the inode's list of dirty pages.
 *
 * Iterates over inode->i_mapping->dirty_pages and converts each list node
 * to its struct page with list_entry(). What is done with each page is on
 * lines not shown in this listing.
 *
 * NOTE(review): the walk advances with tmp->next and no lock is taken in
 * the visible lines. If the loop body cleans the page (set_page_clean()
 * moves page->list to another list), the iterator would leave the dirty
 * list -- TODO confirm against the full loop body.
 */
363 void write_inode_pages(struct inode *inode)
365 struct list_head *tmp = &inode->i_mapping->dirty_pages;
367 while ( (tmp = tmp->next) != &inode->i_mapping->dirty_pages) {
369 page = list_entry(tmp, struct page, list);
/*
 * ll_commit_write - commit_write method (installed in ll_aops).
 *
 * file:     not referenced in the visible lines.
 * page:     page that was written; inode is page->mapping->host.
 * from, to: byte range written; "to" is added to the page's file offset to
 *           get the end position "len", so it is page-relative here.
 *
 * Writes the page out with ll_commit_page(page, 1, from, to) and, when the
 * end position is beyond the current i_size, grows the file with
 * ll_set_size(inode, len).
 *
 * NOTE(review): whether the size update is skipped when ll_commit_page()
 * fails, and the return statement, are on lines not shown in this listing.
 * NOTE(review): "from" and "to" are unsigned but printed with %d.
 */
375 int ll_commit_write(struct file *file, struct page *page, unsigned from, unsigned to)
377 struct inode *inode = page->mapping->host;
379 loff_t len = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
381 CDEBUG(D_INODE, "commit write ino %ld (end at %Ld) from %d to %d ,ind %ld\n",
382 inode->i_ino, len, from, to, page->index);
384 rc = ll_commit_page(page, 1, from, to);
386 if (len > inode->i_size) {
387 ll_set_size(inode, len);
/*
 * ll_write_one_page - copy user data into a page and write it out.
 *
 * file:   the inode is taken from file->f_dentry->d_inode.
 * page:   destination page.
 * offset: byte offset within the page at which the copy starts.
 * bytes:  number of bytes to copy.
 * (The last parameter, the user buffer used as "buf" below, is declared on
 * a line not shown in this listing.)
 *
 * If the page is not up to date and the write does not cover the whole
 * page, the page is first read with ll_brw(READ, ...). The data is then
 * copied with copy_from_user() and the page is written with
 * ll_writepage(). Returns the negative error code when "err" is negative,
 * otherwise "bytes".
 *
 * NOTE(review): handling of a failed read or a failed copy_from_user() is
 * on lines not shown here. The listing also lacks the terminator of the
 * "We check for complete page writes" comment.
 */
397 * This does the "real" work of the write. The generic routine has
398 * allocated the page, locked it, done all the page alignment stuff
399 * calculations etc. Now we should just copy the data from user
400 * space and write it back to the real medium..
402 * If the writer ends up delaying the write, the writer needs to
403 * increment the page use counts until he is done with the page.
405 * Return value is the number of bytes written.
407 int ll_write_one_page(struct file *file, struct page *page,
408 unsigned long offset, unsigned long bytes,
411 struct inode *inode = file->f_dentry->d_inode;
415 /* We check for complete page writes here, as we then don't have to
416 * get the page before writing over everything anyways.
418 if ( !Page_Uptodate(page) && (offset != 0 || bytes != PAGE_SIZE) ) {
419 err = ll_brw(READ, inode, page, 0);
422 SetPageUptodate(page);
425 if (copy_from_user((u8*)page_address(page) + offset, buf, bytes))
429 err = ll_writepage(page);
432 return (err < 0 ? err : bytes);
433 } /* ll_write_one_page */
/*
 * ll_getpage - look up (or create) the page-cache page covering a file
 * offset and make sure it holds current data.
 *
 * inode:  file whose i_data mapping is searched.
 * offset: byte offset; masked with PAGE_CACHE_MASK and shifted by
 *         PAGE_CACHE_SHIFT to get the page index.
 * create: passed to ll_brw() as its create flag when the page must be read.
 * locked: see the original description below; the lines that test it are
 *         not shown in this listing.
 *
 * The page comes from grab_cache_page(); a warning is printed when that
 * fails. An up-to-date page is returned without I/O; otherwise it is read
 * with ll_brw(READ, ...), unlocked and marked up to date.
 *
 * NOTE(review): on the read path the visible order is obd_unlock_page()
 * then SetPageUptodate(), the reverse of ll_readpage() -- TODO confirm
 * this is intended.
 * NOTE(review): return values and the error path after ll_brw() are on
 * lines not shown here.
 */
436 * return an up to date page:
437 * - if locked is true then is returned locked
438 * - if create is true the corresponding disk blocks are created
439 * - page is held, i.e. caller must release the page
441 * modeled on NFS code.
443 struct page *ll_getpage(struct inode *inode, unsigned long offset,
444 int create, int locked)
452 offset = offset & PAGE_CACHE_MASK;
453 CDEBUG(D_INFO, "ino: %ld, offset %ld, create %d, locked %d\n",
454 inode->i_ino, offset, create, locked);
455 index = offset >> PAGE_CACHE_SHIFT;
457 page = grab_cache_page(&inode->i_data, index);
461 printk(KERN_WARNING " grab_cache_page says no dice ...\n");
466 /* PDEBUG(page, "GETPAGE: got page - before reading\n"); */
467 /* now check if the data in the page is up to date */
468 if ( Page_Uptodate(page)) {
470 if (PageLocked(page))
471 obd_unlock_page(page);
473 printk("file %s, line %d: expecting locked page\n",
480 err = ll_brw(READ, inode, page, create);
484 obd_unlock_page(page);
490 obd_unlock_page(page);
491 SetPageUptodate(page);
492 /* PDEBUG(page,"GETPAGE - after reading"); */
/*
 * ll_truncate - ask the object store to punch the object backing an inode.
 *
 * An obdo is filled from the inode with ll_oa_from_inode() and handed to
 * obd_punch(). Two paths are visible: one uses a heap obdo ("oa"), the
 * other falls back to an obdo on the stack after printing
 * "obdo_alloc failed". A failure of the punch is reported with printk.
 *
 * NOTE(review): the two obd_punch() calls pass their last two arguments in
 * opposite order -- (0, obdo.o_size) on the stack path versus
 * (oa->o_size, 0) on the heap path. The CDEBUG text "%Lu bytes at 0"
 * suggests size-then-offset, which would make the stack path wrong; verify
 * against the obd_punch() prototype.
 * NOTE(review): the error message says "obd_truncate" although the call is
 * obd_punch().
 * NOTE(review): declarations, the allocation test, obdo release and the
 * terminator of the "XXX" comment are on lines not shown in this listing.
 */
498 void ll_truncate(struct inode *inode)
504 //ll_dequeue_pages(inode);
508 /* XXX This would give an inconsistent FS, so deal with it as
509 * best we can for now - an obdo on the stack is not pretty.
513 printk(__FUNCTION__ ": obdo_alloc failed - using stack!\n");
515 obdo.o_valid = OBD_MD_FLNOTOBD;
516 ll_oa_from_inode(&obdo, inode);
518 err = obd_punch(IID(inode), &obdo, 0, obdo.o_size);
520 oa->o_valid = OBD_MD_FLNOTOBD;
521 ll_oa_from_inode(oa, inode);
523 CDEBUG(D_INFO, "calling punch for %ld (%Lu bytes at 0)\n",
524 (long)oa->o_id, oa->o_size);
525 err = obd_punch(IID(inode), oa, oa->o_size, 0);
531 printk(__FUNCTION__ ": obd_truncate fails (%d)\n", err);
/*
 * Address-space operations table that wires readpage, writepage,
 * prepare_write and commit_write to the ll_* functions defined in this
 * file; sync_page uses block_sync_page. Written with the GNU
 * "field: value" initializer syntax.
 *
 * NOTE(review): any further members and the closing of the initializer are
 * on lines not shown in this listing.
 */
538 struct address_space_operations ll_aops = {
539 readpage: ll_readpage,
540 writepage: ll_writepage,
541 sync_page: block_sync_page,
542 prepare_write: ll_prepare_write,
543 commit_write: ll_commit_write,
548 struct address_space_operations ll_dir_aops = {
549 readpage: ll_dir_readpage