2 * Lustre Light I/O Page Cache
4 * Copyright (C) 2002, Cluster File Systems, Inc.
7 #include <linux/config.h>
8 #include <linux/kernel.h>
10 #include <linux/string.h>
11 #include <linux/stat.h>
12 #include <linux/errno.h>
13 #include <linux/locks.h>
14 #include <linux/unistd.h>
15 #include <linux/version.h>
16 #include <asm/system.h>
17 #include <asm/uaccess.h>
20 #include <linux/stat.h>
21 #include <asm/uaccess.h>
22 #include <linux/vmalloc.h>
23 #include <asm/segment.h>
25 #include <linux/pagemap.h>
26 #include <linux/smp_lock.h>
28 #include <linux/obd_support.h>
29 #include <linux/lustre_lib.h>
30 #include <linux/lustre_idl.h>
31 #include <linux/lustre_mds.h>
32 #include <linux/lustre_light.h>
34 int ll_inode_setattr(struct inode *inode, struct iattr *attr);
36 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,10))
38 * Add a page to the dirty page list.
/*
 * Put a page on the dirty list of its address space and flag the owning
 * inode as having dirty pages.
 *
 * Visible steps: pg_lock is taken from PAGECACHE_LOCK(page); the page is
 * unlinked from the list it is currently on and linked onto
 * mapping->dirty_pages while mapping->page_lock is held; then
 * mark_inode_dirty_pages() is called on mapping->host.
 *
 * NOTE(review): pg_lock is assigned below, but neither its declaration nor
 * any lock or unlock call on it appears in this listing. Several lines of
 * the original function are not shown, so confirm the locking against the
 * complete source.
 */
40 void __set_page_dirty(struct page *page)
42 struct address_space *mapping;
45 pg_lock = PAGECACHE_LOCK(page);
48 mapping = page->mapping;
49 spin_lock(&mapping->page_lock);
51 list_del(&page->list);
52 list_add(&page->list, &mapping->dirty_pages);
54 spin_unlock(&mapping->page_lock);
58 mark_inode_dirty_pages(mapping->host);
62 * Add a page to the dirty page list.
/*
 * Mark a page dirty.
 *
 * test_and_set_bit() sets PG_dirty in page->flags. The list move and the
 * inode update run only when the bit was previously clear, so a page that
 * is already flagged dirty is left on whatever list it is on.
 *
 * NOTE(review): the list move here is done under the global pagecache_lock,
 * while __set_page_dirty() in this file uses mapping->page_lock for the
 * same list. Confirm which lock is meant to protect mapping->dirty_pages.
 */
64 void set_page_dirty(struct page *page)
66 if (!test_and_set_bit(PG_dirty, &page->flags)) {
67 struct address_space *mapping = page->mapping;
70 spin_lock(&pagecache_lock);
71 list_del(&page->list);
72 list_add(&page->list, &mapping->dirty_pages);
73 spin_unlock(&pagecache_lock);
76 mark_inode_dirty_pages(mapping->host);
/*
 * Fill an obdo from an inode.
 *
 * The caller selects fields by setting bits in oa->o_valid before the call.
 * For every OBD_MD_FL* bit that is set, the matching inode attribute is
 * copied into the obdo. The object id is the exception: it comes from the
 * per-inode Lustre Light info (lli_objid), not from the inode itself.
 *
 * After the attribute copy, if ll_has_inline(inode) is true, OBD_INLINESZ
 * bytes are copied from lli_inline to oa->o_inline, OBD_FL_INLINEDATA is
 * set in oa->o_obdflags and OBD_MD_FLINLINE is set in oa->o_valid.
 */
82 static void inline ll_oa_from_inode(struct obdo *oa, struct inode *inode)
84 struct ll_inode_info *oinfo = ll_i2info(inode);
/* Copy only the attributes the caller asked for through o_valid. */
86 if ( oa->o_valid & OBD_MD_FLID )
87 oa->o_id = oinfo->lli_objid;
88 if ( oa->o_valid & OBD_MD_FLATIME )
89 oa->o_atime = inode->i_atime;
90 if ( oa->o_valid & OBD_MD_FLMTIME )
91 oa->o_mtime = inode->i_mtime;
92 if ( oa->o_valid & OBD_MD_FLCTIME )
93 oa->o_ctime = inode->i_ctime;
94 if ( oa->o_valid & OBD_MD_FLSIZE )
95 oa->o_size = inode->i_size;
96 if ( oa->o_valid & OBD_MD_FLBLOCKS ) /* allocation of space */
97 oa->o_blocks = inode->i_blocks;
98 if ( oa->o_valid & OBD_MD_FLBLKSZ )
99 oa->o_blksize = inode->i_blksize;
100 if ( oa->o_valid & OBD_MD_FLMODE )
101 oa->o_mode = inode->i_mode;
102 if ( oa->o_valid & OBD_MD_FLUID )
103 oa->o_uid = inode->i_uid;
104 if ( oa->o_valid & OBD_MD_FLGID )
105 oa->o_gid = inode->i_gid;
106 if ( oa->o_valid & OBD_MD_FLFLAGS )
107 oa->o_flags = inode->i_flags;
108 if ( oa->o_valid & OBD_MD_FLNLINK )
109 oa->o_nlink = inode->i_nlink;
110 if ( oa->o_valid & OBD_MD_FLGENER )
111 oa->o_generation = inode->i_generation;
113 CDEBUG(D_INFO, "src inode %ld, dst obdo %ld valid 0x%08x\n",
114 inode->i_ino, (long)oa->o_id, oa->o_valid);
/*
 * NOTE(review): obdo_from_inode() is called right after the field-by-field
 * copy above. If it performs the same copy, one of the two is redundant.
 * Preprocessor lines may be missing from this listing, so confirm whether
 * the manual copy is actually compiled in.
 */
115 obdo_from_inode(oa, inode);
117 /* this will transfer metadata for the logical object to
118 the oa: that metadata could contain the constituent objects
120 if (ll_has_inline(inode)) {
121 CDEBUG(D_INODE, "copying inline data from inode to obdo\n");
122 memcpy(oa->o_inline, oinfo->lli_inline, OBD_INLINESZ);
123 oa->o_obdflags |= OBD_FL_INLINEDATA;
124 oa->o_valid |= OBD_MD_FLINLINE;
126 } /* ll_oa_from_inode */
131 * Remove page from dirty list
/*
 * Move a page from its current list onto mapping->clean_pages.
 *
 * Once the page has been moved, the dirty list of the mapping is checked.
 * If it is empty, I_DIRTY_PAGES is cleared from the i_state of the host
 * inode, so the inode no longer reports dirty pages.
 *
 * NOTE(review): no locking around the list manipulation is visible in this
 * listing, and inode is assigned without a visible declaration. Original
 * lines are missing here, so verify against the complete source.
 */
133 void __set_page_clean(struct page *page)
135 struct address_space *mapping = page->mapping;
141 list_del(&page->list);
142 list_add(&page->list, &mapping->clean_pages);
144 inode = mapping->host;
/* Last dirty page gone: drop the dirty-pages state bit on the inode. */
145 if (list_empty(&mapping->dirty_pages)) {
146 CDEBUG(D_INODE, "inode clean\n");
147 inode->i_state &= ~I_DIRTY_PAGES;
152 /* SYNCHRONOUS I/O to object storage for an inode -- object attr will be updated too */
/*
 * Transfer exactly one page between the page cache and object storage.
 *
 * rw     - direction, handed to obd_brw() unchanged
 * inode  - inode owning the page; IID(inode) selects the object
 * page   - page to read into or write from
 * create - when non-zero, OBD_BRW_CREATE is passed in the flags
 *
 * The request describes one obdo with one buffer: PAGE_SIZE bytes at file
 * offset page->index << PAGE_SHIFT. The obdo is tagged OBD_MD_FLNOTOBD and
 * filled from the inode before obd_brw() is called.
 *
 * NOTE(review): callers in this file pass READ for reads and OBD_BRW_WRITE
 * for writes; confirm that both constants belong to the value space that
 * obd_brw() expects. The allocation and release of oa and the return
 * statement are not visible in this listing. The copy of the returned
 * attributes back into the inode (ll_to_inode) is commented out, so the
 * inode is not refreshed from the obdo here.
 */
153 static int ll_brw(int rw, struct inode *inode, struct page *page, int create)
155 obd_count num_obdo = 1;
156 obd_count bufs_per_obdo = 1;
158 obd_size count = PAGE_SIZE;
/* Widen the index before shifting so large files do not overflow. */
159 obd_off offset = ((obd_off)page->index) << PAGE_SHIFT;
160 obd_flag flags = create ? OBD_BRW_CREATE : 0;
170 oa->o_valid = OBD_MD_FLNOTOBD;
171 ll_oa_from_inode(oa, inode);
173 err = obd_brw(rw, IID(inode), num_obdo, &oa, &bufs_per_obdo,
174 &page, &count, &offset, &flags);
176 // ll_to_inode(inode, oa); /* copy o_blocks to i_blocks */
/*
 * set_page_clean() is only declared here; ll_commit_page(),
 * ll_do_writepage() and ll_writepage() call it after writing a page.
 */
183 extern void set_page_clean(struct page *);
185 /* SYNCHRONOUS I/O to object storage for an inode -- object attr will be updated too */
/*
 * Write one page to object storage as part of commit_write.
 *
 * page     - page to write; its mapping host is the inode written to
 * create   - when non-zero, OBD_BRW_CREATE is passed in the flags
 * from, to - byte range reported in the debug message; to is also added
 *            to the file offset of the page to compute pos
 *
 * After obd_brw() the page is marked up to date and clean. If pos lies
 * beyond the current inode size, a size attribute update is sent through
 * ll_inode_setattr().
 *
 * NOTE(review): the declarations of oa, count, err and iattr, the error
 * checks around obd_brw() and the return statement are not visible in
 * this listing.
 */
186 static int ll_commit_page(struct page *page, int create, int from, int to)
188 struct inode *inode = page->mapping->host;
189 obd_count num_obdo = 1;
190 obd_count bufs_per_obdo = 1;
193 obd_off offset = (((obd_off)page->index) << PAGE_SHIFT);
194 obd_flag flags = create ? OBD_BRW_CREATE : 0;
/* File position just past the last byte covered by this commit. */
197 loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
205 oa->o_valid = OBD_MD_FLNOTOBD;
206 ll_oa_from_inode(oa, inode);
208 CDEBUG(D_INODE, "commit_page writing (at %d) to %d, count %Ld\n",
211 err = obd_brw(OBD_BRW_WRITE, IID(inode), num_obdo, &oa, &bufs_per_obdo,
212 &page, &count, &offset, &flags);
214 SetPageUptodate(page);
215 set_page_clean(page);
/*
 * NOTE(review): this branch runs when the write ends past the current
 * size, yet ia_size is loaded with the current inode->i_size and not with
 * pos, so the setattr carries the old size. Confirm whether pos was
 * intended. The failure message below also names mds_inode_setattr while
 * the call made is ll_inode_setattr.
 */
218 if (pos > inode->i_size) {
219 iattr.ia_valid = ATTR_SIZE;
220 iattr.ia_size = inode->i_size;
221 err = ll_inode_setattr(inode, &iattr);
223 printk("mds_inode_setattr failed; do something dramatic.\n");
231 // ll_to_inode(inode, oa); /* copy o_blocks to i_blocks */
239 /* returns the page unlocked, but with a reference */
/*
 * readpage method for regular files (installed in ll_aops).
 *
 * file - not used in the visible lines
 * page - page to fill; its mapping host is the inode read from
 *
 * Visible flow: a test based on the number of pages covered by i_size
 * leads to the page being zero-filled through kmap(); a page that is
 * already up to date gets its own branch; otherwise the page is read
 * synchronously with ll_brw(READ, ...), marked up to date and unlocked
 * with obd_unlock_page().
 *
 * NOTE(review): the second half of the size test, the bodies of the
 * branches and the return paths are missing from this listing. No kunmap()
 * matching the kmap() is visible; confirm one exists in the full source.
 */
240 int ll_readpage(struct file *file, struct page *page)
242 struct inode *inode = page->mapping->host;
247 if ( ((inode->i_size + PAGE_CACHE_SIZE -1)>>PAGE_SHIFT)
249 memset(kmap(page), 0, PAGE_CACHE_SIZE);
254 if (Page_Uptodate(page)) {
/* Synchronous read of the whole page; create flag is 0 for reads. */
259 rc = ll_brw(READ, inode, page, 0);
264 /* PDEBUG(page, "READ"); */
267 SetPageUptodate(page);
268 obd_unlock_page(page);
275 /* returns the page unlocked, but with a reference */
/*
 * readpage method for directories (installed in ll_dir_aops).
 *
 * file - not used in the visible lines
 * page - directory page to fill; its mapping host is the directory inode
 *
 * Visible flow mirrors ll_readpage(): a size based test leads to a
 * zero-fill through kmap(), an already up to date page gets its own
 * branch, and otherwise the page content is requested with mdc_readpage()
 * using the inode number, S_IFDIR and the byte offset of the page. The
 * status field of the reply header becomes rc. The page is then marked up
 * to date and unlocked.
 *
 * NOTE(review): sbi is produced by casting the ADDRESS of
 * i_sb->u.generic_sbp; this is only correct if the ll_sb_info is stored
 * in place inside the superblock union, so confirm. The declarations of
 * offset, buf and rc, the remaining mdc_readpage() arguments and the error
 * paths are not visible in this listing.
 */
276 int ll_dir_readpage(struct file *file, struct page *page)
278 struct inode *inode = page->mapping->host;
279 struct ll_sb_info *sbi =
280 (struct ll_sb_info *)(&inode->i_sb->u.generic_sbp);
284 struct ptlrep_hdr *hdr;
288 if ( ((inode->i_size + PAGE_CACHE_SIZE -1)>>PAGE_SHIFT)
290 memset(kmap(page), 0, PAGE_CACHE_SIZE);
295 if (Page_Uptodate(page)) {
/*
 * NOTE(review): unlike ll_brw(), page->index is shifted here without a
 * widening cast; whether that can overflow depends on the type of offset,
 * which is not shown.
 */
300 offset = page->index << PAGE_SHIFT;
302 rc = mdc_readpage(sbi->ll_peer_ptr, inode->i_ino, S_IFDIR, offset, buf,
/* The reply status doubles as the return code; non-zero takes this branch. */
310 if ((rc = hdr->status)) {
315 /* PDEBUG(page, "READ"); */
317 SetPageUptodate(page);
319 obd_unlock_page(page);
322 } /* ll_dir_readpage */
/*
 * prepare_write method (installed in ll_aops).
 *
 * file     - not used in the visible lines
 * page     - page about to be written
 * from, to - start and end of the range that will be written
 *
 * Visible flow: a page that is already up to date gets its own branch; a
 * range test decides whether the whole page is being overwritten;
 * otherwise the page is read with ll_brw(READ, ...) and marked up to
 * date. set_page_dirty() is called on the page.
 *
 * NOTE(review): the bodies of the branches and the return paths are not
 * visible in this listing.
 */
324 int ll_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to)
326 struct inode *inode = page->mapping->host;
327 obd_off offset = ((obd_off)page->index) << PAGE_SHIFT;
332 if (Page_Uptodate(page)) {
/*
 * NOTE(review): offset is the byte offset of the page within the file.
 * ll_commit_write() in this file adds to onto the file offset of the
 * page, which treats to as an offset inside the page. If from and to are
 * offsets inside the page here as well, this test can only succeed for
 * page index 0, and every other full-page write falls through to the
 * read. Confirm whether the intended test is from == 0 and
 * to == PAGE_SIZE.
 */
337 if ( (from <= offset) && (to >= offset + PAGE_SIZE) ) {
342 rc = ll_brw(READ, inode, page, 0);
344 SetPageUptodate(page);
348 set_page_dirty(page);
349 //SetPageDirty(page);
355 /* select between SYNC and ASYNC I/O methods */
/*
 * Write one page to object storage.
 *
 * page - page to write; its mapping host is the inode written to
 * sync - meant to choose between synchronous and asynchronous I/O; it is
 *        not referenced in the visible lines, and the existing comment
 *        states that everything is synchronous for now
 *
 * The page is written with ll_brw(OBD_BRW_WRITE, ...) with the create flag
 * set to 1, then marked up to date and clean.
 *
 * NOTE(review): the declaration of err, any check of err before the page
 * state is changed, and the return statement are not visible in this
 * listing.
 */
356 int ll_do_writepage(struct page *page, int sync)
358 struct inode *inode = page->mapping->host;
362 /* PDEBUG(page, "WRITEPAGE"); */
363 /* XXX everything is synchronous now */
364 err = ll_brw(OBD_BRW_WRITE, inode, page, 1);
367 SetPageUptodate(page);
368 set_page_clean(page);
370 /* PDEBUG(page,"WRITEPAGE"); */
373 } /* ll_do_writepage */
377 /* returns the page unlocked, but with a reference */
/*
 * writepage method (installed in ll_aops); also called directly by
 * ll_write_one_page().
 *
 * Logs the inode number, writes the page through ll_do_writepage() with
 * sync set to 1, calls set_page_clean() and logs rc.
 *
 * NOTE(review): ll_do_writepage() already calls set_page_clean() in its
 * visible lines, so the call below may clean the page a second time. The
 * printk has no log level and runs on every call. The declaration of rc,
 * any condition around set_page_clean() and the return statement are not
 * visible in this listing.
 */
378 int ll_writepage(struct page *page)
381 struct inode *inode = page->mapping->host;
383 printk("---> writepage called ino %ld!\n", inode->i_ino);
384 rc = ll_do_writepage(page, 1);
386 set_page_clean(page);
388 CDEBUG(D_INODE, "--> GRR %d\n", rc);
/*
 * Walk the dirty page list of an inode.
 *
 * The loop starts at the head of i_mapping->dirty_pages and advances with
 * tmp->next until it is back at the head; each list node is converted to
 * its struct page with list_entry().
 *
 * NOTE(review): what is done with each page is not visible in this
 * listing. If that step moves the page to another list, as
 * __set_page_clean() does with list_del() and list_add(), tmp->next would
 * no longer follow dirty_pages and the termination test could fail.
 * Verify against the complete source.
 */
394 void write_inode_pages(struct inode *inode)
396 struct list_head *tmp = &inode->i_mapping->dirty_pages;
398 while ( (tmp = tmp->next) != &inode->i_mapping->dirty_pages) {
400 page = list_entry(tmp, struct page, list);
/*
 * commit_write method (installed in ll_aops).
 *
 * file     - not used in the visible lines
 * page     - page that was written into
 * from, to - range that was written; to is added to the file offset of the
 *            page to obtain len, the file position where the write ends
 *
 * The page is written out through ll_commit_page() with the create flag
 * set to 1. If len is larger than the current inode size, the size is
 * raised to len with ll_set_size().
 *
 * NOTE(review): the declaration of rc, any check of rc before the size is
 * changed, and the return statement are not visible in this listing.
 * ll_commit_page() performs its own size comparison and setattr before
 * ll_set_size() runs here; confirm that the two updates are meant to
 * coexist.
 */
406 int ll_commit_write(struct file *file, struct page *page, unsigned from, unsigned to)
408 struct inode *inode = page->mapping->host;
410 loff_t len = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
412 CDEBUG(D_INODE, "commit write ino %ld (end at %Ld) from %d to %d ,ind %ld\n",
413 inode->i_ino, len, from, to, page->index);
415 rc = ll_commit_page(page, 1, from, to);
417 if (len > inode->i_size) {
418 ll_set_size(inode, len);
428 * This does the "real" work of the write. The generic routine has
429 * allocated the page, locked it, done all the page alignment stuff
430 * calculations etc. Now we should just copy the data from user
431 * space and write it back to the real medium..
433 * If the writer ends up delaying the write, the writer needs to
434 * increment the page use counts until he is done with the page.
436 * Return value is the number of bytes written.
/*
 * Copy user data into one page and write that page out.
 *
 * file   - open file; the inode is taken from file->f_dentry->d_inode
 * page   - destination page
 * offset - byte offset inside the page where the copy starts
 * bytes  - number of bytes to copy
 * (the last parameter, used below as buf, is declared on a line that is
 * not visible in this listing)
 *
 * Visible flow: when the page is not up to date and the write does not
 * cover the whole page (offset is non-zero or bytes differs from
 * PAGE_SIZE), the page is first read with ll_brw(READ, ...) and marked up
 * to date. The data is copied from buf to page_address(page) + offset
 * with copy_from_user(), and the page is written with ll_writepage().
 *
 * Returns err when it is negative, otherwise bytes.
 *
 * NOTE(review): bytes is unsigned long while the function returns int.
 * The handling of a failed ll_brw() or copy_from_user() is not visible in
 * this listing.
 */
438 int ll_write_one_page(struct file *file, struct page *page,
439 unsigned long offset, unsigned long bytes,
442 struct inode *inode = file->f_dentry->d_inode;
446 /* We check for complete page writes here, as we then don't have to
447 * get the page before writing over everything anyways.
449 if ( !Page_Uptodate(page) && (offset != 0 || bytes != PAGE_SIZE) ) {
450 err = ll_brw(READ, inode, page, 0);
453 SetPageUptodate(page);
456 if (copy_from_user((u8*)page_address(page) + offset, buf, bytes))
460 err = ll_writepage(page);
463 return (err < 0 ? err : bytes);
464 } /* ll_write_one_page */
467 * return an up to date page:
468 * - if locked is true then is returned locked
469 * - if create is true the corresponding disk blocks are created
470 * - page is held, i.e. caller must release the page
472 * modeled on NFS code.
/*
 * Look up or create the page cache page covering a file offset and make
 * sure it holds current data.
 *
 * inode  - inode whose i_data address space is searched
 * offset - byte offset in the file; rounded down to a page boundary
 * create - passed to ll_brw() as its create flag
 * locked - logged here; its effect on the unlock calls is not visible in
 *          this listing
 *
 * Visible flow: the page is obtained with grab_cache_page(); a warning is
 * printed on the failure path. When the page is already up to date it is
 * unlocked if it is locked, and a message about expecting a locked page is
 * printed otherwise. When it is not up to date it is read with
 * ll_brw(READ, ...), unlocked and marked up to date.
 */
474 struct page *ll_getpage(struct inode *inode, unsigned long offset,
475 int create, int locked)
/* Round down to the start of the containing page before deriving index. */
483 offset = offset & PAGE_CACHE_MASK;
484 CDEBUG(D_INFO, "ino: %ld, offset %ld, create %d, locked %d\n",
485 inode->i_ino, offset, create, locked);
486 index = offset >> PAGE_CACHE_SHIFT;
488 page = grab_cache_page(&inode->i_data, index);
492 printk(KERN_WARNING " grab_cache_page says no dice ...\n");
497 /* PDEBUG(page, "GETPAGE: got page - before reading\n"); */
498 /* now check if the data in the page is up to date */
499 if ( Page_Uptodate(page)) {
501 if (PageLocked(page))
502 obd_unlock_page(page);
504 printk("file %s, line %d: expecting locked page\n",
511 err = ll_brw(READ, inode, page, create);
/*
 * NOTE(review): two obd_unlock_page() calls follow. The conditions that
 * separate them (presumably an error path and a path depending on locked)
 * are not visible in this listing. On the second one the page is marked
 * up to date only after it has been unlocked; confirm the order is
 * intended.
 */
515 obd_unlock_page(page);
521 obd_unlock_page(page);
522 SetPageUptodate(page);
523 /* PDEBUG(page,"GETPAGE - after reading"); */
/*
 * Truncate the storage object behind an inode.
 *
 * An obdo is tagged OBD_MD_FLNOTOBD, filled from the inode with
 * ll_oa_from_inode() and handed to obd_punch(). Two variants are visible:
 * a fallback that uses an obdo on the stack and prints a message that
 * obdo_alloc failed, and the regular one that uses the obdo pointed to by
 * oa. A message is printed when the punch reports an error.
 *
 * NOTE(review): the last two obd_punch() arguments are swapped between
 * the two variants. The stack fallback passes 0 followed by obdo.o_size;
 * the regular path passes oa->o_size followed by 0. The debug message on
 * the regular path describes the request as o_size bytes at 0. One of the
 * two calls therefore looks wrong; confirm the parameter order of
 * obd_punch(). The declarations, the allocation and release of oa and the
 * conditions selecting each variant are not visible in this listing.
 */
529 void ll_truncate(struct inode *inode)
535 //ll_dequeue_pages(inode);
539 /* XXX This would give an inconsistent FS, so deal with it as
540 * best we can for now - an obdo on the stack is not pretty.
544 printk(__FUNCTION__ ": obdo_alloc failed - using stack!\n");
546 obdo.o_valid = OBD_MD_FLNOTOBD;
547 ll_oa_from_inode(&obdo, inode);
549 err = obd_punch(IID(inode), &obdo, 0, obdo.o_size);
551 oa->o_valid = OBD_MD_FLNOTOBD;
552 ll_oa_from_inode(oa, inode);
554 CDEBUG(D_INFO, "calling punch for %ld (%Lu bytes at 0)\n",
555 (long)oa->o_id, oa->o_size);
556 err = obd_punch(IID(inode), oa, oa->o_size, 0);
562 printk(__FUNCTION__ ": obd_truncate fails (%d)\n", err);
/*
 * Address space operations table wiring the page cache methods of this
 * file: ll_readpage, ll_writepage, ll_prepare_write and ll_commit_write.
 * sync_page uses block_sync_page, which is not defined in this file.
 */
569 struct address_space_operations ll_aops = {
570 readpage: ll_readpage,
571 writepage: ll_writepage,
572 sync_page: block_sync_page,
573 prepare_write: ll_prepare_write,
574 commit_write: ll_commit_write,
/*
 * Address space operations table for directories. Only readpage is set in
 * the visible lines, pointing at ll_dir_readpage.
 */
579 struct address_space_operations ll_dir_aops = {
580 readpage: ll_dir_readpage