From 4b0ad457546112e14468e774a8a27a2fb39d2c04 Mon Sep 17 00:00:00 2001 From: adilger Date: Mon, 6 Mar 2000 04:17:22 +0000 Subject: [PATCH] obdfs/flushd.c: pupdated stopped on module unload. check_time now indicates minimum age of buffer to flush, so we can call obdfs_enqueue_pages() multiple times with decreasing ages in tight memory situations to flush some pages, but not all obdfs/super.c: fix small memory leaks obdfs/*.c: partial update to 2.3.42 code (not including 32-bit UID/GID) --- lustre/obdfs/dir.c | 2 +- lustre/obdfs/file.c | 13 ++-- lustre/obdfs/flushd.c | 147 +++++++++++++++++++++++-------------- lustre/obdfs/namei.c | 195 +++++++++++++++++++++++++++---------------------- lustre/obdfs/rw.c | 94 ++++++++++++------------ lustre/obdfs/super.c | 39 ++++++---- lustre/obdfs/symlink.c | 125 ++++++++++++------------------- 7 files changed, 327 insertions(+), 288 deletions(-) diff --git a/lustre/obdfs/dir.c b/lustre/obdfs/dir.c index 1aa3d66..698b68c 100644 --- a/lustre/obdfs/dir.c +++ b/lustre/obdfs/dir.c @@ -144,7 +144,7 @@ static int obdfs_readdir(struct file * filp, void * dirent, filldir_t filldir) OIDEBUG(inode); while (!error && !stored && filp->f_pos < inode->i_size) { page = obdfs_getpage(inode, filp->f_pos, 0, LOCKED); - PDEBUG(page, "readdir"); + /* PDEBUG(page, "readdir"); */ if (!page) { ext2_error (sb, "ext2_readdir", "directory #%lu contains a hole at offset %lu", diff --git a/lustre/obdfs/file.c b/lustre/obdfs/file.c index 6492951..87d6b0e 100644 --- a/lustre/obdfs/file.c +++ b/lustre/obdfs/file.c @@ -57,7 +57,8 @@ static ssize_t obdfs_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos) { ssize_t retval; - CDEBUG(D_INFO, "Writing inode %ld, %d bytes, offset %ld\n", file->f_dentry->d_inode->i_ino, count, (long)*ppos); + CDEBUG(D_INFO, "Writing inode %ld, %d bytes, offset %ld\n", + file->f_dentry->d_inode->i_ino, count, (long)*ppos); retval = generic_file_write(file, buf, count, ppos, obdfs_write_one_page); @@ -77,13 +78,13 @@ struct file_operations obdfs_file_operations = { generic_file_read, /* read */ obdfs_file_write, /* write */ NULL, /* readdir - bad */ - NULL, /* poll - default */ + NULL, /* poll */ NULL, /* ioctl */ generic_file_mmap, - NULL, /* no special open code */ + NULL, /* open */ NULL, /* flush */ - NULL, /* no special release code */ - NULL, /* fsync */ + NULL, /* release */ + NULL /* XXX add XXX */, /* fsync */ NULL, /* fasync */ NULL /* lock */ }; @@ -104,7 +105,7 @@ struct inode_operations obdfs_file_inode_operations = { NULL, /* get_block */ obdfs_readpage, /* readpage */ obdfs_writepage, /* writepage */ - NULL, /* truncate */ + NULL /* XXX add XXX */, /* truncate */ NULL, /* permission */ NULL /* revalidate */ }; diff --git a/lustre/obdfs/flushd.c b/lustre/obdfs/flushd.c index 0bcf10c..5d8367e 100644 --- a/lustre/obdfs/flushd.c +++ b/lustre/obdfs/flushd.c @@ -47,13 +47,13 @@ struct { int interval; /* jiffies delay between pupdate flushes */ int age_buffer; /* Time for normal buffer to age before we flush it */ int age_super; /* Time for superblock to age before we flush it */ -} pupd_prm = {40, 500, 64, 256, 3*HZ, 30*HZ, 5*HZ }; +} pupd_prm = {40, 500, 64, 256, 5*HZ, 30*HZ, 5*HZ }; /* Called with the superblock list lock */ static int obdfs_enqueue_pages(struct inode *inode, struct obdo **obdo, int nr_slots, struct page **pages, char **bufs, obd_size *counts, obd_off *offsets, - obd_flag *flag, int check_time) + obd_flag *flag, unsigned long check_time) { struct list_head *page_list = obdfs_iplist(inode); struct list_head *tmp; @@ -71,8 +71,7 @@ static int obdfs_enqueue_pages(struct inode *inode, struct obdo **obdo, page = req->rq_page; - if (check_time && - (jiffies - req->rq_jiffies) < pupd_prm.age_buffer) + if (req->rq_jiffies > check_time) break; /* pages are in chronological order */ /* Only allocate the obdo if we will actually do I/O here */ @@ -112,7 +111,7 @@ static int obdfs_enqueue_pages(struct inode *inode, struct obdo **obdo, } if (!list_empty(page_list)) - CDEBUG(D_CACHE, "inode %ld list not empty\n", inode->i_ino); + CDEBUG(D_INFO, "inode %ld list not empty\n", inode->i_ino); CDEBUG(D_INFO, "added %d page(s) to vector\n", num); EXIT; @@ -120,7 +119,7 @@ static int obdfs_enqueue_pages(struct inode *inode, struct obdo **obdo, } /* obdfs_enqueue_pages */ /* Remove writeback requests for the superblock */ -int obdfs_flush_reqs(struct list_head *inode_list, int check_time) +int obdfs_flush_reqs(struct list_head *inode_list, unsigned long check_time) { struct list_head *tmp; int total_io = 0; @@ -148,7 +147,8 @@ int obdfs_flush_reqs(struct list_head *inode_list, int check_time) obd_down(&sbi->osi_list_mutex); if ( list_empty(inode_list) ) { - CDEBUG(D_CACHE, "list empty\n"); + CDEBUG(D_CACHE, "list empty: memory %ld, inodes %d, pages %d\n", + obd_memory, obd_inodes, obd_pages); obd_up(&sbi->osi_list_mutex); EXIT; return 0; @@ -183,7 +183,7 @@ int obdfs_flush_reqs(struct list_head *inode_list, int check_time) &offsets[num_io], &flags[num_obdos], check_time); - CDEBUG(D_CACHE, "FLUSH inode %ld, pages flushed: %d\n", + CDEBUG(D_INFO, "FLUSH inode %ld, pages flushed: %d\n", inode->i_ino, res); if ( res < 0 ) { CDEBUG(D_INODE, @@ -251,7 +251,7 @@ BREAK: inode = list_entry(ii, struct inode, u); CDEBUG(D_INFO, "checking inode %ld empty\n", inode->i_ino); if (list_empty(obdfs_iplist(inode))) { - CDEBUG(D_CACHE, "remove inode %ld from dirty list\n", + CDEBUG(D_INFO, "remove inode %ld from dirty list\n", inode->i_ino); tmp = tmp->next; list_del(obdfs_islist(inode)); @@ -265,24 +265,33 @@ BREAK: CDEBUG(D_INFO, "flushed %d pages in total\n", total_io); EXIT; ERR: - return err; + return err ? err : total_io; } /* obdfs_flush_reqs */ -void obdfs_flush_dirty_pages(int check_time) +/* Walk all of the superblocks and write out blocks which are too old. + * Return the maximum number of blocks written for a single filesystem. + */ +int obdfs_flush_dirty_pages(unsigned long check_time) { struct list_head *sl; + int max = 0; ENTRY; sl = &obdfs_super_list; while ( (sl = sl->prev) != &obdfs_super_list ) { struct obdfs_sb_info *sbi = list_entry(sl, struct obdfs_sb_info, osi_list); + int ret; /* walk write requests here, use the sb, check the time */ - obdfs_flush_reqs(&sbi->osi_inodes, check_time); + ret = obdfs_flush_reqs(&sbi->osi_inodes, check_time); + /* XXX handle error? What to do with it? */ + + max = ret > max ? ret : max; } EXIT; + return max; } /* obdfs_flush_dirty_pages */ @@ -290,65 +299,83 @@ static struct task_struct *pupdated; static int pupdate(void *unused) { - struct task_struct * tsk = current; - int interval; + int interval = pupd_prm.interval; + long age = pupd_prm.age_buffer; + int wrote = 0; - pupdated = current; - exit_files(current); exit_mm(current); - tsk->session = 1; - tsk->pgrp = 1; - sprintf(tsk->comm, "pupdated"); pupdated = current; + pupdated->session = 1; + pupdated->pgrp = 1; + strcpy(pupdated->comm, "pupdated"); - MOD_INC_USE_COUNT; /* XXX until send_sig works */ printk("pupdated activated...\n"); - /* sigstop and sigcont will stop and wakeup pupdate */ - spin_lock_irq(&tsk->sigmask_lock); - sigfillset(&tsk->blocked); - siginitsetinv(&tsk->blocked, sigmask(SIGTERM)); - recalc_sigpending(tsk); - spin_unlock_irq(&tsk->sigmask_lock); + spin_lock_irq(&pupdated->sigmask_lock); + sigfillset(&pupdated->blocked); + siginitsetinv(&pupdated->blocked, sigmask(SIGTERM)); + recalc_sigpending(pupdated); + spin_unlock_irq(&pupdated->sigmask_lock); for (;;) { + long dirty_limit; + /* update interval */ - interval = pupd_prm.interval; - if (interval) - { - tsk->state = TASK_INTERRUPTIBLE; + if (interval) { + set_task_state(pupdated, TASK_INTERRUPTIBLE); schedule_timeout(interval); } - else - { - stop_pupdate: - tsk->state = TASK_STOPPED; - MOD_DEC_USE_COUNT; /* XXX until send_sig works */ - printk("pupdated stopped...\n"); - return 0; - } - /* check for sigstop */ - if (signal_pending(tsk)) + if (signal_pending(pupdated)) { int stopped = 0; - spin_lock_irq(&tsk->sigmask_lock); - if (sigismember(&tsk->signal, SIGTERM)) + spin_lock_irq(&pupdated->sigmask_lock); + if (sigismember(&pupdated->signal, SIGTERM)) { - sigdelset(&tsk->signal, SIGTERM); + sigdelset(&pupdated->signal, SIGTERM); stopped = 1; } - recalc_sigpending(tsk); - spin_unlock_irq(&tsk->sigmask_lock); - if (stopped) - goto stop_pupdate; + recalc_sigpending(pupdated); + spin_unlock_irq(&pupdated->sigmask_lock); + if (stopped) { + printk("pupdated stopped...\n"); + set_task_state(pupdated, TASK_STOPPED); + pupdated = NULL; + return 0; + } } /* asynchronous setattr etc for the future ... - flush_inodes(); + obdfs_flush_dirty_inodes(jiffies - pupd_prm.age_super); */ - obdfs_flush_dirty_pages(1); + dirty_limit = nr_free_buffer_pages() * pupd_prm.nfract / 100; + CDEBUG(D_CACHE, "dirty_limit %ld, cache_count %ld\n", + dirty_limit, obdfs_cache_count); + + if (obdfs_cache_count > dirty_limit) { + interval = 0; + if ( wrote < pupd_prm.ndirty ) + age >>= 1; + } else { + int isave = interval; + int asave = age; + + if ( wrote < pupd_prm.ndirty >> 1 ) + interval = pupd_prm.interval; + else + interval = isave >> 1; + + if (obdfs_cache_count > dirty_limit / 3) { + age = asave >> 1; + interval = isave >> 1; + } else + age = pupd_prm.age_buffer; + } + + CDEBUG(D_CACHE, "age %ld, interval %d\n", age, interval); + wrote = obdfs_flush_dirty_pages(jiffies - age); } + } @@ -365,12 +392,24 @@ int obdfs_flushd_init(void) int obdfs_flushd_cleanup(void) { ENTRY; - /* deliver a signal to pupdated to shut it down - XXX need to kill it from user space for now XXX - if (pupdated) { - send_sig_info(SIGTERM, 1, pupdated); + + if (pupdated) /* for debugging purposes only */ + CDEBUG(D_CACHE, "pupdated->state = %lx\n", pupdated->state); + + /* deliver a signal to pupdated to shut it down */ + if (pupdated && (pupdated->state == TASK_RUNNING || + pupdated->state == TASK_INTERRUPTIBLE )) { + unsigned long timeout = HZ/20; + unsigned long count = 0; + send_sig_info(SIGTERM, (struct siginfo *)1, pupdated); + while (pupdated) { + if ((count % 2*HZ) == timeout) + printk(KERN_INFO "wait for pupdated to stop\n"); + count += timeout; + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(timeout); + } } - */ EXIT; /* not reached */ diff --git a/lustre/obdfs/namei.c b/lustre/obdfs/namei.c index 6169217..fcf50cb 100644 --- a/lustre/obdfs/namei.c +++ b/lustre/obdfs/namei.c @@ -22,22 +22,13 @@ * */ -#include - -#include #include -#include -#include -#include -#include -#include #include #include -#include - #include #include + /* * define how far ahead to read directories while searching them. */ @@ -67,7 +58,7 @@ static inline int ext2_match (int len, const char * const name, * * finds an entry in the specified directory with the wanted name. It * returns the cache buffer in which the entry was found, and the entry - * itself (as a parameter - res_dir). It does NOT read the inode of the + * itself (as a parameter - res_dir). It does NOT read the inode of the * entry - you'll have to do that yourself if you want to. */ static struct page * obdfs_find_entry (struct inode * dir, @@ -78,6 +69,7 @@ static struct page * obdfs_find_entry (struct inode * dir, struct super_block * sb; unsigned long offset; struct page * page; + ENTRY; CDEBUG(D_INFO, "find entry for %*s\n", namelen, name); @@ -154,8 +146,8 @@ failure: struct dentry *obdfs_lookup(struct inode *dir, struct dentry *dentry) { - struct inode *inode; - struct ext2_dir_entry_2 *de; + struct inode * inode; + struct ext2_dir_entry_2 * de; struct page *page; ENTRY; @@ -188,7 +180,6 @@ struct dentry *obdfs_lookup(struct inode *dir, struct dentry *dentry) return NULL; } /* obdfs_lookup */ - /* * obdfs_add_entry() * @@ -249,7 +240,7 @@ static struct page *obdfs_add_entry (struct inode * dir, } rec_len = EXT2_DIR_REC_LEN(namelen); /* CDEBUG(D_INFO, "reclen: %d\n", rec_len); */ - PDEBUG(page, "starting search"); + /* PDEBUG(page, "starting search"); */ offset = 0; de = (struct ext2_dir_entry_2 *) page_address(page); *err = -ENOSPC; @@ -278,7 +269,7 @@ static struct page *obdfs_add_entry (struct inode * dir, de = (struct ext2_dir_entry_2 *) page_address(page); de->inode = 0; - de->rec_len = le16_to_cpu(PAGE_SIZE); + de->rec_len = cpu_to_le16(PAGE_SIZE); dir->i_size = offset + PAGE_SIZE; dir->u.ext2_i.i_flags &= ~EXT2_BTREE_FL; mark_inode_dirty(dir); @@ -306,8 +297,8 @@ static struct page *obdfs_add_entry (struct inode * dir, return NULL; } /* CDEBUG(D_INFO, "Testing for enough space at de %p\n", de);*/ - if ( (le32_to_cpu(de->inode) == 0 && le16_to_cpu(de->rec_len) >= rec_len) || - (le16_to_cpu(de->rec_len) >= EXT2_DIR_REC_LEN(de->name_len) + rec_len)) { + if ((le32_to_cpu(de->inode) == 0 && le16_to_cpu(de->rec_len) >= rec_len) || + (le16_to_cpu(de->rec_len) >= EXT2_DIR_REC_LEN(de->name_len) + rec_len)) { offset += le16_to_cpu(de->rec_len); /* CDEBUG(D_INFO, "Found enough space de %p, offset %#lx\n", @@ -348,7 +339,7 @@ static struct page *obdfs_add_entry (struct inode * dir, dir->i_version = ++event; *res_dir = de; *err = 0; - PDEBUG(page, "add_entry"); + /* PDEBUG(page, "add_entry"); */ /* XXX unlock page here */ EXIT; return page; @@ -360,7 +351,7 @@ static struct page *obdfs_add_entry (struct inode * dir, UnlockPage(page); page_cache_release(page); - PDEBUG(page, "add_entry"); + /* PDEBUG(page, "add_entry"); */ EXIT; return NULL; } /* obdfs_add_entry */ @@ -402,20 +393,23 @@ static int obdfs_delete_entry (struct ext2_dir_entry_2 * dir, static inline void ext2_set_de_type(struct super_block *sb, struct ext2_dir_entry_2 *de, umode_t mode) { + /* XXX fix this to check for obdfs feature, not ext2 feature */ if (!EXT2_HAS_INCOMPAT_FEATURE(sb, EXT2_FEATURE_INCOMPAT_FILETYPE)) return; - if (S_ISCHR(mode)) - de->file_type = EXT2_FT_CHRDEV; - else if (S_ISBLK(mode)) - de->file_type = EXT2_FT_BLKDEV; - else if (S_ISFIFO(mode)) - de->file_type = EXT2_FT_FIFO; - else if (S_ISLNK(mode)) - de->file_type = EXT2_FT_SYMLINK; - else if (S_ISREG(mode)) + if (S_ISREG(mode)) de->file_type = EXT2_FT_REG_FILE; else if (S_ISDIR(mode)) de->file_type = EXT2_FT_DIR; + else if (S_ISLNK(mode)) + de->file_type = EXT2_FT_SYMLINK; + else if (S_ISSOCK(mode)) + de->file_type = EXT2_FT_SOCK; + else if (S_ISFIFO(mode)) + de->file_type = EXT2_FT_FIFO; + else if (S_ISCHR(mode)) + de->file_type = EXT2_FT_CHRDEV; + else if (S_ISBLK(mode)) + de->file_type = EXT2_FT_BLKDEV; } @@ -476,6 +470,7 @@ static struct inode *obdfs_new_inode(struct inode *dir, int mode) err = IOPS(dir, create)(IID(dir), oa); if ( err ) { + CDEBUG(D_INODE, "fatal: creating new inode (err %d)\n", err); obdo_free(oa); EXIT; return ERR_PTR(err); @@ -484,6 +479,7 @@ static struct inode *obdfs_new_inode(struct inode *dir, int mode) inode = iget(dir->i_sb, (ino_t)oa->o_id); if (!inode) { + CDEBUG(D_INODE, "fatal: get new inode %ld\n", (long)oa->o_id); IOPS(dir, destroy)(IID(dir), oa); obdo_free(oa); EXIT; @@ -620,37 +616,39 @@ int obdfs_mkdir(struct inode * dir, struct dentry * dentry, int mode) inode->i_nlink--; /* is this nlink == 0? */ mark_inode_dirty(inode); iput (inode); - return err; + return -EIO; } + de = (struct ext2_dir_entry_2 *) page_address(inode_page); /* create . and .. */ - de = (struct ext2_dir_entry_2 *) page_address(inode_page); de->inode = cpu_to_le32(inode->i_ino); de->name_len = 1; de->rec_len = cpu_to_le16(EXT2_DIR_REC_LEN(de->name_len)); strcpy (de->name, "."); ext2_set_de_type(dir->i_sb, de, S_IFDIR); - de = (struct ext2_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len)); de->inode = cpu_to_le32(dir->i_ino); de->rec_len = cpu_to_le16(PAGE_SIZE - EXT2_DIR_REC_LEN(1)); de->name_len = 2; strcpy (de->name, ".."); ext2_set_de_type(dir->i_sb, de, S_IFDIR); + inode->i_nlink = 2; - /* XXX handle err */ err = obdfs_do_writepage(inode, inode_page, IS_SYNC(inode)); inode->i_blocks = PAGE_SIZE/inode->i_sb->s_blocksize; inode->i_size = PAGE_SIZE; UnlockPage(inode_page); page_cache_release(inode_page); - - inode->i_nlink = 2; mark_inode_dirty(inode); + if (err) { + EXIT; + goto out_no_entry; + } /* now deal with the parent */ page = obdfs_add_entry(dir, dentry->d_name.name, dentry->d_name.len, &de, &err); if (!page) { + EXIT; goto out_no_entry; } @@ -667,15 +665,14 @@ int obdfs_mkdir(struct inode * dir, struct dentry * dentry, int mode) page_cache_release(page); d_instantiate(dentry, inode); -out: EXIT; +out: return err; out_no_entry: inode->i_nlink = 0; mark_inode_dirty(inode); iput (inode); - EXIT; goto out; } /* obdfs_mkdir */ @@ -753,7 +750,6 @@ int obdfs_rmdir (struct inode * dir, struct dentry *dentry) struct inode * inode; struct page *page; struct ext2_dir_entry_2 * de; - int err; ENTRY; @@ -777,7 +773,7 @@ int obdfs_rmdir (struct inode * dir, struct dentry *dentry) dir->i_version = ++event; if (retval) goto end_rmdir; - err = obdfs_do_writepage(dir, page, IS_SYNC(dir)); + retval = obdfs_do_writepage(dir, page, IS_SYNC(dir)); /* XXX handle err? */ UnlockPage(page); @@ -808,7 +804,6 @@ int obdfs_unlink(struct inode * dir, struct dentry *dentry) struct inode * inode; struct page *page; struct ext2_dir_entry_2 * de; - int err; ENTRY; @@ -834,7 +829,7 @@ int obdfs_unlink(struct inode * dir, struct dentry *dentry) if (retval) goto end_unlink; dir->i_version = ++event; - err = obdfs_do_writepage(dir, page, IS_SYNC(dir)); + retval = obdfs_do_writepage(dir, page, IS_SYNC(dir)); /* XXX handle err? */ UnlockPage(page); @@ -844,7 +839,6 @@ int obdfs_unlink(struct inode * dir, struct dentry *dentry) inode->i_nlink--; mark_inode_dirty(inode); inode->i_ctime = dir->i_ctime; - retval = 0; d_delete(dentry); /* This also frees the inode */ end_unlink: @@ -857,57 +851,59 @@ end_unlink: int obdfs_symlink (struct inode * dir, struct dentry *dentry, const char * symname) { - struct ext2_dir_entry_2 * de; struct inode * inode; + struct ext2_dir_entry_2 * de; struct obdfs_inode_info *oinfo; struct page* page = NULL, * name_page = NULL; char * link; - int i, l, err = -EIO; - char c; + int l, err; ENTRY; - inode = obdfs_new_inode(dir, S_IFLNK | S_IRWXUGO); - if ( IS_ERR(inode) ) { + err = -ENAMETOOLONG; + l = strlen(symname)+1; + if (l > PAGE_SIZE) { EXIT; - return PTR_ERR(inode); + goto out; } - inode->i_op = &obdfs_symlink_inode_operations; - for (l = 0; l < inode->i_sb->s_blocksize - 1 && symname [l]; l++) - ; + inode = obdfs_new_inode(dir, S_IFLNK); + if ( IS_ERR(inode) ) { + EXIT; + goto out; + } + inode->i_mode = S_IFLNK | S_IRWXUGO; oinfo = obdfs_i2info(inode); if (l >= sizeof(oinfo->oi_inline)) { CDEBUG(D_INFO, "l=%d, normal symlink\n", l); + inode->i_op = &obdfs_symlink_inode_operations; name_page = obdfs_getpage(inode, 0, 1, LOCKED); if (!name_page) { - inode->i_nlink--; - mark_inode_dirty(inode); - iput (inode); EXIT; - return err; + err = -ENOMEM; + goto out_no_entry; } link = (char *)page_address(name_page); } else { + CDEBUG(D_INFO, "l=%d, fast symlink\n", l); + inode->i_op = &obdfs_fast_symlink_inode_operations; link = oinfo->oi_inline; oinfo->oi_flags |= OBD_FL_INLINEDATA; - - CDEBUG(D_INFO, "l=%d, fast symlink\n", l); } - i = 0; - while (i < inode->i_sb->s_blocksize - 1 && (c = *(symname++))) - link[i++] = c; - link[i] = 0; + memcpy(link, symname, l); if (name_page) { err = obdfs_do_writepage(inode, name_page, IS_SYNC(inode)); - /* XXX handle err */ - PDEBUG(name_page, "symlink"); + /* PDEBUG(name_page, "symlink"); */ UnlockPage(name_page); page_cache_release(name_page); + if (err) { + EXIT; + goto out_no_entry; + } } - inode->i_size = i; + inode->i_size = l-1; mark_inode_dirty(inode); page = obdfs_add_entry (dir, dentry->d_name.name, dentry->d_name.len, @@ -982,7 +978,7 @@ int obdfs_rename (struct inode * old_dir, struct dentry *old_dentry, struct inode * old_inode, * new_inode; struct page * old_page, * new_page, * dir_page; struct ext2_dir_entry_2 * old_de, * new_de; - int err; + int retval; ENTRY; @@ -990,7 +986,7 @@ int obdfs_rename (struct inode * old_dir, struct dentry *old_dentry, /* does the old entry exist? - if not get out */ old_page = obdfs_find_entry (old_dir, old_dentry->d_name.name, old_dentry->d_name.len, &old_de, NOLOCK); - PDEBUG(old_page, "rename - old page"); + /* PDEBUG(old_page, "rename - old page"); */ /* * Check for inode number is _not_ due to possible IO errors. * We might rmdir the source, keep it as pwd of some process @@ -998,15 +994,17 @@ int obdfs_rename (struct inode * old_dir, struct dentry *old_dentry, * same name. Goodbye sticky bit ;-< */ old_inode = old_dentry->d_inode; - err = -ENOENT; - if (!old_page || le32_to_cpu(old_de->inode) != old_inode->i_ino) + retval = -ENOENT; + if (!old_page || le32_to_cpu(old_de->inode) != old_inode->i_ino) { + EXIT; goto end_rename; + } /* find new inode */ new_inode = new_dentry->d_inode; new_page = obdfs_find_entry (new_dir, new_dentry->d_name.name, new_dentry->d_name.len, &new_de, NOLOCK); - PDEBUG(new_page, "rename - new page "); + /* PDEBUG(new_page, "rename - new page "); */ if (new_page) { if (!new_inode) { page_cache_release(new_page); @@ -1019,31 +1017,42 @@ int obdfs_rename (struct inode * old_dir, struct dentry *old_dentry, if (S_ISDIR(old_inode->i_mode)) { /* can only rename into empty new directory */ if (new_inode) { - err = -ENOTEMPTY; - if (!empty_dir (new_inode)) + retval = -ENOTEMPTY; + if (!empty_dir (new_inode)) { + EXIT; goto end_rename; + } } - err = -EIO; - dir_page= obdfs_getpage (old_inode, 0, 0, LOCKED); - PDEBUG(dir_page, "rename dir page"); + retval = -EIO; + dir_page = obdfs_getpage (old_inode, 0, 0, LOCKED); + /* PDEBUG(dir_page, "rename dir page"); */ - if (!dir_page) + if (!dir_page) { + EXIT; goto end_rename; - if (le32_to_cpu(PARENT_INO(page_address(dir_page))) != old_dir->i_ino) + } + if (le32_to_cpu(PARENT_INO(page_address(dir_page))) != + old_dir->i_ino) { + EXIT; goto end_rename; - err = -EMLINK; + } + retval = -EMLINK; if (!new_inode && new_dir!=old_dir && - new_dir->i_nlink >= EXT2_LINK_MAX) + new_dir->i_nlink >= EXT2_LINK_MAX) { + EXIT; goto end_rename; + } } /* create the target dir entry */ if (!new_page) { new_page = obdfs_add_entry (new_dir, new_dentry->d_name.name, - new_dentry->d_name.len, &new_de, - &err); - PDEBUG(new_page, "rename new page"); - if (!new_page) + new_dentry->d_name.len, &new_de, + &retval); + /* PDEBUG(new_page, "rename new page"); */ + if (!new_page) { + EXIT; goto end_rename; + } } new_dir->i_version = ++event; @@ -1068,9 +1077,13 @@ int obdfs_rename (struct inode * old_dir, struct dentry *old_dentry, mark_inode_dirty(old_dir); if (dir_page) { PARENT_INO(page_address(dir_page)) =le32_to_cpu(new_dir->i_ino); - err = obdfs_do_writepage(old_inode, dir_page, - IS_SYNC(old_inode)); - /* XXX handle err */ + retval = obdfs_do_writepage(old_inode, dir_page, + IS_SYNC(old_inode)); + /* XXX handle err - not sure if this is correct */ + if (retval) { + EXIT; + goto end_rename; + } old_dir->i_nlink--; mark_inode_dirty(old_dir); if (new_inode) { @@ -1090,11 +1103,15 @@ int obdfs_rename (struct inode * old_dir, struct dentry *old_dentry, old_page = obdfs_getpage(old_dir, index << PAGE_SHIFT, 0, LOCKED); CDEBUG(D_INFO, "old_page at %p\n", old_page); - err = obdfs_do_writepage(old_dir, old_page, IS_SYNC(old_dir)); - /* XXX handle err */ + retval = obdfs_do_writepage(old_dir, old_page,IS_SYNC(old_dir)); + /* XXX handle err - not sure if this is correct */ + if (retval) { + EXIT; + goto end_rename; + } } - err = obdfs_do_writepage(new_dir, new_page, IS_SYNC(new_dir)); + retval = obdfs_do_writepage(new_dir, new_page, IS_SYNC(new_dir)); end_rename: if (old_page && PageLocked(old_page) ) @@ -1110,5 +1127,5 @@ end_rename: if (dir_page) page_cache_release(dir_page); - return err; + return retval; } /* obdfs_rename */ diff --git a/lustre/obdfs/rw.c b/lustre/obdfs/rw.c index ba8c2f1..56adb90 100644 --- a/lustre/obdfs/rw.c +++ b/lustre/obdfs/rw.c @@ -73,22 +73,19 @@ int obdfs_readpage(struct dentry *dentry, struct page *page) int rc; ENTRY; - PDEBUG(page, "READ"); + /* PDEBUG(page, "READ"); */ rc = obdfs_brw(READ, inode, page, 0); if ( !rc ) { SetPageUptodate(page); UnlockPage(page); } - PDEBUG(page, "READ"); + /* PDEBUG(page, "READ"); */ EXIT; return rc; } /* obdfs_readpage */ static kmem_cache_t *obdfs_pgrq_cachep = NULL; -/* XXX should probably have one of these per superblock */ -static int obdfs_cache_count = 0; - int obdfs_init_pgrqcache(void) { ENTRY; @@ -115,8 +112,8 @@ int obdfs_init_pgrqcache(void) inline void obdfs_pgrq_del(struct obdfs_pgrq *pgrq) { - obdfs_cache_count--; - CDEBUG(D_INFO, "deleting page %p from list [count %d]\n", + --obdfs_cache_count; + CDEBUG(D_INFO, "deleting page %p from list [count %ld]\n", pgrq->rq_page, obdfs_cache_count); list_del(&pgrq->rq_plist); kmem_cache_free(obdfs_pgrq_cachep, pgrq); @@ -126,7 +123,7 @@ void obdfs_cleanup_pgrqcache(void) { ENTRY; if (obdfs_pgrq_cachep != NULL) { - CDEBUG(D_CACHE, "destroying obdfs_pgrqcache at %p, count %d\n", + CDEBUG(D_CACHE, "destroying obdfs_pgrqcache at %p, count %ld\n", obdfs_pgrq_cachep, obdfs_cache_count); if (kmem_cache_destroy(obdfs_pgrq_cachep)) printk(KERN_INFO __FUNCTION__ @@ -178,7 +175,7 @@ obdfs_find_in_page_list(struct inode *inode, struct page *page) /* called with the list lock held */ -static struct page* obdfs_find_page_index(struct inode *inode, +static struct page *obdfs_find_page_index(struct inode *inode, unsigned long index) { struct list_head *page_list = obdfs_iplist(inode); @@ -228,24 +225,40 @@ int obdfs_do_vec_wr(struct inode **inodes, obd_count num_io, ENTRY; CDEBUG(D_INFO, "writing %d page(s), %d obdo(s) in vector\n", num_io, num_obdos); + { /* DEBUGGING */ + int i; + printk("OBDOS: "); + for (i = 0; i < num_obdos; i++) + printk("%ld:0x%p ", (long)obdos[i]->o_id, obdos[i]); + + printk("\nPAGES: "); + for (i = 0; i < num_io; i++) + printk("0x%p ", pages[i]); + printk("\n"); + } + err = OPS(sb, brw)(WRITE, &sbi->osi_conn, num_obdos, obdos, oa_bufs, bufs, counts, offsets, flags); + CDEBUG(D_CACHE, "BRW done\n"); /* release the pages from the page cache */ while ( num_io > 0 ) { - num_io--; + --num_io; CDEBUG(D_INFO, "calling put_page for %p, index %ld\n", pages[num_io], pages[num_io]->index); + /* PDEBUG(pages[num_io], "do_vec_wr"); */ put_page(pages[num_io]); + /* PDEBUG(pages[num_io], "do_vec_wr"); */ } + CDEBUG(D_CACHE, "put_page done\n"); while ( num_obdos > 0) { - num_obdos--; - CDEBUG(D_INFO, "copy/free obdo %ld\n", - (long)obdos[num_obdos]->o_id); + --num_obdos; + CDEBUG(D_INFO, "free obdo %ld\n",(long)obdos[num_obdos]->o_id); obdfs_to_inode(inodes[num_obdos], obdos[num_obdos]); obdo_free(obdos[num_obdos]); } + CDEBUG(D_CACHE, "obdo_free done\n"); EXIT; return err; } @@ -266,9 +279,6 @@ static int obdfs_add_page_to_cache(struct inode *inode, struct page *page) if ( !obdfs_find_in_page_list(inode, page) ) { struct obdfs_pgrq *pgrq; pgrq = kmem_cache_alloc(obdfs_pgrq_cachep, SLAB_KERNEL); - CDEBUG(D_INFO, - "adding inode %ld page %p, pgrq: %p, cache count [%d]\n", - inode->i_ino, page, pgrq, obdfs_cache_count + 1); if (!pgrq) { EXIT; obd_up(&obdfs_i2sbi(inode)->osi_list_mutex); @@ -281,6 +291,9 @@ static int obdfs_add_page_to_cache(struct inode *inode, struct page *page) get_page(pgrq->rq_page); list_add(&pgrq->rq_plist, obdfs_iplist(inode)); obdfs_cache_count++; + CDEBUG(D_INFO, + "added inode %ld page %p, pgrq: %p, cache count [%ld]\n", + inode->i_ino, page, pgrq, obdfs_cache_count); } /* If inode isn't already on the superblock inodes list, add it, @@ -288,7 +301,7 @@ static int obdfs_add_page_to_cache(struct inode *inode, struct page *page) * * We increment the reference count on the inode to keep it from * being freed from memory. This _should_ be an iget() with an - * iput() in both flush_reqs() and put_inode(), but since ut_inode() + * iput() in both flush_reqs() and put_inode(), but since put_inode() * is called from iput() we can't call iput() again there. Instead * we just increment/decrement i_count, which is essentially what * iget/iput do for an inode already in memory. @@ -299,13 +312,12 @@ static int obdfs_add_page_to_cache(struct inode *inode, struct page *page) inode->i_ino, obdfs_slist(inode)); list_add(obdfs_islist(inode), obdfs_slist(inode)); } + obd_up(&obdfs_i2sbi(inode)->osi_list_mutex); /* XXX For testing purposes, we write out the page here. * In the future, a flush daemon will write out the page. - res = obdfs_flush_reqs(obdfs_slist(inode), 0); - obdfs_flush_dirty_pages(1); + res = obdfs_flush_reqs(obdfs_slist(inode), ~0UL); */ - obd_up(&obdfs_i2sbi(inode)->osi_list_mutex); EXIT; return res; @@ -352,25 +364,27 @@ int obdfs_writepage(struct dentry *dentry, struct page *page) * Return value is the number of bytes written. */ int obdfs_write_one_page(struct file *file, struct page *page, - unsigned long offset, unsigned long bytes, - const char * buf) + unsigned long offset, unsigned long bytes, + const char * buf) { struct inode *inode = file->f_dentry->d_inode; int err; ENTRY; - if ( !Page_Uptodate(page) ) { - err = obdfs_brw(READ, inode, page, 1); - if ( !err ) - SetPageUptodate(page); - else + /* We check for complete page writes here, as we then don't have to + * get the page before writing over everything anyways. + */ + if ( !Page_Uptodate(page) && (offset != 0 || bytes != PAGE_SIZE) ) { + err = obdfs_brw(READ, inode, page, 0); + if ( err ) return err; + SetPageUptodate(page); } if (copy_from_user((u8*)page_address(page) + offset, buf, bytes)) return -EFAULT; - lock_kernel(); + lock_kernel(); /* XXX do we really need to lock the kernel to write? */ err = obdfs_writepage(file->f_dentry, page); unlock_kernel(); @@ -388,10 +402,8 @@ int obdfs_write_one_page(struct file *file, struct page *page, struct page *obdfs_getpage(struct inode *inode, unsigned long offset, int create, int locked) { - struct page *page_cache; - int index; - struct page ** hash; struct page * page; + int index; int err; ENTRY; @@ -401,26 +413,16 @@ struct page *obdfs_getpage(struct inode *inode, unsigned long offset, inode->i_ino, offset, create, locked); index = offset >> PAGE_CACHE_SHIFT; - - page = NULL; - page_cache = page_cache_alloc(); - if ( ! page_cache ) { - EXIT; - return NULL; - } - CDEBUG(D_INFO, "page_cache %p\n", page_cache); - - hash = page_hash(&inode->i_data, index); page = grab_cache_page(&inode->i_data, index); /* Yuck, no page */ if (! page) { printk(KERN_WARNING " grab_cache_page says no dice ...\n"); EXIT; - return 0; + return NULL; } - PDEBUG(page, "GETPAGE: got page - before reading\n"); + /* PDEBUG(page, "GETPAGE: got page - before reading\n"); */ /* now check if the data in the page is up to date */ if ( Page_Uptodate(page)) { if (!locked) @@ -430,10 +432,12 @@ struct page *obdfs_getpage(struct inode *inode, unsigned long offset, } - if ( obdfs_find_page_index(inode, index) ) { +#ifdef EXT2_OBD_DEBUG + if ((obd_debug_level & D_INFO) && obdfs_find_page_index(inode, index)) { CDEBUG(D_INFO, "OVERWRITE: found dirty page %p, index %ld\n", page, page->index); } +#endif err = obdfs_brw(READ, inode, page, create); @@ -447,7 +451,7 @@ struct page *obdfs_getpage(struct inode *inode, unsigned long offset, if ( !locked ) UnlockPage(page); SetPageUptodate(page); - PDEBUG(page,"GETPAGE - after reading"); + /* PDEBUG(page,"GETPAGE - after reading"); */ EXIT; return page; } /* obdfs_getpage */ diff --git a/lustre/obdfs/super.c b/lustre/obdfs/super.c index 18623de..972b27b 100644 --- a/lustre/obdfs/super.c +++ b/lustre/obdfs/super.c @@ -34,6 +34,8 @@ struct list_head obdfs_super_list; struct super_operations obdfs_super_operations; +long obdfs_cache_count = 0; +long obd_memory = 0; static char *obdfs_read_opt(const char *opt, char *data) { @@ -55,7 +57,7 @@ static char *obdfs_read_opt(const char *opt, char *data) } memcpy(retval, value, strlen(value)+1); - CDEBUG(D_INFO, "Assigned option: %s, value %s\n", opt, retval); + CDEBUG(D_PSDEV, "Assigned option: %s, value %s\n", opt, retval); return retval; } @@ -125,31 +127,27 @@ static struct super_block * obdfs_read_super(struct super_block *sb, obdfs_options(data, &device, &version); if ( !device ) { printk(__FUNCTION__ ": no device\n"); - MOD_DEC_USE_COUNT; EXIT; - return NULL; + goto ERR; } if ( (err = obdfs_getdev(device, &devno)) ) { printk("Cannot get devno of %s, error %d\n", device, err); - MOD_DEC_USE_COUNT; EXIT; - return NULL; + goto ERR;; } if ( MAJOR(devno) != OBD_PSDEV_MAJOR ) { printk(__FUNCTION__ ": wrong major number %d!\n", MAJOR(devno)); - MOD_DEC_USE_COUNT; EXIT; - return NULL; + goto ERR; } if ( MINOR(devno) >= MAX_OBD_DEVICES ) { printk(__FUNCTION__ ": minor of %s too high (%d)\n", device, MINOR(devno)); - MOD_DEC_USE_COUNT; EXIT; - return NULL; + goto ERR; } obddev = &obd_dev[MINOR(devno)]; @@ -158,9 +156,8 @@ static struct super_block * obdfs_read_super(struct super_block *sb, ! (obddev->obd_flags & OBD_SET_UP) ){ printk("device %s not attached or not set up (%d)\n", device, MINOR(devno)); - MOD_DEC_USE_COUNT; EXIT; - return NULL; + goto ERR;; } sbi->osi_obd = obddev; @@ -170,6 +167,7 @@ static struct super_block * obdfs_read_super(struct super_block *sb, err = sbi->osi_ops->o_connect(&sbi->osi_conn); if ( err ) { printk("OBDFS: cannot connect to %s\n", device); + EXIT; goto ERR; } @@ -184,6 +182,7 @@ static struct super_block * obdfs_read_super(struct super_block *sb, (void *)&blocksize); if ( err ) { printk("getinfo call to drive failed (blocksize)\n"); + EXIT; goto ERR; } @@ -192,6 +191,7 @@ static struct super_block * obdfs_read_super(struct super_block *sb, (void *)&blocksize_bits); if ( err ) { printk("getinfo call to drive failed (blocksize_bits)\n"); + EXIT; goto ERR; } @@ -199,6 +199,7 @@ static struct super_block * obdfs_read_super(struct super_block *sb, "root_ino", &scratch, (void *)&root_ino); if ( err ) { printk("getinfo call to drive failed (root_ino)\n"); + EXIT; goto ERR; } @@ -218,6 +219,7 @@ static struct super_block * obdfs_read_super(struct super_block *sb, sb->s_dev = 0; err = -ENOENT; unlock_super(sb); + EXIT; goto ERR; } @@ -228,12 +230,18 @@ static struct super_block * obdfs_read_super(struct super_block *sb, sb->s_root = d_alloc_root(root); list_add(&sbi->osi_list, &obdfs_super_list); unlock_super(sb); + OBD_FREE(device, strlen(device) + 1); + if (version) + OBD_FREE(version, strlen(version) + 1); EXIT; return sb; ERR: - EXIT; MOD_DEC_USE_COUNT; + if (device) + OBD_FREE(device, strlen(device) + 1); + if (version) + OBD_FREE(version, strlen(version) + 1); if (sbi) { sbi->osi_super = NULL; } @@ -253,7 +261,7 @@ static void obdfs_put_super(struct super_block *sb) sb->s_dev = 0; sbi = (struct obdfs_sb_info *) &sb->u.generic_sbp; - obdfs_flush_reqs(&sbi->osi_inodes, 0); + obdfs_flush_reqs(&sbi->osi_inodes, ~0UL); OPS(sb,disconnect)(ID(sb)); list_del(&sbi->osi_list); @@ -369,7 +377,7 @@ static void obdfs_put_inode(struct inode *inode) INIT_LIST_HEAD(obdfs_islist(inode)); tmp = obdfs_iplist(inode); - while ( (tmp = tmp->next) != obdfs_iplist(inode) ) { + while ( (tmp = tmp->prev) != obdfs_iplist(inode) ) { struct obdfs_pgrq *req; struct page *page; @@ -520,7 +528,8 @@ void cleanup_module(void) obdfs_sysctl_clean(); obdfs_cleanup_pgrqcache(); unregister_filesystem(&obdfs_fs_type); - + CDEBUG(D_MALLOC, "OBDFS mem used %ld, inodes %d, pages %d\n", + obd_memory, obd_inodes, obd_pages); EXIT; } diff --git a/lustre/obdfs/symlink.c b/lustre/obdfs/symlink.c index f1e0a5d..046d408 100644 --- a/lustre/obdfs/symlink.c +++ b/lustre/obdfs/symlink.c @@ -18,104 +18,73 @@ * Copyright (C) 1999 Seagate Technology Inc. (author: braam@stelias.com) */ -#include - -#include #include -#include +#include #include #include #include - #include /* for ENTRY and EXIT only */ #include -static struct dentry * obdfs_follow_link(struct dentry * dentry, - struct dentry *base, - unsigned int follow) +static int obdfs_fast_readlink(struct dentry *dentry, char *buffer, int buflen) +{ + char *s = obdfs_i2info(dentry->d_inode)->oi_inline; + return vfs_readlink(dentry, buffer, buflen, s); +} + +static struct dentry *obdfs_fast_follow_link(struct dentry *dentry, struct dentry *base, unsigned flags) +{ + char *s = obdfs_i2info(dentry->d_inode)->oi_inline; + return vfs_follow_link(dentry, base, flags, s); +} + +struct inode_operations obdfs_fast_symlink_inode_operations = { + readlink: obdfs_fast_readlink, + follow_link: obdfs_fast_follow_link, +}; + +static int obdfs_readlink(struct dentry *dentry, char *buffer, int buflen) { - struct inode *inode = dentry->d_inode; struct page *page = NULL; - char * link; + int res; ENTRY; - link = obdfs_i2info(inode)->oi_inline; - if (!obdfs_has_inline(inode)) { - OIDEBUG(inode); - page = obdfs_getpage(inode, 0, 0, 0); - PDEBUG(page, "follow_link"); - if (!page) { - dput(base); - EXIT; - return ERR_PTR(-EIO); - } - link = (char *)page_address(page); - } - UPDATE_ATIME(inode); - base = lookup_dentry(link, base, follow); - if (page) { - page_cache_release(page); + OIDEBUG(dentry->d_inode); + page = obdfs_getpage(dentry->d_inode, 0, 0, 0); + /* PDEBUG(page, "readlink"); */ + if (!page) { + EXIT; + return 0; } + res = vfs_readlink(dentry, buffer, buflen, (char *)page_address(page)); + page_cache_release(page); EXIT; - return base; -} + return res; +} /* obdfs_readlink */ -static int obdfs_readlink (struct dentry * dentry, char * buffer, int buflen) +static struct dentry * obdfs_follow_link(struct dentry * dentry, + struct dentry *base, + unsigned int follow) { - struct inode *inode = dentry->d_inode; struct page *page = NULL; - char * link; - int i; + struct dentry *res; ENTRY; - if (buflen > inode->i_sb->s_blocksize - 1) - buflen = inode->i_sb->s_blocksize - 1; - - link = obdfs_i2info(inode)->oi_inline; - if (!obdfs_has_inline(inode)) { - OIDEBUG(inode); - page = obdfs_getpage(inode, 0, 0, 0); - PDEBUG(page, "readlink"); - if (!page) { - EXIT; - return 0; - } - link = (char *)page_address(page); - } - - i = 0; - while (i < buflen && link[i]) - i++; - if (copy_to_user(buffer, link, i)) - i = -EFAULT; - if (page) { - page_cache_release(page); + OIDEBUG(dentry->d_inode); + page = obdfs_getpage(dentry->d_inode, 0, 0, 0); + /* PDEBUG(page, "follow_link"); */ + if (!page) { + dput(base); + EXIT; + return ERR_PTR(-EIO); } + res = vfs_follow_link(dentry, base, follow, (char *)page_address(page)); + page_cache_release(page); EXIT; - return i; -} /* obdfs_readlink */ + return res; +} -/* - * symlinks can't do much... - */ struct inode_operations obdfs_symlink_inode_operations = { - NULL, /* no file-operations */ - NULL, /* create */ - NULL, /* lookup */ - NULL, /* link */ - NULL, /* unlink */ - NULL, /* symlink */ - NULL, /* mkdir */ - NULL, /* rmdir */ - NULL, /* mknod */ - NULL, /* rename */ - obdfs_readlink, /* readlink */ - obdfs_follow_link, /* follow_link */ - NULL, /* get_block */ - NULL, /* readpage */ - NULL, /* writepage */ - NULL, /* truncate */ - NULL, /* permission */ - NULL /* revalidate */ + readlink: obdfs_readlink, + follow_link: obdfs_follow_link, }; - -- 1.8.3.1