From eab0d0db6c22f83dc9488d97cea499218d01700c Mon Sep 17 00:00:00 2001 From: shadow Date: Mon, 26 Mar 2007 13:33:43 +0000 Subject: [PATCH] Add support for usage PG_writeback bit in lustre. b=11710 r=alex,green. --- lustre/ChangeLog | 10 ++- lustre/include/linux/lustre_compat25.h | 3 + lustre/llite/file.c | 8 -- lustre/llite/rw.c | 29 ++++++- lustre/llite/rw24.c | 6 -- lustre/llite/rw26.c | 55 ------------- lustre/tests/multiop.c | 141 +++++++++++++++++++++++++++------ lustre/tests/openfile.c | 3 +- lustre/tests/sanity.sh | 10 +++ 9 files changed, 165 insertions(+), 100 deletions(-) diff --git a/lustre/ChangeLog b/lustre/ChangeLog index fd6b5f1..db75d0b 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -322,7 +322,7 @@ Description: Crash on NFS re-export node Details : under very unusual load conditions an assertion is hit in ll_intent_file_open() -Severity : major +Severity : major Frequency : only if OST filesystem is corrupted Bugzilla : 9829 Description: client incorrectly hits assertion in ptlrpc_replay_req() @@ -341,6 +341,14 @@ Details : Large single O_DIRECT read and write calls can fail to allocate allocation failure the allocation is retried with a smaller buffer and broken into smaller requests. +Severity : major +Bugzilla : 11710 +Frequency : always +Description: add support for the PG_writeback bit +Details : add support for the PG_writeback bit in Lustre, for more careful + handling of the page cache in 2.6 kernels. This also fixes some deadlocks + and removes the hack used to make O_SYNC work with 2.6 kernels. + ------------------------------------------------------------------------------ 2006-02-09 Cluster File Systems, Inc. 
diff --git a/lustre/include/linux/lustre_compat25.h b/lustre/include/linux/lustre_compat25.h index b0f2cb94..7e69bce 100644 --- a/lustre/include/linux/lustre_compat25.h +++ b/lustre/include/linux/lustre_compat25.h @@ -213,6 +213,8 @@ static inline int cleanup_group_info(void) flock_lock_file_wait(file, lock) #endif +#define CheckWriteback(page, cmd) \ + (!(!PageWriteback(page) && cmd == OBD_BRW_WRITE)) #else /* 2.4.. */ @@ -354,6 +356,7 @@ static inline void cond_resched(void) #define __set_page_ll_data(page, llap) set_page_private(page, (unsigned long)llap) #define __clear_page_ll_data(page) set_page_private(page, 0) #define PageWriteback(page) 0 +#define CheckWriteback(page, cmd) 1 #define set_page_writeback(page) do {} while (0) #define end_page_writeback(page) do {} while (0) diff --git a/lustre/llite/file.c b/lustre/llite/file.c index 40c8749..6d4ba77 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -1410,14 +1410,6 @@ out: retval = (sum > 0) ? sum : retval; lprocfs_counter_add(ll_i2sbi(inode)->ll_stats, LPROC_LL_WRITE_BYTES, retval > 0 ? retval : 0); - - if (retval > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { - rc = ll_sync_page_range(inode, inode->i_mapping, *ppos - retval, - count); - if (rc < 0) - retval = rc; - } - RETURN(retval); } diff --git a/lustre/llite/rw.c b/lustre/llite/rw.c index 72f4a86..588bfeb 100644 --- a/lustre/llite/rw.c +++ b/lustre/llite/rw.c @@ -291,8 +291,7 @@ static int ll_ap_make_ready(void *data, int cmd) if (TryLockPage(page)) RETURN(-EAGAIN); - LL_CDEBUG_PAGE(D_PAGE, page, "made ready\n"); - page_cache_get(page); + LASSERT(!PageWriteback(page)); /* if we left PageDirty we might get another writepage call * in the future. 
list walkers are bright enough @@ -301,7 +300,20 @@ static int ll_ap_make_ready(void *data, int cmd) * we got the page cache list we'd create a lock inversion * with the removepage path which gets the page lock then the * cli lock */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) clear_page_dirty(page); +#else + LASSERTF(!PageWriteback(page),"cmd %x page %p ino %lu index %lu\n", cmd, page, + page->mapping->host->i_ino, page->index); + clear_page_dirty_for_io(page); + + /* This actually clears the dirty bit in the radix tree.*/ + set_page_writeback(page); +#endif + + LL_CDEBUG_PAGE(D_PAGE, page, "made ready\n"); + page_cache_get(page); + RETURN(0); } @@ -707,8 +719,12 @@ static int queue_or_sync_write(struct obd_export *exp, struct inode *inode, rc = oig_wait(oig); - if (!rc && async_flags & ASYNC_READY) + if (!rc && async_flags & ASYNC_READY) { unlock_page(llap->llap_page); + if (PageWriteback(llap->llap_page)) { + end_page_writeback(llap->llap_page); + } + } LL_CDEBUG_PAGE(D_PAGE, llap->llap_page, "sync write returned %d\n", rc); @@ -827,6 +843,7 @@ int ll_ap_completion(void *data, int cmd, struct obdo *oa, int rc) llap = LLAP_FROM_COOKIE(data); page = llap->llap_page; LASSERT(PageLocked(page)); + LASSERT(CheckWriteback(page,cmd)); LL_CDEBUG_PAGE(D_PAGE, page, "completing cmd %d with %d\n", cmd, rc); @@ -1350,6 +1367,9 @@ int ll_writepage(struct page *page) if (IS_ERR(llap)) GOTO(out, rc = PTR_ERR(llap)); + LASSERT(!PageWriteback(page)); + set_page_writeback(page); + page_cache_get(page); if (llap->llap_write_queued) { LL_CDEBUG_PAGE(D_PAGE, page, "marking urgent\n"); @@ -1367,6 +1387,9 @@ out: if (!lli->lli_async_rc) lli->lli_async_rc = rc; /* re-dirty page on error so it retries write */ + if (PageWriteback(page)) { + end_page_writeback(page); + } ll_redirty_page(page); unlock_page(page); } diff --git a/lustre/llite/rw24.c b/lustre/llite/rw24.c index 402a555..508636b 100644 --- a/lustre/llite/rw24.c +++ b/lustre/llite/rw24.c @@ -147,9 +147,3 @@ struct 
address_space_operations ll_aops = { #endif }; -int ll_sync_page_range(struct inode *inode, struct address_space *mapping, - loff_t start, size_t count) -{ - return 0; -} - diff --git a/lustre/llite/rw26.c b/lustre/llite/rw26.c index b7e4c5f..4852d43 100644 --- a/lustre/llite/rw26.c +++ b/lustre/llite/rw26.c @@ -292,58 +292,3 @@ struct address_space_operations ll_aops = { .releasepage = ll_releasepage, .bmap = NULL }; - -static int wait_on_page_locked_range(struct address_space *mapping, - pgoff_t start, pgoff_t end) -{ - pgoff_t index; - struct page *page; - int ret = 0; - - if (end < start) - return 0; - - for(index = start; index < end; index++) { - page = find_get_page(mapping, index); - if (page == NULL) - continue; - - wait_on_page_locked(page); - if (PageError(page)) - ret = -EIO; - cond_resched(); - } - - /* Check for outstanding write errors */ - if (test_and_clear_bit(AS_ENOSPC, &mapping->flags)) - ret = -ENOSPC; - if (test_and_clear_bit(AS_EIO, &mapping->flags)) - ret = -EIO; - - return ret; -} - -int ll_sync_page_range(struct inode *inode, struct address_space *mapping, - loff_t pos, size_t count) -{ - pgoff_t start = pos >> CFS_PAGE_SHIFT; - pgoff_t end = (pos + count - 1) >> CFS_PAGE_SHIFT; - struct writeback_control wbc; - int ret; - - wbc.sync_mode = WB_SYNC_ALL; - wbc.nr_to_write = mapping->nrpages * 2; -#ifdef HAVE_WB_RANGE_START - wbc.range_start = start; - wbc.range_end = end; -#else - wbc.start = start; - wbc.end = end; -#endif - ret = generic_writepages(mapping, &wbc); - - if (ret == 0) - ret = wait_on_page_locked_range(mapping, start, end); - return ret; -} - diff --git a/lustre/tests/multiop.c b/lustre/tests/multiop.c index cdc5013..63a6f08 100755 --- a/lustre/tests/multiop.c +++ b/lustre/tests/multiop.c @@ -61,6 +61,70 @@ pop_arg(int argc, char *argv[]) return argv[cur_arg++]; } + +struct flag_mapping { + const char *string; + const int flag; +} flag_table[] = { + {"O_RDONLY", O_RDONLY}, + {"O_WRONLY", O_WRONLY}, + {"O_RDWR", O_RDWR}, + 
{"O_CREAT", O_CREAT}, + {"O_EXCL", O_EXCL}, + {"O_NOCTTY", O_NOCTTY}, + {"O_TRUNC", O_TRUNC}, + {"O_APPEND", O_APPEND}, + {"O_NONBLOCK", O_NONBLOCK}, + {"O_NDELAY", O_NDELAY}, + {"O_SYNC", O_SYNC}, +#ifdef O_DIRECT + {"O_DIRECT", O_DIRECT}, +#endif + {"O_LARGEFILE", O_LARGEFILE}, + {"O_DIRECTORY", O_DIRECTORY}, + {"O_NOFOLLOW", O_NOFOLLOW}, + {"", -1} +}; + +int get_flags(char *data, int *rflags) +{ + char *cloned_flags; + char *tmp; + int flag_set = 0; + int flags = 0; + int size = 0; + + cloned_flags = strdup(data); + if (cloned_flags == NULL) { + fprintf(stderr, "Insufficient memory.\n"); + exit(-1); + } + + for (tmp = strtok(cloned_flags, ":"); tmp; + tmp = strtok(NULL, ":")) { + int i; + + size = tmp - cloned_flags; + for (i = 0; flag_table[i].flag != -1; i++) { + if (!strcmp(tmp, flag_table[i].string)){ + flags |= flag_table[i].flag; + size += strlen(flag_table[i].string); + flag_set = 1; + break; + } + } + } + free(cloned_flags); + + if (!flag_set) { + *rflags = O_RDONLY; + return 0; + } + + *rflags = flags; + return size; +} + #define POP_ARG() (pop_arg(argc, argv)) #define min(a,b) ((a)>(b)?(b):(a)) @@ -72,6 +136,8 @@ int main(int argc, char **argv) size_t mmap_len = 0, i; unsigned char *mmap_ptr = NULL, junk = 0; int rc, len, fd = -1; + int flags; + int save_errno; if (argc < 3) { fprintf(stderr, usage, argv[0]); @@ -92,22 +158,25 @@ int main(int argc, char **argv) break; case 'c': if (close(fd) == -1) { + save_errno = errno; perror("close"); - exit(1); + exit(save_errno); } fd = -1; break; case 'd': if (mkdir(fname, 0755) == -1) { + save_errno = errno; perror("mkdir(0755)"); - exit(1); + exit(save_errno); } break; case 'D': fd = open(fname, O_DIRECTORY); if (fd == -1) { + save_errno = errno; perror("open(O_DIRECTORY)"); - exit(1); + exit(save_errno); } break; case 'l': @@ -115,8 +184,9 @@ int main(int argc, char **argv) if (!newfile) newfile = fname; if (symlink(fname, newfile)) { + save_errno = errno; perror("symlink()"); - exit(1); + exit(save_errno); 
} break; case 'L': @@ -124,14 +194,16 @@ int main(int argc, char **argv) if (!newfile) newfile = fname; if (link(fname, newfile)) { + save_errno = errno; perror("symlink()"); - exit(1); + exit(save_errno); } break; case 'm': if (mknod(fname, S_IFREG | 0644, 0) == -1) { + save_errno = errno; perror("mknod(S_IFREG|0644, 0)"); - exit(1); + exit(save_errno); } break; case 'M': @@ -139,8 +211,9 @@ int main(int argc, char **argv) mmap_ptr = mmap(NULL, mmap_len, PROT_WRITE | PROT_READ, MAP_SHARED, fd, 0); if (mmap_ptr == MAP_FAILED) { + save_errno = errno; perror("mmap"); - exit(1); + exit(save_errno); } break; case 'N': @@ -148,41 +221,48 @@ int main(int argc, char **argv) if (!newfile) newfile = fname; if (rename (fname, newfile)) { + save_errno = errno; perror("rename()"); - exit(1); + exit(save_errno); } break; case 'O': fd = open(fname, O_CREAT|O_RDWR, 0644); if (fd == -1) { + save_errno = errno; perror("open(O_RDWR|O_CREAT)"); - exit(1); + exit(save_errno); } break; case 'o': - fd = open(fname, O_RDONLY); + len = get_flags(commands+1, &flags); + commands += len; + fd = open(fname, flags); if (fd == -1) { - perror("open(O_RDONLY)"); - exit(1); + save_errno = errno; + perror("open"); + exit(save_errno); } break; - case 'r': + case 'r': len = atoi(commands+1); if (len <= 0) len = 1; while(len > 0) { if (read(fd, &buf, min(len,sizeof(buf))) == -1) { + save_errno = errno; perror("read"); - exit(1); + exit(save_errno); } len -= sizeof(buf); } break; case 'S': if (fstat(fd, &st) == -1) { + save_errno = errno; perror("fstat"); - exit(1); + exit(save_errno); } break; case 'R': @@ -191,46 +271,52 @@ int main(int argc, char **argv) break; case 's': if (stat(fname, &st) == -1) { + save_errno = errno; perror("stat"); - exit(1); + exit(save_errno); } break; case 't': if (fchmod(fd, 0) == -1) { + save_errno = errno; perror("fchmod"); - exit(1); + exit(save_errno); } break; case 'T': len = atoi(commands+1); if (ftruncate(fd, len) == -1) { + save_errno = errno; printf("ftruncate 
(%d,%d)\n", fd, len); perror("ftruncate"); - exit(1); + exit(save_errno); } break; case 'u': if (unlink(fname) == -1) { + save_errno = errno; perror("unlink"); - exit(1); + exit(save_errno); } break; case 'U': if (munmap(mmap_ptr, mmap_len)) { + save_errno = errno; perror("munmap"); - exit(1); + exit(save_errno); } break; - case 'w': + case 'w': len = atoi(commands+1); if (len <= 0) len = 1; while(len > 0) { - if ((rc = write(fd, buf, + if ((rc = write(fd, buf, min(len, sizeof(buf)))) == -1) { + save_errno = errno; perror("write"); - exit(1); + exit(save_errno); } len -= sizeof(buf); } @@ -241,20 +327,23 @@ int main(int argc, char **argv) break; case 'y': if (fsync(fd) == -1) { + save_errno = errno; perror("fsync"); - exit(1); + exit(save_errno); } break; case 'Y': if (fdatasync(fd) == -1) { + save_errno = errno; perror("fdatasync"); - exit(1); + exit(save_errno); } case 'z': len = atoi(commands+1); if (lseek(fd, len, SEEK_SET) == -1) { + save_errno = errno; perror("lseek"); - exit(1); + exit(save_errno); } break; case '0': diff --git a/lustre/tests/openfile.c b/lustre/tests/openfile.c index 1510c8f..40d197f 100644 --- a/lustre/tests/openfile.c +++ b/lustre/tests/openfile.c @@ -81,7 +81,7 @@ int main(int argc, char** argv) } strncpy(cloned_flags, optarg, strlen(optarg)+1); - for (tmp = strtok(optarg, ":|"); tmp; + for (tmp = strtok(cloned_flags, ":|"); tmp; tmp = strtok(NULL, ":|")) { int i = 0; #ifdef DEBUG @@ -101,6 +101,7 @@ int main(int argc, char** argv) exit(-1); } } + free(cloned_flags); #ifdef DEBUG printf("flags = %x\n", flags); #endif diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index d8631a3..40e50ff 100644 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -3779,6 +3779,16 @@ test_117() # bug 10891 } run_test 117 "verify fsfilt_extend ==========" +test_118() #bug 11710 +{ + + multiop $DIR/$tfile oO_CREAT:O_RDWR:O_SYNC:w4096c; + dirty=$(grep -c dirty /proc/fs/lustre/llite/lustre-*/dump_page_cache) + + return $dirty +} +run_test 
118 "verify O_SYNC work" + TMPDIR=$OLDTMPDIR TMP=$OLDTMP HOME=$OLDHOME -- 1.8.3.1