Details : under very unusual load conditions an assertion is hit in
ll_intent_file_open()
-Severity : major
+Severity : major
Frequency : only if OST filesystem is corrupted
Bugzilla : 9829
Description: client incorrectly hits assertion in ptlrpc_replay_req()
allocation failure the allocation is retried with a smaller
buffer and broken into smaller requests.
+Severity : major
+Bugzilla : 11710
+Frequency : always
+Description: add support for PG_writeback bit
+Details : add support for the PG_writeback bit to Lustre, for more careful
+ handling of the page cache in 2.6 kernels. This also fixes some deadlocks
+ and removes the hack used to make O_SYNC work with 2.6 kernels.
+
------------------------------------------------------------------------------
2006-02-09 Cluster File Systems, Inc. <info@clusterfs.com>
flock_lock_file_wait(file, lock)
#endif
+/* Evaluates to false only when cmd is OBD_BRW_WRITE and the page does not
+ * have PageWriteback() set; true in every other case. Used inside LASSERT().
+ * cmd is parenthesized so an expression argument cannot change the meaning
+ * of the comparison. */
+#define CheckWriteback(page, cmd) \
+ (!(!PageWriteback(page) && (cmd) == OBD_BRW_WRITE))
#else /* 2.4.. */
#define __set_page_ll_data(page, llap) set_page_private(page, (unsigned long)llap)
#define __clear_page_ll_data(page) set_page_private(page, 0)
#define PageWriteback(page) 0
+#define CheckWriteback(page, cmd) 1 /* PageWriteback() is stubbed to 0 in this branch, so the check always passes */
#define set_page_writeback(page) do {} while (0)
#define end_page_writeback(page) do {} while (0)
retval = (sum > 0) ? sum : retval;
lprocfs_counter_add(ll_i2sbi(inode)->ll_stats, LPROC_LL_WRITE_BYTES,
retval > 0 ? retval : 0);
-
- if (retval > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
- rc = ll_sync_page_range(inode, inode->i_mapping, *ppos - retval,
- count);
- if (rc < 0)
- retval = rc;
- }
-
RETURN(retval);
}
if (TryLockPage(page))
RETURN(-EAGAIN);
- LL_CDEBUG_PAGE(D_PAGE, page, "made ready\n");
- page_cache_get(page);
+ LASSERT(!PageWriteback(page));
/* if we left PageDirty we might get another writepage call
* in the future. list walkers are bright enough
* we got the page cache list we'd create a lock inversion
* with the removepage path which gets the page lock then the
* cli lock */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
clear_page_dirty(page);
+#else
+ LASSERTF(!PageWriteback(page),"cmd %x page %p ino %lu index %lu\n", cmd, page,
+ page->mapping->host->i_ino, page->index);
+ clear_page_dirty_for_io(page);
+
+ /* This actually clears the dirty bit in the radix tree.*/
+ set_page_writeback(page);
+#endif
+
+ LL_CDEBUG_PAGE(D_PAGE, page, "made ready\n");
+ page_cache_get(page);
+
RETURN(0);
}
rc = oig_wait(oig);
- if (!rc && async_flags & ASYNC_READY)
+ if (!rc && async_flags & ASYNC_READY) {
unlock_page(llap->llap_page);
+ if (PageWriteback(llap->llap_page)) {
+ end_page_writeback(llap->llap_page);
+ }
+ }
LL_CDEBUG_PAGE(D_PAGE, llap->llap_page, "sync write returned %d\n", rc);
llap = LLAP_FROM_COOKIE(data);
page = llap->llap_page;
LASSERT(PageLocked(page));
+ LASSERT(CheckWriteback(page,cmd));
LL_CDEBUG_PAGE(D_PAGE, page, "completing cmd %d with %d\n", cmd, rc);
if (IS_ERR(llap))
GOTO(out, rc = PTR_ERR(llap));
+ LASSERT(!PageWriteback(page));
+ set_page_writeback(page);
+
page_cache_get(page);
if (llap->llap_write_queued) {
LL_CDEBUG_PAGE(D_PAGE, page, "marking urgent\n");
if (!lli->lli_async_rc)
lli->lli_async_rc = rc;
/* re-dirty page on error so it retries write */
+ if (PageWriteback(page)) {
+ end_page_writeback(page);
+ }
ll_redirty_page(page);
unlock_page(page);
}
#endif
};
-int ll_sync_page_range(struct inode *inode, struct address_space *mapping,
- loff_t start, size_t count)
-{
- return 0;
-}
-
.releasepage = ll_releasepage,
.bmap = NULL
};
-
-static int wait_on_page_locked_range(struct address_space *mapping,
- pgoff_t start, pgoff_t end)
-{
- pgoff_t index;
- struct page *page;
- int ret = 0;
-
- if (end < start)
- return 0;
-
- for(index = start; index < end; index++) {
- page = find_get_page(mapping, index);
- if (page == NULL)
- continue;
-
- wait_on_page_locked(page);
- if (PageError(page))
- ret = -EIO;
- cond_resched();
- }
-
- /* Check for outstanding write errors */
- if (test_and_clear_bit(AS_ENOSPC, &mapping->flags))
- ret = -ENOSPC;
- if (test_and_clear_bit(AS_EIO, &mapping->flags))
- ret = -EIO;
-
- return ret;
-}
-
-int ll_sync_page_range(struct inode *inode, struct address_space *mapping,
- loff_t pos, size_t count)
-{
- pgoff_t start = pos >> CFS_PAGE_SHIFT;
- pgoff_t end = (pos + count - 1) >> CFS_PAGE_SHIFT;
- struct writeback_control wbc;
- int ret;
-
- wbc.sync_mode = WB_SYNC_ALL;
- wbc.nr_to_write = mapping->nrpages * 2;
-#ifdef HAVE_WB_RANGE_START
- wbc.range_start = start;
- wbc.range_end = end;
-#else
- wbc.start = start;
- wbc.end = end;
-#endif
- ret = generic_writepages(mapping, &wbc);
-
- if (ret == 0)
- ret = wait_on_page_locked_range(mapping, start, end);
- return ret;
-}
-
return argv[cur_arg++];
}
+
+struct flag_mapping { /* one entry: textual flag name and the open(2) flag constant it stands for */
+ const char *string; /* name as written on the command line, e.g. "O_SYNC" */
+ const int flag; /* value OR-ed into the open flags; -1 marks the end of the table */
+} flag_table[] = {
+ {"O_RDONLY", O_RDONLY},
+ {"O_WRONLY", O_WRONLY},
+ {"O_RDWR", O_RDWR},
+ {"O_CREAT", O_CREAT},
+ {"O_EXCL", O_EXCL},
+ {"O_NOCTTY", O_NOCTTY},
+ {"O_TRUNC", O_TRUNC},
+ {"O_APPEND", O_APPEND},
+ {"O_NONBLOCK", O_NONBLOCK},
+ {"O_NDELAY", O_NDELAY},
+ {"O_SYNC", O_SYNC},
+#ifdef O_DIRECT
+ {"O_DIRECT", O_DIRECT}, /* only present when the platform defines O_DIRECT */
+#endif
+ {"O_LARGEFILE", O_LARGEFILE},
+ {"O_DIRECTORY", O_DIRECTORY},
+ {"O_NOFOLLOW", O_NOFOLLOW},
+ {"", -1} /* sentinel: get_flags() stops scanning when it sees flag == -1 */
+};
+
+/*
+ * Parse the leading run of ':'-separated open(2) flag names in data.
+ *
+ * data   - text following the 'o' command, e.g. "O_CREAT:O_RDWR:w4096c";
+ *          it is not modified (a private copy is tokenized).
+ * rflags - receives the OR of all recognized flags, or O_RDONLY when the
+ *          first token is not a known flag name.
+ *
+ * Returns the number of characters of data consumed by the flag list:
+ * 0 when no flag was recognized, otherwise the offset of the first token
+ * that is not a flag name, or the end offset of the last flag when every
+ * token matched. The caller advances its command pointer by this amount.
+ *
+ * Parsing stops at the first token that is not a flag name, so ':'
+ * characters appearing later in the command string can no longer make the
+ * returned length skip over intermediate commands.
+ *
+ * Exits the process with status -1 if the copy cannot be allocated.
+ */
+int get_flags(char *data, int *rflags)
+{
+        char *cloned_flags;
+        char *tmp;
+        int flag_set = 0;
+        int flags = 0;
+        int size = 0;
+
+        /* strtok() writes into its argument, so work on a copy */
+        cloned_flags = strdup(data);
+        if (cloned_flags == NULL) {
+                fprintf(stderr, "Insufficient memory.\n");
+                exit(-1);
+        }
+
+        for (tmp = strtok(cloned_flags, ":"); tmp;
+             tmp = strtok(NULL, ":")) {
+                int i;
+
+                for (i = 0; flag_table[i].flag != -1; i++) {
+                        if (!strcmp(tmp, flag_table[i].string))
+                                break;
+                }
+
+                if (flag_table[i].flag == -1) {
+                        /* first non-flag token: the flag list ends here */
+                        size = tmp - cloned_flags;
+                        break;
+                }
+
+                flags |= flag_table[i].flag;
+                /* consumed so far: token offset plus the flag name itself */
+                size = (tmp - cloned_flags) + strlen(flag_table[i].string);
+                flag_set = 1;
+        }
+        free(cloned_flags);
+
+        if (!flag_set) {
+                /* plain 'o' with no flag names keeps the old O_RDONLY open */
+                *rflags = O_RDONLY;
+                return 0;
+        }
+
+        *rflags = flags;
+        return size;
+}
+
#define POP_ARG() (pop_arg(argc, argv))
#define min(a,b) ((a)>(b)?(b):(a))
size_t mmap_len = 0, i;
unsigned char *mmap_ptr = NULL, junk = 0;
int rc, len, fd = -1;
+ int flags;
+ int save_errno;
if (argc < 3) {
fprintf(stderr, usage, argv[0]);
break;
case 'c':
if (close(fd) == -1) {
+ save_errno = errno;
perror("close");
- exit(1);
+ exit(save_errno);
}
fd = -1;
break;
case 'd':
if (mkdir(fname, 0755) == -1) {
+ save_errno = errno;
perror("mkdir(0755)");
- exit(1);
+ exit(save_errno);
}
break;
case 'D':
fd = open(fname, O_DIRECTORY);
if (fd == -1) {
+ save_errno = errno;
perror("open(O_DIRECTORY)");
- exit(1);
+ exit(save_errno);
}
break;
case 'l':
if (!newfile)
newfile = fname;
if (symlink(fname, newfile)) {
+ save_errno = errno;
perror("symlink()");
- exit(1);
+ exit(save_errno);
}
break;
case 'L':
if (!newfile)
newfile = fname;
if (link(fname, newfile)) {
+ save_errno = errno;
perror("symlink()");
- exit(1);
+ exit(save_errno);
}
break;
case 'm':
if (mknod(fname, S_IFREG | 0644, 0) == -1) {
+ save_errno = errno;
perror("mknod(S_IFREG|0644, 0)");
- exit(1);
+ exit(save_errno);
}
break;
case 'M':
mmap_ptr = mmap(NULL, mmap_len, PROT_WRITE | PROT_READ,
MAP_SHARED, fd, 0);
if (mmap_ptr == MAP_FAILED) {
+ save_errno = errno;
perror("mmap");
- exit(1);
+ exit(save_errno);
}
break;
case 'N':
if (!newfile)
newfile = fname;
if (rename (fname, newfile)) {
+ save_errno = errno;
perror("rename()");
- exit(1);
+ exit(save_errno);
}
break;
case 'O':
fd = open(fname, O_CREAT|O_RDWR, 0644);
if (fd == -1) {
+ save_errno = errno;
perror("open(O_RDWR|O_CREAT)");
- exit(1);
+ exit(save_errno);
}
break;
case 'o':
- fd = open(fname, O_RDONLY);
+ len = get_flags(commands+1, &flags);
+ commands += len;
+ fd = open(fname, flags);
if (fd == -1) {
- perror("open(O_RDONLY)");
- exit(1);
+ save_errno = errno;
+ perror("open");
+ exit(save_errno);
}
break;
- case 'r':
+ case 'r':
len = atoi(commands+1);
if (len <= 0)
len = 1;
while(len > 0) {
if (read(fd, &buf,
min(len,sizeof(buf))) == -1) {
+ save_errno = errno;
perror("read");
- exit(1);
+ exit(save_errno);
}
len -= sizeof(buf);
}
break;
case 'S':
if (fstat(fd, &st) == -1) {
+ save_errno = errno;
perror("fstat");
- exit(1);
+ exit(save_errno);
}
break;
case 'R':
break;
case 's':
if (stat(fname, &st) == -1) {
+ save_errno = errno;
perror("stat");
- exit(1);
+ exit(save_errno);
}
break;
case 't':
if (fchmod(fd, 0) == -1) {
+ save_errno = errno;
perror("fchmod");
- exit(1);
+ exit(save_errno);
}
break;
case 'T':
len = atoi(commands+1);
if (ftruncate(fd, len) == -1) {
+ save_errno = errno;
printf("ftruncate (%d,%d)\n", fd, len);
perror("ftruncate");
- exit(1);
+ exit(save_errno);
}
break;
case 'u':
if (unlink(fname) == -1) {
+ save_errno = errno;
perror("unlink");
- exit(1);
+ exit(save_errno);
}
break;
case 'U':
if (munmap(mmap_ptr, mmap_len)) {
+ save_errno = errno;
perror("munmap");
- exit(1);
+ exit(save_errno);
}
break;
- case 'w':
+ case 'w':
len = atoi(commands+1);
if (len <= 0)
len = 1;
while(len > 0) {
- if ((rc = write(fd, buf,
+ if ((rc = write(fd, buf,
min(len, sizeof(buf))))
== -1) {
+ save_errno = errno;
perror("write");
- exit(1);
+ exit(save_errno);
}
len -= sizeof(buf);
}
break;
case 'y':
if (fsync(fd) == -1) {
+ save_errno = errno;
perror("fsync");
- exit(1);
+ exit(save_errno);
}
break;
case 'Y':
if (fdatasync(fd) == -1) {
+ save_errno = errno;
perror("fdatasync");
- exit(1);
+ exit(save_errno);
}
case 'z':
len = atoi(commands+1);
if (lseek(fd, len, SEEK_SET) == -1) {
+ save_errno = errno;
perror("lseek");
- exit(1);
+ exit(save_errno);
}
break;
case '0':
}
strncpy(cloned_flags, optarg, strlen(optarg)+1);
- for (tmp = strtok(optarg, ":|"); tmp;
+ for (tmp = strtok(cloned_flags, ":|"); tmp;
tmp = strtok(NULL, ":|")) {
int i = 0;
#ifdef DEBUG
exit(-1);
}
}
+ free(cloned_flags);
#ifdef DEBUG
printf("flags = %x\n", flags);
#endif
}
run_test 117 "verify fsfilt_extend =========="
+test_118() #bug 11710: after an O_SYNC write, count "dirty" lines in dump_page_cache
+{
+
+ multiop $DIR/$tfile oO_CREAT:O_RDWR:O_SYNC:w4096c; # open with O_CREAT|O_RDWR|O_SYNC, write 4096 bytes, close
+ dirty=$(grep -c dirty /proc/fs/lustre/llite/lustre-*/dump_page_cache) # number of lines containing "dirty"; NOTE(review): assumes the glob matches a single file - confirm
+
+ return $dirty # the count becomes the function status, so 0 means no "dirty" line was found
+}
+run_test 118 "verify O_SYNC work"
+
TMPDIR=$OLDTMPDIR
TMP=$OLDTMP
HOME=$OLDHOME