Whamcloud - gitweb
Add support for using the PG_writeback bit in Lustre.
authorshadow <shadow>
Mon, 26 Mar 2007 13:33:43 +0000 (13:33 +0000)
committershadow <shadow>
Mon, 26 Mar 2007 13:33:43 +0000 (13:33 +0000)
b=11710
r=alex,green.

lustre/ChangeLog
lustre/include/linux/lustre_compat25.h
lustre/llite/file.c
lustre/llite/rw.c
lustre/llite/rw24.c
lustre/llite/rw26.c
lustre/tests/multiop.c
lustre/tests/openfile.c
lustre/tests/sanity.sh

index fd6b5f1..db75d0b 100644 (file)
@@ -322,7 +322,7 @@ Description: Crash on NFS re-export node
 Details    : under very unusual load conditions an assertion is hit in
             ll_intent_file_open()
 
-Severity   : major          
+Severity   : major
 Frequency  : only if OST filesystem is corrupted
 Bugzilla   : 9829
 Description: client incorrectly hits assertion in ptlrpc_replay_req()
@@ -341,6 +341,14 @@ Details    : Large single O_DIRECT read and write calls can fail to allocate
             allocation failure the allocation is retried with a smaller
             buffer and broken into smaller requests.
 
+Severity   : major
+Bugzilla   : 11710
+Frequency  : always
+Description: add support for the PG_writeback bit
+Details    : add support for the PG_writeback bit in Lustre, for more careful
+             handling of the page cache in 2.6 kernels. This also fixes some
+             deadlocks and removes the O_SYNC hack needed on 2.6 kernels.
+
 ------------------------------------------------------------------------------
 
 2006-02-09  Cluster File Systems, Inc. <info@clusterfs.com>
index b0f2cb9..7e69bce 100644 (file)
@@ -213,6 +213,8 @@ static inline int cleanup_group_info(void)
         flock_lock_file_wait(file, lock)
 #endif
 
+/* Sanity predicate meant for LASSERT(): false only when cmd is OBD_BRW_WRITE
+ * and PageWriteback(page) is not set; true for every other combination. */
+#define CheckWriteback(page, cmd) \
+        (PageWriteback(page) || (cmd) != OBD_BRW_WRITE)
 
 #else /* 2.4.. */
 
@@ -354,6 +356,7 @@ static inline void cond_resched(void)
 #define __set_page_ll_data(page, llap) set_page_private(page, (unsigned long)llap)
 #define __clear_page_ll_data(page) set_page_private(page, 0)
 #define PageWriteback(page) 0
+#define CheckWriteback(page, cmd) 1
 #define set_page_writeback(page) do {} while (0)
 #define end_page_writeback(page) do {} while (0)
 
index 40c8749..6d4ba77 100644 (file)
@@ -1410,14 +1410,6 @@ out:
         retval = (sum > 0) ? sum : retval;
         lprocfs_counter_add(ll_i2sbi(inode)->ll_stats, LPROC_LL_WRITE_BYTES,
                             retval > 0 ? retval : 0);
-
-        if (retval > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
-                rc = ll_sync_page_range(inode, inode->i_mapping, *ppos - retval,
-                                        count);
-                if (rc < 0)
-                        retval = rc;
-        }
-
         RETURN(retval);
 }
 
index 72f4a86..588bfeb 100644 (file)
@@ -291,8 +291,7 @@ static int ll_ap_make_ready(void *data, int cmd)
         if (TryLockPage(page))
                 RETURN(-EAGAIN);
 
-        LL_CDEBUG_PAGE(D_PAGE, page, "made ready\n");
-        page_cache_get(page);
+        LASSERT(!PageWriteback(page));
 
         /* if we left PageDirty we might get another writepage call
          * in the future.  list walkers are bright enough
@@ -301,7 +300,20 @@ static int ll_ap_make_ready(void *data, int cmd)
          * we got the page cache list we'd create a lock inversion
          * with the removepage path which gets the page lock then the
          * cli lock */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
         clear_page_dirty(page);
+#else
+        LASSERTF(!PageWriteback(page),"cmd %x page %p ino %lu index %lu\n", cmd, page,
+                 page->mapping->host->i_ino, page->index);
+        clear_page_dirty_for_io(page);
+
+        /* This actually clears the dirty bit in the radix tree.*/
+        set_page_writeback(page);
+#endif
+
+        LL_CDEBUG_PAGE(D_PAGE, page, "made ready\n");
+        page_cache_get(page);
+
         RETURN(0);
 }
 
@@ -707,8 +719,12 @@ static int queue_or_sync_write(struct obd_export *exp, struct inode *inode,
 
         rc = oig_wait(oig);
 
-        if (!rc && async_flags & ASYNC_READY)
+        if (!rc && async_flags & ASYNC_READY) {
                 unlock_page(llap->llap_page);
+                if (PageWriteback(llap->llap_page)) {
+                        end_page_writeback(llap->llap_page);
+                }
+        }
 
         LL_CDEBUG_PAGE(D_PAGE, llap->llap_page, "sync write returned %d\n", rc);
 
@@ -827,6 +843,7 @@ int ll_ap_completion(void *data, int cmd, struct obdo *oa, int rc)
         llap = LLAP_FROM_COOKIE(data);
         page = llap->llap_page;
         LASSERT(PageLocked(page));
+        LASSERT(CheckWriteback(page,cmd));
 
         LL_CDEBUG_PAGE(D_PAGE, page, "completing cmd %d with %d\n", cmd, rc);
 
@@ -1350,6 +1367,9 @@ int ll_writepage(struct page *page)
         if (IS_ERR(llap))
                 GOTO(out, rc = PTR_ERR(llap));
 
+        LASSERT(!PageWriteback(page));
+        set_page_writeback(page);
+
         page_cache_get(page);
         if (llap->llap_write_queued) {
                 LL_CDEBUG_PAGE(D_PAGE, page, "marking urgent\n");
@@ -1367,6 +1387,9 @@ out:
                 if (!lli->lli_async_rc)
                         lli->lli_async_rc = rc;
                 /* re-dirty page on error so it retries write */
+                if (PageWriteback(page)) {
+                        end_page_writeback(page);
+                }
                 ll_redirty_page(page);
                 unlock_page(page);
         }
index 402a555..508636b 100644 (file)
@@ -147,9 +147,3 @@ struct address_space_operations ll_aops = {
 #endif
 };
 
-int ll_sync_page_range(struct inode *inode, struct address_space *mapping,
-                       loff_t start, size_t count)
-{
-        return 0;
-}
-
index b7e4c5f..4852d43 100644 (file)
@@ -292,58 +292,3 @@ struct address_space_operations ll_aops = {
         .releasepage    = ll_releasepage,
         .bmap           = NULL
 };
-
-static int wait_on_page_locked_range(struct address_space *mapping,
-                                     pgoff_t start, pgoff_t end)
-{
-        pgoff_t index;
-        struct page *page;
-        int ret = 0;
-
-        if (end < start)
-                return 0;
-
-        for(index = start; index < end; index++) {
-                page = find_get_page(mapping, index);
-                if (page == NULL)
-                        continue;
-
-                wait_on_page_locked(page);
-                if (PageError(page))
-                        ret = -EIO;
-                cond_resched();
-        }
-
-        /* Check for outstanding write errors */
-        if (test_and_clear_bit(AS_ENOSPC, &mapping->flags))
-                ret = -ENOSPC;
-        if (test_and_clear_bit(AS_EIO, &mapping->flags))
-                ret = -EIO;
-
-        return ret;
-}
-
-int ll_sync_page_range(struct inode *inode, struct address_space *mapping,
-                       loff_t pos, size_t count)
-{
-        pgoff_t start = pos >> CFS_PAGE_SHIFT;
-        pgoff_t end = (pos + count - 1) >> CFS_PAGE_SHIFT;
-        struct writeback_control wbc;
-        int ret;
-
-        wbc.sync_mode = WB_SYNC_ALL;
-        wbc.nr_to_write = mapping->nrpages * 2;
-#ifdef HAVE_WB_RANGE_START
-        wbc.range_start = start;
-        wbc.range_end = end;
-#else
-        wbc.start = start;
-        wbc.end = end;
-#endif
-        ret = generic_writepages(mapping, &wbc);
-
-        if (ret == 0)
-                ret = wait_on_page_locked_range(mapping, start, end);
-        return ret;
-}
-
index cdc5013..63a6f08 100755 (executable)
@@ -61,6 +61,70 @@ pop_arg(int argc, char *argv[])
 
         return argv[cur_arg++];
 }
+
+struct flag_mapping {          /* pairs an open(2) flag name with its value */
+       const char *string;     /* flag name that get_flags() compares against */
+       const int  flag;        /* matching O_* constant; -1 marks the table end */
+} flag_table[] = {             /* searched linearly by get_flags() */
+       {"O_RDONLY", O_RDONLY},
+       {"O_WRONLY", O_WRONLY},
+       {"O_RDWR", O_RDWR},
+       {"O_CREAT", O_CREAT},
+       {"O_EXCL", O_EXCL},
+       {"O_NOCTTY", O_NOCTTY},
+       {"O_TRUNC", O_TRUNC},
+       {"O_APPEND", O_APPEND},
+       {"O_NONBLOCK", O_NONBLOCK},
+       {"O_NDELAY", O_NDELAY},
+       {"O_SYNC", O_SYNC},
+#ifdef O_DIRECT
+       {"O_DIRECT", O_DIRECT},
+#endif
+       {"O_LARGEFILE", O_LARGEFILE},
+       {"O_DIRECTORY", O_DIRECTORY},
+       {"O_NOFOLLOW", O_NOFOLLOW},
+       {"", -1}                /* sentinel: flag == -1 stops the lookup loop */
+};
+
+/*
+ * Parse a ':'-separated list of open(2) flag names at the start of data.
+ *
+ * Each token is looked up in flag_table and OR-ed into the result.  Parsing
+ * stops at the first token that is not a flag name, so that whatever follows
+ * the flag list is never mistaken for (and consumed as) another flag.
+ *
+ * data    string to parse; it is copied before tokenizing and not modified
+ * rflags  receives the combined flags, or O_RDONLY when data does not start
+ *         with a known flag name
+ *
+ * Returns 0 when no flag name was found.  Otherwise returns the number of
+ * characters of data covered by the flag list: the offset of the first
+ * non-flag token, or the offset just past the last flag name when every
+ * token matched.  main() adds this value to its command pointer.
+ */
+int get_flags(char *data, int *rflags)
+{
+        char *cloned_flags;
+        char *tmp;
+        int flag_set = 0;
+        int flags = 0;
+        int size = 0;
+
+        /* strtok() writes into its argument, so tokenize a private copy */
+        cloned_flags = strdup(data);
+        if (cloned_flags == NULL) {
+                fprintf(stderr, "Insufficient memory.\n");
+                exit(-1);
+        }
+
+        for (tmp = strtok(cloned_flags, ":"); tmp;
+             tmp = strtok(NULL, ":")) {
+                int i;
+
+                /* offset of this token; it is the result if no flag matches */
+                size = (int)(tmp - cloned_flags);
+                for (i = 0; flag_table[i].flag != -1; i++) {
+                        if (!strcmp(tmp, flag_table[i].string))
+                                break;
+                }
+
+                /* hit the sentinel: the first non-flag token ends the list */
+                if (flag_table[i].flag == -1)
+                        break;
+
+                flags |= flag_table[i].flag;
+                size += strlen(flag_table[i].string);
+                flag_set = 1;
+        }
+        free(cloned_flags);
+
+        if (!flag_set) {
+                *rflags = O_RDONLY;
+                return 0;
+        }
+
+        *rflags = flags;
+        return size;
+}
+
 #define POP_ARG() (pop_arg(argc, argv))
 #define min(a,b) ((a)>(b)?(b):(a))
 
@@ -72,6 +136,8 @@ int main(int argc, char **argv)
         size_t mmap_len = 0, i;
         unsigned char *mmap_ptr = NULL, junk = 0;
         int rc, len, fd = -1;
+        int flags;
+        int save_errno;
 
         if (argc < 3) {
                 fprintf(stderr, usage, argv[0]);
@@ -92,22 +158,25 @@ int main(int argc, char **argv)
                         break;
                 case 'c':
                         if (close(fd) == -1) {
+                                save_errno = errno;
                                 perror("close");
-                                exit(1);
+                                exit(save_errno);
                         }
                         fd = -1;
                         break;
                 case 'd':
                         if (mkdir(fname, 0755) == -1) {
+                                save_errno = errno;
                                 perror("mkdir(0755)");
-                                exit(1);
+                                exit(save_errno);
                         }
                         break;
                 case 'D':
                         fd = open(fname, O_DIRECTORY);
                         if (fd == -1) {
+                                save_errno = errno;
                                 perror("open(O_DIRECTORY)");
-                                exit(1);
+                                exit(save_errno);
                         }
                         break;
                 case 'l':
@@ -115,8 +184,9 @@ int main(int argc, char **argv)
                         if (!newfile)
                                 newfile = fname;
                         if (symlink(fname, newfile)) {
+                                save_errno = errno;
                                 perror("symlink()");
-                                exit(1);
+                                exit(save_errno);
                         }
                         break;
                 case 'L':
@@ -124,14 +194,16 @@ int main(int argc, char **argv)
                         if (!newfile)
                                 newfile = fname;
                         if (link(fname, newfile)) {
+                                save_errno = errno;
                                 perror("symlink()");
-                                exit(1);
+                                exit(save_errno);
                         }
                         break;
                 case 'm':
                         if (mknod(fname, S_IFREG | 0644, 0) == -1) {
+                                save_errno = errno;
                                 perror("mknod(S_IFREG|0644, 0)");
-                                exit(1);
+                                exit(save_errno);
                         }
                         break;
                 case 'M':
@@ -139,8 +211,9 @@ int main(int argc, char **argv)
                         mmap_ptr = mmap(NULL, mmap_len, PROT_WRITE | PROT_READ,
                                         MAP_SHARED, fd, 0);
                         if (mmap_ptr == MAP_FAILED) {
+                                save_errno = errno;
                                 perror("mmap");
-                                exit(1);
+                                exit(save_errno);
                         }
                         break;
                 case 'N':
@@ -148,41 +221,48 @@ int main(int argc, char **argv)
                         if (!newfile)
                                 newfile = fname;
                         if (rename (fname, newfile)) {
+                                save_errno = errno;
                                 perror("rename()");
-                                exit(1);
+                                exit(save_errno);
                         }
                         break;
                 case 'O':
                         fd = open(fname, O_CREAT|O_RDWR, 0644);
                         if (fd == -1) {
+                                save_errno = errno;
                                 perror("open(O_RDWR|O_CREAT)");
-                                exit(1);
+                                exit(save_errno);
                         }
                         break;
                 case 'o':
-                        fd = open(fname, O_RDONLY);
+                        len = get_flags(commands+1, &flags);
+                        commands += len;
+                        fd = open(fname, flags);
                         if (fd == -1) {
-                                perror("open(O_RDONLY)");
-                                exit(1);
+                                save_errno = errno;
+                                perror("open");
+                                exit(save_errno);
                         }
                         break;
-                case 'r': 
+                case 'r':
                         len = atoi(commands+1);
                         if (len <= 0)
                                 len = 1;
                         while(len > 0) {
                                 if (read(fd, &buf,
                                          min(len,sizeof(buf))) == -1) {
+                                        save_errno = errno;
                                         perror("read");
-                                        exit(1);
+                                        exit(save_errno);
                                 }
                                 len -= sizeof(buf);
                         }
                         break;
                 case 'S':
                         if (fstat(fd, &st) == -1) {
+                                save_errno = errno;
                                 perror("fstat");
-                                exit(1);
+                                exit(save_errno);
                         }
                         break;
                 case 'R':
@@ -191,46 +271,52 @@ int main(int argc, char **argv)
                         break;
                 case 's':
                         if (stat(fname, &st) == -1) {
+                                save_errno = errno;
                                 perror("stat");
-                                exit(1);
+                                exit(save_errno);
                         }
                         break;
                 case 't':
                         if (fchmod(fd, 0) == -1) {
+                                save_errno = errno;
                                 perror("fchmod");
-                                exit(1);
+                                exit(save_errno);
                         }
                         break;
                 case 'T':
                         len = atoi(commands+1);
                         if (ftruncate(fd, len) == -1) {
+                                save_errno = errno;
                                 printf("ftruncate (%d,%d)\n", fd, len);
                                 perror("ftruncate");
-                                exit(1);
+                                exit(save_errno);
                         }
                         break;
                 case 'u':
                         if (unlink(fname) == -1) {
+                                save_errno = errno;
                                 perror("unlink");
-                                exit(1);
+                                exit(save_errno);
                         }
                         break;
                 case 'U':
                         if (munmap(mmap_ptr, mmap_len)) {
+                                save_errno = errno;
                                 perror("munmap");
-                                exit(1);
+                                exit(save_errno);
                         }
                         break;
-                case 'w': 
+                case 'w':
                         len = atoi(commands+1);
                         if (len <= 0)
                                 len = 1;
                         while(len > 0) {
-                                if ((rc = write(fd, buf, 
+                                if ((rc = write(fd, buf,
                                                 min(len, sizeof(buf))))
                                     == -1) {
+                                        save_errno = errno;
                                         perror("write");
-                                        exit(1);
+                                        exit(save_errno);
                                 }
                                 len -= sizeof(buf);
                         }
@@ -241,20 +327,23 @@ int main(int argc, char **argv)
                         break;
                 case 'y':
                         if (fsync(fd) == -1) {
+                                save_errno = errno;
                                 perror("fsync");
-                                exit(1);
+                                exit(save_errno);
                         }
                         break;
                 case 'Y':
                         if (fdatasync(fd) == -1) {
+                                save_errno = errno;
                                 perror("fdatasync");
-                                exit(1);
+                                exit(save_errno);
                         }
                 case 'z':
                         len = atoi(commands+1);
                         if (lseek(fd, len, SEEK_SET) == -1) {
+                                save_errno = errno;
                                 perror("lseek");
-                                exit(1);
+                                exit(save_errno);
                         }
                         break;
                 case '0':
index 1510c8f..40d197f 100644 (file)
@@ -81,7 +81,7 @@ int main(int argc, char** argv)
                         }
 
                         strncpy(cloned_flags, optarg, strlen(optarg)+1);
-                        for (tmp = strtok(optarg, ":|"); tmp;
+                        for (tmp = strtok(cloned_flags, ":|"); tmp;
                              tmp = strtok(NULL, ":|")) {
                                 int i = 0;
 #ifdef DEBUG
@@ -101,6 +101,7 @@ int main(int argc, char** argv)
                                         exit(-1);
                                 }
                         }
+                        free(cloned_flags);
 #ifdef DEBUG
                         printf("flags = %x\n", flags);
 #endif
index d8631a3..40e50ff 100644 (file)
@@ -3779,6 +3779,16 @@ test_117() # bug 10891
 }
 run_test 117 "verify fsfilt_extend =========="
 
+test_118() #bug 11710
+{
+       
+       multiop $DIR/$tfile oO_CREAT:O_RDWR:O_SYNC:w4096c;
+       dirty=$(grep -c dirty /proc/fs/lustre/llite/lustre-*/dump_page_cache)
+       
+       return $dirty
+}
+run_test 118 "verify O_SYNC work"
+
 TMPDIR=$OLDTMPDIR
 TMP=$OLDTMP
 HOME=$OLDHOME