Whamcloud - gitweb
b=11742 FSX checksum false positives due to mmap IO
author Dmitry Zogin <dmitry.zogin@sun.com>
Mon, 17 May 2010 20:35:07 +0000 (16:35 -0400)
committer Johann Lombardi <johann@sun.com>
Tue, 18 May 2010 12:41:42 +0000 (14:41 +0200)
 Use the OBD_FL_MMAP flag for I/O on a memory-mapped file. Do not print
 checksum errors if the flag is set on a request.

 i=adilger
 i=alexey.lyashkov
 i=johann

lustre/include/lustre/lustre_idl.h
lustre/llite/rw.c
lustre/osc/osc_request.c
lustre/ost/ost_handler.c

index 24ac61a..4f61d31 100644 (file)
@@ -497,6 +497,7 @@ enum obdo_flags {
         OBD_FL_CKSUM_RSVD2  = 0x00008000, /* for future cksum types */
         OBD_FL_CKSUM_RSVD3  = 0x00010000, /* for future cksum types */
         OBD_FL_SHRINK_GRANT = 0x00020000, /* object shrink the grant */
+        OBD_FL_MMAP         = 0x00040000, /* object is mmapped on the client */
 
         OBD_FL_CKSUM_ALL    = OBD_FL_CKSUM_CRC32 | OBD_FL_CKSUM_ADLER,
 
index f42419e..9eb1f45 100644 (file)
@@ -492,6 +492,9 @@ void ll_inode_fill_obdo(struct inode *inode, int cmd, struct obdo *oa)
         }
 
         obdo_from_inode(oa, inode, valid_flags);
+        /* Bug11742 - set the OBD_FL_MMAP flag for memory mapped files */
+        if (atomic_read(&(ll_i2info(inode)->lli_mmap_cnt)) != 0) 
+                oa->o_flags |= OBD_FL_MMAP;
 }
 
 static void ll_ap_fill_obdo(void *data, int cmd, struct obdo *oa)
@@ -913,8 +916,17 @@ static struct ll_async_page *llap_from_page_with_lockh(struct page *page,
                                page, csum);
                 } else {
                         /* origin == LLAP_ORIGIN_WRITEPAGE */
-                        LL_CDEBUG_PAGE(D_ERROR, page, "old cksum %x != new "
-                                       "%x!\n", llap->llap_checksum, csum);
+                        if (!atomic_read(&(ll_i2info(inode)->lli_mmap_cnt))) {
+                               LL_CDEBUG_PAGE(D_ERROR, page,
+                                               "old cksum %x != new %x!\n",
+                                               llap->llap_checksum, csum);
+                        } else {
+                                /* mmapped page was modified */
+                                CDEBUG(D_PAGE,
+                                       "page %p old cksum %x != new %x\n",
+                                       page, llap->llap_checksum, csum);
+                        }
+                        llap->llap_checksum = csum;
                 }
         }
 
index 9a4409c..2f5bf18 100644 (file)
@@ -1347,6 +1347,10 @@ static int check_write_checksum(struct obdo *oa, const lnet_process_id_t *peer,
                 return 0;
         }
 
+        /* If this is mmaped file - it can be changed at any time */
+        if (oa->o_flags & OBD_FL_MMAP)
+                return 1;
+
         if (oa->o_valid & OBD_MD_FLFLAGS)
                 cksum_type = cksum_type_unpack(oa->o_flags);
         else
@@ -2223,9 +2227,19 @@ static int brw_interpret(struct ptlrpc_request *request, void *data, int rc)
         CDEBUG(D_INODE, "request %p aa %p rc %d\n", request, aa, rc);
 
         if (osc_recoverable_error(rc)) {
-                rc = osc_brw_redo_request(request, aa);
-                if (rc == 0)
-                        RETURN(0);
+                /* Only retry once for mmaped files since the mmaped page
+                 * might be modified at anytime. We have to retry at least
+                 * once in case there WAS really a corruption of the page
+                 * on the network, that was not caused by mmap() modifying
+                 * the page. bug 11742 */
+                if ((rc == -EAGAIN) && (aa->aa_resends > 0) &&
+                    (aa->aa_oa->o_flags & OBD_FL_MMAP)) {
+                        rc = 0;
+                } else {
+                       rc = osc_brw_redo_request(request, aa);
+                       if (rc == 0)
+                               RETURN(0);
+               }
         }
 
         cli = aa->aa_cli;
index cc6d3fd..e264761 100644 (file)
@@ -1090,7 +1090,9 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti)
                 repbody->oa.o_cksum = server_cksum;
                 cksum_counter++;
                 if (unlikely(client_cksum != server_cksum)) {
-                        CERROR("client csum %x, server csum %x\n",
+                        CDEBUG_LIMIT((body->oa.o_flags&OBD_FL_MMAP) ? D_INFO
+                                                                    : D_ERROR,
+                               "client csum %x, server csum %x\n",
                                client_cksum, server_cksum);
                         cksum_counter = 0;
                 } else if ((cksum_counter & (-cksum_counter)) == cksum_counter){
@@ -1128,7 +1130,8 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti)
          */
         repbody->oa.o_valid &= ~(OBD_MD_FLMTIME | OBD_MD_FLATIME);
 
-        if (unlikely(client_cksum != server_cksum && rc == 0)) {
+        if (unlikely(client_cksum != server_cksum && rc == 0 &&
+                     !(body->oa.o_flags & OBD_FL_MMAP))) {
                 int  new_cksum = ost_checksum_bulk(desc, OST_WRITE, cksum_type);
                 char *msg;
                 char *via;