Whamcloud - gitweb
LU-8376 ost: enhance end to end bulk cksum error report 60/23960/20
authorBruno Faccini <bruno.faccini@intel.com>
Fri, 25 Nov 2016 14:57:20 +0000 (15:57 +0100)
committerOleg Drokin <oleg.drokin@intel.com>
Tue, 9 May 2017 03:44:21 +0000 (03:44 +0000)
Some sites have experienced spurious checksum errors upon bulk
xfers where it is very difficult to determine the source of the
corruption.
With this patch, upon cksum error, full dump of all pages in a
bulk xfer is now possible (enabled via a /proc tunable) on both
Client and OSS sides, to allow easier root cause identification.

The results of the existing sanity.sh/test_77[b,d,f,g]() sub-tests can
already be used to show the effects of this patch, by injecting bulk
cksum error/corruption using the
OBD_FAIL_[OSC,OST]_CHECKSUM_[SEND,RECEIVE] fail codes.

sanity.sh/test_77c has been created to specifically test the new
dump-on-cksum-error functionality.

Signed-off-by: Bruno Faccini <bruno.faccini@intel.com>
Change-Id: I0d200bb6d5c41c55a66ac012fd9cbd8d702d2f3a
Reviewed-on: https://review.whamcloud.com/23960
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Nathaniel Clark <nathaniel.l.clark@intel.com>
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
libcfs/libcfs/debug.c
lustre/include/lprocfs_status.h
lustre/include/obd.h
lustre/obdclass/lprocfs_status_server.c
lustre/ofd/lproc_ofd.c
lustre/osc/lproc_osc.c
lustre/osc/osc_request.c
lustre/target/tgt_handler.c
lustre/tests/sanity.sh

index c3c3d15..1d7360a 100644 (file)
@@ -98,6 +98,7 @@ EXPORT_SYMBOL(libcfs_kmemory);
 static wait_queue_head_t debug_ctlwq;
 
 char libcfs_debug_file_path_arr[PATH_MAX] = LIBCFS_DEBUG_FILE_PATH_DEFAULT;
+EXPORT_SYMBOL(libcfs_debug_file_path_arr);
 
 /* We need to pass a pointer here, but elsewhere this must be a const */
 static char *libcfs_debug_file_path;
index b68a3f5..212c32c 100644 (file)
@@ -637,6 +637,13 @@ ssize_t
 lprocfs_ir_factor_seq_write(struct file *file, const char __user *buffer,
                                size_t count, loff_t *off);
 #endif
+
+/* lprocfs_status.c: dump pages on cksum error */
+int lprocfs_checksum_dump_seq_show(struct seq_file *m, void *data);
+ssize_t
+lprocfs_checksum_dump_seq_write(struct file *file, const char __user *buffer,
+                               size_t count, loff_t *off);
+
 extern int lprocfs_single_release(struct inode *, struct file *);
 extern int lprocfs_seq_release(struct inode *, struct file *);
 
index 596ce8d..a3a0e22 100644 (file)
@@ -308,7 +308,8 @@ struct client_obd {
        struct obd_export        *cl_mgc_mgsexp;
 
         /* checksumming for data sent over the network */
-        unsigned int             cl_checksum:1; /* 0 = disabled, 1 = enabled */
+       unsigned int             cl_checksum:1, /* 0 = disabled, 1 = enabled */
+                                cl_checksum_dump:1; /* same */
         /* supported checksum types that are worked out at connect time */
         __u32                    cl_supp_cksum_types;
         /* checksum algorithm to be used */
@@ -595,7 +596,8 @@ struct obd_device {
                                         * (for /proc/status only!!) */
                obd_no_ir:1,            /* no imperative recovery. */
                obd_process_conf:1,     /* device is processing mgs config */
-               obd_uses_nid_stats:1;   /* maintain per-client OBD stats */
+               obd_uses_nid_stats:1,   /* maintain per-client OBD stats */
+               obd_checksum_dump:1;    /* dump pages upon cksum error */
 
         /* use separate field as it is set in interrupt to don't mess with
          * protection of other bits using _bh lock */
index fc74d16..b73fdde 100644 (file)
@@ -689,6 +689,35 @@ lprocfs_ir_factor_seq_write(struct file *file, const char __user *buffer,
 }
 EXPORT_SYMBOL(lprocfs_ir_factor_seq_write);
 
+/*
+ * seq_file show handler for the "checksum_dump" tunable.
+ *
+ * Prints the obd_checksum_dump flag of the obd_device found in m->private,
+ * followed by a newline. Asserts that the device pointer is not NULL.
+ * Always returns 0.
+ */
+int lprocfs_checksum_dump_seq_show(struct seq_file *m, void *data)
+{
+       struct obd_device *obd = m->private;
+
+       LASSERT(obd != NULL);
+       seq_printf(m, "%d\n", obd->obd_checksum_dump);
+       return 0;
+}
+EXPORT_SYMBOL(lprocfs_checksum_dump_seq_show);
+
+/*
+ * Write handler for the "checksum_dump" tunable.
+ *
+ * Parses the user buffer with lprocfs_str_to_s64() and stores the truth
+ * value of the result (any non-zero value enables) in the obd_checksum_dump
+ * flag of the obd_device reached through the file's seq_file.
+ *
+ * Returns count on success, or the non-zero code returned by
+ * lprocfs_str_to_s64() when parsing fails.
+ */
+ssize_t
+lprocfs_checksum_dump_seq_write(struct file *file, const char __user *buffer,
+                           size_t count, loff_t *off)
+{
+       struct seq_file *m = file->private_data;
+       struct obd_device *obd = m->private;
+       int rc;
+       __s64 val;
+
+       LASSERT(obd != NULL);
+       rc = lprocfs_str_to_s64(buffer, count, &val);
+       if (rc)
+               return rc;
+
+       /* normalize to 0/1: the flag is a single-bit field */
+       obd->obd_checksum_dump = !!val;
+       return count;
+}
+EXPORT_SYMBOL(lprocfs_checksum_dump_seq_write);
+
 int lprocfs_recovery_time_soft_seq_show(struct seq_file *m, void *data)
 {
        struct obd_device *obd = m->private;
index 7609167..b39135f 100644 (file)
@@ -903,6 +903,7 @@ LPROC_SEQ_FOPS_WO_TYPE(ofd, evict_client);
 LPROC_SEQ_FOPS_RO_TYPE(ofd, num_exports);
 LPROC_SEQ_FOPS_RO_TYPE(ofd, target_instance);
 LPROC_SEQ_FOPS_RW_TYPE(ofd, ir_factor);
+LPROC_SEQ_FOPS_RW_TYPE(ofd, checksum_dump);
 LPROC_SEQ_FOPS_RW_TYPE(ofd, job_interval);
 
 struct lprocfs_vars lprocfs_ofd_obd_vars[] = {
@@ -958,6 +959,8 @@ struct lprocfs_vars lprocfs_ofd_obd_vars[] = {
          .fops =       &ofd_target_instance_fops       },
        { .name =       "ir_factor",
          .fops =       &ofd_ir_factor_fops             },
+       { .name =       "checksum_dump",
+         .fops =       &ofd_checksum_dump_fops         },
        { .name =       "grant_compat_disable",
          .fops =       &ofd_grant_compat_disable_fops  },
        { .name =       "client_cache_count",
index deb1760..c7c3f0c 100644 (file)
@@ -468,6 +468,39 @@ static ssize_t osc_resend_count_seq_write(struct file *file,
 }
 LPROC_SEQ_FOPS(osc_resend_count);
 
+/*
+ * seq_file show handler for the OSC "checksum_dump" tunable.
+ *
+ * Prints "1" or "0" (plus a newline) according to the cl_checksum_dump flag
+ * of the client_obd embedded in the obd_device found in m->private.
+ * Prints nothing when that pointer is NULL. Always returns 0.
+ */
+static int osc_checksum_dump_seq_show(struct seq_file *m, void *v)
+{
+       struct obd_device *obd = m->private;
+
+       if (obd == NULL)
+               return 0;
+
+       seq_printf(m, "%d\n", obd->u.cli.cl_checksum_dump ? 1 : 0);
+       return 0;
+}
+
+/*
+ * Write handler for the OSC "checksum_dump" tunable.
+ *
+ * Parses the user buffer with lprocfs_str_to_s64() and sets the
+ * cl_checksum_dump flag of the client_obd to 1 for any non-zero value,
+ * 0 otherwise.
+ *
+ * Returns count on success, -ENODEV when no obd_device is attached to the
+ * seq_file, or the non-zero code returned by lprocfs_str_to_s64().
+ */
+static ssize_t osc_checksum_dump_seq_write(struct file *file,
+                                          const char __user *buffer,
+                                          size_t count, loff_t *off)
+{
+       struct obd_device *obd;
+       int rc;
+       __s64 val;
+
+       obd = ((struct seq_file *)file->private_data)->private;
+       /* a write handler must not report "0 bytes written" for a non-empty
+        * buffer: user-space writers would retry the same write forever */
+       if (obd == NULL)
+               return -ENODEV;
+
+       rc = lprocfs_str_to_s64(buffer, count, &val);
+       if (rc)
+               return rc;
+
+       obd->u.cli.cl_checksum_dump = (val ? 1 : 0);
+
+       return count;
+}
+LPROC_SEQ_FOPS(osc_checksum_dump);
+
 static int osc_contention_seconds_seq_show(struct seq_file *m, void *v)
 {
        struct obd_device *obd = m->private;
@@ -624,6 +657,8 @@ struct lprocfs_vars lprocfs_osc_obd_vars[] = {
          .fops =       &osc_checksum_fops              },
        { .name =       "checksum_type",
          .fops =       &osc_checksum_type_fops         },
+       { .name =       "checksum_dump",
+         .fops =       &osc_checksum_dump_fops         },
        { .name =       "resend_count",
          .fops =       &osc_resend_count_fops          },
        { .name =       "timeouts",
index f68f878..99baeb8 100644 (file)
@@ -1288,7 +1288,12 @@ osc_brw_prep_request(int cmd, struct client_obd *cli, struct obdo *oa,
                         body->oa.o_flags |= cksum_type_pack(cli->cl_cksum_type);
                         body->oa.o_valid |= OBD_MD_FLCKSUM | OBD_MD_FLFLAGS;
                 }
-        }
+
+               /* Client cksum has been already copied to wire obdo in previous
+                * lustre_set_wire_obdo(), and in the case a bulk-read is being
+                * resent due to cksum error, this will allow Server to
+                * check+dump pages on its side */
+       }
         ptlrpc_request_set_replen(req);
 
         CLASSERT(sizeof(*aa) <= sizeof(req->rq_async_args));
@@ -1314,11 +1319,78 @@ osc_brw_prep_request(int cmd, struct client_obd *cli, struct obdo *oa,
         RETURN(rc);
 }
 
+/* Scratch buffer holding the path of the checksum dump file being written.
+ * NOTE(review): file-scope and not protected by any lock in this function;
+ * confirm that dump_all_bulk_pages() callers cannot run concurrently. */
+char dbgcksum_file_name[PATH_MAX];
+
+/*
+ * Dump the content of all pages of a bulk transfer into a file, so that a
+ * checksum mismatch can be analysed afterwards.
+ *
+ * The file is named
+ *   <debug path>-checksum_dump-osc-<FID>:[<start>-<end>]-<client>-<server>
+ * where <debug path> is libcfs_debug_file_path_arr (or
+ * LIBCFS_DEBUG_FILE_PATH_DEFAULT when it starts with "NONE"), <FID> comes
+ * from \a oa when OBD_MD_FLFID is set (all zeroes otherwise), and
+ * <start>/<end> are the first and last byte offsets covered by \a pga.
+ * The file is created with O_EXCL, so only the first dump for a given name
+ * is kept; an already existing file is only reported at D_INFO level.
+ *
+ * \param oa            obdo providing the parent FID for the file name
+ * \param page_count    number of entries in \a pga
+ * \param pga           pages of the bulk transfer, dumped in array order
+ * \param server_cksum  checksum computed by the server
+ * \param client_cksum  checksum computed by the client
+ */
+static void dump_all_bulk_pages(struct obdo *oa, __u32 page_count,
+                               struct brw_page **pga, __u32 server_cksum,
+                               __u32 client_cksum)
+{
+       struct file *filp;
+       int rc, i;
+       unsigned int len;
+       char *buf;
+       mm_segment_t oldfs;
+
+       /* will only keep dump of pages on first error for the same range in
+        * file/fid, not during the resends/retries. */
+       snprintf(dbgcksum_file_name, sizeof(dbgcksum_file_name),
+                "%s-checksum_dump-osc-"DFID":[%llu-%llu]-%x-%x",
+                (strncmp(libcfs_debug_file_path_arr, "NONE", 4) != 0 ?
+                 libcfs_debug_file_path_arr :
+                 LIBCFS_DEBUG_FILE_PATH_DEFAULT),
+                oa->o_valid & OBD_MD_FLFID ? oa->o_parent_seq : 0ULL,
+                oa->o_valid & OBD_MD_FLFID ? oa->o_parent_oid : 0,
+                oa->o_valid & OBD_MD_FLFID ? oa->o_parent_ver : 0,
+                pga[0]->off,
+                pga[page_count-1]->off + pga[page_count-1]->count - 1,
+                client_cksum, server_cksum);
+       filp = filp_open(dbgcksum_file_name,
+                        O_CREAT | O_EXCL | O_WRONLY | O_LARGEFILE, 0600);
+       if (IS_ERR(filp)) {
+               rc = PTR_ERR(filp);
+               if (rc == -EEXIST)
+                       CDEBUG(D_INFO, "%s: can't open to dump pages with "
+                              "checksum error: rc = %d\n", dbgcksum_file_name,
+                              rc);
+               else
+                       CERROR("%s: can't open to dump pages with checksum "
+                              "error: rc = %d\n", dbgcksum_file_name, rc);
+               return;
+       }
+
+       /* vfs_write() expects a user-space pointer: temporarily lift the
+        * address limit so that the kmap()ed kernel address is accepted */
+       oldfs = get_fs();
+       set_fs(KERNEL_DS);
+       /* stop at the first failed write: appending the following pages
+        * after a gap would put them at offsets that no longer match the
+        * extent given in the file name, and a persistent error would be
+        * logged once per page */
+       for (i = 0, rc = 0; i < page_count && rc >= 0; i++) {
+               len = pga[i]->count;
+               buf = kmap(pga[i]->pg);
+               while (len != 0) {
+                       rc = vfs_write(filp, (__force const char __user *)buf,
+                                      len, &filp->f_pos);
+                       /* defensive: a write making no progress would
+                        * otherwise keep this loop spinning forever */
+                       if (rc == 0)
+                               rc = -EIO;
+                       if (rc < 0) {
+                               CERROR("%s: wanted to write %u but got %d "
+                                      "error\n", dbgcksum_file_name, len, rc);
+                               break;
+                       }
+                       len -= rc;
+                       buf += rc;
+                       CDEBUG(D_INFO, "%s: wrote %d bytes\n",
+                              dbgcksum_file_name, rc);
+               }
+               kunmap(pga[i]->pg);
+       }
+       set_fs(oldfs);
+
+       /* whatever has been written (even a partial dump) is synced */
+       rc = ll_vfs_fsync_range(filp, 0, LLONG_MAX, 1);
+       if (rc)
+               CERROR("%s: sync returns %d\n", dbgcksum_file_name, rc);
+       filp_close(filp, NULL);
+}
+
 static int
-check_write_checksum(struct obdo *oa, const struct lnet_process_id *peer,
-                    __u32 client_cksum, __u32 server_cksum, int nob,
-                    size_t page_count, struct brw_page **pga,
-                    cksum_type_t client_cksum_type)
+check_write_checksum(struct obdo *oa, const lnet_process_id_t *peer,
+                               __u32 client_cksum, __u32 server_cksum,
+                               struct osc_brw_async_args *aa)
 {
         __u32 new_cksum;
         char *msg;
@@ -1329,12 +1401,16 @@ check_write_checksum(struct obdo *oa, const struct lnet_process_id *peer,
                 return 0;
         }
 
-        cksum_type = cksum_type_unpack(oa->o_valid & OBD_MD_FLFLAGS ?
-                                       oa->o_flags : 0);
-        new_cksum = osc_checksum_bulk(nob, page_count, pga, OST_WRITE,
-                                      cksum_type);
+       if (aa->aa_cli->cl_checksum_dump)
+               dump_all_bulk_pages(oa, aa->aa_page_count, aa->aa_ppga,
+                                   server_cksum, client_cksum);
+
+       cksum_type = cksum_type_unpack(oa->o_valid & OBD_MD_FLFLAGS ?
+                                      oa->o_flags : 0);
+       new_cksum = osc_checksum_bulk(aa->aa_requested_nob, aa->aa_page_count,
+                                     aa->aa_ppga, OST_WRITE, cksum_type);
 
-        if (cksum_type != client_cksum_type)
+       if (cksum_type != cksum_type_unpack(aa->aa_oa->o_flags))
                 msg = "the server did not use the checksum type specified in "
                       "the original request - likely a protocol problem";
         else if (new_cksum == server_cksum)
@@ -1346,17 +1422,20 @@ check_write_checksum(struct obdo *oa, const struct lnet_process_id *peer,
                 msg = "changed in transit AND doesn't match the original - "
                       "likely false positive due to mmap IO (bug 11742)";
 
-       LCONSOLE_ERROR_MSG(0x132, "BAD WRITE CHECKSUM: %s: from %s inode "DFID
-                          " object "DOSTID" extent [%llu-%llu]\n",
+       LCONSOLE_ERROR_MSG(0x132, "%s: BAD WRITE CHECKSUM: %s: from %s inode "
+                          DFID " object "DOSTID" extent [%llu-%llu], original "
+                          "client csum %x (type %x), server csum %x (type %x),"
+                          " client csum now %x\n",
+                          aa->aa_cli->cl_import->imp_obd->obd_name,
                           msg, libcfs_nid2str(peer->nid),
                           oa->o_valid & OBD_MD_FLFID ? oa->o_parent_seq : (__u64)0,
                           oa->o_valid & OBD_MD_FLFID ? oa->o_parent_oid : 0,
                           oa->o_valid & OBD_MD_FLFID ? oa->o_parent_ver : 0,
-                          POSTID(&oa->o_oi), pga[0]->off,
-                          pga[page_count-1]->off + pga[page_count-1]->count - 1);
-       CERROR("original client csum %x (type %x), server csum %x (type %x), "
-              "client csum now %x\n", client_cksum, client_cksum_type,
-              server_cksum, cksum_type, new_cksum);
+                          POSTID(&oa->o_oi), aa->aa_ppga[0]->off,
+                          aa->aa_ppga[aa->aa_page_count - 1]->off +
+                               aa->aa_ppga[aa->aa_page_count-1]->count - 1,
+                          client_cksum, cksum_type_unpack(aa->aa_oa->o_flags),
+                          server_cksum, cksum_type, new_cksum);
        return 1;
 }
 
@@ -1416,9 +1495,7 @@ static int osc_brw_fini_request(struct ptlrpc_request *req, int rc)
 
                 if ((aa->aa_oa->o_valid & OBD_MD_FLCKSUM) && client_cksum &&
                     check_write_checksum(&body->oa, peer, client_cksum,
-                                         body->oa.o_cksum, aa->aa_requested_nob,
-                                         aa->aa_page_count, aa->aa_ppga,
-                                         cksum_type_unpack(aa->aa_oa->o_flags)))
+                                        body->oa.o_cksum, aa))
                         RETURN(-EAGAIN);
 
                 rc = check_write_rcs(req, aa->aa_requested_nob,aa->aa_nio_count,
@@ -1467,25 +1544,35 @@ static int osc_brw_fini_request(struct ptlrpc_request *req, int rc)
                }
 
                if (server_cksum != client_cksum) {
+                       struct ost_body *clbody;
+                       u32 page_count = aa->aa_page_count;
+
+                       clbody = req_capsule_client_get(&req->rq_pill,
+                                                       &RMF_OST_BODY);
+                       if (cli->cl_checksum_dump)
+                               dump_all_bulk_pages(&clbody->oa, page_count,
+                                                   aa->aa_ppga, server_cksum,
+                                                   client_cksum);
+
                        LCONSOLE_ERROR_MSG(0x133, "%s: BAD READ CHECKSUM: from "
                                           "%s%s%s inode "DFID" object "DOSTID
-                                          " extent [%llu-%llu]\n",
+                                          " extent [%llu-%llu], client %x, "
+                                          "server %x, cksum_type %x\n",
                                           req->rq_import->imp_obd->obd_name,
                                           libcfs_nid2str(peer->nid),
                                           via, router,
-                                          body->oa.o_valid & OBD_MD_FLFID ?
-                                               body->oa.o_parent_seq : (__u64)0,
-                                          body->oa.o_valid & OBD_MD_FLFID ?
-                                               body->oa.o_parent_oid : 0,
-                                          body->oa.o_valid & OBD_MD_FLFID ?
-                                               body->oa.o_parent_ver : 0,
+                                          clbody->oa.o_valid & OBD_MD_FLFID ?
+                                               clbody->oa.o_parent_seq : 0ULL,
+                                          clbody->oa.o_valid & OBD_MD_FLFID ?
+                                               clbody->oa.o_parent_oid : 0,
+                                          clbody->oa.o_valid & OBD_MD_FLFID ?
+                                               clbody->oa.o_parent_ver : 0,
                                           POSTID(&body->oa.o_oi),
                                           aa->aa_ppga[0]->off,
-                                          aa->aa_ppga[aa->aa_page_count-1]->off +
-                                          aa->aa_ppga[aa->aa_page_count-1]->count -
-                                                                       1);
-                       CERROR("client %x, server %x, cksum_type %x\n",
-                              client_cksum, server_cksum, cksum_type);
+                                          aa->aa_ppga[page_count-1]->off +
+                                          aa->aa_ppga[page_count-1]->count - 1,
+                                          client_cksum, server_cksum,
+                                          cksum_type);
                        cksum_counter = 0;
                        aa->aa_oa->o_cksum = client_cksum;
                        rc = -EAGAIN;
index 2fcb794..02f2902 100644 (file)
@@ -1687,6 +1687,12 @@ static __u32 tgt_checksum_bulk(struct lu_target *tgt,
                                memcpy(ptr2, ptr, len);
                                memcpy(ptr2, "bad3", min(4, len));
                                kunmap(np);
+
+                               /* LU-8376 to preserve original index for
+                                * display in dump_all_bulk_pages() */
+                               np->index = BD_GET_KIOV(desc,
+                                                       i).kiov_page->index;
+
                                BD_GET_KIOV(desc, i).kiov_page = np;
                        } else {
                                CERROR("%s: can't alloc page for corruption\n",
@@ -1716,6 +1722,12 @@ static __u32 tgt_checksum_bulk(struct lu_target *tgt,
                                memcpy(ptr2, ptr, len);
                                memcpy(ptr2, "bad4", min(4, len));
                                kunmap(np);
+
+                               /* LU-8376 to preserve original index for
+                                * display in dump_all_bulk_pages() */
+                               np->index = BD_GET_KIOV(desc,
+                                                       i).kiov_page->index;
+
                                BD_GET_KIOV(desc, i).kiov_page = np;
                        } else {
                                CERROR("%s: can't alloc page for corruption\n",
@@ -1730,6 +1742,122 @@ static __u32 tgt_checksum_bulk(struct lu_target *tgt,
        return cksum;
 }
 
+/* Scratch buffer holding the path of the checksum dump file being written.
+ * NOTE(review): file-scope and not protected by any lock in this function;
+ * confirm that dump_all_bulk_pages() callers cannot run concurrently. */
+char dbgcksum_file_name[PATH_MAX];
+
+/*
+ * Dump the content of all pages of a bulk transfer into a file, so that a
+ * checksum mismatch can be analysed afterwards.
+ *
+ * The file is named
+ *   <debug path>-checksum_dump-ost-<FID>:[<start>-<end>]-<client>-<server>
+ * where <debug path> is libcfs_debug_file_path_arr (or
+ * LIBCFS_DEBUG_FILE_PATH_DEFAULT when it starts with "NONE"), <FID> comes
+ * from \a oa when OBD_MD_FLFID is set (all zeroes otherwise), and
+ * <start>/<end> are derived from the page index of the first and last kiov
+ * entries. The file is created with O_EXCL, so only the first dump for a
+ * given name is kept; an already existing file is only reported at D_INFO
+ * level.
+ *
+ * \param oa            obdo providing the parent FID for the file name
+ * \param count         number of entries in \a iov
+ * \param iov           kiov entries of the bulk, dumped in array order
+ * \param server_cksum  checksum computed by the server
+ * \param client_cksum  checksum computed by the client
+ */
+static void dump_all_bulk_pages(struct obdo *oa, int count,
+                                   lnet_kiov_t *iov, __u32 server_cksum,
+                                   __u32 client_cksum)
+{
+       struct file *filp;
+       int rc, i;
+       unsigned int len;
+       char *buf;
+       mm_segment_t oldfs;
+
+       /* will only keep dump of pages on first error for the same range in
+        * file/fid, not during the resends/retries. */
+       snprintf(dbgcksum_file_name, sizeof(dbgcksum_file_name),
+                "%s-checksum_dump-ost-"DFID":[%llu-%llu]-%x-%x",
+                (strncmp(libcfs_debug_file_path_arr, "NONE", 4) != 0 ?
+                 libcfs_debug_file_path_arr :
+                 LIBCFS_DEBUG_FILE_PATH_DEFAULT),
+                oa->o_valid & OBD_MD_FLFID ? oa->o_parent_seq : (__u64)0,
+                oa->o_valid & OBD_MD_FLFID ? oa->o_parent_oid : 0,
+                oa->o_valid & OBD_MD_FLFID ? oa->o_parent_ver : 0,
+                (__u64)iov[0].kiov_page->index << PAGE_SHIFT,
+                ((__u64)iov[count - 1].kiov_page->index << PAGE_SHIFT) +
+                iov[count - 1].kiov_len - 1, client_cksum, server_cksum);
+       filp = filp_open(dbgcksum_file_name,
+                        O_CREAT | O_EXCL | O_WRONLY | O_LARGEFILE, 0600);
+       if (IS_ERR(filp)) {
+               rc = PTR_ERR(filp);
+               if (rc == -EEXIST)
+                       CDEBUG(D_INFO, "%s: can't open to dump pages with "
+                              "checksum error: rc = %d\n", dbgcksum_file_name,
+                              rc);
+               else
+                       CERROR("%s: can't open to dump pages with checksum "
+                              "error: rc = %d\n", dbgcksum_file_name, rc);
+               return;
+       }
+
+       /* vfs_write() expects a user-space pointer: temporarily lift the
+        * address limit so that the kmap()ed kernel address is accepted */
+       oldfs = get_fs();
+       set_fs(KERNEL_DS);
+       /* stop at the first failed write: appending the following pages
+        * after a gap would put them at offsets that no longer match the
+        * extent given in the file name, and a persistent error would be
+        * logged once per page */
+       for (i = 0, rc = 0; i < count && rc >= 0; i++) {
+               len = iov[i].kiov_len;
+               buf = kmap(iov[i].kiov_page);
+               while (len != 0) {
+                       rc = vfs_write(filp, (__force const char __user *)buf,
+                                      len, &filp->f_pos);
+                       /* defensive: a write making no progress would
+                        * otherwise keep this loop spinning forever */
+                       if (rc == 0)
+                               rc = -EIO;
+                       if (rc < 0) {
+                               CERROR("%s: wanted to write %u but got %d "
+                                      "error\n", dbgcksum_file_name, len, rc);
+                               break;
+                       }
+                       len -= rc;
+                       buf += rc;
+                       CDEBUG(D_INFO, "%s: wrote %d bytes\n",
+                              dbgcksum_file_name, rc);
+               }
+               kunmap(iov[i].kiov_page);
+       }
+       set_fs(oldfs);
+
+       /* whatever has been written (even a partial dump) is synced */
+       rc = ll_vfs_fsync_range(filp, 0, LLONG_MAX, 1);
+       if (rc)
+               CERROR("%s: sync returns %d\n", dbgcksum_file_name, rc);
+       filp_close(filp, NULL);
+}
+
+/*
+ * Compare the checksum sent back by the client with the one computed by the
+ * server for a bulk read, and report a mismatch on the console.
+ *
+ * When the checksums differ and the obd_checksum_dump flag of the export's
+ * obd_device is set, all pages of the bulk are first dumped to a file with
+ * dump_all_bulk_pages().
+ *
+ * \param desc               bulk descriptor (export, kiov pages)
+ * \param oa                 obdo from the client request (FID, object id,
+ *                           flags carrying the client checksum type)
+ * \param peer               peer whose NID is printed in the message
+ * \param client_cksum       checksum reported by the client
+ * \param server_cksum       checksum computed by the server
+ * \param server_cksum_type  checksum type used by the server
+ *
+ * \retval 0  checksums are identical
+ * \retval 1  checksums differ; an error message has been printed
+ *
+ * NOTE(review): the caller visible in this patch (tgt_brw_read()) derives
+ * server_cksum_type from the same oa flags that are unpacked below, so the
+ * "protocol problem" branch looks unreachable from there, and it also
+ * ignores the return value - confirm whether this is intended.
+ * NOTE(review): console message id 0x132 is also used by the client-side
+ * "BAD WRITE CHECKSUM" message in osc_request.c - confirm ids may be shared.
+ */
+static int check_read_checksum(struct ptlrpc_bulk_desc *desc, struct obdo *oa,
+                              const lnet_process_id_t *peer,
+                              __u32 client_cksum, __u32 server_cksum,
+                              cksum_type_t server_cksum_type)
+{
+       char *msg;
+       cksum_type_t cksum_type;
+
+       /* unlikely to happen and only if resend does not occur due to cksum
+        * control failure on Client */
+       if (unlikely(server_cksum == client_cksum)) {
+               CDEBUG(D_PAGE, "checksum %x confirmed upon retry\n",
+                      client_cksum);
+               return 0;
+       }
+
+       if (desc->bd_export->exp_obd->obd_checksum_dump)
+               dump_all_bulk_pages(oa, desc->bd_iov_count,
+                                   &BD_GET_KIOV(desc, 0), server_cksum,
+                                   client_cksum);
+
+       /* checksum type requested by the client, 0 flags if none were sent */
+       cksum_type = cksum_type_unpack(oa->o_valid & OBD_MD_FLFLAGS ?
+                                      oa->o_flags : 0);
+
+       if (cksum_type != server_cksum_type)
+               msg = "the server may have not used the checksum type specified"
+                     " in the original request - likely a protocol problem";
+       else
+               msg = "should have changed on the client or in transit";
+
+       /* the extent is rebuilt from the index of the first and last pages */
+       LCONSOLE_ERROR_MSG(0x132, "%s: BAD READ CHECKSUM: %s: from %s inode "
+               DFID " object "DOSTID" extent [%llu-%llu], client returned csum"
+               " %x (type %x), server csum %x (type %x)\n",
+               desc->bd_export->exp_obd->obd_name,
+               msg, libcfs_nid2str(peer->nid),
+               oa->o_valid & OBD_MD_FLFID ? oa->o_parent_seq : 0ULL,
+               oa->o_valid & OBD_MD_FLFID ? oa->o_parent_oid : 0,
+               oa->o_valid & OBD_MD_FLFID ? oa->o_parent_ver : 0,
+               POSTID(&oa->o_oi),
+               (__u64)BD_GET_KIOV(desc, 0).kiov_page->index << PAGE_SHIFT,
+               ((__u64)BD_GET_KIOV(desc,
+                                   desc->bd_iov_count - 1).kiov_page->index
+                       << PAGE_SHIFT) +
+                       BD_GET_KIOV(desc, desc->bd_iov_count - 1).kiov_len - 1,
+               client_cksum, cksum_type, server_cksum, server_cksum_type);
+       return 1;
+}
+
 int tgt_brw_read(struct tgt_session_info *tsi)
 {
        struct ptlrpc_request   *req = tgt_ses_req(tsi);
@@ -1859,12 +1987,22 @@ int tgt_brw_read(struct tgt_session_info *tsi)
                cksum_type_t cksum_type =
                        cksum_type_unpack(body->oa.o_valid & OBD_MD_FLFLAGS ?
                                          body->oa.o_flags : 0);
+
                repbody->oa.o_flags = cksum_type_pack(cksum_type);
                repbody->oa.o_valid = OBD_MD_FLCKSUM | OBD_MD_FLFLAGS;
                repbody->oa.o_cksum = tgt_checksum_bulk(tsi->tsi_tgt, desc,
                                                        OST_READ, cksum_type);
                CDEBUG(D_PAGE, "checksum at read origin: %x\n",
                       repbody->oa.o_cksum);
+
+               /* if a resend it could be for a cksum error, so check Server
+                * cksum with returned Client cksum (this should even cover
+                * zero-cksum case) */
+               if ((body->oa.o_valid & OBD_MD_FLFLAGS) &&
+                   (body->oa.o_flags & OBD_FL_RECOV_RESEND))
+                       check_read_checksum(desc, &body->oa, &req->rq_peer,
+                                           body->oa.o_cksum,
+                                           repbody->oa.o_cksum, cksum_type);
        } else {
                repbody->oa.o_valid = 0;
        }
@@ -1941,13 +2079,18 @@ static void tgt_warn_on_cksum(struct ptlrpc_request *req,
                router = libcfs_nid2str(desc->bd_sender);
        }
 
+       if (exp->exp_obd->obd_checksum_dump)
+               dump_all_bulk_pages(&body->oa, desc->bd_iov_count,
+                                   &BD_GET_KIOV(desc, 0), server_cksum,
+                                   client_cksum);
+
        if (mmap) {
                CDEBUG_LIMIT(D_INFO, "client csum %x, server csum %x\n",
                             client_cksum, server_cksum);
                return;
        }
 
-       LCONSOLE_ERROR_MSG(0x168, "BAD WRITE CHECKSUM: %s from %s%s%s inode "
+       LCONSOLE_ERROR_MSG(0x168, "%s: BAD WRITE CHECKSUM: from %s%s%s inode "
                           DFID" object "DOSTID" extent [%llu-%llu"
                           "]: client csum %x, server csum %x\n",
                           exp->exp_obd->obd_name, libcfs_id2str(req->rq_peer),
index 67fd9c1..4450172 100755 (executable)
@@ -6107,6 +6107,96 @@ test_77b() { # bug 10889
 }
 run_test 77b "checksum error on client write, read"
 
+# Undo what test_77c set up: remove the EXIT trap, disable checksums and the
+# checksum_dump tunables, and delete the dump files created on the Client
+# and (when $check_ost is true) on the OSS.
+# Relies on the check_ost, osc_file_prefix and ost_file_prefix variables of
+# test_77c.
+cleanup_77c() {
+       trap 0
+       set_checksums 0
+       $LCTL set_param osc.*osc-[^mM]*.checksum_dump=0
+       $check_ost &&
+               do_facet ost1 $LCTL set_param obdfilter.*-OST*.checksum_dump=0
+       # the tested variables must be quoted: with an empty value an unquoted
+       # "[ -n ]" succeeds and the rm below would expand to "rm -f *".
+       # The prefixes stay unquoted in rm so that the trailing glob works.
+       [ -n "$osc_file_prefix" ] && rm -f ${osc_file_prefix}*
+       $check_ost && [ -n "$ost_file_prefix" ] &&
+               do_facet ost1 rm -f ${ost_file_prefix}\*
+}
+
+# Check that, with the checksum_dump tunables enabled, a bulk read hit by an
+# injected checksum error (OBD_FAIL_OSC_CHECKSUM_RECEIVE) leaves a dump file
+# on the Client and, when the OSS is recent enough, on the OSS, and that the
+# dumped content matches the data originally written.
+test_77c() {
+       [ $PARALLEL == "yes" ] && skip "skip parallel run" && return
+       $GSS && skip "could not run with gss" && return
+
+       local bad1
+       local osc_file_prefix
+       local osc_file
+       local check_ost=false
+       local ost_file_prefix
+       local ost_file
+       local orig_cksum
+       local dump_cksum
+       local fid
+
+       # ensure corruption will occur on first OSS/OST
+       $LFS setstripe -i 0 $DIR/$tfile
+
+       [ ! -f $F77_TMP ] && setup_f77
+       dd if=$F77_TMP of=$DIR/$tfile bs=1M count=$F77SZ conv=sync ||
+               error "dd write error: $?"
+       fid=$($LFS path2fid $DIR/$tfile)
+
+       if [ $(lustre_version_code ost1) -ge $(version_code 2.5.42.6) ]
+       then
+               check_ost=true
+               ost_file_prefix=$(do_facet ost1 $LCTL get_param -n debug_path)
+               ost_file_prefix=${ost_file_prefix}-checksum_dump-ost-\\${fid}
+       else
+               echo "OSS do not support bulk pages dump upon error"
+       fi
+
+       osc_file_prefix=$($LCTL get_param -n debug_path)
+       osc_file_prefix=${osc_file_prefix}-checksum_dump-osc-\\${fid}
+
+       trap cleanup_77c EXIT
+
+       set_checksums 1
+       # enable bulk pages dump upon error on Client
+       $LCTL set_param osc.*osc-[^mM]*.checksum_dump=1
+       # enable bulk pages dump upon error on OSS
+       $check_ost &&
+               do_facet ost1 $LCTL set_param obdfilter.*-OST*.checksum_dump=1
+
+       # flush Client cache to allow next read to reach OSS
+       cancel_lru_locks osc
+
+       #define OBD_FAIL_OSC_CHECKSUM_RECEIVE       0x408
+       $LCTL set_param fail_loc=0x80000408
+       dd if=$DIR/$tfile of=/dev/null bs=1M || error "dd read error: $?"
+       $LCTL set_param fail_loc=0
+
+       rm -f $DIR/$tfile
+
+       # check cksum dump on Client
+       osc_file=$(ls ${osc_file_prefix}*)
+       [ -n "$osc_file" ] || error "no checksum dump file on Client"
+       # OBD_FAIL_OSC_CHECKSUM_RECEIVE corrupts with "bad1" at start of file
+       bad1=$(dd if=$osc_file bs=1 count=4 2>/dev/null) || error "dd error: $?"
+       # quoted so that an empty read is reported as a pattern mismatch
+       # instead of a "[" syntax error
+       [ "$bad1" == "bad1" ] || error "unexpected corrupt pattern"
+       orig_cksum=$(dd if=$F77_TMP bs=1 skip=4 count=1048572 2>/dev/null |
+                    cksum)
+       dump_cksum=$(dd if=$osc_file bs=1 skip=4 2>/dev/null | cksum)
+       [[ "$orig_cksum" == "$dump_cksum" ]] ||
+               error "dump content does not match on Client"
+
+       # skip alone does not leave the function (see the "skip ... && return"
+       # guards above): clean up and return explicitly, otherwise the OSS
+       # checks below would run with an empty ost_file_prefix
+       if ! $check_ost; then
+               cleanup_77c
+               skip "No need to check cksum dump on OSS"
+               return
+       fi
+
+       # check cksum dump on OSS
+       ost_file=$(do_facet ost1 ls ${ost_file_prefix}\*)
+       [ -n "$ost_file" ] || error "no checksum dump file on OSS"
+       orig_cksum=$(dd if=$F77_TMP bs=1048576 count=1 2>/dev/null | cksum)
+       dump_cksum=$(do_facet ost1 dd if=$ost_file 2>/dev/null \| cksum)
+       [[ "$orig_cksum" == "$dump_cksum" ]] ||
+               error "dump content does not match on OSS"
+
+       cleanup_77c
+}
+run_test 77c "checksum error on client read with debug"
+
 test_77d() { # bug 10889
        [ $PARALLEL == "yes" ] && skip "skip parallel run" && return
        $GSS && skip "could not run with gss" && return