Whamcloud - gitweb
LU-14895 brw: log T10 GRD tags during checksum calcs
[fs/lustre-release.git] / lustre / osc / osc_request.c
index b9c2e05..b80f8f4 100644 (file)
@@ -1178,7 +1178,7 @@ static int osc_checksum_bulk_t10pi(const char *obd_name, int nob,
                                   size_t pg_count, struct brw_page **pga,
                                   int opc, obd_dif_csum_fn *fn,
                                   int sector_size,
-                                  u32 *check_sum)
+                                  u32 *check_sum, bool resend)
 {
        struct ahash_request *req;
        /* Used Adler as the default checksum type on top of DIF tags */
@@ -1211,6 +1211,10 @@ static int osc_checksum_bulk_t10pi(const char *obd_name, int nob,
        buffer = kmap(__page);
        guard_start = (__u16 *)buffer;
        guard_number = PAGE_SIZE / sizeof(*guard_start);
+       CDEBUG(D_PAGE | (resend ? D_HA : 0),
+              "GRD tags per page=%u, resend=%u, bytes=%u, pages=%zu\n",
+              guard_number, resend, nob, pg_count);
+
        while (nob > 0 && pg_count > 0) {
                unsigned int count = pga[i]->count > nob ? nob : pga[i]->count;
 
@@ -1236,6 +1240,12 @@ static int osc_checksum_bulk_t10pi(const char *obd_name, int nob,
                                                  guard_number - used_number,
                                                  &used, sector_size,
                                                  fn);
+               if (unlikely(resend))
+                       CDEBUG(D_PAGE | D_HA,
+                              "pga[%u]: used %u off %llu+%u gen checksum: %*phN\n",
+                              i, used, pga[i]->off & ~PAGE_MASK, count,
+                              (int)(used * sizeof(*guard_start)),
+                              guard_start + used_number);
                if (rc)
                        break;
 
@@ -1274,7 +1284,7 @@ out:
 #else /* !CONFIG_CRC_T10DIF */
 #define obd_dif_ip_fn NULL
 #define obd_dif_crc_fn NULL
-#define osc_checksum_bulk_t10pi(name, nob, pgc, pga, opc, fn, ssize, csum \
+#define osc_checksum_bulk_t10pi(name, nob, pgc, pga, opc, fn, ssize, csum, re) \
        -EOPNOTSUPP
 #endif /* CONFIG_CRC_T10DIF */
 
@@ -1336,7 +1346,7 @@ static int osc_checksum_bulk_rw(const char *obd_name,
                                enum cksum_types cksum_type,
                                int nob, size_t pg_count,
                                struct brw_page **pga, int opc,
-                               u32 *check_sum)
+                               u32 *check_sum, bool resend)
 {
        obd_dif_csum_fn *fn = NULL;
        int sector_size = 0;
@@ -1347,7 +1357,8 @@ static int osc_checksum_bulk_rw(const char *obd_name,
 
        if (fn)
                rc = osc_checksum_bulk_t10pi(obd_name, nob, pg_count, pga,
-                                            opc, fn, sector_size, check_sum);
+                                            opc, fn, sector_size, check_sum,
+                                            resend);
        else
                rc = osc_checksum_bulk(nob, pg_count, pga, opc, cksum_type,
                                       check_sum);
@@ -1710,17 +1721,18 @@ no_bulk:
                        rc = osc_checksum_bulk_rw(obd_name, cksum_type,
                                                  requested_nob, page_count,
                                                  pga, OST_WRITE,
-                                                 &body->oa.o_cksum);
+                                                 &body->oa.o_cksum, resend);
                        if (rc < 0) {
-                               CDEBUG(D_PAGE, "failed to checksum, rc = %d\n",
+                               CDEBUG(D_PAGE, "failed to checksum: rc = %d\n",
                                       rc);
                                GOTO(out, rc);
                        }
-                        CDEBUG(D_PAGE, "checksum at write origin: %x\n",
-                               body->oa.o_cksum);
+                       CDEBUG(D_PAGE | (resend ? D_HA : 0),
+                              "checksum at write origin: %x (%x)\n",
+                              body->oa.o_cksum, cksum_type);
 
-                        /* save this in 'oa', too, for later checking */
-                        oa->o_valid |= OBD_MD_FLCKSUM | OBD_MD_FLFLAGS;
+                       /* save this in 'oa', too, for later checking */
+                       oa->o_valid |= OBD_MD_FLCKSUM | OBD_MD_FLFLAGS;
                        oa->o_flags |= obd_cksum_type_pack(obd_name,
                                                           cksum_type);
                 } else {
@@ -1793,6 +1805,7 @@ static void dump_all_bulk_pages(struct obdo *oa, __u32 page_count,
                 pga[0]->off,
                 pga[page_count-1]->off + pga[page_count-1]->count - 1,
                 client_cksum, server_cksum);
+       CWARN("dumping checksum data to %s\n", dbgcksum_file_name);
        filp = filp_open(dbgcksum_file_name,
                         O_CREAT | O_EXCL | O_WRONLY | O_LARGEFILE, 0600);
        if (IS_ERR(filp)) {
@@ -1819,8 +1832,6 @@ static void dump_all_bulk_pages(struct obdo *oa, __u32 page_count,
                        }
                        len -= rc;
                        buf += rc;
-                       CDEBUG(D_INFO, "%s: wrote %d bytes\n",
-                              dbgcksum_file_name, rc);
                }
                kunmap(pga[i]->pg);
        }
@@ -1829,6 +1840,8 @@ static void dump_all_bulk_pages(struct obdo *oa, __u32 page_count,
        if (rc)
                CERROR("%s: sync returns %d\n", dbgcksum_file_name, rc);
        filp_close(filp, NULL);
+
+       libcfs_debug_dumplog();
 }
 
 static int
@@ -1881,7 +1894,7 @@ check_write_checksum(struct obdo *oa, const struct lnet_process_id *peer,
                rc = osc_checksum_bulk_t10pi(obd_name, aa->aa_requested_nob,
                                             aa->aa_page_count, aa->aa_ppga,
                                             OST_WRITE, fn, sector_size,
-                                            &new_cksum);
+                                            &new_cksum, true);
        else
                rc = osc_checksum_bulk(aa->aa_requested_nob, aa->aa_page_count,
                                       aa->aa_ppga, OST_WRITE, cksum_type,
@@ -2043,22 +2056,23 @@ static int osc_brw_fini_request(struct ptlrpc_request *req, int rc)
                }
        }
 
-        if (rc < aa->aa_requested_nob)
-                handle_short_read(rc, aa->aa_page_count, aa->aa_ppga);
+       if (rc < aa->aa_requested_nob)
+               handle_short_read(rc, aa->aa_page_count, aa->aa_ppga);
 
-        if (body->oa.o_valid & OBD_MD_FLCKSUM) {
-                static int cksum_counter;
-               u32        server_cksum = body->oa.o_cksum;
-               char      *via = "";
-               char      *router = "";
+       if (body->oa.o_valid & OBD_MD_FLCKSUM) {
+               static int cksum_counter;
+               u32 server_cksum = body->oa.o_cksum;
+               int nob = rc;
+               char *via = "";
+               char *router = "";
                enum cksum_types cksum_type;
                u32 o_flags = body->oa.o_valid & OBD_MD_FLFLAGS ?
                        body->oa.o_flags : 0;
 
                cksum_type = obd_cksum_type_unpack(o_flags);
-               rc = osc_checksum_bulk_rw(obd_name, cksum_type, rc,
+               rc = osc_checksum_bulk_rw(obd_name, cksum_type, nob,
                                          aa->aa_page_count, aa->aa_ppga,
-                                         OST_READ, &client_cksum);
+                                         OST_READ, &client_cksum, false);
                if (rc < 0)
                        GOTO(out, rc);
 
@@ -2070,8 +2084,12 @@ static int osc_brw_fini_request(struct ptlrpc_request *req, int rc)
 
                if (server_cksum != client_cksum) {
                        struct ost_body *clbody;
+                       __u32 client_cksum2;
                        u32 page_count = aa->aa_page_count;
 
+                       osc_checksum_bulk_rw(obd_name, cksum_type, nob,
+                                            page_count, aa->aa_ppga,
+                                            OST_READ, &client_cksum2, true);
                        clbody = req_capsule_client_get(&req->rq_pill,
                                                        &RMF_OST_BODY);
                        if (cli->cl_checksum_dump)
@@ -2081,7 +2099,7 @@ static int osc_brw_fini_request(struct ptlrpc_request *req, int rc)
 
                        LCONSOLE_ERROR_MSG(0x133, "%s: BAD READ CHECKSUM: from "
                                           "%s%s%s inode "DFID" object "DOSTID
-                                          " extent [%llu-%llu], client %x, "
+                                          " extent [%llu-%llu], client %x/%x, "
                                           "server %x, cksum_type %x\n",
                                           obd_name,
                                           libcfs_nid2str(peer->nid),
@@ -2096,8 +2114,8 @@ static int osc_brw_fini_request(struct ptlrpc_request *req, int rc)
                                           aa->aa_ppga[0]->off,
                                           aa->aa_ppga[page_count-1]->off +
                                           aa->aa_ppga[page_count-1]->count - 1,
-                                          client_cksum, server_cksum,
-                                          cksum_type);
+                                          client_cksum, client_cksum2,
+                                          server_cksum, cksum_type);
                        cksum_counter = 0;
                        aa->aa_oa->o_cksum = client_cksum;
                        rc = -EAGAIN;
@@ -2336,7 +2354,7 @@ static int brw_interpret(const struct lu_env *env,
                               req->rq_import->imp_obd->obd_name,
                               POSTID(&aa->aa_oa->o_oi), rc);
                } else if (rc == -EINPROGRESS ||
-                   client_should_resend(aa->aa_resends, aa->aa_cli)) {
+                          client_should_resend(aa->aa_resends, aa->aa_cli)) {
                        rc = osc_brw_redo_request(req, aa, rc);
                } else {
                        CERROR("%s: too many resent retries for object: "