Details : fix duplicated definitions between asm-ppc64/types.h and
lustre_types.h on PPC.
+Severity : normal
+Bugzilla : 13805
+Description: data checksumming impacts single node performance
+Details : add support for several checksum algorithms. Currently, CRC32 and
+ Adler-32 are supported. The checksum type can be changed on the fly
+ through /proc/fs/lustre/osc/*/checksum_type.
+
--------------------------------------------------------------------------------
2007-12-07 Cluster File Systems, Inc. <info@clusterfs.com>
AC_DEFUN([LC_CONFIG_SRCDIR],
[AC_CONFIG_SRCDIR([lustre/obdclass/obdo.c])
])
-
+
#
# LC_PATH_DEFAULTS
#
# utils/llverfs.c
AC_CHECK_HEADERS([ext2fs/ext2fs.h])
+# include/linux/obd_support.h
+AC_CHECK_HEADERS([zlib.h])
+
+# check for -lz support
+# ZLIB is substituted into the Makefiles: "-lz" when adler32() is found in
+# libz, empty otherwise.  HAVE_ADLER is only defined in the former case.
+AC_CHECK_LIB(z, [adler32],
+        [
+        ZLIB="-lz"
+        AC_DEFINE([HAVE_ADLER], 1, [support adler32 checksum type])
+        ],
+        [
+        ZLIB=""
+        AC_MSG_WARN([No zlib-devel package found, unable to use adler32 checksum])
+        ])
+AC_SUBST(ZLIB)
+
# Super safe df
AC_ARG_ENABLE([mindf],
AC_HELP_STRING([--enable-mindf],
spin_unlock(lock);
}
+#if defined(__KERNEL__) && !defined(HAVE_ADLER)
+/* zlib_adler32() is an inline function defined in linux/zutil.h */
+#define HAVE_ADLER
+#endif
#endif /* __LINUX_OBD_H */
#endif
#include <libcfs/kp30.h>
#include <linux/lustre_compat25.h>
+#include <lustre/lustre_idl.h>
/* Prefer the kernel's version, if it exports it, because it might be
* optimized for this CPU. */
#endif
#ifdef __KERNEL__
+# include <linux/zutil.h>
+# ifndef HAVE_ADLER
+# define HAVE_ADLER
+# endif
+#else /* ! __KERNEL__ */
+# ifdef HAVE_ADLER
+# include <zlib.h>
+
+/* Userspace stand-in for zlib_adler32(): forwards to zlib's adler32() so that
+ * callers can use the same name whether or not __KERNEL__ is defined. */
+static inline __u32 zlib_adler32(__u32 adler, unsigned char const *p,
+                                 size_t len)
+{
+        __u32 updated = adler32(adler, p, len);
+
+        return updated;
+}
+# endif
+#endif /* __KERNEL__ */
+
+/*
+ * Return the starting value for a running checksum of the given type:
+ * ~0U for CRC32 and, when HAVE_ADLER is defined, 1U for Adler-32.
+ * Any other type is reported with CERROR() and triggers LBUG().
+ */
+static inline __u32 init_checksum(cksum_type_t cksum_type)
+{
+        if (cksum_type == OBD_CKSUM_CRC32)
+                return ~0U;
+#ifdef HAVE_ADLER
+        if (cksum_type == OBD_CKSUM_ADLER)
+                return 1U;
+#endif
+        CERROR("Unknown checksum type (%x)!!!\n", cksum_type);
+        LBUG();
+        return 0;
+}
+
+/*
+ * Fold @len bytes at @p into the running checksum @cksum using the algorithm
+ * selected by @cksum_type, and return the updated value.  CRC32 goes through
+ * crc32_le(); Adler-32 (only when HAVE_ADLER is defined) goes through
+ * zlib_adler32().  Any other type is reported with CERROR() and triggers
+ * LBUG().
+ */
+static inline __u32 compute_checksum(__u32 cksum, unsigned char const *p,
+                                     size_t len, cksum_type_t cksum_type)
+{
+        if (cksum_type == OBD_CKSUM_CRC32)
+                return crc32_le(cksum, p, len);
+#ifdef HAVE_ADLER
+        if (cksum_type == OBD_CKSUM_ADLER)
+                return zlib_adler32(cksum, p, len);
+#endif
+        CERROR("Unknown checksum type (%x)!!!\n", cksum_type);
+        LBUG();
+        return 0;
+}
+
+/*
+ * Translate a checksum type into the matching OBD_FL_CKSUM_* flag.
+ * An unrecognised type is logged with CWARN() and mapped to the CRC32 flag.
+ */
+static inline obd_flag cksum_type_pack(cksum_type_t cksum_type)
+{
+        if (cksum_type == OBD_CKSUM_CRC32)
+                return OBD_FL_CKSUM_CRC32;
+#ifdef HAVE_ADLER
+        if (cksum_type == OBD_CKSUM_ADLER)
+                return OBD_FL_CKSUM_ADLER;
+#endif
+        CWARN("unknown cksum type %x\n", cksum_type);
+        return OBD_FL_CKSUM_CRC32;
+}
+
+/*
+ * Translate the OBD_FL_CKSUM_* bits of @o_flags back into a checksum type.
+ * Warns when more than one checksum bit is set.  The Adler flag selects
+ * OBD_CKSUM_ADLER when HAVE_ADLER is defined; otherwise a warning is logged
+ * and CRC32 is returned.  CRC32 is also the result when the Adler flag is
+ * not set.
+ */
+static inline cksum_type_t cksum_type_unpack(obd_flag o_flags)
+{
+        obd_flag cksum_bits = o_flags & OBD_FL_CKSUM_ALL;
+
+        /* x & (x - 1) is non-zero only when x has more than one bit set */
+        if (cksum_bits & (cksum_bits - 1))
+                CWARN("several checksum types are set: %x\n", cksum_bits);
+
+        if ((cksum_bits & OBD_FL_CKSUM_ADLER) == 0)
+                return OBD_CKSUM_CRC32;
+
+#ifdef HAVE_ADLER
+        return OBD_CKSUM_ADLER;
+#else
+        CWARN("checksum type is set to adler32, but adler32 is not "
+              "supported (%x)\n", cksum_bits);
+        return OBD_CKSUM_CRC32;
+#endif
+}
+
+#ifdef __KERNEL__
# include <linux/types.h>
# include <linux/blkdev.h>
# include <lvfs.h>
OBD_CONNECT_TRUNCLOCK | OBD_CONNECT_INDEX | \
OBD_CONNECT_BRW_SIZE | OBD_CONNECT_QUOTA64 | \
OBD_CONNECT_CANCELSET | OBD_CONNECT_AT | \
- LRU_RESIZE_CONNECT_FLAG | OBD_CONNECT_CHANGE_QS)
+ LRU_RESIZE_CONNECT_FLAG | OBD_CONNECT_CKSUM | \
+ OBD_CONNECT_CHANGE_QS)
#define ECHO_CONNECT_SUPPORTED (0)
#define MGS_CONNECT_SUPPORTED (OBD_CONNECT_VERSION | OBD_CONNECT_AT)
__u64 ocd_ibits_known; /* inode bits this client understands */
__u32 ocd_nllu; /* non-local-lustre-user */
__u32 ocd_nllg; /* non-local-lustre-group */
+ __u64 ocd_transno; /* Used in lustre 1.8 */
+ __u32 ocd_group; /* Used in lustre 1.8 */
+ __u32 ocd_cksum_types; /* supported checksum algorithms */
__u64 padding1; /* also fix lustre_swab_connect */
__u64 padding2; /* also fix lustre_swab_connect */
- __u64 padding3; /* also fix lustre_swab_connect */
- __u64 padding4; /* also fix lustre_swab_connect */
};
extern void lustre_swab_connect(struct obd_connect_data *ocd);
/*
+ * Supported checksum algorithms. Up to 32 checksum types are supported.
+ * (32-bit mask stored in obd_connect_data::ocd_cksum_types)
+ * Please update DECLARE_CKSUM_NAME/OBD_CKSUM_ALL in obd.h when adding a new
+ * algorithm and also the OBD_FL_CKSUM* flags.
+ */
+typedef enum {
+ OBD_CKSUM_CRC32 = 0x00000001, /* CRC32 */
+ OBD_CKSUM_ADLER = 0x00000002, /* Adler-32 */
+} cksum_type_t;
+
+/*
* OST requests: OBDO & OBD request records
*/
*/
#define OBD_FL_TRUNCLOCK (0x00000800)
+/*
+ * Checksum types
+ */
+#define OBD_FL_CKSUM_CRC32 (0x00001000)
+#define OBD_FL_CKSUM_ADLER (0x00002000)
+#define OBD_FL_CKSUM_ALL (OBD_FL_CKSUM_CRC32 | OBD_FL_CKSUM_ADLER)
+
/* this should be not smaller than sizeof(struct lustre_handle) + sizeof(struct
* llog_cookie) + sizeof(ll_fid). Nevertheless struct ll_fid is not longer
* stored in o_inline, we keep this just for case. */
atomic_t cl_mgc_refcount;
struct obd_export *cl_mgc_mgsexp;
- /* Flags section */
- unsigned long cl_checksum:1; /* debug checksums */
-
+ /* checksumming for data sent over the network */
+ unsigned int cl_checksum:1; /* 0 = disabled, 1 = enabled */
+ /* supported checksum types that are worked out at connect time */
+ __u32 cl_supp_cksum_types;
+ /* checksum algorithm to be used */
+ cksum_type_t cl_cksum_type;
+
/* also protected by the poorly named _loi_list_lock lock above */
struct osc_async_rc cl_ar;
obd_ops->o_quota_adjust_qunit = QUOTA_OP(interface, adjust_qunit);
}
+/*
+ * Checksums
+ */
+
+#ifdef HAVE_ADLER
+/* Default preferred checksum algorithm to use (if supported by the server) */
+#define OSC_DEFAULT_CKSUM OBD_CKSUM_ADLER
+/* Adler-32 is supported */
+#define CHECKSUM_ADLER OBD_CKSUM_ADLER
+#else
+#define OSC_DEFAULT_CKSUM OBD_CKSUM_CRC32
+#define CHECKSUM_ADLER 0
+#endif
+
+#define OBD_CKSUM_ALL (OBD_CKSUM_CRC32 | CHECKSUM_ADLER)
+
+/* Checksum algorithm names. Must be defined in the same order as the
+ * OBD_CKSUM_* flags. */
+#define DECLARE_CKSUM_NAME char *cksum_name[] = {"crc32", "adler"}
+
#endif /* __OBD_H */
#define OBD_FAIL_OSC_CHECKSUM_RECEIVE 0x408
#define OBD_FAIL_OSC_CHECKSUM_SEND 0x409
#define OBD_FAIL_OSC_BRW_PREP_REQ2 0x40a
+#define OBD_FAIL_OSC_CONNECT_CKSUM 0x40b
+#define OBD_FAIL_OSC_CKSUM_ADLER_ONLY 0x40c
#define OBD_FAIL_PTLRPC 0x500
#define OBD_FAIL_PTLRPC_ACK 0x501
cfs_waitq_init(&cli->cl_destroy_waitq);
atomic_set(&cli->cl_destroy_in_flight, 0);
#ifdef ENABLE_CHECKSUM
+ /* Turn on checksumming by default. */
cli->cl_checksum = 1;
+ /*
+ * The supported checksum types will be worked out at connect time.
+ * Set cl_cksum_type and cl_supp_cksum_types to CRC32 for now to avoid
+ * returning bogus info through procfs.
+ */
+ cli->cl_cksum_type = cli->cl_supp_cksum_types = OBD_CKSUM_CRC32;
#endif
atomic_set(&cli->cl_resends, OSC_DEFAULT_RESENDS);
CDEBUG(D_HA, "marking %s %s->%s as inactive\n",
name, obddev->obd_name,
cli->cl_target_uuid.uuid);
-
spin_lock(&imp->imp_lock);
imp->imp_invalid = 1;
spin_unlock(&imp->imp_lock);
llite_lib.h
liblustre.a : $(LUSTRE_LIBS) $(LND_LIBS) $(LNET_LIBS) $(SYSIO_LIBS) $(QUOTA_LIBS)
- sh $(srcdir)/genlib.sh "$(SYSIO)" "$(LIBS)" "$(LND_LIBS)" "$(PTHREAD_LIBS)" "$(QUOTA_LIBS)" "$(CAP_LIBS)"
+ sh $(srcdir)/genlib.sh "$(SYSIO)" "$(LIBS)" "$(LND_LIBS)" "$(PTHREAD_LIBS)" "$(QUOTA_LIBS)" "$(CAP_LIBS)" "$(ZLIB)"
EXTRA_DIST = genlib.sh
PTHREAD_LIBS=$4
QUOTA_LIBS=$5
CAP_LIBS=$6
+ZLIB=$7
if [ ! -f $SYSIO/lib/libsysio.a ]; then
echo "ERROR: $SYSIO/lib/libsysio.a dosen't exist"
if test x$OS = xAIX; then
$LD -shared -o $CWD/liblustre.so $ALL_OBJS -lpthread -Xlinker -bnoipath ../../libsyscall.so
else
-$LD -shared -nostdlib -o $CWD/liblustre.so $ALL_OBJS $CAP_LIBS $PTHREAD_LIBS
+$LD -shared -nostdlib -o $CWD/liblustre.so $ALL_OBJS $CAP_LIBS $PTHREAD_LIBS $ZLIB
fi
rm -rf $sysio_tmp
AM_CFLAGS = $(LLCFLAGS)
AM_LIBS = $(LIBEFENCE) $(LIBREADLINE)
-LLIB_EXEC = $(top_builddir)/lustre/utils/liblustreapi.a $(top_builddir)/lustre/liblustre/liblustre.a $(CAP_LIBS) $(PTHREAD_LIBS)
+LLIB_EXEC = $(top_builddir)/lustre/utils/liblustreapi.a $(top_builddir)/lustre/liblustre/liblustre.a $(CAP_LIBS) $(PTHREAD_LIBS) $(ZLIB)
if LIBLUSTRE
noinst_LIBRARIES = libtestcommon.a
echo_test_SOURCES = echo_test.c $(top_srcdir)/lustre/utils/parser.c $(top_srcdir)/lustre/utils/obd.c $(top_srcdir)/lustre/utils/lustre_cfg.c
echo_test_CFLAGS = $(LL_CFLAGS)
-echo_test_LDADD = $(top_builddir)/lustre/liblustre/liblsupport.a $(LIBREADLINE) $(CAP_LIBS) $(PTHREAD_LIBS)
+echo_test_LDADD = $(top_builddir)/lustre/liblustre/liblsupport.a $(LIBREADLINE) $(CAP_LIBS) $(PTHREAD_LIBS) $(ZLIB)
echo_test_DEPENDENCIES=$(top_builddir)/lustre/liblustre/liblsupport.a
sanity_SOURCES = sanity.c
OBD_CONNECT_REQPORTAL | OBD_CONNECT_BRW_SIZE |
OBD_CONNECT_SRVLOCK | OBD_CONNECT_CANCELSET | OBD_CONNECT_AT;
+ if (!OBD_FAIL_CHECK(OBD_FAIL_OSC_CONNECT_CKSUM)) {
+ /* OBD_CONNECT_CKSUM should always be set, even if checksums are
+ * disabled by default, because it can still be enabled on the
+ * fly via /proc. As a consequence, we still need to come to an
+ * agreement on the supported algorithms at connect time */
+ data->ocd_connect_flags |= OBD_CONNECT_CKSUM;
+
+ if (OBD_FAIL_CHECK(OBD_FAIL_OSC_CKSUM_ADLER_ONLY))
+ data->ocd_cksum_types = OBD_CKSUM_ADLER;
+ else
+ /* send the list of supported checksum types */
+ data->ocd_cksum_types = OBD_CKSUM_ALL;
+ }
+
#ifdef HAVE_LRU_RESIZE_SUPPORT
if (sbi->ll_flags & LL_SBI_LRU_RESIZE)
data->ocd_connect_flags |= OBD_CONNECT_LRU_RESIZE;
LASSERT(data->ocd_brw_size);
}
+ if (data->ocd_connect_flags & OBD_CONNECT_CKSUM) {
+ __u32 cksum_types = data->ocd_cksum_types;
+
+ /* The client set in ocd_cksum_types the checksum types it
+ * supports. We have to mask off the algorithms that we don't
+ * support */
+ if (cksum_types & OBD_CKSUM_ALL)
+ data->ocd_cksum_types &= OBD_CKSUM_ALL;
+ else
+ data->ocd_cksum_types = OBD_CKSUM_CRC32;
+
+ CDEBUG(D_RPCTRACE, "%s: cli %s supports cksum type %x, return "
+ "%x\n", exp->exp_obd->obd_name,
+ obd_export_nid2str(exp), cksum_types,
+ data->ocd_cksum_types);
+ } else {
+ /* This client does not support OBD_CONNECT_CKSUM
+ * fall back to CRC32 */
+ CDEBUG(D_RPCTRACE, "%s: cli %s does not support "
+ "OBD_CONNECT_CKSUM, CRC32 will be used\n",
+ exp->exp_obd->obd_name,
+ obd_export_nid2str(exp));
+ }
+
/* FIXME: Do the same with the MDS UUID and fsd_peeruuid.
* FIXME: We don't strictly need the COMPAT flag for that,
* FIXME: as fsd_peeruuid[0] will tell us if that is set.
return count;
}
+/*
+ * procfs read handler for "checksum_type".
+ *
+ * Writes into @page the names of the checksum algorithms present in
+ * cl_supp_cksum_types, separated by spaces, with the one currently selected
+ * in cl_cksum_type enclosed in square brackets, followed by a newline.
+ *
+ * @page:  output buffer
+ * @count: size of @page in bytes
+ * @data:  the obd_device; when NULL nothing is written
+ *
+ * Returns the number of characters stored in @page (never more than fits),
+ * or 0 when @data is NULL.  @start, @off and @eof are not used.
+ */
+static int osc_rd_checksum_type(char *page, char **start, off_t off, int count,
+                                int *eof, void *data)
+{
+        struct obd_device *obd = data;
+        int i, len = 0;
+        DECLARE_CKSUM_NAME;
+
+        if (obd == NULL)
+                return 0;
+
+        for (i = 0; i < ARRAY_SIZE(cksum_name) && len < count; i++) {
+                /* bit i of the mask corresponds to cksum_name[i] */
+                if (((1 << i) & obd->u.cli.cl_supp_cksum_types) == 0)
+                        continue;
+                if (obd->u.cli.cl_cksum_type == (1 << i))
+                        len += snprintf(page + len, count - len, "[%s] ",
+                                        cksum_name[i]);
+                else
+                        len += snprintf(page + len, count - len, "%s ",
+                                        cksum_name[i]);
+        }
+        /* bounded write: an unbounded sprintf() here could place its
+         * terminating NUL one byte past the end of the buffer */
+        if (len < count)
+                len += snprintf(page + len, count - len, "\n");
+        /* snprintf() returns the untruncated length; on truncation only
+         * count - 1 characters (plus the NUL) were actually stored */
+        if (count > 0 && len >= count)
+                len = count - 1;
+        return len;
+}
+
+/*
+ * procfs write handler for "checksum_type".
+ *
+ * Copies at most sizeof(kernbuf) - 1 bytes from user space, drops one
+ * trailing newline if present, and selects the checksum algorithm whose name
+ * matches, provided it is present in cl_supp_cksum_types.
+ *
+ * Returns @count on success, 0 when @data is NULL, -EINVAL when the input is
+ * too long or names no supported algorithm, -EFAULT when the user buffer
+ * cannot be read.
+ */
+static int osc_wd_checksum_type(struct file *file, const char *buffer,
+                                unsigned long count, void *data)
+{
+        struct obd_device *obd = data;
+        DECLARE_CKSUM_NAME;
+        char kernbuf[10];
+        unsigned long end;
+        int idx;
+
+        if (obd == NULL)
+                return 0;
+
+        /* leave room for the terminating NUL */
+        if (count >= sizeof(kernbuf))
+                return -EINVAL;
+        if (copy_from_user(kernbuf, buffer, count))
+                return -EFAULT;
+
+        /* terminate the string, overwriting a trailing newline if present */
+        end = count;
+        if (count > 0 && kernbuf[count - 1] == '\n')
+                end = count - 1;
+        kernbuf[end] = '\0';
+
+        for (idx = 0; idx < ARRAY_SIZE(cksum_name); idx++) {
+                if ((obd->u.cli.cl_supp_cksum_types & (1 << idx)) != 0 &&
+                    strcmp(kernbuf, cksum_name[idx]) == 0) {
+                        obd->u.cli.cl_cksum_type = 1 << idx;
+                        return count;
+                }
+        }
+        return -EINVAL;
+}
+
static int osc_rd_resend_count(char *page, char **start, off_t off, int count,
int *eof, void *data)
{
{ "prealloc_next_id", osc_rd_prealloc_next_id, 0, 0 },
{ "prealloc_last_id", osc_rd_prealloc_last_id, 0, 0 },
{ "checksums", osc_rd_checksum, osc_wr_checksum, 0 },
+ { "checksum_type", osc_rd_checksum_type, osc_wd_checksum_type, 0 },
{ "resend_count", osc_rd_resend_count, osc_wr_resend_count, 0},
{ "timeouts", lprocfs_rd_timeouts, 0, 0 },
{ 0 }
}
static obd_count osc_checksum_bulk(int nob, obd_count pg_count,
- struct brw_page **pga, int opc)
+ struct brw_page **pga, int opc,
+ cksum_type_t cksum_type)
{
- __u32 cksum = ~0;
+ __u32 cksum;
int i = 0;
LASSERT (pg_count > 0);
+ cksum = init_checksum(cksum_type);
while (nob > 0 && pg_count > 0) {
unsigned char *ptr = cfs_kmap(pga[i]->pg);
int off = pga[i]->off & ~CFS_PAGE_MASK;
if (i == 0 && opc == OST_READ &&
OBD_FAIL_CHECK_ONCE(OBD_FAIL_OSC_CHECKSUM_RECEIVE))
memcpy(ptr + off, "bad1", min(4, nob));
- cksum = crc32_le(cksum, ptr + off, count);
+ cksum = compute_checksum(cksum, ptr + off, count, cksum_type);
cfs_kunmap(pga[i]->pg);
LL_CDEBUG_PAGE(D_PAGE, pga[i]->pg, "off %d checksum %x\n",
off, cksum);
/* size[REQ_REC_OFF] still sizeof (*body) */
if (opc == OST_WRITE) {
- if (unlikely(cli->cl_checksum)) {
- body->oa.o_valid |= OBD_MD_FLCKSUM;
+ if (cli->cl_checksum) {
+ /* store cl_cksum_type in a local variable since
+ * it can be changed via lprocfs */
+ cksum_type_t cksum_type = cli->cl_cksum_type;
+
+ if ((body->oa.o_valid & OBD_MD_FLFLAGS) == 0)
+ oa->o_flags = body->oa.o_flags = 0;
+ body->oa.o_flags |= cksum_type_pack(cksum_type);
+ body->oa.o_valid |= OBD_MD_FLCKSUM | OBD_MD_FLFLAGS;
body->oa.o_cksum = osc_checksum_bulk(requested_nob,
page_count, pga,
- OST_WRITE);
+ OST_WRITE,
+ cksum_type);
CDEBUG(D_PAGE, "checksum at write origin: %x\n",
body->oa.o_cksum);
/* save this in 'oa', too, for later checking */
- oa->o_valid |= OBD_MD_FLCKSUM;
+ oa->o_valid |= OBD_MD_FLCKSUM | OBD_MD_FLFLAGS;
+ oa->o_flags |= cksum_type_pack(cksum_type);
} else {
/* clear out the checksum flag, in case this is a
* resend but cl_checksum is no longer set. b=11238 */
size[REPLY_REC_OFF + 1] = sizeof(__u32) * niocount;
ptlrpc_req_set_repsize(req, 3, size);
} else {
- if (unlikely(cli->cl_checksum))
- body->oa.o_valid |= OBD_MD_FLCKSUM;
+ if (cli->cl_checksum) {
+ if ((body->oa.o_valid & OBD_MD_FLFLAGS) == 0)
+ body->oa.o_flags = 0;
+ body->oa.o_flags |= cksum_type_pack(cli->cl_cksum_type);
+ body->oa.o_valid |= OBD_MD_FLCKSUM | OBD_MD_FLFLAGS;
+ }
/* 1 RC for the whole I/O */
ptlrpc_req_set_repsize(req, 2, size);
}
}
static int check_write_checksum(struct obdo *oa, const lnet_process_id_t *peer,
- __u32 client_cksum, __u32 server_cksum, int nob,
- obd_count page_count, struct brw_page **pga)
+ __u32 client_cksum, __u32 server_cksum, int nob,
+ obd_count page_count, struct brw_page **pga,
+ cksum_type_t client_cksum_type)
{
__u32 new_cksum;
char *msg;
+ cksum_type_t cksum_type;
if (server_cksum == client_cksum) {
CDEBUG(D_PAGE, "checksum %x confirmed\n", client_cksum);
return 0;
}
- new_cksum = osc_checksum_bulk(nob, page_count, pga, OST_WRITE);
+ if (oa->o_valid & OBD_MD_FLFLAGS)
+ cksum_type = cksum_type_unpack(oa->o_flags);
+ else
+ cksum_type = OBD_CKSUM_CRC32;
+
+ new_cksum = osc_checksum_bulk(nob, page_count, pga, OST_WRITE,
+ cksum_type);
- if (new_cksum == server_cksum)
+ if (cksum_type != client_cksum_type)
+ msg = "the server did not use the checksum type specified in "
+ "the original request - likely a protocol problem";
+ else if (new_cksum == server_cksum)
msg = "changed on the client after we checksummed it - "
"likely false positive due to mmap IO (bug 11742)";
else if (new_cksum == client_cksum)
oa->o_valid & OBD_MD_FLGROUP ? oa->o_gr : (__u64)0,
pga[0]->off,
pga[page_count-1]->off + pga[page_count-1]->count - 1);
- CERROR("original client csum %x, server csum %x, client csum now %x\n",
- client_cksum, server_cksum, new_cksum);
+ CERROR("original client csum %x (type %x), server csum %x (type %x), "
+ "client csum now %x\n", client_cksum, client_cksum_type,
+ server_cksum, cksum_type, new_cksum);
return 1;
}
if (rc < 0)
RETURN(rc);
- if (unlikely(aa->aa_oa->o_valid & OBD_MD_FLCKSUM))
+ if (aa->aa_oa->o_valid & OBD_MD_FLCKSUM)
client_cksum = aa->aa_oa->o_cksum; /* save for later */
osc_update_grant(cli, body);
}
LASSERT(req->rq_bulk->bd_nob == aa->aa_requested_nob);
- if (unlikely((aa->aa_oa->o_valid & OBD_MD_FLCKSUM) &&
- client_cksum &&
- check_write_checksum(&body->oa, peer, client_cksum,
- body->oa.o_cksum,
- aa->aa_requested_nob,
- aa->aa_page_count,
- aa->aa_ppga)))
+ if ((aa->aa_oa->o_valid & OBD_MD_FLCKSUM) && client_cksum &&
+ check_write_checksum(&body->oa, peer, client_cksum,
+ body->oa.o_cksum, aa->aa_requested_nob,
+ aa->aa_page_count, aa->aa_ppga,
+ cksum_type_unpack(aa->aa_oa->o_flags)))
RETURN(-EAGAIN);
rc = check_write_rcs(req, aa->aa_requested_nob,aa->aa_nio_count,
if (rc < aa->aa_requested_nob)
handle_short_read(rc, aa->aa_page_count, aa->aa_ppga);
- if (unlikely(body->oa.o_valid & OBD_MD_FLCKSUM)) {
+ if (body->oa.o_valid & OBD_MD_FLCKSUM) {
static int cksum_counter;
__u32 server_cksum = body->oa.o_cksum;
char *via;
char *router;
+ cksum_type_t cksum_type;
+ if (body->oa.o_valid & OBD_MD_FLFLAGS)
+ cksum_type = cksum_type_unpack(body->oa.o_flags);
+ else
+ cksum_type = OBD_CKSUM_CRC32;
client_cksum = osc_checksum_bulk(rc, aa->aa_page_count,
- aa->aa_ppga, OST_READ);
+ aa->aa_ppga, OST_READ,
+ cksum_type);
if (peer->nid == req->rq_bulk->bd_sender) {
via = router = "";
via = " via ";
router = libcfs_nid2str(req->rq_bulk->bd_sender);
}
-
+
if (server_cksum == ~0 && rc > 0) {
CERROR("Protocol error: server %s set the 'checksum' "
"bit, but didn't send a checksum. Not fatal, "
aa->aa_ppga[aa->aa_page_count-1]->off +
aa->aa_ppga[aa->aa_page_count-1]->count -
1);
- CERROR("client %x, server %x\n",
- client_cksum, server_cksum);
+ CERROR("client %x, server %x, cksum_type %x\n",
+ client_cksum, server_cksum, cksum_type);
cksum_counter = 0;
aa->aa_oa->o_cksum = client_cksum;
rc = -EAGAIN;
CERROR("too many resend retries, returning error\n");
RETURN(-EIO);
}
-
+
lwi = LWI_TIMEOUT_INTR(cfs_time_seconds(resends), NULL, NULL, NULL);
l_wait_event(waitq, 0, &lwi);
CERROR("too many resend retries, returning error\n");
RETURN(-EIO);
}
-
+
DEBUG_REQ(D_ERROR, request, "redo for recoverable error");
rc = osc_brw_prep_request(lustre_msg_get_opc(request->rq_reqmsg) ==
RETURN(rc);
client_obd_list_lock(&aa->aa_cli->cl_loi_list_lock);
-
+
list_for_each_entry(oap, &aa->aa_oaps, oap_rpc_item) {
if (oap->oap_request != NULL) {
LASSERTF(request == oap->oap_request,
request, oap->oap_request);
if (oap->oap_interrupted) {
client_obd_list_unlock(&aa->aa_cli->cl_loi_list_lock);
- ptlrpc_req_finished(new_req);
+ ptlrpc_req_finished(new_req);
RETURN(-EINTR);
}
}
return npages;
}
-static __u32 ost_checksum_bulk(struct ptlrpc_bulk_desc *desc, int opc)
+static __u32 ost_checksum_bulk(struct ptlrpc_bulk_desc *desc, int opc,
+ cksum_type_t cksum_type)
{
- __u32 cksum = ~0;
+ __u32 cksum;
int i;
+ cksum = init_checksum(cksum_type);
for (i = 0; i < desc->bd_iov_count; i++) {
struct page *page = desc->bd_iov[i].kiov_page;
int off = desc->bd_iov[i].kiov_offset & ~CFS_PAGE_MASK;
if (i == 0 && opc == OST_WRITE &&
OBD_FAIL_CHECK_ONCE(OBD_FAIL_OST_CHECKSUM_RECEIVE))
memcpy(ptr, "bad3", min(4, len));
- cksum = crc32_le(cksum, ptr, len);
+ cksum = compute_checksum(cksum, ptr, len, cksum_type);
/* corrupt the data after we compute the checksum, to
* simulate an OST->client data error */
if (i == 0 && opc == OST_READ &&
}
}
- if (unlikely(body->oa.o_valid & OBD_MD_FLCKSUM)) {
- body->oa.o_cksum = ost_checksum_bulk(desc, OST_READ);
- body->oa.o_valid = OBD_MD_FLCKSUM;
+ if (body->oa.o_valid & OBD_MD_FLCKSUM) {
+ cksum_type_t cksum_type = OBD_CKSUM_CRC32;
+
+ if (body->oa.o_valid & OBD_MD_FLFLAGS)
+ cksum_type = cksum_type_unpack(body->oa.o_flags);
+ body->oa.o_flags = cksum_type_pack(cksum_type);
+ body->oa.o_valid = OBD_MD_FLCKSUM | OBD_MD_FLFLAGS;
+ body->oa.o_cksum = ost_checksum_bulk(desc, OST_READ, cksum_type);
CDEBUG(D_PAGE,"checksum at read origin: %x\n",body->oa.o_cksum);
} else {
body->oa.o_valid = 0;
int size[3] = { sizeof(struct ptlrpc_body), sizeof(*body) };
int objcount, niocount, npages;
int rc, swab, i, j;
- obd_count client_cksum, server_cksum = 0;
+ obd_count client_cksum = 0, server_cksum = 0;
+ cksum_type_t cksum_type = OBD_CKSUM_CRC32;
int no_reply = 0;
ENTRY;
ost_prolong_locks(exp, ioo, pp_rnb, LCK_PW);
/* obd_preprw clobbers oa->valid, so save what we need */
- client_cksum = body->oa.o_valid & OBD_MD_FLCKSUM ? body->oa.o_cksum : 0;
+ if (body->oa.o_valid & OBD_MD_FLCKSUM) {
+ client_cksum = body->oa.o_cksum;
+ if (body->oa.o_valid & OBD_MD_FLFLAGS)
+ cksum_type = cksum_type_unpack(body->oa.o_flags);
+ }
/* Because we already sync grant info with client when reconnect,
* grant info will be cleared for resent req, then fed_grant and
sizeof(*repbody));
memcpy(&repbody->oa, &body->oa, sizeof(repbody->oa));
- if (unlikely(client_cksum != 0 && rc == 0)) {
+ if (client_cksum != 0 && rc == 0) {
static int cksum_counter;
- server_cksum = ost_checksum_bulk(desc, OST_WRITE);
- repbody->oa.o_valid |= OBD_MD_FLCKSUM;
+ repbody->oa.o_valid |= OBD_MD_FLCKSUM | OBD_MD_FLFLAGS;
+ repbody->oa.o_flags &= ~OBD_FL_CKSUM_ALL;
+ repbody->oa.o_flags |= cksum_type_pack(cksum_type);
+ server_cksum = ost_checksum_bulk(desc, OST_WRITE, cksum_type);
repbody->oa.o_cksum = server_cksum;
cksum_counter++;
if (unlikely(client_cksum != server_cksum)) {
objcount, ioo, npages, local_nb, oti, rc);
if (unlikely(client_cksum != server_cksum && rc == 0)) {
- int new_cksum = ost_checksum_bulk(desc, OST_WRITE);
+ int new_cksum = ost_checksum_bulk(desc, OST_WRITE, cksum_type);
char *msg;
char *via;
char *router;
via = " via ";
router = libcfs_nid2str(desc->bd_sender);
}
-
+
LCONSOLE_ERROR_MSG(0x168, "%s: BAD WRITE CHECKSUM: %s from %s"
"%s%s inum "LPU64"/"LPU64" object "LPU64"/"
LPU64" extent ["LPU64"-"LPU64"]\n",
newer : older, LUSTRE_VERSION_STRING);
}
+        /*
+         * Work out which checksum algorithms can be used with this server
+         * and pick the one to use by default.
+         */
+        if (ocd->ocd_connect_flags & OBD_CONNECT_CKSUM) {
+                /* We sent to the server ocd_cksum_types with bits set
+                 * for algorithms we understand. The server masked off
+                 * the checksum types it doesn't support */
+                if ((ocd->ocd_cksum_types & OBD_CKSUM_ALL) == 0) {
+                        LCONSOLE_WARN("The negotiation of the checksum "
+                                      "algorithm to use with server %s "
+                                      "failed (%x/%x), disabling "
+                                      "checksums\n",
+                                      obd2cli_tgt(imp->imp_obd),
+                                      ocd->ocd_cksum_types,
+                                      OBD_CKSUM_ALL);
+                        cli->cl_checksum = 0;
+                        cli->cl_supp_cksum_types = OBD_CKSUM_CRC32;
+                        cli->cl_cksum_type = OBD_CKSUM_CRC32;
+                } else {
+                        /* Keep only the types this client implements, so
+                         * that a stray bit in the reply can never be
+                         * selected later through /proc */
+                        cli->cl_supp_cksum_types = ocd->ocd_cksum_types &
+                                                   OBD_CKSUM_ALL;
+
+                        if (ocd->ocd_cksum_types & OSC_DEFAULT_CKSUM)
+                                cli->cl_cksum_type = OSC_DEFAULT_CKSUM;
+                        else if (ocd->ocd_cksum_types & OBD_CKSUM_ADLER)
+                                cli->cl_cksum_type = OBD_CKSUM_ADLER;
+                        else
+                                cli->cl_cksum_type = OBD_CKSUM_CRC32;
+                }
+        } else {
+                /* The server does not support OBD_CONNECT_CKSUM.
+                 * Enforce CRC32 for backward compatibility */
+                cli->cl_supp_cksum_types = OBD_CKSUM_CRC32;
+                cli->cl_cksum_type = OBD_CKSUM_CRC32;
+        }
+
+
if (ocd->ocd_connect_flags & OBD_CONNECT_BRW_SIZE) {
cli->cl_max_pages_per_rpc =
ocd->ocd_brw_size >> CFS_PAGE_SHIFT;
__swab64s(&ocd->ocd_ibits_known);
__swab32s(&ocd->ocd_nllu);
__swab32s(&ocd->ocd_nllg);
+ __swab64s(&ocd->ocd_transno);
+ __swab32s(&ocd->ocd_group);
+ __swab32s(&ocd->ocd_cksum_types);
CLASSERT(offsetof(typeof(*ocd), padding1) != 0);
CLASSERT(offsetof(typeof(*ocd), padding2) != 0);
- CLASSERT(offsetof(typeof(*ocd), padding3) != 0);
- CLASSERT(offsetof(typeof(*ocd), padding4) != 0);
}
void lustre_swab_obdo (struct obdo *o)
(long long)(int)offsetof(struct obd_connect_data, ocd_nllg));
LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_nllg) == 4, " found %lld\n",
(long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_nllg));
- LASSERTF((int)offsetof(struct obd_connect_data, padding1) == 40, " found %lld\n",
+ LASSERTF((int)offsetof(struct obd_connect_data, ocd_transno) == 40, " found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, ocd_transno));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_transno) == 8, " found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_transno));
+ LASSERTF((int)offsetof(struct obd_connect_data, ocd_group) == 48, " found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, ocd_group));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_group) == 4, " found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_group));
+ LASSERTF((int)offsetof(struct obd_connect_data, ocd_cksum_types) == 52, " found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, ocd_cksum_types));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_cksum_types) == 4, " found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_cksum_types));
+ LASSERTF((int)offsetof(struct obd_connect_data, padding1) == 56, " found %lld\n",
(long long)(int)offsetof(struct obd_connect_data, padding1));
LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding1) == 8, " found %lld\n",
(long long)(int)sizeof(((struct obd_connect_data *)0)->padding1));
- LASSERTF((int)offsetof(struct obd_connect_data, padding2) == 48, " found %lld\n",
+ LASSERTF((int)offsetof(struct obd_connect_data, padding2) == 64, " found %lld\n",
(long long)(int)offsetof(struct obd_connect_data, padding2));
LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding2) == 8, " found %lld\n",
(long long)(int)sizeof(((struct obd_connect_data *)0)->padding2));
- LASSERTF((int)offsetof(struct obd_connect_data, padding3) == 56, " found %lld\n",
- (long long)(int)offsetof(struct obd_connect_data, padding3));
- LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding3) == 8, " found %lld\n",
- (long long)(int)sizeof(((struct obd_connect_data *)0)->padding3));
- LASSERTF((int)offsetof(struct obd_connect_data, padding4) == 64, " found %lld\n",
- (long long)(int)offsetof(struct obd_connect_data, padding4));
- LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding4) == 8, " found %lld\n",
- (long long)(int)sizeof(((struct obd_connect_data *)0)->padding4));
CLASSERT(OBD_CONNECT_RDONLY == 0x1ULL);
CLASSERT(OBD_CONNECT_INDEX == 0x2ULL);
CLASSERT(OBD_CONNECT_GRANT == 0x8ULL);
CLASSERT(OBD_FL_NO_USRQUOTA == (0x00000100));
CLASSERT(OBD_FL_NO_GRPQUOTA == (0x00000200));
CLASSERT(OBD_FL_CREATE_CROW == (0x00000400));
+ CLASSERT(OBD_FL_TRUNCLOCK == (0x00000800));
+ CLASSERT(OBD_FL_CKSUM_CRC32 == (0x00001000));
+ CLASSERT(OBD_FL_CKSUM_ADLER == (0x00002000));
+ CLASSERT(OBD_CKSUM_CRC32 == (0x00000001));
+ CLASSERT(OBD_CKSUM_ADLER == (0x00000002));
/* Checks for struct lov_mds_md_v1 */
LASSERTF((int)sizeof(struct lov_mds_md_v1) == 32, " found %lld\n",
for f in $LPROC/osc/*/checksums; do
echo $1 >> $f
done
-
return 0
}
+export ORIG_CSUM_TYPE=""
+CKSUM_TYPES=${CKSUM_TYPES:-"crc32 adler"}
+set_checksum_type()
+{
+ [ "$ORIG_CSUM_TYPE" ] || \
+ ORIG_CSUM_TYPE=`sed 's/.*\[\(.*\)\].*/\1/g' \
+ $LPROC/osc/*osc-[^mM]*/checksum_type | head -n1`
+ for f in $LPROC/osc/*osc-*/checksum_type; do
+ echo $1 > $f
+ done
+ log "set checksum type to $1"
+ return 0
+}
F77_TMP=$TMP/f77-temp
F77SZ=8
setup_f77() {
test_77c() { # bug 10889
[ ! -f $DIR/f77b ] && skip "requires 77b - skipping" && return
- cancel_lru_locks osc
- #define OBD_FAIL_OSC_CHECKSUM_RECEIVE 0x408
- sysctl -w lustre.fail_loc=0x80000408
set_checksums 1
- cmp $F77_TMP $DIR/f77b || error "file compare failed"
- sysctl -w lustre.fail_loc=0
+ for algo in $CKSUM_TYPES; do
+ cancel_lru_locks osc
+ set_checksum_type $algo
+ #define OBD_FAIL_OSC_CHECKSUM_RECEIVE 0x408
+ sysctl -w lustre.fail_loc=0x80000408
+ cmp $F77_TMP $DIR/f77b || error "file compare failed"
+ sysctl -w lustre.fail_loc=0
+ done
set_checksums 0
+ set_checksum_type $ORIG_CSUM_TYPE
}
run_test 77c "checksum error on client read ==================="
run_test 77e "checksum error on OST direct read ================"
test_77f() { # bug 10889
- #define OBD_FAIL_OSC_CHECKSUM_SEND 0x409
- sysctl -w lustre.fail_loc=0x409
set_checksums 1
- directio write $DIR/f77 0 $F77SZ $((1024 * 1024)) && \
- error "direct write succeeded"
- sysctl -w lustre.fail_loc=0
+ for algo in $CKSUM_TYPES; do
+ cancel_lru_locks osc
+ set_checksum_type $algo
+ #define OBD_FAIL_OSC_CHECKSUM_SEND 0x409
+ sysctl -w lustre.fail_loc=0x409
+ directio write $DIR/f77 0 $F77SZ $((1024 * 1024)) && \
+ error "direct write succeeded"
+ sysctl -w lustre.fail_loc=0
+ done
+ set_checksum_type $ORIG_CSUM_TYPE
set_checksums 0
}
run_test 77f "repeat checksum error on write (expect error) ===="
}
run_test 77h "checksum error on OST read ======================="
+# Remount the client with fail_loc 0x40b set, which keeps the client from
+# setting OBD_CONNECT_CKSUM at connect time, and check that every OSC then
+# reports crc32 as the selected checksum type.
+test_77i() { # bug 13805
+        #define OBD_FAIL_OSC_CONNECT_CKSUM 0x40b
+        sysctl -w lustre.fail_loc=0x40b
+        remount_client $MOUNT
+        sysctl -w lustre.fail_loc=0
+        for f in $LPROC/osc/*osc-[^mM]*/checksum_type; do
+                # the selected algorithm is the one shown in brackets
+                algo=`sed 's/.*\[\(.*\)\].*/\1/g' $f`
+                [ "$algo" = "crc32" ] || error "algo set to $algo instead of crc32"
+        done
+        # remount with fail_loc cleared so later tests reconnect normally
+        remount_client $MOUNT
+}
+run_test 77i "client not supporting OBD_CONNECT_CKSUM =========="
+
+# Remount the client with fail_loc 0x40c set, which makes the client offer
+# only the adler checksum type at connect time, and check that every OSC then
+# reports adler as the selected checksum type.
+test_77j() { # bug 13805
+        #define OBD_FAIL_OSC_CKSUM_ADLER_ONLY 0x40c
+        sysctl -w lustre.fail_loc=0x40c
+        remount_client $MOUNT
+        sysctl -w lustre.fail_loc=0
+        for f in $LPROC/osc/*osc-[^mM]*/checksum_type; do
+                algo=$(sed 's/.*\[\(.*\)\].*/\1/g' $f)
+                if [ "$algo" != "adler" ]; then
+                        error "algo set to $algo instead of adler"
+                fi
+        done
+        remount_client $MOUNT
+}
+run_test 77j "client only supporting ADLER32 ===================="
+
[ "$ORIG_CSUM" ] && set_checksums $ORIG_CSUM || true
rm -f $F77_TMP
unset F77_TMP
grep " $1 " /proc/mounts || zconf_mount $HOSTNAME $*
}
+# Unmount and then mount again the client mount point $1 on the local host.
+remount_client()
+{
+        if ! zconf_umount $(hostname) $1; then
+                error "umount failed"
+        fi
+        if ! zconf_mount $(hostname) $1; then
+                error "mount failed"
+        fi
+}
+
setupall() {
load_modules
if [ -z "$CLIENTONLY" ]; then
CHECK_MEMBER(obd_connect_data, ocd_ibits_known);
CHECK_MEMBER(obd_connect_data, ocd_nllu);
CHECK_MEMBER(obd_connect_data, ocd_nllg);
+ CHECK_MEMBER(obd_connect_data, ocd_transno);
+ CHECK_MEMBER(obd_connect_data, ocd_group);
+ CHECK_MEMBER(obd_connect_data, ocd_cksum_types);
CHECK_MEMBER(obd_connect_data, padding1);
CHECK_MEMBER(obd_connect_data, padding2);
- CHECK_MEMBER(obd_connect_data, padding3);
- CHECK_MEMBER(obd_connect_data, padding4);
CHECK_CDEFINE(OBD_CONNECT_RDONLY);
CHECK_CDEFINE(OBD_CONNECT_INDEX);
CHECK_CDEFINE(OBD_FL_NO_USRQUOTA);
CHECK_CDEFINE(OBD_FL_NO_GRPQUOTA);
CHECK_CDEFINE(OBD_FL_CREATE_CROW);
+ CHECK_CDEFINE(OBD_FL_TRUNCLOCK);
+ CHECK_CDEFINE(OBD_FL_CKSUM_CRC32);
+ CHECK_CDEFINE(OBD_FL_CKSUM_ADLER);
+ CHECK_CDEFINE(OBD_CKSUM_CRC32);
+ CHECK_CDEFINE(OBD_CKSUM_ADLER);
}
static void
(long long)(int)offsetof(struct obd_connect_data, ocd_nllg));
LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_nllg) == 4, " found %lld\n",
(long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_nllg));
- LASSERTF((int)offsetof(struct obd_connect_data, padding1) == 40, " found %lld\n",
+ LASSERTF((int)offsetof(struct obd_connect_data, ocd_transno) == 40, " found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, ocd_transno));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_transno) == 8, " found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_transno));
+ LASSERTF((int)offsetof(struct obd_connect_data, ocd_group) == 48, " found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, ocd_group));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_group) == 4, " found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_group));
+ LASSERTF((int)offsetof(struct obd_connect_data, ocd_cksum_types) == 52, " found %lld\n",
+ (long long)(int)offsetof(struct obd_connect_data, ocd_cksum_types));
+ LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_cksum_types) == 4, " found %lld\n",
+ (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_cksum_types));
+ LASSERTF((int)offsetof(struct obd_connect_data, padding1) == 56, " found %lld\n",
(long long)(int)offsetof(struct obd_connect_data, padding1));
LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding1) == 8, " found %lld\n",
(long long)(int)sizeof(((struct obd_connect_data *)0)->padding1));
- LASSERTF((int)offsetof(struct obd_connect_data, padding2) == 48, " found %lld\n",
+ LASSERTF((int)offsetof(struct obd_connect_data, padding2) == 64, " found %lld\n",
(long long)(int)offsetof(struct obd_connect_data, padding2));
LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding2) == 8, " found %lld\n",
(long long)(int)sizeof(((struct obd_connect_data *)0)->padding2));
- LASSERTF((int)offsetof(struct obd_connect_data, padding3) == 56, " found %lld\n",
- (long long)(int)offsetof(struct obd_connect_data, padding3));
- LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding3) == 8, " found %lld\n",
- (long long)(int)sizeof(((struct obd_connect_data *)0)->padding3));
- LASSERTF((int)offsetof(struct obd_connect_data, padding4) == 64, " found %lld\n",
- (long long)(int)offsetof(struct obd_connect_data, padding4));
- LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding4) == 8, " found %lld\n",
- (long long)(int)sizeof(((struct obd_connect_data *)0)->padding4));
CLASSERT(OBD_CONNECT_RDONLY == 0x1ULL);
CLASSERT(OBD_CONNECT_INDEX == 0x2ULL);
CLASSERT(OBD_CONNECT_GRANT == 0x8ULL);
CLASSERT(OBD_FL_NO_USRQUOTA == (0x00000100));
CLASSERT(OBD_FL_NO_GRPQUOTA == (0x00000200));
CLASSERT(OBD_FL_CREATE_CROW == (0x00000400));
+ CLASSERT(OBD_FL_TRUNCLOCK == (0x00000800));
+ CLASSERT(OBD_FL_CKSUM_CRC32 == (0x00001000));
+ CLASSERT(OBD_FL_CKSUM_ADLER == (0x00002000));
+ CLASSERT(OBD_CKSUM_CRC32 == (0x00000001));
+ CLASSERT(OBD_CKSUM_ADLER == (0x00000002));
/* Checks for struct lov_mds_md_v1 */
LASSERTF((int)sizeof(struct lov_mds_md_v1) == 32, " found %lld\n",