return vmalloc_to_page(addr);
}
-int is_chunk_start(struct page *page, struct ll_compr_hdr **ret_header);
+int is_chunk_start(struct page *page, struct ll_compr_hdr **ret_header, int *rc);
int decompress_chunk(const char *obd_name, struct crypto_comp *cc,
unsigned char *in, unsigned int in_len,
#define OBD_FAIL_OSC_SLOW_PAGE_EVICT 0x417
#define OBD_FAIL_OSC_WRONG_COMP_ALG 0x418
#define OBD_FAIL_OSC_MARK_COMPRESSED 0x419
+#define OBD_FAIL_OSC_FORCE_DECOMPR 0x420
#define OBD_FAIL_PTLRPC 0x500
#define OBD_FAIL_PTLRPC_ACK 0x501
#include <lustre_crypto.h>
#include <lustre_compr.h>
#include <lustre_sec.h>
+#include <linux/crc32.h>
/* "one-shot" try to load our own compression modules
* the first time that a compressed file is accessed
* The minimum delta between compressed and plain data to
* use the compressed one.
*/
+
+/**
+ * compr_header_csum() - compute the CRC32 of a compressed-chunk header.
+ * @header: header to checksum; llch_header_size must already be set.
+ *
+ * Covers bytes [0, llch_header_size) of the header, except that the
+ * llch_hdr_csum field itself is replaced by four zero bytes so the
+ * checksum is stable whether or not the field is already filled in.
+ * Callers use the result both to stamp new headers (compress_chunk)
+ * and to validate headers read back from disk.
+ *
+ * Return: CRC32 (seeded with ~0) of the header with the csum field zeroed.
+ */
+static __u32 compr_header_csum(struct ll_compr_hdr *header)
+{
+	int offset = offsetof(struct ll_compr_hdr, llch_hdr_csum);
+	__u32 csum32;
+	/* let's skip the llch_hdr_csum value and crc 0 instead */
+	__u32 dummy_csum = 0;
+
+	/* bytes before the csum field */
+	csum32 = crc32(~0, (unsigned char *)header,
+		       offset);
+	/* stand-in zeroes for the csum field itself */
+	csum32 = crc32(csum32, (unsigned char *)&dummy_csum,
+		       sizeof(dummy_csum));
+	offset += sizeof(dummy_csum);
+
+	/* any future extension bytes after the csum field; llch_header_size
+	 * is a __u8, so this tail read cannot leave the mapped page
+	 */
+	if (offset < header->llch_header_size) {
+		csum32 = crc32(csum32, (unsigned char *)header + offset,
+			       header->llch_header_size - offset);
+	}
+
+	return csum32;
+}
+
int compress_chunk(const char *obd_name, struct crypto_comp *cc,
const unsigned char *in, unsigned int in_len,
unsigned char *out, unsigned int *out_len,
llch->llch_uncompr_size = in_len;
llch->llch_reserved = 0;
llch->llch_compr_csum = 0;
- llch->llch_hdr_csum = 0;
+ llch->llch_hdr_csum = compr_header_csum(llch);
*out_len = len + sizeof(*llch);
}
EXPORT_SYMBOL(compress_chunk);
-int is_chunk_start(struct page *page, struct ll_compr_hdr **ret_header)
+/**
+ * chunk_header_csum_valid() - verify the stored header checksum.
+ * @header: candidate compressed-chunk header (magic already matched).
+ *
+ * Recomputes the header CRC and compares it with the stored
+ * llch_hdr_csum.  Both values are native-endian: compress_chunk()
+ * stores compr_header_csum() without byte-swapping, so no endian
+ * conversion belongs in the comparison or the debug output.
+ *
+ * Return: 1 if the checksum matches, 0 otherwise.
+ */
+static int chunk_header_csum_valid(struct ll_compr_hdr *header)
+{
+	__u32 csum32;
+
+	csum32 = compr_header_csum(header);
+	if (csum32 != header->llch_hdr_csum) {
+		/* print both sides raw; converting only one side with
+		 * cpu_to_le32() made the message misleading on BE hosts
+		 */
+		CDEBUG(D_SEC, "Header csum mismatch %x != %x\n",
+		       csum32, header->llch_hdr_csum);
+		return 0;
+	}
+
+	return 1;
+}
+
+/**
+ * is_chunk_start() - check whether a page begins a compressed chunk.
+ * @page:	first page of a candidate chunk (may be NULL)
+ * @ret_header:	set to the (kunmapped) header pointer for the caller
+ * @rc:		0, or -EUCLEAN when the header is provably corrupt
+ *
+ * Return: 1 if the page holds a valid compressed-chunk header, 0 if the
+ * data should be treated as uncompressed (also when corruption is
+ * detected; then *rc is set to -EUCLEAN).
+ */
+int is_chunk_start(struct page *page, struct ll_compr_hdr **ret_header, int *rc)
{
struct ll_compr_hdr *header;
-	int rc = 1;
+	int retval = 1;
ENTRY;
+	*rc = 0;
if (page == NULL)
RETURN(0);
-
header = (struct ll_compr_hdr *)kmap_atomic(page);
-	if (header->llch_magic != LLCH_MAGIC)
-		rc = 0;
+
+	if (header->llch_magic != LLCH_MAGIC) {
+		retval = 0;
+	/* If the magic is found but most fields are invalid, it
+	 * implies that the data is likely uncompressed and should
+	 * be passed through unmodified.
+	 */
+	} else if (header->llch_hdr_csum != 0 &&
+		   !(chunk_header_csum_valid(header))) {
+		CDEBUG(D_SEC, "Magic found but header csum invalid\n");
+		retval = 0;
+	/* If the magic is valid but a few fields are invalid, it
+	 * suggests that the valid compressed chunk is corrupted and
+	 * -EUCLEAN should be returned when reading it.
+	 */
+	} else if (header->llch_compr_size >=
+		   COMPR_GET_CHUNK_SIZE(header->llch_chunk_log_bits) ||
+		   /* Assume here that ll_compr_hdr will grow less than
+		    * 4 times from original size.  Note: sizeof(*header),
+		    * the struct size, not the size of the pointer.
+		    */
+		   header->llch_header_size > sizeof(*header) * 4 ||
+		   header->llch_compr_type > LL_COMPR_TYPE_MAX) {
+
+		if (header->llch_hdr_csum == 0) {
+			CDEBUG(D_SEC, "No csum, sanity failed, skipping\n");
+			retval = 0;
+		} else {
+			retval = 0;
+			*rc = -EUCLEAN;
+		}
+	}
+
*ret_header = header;
kunmap_atomic(header);
-	RETURN(rc);
+	RETURN(retval);
}
EXPORT_SYMBOL(is_chunk_start);
#include <lustre_compr.h>
#include <lustre_sec.h>
-static int decompress_chunk_in_lnb(const char *obd_name,
+static int decompress_chunk_in_lnb(const char *obd_name, struct lu_fid *fid,
struct niobuf_local *lnbs, int lnb_start,
void **bounce_src, void **bounce_dst,
enum ll_compr_type type, int lvl,
lnbs[lnb_start].lnb_page_offset, lnbs[lnb_start].lnb_len);
/* if this chunk isn't compressed, don't uncompress it */
- if (!is_chunk_start(lnbs[lnb_start].lnb_page, &llch))
- GOTO(out, rc = 0);
+ if (!is_chunk_start(lnbs[lnb_start].lnb_page, &llch, &rc)) {
+ CERROR("%s: Header sanity failed: "DFID" at %llu: rc = %d\n",
+ obd_name, PFID(fid), lnbs[lnb_start].lnb_file_offset,
+ rc);
+ GOTO(out, rc);
+ }
/* compression type and level in the compressed data can
* be different from those set in the layout, because the client
llch->llch_compr_level);
if (chunk_size !=
COMPR_GET_CHUNK_SIZE(llch->llch_chunk_log_bits)) {
- CERROR("%s: chunk size disagreement, layout %d, from disk %d\n",
- obd_name, chunk_size,
+ CERROR("%s: chunk size disagreement: "DFID" at %llu: layout %d, from disk %d\n",
+ obd_name, PFID(fid), lnbs[lnb_start].lnb_file_offset,
+ chunk_size,
COMPR_GET_CHUNK_SIZE(llch->llch_chunk_log_bits));
/* compression type and level can disagree with layout, we just
* dump them for debugging
hdr_size = llch->llch_header_size;
rc = alloc_decompr(obd_name, &type, &lvl, &cc);
if (rc) {
- CERROR("%s: Setup for decompression failed, type %i, lvl %d, rc = %d\n",
- obd_name, type, lvl, rc);
+ CERROR("%s: Setup for decompression failed: "DFID" at %llu: type %i, lvl %d: rc = %d\n",
+ obd_name, PFID(fid), lnbs[lnb_start].lnb_file_offset,
+ type, lvl, rc);
GOTO(out, rc);
}
rc = merge_chunk(NULL, lnbs, lnb_start, pages_in_chunk, (char *) bounce_src,
&src_size);
if (rc < 0) {
- CERROR("%s: Data error in lnbs at %llu, rc: %d\n",
- obd_name, lnbs[lnb_start].lnb_file_offset, rc);
+ CERROR("%s: Data error in lnbs: "DFID" at %llu: rc = %d\n",
+ obd_name, PFID(fid), lnbs[lnb_start].lnb_file_offset, rc);
GOTO(out, rc);
}
LASSERT(src_size <= chunk_size);
(char *) bounce_dst, &dst_size, type,
lvl);
if (rc != 0) {
- CERROR("%s: Failed to decompress %d byte chunk at %llu, rc: %d\n",
- obd_name, llch->llch_compr_size,
+ CERROR("%s: Failed to decompress: "DFID", %d byte chunk at %llu: rc = %d\n",
+ obd_name, PFID(fid), llch->llch_compr_size,
lnbs[lnb_start].lnb_file_offset, rc);
GOTO(out, rc);
}
* reliably zero unused parts of the header, so we skip if it's zero
*/
if (llch->llch_uncompr_size != 0 && dst_size != llch->llch_uncompr_size) {
- CERROR("%s: Compressed chunk at %llu invalid, uncompressed size from disk %d disagrees with result of decompression %d, rc: %d\n",
- obd_name, lnbs[lnb_start].lnb_file_offset,
- llch->llch_uncompr_size, dst_size, rc);
+ CERROR("%s: object invalid, size %d != %d: "DFID" at %llu: rc = %d\n",
+ obd_name, llch->llch_uncompr_size, dst_size, PFID(fid),
+ lnbs[lnb_start].lnb_file_offset, rc);
GOTO(out, rc = -EUCLEAN);
}
LASSERT(dst_size <= chunk_size);
for(i = lnb_start; i < lnb_start + pages_in_chunk; i++) {
/* if there's no data in this page, we must clear it */
if (lnbs[i].lnb_rc == 0) {
- CDEBUG(D_SEC, "no data in page %d at %llu, clearing\n",
- i, lnbs[i].lnb_file_offset);
+ CDEBUG(D_SEC, "%s: no data, clearing: page %d, "DFID" at %llu\n",
+ obd_name, i, PFID(fid), lnbs[i].lnb_file_offset);
memset(kmap(lnbs[i].lnb_page), 0, PAGE_SIZE);
kunmap(lnbs[i].lnb_page);
}
* the beginning and end of the read may be unaligned, so we check and if
* necessary decompress (in place) the data in those locations
*/
-int decompress_rnb(const char *obd_name, struct niobuf_local *lnbs,
- int lnb_npages, __u64 rnb_start, __u64 rnb_end,
- int *lnb_start, void **bounce_src, void **bounce_dst,
+int decompress_rnb(const char *obd_name, struct lu_fid *fid,
+ struct niobuf_local *lnbs, int lnb_npages,
+ __u64 rnb_start, __u64 rnb_end, int *lnb_start,
+ void **bounce_src, void **bounce_dst,
enum ll_compr_type type, int lvl, int chunk_size,
bool write)
{
if (!chunk_found)
GOTO(out, rc = -EINVAL);
- rc = decompress_chunk_in_lnb(obd_name, lnbs, i, bounce_src,
+ rc = decompress_chunk_in_lnb(obd_name, fid, lnbs, i, bounce_src,
bounce_dst, type, lvl, chunk_size);
if (rc)
GOTO(out, rc);
if (!chunk_found)
GOTO(out, rc = -EINVAL);
- rc = decompress_chunk_in_lnb(obd_name, lnbs, i, bounce_src,
+ rc = decompress_chunk_in_lnb(obd_name, fid, lnbs, i, bounce_src,
bounce_dst, type, lvl, chunk_size);
if (rc)
GOTO(out, rc);
#include <linux/falloc.h>
#include "ofd_internal.h"
-int decompress_rnb(const char *obd_name, struct niobuf_local *lnbs,
- int lnb_npages, __u64 rnb_start, __u64 rnb_end,
- int *lnb_offset, void **bounce_src, void **bounce_dst,
+int decompress_rnb(const char *obd_name, struct lu_fid *fid,
+ struct niobuf_local *lnbs, int lnb_npages,
+ __u64 rnb_start, __u64 rnb_end, int *lnb_offset,
+ void **bounce_src, void **bounce_dst,
enum ll_compr_type type, int lvl, int chunk_size,
bool write);
CDEBUG(D_SEC,
"decompressing: rnb %d rnb_start %llu, rnb_end %llu\n",
i, rnb_start, rnb_end);
- rc = decompress_rnb(exp->exp_obd->obd_name, lnb, npages,
+ rc = decompress_rnb(exp->exp_obd->obd_name, fid, lnb, npages,
rnb_start, rnb_end, &lnb_start,
bounce_src, bounce_dst, type, lvl,
chunk_size, write);
- if (rc)
+
+ if (rc) {
GOTO(out, rc);
+ }
}
prev_rnb_end = rnb_end;
}
clpage = oap2cl_page(brw_page2oap(pga[0]));
type = clpage->cp_compr_type;
+ chunk_bits = cl_page_compr_bits(clpage);
+
+ if (OBD_FAIL_CHECK(OBD_FAIL_OSC_FORCE_DECOMPR)) {
+ /* decompress a sample from the lustre/tests/compressed.bin */
+ lvl = 9;
+ type = LL_COMPR_TYPE_LZ4HC;
+ chunk_bits = COMPR_CHUNK_MIN_BITS;
+ }
+
/* no compression */
if (type == LL_COMPR_TYPE_NONE)
RETURN(0);
- chunk_bits = cl_page_compr_bits(clpage);
chunk_size = 1 << chunk_bits;
buf_bits = chunk_bits + 1;
pages_per_chunk = chunk_size / PAGE_SIZE;
LASSERT(ergo(next_chunk_min, i >= next_chunk_min));
- if (!is_chunk_start(pga[i]->pg, &llch))
+ if (!is_chunk_start(pga[i]->pg, &llch, &rc)) {
+ if (rc) {
+ CERROR("%s: Magic and csum OK, but sanity failed: "DFID": rc = %d\n",
+ obd_name, PFID(&aa->aa_oa->o_oi.oi_fid),
+ rc);
+ GOTO(out, rc);
+ }
continue;
+ }
+
/* all chunks should be compressed with the same algorithm, but
* we need to get the type and level from the data provided.
* In the future it may be that the type is also changing in
* from storage and could potentially be corrupted)
*/
if (rpc_chunk_bits != chunk_bits) {
+ rc = -EUCLEAN;
CERROR(
- "chunk bits from storage (%d) and layout (%d) disagree\n",
- rpc_chunk_bits, chunk_bits);
- GOTO(out, rc = -EUCLEAN);
+ "%s: Chunk bits disagree %d != %d disagree: "DFID": rc = %d\n",
+ obd_name,
+ rpc_chunk_bits, chunk_bits,
+ PFID(&aa->aa_oa->o_oi.oi_fid), rc);
+ GOTO(out, rc);
}
CDEBUG(D_SEC, "chunk_size: %i, pages_per_chunk: %i\n",
* data, if not, the data from disk is probably corrupt
*/
if (compressed_pages > page_count - i) {
- CERROR("compressed pages from disk %d don't match pages in rpc %d (at %d of %d pages)\n",
- compressed_pages, page_count - i, i, page_count);
- GOTO(out, rc = -EUCLEAN);
+ rc = -EUCLEAN;
+ CERROR("%s: compressed pages mismatch %d != %d (at %d of %d pages): object "DFID": rc = %d\n",
+ obd_name, compressed_pages, page_count - i, i,
+ page_count, PFID(&aa->aa_oa->o_oi.oi_fid), rc);
+ GOTO(out, rc);
}
CDEBUG(D_SEC, "Merge chunk [%i, %i], src: %px\n", i,
* CSDC currently (but could happen if there's bad data)
*/
if (src_size != compressed_pages * PAGE_SIZE) {
- CERROR("buffer size from compressed pages (%u bytes) doesn't match number of compressed pages %d\n",
- src_size, compressed_pages);
- GOTO(out, rc = -EUCLEAN);
+ rc = -EUCLEAN;
+ CERROR("%s: buffer size from compressed pages (%u bytes) doesn't match number of compressed pages %d: object "DFID": rc = %d\n",
+ obd_name, src_size, compressed_pages,
+ PFID(&aa->aa_oa->o_oi.oi_fid), rc);
+ GOTO(out, rc);
}
dst_size = 2 * chunk_size;
CDEBUG(D_SEC, "Compressed size %lu, type %i\n",
* data, if not, the data from disk is probably corrupt
*/
if (decompressed_pages > page_count - i) {
- CERROR("decompressed pages from disk %d don't match pages in rpc %d (at %d of %d pages)\n",
- decompressed_pages, page_count - i, i, page_count);
- GOTO(out, rc = -EUCLEAN);
+ rc = -EUCLEAN;
+ CERROR("%s: decompressed pages from disk %d don't match pages in rpc %d (at %d of %d pages): object "DFID": rc = %d\n",
+ obd_name, decompressed_pages, page_count - i, i,
+ page_count, PFID(&aa->aa_oa->o_oi.oi_fid), rc);
+ GOTO(out, rc);
}
unmerge_chunk(pga, NULL, i, decompressed_pages, dst, dst_size,
0);
noinst_DATA += zfs_mdt1_2_11.tar.bz2 zfs_mdt2_2_11.tar.bz2
noinst_DATA += zfs_ost1_2_11.tar.bz2 zfs_ost2_2_11.tar.bz2
noinst_DATA += AMSR_E_L3_DailyOcean_V05_20111003.hdf.bz2
+noinst_DATA += compressed.bin
noinst_SCRIPTS = leak_finder.pl llmount.sh llmountcleanup.sh functions.sh
noinst_SCRIPTS += test-framework.sh runvmstat runiozone runtests sanity.sh
noinst_SCRIPTS += rundbench acceptance-small.sh compile.sh conf-sanity.sh
for bs in 3 4 7 32 97 128 130 192; do
dd if=$tf bs=${bs}K of=$tf.3 ||
error "(28) dd with block size ${bs}K failed"
-
cmp -bl $tf.3 $tf.2 || error "(29) cmp failed"
flush_and_compare $tf.3 $tf.2 "(30)"
rm -f $tf.3
}
run_test 1008 "validate directory space usage reduction with compression"
+# Test 1008 already exists above ("validate directory space usage
+# reduction with compression"); use the next free number to avoid
+# redefining the function and running the wrong body twice.
+test_1009() {
+	(( MDS1_VERSION >= $(version_code 2.14.0-ddn121) )) ||
+		skip "Need MDS version at least 2.14.0-ddn121"
+
+	test_mkdir -p $DIR/$tdir
+	local tf=$DIR/$tdir/$tfile
+	local tf2=$TMP/$tfile.2
+	local compressed=$LUSTRE/tests/compressed.bin
+	local broken_header=$TMP/$tfile.4
+
+	enable_compression
+	# Use pre-created compressed file
+	hexdump -C $compressed | head
+	cat $compressed > $tf
+	# The next data is needed for hole after first compressed block
+	dd if=/dev/urandom of=$tf bs=64k seek=1 count=1
+	sync
+	cancel_lru_locks osc
+	echo 3 > /proc/sys/vm/drop_caches
+
+	# In case the file has no compressed component, it can't be set
+	# after the data was written. Without such a component file is not
+	# decompressed at reading because there is
+	# clpage->cp_comp_type == LL_COMPR_TYPE_NONE check
+	# As a workaround, this check is skipped during the testing,
+	# if OBD_FAIL_OSC_FORCE_DECOMPR is set
+	# define OBD_FAIL_OSC_FORCE_DECOMPR 0x420
+	$LCTL set_param fail_loc=0x420
+	# Written data already has compression header
+	dd if=$tf of=$tf2 bs=64k count=1 || error "reading data"
+	hexdump -C $tf2 | head
+
+	# Here is the compression header format to explain offsets
+	# struct ll_compr_hdr {
+	#	__u64 llch_magic:48;	 /* LLCH_MAGIC */
+	#	__u8  llch_header_size;	 /* for future extensions */
+	#	__u8  llch_extra_flags;
+	#	__u16 llch_flags;
+	#	__u8  llch_compr_type;	 /* LLCH_COMP_GZIP, LLCH_COMP_LZ4, */
+	#	__u8  llch_compr_level:4, /* per-algorithm mapped level */
+	#	      llch_chunk_log_bits:4;
+	#	__u32 llch_compr_size;	 /* bytes of compressed data */
+	#	__u32 llch_reserved;	 /* unused, initialize to 0 */
+	#	__u32 llch_uncompr_csum; /* crc32 of raw data, or 0 */
+	#	__u32 llch_compr_csum;	 /* crc32 of compressed data, or 0 */
+	#	__u32 llch_hdr_csum;	 /* crc32 of magic..compr_csum, or 0 */
+	#};
+
+	# set wrong llch_compr_size (byte offset 12)
+	# checksum should fail and chunk copied as is
+	cp $compressed $broken_header
+	printf '\xff\xff\xff\xff' | dd of=$broken_header bs=1 seek=12 conv=notrunc
+	cat $broken_header > $tf
+	# The next data is needed for hole after first compressed block
+	dd if=/dev/urandom of=$tf bs=64k seek=1 count=1
+	sync
+	cancel_lru_locks osc
+	echo 3 > /proc/sys/vm/drop_caches
+
+	# If the magic is found but most fields are invalid, it implies that the data
+	# is likely uncompressed and should be passed through unmodified.
+	dd if=$tf of=$tf2 bs=64k count=1 || error "reading data failed(0)"
+	hexdump -C $tf2 | head
+
+	# fix checksum (llch_hdr_csum at byte offset 28), so the sanity
+	# check fails at wrong header size instead
+	printf '\x88\xb3\xf6\x81' | dd of=$broken_header bs=1 seek=28 conv=notrunc
+	cat $broken_header > $tf
+	# The next data is needed for hole after first compressed block
+	dd if=/dev/urandom of=$tf bs=64k seek=1 count=1
+	sync
+	cancel_lru_locks osc
+	echo 3 > /proc/sys/vm/drop_caches
+
+	# If the magic is valid but some fields are invalid, the reading
+	# must fail, because data is corrupted.
+	dd if=$tf of=$tf2 bs=64k count=1 && error "reading should fail"
+	hexdump -C $tf2 | head
+
+	# Set checksum to 0, so sanity check fails at wrong header size,
+	# but chunk is returned as is without error
+	printf '\x0\x0\x0\x0' | dd of=$broken_header bs=1 seek=28 conv=notrunc
+	cat $broken_header > $tf
+	# The next data is needed for hole after first compressed block
+	dd if=/dev/urandom of=$tf bs=64k seek=1 count=1
+	sync
+	cancel_lru_locks osc
+	echo 3 > /proc/sys/vm/drop_caches
+
+	# If the checksum is 0 and some fields are invalid, it still can be
+	# an uncorrupted chunk and the magic a coincidence, so pass through.
+	dd if=$tf of=$tf2 bs=64k count=1 || error "reading data failed(0)"
+	hexdump -C $tf2 | head
+	$LCTL set_param fail_loc=0x0
+}
+run_test 1009 "Compression header error tolerance"
+
+
complete_test $SECONDS
check_and_cleanup_lustre
declare -a logs=($ONLY)