--- /dev/null
+This patch is no longer needed for Lustre. It is included only
+for testing, and so that the same kernel can still be used with
+older Lustre versions. The testing functionality it provides was
+replaced in Linux 3.0 by the dm-flakey driver.
+
+This functionality is mainly used during testing, to simulate a
+server crash for ldiskfs by silently discarding all writes to the
+filesystem. For recovery testing the same behaviour could instead
+be provided by a special loopback or device-mapper (DM) device
+that discards writes to the underlying device.
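+
+As a rough sketch (not part of this patch), a test module could drive
+the exported interface like this. The module and the device path are
+hypothetical; only dev_set_rdonly(), dev_check_rdonly() and
+dev_clear_rdonly() come from the patch below:
+
+    #include <linux/module.h>
+    #include <linux/err.h>
+    #include <linux/fs.h>
+    #include <linux/blkdev.h>
+
+    static struct block_device *bdev;
+
+    static int __init rdonly_test_init(void)
+    {
+        /* hypothetical test target; any block device path works */
+        bdev = blkdev_get_by_path("/dev/sdb", FMODE_READ, NULL);
+        if (IS_ERR(bdev))
+            return PTR_ERR(bdev);
+
+        /* from here on, the block layer silently drops writes to it */
+        dev_set_rdonly(bdev);
+        WARN_ON(!dev_check_rdonly(bdev));
+        return 0;
+    }
+
+    static void __exit rdonly_test_exit(void)
+    {
+        /* writes reach the device again; this also happens
+         * automatically on the final __blkdev_put() */
+        dev_clear_rdonly(bdev);
+        blkdev_put(bdev, FMODE_READ);
+    }
+
+    module_init(rdonly_test_init);
+    module_exit(rdonly_test_exit);
+    MODULE_LICENSE("GPL");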
+
+This functionality is also used during target "failback", to speed
+up service shutdown and takeover by the other node during
+controlled operation. However, the same effect could be achieved
+by simply allowing all in-flight requests to complete and then
+waiting for the service to stop. That approach will also be
+needed for the DMU-OSD, because discarding writes on a DMU-based
+target is not safe: it could trigger a storage failure if the data
+is ever read from disk again and the checksum does not match the
+one recorded in the block pointer.
+
+Index: linux-3.10.0-123.8.1.el7.x86_64/block/blk-core.c
+===================================================================
+--- linux-3.10.0-123.8.1.el7.x86_64.orig/block/blk-core.c
++++ linux-3.10.0-123.8.1.el7.x86_64/block/blk-core.c
+@@ -1667,6 +1667,8 @@ static inline bool should_fail_request(s
+
+ #endif /* CONFIG_FAIL_MAKE_REQUEST */
+
++int dev_check_rdonly(struct block_device *bdev);
++
+ /*
+ * Check whether this bio extends beyond the end of the device.
+ */
+@@ -1729,6 +1731,12 @@ generic_make_request_checks(struct bio *
+ goto end_io;
+ }
+
++ /* this is cfs's dev_rdonly check */
++ if (bio_rw(bio) == WRITE && dev_check_rdonly(bio->bi_bdev)) {
++ err = 0;
++ goto end_io;
++ }
++
+ part = bio->bi_bdev->bd_part;
+ if (should_fail_request(part, bio->bi_size) ||
+ should_fail_request(&part_to_disk(part)->part0,
+@@ -3240,6 +3248,99 @@ void blk_post_runtime_resume(struct requ
+ EXPORT_SYMBOL(blk_post_runtime_resume);
+ #endif
+
++/*
++ * Debug code for turning block devices "read-only" (will discard writes
++ * silently). This is for filesystem crash/recovery testing.
++ */
++struct deventry {
++ dev_t dev;
++ struct deventry *next;
++};
++
++static struct deventry *devlist;
++static DEFINE_SPINLOCK(devlock);
++
++int dev_check_rdonly(struct block_device *bdev)
++{
++ struct deventry *cur;
++
++ if (!bdev)
++ return 0;
++
++ spin_lock(&devlock);
++ cur = devlist;
++ while (cur) {
++ if (bdev->bd_dev == cur->dev) {
++ spin_unlock(&devlock);
++ return 1;
++ }
++ cur = cur->next;
++ }
++ spin_unlock(&devlock);
++ return 0;
++}
++
++void dev_set_rdonly(struct block_device *bdev)
++{
++ struct deventry *newdev, *cur;
++
++ if (!bdev)
++ return;
++
++ newdev = kmalloc(sizeof(struct deventry), GFP_KERNEL);
++ if (!newdev)
++ return;
++
++ spin_lock(&devlock);
++ cur = devlist;
++ while (cur) {
++ if (bdev->bd_dev == cur->dev) {
++ spin_unlock(&devlock);
++ kfree(newdev);
++ return;
++ }
++ cur = cur->next;
++ }
++ newdev->dev = bdev->bd_dev;
++ newdev->next = devlist;
++ devlist = newdev;
++ spin_unlock(&devlock);
++ printk(KERN_WARNING "Turning device %s (%#x) read-only\n",
++ bdev->bd_disk ? bdev->bd_disk->disk_name : "", bdev->bd_dev);
++}
++
++void dev_clear_rdonly(struct block_device *bdev)
++{
++ struct deventry *cur, *last = NULL;
++
++ if (!bdev)
++ return;
++
++ spin_lock(&devlock);
++ cur = devlist;
++ while (cur) {
++ if (bdev->bd_dev == cur->dev) {
++ if (last)
++ last->next = cur->next;
++ else
++ devlist = cur->next;
++ spin_unlock(&devlock);
++ kfree(cur);
++ printk(KERN_WARNING "Removing read-only on %s (%#x)\n",
++ bdev->bd_disk ? bdev->bd_disk->disk_name :
++ "unknown block", bdev->bd_dev);
++ return;
++ }
++ last = cur;
++ cur = cur->next;
++ }
++ spin_unlock(&devlock);
++}
++
++EXPORT_SYMBOL(dev_set_rdonly);
++EXPORT_SYMBOL(dev_clear_rdonly);
++EXPORT_SYMBOL(dev_check_rdonly);
++
+ int __init blk_dev_init(void)
+ {
+ BUILD_BUG_ON(__REQ_NR_BITS > 8 *
+Index: linux-3.10.0-123.8.1.el7.x86_64/fs/block_dev.c
+===================================================================
+--- linux-3.10.0-123.8.1.el7.x86_64.orig/fs/block_dev.c
++++ linux-3.10.0-123.8.1.el7.x86_64/fs/block_dev.c
+@@ -1441,6 +1441,7 @@ static void __blkdev_put(struct block_de
+ if (bdev != bdev->bd_contains)
+ victim = bdev->bd_contains;
+ bdev->bd_contains = NULL;
++ dev_clear_rdonly(bdev);
+
+ put_disk(disk);
+ module_put(owner);
+Index: linux-3.10.0-123.8.1.el7.x86_64/include/linux/fs.h
+===================================================================
+--- linux-3.10.0-123.8.1.el7.x86_64.orig/include/linux/fs.h
++++ linux-3.10.0-123.8.1.el7.x86_64/include/linux/fs.h
+@@ -2440,6 +2440,10 @@ extern void inode_sb_list_add(struct ino
+ extern void submit_bio(int, struct bio *);
+ extern int bdev_read_only(struct block_device *);
+ #endif
++#define HAVE_CLEAR_RDONLY_ON_PUT
++extern void dev_set_rdonly(struct block_device *bdev);
++extern int dev_check_rdonly(struct block_device *bdev);
++extern void dev_clear_rdonly(struct block_device *bdev);
+ extern int set_blocksize(struct block_device *, int);
+ extern int sb_set_blocksize(struct super_block *, int);
+ extern int sb_min_blocksize(struct super_block *, int);
--- /dev/null
+From f9b256237b2682ef81847165a9cdf8465e5ebb16 Mon Sep 17 00:00:00 2001
+From: Greg Edwards <gedwards@ddn.com>
+Date: Thu, 29 Oct 2020 15:10:58 -0600
+Subject: [PATCH 4/4] virtio_ring: add a vring_desc reserve mempool
+
+When submitting large I/Os under heavy memory fragmentation, the
+higher-order allocation of the indirect vring_desc descriptor
+array may fail.
+
+Create a small per-virtqueue reserve mempool of max-sized
+vring_desc descriptor arrays. If allocating a descriptor array
+via kmalloc() fails, fall back to grabbing one from the
+preallocated reserve pool.
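+
+The fallback pattern in a self-contained sketch (the names below are
+illustrative only; the real code in the hunks that follow is
+per-virtqueue and sized for vring_desc arrays):
+
+    #include <linux/mempool.h>
+    #include <linux/slab.h>
+
+    #define RESERVE_NR      16      /* preallocated elements */
+    #define RESERVE_ELEM_SZ 4096    /* fixed element size */
+
+    static mempool_t *reserve;
+
+    static int reserve_init(void)
+    {
+        reserve = mempool_create_kmalloc_pool(RESERVE_NR, RESERVE_ELEM_SZ);
+        return reserve ? 0 : -ENOMEM;
+    }
+
+    /* try kmalloc() first; fall back to the reserve pool on failure */
+    static void *buf_alloc(size_t sz, gfp_t gfp, bool *from_pool)
+    {
+        void *p = kmalloc(sz, gfp | __GFP_NOWARN);
+
+        *from_pool = false;
+        if (p || sz > RESERVE_ELEM_SZ)
+            return p;
+        p = mempool_alloc(reserve, gfp);
+        if (p)
+            *from_pool = true;
+        return p;
+    }
+
+    /* free through whichever allocator provided the buffer */
+    static void buf_free(void *p, bool from_pool)
+    {
+        if (from_pool)
+            mempool_free(p, reserve);
+        else
+            kfree(p);
+    }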
+
+Signed-off-by: Greg Edwards <gedwards@ddn.com>
+---
+ drivers/virtio/virtio_ring.c | 90 ++++++++++++++++++++++++++++++++----
+ 1 file changed, 81 insertions(+), 9 deletions(-)
+
+Index: linux-3.10.0-1160.11.1.el7/drivers/virtio/virtio_ring.c
+===================================================================
+--- linux-3.10.0-1160.11.1.el7.orig/drivers/virtio/virtio_ring.c
++++ linux-3.10.0-1160.11.1.el7/drivers/virtio/virtio_ring.c
+@@ -16,6 +16,11 @@
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
++
++#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
++
++#include <linux/mempool.h>
++#include <linux/scatterlist.h>
+ #include <linux/virtio.h>
+ #include <linux/virtio_ring.h>
+ #include <linux/virtio_config.h>
+@@ -26,6 +31,24 @@
+ #include <linux/kmemleak.h>
+ #include <linux/dma-mapping.h>
+
++/*
++ * vring_desc reserve mempool
++ *
++ * If higher-order allocations fail in alloc_indirect(), try to grab a
++ * preallocated, max-sized descriptor array from the per-virtqueue mempool.
++ * Each pool element is sized at (req + rsp + max data + max integrity).
++ */
++#define VRING_DESC_POOL_DEFAULT 16
++#define VRING_DESC_POOL_NR_DESC (1 + 1 + SG_MAX_SEGMENTS + SG_MAX_SEGMENTS)
++#define VRING_DESC_POOL_ELEM_SZ (VRING_DESC_POOL_NR_DESC * \
++ sizeof(struct vring_desc))
++
++static unsigned short vring_desc_pool_sz = VRING_DESC_POOL_DEFAULT;
++module_param_named(vring_desc_pool_sz, vring_desc_pool_sz, ushort, 0444);
++MODULE_PARM_DESC(vring_desc_pool_sz,
++ "Number of elements in indirect descriptor mempool (default: "
++ __stringify(VRING_DESC_POOL_DEFAULT) ")");
++
+ #ifdef DEBUG
+ /* For development, we want to crash whenever the ring is screwed. */
+ #define BAD_RING(_vq, fmt, args...) \
+@@ -58,6 +81,7 @@
+ struct vring_desc_state {
+ void *data; /* Data for callback. */
+ struct vring_desc *indir_desc; /* Indirect descriptor, if any. */
++ bool indir_desc_mempool; /* Allocated from reserve mempool */
+ };
+
+ struct vring_virtqueue {
+@@ -103,6 +127,9 @@ struct vring_virtqueue {
+ ktime_t last_add_time;
+ #endif
+
++ /* Descriptor reserve mempool */
++ mempool_t *vring_desc_pool;
++
+ /* Per-descriptor state. */
+ struct vring_desc_state desc_state[];
+ };
+@@ -228,10 +255,13 @@ static int vring_mapping_error(const str
+ }
+
+ static struct vring_desc *alloc_indirect(struct virtqueue *_vq,
+- unsigned int total_sg, gfp_t gfp)
++ unsigned int total_sg, gfp_t gfp,
++ int head)
+ {
++ struct vring_virtqueue *vq = to_vvq(_vq);
+ struct vring_desc *desc;
+ unsigned int i;
++ size_t size = total_sg * sizeof(struct vring_desc);
+
+ /*
+ * We require lowmem mappings for the descriptors because
+@@ -239,16 +269,43 @@ static struct vring_desc *alloc_indirect
+ * virtqueue.
+ */
+ gfp &= ~__GFP_HIGHMEM;
++ gfp |= __GFP_NOWARN;
+
+- desc = kmalloc(total_sg * sizeof(struct vring_desc), gfp);
+- if (!desc)
+- return NULL;
++ desc = kmalloc(size, gfp);
++ if (!desc) {
++ if (vq->vring_desc_pool) {
++ /* try to get a buffer from the reserve pool */
++ if (WARN_ON_ONCE(size > VRING_DESC_POOL_ELEM_SZ))
++ return NULL;
++ desc = mempool_alloc(vq->vring_desc_pool, gfp);
++ if (!desc) {
++ pr_warn_ratelimited(
++ "reserve indirect desc alloc failed\n");
++ return NULL;
++ }
++ vq->desc_state[head].indir_desc_mempool = true;
++ } else {
++ pr_warn_ratelimited("indirect desc alloc failed\n");
++ return NULL;
++ }
++ }
+
+ for (i = 0; i < total_sg; i++)
+ desc[i].next = cpu_to_virtio16(_vq->vdev, i + 1);
+ return desc;
+ }
+
++static void free_indirect(struct vring_virtqueue *vq, struct vring_desc *desc,
++ int head)
++{
++ if (!vq->desc_state[head].indir_desc_mempool) {
++ kfree(desc);
++ } else {
++ mempool_free(desc, vq->vring_desc_pool);
++ vq->desc_state[head].indir_desc_mempool = false;
++ }
++}
++
+ static inline int virtqueue_add(struct virtqueue *_vq,
+ struct scatterlist *sgs[],
+ unsigned int total_sg,
+@@ -293,7 +350,7 @@ static inline int virtqueue_add(struct v
+ /* If the host supports indirect descriptor tables, and we have multiple
+ * buffers, then go indirect. FIXME: tune this threshold */
+ if (vq->indirect && total_sg > 1 && vq->vq.num_free)
+- desc = alloc_indirect(_vq, total_sg, gfp);
++ desc = alloc_indirect(_vq, total_sg, gfp, head);
+ else {
+ desc = NULL;
+ WARN_ON_ONCE(total_sg > vq->vring.num && !vq->indirect);
+@@ -321,7 +378,7 @@ static inline int virtqueue_add(struct v
+ if (out_sgs)
+ vq->notify(&vq->vq);
+ if (indirect)
+- kfree(desc);
++ free_indirect(vq, desc, head);
+ END_USE(vq);
+ return -ENOSPC;
+ }
+@@ -420,7 +477,7 @@ unmap_release:
+ }
+
+ if (indirect)
+- kfree(desc);
++ free_indirect(vq, desc, head);
+
+ return -ENOMEM;
+ }
+@@ -627,7 +684,7 @@ static void detach_buf(struct vring_virt
+ for (j = 0; j < len / sizeof(struct vring_desc); j++)
+ vring_unmap_one(vq, &indir_desc[j]);
+
+- kfree(vq->desc_state[head].indir_desc);
++ free_indirect(vq, vq->desc_state[head].indir_desc, head);
+ vq->desc_state[head].indir_desc = NULL;
+ }
+ }
+@@ -904,6 +961,15 @@ struct virtqueue *__vring_new_virtqueue(
+ if (!vq)
+ return NULL;
+
++ if (vring_desc_pool_sz) {
++ vq->vring_desc_pool = mempool_create_node(vring_desc_pool_sz,
++ mempool_kmalloc, mempool_kfree,
++ (void *)VRING_DESC_POOL_ELEM_SZ,
++ GFP_KERNEL, numa_node_id());
++ if (!vq->vring_desc_pool)
++ goto err;
++ }
++
+ vq->vring = vring;
+ vq->vq.callback = callback;
+ vq->vq.vdev = vdev;
+@@ -938,6 +1004,10 @@ struct virtqueue *__vring_new_virtqueue(
+ memset(vq->desc_state, 0, vring.num * sizeof(struct vring_desc_state));
+
+ return &vq->vq;
++
++err:
++ kfree(vq);
++ return NULL;
+ }
+ EXPORT_SYMBOL_GPL(__vring_new_virtqueue);
+
+@@ -1073,6 +1143,8 @@ void vring_del_virtqueue(struct virtqueu
+ vq->vring.desc, vq->queue_dma_addr);
+ }
+ list_del(&_vq->list);
++ if (vq->vring_desc_pool)
++ mempool_destroy(vq->vring_desc_pool);
+ kfree(vq);
+ }
+ EXPORT_SYMBOL_GPL(vring_del_virtqueue);