From 8582fedd72010169b43a9158cbdfba0012f7ac45 Mon Sep 17 00:00:00 2001 From: jxiong Date: Thu, 13 Dec 2007 14:07:37 +0000 Subject: [PATCH] Fixed indention of raid6 patches, and an issue about overlapping --- lustre/kernel_patches/patches/raid6-large-io.patch | 8 +- .../kernel_patches/patches/raid6-merge-ios.patch | 111 +++++++++---------- .../patches/raid6-serialize-ovelapping-reqs.patch | 123 ++++++++++++--------- lustre/kernel_patches/patches/raid6-stats.patch | 43 ++++--- .../patches/raid6-stripe-by-stripe-handling.patch | 96 ++++++++-------- lustre/kernel_patches/patches/raid6-zerocopy.patch | 43 +++---- 6 files changed, 220 insertions(+), 204 deletions(-) diff --git a/lustre/kernel_patches/patches/raid6-large-io.patch b/lustre/kernel_patches/patches/raid6-large-io.patch index 13fff9c..85a7f43 100644 --- a/lustre/kernel_patches/patches/raid6-large-io.patch +++ b/lustre/kernel_patches/patches/raid6-large-io.patch @@ -4,10 +4,10 @@ mddev->queue->unplug_fn = raid6_unplug_device; mddev->queue->issue_flush_fn = raid6_issue_flush; -+ /* in order to support large I/Os */ -+ blk_queue_max_sectors(mddev->queue, mddev->chunk_size * mddev->raid_disks >> 9); -+ mddev->queue->max_phys_segments = mddev->chunk_size * mddev->raid_disks >> PAGE_SHIFT; -+ mddev->queue->max_hw_segments = mddev->chunk_size * mddev->raid_disks >> PAGE_SHIFT; ++ /* in order to support large I/Os */ ++ blk_queue_max_sectors(mddev->queue, mddev->chunk_size * mddev->raid_disks >> 9); ++ mddev->queue->max_phys_segments = mddev->chunk_size * mddev->raid_disks >> PAGE_SHIFT; ++ mddev->queue->max_hw_segments = mddev->chunk_size * mddev->raid_disks >> PAGE_SHIFT; + PRINTK("raid6: run(%s) called.\n", mdname(mddev)); diff --git a/lustre/kernel_patches/patches/raid6-merge-ios.patch b/lustre/kernel_patches/patches/raid6-merge-ios.patch index a205741..c403af1 100644 --- a/lustre/kernel_patches/patches/raid6-merge-ios.patch +++ b/lustre/kernel_patches/patches/raid6-merge-ios.patch @@ -1,6 +1,7 @@ ---- linux-2.6.9.orig/drivers/md/raid6main.c 2006-09-08 00:25:38.000000000 +0800 -+++ linux-2.6.9/drivers/md/raid6main.c 2006-09-08 00:53:36.000000000 +0800 -@@ -956,6 +956,28 @@ static void add_stripe_bio (struct strip +diff -pur linux-2.6.9-55.0.9.orig/drivers/md/raid6main.c linux-2.6.9-55.0.9/drivers/md/raid6main.c +--- linux-2.6.9-55.0.9.orig/drivers/md/raid6main.c 2007-12-13 20:31:53.000000000 +0800 ++++ linux-2.6.9-55.0.9/drivers/md/raid6main.c 2007-12-13 20:50:04.000000000 +0800 +@@ -956,6 +956,26 @@ static void add_stripe_bio (struct strip } } @@ -10,26 +11,24 @@ + */ +static void raid6_flush_bios(raid6_conf_t *conf, struct bio *bios[], int raid_disks) +{ -+ struct bio *bio, *nbio; -+ int i; ++ struct bio *bio, *nbio; ++ int i; + -+ for (i = 0; i < raid_disks; i++) { -+ bio = bios[i]; -+ while (bio) { -+ nbio = bio->bi_next; -+ bio->bi_next = NULL; -+ generic_make_request(bio); -+ bio = nbio; -+ } -+ bios[i] = NULL; -+ } ++ for (i = 0; i < raid_disks; i++) { ++ bio = bios[i]; ++ while (bio) { ++ nbio = bio->bi_next; ++ bio->bi_next = NULL; ++ generic_make_request(bio); ++ bio = nbio; ++ } ++ bios[i] = NULL; ++ } +} -+ -+ /* * handle_stripe - do things to a stripe. -@@ -975,7 +997,7 @@ static void add_stripe_bio (struct strip +@@ -975,7 +995,7 @@ static void add_stripe_bio (struct strip * */ @@ -38,7 +37,7 @@ { raid6_conf_t *conf = sh->raid_conf; int disks = conf->raid_disks; -@@ -1452,7 +1474,11 @@ static void handle_stripe(struct stripe_ +@@ -1452,7 +1472,11 @@ static void handle_stripe(struct stripe_ bi->bi_size = STRIPE_SIZE; bi->bi_next = NULL; atomic_inc(&conf->out_reqs_in_queue); @@ -51,44 +50,44 @@ } else { PRINTK("skip op %ld on disc %d for sector %llu\n", bi->bi_rw, i, (unsigned long long)sh->sector); -@@ -1575,6 +1601,7 @@ static int make_request (request_queue_t - int sectors_per_chunk = conf->chunk_size >> 9; - int stripes_per_chunk, sectors_per_block; - int sectors_per_stripe; +@@ -1575,6 +1599,7 @@ static int make_request (request_queue_t + int sectors_per_chunk = conf->chunk_size >> 9; + int stripes_per_chunk, sectors_per_block; + int sectors_per_stripe; + struct bio *bios[MD_SB_DISKS]; - int i, j; + int i, j; atomic_inc(&conf->in_reqs_in_queue); -@@ -1610,6 +1637,7 @@ static int make_request (request_queue_t - sector_div(block, sectors_per_block); - sectors = bi->bi_size >> 9; +@@ -1610,6 +1635,7 @@ static int make_request (request_queue_t + sector_div(block, sectors_per_block); + sectors = bi->bi_size >> 9; + memset(&bios, 0, sizeof(bios)); repeat: - stripe = block * (sectors_per_block / data_disks); - b_sector = stripe * data_disks; -@@ -1629,9 +1657,17 @@ static int make_request (request_queue_t - new_sector = raid6_compute_sector(r_sector, raid_disks, - data_disks, &dd_idx, - &pd_idx, conf); -- if (sh == NULL) -- sh = get_active_stripe(conf, new_sector, pd_idx, -- (bi->bi_rw&RWA_MASK)); -+ if (sh == NULL) { -+ /* first, try to get stripe w/o blocking -+ * if we can't, then it's time to submit -+ * all collected bio's in order to free -+ * some space in the cache -bzzz */ -+ sh = get_active_stripe(conf, new_sector, pd_idx, 1); -+ if (!sh && !(bi->bi_rw&RWA_MASK)) { -+ raid6_flush_bios(conf, bios, raid_disks); -+ sh = get_active_stripe(conf, new_sector, pd_idx, 0); -+ } -+ } - if (sh) { - add_stripe_bio(sh, bi, dd_idx, (bi->bi_rw&RW_MASK)); - } else { -@@ -1652,7 +1688,7 @@ static int make_request (request_queue_t + stripe = block * (sectors_per_block / data_disks); + b_sector = stripe * data_disks; +@@ -1629,9 +1655,17 @@ static int make_request (request_queue_t + new_sector = raid6_compute_sector(r_sector, raid_disks, + data_disks, &dd_idx, + &pd_idx, conf); +- if (sh == NULL) +- sh = get_active_stripe(conf, new_sector, pd_idx, +- (bi->bi_rw&RWA_MASK)); ++ if (sh == NULL) { ++ /* first, try to get stripe w/o blocking ++ * if we can't, then it's time to submit ++ * all collected bio's in order to free ++ * some space in the cache -bzzz */ ++ sh = get_active_stripe(conf, new_sector, pd_idx, 1); ++ if (!sh && !(bi->bi_rw&RWA_MASK)) { ++ raid6_flush_bios(conf, bios, raid_disks); ++ sh = get_active_stripe(conf, new_sector, pd_idx, 0); ++ } ++ } + if (sh) { + add_stripe_bio(sh, bi, dd_idx, (bi->bi_rw&RW_MASK)); + } else { +@@ -1652,7 +1686,7 @@ static int make_request (request_queue_t if (sh) { raid6_plug_device(conf); @@ -97,17 +96,17 @@ release_stripe(sh); } else { sh = NULL; -@@ -1664,6 +1700,9 @@ static int make_request (request_queue_t +@@ -1664,6 +1698,9 @@ static int make_request (request_queue_t if(sectors > 0) goto repeat; -+ /* now flush all bio's */ -+ raid6_flush_bios(conf, bios, raid_disks); ++ /* now flush all bio's */ ++ raid6_flush_bios(conf, bios, raid_disks); + spin_lock_irq(&conf->device_lock); if (--bi->bi_phys_segments == 0) { int bytes = bi->bi_size; -@@ -1719,7 +1758,7 @@ static int sync_request (mddev_t *mddev, +@@ -1719,7 +1756,7 @@ static int sync_request (mddev_t *mddev, clear_bit(STRIPE_INSYNC, &sh->state); spin_unlock(&sh->lock); @@ -116,7 +115,7 @@ release_stripe(sh); return STRIPE_SECTORS; -@@ -1769,7 +1808,7 @@ static void raid6d (mddev_t *mddev) +@@ -1769,7 +1806,7 @@ static void raid6d (mddev_t *mddev) handled++; atomic_inc(&conf->handled_in_raid5d); diff --git a/lustre/kernel_patches/patches/raid6-serialize-ovelapping-reqs.patch b/lustre/kernel_patches/patches/raid6-serialize-ovelapping-reqs.patch index 51fcab6..fc690b2 100644 --- a/lustre/kernel_patches/patches/raid6-serialize-ovelapping-reqs.patch +++ b/lustre/kernel_patches/patches/raid6-serialize-ovelapping-reqs.patch @@ -1,25 +1,18 @@ ---- linux-2.6.9.orig/include/linux/raid/raid5.h 2006-09-08 02:51:59.000000000 +0800 -+++ linux-2.6.9/include/linux/raid/raid5.h 2006-09-08 02:49:24.000000000 +0800 -@@ -154,6 +154,8 @@ struct stripe_head { - #define R5_Wantwrite 5 - #define R5_Syncio 6 /* this io need to be accounted as resync io */ - #define R5_Direct 7 /* use page from passed bio to avoid memcpy */ -+#define R5_Overlap 8 /* There is a pending overlapping request -+ * on this block */ - - /* - * Write method -@@ -220,6 +221,7 @@ struct raid5_private_data { - atomic_t active_stripes; - struct list_head inactive_list; - wait_queue_head_t wait_for_stripe; -+ wait_queue_head_t wait_for_overlap; - int inactive_blocked; /* release of inactive stripes blocked, - * waiting for 25% to be free - */ ---- linux-2.6.9.orig/drivers/md/raid6main.c 2006-09-08 02:51:59.000000000 +0800 -+++ linux-2.6.9/drivers/md/raid6main.c 2006-09-08 02:51:28.000000000 +0800 -@@ -907,7 +907,7 @@ static void compute_block_2(struct strip +diff -pur linux-2.6.9-55.0.9.orig/drivers/md/raid6main.c linux-2.6.9-55.0.9/drivers/md/raid6main.c +--- linux-2.6.9-55.0.9.orig/drivers/md/raid6main.c 2007-12-13 20:52:42.000000000 +0800 ++++ linux-2.6.9-55.0.9/drivers/md/raid6main.c 2007-12-13 21:05:04.000000000 +0800 +@@ -749,6 +749,10 @@ static void compute_parity(struct stripe + if ( i != pd_idx && i != qd_idx && sh->dev[i].towrite ) { + chosen = sh->dev[i].towrite; + sh->dev[i].towrite = NULL; ++ ++ if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags)) ++ wake_up(&conf->wait_for_overlap); ++ + if (sh->dev[i].written) BUG(); + sh->dev[i].written = chosen; + } +@@ -907,7 +911,7 @@ static void compute_block_2(struct strip * toread/towrite point to the first in a chain. * The bi_next chain must be in order. */ @@ -28,7 +21,7 @@ { struct bio **bip; raid6_conf_t *conf = sh->raid_conf; -@@ -924,10 +924,13 @@ static void add_stripe_bio (struct strip +@@ -924,10 +928,13 @@ static void add_stripe_bio (struct strip else bip = &sh->dev[dd_idx].toread; while (*bip && (*bip)->bi_sector < bi->bi_sector) { @@ -44,7 +37,7 @@ if (*bip && bi->bi_next && (*bip) != bi->bi_next) BUG(); if (*bip) -@@ -954,6 +957,14 @@ static void add_stripe_bio (struct strip +@@ -954,6 +961,14 @@ static void add_stripe_bio (struct strip if (sector >= sh->dev[dd_idx].sector + STRIPE_SECTORS) set_bit(R5_OVERWRITE, &sh->dev[dd_idx].flags); } @@ -59,7 +52,7 @@ } /* -@@ -1040,6 +1051,9 @@ static void handle_stripe(struct stripe_ +@@ -1038,6 +1053,9 @@ static void handle_stripe(struct stripe_ spin_lock_irq(&conf->device_lock); rbi = dev->toread; dev->toread = NULL; @@ -69,7 +62,7 @@ spin_unlock_irq(&conf->device_lock); while (rbi && rbi->bi_sector < dev->sector + STRIPE_SECTORS) { copy_data(0, rbi, dev->page, dev->sector); -@@ -1089,6 +1103,9 @@ static void handle_stripe(struct stripe_ +@@ -1087,6 +1105,9 @@ static void handle_stripe(struct stripe_ sh->dev[i].towrite = NULL; if (bi) to_write--; @@ -79,7 +72,7 @@ while (bi && bi->bi_sector < sh->dev[i].sector + STRIPE_SECTORS){ struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector); clear_bit(BIO_UPTODATE, &bi->bi_flags); -@@ -1117,6 +1134,8 @@ static void handle_stripe(struct stripe_ +@@ -1115,6 +1136,8 @@ static void handle_stripe(struct stripe_ if (!test_bit(R5_Insync, &sh->dev[i].flags)) { bi = sh->dev[i].toread; sh->dev[i].toread = NULL; @@ -88,31 +81,31 @@ if (bi) to_read--; while (bi && bi->bi_sector < sh->dev[i].sector + STRIPE_SECTORS){ struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector); -@@ -1649,6 +1668,8 @@ static int make_request (request_queue_t - sh = NULL; - /* iterrate through all pages in the stripe */ - for (j = 0; j < data_disks && sectors > 0; j++) { +@@ -1647,6 +1670,8 @@ static int make_request (request_queue_t + sh = NULL; + /* iterrate through all pages in the stripe */ + for (j = 0; j < data_disks && sectors > 0; j++) { + DEFINE_WAIT(w); + - if (r_sector + STRIPE_SECTORS <= bi->bi_sector || - r_sector >= last_sector) { - r_sector += sectors_per_chunk; -@@ -1657,6 +1678,9 @@ static int make_request (request_queue_t - new_sector = raid6_compute_sector(r_sector, raid_disks, - data_disks, &dd_idx, - &pd_idx, conf); + if (r_sector + STRIPE_SECTORS <= bi->bi_sector || + r_sector >= last_sector) { + r_sector += sectors_per_chunk; +@@ -1655,6 +1680,9 @@ static int make_request (request_queue_t + new_sector = raid6_compute_sector(r_sector, raid_disks, + data_disks, &dd_idx, + &pd_idx, conf); + +retry: + prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE); - if (sh == NULL) { - /* first, try to get stripe w/o blocking - * if we can't, then it's time to submit -@@ -1669,10 +1693,18 @@ static int make_request (request_queue_t - } - } - if (sh) { -- add_stripe_bio(sh, bi, dd_idx, (bi->bi_rw&RW_MASK)); -+ if(!add_stripe_bio(sh, bi, dd_idx, (bi->bi_rw&RW_MASK))) { + if (sh == NULL) { + /* first, try to get stripe w/o blocking + * if we can't, then it's time to submit +@@ -1667,10 +1695,18 @@ static int make_request (request_queue_t + } + } + if (sh) { +- add_stripe_bio(sh, bi, dd_idx, (bi->bi_rw&RW_MASK)); ++ if(!add_stripe_bio(sh, bi, dd_idx, (bi->bi_rw&RW_MASK))) { + /* Failed to be added due to overlapped. */ + raid6_unplug_device(mddev->queue); + release_stripe(sh); @@ -120,14 +113,14 @@ + goto retry; + } + finish_wait(&conf->wait_for_overlap, &w); - } else { - /* cannot get stripe for read-ahead, just give-up */ - clear_bit(BIO_UPTODATE, &bi->bi_flags); + } else { + /* cannot get stripe for read-ahead, just give-up */ + clear_bit(BIO_UPTODATE, &bi->bi_flags); + finish_wait(&conf->wait_for_overlap, &w); - sectors = 0; - break; - } -@@ -1690,7 +1722,6 @@ static int make_request (request_queue_t + sectors = 0; + break; + } +@@ -1688,7 +1724,6 @@ static int make_request (request_queue_t raid6_plug_device(conf); handle_stripe(sh, bios); release_stripe(sh); @@ -135,7 +128,7 @@ sh = NULL; } -@@ -1849,6 +1880,7 @@ static int run (mddev_t *mddev) +@@ -1847,6 +1882,7 @@ static int run (mddev_t *mddev) conf->device_lock = SPIN_LOCK_UNLOCKED; init_waitqueue_head(&conf->wait_for_stripe); @@ -143,3 +136,23 @@ INIT_LIST_HEAD(&conf->handle_list); INIT_LIST_HEAD(&conf->delayed_list); INIT_LIST_HEAD(&conf->inactive_list); +diff -pur linux-2.6.9-55.0.9.orig/include/linux/raid/raid5.h linux-2.6.9-55.0.9/include/linux/raid/raid5.h +--- linux-2.6.9-55.0.9.orig/include/linux/raid/raid5.h 2007-12-13 21:09:13.000000000 +0800 ++++ linux-2.6.9-55.0.9/include/linux/raid/raid5.h 2007-12-13 21:01:33.000000000 +0800 +@@ -154,6 +154,8 @@ struct stripe_head { + #define R5_Wantwrite 5 + #define R5_Syncio 6 /* this io need to be accounted as resync io */ + #define R5_Direct 7 /* use page from passed bio to avoid memcpy */ ++#define R5_Overlap 8 /* There is a pending overlapping request ++ * on this block */ + + /* + * Write method +@@ -221,6 +223,7 @@ struct raid5_private_data { + atomic_t active_stripes; + struct list_head inactive_list; + wait_queue_head_t wait_for_stripe; ++ wait_queue_head_t wait_for_overlap; + int inactive_blocked; /* release of inactive stripes blocked, + * waiting for 25% to be free + */ diff --git a/lustre/kernel_patches/patches/raid6-stats.patch b/lustre/kernel_patches/patches/raid6-stats.patch index 942b3fe..c173a08 100644 --- a/lustre/kernel_patches/patches/raid6-stats.patch +++ b/lustre/kernel_patches/patches/raid6-stats.patch @@ -1,5 +1,6 @@ ---- linux-2.6.9.orig/drivers/md/raid6main.c 2006-09-07 23:09:20.000000000 +0800 -+++ linux-2.6.9/drivers/md/raid6main.c 2006-09-07 23:09:11.000000000 +0800 +diff -pur linux-2.6.9-55.0.9.orig/drivers/md/raid6main.c linux-2.6.9-55.0.9/drivers/md/raid6main.c +--- linux-2.6.9-55.0.9.orig/drivers/md/raid6main.c 2007-09-28 17:53:55.000000000 +0800 ++++ linux-2.6.9-55.0.9/drivers/md/raid6main.c 2007-12-13 20:19:11.000000000 +0800 @@ -96,9 +96,10 @@ static inline void __release_stripe(raid if (atomic_read(&conf->active_stripes)==0) BUG(); @@ -26,7 +27,7 @@ BUG(); list_del_init(&sh->lru); + if (test_bit(STRIPE_DELAYED, &sh->state)) -+ atomic_dec(&conf->delayed); ++ atomic_dec(&conf->delayed); + } } @@ -142,28 +143,26 @@ handle_stripe(sh); release_stripe(sh); -@@ -1939,7 +1963,24 @@ static void status (struct seq_file *seq - seq_printf (seq, "%s", +@@ -1940,6 +1964,23 @@ static void status (struct seq_file *seq conf->disks[i].rdev && conf->disks[i].rdev->in_sync ? "U" : "_"); -- seq_printf (seq, "]"); -+ seq_printf (seq, "]"); + seq_printf (seq, "]"); + -+ seq_printf (seq, "\n\t\tin: %u reads, %u writes; out: %u reads, %u writes", -+ atomic_read(&conf->reads_in), atomic_read(&conf->writes_in), -+ atomic_read(&conf->reads_out), atomic_read(&conf->writes_out)); -+ seq_printf (seq, "\n\t\t%u in raid5d, %u out of stripes, %u handle called", -+ atomic_read(&conf->handled_in_raid5d), -+ atomic_read(&conf->out_of_stripes), -+ atomic_read(&conf->handle_called)); -+ seq_printf (seq, "\n\t\treads: %u for rmw, %u for rcw", -+ atomic_read(&conf->reads_for_rmw), -+ atomic_read(&conf->reads_for_rcw)); -+ seq_printf (seq, "\n\t\t%u delayed, %u active, queues: %u in, %u out\n", -+ atomic_read(&conf->delayed), -+ atomic_read(&conf->active_stripes), -+ atomic_read(&conf->in_reqs_in_queue), -+ atomic_read(&conf->out_reqs_in_queue)); ++ seq_printf (seq, "\n\t\tin: %u reads, %u writes; out: %u reads, %u writes", ++ atomic_read(&conf->reads_in), atomic_read(&conf->writes_in), ++ atomic_read(&conf->reads_out), atomic_read(&conf->writes_out)); ++ seq_printf (seq, "\n\t\t%u in raid5d, %u out of stripes, %u handle called", ++ atomic_read(&conf->handled_in_raid5d), ++ atomic_read(&conf->out_of_stripes), ++ atomic_read(&conf->handle_called)); ++ seq_printf (seq, "\n\t\treads: %u for rmw, %u for rcw", ++ atomic_read(&conf->reads_for_rmw), ++ atomic_read(&conf->reads_for_rcw)); ++ seq_printf (seq, "\n\t\t%u delayed, %u active, queues: %u in, %u out\n", ++ atomic_read(&conf->delayed), ++ atomic_read(&conf->active_stripes), ++ atomic_read(&conf->in_reqs_in_queue), ++ atomic_read(&conf->out_reqs_in_queue)); + #if RAID6_DUMPSTATE seq_printf (seq, "\n"); diff --git a/lustre/kernel_patches/patches/raid6-stripe-by-stripe-handling.patch b/lustre/kernel_patches/patches/raid6-stripe-by-stripe-handling.patch index 4a3543a..b4155df 100644 --- a/lustre/kernel_patches/patches/raid6-stripe-by-stripe-handling.patch +++ b/lustre/kernel_patches/patches/raid6-stripe-by-stripe-handling.patch @@ -4,11 +4,11 @@ sector_t new_sector; sector_t logical_sector, last_sector; struct stripe_head *sh; -+ sector_t stripe, sectors, block, r_sector, b_sector; -+ int sectors_per_chunk = conf->chunk_size >> 9; -+ int stripes_per_chunk, sectors_per_block; -+ int sectors_per_stripe; -+ int i, j; ++ sector_t stripe, sectors, block, r_sector, b_sector; ++ int sectors_per_chunk = conf->chunk_size >> 9; ++ int stripes_per_chunk, sectors_per_block; ++ int sectors_per_stripe; ++ int i, j; atomic_inc(&conf->in_reqs_in_queue); @@ -24,53 +24,53 @@ - PRINTK("raid6: make_request, sector %Lu logical %Lu\n", - (unsigned long long)new_sector, - (unsigned long long)logical_sector); -+ stripes_per_chunk = conf->chunk_size / STRIPE_SIZE; -+ sectors_per_stripe = STRIPE_SECTORS * data_disks; -+ sectors_per_block = stripes_per_chunk * sectors_per_stripe; ++ stripes_per_chunk = conf->chunk_size / STRIPE_SIZE; ++ sectors_per_stripe = STRIPE_SECTORS * data_disks; ++ sectors_per_block = stripes_per_chunk * sectors_per_stripe; + -+ block = logical_sector & ~((sector_t)sectors_per_block - 1); -+ sector_div(block, sectors_per_block); -+ sectors = bi->bi_size >> 9; ++ block = logical_sector & ~((sector_t)sectors_per_block - 1); ++ sector_div(block, sectors_per_block); ++ sectors = bi->bi_size >> 9; + + repeat: -+ stripe = block * (sectors_per_block / data_disks); -+ b_sector = stripe * data_disks; -+ /* iterate through all stripes in this block, -+ * where block is a set of internal stripes -+ * which covers chunk */ -+ for (i = 0; i < stripes_per_chunk && sectors > 0; i++) { -+ r_sector = b_sector + (i * STRIPE_SECTORS); -+ sh = NULL; -+ /* iterrate through all pages in the stripe */ -+ for (j = 0; j < data_disks && sectors > 0; j++) { -+ if (r_sector + STRIPE_SECTORS <= bi->bi_sector || -+ r_sector >= last_sector) { -+ r_sector += sectors_per_chunk; -+ continue; -+ } -+ new_sector = raid6_compute_sector(r_sector, raid_disks, -+ data_disks, &dd_idx, -+ &pd_idx, conf); -+ if (sh == NULL) -+ sh = get_active_stripe(conf, new_sector, pd_idx, -+ (bi->bi_rw&RWA_MASK)); -+ if (sh) { -+ add_stripe_bio(sh, bi, dd_idx, (bi->bi_rw&RW_MASK)); -+ } else { -+ /* cannot get stripe for read-ahead, just give-up */ -+ clear_bit(BIO_UPTODATE, &bi->bi_flags); -+ sectors = 0; -+ break; -+ } ++ stripe = block * (sectors_per_block / data_disks); ++ b_sector = stripe * data_disks; ++ /* iterate through all stripes in this block, ++ * where block is a set of internal stripes ++ * which covers chunk */ ++ for (i = 0; i < stripes_per_chunk && sectors > 0; i++) { ++ r_sector = b_sector + (i * STRIPE_SECTORS); ++ sh = NULL; ++ /* iterrate through all pages in the stripe */ ++ for (j = 0; j < data_disks && sectors > 0; j++) { ++ if (r_sector + STRIPE_SECTORS <= bi->bi_sector || ++ r_sector >= last_sector) { ++ r_sector += sectors_per_chunk; ++ continue; ++ } ++ new_sector = raid6_compute_sector(r_sector, raid_disks, ++ data_disks, &dd_idx, ++ &pd_idx, conf); ++ if (sh == NULL) ++ sh = get_active_stripe(conf, new_sector, pd_idx, ++ (bi->bi_rw&RWA_MASK)); ++ if (sh) { ++ add_stripe_bio(sh, bi, dd_idx, (bi->bi_rw&RW_MASK)); ++ } else { ++ /* cannot get stripe for read-ahead, just give-up */ ++ clear_bit(BIO_UPTODATE, &bi->bi_flags); ++ sectors = 0; ++ break; ++ } + -+ BUG_ON (new_sector != stripe); -+ sectors -= STRIPE_SECTORS; -+ if (bi->bi_sector > r_sector) -+ sectors += bi->bi_sector - r_sector; -+ if (r_sector + STRIPE_SECTORS > last_sector) -+ sectors += r_sector + STRIPE_SECTORS - last_sector; -+ r_sector += sectors_per_chunk; -+ } ++ BUG_ON (new_sector != stripe); ++ sectors -= STRIPE_SECTORS; ++ if (bi->bi_sector > r_sector) ++ sectors += bi->bi_sector - r_sector; ++ if (r_sector + STRIPE_SECTORS > last_sector) ++ sectors += r_sector + STRIPE_SECTORS - last_sector; ++ r_sector += sectors_per_chunk; ++ } - sh = get_active_stripe(conf, new_sector, pd_idx, (bi->bi_rw&RWA_MASK)); if (sh) { diff --git a/lustre/kernel_patches/patches/raid6-zerocopy.patch b/lustre/kernel_patches/patches/raid6-zerocopy.patch index d03d346..baa1e5f 100644 --- a/lustre/kernel_patches/patches/raid6-zerocopy.patch +++ b/lustre/kernel_patches/patches/raid6-zerocopy.patch @@ -55,7 +55,7 @@ /**** FIX THIS: This could be very bad if disks is close to 256 ****/ void *ptrs[disks]; -@@ -757,18 +783,41 @@ static void compute_parity(struct stripe +@@ -761,18 +787,46 @@ static void compute_parity(struct stripe BUG(); /* Not implemented yet */ } @@ -83,6 +83,11 @@ + if (!wbi->bi_next && test_bit(R5_Insync, &sh->dev[i].flags) && + test_bit(R5_OVERWRITE, &sh->dev[i].flags)) { + page = zero_copy_data(wbi, sector); ++ /* we don't do zerocopy on a HighMem page. Raid6 tend ++ * to prepare all of the pages' content to be accessed ++ * before computing PQ parity. If we need to support HighMem ++ * page also, we have to modify the gen_syndrome() ++ * algorithm. -jay */ + if (page && !PageHighMem(page)) { + atomic_inc(&conf->writes_zcopy); + sh->dev[i].req.bi_io_vec[0].bv_page = page; @@ -106,7 +111,7 @@ // switch(method) { // case RECONSTRUCT_WRITE: -@@ -779,7 +828,10 @@ static void compute_parity(struct stripe +@@ -783,7 +837,10 @@ static void compute_parity(struct stripe count = 0; i = d0_idx; do { @@ -118,7 +123,7 @@ i = raid6_next_disk(i, disks); } while ( i != d0_idx ); -@@ -1183,7 +1235,8 @@ static void handle_stripe(struct stripe_ +@@ -1185,7 +1242,8 @@ static void handle_stripe(struct stripe_ if (sh->dev[i].written) { dev = &sh->dev[i]; if (!test_bit(R5_LOCKED, &dev->flags) && @@ -128,7 +133,7 @@ /* We can return any write requests */ struct bio *wbi, *wbi2; PRINTK("Return write for stripe %llu disc %d\n", -@@ -1191,6 +1244,7 @@ static void handle_stripe(struct stripe_ +@@ -1193,6 +1251,7 @@ static void handle_stripe(struct stripe_ spin_lock_irq(&conf->device_lock); wbi = dev->written; dev->written = NULL; @@ -136,7 +141,7 @@ while (wbi && wbi->bi_sector < dev->sector + STRIPE_SECTORS) { wbi2 = r5_next_bio(wbi, dev->sector); if (--wbi->bi_phys_segments == 0) { -@@ -2005,6 +2059,7 @@ static int run (mddev_t *mddev) +@@ -2007,6 +2066,7 @@ static int run (mddev_t *mddev) if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe) mddev->queue->backing_dev_info.ra_pages = 2 * stripe; } @@ -144,17 +149,17 @@ /* Ok, everything is just fine now */ mddev->array_size = mddev->size * (mddev->raid_disks - 2); -@@ -2092,9 +2147,11 @@ static void status (struct seq_file *seq - atomic_read(&conf->handled_in_raid5d), - atomic_read(&conf->out_of_stripes), - atomic_read(&conf->handle_called)); -- seq_printf (seq, "\n\t\treads: %u for rmw, %u for rcw", -+ seq_printf (seq, "\n\t\treads: %u for rmw, %u for rcw. zcopy writes: %u, copied writes: %u", - atomic_read(&conf->reads_for_rmw), -- atomic_read(&conf->reads_for_rcw)); -+ atomic_read(&conf->reads_for_rcw), -+ atomic_read(&conf->writes_zcopy), -+ atomic_read(&conf->writes_copied)); - seq_printf (seq, "\n\t\t%u delayed, %u active, queues: %u in, %u out\n", - atomic_read(&conf->delayed), - atomic_read(&conf->active_stripes), +@@ -2094,9 +2154,11 @@ static void status (struct seq_file *seq + atomic_read(&conf->handled_in_raid5d), + atomic_read(&conf->out_of_stripes), + atomic_read(&conf->handle_called)); +- seq_printf (seq, "\n\t\treads: %u for rmw, %u for rcw", ++ seq_printf (seq, "\n\t\treads: %u for rmw, %u for rcw. zcopy writes: %u, copied writes: %u", + atomic_read(&conf->reads_for_rmw), +- atomic_read(&conf->reads_for_rcw)); ++ atomic_read(&conf->reads_for_rcw), ++ atomic_read(&conf->writes_zcopy), ++ atomic_read(&conf->writes_copied)); + seq_printf (seq, "\n\t\t%u delayed, %u active, queues: %u in, %u out\n", + atomic_read(&conf->delayed), + atomic_read(&conf->active_stripes), -- 1.8.3.1