From 176ea3a4599ede8b1a0c91506dcd34bc162f2959 Mon Sep 17 00:00:00 2001
From: Xinliang Liu <xinliang.liu@linaro.org>
Date: Wed, 6 Apr 2022 08:06:33 +0000
Subject: [PATCH] LU-15722 osd-ldiskfs: fix IO write gets stuck for 64K
 PAGE_SIZE

This fixes the following stuck IO write issue:
-----
[606895.151765] LustreError:
334886:0:(ofd_io.c:1389:ofd_commitrw_write()) lustre-OST0000: restart IO
write too many times: 10000
[606895.207345] LustreError:
334886:0:(ofd_io.c:1389:ofd_commitrw_write()) Skipped 8 previous similar
messages
-------

The write path goes into an infinite loop:
ofd_commitrw_write()->osd_write_commit()->osd_ldiskfs_map_inode_pages()
    ->ldiskfs_map_blocks()->ofd_commitrw_write()

The cause is as follows.
For 64K PAGE_SIZE block allocation/mapping, m_lblk must be the
first un-allocated block: if m_lblk points at an already allocated
block when create = 1, ldiskfs_map_blocks() just returns the
already allocated blocks without allocating any of the newly
requested blocks for the extent.

This stuck issue does not happen with 4K PAGE_SIZE. In the
PAGE_SIZE = blocksize case, an m_lblk that points at an already
allocated block will point at an un-allocated block in the next
restart transaction, because the already mapped block/page is
filtered out of that transaction via the OBD_BRW_DONE flag in
osd_declare_write_commit().

Change-Id: Iadba0be8875a15a2e2f158ec9571f5ece5637ae0
Signed-off-by: Xinliang Liu <xinliang.liu@linaro.org>
Reviewed-on: https://review.whamcloud.com/47004
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Alex Zhuravlev <bzzz@whamcloud.com>
Reviewed-by: James Simmons <jsimmons@infradead.org>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
---
 lustre/osd-ldiskfs/osd_io.c | 44 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)

diff --git a/lustre/osd-ldiskfs/osd_io.c b/lustre/osd-ldiskfs/osd_io.c
index 74b1d3f..9cff3c8 100644
--- a/lustre/osd-ldiskfs/osd_io.c
+++ b/lustre/osd-ldiskfs/osd_io.c
@@ -1084,6 +1084,7 @@ static int osd_ldiskfs_map_inode_pages(struct inode *inode,
 				       struct thandle *thandle)
 {
 	int blocks_per_page = PAGE_SIZE >> inode->i_blkbits;
+	int blocksize = 1 << inode->i_blkbits;
 	int rc = 0, i = 0, mapped_index = 0;
 	struct page *fp = NULL;
 	int clen = 0;
@@ -1140,6 +1141,48 @@ static int osd_ldiskfs_map_inode_pages(struct inode *inode,
 		/* process found extent */
 		map.m_lblk = fp->index * blocks_per_page;
 		map.m_len = blen = clen * blocks_per_page;
+
+		/*
+		 * For PAGE_SIZE > blocksize block allocation mapping, the
+		 * ldiskfs_map_blocks() aims at looking up already mapped
+		 * blocks, recording them to iobuf->dr_blocks and fixing up
+		 * m_lblk, m_len for un-allocated blocks to be created/mapped
+		 * in the second ldiskfs_map_blocks().
+		 *
+		 * m_lblk must be the first un-allocated block: if m_lblk
+		 * points at an already allocated block when create = 1,
+		 * ldiskfs_map_blocks() just returns the already allocated
+		 * blocks without allocating any of the newly requested
+		 * blocks for the extent. In the PAGE_SIZE = blocksize
+		 * case, an m_lblk that points at an already allocated
+		 * block will point at an un-allocated block in the next
+		 * restart transaction, because the already mapped
+		 * block/page is filtered out of that transaction via the
+		 * OBD_BRW_DONE flag in osd_declare_write_commit().
+		 */
+		if (create && PAGE_SIZE > blocksize) {
+			/* With flags=0 just for already mapped blocks lookup */
+			rc = ldiskfs_map_blocks(handle, inode, &map, 0);
+			if (rc > 0 && map.m_flags & LDISKFS_MAP_MAPPED) {
+				for (; total < blen && total < map.m_len;
+						total++)
+					*(blocks + total) = map.m_pblk + total;
+
+				/* The extent is already full mapped */
+				if (total == blen) {
+					rc = 0;
+					goto ext_already_mapped;
+				}
+			}
+			/*
+			 * Fixup or reset m_lblk and m_len for un-mapped blocks.
+			 * The second ldiskfs_map_blocks() will create and map
+			 * them.
+			 */
+			map.m_lblk = fp->index * blocks_per_page + total;
+			map.m_len = blen - total;
+		}
+
 cont_map:
 		/**
 		 * We might restart transaction for block allocations,
@@ -1204,6 +1247,7 @@ cont_map:
 			rc = 0;
 		}
 
+ext_already_mapped:
 		if (rc == 0 && create) {
 			count += (total - previous_total);
 			mapped_index = (count + blocks_per_page -
-- 
1.8.3.1