Whamcloud - gitweb
LU-16966 osd: take trunc_lock for fallocate 10/52710/2
author Alex Zhuravlev <bzzz@whamcloud.com>
Mon, 16 Oct 2023 13:59:08 +0000 (16:59 +0300)
committer Oleg Drokin <green@whamcloud.com>
Mon, 13 Nov 2023 02:07:41 +0000 (02:07 +0000)
As fallocate may need a few transactions (or a transaction restart),
we have to avoid any concurrent writes/truncates on this object
until fallocate supports 'restart-from-beginning': first stop the
transaction, then release the lock, then repeat again (like
the write path does).

Lustre-change: https://review.whamcloud.com/52264
Lustre-commit: 51529fb57f85210e292a15c882cf25a4689ea77d

Signed-off-by: Alex Zhuravlev <bzzz@whamcloud.com>
Change-Id: I0bf38b1886fbf24656b45fe0f87fcbad2227672a
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/52710
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Arshad Hussain <arshad.hussain@aeoncomputing.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
lustre/osd-ldiskfs/osd_io.c

index 5a21f0f..f74862c 100644 (file)
@@ -2262,33 +2262,39 @@ static int osd_declare_fallocate(const struct lu_env *env,
        LASSERT(th);
        LASSERT(inode);
 
-       if (mode & FALLOC_FL_PUNCH_HOLE) {
-               rc = osd_declare_inode_qid(env, i_uid_read(inode),
-                                          i_gid_read(inode),
-                                          i_projid_read(inode), 0, oh,
-                                          osd_dt_obj(dt), NULL, OSD_QID_BLK);
-               if (rc == 0)
-                       rc = osd_trunc_lock(osd_dt_obj(dt), oh, false);
-               RETURN(rc);
-       }
-
-       /* quota space for metadata blocks
-        * approximate metadata estimate should be good enough.
-        */
-       quota_space += PAGE_SIZE;
-       quota_space += depth * LDISKFS_BLOCK_SIZE(osd_sb(osd));
+       if ((mode & FALLOC_FL_PUNCH_HOLE) == 0) {
+               /* quota space for metadata blocks
+                * approximate metadata estimate should be good enough.
+                */
+               quota_space += PAGE_SIZE;
+               quota_space += depth * LDISKFS_BLOCK_SIZE(osd_sb(osd));
 
-       /* quota space should be reported in 1K blocks */
-       quota_space = toqb(quota_space) + toqb(end - start) +
-                     LDISKFS_META_TRANS_BLOCKS(inode->i_sb);
+               /* quota space should be reported in 1K blocks */
+               quota_space = toqb(quota_space) + toqb(end - start) +
+                       LDISKFS_META_TRANS_BLOCKS(inode->i_sb);
 
-       /* We don't need to reserve credits for whole fallocate here.
-        * We reserve space only for metadata. Fallocate credits are
-        * extended as required
-        */
+               /*
+                * We don't need to reserve credits for whole fallocate here.
+                * We reserve space only for metadata. Fallocate credits are
+                * extended as required
+                */
+       }
        rc = osd_declare_inode_qid(env, i_uid_read(inode), i_gid_read(inode),
                                   i_projid_read(inode), quota_space, oh,
                                   osd_dt_obj(dt), NULL, OSD_QID_BLK);
+       if (rc)
+               RETURN(rc);
+
+       /*
+        * Both hole punch and allocation may need a few transactions
+        * to complete, so we have to avoid concurrent writes/truncates,
+        * as we can't release the object lock from within ldiskfs.
+        * Note the locking order: transaction start, then lock object
+        * (don't confuse the object lock dt_{read|write}_lock() with
+        * the trunc lock).
+        */
+       rc = osd_trunc_lock(osd_dt_obj(dt), oh, false);
+
        RETURN(rc);
 }