Whamcloud - gitweb
Create a per-fs lock for obdfilter block allocation. This is held only
author adilger <adilger>
Thu, 20 Nov 2003 19:05:02 +0000 (19:05 +0000)
committer adilger <adilger>
Thu, 20 Nov 2003 19:05:02 +0000 (19:05 +0000)
during actual block allocation and not during RPCs or writes.  This allows
us to allocate contiguous chunks of disk (if available) up to the size of
each RPC, instead of interleaving block allocations.

It slows down writes in the contention case, because we might be holding
the lock while waiting for a bitmap (or similar) to be loaded from disk,
and in the current 2.4 I/O code, reads queued behind many writes can be
punishing. We might benefit here and elsewhere from AKPM's read priority
patch.

The big benefit is that at read time, or after some amount of create/delete
activity, we don't have a maximally fragmented block allocation to deal
with, which causes pathological seeking on the disks.

b=2260
r=peter,phil

lustre/obdfilter/filter_io_24.c

index 90f6135..72b7ba6 100644 (file)
@@ -83,6 +83,7 @@ static int filter_direct_io(int rw, struct dentry *dchild, struct kiobuf *iobuf,
                 GOTO(cleanup, rc);
         cleanup_phase = 2;
 
+        down(&exp->exp_obd->u.filter.fo_alloc_lock);
         for (i = 0, cr = created, b = iobuf->blocks; i < iobuf->nr_pages; i++){
                 page = iobuf->maplist[i];
 
@@ -90,12 +91,14 @@ static int filter_direct_io(int rw, struct dentry *dchild, struct kiobuf *iobuf,
                 if (rc) {
                         CERROR("ino %lu, blk %lu cr %u create %d: rc %d\n",
                                inode->i_ino, *b, *cr, create, rc);
+                        up(&exp->exp_obd->u.filter.fo_alloc_lock);
                         GOTO(cleanup, rc);
                 }
 
                 b += blocks_per_page;
                 cr += blocks_per_page;
         }
+        up(&exp->exp_obd->u.filter.fo_alloc_lock);
 
         if (attr->ia_size > inode->i_size)
                 attr->ia_valid |= ATTR_SIZE;
@@ -146,7 +149,7 @@ cleanup:
         case 2:
                 unlock_kiovec(1, &iobuf);
         case 1:
-                OBD_FREE(created, sizeof(*created) * 
+                OBD_FREE(created, sizeof(*created) *
                          iobuf->nr_pages*blocks_per_page);
         case 0:
                 if (cleanup_phase == 3)
@@ -213,7 +216,7 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa, int objcount,
         }
 
         push_ctxt(&saved, &obd->obd_ctxt, NULL);
-        cleanup_phase = 2; 
+        cleanup_phase = 2;
 
         down(&inode->i_sem);
         oti->oti_handle = fsfilt_brw_start(obd, objcount, &fso, niocount, oti);