Whamcloud - gitweb
LU-12593 osd: zeroing a freshly allocated block buffer
[fs/lustre-release.git] / lustre / osd-ldiskfs / osd_io.c
index f140e2e..3503e5a 100644 (file)
@@ -164,7 +164,6 @@ static void dio_complete_routine(struct bio *bio, int error)
 {
 #endif
        struct osd_iobuf *iobuf = bio->bi_private;
-       int iter;
        struct bio_vec *bvl;
 
         /* CAVEAT EMPTOR: possibly in IRQ context
@@ -191,7 +190,9 @@ static void dio_complete_routine(struct bio *bio, int error)
 
        /* the check is outside of the cycle for performance reason -bzzz */
        if (!bio_data_dir(bio)) {
-               bio_for_each_segment_all(bvl, bio, iter) {
+               DECLARE_BVEC_ITER_ALL(iter_all);
+
+               bio_for_each_segment_all(bvl, bio, iter_all) {
                        if (likely(error == 0))
                                SetPageUptodate(bvl_to_page(bvl));
                        LASSERT(PageLocked(bvl_to_page(bvl)));
@@ -279,11 +280,11 @@ static int can_be_merged(struct bio *bio, sector_t sector)
 static void bio_integrity_fault_inject(struct bio *bio)
 {
        struct bio_vec *bvec;
-       int i;
+       DECLARE_BVEC_ITER_ALL(iter_all);
        void *kaddr;
        char *addr;
 
-       bio_for_each_segment_all(bvec, bio, i) {
+       bio_for_each_segment_all(bvec, bio, iter_all) {
                struct page *page = bvec->bv_page;
 
                kaddr = kmap(page);
@@ -329,12 +330,13 @@ static int osd_bio_integrity_compare(struct bio *bio, struct block_device *bdev,
                bip->bip_vec->bv_offset;
        struct bio_vec *bv;
        sector_t sector = bio_start_sector(bio);
-       unsigned int i, sectors, total;
+       unsigned int sectors, total;
+       DECLARE_BVEC_ITER_ALL(iter_all);
        __u16 *expected_guard;
        int rc;
 
        total = 0;
-       bio_for_each_segment_all(bv, bio, i) {
+       bio_for_each_segment_all(bv, bio, iter_all) {
                lnb = iobuf->dr_lnbs[index];
                expected_guard = lnb->lnb_guards;
                sectors = bv->bv_len / sector_size;
@@ -527,7 +529,7 @@ static int osd_do_bio(struct osd_device *osd, struct inode *inode,
                                       bi_size, bio->bi_vcnt, bio->bi_max_vecs,
                                       bio_sectors(bio),
                                       queue_max_sectors(q),
-                                      bio->bi_phys_segments,
+                                      osd_bio_nr_segs(bio),
                                       queue_max_segments(q));
                                rc = osd_bio_integrity_handle(osd, bio,
                                        iobuf, bio_start_page_idx,
@@ -609,8 +611,9 @@ out:
 }
 
 static int osd_map_remote_to_local(loff_t offset, ssize_t len, int *nrpages,
-                                   struct niobuf_local *lnb)
+                                  struct niobuf_local *lnb, int maxlnb)
 {
+       int rc = 0;
         ENTRY;
 
         *nrpages = 0;
@@ -619,6 +622,11 @@ static int osd_map_remote_to_local(loff_t offset, ssize_t len, int *nrpages,
                int poff = offset & (PAGE_SIZE - 1);
                int plen = PAGE_SIZE - poff;
 
+               if (*nrpages >= maxlnb) {
+                       rc = -EOVERFLOW;
+                       break;
+               }
+
                 if (plen > len)
                         plen = len;
                lnb->lnb_file_offset = offset;
@@ -640,7 +648,7 @@ static int osd_map_remote_to_local(loff_t offset, ssize_t len, int *nrpages,
                 (*nrpages)++;
         }
 
-        RETURN(0);
+       RETURN(rc);
 }
 
 static struct page *osd_get_page(const struct lu_env *env, struct dt_object *dt,
@@ -792,7 +800,7 @@ static int osd_bufs_put(const struct lu_env *env, struct dt_object *dt,
  */
 static int osd_bufs_get(const struct lu_env *env, struct dt_object *dt,
                        loff_t pos, ssize_t len, struct niobuf_local *lnb,
-                       enum dt_bufs_type rw)
+                       int maxlnb, enum dt_bufs_type rw)
 {
        struct osd_thread_info *oti = osd_oti_get(env);
        struct osd_object *obj = osd_dt_obj(dt);
@@ -810,7 +818,9 @@ static int osd_bufs_get(const struct lu_env *env, struct dt_object *dt,
                }
        }
 
-       osd_map_remote_to_local(pos, len, &npages, lnb);
+       rc = osd_map_remote_to_local(pos, len, &npages, lnb, maxlnb);
+       if (rc)
+               RETURN(rc);
 
        /* this could also try less hard for DT_BUFS_TYPE_READAHEAD pages */
        gfp_mask = rw & DT_BUFS_TYPE_LOCAL ? (GFP_NOFS | __GFP_HIGHMEM) :
@@ -1644,6 +1654,8 @@ int osd_ldiskfs_write_record(struct inode *inode, void *buf, int bufsize,
         int                 size;
         int                 boffs;
         int                 dirty_inode = 0;
+       struct ldiskfs_inode_info *ei = LDISKFS_I(inode);
+       bool create, sparse;
 
        if (write_NUL) {
                /*
@@ -1655,8 +1667,15 @@ int osd_ldiskfs_write_record(struct inode *inode, void *buf, int bufsize,
                ++bufsize;
        }
 
+       /* sparse check is racy, but sparse files are very rare; leave as is */
+       sparse = (new_size > 0 && (inode->i_blocks >> (inode->i_blkbits - 9)) <
+                 ((new_size - 1) >> inode->i_blkbits) + 1);
+
        while (bufsize > 0) {
                int credits = handle->h_buffer_credits;
+               bool sync;
+               unsigned long last_block = (new_size == 0) ? 0 :
+                                          (new_size - 1) >> inode->i_blkbits;
 
                if (bh)
                        brelse(bh);
@@ -1664,7 +1683,26 @@ int osd_ldiskfs_write_record(struct inode *inode, void *buf, int bufsize,
                block = offset >> inode->i_blkbits;
                boffs = offset & (blocksize - 1);
                size = min(blocksize - boffs, bufsize);
-               bh = __ldiskfs_bread(handle, inode, block, 1);
+               sync = (block > last_block || new_size == 0 || sparse);
+
+               if (sync)
+                       down(&ei->i_append_sem);
+
+               bh = __ldiskfs_bread(handle, inode, block, 0);
+
+               if (unlikely(IS_ERR_OR_NULL(bh) && !sync))
+                       CWARN("%s: adding bh without locking off %llu (block %lu, "
+                             "size %d, offs %llu)\n", inode->i_sb->s_id,
+                             offset, block, bufsize, *offs);
+
+               if (IS_ERR_OR_NULL(bh)) {
+                       bh = __ldiskfs_bread(handle, inode, block, 1);
+                       create = true;
+               } else {
+                       if (sync)
+                               up(&ei->i_append_sem);
+                       create = false;
+               }
                if (IS_ERR_OR_NULL(bh)) {
                        if (bh == NULL) {
                                err = -EIO;
@@ -1689,7 +1727,12 @@ int osd_ldiskfs_write_record(struct inode *inode, void *buf, int bufsize,
                LASSERTF(boffs + size <= bh->b_size,
                         "boffs %d size %d bh->b_size %lu\n",
                         boffs, size, (unsigned long)bh->b_size);
-                memcpy(bh->b_data + boffs, buf, size);
+               if (create) {
+                       memset(bh->b_data, 0, bh->b_size);
+                       if (sync)
+                               up(&ei->i_append_sem);
+               }
+               memcpy(bh->b_data + boffs, buf, size);
                err = ldiskfs_handle_dirty_metadata(handle, NULL, bh);
                 if (err)
                         break;