iobuf->dr_elapsed_valid = 0;
LASSERT(iobuf->dr_dev == d);
LASSERT(iobuf->dr_frags > 0);
- lprocfs_oh_tally(&d->od_brw_stats.hist[BRW_R_DIO_FRAGS+rw],
+ lprocfs_oh_tally(&d->od_brw_stats.bs_hist[BRW_R_DIO_FRAGS + rw],
iobuf->dr_frags);
- lprocfs_oh_tally_log2(&d->od_brw_stats.hist[BRW_R_IO_TIME+rw],
+ lprocfs_oh_tally_log2(&d->od_brw_stats.bs_hist[BRW_R_IO_TIME + rw],
ktime_to_ms(iobuf->dr_elapsed));
}
}
*/
if (unlikely(iobuf == NULL)) {
- CERROR("***** bio->bi_private is NULL! This should never happen. Normally, I would crash here, but instead I will dump the bio contents to the console. Please report this to <https://jira.whamcloud.com/> , along with any interesting messages leading up to this point (like SCSI errors, perhaps). Because bi_private is NULL, I can't wake up the thread that initiated this IO - you will probably have to reboot this node.\n");
+ CERROR("***** bio->bi_private is NULL! Dump the bio contents to the console. Please report this to <https://jira.whamcloud.com/>, and probably have to reboot this node.\n");
CERROR("bi_next: %p, bi_flags: %lx, " __stringify(bi_opf)
": %x, bi_vcnt: %d, bi_idx: %d, bi->size: %d, bi_end_io: %p, bi_cnt: %d, bi_private: %p\n",
bio->bi_next, (unsigned long)bio->bi_flags,
static void record_start_io(struct osd_iobuf *iobuf, int size)
{
- struct osd_device *osd = iobuf->dr_dev;
- struct obd_histogram *h = osd->od_brw_stats.hist;
+ struct osd_device *osd = iobuf->dr_dev;
+ struct obd_histogram *h = osd->od_brw_stats.bs_hist;
iobuf->dr_frags++;
atomic_inc(&iobuf->dr_numreqs);
for (page_idx = page_idx_start, block_idx = start_blocks;
block_idx < block_idx_end; page_idx++,
block_idx += blocks_left_page) {
+ /* For cases where the filesystem's blocksize is not the
+ * same as PAGE_SIZE (e.g. ARM with PAGE_SIZE=64KB and
+ * blocksize=4KB), there will be multiple blocks to
+ * read/write per page. Also, the start and end block may
+ * not be aligned to the start and end of the page, so the
+ * first page may skip some blocks at the start ("i != 0",
+ * "blocks_left_page" is reduced), and the last page may
+ * skip some blocks at the end (limited by "count").
+ */
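+ /* Worked example with hypothetical numbers: PAGE_SIZE=64KB and
+ * blocksize=4KB give 16 blocks per page. With start_blocks=3 and
+ * block_idx_end=35, the first page maps blocks 3..15 (i=3,
+ * blocks_left_page=13), the next page maps blocks 16..31, and the
+ * last page maps only blocks 32..34 because blocks_left_page is
+ * clamped by block_idx_end below.
+ */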
page = pages[page_idx];
LASSERT(page_idx < iobuf->dr_npages);
i = block_idx % blocks_per_page;
blocks_left_page = blocks_per_page - i;
- for (page_offset = i * blocksize; i < blocks_left_page;
+ if (block_idx + blocks_left_page > block_idx_end)
+ blocks_left_page = block_idx_end - block_idx;
+ page_offset = i * blocksize;
+ for (i = 0; i < blocks_left_page;
i += nblocks, page_offset += blocksize * nblocks) {
nblocks = 1;
* heavily-fragmented, it will be reduced to 4K at the worst.
*/
extent_bytes = osd_extent_bytes(osd);
- LASSERT(extent_bytes >= (1 << osd_sb(osd)->s_blocksize));
+ LASSERT(extent_bytes >= osd_sb(osd)->s_blocksize);
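+ /* note: sb->s_blocksize is the block size in bytes; its log2 lives
+ * in s_blocksize_bits.
+ */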
/* calculate number of extents (probably better to pass nb) */
for (i = 0; i < npages; i++) {
/* ignore quota for the whole request if any page is from
* client cache or written by root.
*
- * XXX once we drop the 1.8 client support, the checking
- * for whether page is from cache can be simplified as:
- * !(lnb[i].flags & OBD_BRW_SYNC)
- *
* XXX we could handle this on per-lnb basis as done by
* grant.
*/
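+ /* A page without OBD_BRW_SYNC was cached on the client under a
+ * grant; OBD_BRW_SYS_RESOURCE is assumed here to mark IO issued
+ * with CAP_SYS_RESOURCE, which, like OBD_BRW_NOQUOTA, is not
+ * subject to quota enforcement.
+ */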
if ((lnb[i].lnb_flags & OBD_BRW_NOQUOTA) ||
- (lnb[i].lnb_flags & (OBD_BRW_FROM_GRANT | OBD_BRW_SYNC)) ==
- OBD_BRW_FROM_GRANT)
+ (lnb[i].lnb_flags & OBD_BRW_SYS_RESOURCE) ||
+ !(lnb[i].lnb_flags & OBD_BRW_SYNC))
declare_flags |= OSD_QID_FORCE;
/*
* level.
*/
depth = inode != NULL ? ext_depth(inode) : 0;
- depth = min(max(depth, 1) + 1, LDISKFS_MAX_EXTENT_DEPTH);
+ depth = min(max(depth, 1) + 3, LDISKFS_MAX_EXTENT_DEPTH);
credits = depth;
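+ /* presumably one credit per extent tree level that an insert or
+ * split may touch; the headroom above the current depth allows for
+ * the tree growing, capped at LDISKFS_MAX_EXTENT_DEPTH.
+ */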
/* if not append, then split may need to modify
* existing blocks moving entries into the new ones
loff_t offset, int whence)
{
struct osd_object *obj = osd_dt_obj(dt);
+ struct osd_device *dev = osd_obj2dev(obj);
struct inode *inode = obj->oo_inode;
struct file *file;
loff_t result;
ENTRY;
-
LASSERT(dt_object_exists(dt));
LASSERT(osd_invariant(obj));
LASSERT(inode);
LASSERT(offset >= 0);
- file = osd_quasi_file(env, inode);
- result = file->f_op->llseek(file, offset, whence);
+ file = alloc_file_pseudo(inode, dev->od_mnt, "/", O_NOATIME,
+ inode->i_fop);
+ if (IS_ERR(file))
+ RETURN(PTR_ERR(file));
+ file->f_mode |= FMODE_64BITHASH;
+ result = file->f_op->llseek(file, offset, whence);
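+ /* alloc_file_pseudo() attaches the existing inode reference to the
+ * pseudo dentry and fput() will drop it, so take an extra hold to
+ * keep oo_inode pinned (assumption based on generic VFS reference
+ * handling of pseudo files).
+ */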
+ ihold(inode);
+ fput(file);
/*
* If 'offset' is beyond end of object file then treat it as not error
* but valid case for SEEK_HOLE and return 'offset' as result.
osd_partial_page_flush(d, inode, size);
}
-void osd_execute_punch(const struct lu_env *env, struct osd_object *obj,
- loff_t start, loff_t end, int mode)
+static int osd_execute_punch(const struct lu_env *env, struct osd_object *obj,
+ loff_t start, loff_t end, int mode)
{
struct osd_device *d = osd_obj2dev(obj);
struct inode *inode = obj->oo_inode;
- struct file *file = osd_quasi_file(env, inode);
+ struct file *file;
+ int rc;
+
+ file = alloc_file_pseudo(inode, d->od_mnt, "/", O_NOATIME,
+ inode->i_fop);
+ if (IS_ERR(file))
+ RETURN(PTR_ERR(file));
- file->f_op->fallocate(file, mode, start, end - start);
- osd_partial_page_flush_punch(d, inode, start, end - 1);
+ file->f_mode |= FMODE_64BITHASH;
+ rc = file->f_op->fallocate(file, mode, start, end - start);
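+ /* same reference handling as in osd_lseek(): ihold() compensates
+ * for the inode reference dropped by fput() via the pseudo dentry.
+ */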
+ ihold(inode);
+ fput(file);
+ if (rc == 0)
+ osd_partial_page_flush_punch(d, inode, start, end - 1);
+ return rc;
}
-void osd_process_truncates(const struct lu_env *env, struct list_head *list)
+int osd_process_truncates(const struct lu_env *env, struct list_head *list)
{
struct osd_access_lock *al;
+ int rc = 0;
- LASSERT(journal_current_handle() == NULL);
+ LASSERT(!journal_current_handle());
list_for_each_entry(al, list, tl_list) {
if (al->tl_shared)
continue;
if (al->tl_truncate)
osd_execute_truncate(al->tl_obj);
else if (al->tl_punch)
- osd_execute_punch(env, al->tl_obj, al->tl_start,
- al->tl_end, al->tl_mode);
+ rc = osd_execute_punch(env, al->tl_obj, al->tl_start,
+ al->tl_end, al->tl_mode);
}
+
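+ /* note: rc reflects only the last punch attempted; truncates do not
+ * report a status here.
+ */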
+ return rc;
}