Whamcloud - gitweb
LU-12593 osd: up i_append_sem during errors
[fs/lustre-release.git] / lustre / osd-ldiskfs / osd_io.c
index b7a55ce..64b995d 100644 (file)
@@ -1126,10 +1126,10 @@ static int osd_declare_write_commit(const struct lu_env *env,
        int                     i;
        int                     newblocks;
        int                     rc = 0;
-       int                     flags = 0;
        int                     credits = 0;
        long long               quota_space = 0;
        struct osd_fextent      extent = { 0 };
+       enum osd_quota_local_flags local_flags = 0;
        enum osd_qid_declare_flags declare_flags = OSD_QID_BLK;
        ENTRY;
 
@@ -1214,16 +1214,16 @@ static int osd_declare_write_commit(const struct lu_env *env,
 
        rc = osd_declare_inode_qid(env, i_uid_read(inode), i_gid_read(inode),
                                   i_projid_read(inode), quota_space, oh,
-                                  osd_dt_obj(dt), &flags, declare_flags);
+                                  osd_dt_obj(dt), &local_flags, declare_flags);
 
        /* we need only to store the overquota flags in the first lnb for
         * now, once we support multiple objects BRW, this code needs be
         * revised. */
-       if (flags & QUOTA_FL_OVER_USRQUOTA)
+       if (local_flags & QUOTA_FL_OVER_USRQUOTA)
                lnb[0].lnb_flags |= OBD_BRW_OVER_USRQUOTA;
-       if (flags & QUOTA_FL_OVER_GRPQUOTA)
+       if (local_flags & QUOTA_FL_OVER_GRPQUOTA)
                lnb[0].lnb_flags |= OBD_BRW_OVER_GRPQUOTA;
-       if (flags & QUOTA_FL_OVER_PRJQUOTA)
+       if (local_flags & QUOTA_FL_OVER_PRJQUOTA)
                lnb[0].lnb_flags |= OBD_BRW_OVER_PRJQUOTA;
 
        if (rc == 0)
@@ -1363,10 +1363,10 @@ static int osd_read_prep(const struct lu_env *env, struct dt_object *dt,
                         * lnb->lnb_rc == 0, so it's easy to detect later. */
                        break;
 
-               if (isize < lnb[i].lnb_file_offset + lnb[i].lnb_len)
-                       lnb[i].lnb_rc = isize - lnb[i].lnb_file_offset;
-               else
-                       lnb[i].lnb_rc = lnb[i].lnb_len;
+               /* instead of looking if we go beyond isize, send complete
+                * pages all the time
+                */
+               lnb[i].lnb_rc = lnb[i].lnb_len;
 
                /* Bypass disk read if fail_loc is set properly */
                if (OBD_FAIL_CHECK(OBD_FAIL_OST_FAKE_RW))
@@ -1681,9 +1681,11 @@ static int osd_ldiskfs_writelink(struct inode *inode, char *buffer, int buflen)
        return 0;
 }
 
-int osd_ldiskfs_write_record(struct inode *inode, void *buf, int bufsize,
-                            int write_NUL, loff_t *offs, handle_t *handle)
+static int osd_ldiskfs_write_record(struct dt_object *dt, void *buf,
+                                   int bufsize, int write_NUL, loff_t *offs,
+                                   handle_t *handle)
 {
+       struct inode *inode = osd_dt_obj(dt)->oo_inode;
         struct buffer_head *bh        = NULL;
         loff_t              offset    = *offs;
         loff_t              new_size  = i_size_read(inode);
@@ -1694,7 +1696,7 @@ int osd_ldiskfs_write_record(struct inode *inode, void *buf, int bufsize,
         int                 boffs;
         int                 dirty_inode = 0;
        struct ldiskfs_inode_info *ei = LDISKFS_I(inode);
-       bool create, sparse;
+       bool create, sparse, sync = false;
 
        if (write_NUL) {
                /*
@@ -1712,7 +1714,6 @@ int osd_ldiskfs_write_record(struct inode *inode, void *buf, int bufsize,
 
        while (bufsize > 0) {
                int credits = handle->h_buffer_credits;
-               bool sync;
                unsigned long last_block = (new_size == 0) ? 0 :
                                           (new_size - 1) >> inode->i_blkbits;
 
@@ -1735,11 +1736,24 @@ int osd_ldiskfs_write_record(struct inode *inode, void *buf, int bufsize,
                              offset, block, bufsize, *offs);
 
                if (IS_ERR_OR_NULL(bh)) {
-                       bh = __ldiskfs_bread(handle, inode, block, 1);
+                       struct osd_device *osd = osd_obj2dev(osd_dt_obj(dt));
+                       int flags = LDISKFS_GET_BLOCKS_CREATE;
+
+                       /* while the file system is being mounted, avoid
+                        * preallocation otherwise mount can take a long
+                        * time as mballoc cache is cold.
+                        * XXX: this is a workaround until we have a proper
+                        *      fix in mballoc
+                        * XXX: works with extent-based files only */
+                       if (!osd->od_cl_seq)
+                               flags |= LDISKFS_GET_BLOCKS_NO_NORMALIZE;
+                       bh = __ldiskfs_bread(handle, inode, block, flags);
                        create = true;
                } else {
-                       if (sync)
+                       if (sync) {
                                up(&ei->i_append_sem);
+                               sync = false;
+                       }
                        create = false;
                }
                if (IS_ERR_OR_NULL(bh)) {
@@ -1768,8 +1782,10 @@ int osd_ldiskfs_write_record(struct inode *inode, void *buf, int bufsize,
                         boffs, size, (unsigned long)bh->b_size);
                if (create) {
                        memset(bh->b_data, 0, bh->b_size);
-                       if (sync)
+                       if (sync) {
                                up(&ei->i_append_sem);
+                               sync = false;
+                       }
                }
                memcpy(bh->b_data + boffs, buf, size);
                err = ldiskfs_handle_dirty_metadata(handle, NULL, bh);
@@ -1782,8 +1798,11 @@ int osd_ldiskfs_write_record(struct inode *inode, void *buf, int bufsize,
                 bufsize -= size;
                 buf += size;
         }
-        if (bh)
-                brelse(bh);
+       if (sync)
+               up(&ei->i_append_sem);
+
+       if (bh)
+               brelse(bh);
 
        if (write_NUL)
                --new_size;
@@ -1837,9 +1856,8 @@ static ssize_t osd_write(const struct lu_env *env, struct dt_object *dt,
        if (is_link && (buf->lb_len < sizeof(LDISKFS_I(inode)->i_data)))
                result = osd_ldiskfs_writelink(inode, buf->lb_buf, buf->lb_len);
        else
-               result = osd_ldiskfs_write_record(inode, buf->lb_buf,
-                                                 buf->lb_len, is_link, pos,
-                                                 oh->ot_handle);
+               result = osd_ldiskfs_write_record(dt, buf->lb_buf, buf->lb_len,
+                                                 is_link, pos, oh->ot_handle);
        if (result == 0)
                result = buf->lb_len;
 
@@ -2148,7 +2166,9 @@ void osd_execute_truncate(struct osd_object *obj)
                return;
        }
 
+       inode_lock(inode);
        ldiskfs_truncate(inode);
+       inode_unlock(inode);
 
        /*
         * For a partial-page truncate, flush the page to disk immediately to