Whamcloud - gitweb
osd: implement osd_{read,write}() on top of fsfilt_ldiskfs buffer-head-based versions.
author nikita <nikita>
Mon, 23 Oct 2006 22:26:01 +0000 (22:26 +0000)
committer nikita <nikita>
Mon, 23 Oct 2006 22:26:01 +0000 (22:26 +0000)
lustre/lvfs/fsfilt_ext3.c
lustre/osd/osd_handler.c
lustre/utils/mkfs_lustre.c

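diff --git a/lustre/lvfs/fsfilt_ext3.c b/lustre/lvfs/fsfilt_ext3.c
index 2cc74ab..f0e18b8 100644
--- a/lustre/lvfs/fsfilt_ext3.c
+++ b/lustre/lvfs/fsfilt_ext3.c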
@@ -207,11 +207,11 @@ static void *fsfilt_ext3_start(struct inode *inode, int op, void *desc_private,
                         FSFILT_DELETE_TRANS_BLOCKS(inode->i_sb) * logs;
                 break;
         case FSFILT_OP_JOIN:
-                /* delete 2 file(file + array id) + create 1 file (array id) 
+                /* delete 2 file(file + array id) + create 1 file (array id)
                  * create/update logs for each stripe */
                 nblocks += 2 * FSFILT_DELETE_TRANS_BLOCKS(inode->i_sb);
-               
-                /*create array log for head file*/ 
+
+                /*create array log for head file*/
                 nblocks += 3;
                 nblocks += (EXT3_INDEX_EXTRA_TRANS_BLOCKS +
                             EXT3_SINGLEDATA_TRANS_BLOCKS);
@@ -1119,10 +1119,8 @@ static int fsfilt_ext3_prep_san_write(struct inode *inode, long *blocks,
         return ext3_prep_san_write(inode, blocks, nblocks, newsize);
 }
 
-static int fsfilt_ext3_read_record(struct file * file, void *buf,
-                                   int size, loff_t *offs)
+int fsfilt_ext3_read(struct inode *inode, void *buf, int size, loff_t *offs)
 {
-        struct inode *inode = file->f_dentry->d_inode;
         unsigned long block;
         struct buffer_head *bh;
         int err, blocksize, csize, boffs;
@@ -1164,34 +1162,22 @@ static int fsfilt_ext3_read_record(struct file * file, void *buf,
         }
         return 0;
 }
+EXPORT_SYMBOL(fsfilt_ext3_read);
 
-static int fsfilt_ext3_write_record(struct file *file, void *buf, int bufsize,
-                                    loff_t *offs, int force_sync)
+static int fsfilt_ext3_read_record(struct file * file, void *buf,
+                                   int size, loff_t *offs)
+{
+        return fsfilt_ext3_read(file->f_dentry->d_inode, buf, size, offs);
+}
+
+int fsfilt_ext3_write_handle(struct inode *inode, void *buf, int bufsize,
+                                loff_t *offs, handle_t *handle)
 {
         struct buffer_head *bh = NULL;
-        unsigned long block;
-        struct inode *inode = file->f_dentry->d_inode;
         loff_t old_size = inode->i_size, offset = *offs;
         loff_t new_size = inode->i_size;
-        journal_t *journal;
-        handle_t *handle;
-        int err, block_count = 0, blocksize, size, boffs;
-
-        /* Determine how many transaction credits are needed */
-        blocksize = 1 << inode->i_blkbits;
-        block_count = (*offs & (blocksize - 1)) + bufsize;
-        block_count = (block_count + blocksize - 1) >> inode->i_blkbits;
-
-        journal = EXT3_SB(inode->i_sb)->s_journal;
-        lock_24kernel();
-        handle = journal_start(journal,
-                               block_count * FSFILT_DATA_TRANS_BLOCKS(inode->i_sb) + 2);
-        unlock_24kernel();
-        if (IS_ERR(handle)) {
-                CERROR("can't start transaction for %d blocks (%d bytes)\n",
-                       block_count * FSFILT_DATA_TRANS_BLOCKS(inode->i_sb) + 2, bufsize);
-                return PTR_ERR(handle);
-        }
+        unsigned long block;
+        int err = 0, blocksize = 1 << inode->i_blkbits, size, boffs;
 
         while (bufsize > 0) {
                 if (bh != NULL)
@@ -1203,14 +1189,14 @@ static int fsfilt_ext3_write_record(struct file *file, void *buf, int bufsize,
                 bh = ext3_bread(handle, inode, block, 1, &err);
                 if (!bh) {
                         CERROR("can't read/create block: %d\n", err);
-                        goto out;
+                        break;
                 }
 
                 err = ext3_journal_get_write_access(handle, bh);
                 if (err) {
                         CERROR("journal_get_write_access() returned error %d\n",
                                err);
-                        goto out;
+                        break;
                 }
                 LASSERT(bh->b_data + boffs + size <= bh->b_data + bh->b_size);
                 memcpy(bh->b_data + boffs, buf, size);
@@ -1218,7 +1204,7 @@ static int fsfilt_ext3_write_record(struct file *file, void *buf, int bufsize,
                 if (err) {
                         CERROR("journal_dirty_metadata() returned error %d\n",
                                err);
-                        goto out;
+                        break;
                 }
                 if (offset + size > new_size)
                         new_size = offset + size;
@@ -1226,10 +1212,6 @@ static int fsfilt_ext3_write_record(struct file *file, void *buf, int bufsize,
                 bufsize -= size;
                 buf += size;
         }
-
-        if (force_sync)
-                handle->h_sync = 1; /* recovery likes this */
-out:
         if (bh)
                 brelse(bh);
 
@@ -1245,12 +1227,45 @@ out:
                 unlock_kernel();
         }
 
+        if (err == 0)
+                *offs = offset;
+        return err;
+}
+EXPORT_SYMBOL(fsfilt_ext3_write_handle);
+
+static int fsfilt_ext3_write_record(struct file *file, void *buf, int bufsize,
+                                    loff_t *offs, int force_sync)
+{
+        struct inode *inode = file->f_dentry->d_inode;
+        journal_t *journal;
+        handle_t *handle;
+        int err, block_count = 0, blocksize;
+
+        /* Determine how many transaction credits are needed */
+        blocksize = 1 << inode->i_blkbits;
+        block_count = (*offs & (blocksize - 1)) + bufsize;
+        block_count = (block_count + blocksize - 1) >> inode->i_blkbits;
+
+        journal = EXT3_SB(inode->i_sb)->s_journal;
+        lock_24kernel();
+        handle = journal_start(journal,
+                               block_count * FSFILT_DATA_TRANS_BLOCKS(inode->i_sb) + 2);
+        unlock_24kernel();
+        if (IS_ERR(handle)) {
+                CERROR("can't start transaction for %d blocks (%d bytes)\n",
+                       block_count * FSFILT_DATA_TRANS_BLOCKS(inode->i_sb) + 2, bufsize);
+                return PTR_ERR(handle);
+        }
+
+        err = fsfilt_ext3_write_handle(inode, buf, bufsize, offs, handle);
+
+        if (!err && force_sync)
+                handle->h_sync = 1; /* recovery likes this */
+
         lock_24kernel();
         journal_stop(handle);
         unlock_24kernel();
 
-        if (err == 0)
-                *offs = offset;
         return err;
 }
 
@@ -1324,7 +1339,7 @@ do {                                            \
         Q_COPY(out, in, dqb_valid);             \
 } while (0)
 
-      
+
 
 static int fsfilt_ext3_quotactl(struct super_block *sb,
                                 struct obd_quotactl *oqc)
@@ -1908,7 +1923,7 @@ out:
 }
 
 #ifdef HAVE_QUOTA_SUPPORT
-static int fsfilt_ext3_quotainfo(struct lustre_quota_info *lqi, int type, 
+static int fsfilt_ext3_quotainfo(struct lustre_quota_info *lqi, int type,
                                  int cmd)
 {
         int rc = 0;
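diff --git a/lustre/osd/osd_handler.c b/lustre/osd/osd_handler.c
index 0dd3312..174f207 100644
--- a/lustre/osd/osd_handler.c
+++ b/lustre/osd/osd_handler.c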
@@ -284,31 +284,6 @@ static int osd_write_locked(const struct lu_env *env, struct osd_object *o)
         return oti->oti_w_locks > 0 && o->oo_owner == env;
 }
 
-/* helper to push us into KERNEL_DS context */
-static struct file *osd_rw_init(const struct lu_env *env,
-                                struct inode *inode, mm_segment_t *seg)
-{
-        struct osd_thread_info *info   = lu_context_key_get(&env->le_ctx, &osd_key);
-        struct dentry          *dentry = &info->oti_dentry;
-        struct file            *file   = &info->oti_file;
-
-        file->f_dentry = dentry;
-        file->f_mapping = inode->i_mapping;
-        file->f_op      = inode->i_fop;
-        file->f_mode    = FMODE_WRITE|FMODE_READ;
-        dentry->d_inode = inode;
-
-        *seg = get_fs();
-        set_fs(KERNEL_DS);
-        return file;
-}
-
-/* helper to pop us from KERNEL_DS context */
-static void osd_rw_fini(mm_segment_t *seg)
-{
-        set_fs(*seg);
-}
-
 static int osd_root_get(const struct lu_env *env,
                         struct dt_device *dev, struct lu_fid *f)
 {
@@ -1536,56 +1511,51 @@ static struct dt_object_operations osd_obj_ops = {
  * Body operations.
  */
 
+/*
+ * XXX: Another layering violation for now.
+ *
+ * We don't want to use ->f_op->read methods, because generic file write
+ *
+ *         - serializes on ->i_sem, and
+ *
+ *         - does a lot of extra work like balance_dirty_pages(),
+ *
+ * which doesn't work for globally shared files like /last-received.
+ */
+int fsfilt_ldiskfs_read(struct inode *inode, void *buf, int size, loff_t *offs);
+int fsfilt_ldiskfs_write_handle(struct inode *inode, void *buf, int bufsize,
+                                loff_t *offs, handle_t *handle);
+
 static ssize_t osd_read(const struct lu_env *env, struct dt_object *dt,
                         struct lu_buf *buf, loff_t *pos,
                         struct lustre_capa *capa)
 {
         struct inode *inode = osd_dt_obj(dt)->oo_inode;
-        struct file  *file;
-        mm_segment_t  seg;
-        ssize_t       result;
 
         if (osd_object_auth(env, dt, capa, CAPA_OPC_BODY_READ))
                 RETURN(-EACCES);
 
-        file = osd_rw_init(env, inode, &seg);
-        /*
-         * We'd like to use vfs_read() here, but it messes with
-         * dnotify_parent() and locks.
-         */
-        if (file->f_op->read)
-                result = file->f_op->read(file, buf->lb_buf, buf->lb_len, pos);
-        else {
-                /* TODO: how to serve symlink readlink()? */
-                CERROR("read not implemented currently\n");
-                result = -ENOSYS;
-        }
-        osd_rw_fini(&seg);
-        return result;
+        return fsfilt_ldiskfs_read(inode, buf->lb_buf, buf->lb_len, pos);
 }
 
 static ssize_t osd_write(const struct lu_env *env, struct dt_object *dt,
                          const struct lu_buf *buf, loff_t *pos,
                          struct thandle *handle, struct lustre_capa *capa)
 {
-        struct inode *inode = osd_dt_obj(dt)->oo_inode;
-        struct file  *file;
-        mm_segment_t  seg;
-        ssize_t       result;
+        struct inode       *inode = osd_dt_obj(dt)->oo_inode;
+        struct osd_thandle *oh;
+        ssize_t             result;
 
         LASSERT(handle != NULL);
 
         if (osd_object_auth(env, dt, capa, CAPA_OPC_BODY_WRITE))
                 RETURN(-EACCES);
 
-        file = osd_rw_init(env, inode, &seg);
-        if (file->f_op->write)
-                result = file->f_op->write(file, buf->lb_buf, buf->lb_len, pos);
-        else {
-                CERROR("write not implemented currently\n");
-                result = -ENOSYS;
-        }
-        osd_rw_fini(&seg);
+        oh = container_of(handle, struct osd_thandle, ot_super);
+        result = fsfilt_ldiskfs_write_handle(inode, buf->lb_buf, buf->lb_len,
+                                             pos, oh->ot_handle);
+        if (result == 0)
+                result = buf->lb_len;
         return result;
 }
 
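diff --git a/lustre/utils/mkfs_lustre.c b/lustre/utils/mkfs_lustre.c
index 527947a..3929474 100644
--- a/lustre/utils/mkfs_lustre.c
+++ b/lustre/utils/mkfs_lustre.c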
@@ -1461,9 +1461,9 @@ int main(int argc, char *argv[])
                 goto out;
         }
 #if 0
-        /* 
+        /*
          * Comment out these 2 checks temporarily, since for multi-MDSes
-         * in single node only 1 mds node could have mgs service 
+         * in single node only 1 mds node could have mgs service
          */
         if (IS_MDT(ldd) && !IS_MGS(ldd) && (mop.mo_mgs_failnodes == 0)) {
                 verrprint("No management node specified, adding MGS to this "
@@ -1486,8 +1486,6 @@ int main(int argc, char *argv[])
                 if (IS_MDT(ldd) || IS_MGS(ldd))
                         strcat(always_mountopts,
                                ",iopen_nopriv,user_xattr");
-                if (IS_MDT(ldd))
-                        strcat(always_mountopts, ",data=journal");
                 if ((get_os_version() == 24) && IS_OST(ldd))
                         strcat(always_mountopts, ",asyncdel");
                 /* NB: Files created while extents are enabled cannot be read