Whamcloud - gitweb
LU-1883 osd: Fix niobuf_local offset usage
authorLi Wei <liwei@whamcloud.com>
Tue, 11 Sep 2012 15:02:13 +0000 (23:02 +0800)
committerOleg Drokin <green@whamcloud.com>
Mon, 24 Sep 2012 06:25:35 +0000 (02:25 -0400)
osd-ldiskfs fills file offsets into niobuf_local::offset fields in
osd_map_remote_to_local(), while osd_write_prep() treats the values as
page offsets.  Random memory corruptions may occur as a result of this
offset usage inconsistency.  This patch fixes the issue and introduces
the Orion lnb_file_offset and lnb_page_offset names.

Change-Id: I76c555ddbe7f41271be4bebecbc41ce036747258
Signed-off-by: Li Wei <liwei@whamcloud.com>
Reviewed-on: http://review.whamcloud.com/4051
Tested-by: Hudson
Reviewed-by: Alex Zhuravlev <bzzz@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Tested-by: Maloo <whamcloud.maloo@gmail.com>
lustre/include/obd.h
lustre/lvfs/fsfilt_ext3.c
lustre/obdclass/debug.c
lustre/obdecho/echo.c
lustre/obdfilter/filter_io.c
lustre/obdfilter/filter_io_26.c
lustre/osd-ldiskfs/osd_io.c
lustre/osd-zfs/osd_io.c
lustre/ost/ost_handler.c

index 12cea5a..d09369f 100644 (file)
@@ -822,7 +822,7 @@ struct lmv_obd {
 
 struct niobuf_local {
        __u64 lnb_file_offset;
-        __u64 offset;
+       __u32 lnb_page_offset;
         __u32 len;
         __u32 flags;
         cfs_page_t    *page;
index b1aa6c3..1eb15b6 100644 (file)
@@ -369,10 +369,11 @@ static int fsfilt_ext3_credits_needed(int objcount, struct fsfilt_objinfo *fso,
 
         for (i = 0, j = 0; i < objcount; i++, fso++) {
                 /* two or more dindirect blocks in case we cross boundary */
-                int ndind = (long)((nb[j + fso->fso_bufcnt - 1].offset -
-                                    nb[j].offset) >>
-                                   sb->s_blocksize_bits) /
-                        (EXT3_ADDR_PER_BLOCK(sb) * EXT3_ADDR_PER_BLOCK(sb));
+               int ndind =
+                       (long)((nb[j + fso->fso_bufcnt - 1].lnb_file_offset -
+                               nb[j].lnb_file_offset) >>
+                              sb->s_blocksize_bits) /
+                       (EXT3_ADDR_PER_BLOCK(sb) * EXT3_ADDR_PER_BLOCK(sb));
                 nbitmaps += min(fso->fso_bufcnt, ndind > 0 ? ndind : 2);
 
                 /* leaf, indirect, tindirect blocks for first block */
@@ -381,14 +382,16 @@ static int fsfilt_ext3_credits_needed(int objcount, struct fsfilt_objinfo *fso,
                 j += fso->fso_bufcnt;
         }
 
-        next_indir = nb[0].offset +
-                (EXT3_ADDR_PER_BLOCK(sb) << sb->s_blocksize_bits);
-        for (i = 1; i < niocount; i++) {
-                if (nb[i].offset >= next_indir) {
-                        nbitmaps++;     /* additional indirect */
-                        next_indir = nb[i].offset +
-                                (EXT3_ADDR_PER_BLOCK(sb)<<sb->s_blocksize_bits);
-                } else if (nb[i].offset != nb[i - 1].offset + sb->s_blocksize) {
+       next_indir = nb[0].lnb_file_offset +
+                    (EXT3_ADDR_PER_BLOCK(sb) << sb->s_blocksize_bits);
+       for (i = 1; i < niocount; i++) {
+               if (nb[i].lnb_file_offset >= next_indir) {
+                       nbitmaps++;     /* additional indirect */
+                       next_indir = nb[i].lnb_file_offset +
+                                    (EXT3_ADDR_PER_BLOCK(sb) <<
+                                     sb->s_blocksize_bits);
+               } else if (nb[i].lnb_file_offset !=
+                          nb[i - 1].lnb_file_offset + sb->s_blocksize) {
                         nbitmaps++;     /* additional indirect */
                 }
                 nbitmaps += blockpp;    /* each leaf in different group? */
index 1c556bb..d512e19 100644 (file)
 
 void dump_lniobuf(struct niobuf_local *nb)
 {
-        CDEBUG(D_RPCTRACE,
-               "niobuf_local: offset="LPD64", len=%d, page=%p, rc=%d\n",
-               nb->offset, nb->len, nb->page, nb->rc);
-        CDEBUG(D_RPCTRACE, "nb->page: index = %ld\n",
-               nb->page ? cfs_page_index(nb->page) : -1);
+       CDEBUG(D_RPCTRACE,
+              "niobuf_local: file_offset="LPD64", len=%d, page=%p, rc=%d\n",
+              nb->lnb_file_offset, nb->len, nb->page, nb->rc);
+       CDEBUG(D_RPCTRACE, "nb->page: index = %ld\n",
+                       nb->page ? cfs_page_index(nb->page) : -1);
 }
 EXPORT_SYMBOL(dump_lniobuf);
 
index bca3d91..2320dc9 100644 (file)
@@ -306,15 +306,17 @@ static int echo_map_nb_to_lb(struct obdo *oa, struct obd_ioobj *obj,
                 if (*left == 0)
                         return -EINVAL;
 
-                res->offset = offset;
-                res->len = plen;
-                LASSERT((res->offset & ~CFS_PAGE_MASK) + res->len <= CFS_PAGE_SIZE);
-
-
-                if (ispersistent &&
-                    (res->offset >> CFS_PAGE_SHIFT) < ECHO_PERSISTENT_PAGES) {
-                        res->page = echo_persistent_pages[res->offset >>
-                                CFS_PAGE_SHIFT];
+               res->lnb_file_offset = offset;
+               res->len = plen;
+               LASSERT((res->lnb_file_offset & ~CFS_PAGE_MASK) + res->len <=
+                       CFS_PAGE_SIZE);
+
+               if (ispersistent &&
+                   ((res->lnb_file_offset >> CFS_PAGE_SHIFT) <
+                     ECHO_PERSISTENT_PAGES)) {
+                       res->page =
+                               echo_persistent_pages[res->lnb_file_offset >>
+                                                     CFS_PAGE_SHIFT];
                         /* Take extra ref so __free_pages() can be called OK */
                         cfs_get_page (res->page);
                 } else {
@@ -327,14 +329,14 @@ static int echo_map_nb_to_lb(struct obdo *oa, struct obd_ioobj *obj,
                 }
 
                 CDEBUG(D_PAGE, "$$$$ get page %p @ "LPU64" for %d\n",
-                       res->page, res->offset, res->len);
+                      res->page, res->lnb_file_offset, res->len);
 
                 if (cmd & OBD_BRW_READ)
                         res->rc = res->len;
 
                 if (debug_setup)
                         echo_page_debug_setup(res->page, cmd, obj->ioo_id,
-                                              res->offset, res->len);
+                                             res->lnb_file_offset, res->len);
 
                 offset += plen;
                 len -= plen;
@@ -371,11 +373,12 @@ static int echo_finalize_lb(struct obdo *oa, struct obd_ioobj *obj,
                 addr = cfs_kmap(page);
 
                 CDEBUG(D_PAGE, "$$$$ use page %p, addr %p@"LPU64"\n",
-                       res->page, addr, res->offset);
+                      res->page, addr, res->lnb_file_offset);
 
                 if (verify) {
                         int vrc = echo_page_debug_check(page, obj->ioo_id,
-                                                        res->offset, res->len);
+                                                       res->lnb_file_offset,
+                                                       res->len);
                         /* check all the pages always */
                         if (vrc != 0 && rc == 0)
                                 rc = vrc;
index a946d90..eefb0c6 100644 (file)
@@ -317,7 +317,8 @@ static int filter_map_remote_to_local(int objcount, struct obd_ioobj *obj,
 
                         if (plen > len)
                                 plen = len;
-                        lnb->offset = offset;
+                       lnb->lnb_file_offset = offset;
+                       lnb->lnb_page_offset = poff;
                         lnb->len = plen;
                         lnb->flags = rnb->flags;
                         lnb->page = NULL;
@@ -423,19 +424,20 @@ static int filter_preprw_read(int cmd, struct obd_export *exp, struct obdo *oa,
 
                 lnb->dentry = dentry;
 
-                if (isize <= lnb->offset)
-                        /* If there's no more data, abort early.  lnb->rc == 0,
-                         * so it's easy to detect later. */
-                        break;
+               if (isize <= lnb->lnb_file_offset)
+                       /* If there's no more data, abort early.  lnb->rc == 0,
+                        * so it's easy to detect later. */
+                       break;
 
-                lnb->page = filter_get_page(obd, inode, lnb->offset, 0);
-                if (lnb->page == NULL)
-                        GOTO(cleanup, rc = -ENOMEM);
+               lnb->page = filter_get_page(obd, inode, lnb->lnb_file_offset,
+                                           0);
+               if (lnb->page == NULL)
+                       GOTO(cleanup, rc = -ENOMEM);
 
-                lprocfs_counter_add(obd->obd_stats, LPROC_FILTER_CACHE_ACCESS, 1);
+               lprocfs_counter_add(obd->obd_stats, LPROC_FILTER_CACHE_ACCESS, 1);
 
-                if (isize < lnb->offset + lnb->len - 1)
-                        lnb->rc = isize - lnb->offset;
+               if (isize < lnb->lnb_file_offset + lnb->len - 1)
+                       lnb->rc = isize - lnb->lnb_file_offset;
                 else
                         lnb->rc = lnb->len;
 
@@ -535,8 +537,9 @@ static int filter_grant_check(struct obd_export *exp, struct obdo *oa,
 
                         /* should match the code in osc_exit_cache */
                         bytes = lnb[n].len;
-                        bytes += lnb[n].offset & (blocksize - 1);
-                        tmp = (lnb[n].offset + lnb[n].len) & (blocksize - 1);
+                       bytes += lnb[n].lnb_file_offset & (blocksize - 1);
+                       tmp = (lnb[n].lnb_file_offset + lnb[n].len) &
+                             (blocksize - 1);
                         if (tmp)
                                 bytes += blocksize - tmp;
 
@@ -810,8 +813,8 @@ retry:
                  * needs to keep the pages all aligned properly. */
                 lnb->dentry = dentry;
 
-                lnb->page = filter_get_page(obd, dentry->d_inode, lnb->offset,
-                                            localreq);
+               lnb->page = filter_get_page(obd, dentry->d_inode,
+                                           lnb->lnb_file_offset, localreq);
                 if (lnb->page == NULL)
                         GOTO(cleanup, rc = -ENOMEM);
 
@@ -840,7 +843,8 @@ retry:
                         if (maxidx >= lnb->page->index) {
                                 LL_CDEBUG_PAGE(D_PAGE, lnb->page, "write %u @ "
                                                LPU64" flg %x before EOF %llu\n",
-                                               lnb->len, lnb->offset,lnb->flags,
+                                              lnb->len, lnb->lnb_file_offset,
+                                              lnb->flags,
                                                i_size_read(dentry->d_inode));
                                 filter_iobuf_add_page(obd, iobuf,
                                                       dentry->d_inode,
@@ -849,10 +853,11 @@ retry:
                                 long off;
                                 char *p = kmap(lnb->page);
 
-                                off = lnb->offset & ~CFS_PAGE_MASK;
+                                off = lnb->lnb_file_offset & ~CFS_PAGE_MASK;
                                 if (off)
                                         memset(p, 0, off);
-                                off = (lnb->offset + lnb->len) & ~CFS_PAGE_MASK;
+                               off = (lnb->lnb_file_offset + lnb->len) &
+                                     ~CFS_PAGE_MASK;
                                 if (off)
                                         memset(p + off, 0, CFS_PAGE_SIZE - off);
                                 kunmap(lnb->page);
index 853e4f5..53c293c 100644 (file)
@@ -618,7 +618,8 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa,
                 loff_t this_size;
                 __u32 flags = lnb->flags;
 
-                if (filter_range_is_mapped(inode, lnb->offset, lnb->len)) {
+               if (filter_range_is_mapped(inode, lnb->lnb_file_offset,
+                                          lnb->len)) {
                         /* If overwriting an existing block,
                          * we don't need a grant */
                         if (!(flags & OBD_BRW_GRANTED) && lnb->rc == -ENOSPC)
@@ -649,7 +650,7 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa,
 
                 /* we expect these pages to be in offset order, but we'll
                  * be forgiving */
-                this_size = lnb->offset + lnb->len;
+               this_size = lnb->lnb_file_offset + lnb->len;
                 if (this_size > iattr.ia_size)
                         iattr.ia_size = this_size;
 
index 11181b6..d641331 100644 (file)
@@ -395,8 +395,8 @@ static int osd_map_remote_to_local(loff_t offset, ssize_t len, int *nrpages,
 
                 if (plen > len)
                         plen = len;
-                lnb->offset = offset;
-                /* lnb->lnb_page_offset = poff; */
+               lnb->lnb_file_offset = offset;
+               lnb->lnb_page_offset = poff;
                 lnb->len = plen;
                 /* lb->flags = rnb->flags; */
                 lnb->flags = 0;
@@ -467,7 +467,7 @@ int osd_bufs_get(const struct lu_env *env, struct dt_object *d, loff_t pos,
                  * needs to keep the pages all aligned properly. */
                 lnb->dentry = (void *) obj;
 
-                lnb->page = osd_get_page(d, lnb->offset, rw);
+               lnb->page = osd_get_page(d, lnb->lnb_file_offset, rw);
                 if (lnb->page == NULL)
                         GOTO(cleanup, rc = -ENOMEM);
 
@@ -565,11 +565,11 @@ static int osd_write_prep(const struct lu_env *env, struct dt_object *dt,
                         long off;
                         char *p = kmap(lnb[i].page);
 
-                        off = lnb[i].offset;
-                        if (off)
-                                memset(p, 0, off);
-                        off = lnb[i].offset + lnb[i].len;
-                        off &= ~CFS_PAGE_MASK;
+                       off = lnb[i].lnb_page_offset;
+                       if (off)
+                               memset(p, 0, off);
+                       off = (lnb[i].lnb_page_offset + lnb[i].len) &
+                             ~CFS_PAGE_MASK;
                         if (off)
                                 memset(p + off, 0, CFS_PAGE_SIZE - off);
                         kunmap(lnb[i].page);
@@ -645,11 +645,11 @@ static int osd_declare_write_commit(const struct lu_env *env,
 
         /* calculate number of extents (probably better to pass nb) */
        for (i = 0; i < npages; i++) {
-               if (i && lnb[i].offset !=
-                   lnb[i - 1].offset + lnb[i - 1].len)
+               if (i && lnb[i].lnb_file_offset !=
+                   lnb[i - 1].lnb_file_offset + lnb[i - 1].len)
                        extents++;
 
-               if (!osd_is_mapped(inode, lnb[i].offset))
+               if (!osd_is_mapped(inode, lnb[i].lnb_file_offset))
                        quota_space += CFS_PAGE_SIZE;
 
                /* ignore quota for the whole request if any page is from
@@ -742,7 +742,7 @@ static int osd_write_commit(const struct lu_env *env, struct dt_object *dt,
 
         for (i = 0; i < npages; i++) {
                 if (lnb[i].rc == -ENOSPC &&
-                    osd_is_mapped(inode, lnb[i].offset)) {
+                   osd_is_mapped(inode, lnb[i].lnb_file_offset)) {
                         /* Allow the write to proceed if overwriting an
                          * existing block */
                         lnb[i].rc = 0;
@@ -759,8 +759,8 @@ static int osd_write_commit(const struct lu_env *env, struct dt_object *dt,
                 LASSERT(PageLocked(lnb[i].page));
                 LASSERT(!PageWriteback(lnb[i].page));
 
-                if (lnb[i].offset + lnb[i].len > isize)
-                        isize = lnb[i].offset + lnb[i].len;
+               if (lnb[i].lnb_file_offset + lnb[i].len > isize)
+                       isize = lnb[i].lnb_file_offset + lnb[i].len;
 
                 /*
                  * Since write and truncate are serialized by oo_sem, even
@@ -835,14 +835,14 @@ static int osd_read_prep(const struct lu_env *env, struct dt_object *dt,
         cfs_gettimeofday(&start);
         for (i = 0; i < npages; i++) {
 
-                if (i_size_read(inode) <= lnb[i].offset)
+               if (i_size_read(inode) <= lnb[i].lnb_file_offset)
                         /* If there's no more data, abort early.
                          * lnb->rc == 0, so it's easy to detect later. */
                         break;
 
                 if (i_size_read(inode) <
-                    lnb[i].offset + lnb[i].len - 1)
-                        lnb[i].rc = i_size_read(inode) - lnb[i].offset;
+                   lnb[i].lnb_file_offset + lnb[i].len - 1)
+                       lnb[i].rc = i_size_read(inode) - lnb[i].lnb_file_offset;
                 else
                         lnb[i].rc = lnb[i].len;
                 m += lnb[i].len;
index 3cc3c62..8825892 100644 (file)
@@ -297,7 +297,7 @@ static int osd_bufs_get_read(const struct lu_env *env, struct osd_object *obj,
 
                                lnb->rc = 0;
                                lnb->lnb_file_offset = off;
-                               lnb->offset = bufoff & ~CFS_PAGE_MASK;
+                               lnb->lnb_page_offset = bufoff & ~CFS_PAGE_MASK;
                                lnb->len = thispage;
                                lnb->page = kmem_to_page(dbp[i]->db_data +
                                                                bufoff);
@@ -363,7 +363,7 @@ static int osd_bufs_get_write(const struct lu_env *env, struct osd_object *obj,
                                plen = min_t(int, sz_in_block, CFS_PAGE_SIZE);
 
                                lnb[i].lnb_file_offset = off;
-                               lnb[i].offset = 0;
+                               lnb[i].lnb_page_offset = 0;
                                lnb[i].len = plen;
                                lnb[i].rc = 0;
                                if (sz_in_block == bs)
@@ -397,7 +397,7 @@ static int osd_bufs_get_write(const struct lu_env *env, struct osd_object *obj,
                                plen = min_t(int, sz_in_block, CFS_PAGE_SIZE);
 
                                lnb[i].lnb_file_offset = off;
-                               lnb[i].offset = 0;
+                               lnb[i].lnb_page_offset = 0;
                                lnb[i].len = plen;
                                lnb[i].rc = 0;
                                lnb[i].dentry = NULL;
index cd52890..2318c03 100644 (file)
@@ -790,7 +790,7 @@ static int ost_brw_read(struct ptlrpc_request *req, struct obd_trans_info *oti)
                 if (page_rc != 0) {             /* some data! */
                         LASSERT (local_nb[i].page != NULL);
                         ptlrpc_prep_bulk_page(desc, local_nb[i].page,
-                                              local_nb[i].offset & ~CFS_PAGE_MASK,
+                                             local_nb[i].lnb_page_offset,
                                               page_rc);
                 }
 
@@ -1024,7 +1024,7 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti)
 
         for (i = 0; i < npages; i++)
                 ptlrpc_prep_bulk_page(desc, local_nb[i].page,
-                                      local_nb[i].offset & ~CFS_PAGE_MASK,
+                                     local_nb[i].lnb_page_offset,
                                       local_nb[i].len);
 
         rc = sptlrpc_svc_prep_bulk(req, desc);
@@ -1112,8 +1112,8 @@ skip_transfer:
                                    body->oa.o_id,
                                    body->oa.o_valid & OBD_MD_FLGROUP ?
                                                 body->oa.o_seq : (__u64)0,
-                                   local_nb[0].offset,
-                                   local_nb[npages-1].offset +
+                                  local_nb[0].lnb_file_offset,
+                                  local_nb[npages-1].lnb_file_offset +
                                    local_nb[npages-1].len - 1 );
                 CERROR("client csum %x, original server csum %x, "
                        "server csum now %x\n",