Whamcloud - gitweb
Branch HEAD
[fs/lustre-release.git] / lustre / obdfilter / filter_io_26.c
index 58f6bde..960b097 100644 (file)
@@ -101,7 +101,7 @@ static void record_finish_io(struct filter_iobuf *iobuf, int rw, int rc)
 {
         struct filter_obd *filter = iobuf->dr_filter;
 
-        /* CAVEAT EMPTOR: possibly in IRQ context 
+        /* CAVEAT EMPTOR: possibly in IRQ context
          * DO NOT record procfs stats here!!! */
 
         if (rw == OBD_BRW_READ)
@@ -119,7 +119,7 @@ static int dio_complete_routine(struct bio *bio, unsigned int done, int error)
         struct bio_vec *bvl;
         int i;
 
-        /* CAVEAT EMPTOR: possibly in IRQ context 
+        /* CAVEAT EMPTOR: possibly in IRQ context
          * DO NOT record procfs stats here!!! */
 
         if (bio->bi_size)                       /* Not complete */
@@ -133,7 +133,7 @@ static int dio_complete_routine(struct bio *bio, unsigned int done, int error)
                        "with any interesting messages leading up to this point "
                        "(like SCSI errors, perhaps).  Because bi_private is "
                        "NULL, I can't wake up the thread that initiated this "
-                       "I/O -- so you will probably have to reboot this node.\n");
+                       "IO - you will probably have to reboot this node.\n");
                 CERROR("bi_next: %p, bi_flags: %lx, bi_rw: %lu, bi_vcnt: %d, "
                        "bi_idx: %d, bi->size: %d, bi_end_io: %p, bi_cnt: %d, "
                        "bi_private: %p\n", bio->bi_next, bio->bi_flags,
@@ -149,19 +149,15 @@ static int dio_complete_routine(struct bio *bio, unsigned int done, int error)
                         if (likely(error == 0))
                                 SetPageUptodate(bvl->bv_page);
                         LASSERT(PageLocked(bvl->bv_page));
-#ifdef HAVE_PAGE_CONSTANT
                         ClearPageConstant(bvl->bv_page);
-#endif
                 }
                 record_finish_io(iobuf, OBD_BRW_READ, error);
         } else {
-#ifdef HAVE_PAGE_CONSTANT
                 if (mapping_cap_page_constant_write(iobuf->dr_pages[0]->mapping)){
                         bio_for_each_segment(bvl, bio, i) {
                                 ClearPageConstant(bvl->bv_page);
                         }
                 }
-#endif
                 record_finish_io(iobuf, OBD_BRW_WRITE, error);
         }
 
@@ -250,7 +246,8 @@ void filter_free_iobuf(struct filter_iobuf *iobuf)
 void filter_iobuf_put(struct filter_obd *filter, struct filter_iobuf *iobuf,
                       struct obd_trans_info *oti)
 {
-        int thread_id = oti ? oti->oti_thread_id : -1;
+        int thread_id = (oti && oti->oti_thread) ?
+                        oti->oti_thread->t_id : -1;
 
         if (unlikely(thread_id < 0)) {
                 filter_free_iobuf(iobuf);
@@ -328,17 +325,15 @@ int filter_do_bio(struct obd_export *exp, struct inode *inode,
                                 sector_bits))
                                 nblocks++;
 
-#ifdef HAVE_PAGE_CONSTANT
-                        /* I only set the page to be constant only if it 
-                         * is mapped to a contiguous underlying disk block(s). 
-                         * It will then make sure the corresponding device 
-                         * cache of raid5 will be overwritten by this page. 
+                        /* I only set the page to be constant only if it
+                         * is mapped to a contiguous underlying disk block(s).
+                         * It will then make sure the corresponding device
+                         * cache of raid5 will be overwritten by this page.
                          * - jay */
-                        if ((rw == OBD_BRW_WRITE) && 
-                            (nblocks == blocks_per_page) && 
+                        if ((rw == OBD_BRW_WRITE) &&
+                            (nblocks == blocks_per_page) &&
                             mapping_cap_page_constant_write(inode->i_mapping))
                                SetPageConstant(page);
-#endif
 
                         if (bio != NULL &&
                             can_be_merged(bio, sector) &&
@@ -471,7 +466,8 @@ int filter_direct_io(int rw, struct dentry *dchild, struct filter_iobuf *iobuf,
                 create = 1;
                 sem = &obd->u.filter.fo_alloc_lock;
 
-                lquota_enforce(filter_quota_interface_ref, obd, iobuf->dr_ignore_quota);
+                lquota_enforce(filter_quota_interface_ref, obd,
+                               iobuf->dr_ignore_quota);
         }
 
         rc = fsfilt_map_inode_pages(obd, inode, iobuf->dr_pages,
@@ -555,13 +551,14 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa,
         struct lvfs_run_ctxt saved;
         struct fsfilt_objinfo fso;
         struct iattr iattr = { 0 };
-        struct inode *inode = NULL;
+        struct inode *inode = res->dentry->d_inode;
         unsigned long now = jiffies;
         int i, err, cleanup_phase = 0;
         struct obd_device *obd = exp->exp_obd;
         struct filter_obd *fo = &obd->u.filter;
         void *wait_handle;
-        int   total_size = 0, rc2;
+        int total_size = 0;
+        int rec_pending = 0;
         unsigned int qcids[MAXQUOTAS] = {0, 0};
         ENTRY;
 
@@ -572,21 +569,11 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa,
         if (rc != 0)
                 GOTO(cleanup, rc);
 
-        /* Unfortunately, if quota master is too busy to handle the
-         * pre-dqacq in time and quota hash on ost is used up, we
-         * have to wait for the completion of in flight dqacq/dqrel,
-         * then try again */
-        if ((rc2 = lquota_chkquota(filter_quota_interface_ref, obd, oa->o_uid,
-                                   oa->o_gid, niocount)) == QUOTA_RET_ACQUOTA) {
-                OBD_FAIL_TIMEOUT(OBD_FAIL_OST_HOLD_WRITE_RPC, 90);
-                lquota_acquire(filter_quota_interface_ref, obd, oa->o_uid,
-                               oa->o_gid);
-        }
-
-        if (rc2 < 0) {
-                rc = rc2;
-                GOTO(cleanup, rc);
-        }
+        /* we try to get enough quota to write here, and let ldiskfs
+         * decide if it is out of quota or not b=14783 */
+        lquota_chkquota(filter_quota_interface_ref, obd, oa->o_uid,
+                        oa->o_gid, niocount, &rec_pending, oti,
+                        LQUOTA_FLAGS_BLK, (void *)inode, obj->ioo_bufcnt);
 
         iobuf = filter_iobuf_get(&obd->u.filter, oti);
         if (IS_ERR(iobuf))
@@ -595,14 +582,14 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa,
 
         fso.fso_dentry = res->dentry;
         fso.fso_bufcnt = obj->ioo_bufcnt;
-        inode = res->dentry->d_inode;
 
         iobuf->dr_ignore_quota = 0;
         for (i = 0, lnb = res; i < niocount; i++, lnb++) {
                 loff_t this_size;
+                __u32 flags = lnb->flags;
 
                 /* If overwriting an existing block, we don't need a grant */
-                if (!(lnb->flags & OBD_BRW_GRANTED) && lnb->rc == -ENOSPC &&
+                if (!(flags & OBD_BRW_GRANTED) && lnb->rc == -ENOSPC &&
                     filter_range_is_mapped(inode, lnb->offset, lnb->len))
                         lnb->rc = 0;
 
@@ -614,8 +601,10 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa,
                 LASSERT(PageLocked(lnb->page));
                 LASSERT(!PageWriteback(lnb->page));
 
-                /* truncate might leave tail dirty */
-                clear_page_dirty_for_io(lnb->page);
+                /* preceding filemap_write_and_wait() should have clean pages */
+                if (fo->fo_writethrough_cache)
+                        clear_page_dirty_for_io(lnb->page);
+                LASSERT(!PageDirty(lnb->page));
 
                 SetPageUptodate(lnb->page);
 
@@ -630,10 +619,15 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa,
                 if (this_size > iattr.ia_size)
                         iattr.ia_size = this_size;
 
-                /* if one page is a write-back page from client cache, or it's
-                 * written by root, then mark the whole io request as ignore
-                 * quota request */
-                if (lnb->flags & (OBD_BRW_FROM_GRANT | OBD_BRW_NOQUOTA))
+                /* if one page is a write-back page from client cache and
+                 * not from direct_io, or it's written by root, then mark
+                 * the whole io request as ignore quota request, remote
+                 * client can not break through quota. */
+                if (exp_connect_rmtclient(exp))
+                        flags &= ~OBD_BRW_NOQUOTA;
+                if ((flags & OBD_BRW_NOQUOTA) ||
+                    (flags & (OBD_BRW_FROM_GRANT | OBD_BRW_SYNC)) ==
+                     OBD_BRW_FROM_GRANT)
                         iobuf->dr_ignore_quota = 1;
         }
 
@@ -691,8 +685,8 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa,
                  * in the inode before filter_direct_io() - see bug 10357. */
                 save = iattr.ia_valid;
                 iattr.ia_valid &= (ATTR_UID | ATTR_GID);
-                rc = fsfilt_setattr(obd, res->dentry, oti->oti_handle, &iattr, 0);
-                CDEBUG(D_QUOTA, "set uid(%u)/gid(%u) to ino(%lu). rc(%d)\n", 
+                rc = fsfilt_setattr(obd, res->dentry, oti->oti_handle,&iattr,0);
+                CDEBUG(D_QUOTA, "set uid(%u)/gid(%u) to ino(%lu). rc(%d)\n",
                                 iattr.ia_uid, iattr.ia_gid, inode->i_ino, rc);
                 iattr.ia_valid = save & ~(ATTR_UID | ATTR_GID);
         }
@@ -724,6 +718,10 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa,
         fsfilt_check_slow(obd, now, "commitrw commit");
 
 cleanup:
+        if (rec_pending)
+                lquota_pending_commit(filter_quota_interface_ref, obd, oa->o_uid,
+                                      oa->o_gid, rec_pending, 1);
+
         filter_grant_commit(exp, niocount, res);
 
         switch (cleanup_phase) {