Whamcloud - gitweb
land b1_2_bug2248 on b1_4
authornic <nic>
Tue, 2 Nov 2004 17:39:02 +0000 (17:39 +0000)
committernic <nic>
Tue, 2 Nov 2004 17:39:02 +0000 (17:39 +0000)
- return async write errors to application if possible (2248)

13 files changed:
lustre/ChangeLog
lustre/include/linux/lustre_lite.h
lustre/include/linux/lustre_net.h
lustre/include/linux/obd.h
lustre/llite/file.c
lustre/llite/rw24.c
lustre/llite/rw26.c
lustre/lov/lov_obd.c
lustre/osc/osc_request.c
lustre/portals/include/linux/portals_compat25.h
lustre/portals/libcfs/debug.c
lustre/ptlrpc/client.c
lustre/tests/recovery-small.sh

index dc192d1..95e485a 100644 (file)
@@ -1,8 +1,13 @@
 tbd         Cluster File Systems, Inc. <info@clusterfs.com>
        * version 1.3.4
        * bug fixes
+       - flock/lockf fixes
+       - don't use EXT3 constants in llite code (5094)
+       - return async write errors to application if possible (2248)
        * miscellania
        - reorganization of lov code
+       - single portals codebase
+       - Infiniband NAL
 
 tbd         Cluster File Systems, Inc. <info@clusterfs.com>
        * version 1.2.8
@@ -27,6 +32,7 @@ tbd         Cluster File Systems, Inc. <info@clusterfs.com>
        - add software watchdogs to catch hung threads quickly (4941)
        - make lustrefs init script start after nfs is mounted
        - fix CWARN/ERROR duplication (4930)
+       - add /proc/sys/portal/memused (bytes allocated by PORTALS_ALLOC)
        - print NAL number in %x format (4645)
 
 2004-10-07  Cluster File Systems, Inc. <info@clusterfs.com>
index 69b8d51..90c94bb 100644 (file)
@@ -92,6 +92,9 @@ struct ll_inode_info {
 
         struct list_head        lli_close_item;
 
+        /* for writepage() only to communicate to fsync */
+        int                     lli_async_rc;
+
         struct file_operations *ll_save_ifop;
         struct file_operations *ll_save_ffop;
         struct file_operations *ll_save_wfop;
index 30a2780..335cbc5 100644 (file)
@@ -638,6 +638,8 @@ void ptlrpc_prep_bulk_page(struct ptlrpc_bulk_desc *desc,
 void ptlrpc_retain_replayable_request(struct ptlrpc_request *req,
                                       struct obd_import *imp);
 __u64 ptlrpc_next_xid(void);
+__u64 ptlrpc_sample_next_xid(void);
+__u64 ptlrpc_req_xid(struct ptlrpc_request *request);
 
 /* ptlrpc/service.c */
 void ptlrpc_save_lock (struct ptlrpc_request *req, 
index 7f01b9b..141d5a2 100644 (file)
@@ -44,6 +44,12 @@ struct loi_oap_pages {
         struct list_head        lop_pending_group;
 };
 
+struct osc_async_rc {
+        int     ar_rc;
+        int     ar_force_sync;
+        int     ar_min_xid;
+};
+
 struct lov_oinfo {                 /* per-stripe data structure */
         __u64 loi_id;              /* object ID on the target OST */
         __u64 loi_gr;              /* object group on the target OST */
@@ -63,6 +69,8 @@ struct lov_oinfo {                 /* per-stripe data structure */
         __u64 loi_rss;             /* recently seen size */
         __u64 loi_mtime;           /* recently seen mtime */
         __u64 loi_blocks;          /* recently seen blocks */
+
+        struct osc_async_rc     loi_ar;
 };
 
 static inline void loi_init(struct lov_oinfo *loi)
@@ -263,6 +271,9 @@ struct client_obd {
         struct mdc_rpc_lock     *cl_rpc_lock;
         struct mdc_rpc_lock     *cl_setattr_lock;
         struct osc_creator      cl_oscc;
+
+        /* also protected by the poorly named _loi_list_lock lock above */
+        struct osc_async_rc      cl_ar;
 };
 
 /* Like a client, with some hangers-on.  Keep mc_client_obd first so that we
index 5bd0eab..c06de8c 100644 (file)
@@ -78,6 +78,8 @@ int ll_mdc_close(struct obd_export *mdc_exp, struct inode *inode,
         RETURN(rc);
 }
 
+int lov_test_and_clear_async_rc(struct lov_stripe_md *lsm);
+
 /* While this returns an error code, fput() the caller does not, so we need
  * to make every effort to clean up all of our state here.  Also, applications
  * rarely check close errors and even if an error is returned they will not
@@ -87,6 +89,8 @@ int ll_file_release(struct inode *inode, struct file *file)
 {
         struct ll_file_data *fd;
         struct ll_sb_info *sbi = ll_i2sbi(inode);
+        struct ll_inode_info *lli = ll_i2info(inode);
+        struct lov_stripe_md *lsm = lli->lli_smd;
         int rc;
 
         ENTRY;
@@ -101,6 +105,10 @@ int ll_file_release(struct inode *inode, struct file *file)
         fd = (struct ll_file_data *)file->private_data;
         LASSERT(fd != NULL);
 
+        if (lsm)
+                lov_test_and_clear_async_rc(lsm);
+        lli->lli_async_rc = 0;
+
         rc = ll_mdc_close(sbi->ll_mdc_exp, inode, file);
         RETURN(rc);
 }
@@ -1138,7 +1146,8 @@ loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
 int ll_fsync(struct file *file, struct dentry *dentry, int data)
 {
         struct inode *inode = dentry->d_inode;
-        struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
+        struct ll_inode_info *lli = ll_i2info(inode);
+        struct lov_stripe_md *lsm = lli->lli_smd;
         struct ll_fid fid;
         struct ptlrpc_request *req;
         int rc, err;
@@ -1152,6 +1161,18 @@ int ll_fsync(struct file *file, struct dentry *dentry, int data)
          * that IO to finish before calling the osc and mdc sync methods */
         rc = filemap_fdatawait(inode->i_mapping);
 
+        /* catch async errors that were recorded back when async writeback
+         * failed for pages in this mapping. */
+        err = lli->lli_async_rc;
+        lli->lli_async_rc = 0;
+        if (rc == 0)
+                rc = err;
+        if (lsm) {
+                err = lov_test_and_clear_async_rc(lsm);
+                if (rc == 0)
+                        rc = err;
+        }
+
         ll_inode2fid(&fid, inode);
         err = mdc_sync(ll_i2sbi(inode)->ll_mdc_exp, &fid, &req);
         if (!rc)
index 9c4e2a2..736caf3 100644 (file)
@@ -52,6 +52,7 @@
 static int ll_writepage_24(struct page *page)
 {
         struct inode *inode = page->mapping->host;
+        struct ll_inode_info *lli = ll_i2info(inode);
         struct obd_export *exp;
         struct ll_async_page *llap;
         int rc = 0;
@@ -71,12 +72,12 @@ static int ll_writepage_24(struct page *page)
         page_cache_get(page);
         if (llap->llap_write_queued) {
                 LL_CDEBUG_PAGE(D_PAGE, page, "marking urgent\n");
-                rc = obd_set_async_flags(exp, ll_i2info(inode)->lli_smd, NULL,
+                rc = obd_set_async_flags(exp, lli->lli_smd, NULL,
                                          llap->llap_cookie,
                                          ASYNC_READY | ASYNC_URGENT);
         } else {
                 llap->llap_write_queued = 1;
-                rc = obd_queue_async_io(exp, ll_i2info(inode)->lli_smd, NULL,
+                rc = obd_queue_async_io(exp, lli->lli_smd, NULL,
                                         llap->llap_cookie, OBD_BRW_WRITE, 0, 0,
                                         0, ASYNC_READY | ASYNC_URGENT);
                 if (rc == 0)
@@ -87,8 +88,11 @@ static int ll_writepage_24(struct page *page)
         if (rc)
                 page_cache_release(page);
 out:
-        if (rc)
+        if (rc) {
+                if (!lli->lli_async_rc)
+                        lli->lli_async_rc = rc;
                 unlock_page(page);
+        }
         RETURN(rc);
 }
 
index b585b09..bade134 100644 (file)
@@ -54,6 +54,7 @@
 static int ll_writepage_26(struct page *page, struct writeback_control *wbc)
 {
         struct inode *inode = page->mapping->host;
+        struct ll_inode_info *lli = ll_i2info(inode);
         struct obd_export *exp;
         struct ll_async_page *llap;
         int rc;
@@ -73,12 +74,12 @@ static int ll_writepage_26(struct page *page, struct writeback_control *wbc)
         page_cache_get(page);
         if (llap->llap_write_queued) {
                 LL_CDEBUG_PAGE(D_PAGE, page, "marking urgent\n");
-                rc = obd_set_async_flags(exp, ll_i2info(inode)->lli_smd, NULL,
+                rc = obd_set_async_flags(exp, lli->lli_smd, NULL,
                                          llap->llap_cookie,
                                          ASYNC_READY | ASYNC_URGENT);
         } else {
                 llap->llap_write_queued = 1;
-                rc = obd_queue_async_io(exp, ll_i2info(inode)->lli_smd, NULL,
+                rc = obd_queue_async_io(exp, lli->lli_smd, NULL,
                                         llap->llap_cookie, OBD_BRW_WRITE, 0, 0,
                                         0, ASYNC_READY | ASYNC_URGENT);
                 if (rc == 0)
@@ -89,10 +90,13 @@ static int ll_writepage_26(struct page *page, struct writeback_control *wbc)
         if (rc)
                 page_cache_release(page);
 out:
-        if (rc)
+        if (rc) {
+                if (!lli->lli_async_rc)
+                        lli->lli_async_rc = rc;
                 unlock_page(page);
-        else
+        } else {
                 set_page_writeback(page);
+        }
         RETURN(rc);
 }
 
index 5ca8bf3..8afa23d 100644 (file)
@@ -1662,6 +1662,22 @@ static int lov_set_info(struct obd_export *exp, obd_count keylen,
 #undef KEY_IS
 }
 
+int lov_test_and_clear_async_rc(struct lov_stripe_md *lsm)
+{
+        int first_err = 0, idx;
+        ENTRY;
+
+        /* report the first stripe error found; reset every stripe's record */
+        for (idx = 0; idx < lsm->lsm_stripe_count; idx++) {
+                struct lov_oinfo *stripe = &lsm->lsm_oinfo[idx];
+                if (first_err == 0 && stripe->loi_ar.ar_rc != 0)
+                        first_err = stripe->loi_ar.ar_rc;
+                stripe->loi_ar.ar_rc = 0;
+        }
+        RETURN(first_err);
+}
+EXPORT_SYMBOL(lov_test_and_clear_async_rc);
+
 #if 0
 struct lov_multi_wait {
         struct ldlm_lock *lock;
index 9e7f5e6..ff1dca9 100644 (file)
@@ -1180,6 +1180,28 @@ unlock:
         spin_unlock(&oap->oap_cli->cl_loi_list_lock);
 }
 
+/* This tries to propagate async writeback errors back up to the
+ * application.  When an async write fails we record the error code so that a
+ * later fsync can return it.  As long as errors persist we force future rpcs
+ * to be sync so that the app can get a sync error and break the cycle of
+ * queueing pages for which writeback will fail. */
+static void osc_process_ar(struct osc_async_rc *ar, struct ptlrpc_request *req,
+                           int rc)
+{
+        if (rc != 0) {
+                /* failure: keep only the first error, (re)arm forced sync */
+                if (ar->ar_rc == 0)
+                        ar->ar_rc = rc;
+                ar->ar_force_sync = 1;
+                ar->ar_min_xid = ptlrpc_sample_next_xid();
+        } else if (ar->ar_force_sync &&
+                   ptlrpc_req_xid(req) >= ar->ar_min_xid) {
+                /* a request with an xid at or past the one sampled at the
+                 * last failure succeeded: stop forcing sync io */
+                ar->ar_force_sync = 0;
+        }
+}
+
 /* this must be called holding the loi list lock to give coverage to exit_cache,
  * async_flag maintenance, and oap_request */
 static void osc_ap_completion(struct client_obd *cli, struct obdo *oa,
@@ -1190,6 +1212,12 @@ static void osc_ap_completion(struct client_obd *cli, struct obdo *oa,
         oap->oap_interrupted = 0;
 
         if (oap->oap_request != NULL) {
+                if (sent && oap->oap_cmd == OBD_BRW_WRITE) {
+                        osc_process_ar(&cli->cl_ar, oap->oap_request, rc);
+                        osc_process_ar(&oap->oap_loi->loi_ar, 
+                                        oap->oap_request, rc);
+                }
+
                 ptlrpc_req_finished(oap->oap_request);
                 oap->oap_request = NULL;
         }
@@ -1220,7 +1248,6 @@ static int brw_interpret_oap(struct ptlrpc_request *request,
         struct list_head *pos, *n;
         ENTRY;
 
-
         rc = osc_brw_fini_request(request, aa->aa_oa, aa->aa_requested_nob,
                                   aa->aa_nio_count, aa->aa_page_count,
                                   aa->aa_pga, rc);
@@ -1228,15 +1255,6 @@ static int brw_interpret_oap(struct ptlrpc_request *request,
         CDEBUG(D_INODE, "request %p aa %p rc %d\n", request, aa, rc);
 
         cli = aa->aa_cli;
-        /* in failout recovery we ignore writeback failure and want
-         * to just tell llite to unlock the page and continue */
-        if (request->rq_reqmsg->opc == OST_WRITE && 
-            (cli->cl_import == NULL || cli->cl_import->imp_invalid)) {
-                CDEBUG(D_INODE, "flipping to rc 0 imp %p inv %d\n", 
-                       cli->cl_import, 
-                       cli->cl_import ? cli->cl_import->imp_invalid : -1);
-                rc = 0;
-        }
 
         spin_lock(&cli->cl_loi_list_lock);
 
@@ -1725,7 +1743,10 @@ static int osc_enter_cache(struct client_obd *cli, struct lov_oinfo *loi,
                cli->cl_dirty, cli->cl_dirty_max, cli->cl_lost_grant,
                cli->cl_avail_grant);
 
-        if (cli->cl_dirty_max < PAGE_SIZE)
+        /* force the caller to try sync io.  this can jump the list
+         * of queued writes and create a discontiguous rpc stream */
+        if (cli->cl_dirty_max < PAGE_SIZE || cli->cl_ar.ar_force_sync ||
+            loi->loi_ar.ar_force_sync)
                 return(-EDQUOT);
 
         /* Hopefully normal case - cache space and write credits available */
index e4831aa..fa2709e 100644 (file)
 #endif
 
 #if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20))
-# define THREAD_NAME(comm, len, fmt, a...)                              \
-        snprintf(comm, len, fmt "|%d", ## a, current->thread.extern_pid)
+#define UML_PID(tsk) ((tsk)->thread.extern_pid)
 #elif defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+#define UML_PID(tsk) ((tsk)->thread.mode.tt.extern_pid)
+#else
+#define UML_PID(tsk) ((tsk)->pid)
+#endif
+
+#if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
 # define THREAD_NAME(comm, len, fmt, a...)                              \
-        snprintf(comm, len,fmt"|%d", ## a,current->thread.mode.tt.extern_pid)
+        snprintf(comm, len,fmt"|%d", ## a, UML_PID(current))
 #else
 # define THREAD_NAME(comm, len, fmt, a...)                              \
         snprintf(comm, len, fmt, ## a)
index 1e81801..8b4b335 100644 (file)
@@ -297,13 +297,12 @@ char *portals_id2str(int nal, ptl_process_id_t id, char *str)
 
 #ifdef __KERNEL__
 
-
 void portals_debug_dumpstack(struct task_struct *tsk)
 {
 #if defined(__arch_um__)
         if (tsk != NULL)
-                CWARN("stack dump for process %d requested; I'll wake up gdb.\n",
-                      tsk->pid);
+                CWARN("stack dump for pid %d (%d) requested; wake up gdb.\n",
+                      tsk->pid, UML_PID(tsk));
         asm("int $3");
 #elif defined(HAVE_SHOW_TASK)
         /* this is exported by lustre kernel version 42 */
index b82c5ce..2bd4f07 100644 (file)
@@ -1097,6 +1097,12 @@ void ptlrpc_req_finished(struct ptlrpc_request *request)
         __ptlrpc_req_finished(request, 0);
 }
 
+__u64 ptlrpc_req_xid(struct ptlrpc_request *request)
+{
+        return request->rq_xid;         /* the xid stored in this request */
+}
+EXPORT_SYMBOL(ptlrpc_req_xid);
+
 /* Disengage the client's reply buffer from the network
  * NB does _NOT_ unregister any client-side bulk.
  * IDEMPOTENT, but _not_ safe against concurrent callers.
@@ -1664,4 +1670,12 @@ __u64 ptlrpc_next_xid(void)
         return tmp;
 }
 
-
+__u64 ptlrpc_sample_next_xid(void)
+{
+        __u64 next_xid;         /* peek only: ptlrpc_last_xid is not advanced */
+        spin_lock(&ptlrpc_last_xid_lock);
+        next_xid = ptlrpc_last_xid + 1;
+        spin_unlock(&ptlrpc_last_xid_lock);
+        return next_xid;
+}
+EXPORT_SYMBOL(ptlrpc_sample_next_xid);
index db2b19f..6865e6c 100755 (executable)
@@ -363,4 +363,19 @@ test_20b() {       # bug 2986 - ldlm_handle_enqueue error during open
 }
 run_test 20b "ldlm_handle_enqueue error (should return error)"
 
+test_21() {    # bug 3267 - eviction fails writeback but app doesn't see it
+       mkdir -p $DIR/$tdir
+       cancel_lru_locks OSC
+       multiop $DIR/$tdir/$tfile Owyw_yc &     # runs in the background
+       MULTI_PID=$!
+       usleep 500
+# OBD_FAIL_PTLRPC_BULK_PUT_NET|OBD_FAIL_ONCE
+       sysctl -w lustre.fail_loc=0x80000503    # arm the failure before signalling
+       kill -USR1 $MULTI_PID   # NOTE(review): presumably resumes multiop - confirm
+       wait $MULTI_PID
+       rc=$?                   # exit status of the background multiop
+       [ $rc -eq 0 ] && error "multiop didn't fail fsync: rc $rc" || true
+}
+run_test 21 "fsync error (should return error)" 
+
 $CLEANUP