Branch HEAD

author johann <johann>

Thu, 3 Jul 2008 07:16:34 +0000 (07:16 +0000)

committer johann <johann>

Thu, 3 Jul 2008 07:16:34 +0000 (07:16 +0000)
author johann <johann>
Thu, 3 Jul 2008 07:16:34 +0000 (07:16 +0000)
committer johann <johann>
Thu, 3 Jul 2008 07:16:34 +0000 (07:16 +0000)
diff --git a/lustre/ChangeLog b/lustre/ChangeLog

index 3c54e94..bb8cb73 100644 (file)
--- a/lustre/ChangeLog
+++ b/lustre/ChangeLog
@@ -22,7 +22,7 @@ Bugzilla   : 14742
  Frequency  : rare
  Description: ASSERTION(CheckWriteback(page,cmd)) failed
  Details    : badly clear PG_Writeback bit in ll_ap_completion can produce false
-             positive assertion.
+            positive assertion.
  
  Severity   : enhancement
  Bugzilla   : 15865
@@ -32,7 +32,7 @@ Severity   : major
  Bugzilla   : 15924
  Description: do not process already freed flock
  Details    : flock can possibly be freed by another thread before it reaches
-             to ldlm_flock_completion_ast.
+            to ldlm_flock_completion_ast.
  
  Severity   : normal
  Bugzilla   : 14480
@@ -43,14 +43,14 @@ Severity   : minor
  Bugzilla   : 15837
  Description: oops in page fault handler
  Details    : kernel page fault handler can return two special 'pages' in error case, don't
-             try dereference NOPAGE_SIGBUS and NOPAGE_OMM.
+            try dereference NOPAGE_SIGBUS and NOPAGE_OMM.
  
  Severity   : minor
  Bugzilla   : 15716
  Description: timeout with invalidate import.
  Details    : ptlrpcd_check call obd_zombie_impexp_cull and wait request which should be
-             handled by ptlrpcd. This produce long age waiting and -ETIMEOUT
-             ptlrpc_invalidate_import and as result LASSERT.
+            handled by ptlrpcd. This produce long age waiting and -ETIMEOUT
+            ptlrpc_invalidate_import and as result LASSERT.
  
  Severity   : enhancement
  Bugzilla   : 15741
@@ -60,35 +60,35 @@ Severity   : major
  Bugzilla   : 14134
  Description: enable MGS and MDT services start separately
  Details    : add a 'nomgs' option in mount.lustre to enable start a MDT with
-             a co-located MGS without starting the MGS, which is a complement
-             to 'nosvc' mount option.
+            a co-located MGS without starting the MGS, which is a complement
+            to 'nosvc' mount option.
  
  Severity   : normal
  Bugzilla   : 14835
  Frequency  : after recovery
  Description: precreate to many object's after del orphan.
  Details    : del orphan st in oscc last_id == next_id and this triger growing
-             count of precreated objects. Set flag LOW to skip increase count
-             of precreated objects.
+            count of precreated objects. Set flag LOW to skip increase count
+            of precreated objects.
  
  Severity   : normal
  Bugzilla   : 15139
  Frequency  : rare, on clear nid stats
  Description: ASSERTION(client_stat->nid_exp_ref_count == 0)
  Details    : when clean nid stats sometimes try destroy live entry,
-             and this produce panic in free.
+            and this produce panic in free.
  
  Severity   : major
  Bugzilla   : 15575
  Description: Stack overflow during MDS log replay
- ease stack pressure by using a thread dealing llog_process.
+            ease stack pressure by using a thread dealing llog_process.
  
  Severity   : normal
  Bugzilla   : 15443
  Description: wait until IO finished before start new when do lock cancel.
  Details    : VM protocol want old IO finished before start new, in this case
-             need wait until PG_writeback is cleared until check dirty flag and
-             call writepages in lock cancel callback.
+            need wait until PG_writeback is cleared until check dirty flag and
+            call writepages in lock cancel callback.
  
  Severity   : enhancement
  Bugzilla   : 14929
@@ -96,9 +96,9 @@ Description: using special macro for print time and cleanup in includes.
  
  Severity   : normal
  Bugzilla   : 12888
-Description: mds_mfd_close() ASSERTION(rc == 0) 
-Details    : In mds_mfd_close(), we need protect inode's writecount change 
-             within its orphan write semaphore to prevent possible races.
+Description: mds_mfd_close() ASSERTION(rc == 0)
+Details    : In mds_mfd_close(), we need protect inode's writecount change
+            within its orphan write semaphore to prevent possible races.
  
  Severity   : minor
  Bugzilla   : 14929
@@ -115,7 +115,7 @@ Severity   : minor
  Bugzilla   : 14949
  Description: don't panic with use echo client
  Details    : echo client pass NULL as client nid pointer and this produce null
-             pointer dereference.
+            pointer dereference.
  
  Severity   : normal
  Bugzilla   : 15278
@@ -128,20 +128,20 @@ Description: add message levels for liblustreapi
  
  Severity   : normal
  Bugzilla   : 13380
-Description: fix for occasional failure case of -ENOSPC in recovery-small tests 
-Details    : Move the 'good_osts' check before the 'total_bavail' check.  This 
-            will result in an -EAGAIN and in the exit call path we call 
-            alloc_rr() which will with increasing aggressiveness attempt to 
+Description: fix for occasional failure case of -ENOSPC in recovery-small tests
+Details    : Move the 'good_osts' check before the 'total_bavail' check.  This
+            will result in an -EAGAIN and in the exit call path we call
+            alloc_rr() which will with increasing aggressiveness attempt to
              aquire precreated objects on the minimum number of required OSCs.
  
  Severity   : major
  Bugzilla   : 14326
  Description: Use old size assignment to avoid deadlock
  Details    : This reverts the changes in bugs 2369 and bug 14138 that introduced
-            the scheduling while holding a spinlock.  We do not need locking 
-            for size in ll_update_inode() because size is only updated from 
-            the MDS for directories or files without objects, so there is no 
-            other place to do the update, and concurrent access to such inodes 
+            the scheduling while holding a spinlock.  We do not need locking
+            for size in ll_update_inode() because size is only updated from
+            the MDS for directories or files without objects, so there is no
+            other place to do the update, and concurrent access to such inodes
              are protected by the inode lock.
  
  Severity   : normal
@@ -171,7 +171,7 @@ Severity   : normal
  Bugzilla   : 14803
  Description: Don't update lov_desc members until making sure they are valid
  Details    : When updating lov_desc members via proc fs, need fix their
-             validities before doing the real update.
+            validities before doing the real update.
  
  Severity   : normal
  Bugzilla   : 15069
@@ -222,17 +222,17 @@ Details    : When MGC is disconnected from MGS long enough, MGS will evict the
  Severity   : major
  Bugzilla   : 15027
  Frequency  : on network error
-Description: panic with double free request if network error 
+Description: panic with double free request if network error
  Details    : mdc_finish_enqueue is finish request if any network error ocuring,
-             but it's true only for synchronus enqueue, for async enqueue 
-             (via ptlrpcd) this incorrect and ptlrpcd want finish request
-             himself.
+            but it's true only for synchronus enqueue, for async enqueue
+            (via ptlrpcd) this incorrect and ptlrpcd want finish request
+            himself.
  
  Severity   : enhancement
  Bugzilla   : 11401
  Description: client-side metadata stat-ahead during readdir(directory readahead)
  Details    : perform client-side metadata stat-ahead when the client detects
-             readdir and sequential stat of dir entries therein
+            readdir and sequential stat of dir entries therein
  
  Severity   : major
  Frequency  : on start mds
@@ -1107,35 +1107,42 @@ Severity   : normal
  Bugzilla   : 15346
  Description: skiplist implementation simplification
  Details    : skiplists are used to group compatible locks on granted list
-             that was implemented as tracking first and last lock of each lock group
-             the patch changes that to using doubly linked lists
+            that was implemented as tracking first and last lock of each lock group
+            the patch changes that to using doubly linked lists
  
  Severity   : normal
  Bugzilla   : 15574
  Description: MDS LBUG: ASSERTION(!IS_ERR(dchild))
  Details    : Change LASSERTs to client eviction (i.e. abort client's recovery)
-             because LASSERT on both the data supplied by a client, and the data 
+            because LASSERT on both the data supplied by a client, and the data 
              on disk is dangerous and incorrect.
  
  Severity   : enhancement
  Bugzilla   : 10718
-Description: Slow trucate/writes to huge files at high offsets.
+Description: Slow truncate/writes to huge files at high offsets.
  Details    : Directly associate cached pages to lock that protect those pages,
-             this allows us to quickly find what pages to write and remove
+            this allows us to quickly find what pages to write and remove
              once lock callback is received.
  
  Severity   : normal
  Bugzilla   : 15953
  Description: more ldlm soft lockups
  Details    : In ldlm_resource_add_lock(), call to ldlm_resource_dump()
-             starve other threads from the resource lock for a long time in
-             case of long waiting queue, so change the debug level from
-             D_OTHER to the less frequently used D_INFO.
+            starve other threads from the resource lock for a long time in
+            case of long waiting queue, so change the debug level from
+            D_OTHER to the less frequently used D_INFO.
  
  Severity   : enhancement
  Bugzilla   : 13128
  Description: add -gid, -group, -uid, -user options to lfs find
  
+Severity   : normal
+Bugzilla   : 15950
+Description: Hung threads in invalidate_inode_pages2_range
+Details    : The direct IO path doesn't call check_rpcs to submit a new RPC once
+            one is completed. As a result, some RPCs are stuck in the queue
+            and are never sent.
+
  --------------------------------------------------------------------------------
  
  2007-08-10         Cluster File Systems, Inc. <info@clusterfs.com>
diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h

index 646e579..1b800b6 100644 (file)
--- a/lustre/include/obd_support.h
+++ b/lustre/include/obd_support.h
@@ -221,6 +221,7 @@ int obd_alloc_fail(const void *ptr, const char *name, const char *type,
  #define OBD_FAIL_OSC_BRW_PREP_REQ2       0x40a
  #define OBD_FAIL_OSC_CONNECT_CKSUM       0x40b
  #define OBD_FAIL_OSC_CKSUM_ADLER_ONLY    0x40c
+#define OBD_FAIL_OSC_DIO_PAUSE           0x40d
  
  #define OBD_FAIL_PTLRPC                  0x500
  #define OBD_FAIL_PTLRPC_ACK              0x501
diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c

index 06fabd3..43f8c7e 100644 (file)
--- a/lustre/osc/osc_request.c
+++ b/lustre/osc/osc_request.c
@@ -64,6 +64,7 @@ static quota_interface_t *quota_interface = NULL;
  extern quota_interface_t osc_quota_interface;
  
  static void osc_release_ppga(struct brw_page **ppga, obd_count count);
+static int brw_interpret(struct ptlrpc_request *request, void *data, int rc);
  int osc_cleanup(struct obd_device *obd);
  
  /* Pack OSC object metadata for disk storage (LE byte order). */
@@ -880,7 +881,7 @@ static void osc_update_grant(struct client_obd *cli, struct ost_body *body)
          CDEBUG(D_CACHE, "got "LPU64" extra grant\n", body->oa.o_grant);
          if (body->oa.o_valid & OBD_MD_FLGRANT)
                  cli->cl_avail_grant += body->oa.o_grant;
-        /* waiters are woken in brw_interpret_oap */
+        /* waiters are woken in brw_interpret */
          client_obd_list_unlock(&cli->cl_loi_list_lock);
  }
  
@@ -1517,33 +1518,6 @@ int osc_brw_redo_request(struct ptlrpc_request *request,
          RETURN(0);
  }
  
-static int brw_interpret(struct ptlrpc_request *req, void *data, int rc)
-{
-        struct osc_brw_async_args *aa = data;
-        int                        i;
-        ENTRY;
-
-        rc = osc_brw_fini_request(req, rc);
-        if (osc_recoverable_error(rc)) {
-                rc = osc_brw_redo_request(req, aa);
-                if (rc == 0)
-                        RETURN(0);
-        }
-
-        client_obd_list_lock(&aa->aa_cli->cl_loi_list_lock);
-        if (lustre_msg_get_opc(req->rq_reqmsg) == OST_WRITE)
-                aa->aa_cli->cl_w_in_flight--;
-        else
-                aa->aa_cli->cl_r_in_flight--;
-        for (i = 0; i < aa->aa_page_count; i++)
-                osc_release_write_grant(aa->aa_cli, aa->aa_ppga[i], 1);
-        client_obd_list_unlock(&aa->aa_cli->cl_loi_list_lock);
-
-        osc_release_ppga(aa->aa_ppga, aa->aa_page_count);
-
-        RETURN(rc);
-}
-
  static int async_internal(int cmd, struct obd_export *exp, struct obdo *oa,
                            struct lov_stripe_md *lsm, obd_count page_count,
                            struct brw_page **pga, struct ptlrpc_request_set *set,
@@ -1581,6 +1555,7 @@ static int async_internal(int cmd, struct obd_export *exp, struct obdo *oa,
                  ptlrpc_lprocfs_brw(req, OST_WRITE, aa->aa_requested_nob);
          }
  
+        LASSERT(list_empty(&aa->aa_oaps));
          if (rc == 0) {
                  req->rq_interpret_reply = brw_interpret;
                  ptlrpc_set_add_req(set, req);
@@ -1590,10 +1565,12 @@ static int async_internal(int cmd, struct obd_export *exp, struct obdo *oa,
                  else
                          cli->cl_w_in_flight++;
                  client_obd_list_unlock(&cli->cl_loi_list_lock);
+                OBD_FAIL_TIMEOUT(OBD_FAIL_OSC_DIO_PAUSE, 3);
          } else if (cmd == OBD_BRW_WRITE) {
                  client_obd_list_lock(&cli->cl_loi_list_lock);
                  for (i = 0; i < page_count; i++)
                          osc_release_write_grant(cli, pga[i], 0);
+                osc_wake_cache_waiters(cli);
                  client_obd_list_unlock(&cli->cl_loi_list_lock);
          }
          RETURN (rc);
@@ -2048,9 +2025,8 @@ static void osc_ap_completion(struct client_obd *cli, struct obdo *oa,
          EXIT;
  }
  
-static int brw_interpret_oap(struct ptlrpc_request *req, void *data, int rc)
+static int brw_interpret(struct ptlrpc_request *req, void *data, int rc)
  {
-        struct osc_async_page *oap, *tmp;
          struct osc_brw_async_args *aa = data;
          struct client_obd *cli;
          ENTRY;
@@ -2075,20 +2051,24 @@ static int brw_interpret_oap(struct ptlrpc_request *req, void *data, int rc)
          else
                  cli->cl_r_in_flight--;
  
-        /* the caller may re-use the oap after the completion call so
-         * we need to clean it up a little */
-        list_for_each_entry_safe(oap, tmp, &aa->aa_oaps, oap_rpc_item) {
-                list_del_init(&oap->oap_rpc_item);
-                osc_ap_completion(cli, aa->aa_oa, oap, 1, rc);
+        if (!list_empty(&aa->aa_oaps)) { /* from osc_send_oap_rpc() */
+                struct osc_async_page *oap, *tmp;
+                /* the caller may re-use the oap after the completion call so
+                 * we need to clean it up a little */
+                list_for_each_entry_safe(oap, tmp, &aa->aa_oaps, oap_rpc_item) {
+                        list_del_init(&oap->oap_rpc_item);
+                        osc_ap_completion(cli, aa->aa_oa, oap, 1, rc);
+                }
+                OBDO_FREE(aa->aa_oa);
+        } else { /* from async_internal() */
+                int i;
+                for (i = 0; i < aa->aa_page_count; i++)
+                        osc_release_write_grant(aa->aa_cli, aa->aa_ppga[i], 1);
          }
-
          osc_wake_cache_waiters(cli);
          osc_check_rpcs(cli);
-
          client_obd_list_unlock(&cli->cl_loi_list_lock);
  
-        OBDO_FREE(aa->aa_oa);
-        
          osc_release_ppga(aa->aa_ppga, aa->aa_page_count);
          RETURN(rc);
  }
@@ -2388,7 +2368,7 @@ static int osc_send_oap_rpc(struct client_obd *cli, struct lov_oinfo *loi,
          DEBUG_REQ(D_INODE, req, "%d pages, aa %p. now %dr/%dw in flight",
                    page_count, aa, cli->cl_r_in_flight, cli->cl_w_in_flight);
  
-        req->rq_interpret_reply = brw_interpret_oap;
+        req->rq_interpret_reply = brw_interpret;
          ptlrpcd_add_req(req);
          RETURN(1);
  }
@@ -3938,7 +3918,7 @@ int osc_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
  
                  oscc_init(obd);
                  /* We need to allocate a few requests more, because
-                   brw_interpret_oap tries to create new requests before freeing
+                   brw_interpret tries to create new requests before freeing
                     previous ones. Ideally we want to have 2x max_rpcs_in_flight
                     reserved, but I afraid that might be too much wasted RAM
                     in fact, so 2 is just my guess and still should work. */
diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh

index 03831df..7570e9d 100644 (file)
--- a/lustre/tests/sanity.sh
+++ b/lustre/tests/sanity.sh
@@ -4664,6 +4664,31 @@ test_119c() # bug 13099
  }
  run_test 119c "Testing for direct read hitting hole"
  
+test_119d() # bug 15950
+{
+        MAX_RPCS_IN_FLIGHT=`$LCTL get_param -n osc.*OST0000-osc-*.max_rpcs_in_flight`
+        $LCTL set_param -n osc.*OST0000-osc-*.max_rpcs_in_flight 1
+        BSIZE=1048576
+        $SETSTRIPE $DIR/$tfile -i 0 -c 1 || error "setstripe failed"
+        $DIRECTIO write $DIR/$tfile 0 1 $BSIZE || error "first directio failed"
+        #define OBD_FAIL_OSC_DIO_PAUSE           0x40d
+        lctl set_param fail_loc=0x40d
+        $DIRECTIO write $DIR/$tfile 1 4 $BSIZE &
+        pid_dio=$!
+        sleep 1
+        cat $DIR/$tfile > /dev/null &
+        lctl set_param fail_loc=0
+        pid_reads=$!
+        wait $pid_dio
+        log "the DIO writes have completed, now wait for the reads (should not block very long)"
+        sleep 2
+        [ -n "`ps h -p $pid_reads -o comm`" ] && \
+        error "the read rpcs have not completed in 2s"
+        rm -f $DIR/$tfile
+        $LCTL set_param -n osc.*OST0000-osc-*.max_rpcs_in_flight $MAX_RPCS_IN_FLIGHT
+}
+run_test 119d "The DIO path should try to send a new rpc once one is completed"
+
  test_120a() {
          mkdir -p $DIR/$tdir
          [ -z "`lctl get_param -n mdc.*.connect_flags | grep early_lock_cancel`" ] && \
author	johann <johann>
	Thu, 3 Jul 2008 07:16:34 +0000 (07:16 +0000)
committer	johann <johann>
	Thu, 3 Jul 2008 07:16:34 +0000 (07:16 +0000)
lustre/ChangeLog		patch \| blob \| history
lustre/include/obd_support.h		patch \| blob \| history
lustre/osc/osc_request.c		patch \| blob \| history
lustre/tests/sanity.sh		patch \| blob \| history