Branch b1_8

author johann <johann>

Thu, 3 Jul 2008 07:56:27 +0000 (07:56 +0000)

committer johann <johann>

Thu, 3 Jul 2008 07:56:27 +0000 (07:56 +0000)
author johann <johann>
Thu, 3 Jul 2008 07:56:27 +0000 (07:56 +0000)
committer johann <johann>
Thu, 3 Jul 2008 07:56:27 +0000 (07:56 +0000)
diff --git a/lustre/ChangeLog b/lustre/ChangeLog

index f4f8f5e..b38afdf 100644 (file)
--- a/lustre/ChangeLog
+++ b/lustre/ChangeLog
@@ -18,10 +18,10 @@ tbd Sun Microsystems, Inc.
           removed cwd "./" (refer to Bugzilla 14399).
         * A new quota file format has been introduced in 1.6.5.
           The format conversion from prior releases is handled transparently,
-         but releases older than 1.4.12/1.6.5 will not understand this new
-         format.  The automatic format conversion can be avoided by running
+         but releases older than 1.4.12/1.6.5 don't understand this new
+         format. The automatic format conversion can be avoided by running
           the following command on the MDS:
-               'tunefs.lustre --param="mdt.quota_type=ug1" $MDTDEV'.
+               'tunefs.lustre --param="mdt.quota_type=ug1" $MDTDEV'.
           For more information, please refer to bugzilla 13904.
  
  Severity   : enhancement
@@ -43,7 +43,7 @@ Bugzilla   : 14742
  Frequency  : rare
  Description: ASSERTION(CheckWriteback(page,cmd)) failed
  Details    : badly clear PG_Writeback bit in ll_ap_completion can produce false
-             positive assertion.
+            positive assertion.
  
  Severity   : enhancement
  Bugzilla   : 15865
@@ -53,7 +53,7 @@ Severity   : major
  Bugzilla   : 15924
  Description: do not process already freed flock
  Details    : flock can possibly be freed by another thread before it reaches
-             to ldlm_flock_completion_ast.
+            to ldlm_flock_completion_ast.
  
  Severity   : normal
  Bugzilla   : 14480
@@ -64,14 +64,14 @@ Severity   : minor
  Bugzilla   : 15837
  Description: oops in page fault handler
  Details    : kernel page fault handler can return two special 'pages' in 
-             error case, don't try dereference NOPAGE_SIGBUS and NOPAGE_OMM.
+            error case, don't try dereference NOPAGE_SIGBUS and NOPAGE_OMM.
  
  Severity   : minor
  Bugzilla   : 15716
  Description: timeout with invalidate import.
  Details    : ptlrpcd_check call obd_zombie_impexp_cull and wait request which should be
-             handled by ptlrpcd. This produce long age waiting and -ETIMEOUT
-             ptlrpc_invalidate_import and as result LASSERT.
+            handled by ptlrpcd. This produce long age waiting and -ETIMEOUT
+            ptlrpc_invalidate_import and as result LASSERT.
  
  Severity   : normal
  Frequency  : only with broken builds/installations
@@ -91,38 +91,38 @@ Severity   : major
  Bugzilla   : 14134
  Description: enable MGS and MDT services start separately
  Details    : add a 'nomgs' option in mount.lustre to enable start a MDT with
-             a co-located MGS without starting the MGS, which is a complement
-             to 'nosvc' mount option.
+            a co-located MGS without starting the MGS, which is a complement
+            to 'nosvc' mount option.
  
  Severity   : normal
  Frequency  : always, on ppc.
  Bugzilla   : 14856
  Description: cleanup in ptlrpc code, related to ppc platform
  Details    : store magic in native order avoid panic's in recovery on ppc node
-             and forbid from this error in future. Also fix posibily of twice swab
-             data. Fix get lov striping to userpace.
+            and forbid from this error in future. Also fix posibily of twice swab
+            data. Fix get lov striping to userpace.
  
  Severity   : normal
  Bugzilla   : 15756
  Frequency  : rare, replay get lost on server
  Description: server incorrectly drop resent replays lead to recovery failure.
  Details    : do not drop replay according to msg flags, instead we check the
-             per-export recovery request queue for duplication of transno.
+            per-export recovery request queue for duplication of transno.
  
  Severity   : normal
  Bugzilla   : 14835
  Frequency  : after recovery
  Description: precreate to many object's after del orphan.
  Details    : del orphan st in oscc last_id == next_id and this triger growing
-             count of precreated objects. Set flag LOW to skip increase count
-             of precreated objects.
+            count of precreated objects. Set flag LOW to skip increase count
+            of precreated objects.
  
  Severity   : normal
  Bugzilla   : 15139
  Frequency  : rare, on clear nid stats
  Description: ASSERTION(client_stat->nid_exp_ref_count == 0)
  Details    : when clean nid stats sometimes try destroy live entry,
-             and this produce panic in free.
+            and this produce panic in free.
  
  Severity   : major
  Bugzilla   : 15575
@@ -141,13 +141,13 @@ Severity   : normal
  Bugzilla   : 15443
  Description: wait until IO finished before start new when do lock cancel.
  Details    : VM protocol want old IO finished before start new, in this case
-             need wait until PG_writeback is cleared until check dirty flag and
-             call writepages in lock cancel callback.
+            need wait until PG_writeback is cleared until check dirty flag and
+            call writepages in lock cancel callback.
  
  Severity   : normal
  Bugzilla   : 12888
-Description: mds_mfd_close() ASSERTION(rc == 0) 
-Details    : In mds_mfd_close(), we need protect inode's writecount change 
+Description: mds_mfd_close() ASSERTION(rc == 0)
+Details    : In mds_mfd_close(), we need protect inode's writecount change
              within its orphan write semaphore to prevent possible races.
  
  Severity   : minor
@@ -172,15 +172,15 @@ Severity   : normal
  Bugzilla   : 15574
  Description: MDS LBUG: ASSERTION(!IS_ERR(dchild))
  Details    : In reconstruct_* functions, LASSERTs on both the data supplied by
-             a client, and the data on disk are dangerous and incorrect. Change
+            a client, and the data on disk are dangerous and incorrect. Change
              them with client eviction.
  
  Severity   : normal
  Bugzilla   : 15346
  Description: skiplist implementation simplification
  Details    : skiplists are used to group compatible locks on granted list
-             that was implemented as tracking first and last lock of each lock group
-             the patch changes that to using doubly linked lists
+            that was implemented as tracking first and last lock of each lock group
+            the patch changes that to using doubly linked lists
  
  Severity   : normal
  Bugzilla   : 15933
@@ -196,8 +196,8 @@ Description: ldiskfs error: XXX blocks in bitmap, YYY in gd
  Details    : If blocks per group is less than blocksize*8, set rest of the
              bitmap to 1.
  
-Severity   : major 
-Frequency  : Application do stride read on lustre 
+Severity   : major
+Frequency  : Application do stride read on lustre
  Bugzilla   : 16172
  Description: The read performance will drop a lot if the application does
              stride read.
@@ -209,9 +209,9 @@ Severity   : normal
  Bugzilla   : 15953
  Description: more ldlm soft lockups
  Details    : In ldlm_resource_add_lock(), call to ldlm_resource_dump()
-             starve other threads from the resource lock for a long time in
-             case of long waiting queue, so change the debug level from
-             D_OTHER to the less frequently used D_INFO.
+            starve other threads from the resource lock for a long time in
+            case of long waiting queue, so change the debug level from
+            D_OTHER to the less frequently used D_INFO.
  
  Severity   : enhancement
  Bugzilla   : 13128
@@ -233,6 +233,13 @@ Description: this bug _only_ happens when inode quota limitation is very low
  Details    : if remaining quota        equates 1, it is a sign to demonstate that quota
              is effective now. So least quota qunit should be 2.
  
+Severity   : normal
+Bugzilla   : 15950
+Description: Hung threads in invalidate_inode_pages2_range
+Details    : The direct IO path doesn't call check_rpcs to submit a new RPC once
+            one is completed. As a result, some RPCs are stuck in the queue
+            and are never sent.
+
  -------------------------------------------------------------------------------
  
  
diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h

index 6ceb9cc..34cf827 100644 (file)
--- a/lustre/include/obd_support.h
+++ b/lustre/include/obd_support.h
@@ -228,6 +228,7 @@ extern unsigned int obd_alloc_fail_rate;
  #define OBD_FAIL_OSC_BRW_PREP_REQ2       0x40a
  #define OBD_FAIL_OSC_CONNECT_CKSUM       0x40b
  #define OBD_FAIL_OSC_CKSUM_ADLER_ONLY    0x40c
+#define OBD_FAIL_OSC_DIO_PAUSE           0x40d
  
  #define OBD_FAIL_PTLRPC                  0x500
  #define OBD_FAIL_PTLRPC_ACK              0x501
diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c

index 00a3e9a..9c5a2db 100644 (file)
--- a/lustre/osc/osc_request.c
+++ b/lustre/osc/osc_request.c
@@ -63,6 +63,7 @@ static quota_interface_t *quota_interface = NULL;
  extern quota_interface_t osc_quota_interface;
  
  static void osc_release_ppga(struct brw_page **ppga, obd_count count);
+static int brw_interpret(struct ptlrpc_request *request, void *data, int rc);
  int osc_cleanup(struct obd_device *obd);
  
  static quota_interface_t *quota_interface;
@@ -814,7 +815,7 @@ static void osc_update_grant(struct client_obd *cli, struct ost_body *body)
          CDEBUG(D_CACHE, "got "LPU64" extra grant\n", body->oa.o_grant);
          if (body->oa.o_valid & OBD_MD_FLGRANT)
                  cli->cl_avail_grant += body->oa.o_grant;
-        /* waiters are woken in brw_interpret_oap */
+        /* waiters are woken in brw_interpret */
          client_obd_list_unlock(&cli->cl_loi_list_lock);
  }
  
@@ -1425,33 +1426,6 @@ int osc_brw_redo_request(struct ptlrpc_request *request,
          RETURN(0);
  }
  
-static int brw_interpret(struct ptlrpc_request *request, void *data, int rc)
-{
-        struct osc_brw_async_args *aa = data;
-        int                        i;
-        ENTRY;
-
-        rc = osc_brw_fini_request(request, rc);
-        CDEBUG(D_INODE, "request %p aa %p rc %d\n", request, aa, rc);  
-        if (osc_recoverable_error(rc)) {
-                rc = osc_brw_redo_request(request, aa);
-                if (rc == 0)
-                        RETURN(0);
-        }
-        client_obd_list_lock(&aa->aa_cli->cl_loi_list_lock);
-        if (lustre_msg_get_opc(request->rq_reqmsg) == OST_WRITE)
-                aa->aa_cli->cl_w_in_flight--;
-        else
-                aa->aa_cli->cl_r_in_flight--;
-
-        for (i = 0; i < aa->aa_page_count; i++)
-                osc_release_write_grant(aa->aa_cli, aa->aa_ppga[i], 1);
-        client_obd_list_unlock(&aa->aa_cli->cl_loi_list_lock);
-        osc_release_ppga(aa->aa_ppga, aa->aa_page_count);
-
-        RETURN(rc);
-}
-
  static int async_internal(int cmd, struct obd_export *exp, struct obdo *oa,
                            struct lov_stripe_md *lsm, obd_count page_count,
                            struct brw_page **pga, struct ptlrpc_request_set *set)
@@ -1487,6 +1461,7 @@ static int async_internal(int cmd, struct obd_export *exp, struct obdo *oa,
                                   cli->cl_w_in_flight);
                  ptlrpc_lprocfs_brw(request, OST_WRITE, aa->aa_requested_nob);
          }
+        LASSERT(list_empty(&aa->aa_oaps));
  
          if (rc == 0) {
                  request->rq_interpret_reply = brw_interpret;
@@ -1497,10 +1472,12 @@ static int async_internal(int cmd, struct obd_export *exp, struct obdo *oa,
                  else
                          cli->cl_w_in_flight++;
                  client_obd_list_unlock(&cli->cl_loi_list_lock);
+                OBD_FAIL_TIMEOUT(OBD_FAIL_OSC_DIO_PAUSE, 3);
          } else if (cmd == OBD_BRW_WRITE) {
                  client_obd_list_lock(&cli->cl_loi_list_lock);
                  for (i = 0; i < page_count; i++)
                          osc_release_write_grant(cli, pga[i], 0);
+                osc_wake_cache_waiters(cli);
                  client_obd_list_unlock(&cli->cl_loi_list_lock);
          }
  
@@ -1957,10 +1934,9 @@ static void osc_ap_completion(struct client_obd *cli, struct obdo *oa,
          EXIT;
  }
  
-static int brw_interpret_oap(struct ptlrpc_request *request, void *data, int rc)
+static int brw_interpret(struct ptlrpc_request *request, void *data, int rc)
  {
          struct osc_brw_async_args *aa = data;
-        struct osc_async_page *oap, *tmp;
          struct client_obd *cli;
          ENTRY;
  
@@ -1983,19 +1959,24 @@ static int brw_interpret_oap(struct ptlrpc_request *request, void *data, int rc)
          else
                  cli->cl_r_in_flight--;
  
-        /* the caller may re-use the oap after the completion call so
-         * we need to clean it up a little */
-        list_for_each_entry_safe(oap, tmp, &aa->aa_oaps, oap_rpc_item) {
-                list_del_init(&oap->oap_rpc_item);
-                osc_ap_completion(cli, aa->aa_oa, oap, 1, rc);
+        if (!list_empty(&aa->aa_oaps)) { /* from osc_send_oap_rpc() */
+                struct osc_async_page *oap, *tmp;
+                /* the caller may re-use the oap after the completion call so
+                 * we need to clean it up a little */
+                list_for_each_entry_safe(oap, tmp, &aa->aa_oaps, oap_rpc_item) {
+                        list_del_init(&oap->oap_rpc_item);
+                        osc_ap_completion(cli, aa->aa_oa, oap, 1, rc);
+                }
+                OBDO_FREE(aa->aa_oa);
+        } else { /* from async_internal() */
+                int i;
+                for (i = 0; i < aa->aa_page_count; i++)
+                        osc_release_write_grant(aa->aa_cli, aa->aa_ppga[i], 1);
          }
-
          osc_wake_cache_waiters(cli);
          osc_check_rpcs(cli);
          client_obd_list_unlock(&cli->cl_loi_list_lock);
  
-        OBDO_FREE(aa->aa_oa);
-
          osc_release_ppga(aa->aa_ppga, aa->aa_page_count);
          RETURN(rc);
  }
@@ -2295,7 +2276,7 @@ static int osc_send_oap_rpc(struct client_obd *cli, struct lov_oinfo *loi,
          DEBUG_REQ(D_INODE, req, "%d pages, aa %p. now %dr/%dw in flight",
                    page_count, aa, cli->cl_r_in_flight, cli->cl_w_in_flight);
  
-        req->rq_interpret_reply = brw_interpret_oap;
+        req->rq_interpret_reply = brw_interpret;
          ptlrpcd_add_req(req);
          RETURN(1);
  }
@@ -3810,7 +3791,7 @@ int osc_setup(struct obd_device *obd, obd_count len, void *buf)
  
                  oscc_init(obd);
                  /* We need to allocate a few requests more, because
-                   brw_interpret_oap tries to create new requests before freeing
+                   brw_interpret tries to create new requests before freeing
                     previous ones. Ideally we want to have 2x max_rpcs_in_flight
                     reserved, but I afraid that might be too much wasted RAM
                     in fact, so 2 is just my guess and still should work. */
diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh

index 2d2f725..9cea5c1 100644 (file)
--- a/lustre/tests/sanity.sh
+++ b/lustre/tests/sanity.sh
@@ -4551,6 +4551,31 @@ test_119c() # bug 13099
  }
  run_test 119c "Testing for direct read hitting hole"
  
+test_119d() # bug 15950
+{
+        MAX_RPCS_IN_FLIGHT=`$LCTL get_param -n osc.*OST0000-osc-[^mM]*.max_rpcs_in_flight`
+        $LCTL set_param -n osc.*OST0000-osc-[^mM]*.max_rpcs_in_flight 1
+        BSIZE=1048576
+        $SETSTRIPE $DIR/$tfile -i 0 -c 1 || error "setstripe failed"
+        $DIRECTIO write $DIR/$tfile 0 1 $BSIZE || error "first directio failed"
+        #define OBD_FAIL_OSC_DIO_PAUSE           0x40d
+        lctl set_param fail_loc=0x40d
+        $DIRECTIO write $DIR/$tfile 1 4 $BSIZE &
+        pid_dio=$!
+        sleep 1
+        cat $DIR/$tfile > /dev/null &
+        lctl set_param fail_loc=0
+        pid_reads=$!
+        wait $pid_dio
+        log "the DIO writes have completed, now wait for the reads (should not block very long)"
+        sleep 2
+        [ -n "`ps h -p $pid_reads -o comm`" ] && \
+                error "the read rpcs have not completed in 2s"
+        rm -f $DIR/$tfile
+        $LCTL set_param -n osc.*OST0000-osc-[^mM]*.max_rpcs_in_flight $MAX_RPCS_IN_FLIGHT
+}
+run_test 119d "The DIO path should try to send a new rpc once one is completed"
+
  test_120a() {
          mkdir -p $DIR/$tdir
          [ -z "`lctl get_param -n mdc.*.connect_flags | grep early_lock_cancel`" ] && \
author	johann <johann>
	Thu, 3 Jul 2008 07:56:27 +0000 (07:56 +0000)
committer	johann <johann>
	Thu, 3 Jul 2008 07:56:27 +0000 (07:56 +0000)
lustre/ChangeLog		patch \| blob \| history
lustre/include/obd_support.h		patch \| blob \| history
lustre/osc/osc_request.c		patch \| blob \| history
lustre/tests/sanity.sh		patch \| blob \| history