Whamcloud - gitweb
LU-11765 ofd: return EAGAIN during 1st CLEANUP_ORPHAN 36/33836/6
author Sergey Cheremencev <c17829@cray.com>
Wed, 24 Oct 2018 10:23:43 +0000 (13:23 +0300)
committer Oleg Drokin <green@whamcloud.com>
Fri, 15 Mar 2019 23:14:37 +0000 (23:14 +0000)
During the 1st CLEANUP_ORPHAN after failover some objects
could be absent - they haven't been recreated yet. The issue exists
when the MDS last_id is much greater than the OST last_id and ofd has to
recreate thousands of objects. Some of these objects could already
be assigned to a FID and requested by a client through
a glimpse RPC. Thus, if an object is not found, return EAGAIN instead
of ENOENT during the 1st CLEANUP_ORPHAN.

The patch also adds a test to reproduce the issue.
The test adds a delay to osd_trans_commit_cb(), causing
a large number of OST objects to be missing from the disk
after failover. It then checks that all objects have been
successfully recreated after failover.
The test works only with the FAILURE_MODE=HARD option.

Cray-bug-id: LUS-6414
Change-Id: Ia6899b4c1c35e1681f49faf1cb93a501ad159ec2
Signed-off-by: Sergey Cheremencev <c17829@cray.com>
Reviewed-on: https://es-gerrit.dev.cray.com/154151
Reviewed-by: Alexander Boyko <c17825@cray.com>
Reviewed-by: Andriy Skulysh <c17819@cray.com>
Tested-by: Alexander Lezhoev <c17454@cray.com>
Reviewed-on: https://review.whamcloud.com/33836
Tested-by: Jenkins
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Alexandr Boyko <c17825@cray.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/include/obd_support.h
lustre/ofd/ofd_dev.c
lustre/ofd/ofd_fs.c
lustre/ofd/ofd_internal.h
lustre/ofd/ofd_lvb.c
lustre/osd-ldiskfs/osd_handler.c
lustre/tests/replay-ost-single.sh

index d958bae..ce83162 100644 (file)
@@ -341,6 +341,7 @@ extern char obd_jobid_var[];
 #define OBD_FAIL_OST_INTEGRITY_FAULT    0x243
 #define OBD_FAIL_OST_INTEGRITY_CMP      0x244
 #define OBD_FAIL_OST_DISCONNECT_DELAY   0x245
 #define OBD_FAIL_OST_INTEGRITY_FAULT    0x243
 #define OBD_FAIL_OST_INTEGRITY_CMP      0x244
 #define OBD_FAIL_OST_DISCONNECT_DELAY   0x245
+#define OBD_FAIL_OST_DELAY_TRANS        0x246
 
 #define OBD_FAIL_LDLM                    0x300
 #define OBD_FAIL_LDLM_NAMESPACE_NEW      0x301
 
 #define OBD_FAIL_LDLM                    0x300
 #define OBD_FAIL_LDLM_NAMESPACE_NEW      0x301
index 03f8893..af33796 100644 (file)
@@ -1715,6 +1715,8 @@ static int ofd_create_hdl(struct tgt_session_info *tsi)
        EXIT;
        ofd_counter_incr(exp, LPROC_OFD_STATS_CREATE,
                         tsi->tsi_jobid, 1);
        EXIT;
        ofd_counter_incr(exp, LPROC_OFD_STATS_CREATE,
                         tsi->tsi_jobid, 1);
+       if (unlikely(!oseq->os_last_id_synced))
+               oseq->os_last_id_synced = 1;
 out:
        mutex_unlock(&oseq->os_create_lock);
 out_nolock:
 out:
        mutex_unlock(&oseq->os_create_lock);
 out_nolock:
index 8be08a3..f8c3e2d 100644 (file)
@@ -407,6 +407,7 @@ struct ofd_seq *ofd_seq_load(const struct lu_env *env, struct ofd_device *ofd,
        mutex_init(&oseq->os_create_lock);
        spin_lock_init(&oseq->os_last_oid_lock);
        ostid_set_seq(&oseq->os_oi, seq);
        mutex_init(&oseq->os_create_lock);
        spin_lock_init(&oseq->os_last_oid_lock);
        ostid_set_seq(&oseq->os_oi, seq);
+       oseq->os_last_id_synced = 0;
 
        atomic_set(&oseq->os_refc, 1);
 
 
        atomic_set(&oseq->os_refc, 1);
 
index c76124b..afc210b 100644 (file)
@@ -106,7 +106,8 @@ struct ofd_seq {
        struct mutex            os_create_lock;
        atomic_t                os_refc;
        struct dt_object        *os_lastid_obj;
        struct mutex            os_create_lock;
        atomic_t                os_refc;
        struct dt_object        *os_lastid_obj;
-       unsigned long           os_destroys_in_progress:1;
+       unsigned long           os_destroys_in_progress:1,
+                               os_last_id_synced:1;
 };
 
 struct ofd_device {
 };
 
 struct ofd_device {
index 7b46388..7c21683 100644 (file)
@@ -126,8 +126,24 @@ static int ofd_lvbo_init(const struct lu_env *env, struct ldlm_resource *res)
                GOTO(out_lvb, rc = PTR_ERR(fo));
 
        rc = ofd_attr_get(env, fo, &info->fti_attr);
                GOTO(out_lvb, rc = PTR_ERR(fo));
 
        rc = ofd_attr_get(env, fo, &info->fti_attr);
-       if (rc)
+       if (rc) {
+               struct ofd_seq          *oseq;
+               __u64                    seq;
+
+               /* Object could be recreated during the first
+                * CLEANUP_ORPHAN request. */
+               if (rc == -ENOENT) {
+                       seq = fid_seq(&info->fti_fid);
+                       oseq = ofd_seq_load(env, ofd, fid_seq_is_idif(seq) ?
+                                           FID_SEQ_OST_MDT0 : seq);
+                       if (!IS_ERR_OR_NULL(oseq)) {
+                               if (!oseq->os_last_id_synced)
+                                       rc = -EAGAIN;
+                               ofd_seq_put(env, oseq);
+                       }
+               }
                GOTO(out_obj, rc);
                GOTO(out_obj, rc);
+       }
 
        lvb->lvb_size = info->fti_attr.la_size;
        lvb->lvb_blocks = info->fti_attr.la_blocks;
 
        lvb->lvb_size = info->fti_attr.la_size;
        lvb->lvb_blocks = info->fti_attr.la_blocks;
index f1f50d7..adc7b4a 100644 (file)
@@ -1707,6 +1707,7 @@ static void osd_trans_commit_cb(struct super_block *sb,
        if (error)
                CERROR("transaction @0x%p commit error: %d\n", th, error);
 
        if (error)
                CERROR("transaction @0x%p commit error: %d\n", th, error);
 
+       OBD_FAIL_TIMEOUT(OBD_FAIL_OST_DELAY_TRANS, 40);
        /* call per-transaction callbacks if any */
        list_for_each_entry_safe(dcb, tmp, &oh->ot_commit_dcb_list,
                                 dcb_linkage) {
        /* call per-transaction callbacks if any */
        list_for_each_entry_safe(dcb, tmp, &oh->ot_commit_dcb_list,
                                 dcb_linkage) {
index f7d4d4a..9764e6b 100755 (executable)
@@ -439,6 +439,49 @@ test_10() {
 }
 run_test 10 "conflicting PW & PR locks on a client"
 
 }
 run_test 10 "conflicting PW & PR locks on a client"
 
+test_12() {
+       [ $FAILURE_MODE != "HARD" ] &&
+               skip "Test needs FAILURE_MODE HARD" && return 0
+       remote_ost || { skip "need remote OST" && return 0; }
+
+       local tmp=$TMP/$tdir
+       local dir=$DIR/$tdir
+       declare -a pids
+
+
+       mkdir -p $tmp || error "can't create $tmp"
+       mkdir -p $dir || error "can't create $dir"
+
+       $LFS setstripe -c 1 -i 0 $dir
+
+       for i in `seq 1 10`; do mkdir $dir/d$i; done
+
+       #define OBD_FAIL_OST_DELAY_TRANS        0x245
+       do_facet ost1 "$LCTL set_param fail_loc=0x245" ||
+               error "can't set fail_loc"
+
+       for i in `seq 1 10`;
+       do
+               createmany -o $dir/d$i/$(openssl rand -base64 12) 500 &
+               pids+=($!)
+       done
+       echo "Waiting createmany pids"
+       wait ${pids[@]}
+
+       ls -lR $dir > $tmp/ls_r_out 2>&1&
+       local ls_pid=$!
+
+       facet_failover ost1
+
+       echo "starting wait for ls -l"
+       wait $ls_pid
+       grep "?\|No such file or directory" $tmp/ls_r_out &&
+               error "Found file without object on OST"
+       rm -rf $tmp
+       rm -rf $dir
+}
+run_test 12 "check stat after OST failover"
+
 complete $SECONDS
 check_and_cleanup_lustre
 exit_status
 complete $SECONDS
 check_and_cleanup_lustre
 exit_status