#define OBD_FAIL_MDS_LL_BLOCK 0x172
#define OBD_FAIL_MDS_LOD_CREATE_PAUSE 0x173
#define OBD_FAIL_MDS_CONNECT_VS_EVICT 0x174
+#define OBD_FAIL_MDS_DELAY_OPEN 0x175
/* CMD */
#define OBD_FAIL_MDS_IS_SUBDIR_NET 0x180
#define OBD_FAIL_LLITE_XATTR_PAUSE 0x1420
#define OBD_FAIL_LLITE_PAGE_INVALIDATE_PAUSE 0x1421
#define OBD_FAIL_LLITE_READPAGE_PAUSE 0x1422
+#define OBD_FAIL_LLITE_PANIC_ON_ESTALE 0x1423
#define OBD_FAIL_FID_INDIR 0x1501
#define OBD_FAIL_FID_INLMA 0x1502
rec->lor_range);
lcm->lcm_flags = cpu_to_le16(LCM_FL_NONE);
} else {
- lcm->lcm_layout_gen = cpu_to_le32(1);
+ /*
+ * if OST doesn't provide layout version, then try
+ * to inherit one from MDS's layout, but increment
+ * it so the client notices and applies modified
+ * layout
+ */
+ le32_add_cpu(&lcm->lcm_layout_gen, 1);
lcm->lcm_flags = cpu_to_le16(LCM_FL_NONE);
}
lcm->lcm_entry_count = cpu_to_le16(1);
obj->vob_discard_page_warned = 0;
} else {
SetPageError(vmpage);
- if (ioret == -ENOSPC)
+ if (ioret == -ENOSPC) {
set_bit(AS_ENOSPC, &inode->i_mapping->flags);
- else
+ } else {
+ if (OBD_FAIL_CHECK(OBD_FAIL_LLITE_PANIC_ON_ESTALE))
+ LBUG();
set_bit(AS_EIO, &inode->i_mapping->flags);
+ }
if ((ioret == -ESHUTDOWN || ioret == -EINTR ||
ioret == -EIO) && obj->vob_discard_page_warned == 0) {
LASSERT(conf->coc_opc == OBJECT_CONF_SET);
+ /*
+ * Don't apply an older layout; one can arrive when the
+ * layout was returned w/o holding the ldlm lock.
+ * XXX: can we rollback in case of recovery?
+ */
+	if (lsm && lov->lo_lsm) {
+		/*
+		 * Compare the two generations with LU_LAYOUT_RESYNC masked
+		 * out.  Use a plain '&' for the cached layout as well: '&='
+		 * would write the masked value back into
+		 * lov->lo_lsm->lsm_layout_gen, silently altering the field
+		 * that the equality test right after this block compares.
+		 */
+		u32 oldgen = lov->lo_lsm->lsm_layout_gen & ~LU_LAYOUT_RESYNC;
+		u32 newgen = lsm->lsm_layout_gen & ~LU_LAYOUT_RESYNC;
+
+		/* incoming layout is older than the cached one: ignore it */
+		if (newgen < oldgen) {
+			CDEBUG(D_HA, "skip old for "DFID": %d < %d\n",
+			       PFID(lu_object_fid(lov2lu(lov))),
+			       (int)newgen, (int)oldgen);
+			GOTO(out, result = 0);
+		}
+	}
+
if ((lsm == NULL && lov->lo_lsm == NULL) ||
((lsm != NULL && lov->lo_lsm != NULL) &&
(lov->lo_lsm->lsm_layout_gen == lsm->lsm_layout_gen) &&
return true;
}
+/*
+ * Re-read the LOV EA of @o into the reply's RMF_MDT_MD buffer.
+ *
+ * This is a no-op (returns 0) unless @ibits contains MDS_INODELOCK_LAYOUT,
+ * @o is a regular file, and @ma already carries a LOV EA (MA_LOV set).
+ * Otherwise MA_LOV is cleared, @ma is pointed back at the reply buffer and
+ * the EA is fetched again via __mdt_stripe_get(); on success the reply
+ * body's mbo_eadatasize is updated to the new ma_lmm_size.
+ *
+ * Returns 0 on success or when skipped, otherwise the non-zero result of
+ * __mdt_stripe_get().
+ */
+static int mdt_refetch_lovea(struct mdt_thread_info *info,
+			     struct mdt_object *o, struct md_attr *ma,
+			     u64 ibits)
+{
+	struct mdt_body *body;
+	int err;
+
+	/* checks are evaluated left to right, same order as before */
+	if (!(ibits & MDS_INODELOCK_LAYOUT) ||
+	    !S_ISREG(lu_object_attr(&o->mot_obj)) ||
+	    !(ma->ma_valid & MA_LOV))
+		return 0;
+
+	/* forget the previously fetched EA and reset the target buffer */
+	ma->ma_valid &= ~MA_LOV;
+	info->mti_big_lmm_used = 0;
+	ma->ma_lmm = req_capsule_server_get(info->mti_pill, &RMF_MDT_MD);
+	ma->ma_lmm_size = req_capsule_get_size(info->mti_pill, &RMF_MDT_MD,
+					       RCL_SERVER);
+
+	err = __mdt_stripe_get(info, o, ma, XATTR_NAME_LOV);
+	if (err == 0) {
+		/* publish the size of the re-fetched EA in the reply body */
+		body = req_capsule_server_get(info->mti_pill, &RMF_MDT_BODY);
+		body->mbo_eadatasize = ma->ma_lmm_size;
+	}
+
+	return err;
+}
+
static int mdt_open_by_fid_lock(struct mdt_thread_info *info,
struct ldlm_reply *rep,
struct mdt_lock_handle *lhc)
tgt_open_obj_set(info->mti_env, mdt_obj2dt(o));
rc = mdt_finish_open(info, parent, o, open_flags, rep);
- if (!rc) {
- mdt_set_disposition(info, rep, DISP_LOOKUP_POS);
- if (open_flags & MDS_OPEN_LOCK)
- mdt_set_disposition(info, rep, DISP_OPEN_LOCK);
- if (open_flags & MDS_OPEN_LEASE)
- mdt_set_disposition(info, rep, DISP_OPEN_LEASE);
- }
+ if (rc)
+ GOTO(out_unlock, rc);
+
+ mdt_set_disposition(info, rep, DISP_LOOKUP_POS);
+ if (open_flags & MDS_OPEN_LOCK)
+ mdt_set_disposition(info, rep, DISP_OPEN_LOCK);
+ if (open_flags & MDS_OPEN_LEASE)
+ mdt_set_disposition(info, rep, DISP_OPEN_LEASE);
+
+ /*
+ * if layout lock is granted, then we should re-fetch LOVEA
+ * which was originally taken w/o the lock
+ */
+ rc = mdt_refetch_lovea(info, o, ma, ibits);
+
GOTO(out_unlock, rc);
out_unlock:
} else {
/* get openlock if this isn't replay and client requested it */
if (!req_is_replay(req)) {
+ OBD_FAIL_TIMEOUT(OBD_FAIL_MDS_DELAY_OPEN, cfs_fail_val);
rc = mdt_object_open_lock(info, child, lhc, &ibits);
object_locked = 1;
if (rc != 0)
PFID(mdt_object_fid(child)), rc);
mdt_clear_disposition(info, ldlm_rep, DISP_OPEN_CREATE);
}
+ GOTO(out_child_unlock, result);
}
+ /*
+ * if layout lock is granted, then we should re-fetch LOVEA
+ * which was originally taken w/o the lock.
+ */
+ result = mdt_refetch_lovea(info, child, ma, ibits);
+
mdt_counter_incr(req, LPROC_MDT_OPEN,
ktime_us_delta(ktime_get(), kstart));
- EXIT;
+ GOTO(out_child_unlock, result);
+
out_child_unlock:
if (object_locked)
mdt_object_open_unlock(info, child, lhc, ibits, result);
}
run_test 100 "flr mode fsx test"
-ctrl_file=$(mktemp /tmp/CTRL.XXXXXX)
-lock_file=$(mktemp /var/lock/FLR.XXXXXX)
-
write_file_200() {
local tf=$1
done
}
-test_200() {
+# this was test_200 before adding "b" and "c" subtests
+test_200a() {
local tf=$DIR/$tfile
local tf2=$DIR2/$tfile
local tf3=$DIR3/$tfile
+ ctrl_file=$(mktemp /tmp/CTRL.XXXXXX)
+ lock_file=$(mktemp /var/lock/FLR.XXXXXX)
+ stack_trap "rm -f $ctrl_file $lock_file $tf $tf-2 $tf-3"
+
$LFS setstripe -E 1M -S 1M -E 2M -c 2 -E 4M -E 16M -E eof $tf
$LFS setstripe -E 2M -S 1M -E 6M -c 2 -E 8M -E 32M -E eof $tf-2
$LFS setstripe -E 4M -c 2 -E 8M -E 64M -E eof $tf-3
umount_client $MOUNT2
umount_client $MOUNT3
- rm -f $lock_file
-
# resync and verify mirrors
$LFS mirror resync $tf || error "final resync failed"
get_mirror_ids $tf
true
}
-run_test 200 "stress test"
+run_test 200a "stress test"
+
+# Build a three-mirror file, then race writers and readers on two client
+# mounts against a resync running on a third mount, with client fail_loc
+# 0x1423 (OBD_FAIL_LLITE_PANIC_ON_ESTALE) set.  Afterwards resync once
+# more and check that every mirror has the same md5 checksum.
+test_200b() {
+	local tf=$DIR/$tfile
+	local tf2=$DIR2/$tfile
+	local tf3=$DIR3/$tfile
+	local id
+
+	# intentionally not local; presumably read by the *_file_200
+	# helpers -- confirm against their definitions
+	ctrl_file=$(mktemp /tmp/CTRL.XXXXXX)
+	lock_file=$(mktemp /var/lock/FLR.XXXXXX)
+	stack_trap "rm -f $ctrl_file $lock_file $tf $tf-2 $tf-3"
+
+	$LFS setstripe -E 1M -S 1M -E 2M -c 2 -E 4M -E 16M -E eof $tf
+	$LFS setstripe -E 2M -S 1M -E 6M -c 2 -E 8M -E 32M -E eof $tf-2
+	$LFS setstripe -E 4M -c 2 -E 8M -E 64M -E eof $tf-3
+
+	$LFS mirror extend -N -f $tf-2 $tf ||
+		error "merging $tf-2 into $tf failed"
+	$LFS mirror extend -N -f $tf-3 $tf ||
+		error "merging $tf-3 into $tf failed"
+
+	mkdir -p $MOUNT2 && mount_client $MOUNT2
+
+	mkdir -p $MOUNT3 && mount_client $MOUNT3
+
+	verify_flr_state $tf3 "ro"
+
+#define OBD_FAIL_LLITE_PANIC_ON_ESTALE 0x1423
+	$LCTL set_param fail_loc=0x1423
+
+	local -a pids
+
+	write_file_200 $tf &
+	pids+=($!)
+
+	read_file_200 $tf &
+	pids+=($!)
+
+	write_file_200 $tf2 &
+	pids+=($!)
+
+	read_file_200 $tf2 &
+	pids+=($!)
+
+	resync_file_200 $tf3 &
+	pids+=($!)
+
+	local sleep_time=60
+	[ "$SLOW" = "yes" ] && sleep_time=400
+	sleep $sleep_time
+	# presumably the background workers stop once the control file is
+	# gone -- confirm against the *_file_200 helpers
+	rm -f $ctrl_file
+
+	echo "Waiting ${pids[*]}"
+	wait "${pids[@]}"
+
+	umount_client $MOUNT2
+	umount_client $MOUNT3
+
+	# resync and verify mirrors
+	$LFS mirror resync $tf || {
+		ps ax
+		error "final resync failed"
+	}
+	get_mirror_ids $tf
+
+	# every remaining mirror must match the checksum of the first one
+	local csum=$($LFS mirror read -N ${mirror_array[0]} $tf | md5sum)
+	for id in ${mirror_array[@]:1}; do
+		[ "$($LFS mirror read -N $id $tf | md5sum)" = "$csum" ] ||
+			error "checksum error for mirror $id"
+	done
+
+	true
+}
+run_test 200b "racing IO, mirror extend and resync"
+
+test_200c() {
+ local tf=$DIR/$tfile
+ local tf2=$DIR2/$tfile
+
+ mkdir -p $MOUNT2 && mount_client $MOUNT2
+ stack_trap "umount_client $MOUNT2"
+ stack_trap "rm -f $tf"
+
+ $LFS df
+
+ dd if=/dev/urandom of=$tf bs=1M count=2 || error "can't write"
+ local mdt_idx
+ mdt_idx=$($LFS getstripe -m $tf)
+
+ cancel_lru_locks mdc
+ cancel_lru_locks osc
+
+ # start a process modifying file, block it just
+ # before layout lock acquisition
+#define OBD_FAIL_MDS_DELAY_OPEN 0x175
+ do_facet mds$((mdt_idx+1)) $LCTL set_param fail_loc=0x80000175 fail_val=10
+ #log "dd to stale replica"
+ dd if=/dev/urandom of=$tf bs=1M count=2 oflag=direct conv=notrunc &
+ local PID=$!
+ sleep 0.5
+
+ # make a replica
+ log "mirror extend"
+ $LFS mirror extend -N -c -1 $tf2 || {
+ ps ax
+ error "can't mirror"
+ }
+ log "mirror extend done"
+ do_facet mds$((mdt_idx+1)) $LCTL set_param fail_loc=0 fail_val=0
+
+ # wait for blocking dd to complete and modify file
+ wait $PID || error "2nd dd failed"
+ log "dd completed"
+
+ verify_mirror_count $tf 2
+
+ $LFS getstripe $tf | grep -q lcme_flags.*stale || {
+ $LFS getstripe $tf
+ $LFS getstripe $tf2
+ error "both replicas are still in sync"
+ }
+
+ $LFS mirror verify -vvv $tf || {
+ $LFS getstripe $tf
+ error "corrupted in-sync file"
+ }
+}
+run_test 200c "layout change racing with open: LOVEA changes"
cleanup_test_201() {
do_facet $SINGLEMDS $LCTL --device $MDT0 changelog_deregister $CL_USER