From: jacob Date: Tue, 28 Jun 2005 23:33:36 +0000 (+0000) Subject: b=6409 X-Git-Tag: v1_7_100~1^25~8^2 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=815545609ad474f7fe4b549aa0c880bd71acb707;p=fs%2Flustre-release.git b=6409 r=adilger - add code from the lov qos branch to deal with object creation failures --- diff --git a/lustre/ChangeLog b/lustre/ChangeLog index a88551f..38d4982 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -43,7 +43,17 @@ Details : The interpretation of the default stripe count (0, to lfs or lmc) has been changed to mean striping across a single OST, rather than all available. For general usage we have found a stripe count of 1 or 2 works best. - + +Severity : major +Frequency : occasional +Bugzilla : 6409, 6834 +Description: Creating files with an explicit stripe count may lead to + a failed assertion on the MDS +Details : If some OSTs are full or unavailable, creating files may + trigger a failed assertion on the MDS. Now, Lustre will + try to use other servers or return an error to the + client. + ------------------------------------------------------------------------------ 2005-06-20 Cluster File Systems, Inc. 
diff --git a/lustre/lov/lov_internal.h b/lustre/lov/lov_internal.h index 64a41b2..097e7c8 100644 --- a/lustre/lov/lov_internal.h +++ b/lustre/lov/lov_internal.h @@ -132,6 +132,7 @@ int lov_stripe_number(struct lov_stripe_md *lsm, obd_off lov_off); void qos_shrink_lsm(struct lov_request_set *set); int qos_prep_create(struct lov_obd *lov, struct lov_request_set *set, int newea); +int qos_remedy_create(struct lov_request_set *set, struct lov_request *req); /* lov_request.c */ void lov_set_add_req(struct lov_request *req, struct lov_request_set *set); diff --git a/lustre/lov/lov_qos.c b/lustre/lov/lov_qos.c index f7b0f76..c7bd979 100644 --- a/lustre/lov/lov_qos.c +++ b/lustre/lov/lov_qos.c @@ -72,6 +72,37 @@ void qos_shrink_lsm(struct lov_request_set *set) } } +int qos_remedy_create(struct lov_request_set *set, struct lov_request *req) /* Retry the create described by req on a different OST. Candidates are visited in index order, starting after req->rq_idx and wrapping modulo ld_tgt_count; inactive targets and targets already listed as loi_ost_idx for stripes 0..req->rq_stripe are skipped. obd_create() is called on each remaining candidate until one returns 0, and req->rq_idx is overwritten with every candidate tried. Returns 0 on success, otherwise the rc of the last obd_create() attempt, or -EIO if no candidate was attempted. */ +{ + struct lov_stripe_md *lsm = set->set_md; + struct lov_obd *lov = &set->set_exp->exp_obd->u.lov; + unsigned ost_idx, ost_count = lov->desc.ld_tgt_count; + int stripe, i, rc = -EIO; + ENTRY; + + ost_idx = (req->rq_idx + 1) % ost_count; /* NOTE(review): a zero ld_tgt_count would make this modulo divide by zero - confirm callers rule that out */ + for (i = 0; i < ost_count; i++, ost_idx = (ost_idx + 1) % ost_count) { + if (lov->tgts[ost_idx].active == 0) { + CDEBUG(D_HA, "lov idx %d inactive\n", ost_idx); /* NOTE(review): ost_idx is unsigned but is printed with %d */ + continue; + } + /* check whether stripes 0..req->rq_stripe already list this OST as loi_ost_idx; NOTE(review): higher-numbered stripes are not consulted - confirm that is intended */ + for (stripe = req->rq_stripe; stripe >= 0; stripe--) { + if (ost_idx == lsm->lsm_oinfo[stripe].loi_ost_idx) + break; + } + + if (stripe < 0) { /* none of the stripes checked above uses this OST */ + req->rq_idx = ost_idx; + rc = obd_create(lov->tgts[ost_idx].ltd_exp, req->rq_oa, + &req->rq_md, set->set_oti); + if (!rc) + break; + } + } + RETURN(rc); +} + #define LOV_CREATE_RESEED_INTERVAL 1000 /* FIXME use real qos data to prepare the lov create request */ int qos_prep_create(struct lov_obd *lov, struct lov_request_set *set, int newea) diff --git a/lustre/lov/lov_request.c b/lustre/lov/lov_request.c index 4477213..3054173 100644 --- a/lustre/lov/lov_request.c +++ b/lustre/lov/lov_request.c @@ -497,9 +497,33 @@ static int
create_done(struct obd_export *exp, struct lov_request_set *set, LASSERT(set->set_completes); - if (!set->set_success) - GOTO(cleanup, rc = -EIO); - if (*lsmp == NULL && set->set_count != set->set_success) { + /* Some creates failed (e.g. RPC failure, ENOSPC): retry every + * failed request on another OST through qos_remedy_create(). */ + if (set->set_count != set->set_success) { + list_for_each_entry (req, &set->set_list, rq_link) { + if (req->rq_rc == 0) + continue; + + set->set_completes--; + req->rq_complete = 0; /* undo the completion bookkeeping for this request; presumably lov_update_create_set() below redoes it - verify */ + + rc = qos_remedy_create(set, req); + lov_update_create_set(set, req, rc); + + if (rc) + break; + } + } + + /* not a single create succeeded, even after the retries */ + if (set->set_success == 0) + GOTO(cleanup, rc); + + /* If the caller passed in a stripe md (*lsmp set), fail. Otherwise + * shrink the set to the objects that were actually created. */ + if (set->set_count != set->set_success) { + if (*lsmp) + GOTO(cleanup, rc); set->set_count = set->set_success; qos_shrink_lsm(set); } @@ -535,7 +559,7 @@ cleanup: if (!req->rq_complete || req->rq_rc) continue; - sub_exp = lov->tgts[req->rq_idx].ltd_exp, + sub_exp = lov->tgts[req->rq_idx].ltd_exp; err = obd_destroy(sub_exp, req->rq_oa, NULL, oti); if (err) CERROR("Failed to uncreate objid "LPX64" subobj " diff --git a/lustre/obdclass/class_obd.c b/lustre/obdclass/class_obd.c index 71f6c8d..7b735b9 100644 --- a/lustre/obdclass/class_obd.c +++ b/lustre/obdclass/class_obd.c @@ -102,6 +102,11 @@ unsigned int obd_print_fail_loc(void) return obd_fail_loc; } +void obd_set_fail_loc(unsigned int fl) /* Store fl in obd_fail_loc, the same variable obd_print_fail_loc() returns. */ +{ + obd_fail_loc = fl; +} + /* opening /dev/obd */ static int obd_class_open(struct inode * inode, struct file * file) { diff --git a/lustre/tests/2ost.sh b/lustre/tests/2ost.sh new file mode 100644 index 0000000..1f890fb --- /dev/null +++ b/lustre/tests/2ost.sh @@ -0,0 +1,52 @@ +#!/bin/bash + +export PATH=`dirname $0`/../utils:$PATH + +config=${1:-`basename $0 .sh`.xml} + +LMC="${LMC:-lmc} -m $config" +TMP=${TMP:-/tmp} + +MDSDEV=${MDSDEV:-$TMP/mds1-`hostname`} +MDSSIZE=${MDSSIZE:-400000}
+FSTYPE=${FSTYPE:-ext3} +MOUNT=${MOUNT:-/mnt/lustre} +MOUNT2=${MOUNT2:-${MOUNT}2} +NETTYPE=${NETTYPE:-tcp} + +OSTDEV=${OSTDEV:-$TMP/ost-`hostname`} +OSTSIZE=${OSTSIZE:-400000} + +# specific journal size for the ost, in MB +JSIZE=${JSIZE:-0} +[ "$JSIZE" -gt 0 ] && JARG="--journal_size $JSIZE" +MDSISIZE=${MDSISIZE:-0} +[ "$MDSISIZE" -gt 0 ] && IARG="--inode_size $MDSISIZE" + +STRIPE_BYTES=${STRIPE_BYTES:-1048576} +STRIPES_PER_OBJ=0 # 0 means stripe over all OSTs; NOTE(review): lustre/ChangeLog states that a default stripe count of 0 now means a single OST - confirm which holds here + +rm -f $config + +# create nodes +${LMC} --add node --node localhost || exit 10 +${LMC} --add net --node localhost --nid `hostname` --nettype $NETTYPE || exit 11 +${LMC} --add net --node client --nid '*' --nettype $NETTYPE || exit 12 + +# configure mds server +${LMC} --add mds --node localhost --mds mds1 --fstype $FSTYPE \ + --dev $MDSDEV --size $MDSSIZE $JARG $IARG $MDSOPT || exit 20 + +# configure ost +${LMC} -m $config --add lov --lov lov1 --mds mds1 --stripe_sz $STRIPE_BYTES \ + --stripe_cnt $STRIPES_PER_OBJ --stripe_pattern 0 $LOVOPT || exit 20 +${LMC} --add ost --ost ost1 --node localhost --lov lov1 \ + --fstype $FSTYPE --dev $OSTDEV --size $OSTSIZE $JARG $OSTOPT || exit 30 +${LMC} --add ost --ost ost2 --node localhost --lov lov1 \ + --fstype $FSTYPE --dev ${OSTDEV}2 --size $OSTSIZE $JARG $OSTOPT || exit 30 + +# create client config +${LMC} --add mtpt --node localhost --path $MOUNT --mds mds1 --lov lov1 \ + $CLIENTOPT || exit 40 +${LMC} --add mtpt --node client --path $MOUNT2 --mds mds1 --lov lov1 \ + $CLIENTOPT || exit 41 diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index daf048f..8f3cc7b 100644 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -911,6 +911,103 @@ test_27m() { } run_test 27m "create file while OST0 was full ==================" +# OSCs keep a sticky ENOSPC flag that gets unset with rmdir; reset_enospc zeroes lustre.fail_loc and then does a mkdir/rmdir of $DIR/d27/nospc +reset_enospc() { + sysctl -w lustre.fail_loc=0 + mkdir -p $DIR/d27/nospc + rmdir $DIR/d27/nospc +} + +exhaust_precreations() { # $1 is an OST index: look that OST up in the lov target_obd file, then, with lustre.fail_loc=0x215 set, touch one file per object id from prealloc_next_id to prealloc_last_id under $DIR/d27/<ost>, and finish with reset_enospc + local i + ostidx=$1 + ost=$(head -n $((
ostidx + 1 )) /proc/fs/lustre/lov/${LOVNAME}/target_obd | tail -n 1 | awk '{print $2}' | sed -e 's/_UUID$//') + mds=$(find /proc/fs/lustre/mds/ -maxdepth 1 -type d | tail -n 1) + mds=$(basename $mds) + + last_id=$(tail -n 1 /proc/fs/lustre/osc/OSC_*_${ost}_${mds}/prealloc_last_id) + next_id=$(tail -n 1 /proc/fs/lustre/osc/OSC_*_${ost}_${mds}/prealloc_next_id) + + mkdir -p $DIR/d27/${ost} + $LSTRIPE $DIR/d27/${ost} 0 $ostidx 1 + sysctl -w lustre.fail_loc=0x215 + echo "Creating to objid $last_id on ost $ost..." + for (( i = next_id; i <= last_id; i++ )) ; do + touch $DIR/d27/${ost}/f$i + done + reset_enospc +} + +exhaust_all_precreations() { + local i + for (( i=0; i < OSTCOUNT; i++ )) ; do + exhaust_precreations $i + done +} + +test_27n() { + [ "$OSTCOUNT" -lt "2" ] && echo "" && return + reset_enospc + rm -f $DIR/d27/f27n + exhaust_precreations 0 + sysctl -w lustre.fail_loc=0x80000215 + touch $DIR/d27/f27n || error + reset_enospc +} +run_test 27n "creating a file while some OSTs are full (should succeed) ===" + +test_27o() { + [ "$OSTCOUNT" -lt "2" ] && echo "" && return + reset_enospc + rm -f $DIR/d27/f27o + exhaust_all_precreations + sysctl -w lustre.fail_loc=0x215 + touch $DIR/d27/f27o && error + reset_enospc +} +run_test 27o "creating a file while all OSTs are full (should error) ===" + +test_27p() { + [ "$OSTCOUNT" -lt "2" ] && echo "" && return + reset_enospc + rm -f $DIR/d27/f27p + exhaust_precreations 0 + $MCREATE $DIR/d27/f27p || error + $TRUNCATE $DIR/d27/f27p 80000000 || error + $CHECKSTAT -s 80000000 $DIR/d27/f27p || error + sysctl -w lustre.fail_loc=0x80000215 + echo foo >> $DIR/d27/f27p || error + $CHECKSTAT -s 80000004 $DIR/d27/f27p || error + reset_enospc +} +run_test 27p "appending to a truncated file while some OSTs are full ===" + +test_27q() { + [ "$OSTCOUNT" -lt "2" ] && echo "" && return + reset_enospc + rm -f $DIR/d27/f27q + exhaust_precreations 0 + $MCREATE $DIR/d27/f27q || error + $TRUNCATE $DIR/d27/f27q 80000000 || error + $CHECKSTAT -s
80000000 $DIR/d27/f27q || error + sysctl -w lustre.fail_loc=0x215 + echo foo >> $DIR/d27/f27q && error + $CHECKSTAT -s 80000000 $DIR/d27/f27q || error + reset_enospc +} +run_test 27q "appending to a truncated file while all OSTs are full (should error) ===" + +test_27r() { + [ "$OSTCOUNT" -lt "2" ] && echo "" && return + reset_enospc + rm -f $DIR/d27/f27r + exhaust_precreations 0 + sysctl -w lustre.fail_loc=0x80000215 + $LSTRIPE $DIR/d27/f27r 0 0 -1 && error + reset_enospc +} +run_test 27r "creating a file while some OSTs are full with an explicit stripe count (should error) ===" + test_28() { mkdir $DIR/d28 $CREATETEST $DIR/d28/ct || error