or lmc) has been changed to mean striping across a single
OST, rather than all available. For general usage we have
found a stripe count of 1 or 2 works best.
-
+
+Severity : major
+Frequency : occasional
+Bugzilla : 6409, 6834
+Description: Creating files with an explicit stripe count may lead to
+ a failed assertion on the MDS
+Details : If some OSTs are full or unavailable, creating files may
+ trigger a failed assertion on the MDS. Now, Lustre will
+ try to use other servers or return an error to the
+ client.
+
------------------------------------------------------------------------------
2005-06-20 Cluster File Systems, Inc. <info@clusterfs.com>
void qos_shrink_lsm(struct lov_request_set *set);
int qos_prep_create(struct lov_obd *lov, struct lov_request_set *set,
int newea);
+int qos_remedy_create(struct lov_request_set *set, struct lov_request *req);
/* lov_request.c */
void lov_set_add_req(struct lov_request *req, struct lov_request_set *set);
}
}
+/*
+ * Retry a failed object create for one stripe on a different OST.
+ *
+ * Starting with the target after req->rq_idx, every target index is visited
+ * once (wrapping around).  obd_create() is called on the first active target
+ * whose index is not already recorded in lsm_oinfo[] for stripes
+ * 0..req->rq_stripe.  req->rq_idx is overwritten with each target that is
+ * attempted.
+ *
+ * Returns 0 as soon as one obd_create() succeeds; otherwise the error of the
+ * last attempted create, or -EIO if no target was attempted at all.
+ *
+ * NOTE(review): only stripes 0..req->rq_stripe are checked for OST reuse;
+ * stripes after req->rq_stripe that already hold objects are not considered.
+ * Confirm this is intended.
+ */
+int qos_remedy_create(struct lov_request_set *set, struct lov_request *req)
+{
+        struct lov_stripe_md *lsm = set->set_md;
+        struct lov_obd *lov = &set->set_exp->exp_obd->u.lov;
+        unsigned ost_idx, ost_count = lov->desc.ld_tgt_count;
+        unsigned i;
+        int stripe, rc = -EIO;
+        ENTRY;
+
+        /* no targets configured: bail out before the modulo by zero below */
+        if (ost_count == 0)
+                RETURN(rc);
+
+        ost_idx = (req->rq_idx + 1) % ost_count;
+        for (i = 0; i < ost_count; i++, ost_idx = (ost_idx + 1) % ost_count) {
+                if (lov->tgts[ost_idx].active == 0) {
+                        CDEBUG(D_HA, "lov idx %u inactive\n", ost_idx);
+                        continue;
+                }
+                /* check whether an object has already been created on this
+                 * OST for this or an earlier stripe */
+                for (stripe = req->rq_stripe; stripe >= 0; stripe--) {
+                        if (ost_idx == lsm->lsm_oinfo[stripe].loi_ost_idx)
+                                break;
+                }
+
+                /* stripe < 0 means the scan found no stripe using ost_idx */
+                if (stripe < 0) {
+                        req->rq_idx = ost_idx;
+                        rc = obd_create(lov->tgts[ost_idx].ltd_exp, req->rq_oa,
+                                        &req->rq_md, set->set_oti);
+                        if (!rc)
+                                break;
+                }
+        }
+        RETURN(rc);
+}
+
#define LOV_CREATE_RESEED_INTERVAL 1000
/* FIXME use real qos data to prepare the lov create request */
int qos_prep_create(struct lov_obd *lov, struct lov_request_set *set, int newea)
LASSERT(set->set_completes);
- if (!set->set_success)
- GOTO(cleanup, rc = -EIO);
- if (*lsmp == NULL && set->set_count != set->set_success) {
+ /* try alloc objects on other osts if osc_create fails for
+ * exceptions: RPC failure, ENOSPC, etc */
+ if (set->set_count != set->set_success) {
+ list_for_each_entry (req, &set->set_list, rq_link) {
+ if (req->rq_rc == 0)
+ continue;
+
+ set->set_completes--;
+ req->rq_complete = 0;
+
+ rc = qos_remedy_create(set, req);
+ lov_update_create_set(set, req, rc);
+
+ if (rc)
+ break;
+ }
+ }
+
+ /* no successful creates */
+ if (set->set_success == 0)
+ GOTO(cleanup, rc);
+
+ /* If there was an explicit stripe set, fail. Otherwise, we
+ * got some objects and that's not bad. */
+ if (set->set_count != set->set_success) {
+ if (*lsmp)
+ GOTO(cleanup, rc);
set->set_count = set->set_success;
qos_shrink_lsm(set);
}
if (!req->rq_complete || req->rq_rc)
continue;
- sub_exp = lov->tgts[req->rq_idx].ltd_exp,
+ sub_exp = lov->tgts[req->rq_idx].ltd_exp;
err = obd_destroy(sub_exp, req->rq_oa, NULL, oti);
if (err)
CERROR("Failed to uncreate objid "LPX64" subobj "
return obd_fail_loc;
}
+/* Store fl in obd_fail_loc (the variable is declared outside this function). */
+void obd_set_fail_loc(unsigned int fl)
+{
+        obd_fail_loc = fl;
+}
+
/* opening /dev/obd */
static int obd_class_open(struct inode * inode, struct file * file)
{
--- /dev/null
+#!/bin/bash
+
+export PATH=`dirname $0`/../utils:$PATH
+
+config=${1:-`basename $0 .sh`.xml}
+
+LMC="${LMC:-lmc} -m $config"
+TMP=${TMP:-/tmp}
+
+MDSDEV=${MDSDEV:-$TMP/mds1-`hostname`}
+MDSSIZE=${MDSSIZE:-400000}
+FSTYPE=${FSTYPE:-ext3}
+MOUNT=${MOUNT:-/mnt/lustre}
+MOUNT2=${MOUNT2:-${MOUNT}2}
+NETTYPE=${NETTYPE:-tcp}
+
+OSTDEV=${OSTDEV:-$TMP/ost-`hostname`}
+OSTSIZE=${OSTSIZE:-400000}
+
+# specific journal size for the ost, in MB
+JSIZE=${JSIZE:-0}
+[ "$JSIZE" -gt 0 ] && JARG="--journal_size $JSIZE"
+MDSISIZE=${MDSISIZE:-0}
+[ "$MDSISIZE" -gt 0 ] && IARG="--inode_size $MDSISIZE"
+
+STRIPE_BYTES=${STRIPE_BYTES:-1048576}
+STRIPES_PER_OBJ=0 # 0 means stripe over all OSTs
+
+rm -f $config
+
+# create nodes
+${LMC} --add node --node localhost || exit 10
+${LMC} --add net --node localhost --nid `hostname` --nettype $NETTYPE || exit 11
+${LMC} --add net --node client --nid '*' --nettype $NETTYPE || exit 12
+
+# configure mds server
+${LMC} --add mds --node localhost --mds mds1 --fstype $FSTYPE \
+ --dev $MDSDEV --size $MDSSIZE $JARG $IARG $MDSOPT || exit 20
+
+# configure ost
+${LMC} -m $config --add lov --lov lov1 --mds mds1 --stripe_sz $STRIPE_BYTES \
+ --stripe_cnt $STRIPES_PER_OBJ --stripe_pattern 0 $LOVOPT || exit 20
+${LMC} --add ost --ost ost1 --node localhost --lov lov1 \
+ --fstype $FSTYPE --dev $OSTDEV --size $OSTSIZE $JARG $OSTOPT || exit 30
+${LMC} --add ost --ost ost2 --node localhost --lov lov1 \
+ --fstype $FSTYPE --dev ${OSTDEV}2 --size $OSTSIZE $JARG $OSTOPT || exit 30
+
+# create client config
+${LMC} --add mtpt --node localhost --path $MOUNT --mds mds1 --lov lov1 \
+ $CLIENTOPT || exit 40
+${LMC} --add mtpt --node client --path $MOUNT2 --mds mds1 --lov lov1 \
+ $CLIENTOPT || exit 41
}
run_test 27m "create file while OST0 was full =================="
+# OSCs keep a sticky ENOSPC flag that gets unset with an rmdir, so clear
+# fail_loc and then create and remove a scratch directory.
+reset_enospc() {
+	local scratch=$DIR/d27/nospc
+
+	sysctl -w lustre.fail_loc=0
+	mkdir -p $scratch
+	rmdir $scratch
+}
+
+# Use up the object ids between prealloc_next_id and prealloc_last_id for the
+# OST at LOV index $1.
+#
+# The OST name is taken from line ($1 + 1) of the LOV's target_obd file (with
+# the _UUID suffix stripped) and the MDS name from the last directory found
+# under /proc/fs/lustre/mds/.  A directory named after the OST is created and
+# passed to $LSTRIPE with arguments "0 $1 1" (presumably pinning it to that
+# OST -- confirm), fail_loc is set to 0x215, one file per id is touched, and
+# finally reset_enospc is called.
+# Sets the (non-local) variables ostidx, ost, mds, last_id and next_id.
+exhaust_precreations() {
+	local i
+	local objdir
+
+	ostidx=$1
+	ost=$(head -n $(( ostidx + 1 )) /proc/fs/lustre/lov/${LOVNAME}/target_obd | tail -n 1 | awk '{print $2}' | sed -e 's/_UUID$//')
+	mds=$(find /proc/fs/lustre/mds/ -maxdepth 1 -type d | tail -n 1)
+	mds=$(basename $mds)
+
+	last_id=$(tail -n 1 /proc/fs/lustre/osc/OSC_*_${ost}_${mds}/prealloc_last_id)
+	next_id=$(tail -n 1 /proc/fs/lustre/osc/OSC_*_${ost}_${mds}/prealloc_next_id)
+
+	objdir=$DIR/d27/${ost}
+	mkdir -p $objdir
+	$LSTRIPE $objdir 0 $ostidx 1
+	sysctl -w lustre.fail_loc=0x215
+	echo "Creating to objid $last_id on ost $ost..."
+	i=$(( next_id ))
+	while (( i <= last_id )); do
+		touch $objdir/f$i
+		i=$(( i + 1 ))
+	done
+	reset_enospc
+}
+
+# Run exhaust_precreations for every OST index from 0 to OSTCOUNT - 1.
+exhaust_all_precreations() {
+	local i=0
+
+	# the post-increment keeps exhaust_precreations as the last command
+	# of each iteration, so the loop's exit status is that call's status
+	while (( i < OSTCOUNT )); do
+		exhaust_precreations $(( i++ ))
+	done
+}
+
+test_27n() {
+ [ "$OSTCOUNT" -lt "2" ] && echo "" && return
+ reset_enospc
+ rm -f $DIR/d27/f27n
+ exhaust_precreations 0
+ sysctl -w lustre.fail_loc=0x80000215
+ touch $DIR/d27/f27n || error
+ reset_enospc
+}
+run_test 27n "creating a file while some OSTs are full (should succeed) ==="
+
+test_27o() {
+ [ "$OSTCOUNT" -lt "2" ] && echo "" && return
+ reset_enospc
+ rm -f $DIR/d27/f27o
+ exhaust_all_precreations
+ sysctl -w lustre.fail_loc=0x215
+ touch $DIR/d27/f27o && error
+ reset_enospc
+}
+run_test 27o "creating a file while all OSTs are full (should error) ==="
+
+test_27p() {
+ [ "$OSTCOUNT" -lt "2" ] && echo "" && return
+ reset_enospc
+ rm -f $DIR/d27/f27p
+ exhaust_precreations 0
+ $MCREATE $DIR/d27/f27p || error
+ $TRUNCATE $DIR/d27/f27p 80000000 || error
+ $CHECKSTAT -s 80000000 $DIR/d27/f27p || error
+ sysctl -w lustre.fail_loc=0x80000215
+ echo foo >> $DIR/d27/f27p || error
+ $CHECKSTAT -s 80000004 $DIR/d27/f27p || error
+ reset_enospc
+}
+run_test 27p "appending to a truncated file while some OSTs are full ==="
+
+test_27q() {
+ [ "$OSTCOUNT" -lt "2" ] && echo "" && return
+ reset_enospc
+ rm -f $DIR/d27/f27q
+ exhaust_precreations 0
+ $MCREATE $DIR/d27/f27q || error
+ $TRUNCATE $DIR/d27/f27q 80000000 || error
+ $CHECKSTAT -s 80000000 $DIR/d27/f27q || error
+ sysctl -w lustre.fail_loc=0x215
+ echo foo >> $DIR/d27/f27q && error
+ $CHECKSTAT -s 80000000 $DIR/d27/f27q || error
+ reset_enospc
+}
+run_test 27q "appending to a truncated file while all OSTs are full (should error) ==="
+
+test_27r() {
+ [ "$OSTCOUNT" -lt "2" ] && echo "" && return
+ reset_enospc
+ rm -f $DIR/d27/f27r
+ exhaust_precreations 0
+ sysctl -w lustre.fail_loc=0x80000215
+ $LSTRIPE $DIR/d27/f27r 0 0 -1 && error
+ reset_enospc
+}
+run_test 27r "creating a file while some OSTs are full with an explicit stripe count (should error) ==="
+
test_28() {
mkdir $DIR/d28
$CREATETEST $DIR/d28/ct || error