From: zhanghc Date: Tue, 14 Jul 2009 13:34:03 +0000 (+0000) Subject: b=18539 X-Git-Tag: v1_9_220~24 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;ds=sidebyside;h=41faf44f09a62d7af751791959b589a4495a5a14;p=fs%2Flustre-release.git b=18539 don't allocate new files on degraded OSTs i=adilger@sun.com i=hongchao.zhang@sun.com --- diff --git a/lustre/include/obd.h b/lustre/include/obd.h index 3848808..79b7391 100644 --- a/lustre/include/obd.h +++ b/lustre/include/obd.h @@ -331,7 +331,8 @@ struct filter_obd { obd_size fo_readcache_max_filesize; int fo_read_cache:1, /**< enable read-only cache */ fo_writethrough_cache:1,/**< read cache writes */ - fo_mds_ost_sync:1; /**< MDS-OST orphan recovery*/ + fo_mds_ost_sync:1, /**< MDS-OST orphan recovery*/ + fo_raid_degraded:1;/**< RAID device degraded */ struct obd_import *fo_mdc_imp; struct obd_uuid fo_mdc_uuid; diff --git a/lustre/obdfilter/filter.c b/lustre/obdfilter/filter.c index 8fc2f31..e3987c5 100644 --- a/lustre/obdfilter/filter.c +++ b/lustre/obdfilter/filter.c @@ -3739,8 +3739,13 @@ static int filter_statfs(struct obd_device *obd, struct obd_statfs *osfs, /* set EROFS to state field if FS is mounted as RDONLY. The goal is to * stop creating files on MDS if OST is not good shape to create * objects.*/ - osfs->os_state = (filter->fo_obt.obt_sb->s_flags & MS_RDONLY) ? 
- OS_STATE_READONLY : 0; + osfs->os_state = 0; + + if (filter->fo_obt.obt_sb->s_flags & MS_RDONLY) + osfs->os_state = OS_STATE_READONLY; + + if (filter->fo_raid_degraded) + osfs->os_state |= OS_STATE_DEGRADED; RETURN(rc); } diff --git a/lustre/obdfilter/lproc_obdfilter.c b/lustre/obdfilter/lproc_obdfilter.c index bcc46ec..fdd0eba 100644 --- a/lustre/obdfilter/lproc_obdfilter.c +++ b/lustre/obdfilter/lproc_obdfilter.c @@ -341,6 +341,30 @@ static int lprocfs_filter_rd_mds_sync(char *page, char **start, off_t off, return snprintf(page, count, "%u\n", obd->u.filter.fo_mds_ost_sync); } +int lprocfs_filter_rd_degraded(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct obd_device *obd = data; + + return snprintf(page, count, "%u\n", obd->u.filter.fo_raid_degraded); +} + +int lprocfs_filter_wr_degraded(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct obd_device *obd = data; + int val, rc; + + rc = lprocfs_write_helper(buffer, count, &val); + if (rc) + return rc; + + spin_lock(&obd->obd_osfs_lock); + obd->u.filter.fo_raid_degraded = !!val; + spin_unlock(&obd->obd_osfs_lock); + return count; +} + static struct lprocfs_vars lprocfs_filter_obd_vars[] = { { "uuid", lprocfs_rd_uuid, 0, 0 }, { "blocksize", lprocfs_rd_blksize, 0, 0 }, @@ -383,6 +407,8 @@ static struct lprocfs_vars lprocfs_filter_obd_vars[] = { { "writethrough_cache_enable", lprocfs_filter_rd_wcache, lprocfs_filter_wr_wcache, 0}, { "mds_sync", lprocfs_filter_rd_mds_sync, 0, 0}, + { "degraded", lprocfs_filter_rd_degraded, + lprocfs_filter_wr_degraded, 0 }, { 0 } }; diff --git a/lustre/osc/osc_create.c b/lustre/osc/osc_create.c index 53d6912..d422453 100644 --- a/lustre/osc/osc_create.c +++ b/lustre/osc/osc_create.c @@ -123,18 +123,20 @@ static int osc_interpret_create(const struct lu_env *env, spin_unlock(&oscc->oscc_lock); break; } - case -ENOSPC: case -EROFS: - case -EFBIG: { - oscc->oscc_flags |= OSCC_FLAG_NOSPC; - if (body && rc == -ENOSPC) { 
- oscc->oscc_grow_count = OST_MIN_PRECREATE; - oscc->oscc_last_id = body->oa.o_id; + oscc->oscc_flags |= OSCC_FLAG_RDONLY; + case -ENOSPC: + case -EFBIG: + if (rc != -EROFS) { + oscc->oscc_flags |= OSCC_FLAG_NOSPC; + if (body && rc == -ENOSPC) { + oscc->oscc_last_id = body->oa.o_id; + oscc->oscc_grow_count = OST_MIN_PRECREATE; + } } spin_unlock(&oscc->oscc_lock); DEBUG_REQ(D_INODE, req, "OST out of space, flagging"); break; - } case -EIO: { /* filter always set body->oa.o_id as the last_id * of filter (see filter_handle_precreate for detail)*/ @@ -274,23 +276,22 @@ static int oscc_has_objects(struct osc_creator *oscc, int count) static int oscc_wait_for_objects(struct osc_creator *oscc, int count) { int have_objs; - int ost_full; - int osc_invalid; + int ost_unusable; - osc_invalid = oscc->oscc_obd->u.cli.cl_import->imp_invalid; + ost_unusable = oscc->oscc_obd->u.cli.cl_import->imp_invalid; spin_lock(&oscc->oscc_lock); - ost_full = (oscc->oscc_flags & OSCC_FLAG_NOSPC); + ost_unusable |= (OSCC_FLAG_NOSPC | OSCC_FLAG_RDONLY | + OSCC_FLAG_EXITING) & oscc->oscc_flags; have_objs = oscc_has_objects_nolock(oscc, count); - osc_invalid |= oscc->oscc_flags & OSCC_FLAG_EXITING; - if (!ost_full && !osc_invalid) + if (!ost_unusable) /* they release lock himself */ oscc_internal_create(oscc); else spin_unlock(&oscc->oscc_lock); - return have_objs || ost_full || osc_invalid; + return have_objs || ost_unusable; } static int oscc_precreate(struct osc_creator *oscc) @@ -312,23 +313,15 @@ static int oscc_precreate(struct osc_creator *oscc) if (!oscc_has_objects(oscc, 1) || (oscc->oscc_flags & OSCC_FLAG_NOSPC)) rc = -ENOSPC; + if (oscc->oscc_flags & OSCC_FLAG_RDONLY) + rc = -EROFS; + if (oscc->oscc_obd->u.cli.cl_import->imp_invalid) rc = -EIO; RETURN(rc); } -static int oscc_recovering(struct osc_creator *oscc) -{ - int recov; - - spin_lock(&oscc->oscc_lock); - recov = oscc->oscc_flags & OSCC_FLAG_RECOVERING; - spin_unlock(&oscc->oscc_lock); - - return recov; -} - static int 
oscc_in_sync(struct osc_creator *oscc) { int sync; @@ -358,16 +351,24 @@ int osc_precreate(struct obd_export *exp) RETURN(1000); /* until oscc in recovery - other flags is wrong */ - if (oscc_recovering(oscc)) - RETURN(2); - - if (oscc->oscc_flags & OSCC_FLAG_NOSPC) + spin_lock(&oscc->oscc_lock); + if (oscc->oscc_flags & OSCC_FLAG_NOSPC || + oscc->oscc_flags & OSCC_FLAG_RDONLY) { + spin_unlock(&oscc->oscc_lock); RETURN(1000); + } - if (oscc_has_objects(oscc, oscc->oscc_grow_count / 2)) + if (oscc->oscc_flags & OSCC_FLAG_RECOVERING || + oscc->oscc_flags & OSCC_FLAG_DEGRADED) { + spin_unlock(&oscc->oscc_lock); + RETURN(2); + } + + if (oscc_has_objects_nolock(oscc, oscc->oscc_grow_count / 2)) { + spin_unlock(&oscc->oscc_lock); RETURN(0); + } - spin_lock(&oscc->oscc_lock); if ((oscc->oscc_flags & OSCC_FLAG_SYNC_IN_PROGRESS) || (oscc->oscc_flags & OSCC_FLAG_CREATING)) { spin_unlock(&oscc->oscc_lock); @@ -412,6 +413,9 @@ static int handle_async_create(struct ptlrpc_request *req, int rc) if (oscc->oscc_flags & OSCC_FLAG_NOSPC) GOTO(out_wake, rc = -ENOSPC); + if (oscc->oscc_flags & OSCC_FLAG_RDONLY) + GOTO(out_wake, rc = -EROFS); + /* we not have objects now - continue wait */ RETURN(-EAGAIN); @@ -633,6 +637,10 @@ int osc_create(struct obd_export *exp, struct obdo *oa, rc = -ENOSPC; spin_unlock(&oscc->oscc_lock); break; + } else if (oscc->oscc_flags & OSCC_FLAG_RDONLY) { + rc = -EROFS; + spin_unlock(&oscc->oscc_lock); + break; } spin_unlock(&oscc->oscc_lock); diff --git a/lustre/osc/osc_internal.h b/lustre/osc/osc_internal.h index 1476019..90c2590 100644 --- a/lustre/osc/osc_internal.h +++ b/lustre/osc/osc_internal.h @@ -101,6 +101,8 @@ struct osc_cache_waiter { #define OSCC_FLAG_SYNC_IN_PROGRESS 0x08 /* only allow one thread to sync */ #define OSCC_FLAG_LOW 0x10 #define OSCC_FLAG_EXITING 0x20 +#define OSCC_FLAG_DEGRADED 0x40 +#define OSCC_FLAG_RDONLY 0x80 int osc_precreate(struct obd_export *exp); int osc_create(struct obd_export *exp, struct obdo *oa, diff --git 
a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c index 10b27ba..e48a181 100644 --- a/lustre/osc/osc_request.c +++ b/lustre/osc/osc_request.c @@ -3420,6 +3420,7 @@ static int osc_statfs_interpret(const struct lu_env *env, struct ptlrpc_request *req, struct osc_async_args *aa, int rc) { + struct client_obd *cli = &req->rq_import->imp_obd->u.cli; struct obd_statfs *msfs; ENTRY; @@ -3435,6 +3436,17 @@ static int osc_statfs_interpret(const struct lu_env *env, GOTO(out, rc = -EPROTO); } + /* Reinitialize the RDONLY and DEGRADED flags at the client + * on each statfs, so they don't stay set permanently. */ + spin_lock(&cli->cl_oscc.oscc_lock); + cli->cl_oscc.oscc_flags &= ~(OSCC_FLAG_RDONLY | OSCC_FLAG_DEGRADED); + if (msfs->os_state & OS_STATE_DEGRADED) + cli->cl_oscc.oscc_flags |= OSCC_FLAG_DEGRADED; + + if (msfs->os_state & OS_STATE_READONLY) + cli->cl_oscc.oscc_flags |= OSCC_FLAG_RDONLY; + spin_unlock(&cli->cl_oscc.oscc_lock); + *aa->aa_oi->oi_osfs = *msfs; out: rc = aa->aa_oi->oi_cb_up(aa->aa_oi, rc); diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index d4af63b..4fcdf11 100644 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -1194,6 +1194,65 @@ test_27w() { # bug 10997 } run_test 27w "check lfs setstripe -c -s -i options =============" +test_27x() { + [ "$OSTCOUNT" -lt "2" ] && skip "$OSTCOUNT < 2 OSTs" && return + DELAY=$(do_facet mds lctl get_param -n lov.*.qos_maxage | awk '{print $1 + 2}') + OFFSET=$(($OSTCOUNT - 1)) + OSTIDX=0 + local OST=$(lfs osts | awk '/'${OSTIDX}': / { print $2 }' | sed -e 's/_UUID$//') + + mkdir -p $DIR/$tdir + $SETSTRIPE $DIR/$tdir -c 1 # 1 stripe per file + do_facet ost$OSTIDX lctl set_param -n obdfilter.$OST.degraded 1 + sleep $DELAY + createmany -o $DIR/$tdir/$tfile $OSTCOUNT + for i in `seq 0 $OFFSET`; do + [ `$GETSTRIPE $DIR/$tdir/$tfile$i | grep -A 10 obdidx | awk '{print $1}' | grep -w "$OSTIDX"` ] && + error "OST0 was degraded but new created file still use it" + done + do_facet ost$OSTIDX lctl 
set_param -n obdfilter.$OST.degraded 0 +} +run_test 27x "create files while OST0 is degraded" + +test_27y() { + [ "$OSTCOUNT" -lt "2" ] && skip "$OSTCOUNT < 2 OSTs -- skipping" && return + remote_mds_nodsh && skip "remote MDS with nodsh" && return + + MDS_OSCS=`do_facet mds lctl dl | awk '/[oO][sS][cC].*md[ts]/ { print $4 }'` + DELAY=$(do_facet mds lctl get_param -n lov.*.qos_maxage | awk '{print $1 + 2}') + OFFSET=$(($OSTCOUNT-1)) + OST=-1 + for OSC in $MDS_OSCS; do + if [ $OST == -1 ]; then { + OST=`osc_to_ost $OSC` + } else { + echo $OSC "is Deactivate:" + do_facet mds lctl --device %$OSC deactivate + } fi + done + + OSTIDX=$(lfs osts | grep ${OST} | awk '{print $1}' | sed -e 's/://') + mkdir -p $DIR/$tdir + $SETSTRIPE $DIR/$tdir -c 1 # 1 stripe / file + + do_facet ost$OSTIDX lctl set_param -n obdfilter.$OST.degraded 1 + sleep $DELAY + createmany -o $DIR/$tdir/$tfile $OSTCOUNT + do_facet ost$OSTIDX lctl set_param -n obdfilter.$OST.degraded 0 + + for i in `seq 0 $OFFSET`; do + [ `$GETSTRIPE $DIR/$tdir/$tfile$i | grep -A 10 obdidx | awk '{print $1}'| grep -w "$OSTIDX"` ] || \ + error "files created on deactivated OSTs instead of degraded OST" + done + for OSC in $MDS_OSCS; do + [ `osc_to_ost $OSC` != $OST ] && { + echo $OSC "is activate" + do_facet mds lctl --device %$OSC activate + } + done +} +run_test 27y "create files while OST0 is degraded and the rest inactive" + # createtest also checks that device nodes are created and # then visible correctly (#2091) test_28() { # bug 2091