obd_size fo_readcache_max_filesize;
int fo_read_cache:1, /**< enable read-only cache */
fo_writethrough_cache:1,/**< read cache writes */
- fo_mds_ost_sync:1; /**< MDS-OST orphan recovery*/
+ fo_mds_ost_sync:1, /**< MDS-OST orphan recovery*/
+ fo_raid_degraded:1;/**< RAID device degraded */
struct obd_import *fo_mdc_imp;
struct obd_uuid fo_mdc_uuid;
/* set EROFS to state field if FS is mounted as RDONLY. The goal is to
* stop creating files on MDS if OST is not good shape to create
* objects.*/
- osfs->os_state = (filter->fo_obt.obt_sb->s_flags & MS_RDONLY) ?
- OS_STATE_READONLY : 0;
+ osfs->os_state = 0;
+
+ if (filter->fo_obt.obt_sb->s_flags & MS_RDONLY)
+ osfs->os_state = OS_STATE_READONLY;
+
+ if (filter->fo_raid_degraded)
+ osfs->os_state |= OS_STATE_DEGRADED;
RETURN(rc);
}
return snprintf(page, count, "%u\n", obd->u.filter.fo_mds_ost_sync);
}
+/**
+ * procfs read handler for the "degraded" entry: prints the obdfilter's
+ * fo_raid_degraded flag as "0" or "1" followed by a newline.
+ *
+ * \param page   output buffer handed to snprintf()
+ * \param count  buffer limit handed to snprintf()
+ * \param data   the struct obd_device this entry belongs to
+ *
+ * \retval the value returned by snprintf()
+ *
+ * start, off and eof are not used by this handler.
+ */
+int lprocfs_filter_rd_degraded(char *page, char **start, off_t off,
+                               int count, int *eof, void *data)
+{
+        struct obd_device *obd = data;
+
+        /* fo_raid_degraded is an "int ...:1" bit-field; where plain int
+         * bit-fields are signed a set flag reads back as -1, which "%u"
+         * would print as 4294967295.  Normalize to 0/1 before printing. */
+        return snprintf(page, count, "%u\n",
+                        !!obd->u.filter.fo_raid_degraded);
+}
+
+/**
+ * procfs write handler for the "degraded" entry.
+ *
+ * Hands \a buffer / \a count to lprocfs_write_helper() to obtain an
+ * integer, then stores its truth value (0 or 1) in fo_raid_degraded
+ * while holding obd_osfs_lock.
+ *
+ * \param file    not used
+ * \param buffer  input text written to the entry
+ * \param count   length of \a buffer
+ * \param data    the struct obd_device this entry belongs to
+ *
+ * \retval count  on success
+ * \retval the nonzero value returned by lprocfs_write_helper() otherwise
+ */
+int lprocfs_filter_wr_degraded(struct file *file, const char *buffer,
+                               unsigned long count, void *data)
+{
+        struct obd_device *obd = data;
+        int degraded;
+        int err = lprocfs_write_helper(buffer, count, &degraded);
+
+        if (err != 0)
+                return err;
+
+        /* any nonzero input counts as "degraded" */
+        degraded = !!degraded;
+
+        spin_lock(&obd->obd_osfs_lock);
+        obd->u.filter.fo_raid_degraded = degraded;
+        spin_unlock(&obd->obd_osfs_lock);
+
+        return count;
+}
+
static struct lprocfs_vars lprocfs_filter_obd_vars[] = {
{ "uuid", lprocfs_rd_uuid, 0, 0 },
{ "blocksize", lprocfs_rd_blksize, 0, 0 },
{ "writethrough_cache_enable", lprocfs_filter_rd_wcache,
lprocfs_filter_wr_wcache, 0},
{ "mds_sync", lprocfs_filter_rd_mds_sync, 0, 0},
+ { "degraded", lprocfs_filter_rd_degraded,
+ lprocfs_filter_wr_degraded, 0 },
{ 0 }
};
spin_unlock(&oscc->oscc_lock);
break;
}
- case -ENOSPC:
case -EROFS:
- case -EFBIG: {
- oscc->oscc_flags |= OSCC_FLAG_NOSPC;
- if (body && rc == -ENOSPC) {
- oscc->oscc_grow_count = OST_MIN_PRECREATE;
- oscc->oscc_last_id = body->oa.o_id;
+                oscc->oscc_flags |= OSCC_FLAG_RDONLY;
+                /* fall through: -EROFS shares the unlock/log/break tail */
+        case -ENOSPC:
+        case -EFBIG:
+                /* rc is a negative errno (see the case labels), so it has
+                 * to be tested against -EROFS; otherwise a read-only OST
+                 * would be flagged as out of space as well. */
+                if (rc != -EROFS) {
+                        oscc->oscc_flags |= OSCC_FLAG_NOSPC;
+                        if (body && rc == -ENOSPC) {
+                                oscc->oscc_last_id = body->oa.o_id;
+                                oscc->oscc_grow_count = OST_MIN_PRECREATE;
+                        }
}
spin_unlock(&oscc->oscc_lock);
DEBUG_REQ(D_INODE, req, "OST out of space, flagging");
break;
- }
case -EIO: {
/* filter always set body->oa.o_id as the last_id
* of filter (see filter_handle_precreate for detail)*/
static int oscc_wait_for_objects(struct osc_creator *oscc, int count)
{
int have_objs;
- int ost_full;
- int osc_invalid;
+ int ost_unusable; /* nonzero if the import is invalid or NOSPC/RDONLY/EXITING is set */
- osc_invalid = oscc->oscc_obd->u.cli.cl_import->imp_invalid;
+ ost_unusable = oscc->oscc_obd->u.cli.cl_import->imp_invalid; /* read before oscc_lock is taken */
spin_lock(&oscc->oscc_lock);
- ost_full = (oscc->oscc_flags & OSCC_FLAG_NOSPC);
+ ost_unusable |= (OSCC_FLAG_NOSPC | OSCC_FLAG_RDONLY |
+ OSCC_FLAG_EXITING) & oscc->oscc_flags; /* flags are sampled under oscc_lock */
have_objs = oscc_has_objects_nolock(oscc, count);
- osc_invalid |= oscc->oscc_flags & OSCC_FLAG_EXITING;
- if (!ost_full && !osc_invalid)
+ if (!ost_unusable) /* otherwise no create is started and the lock is dropped here */
/* they release lock himself */
oscc_internal_create(oscc);
else
spin_unlock(&oscc->oscc_lock);
- return have_objs || ost_full || osc_invalid;
+ return have_objs || ost_unusable; /* nonzero: objects are available, or the OST is unusable */
}
static int oscc_precreate(struct osc_creator *oscc)
if (!oscc_has_objects(oscc, 1) || (oscc->oscc_flags & OSCC_FLAG_NOSPC))
rc = -ENOSPC;
+ if (oscc->oscc_flags & OSCC_FLAG_RDONLY)
+ rc = -EROFS;
+
if (oscc->oscc_obd->u.cli.cl_import->imp_invalid)
rc = -EIO;
RETURN(rc);
}
-static int oscc_recovering(struct osc_creator *oscc)
-{
- int recov;
-
- spin_lock(&oscc->oscc_lock);
- recov = oscc->oscc_flags & OSCC_FLAG_RECOVERING;
- spin_unlock(&oscc->oscc_lock);
-
- return recov;
-}
-
static int oscc_in_sync(struct osc_creator *oscc)
{
int sync;
RETURN(1000);
/* until oscc in recovery - other flags is wrong */
- if (oscc_recovering(oscc))
- RETURN(2);
-
- if (oscc->oscc_flags & OSCC_FLAG_NOSPC)
+ spin_lock(&oscc->oscc_lock);
+ if (oscc->oscc_flags & OSCC_FLAG_NOSPC ||
+ oscc->oscc_flags & OSCC_FLAG_RDONLY) {
+ spin_unlock(&oscc->oscc_lock);
RETURN(1000);
+ }
- if (oscc_has_objects(oscc, oscc->oscc_grow_count / 2))
+ if (oscc->oscc_flags & OSCC_FLAG_RECOVERING ||
+ oscc->oscc_flags & OSCC_FLAG_DEGRADED) {
+ spin_unlock(&oscc->oscc_lock);
+ RETURN(2);
+ }
+
+ if (oscc_has_objects_nolock(oscc, oscc->oscc_grow_count / 2)) {
+ spin_unlock(&oscc->oscc_lock);
RETURN(0);
+ }
- spin_lock(&oscc->oscc_lock);
if ((oscc->oscc_flags & OSCC_FLAG_SYNC_IN_PROGRESS) ||
(oscc->oscc_flags & OSCC_FLAG_CREATING)) {
spin_unlock(&oscc->oscc_lock);
if (oscc->oscc_flags & OSCC_FLAG_NOSPC)
GOTO(out_wake, rc = -ENOSPC);
+ if (oscc->oscc_flags & OSCC_FLAG_RDONLY)
+ GOTO(out_wake, rc = -EROFS);
+
/* we not have objects now - continue wait */
RETURN(-EAGAIN);
rc = -ENOSPC;
spin_unlock(&oscc->oscc_lock);
break;
+ } else if (oscc->oscc_flags & OSCC_FLAG_RDONLY) {
+ rc = -EROFS;
+ spin_unlock(&oscc->oscc_lock);
+ break;
}
spin_unlock(&oscc->oscc_lock);
#define OSCC_FLAG_SYNC_IN_PROGRESS 0x08 /* only allow one thread to sync */
#define OSCC_FLAG_LOW 0x10
#define OSCC_FLAG_EXITING 0x20
+#define OSCC_FLAG_DEGRADED 0x40 /* last statfs reply had OS_STATE_DEGRADED set */
+#define OSCC_FLAG_RDONLY 0x80 /* statfs reply had OS_STATE_READONLY, or a create failed with -EROFS */
int osc_precreate(struct obd_export *exp);
int osc_create(struct obd_export *exp, struct obdo *oa,
struct ptlrpc_request *req,
struct osc_async_args *aa, int rc)
{
+ struct client_obd *cli = &req->rq_import->imp_obd->u.cli;
struct obd_statfs *msfs;
ENTRY;
GOTO(out, rc = -EPROTO);
}
+ /* Reinitialize the RDONLY and DEGRADED flags at the client
+ * on each statfs, so they don't stay set permanently. */
+ spin_lock(&cli->cl_oscc.oscc_lock);
+ cli->cl_oscc.oscc_flags &= ~(OSCC_FLAG_RDONLY | OSCC_FLAG_DEGRADED);
+ if (msfs->os_state & OS_STATE_DEGRADED)
+ cli->cl_oscc.oscc_flags |= OSCC_FLAG_DEGRADED;
+
+ if (msfs->os_state & OS_STATE_READONLY)
+ cli->cl_oscc.oscc_flags |= OSCC_FLAG_RDONLY;
+ spin_unlock(&cli->cl_oscc.oscc_lock);
+
*aa->aa_oi->oi_osfs = *msfs;
out:
rc = aa->aa_oi->oi_cb_up(aa->aa_oi, rc);
}
run_test 27w "check lfs setstripe -c -s -i options ============="
+# Mark OST index 0 as degraded, wait qos_maxage + 2 so the MDS picks up the
+# new state, create $OSTCOUNT single-stripe files and verify that none of
+# them was placed on the degraded OST.
+test_27x() {
+	[ "$OSTCOUNT" -lt "2" ] && skip "$OSTCOUNT < 2 OSTs" && return
+	DELAY=$(do_facet mds lctl get_param -n lov.*.qos_maxage | awk '{print $1 + 2}')
+	# index of the last created file (createmany numbers them from 0)
+	OFFSET=$(($OSTCOUNT - 1))
+	OSTIDX=0
+	local OST=$(lfs osts | awk '/'${OSTIDX}': / { print $2 }' | sed -e 's/_UUID$//')
+	# "lfs osts" indices start at 0 while OST facets are named ost1..ostN
+	local OSTFACET=ost$((OSTIDX + 1))
+
+	mkdir -p $DIR/$tdir
+	$SETSTRIPE $DIR/$tdir -c 1 # 1 stripe per file
+	do_facet $OSTFACET lctl set_param -n obdfilter.$OST.degraded 1
+	sleep $DELAY
+	createmany -o $DIR/$tdir/$tfile $OSTCOUNT
+	# clear the flag before checking so a failed check does not leave
+	# the OST marked degraded
+	do_facet $OSTFACET lctl set_param -n obdfilter.$OST.degraded 0
+
+	for i in `seq 0 $OFFSET`; do
+		[ `$GETSTRIPE $DIR/$tdir/$tfile$i | grep -A 10 obdidx | awk '{print $1}' | grep -w "$OSTIDX"` ] &&
+			error "OST0 was degraded but new created file still use it"
+	done
+}
+run_test 27x "create files while OST0 is degraded"
+
+test_27y() {
+ [ "$OSTCOUNT" -lt "2" ] && skip "$OSTCOUNT < 2 OSTs -- skipping" && return
+ remote_mds_nodsh && skip "remote MDS with nodsh" && return
+
+ MDS_OSCS=`do_facet mds lctl dl | awk '/[oO][sS][cC].*md[ts]/ { print $4 }'`
+ DELAY=$(do_facet mds lctl get_param -n lov.*.qos_maxage | awk '{print $1 + 2}')
+ OFFSET=$(($OSTCOUNT-1))
+ OST=-1
+ for OSC in $MDS_OSCS; do
+ if [ $OST == -1 ]; then {
+ OST=`osc_to_ost $OSC`
+ } else {
+ echo $OSC "is Deactivate:"
+ do_facet mds lctl --device %$OSC deactivate
+ } fi
+ done
+
+ OSTIDX=$(lfs osts | grep ${OST} | awk '{print $1}' | sed -e 's/://')
+ mkdir -p $DIR/$tdir
+ $SETSTRIPE $DIR/$tdir -c 1 # 1 stripe / file
+
+ do_facet ost$OSTIDX lctl set_param -n obdfilter.$OST.degraded 1
+ sleep $DELAY
+ createmany -o $DIR/$tdir/$tfile $OSTCOUNT
+ do_facet ost$OSTIDX lctl set_param -n obdfilter.$OST.degraded 0
+
+ for i in `seq 0 $OFFSET`; do
+ [ `$GETSTRIPE $DIR/$tdir/$tfile$i | grep -A 10 obdidx | awk '{print $1}'| grep -w "$OSTIDX"` ] || \
+ error "files created on deactivated OSTs instead of degraded OST"
+ done
+ for OSC in $MDS_OSCS; do
+ [ `osc_to_ost $OSC` != $OST ] && {
+ echo $OSC "is activate"
+ do_facet mds lctl --device %$OSC activate
+ }
+ done
+}
+run_test 27y "create files while OST0 is degraded and the rest inactive"
+
# createtest also checks that device nodes are created and
# then visible correctly (#2091)
test_28() { # bug 2091