* File join has been disabled in this release, refer to Bugzilla 16929.
Severity : enhancement
+Bugzilla : 19662
+Description: Remove set_info(KEY_UNLINKED) from MDS/OSC
+
+Severity : enhancement
Bugzilla : 19526
Description: correctly handle big reply message.
Details : send LNet event if reply is bigger then buffer and adjust this buffer
#define KEY_SET_FS "set_fs"
/* KEY_SET_INFO in lustre_idl.h */
#define KEY_SPTLRPC_CONF "sptlrpc_conf"
-#define KEY_UNLINKED "unlinked"
struct lu_context;
next_id = 1;
} else if (KEY_IS(KEY_CHECKSUM)) {
do_inactive = 1;
- } else if (KEY_IS(KEY_UNLINKED)) {
- check_uuid = val ? 1 : 0;
} else if (KEY_IS(KEY_EVICT_BY_NID)) {
/* use defaults: do_inactive = incr = 0; */
} else if (KEY_IS(KEY_MDS_CONN)) {
#define TGT_BAVAIL(i) (lov->lov_tgts[i]->ltd_exp->exp_obd->obd_osfs.os_bavail *\
lov->lov_tgts[i]->ltd_exp->exp_obd->obd_osfs.os_bsize)
-#define TGT_FFREE(i) (lov->lov_tgts[i]->ltd_exp->exp_obd->obd_osfs.os_ffree)
int qos_add_tgt(struct obd_device *obd, __u32 index)
char *poolname, int flags)
{
struct lov_obd *lov = &exp->exp_obd->u.lov;
- static time_t last_warn = 0;
- time_t now = cfs_time_current_sec();
- __u64 total_bavail, total_weight = 0;
- int nfound, good_osts, i, warn = 0, rc = 0;
+ __u64 total_weight = 0;
+ int nfound, good_osts, i, rc = 0;
int stripe_cnt_min = min_stripe_count(*stripe_cnt, flags);
struct pool_desc *pool;
struct ost_pool *osts;
if (rc)
GOTO(out, rc);
- total_bavail = 0;
good_osts = 0;
- /* Warn users about zero available space/inode every 30 min */
- if (cfs_time_sub(now, last_warn) > 60 * 30)
- warn = 1;
/* Find all the OSTs that are valid stripe candidates */
for (i = 0; i < osts->op_count; i++) {
- __u64 bavail;
-
if (!lov->lov_tgts[osts->op_array[i]] ||
!lov->lov_tgts[osts->op_array[i]]->ltd_active)
continue;
- bavail = TGT_BAVAIL(osts->op_array[i]);
- if (!bavail) {
- if (warn) {
- CDEBUG(D_QOS, "no free space on %s\n",
- obd_uuid2str(&lov->lov_tgts[osts->op_array[i]]->ltd_uuid));
- last_warn = now;
- }
- continue;
- }
- if (!TGT_FFREE(osts->op_array[i])) {
- if (warn) {
- CDEBUG(D_QOS, "no free inodes on %s\n",
- obd_uuid2str(&lov->lov_tgts[osts->op_array[i]]->ltd_uuid));
- last_warn = now;
- }
- continue;
- }
/* Fail Check before osc_precreate() is called
so we can only 'fail' single OSC. */
lov->lov_tgts[osts->op_array[i]]->ltd_qos.ltq_usable = 1;
qos_calc_weight(lov, osts->op_array[i]);
- total_bavail += bavail;
total_weight += lov->lov_tgts[osts->op_array[i]]->ltd_qos.ltq_weight;
good_osts++;
if (good_osts < stripe_cnt_min)
GOTO(out, rc = -EAGAIN);
- if (!total_bavail)
- GOTO(out, rc = -ENOSPC);
-
/* We have enough osts */
if (good_osts < *stripe_cnt)
*stripe_cnt = good_osts;
}
}
#endif
-
- if (rc == 0)
- obd_set_info_async(mdd2obd_dev(mdd)->u.mds.mds_osc_exp,
- sizeof(KEY_UNLINKED), KEY_UNLINKED, 0,
- NULL, NULL);
if (!is_dir)
/* old files may not have link ea; ignore errors */
mdd_links_rename(env, mdd_cobj, mdo2fid(mdd_pobj),
oscc->oscc_flags |= OSCC_FLAG_RDONLY;
case -ENOSPC:
case -EFBIG:
- if (rc != EROFS) {
+ if (rc != -EROFS) {
oscc->oscc_flags |= OSCC_FLAG_NOSPC;
if (body && rc == -ENOSPC) {
oscc->oscc_last_id = body->oa.o_id;
LASSERT_SPIN_LOCKED(&oscc->oscc_lock);
- if(oscc->oscc_flags & OSCC_FLAG_RECOVERING) {
+ if ((oscc->oscc_flags & OSCC_FLAG_RECOVERING) ||
+ (oscc->oscc_flags & OSCC_FLAG_DEGRADED)) {
spin_unlock(&oscc->oscc_lock);
RETURN(0);
}
OSCC_FLAG_EXITING) & oscc->oscc_flags;
have_objs = oscc_has_objects_nolock(oscc, count);
- if (!ost_unusable)
+ if (!ost_unusable && !have_objs)
/* they release lock himself */
- oscc_internal_create(oscc);
+ have_objs = oscc_internal_create(oscc);
else
spin_unlock(&oscc->oscc_lock);
NULL, NULL);
rc = l_wait_event(oscc->oscc_waitq, oscc_wait_for_objects(oscc, 1), &lwi);
-
- if (!oscc_has_objects(oscc, 1) || (oscc->oscc_flags & OSCC_FLAG_NOSPC))
- rc = -ENOSPC;
-
- if (oscc->oscc_flags & OSCC_FLAG_RDONLY)
- rc = -EROFS;
-
- if (oscc->oscc_obd->u.cli.cl_import->imp_invalid)
- rc = -EIO;
-
RETURN(rc);
}
if (imp != NULL && imp->imp_deactive)
RETURN(1000);
- /* until oscc in recovery - other flags is wrong */
+ /* Handle critical states first */
spin_lock(&oscc->oscc_lock);
if (oscc->oscc_flags & OSCC_FLAG_NOSPC ||
- oscc->oscc_flags & OSCC_FLAG_RDONLY) {
+ oscc->oscc_flags & OSCC_FLAG_RDONLY ||
+ oscc->oscc_flags & OSCC_FLAG_EXITING) {
spin_unlock(&oscc->oscc_lock);
RETURN(1000);
}
RETURN(1);
}
- oscc_internal_create(oscc);
+ if (oscc_internal_create(oscc))
+ RETURN(1000);
RETURN(1);
}
if(rc)
GOTO(out_wake, rc);
- if ((oscc->oscc_flags & OSCC_FLAG_EXITING))
+ /* Handle the critical errors first.
+ * Should we also test the cl_import state? */
+ if (oscc->oscc_flags & OSCC_FLAG_EXITING)
GOTO(out_wake, rc = -EIO);
+ if (oscc->oscc_flags & OSCC_FLAG_NOSPC)
+ GOTO(out_wake, rc = -ENOSPC);
+
+ if (oscc->oscc_flags & OSCC_FLAG_RDONLY)
+ GOTO(out_wake, rc = -EROFS);
+
+ /* should try to wait until recovery has finished */
+ if((oscc->oscc_flags & OSCC_FLAG_RECOVERING) ||
+ (oscc->oscc_flags & OSCC_FLAG_DEGRADED))
+ RETURN(-EAGAIN);
+
if (oscc_has_objects_nolock(oscc, 1)) {
memcpy(oa, &oscc->oscc_oa, sizeof(*oa));
oa->o_id = oscc->oscc_next_id;
CDEBUG(D_RPCTRACE, " set oscc_next_id = "LPU64"\n",
oscc->oscc_next_id);
- GOTO(out_wake, rc = 0);
+ GOTO(out_wake, rc = 0);
}
- /* should be try wait until recovery finished */
- if(oscc->oscc_flags & OSCC_FLAG_RECOVERING)
- RETURN(-EAGAIN);
-
- if (oscc->oscc_flags & OSCC_FLAG_NOSPC)
- GOTO(out_wake, rc = -ENOSPC);
-
- if (oscc->oscc_flags & OSCC_FLAG_RDONLY)
- GOTO(out_wake, rc = -EROFS);
-
- /* we not have objects now - continue wait */
+ /* we don't have objects now - continue wait */
RETURN(-EAGAIN);
out_wake:
oscc->oscc_obd->obd_name);
rc = oscc_precreate(oscc);
- if (rc) {
+ if (rc)
CDEBUG(D_HA,"%s: error create %d\n",
oscc->oscc_obd->obd_name, rc);
+
+ spin_lock(&oscc->oscc_lock);
+
+ /* woken up, but recovery has not finished */
+ if ((oscc->oscc_obd->u.cli.cl_import->imp_invalid) ||
+ (oscc->oscc_flags & OSCC_FLAG_RECOVERING)) {
+ rc = -EIO;
+ spin_unlock(&oscc->oscc_lock);
break;
}
- spin_lock(&oscc->oscc_lock);
- if (oscc->oscc_flags & OSCC_FLAG_EXITING) {
+ if (oscc->oscc_flags & OSCC_FLAG_NOSPC) {
+ rc = -ENOSPC;
spin_unlock(&oscc->oscc_lock);
break;
}
- /* wakeup but recovery not finished */
- if (oscc->oscc_flags & OSCC_FLAG_RECOVERING) {
- rc = -EIO;
+
+ if (oscc->oscc_flags & OSCC_FLAG_RDONLY) {
+ rc = -EROFS;
+ spin_unlock(&oscc->oscc_lock);
+ break;
+ }
+
+ // Should we report -EIO error ?
+ if (oscc->oscc_flags & OSCC_FLAG_EXITING) {
spin_unlock(&oscc->oscc_lock);
break;
}
CDEBUG(D_RPCTRACE, "%s: set oscc_next_id = "LPU64"\n",
exp->exp_obd->obd_name, oscc->oscc_next_id);
break;
- } else if (oscc->oscc_flags & OSCC_FLAG_NOSPC) {
- rc = -ENOSPC;
- spin_unlock(&oscc->oscc_lock);
- break;
- } else if (oscc->oscc_flags & OSCC_FLAG_RDONLY) {
- rc = -EROFS;
- spin_unlock(&oscc->oscc_lock);
- break;
}
spin_unlock(&oscc->oscc_lock);
{
struct client_obd *cli = &req->rq_import->imp_obd->u.cli;
struct obd_statfs *msfs;
+ __u64 used;
ENTRY;
if (rc == -EBADR)
/* Reinitialize the RDONLY and DEGRADED flags at the client
* on each statfs, so they don't stay set permanently. */
spin_lock(&cli->cl_oscc.oscc_lock);
- cli->cl_oscc.oscc_flags &= ~(OSCC_FLAG_RDONLY | OSCC_FLAG_DEGRADED);
- if (msfs->os_state & OS_STATE_DEGRADED)
+
+ if (unlikely(msfs->os_state & OS_STATE_DEGRADED))
cli->cl_oscc.oscc_flags |= OSCC_FLAG_DEGRADED;
+ else if (unlikely(cli->cl_oscc.oscc_flags & OSCC_FLAG_DEGRADED))
+ cli->cl_oscc.oscc_flags &= ~OSCC_FLAG_DEGRADED;
- if (msfs->os_state & OS_STATE_READONLY)
+ if (unlikely(msfs->os_state & OS_STATE_READONLY))
cli->cl_oscc.oscc_flags |= OSCC_FLAG_RDONLY;
+ else if (unlikely(cli->cl_oscc.oscc_flags & OSCC_FLAG_RDONLY))
+ cli->cl_oscc.oscc_flags &= ~OSCC_FLAG_RDONLY;
+
+ /* Add a bit of hysteresis so this flag isn't continually flapping,
+ * and ensure that new files don't get extremely fragmented due to
+ * only a small amount of available space in the filesystem.
+ * We want to set the NOSPC flag when there is less than ~0.1% free
+ * and clear it when there is at least ~0.2% free space, so:
+ * avail < ~0.1% max max = avail + used
+ * 1025 * avail < avail + used used = blocks - free
+ * 1024 * avail < used
+ * 1024 * avail < blocks - free
+ * avail < ((blocks - free) >> 10)
+ *
+ * On a very large disk, say 16 TB, 0.1% will be 16 GB. We don't want to
+ * lose that amount of space, so in those cases we report no space left
+ * if there is less than 1 GB left. */
+ used = min((msfs->os_blocks - msfs->os_bfree) >> 10, 1ULL << 30);
+ if (unlikely(((cli->cl_oscc.oscc_flags & OSCC_FLAG_NOSPC) == 0) &&
+ ((msfs->os_ffree < 32) || (msfs->os_bavail < used))))
+ cli->cl_oscc.oscc_flags |= OSCC_FLAG_NOSPC;
+ else if (unlikely(((cli->cl_oscc.oscc_flags & OSCC_FLAG_NOSPC) != 0) &&
+ (msfs->os_ffree > 64) && (msfs->os_bavail > (used << 1))))
+ cli->cl_oscc.oscc_flags &= ~OSCC_FLAG_NOSPC;
+
spin_unlock(&cli->cl_oscc.oscc_lock);
*aa->aa_oi->oi_osfs = *msfs;
RETURN(0);
}
- if (KEY_IS(KEY_UNLINKED)) {
- struct osc_creator *oscc = &obd->u.cli.cl_oscc;
- spin_lock(&oscc->oscc_lock);
- oscc->oscc_flags &= ~OSCC_FLAG_NOSPC;
- spin_unlock(&oscc->oscc_lock);
- RETURN(0);
- }
-
if (KEY_IS(KEY_INIT_RECOV)) {
if (vallen != sizeof(int))
RETURN(-EINVAL);
}
run_test 27m "create file while OST0 was full =================="
-# osc's keep a NOSPC stick flag that gets unset with rmdir
+sleep_maxage() {
+ local DELAY=$(do_facet mds lctl get_param -n lov.*.qos_maxage | awk '{print $1 + 2}')
+ sleep $DELAY
+}
+
+# OSCs keep a NOSPC flag that will be reset after ~5s (qos_maxage)
+# if the OST isn't full anymore.
reset_enospc() {
local FAIL_LOC=${1:-0}
local OSTIDX=${2:-""}
- mkdir -p $DIR/d27/nospc
- rmdir $DIR/d27/nospc
local list=$(comma_list $(osts_nodes))
[ "$OSTIDX" ] && list=$(facet_host ost$((OSTIDX + 1)))
do_nodes $list lctl set_param fail_loc=$FAIL_LOC
+ sleep_maxage
}
exhaust_precreations() {
reset_enospc
rm -f $DIR/d27/f27o
exhaust_all_precreations 0x215
- sleep 5
touch $DIR/d27/f27o && error "able to create $DIR/d27/f27o"
reset_enospc
rm -f $DIR/d27/f27p
+ mkdir -p $DIR/d27
$MCREATE $DIR/d27/f27p || error "mcreate failed"
$TRUNCATE $DIR/d27/f27p 80000000 || error "truncate failed"
local START=`date +%s`
createmany -o $DIR/$tdir/$tfile 32
- reset_enospc
-
local FINISH=`date +%s`
local TIMEOUT=`lctl get_param -n timeout`
[ $((FINISH - START)) -ge $((TIMEOUT / 2)) ] && \
test_27x() {
[ "$OSTCOUNT" -lt "2" ] && skip_env "$OSTCOUNT < 2 OSTs" && return
- DELAY=$(do_facet mds lctl get_param -n lov.*.qos_maxage | awk '{print $1 + 2}')
OFFSET=$(($OSTCOUNTi - 1))
OSTIDX=0
local OST=$(lfs osts | awk '/'${OSTIDX}': / { print $2 }' | sed -e 's/_UUID$//')
mkdir -p $DIR/$tdir
$SETSTRIPE $DIR/$tdir -c 1 # 1 stripe per file
do_facet ost$OSTIDX lctl set_param -n obdfilter.$OST.degraded 1
- sleep $DELAY
+ sleep_maxage
createmany -o $DIR/$tdir/$tfile $OSTCOUNT
for i in `seq 0 $OFFSET`; do
[ `$GETSTRIPE $DIR/$tdir/$tfile$i | grep -A 10 obdidx | awk '{print $1}' | grep -w "$OSTIDX"` ] &&
remote_mds_nodsh && skip "remote MDS with nodsh" && return
MDS_OSCS=`do_facet mds lctl dl | awk '/[oO][sS][cC].*md[ts]/ { print $4 }'`
- DELAY=$(do_facet mds lctl get_param -n lov.*.qos_maxage | awk '{print $1 + 2}')
OFFSET=$(($OSTCOUNT-1))
OST=-1
for OSC in $MDS_OSCS; do
$SETSTRIPE $DIR/$tdir -c 1 # 1 stripe / file
do_facet ost$OSTIDX lctl set_param -n obdfilter.$OST.degraded 1
- sleep $DELAY
+ sleep_maxage
createmany -o $DIR/$tdir/$tfile $OSTCOUNT
do_facet ost$OSTIDX lctl set_param -n obdfilter.$OST.degraded 0
echo -n "Free space priority "
lctl get_param -n lov.*-clilov-*.qos_prio_free
- DELAY=$(lctl get_param -n lov.*-clilov-*.qos_maxage | head -1 | awk '{print $1}')
declare -a AVAIL
free_min_max
[ $MINV -gt 960000 ] && skip "too much free space in OST$MINI, skip" &&\
done
FILL=$(($MINV / 4))
sync
- sleep $DELAY
+ sleep_maxage
free_min_max
DIFF=$(($MAXV - $MINV))
done
echo "wrote $i 200k files"
sync
- sleep $DELAY
+ sleep_maxage
echo "Note: free space may not be updated, so measurements might be off"
free_min_max