From: shadow Date: Fri, 2 Oct 2009 11:16:49 +0000 (+0000) Subject: Branch HEAD X-Git-Tag: v1_9_280~25 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=cbc35cf7d05e133553d876505b0089496ec5acd1;p=fs%2Flustre-release.git Branch HEAD b=19662 i=adilger i=shadow --- diff --git a/lustre/ChangeLog b/lustre/ChangeLog index 5b92f92..0052dd5 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -15,6 +15,10 @@ tbd Sun Microsystems, Inc. * File join has been disabled in this release, refer to Bugzilla 16929. Severity : enhancement +Bugzilla : 19662 +Description: Remove set_info(KEY_UNLINKED) from MDS/OSC + +Severity : enhancement Bugzilla : 19526 Description: correctly handle big reply message. Details : send LNet event if reply is bigger then buffer and adjust this buffer diff --git a/lustre/include/obd.h b/lustre/include/obd.h index 789ad5c..73a01f0 100644 --- a/lustre/include/obd.h +++ b/lustre/include/obd.h @@ -1201,7 +1201,6 @@ enum obd_cleanup_stage { #define KEY_SET_FS "set_fs" /* KEY_SET_INFO in lustre_idl.h */ #define KEY_SPTLRPC_CONF "sptlrpc_conf" -#define KEY_UNLINKED "unlinked" struct lu_context; diff --git a/lustre/lov/lov_obd.c b/lustre/lov/lov_obd.c index a90739b..ec0eb36 100644 --- a/lustre/lov/lov_obd.c +++ b/lustre/lov/lov_obd.c @@ -2581,8 +2581,6 @@ static int lov_set_info_async(struct obd_export *exp, obd_count keylen, next_id = 1; } else if (KEY_IS(KEY_CHECKSUM)) { do_inactive = 1; - } else if (KEY_IS(KEY_UNLINKED)) { - check_uuid = val ? 
1 : 0; } else if (KEY_IS(KEY_EVICT_BY_NID)) { /* use defaults: do_inactive = incr = 0; */ } else if (KEY_IS(KEY_MDS_CONN)) { diff --git a/lustre/lov/lov_qos.c b/lustre/lov/lov_qos.c index b5ba722..eaa2eb1 100644 --- a/lustre/lov/lov_qos.c +++ b/lustre/lov/lov_qos.c @@ -55,7 +55,6 @@ #define TGT_BAVAIL(i) (lov->lov_tgts[i]->ltd_exp->exp_obd->obd_osfs.os_bavail *\ lov->lov_tgts[i]->ltd_exp->exp_obd->obd_osfs.os_bsize) -#define TGT_FFREE(i) (lov->lov_tgts[i]->ltd_exp->exp_obd->obd_osfs.os_ffree) int qos_add_tgt(struct obd_device *obd, __u32 index) @@ -742,10 +741,8 @@ static int alloc_qos(struct obd_export *exp, int *idx_arr, int *stripe_cnt, char *poolname, int flags) { struct lov_obd *lov = &exp->exp_obd->u.lov; - static time_t last_warn = 0; - time_t now = cfs_time_current_sec(); - __u64 total_bavail, total_weight = 0; - int nfound, good_osts, i, warn = 0, rc = 0; + __u64 total_weight = 0; + int nfound, good_osts, i, rc = 0; int stripe_cnt_min = min_stripe_count(*stripe_cnt, flags); struct pool_desc *pool; struct ost_pool *osts; @@ -793,35 +790,12 @@ static int alloc_qos(struct obd_export *exp, int *idx_arr, int *stripe_cnt, if (rc) GOTO(out, rc); - total_bavail = 0; good_osts = 0; - /* Warn users about zero available space/inode every 30 min */ - if (cfs_time_sub(now, last_warn) > 60 * 30) - warn = 1; /* Find all the OSTs that are valid stripe candidates */ for (i = 0; i < osts->op_count; i++) { - __u64 bavail; - if (!lov->lov_tgts[osts->op_array[i]] || !lov->lov_tgts[osts->op_array[i]]->ltd_active) continue; - bavail = TGT_BAVAIL(osts->op_array[i]); - if (!bavail) { - if (warn) { - CDEBUG(D_QOS, "no free space on %s\n", - obd_uuid2str(&lov->lov_tgts[osts->op_array[i]]->ltd_uuid)); - last_warn = now; - } - continue; - } - if (!TGT_FFREE(osts->op_array[i])) { - if (warn) { - CDEBUG(D_QOS, "no free inodes on %s\n", - obd_uuid2str(&lov->lov_tgts[osts->op_array[i]]->ltd_uuid)); - last_warn = now; - } - continue; - } /* Fail Check before osc_precreate() is called so we 
can only 'fail' single OSC. */ @@ -833,7 +807,6 @@ static int alloc_qos(struct obd_export *exp, int *idx_arr, int *stripe_cnt, lov->lov_tgts[osts->op_array[i]]->ltd_qos.ltq_usable = 1; qos_calc_weight(lov, osts->op_array[i]); - total_bavail += bavail; total_weight += lov->lov_tgts[osts->op_array[i]]->ltd_qos.ltq_weight; good_osts++; @@ -846,9 +819,6 @@ static int alloc_qos(struct obd_export *exp, int *idx_arr, int *stripe_cnt, if (good_osts < stripe_cnt_min) GOTO(out, rc = -EAGAIN); - if (!total_bavail) - GOTO(out, rc = -ENOSPC); - /* We have enough osts */ if (good_osts < *stripe_cnt) *stripe_cnt = good_osts; diff --git a/lustre/mdd/mdd_dir.c b/lustre/mdd/mdd_dir.c index 451011e..e053f61 100644 --- a/lustre/mdd/mdd_dir.c +++ b/lustre/mdd/mdd_dir.c @@ -903,11 +903,6 @@ static int mdd_unlink(const struct lu_env *env, struct md_object *pobj, } } #endif - - if (rc == 0) - obd_set_info_async(mdd2obd_dev(mdd)->u.mds.mds_osc_exp, - sizeof(KEY_UNLINKED), KEY_UNLINKED, 0, - NULL, NULL); if (!is_dir) /* old files may not have link ea; ignore errors */ mdd_links_rename(env, mdd_cobj, mdo2fid(mdd_pobj), diff --git a/lustre/osc/osc_create.c b/lustre/osc/osc_create.c index 9046d7d..2bd2311 100644 --- a/lustre/osc/osc_create.c +++ b/lustre/osc/osc_create.c @@ -127,7 +127,7 @@ static int osc_interpret_create(const struct lu_env *env, oscc->oscc_flags |= OSCC_FLAG_RDONLY; case -ENOSPC: case -EFBIG: - if (rc != EROFS) { + if (rc != -EROFS) { oscc->oscc_flags |= OSCC_FLAG_NOSPC; if (body && rc == -ENOSPC) { oscc->oscc_last_id = body->oa.o_id; @@ -194,7 +194,8 @@ static int oscc_internal_create(struct osc_creator *oscc) LASSERT_SPIN_LOCKED(&oscc->oscc_lock); - if(oscc->oscc_flags & OSCC_FLAG_RECOVERING) { + if ((oscc->oscc_flags & OSCC_FLAG_RECOVERING) || + (oscc->oscc_flags & OSCC_FLAG_DEGRADED)) { spin_unlock(&oscc->oscc_lock); RETURN(0); } @@ -285,9 +286,9 @@ static int oscc_wait_for_objects(struct osc_creator *oscc, int count) OSCC_FLAG_EXITING) & oscc->oscc_flags; have_objs = 
oscc_has_objects_nolock(oscc, count); - if (!ost_unusable) + if (!ost_unusable && !have_objs) /* they release lock himself */ - oscc_internal_create(oscc); + have_objs = oscc_internal_create(oscc); else spin_unlock(&oscc->oscc_lock); @@ -309,16 +310,6 @@ static int oscc_precreate(struct osc_creator *oscc) NULL, NULL); rc = l_wait_event(oscc->oscc_waitq, oscc_wait_for_objects(oscc, 1), &lwi); - - if (!oscc_has_objects(oscc, 1) || (oscc->oscc_flags & OSCC_FLAG_NOSPC)) - rc = -ENOSPC; - - if (oscc->oscc_flags & OSCC_FLAG_RDONLY) - rc = -EROFS; - - if (oscc->oscc_obd->u.cli.cl_import->imp_invalid) - rc = -EIO; - RETURN(rc); } @@ -350,10 +341,11 @@ int osc_precreate(struct obd_export *exp) if (imp != NULL && imp->imp_deactive) RETURN(1000); - /* until oscc in recovery - other flags is wrong */ + /* Handle critical states first */ spin_lock(&oscc->oscc_lock); if (oscc->oscc_flags & OSCC_FLAG_NOSPC || - oscc->oscc_flags & OSCC_FLAG_RDONLY) { + oscc->oscc_flags & OSCC_FLAG_RDONLY || + oscc->oscc_flags & OSCC_FLAG_EXITING) { spin_unlock(&oscc->oscc_lock); RETURN(1000); } @@ -375,7 +367,8 @@ int osc_precreate(struct obd_export *exp) RETURN(1); } - oscc_internal_create(oscc); + if (oscc_internal_create(oscc)) + RETURN(1000); RETURN(1); } @@ -392,9 +385,22 @@ static int handle_async_create(struct ptlrpc_request *req, int rc) if(rc) GOTO(out_wake, rc); - if ((oscc->oscc_flags & OSCC_FLAG_EXITING)) + /* Handle the critical type errors first. + * Should we also test cl_import state as well ? 
*/ + if (oscc->oscc_flags & OSCC_FLAG_EXITING) GOTO(out_wake, rc = -EIO); + if (oscc->oscc_flags & OSCC_FLAG_NOSPC) + GOTO(out_wake, rc = -ENOSPC); + + if (oscc->oscc_flags & OSCC_FLAG_RDONLY) + GOTO(out_wake, rc = -EROFS); + + /* should try to wait until recovery has finished */ + if((oscc->oscc_flags & OSCC_FLAG_RECOVERING) || + (oscc->oscc_flags & OSCC_FLAG_DEGRADED)) + RETURN(-EAGAIN); + if (oscc_has_objects_nolock(oscc, 1)) { memcpy(oa, &oscc->oscc_oa, sizeof(*oa)); oa->o_id = oscc->oscc_next_id; @@ -403,20 +409,10 @@ static int handle_async_create(struct ptlrpc_request *req, int rc) CDEBUG(D_RPCTRACE, " set oscc_next_id = "LPU64"\n", oscc->oscc_next_id); - GOTO(out_wake, rc = 0); + GOTO(out_wake, rc = 0); } - /* should be try wait until recovery finished */ - if(oscc->oscc_flags & OSCC_FLAG_RECOVERING) - RETURN(-EAGAIN); - - if (oscc->oscc_flags & OSCC_FLAG_NOSPC) - GOTO(out_wake, rc = -ENOSPC); - - if (oscc->oscc_flags & OSCC_FLAG_RDONLY) - GOTO(out_wake, rc = -EROFS); - - /* we not have objects now - continue wait */ + /* we don't have objects now - continue wait */ RETURN(-EAGAIN); out_wake: @@ -605,20 +601,34 @@ int osc_create(struct obd_export *exp, struct obdo *oa, oscc->oscc_obd->obd_name); rc = oscc_precreate(oscc); - if (rc) { + if (rc) CDEBUG(D_HA,"%s: error create %d\n", oscc->oscc_obd->obd_name, rc); + + spin_lock(&oscc->oscc_lock); + + /* woken up, but recovery did not finish */ + if ((oscc->oscc_obd->u.cli.cl_import->imp_invalid) || + (oscc->oscc_flags & OSCC_FLAG_RECOVERING)) { + rc = -EIO; + spin_unlock(&oscc->oscc_lock); break; } - spin_lock(&oscc->oscc_lock); - if (oscc->oscc_flags & OSCC_FLAG_EXITING) { + if (oscc->oscc_flags & OSCC_FLAG_NOSPC) { + rc = -ENOSPC; spin_unlock(&oscc->oscc_lock); break; } - /* wakeup but recovery not finished */ - if (oscc->oscc_flags & OSCC_FLAG_RECOVERING) { - rc = -EIO; + + if (oscc->oscc_flags & OSCC_FLAG_RDONLY) { + rc = -EROFS; + spin_unlock(&oscc->oscc_lock); + break; + } + + // Should we report -EIO error ? 
+ if (oscc->oscc_flags & OSCC_FLAG_EXITING) { spin_unlock(&oscc->oscc_lock); break; } @@ -634,14 +644,6 @@ int osc_create(struct obd_export *exp, struct obdo *oa, CDEBUG(D_RPCTRACE, "%s: set oscc_next_id = "LPU64"\n", exp->exp_obd->obd_name, oscc->oscc_next_id); break; - } else if (oscc->oscc_flags & OSCC_FLAG_NOSPC) { - rc = -ENOSPC; - spin_unlock(&oscc->oscc_lock); - break; - } else if (oscc->oscc_flags & OSCC_FLAG_RDONLY) { - rc = -EROFS; - spin_unlock(&oscc->oscc_lock); - break; } spin_unlock(&oscc->oscc_lock); diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c index 90716fe..65b1436 100644 --- a/lustre/osc/osc_request.c +++ b/lustre/osc/osc_request.c @@ -3447,6 +3447,7 @@ static int osc_statfs_interpret(const struct lu_env *env, { struct client_obd *cli = &req->rq_import->imp_obd->u.cli; struct obd_statfs *msfs; + __u64 used; ENTRY; if (rc == -EBADR) @@ -3472,12 +3473,39 @@ static int osc_statfs_interpret(const struct lu_env *env, /* Reinitialize the RDONLY and DEGRADED flags at the client * on each statfs, so they don't stay set permanently. */ spin_lock(&cli->cl_oscc.oscc_lock); - cli->cl_oscc.oscc_flags &= ~(OSCC_FLAG_RDONLY | OSCC_FLAG_DEGRADED); - if (msfs->os_state & OS_STATE_DEGRADED) + + if (unlikely(msfs->os_state & OS_STATE_DEGRADED)) cli->cl_oscc.oscc_flags |= OSCC_FLAG_DEGRADED; + else if (unlikely(cli->cl_oscc.oscc_flags & OSCC_FLAG_DEGRADED)) + cli->cl_oscc.oscc_flags &= ~OSCC_FLAG_DEGRADED; - if (msfs->os_state & OS_STATE_READONLY) + if (unlikely(msfs->os_state & OS_STATE_READONLY)) cli->cl_oscc.oscc_flags |= OSCC_FLAG_RDONLY; + else if (unlikely(cli->cl_oscc.oscc_flags & OSCC_FLAG_RDONLY)) + cli->cl_oscc.oscc_flags &= ~OSCC_FLAG_RDONLY; + + /* Add a bit of hysteresis so this flag isn't continually flapping, + * and ensure that new files don't get extremely fragmented due to + * only a small amount of available space in the filesystem. 
+ * We want to set the NOSPC flag when there is less than ~0.1% free + * and clear it when there is at least ~0.2% free space, so: + * avail < ~0.1% max max = avail + used + * 1025 * avail < avail + used used = blocks - free + * 1024 * avail < used + * 1024 * avail < blocks - free + * avail < ((blocks - free) >> 10) + * + * On a very large disk, say 16 TB, 0.1% will be 16 GB. We don't want to + * lose that amount of space so in those cases we report no space left + * if there is less than 1 GB left. */ + used = min((msfs->os_blocks - msfs->os_bfree) >> 10, 1ULL << 30); + if (unlikely(((cli->cl_oscc.oscc_flags & OSCC_FLAG_NOSPC) == 0) && + ((msfs->os_ffree < 32) || (msfs->os_bavail < used)))) + cli->cl_oscc.oscc_flags |= OSCC_FLAG_NOSPC; + else if (unlikely(((cli->cl_oscc.oscc_flags & OSCC_FLAG_NOSPC) != 0) && + (msfs->os_ffree > 64) && (msfs->os_bavail > (used << 1)))) + cli->cl_oscc.oscc_flags &= ~OSCC_FLAG_NOSPC; + spin_unlock(&cli->cl_oscc.oscc_lock); *aa->aa_oi->oi_osfs = *msfs; @@ -3912,14 +3940,6 @@ static int osc_set_info_async(struct obd_export *exp, obd_count keylen, RETURN(0); } - if (KEY_IS(KEY_UNLINKED)) { - struct osc_creator *oscc = &obd->u.cli.cl_oscc; - spin_lock(&oscc->oscc_lock); - oscc->oscc_flags &= ~OSCC_FLAG_NOSPC; - spin_unlock(&oscc->oscc_lock); - RETURN(0); - } - if (KEY_IS(KEY_INIT_RECOV)) { if (vallen != sizeof(int)) RETURN(-EINVAL); diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 3c71bd4..692dec1 100644 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -983,17 +983,22 @@ test_27m() { } run_test 27m "create file while OST0 was full ==================" -# osc's keep a NOSPC stick flag that gets unset with rmdir +sleep_maxage() { + local DELAY=$(do_facet mds lctl get_param -n lov.*.qos_maxage | awk '{print $1 + 2}') + sleep $DELAY +} + +# OSCs keep a NOSPC flag that will be reset after ~5s (qos_maxage) +# if the OST isn't full anymore.
reset_enospc() { local FAIL_LOC=${1:-0} local OSTIDX=${2:-""} - mkdir -p $DIR/d27/nospc - rmdir $DIR/d27/nospc local list=$(comma_list $(osts_nodes)) [ "$OSTIDX" ] && list=$(facet_host ost$((OSTIDX + 1))) do_nodes $list lctl set_param fail_loc=$FAIL_LOC + sleep_maxage } exhaust_precreations() { @@ -1054,7 +1059,6 @@ test_27o() { reset_enospc rm -f $DIR/d27/f27o exhaust_all_precreations 0x215 - sleep 5 touch $DIR/d27/f27o && error "able to create $DIR/d27/f27o" @@ -1070,6 +1074,7 @@ test_27p() { reset_enospc rm -f $DIR/d27/f27p + mkdir -p $DIR/d27 $MCREATE $DIR/d27/f27p || error "mcreate failed" $TRUNCATE $DIR/d27/f27p 80000000 || error "truncate failed" @@ -1178,8 +1183,6 @@ test_27v() { # bug 4900 local START=`date +%s` createmany -o $DIR/$tdir/$tfile 32 - reset_enospc - local FINISH=`date +%s` local TIMEOUT=`lctl get_param -n timeout` [ $((FINISH - START)) -ge $((TIMEOUT / 2)) ] && \ @@ -1209,7 +1212,6 @@ run_test 27w "check lfs setstripe -c -s -i options =============" test_27x() { [ "$OSTCOUNT" -lt "2" ] && skip_env "$OSTCOUNT < 2 OSTs" && return - DELAY=$(do_facet mds lctl get_param -n lov.*.qos_maxage | awk '{print $1 + 2}') OFFSET=$(($OSTCOUNTi - 1)) OSTIDX=0 local OST=$(lfs osts | awk '/'${OSTIDX}': / { print $2 }' | sed -e 's/_UUID$//') @@ -1217,7 +1219,7 @@ test_27x() { mkdir -p $DIR/$tdir $SETSTRIPE $DIR/$tdir -c 1 # 1 stripe per file do_facet ost$OSTIDX lctl set_param -n obdfilter.$OST.degraded 1 - sleep $DELAY + sleep_maxage createmany -o $DIR/$tdir/$tfile $OSTCOUNT for i in `seq 0 $OFFSET`; do [ `$GETSTRIPE $DIR/$tdir/$tfile$i | grep -A 10 obdidx | awk '{print $1}' | grep -w "$OSTIDX"` ] && @@ -1232,7 +1234,6 @@ test_27y() { remote_mds_nodsh && skip "remote MDS with nodsh" && return MDS_OSCS=`do_facet mds lctl dl | awk '/[oO][sS][cC].*md[ts]/ { print $4 }'` - DELAY=$(do_facet mds lctl get_param -n lov.*.qos_maxage | awk '{print $1 + 2}') OFFSET=$(($OSTCOUNT-1)) OST=-1 for OSC in $MDS_OSCS; do @@ -1249,7 +1250,7 @@ test_27y() { $SETSTRIPE $DIR/$tdir -c 
1 # 1 stripe / file do_facet ost$OSTIDX lctl set_param -n obdfilter.$OST.degraded 1 - sleep $DELAY + sleep_maxage createmany -o $DIR/$tdir/$tfile $OSTCOUNT do_facet ost$OSTIDX lctl set_param -n obdfilter.$OST.degraded 0 @@ -4583,7 +4584,6 @@ test_116() { echo -n "Free space priority " lctl get_param -n lov.*-clilov-*.qos_prio_free - DELAY=$(lctl get_param -n lov.*-clilov-*.qos_maxage | head -1 | awk '{print $1}') declare -a AVAIL free_min_max [ $MINV -gt 960000 ] && skip "too much free space in OST$MINI, skip" &&\ @@ -4604,7 +4604,7 @@ test_116() { done FILL=$(($MINV / 4)) sync - sleep $DELAY + sleep_maxage free_min_max DIFF=$(($MAXV - $MINV)) @@ -4633,7 +4633,7 @@ test_116() { done echo "wrote $i 200k files" sync - sleep $DELAY + sleep_maxage echo "Note: free space may not be updated, so measurements might be off" free_min_max