* File join has been disabled in this release, refer to Bugzilla 16929.
Severity : enhancement
+Bugzilla : 17536
+Description: MDS create should not wait for statfs RPC while holding DLM lock.
+
+Severity : enhancement
Bugzilla : 18289
Description: Update to RHEL5U3 kernel-2.6.18-128.1.1.el5.
Severity : normal
-Frequency : normal
+Frequency : normal
Bugzilla : 12069
-Descriptoin: OST grant too much space to client even there are not enough space.
+Description: OST grants too much space to clients even when there is not enough space.
Details : Client will shrink its grant cache to OST if there are no write
activity over 6 mins (GRANT_SHRINK_INTERVAL), and OST will retrieve
this grant cache if there are already not enough avaible space
- (left_space < total_clients * 32M).
+ (left_space < total_clients * 32M).
Severity : normal
Frequency : start MDS on uncleanly shutdowned MDS device
Bugzilla : 16839
Descriptoin: ll_sync thread stay in waiting mds<>ost recovery finished
Details : stay in waiting mds<>ost recovery finished produce random bugs
- due race between two ll_sync thread for one lov target. send
+ due race between two ll_sync thread for one lov target. send
ACTIVATE event only if connect realy finished and import have
FULL state.
extern void lustre_swab_obd_statfs (struct obd_statfs *os);
#define OBD_STATFS_NODELAY 0x0001 /* requests should be send without delay
* and resends for avoid deadlocks */
-
#define OBD_STATFS_FROM_CACHE 0x0002 /* the statfs callback should not update
* obd_osfs_age */
+#define OBD_STATFS_PTLRPCD 0x0004 /* requests will be sent via ptlrpcd
+ * instead of a specific set. This
+ * means that we cannot rely on the set
+ * interpret routine to be called.
+ * lov_statfs_interpret() must thus be
+ * called explicitly once every request
+ * of the lov request set has
+ * completed */
/* ost_body.data values for OST_BRW */
{
if (req->rq_phase == new_phase)
return;
-
+
if (new_phase == RQ_PHASE_UNREGISTERING) {
req->rq_next_phase = req->rq_phase;
if (req->rq_import)
atomic_inc(&req->rq_import->imp_unregistering);
}
-
+
if (req->rq_phase == RQ_PHASE_UNREGISTERING) {
if (req->rq_import)
atomic_dec(&req->rq_import->imp_unregistering);
}
- DEBUG_REQ(D_RPCTRACE, req, "move req \"%s\" -> \"%s\"",
+ DEBUG_REQ(D_RPCTRACE, req, "move req \"%s\" -> \"%s\"",
ptlrpc_rqphase2str(req), ptlrpc_phase2str(new_phase));
req->rq_phase = new_phase;
/* ptlrpc/pinger.c */
enum timeout_event {
- TIMEOUT_GRANT = 1
+ TIMEOUT_GRANT = 1
};
struct timeout_item;
typedef int (*timeout_cb_t)(struct timeout_item *, void *);
void ptlrpcd_stop(struct ptlrpcd_ctl *pc, int force);
void ptlrpcd_wake(struct ptlrpc_request *req);
void ptlrpcd_add_req(struct ptlrpc_request *req, enum ptlrpcd_scope scope);
+void ptlrpcd_add_rqset(struct ptlrpc_request_set *set);
int ptlrpcd_addref(void);
void ptlrpcd_decref(void);
unsigned long lqr_dirty:1; /* recalc round-robin list */
};
+struct lov_statfs_data { /* buffers used by qos_statfs_update() for the
+ * asynchronous qos statfs refresh */
+ struct obd_info lsd_oi; /* obd_info handed to obd_statfs_async() */
+ struct obd_statfs lsd_statfs; /* statfs buffer that lsd_oi.oi_osfs
+ * is pointed at */
+};
/* Stripe placement optimization */
struct lov_qos {
struct list_head lq_oss_list; /* list of OSSs that targets use */
unsigned long lq_dirty:1, /* recalc qos data */
lq_same_space:1,/* the ost's all have approx.
the same space avail */
- lq_reset:1; /* zero current penalties */
+ lq_reset:1, /* zero current penalties */
+ lq_statfs_in_progress:1; /* statfs op in progress */
+ /* qos statfs data */
+ struct lov_statfs_data *lq_statfs_data;
+ cfs_waitq_t lq_statfs_waitq; /* waitqueue to notify statfs
+ * requests completion */
};
struct lov_tgt_desc {
void qos_shrink_lsm(struct lov_request_set *set);
int qos_prep_create(struct obd_export *exp, struct lov_request_set *set);
void qos_update(struct lov_obd *lov);
+void qos_statfs_done(struct lov_obd *lov);
+void qos_statfs_update(struct obd_device *obd, __u64 max_age, int wait);
int qos_remedy_create(struct lov_request_set *set, struct lov_request *req);
/* lov_request.c */
int lov_fini_statfs(struct obd_device *obd, struct obd_statfs *osfs,
int success);
int lov_fini_statfs_set(struct lov_request_set *set);
+int lov_statfs_interpret(struct ptlrpc_request_set *rqset, void *data, int rc);
/* lov_obd.c */
void lov_fix_desc(struct lov_desc *desc);
lov->lov_qos.lq_prio_free = 232;
/* Default threshold for rr (roughly 17%) */
lov->lov_qos.lq_threshold_rr = 43;
+ /* Init statfs fields */
+ OBD_ALLOC_PTR(lov->lov_qos.lq_statfs_data);
+ if (NULL == lov->lov_qos.lq_statfs_data)
+ RETURN(-ENOMEM);
+ cfs_waitq_init(&lov->lov_qos.lq_statfs_waitq);
lov->lov_pools_hash_body = lustre_hash_init("POOLS", 7, 7,
&pool_hash_operations, 0);
/* clear pools parent proc entry only after all pools is killed */
lprocfs_obd_cleanup(obd);
+ OBD_FREE_PTR(lov->lov_qos.lq_statfs_data);
RETURN(0);
}
struct obd_info oinfo;
struct lov_request_set *set = NULL;
struct lov_request *req;
- struct obd_statfs osfs;
- __u64 maxage;
int rc = 0;
ENTRY;
GOTO(out, rc);
}
- maxage = cfs_time_shift_64(-lov->desc.ld_qos_maxage);
- obd_statfs_rqset(exp->exp_obd, &osfs, maxage, OBD_STATFS_NODELAY);
+ /* issue statfs rpcs if the osfs data is older than qos_maxage - 1s,
+ * later in alloc_qos(), we will wait for those rpcs to complete if
+ * the osfs age is older than 2 * qos_maxage */
+ qos_statfs_update(exp->exp_obd,
+ cfs_time_shift_64(-lov->desc.ld_qos_maxage) + HZ, 0);
rc = lov_prep_create_set(exp, &oinfo, ea, src_oa, oti, &set);
if (rc)
RETURN(rc);
}
-static int lov_statfs_interpret(struct ptlrpc_request_set *rqset,
- void *data, int rc)
+int lov_statfs_interpret(struct ptlrpc_request_set *rqset, void *data, int rc)
{
struct lov_request_set *lovset = (struct lov_request_set *)data;
int err;
#include <obd_class.h>
#include <obd_lov.h>
+#include <lustre/lustre_idl.h>
#include "lov_internal.h"
/* #define QOS_DEBUG 1 */
lov_getref(exp->exp_obd);
+ /* wait for fresh statfs info if needed, the rpcs are sent in
+ * lov_create() */
+ qos_statfs_update(exp->exp_obd,
+ cfs_time_shift_64(-2 * lov->desc.ld_qos_maxage), 1);
+
/* Detect -EAGAIN early, before expensive lock is taken. */
if (!lov->lov_qos.lq_dirty && lov->lov_qos.lq_same_space)
GOTO(out_nolock, rc = -EAGAIN);
ENTRY;
lov->lov_qos.lq_dirty = 1;
}
+/*
+ * Mark the qos statfs refresh as finished: clear lq_statfs_in_progress
+ * and signal lq_statfs_waitq, both while holding lq_rw_sem for write.
+ * Must only be called while a refresh is flagged as in progress; this
+ * is asserted before the semaphore is taken.
+ */
+void qos_statfs_done(struct lov_obd *lov)
+{
+ LASSERT(lov->lov_qos.lq_statfs_in_progress);
+ down_write(&lov->lov_qos.lq_rw_sem);
+ lov->lov_qos.lq_statfs_in_progress = 0;
+ /* wake up any threads waiting for the statfs rpcs to complete */
+ cfs_waitq_signal(&lov->lov_qos.lq_statfs_waitq);
+ up_write(&lov->lov_qos.lq_rw_sem);
+}
+/*
+ * Wait condition used by qos_statfs_update(). Returns non-zero when no
+ * statfs refresh is flagged as in progress, or when obd_osfs_age passes
+ * the same freshness test against max_age that qos_statfs_update() uses
+ * to decide that no refresh is needed. The test is evaluated while
+ * holding lq_rw_sem for read.
+ */
+static int qos_statfs_ready(struct obd_device *obd, __u64 max_age)
+{
+ struct lov_obd *lov = &obd->u.lov;
+ int rc;
+ ENTRY;
+ down_read(&lov->lov_qos.lq_rw_sem);
+ rc = lov->lov_qos.lq_statfs_in_progress == 0 ||
+ cfs_time_beforeq_64(max_age, obd->obd_osfs_age);
+ up_read(&lov->lov_qos.lq_rw_sem);
+ RETURN(rc);
+}
+
+/*
+ * Update statfs data if the current osfs age is older than max_age.
+ * If wait is not set, it means that we are called from lov_create()
+ * and we should just issue the rpcs without waiting for them to complete.
+ * If wait is set, we are called from alloc_qos() and we just have
+ * to wait for the request set to complete.
+ *
+ * At most one refresh is started at a time: lq_statfs_in_progress is
+ * tested and set under lq_rw_sem, and the single lq_statfs_data buffer
+ * is zeroed and reused for each refresh. If wait is set but no refresh
+ * is in flight, new rpcs are sent first and then waited for.
+ *
+ * The requests are handed over to ptlrpcd with ptlrpcd_add_rqset() and
+ * the local request set is destroyed before returning.
+ *
+ * If the request set cannot be allocated, or obd_statfs_async() fails or
+ * queues no request, lq_statfs_in_progress is cleared again, waiters are
+ * woken up and the function returns without waiting, even if wait was
+ * set. No error is reported to the caller.
+ *
+ * obd     - lov device; its u.lov qos state is used
+ * max_age - freshness limit compared against obd->obd_osfs_age
+ * wait    - non-zero to block until qos_statfs_ready() is true
+ */
+void qos_statfs_update(struct obd_device *obd, __u64 max_age, int wait)
+{
+ struct lov_obd *lov = &obd->u.lov;
+ struct obd_info *oinfo;
+ int rc = 0;
+ struct ptlrpc_request_set *set = NULL;
+ ENTRY;
+
+ if (cfs_time_beforeq_64(max_age, obd->obd_osfs_age))
+ /* statfs data are quite recent, don't need to refresh it */
+ RETURN_EXIT;
+
+ if (!wait && lov->lov_qos.lq_statfs_in_progress)
+ /* statfs already in progress (unlocked check; the flag is
+ * tested again under lq_rw_sem below before any rpc is
+ * sent) */
+ RETURN_EXIT;
+
+ down_write(&lov->lov_qos.lq_rw_sem);
+ if (lov->lov_qos.lq_statfs_in_progress) {
+ up_write(&lov->lov_qos.lq_rw_sem);
+ GOTO(out, rc = 0); /* a refresh is already in flight: nothing
+ * to send, only wait if asked to */
+ }
+ /* no statfs in flight, send rpcs */
+ lov->lov_qos.lq_statfs_in_progress = 1;
+ up_write(&lov->lov_qos.lq_rw_sem);
+
+ if (wait)
+ CDEBUG(D_QOS, "%s: did not manage to get fresh statfs data "
+ "in a timely manner (osfs age "LPU64", max age "LPU64")"
+ ", sending new statfs rpcs\n",
+ obd_uuid2str(&lov->desc.ld_uuid), obd->obd_osfs_age,
+ max_age);
+
+ /* need to send statfs rpcs */
+ CDEBUG(D_QOS, "sending new statfs requests\n");
+ memset(lov->lov_qos.lq_statfs_data, 0,
+ sizeof(*lov->lov_qos.lq_statfs_data));
+ oinfo = &lov->lov_qos.lq_statfs_data->lsd_oi;
+ oinfo->oi_osfs = &lov->lov_qos.lq_statfs_data->lsd_statfs;
+ oinfo->oi_flags = OBD_STATFS_NODELAY;
+ set = ptlrpc_prep_set();
+ if (!set)
+ GOTO(out_failed, rc = -ENOMEM);
+
+ rc = obd_statfs_async(obd, oinfo, max_age, set);
+ if (rc || list_empty(&set->set_requests)) {
+ if (rc)
+ CWARN("statfs failed with %d\n", rc);
+ GOTO(out_failed, rc);
+ }
+ /* send requests via ptlrpcd; the flag is only set after
+ * obd_statfs_async() has queued the requests and before they are
+ * handed to ptlrpcd */
+ oinfo->oi_flags |= OBD_STATFS_PTLRPCD;
+ ptlrpcd_add_rqset(set);
+ GOTO(out, rc);
+
+out_failed:
+ down_write(&lov->lov_qos.lq_rw_sem);
+ lov->lov_qos.lq_statfs_in_progress = 0;
+ /* wake up any threads waiting for the statfs rpcs to complete */
+ cfs_waitq_signal(&lov->lov_qos.lq_statfs_waitq);
+ up_write(&lov->lov_qos.lq_rw_sem);
+ wait = 0; /* nothing was sent, so there is nothing to wait for */
+out:
+ if (set)
+ ptlrpc_set_destroy(set); /* on the success path the requests
+ * were already moved to ptlrpcd */
+ if (wait) {
+ struct l_wait_info lwi = { 0 }; /* NOTE(review): zeroed wait
+ * info, presumably no
+ * timeout - confirm */
+ CDEBUG(D_QOS, "waiting for statfs requests to complete\n");
+ l_wait_event(lov->lov_qos.lq_statfs_waitq,
+ qos_statfs_ready(obd, max_age), &lwi);
+ if (cfs_time_before_64(obd->obd_osfs_age, max_age))
+ CDEBUG(D_QOS, "%s: still no fresh statfs data after "
+ "waiting (osfs age "LPU64", max age "
+ LPU64")\n",
+ obd_uuid2str(&lov->desc.ld_uuid),
+ obd->obd_osfs_age, max_age);
+ }
+}
}
/* The callback for osc_getattr_async that finilizes a request info when a
- * response is recieved. */
+ * response is received. */
static int cb_getattr_update(void *cookie, int rc)
{
struct obd_info *oinfo = cookie;
}
/* The callback for osc_setattr_async that finilizes a request info when a
- * response is recieved. */
+ * response is received. */
static int cb_setattr_update(void *cookie, int rc)
{
struct obd_info *oinfo = cookie;
}
/* The callback for osc_punch that finilizes a request info when a response
- * is recieved. */
+ * is received. */
static int cb_update_punch(void *cookie, int rc)
{
struct obd_info *oinfo = cookie;
}
/* The callback for osc_statfs_async that finilizes a request info when a
- * response is recieved. */
+ * response is received. */
static int cb_statfs_update(void *cookie, int rc)
{
struct obd_info *oinfo = cookie;
if (rc && !(lov->lov_tgts[lovreq->rq_idx] &&
lov->lov_tgts[lovreq->rq_idx]->ltd_active))
rc = 0;
- RETURN(rc);
+ GOTO(out, rc);
}
spin_lock(&obd->obd_osfs_lock);
lov_update_statfs(osfs, lov_sfs, success);
qos_update(lov);
+out:
+ if (lovreq->rq_rqset->set_oi->oi_flags & OBD_STATFS_PTLRPCD &&
+ lovreq->rq_rqset->set_count == lovreq->rq_rqset->set_completes) {
+ lov_statfs_interpret(NULL, lovreq->rq_rqset,
+ lovreq->rq_rqset->set_success !=
+ lovreq->rq_rqset->set_count);
+ qos_statfs_done(lov);
+ }
RETURN(0);
}
}
/*
+ * Move all requests from an existing request set to the ptlrpcd queue.
+ * All requests from the set must be in phase RQ_PHASE_NEW.
+ *
+ * Each request is unlinked from the set (rq_set_chain is removed and
+ * rq_set is cleared) before it is queued with ptlrpcd_add_req() using
+ * scope PSCOPE_OTHER, and set_remaining is decremented once per request;
+ * it is asserted to be zero when the loop is done. The set itself is not
+ * destroyed here.
+ */
+void ptlrpcd_add_rqset(struct ptlrpc_request_set *set)
+{
+ struct list_head *tmp, *pos;
+
+ list_for_each_safe(pos, tmp, &set->set_requests) {
+ struct ptlrpc_request *req =
+ list_entry(pos, struct ptlrpc_request, rq_set_chain);
+
+ LASSERT(req->rq_phase == RQ_PHASE_NEW);
+ list_del_init(&req->rq_set_chain);
+ req->rq_set = NULL;
+ ptlrpcd_add_req(req, PSCOPE_OTHER);
+ set->set_remaining--;
+ }
+ LASSERT(set->set_remaining == 0);
+}
+EXPORT_SYMBOL(ptlrpcd_add_rqset);
+
+/*
* Requests that are added to the ptlrpcd queue are sent via
* ptlrpcd_check->ptlrpc_check_set().
*/
exit++;
}
- /*
+ /*
* Let's make one more loop to make sure that ptlrpcd_check()
* copied all raced new rpcs into the set so we can kill them.
*/