* Note that reiserfs quotas are disabled on SLES 10 in this kernel.
* bug fixes
+Severity : minor
+Frequency : at statup only
+Bugzilla : 12860
+Description: mds_lov_synchronize race leads to various problems
+Details : simultaneous MDT->OST connections at startup can cause the
+ sync to abort, leaving the OSC in a bad state.
+
Severity : enhancement
Bugzilla : 12194
Description: add optional extra BUILD_VERSION info
#define OBD_FAIL_MDS_FS_SETUP 0x135
#define OBD_FAIL_MDS_RESEND 0x136
#define OBD_FAIL_MDS_LLOG_CREATE_FAILED 0x137
+#define OBD_FAIL_MDS_LOV_SYNC_RACE 0x138
#define OBD_FAIL_OST 0x200
#define OBD_FAIL_OST_CONNECT_NET 0x201
/* if called for a specific target, we don't
care if it is not active. */
- if (!lov->lov_tgts[i]->ltd_active == 0 && ost_uuid == NULL) {
+ if (!lov->lov_tgts[i]->ltd_active && ost_uuid == NULL) {
CDEBUG(D_HA, "lov idx %d inactive\n", i);
continue;
}
{
struct obd_device *obddev = class_exp2obd(exp);
struct lov_obd *lov = &obddev->u.lov;
+ obd_count count;
int i, rc = 0, err, incr = 0, check_uuid = 0, do_inactive = 0;
int no_set = !set;
ENTRY;
RETURN(-ENOMEM);
}
+ lov_getref(obddev);
+ count = lov->desc.ld_tgt_count;
+
if (KEY_IS(KEY_NEXT_ID)) {
- if (vallen != lov->desc.ld_tgt_count * sizeof(obd_id))
- RETURN(-EINVAL);
+ /* We must use mds's idea of # osts for indexing into
+ mds->mds_lov_objids */
+ count = vallen / sizeof(obd_id);
+ LASSERT(count <= lov->desc.ld_tgt_count);
vallen = sizeof(obd_id);
incr = sizeof(obd_id);
do_inactive = 1;
} else if (KEY_IS("checksum")) {
do_inactive = 1;
- } else if (KEY_IS("mds_conn") || KEY_IS("unlinked")) {
+ } else if (KEY_IS(KEY_MDS_CONN) || KEY_IS("unlinked")) {
check_uuid = val ? 1 : 0;
} else if (KEY_IS("evict_by_nid")) {
/* use defaults:
*/
}
- lov_getref(obddev);
-
- for (i = 0; i < lov->desc.ld_tgt_count; i++, val = (char *)val + incr) {
+ for (i = 0; i < count; i++, val = (char *)val + incr) {
/* OST was disconnected */
if (!lov->lov_tgts[i] || !lov->lov_tgts[i]->ltd_exp)
continue;
LASSERT(!obd->obd_recovering);
LASSERT(llog_get_context(obd, LLOG_MDS_OST_ORIG_CTXT) != NULL);
- /* FIXME why not put this in the synchronize? */
/* set nextid first, so we are sure it happens */
+ mutex_down(&obd->obd_dev_sem);
rc = mds_lov_set_nextid(obd);
+ mutex_up(&obd->obd_dev_sem);
if (rc) {
CERROR("%s: mds_lov_set_nextid failed %d\n",
obd->obd_name, rc);
ENTRY;
LASSERT(!obd->obd_recovering);
-
LASSERT(mds->mds_lov_objids != NULL);
+
+ /* obd->obd_dev_sem must be held so mds_lov_objids doesn't change */
+ LASSERT_SEM_LOCKED(&obd->obd_dev_sem);
rc = obd_set_info_async(mds->mds_osc_exp, strlen(KEY_NEXT_ID),
KEY_NEXT_ID,
/* If we added a target we have to reconnect the llogs */
/* We only _need_ to do this at first add (idx), or the first time
after recovery. However, it should now be safe to call anytime. */
- mutex_down(&obd->obd_dev_sem);
llog_cat_initialize(obd, mds->mds_lov_desc.ld_tgt_count, NULL);
- mutex_up(&obd->obd_dev_sem);
out:
OBD_FREE(ld, sizeof(*ld));
int rc = 0;
ENTRY;
+ /* Don't let anyone else mess with mds_lov_objids now */
+ mutex_down(&obd->obd_dev_sem);
+
old_count = mds->mds_lov_desc.ld_tgt_count;
rc = mds_lov_update_desc(obd, mds->mds_osc_exp);
- if (rc)
- RETURN(rc);
+ if (rc)
+ GOTO(out, rc);
CDEBUG(D_CONFIG, "idx=%d, recov=%d/%d, cnt=%d/%d\n",
idx, obd->obd_recovering, obd->obd_async_recov, old_count,
mds->mds_lov_desc.ld_tgt_count);
/* idx is set as data from lov_notify. */
- if (idx != MDSLOV_NO_INDEX && !obd->obd_recovering) {
- if (idx >= mds->mds_lov_desc.ld_tgt_count) {
- CERROR("index %d > count %d!\n", idx,
- mds->mds_lov_desc.ld_tgt_count);
- RETURN(-EINVAL);
- }
-
- if (idx >= mds->mds_lov_objids_in_file) {
- /* We never read this lastid; ask the osc */
- obd_id lastid;
- __u32 size = sizeof(lastid);
- rc = obd_get_info(watched->obd_self_export,
- strlen("last_id"),
- "last_id", &size, &lastid);
- if (rc)
- RETURN(rc);
- mds->mds_lov_objids[idx] = lastid;
- mds->mds_lov_objids_dirty = 1;
- mds_lov_write_objids(obd);
- } else {
- /* We have read this lastid from disk; tell the osc.
- Don't call this during recovery. */
- rc = mds_lov_set_nextid(obd);
+ if (idx == MDSLOV_NO_INDEX || obd->obd_recovering)
+ GOTO(out, rc);
+
+ if (idx >= mds->mds_lov_desc.ld_tgt_count) {
+ CERROR("index %d > count %d!\n", idx,
+ mds->mds_lov_desc.ld_tgt_count);
+ GOTO(out, rc = -EINVAL);
+ }
+
+ if (idx >= mds->mds_lov_objids_in_file) {
+ /* We never read this lastid; ask the osc */
+ obd_id lastid;
+ __u32 size = sizeof(lastid);
+ rc = obd_get_info(watched->obd_self_export, strlen("last_id"),
+ "last_id", &size, &lastid);
+ if (rc)
+ GOTO(out, rc);
+ mds->mds_lov_objids[idx] = lastid;
+ mds->mds_lov_objids_dirty = 1;
+ mds_lov_write_objids(obd);
+ } else {
+ /* We have read this lastid from disk; tell the osc.
+ Don't call this during recovery. */
+ rc = mds_lov_set_nextid(obd);
+ if (rc) {
+ CERROR("Failed to set next id, idx=%d rc=%d\n", idx,rc);
+ /* Don't abort the rest of the sync */
+ rc = 0;
}
-
- CDEBUG(D_CONFIG, "last object "LPU64" from OST %d\n",
- mds->mds_lov_objids[idx], idx);
}
+ CDEBUG(D_CONFIG, "last object "LPU64" from OST %d rc=%d\n",
+ mds->mds_lov_objids[idx], idx, rc);
+out:
+ mutex_up(&obd->obd_dev_sem);
RETURN(rc);
}
/* Deny new client connections until we are sure we have some OSTs */
obd->obd_no_conn = 1;
+ mutex_down(&obd->obd_dev_sem);
rc = mds_lov_read_objids(obd);
if (rc) {
CERROR("cannot read %s: rc = %d\n", "lov_objids", rc);
"writing objids file: %d\n", rc);
}
}
+ mutex_up(&obd->obd_dev_sem);
/* I want to see a callback happen when the OBD moves to a
* "For General Use" state, and that's when we'll call
RETURN(rc);
err_reg:
+ mutex_up(&obd->obd_dev_sem);
obd_register_observer(mds->mds_osc_obd, NULL);
err_discon:
obd_disconnect(mds->mds_osc_exp);
uuid = &watched->u.cli.cl_target_uuid;
LASSERT(uuid);
+ OBD_RACE(OBD_FAIL_MDS_LOV_SYNC_RACE);
+
rc = mds_lov_update_mds(obd, watched, idx, uuid);
- if (rc != 0)
+ if (rc != 0) {
+ CERROR("%s failed at update_mds: %d\n", obd_uuid2str(uuid), rc);
GOTO(out, rc);
+ }
rc = obd_set_info_async(mds->mds_osc_exp, strlen(KEY_MDS_CONN),
KEY_MDS_CONN, 0, uuid, NULL);
NULL, NULL, uuid);
if (rc != 0) {
- CERROR("%s: failed at llog_origin_connect: %d\n",
- obd->obd_name, rc);
+ CERROR("%s failed at llog_origin_connect: %d\n",
+ obd_uuid2str(uuid), rc);
GOTO(out, rc);
}
rc = mds_lov_clear_orphans(mds, uuid);
if (rc != 0) {
- CERROR("%s: failed at mds_lov_clear_orphans: %d\n",
- obd->obd_name, rc);
+ CERROR("%s failed at mds_lov_clear_orphans: %d\n",
+ obd_uuid2str(uuid), rc);
GOTO(out, rc);
}
EXIT;
out:
+ if (rc) {
+ /* Deactivate it for safety */
+ CERROR("%s sync failed %d, deactivating\n", obd_uuid2str(uuid),
+ rc);
+ obd_notify(mds->mds_osc_obd, watched, OBD_NOTIFY_INACTIVE,
+ NULL);
+ }
class_decref(obd);
return rc;
}
/* We still have to fix the lov descriptor for ost's added
after the mdt in the config log. They didn't make it into
mds_lov_connect. */
+ mutex_down(&obd->obd_dev_sem);
rc = mds_lov_update_desc(obd, obd->u.mds.mds_osc_exp);
+ mutex_up(&obd->obd_dev_sem);
RETURN(rc);
}
spin_lock(&oscc->oscc_lock);
oscc->oscc_flags &= ~OSCC_FLAG_CREATING;
- if (rc == -ENOSPC || rc == -EROFS) {
+ switch (rc) {
+ case 0: {
+ if (body) {
+ int diff = body->oa.o_id - oscc->oscc_last_id;
+
+ if (diff < oscc->oscc_grow_count)
+ oscc->oscc_grow_count =
+ max(diff/3, OST_MIN_PRECREATE);
+ else
+ oscc->oscc_flags &= ~OSCC_FLAG_LOW;
+ oscc->oscc_last_id = body->oa.o_id;
+ }
+ spin_unlock(&oscc->oscc_lock);
+ break;
+ }
+ case -ENOSPC:
+ case -EROFS: {
oscc->oscc_flags |= OSCC_FLAG_NOSPC;
if (body && rc == -ENOSPC) {
oscc->oscc_grow_count = OST_MIN_PRECREATE;
}
spin_unlock(&oscc->oscc_lock);
DEBUG_REQ(D_INODE, req, "OST out of space, flagging");
- } else if (rc != 0 && rc != -EIO) {
+ break;
+ }
+ case -EIO: {
+ /* filter always set body->oa.o_id as the last_id
+ * of filter (see filter_handle_precreate for detail)*/
+ if (body && body->oa.o_id > oscc->oscc_last_id)
+ oscc->oscc_last_id = body->oa.o_id;
+ spin_unlock(&oscc->oscc_lock);
+ break;
+ }
+ default: {
oscc->oscc_flags |= OSCC_FLAG_RECOVERING;
oscc->oscc_grow_count = OST_MIN_PRECREATE;
spin_unlock(&oscc->oscc_lock);
"unknown rc %d from async create: failing oscc", rc);
ptlrpc_fail_import(req->rq_import,
lustre_msg_get_conn_cnt(req->rq_reqmsg));
- } else {
- if (rc == 0) {
- if (body) {
- int diff = body->oa.o_id - oscc->oscc_last_id;
-
- if (diff < oscc->oscc_grow_count)
- oscc->oscc_grow_count =
- max(diff/3, OST_MIN_PRECREATE);
- else
- oscc->oscc_flags &= ~OSCC_FLAG_LOW;
- oscc->oscc_last_id = body->oa.o_id;
- }
- } else {
- /* filter always set body->oa.o_id as the last_id
- * of filter (see filter_handle_precreate for detail)*/
- if (body && body->oa.o_id > oscc->oscc_last_id)
- oscc->oscc_last_id = body->oa.o_id;
- }
- spin_unlock(&oscc->oscc_lock);
-
+ }
}
CDEBUG(D_HA, "preallocated through id "LPU64" (next to use "LPU64")\n",
if (oscc_recovering(oscc)) {
struct l_wait_info lwi;
- CDEBUG(D_HA,"%p: oscc recovery in progress, waiting\n",
- oscc);
+ CDEBUG(D_HA,"%s: oscc recovery in progress, waiting\n",
+ oscc->oscc_obd->obd_name);
lwi = LWI_TIMEOUT(cfs_timeout_cap(cfs_time_seconds(obd_timeout/4)),
NULL, NULL);
!oscc_recovering(oscc), &lwi);
LASSERT(rc == 0 || rc == -ETIMEDOUT);
if (rc == -ETIMEDOUT) {
- CDEBUG(D_HA,"%p: timeout waiting on recovery\n",
- oscc);
+ CDEBUG(D_HA,"%s: timeout waiting on recovery\n",
+ oscc->oscc_obd->obd_name);
RETURN(rc);
}
- CDEBUG(D_HA, "%p: oscc recovery over, waking up\n",
- oscc);
+ CDEBUG(D_HA, "%s: oscc recovery over, waking up\n",
+ oscc->oscc_obd->obd_name);
}
spin_lock(&oscc->oscc_lock);
CDEBUG(D_HA, "transno: "LPD64"\n",
lustre_msg_get_transno(req->rq_repmsg));
- EXIT;
out_req:
ptlrpc_req_finished(req);
out:
if (rc && !*ea)
obd_free_memmd(exp, &lsm);
- return rc;
+ RETURN(rc);
}
static int osc_punch_interpret(struct ptlrpc_request *req,
if (req == NULL)
RETURN(-ENOMEM);
- if (KEY_IS("mds_conn"))
+ if (KEY_IS(KEY_MDS_CONN))
req->rq_interpret_reply = osc_setinfo_mds_conn_interpret;
ptlrpc_req_set_repsize(req, 1, NULL);
imp->imp_state == LUSTRE_IMP_CONNECTING) {
/* allow CONNECT even if import is invalid */ ;
} else if (imp->imp_invalid) {
- /* If the import has been invalidated (such as by an OST
- * failure) the request must fail with -EINVAL. This
- * indicates the requests should be discarded, an -EIO
- * may result in a resend of the request. */
if (!imp->imp_deactive)
DEBUG_REQ(D_ERROR, req, "IMP_INVALID");
- *status = -EINVAL;
+ *status = -ESHUTDOWN; /* bz 12940 */
} else if (req->rq_import_generation != imp->imp_generation) {
DEBUG_REQ(D_ERROR, req, "req wrong generation:");
*status = -EIO;
if (ev->unlinked) {
service->srv_nrqbd_receiving--;
- CDEBUG(D_RPCTRACE,"Buffer complete: %d buffers still posted\n",
+ CDEBUG(D_INFO, "Buffer complete: %d buffers still posted\n",
service->srv_nrqbd_receiving);
/* Normally, don't complain about 0 buffers posted; LNET won't