* Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
- * Copyright (c) 2011, 2012, Whamcloud, Inc.
+ * Copyright (c) 2012, 2013, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
#define DEBUG_SUBSYSTEM S_FILTER
#include <dt_object.h>
+#include <lustre/lustre_idl.h>
#include "ofd_internal.h"
info->fti_pre_version != curr_version) {
CDEBUG(D_INODE, "Version mismatch "LPX64" != "LPX64"\n",
info->fti_pre_version, curr_version);
- cfs_spin_lock(&info->fti_exp->exp_lock);
+ spin_lock(&info->fti_exp->exp_lock);
info->fti_exp->exp_vbr_failed = 1;
- cfs_spin_unlock(&info->fti_exp->exp_lock);
+ spin_unlock(&info->fti_exp->exp_lock);
RETURN (-EOVERFLOW);
}
info->fti_pre_version = curr_version;
if (likely(!IS_ERR(o)))
fo = ofd_obj(o);
else
- fo = (struct ofd_object *)o; /* return error */
+ fo = ERR_CAST(o); /* return error */
+
RETURN(fo);
}
dto = dt_find_or_create(env, ofd->ofd_osd, fid, &info->fti_dof, attr);
if (IS_ERR(dto))
- RETURN((struct ofd_object *)dto);
+ RETURN(ERR_CAST(dto));
fo_obj = lu_object_locate(dto->do_lu.lo_header,
ofd->ofd_dt_dev.dd_lu_dev.ld_type);
int ofd_object_ff_check(const struct lu_env *env, struct ofd_object *fo)
{
- struct ofd_thread_info *info = ofd_info(env);
- int rc = 0;
+ int rc = 0;
ENTRY;
* This actually means that we don't know whether the object
* has the "fid" EA or not.
*/
- info->fti_buf.lb_buf = &info->fti_mds_fid2;
- info->fti_buf.lb_len = sizeof(info->fti_mds_fid2);
- rc = dt_xattr_get(env, ofd_object_child(fo), &info->fti_buf,
+ rc = dt_xattr_get(env, ofd_object_child(fo), &LU_BUF_NULL,
XATTR_NAME_FID, BYPASS_CAPA);
if (rc >= 0 || rc == -ENODATA) {
/*
lu_object_put(env, &fo->ofo_obj.do_lu);
}
-int ofd_precreate_object(const struct lu_env *env, struct ofd_device *ofd,
- obd_id id, obd_seq group)
+int ofd_precreate_objects(const struct lu_env *env, struct ofd_device *ofd,
+ obd_id id, struct ofd_seq *oseq, int nr, int sync)
{
struct ofd_thread_info *info = ofd_info(env);
- struct ofd_object *fo;
+ struct ofd_object *fo = NULL;
struct dt_object *next;
struct thandle *th;
+ struct ofd_object **batch;
+ struct lu_fid *fid = &info->fti_fid;
obd_id tmp;
int rc;
+ int i;
+ int objects = 0;
+ int nr_saved = nr;
ENTRY;
/* Don't create objects beyond the valid range for this SEQ */
- if (unlikely(fid_seq_is_mdt0(group) && id >= IDIF_MAX_OID)) {
- CERROR("%s:"POSTID" hit the IDIF_MAX_OID (1<<48)!\n",
- ofd_name(ofd), id, group);
+ if (unlikely(fid_seq_is_mdt0(ostid_seq(&oseq->os_oi)) &&
+ (id + nr) >= IDIF_MAX_OID)) {
+ CERROR("%s:"DOSTID" hit the IDIF_MAX_OID (1<<48)!\n",
+ ofd_name(ofd), id, ostid_seq(&oseq->os_oi));
RETURN(rc = -ENOSPC);
- } else if (unlikely(!fid_seq_is_mdt0(group) && id >= OBIF_MAX_OID)) {
- CERROR("%s:"POSTID" hit the OBIF_MAX_OID (1<<32)!\n",
- ofd_name(ofd), id, group);
+ } else if (unlikely(!fid_seq_is_mdt0(ostid_seq(&oseq->os_oi)) &&
+ (id + nr) >= OBIF_MAX_OID)) {
+ CERROR("%s:"DOSTID" hit the OBIF_MAX_OID (1<<32)!\n",
+ ofd_name(ofd), id, ostid_seq(&oseq->os_oi));
RETURN(rc = -ENOSPC);
}
- info->fti_ostid.oi_id = id;
- info->fti_ostid.oi_seq = group;
- fid_ostid_unpack(&info->fti_fid, &info->fti_ostid, 0);
- fo = ofd_object_find(env, ofd, &info->fti_fid);
- if (IS_ERR(fo))
- RETURN(PTR_ERR(fo));
+ OBD_ALLOC(batch, nr_saved * sizeof(struct ofd_object *));
+ if (batch == NULL)
+ RETURN(-ENOMEM);
info->fti_attr.la_valid = LA_TYPE | LA_MODE;
/*
info->fti_attr.la_mtime = 0;
info->fti_attr.la_ctime = 0;
- next = ofd_object_child(fo);
- LASSERT(next != NULL);
+ LASSERT(id != 0);
+
+ /* prepare objects */
+ *fid = *lu_object_fid(&oseq->os_lastid_obj->do_lu);
+ for (i = 0; i < nr; i++) {
+ rc = fid_set_id(fid, id + i);
+ if (rc != 0) {
+ if (i == 0)
+ GOTO(out, rc);
+
+ nr = i;
+ break;
+ }
+
+ fo = ofd_object_find(env, ofd, fid);
+ if (IS_ERR(fo)) {
+ if (i == 0)
+ GOTO(out, rc = PTR_ERR(fo));
+
+ nr = i;
+ break;
+ }
+ ofd_write_lock(env, fo);
+ batch[i] = fo;
+ }
info->fti_buf.lb_buf = &tmp;
info->fti_buf.lb_len = sizeof(tmp);
info->fti_off = 0;
- ofd_write_lock(env, fo);
th = ofd_trans_create(env, ofd);
if (IS_ERR(th))
- GOTO(out_unlock, rc = PTR_ERR(th));
+ GOTO(out, rc = PTR_ERR(th));
+
+ th->th_sync |= sync;
- rc = dt_declare_record_write(env, ofd->ofd_lastid_obj[group],
- sizeof(tmp), info->fti_off, th);
+ rc = dt_declare_record_write(env, oseq->os_lastid_obj, sizeof(tmp),
+ info->fti_off, th);
if (rc)
GOTO(trans_stop, rc);
- if (unlikely(ofd_object_exists(fo))) {
- /* object may exist being re-created by write replay */
- CDEBUG(D_INODE, "object %u/"LPD64" exists: "DFID"\n",
- (unsigned) group, id, PFID(&info->fti_fid));
- rc = dt_trans_start_local(env, ofd->ofd_osd, th);
- if (rc)
- GOTO(trans_stop, rc);
- GOTO(last_id_write, rc);
+ for (i = 0; i < nr; i++) {
+ fo = batch[i];
+ LASSERT(fo);
+
+ if (unlikely(ofd_object_exists(fo))) {
+ /* object may exist being re-created by write replay */
+ CDEBUG(D_INODE, "object "LPX64"/"LPX64" exists: "
+ DFID"\n", ostid_seq(&oseq->os_oi), id,
+ PFID(lu_object_fid(&fo->ofo_obj.do_lu)));
+ continue;
+ }
+
+ next = ofd_object_child(fo);
+ LASSERT(next != NULL);
+
+ rc = dt_declare_create(env, next, &info->fti_attr, NULL,
+ &info->fti_dof, th);
+ if (rc) {
+ nr = i;
+ break;
+ }
}
- rc = dt_declare_create(env, next, &info->fti_attr, NULL,
- &info->fti_dof, th);
- if (rc)
- GOTO(trans_stop, rc);
rc = dt_trans_start_local(env, ofd->ofd_osd, th);
if (rc)
GOTO(trans_stop, rc);
- CDEBUG(D_OTHER, "create new object %lu:%llu\n",
- (unsigned long) info->fti_fid.f_oid, info->fti_fid.f_seq);
+ CDEBUG(D_OTHER, "%s: create new object "DFID" nr %d\n",
+ ofd_name(ofd), PFID(fid), nr);
+
+ LASSERT(nr > 0);
+
+ /* When the LFSCK scans the whole device to verify the LAST_ID file
+ * consistency, it first loads the last_id into RAM and compares
+ * the last_id with each OST-object's ID. If the latter is larger,
+ * then it will regard the LAST_ID file as crashed. But during the LFSCK
+ * scanning, the OFD may continue to create new OST-objects. Those newly
+ * created OST-objects will have larger IDs than the ones LFSCK knows.
+ * So from the LFSCK view, it needs to re-load the last_id from the disk
+ * file, and if the latest last_id is still smaller than the object's
+ * ID, then the LAST_ID file has really crashed.
+ *
+ * To make the above mechanism work, before the OFD pre-creates
+ * OST-objects, it needs to update the LAST_ID file first; otherwise,
+ * the LFSCK may not get the latest last_id although new OST-objects
+ * have been created. */
+ if (!OBD_FAIL_CHECK(OBD_FAIL_LFSCK_SKIP_LASTID)) {
+ tmp = cpu_to_le64(id + nr - 1);
+ dt_write_lock(env, oseq->os_lastid_obj, 0);
+ rc = dt_record_write(env, oseq->os_lastid_obj,
+ &info->fti_buf, &info->fti_off, th);
+ dt_write_unlock(env, oseq->os_lastid_obj);
+ if (rc != 0)
+ GOTO(trans_stop, rc);
+ }
- rc = dt_create(env, next, &info->fti_attr, NULL, &info->fti_dof, th);
- if (rc)
- GOTO(trans_stop, rc);
- LASSERT(ofd_object_exists(fo));
+ for (i = 0; i < nr; i++) {
+ fo = batch[i];
+ LASSERT(fo);
-last_id_write:
- ofd_last_id_set(ofd, id, group);
+ if (likely(!ofd_object_exists(fo))) {
+ next = ofd_object_child(fo);
+ LASSERT(next != NULL);
+
+ rc = dt_create(env, next, &info->fti_attr, NULL,
+ &info->fti_dof, th);
+ if (rc)
+ break;
+ LASSERT(ofd_object_exists(fo));
+ }
+ ofd_seq_last_oid_set(oseq, id + i);
+ }
+
+ objects = i;
+ /* Not all of the wanted objects have been created;
+ * set the LAST_ID to the last object actually created. */
+ if (unlikely(objects < nr)) {
+ int rc1;
+
+ info->fti_off = 0;
+ tmp = cpu_to_le64(ofd_seq_last_oid(oseq));
+ dt_write_lock(env, oseq->os_lastid_obj, 0);
+ rc1 = dt_record_write(env, oseq->os_lastid_obj,
+ &info->fti_buf, &info->fti_off, th);
+ dt_write_unlock(env, oseq->os_lastid_obj);
+ if (rc1 != 0)
+ CERROR("%s: fail to reset the LAST_ID for seq ("LPX64
+ ") from "LPU64" to "LPU64"\n", ofd_name(ofd),
+ ostid_seq(&oseq->os_oi), id + nr - 1,
+ ofd_seq_last_oid(oseq));
+ }
- tmp = cpu_to_le64(ofd_last_id(ofd, group));
- rc = dt_record_write(env, ofd->ofd_lastid_obj[group], &info->fti_buf,
- &info->fti_off, th);
trans_stop:
ofd_trans_stop(env, ofd, th, rc);
-out_unlock:
- ofd_write_unlock(env, fo);
- ofd_object_put(env, fo);
- RETURN(rc);
+out:
+ for (i = 0; i < nr_saved; i++) {
+ fo = batch[i];
+ if (fo) {
+ ofd_write_unlock(env, fo);
+ ofd_object_put(env, fo);
+ }
+ }
+ OBD_FREE(batch, nr_saved * sizeof(struct ofd_object *));
+
+ CDEBUG((objects == 0 && rc == 0) ? D_ERROR : D_OTHER,
+ "created %d/%d objects: %d\n", objects, nr_saved, rc);
+
+ LASSERT(ergo(objects == 0, rc < 0));
+ RETURN(objects > 0 ? objects : rc);
}
/*