static void filter_commit_cb(struct obd_device *obd, __u64 transno,
void *cb_data, int error)
{
- obd_transno_commit_cb(obd, transno, error);
+ struct obd_export *exp = cb_data; /* filter_finish_transno() passes the export as cb_data */
+ LASSERT(exp->exp_obd == obd);
+ obd_transno_commit_cb(obd, transno, exp, error);
+ atomic_dec(&exp->exp_cb_count); /* pairs with the atomic_inc() done in filter_finish_transno() */
+ class_export_put(exp); /* drops the reference taken there with class_export_get() */
+}
+
+/**
+ * VBR (version recovery) check of an object before it is modified.
+ *
+ * Reads the current version of \a inode through fsfilt_get_version() and
+ * compares it with the version expected by the request
+ * (oti->oti_pre_version).  A pre-version of 0 is never treated as a
+ * mismatch.  When the check passes, the current version is stored back
+ * into oti->oti_pre_version.
+ *
+ * \param exp    export the request belongs to; exp_vbr_failed is set on it
+ *               (under exp_lock) when the versions differ
+ * \param oti    transaction info carrying the expected pre-version;
+ *               may be NULL, in which case nothing is checked
+ * \param inode  object being modified; may be NULL, in which case nothing
+ *               is checked
+ *
+ * \retval 0           nothing to check, or the versions are compatible
+ * \retval -EOVERFLOW  expected and current versions differ
+ */
+int filter_version_get_check(struct obd_export *exp,
+                             struct obd_trans_info *oti, struct inode *inode)
+{
+        __u64 curr_version;
+        ENTRY;
+
+        if (inode == NULL || oti == NULL)
+                RETURN(0);
+
+        curr_version = fsfilt_get_version(exp->exp_obd, inode);
+        /* -EOPNOTSUPP comes back in-band in the version value; presumably
+         * it means the backend keeps no versions, so skip the check */
+        if ((__s64)curr_version == -EOPNOTSUPP)
+                RETURN(0);
+        /* VBR: the version is always checked because it costs nothing */
+        if (oti->oti_pre_version != 0 &&
+            oti->oti_pre_version != curr_version) {
+                CDEBUG(D_INODE, "Version mismatch "LPX64" != "LPX64"\n",
+                       oti->oti_pre_version, curr_version);
+                spin_lock(&exp->exp_lock);
+                exp->exp_vbr_failed = 1;
+                spin_unlock(&exp->exp_lock);
+                RETURN(-EOVERFLOW);
+        }
+        oti->oti_pre_version = curr_version;
+        RETURN(0);
}
/* Assumes caller has already pushed us into the kernel context. */
-int filter_finish_transno(struct obd_export *exp, struct obd_trans_info *oti,
- int rc, int force_sync)
+int filter_finish_transno(struct obd_export *exp, struct inode *inode,
+ struct obd_trans_info *oti, int rc, int force_sync)
{
struct filter_obd *filter = &exp->exp_obd->u.filter;
struct filter_export_data *fed = &exp->exp_filter_data;
RETURN(rc);
/* we don't allocate new transnos for replayed requests */
+ spin_lock(&filter->fo_translock);
if (oti->oti_transno == 0) {
- spin_lock(&filter->fo_translock);
last_rcvd = le64_to_cpu(filter->fo_fsd->lsd_last_transno) + 1;
filter->fo_fsd->lsd_last_transno = cpu_to_le64(last_rcvd);
- spin_unlock(&filter->fo_translock);
- oti->oti_transno = last_rcvd;
} else {
- spin_lock(&filter->fo_translock);
last_rcvd = oti->oti_transno;
if (last_rcvd > le64_to_cpu(filter->fo_fsd->lsd_last_transno))
filter->fo_fsd->lsd_last_transno =
cpu_to_le64(last_rcvd);
+ }
+ oti->oti_transno = last_rcvd;
+ if (last_rcvd <= le64_to_cpu(lcd->lcd_last_transno)) {
spin_unlock(&filter->fo_translock);
+ LBUG();
}
lcd->lcd_last_transno = cpu_to_le64(last_rcvd);
+ lcd->lcd_pre_versions[0] = cpu_to_le64(oti->oti_pre_version);
+ lcd->lcd_last_xid = cpu_to_le64(oti->oti_xid);
+ spin_unlock(&filter->fo_translock);
- /* could get xid from oti, if it's ever needed */
- lcd->lcd_last_xid = 0;
+ if (inode)
+ fsfilt_set_version(exp->exp_obd, inode, last_rcvd);
off = fed->fed_lr_off;
if (off <= 0) {
fed->fed_lr_idx, fed->fed_lr_off);
err = -EINVAL;
} else {
+ class_export_get(exp); /* released when the cb is called */
+ atomic_inc(&exp->exp_cb_count);
if (!force_sync)
force_sync = fsfilt_add_journal_cb(exp->exp_obd,
last_rcvd,
oti->oti_handle,
filter_commit_cb,
- NULL);
+ exp);
err = fsfilt_write_record(exp->exp_obd, filter->fo_rcvd_filp,
lcd, sizeof(*lcd), &off,
force_sync | exp->exp_need_sync);
if (force_sync)
- filter_commit_cb(exp->exp_obd, last_rcvd, NULL, err);
+ filter_commit_cb(exp->exp_obd, last_rcvd, exp, err);
}
if (err) {
log_pri = D_ERROR;
num_stats = (sizeof(*obd->obd_type->typ_dt_ops) / sizeof(void *)) +
LPROC_FILTER_LAST - 1;
- *stats = lprocfs_alloc_stats(num_stats, 0);
+ *stats = lprocfs_alloc_stats(num_stats, LPROCFS_STATS_FLAG_NOPERCPU);
if (*stats == NULL)
return -ENOMEM;
struct obd_export *exp,
void *client_nid)
{
- struct filter_export_data *fed = &exp->exp_filter_data;
int rc, newnid = 0;
ENTRY;
- init_brw_stats(&fed->fed_brw_stats);
-
if (obd_uuid_equals(&exp->exp_client_uuid, &obd->obd_uuid))
/* Self-export gets no proc entry */
RETURN(0);
if (rc)
RETURN(rc);
/* Always add in ldlm_stats */
- tmp->nid_ldlm_stats = lprocfs_alloc_stats(LDLM_LAST_OPC -
- LDLM_FIRST_OPC, 0);
+ tmp->nid_ldlm_stats =
+ lprocfs_alloc_stats(LDLM_LAST_OPC - LDLM_FIRST_OPC,
+ LPROCFS_STATS_FLAG_NOPERCPU);
if (tmp->nid_ldlm_stats == NULL)
return -ENOMEM;
rc = PTR_ERR(handle);
CERROR("unable to start transaction: rc %d\n", rc);
} else {
+ fed->fed_lcd->lcd_last_epoch =
+ filter->fo_fsd->lsd_start_epoch;
+ exp->exp_last_request_time = cfs_time_current_sec();
rc = fsfilt_add_journal_cb(obd, 0, handle,
target_client_add_cb, exp);
if (rc == 0) {
RETURN(0);
}
+struct lsd_client_data zero_lcd; /* globals are implicitly zeroed */
+
static int filter_client_free(struct obd_export *exp)
{
struct filter_export_data *fed = &exp->exp_filter_data;
struct filter_obd *filter = &exp->exp_obd->u.filter;
struct obd_device *obd = exp->exp_obd;
- struct lsd_client_data zero_lcd;
struct lvfs_run_ctxt saved;
int rc;
loff_t off;
if (strcmp(fed->fed_lcd->lcd_uuid, obd->obd_uuid.uuid ) == 0)
GOTO(free, 0);
- CDEBUG(D_INFO, "freeing client at idx %u, offset %lld with UUID '%s'\n",
- fed->fed_lr_idx, fed->fed_lr_off, fed->fed_lcd->lcd_uuid);
-
LASSERT(filter->fo_last_rcvd_slots != NULL);
off = fed->fed_lr_off;
+ CDEBUG(D_INFO, "freeing client at idx %u, offset %lld with UUID '%s'\n",
+ fed->fed_lr_idx, fed->fed_lr_off, fed->fed_lcd->lcd_uuid);
+
/* Don't clear fed_lr_idx here as it is likely also unset. At worst
* we leak a client slot that will be cleaned on the next recovery. */
if (off <= 0) {
}
if (!(exp->exp_flags & OBD_OPT_FAILOVER)) {
- memset(&zero_lcd, 0, sizeof zero_lcd);
+ /* Don't force sync on disconnect if aborting recovery,
+ * or it does num_clients * num_osts. b=17194 */
+ int need_sync = exp->exp_need_sync &&
+ !(exp->exp_flags&OBD_OPT_ABORT_RECOV);
push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
rc = fsfilt_write_record(obd, filter->fo_rcvd_filp, &zero_lcd,
- sizeof(zero_lcd), &off,
- (!exp->exp_libclient ||
- exp->exp_need_sync));
+ sizeof(zero_lcd), &off, 0);
+
+ /* Make sure the server's last_transno is up to date. Do this
+ * after the client is freed so we know all the client's
+ * transactions have been committed. */
if (rc == 0)
- /* update server's transno */
filter_update_server_data(obd, filter->fo_rcvd_filp,
- filter->fo_fsd,
- !exp->exp_libclient);
+ filter->fo_fsd, need_sync);
pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
CDEBUG(rc == 0 ? D_INFO : D_ERROR,
- "zeroing out client %s at idx %u (%llu) in %s rc %d\n",
+ "zero out client %s at idx %u/%llu in %s %ssync rc %d\n",
fed->fed_lcd->lcd_uuid, fed->fed_lr_idx, fed->fed_lr_off,
- LAST_RCVD, rc);
+ LAST_RCVD, need_sync ? "" : "a", rc);
}
if (!test_and_clear_bit(fed->fed_lr_idx, filter->fo_last_rcvd_slots)) {
struct filter_export_data *fed = &exp->exp_filter_data;
struct filter_mod_data *found = NULL, *fmd_new = NULL;
- OBD_SLAB_ALLOC(fmd_new, ll_fmd_cachep, CFS_ALLOC_IO, sizeof(*fmd_new));
+ OBD_SLAB_ALLOC_PTR_GFP(fmd_new, ll_fmd_cachep, CFS_ALLOC_IO);
spin_lock(&fed->fed_lock);
found = filter_fmd_find_nolock(&exp->exp_obd->u.filter,fed,objid,group);
static int filter_free_server_data(struct filter_obd *filter)
{
- OBD_FREE(filter->fo_fsd, sizeof(*filter->fo_fsd));
+ OBD_FREE_PTR(filter->fo_fsd);
filter->fo_fsd = NULL;
OBD_FREE(filter->fo_last_rcvd_slots, LR_MAX_CLIENTS / 8);
filter->fo_last_rcvd_slots = NULL;
CDEBUG(D_INODE, "server last_mount: "LPU64"\n",
le64_to_cpu(fsd->lsd_mount_count));
- fsd->lsd_compat14 = fsd->lsd_last_transno;
rc = fsfilt_write_record(obd, filp, fsd, sizeof(*fsd), &off, force_sync);
if (rc)
CERROR("error writing lr_server_data: rc = %d\n", rc);
struct inode *inode = filp->f_dentry->d_inode;
unsigned long last_rcvd_size = i_size_read(inode);
__u64 mount_count;
+ __u32 start_epoch;
int cl_idx;
loff_t off = 0;
int rc;
GOTO(err_fsd, rc = -EINVAL);
}
- CDEBUG(D_INODE, "%s: server last_transno : "LPU64"\n",
+ start_epoch = le32_to_cpu(fsd->lsd_start_epoch);
+
+ CDEBUG(D_INODE, "%s: server start_epoch : %#x\n",
+ obd->obd_name, start_epoch);
+ CDEBUG(D_INODE, "%s: server last_transno : "LPX64"\n",
obd->obd_name, le64_to_cpu(fsd->lsd_last_transno));
CDEBUG(D_INODE, "%s: server mount_count: "LPU64"\n",
obd->obd_name, mount_count + 1);
/* can't fail for existing client */
LASSERTF(rc == 0, "rc = %d\n", rc);
- lcd = NULL;
+ /* VBR: set export last committed */
+ exp->exp_last_committed = last_rcvd;
spin_lock(&exp->exp_lock);
exp->exp_connecting = 0;
exp->exp_in_recovery = 0;
spin_unlock(&exp->exp_lock);
+ spin_lock_bh(&obd->obd_processing_task_lock);
obd->obd_max_recoverable_clients++;
+ spin_unlock_bh(&obd->obd_processing_task_lock);
+ lcd = NULL;
class_export_put(exp);
}
obd->obd_last_committed = le64_to_cpu(fsd->lsd_last_transno);
- target_recovery_init(obd, ost_handle);
+ target_recovery_init(&filter->fo_lut, ost_handle);
out:
filter->fo_mount_count = mount_count + 1;
GOTO(err_filp, rc = -EOPNOTSUPP);
}
+ /** lu_target has very limited use in filter now */
+ lut_init(NULL, &filter->fo_lut, obd, NULL);
+
rc = filter_init_server_data(obd, file);
if (rc) {
CERROR("cannot read %s: rc = %d\n", LAST_RCVD, rc);
}
static int filter_prepare_destroy(struct obd_device *obd, obd_id objid,
- obd_id group)
+ obd_id group, struct lustre_handle *lockh)
{
- struct lustre_handle lockh;
int flags = LDLM_AST_DISCARD_DATA, rc;
struct ldlm_res_id res_id;
ldlm_policy_data_t policy = { .l_extent = { 0, OBD_OBJECT_EOF } };
rc = ldlm_cli_enqueue_local(obd->obd_namespace, &res_id, LDLM_EXTENT,
&policy, LCK_PW, &flags, ldlm_blocking_ast,
ldlm_completion_ast, NULL, NULL, 0, NULL,
- NULL, &lockh);
-
- /* We only care about the side-effects, just drop the lock. */
- if (rc == ELDLM_OK)
- ldlm_lock_decref(&lockh, LCK_PW);
-
+ NULL, lockh);
+ if (rc != ELDLM_OK)
+ lockh->cookie = 0;
RETURN(rc);
}
+/*
+ * Drop the PW lock reference obtained by filter_prepare_destroy().
+ *
+ * A zero cookie means there is no lock to release:
+ * filter_prepare_destroy() clears the cookie when the enqueue fails.
+ */
+static void filter_fini_destroy(struct obd_device *obd,
+                                struct lustre_handle *lockh)
+{
+        if (lockh->cookie == 0)
+                return;
+
+        ldlm_lock_decref(lockh, LCK_PW);
+}
+
/* This is vfs_unlink() without down(i_sem). If we call regular vfs_unlink()
* we have 2.6 lock ordering issues with filter_commitrw_write() as it takes
* i_sem before starting a handle, while filter_destroy() + vfs_unlink do the
if (rc != 0)
GOTO(err_ops, rc);
- LASSERT(!lvfs_check_rdonly(lvfs_sbdev(mnt->mnt_sb)));
+ if (lvfs_check_rdonly(lvfs_sbdev(mnt->mnt_sb))) {
+ CERROR("%s: Underlying device is marked as read-only. "
+ "Setup failed\n", obd->obd_name);
+ GOTO(err_ops, rc = -EROFS);
+ }
/* failover is the default */
obd->obd_replayable = 1;
init_mutex(&filter->fo_init_lock);
filter->fo_committed_group = 0;
-
- rc = filter_prep(obd);
- if (rc)
- GOTO(err_ops, rc);
-
filter->fo_destroys_in_progress = 0;
for (i = 0; i < 32; i++)
sema_init(&filter->fo_create_locks[i], 1);
filter->fo_fmd_max_num = FILTER_FMD_MAX_NUM_DEFAULT;
filter->fo_fmd_max_age = FILTER_FMD_MAX_AGE_DEFAULT;
+ rc = filter_prep(obd);
+ if (rc)
+ GOTO(err_ops, rc);
+
CFS_INIT_LIST_HEAD(&filter->fo_llog_list);
spin_lock_init(&filter->fo_llog_list_lock);
filter->fo_fl_oss_capa = 1;
+
CFS_INIT_LIST_HEAD(&filter->fo_capa_keys);
filter->fo_capa_hash = init_capa_hash();
if (filter->fo_capa_hash == NULL)
- GOTO(err_ops, rc = -ENOMEM);
+ GOTO(err_post, rc = -ENOMEM);
sprintf(ns_name, "filter-%s", obd->obd_uuid.uuid);
obd->obd_namespace = ldlm_namespace_new(obd, ns_name, LDLM_NAMESPACE_SERVER,
if (obd->obd_recovering) {
LCONSOLE_WARN("OST %s now serving %s (%s%s%s), but will be in "
"recovery for at least %d:%.02d, or until %d "
- "client%s reconnect. During this time new clients"
- " will not be allowed to connect. "
- "Recovery progress can be monitored by watching "
- "/proc/fs/lustre/obdfilter/%s/recovery_status.\n",
+ "client%s reconnect%s.\n",
obd->obd_name, lustre_cfg_string(lcfg, 1),
label ?: "", label ? "/" : "", str,
obd->obd_recovery_timeout / 60,
obd->obd_recovery_timeout % 60,
obd->obd_max_recoverable_clients,
(obd->obd_max_recoverable_clients == 1) ? "":"s",
- obd->obd_name);
+ (obd->obd_max_recoverable_clients == 1) ? "s":"");
} else {
LCONSOLE_INFO("OST %s now serving %s (%s%s%s) with recovery "
"%s\n", obd->obd_name, lustre_cfg_string(lcfg, 1),
LLOG_MDS_OST_REPL_CTXT);
GOTO(cleanup_olg, rc = -ENODEV);
}
- ctxt->loc_lcm = filter->fo_lcm;
+ ctxt->loc_lcm = lcm_get(filter->fo_lcm);
ctxt->llog_proc_cb = filter_recov_log_mds_ost_cb;
llog_ctxt_put(ctxt);
RETURN(-ENODEV);
}
ctxt->llog_proc_cb = filter_recov_log_mds_ost_cb;
- ctxt->loc_lcm = filter->fo_lcm;
+ ctxt->loc_lcm = lcm_get(filter->fo_lcm);
llog_ctxt_put(ctxt);
RETURN(rc);
}
}
if (filter->fo_lcm) {
+ mutex_down(&ctxt->loc_sem);
llog_recov_thread_fini(filter->fo_lcm, obd->obd_force);
filter->fo_lcm = NULL;
+ mutex_up(&ctxt->loc_sem);
}
RETURN(filter_olg_fini(&obd->obd_olg));
}
obd->obd_name, body->lgdc_logid.lgl_oid,
body->lgdc_logid.lgl_ogr, body->lgdc_logid.lgl_ogen);
+ spin_lock_bh(&obd->obd_processing_task_lock);
+ obd->u.filter.fo_mds_ost_sync = 1;
+ spin_unlock_bh(&obd->obd_processing_task_lock);
rc = llog_connect(ctxt, &body->lgdc_logid,
&body->lgdc_gen, NULL);
llog_ctxt_put(ctxt);
case OBD_CLEANUP_EARLY:
break;
case OBD_CLEANUP_EXPORTS:
- target_cleanup_recovery(obd);
+ /* Stop recovery before namespace cleanup. */
+ target_recovery_fini(obd);
rc = filter_llog_preclean(obd);
break;
}
LCONSOLE_WARN("%s: shutting down for failover; client state "
"will be preserved.\n", obd->obd_name);
- if (!list_empty(&obd->obd_exports)) {
- CERROR("%s: still has clients!\n", obd->obd_name);
- class_disconnect_exports(obd);
- if (!list_empty(&obd->obd_exports)) {
- CERROR("still has exports after forced cleanup?\n");
- RETURN(-EBUSY);
- }
- }
+ obd_exports_barrier(obd);
+ obd_zombie_barrier();
lprocfs_remove_proc_entry("clear", obd->obd_proc_exports_entry);
lprocfs_free_per_client_stats(obd);
lprocfs_obd_cleanup(obd);
lquota_cleanup(filter_quota_interface_ref, obd);
- /* Stop recovery before namespace cleanup. */
- target_stop_recovery_thread(obd);
- target_cleanup_recovery(obd);
-
ldlm_namespace_free(obd->obd_namespace, NULL, obd->obd_force);
obd->obd_namespace = NULL;
exp->exp_connect_flags = data->ocd_connect_flags;
data->ocd_version = LUSTRE_VERSION_CODE;
+ /* Kindly make sure the SKIP_ORPHAN flag is from MDS. */
+ if (!ergo(data->ocd_connect_flags & OBD_CONNECT_SKIP_ORPHAN,
+ data->ocd_connect_flags & OBD_CONNECT_MDS))
+ RETURN(-EPROTO);
+
if (exp->exp_connect_flags & OBD_CONNECT_GRANT) {
+ struct filter_obd *filter = &exp->exp_obd->u.filter;
obd_size left, want;
spin_lock(&exp->exp_obd->obd_osfs_lock);
LPU64" left: "LPU64"\n", exp->exp_obd->obd_name,
exp->exp_client_uuid.uuid, exp,
data->ocd_grant, want, left);
+
+ filter->fo_tot_granted_clients ++;
}
if (data->ocd_connect_flags & OBD_CONNECT_INDEX) {
data->ocd_index);
RETURN(-EBADF);
}
+ /* FIXME: Do the same with the MDS UUID and fsd_peeruuid.
+ * FIXME: We don't strictly need the COMPAT flag for that,
+ * FIXME: as fsd_peeruuid[0] will tell us if that is set.
+ * FIXME: We needed it for the index, as index 0 is valid. */
}
if (OBD_FAIL_CHECK(OBD_FAIL_OST_BRW_SIZE)) {
obd_export_nid2str(exp));
}
- /* FIXME: Do the same with the MDS UUID and fsd_peeruuid.
- * FIXME: We don't strictly need the COMPAT flag for that,
- * FIXME: as fsd_peeruuid[0] will tell us if that is set.
- * FIXME: We needed it for the index, as index 0 is valid. */
-
RETURN(0);
}
/* nearly identical to mds_connect */
static int filter_connect(const struct lu_env *env,
- struct lustre_handle *conn, struct obd_device *obd,
+ struct obd_export **exp, struct obd_device *obd,
struct obd_uuid *cluuid,
struct obd_connect_data *data, void *localdata)
{
struct lvfs_run_ctxt saved;
- struct obd_export *exp;
+ struct lustre_handle conn = { 0 };
+ struct obd_export *lexp;
struct filter_export_data *fed;
struct lsd_client_data *lcd = NULL;
__u32 group;
int rc;
ENTRY;
- if (conn == NULL || obd == NULL || cluuid == NULL)
+ if (exp == NULL || obd == NULL || cluuid == NULL)
RETURN(-EINVAL);
- rc = class_connect(conn, obd, cluuid);
+ rc = class_connect(&conn, obd, cluuid);
if (rc)
RETURN(rc);
- exp = class_conn2export(conn);
- LASSERT(exp != NULL);
+ lexp = class_conn2export(&conn);
+ LASSERT(lexp != NULL);
- fed = &exp->exp_filter_data;
+ fed = &lexp->exp_filter_data;
- rc = filter_connect_internal(exp, data);
+ rc = filter_connect_internal(lexp, data);
if (rc)
GOTO(cleanup, rc);
- filter_export_stats_init(obd, exp, localdata);
+ filter_export_stats_init(obd, lexp, localdata);
if (obd->obd_replayable) {
OBD_ALLOC(lcd, sizeof(*lcd));
if (!lcd) {
memcpy(lcd->lcd_uuid, cluuid, sizeof(lcd->lcd_uuid));
fed->fed_lcd = lcd;
- rc = filter_client_add(obd, exp, -1);
+ rc = filter_client_add(obd, lexp, -1);
if (rc)
GOTO(cleanup, rc);
}
group = data->ocd_group;
CWARN("%s: Received MDS connection ("LPX64"); group %d\n",
- obd->obd_name, exp->exp_handle.h_cookie, group);
+ obd->obd_name, lexp->exp_handle.h_cookie, group);
push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
rc = filter_read_groups(obd, group, 1);
OBD_FREE_PTR(lcd);
fed->fed_lcd = NULL;
}
- class_disconnect(exp);
+ class_disconnect(lexp);
+ *exp = NULL;
} else {
- class_export_put(exp);
+ *exp = lexp;
}
RETURN(rc);
filter_grant_discard(exp);
filter_fmd_cleanup(exp);
+ if (exp->exp_connect_flags & OBD_CONNECT_GRANT_SHRINK) {
+ struct filter_obd *filter = &exp->exp_obd->u.filter;
+ if (filter->fo_tot_granted_clients > 0)
+ filter->fo_tot_granted_clients --;
+ }
+
if (!(exp->exp_flags & OBD_OPT_FORCE))
filter_grant_sanity_check(exp->exp_obd, __FUNCTION__);
/* Flush any remaining cancel messages out to the target */
filter_sync_llogs(obd, exp);
+ lquota_clearinfo(filter_quota_interface_ref, exp, exp->exp_obd);
+
/* Disconnect early so that clients can't keep using export */
rc = class_disconnect(exp);
if (exp->exp_obd->obd_namespace != NULL)
static int filter_ping(struct obd_export *exp)
{
filter_fmd_expire(exp); /* a ping only triggers filter_fmd_expire() for this export */
-
return 0;
}
unsigned int orig_ids[MAXQUOTAS] = {0, 0};
struct llog_cookie *fcc = NULL;
struct filter_obd *filter;
- int rc, err, locked = 0, sync = 0;
+ int rc, err, sync = 0;
loff_t old_size = 0;
unsigned int ia_valid;
struct inode *inode;
if (fcc != NULL)
*fcc = oa->o_lcookie;
}
-
- if (ia_valid & ATTR_SIZE || ia_valid & (ATTR_UID | ATTR_GID)) {
+ if (ia_valid & (ATTR_SIZE | ATTR_UID | ATTR_GID)) {
DQUOT_INIT(inode);
+ /* Filter truncates and writes are serialized by
+ * i_alloc_sem, see the comment in
+ * filter_preprw_write.*/
+ if (ia_valid & ATTR_SIZE)
+ down_write(&inode->i_alloc_sem);
LOCK_INODE_MUTEX(inode);
old_size = i_size_read(inode);
- locked = 1;
}
+ /* VBR: version recovery check */
+ rc = filter_version_get_check(exp, oti, inode);
+ if (rc)
+ GOTO(out_unlock, rc);
+
/* If the inode still has SUID+SGID bits set (see filter_precreate())
* then we will accept the UID+GID sent by the client during write for
* initializing the ownership of this inode. We only allow this to
* sure we have one left for the last_rcvd update. */
err = fsfilt_extend(exp->exp_obd, inode, 1, handle);
- rc = filter_finish_transno(exp, oti, rc, sync);
+ rc = filter_finish_transno(exp, inode, oti, rc, sync);
if (sync) {
filter_cancel_cookies_cb(exp->exp_obd, 0, fcc, rc);
fcc = NULL;
rc = err;
}
- if (locked) {
- UNLOCK_INODE_MUTEX(inode);
- locked = 0;
- }
-
EXIT;
out_unlock:
- if (locked)
+ if (ia_valid & (ATTR_SIZE | ATTR_UID | ATTR_GID))
UNLOCK_INODE_MUTEX(inode);
-
+ if (ia_valid & ATTR_SIZE)
+ up_write(&inode->i_alloc_sem);
if (fcc)
OBD_FREE(fcc, sizeof(*fcc));
/* caller must hold fo_create_locks[oa->o_gr] */
static int filter_destroy_precreated(struct obd_export *exp, struct obdo *oa,
- struct filter_obd *filter)
+ struct filter_obd *filter)
{
- struct obdo doa; /* XXX obdo on stack */
+ struct obdo doa = { 0 }; /* XXX obdo on stack */
obd_id last, id;
- int rc;
+ int rc = 0;
+ int skip_orphan;
ENTRY;
LASSERT(oa);
last = filter_last_id(filter, doa.o_gr);
- CWARN("%s: deleting orphan objects from "LPU64" to "LPU64"\n",
- exp->exp_obd->obd_name, oa->o_id + 1, last);
+ skip_orphan = !!(exp->exp_connect_flags & OBD_CONNECT_SKIP_ORPHAN);
+
+ CDEBUG(D_HA, "%s: deleting orphan objects from "LPU64" to "LPU64"%s\n",
+ exp->exp_obd->obd_name, oa->o_id + 1, last,
+ skip_orphan ? ", orphan objids won't be reused any more." : ".");
for (id = last; id > oa->o_id; id--) {
doa.o_id = id;
if (rc && rc != -ENOENT) /* this is pretty fatal... */
CEMERG("error destroying precreate objid "LPU64": %d\n",
id, rc);
- filter_set_last_id(filter, id - 1, doa.o_gr);
+
/* update last_id on disk periodically so that if we restart
* we don't need to re-scan all of the just-deleted objects. */
- if ((id & 511) == 0)
+ if ((id & 511) == 0 && !skip_orphan) {
+ filter_set_last_id(filter, id - 1, doa.o_gr);
filter_update_last_objid(exp->exp_obd, doa.o_gr, 0);
+ }
}
CDEBUG(D_HA, "%s: after destroy: set last_objids["LPU64"] = "LPU64"\n",
exp->exp_obd->obd_name, doa.o_gr, oa->o_id);
- rc = filter_update_last_objid(exp->exp_obd, doa.o_gr, 1);
+ if (!skip_orphan) {
+ filter_set_last_id(filter, id, doa.o_gr);
+ rc = filter_update_last_objid(exp->exp_obd, doa.o_gr, 1);
+ } else {
+ /* don't reuse orphan object, return last used objid */
+ oa->o_id = last;
+ rc = 0;
+ }
clear_bit(doa.o_gr, &filter->fo_destroys_in_progress);
RETURN(rc);
/* delete orphans request */
if ((oa->o_valid & OBD_MD_FLFLAGS) && (oa->o_flags & OBD_FL_DELORPHAN)){
+ obd_id last = filter_last_id(filter, group);
+
if (oti->oti_conn_cnt < exp->exp_conn_cnt) {
CERROR("%s: dropping old orphan cleanup request\n",
obd->obd_name);
up(&filter->fo_create_locks[group]);
RETURN(0);
}
- diff = oa->o_id - filter_last_id(filter, group);
+ diff = oa->o_id - last;
CDEBUG(D_HA, "filter_last_id() = "LPU64" -> diff = %d\n",
- filter_last_id(filter, group), diff);
+ last, diff);
if (-diff > OST_MAX_PRECREATE) {
CERROR("%s: ignoring bogus orphan destroy request: "
"obdid "LPU64" last_id "LPU64"\n", obd->obd_name,
- oa->o_id, filter_last_id(filter, group));
+ oa->o_id, last);
/* FIXME: should reset precreate_next_id on MDS */
GOTO(out, rc = -EINVAL);
}
* stop creating files on MDS if OST is not good shape to create
* objects.*/
osfs->os_state = (filter->fo_obt.obt_sb->s_flags & MS_RDONLY) ?
- EROFS : 0;
+ OS_STATE_READONLY : 0;
RETURN(rc);
}
return rc;
}
+/*
+ * Report the number of free inodes on the backing filesystem.
+ *
+ * Calls fsfilt_statfs() while holding obd_osfs_lock and, on success,
+ * returns obd->obd_osfs.os_ffree.  Returns (__u64)-1 when fsfilt_statfs()
+ * fails.
+ * NOTE(review): presumably fsfilt_statfs() refreshes obd->obd_osfs before
+ * it is read here -- confirm against its definition.
+ */
+static __u64 filter_calc_free_inodes(struct obd_device *obd)
+{
+        __u64 free_inodes;
+        int statfs_rc;
+
+        spin_lock(&obd->obd_osfs_lock);
+        statfs_rc = fsfilt_statfs(obd, obd->u.obt.obt_sb,
+                                  cfs_time_shift_64(1));
+        free_inodes = (statfs_rc == 0) ? obd->obd_osfs.os_ffree : (__u64)-1;
+        spin_unlock(&obd->obd_osfs_lock);
+
+        return free_inodes;
+}
/* We rely on the fact that only one thread will be creating files in a given
* group at a time, which is why we don't need an atomic filter_get_new_id.
struct obd_statfs *osfs;
int err = 0, rc = 0, recreate_obj = 0, i;
cfs_time_t enough_time = cfs_time_shift(DISK_TIMEOUT/2);
+ __u64 os_ffree;
obd_id next_id;
void *handle = NULL;
ENTRY;
S_IFREG | S_ISUID | S_ISGID | 0666, NULL);
if (rc) {
CERROR("create failed rc = %d\n", rc);
+ if (rc == -ENOSPC) {
+ os_ffree = filter_calc_free_inodes(obd);
+ if (os_ffree != -1)
+ CERROR("%s: free inode "LPU64"\n",
+ obd->obd_name, os_ffree);
+ }
GOTO(cleanup, rc);
}
+ if (dchild->d_inode)
+ CDEBUG(D_INFO, "objid "LPU64" got inum %lu\n", next_id,
+ dchild->d_inode->i_ino);
+
set_last_id:
if (!recreate_obj) {
filter_set_last_id(filter, next_id, group);
struct obd_device *obd;
struct filter_obd *filter;
struct dentry *dchild = NULL, *dparent = NULL;
+ struct lustre_handle lockh = { 0 };
struct lvfs_run_ctxt saved;
void *handle = NULL;
struct llog_cookie *fcc = NULL;
GOTO(cleanup, rc = -ENOENT);
}
- filter_prepare_destroy(obd, oa->o_id, oa->o_gr);
+ filter_prepare_destroy(obd, oa->o_id, oa->o_gr, &lockh);
/* Our MDC connection is established by the MDS to us */
if (oa->o_valid & OBD_MD_FLCOOKIE) {
* (see BUG 4180) -bzzz
*/
LOCK_INODE_MUTEX(dchild->d_inode);
+
+ /* VBR: version recovery check */
+ rc = filter_version_get_check(exp, oti, dchild->d_inode);
+ if (rc)
+ GOTO(cleanup, rc);
+
handle = fsfilt_start_log(obd, dchild->d_inode, FSFILT_OP_SETATTR,
NULL, 1);
if (IS_ERR(handle)) {
* on commit. then we call callback directly to free
* the fcc.
*/
- rc = filter_finish_transno(exp, oti, rc, sync);
+ rc = filter_finish_transno(exp, NULL, oti, rc, sync);
if (sync) {
filter_cancel_cookies_cb(obd, 0, fcc, rc);
fcc = NULL;
case 3:
filter_parent_unlock(dparent);
case 2:
+ filter_fini_destroy(obd, &lockh);
+
f_dput(dchild);
if (fcc != NULL)
OBD_FREE(fcc, sizeof(*fcc));
push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
rc = fsfilt_iocontrol(obd, dentry->d_inode, NULL,
EXT3_IOC_FIEMAP, (long)fiemap);
- if (rc) {
- f_dput(dentry);
- RETURN(rc);
- }
pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
f_dput(dentry);
- RETURN(0);
+ RETURN(rc);
}
CDEBUG(D_IOCTL, "invalid key\n");
llog_ctxt_put(ctxt);
return rc;
}
+
+/**
+ * set_info handler for KEY_GRANT_SHRINK.
+ *
+ * Hands the obdo carried in \a body to filter_grant_incoming() while
+ * holding the device's obd_osfs_lock.
+ *
+ * \param exp   export the request arrived on
+ * \param body  request body whose embedded obdo (body->oa) is processed
+ *
+ * \retval 0  always
+ */
+static int filter_set_grant_shrink(struct obd_export *exp,
+                                   struct ost_body *body)
+{
+        ENTRY;
+
+        /* handle shrink grant */
+        spin_lock(&exp->exp_obd->obd_osfs_lock);
+        filter_grant_incoming(exp, &body->oa);
+        spin_unlock(&exp->exp_obd->obd_osfs_lock);
+
+        RETURN(0);
+}
+
+/*
+ * set_info handler for KEY_MDS_CONN.
+ *
+ * Stores the export's handle cookie in fo_mdc_conn, sets up the llog group
+ * named by *val (group 0 when val is NULL) and then calls lquota_setinfo().
+ * Returns -EINVAL when the export has no obd device, the error from
+ * filter_setup_llog_group() for the requested group, or 0.
+ */
+static int filter_set_mds_conn(struct obd_export *exp, void *val)
+{
+        struct obd_device *obd;
+        int group;
+        int rc;
+        ENTRY;
+
+        obd = exp->exp_obd;
+        if (obd == NULL) {
+                CDEBUG(D_IOCTL, "invalid export %p\n", exp);
+                RETURN(-EINVAL);
+        }
+
+        LCONSOLE_WARN("%s: received MDS connection from %s\n", obd->obd_name,
+                      obd_export_nid2str(exp));
+        obd->u.filter.fo_mdc_conn.cookie = exp->exp_handle.h_cookie;
+
+        /* setup llog imports; fall back to group 0 when no value was sent */
+        group = (val != NULL) ? (int)(*(__u32 *)val) : 0;
+
+        LASSERT_MDS_GROUP(group);
+        rc = filter_setup_llog_group(exp, obd, group);
+        if (rc != 0)
+                RETURN(rc);
+
+        /* setup llog group 1 for interop; its result is not checked */
+        if (group == FILTER_GROUP_MDS0)
+                filter_setup_llog_group(exp, obd, FILTER_GROUP_LLOG);
+
+        lquota_setinfo(filter_quota_interface_ref, obd, exp);
+        RETURN(rc);
+}
+
static int filter_set_info_async(struct obd_export *exp, __u32 keylen,
void *key, __u32 vallen, void *val,
struct ptlrpc_request_set *set)
{
struct obd_device *obd;
- int rc = 0, group;
ENTRY;
obd = exp->exp_obd;
}
if (KEY_IS(KEY_CAPA_KEY)) {
+ int rc;
rc = filter_update_capa_key(obd, (struct lustre_capa_key *)val);
if (rc)
CERROR("filter update capability key failed: %d\n", rc);
RETURN(0);
}
- if (!KEY_IS(KEY_MDS_CONN))
- RETURN(-EINVAL);
+ if (KEY_IS(KEY_MDS_CONN))
+ RETURN(filter_set_mds_conn(exp, val));
- LCONSOLE_WARN("%s: received MDS connection from %s\n", obd->obd_name,
- obd_export_nid2str(exp));
- obd->u.filter.fo_mdc_conn.cookie = exp->exp_handle.h_cookie;
-
- /* setup llog imports */
- LASSERT(val != NULL);
-
- group = (int)(*(__u32 *)val);
- LASSERT_MDS_GROUP(group);
- rc = filter_setup_llog_group(exp, obd, group);
- if (rc)
- goto out;
+ if (KEY_IS(KEY_GRANT_SHRINK))
+ RETURN(filter_set_grant_shrink(exp, val));
- lquota_setinfo(filter_quota_interface_ref, obd, exp);
-
- if (group == FILTER_GROUP_MDS0) {
- /* setup llog group 1 for interop */
- filter_setup_llog_group(exp, obd, FILTER_GROUP_LLOG);
- }
-out:
- RETURN(rc);
+ RETURN(-EINVAL);
}
int filter_iocontrol(unsigned int cmd, struct obd_export *exp,
switch (cmd) {
case OBD_IOC_ABORT_RECOVERY: {
- CERROR("aborting recovery for device %s\n", obd->obd_name);
+ LCONSOLE_WARN("%s: Aborting recovery.\n", obd->obd_name);
target_stop_recovery_thread(obd);
RETURN(0);
}
rc = class_process_proc_param(PARAM_OST, lvars.obd_vars,
lcfg, obd);
- if (rc > 0)
- rc = 0;
+ if (rc > 0)
+ rc = 0;
break;
}