#include "filter_internal.h"
/* Group 0 is no longer a legal group, to catch uninitialized IDs */
-#define FILTER_MIN_GROUPS FILTER_GROUP_MDS0
+#define FILTER_MIN_GROUPS FILTER_GROUP_MDS1_N_BASE
static struct lvfs_callback_ops filter_lvfs_ops;
cfs_mem_cache_t *ll_fmd_cachep;
struct obd_export *exp,
void *client_nid)
{
- struct filter_export_data *fed = &exp->exp_filter_data;
int rc, newnid = 0;
ENTRY;
- init_brw_stats(&fed->fed_brw_stats);
-
if (obd_uuid_equals(&exp->exp_client_uuid, &obd->obd_uuid))
/* Self-export gets no proc entry */
RETURN(0);
RETURN(0);
}
+struct lsd_client_data zero_lcd; /* globals are implicitly zeroed */
+
static int filter_client_free(struct obd_export *exp)
{
struct filter_export_data *fed = &exp->exp_filter_data;
struct filter_obd *filter = &exp->exp_obd->u.filter;
struct obd_device *obd = exp->exp_obd;
- struct lsd_client_data zero_lcd;
struct lvfs_run_ctxt saved;
int rc;
loff_t off;
}
if (!(exp->exp_flags & OBD_OPT_FAILOVER)) {
- memset(&zero_lcd, 0, sizeof zero_lcd);
+ /* Don't force sync on disconnect if aborting recovery,
+ * or it does num_clients * num_osts syncs. b=17194 */
+ int need_sync = (!exp->exp_libclient || exp->exp_need_sync) &&
+ !(exp->exp_flags&OBD_OPT_ABORT_RECOV);
push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
rc = fsfilt_write_record(obd, filter->fo_rcvd_filp, &zero_lcd,
- sizeof(zero_lcd), &off,
- (!exp->exp_libclient ||
- exp->exp_need_sync));
+ sizeof(zero_lcd), &off, 0);
+
+ /* Make sure the server's last_transno is up to date. Do this
+ * after the client is freed so we know all the client's
+ * transactions have been committed. */
if (rc == 0)
- /* update server's transno */
filter_update_server_data(obd, filter->fo_rcvd_filp,
- filter->fo_fsd,
- !exp->exp_libclient);
+ filter->fo_fsd, need_sync);
pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
CDEBUG(rc == 0 ? D_INFO : D_ERROR,
- "zeroing out client %s at idx %u (%llu) in %s rc %d\n",
+ "zero out client %s at idx %u/%llu in %s %ssync rc %d\n",
fed->fed_lcd->lcd_uuid, fed->fed_lr_idx, fed->fed_lr_off,
- LAST_RCVD, rc);
+ LAST_RCVD, need_sync ? "" : "a", rc);
}
if (!test_and_clear_bit(fed->fed_lr_idx, filter->fo_last_rcvd_slots)) {
struct filter_export_data *fed = &exp->exp_filter_data;
struct filter_mod_data *found = NULL, *fmd_new = NULL;
- OBD_SLAB_ALLOC(fmd_new, ll_fmd_cachep, CFS_ALLOC_IO, sizeof(*fmd_new));
+ OBD_SLAB_ALLOC_PTR_GFP(fmd_new, ll_fmd_cachep, CFS_ALLOC_IO);
spin_lock(&fed->fed_lock);
found = filter_fmd_find_nolock(&exp->exp_obd->u.filter,fed,objid,group);
CDEBUG(D_INODE, "error reading LAST_GROUP: rc %d\n",rc);
GOTO(cleanup, rc);
}
- LASSERT(off == 0 || last_group >= FILTER_MIN_GROUPS);
+ LASSERTF(off == 0 || CHECK_MDS_GROUP(last_group),
+ "off = %llu and last_group = %d\n", off, last_group);
+
CDEBUG(D_INODE, "%s: previous %d, new %d\n",
obd->obd_name, last_group, group);
down(&filter->fo_init_lock);
old_count = filter->fo_group_count;
for (group = old_count; group <= last_group; group++) {
- if (group == 0)
- continue; /* no group zero */
rc = filter_read_group_internal(obd, group, create);
if (rc != 0)
if (off == 0) {
last_group = FILTER_MIN_GROUPS;
} else {
- LASSERT(last_group >= FILTER_MIN_GROUPS);
+ LASSERT_MDS_GROUP(last_group);
}
CWARN("%s: initialize groups [%d,%d]\n", obd->obd_name,
if (rc)
CERROR("error writing server data: rc = %d\n", rc);
- for (i = 1; i < filter->fo_group_count; i++) {
+ for (i = 0; i < filter->fo_group_count; i++) {
rc = filter_update_last_objid(obd, i,
(i == filter->fo_group_count - 1));
if (rc)
spin_lock(&filter->fo_objidlock);
id = filter->fo_last_objids[group];
spin_unlock(&filter->fo_objidlock);
-
return id;
}
struct filter_subdirs *subdirs;
LASSERT(group < filter->fo_group_count); /* FIXME: object groups */
- if ((group > 0 && group < FILTER_GROUP_MDS0) ||
+ if ((group > FILTER_GROUP_MDS0 && group < FILTER_GROUP_MDS1_N_BASE) ||
filter->fo_subdir_count == 0)
return filter->fo_dentry_O_groups[group];
EXIT;
}
+/*
+ * Fetch the sptlrpc rule set for this target and make it the filter's
+ * active rule set.
+ *
+ * \param obd      target device whose rules are refreshed
+ * \param initial  passed through unchanged to sptlrpc_conf_target_get_rules()
+ *
+ * \retval 0       the new rule set was installed
+ * \retval other   value returned by sptlrpc_conf_target_get_rules(); the
+ *                 currently installed rule set is left untouched
+ */
+static int filter_adapt_sptlrpc_conf(struct obd_device *obd, int initial)
+{
+        struct sptlrpc_rule_set new_rset;
+        struct filter_obd *fo = &obd->u.filter;
+        int err;
+
+        sptlrpc_rule_set_init(&new_rset);
+
+        err = sptlrpc_conf_target_get_rules(obd, &new_rset, initial);
+        if (err != 0) {
+                CERROR("obd %s: failed get sptlrpc rules: %d\n",
+                       obd->obd_name, err);
+                return err;
+        }
+
+        /* handed the new rules before they replace the old set below */
+        sptlrpc_target_update_exp_flavor(obd, &new_rset);
+
+        /* swap the rule sets under the write lock: free old, copy in new */
+        write_lock(&fo->fo_sptlrpc_lock);
+        sptlrpc_rule_set_free(&fo->fo_sptlrpc_rset);
+        fo->fo_sptlrpc_rset = new_rset;
+        write_unlock(&fo->fo_sptlrpc_lock);
+
+        return 0;
+}
+
/*
* pre-allocate pool of iobuf's to be used by filter_{prep,commit}rw_write().
*/
if (rc != 0)
GOTO(err_ops, rc);
- LASSERT(!lvfs_check_rdonly(lvfs_sbdev(mnt->mnt_sb)));
+ if (lvfs_check_rdonly(lvfs_sbdev(mnt->mnt_sb))) {
+ CERROR("%s: Underlying device is marked as read-only. "
+ "Setup failed\n", obd->obd_name);
+ GOTO(err_ops, rc = -EROFS);
+ }
/* failover is the default */
obd->obd_replayable = 1;
sema_init(&filter->fo_alloc_lock, 1);
init_brw_stats(&filter->fo_filter_stats);
filter->fo_read_cache = 1; /* enable read-only cache by default */
- filter->fo_writethrough_cache = 1; /* disable writethrough cache */
+ filter->fo_writethrough_cache = 1; /* enable writethrough cache */
filter->fo_readcache_max_filesize = FILTER_MAX_CACHE_SIZE;
filter->fo_fmd_max_num = FILTER_FMD_MAX_NUM_DEFAULT;
filter->fo_fmd_max_age = FILTER_FMD_MAX_AGE_DEFAULT;
CFS_INIT_LIST_HEAD(&filter->fo_llog_list);
spin_lock_init(&filter->fo_llog_list_lock);
- rwlock_init(&filter->fo_sptlrpc_lock);
- sptlrpc_rule_set_init(&filter->fo_sptlrpc_rset);
-
filter->fo_fl_oss_capa = 1;
CFS_INIT_LIST_HEAD(&filter->fo_capa_keys);
filter->fo_capa_hash = init_capa_hash();
GOTO(err_post, rc);
}
+ rwlock_init(&filter->fo_sptlrpc_lock);
+ sptlrpc_rule_set_init(&filter->fo_sptlrpc_rset);
+ /* do this after llog has been initialized */
+ filter_adapt_sptlrpc_conf(obd, 1);
+
rc = lquota_setup(filter_quota_interface_ref, obd);
if (rc)
GOTO(err_post, rc);
rc = llog_cleanup(ctxt);
ctxt = llog_group_get_ctxt(olg, LLOG_SIZE_ORIG_CTXT);
- if (ctxt)
+ if (ctxt) {
+ rc2 = llog_cleanup(ctxt);
+ if (!rc)
+ rc = rc2;
+ }
+
+ ctxt = llog_group_get_ctxt(olg, LLOG_CONFIG_ORIG_CTXT);
+ if (ctxt) {
rc2 = llog_cleanup(ctxt);
- if (!rc)
- rc = rc2;
+ if (!rc)
+ rc = rc2;
+ }
RETURN(rc);
}
if (rc)
GOTO(cleanup_lcm, rc);
+ rc = llog_setup(obd, olg, LLOG_CONFIG_ORIG_CTXT, tgt, 0, NULL,
+ &llog_lvfs_ops);
+ if (rc)
+ GOTO(cleanup_olg, rc);
+
ctxt = llog_group_get_ctxt(olg, LLOG_MDS_OST_REPL_CTXT);
if (!ctxt) {
CERROR("Can't get ctxt for %p:%x\n", olg,
exp->exp_connect_flags = data->ocd_connect_flags;
data->ocd_version = LUSTRE_VERSION_CODE;
+ /* Kindly make sure the SKIP_ORPHAN flag is from MDS. */
+ if (!ergo(data->ocd_connect_flags & OBD_CONNECT_SKIP_ORPHAN,
+ data->ocd_connect_flags & OBD_CONNECT_MDS))
+ RETURN(-EPROTO);
+
if (exp->exp_connect_flags & OBD_CONNECT_GRANT) {
+ struct filter_obd *filter = &exp->exp_obd->u.filter;
obd_size left, want;
spin_lock(&exp->exp_obd->obd_osfs_lock);
LPU64" left: "LPU64"\n", exp->exp_obd->obd_name,
exp->exp_client_uuid.uuid, exp,
data->ocd_grant, want, left);
+
+ filter->fo_tot_granted_clients ++;
}
if (data->ocd_connect_flags & OBD_CONNECT_INDEX) {
/* nearly identical to mds_connect */
static int filter_connect(const struct lu_env *env,
- struct lustre_handle *conn, struct obd_device *obd,
+ struct obd_export **exp, struct obd_device *obd,
struct obd_uuid *cluuid,
struct obd_connect_data *data, void *localdata)
{
struct lvfs_run_ctxt saved;
- struct obd_export *exp;
+ struct lustre_handle conn = { 0 };
+ struct obd_export *lexp;
struct filter_export_data *fed;
struct lsd_client_data *lcd = NULL;
__u32 group;
int rc;
ENTRY;
- if (conn == NULL || obd == NULL || cluuid == NULL)
+ if (exp == NULL || obd == NULL || cluuid == NULL)
RETURN(-EINVAL);
- rc = class_connect(conn, obd, cluuid);
+ rc = class_connect(&conn, obd, cluuid);
if (rc)
RETURN(rc);
- exp = class_conn2export(conn);
- LASSERT(exp != NULL);
+ lexp = class_conn2export(&conn);
+ LASSERT(lexp != NULL);
- fed = &exp->exp_filter_data;
+ fed = &lexp->exp_filter_data;
- rc = filter_connect_internal(exp, data);
+ rc = filter_connect_internal(lexp, data);
if (rc)
GOTO(cleanup, rc);
- filter_export_stats_init(obd, exp, localdata);
+ filter_export_stats_init(obd, lexp, localdata);
if (obd->obd_replayable) {
OBD_ALLOC(lcd, sizeof(*lcd));
if (!lcd) {
memcpy(lcd->lcd_uuid, cluuid, sizeof(lcd->lcd_uuid));
fed->fed_lcd = lcd;
- rc = filter_client_add(obd, exp, -1);
+ rc = filter_client_add(obd, lexp, -1);
if (rc)
GOTO(cleanup, rc);
}
group = data->ocd_group;
- if (group == 0)
- GOTO(cleanup, rc);
CWARN("%s: Received MDS connection ("LPX64"); group %d\n",
- obd->obd_name, exp->exp_handle.h_cookie, group);
+ obd->obd_name, lexp->exp_handle.h_cookie, group);
push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
rc = filter_read_groups(obd, group, 1);
OBD_FREE_PTR(lcd);
fed->fed_lcd = NULL;
}
- class_disconnect(exp);
+ class_disconnect(lexp);
+ *exp = NULL;
} else {
- class_export_put(exp);
+ *exp = lexp;
}
RETURN(rc);
filter_grant_discard(exp);
filter_fmd_cleanup(exp);
+ if (exp->exp_connect_flags & OBD_CONNECT_GRANT_SHRINK) {
+ struct filter_obd *filter = &exp->exp_obd->u.filter;
+ if (filter->fo_tot_granted_clients > 0)
+ filter->fo_tot_granted_clients --;
+ }
+
if (!(exp->exp_flags & OBD_OPT_FORCE))
filter_grant_sanity_check(exp->exp_obd, __FUNCTION__);
{
struct obd_llog_group *olg_min, *olg;
struct filter_obd *filter;
- int worked = 0, group;
+ int worked = -1, group;
struct llog_ctxt *ctxt;
ENTRY;
/* Flush any remaining cancel messages out to the target */
filter_sync_llogs(obd, exp);
+ lquota_clearinfo(filter_quota_interface_ref, exp, exp->exp_obd);
+
/* Disconnect early so that clients can't keep using export */
rc = class_disconnect(exp);
if (exp->exp_obd->obd_namespace != NULL)
}
if (ia_valid & ATTR_SIZE || ia_valid & (ATTR_UID | ATTR_GID)) {
- old_size = i_size_read(inode);
DQUOT_INIT(inode);
LOCK_INODE_MUTEX(inode);
+ old_size = i_size_read(inode);
locked = 1;
}
/* caller must hold fo_create_locks[oa->o_gr] */
static int filter_destroy_precreated(struct obd_export *exp, struct obdo *oa,
- struct filter_obd *filter)
+ struct filter_obd *filter)
{
struct obdo doa; /* XXX obdo on stack */
obd_id last, id;
- int rc;
+ int rc = 0;
+ int skip_orphan;
ENTRY;
LASSERT(oa);
- LASSERT(oa->o_gr != 0);
+ LASSERT_MDS_GROUP(oa->o_gr);
LASSERT(oa->o_valid & OBD_MD_FLGROUP);
LASSERT(down_trylock(&filter->fo_create_locks[oa->o_gr]) != 0);
last = filter_last_id(filter, doa.o_gr);
- CWARN("%s: deleting orphan objects from "LPU64" to "LPU64"\n",
- exp->exp_obd->obd_name, oa->o_id + 1, last);
+ skip_orphan = !!(exp->exp_connect_flags & OBD_CONNECT_SKIP_ORPHAN);
+
+ CWARN("%s: deleting orphan objects from "LPU64" to "LPU64"%s\n",
+ exp->exp_obd->obd_name, oa->o_id + 1, last,
+ skip_orphan ? ", orphan objids won't be reused any more." : ".");
for (id = last; id > oa->o_id; id--) {
doa.o_id = id;
if (rc && rc != -ENOENT) /* this is pretty fatal... */
CEMERG("error destroying precreate objid "LPU64": %d\n",
id, rc);
- filter_set_last_id(filter, id - 1, doa.o_gr);
+
/* update last_id on disk periodically so that if we restart
* we don't need to re-scan all of the just-deleted objects. */
- if ((id & 511) == 0)
+ if ((id & 511) == 0 && !skip_orphan) {
+ filter_set_last_id(filter, id - 1, doa.o_gr);
filter_update_last_objid(exp->exp_obd, doa.o_gr, 0);
+ }
}
CDEBUG(D_HA, "%s: after destroy: set last_objids["LPU64"] = "LPU64"\n",
exp->exp_obd->obd_name, doa.o_gr, oa->o_id);
- rc = filter_update_last_objid(exp->exp_obd, doa.o_gr, 1);
+ if (!skip_orphan) {
+ filter_set_last_id(filter, id, doa.o_gr);
+ rc = filter_update_last_objid(exp->exp_obd, doa.o_gr, 1);
+ } else {
+ /* don't reuse orphan object, return last used objid */
+ oa->o_id = last;
+ rc = 0;
+ }
clear_bit(doa.o_gr, &filter->fo_destroys_in_progress);
RETURN(rc);
/* delete orphans request */
if ((oa->o_valid & OBD_MD_FLFLAGS) && (oa->o_flags & OBD_FL_DELORPHAN)){
+ obd_id last = filter_last_id(filter, group);
+
if (oti->oti_conn_cnt < exp->exp_conn_cnt) {
CERROR("%s: dropping old orphan cleanup request\n",
obd->obd_name);
up(&filter->fo_create_locks[group]);
RETURN(0);
}
- diff = oa->o_id - filter_last_id(filter, group);
+ diff = oa->o_id - last;
CDEBUG(D_HA, "filter_last_id() = "LPU64" -> diff = %d\n",
- filter_last_id(filter, group), diff);
+ last, diff);
if (-diff > OST_MAX_PRECREATE) {
CERROR("%s: ignoring bogus orphan destroy request: "
"obdid "LPU64" last_id "LPU64"\n", obd->obd_name,
- oa->o_id, filter_last_id(filter, group));
+ oa->o_id, last);
/* FIXME: should reset precreate_next_id on MDS */
GOTO(out, rc = -EINVAL);
}
obd->obd_name);
GOTO(out, rc = 0);
}
- /* only precreate if group == 0 and o_id is specified */
- if (group < FILTER_GROUP_MDS0 || oa->o_id == 0)
+ /* only precreate if group == 0 and o_id is specified */
+ if (group == FILTER_GROUP_LLOG || oa->o_id == 0)
diff = 1;
else
diff = oa->o_id - filter_last_id(filter, group);
CDEBUG(D_INODE, "%s: filter_create(od->o_gr="LPU64",od->o_id="
LPU64")\n", obd->obd_name, oa->o_gr, oa->o_id);
- if (!(oa->o_valid & OBD_MD_FLGROUP) || group == 0) {
+ if (!(oa->o_valid & OBD_MD_FLGROUP)) {
CERROR("!!! nid %s sent invalid object group %d\n",
obd_export_nid2str(exp), group);
RETURN(-EINVAL);
push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
rc = fsfilt_iocontrol(obd, dentry->d_inode, NULL,
EXT3_IOC_FIEMAP, (long)fiemap);
- if (rc) {
- f_dput(dentry);
- RETURN(rc);
- }
pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
f_dput(dentry);
- RETURN(0);
+ RETURN(rc);
}
CDEBUG(D_IOCTL, "invalid key\n");
RETURN(-EINVAL);
}
+/*
+ * Bind export \a exp to llog group \a group of \a obd and accept the
+ * export's reverse import on the group's LLOG_MDS_OST_REPL_CTXT context.
+ *
+ * \param exp    export to set on the llog group; its exp_imp_reverse is
+ *               passed to llog_receptor_accept()
+ * \param obd    device on which the llog group is looked up via
+ *               filter_find_create_olg()
+ * \param group  llog group number
+ *
+ * \retval PTR_ERR(olg)  filter_find_create_olg() returned an error pointer
+ * \retval other         value returned by llog_receptor_accept()
+ */
+static inline int filter_setup_llog_group(struct obd_export *exp,
+                                          struct obd_device *obd,
+                                          int group)
+{
+        struct obd_llog_group *olg;
+        struct llog_ctxt *ctxt;
+        int rc;
+
+        olg = filter_find_create_olg(obd, group);
+        /* This function has no ENTRY, so leave with a plain return rather
+         * than RETURN() to keep the entry/exit debug trace balanced. */
+        if (IS_ERR(olg))
+                return PTR_ERR(olg);
+
+        llog_group_set_export(olg, exp);
+
+        ctxt = llog_group_get_ctxt(olg, LLOG_MDS_OST_REPL_CTXT);
+        LASSERTF(ctxt != NULL, "ctxt is null\n");
+
+        rc = llog_receptor_accept(ctxt, exp->exp_imp_reverse);
+        /* drop the reference taken by llog_group_get_ctxt() */
+        llog_ctxt_put(ctxt);
+        return rc;
+}
static int filter_set_info_async(struct obd_export *exp, __u32 keylen,
void *key, __u32 vallen, void *val,
struct ptlrpc_request_set *set)
{
struct obd_device *obd;
- struct obd_llog_group *olg;
- struct llog_ctxt *ctxt;
int rc = 0, group;
ENTRY;
RETURN(0);
}
+ if (KEY_IS(KEY_SPTLRPC_CONF)) {
+ filter_adapt_sptlrpc_conf(obd, 0);
+ RETURN(0);
+ }
+
+ if (KEY_IS(KEY_GRANT_SHRINK)) {
+ struct ost_body *body = (struct ost_body *)val;
+ /* handle shrink grant */
+ spin_lock(&exp->exp_obd->obd_osfs_lock);
+ filter_grant_incoming(exp, &body->oa);
+ spin_unlock(&exp->exp_obd->obd_osfs_lock);
+ RETURN(rc);
+ }
+
if (!KEY_IS(KEY_MDS_CONN))
RETURN(-EINVAL);
obd->u.filter.fo_mdc_conn.cookie = exp->exp_handle.h_cookie;
/* setup llog imports */
- LASSERT(val != NULL);
- group = (int)(*(__u32 *)val);
- LASSERT(group >= FILTER_GROUP_MDS0);
-
- olg = filter_find_create_olg(obd, group);
- if (IS_ERR(olg))
- RETURN(PTR_ERR(olg));
-
- llog_group_set_export(olg, exp);
-
- ctxt = llog_group_get_ctxt(olg, LLOG_MDS_OST_REPL_CTXT);
- LASSERTF(ctxt != NULL, "ctxt is null\n");
+ if (val != NULL)
+ group = (int)(*(__u32 *)val);
+ else
+ group = 0; /* default value */
- rc = llog_receptor_accept(ctxt, exp->exp_imp_reverse);
- llog_ctxt_put(ctxt);
+ LASSERT_MDS_GROUP(group);
+ rc = filter_setup_llog_group(exp, obd, group);
+ if (rc)
+ goto out;
lquota_setinfo(filter_quota_interface_ref, obd, exp);
+ if (group == FILTER_GROUP_MDS0) {
+ /* setup llog group 1 for interop */
+ filter_setup_llog_group(exp, obd, FILTER_GROUP_LLOG);
+ }
+out:
RETURN(rc);
}
int rc = 0;
switch (lcfg->lcfg_command) {
- case LCFG_SPTLRPC_CONF: {
- struct filter_obd *filter = &obd->u.filter;
- struct sptlrpc_conf_log *log;
- struct sptlrpc_rule_set tmp_rset;
-
- log = sptlrpc_conf_log_extract(lcfg);
- if (IS_ERR(log)) {
- rc = PTR_ERR(log);
- break;
- }
-
- sptlrpc_rule_set_init(&tmp_rset);
-
- rc = sptlrpc_rule_set_from_log(&tmp_rset, log);
- if (rc) {
- CERROR("obd %s: failed get sptlrpc rules: %d\n",
- obd->obd_name, rc);
- break;
- }
-
- write_lock(&filter->fo_sptlrpc_lock);
- sptlrpc_rule_set_free(&filter->fo_sptlrpc_rset);
- filter->fo_sptlrpc_rset = tmp_rset;
- write_unlock(&filter->fo_sptlrpc_lock);
-
- sptlrpc_target_update_exp_flavor(obd, &tmp_rset);
- break;
- }
default:
lprocfs_filter_init_vars(&lvars);
rc = class_process_proc_param(PARAM_OST, lvars.obd_vars,
lcfg, obd);
- if (rc > 0)
- rc = 0;
+ if (rc > 0)
+ rc = 0;
break;
}