-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
+/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#define DEBUG_SUBSYSTEM S_FILTER
-#ifndef AUTOCONF_INCLUDED
-#include <linux/config.h>
-#endif
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/dcache.h>
* at the OST layer there are only (potentially) multiple obd_device of type
* unknown at the time of OST thread creation.
*
- * Instead array of iobuf's is attached to struct filter_obd (->fo_iobuf_pool
- * field). This array has size OST_MAX_THREADS, so that each OST thread uses
- * it's very own iobuf.
+ * We create a cfs_hash for struct filter_obd (->fo_iobuf_hash field) at
+ * initialization; each OST thread creates its own iobuf on first
+ * access and inserts it into ->fo_iobuf_hash with the thread ID as key,
+ * so the iobuf can be found again by thread ID.
*
* Functions below
*
- * filter_kiobuf_pool_init()
+ * filter_iobuf_pool_init()
*
- * filter_kiobuf_pool_done()
+ * filter_iobuf_pool_done()
*
* filter_iobuf_get()
*
*/
static void filter_iobuf_pool_done(struct filter_obd *filter)
{
- struct filter_iobuf **pool;
- int i;
+ ENTRY;
- ENTRY;
-
- pool = filter->fo_iobuf_pool;
- if (pool != NULL) {
- for (i = 0; i < filter->fo_iobuf_count; ++ i) {
- if (pool[i] != NULL)
- filter_free_iobuf(pool[i]);
- }
- OBD_FREE(pool, filter->fo_iobuf_count * sizeof pool[0]);
- filter->fo_iobuf_pool = NULL;
- }
- EXIT;
+ if (filter->fo_iobuf_hash != NULL) { /* nothing to release if the hash was never created */
+ cfs_hash_putref(filter->fo_iobuf_hash); /* NOTE(review): presumably the last put frees the remaining iobufs via the hs_exit callback -- confirm */
+ filter->fo_iobuf_hash = NULL; /* a repeated call then skips the put */
+ }
+ EXIT;
}
static int filter_adapt_sptlrpc_conf(struct obd_device *obd, int initial)
return 0;
}
-/*
- * pre-allocate pool of iobuf's to be used by filter_{prep,commit}rw_write().
- */
-static int filter_iobuf_pool_init(struct filter_obd *filter)
+static unsigned /* hs_hash callback of filter_iobuf_hops */
+filter_iobuf_hop_hash(cfs_hash_t *hs, const void *key, unsigned mask)
{
- void **pool;
+ __u64 val = *((__u64 *)key); /* @key is read as a pointer to a __u64 */
- ENTRY;
+ return cfs_hash_long(val, hs->hs_cur_bits); /* hs->hs_cur_bits is passed; @mask is unused */
+}
+static void * /* hs_key callback: address of the key stored in the iobuf */
+filter_iobuf_hop_key(cfs_hlist_node_t *hnode)
+{
+ struct filter_iobuf *pool;
- OBD_ALLOC_GFP(filter->fo_iobuf_pool, OSS_THREADS_MAX * sizeof(*pool),
- GFP_KERNEL);
- if (filter->fo_iobuf_pool == NULL)
- RETURN(-ENOMEM);
+ pool = cfs_hlist_entry(hnode, struct filter_iobuf, dr_hlist); /* @hnode is the dr_hlist member of a filter_iobuf */
+ return &pool->dr_hkey;
+}
- filter->fo_iobuf_count = OSS_THREADS_MAX;
+static int /* hs_keycmp callback: non-zero when the stored key equals *@key */
+filter_iobuf_hop_keycmp(const void *key, cfs_hlist_node_t *hnode)
+{
+ struct filter_iobuf *pool;
- RETURN(0);
+ pool = cfs_hlist_entry(hnode, struct filter_iobuf, dr_hlist);
+ return pool->dr_hkey == *((__u64 *)key); /* @key is read as a pointer to a __u64 */
}
-/* Return iobuf allocated for @thread_id. We don't know in advance how
- * many threads there will be so we allocate a large empty array and only
- * fill in those slots that are actually in use.
- * If we haven't allocated a pool entry for this thread before, do so now. */
-void *filter_iobuf_get(struct filter_obd *filter, struct obd_trans_info *oti)
+static void * /* hs_object callback: the filter_iobuf that embeds @hnode */
+filter_iobuf_hop_object(cfs_hlist_node_t *hnode)
{
- int thread_id = (oti && oti->oti_thread) ?
- oti->oti_thread->t_id : -1;
- struct filter_iobuf *pool = NULL;
- struct filter_iobuf **pool_place = NULL;
+ return cfs_hlist_entry(hnode, struct filter_iobuf, dr_hlist);
+}
- if (thread_id >= 0) {
- LASSERT(thread_id < filter->fo_iobuf_count);
- pool = *(pool_place = &filter->fo_iobuf_pool[thread_id]);
- }
+static void
+filter_iobuf_hop_get(cfs_hash_t *hs, cfs_hlist_node_t *hnode)
+{
+ /* dummy, required by cfs_hash; deliberately a no-op -- the table is
+  * created with CFS_HASH_NO_ITEMREF in filter_iobuf_pool_init() */
+}
- if (unlikely(pool == NULL)) {
- pool = filter_alloc_iobuf(filter, OBD_BRW_WRITE,
- PTLRPC_MAX_BRW_PAGES);
- if (pool_place != NULL)
- *pool_place = pool;
- }
+static void
+filter_iobuf_hop_put_locked(cfs_hash_t *hs, cfs_hlist_node_t *hnode)
+{
+ /* dummy, required by cfs_hash; deliberately a no-op -- the table is
+  * created with CFS_HASH_NO_ITEMREF in filter_iobuf_pool_init() */
+}
+
+static void /* hs_exit callback: frees the iobuf that embeds @hnode */
+filter_iobuf_hop_exit(cfs_hash_t *hs, cfs_hlist_node_t *hnode)
+{
+ struct filter_iobuf *pool;
+
+ pool = cfs_hlist_entry(hnode, struct filter_iobuf, dr_hlist);
+ filter_free_iobuf(pool); /* NOTE(review): presumably run by cfs_hash for entries still present at teardown -- confirm */
+}
+
+static struct cfs_hash_ops filter_iobuf_hops = { /* callbacks passed to cfs_hash_create() for ->fo_iobuf_hash */
+ .hs_hash = filter_iobuf_hop_hash,
+ .hs_key = filter_iobuf_hop_key,
+ .hs_keycmp = filter_iobuf_hop_keycmp,
+ .hs_object = filter_iobuf_hop_object,
+ .hs_get = filter_iobuf_hop_get, /* no-op */
+ .hs_put_locked = filter_iobuf_hop_put_locked, /* no-op */
+ .hs_exit = filter_iobuf_hop_exit /* frees the iobuf */
+};
+
+#define FILTER_IOBUF_HASH_BITS 9 /* passed twice to cfs_hash_create(); presumably current and maximum bits -- confirm */
+#define FILTER_IOBUF_HBKT_BITS 4 /* passed to cfs_hash_create() after the two HASH_BITS arguments */
- return pool;
+/*
+ * Create ->fo_iobuf_hash, the table of per-thread iobuf's to be used by
+ * filter_{prep,commit}rw_write(). No iobuf is allocated here;
+ * filter_iobuf_get() allocates one per thread on first use.
+ *
+ * Returns 0 on success, or -ENOMEM if the hash could not be created.
+ */
+static int filter_iobuf_pool_init(struct filter_obd *filter)
+{
+ filter->fo_iobuf_hash = cfs_hash_create("filter_iobuf",
+ FILTER_IOBUF_HASH_BITS,
+ FILTER_IOBUF_HASH_BITS,
+ FILTER_IOBUF_HBKT_BITS, 0,
+ CFS_HASH_MIN_THETA,
+ CFS_HASH_MAX_THETA,
+ &filter_iobuf_hops,
+ CFS_HASH_RW_BKTLOCK |
+ CFS_HASH_NO_ITEMREF);
+
+ return filter->fo_iobuf_hash != NULL ? 0 : -ENOMEM;
+}
+
+/* Return iobuf allocated for @thread_id.
+ * If we haven't allocated a pool entry for this thread before, do so now and
+ * insert it into fo_iobuf_hash, otherwise we can find it from fo_iobuf_hash.
+ *
+ * The hash key is (svcpt->scp_cpt << 32) | thread ID, both taken from
+ * oti->oti_thread.
+ * If @oti or oti->oti_thread is NULL, a new iobuf is allocated on every
+ * call and is not inserted into the hash.
+ * NOTE(review): presumably the caller must free such an iobuf -- confirm.
+ *
+ * Returns NULL if filter_alloc_iobuf() fails. */
+void *filter_iobuf_get(struct filter_obd *filter, struct obd_trans_info *oti)
+{
+ struct filter_iobuf *pool = NULL;
+ __u64 key = 0;
+ int thread_id;
+ int rc;
+
+ thread_id = (oti && oti->oti_thread) ? oti->oti_thread->t_id : -1;
+ if (thread_id >= 0) {
+ struct ptlrpc_service_part *svcpt;
+
+ svcpt = oti->oti_thread->t_svcpt;
+ LASSERT(svcpt != NULL);
+
+ key = (__u64)(svcpt->scp_cpt) << 32 | thread_id;
+ pool = cfs_hash_lookup(filter->fo_iobuf_hash, &key);
+ if (pool != NULL)
+ return pool;
+ }
+
+ pool = filter_alloc_iobuf(filter, OBD_BRW_WRITE, PTLRPC_MAX_BRW_PAGES);
+ if (pool == NULL)
+ return NULL;
+
+ if (thread_id >= 0) {
+ pool->dr_hkey = key;
+ rc = cfs_hash_add_unique(filter->fo_iobuf_hash,
+ &key, &pool->dr_hlist);
+ /* ptlrpc service should guarantee thread ID is unique */
+ LASSERT(rc != -EALREADY);
+ }
+
+ return pool;
}
/* mount the file system (secretly). lustre_cfg parameters are:
{
struct filter_obd *filter = &obd->u.filter;
struct vfsmount *mnt;
+ struct file_system_type *type;
struct lustre_mount_info *lmi;
struct obd_uuid uuid;
__u8 *uuid_ptr;
} else {
/* old path - used by lctl */
CERROR("Using old MDS mount method\n");
- mnt = ll_kern_mount(lustre_cfg_string(lcfg, 2),
- MS_NOATIME|MS_NODIRATIME,
- lustre_cfg_string(lcfg, 1), option);
+ type = get_fs_type(lustre_cfg_string(lcfg, 2));
+ if (!type) {
+ CERROR("get_fs_type failed\n");
+ RETURN(-ENODEV);
+ }
+ mnt = vfs_kern_mount(type, MS_NOATIME|MS_NODIRATIME,
+ lustre_cfg_string(lcfg, 1), option);
+ cfs_module_put(type->owner);
if (IS_ERR(mnt)) {
rc = PTR_ERR(mnt);
LCONSOLE_ERROR_MSG(0x135, "Can't mount disk %s (%d)\n",
GOTO(free_obd_stats, rc);
}
+ rc = lprocfs_job_stats_init(obd, LPROC_FILTER_STATS_LAST,
+ filter_stats_counter_init);
+ if (rc)
+ GOTO(remove_entry_clear, rc);
+
/* 2.6.9 selinux wants a full option page for do_kern_mount (bug6471) */
OBD_PAGE_ALLOC(page, CFS_ALLOC_STD);
if (!page)
- GOTO(remove_entry_clear, rc = -ENOMEM);
+ GOTO(job_stats_fini, rc = -ENOMEM);
addr = (unsigned long)cfs_page_address(page);
clear_page((void *)addr);
memcpy((void *)addr, lustre_cfg_buf(lcfg, 4),
if (rc) {
CERROR("%s: filter_common_setup failed: %d.\n",
obd->obd_name, rc);
- GOTO(remove_entry_clear, rc);
+ GOTO(job_stats_fini, rc);
}
RETURN(0);
+job_stats_fini:
+ lprocfs_job_stats_fini(obd);
remove_entry_clear:
lprocfs_remove_proc_entry("clear", obd->obd_proc_exports_entry);
free_obd_stats:
* We actually do sync in disconnect time, but disconnect
* may not come being marked rq_no_resend = 1.
*/
- llog_sync(ctxt, NULL);
+ llog_sync(ctxt, NULL, OBD_LLOG_FL_EXIT);
/*
* Balance class_import_get() in llog_receptor_accept().
class_import_put(ctxt->loc_imp);
ctxt->loc_imp = NULL;
}
+
+ if (filter->fo_lcm) {
+ llog_recov_thread_fini(filter->fo_lcm, obd->obd_force);
+ filter->fo_lcm = NULL;
+ }
+
cfs_mutex_unlock(&ctxt->loc_mutex);
llog_ctxt_put(ctxt);
}
- if (filter->fo_lcm) {
- cfs_mutex_lock(&ctxt->loc_mutex);
- llog_recov_thread_fini(filter->fo_lcm, obd->obd_force);
- filter->fo_lcm = NULL;
- cfs_mutex_unlock(&ctxt->loc_mutex);
- }
RETURN(filter_olg_fini(&obd->obd_olg));
}
LASSERTF(ctxt != NULL, "ctxt is not null, ctxt idx %d \n",
body->lgdc_ctxt_idx);
- CWARN("%s: Recovery from log "LPX64"/"LPX64":%x\n",
- obd->obd_name, body->lgdc_logid.lgl_oid,
- body->lgdc_logid.lgl_oseq, body->lgdc_logid.lgl_ogen);
+ CDEBUG(D_HA, "%s: Recovery from log "LPX64"/"LPX64":%x\n",
+ obd->obd_name, body->lgdc_logid.lgl_oid,
+ body->lgdc_logid.lgl_oseq, body->lgdc_logid.lgl_ogen);
cfs_spin_lock(&obd->u.filter.fo_flags_lock);
obd->u.filter.fo_mds_ost_sync = 1;
obd_zombie_barrier();
rc = filter_llog_preclean(obd);
+ lprocfs_job_stats_fini(obd);
lprocfs_remove_proc_entry("clear", obd->obd_proc_exports_entry);
lprocfs_free_per_client_stats(obd);
lprocfs_obd_cleanup(obd);
/* Kindly make sure the SKIP_ORPHAN flag is from MDS. */
if (data->ocd_connect_flags & OBD_CONNECT_MDS)
- CWARN("%s: Received MDS connection for group %u\n",
- exp->exp_obd->obd_name, data->ocd_group);
+ CDEBUG(D_HA, "%s: Received MDS connection for group %u\n",
+ exp->exp_obd->obd_name, data->ocd_group);
else if (data->ocd_connect_flags & OBD_CONNECT_SKIP_ORPHAN)
RETURN(-EPROTO);
/* The client set in ocd_cksum_types the checksum types it
* supports. We have to mask off the algorithms that we don't
* support */
- data->ocd_cksum_types &= cksum_types_supported();
+ data->ocd_cksum_types &= cksum_types_supported_server();
- /* 1.6.4- only support CRC32 and didn't set ocd_cksum_types */
- if (unlikely(data->ocd_cksum_types == 0))
- data->ocd_cksum_types = OBD_CKSUM_CRC32;
+ /* 1.6.4 clients are not supported any more */
CDEBUG(D_RPCTRACE, "%s: cli %s supports cksum type %x, return "
"%x\n", exp->exp_obd->obd_name,
ctxt = llog_group_get_ctxt(olg_min,
LLOG_MDS_OST_REPL_CTXT);
if (ctxt) {
- err = llog_sync(ctxt, olg_min->olg_exp);
+ err = llog_sync(ctxt, olg_min->olg_exp, 0);
llog_ctxt_put(ctxt);
if (err) {
CERROR("error flushing logs to MDS: "
EXIT;
}
-static int filter_ping(struct obd_export *exp)
+static int filter_ping(const struct lu_env *env, struct obd_export *exp)
{
filter_fmd_expire(exp);
return 0;
return dchild;
}
-static int filter_getattr(struct obd_export *exp, struct obd_info *oinfo)
+static int filter_getattr(const struct lu_env *env, struct obd_export *exp,
+ struct obd_info *oinfo)
{
struct dentry *dentry = NULL;
struct obd_device *obd;
/* Limit the valid bits in the return data to what we actually use */
oinfo->oi_oa->o_valid = OBD_MD_FLID;
- obdo_from_inode(oinfo->oi_oa, dentry->d_inode, NULL, FILTER_VALID_FLAGS);
+ obdo_from_inode(oinfo->oi_oa, dentry->d_inode, FILTER_VALID_FLAGS);
/* Store inode version in reply */
curr_version = fsfilt_get_version(exp->exp_obd, dentry->d_inode);
}
/* this is called from filter_truncate() until we have filter_punch() */
-int filter_setattr(struct obd_export *exp, struct obd_info *oinfo,
- struct obd_trans_info *oti)
+int filter_setattr(const struct lu_env *env, struct obd_export *exp,
+ struct obd_info *oinfo, struct obd_trans_info *oti)
{
struct obdo *oa = oinfo->oi_oa;
struct lustre_capa *capa = oinfo_capa(oinfo);
oa->o_valid = OBD_MD_FLID;
/* Quota release need uid/gid info */
- obdo_from_inode(oa, dentry->d_inode, NULL,
+ obdo_from_inode(oa, dentry->d_inode,
FILTER_VALID_FLAGS | OBD_MD_FLUID | OBD_MD_FLGID);
+ filter_counter_incr(exp, LPROC_FILTER_STATS_SETATTR,
+ oti ? oti->oti_jobid : NULL, 1);
EXIT;
out_unlock:
f_dput(dentry);
for (id = last; id > oa->o_id; id--) {
doa.o_id = id;
- rc = filter_destroy(exp, &doa, NULL, NULL, NULL, NULL);
+ rc = filter_destroy(NULL, exp, &doa, NULL, NULL, NULL, NULL);
if (rc && rc != -ENOENT) /* this is pretty fatal... */
CEMERG("error destroying precreate objid "LPU64": %d\n",
id, rc);
CDEBUG(D_HA, "filter_last_id() = "LPU64" -> diff = %d\n",
last, diff);
- if (-diff > OST_MAX_PRECREATE) {
+ if (-diff > (OST_MAX_PRECREATE * 3) / 2) {
CERROR("%s: ignoring bogus orphan destroy request: "
"obdid "LPU64" last_id "LPU64"\n", obd->obd_name,
oa->o_id, last);
return rc;
}
-static int filter_statfs(struct obd_device *obd, struct obd_statfs *osfs,
- __u64 max_age, __u32 flags)
+static int filter_statfs(const struct lu_env *env, struct obd_export *exp,
+ struct obd_statfs *osfs, __u64 max_age, __u32 flags)
{
+ struct obd_device *obd = class_exp2obd(exp);
struct filter_obd *filter = &obd->u.filter;
int blockbits = obd->u.obt.obt_sb->s_blocksize_bits;
struct lr_server_data *lsd = class_server_data(obd);
OBD_ALLOC(osfs, sizeof(*osfs));
if (osfs == NULL)
RETURN(-ENOMEM);
- rc = filter_statfs(obd, osfs,
+ rc = filter_statfs(NULL, obd->obd_self_export, osfs,
cfs_time_shift_64(-OBD_STATFS_CACHE_SECONDS),
0);
if (rc == 0 && osfs->os_bavail < (osfs->os_blocks >> 10)) {
RETURN(rc);
}
-int filter_create(struct obd_export *exp, struct obdo *oa,
- struct lov_stripe_md **ea, struct obd_trans_info *oti)
+int filter_create(const struct lu_env *env, struct obd_export *exp,
+ struct obdo *oa, struct lov_stripe_md **ea,
+ struct obd_trans_info *oti)
{
struct obd_device *obd = exp->exp_obd;
struct filter_export_data *fed;
RETURN(rc);
}
-int filter_destroy(struct obd_export *exp, struct obdo *oa,
- struct lov_stripe_md *md, struct obd_trans_info *oti,
- struct obd_export *md_exp, void *capa)
+int filter_destroy(const struct lu_env *env, struct obd_export *exp,
+ struct obdo *oa, struct lov_stripe_md *md,
+ struct obd_trans_info *oti, struct obd_export *md_exp,
+ void *capa)
{
unsigned int qcids[MAXQUOTAS] = {0, 0};
struct obd_device *obd;
cleanup_phase = 4; /* fsfilt_commit */
/* Quota release need uid/gid of inode */
- obdo_from_inode(oa, dchild->d_inode, NULL, OBD_MD_FLUID|OBD_MD_FLGID);
+ obdo_from_inode(oa, dchild->d_inode, OBD_MD_FLUID | OBD_MD_FLGID);
filter_fmd_drop(exp, oa->o_id, oa->o_seq);
}
/* NB start and end are used for punch, but not truncate */
-static int filter_truncate(struct obd_export *exp, struct obd_info *oinfo,
- struct obd_trans_info *oti,
+static int filter_truncate(const struct lu_env *env, struct obd_export *exp,
+ struct obd_info *oinfo, struct obd_trans_info *oti,
struct ptlrpc_request_set *rqset)
{
int rc;
oinfo->oi_policy.l_extent.start);
oinfo->oi_oa->o_size = oinfo->oi_policy.l_extent.start;
- rc = filter_setattr(exp, oinfo, oti);
+ rc = filter_setattr(env, exp, oinfo, oti);
RETURN(rc);
}
-static int filter_sync(struct obd_export *exp, struct obd_info *oinfo,
- obd_off start, obd_off end,
+static int filter_sync(const struct lu_env *env, struct obd_export *exp,
+ struct obd_info *oinfo, obd_off start, obd_off end,
struct ptlrpc_request_set *set)
{
struct lvfs_run_ctxt saved;
UNLOCK_INODE_MUTEX(dentry->d_inode);
oinfo->oi_oa->o_valid = OBD_MD_FLID;
- obdo_from_inode(oinfo->oi_oa, dentry->d_inode, NULL,
- FILTER_VALID_FLAGS);
+ obdo_from_inode(oinfo->oi_oa, dentry->d_inode, FILTER_VALID_FLAGS);
pop_ctxt(&saved, &exp->exp_obd->obd_lvfs_ctxt, NULL);
+ filter_counter_incr(exp, LPROC_FILTER_STATS_SYNC, oinfo->oi_jobid, 1);
f_dput(dentry);
RETURN(rc);
}
-static int filter_get_info(struct obd_export *exp, __u32 keylen,
- void *key, __u32 *vallen, void *val,
+static int filter_get_info(const struct lu_env *env, struct obd_export *exp,
+ __u32 keylen, void *key, __u32 *vallen, void *val,
struct lov_stripe_md *lsm)
{
struct obd_device *obd;
RETURN(rc);
}
-static int filter_set_info_async(struct obd_export *exp, __u32 keylen,
+static int filter_set_info_async(const struct lu_env *env,
+ struct obd_export *exp, __u32 keylen,
void *key, __u32 vallen, void *val,
struct ptlrpc_request_set *set)
{
RETURN(0);
}
-static int filter_health_check(struct obd_device *obd)
+static int filter_health_check(const struct lu_env *env, struct obd_device *obd)
{
#ifdef USE_HEALTH_CHECK_WRITE
struct filter_obd *filter = &obd->u.filter;
.o_create = filter_create,
.o_setattr = filter_setattr,
.o_destroy = filter_destroy,
- .o_brw = filter_brw,
.o_punch = filter_truncate,
.o_sync = filter_sync,
.o_preprw = filter_preprw,