Whamcloud - gitweb
LU-812 kernel: AUTOCONF_INCLUDED removed
[fs/lustre-release.git] / lustre / obdfilter / filter.c
index 30b7156..422db94 100644 (file)
@@ -51,9 +51,6 @@
 
 #define DEBUG_SUBSYSTEM S_FILTER
 
-#ifndef AUTOCONF_INCLUDED
-#include <linux/config.h>
-#endif
 #include <linux/module.h>
 #include <linux/fs.h>
 #include <linux/dcache.h>
@@ -1848,15 +1845,16 @@ static int filter_intent_policy(struct ldlm_namespace *ns,
  * at the OST layer there are only (potentially) multiple obd_device of type
  * unknown at the time of OST thread creation.
  *
- * Instead array of iobuf's is attached to struct filter_obd (->fo_iobuf_pool
- * field). This array has size OST_MAX_THREADS, so that each OST thread uses
- * it's very own iobuf.
+ * We create a cfs_hash for struct filter_obd (->fo_iobuf_hash field) on
+ * initializing, each OST thread will create it's own iobuf on the first
+ * access and insert it into ->fo_iobuf_hash with thread ID as key,
+ * so the iobuf can be found again by thread ID.
  *
  * Functions below
  *
- *     filter_kiobuf_pool_init()
+ *     filter_iobuf_pool_init()
  *
- *     filter_kiobuf_pool_done()
+ *     filter_iobuf_pool_done()
  *
  *     filter_iobuf_get()
  *
@@ -1869,21 +1867,13 @@ static int filter_intent_policy(struct ldlm_namespace *ns,
  */
 static void filter_iobuf_pool_done(struct filter_obd *filter)
 {
-        struct filter_iobuf **pool;
-        int i;
-
-        ENTRY;
+       ENTRY;
 
-        pool = filter->fo_iobuf_pool;
-        if (pool != NULL) {
-                for (i = 0; i < filter->fo_iobuf_count; ++ i) {
-                        if (pool[i] != NULL)
-                                filter_free_iobuf(pool[i]);
-                }
-                OBD_FREE(pool, filter->fo_iobuf_count * sizeof pool[0]);
-                filter->fo_iobuf_pool = NULL;
-        }
-        EXIT;
+       if (filter->fo_iobuf_hash != NULL) {
+               cfs_hash_putref(filter->fo_iobuf_hash);
+               filter->fo_iobuf_hash = NULL;
+       }
+       EXIT;
 }
 
 static int filter_adapt_sptlrpc_conf(struct obd_device *obd, int initial)
@@ -1910,50 +1900,126 @@ static int filter_adapt_sptlrpc_conf(struct obd_device *obd, int initial)
         return 0;
 }
 
-/*
- * pre-allocate pool of iobuf's to be used by filter_{prep,commit}rw_write().
- */
-static int filter_iobuf_pool_init(struct filter_obd *filter)
+static unsigned
+filter_iobuf_hop_hash(cfs_hash_t *hs, const void *key, unsigned mask)
 {
-        void **pool;
+       __u64   val = *((__u64 *)key);
 
-        ENTRY;
+       return cfs_hash_long(val, hs->hs_cur_bits);
+}
 
+static void *
+filter_iobuf_hop_key(cfs_hlist_node_t *hnode)
+{
+       struct filter_iobuf     *pool;
 
-        OBD_ALLOC_GFP(filter->fo_iobuf_pool, OSS_THREADS_MAX * sizeof(*pool),
-                      GFP_KERNEL);
-        if (filter->fo_iobuf_pool == NULL)
-                RETURN(-ENOMEM);
+       pool = cfs_hlist_entry(hnode, struct filter_iobuf, dr_hlist);
+       return &pool->dr_hkey;
+}
 
-        filter->fo_iobuf_count = OSS_THREADS_MAX;
+static int
+filter_iobuf_hop_keycmp(const void *key, cfs_hlist_node_t *hnode)
+{
+       struct filter_iobuf     *pool;
 
-        RETURN(0);
+       pool = cfs_hlist_entry(hnode, struct filter_iobuf, dr_hlist);
+       return pool->dr_hkey == *((__u64 *)key);
 }
 
-/* Return iobuf allocated for @thread_id.  We don't know in advance how
- * many threads there will be so we allocate a large empty array and only
- * fill in those slots that are actually in use.
- * If we haven't allocated a pool entry for this thread before, do so now. */
-void *filter_iobuf_get(struct filter_obd *filter, struct obd_trans_info *oti)
+static void *
+filter_iobuf_hop_object(cfs_hlist_node_t *hnode)
 {
-        int thread_id                    = (oti && oti->oti_thread) ?
-                                           oti->oti_thread->t_id : -1;
-        struct filter_iobuf  *pool       = NULL;
-        struct filter_iobuf **pool_place = NULL;
+       return cfs_hlist_entry(hnode, struct filter_iobuf, dr_hlist);
+}
 
-        if (thread_id >= 0) {
-                LASSERT(thread_id < filter->fo_iobuf_count);
-                pool = *(pool_place = &filter->fo_iobuf_pool[thread_id]);
-        }
+static void
+filter_iobuf_hop_get(cfs_hash_t *hs, cfs_hlist_node_t *hnode)
+{
+       /* dummy, required by cfs_hash */
+}
 
-        if (unlikely(pool == NULL)) {
-                pool = filter_alloc_iobuf(filter, OBD_BRW_WRITE,
-                                          PTLRPC_MAX_BRW_PAGES);
-                if (pool_place != NULL)
-                        *pool_place = pool;
-        }
+static void
+filter_iobuf_hop_put_locked(cfs_hash_t *hs, cfs_hlist_node_t *hnode)
+{
+       /* dummy, required by cfs_hash */
+}
+
+static void
+filter_iobuf_hop_exit(cfs_hash_t *hs, cfs_hlist_node_t *hnode)
+{
+       struct filter_iobuf     *pool;
+
+       pool = cfs_hlist_entry(hnode, struct filter_iobuf, dr_hlist);
+       filter_free_iobuf(pool);
+}
+
+static struct cfs_hash_ops filter_iobuf_hops = {
+       .hs_hash        = filter_iobuf_hop_hash,
+       .hs_key         = filter_iobuf_hop_key,
+       .hs_keycmp      = filter_iobuf_hop_keycmp,
+       .hs_object      = filter_iobuf_hop_object,
+       .hs_get         = filter_iobuf_hop_get,
+       .hs_put_locked  = filter_iobuf_hop_put_locked,
+       .hs_exit        = filter_iobuf_hop_exit
+};
 
-        return pool;
+#define FILTER_IOBUF_HASH_BITS 9
+#define FILTER_IOBUF_HBKT_BITS 4
+
+/*
+ * pre-allocate pool of iobuf's to be used by filter_{prep,commit}rw_write().
+ */
+static int filter_iobuf_pool_init(struct filter_obd *filter)
+{
+       filter->fo_iobuf_hash = cfs_hash_create("filter_iobuf",
+                                               FILTER_IOBUF_HASH_BITS,
+                                               FILTER_IOBUF_HASH_BITS,
+                                               FILTER_IOBUF_HBKT_BITS, 0,
+                                               CFS_HASH_MIN_THETA,
+                                               CFS_HASH_MAX_THETA,
+                                               &filter_iobuf_hops,
+                                               CFS_HASH_RW_BKTLOCK |
+                                               CFS_HASH_NO_ITEMREF);
+
+       return filter->fo_iobuf_hash != NULL ? 0 : -ENOMEM;
+}
+
+/* Return iobuf allocated for @thread_id.
+ * If we haven't allocated a pool entry for this thread before, do so now and
+ * insert it into fo_iobuf_hash, otherwise we can find it from fo_iobuf_hash */
+void *filter_iobuf_get(struct filter_obd *filter, struct obd_trans_info *oti)
+{
+       struct filter_iobuf     *pool = NULL;
+       __u64                   key = 0;
+       int                     thread_id;
+       int                     rc;
+
+       thread_id = (oti && oti->oti_thread) ? oti->oti_thread->t_id : -1;
+       if (thread_id >= 0) {
+               struct ptlrpc_service_part *svcpt;
+
+               svcpt = oti->oti_thread->t_svcpt;
+               LASSERT(svcpt != NULL);
+
+               key = (__u64)(svcpt->scp_cpt) << 32 | thread_id;
+               pool = cfs_hash_lookup(filter->fo_iobuf_hash, &key);
+               if (pool != NULL)
+                       return pool;
+       }
+
+       pool = filter_alloc_iobuf(filter, OBD_BRW_WRITE, PTLRPC_MAX_BRW_PAGES);
+       if (pool == NULL)
+               return NULL;
+
+       if (thread_id >= 0) {
+               pool->dr_hkey = key;
+               rc = cfs_hash_add_unique(filter->fo_iobuf_hash,
+                                        &key, &pool->dr_hlist);
+               /* ptlrpc service thould guarantee thread ID is unique */
+               LASSERT(rc != -EALREADY);
+       }
+
+       return pool;
 }
 
 /* mount the file system (secretly).  lustre_cfg parameters are:
@@ -2224,10 +2290,15 @@ static int filter_setup(struct obd_device *obd, struct lustre_cfg* lcfg)
                 GOTO(free_obd_stats, rc);
         }
 
+       rc = lprocfs_job_stats_init(obd, LPROC_FILTER_STATS_LAST,
+                                   filter_stats_counter_init);
+       if (rc)
+               GOTO(remove_entry_clear, rc);
+
         /* 2.6.9 selinux wants a full option page for do_kern_mount (bug6471) */
         OBD_PAGE_ALLOC(page, CFS_ALLOC_STD);
         if (!page)
-                GOTO(remove_entry_clear, rc = -ENOMEM);
+               GOTO(job_stats_fini, rc = -ENOMEM);
         addr = (unsigned long)cfs_page_address(page);
         clear_page((void *)addr);
         memcpy((void *)addr, lustre_cfg_buf(lcfg, 4),
@@ -2237,11 +2308,13 @@ static int filter_setup(struct obd_device *obd, struct lustre_cfg* lcfg)
         if (rc) {
                 CERROR("%s: filter_common_setup failed: %d.\n",
                        obd->obd_name, rc);
-                GOTO(remove_entry_clear, rc);
+               GOTO(job_stats_fini, rc);
         }
 
         RETURN(0);
 
+job_stats_fini:
+       lprocfs_job_stats_fini(obd);
 remove_entry_clear:
         lprocfs_remove_proc_entry("clear", obd->obd_proc_exports_entry);
 free_obd_stats:
@@ -2401,7 +2474,7 @@ static int filter_llog_finish(struct obd_device *obd, int count)
                  * We actually do sync in disconnect time, but disconnect
                  * may not come being marked rq_no_resend = 1.
                  */
-                llog_sync(ctxt, NULL);
+               llog_sync(ctxt, NULL, OBD_LLOG_FL_EXIT);
 
                 /*
                  * Balance class_import_get() in llog_receptor_accept().
@@ -2413,16 +2486,16 @@ static int filter_llog_finish(struct obd_device *obd, int count)
                         class_import_put(ctxt->loc_imp);
                         ctxt->loc_imp = NULL;
                 }
+
+               if (filter->fo_lcm) {
+                       llog_recov_thread_fini(filter->fo_lcm, obd->obd_force);
+                       filter->fo_lcm = NULL;
+               }
+
                 cfs_mutex_unlock(&ctxt->loc_mutex);
                 llog_ctxt_put(ctxt);
         }
 
-        if (filter->fo_lcm) {
-                cfs_mutex_lock(&ctxt->loc_mutex);
-                llog_recov_thread_fini(filter->fo_lcm, obd->obd_force);
-                filter->fo_lcm = NULL;
-                cfs_mutex_unlock(&ctxt->loc_mutex);
-        }
         RETURN(filter_olg_fini(&obd->obd_olg));
 }
 
@@ -2548,9 +2621,9 @@ static int filter_llog_connect(struct obd_export *exp,
         LASSERTF(ctxt != NULL, "ctxt is not null, ctxt idx %d \n",
                  body->lgdc_ctxt_idx);
 
-        CWARN("%s: Recovery from log "LPX64"/"LPX64":%x\n",
-              obd->obd_name, body->lgdc_logid.lgl_oid,
-              body->lgdc_logid.lgl_oseq, body->lgdc_logid.lgl_ogen);
+        CDEBUG(D_HA, "%s: Recovery from log "LPX64"/"LPX64":%x\n",
+               obd->obd_name, body->lgdc_logid.lgl_oid,
+               body->lgdc_logid.lgl_oseq, body->lgdc_logid.lgl_ogen);
 
         cfs_spin_lock(&obd->u.filter.fo_flags_lock);
         obd->u.filter.fo_mds_ost_sync = 1;
@@ -2618,6 +2691,7 @@ static int filter_precleanup(struct obd_device *obd,
                 obd_zombie_barrier();
 
                 rc = filter_llog_preclean(obd);
+               lprocfs_job_stats_fini(obd);
                 lprocfs_remove_proc_entry("clear", obd->obd_proc_exports_entry);
                 lprocfs_free_per_client_stats(obd);
                 lprocfs_obd_cleanup(obd);
@@ -2693,8 +2767,8 @@ static int filter_connect_internal(struct obd_export *exp,
 
         /* Kindly make sure the SKIP_ORPHAN flag is from MDS. */
         if (data->ocd_connect_flags & OBD_CONNECT_MDS)
-                CWARN("%s: Received MDS connection for group %u\n",
-                      exp->exp_obd->obd_name, data->ocd_group);
+                CDEBUG(D_HA, "%s: Received MDS connection for group %u\n",
+                       exp->exp_obd->obd_name, data->ocd_group);
         else if (data->ocd_connect_flags & OBD_CONNECT_SKIP_ORPHAN)
                 RETURN(-EPROTO);
 
@@ -2769,11 +2843,9 @@ static int filter_connect_internal(struct obd_export *exp,
                 /* The client set in ocd_cksum_types the checksum types it
                  * supports. We have to mask off the algorithms that we don't
                  * support */
-                data->ocd_cksum_types &= cksum_types_supported();
+               data->ocd_cksum_types &= cksum_types_supported_server();
 
-                /* 1.6.4- only support CRC32 and didn't set ocd_cksum_types */
-                if (unlikely(data->ocd_cksum_types == 0))
-                        data->ocd_cksum_types = OBD_CKSUM_CRC32;
+               /* 1.6.4 clients are not supported any more */
 
                 CDEBUG(D_RPCTRACE, "%s: cli %s supports cksum type %x, return "
                                    "%x\n", exp->exp_obd->obd_name,
@@ -3058,7 +3130,7 @@ static void filter_sync_llogs(struct obd_device *obd, struct obd_export *dexp)
                         ctxt = llog_group_get_ctxt(olg_min,
                                                    LLOG_MDS_OST_REPL_CTXT);
                         if (ctxt) {
-                                err = llog_sync(ctxt, olg_min->olg_exp);
+                               err = llog_sync(ctxt, olg_min->olg_exp, 0);
                                 llog_ctxt_put(ctxt);
                                 if (err) {
                                         CERROR("error flushing logs to MDS: "
@@ -3114,7 +3186,7 @@ static void filter_revimp_update(struct obd_export *exp)
         EXIT;
 }
 
-static int filter_ping(struct obd_export *exp)
+static int filter_ping(const struct lu_env *env, struct obd_export *exp)
 {
         filter_fmd_expire(exp);
         return 0;
@@ -3160,7 +3232,8 @@ struct dentry *__filter_oa2dentry(struct obd_device *obd, struct ost_id *ostid,
         return dchild;
 }
 
-static int filter_getattr(struct obd_export *exp, struct obd_info *oinfo)
+static int filter_getattr(const struct lu_env *env, struct obd_export *exp,
+                          struct obd_info *oinfo)
 {
         struct dentry *dentry = NULL;
         struct obd_device *obd;
@@ -3417,8 +3490,8 @@ out_unlock:
 }
 
 /* this is called from filter_truncate() until we have filter_punch() */
-int filter_setattr(struct obd_export *exp, struct obd_info *oinfo,
-                   struct obd_trans_info *oti)
+int filter_setattr(const struct lu_env *env, struct obd_export *exp,
+                   struct obd_info *oinfo, struct obd_trans_info *oti)
 {
         struct obdo *oa = oinfo->oi_oa;
         struct lustre_capa *capa = oinfo_capa(oinfo);
@@ -3512,6 +3585,8 @@ int filter_setattr(struct obd_export *exp, struct obd_info *oinfo,
         obdo_from_inode(oa, dentry->d_inode,
                         FILTER_VALID_FLAGS | OBD_MD_FLUID | OBD_MD_FLGID);
 
+       filter_counter_incr(exp, LPROC_FILTER_STATS_SETATTR,
+                           oti ? oti->oti_jobid : NULL, 1);
         EXIT;
 out_unlock:
         f_dput(dentry);
@@ -3609,7 +3684,7 @@ static int filter_destroy_precreated(struct obd_export *exp, struct obdo *oa,
 
         for (id = last; id > oa->o_id; id--) {
                 doa.o_id = id;
-                rc = filter_destroy(exp, &doa, NULL, NULL, NULL, NULL);
+                rc = filter_destroy(NULL, exp, &doa, NULL, NULL, NULL, NULL);
                 if (rc && rc != -ENOENT) /* this is pretty fatal... */
                         CEMERG("error destroying precreate objid "LPU64": %d\n",
                                id, rc);
@@ -3729,9 +3804,10 @@ out:
         return rc;
 }
 
-static int filter_statfs(struct obd_device *obd, struct obd_statfs *osfs,
-                         __u64 max_age, __u32 flags)
+static int filter_statfs(const struct lu_env *env, struct obd_export *exp,
+                         struct obd_statfs *osfs, __u64 max_age, __u32 flags)
 {
+        struct obd_device *obd = class_exp2obd(exp);
         struct filter_obd *filter = &obd->u.filter;
         int blockbits = obd->u.obt.obt_sb->s_blocksize_bits;
         struct lr_server_data *lsd = class_server_data(obd);
@@ -3855,7 +3931,7 @@ static int filter_precreate(struct obd_device *obd, struct obdo *oa,
                 OBD_ALLOC(osfs, sizeof(*osfs));
                 if (osfs == NULL)
                         RETURN(-ENOMEM);
-                rc = filter_statfs(obd, osfs,
+                rc = filter_statfs(NULL, obd->obd_self_export, osfs,
                                    cfs_time_shift_64(-OBD_STATFS_CACHE_SECONDS),
                                    0);
                 if (rc == 0 && osfs->os_bavail < (osfs->os_blocks >> 10)) {
@@ -4057,8 +4133,9 @@ set_last_id:
         RETURN(rc);
 }
 
-int filter_create(struct obd_export *exp, struct obdo *oa,
-                  struct lov_stripe_md **ea, struct obd_trans_info *oti)
+int filter_create(const struct lu_env *env, struct obd_export *exp,
+                  struct obdo *oa, struct lov_stripe_md **ea,
+                  struct obd_trans_info *oti)
 {
         struct obd_device *obd = exp->exp_obd;
         struct filter_export_data *fed;
@@ -4137,9 +4214,10 @@ int filter_create(struct obd_export *exp, struct obdo *oa,
         RETURN(rc);
 }
 
-int filter_destroy(struct obd_export *exp, struct obdo *oa,
-                   struct lov_stripe_md *md, struct obd_trans_info *oti,
-                   struct obd_export *md_exp, void *capa)
+int filter_destroy(const struct lu_env *env, struct obd_export *exp,
+                   struct obdo *oa, struct lov_stripe_md *md,
+                   struct obd_trans_info *oti, struct obd_export *md_exp,
+                   void *capa)
 {
         unsigned int qcids[MAXQUOTAS] = {0, 0};
         struct obd_device *obd;
@@ -4333,8 +4411,8 @@ cleanup:
 }
 
 /* NB start and end are used for punch, but not truncate */
-static int filter_truncate(struct obd_export *exp, struct obd_info *oinfo,
-                           struct obd_trans_info *oti,
+static int filter_truncate(const struct lu_env *env, struct obd_export *exp,
+                           struct obd_info *oinfo, struct obd_trans_info *oti,
                            struct ptlrpc_request_set *rqset)
 {
         int rc;
@@ -4351,12 +4429,12 @@ static int filter_truncate(struct obd_export *exp, struct obd_info *oinfo,
                 oinfo->oi_policy.l_extent.start);
 
         oinfo->oi_oa->o_size = oinfo->oi_policy.l_extent.start;
-        rc = filter_setattr(exp, oinfo, oti);
+        rc = filter_setattr(env, exp, oinfo, oti);
         RETURN(rc);
 }
 
-static int filter_sync(struct obd_export *exp, struct obd_info *oinfo,
-                       obd_off start, obd_off end,
+static int filter_sync(const struct lu_env *env, struct obd_export *exp,
+                       struct obd_info *oinfo, obd_off start, obd_off end,
                        struct ptlrpc_request_set *set)
 {
         struct lvfs_run_ctxt saved;
@@ -4408,12 +4486,13 @@ static int filter_sync(struct obd_export *exp, struct obd_info *oinfo,
 
         pop_ctxt(&saved, &exp->exp_obd->obd_lvfs_ctxt, NULL);
 
+       filter_counter_incr(exp, LPROC_FILTER_STATS_SYNC, oinfo->oi_jobid, 1);
         f_dput(dentry);
         RETURN(rc);
 }
 
-static int filter_get_info(struct obd_export *exp, __u32 keylen,
-                           void *key, __u32 *vallen, void *val,
+static int filter_get_info(const struct lu_env *env, struct obd_export *exp,
+                           __u32 keylen, void *key, __u32 *vallen, void *val,
                            struct lov_stripe_md *lsm)
 {
         struct obd_device *obd;
@@ -4569,7 +4648,8 @@ out:
         RETURN(rc);
 }
 
-static int filter_set_info_async(struct obd_export *exp, __u32 keylen,
+static int filter_set_info_async(const struct lu_env *env,
+                                 struct obd_export *exp, __u32 keylen,
                                  void *key, __u32 vallen, void *val,
                                  struct ptlrpc_request_set *set)
 {
@@ -4678,7 +4758,7 @@ int filter_iocontrol(unsigned int cmd, struct obd_export *exp,
         RETURN(0);
 }
 
-static int filter_health_check(struct obd_device *obd)
+static int filter_health_check(const struct lu_env *env, struct obd_device *obd)
 {
 #ifdef USE_HEALTH_CHECK_WRITE
         struct filter_obd *filter = &obd->u.filter;