Whamcloud - gitweb
LU-3126 osd: remove fld lookup during configuration
[fs/lustre-release.git] / lustre / fld / fld_handler.c
index 00c9d12..d54a21a 100644 (file)
-/* -*- MODE: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
+/*
+ * GPL HEADER START
  *
- *  lustre/fld/fld_handler.c
- *  FLD (Fids Location Database)
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
- *  Copyright (C) 2006 Cluster File Systems, Inc.
- *   Author: Yury Umanets <umka@clusterfs.com>
- *           WangDi <wangdi@clusterfs.com>
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
  *
- *   This file is part of the Lustre file system, http://www.lustre.org
- *   Lustre is a trademark of Cluster File Systems, Inc.
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
  *
- *   You may have signed or agreed to another license before downloading
- *   this software.  If so, you are bound by the terms and conditions
- *   of that agreement, and the following does not apply to you.  See the
- *   LICENSE file included with this distribution for more information.
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
  *
- *   If you did not agree to a different license, then this copy of Lustre
- *   is open source software; you can redistribute it and/or modify it
- *   under the terms of version 2 of the GNU General Public License as
- *   published by the Free Software Foundation.
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
  *
- *   In either case, Lustre is distributed in the hope that it will be
- *   useful, but WITHOUT ANY WARRANTY; without even the implied warranty
- *   of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   license text for more details.
+ * GPL HEADER END
  */
-#ifndef EXPORT_SYMTAB
-# define EXPORT_SYMTAB
-#endif
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2013, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/fld/fld_handler.c
+ *
+ * FLD (Fids Location Database)
+ *
+ * Author: Yury Umanets <umka@clusterfs.com>
+ * Author: WangDi <wangdi@clusterfs.com>
+ * Author: Pravin Shelar <pravin.shelar@sun.com>
+ */
+
 #define DEBUG_SUBSYSTEM S_FLD
 
-#ifdef __KERNEL__
-# include <libcfs/libcfs.h>
-# include <linux/module.h>
-# include <linux/jbd.h>
-# include <asm/div64.h>
-#else /* __KERNEL__ */
-# include <liblustre.h>
-# include <libcfs/list.h>
-#endif
+#include <libcfs/libcfs.h>
+#include <linux/module.h>
 
 #include <obd.h>
-#include <obd_class.h>
-#include <lustre_ver.h>
 #include <obd_support.h>
-#include <lprocfs_status.h>
-
+#include <lustre_fid.h>
+#include <lustre_fld.h>
 #include <md_object.h>
 #include <lustre_req_layout.h>
+#include <lprocfs_status.h>
 #include "fld_internal.h"
 
-#ifdef __KERNEL__
-
 /* context key constructor/destructor: fld_key_init, fld_key_fini */
 LU_KEY_INIT_FINI(fld, struct fld_thread_info);
 
 /* context key: fld_thread_key */
-LU_CONTEXT_KEY_DEFINE(fld, LCT_MD_THREAD|LCT_DT_THREAD);
-
-cfs_proc_dir_entry_t *fld_type_proc_dir = NULL;
+/*
+ * LCT_MG_THREAD is part of the tag mask because an MGS thread may create
+ * an llog file, which causes an FLD lookup.
+ */
+LU_CONTEXT_KEY_DEFINE(fld, LCT_MD_THREAD | LCT_DT_THREAD | LCT_MG_THREAD);
 
-static int __init fld_mod_init(void)
+/**
+ * Initialize fld_thread_key and register it as a context key.
+ *
+ * Returns whatever lu_context_key_register() returns.
+ */
+int fld_server_mod_init(void)
 {
-        fld_type_proc_dir = lprocfs_register(LUSTRE_FLD_NAME,
-                                             proc_lustre_root,
-                                             NULL, NULL);
-        if (IS_ERR(fld_type_proc_dir))
-                return PTR_ERR(fld_type_proc_dir);
-
-        LU_CONTEXT_KEY_INIT(&fld_thread_key);
-        lu_context_key_register(&fld_thread_key);
-        return 0;
+       LU_CONTEXT_KEY_INIT(&fld_thread_key);
+       return lu_context_key_register(&fld_thread_key);
 }
 
-static void __exit fld_mod_exit(void)
+/**
+ * Counterpart of fld_server_mod_init(): deregister fld_thread_key.
+ */
+void fld_server_mod_exit(void)
 {
-        lu_context_key_degister(&fld_thread_key);
-        if (fld_type_proc_dir != NULL && !IS_ERR(fld_type_proc_dir)) {
-                lprocfs_remove(&fld_type_proc_dir);
-                fld_type_proc_dir = NULL;
-        }
+       lu_context_key_degister(&fld_thread_key);
 }
 
-/* Insert index entry and update cache. */
-int fld_server_create(struct lu_server_fld *fld,
-                      const struct lu_env *env,
-                      seqno_t seq, mdsno_t mds)
+/**
+ * Declaration step that goes with fld_server_create().
+ *
+ * Passes \a env, \a fld, \a range and the transaction handle \a th straight
+ * to fld_declare_index_create() and returns its result.
+ */
+int fld_declare_server_create(const struct lu_env *env,
+                             struct lu_server_fld *fld,
+                             const struct lu_seq_range *range,
+                             struct thandle *th)
 {
-        int rc;
-        ENTRY;
-        
-        rc = fld_index_create(fld, env, seq, mds);
-        
-        if (rc == 0) {
-                /*
-                 * Do not return result of calling fld_cache_insert()
-                 * here. First of all because it may return -EEXISTS. Another
-                 * reason is that, we do not want to stop proceeding even after
-                 * cache errors.
-                 */
-                fld_cache_insert(fld->lsf_cache, seq, mds);
-        }
+       int rc;
+       ENTRY;
 
-        RETURN(rc);
+       rc = fld_declare_index_create(env, fld, range, th);
+       RETURN(rc);
 }
-EXPORT_SYMBOL(fld_server_create);
+EXPORT_SYMBOL(fld_declare_server_create);
 
-/* Delete index entry. */
-int fld_server_delete(struct lu_server_fld *fld,
-                      const struct lu_env *env,
-                      seqno_t seq)
+/**
+ * Insert FLD index entry and update FLD cache.
+ *
+ * This function is called from the sequence allocator when a super-sequence
+ * is granted to a server.
+ *
+ * The insertion itself is done by fld_index_create(), which is called with
+ * fld->lsf_lock held; its result is returned unchanged.
+ */
+int fld_server_create(const struct lu_env *env, struct lu_server_fld *fld,
+                     const struct lu_seq_range *range, struct thandle *th)
 {
-        int rc;
-        ENTRY;
+       int rc;
+       ENTRY;
+
+       mutex_lock(&fld->lsf_lock);
+       rc = fld_index_create(env, fld, range, th);
+       mutex_unlock(&fld->lsf_lock);
 
-        fld_cache_delete(fld->lsf_cache, seq);
-        rc = fld_index_delete(fld, env, seq);
-        
-        RETURN(rc);
+       RETURN(rc);
 }
-EXPORT_SYMBOL(fld_server_delete);
+EXPORT_SYMBOL(fld_server_create);
 
-/* Lookup mds by seq. */
-int fld_server_lookup(struct lu_server_fld *fld,
-                      const struct lu_env *env,
-                      seqno_t seq, mdsno_t *mds)
+/**
+ * Lookup mds by seq, returns a range for given seq.
+ *
+ * The FLD cache is tried first.  On a hit the cached range is copied to
+ * \a range, unless its type differs from the type requested in \a range and
+ * \a range does not accept any type; that mismatch fails with -EIO.
+ *
+ * On a cache miss, a server that owns the FLD index (fld->lsf_obj is set)
+ * fails with -EIO, because all of its entries are expected to be cached.
+ * Any other server sends the request to the super sequence controller node
+ * (MDT0) through fld->lsf_control_exp and caches the answer; this cache is
+ * not persistent.  If that export is not set yet, the lookup fails with
+ * -EIO as well.
+ *
+ * \a range is in/out: on entry its flags carry the requested range type,
+ * on success it holds the range found for \a seq.
+ */
+int fld_server_lookup(const struct lu_env *env, struct lu_server_fld *fld,
+                     seqno_t seq, struct lu_seq_range *range)
 {
-        int rc;
-        ENTRY;
-        
-        /* Lookup it in the cache. */
-        rc = fld_cache_lookup(fld->lsf_cache, seq, mds);
-        if (rc == 0)
-                RETURN(0);
-
-        rc = fld_index_lookup(fld, env, seq, mds);
-        if (rc == 0) {
-                /*
-                 * Do not return error here as well. See previous comment in
-                 * same situation in function fld_server_create().
-                 */
-                fld_cache_insert(fld->lsf_cache, seq, *mds);
-        }
-        RETURN(rc);
+       struct lu_seq_range *erange;
+       struct fld_thread_info *info;
+       int rc;
+       ENTRY;
+
+       info = lu_context_key_get(&env->le_ctx, &fld_thread_key);
+       LASSERT(info != NULL);
+       /* per-thread scratch range, so \a range is untouched on a mismatch */
+       erange = &info->fti_lrange;
+
+       /* Look it up in the cache first. */
+       rc = fld_cache_lookup(fld->lsf_cache, seq, erange);
+       if (rc == 0) {
+               if (unlikely(fld_range_type(erange) != fld_range_type(range) &&
+                            !fld_range_is_any(range))) {
+                       CERROR("%s: FLD cache range "DRANGE" does not match "
+                              "requested flag %x: rc = %d\n", fld->lsf_name,
+                              PRANGE(erange), range->lsr_flags, -EIO);
+                       RETURN(-EIO);
+               }
+               *range = *erange;
+               RETURN(0);
+       }
+
+       if (fld->lsf_obj) {
+               /* On server side, all entries should be in cache.
+                * If we can not find it in cache, just return error */
+               CERROR("%s: Cannot find sequence "LPX64": rc = %d\n",
+                      fld->lsf_name, seq, -EIO);
+               RETURN(-EIO);
+       } else {
+               if (fld->lsf_control_exp == NULL) {
+                       CERROR("%s: lookup "LPX64", but not connects to MDT0 "
+                              "yet: rc = %d.\n", fld->lsf_name, seq, -EIO);
+                       RETURN(-EIO);
+               }
+               /* send request to mdt0 i.e. super seq. controller.
+                * This is temporary solution, long term solution is fld
+                * replication on all mdt servers.
+                */
+               range->lsr_start = seq;
+               rc = fld_client_rpc(fld->lsf_control_exp,
+                                   range, FLD_LOOKUP);
+               /* the result of the cache insert is not reported to caller */
+               if (rc == 0)
+                       fld_cache_insert(fld->lsf_cache, range);
+       }
+       RETURN(rc);
 }
 EXPORT_SYMBOL(fld_server_lookup);
 
+/**
+ * All MDT servers handle FLD lookup requests, but only MDT0 has the FLD
+ * index.  If an entry is not found in the cache, the lookup request needs
+ * to be forwarded to MDT0.
+ *
+ * FLD_LOOKUP is the only opcode handled: it is passed to
+ * fld_server_lookup() with range->lsr_start as the sequence, and \a range
+ * receives the result.  Any other opcode yields -EINVAL.
+ */
 static int fld_server_handle(struct lu_server_fld *fld,
-                             const struct lu_env *env,
-                             __u32 opc, struct md_fld *mf,
-                             struct fld_thread_info *info)
-{
-        int rc;
-        ENTRY;
-
-        switch (opc) {
-        case FLD_CREATE:
-                rc = fld_server_create(fld, env,
-                                       mf->mf_seq, mf->mf_mds);
-
-                /* Do not return -EEXIST error for resent case */
-                if ((info->fti_flags & MSG_RESENT) && rc == -EEXIST)
-                        rc = 0;
-                break;
-        case FLD_DELETE:
-                rc = fld_server_delete(fld, env, mf->mf_seq);
-
-                /* Do not return -ENOENT error for resent case */
-                if ((info->fti_flags & MSG_RESENT) && rc == -ENOENT)
-                        rc = 0;
-                break;
-        case FLD_LOOKUP:
-                rc = fld_server_lookup(fld, env,
-                                       mf->mf_seq, &mf->mf_mds);
-                break;
-        default:
-                rc = -EINVAL;
-                break;
-        }
-
-        CDEBUG(D_INFO, "%s: FLD req handle: error %d (opc: %d, seq: "
-               LPX64", mds: "LPU64")\n", fld->lsf_name, rc, opc,
-               mf->mf_seq, mf->mf_mds);
-        
-        RETURN(rc);
-
-}
-
-static int fld_req_handle(struct ptlrpc_request *req,
-                          struct fld_thread_info *info)
-{
-        struct lu_site *site;
-        struct md_fld *in;
-        struct md_fld *out;
-        int rc;
-        __u32 *opc;
-        ENTRY;
-
-        site = req->rq_export->exp_obd->obd_lu_dev->ld_site;
-
-        rc = req_capsule_pack(&info->fti_pill);
-        if (rc)
-                RETURN(err_serious(rc));
-
-        opc = req_capsule_client_get(&info->fti_pill, &RMF_FLD_OPC);
-        if (opc != NULL) {
-                in = req_capsule_client_get(&info->fti_pill, &RMF_FLD_MDFLD);
-                if (in == NULL)
-                        RETURN(err_serious(-EPROTO));
-                out = req_capsule_server_get(&info->fti_pill, &RMF_FLD_MDFLD);
-                if (out == NULL)
-                        RETURN(err_serious(-EPROTO));
-                *out = *in;
-
-                rc = fld_server_handle(site->ls_server_fld,
-                                       req->rq_svc_thread->t_env,
-                                       *opc, out, info);
-        } else
-                rc = err_serious(-EPROTO);
-
-        RETURN(rc);
-}
-
-static void fld_thread_info_init(struct ptlrpc_request *req,
-                                 struct fld_thread_info *info)
+                            const struct lu_env *env,
+                            __u32 opc, struct lu_seq_range *range)
 {
-        int i;
+       int rc;
 
-        info->fti_flags = lustre_msg_get_flags(req->rq_reqmsg);
+       ENTRY;
 
-        /* Mark rep buffer as req-layout stuff expects. */
-        for (i = 0; i < ARRAY_SIZE(info->fti_rep_buf_size); i++)
-                info->fti_rep_buf_size[i] = -1;
+       switch (opc) {
+       case FLD_LOOKUP:
+               rc = fld_server_lookup(env, fld, range->lsr_start, range);
+               break;
+       default:
+               rc = -EINVAL;
+               break;
+       }
 
-        /* Init request capsule. */
-        req_capsule_init(&info->fti_pill, req, RCL_SERVER,
-                         info->fti_rep_buf_size);
+       CDEBUG(D_INFO, "%s: FLD req handle: error %d (opc: %d, range: "
+              DRANGE")\n", fld->lsf_name, rc, opc, PRANGE(range));
 
-        req_capsule_set(&info->fti_pill, &RQF_FLD_QUERY);
-}
-
-static void fld_thread_info_fini(struct fld_thread_info *info)
-{
-        req_capsule_fini(&info->fti_pill);
+       RETURN(rc);
 }
 
-static int fld_handle(struct ptlrpc_request *req)
-{
-        struct fld_thread_info *info;
-        const struct lu_env *env;
-        int rc;
-
-        env = req->rq_svc_thread->t_env;
-        LASSERT(env != NULL);
-
-        info = lu_context_key_get(&env->le_ctx, &fld_thread_key);
-        LASSERT(info != NULL);
-
-        fld_thread_info_init(req, info);
-        rc = fld_req_handle(req, info);
-        fld_thread_info_fini(info);
-
-        return rc;
-}
-
-/*
- * Entry point for handling FLD RPCs called from MDT.
- */
-int fld_query(struct com_thread_info *info)
+/**
+ * Handle one FLD request described by \a tsi.
+ *
+ * Reads the opcode and the input range from the request capsule, copies the
+ * range into the reply buffer and hands that copy to fld_server_handle()
+ * together with the server FLD of the export's site.  A missing opcode or
+ * range buffer yields err_serious(-EPROTO).
+ */
+static int fld_handler(struct tgt_session_info *tsi)
 {
-        return fld_handle(info->cti_pill.rc_req);
+       struct obd_export       *exp = tsi->tsi_exp;
+       struct lu_site          *site = exp->exp_obd->obd_lu_dev->ld_site;
+       struct lu_seq_range     *in;
+       struct lu_seq_range     *out;
+       int                      rc;
+       __u32                   *opc;
+
+       ENTRY;
+
+       opc = req_capsule_client_get(tsi->tsi_pill, &RMF_FLD_OPC);
+       if (opc != NULL) {
+               in = req_capsule_client_get(tsi->tsi_pill, &RMF_FLD_MDFLD);
+               if (in == NULL)
+                       RETURN(err_serious(-EPROTO));
+               out = req_capsule_server_get(tsi->tsi_pill, &RMF_FLD_MDFLD);
+               if (out == NULL)
+                       RETURN(err_serious(-EPROTO));
+               /* the reply starts as a copy of the request and is then
+                * updated in place by fld_server_handle() */
+               *out = *in;
+
+               /* For old 2.0 client, the 'lsr_flags' is uninitialized.
+                * Set it as 'LU_SEQ_RANGE_MDT' by default. */
+               if (!(exp_connect_flags(exp) & OBD_CONNECT_64BITHASH) &&
+                   !(exp_connect_flags(exp) & OBD_CONNECT_MDS_MDS) &&
+                   !(exp_connect_flags(exp) & OBD_CONNECT_LIGHTWEIGHT) &&
+                   !exp->exp_libclient)
+                       fld_range_set_mdt(out);
+
+               rc = fld_server_handle(lu_site2seq(site)->ss_server_fld,
+                                      tsi->tsi_env, *opc, out);
+       } else {
+               rc = err_serious(-EPROTO);
+       }
+
+       RETURN(rc);
 }
-EXPORT_SYMBOL(fld_query);
 
 /*
  * Returns true, if fid is local to this server node.
@@ -282,21 +237,29 @@ EXPORT_SYMBOL(fld_query);
  *
  * fid_is_local() is supposed to be used in assertion checks only.
  */
-int fid_is_local(struct lu_site *site, const struct lu_fid *fid)
+int fid_is_local(const struct lu_env *env,
+                 struct lu_site *site, const struct lu_fid *fid)
 {
-        int result;
-
-        result = 1; /* conservatively assume fid is local */
-        if (site->ls_client_fld != NULL) {
-                mdsno_t mds;
-                int rc;
-
-                rc = fld_cache_lookup(site->ls_client_fld->lcf_cache,
-                                      fid_seq(fid), &mds);
-                if (rc == 0)
-                        result = (mds == site->ls_node_id);
-        }
-        return result;
+       struct seq_server_site *ss_site = lu_site2seq(site);
+       struct fld_thread_info *info;
+       struct lu_seq_range *range;
+       int result = 1; /* conservatively assume fid is local */
+       ENTRY;
+
+       /* same per-thread scratch range that fld_server_lookup() uses */
+       info = lu_context_key_get(&env->le_ctx, &fld_thread_key);
+       LASSERT(info != NULL);
+       range = &info->fti_lrange;
+
+       if (ss_site->ss_client_fld != NULL) {
+               int rc;
+
+               rc = fld_cache_lookup(ss_site->ss_client_fld->lcf_cache,
+                                     fid_seq(fid), range);
+               if (rc == 0)
+                       result = (range->lsr_index == ss_site->ss_node_id);
+       }
+       RETURN(result);
 }
 EXPORT_SYMBOL(fid_is_local);
 
@@ -316,7 +279,14 @@ static int fld_server_proc_init(struct lu_server_fld *fld)
                 RETURN(rc);
         }
 
-        RETURN(rc);
+       rc = lprocfs_seq_create(fld->lsf_proc_dir, "fldb", 0444,
+                               &fld_proc_seq_fops, fld);
+       if (rc) {
+               lprocfs_remove(&fld->lsf_proc_dir);
+               fld->lsf_proc_dir = NULL;
+       }
+
+       RETURN(rc);
 }
 
 static void fld_server_proc_fini(struct lu_server_fld *fld)
@@ -341,68 +311,72 @@ static void fld_server_proc_fini(struct lu_server_fld *fld)
 }
 #endif
 
-int fld_server_init(struct lu_server_fld *fld, struct dt_device *dt,
-                    const char *prefix, const struct lu_env *env)
+/* Set up @fld: name, lock, cache, FLD index (node 0 of MDT type only), proc.
+ * Returns 0, or the error of the failing step after undoing earlier ones. */
+int fld_server_init(const struct lu_env *env, struct lu_server_fld *fld,
+                   struct dt_device *dt, const char *prefix, int mds_node_id,
+                   int type)
 {
-        int cache_size, cache_threshold;
-        int rc;
-        ENTRY;
-
-        snprintf(fld->lsf_name, sizeof(fld->lsf_name),
-                 "srv-%s", prefix);
-
-        cache_size = FLD_SERVER_CACHE_SIZE /
-                sizeof(struct fld_cache_entry);
-
-        cache_threshold = cache_size *
-                FLD_SERVER_CACHE_THRESHOLD / 100;
-
-        fld->lsf_cache = fld_cache_init(fld->lsf_name,
-                                        FLD_SERVER_HTABLE_SIZE,
-                                        cache_size, cache_threshold);
-        if (IS_ERR(fld->lsf_cache)) {
-                rc = PTR_ERR(fld->lsf_cache);
-                fld->lsf_cache = NULL;
-                GOTO(out, rc);
-        }
-
-        rc = fld_index_init(fld, env, dt);
-        if (rc)
-                GOTO(out, rc);
-
-        rc = fld_server_proc_init(fld);
-        if (rc)
-                GOTO(out, rc);
-
-        EXIT;
-out:
-        if (rc)
-                fld_server_fini(fld, env);
-        return rc;
+       int cache_size, cache_threshold;
+       int rc;
+       ENTRY;
+
+       snprintf(fld->lsf_name, sizeof(fld->lsf_name), "srv-%s", prefix);
+
+       cache_size = FLD_SERVER_CACHE_SIZE / sizeof(struct fld_cache_entry);
+       cache_threshold = cache_size * FLD_SERVER_CACHE_THRESHOLD / 100;
+
+       mutex_init(&fld->lsf_lock);
+       fld->lsf_cache = fld_cache_init(fld->lsf_name, cache_size,
+                                       cache_threshold);
+       if (IS_ERR(fld->lsf_cache)) {
+               rc = PTR_ERR(fld->lsf_cache);
+               fld->lsf_cache = NULL;
+               RETURN(rc);
+       }
+
+       /* only node 0 of MDT type sets up the FLD index */
+       if (!mds_node_id && type == LU_SEQ_RANGE_MDT) {
+               rc = fld_index_init(env, fld, dt);
+               if (rc)
+                       GOTO(out_cache, rc);
+       } else {
+               fld->lsf_obj = NULL;
+       }
+
+       rc = fld_server_proc_init(fld);
+       if (rc)
+               GOTO(out_index, rc);
+
+       fld->lsf_control_exp = NULL;
+       RETURN(0);
+out_index:
+       fld_index_fini(env, fld);
+out_cache:
+       fld_cache_fini(fld->lsf_cache);
+       fld->lsf_cache = NULL; /* avoid double free in fld_server_fini() */
+       return rc;
 }
 EXPORT_SYMBOL(fld_server_init);
 
-void fld_server_fini(struct lu_server_fld *fld,
-                     const struct lu_env *env)
+void fld_server_fini(const struct lu_env *env, struct lu_server_fld *fld)
 {
-        ENTRY;
+       ENTRY; /* teardown below mirrors fld_server_init() in reverse */
 
-        fld_server_proc_fini(fld);
-        fld_index_fini(fld, env);
+       fld_server_proc_fini(fld);
+       fld_index_fini(env, fld); /* unconditional, index set up or not */
 
-        if (fld->lsf_cache != NULL) {
-                if (!IS_ERR(fld->lsf_cache))
-                        fld_cache_fini(fld->lsf_cache);
-                fld->lsf_cache = NULL;
-        }
+       if (fld->lsf_cache != NULL) {
+               if (!IS_ERR(fld->lsf_cache)) /* skip an ERR_PTR value */
+                       fld_cache_fini(fld->lsf_cache);
+               fld->lsf_cache = NULL;
+       }
 
-        EXIT;
+       EXIT;
 }
 EXPORT_SYMBOL(fld_server_fini);
 
-MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
-MODULE_DESCRIPTION("Lustre FLD");
-MODULE_LICENSE("GPL");
-
-cfs_module(mdd, "0.1.0", fld_mod_init, fld_mod_exit);
-#endif
+struct tgt_handler fld_handlers[] = { /* one entry: FLD_QUERY, fld_handler */
+TGT_FLD_HDL(HABEO_REFERO,      FLD_QUERY,      fld_handler),
+};
+EXPORT_SYMBOL(fld_handlers);