Whamcloud - gitweb
LU-2075 fld: use predefined FIDs
[fs/lustre-release.git] / lustre / fid / fid_handler.c
index 5af9343..df3a999 100644 (file)
@@ -1,6 +1,4 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
+/*
  * GPL HEADER START
  *
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  * GPL HEADER END
  */
 /*
- * Copyright  2008 Sun Microsystems, Inc. All rights reserved
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
  * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, Whamcloud, Inc.
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
@@ -40,9 +40,6 @@
  * Author: Yury Umanets <umka@clusterfs.com>
  */
 
-#ifndef EXPORT_SYMTAB
-# define EXPORT_SYMTAB
-#endif
 #define DEBUG_SUBSYSTEM S_FID
 
 #ifdef __KERNEL__
@@ -74,7 +71,7 @@ int seq_server_set_cli(struct lu_server_seq *seq,
          * Ask client for new range, assign that range to ->seq_space and write
          * seq state to backing store should be atomic.
          */
-        down(&seq->lss_sem);
+        cfs_mutex_lock(&seq->lss_mutex);
 
         if (cli == NULL) {
                 CDEBUG(D_INFO, "%s: Detached sequence client %s\n",
@@ -93,13 +90,25 @@ int seq_server_set_cli(struct lu_server_seq *seq,
                seq->lss_name, cli->lcs_name);
 
         seq->lss_cli = cli;
-        cli->lcs_space.lsr_mdt = seq->lss_site->ms_node_id;
+        cli->lcs_space.lsr_index = seq->lss_site->ms_node_id;
         EXIT;
 out_up:
-        up(&seq->lss_sem);
+        cfs_mutex_unlock(&seq->lss_mutex);
         return rc;
 }
 EXPORT_SYMBOL(seq_server_set_cli);
+/*
+ * Allocate up to \a width units of sequence from range \a from into \a to.
+ */
+static inline void range_alloc(struct lu_seq_range *to,
+                               struct lu_seq_range *from,
+                               __u64 width)
+{
+        width = min(range_space(from), width);
+        to->lsr_start = from->lsr_start;
+        to->lsr_end = from->lsr_start + width;
+        from->lsr_start += width;
+}
 
 /**
  * On controller node, allocate new super sequence for regular sequence server.
@@ -109,220 +118,172 @@ EXPORT_SYMBOL(seq_server_set_cli);
  */
 
 static int __seq_server_alloc_super(struct lu_server_seq *seq,
-                                    struct lu_seq_range *in,
                                     struct lu_seq_range *out,
                                     const struct lu_env *env)
 {
-        struct lu_seq_range *space = &seq->lss_space;
-        struct thandle *th;
-        __u64 mdt = out->lsr_mdt;
-        int rc, credit;
-        ENTRY;
-
-        LASSERT(range_is_sane(space));
-
-        if (in != NULL) {
-                CDEBUG(D_INFO, "%s: Input seq range: "
-                       DRANGE"\n", seq->lss_name, PRANGE(in));
-
-                if (in->lsr_end > space->lsr_start)
-                        space->lsr_start = in->lsr_end;
-                *out = *in;
-
-                CDEBUG(D_INFO, "%s: Recovered space: "DRANGE"\n",
-                       seq->lss_name, PRANGE(space));
-        } else {
-                if (range_space(space) < seq->lss_width) {
-                        CWARN("%s: Sequences space to be exhausted soon. "
-                              "Only "LPU64" sequences left\n", seq->lss_name,
-                              range_space(space));
-                        *out = *space;
-                        space->lsr_start = space->lsr_end;
-                } else if (range_is_exhausted(space)) {
-                        CERROR("%s: Sequences space is exhausted\n",
-                               seq->lss_name);
-                        RETURN(-ENOSPC);
-                } else {
-                        range_alloc(out, space, seq->lss_width);
-                }
-        }
-        out->lsr_mdt = mdt;
-
-        credit = SEQ_TXN_STORE_CREDITS + FLD_TXN_INDEX_INSERT_CREDITS;
-
-        th = seq_store_trans_start(seq, env, credit);
-        if (IS_ERR(th))
-                RETURN(PTR_ERR(th));
-
-        rc = seq_store_write(seq, env, th);
-        if (rc) {
-                CERROR("%s: Can't write space data, rc %d\n",
-                       seq->lss_name, rc);
-                goto out;
-        }
+       struct lu_seq_range *space = &seq->lss_space;
+       int rc;
+       ENTRY;
+
+       LASSERT(range_is_sane(space));
+
+       if (range_is_exhausted(space)) {
+               CERROR("%s: Sequences space is exhausted\n",
+                      seq->lss_name);
+               RETURN(-ENOSPC);
+       } else {
+               range_alloc(out, space, seq->lss_width);
+       }
 
-        rc = fld_server_create(seq->lss_site->ms_server_fld,
-                               env, out, th);
-        if (rc) {
-                CERROR("%s: Can't Update fld database, rc %d\n",
-                       seq->lss_name, rc);
-        }
+       rc = seq_store_update(env, seq, out, 1 /* sync */);
 
-out:
-        seq_store_trans_stop(seq, env, th);
+       LCONSOLE_INFO("%s: super-sequence allocation rc = %d " DRANGE"\n",
+                     seq->lss_name, rc, PRANGE(out));
 
-        CDEBUG(D_INFO, "%s: super-sequence allocation rc = %d "
-               DRANGE"\n", seq->lss_name, rc, PRANGE(out));
-
-        RETURN(rc);
+       RETURN(rc);
 }
 
 int seq_server_alloc_super(struct lu_server_seq *seq,
-                           struct lu_seq_range *in,
                            struct lu_seq_range *out,
                            const struct lu_env *env)
 {
         int rc;
         ENTRY;
 
-        down(&seq->lss_sem);
-        rc = __seq_server_alloc_super(seq, in, out, env);
-        up(&seq->lss_sem);
+        cfs_mutex_lock(&seq->lss_mutex);
+        rc = __seq_server_alloc_super(seq, out, env);
+        cfs_mutex_unlock(&seq->lss_mutex);
 
         RETURN(rc);
 }
 
-static int __seq_server_alloc_meta(struct lu_server_seq *seq,
-                                   struct lu_seq_range *in,
-                                   struct lu_seq_range *out,
-                                   const struct lu_env *env)
+static int __seq_set_init(const struct lu_env *env,
+                            struct lu_server_seq *seq)
 {
         struct lu_seq_range *space = &seq->lss_space;
-        struct thandle *th;
-        int rc = 0;
+        int rc;
 
-        ENTRY;
+        range_alloc(&seq->lss_lowater_set, space, seq->lss_set_width);
+        range_alloc(&seq->lss_hiwater_set, space, seq->lss_set_width);
 
-        LASSERT(range_is_sane(space));
+        rc = seq_store_update(env, seq, NULL, 1);
 
-        /*
-         * This is recovery case. Adjust super range if input range looks like
-         * it is allocated from new super.
-         */
-        if (in != NULL) {
-                CDEBUG(D_INFO, "%s: Input seq range: "
-                       DRANGE"\n", seq->lss_name, PRANGE(in));
-
-                if (range_is_exhausted(space)) {
-                        /*
-                         * Server cannot send empty range to client, this is why
-                         * we check here that range from client is "newer" than
-                         * exhausted super.
-                         */
-                        LASSERT(in->lsr_end > space->lsr_start);
-
-                        /*
-                         * Start is set to end of last allocated, because it
-                         * *is* already allocated so we take that into account
-                         * and do not use for other allocations.
-                         */
-                        space->lsr_start = in->lsr_end;
-
-                        /*
-                         * End is set to in->lsr_start + super sequence
-                         * allocation unit. That is because in->lsr_start is
-                         * first seq in new allocated range from controller
-                         * before failure.
-                         */
-                        space->lsr_end = in->lsr_start + LUSTRE_SEQ_SUPER_WIDTH;
-
-                        if (!seq->lss_cli) {
-                                CERROR("%s: No sequence controller "
-                                       "is attached.\n", seq->lss_name);
-                                RETURN(-ENODEV);
-                        }
-
-                        /*
-                         * Let controller know that this is recovery and last
-                         * obtained range from it was @space.
-                         */
-                        rc = seq_client_replay_super(seq->lss_cli, space, env);
-
-                        if (rc) {
-                                CERROR("%s: Can't replay super-sequence, "
-                                       "rc %d\n", seq->lss_name, rc);
-                                RETURN(rc);
-                        }
-                } else {
-                        /*
-                         * Update super start by end from client's range. Super
-                         * end should not be changed if range was not exhausted.
-                         */
-                        if (in->lsr_end > space->lsr_start)
-                                space->lsr_start = in->lsr_end;
-                }
+        return rc;
+}
 
-                *out = *in;
+/*
+ * This function implements the new seq allocation algorithm using async
+ * updates to the seq file on disk; see bug 18857 for details.
+ * The following variables keep track of this process:
+ *
+ * lss_space; - available lss_space
+ * lss_lowater_set; - lu_seq_range for all seqs before barrier, i.e. safe to use
+ * lss_hiwater_set; - lu_seq_range after barrier, i.e. allocated but may be
+ *                    not yet committed
+ *
+ * When lss_lowater_set reaches the end it is replaced with the hiwater one
+ * and a write operation is initiated to allocate a new hiwater range.
+ * If the last seq write operation is still not committed, the current
+ * operation is flagged as a sync write op.
+ */
+static int range_alloc_set(const struct lu_env *env,
+                            struct lu_seq_range *out,
+                            struct lu_server_seq *seq)
+{
+        struct lu_seq_range *space = &seq->lss_space;
+        struct lu_seq_range *loset = &seq->lss_lowater_set;
+        struct lu_seq_range *hiset = &seq->lss_hiwater_set;
+        int rc = 0;
 
-                CDEBUG(D_INFO, "%s: Recovered space: "DRANGE"\n",
-                       seq->lss_name, PRANGE(space));
-        } else {
-                /*
-                 * XXX: Avoid cascading RPCs using kind of async preallocation
-                 * when meta-sequence is close to exhausting.
-                 */
-                if (range_is_exhausted(space)) {
-                        if (!seq->lss_cli) {
-                                CERROR("%s: No sequence controller "
-                                       "is attached.\n", seq->lss_name);
-                                RETURN(-ENODEV);
-                        }
-
-                        rc = seq_client_alloc_super(seq->lss_cli, env);
-                        if (rc) {
-                                CERROR("%s: Can't allocate super-sequence, "
-                                       "rc %d\n", seq->lss_name, rc);
-                                RETURN(rc);
-                        }
-
-                        /* Saving new range to allocation space. */
-                        *space = seq->lss_cli->lcs_space;
-                        LASSERT(range_is_sane(space));
-                }
+        if (range_is_zero(loset))
+                __seq_set_init(env, seq);
 
-                range_alloc(out, space, seq->lss_width);
-        }
+        if (OBD_FAIL_CHECK(OBD_FAIL_SEQ_ALLOC)) /* exhaust set */
+                loset->lsr_start = loset->lsr_end;
 
-        th = seq_store_trans_start(seq, env, SEQ_TXN_STORE_CREDITS);
-        if (IS_ERR(th))
-                RETURN(PTR_ERR(th));
+        if (range_is_exhausted(loset)) {
+                /* reached high water mark. */
+               struct lu_device *dev = seq->lss_site->ms_lu->ls_top_dev;
+                int obd_num_clients = dev->ld_obd->obd_num_exports;
+                __u64 set_sz;
 
-        rc = seq_store_write(seq, env, th);
-        if (rc) {
-                CERROR("%s: Can't write space data, rc %d\n",
-                      seq->lss_name, rc);
-        }
+                /* calculate new seq width based on number of clients */
+                set_sz = max(seq->lss_set_width,
+                             obd_num_clients * seq->lss_width);
+                set_sz = min(range_space(space), set_sz);
+
+                /* Switch to hiwater range now */
+                *loset = *hiset;
+                /* allocate new hiwater range */
+                range_alloc(hiset, space, set_sz);
 
-        if (rc == 0) {
-                CDEBUG(D_INFO, "%s: Allocated meta-sequence "
-                       DRANGE"\n", seq->lss_name, PRANGE(out));
+                /* update ondisk seq with new *space */
+                rc = seq_store_update(env, seq, NULL, seq->lss_need_sync);
         }
 
-        seq_store_trans_stop(seq, env, th);
+        LASSERTF(!range_is_exhausted(loset) || range_is_sane(loset),
+                 DRANGE"\n", PRANGE(loset));
+
+        if (rc == 0)
+                range_alloc(out, loset, seq->lss_width);
+
         RETURN(rc);
 }
 
+static int __seq_server_alloc_meta(struct lu_server_seq *seq,
+                                   struct lu_seq_range *out,
+                                   const struct lu_env *env)
+{
+       struct lu_seq_range *space = &seq->lss_space;
+       int rc = 0;
+
+       ENTRY;
+
+       LASSERT(range_is_sane(space));
+
+       /* Check if available space ends and allocate new super seq */
+       if (range_is_exhausted(space)) {
+               if (!seq->lss_cli) {
+                       CERROR("%s: No sequence controller is attached.\n",
+                              seq->lss_name);
+                       RETURN(-ENODEV);
+               }
+
+               rc = seq_client_alloc_super(seq->lss_cli, env);
+               if (rc) {
+                       CERROR("%s: Can't allocate super-sequence, rc %d\n",
+                              seq->lss_name, rc);
+                       RETURN(rc);
+               }
+
+               /* Saving new range to allocation space. */
+               *space = seq->lss_cli->lcs_space;
+               LASSERT(range_is_sane(space));
+       }
+
+       rc = range_alloc_set(env, out, seq);
+       if (rc != 0) {
+               CERROR("%s: Allocated meta-sequence failed: rc = %d\n",
+                       seq->lss_name, rc);
+               RETURN(rc);
+       }
+
+       CDEBUG(D_INFO, "%s: Allocated meta-sequence " DRANGE"\n",
+               seq->lss_name, PRANGE(out));
+
+       RETURN(rc);
+}
+
 int seq_server_alloc_meta(struct lu_server_seq *seq,
-                          struct lu_seq_range *in,
                           struct lu_seq_range *out,
                           const struct lu_env *env)
 {
         int rc;
         ENTRY;
 
-        down(&seq->lss_sem);
-        rc = __seq_server_alloc_meta(seq, in, out, env);
-        up(&seq->lss_sem);
+        cfs_mutex_lock(&seq->lss_mutex);
+        rc = __seq_server_alloc_meta(seq, out, env);
+        cfs_mutex_unlock(&seq->lss_mutex);
 
         RETURN(rc);
 }
@@ -330,8 +291,7 @@ EXPORT_SYMBOL(seq_server_alloc_meta);
 
 static int seq_server_handle(struct lu_site *site,
                              const struct lu_env *env,
-                             __u32 opc, struct lu_seq_range *in,
-                             struct lu_seq_range *out)
+                             __u32 opc, struct lu_seq_range *out)
 {
         int rc;
         struct md_site *mite;
@@ -345,8 +305,7 @@ static int seq_server_handle(struct lu_site *site,
                                "initialized\n");
                         RETURN(-EINVAL);
                 }
-                rc = seq_server_alloc_meta(mite->ms_server_seq,
-                                           in, out, env);
+                rc = seq_server_alloc_meta(mite->ms_server_seq, out, env);
                 break;
         case SEQ_ALLOC_SUPER:
                 if (!mite->ms_control_seq) {
@@ -354,8 +313,7 @@ static int seq_server_handle(struct lu_site *site,
                                "initialized\n");
                         RETURN(-EINVAL);
                 }
-                rc = seq_server_alloc_super(mite->ms_control_seq,
-                                            in, out, env);
+                rc = seq_server_alloc_super(mite->ms_control_seq, out, env);
                 break;
         default:
                 rc = -EINVAL;
@@ -369,15 +327,16 @@ static int seq_req_handle(struct ptlrpc_request *req,
                           const struct lu_env *env,
                           struct seq_thread_info *info)
 {
-        struct lu_seq_range *out, *in = NULL, *tmp;
+        struct lu_seq_range *out, *tmp;
         struct lu_site *site;
         int rc = -EPROTO;
         __u32 *opc;
         ENTRY;
 
+       LASSERT(!(lustre_msg_get_flags(req->rq_reqmsg) & MSG_REPLAY));
         site = req->rq_export->exp_obd->obd_lu_dev->ld_site;
         LASSERT(site != NULL);
-                       
+
         rc = req_capsule_server_pack(info->sti_pill);
         if (rc)
                 RETURN(err_serious(rc));
@@ -390,15 +349,12 @@ static int seq_req_handle(struct ptlrpc_request *req,
 
                 tmp = req_capsule_client_get(info->sti_pill, &RMF_SEQ_RANGE);
 
-                if (lustre_msg_get_flags(req->rq_reqmsg) & MSG_REPLAY) {
-                        in = tmp;
-                        LASSERT(!range_is_zero(in) && range_is_sane(in));
-                }
                 /* seq client passed mdt id, we need to pass that using out
                  * range parameter */
 
-                out->lsr_mdt = tmp->lsr_mdt;
-                rc = seq_server_handle(site, env, *opc, in, out);
+                out->lsr_index = tmp->lsr_index;
+                out->lsr_flags = tmp->lsr_flags;
+                rc = seq_server_handle(site, env, *opc, out);
         } else
                 rc = err_serious(-EPROTO);
 
@@ -439,6 +395,9 @@ static int seq_handle(struct ptlrpc_request *req)
 
         seq_thread_info_init(req, info);
         rc = seq_req_handle(req, env, info);
+        /* XXX: we don't need replay, but MDT assigns a transno in any case;
+         * remove it manually before reply. */
+        lustre_msg_set_transno(req->rq_repmsg, 0);
         seq_thread_info_fini(info);
 
         return rc;
@@ -506,6 +465,7 @@ static void seq_server_proc_fini(struct lu_server_seq *seq)
 }
 #endif
 
+
 int seq_server_init(struct lu_server_seq *seq,
                     struct dt_device *dev,
                     const char *prefix,
@@ -513,18 +473,22 @@ int seq_server_init(struct lu_server_seq *seq,
                     struct md_site *ms,
                     const struct lu_env *env)
 {
-        struct thandle *th;
         int rc, is_srv = (type == LUSTRE_SEQ_SERVER);
         ENTRY;
 
-       LASSERT(dev != NULL);
+        LASSERT(dev != NULL);
         LASSERT(prefix != NULL);
 
         seq->lss_cli = NULL;
         seq->lss_type = type;
         seq->lss_site = ms;
         range_init(&seq->lss_space);
-        sema_init(&seq->lss_sem, 1);
+
+        range_init(&seq->lss_lowater_set);
+        range_init(&seq->lss_hiwater_set);
+        seq->lss_set_width = LUSTRE_SEQ_BATCH_WIDTH;
+
+        cfs_mutex_init(&seq->lss_mutex);
 
         seq->lss_width = is_srv ?
                 LUSTRE_SEQ_META_WIDTH : LUSTRE_SEQ_SUPER_WIDTH;
@@ -544,27 +508,21 @@ int seq_server_init(struct lu_server_seq *seq,
                         LUSTRE_SEQ_ZERO_RANGE:
                         LUSTRE_SEQ_SPACE_RANGE;
 
-                seq->lss_space.lsr_mdt = ms->ms_node_id;
-                CDEBUG(D_INFO, "%s: No data found "
-                       "on store. Initialize space\n",
-                       seq->lss_name);
-
-                th = seq_store_trans_start(seq, env, SEQ_TXN_STORE_CREDITS);
-                if (IS_ERR(th))
-                        RETURN(PTR_ERR(th));
+                seq->lss_space.lsr_index = ms->ms_node_id;
+               LCONSOLE_INFO("%s: No data found "
+                             "on store. Initialize space\n",
+                             seq->lss_name);
 
-                /* Save default controller value to store. */
-                rc = seq_store_write(seq, env, th);
+                rc = seq_store_update(env, seq, NULL, 0);
                 if (rc) {
                         CERROR("%s: Can't write space data, "
                                "rc %d\n", seq->lss_name, rc);
                 }
-                seq_store_trans_stop(seq, env, th);
         } else if (rc) {
-               CERROR("%s: Can't read space data, rc %d\n",
-                      seq->lss_name, rc);
-               GOTO(out, rc);
-       }
+                CERROR("%s: Can't read space data, rc %d\n",
+                       seq->lss_name, rc);
+                GOTO(out, rc);
+        }
 
         if (is_srv) {
                 LASSERT(range_is_sane(&seq->lss_space));
@@ -575,13 +533,13 @@ int seq_server_init(struct lu_server_seq *seq,
 
         rc  = seq_server_proc_init(seq);
         if (rc)
-               GOTO(out, rc);
+                GOTO(out, rc);
 
-       EXIT;
+        EXIT;
 out:
-       if (rc)
-               seq_server_fini(seq, env);
-       return rc;
+        if (rc)
+                seq_server_fini(seq, env);
+        return rc;
 }
 EXPORT_SYMBOL(seq_server_init);
 
@@ -599,18 +557,6 @@ EXPORT_SYMBOL(seq_server_fini);
 
 cfs_proc_dir_entry_t *seq_type_proc_dir = NULL;
 
-static struct lu_local_obj_desc llod_seq_srv = {
-        .llod_name      = LUSTRE_SEQ_SRV_NAME,
-        .llod_oid       = FID_SEQ_SRV_OID,
-        .llod_is_index  = 0,
-};
-
-static struct lu_local_obj_desc llod_seq_ctl = {
-        .llod_name      = LUSTRE_SEQ_CTL_NAME,
-        .llod_oid       = FID_SEQ_CTL_OID,
-        .llod_is_index  = 0,
-};
-
 static int __init fid_mod_init(void)
 {
         seq_type_proc_dir = lprocfs_register(LUSTRE_SEQ_NAME,
@@ -619,9 +565,6 @@ static int __init fid_mod_init(void)
         if (IS_ERR(seq_type_proc_dir))
                 return PTR_ERR(seq_type_proc_dir);
 
-        llo_local_obj_register(&llod_seq_srv);
-        llo_local_obj_register(&llod_seq_ctl);
-
         LU_CONTEXT_KEY_INIT(&seq_thread_key);
         lu_context_key_register(&seq_thread_key);
         return 0;
@@ -629,9 +572,6 @@ static int __init fid_mod_init(void)
 
 static void __exit fid_mod_exit(void)
 {
-        llo_local_obj_unregister(&llod_seq_srv);
-        llo_local_obj_unregister(&llod_seq_ctl);
-
         lu_context_key_degister(&seq_thread_key);
         if (seq_type_proc_dir != NULL && !IS_ERR(seq_type_proc_dir)) {
                 lprocfs_remove(&seq_type_proc_dir);