Whamcloud - gitweb
LU-4975 ofd: documenting the ofd_fs.c
[fs/lustre-release.git] / lustre / ofd / ofd_fs.c
index 1828758..25c0009 100644 (file)
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
  *
  * GPL HEADER END
  */
@@ -27,7 +23,7 @@
  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
  * Use is subject to license terms.
  *
  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
  * Use is subject to license terms.
  *
- * Copyright (c) 2011, 2012, Whamcloud, Inc.
+ * Copyright (c) 2012, 2014 Intel Corporation.
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
  *
  * lustre/ofd/ofd_fs.c
  *
  *
  * lustre/ofd/ofd_fs.c
  *
- * Author: Alexey Zhuravlev <bzzz@whamcloud.com>
- * Author: Mikhail Pershin <tappro@whamcloud.com>
+ * This file provides helper functions to handle various data stored on disk.
+ * It uses OSD API and works with any OSD.
+ *
+ * Note: this file also contains functions for sequence handling; they are
+ * placed here improperly and will be moved to ofd_dev.c and ofd_internal.h.
+ * This comment is to be removed after that.
+ *
+ * Author: Alexey Zhuravlev <alexey.zhuravlev@intel.com>
+ * Author: Mikhail Pershin <mike.pershin@intel.com>
  */
 
 #define DEBUG_SUBSYSTEM S_FILTER
 
 #include "ofd_internal.h"
 
  */
 
 #define DEBUG_SUBSYSTEM S_FILTER
 
 #include "ofd_internal.h"
 
-int ofd_record_write(const struct lu_env *env, struct ofd_device *ofd,
-                    struct dt_object *dt, struct lu_buf *buf, loff_t *off)
+/**
+ * Restrict precreate batch count by its upper limit.
+ *
+ * The precreate batch count is the number of precreates to do in
+ * a single transaction. It has an upper limit - the
+ * ofd_device::ofd_precreate_batch value - which shouldn't be exceeded.
+ *
+ * \param[in] ofd      OFD device
+ * \param[in] batch    number of updates in the batch
+ *
+ * \retval             \a batch limited by ofd_device::ofd_precreate_batch
+ */
+int ofd_precreate_batch(struct ofd_device *ofd, int batch)
 {
 {
-       struct thandle  *th;
-       int              rc;
+       int count;
 
 
-       ENTRY;
+       spin_lock(&ofd->ofd_batch_lock);
+       count = min(ofd->ofd_precreate_batch, batch);
+       spin_unlock(&ofd->ofd_batch_lock);
 
 
-       LASSERT(dt);
+       return count;
+}
 
 
-       th = dt_trans_create(env, ofd->ofd_osd);
-       if (IS_ERR(th))
-               RETURN(PTR_ERR(th));
+/**
+ * Get ofd_seq for \a seq.
+ *
+ * Function finds appropriate structure by \a seq number and
+ * increases the reference counter of that structure.
+ *
+ * \param[in] ofd      OFD device
+ * \param[in] seq      sequence number, FID sequence number usually
+ *
+ * \retval             pointer to the requested ofd_seq structure
+ * \retval             NULL if ofd_seq is not found
+ */
+struct ofd_seq *ofd_seq_get(struct ofd_device *ofd, obd_seq seq)
+{
+       struct ofd_seq *oseq;
+
+       read_lock(&ofd->ofd_seq_list_lock);
+       list_for_each_entry(oseq, &ofd->ofd_seq_list, os_list) {
+               if (ostid_seq(&oseq->os_oi) == seq) {
+                       atomic_inc(&oseq->os_refc);
+                       read_unlock(&ofd->ofd_seq_list_lock);
+                       return oseq;
+               }
+       }
+       read_unlock(&ofd->ofd_seq_list_lock);
+       return NULL;
+}
 
 
-       rc = dt_declare_record_write(env, dt, buf->lb_len, *off, th);
-       if (rc == 0) {
-               rc = dt_trans_start_local(env, ofd->ofd_osd, th);
-               if (rc == 0)
-                       rc = dt_record_write(env, dt, buf, off, th);
+/**
+ * Drop a reference to ofd_seq.
+ *
+ * The paired function to ofd_seq_get(). It decreases the reference counter
+ * of the ofd_seq structure and frees it if that reference was the last one.
+ *
+ * \param[in] env      execution environment
+ * \param[in] oseq     ofd_seq structure to put
+ */
+void ofd_seq_put(const struct lu_env *env, struct ofd_seq *oseq)
+{
+       if (atomic_dec_and_test(&oseq->os_refc)) {
+               LASSERT(list_empty(&oseq->os_list));
+               LASSERT(oseq->os_lastid_obj != NULL);
+               lu_object_put(env, &oseq->os_lastid_obj->do_lu);
+               OBD_FREE_PTR(oseq);
        }
        }
-       dt_trans_stop(env, ofd->ofd_osd, th);
+}
 
 
-       RETURN(rc);
+/**
+ * Add a new ofd_seq to the given OFD device.
+ *
+ * First it checks whether there is already an ofd_seq with the same
+ * sequence number as used by \a new_seq.
+ * If no such ofd_seq is found then \a new_seq is added to the list
+ * of all ofd_seq structures; otherwise \a new_seq is dropped and the found
+ * ofd_seq is returned.
+ *
+ * \param[in] env      execution environment
+ * \param[in] ofd      OFD device
+ * \param[in] new_seq  new ofd_seq to be added
+ *
+ * \retval             ofd_seq structure
+ */
+static struct ofd_seq *ofd_seq_add(const struct lu_env *env,
+                                  struct ofd_device *ofd,
+                                  struct ofd_seq *new_seq)
+{
+       struct ofd_seq *os = NULL;
+
+       write_lock(&ofd->ofd_seq_list_lock);
+       list_for_each_entry(os, &ofd->ofd_seq_list, os_list) {
+               if (ostid_seq(&os->os_oi) == ostid_seq(&new_seq->os_oi)) {
+                       atomic_inc(&os->os_refc);
+                       write_unlock(&ofd->ofd_seq_list_lock);
+                       /* The seq has not been added to the list */
+                       ofd_seq_put(env, new_seq);
+                       return os;
+               }
+       }
+       atomic_inc(&new_seq->os_refc);
+       list_add_tail(&new_seq->os_list, &ofd->ofd_seq_list);
+       ofd->ofd_seq_count++;
+       write_unlock(&ofd->ofd_seq_list_lock);
+       return new_seq;
 }
 
 }
 
-obd_id ofd_last_id(struct ofd_device *ofd, obd_seq group)
+/**
+ * Get last object ID for the given sequence.
+ *
+ * \param[in] oseq     OFD sequence structure
+ *
+ * \retval             the last object ID for this sequence
+ */
+obd_id ofd_seq_last_oid(struct ofd_seq *oseq)
 {
        obd_id id;
 
 {
        obd_id id;
 
-       LASSERT(group <= ofd->ofd_max_group);
-
-       cfs_spin_lock(&ofd->ofd_objid_lock);
-       id = ofd->ofd_last_objids[group];
-       cfs_spin_unlock(&ofd->ofd_objid_lock);
+       spin_lock(&oseq->os_last_oid_lock);
+       id = ostid_id(&oseq->os_oi);
+       spin_unlock(&oseq->os_last_oid_lock);
 
        return id;
 }
 
 
        return id;
 }
 
-void ofd_last_id_set(struct ofd_device *ofd, obd_id id, obd_seq group)
+/**
+ * Set new last object ID for the given sequence, if it exceeds the current one.
+ *
+ * \param[in] oseq     OFD sequence
+ * \param[in] id       the new OID to set
+ */
+void ofd_seq_last_oid_set(struct ofd_seq *oseq, obd_id id)
 {
 {
-       LASSERT(group <= ofd->ofd_max_group);
-       cfs_spin_lock(&ofd->ofd_objid_lock);
-       if (ofd->ofd_last_objids[group] < id)
-               ofd->ofd_last_objids[group] = id;
-       cfs_spin_unlock(&ofd->ofd_objid_lock);
+       spin_lock(&oseq->os_last_oid_lock);
+       if (likely(ostid_id(&oseq->os_oi) < id))
+               ostid_set_id(&oseq->os_oi, id);
+       spin_unlock(&oseq->os_last_oid_lock);
 }
 
 }
 
-int ofd_last_id_write(const struct lu_env *env, struct ofd_device *ofd,
-                     obd_seq group)
+/**
+ * Update last used OID on disk for the given sequence.
+ *
+ * The last used object ID is stored persistently on disk and
+ * must be written when updated. This function writes the sequence data.
+ * The format is just the object ID of the latest used object FID.
+ * Each ID is stored in a per-sequence file.
+ *
+ * \param[in] env      execution environment
+ * \param[in] ofd      OFD device
+ * \param[in] oseq     ofd_seq structure with data to write
+ *
+ * \retval             0 on successful write of data from \a oseq
+ * \retval             negative value on error
+ */
+int ofd_seq_last_oid_write(const struct lu_env *env, struct ofd_device *ofd,
+                          struct ofd_seq *oseq)
 {
        struct ofd_thread_info  *info = ofd_info(env);
        obd_id                   tmp;
 {
        struct ofd_thread_info  *info = ofd_info(env);
        obd_id                   tmp;
+       struct dt_object        *obj = oseq->os_lastid_obj;
+       struct thandle          *th;
        int                      rc;
 
        ENTRY;
 
        int                      rc;
 
        ENTRY;
 
+       tmp = cpu_to_le64(ofd_seq_last_oid(oseq));
+
        info->fti_buf.lb_buf = &tmp;
        info->fti_buf.lb_len = sizeof(tmp);
        info->fti_off = 0;
 
        info->fti_buf.lb_buf = &tmp;
        info->fti_buf.lb_len = sizeof(tmp);
        info->fti_off = 0;
 
-       CDEBUG(D_INODE, "%s: write last_objid for group "LPU64": "LPU64"\n",
-              ofd_obd(ofd)->obd_name, group, ofd_last_id(ofd, group));
+       LASSERT(obj != NULL);
 
 
-       tmp = cpu_to_le64(ofd_last_id(ofd, group));
+       th = dt_trans_create(env, ofd->ofd_osd);
+       if (IS_ERR(th))
+               RETURN(PTR_ERR(th));
+
+       rc = dt_declare_record_write(env, obj, &info->fti_buf,
+                                    info->fti_off, th);
+       if (rc < 0)
+               GOTO(out, rc);
+       rc = dt_trans_start_local(env, ofd->ofd_osd, th);
+       if (rc < 0)
+               GOTO(out, rc);
+       rc = dt_record_write(env, obj, &info->fti_buf, &info->fti_off,
+                            th);
+       if (rc < 0)
+               GOTO(out, rc);
 
 
-       rc = ofd_record_write(env, ofd, ofd->ofd_lastid_obj[group],
-                             &info->fti_buf, &info->fti_off);
-       RETURN(rc);
+       CDEBUG(D_INODE, "%s: write last_objid "DOSTID": rc = %d\n",
+              ofd_name(ofd), POSTID(&oseq->os_oi), rc);
+       EXIT;
+out:
+       dt_trans_stop(env, ofd->ofd_osd, th);
+       return rc;
 }
 
 }
 
-int ofd_last_group_write(const struct lu_env *env, struct ofd_device *ofd)
+/**
+ * Deregister LWP items for FLDB and SEQ client on OFD.
+ *
+ * LWP is a lightweight proxy - a simplified connection between
+ * servers. It is used for FID Location Database (FLDB) and
+ * sequence (SEQ) client-server interactions.
+ *
+ * This function is used during server cleanup process to free
+ * LWP items that were previously set up upon OFD start.
+ *
+ * \param[in]     ofd  OFD device
+ */
+static void ofd_deregister_seq_exp(struct ofd_device *ofd)
 {
 {
-       struct ofd_thread_info  *info = ofd_info(env);
-       obd_seq                  tmp;
-       int                      rc;
+       struct seq_server_site  *ss = &ofd->ofd_seq_site;
 
 
-       ENTRY;
+       if (ss->ss_client_seq != NULL) {
+               lustre_deregister_lwp_item(&ss->ss_client_seq->lcs_exp);
+               ss->ss_client_seq->lcs_exp = NULL;
+       }
 
 
-       info->fti_buf.lb_buf = &tmp;
-       info->fti_buf.lb_len = sizeof(tmp);
-       info->fti_off = 0;
+       if (ss->ss_server_fld != NULL) {
+               lustre_deregister_lwp_item(&ss->ss_server_fld->lsf_control_exp);
+               ss->ss_server_fld->lsf_control_exp = NULL;
+       }
+}
 
 
-       tmp = cpu_to_le32(ofd->ofd_max_group);
+/**
+ * Stop FLDB server on OFD.
+ *
+ * This function is part of OFD cleanup process.
+ *
+ * \param[in] env      execution environment
+ * \param[in] ofd      OFD device
+ *
+ */
+static void ofd_fld_fini(const struct lu_env *env, struct ofd_device *ofd)
+{
+       struct seq_server_site *ss = &ofd->ofd_seq_site;
 
 
-       rc = ofd_record_write(env, ofd, ofd->ofd_last_group_file,
-                             &info->fti_buf, &info->fti_off);
+       if (ss != NULL && ss->ss_server_fld != NULL) {
+               fld_server_fini(env, ss->ss_server_fld);
+               OBD_FREE_PTR(ss->ss_server_fld);
+               ss->ss_server_fld = NULL;
+       }
+}
 
 
-       RETURN(rc);
+/**
+ * Free sequence structures on OFD.
+ *
+ * This function is part of OFD cleanup process, it goes through
+ * the list of ofd_seq structures stored in ofd_device structure
+ * and frees them.
+ *
+ * \param[in] env      execution environment
+ * \param[in] ofd      OFD device
+ */
+void ofd_seqs_free(const struct lu_env *env, struct ofd_device *ofd)
+{
+       struct ofd_seq          *oseq;
+       struct ofd_seq          *tmp;
+       struct list_head         dispose;
+
+       INIT_LIST_HEAD(&dispose);
+       write_lock(&ofd->ofd_seq_list_lock);
+       list_for_each_entry_safe(oseq, tmp, &ofd->ofd_seq_list, os_list)
+               list_move(&oseq->os_list, &dispose);
+       write_unlock(&ofd->ofd_seq_list_lock);
+
+       while (!list_empty(&dispose)) {
+               oseq = container_of0(dispose.next, struct ofd_seq, os_list);
+               list_del_init(&oseq->os_list);
+               ofd_seq_put(env, oseq);
+       }
 }
 
 }
 
-void ofd_group_fini(const struct lu_env *env, struct ofd_device *ofd,
-                   int group)
+/**
+ * Stop FLDB and SEQ services on OFD.
+ *
+ * This function is part of OFD cleanup process.
+ *
+ * \param[in] env      execution environment
+ * \param[in] ofd      OFD device
+ *
+ */
+void ofd_seqs_fini(const struct lu_env *env, struct ofd_device *ofd)
 {
 {
-       LASSERT(ofd->ofd_lastid_obj[group]);
-       lu_object_put(env, &ofd->ofd_lastid_obj[group]->do_lu);
-       ofd->ofd_lastid_obj[group] = NULL;
+       int rc;
+
+       ofd_deregister_seq_exp(ofd);
+
+       rc = ofd_fid_fini(env, ofd);
+       if (rc != 0)
+               CERROR("%s: fid fini error: rc = %d\n", ofd_name(ofd), rc);
+
+       ofd_fld_fini(env, ofd);
+
+       ofd_seqs_free(env, ofd);
+
+       LASSERT(list_empty(&ofd->ofd_seq_list));
 }
 
 }
 
-int ofd_group_load(const struct lu_env *env, struct ofd_device *ofd, int group)
+/**
+ * Return ofd_seq structure filled with valid data.
+ *
+ * This function gets the ofd_seq by sequence number and reads
+ * the corresponding data from disk.
+ *
+ * \param[in] env      execution environment
+ * \param[in] ofd      OFD device
+ * \param[in] seq      sequence number
+ *
+ * \retval             ofd_seq structure filled with data
+ * \retval             ERR_PTR pointer on error
+ */
+struct ofd_seq *ofd_seq_load(const struct lu_env *env, struct ofd_device *ofd,
+                            obd_seq seq)
 {
        struct ofd_thread_info  *info = ofd_info(env);
 {
        struct ofd_thread_info  *info = ofd_info(env);
+       struct ofd_seq          *oseq = NULL;
        struct dt_object        *dob;
        obd_id                   lastid;
        int                      rc;
 
        ENTRY;
 
        struct dt_object        *dob;
        obd_id                   lastid;
        int                      rc;
 
        ENTRY;
 
-       /* if group is already initialized */
-       if (ofd->ofd_lastid_obj[group])
-               RETURN(0);
+       /* if seq is already initialized */
+       oseq = ofd_seq_get(ofd, seq);
+       if (oseq != NULL)
+               RETURN(oseq);
 
 
-       lu_local_obj_fid(&info->fti_fid, OFD_GROUP0_LAST_OID + group);
+       OBD_ALLOC_PTR(oseq);
+       if (oseq == NULL)
+               RETURN(ERR_PTR(-ENOMEM));
+
+       lu_last_id_fid(&info->fti_fid, seq, ofd->ofd_lut.lut_lsd.lsd_osd_index);
        memset(&info->fti_attr, 0, sizeof(info->fti_attr));
        info->fti_attr.la_valid = LA_MODE;
        info->fti_attr.la_mode = S_IFREG |  S_IRUGO | S_IWUSR;
        info->fti_dof.dof_type = dt_mode_to_dft(S_IFREG);
 
        memset(&info->fti_attr, 0, sizeof(info->fti_attr));
        info->fti_attr.la_valid = LA_MODE;
        info->fti_attr.la_mode = S_IFREG |  S_IRUGO | S_IWUSR;
        info->fti_dof.dof_type = dt_mode_to_dft(S_IFREG);
 
-       /* create object tracking per-group last created
+       /* create object tracking per-seq last created
         * id to be used by orphan recovery mechanism */
        dob = dt_find_or_create(env, ofd->ofd_osd, &info->fti_fid,
                                &info->fti_dof, &info->fti_attr);
         * id to be used by orphan recovery mechanism */
        dob = dt_find_or_create(env, ofd->ofd_osd, &info->fti_fid,
                                &info->fti_dof, &info->fti_attr);
-       if (IS_ERR(dob))
-               RETURN(PTR_ERR(dob));
+       if (IS_ERR(dob)) {
+               OBD_FREE_PTR(oseq);
+               RETURN((void *)dob);
+       }
+
+       oseq->os_lastid_obj = dob;
 
 
-       ofd->ofd_lastid_obj[group] = dob;
-       cfs_mutex_init(&ofd->ofd_create_locks[group]);
+       INIT_LIST_HEAD(&oseq->os_list);
+       mutex_init(&oseq->os_create_lock);
+       spin_lock_init(&oseq->os_last_oid_lock);
+       ostid_set_seq(&oseq->os_oi, seq);
+
+       atomic_set(&oseq->os_refc, 1);
 
        rc = dt_attr_get(env, dob, &info->fti_attr, BYPASS_CAPA);
        if (rc)
 
        rc = dt_attr_get(env, dob, &info->fti_attr, BYPASS_CAPA);
        if (rc)
@@ -176,10 +409,8 @@ int ofd_group_load(const struct lu_env *env, struct ofd_device *ofd, int group)
 
        if (info->fti_attr.la_size == 0) {
                /* object is just created, initialize last id */
 
        if (info->fti_attr.la_size == 0) {
                /* object is just created, initialize last id */
-               ofd->ofd_last_objids[group] = OFD_INIT_OBJID;
-               ofd_last_id_set(ofd, OFD_INIT_OBJID, group);
-               ofd_last_id_write(env, ofd, group);
-               ofd_last_group_write(env, ofd);
+               ofd_seq_last_oid_set(oseq, OFD_INIT_OBJID);
+               ofd_seq_last_oid_write(env, ofd, oseq);
        } else if (info->fti_attr.la_size == sizeof(lastid)) {
                info->fti_off = 0;
                info->fti_buf.lb_buf = &lastid;
        } else if (info->fti_attr.la_size == sizeof(lastid)) {
                info->fti_off = 0;
                info->fti_buf.lb_buf = &lastid;
@@ -187,274 +418,198 @@ int ofd_group_load(const struct lu_env *env, struct ofd_device *ofd, int group)
 
                rc = dt_record_read(env, dob, &info->fti_buf, &info->fti_off);
                if (rc) {
 
                rc = dt_record_read(env, dob, &info->fti_buf, &info->fti_off);
                if (rc) {
-                       CERROR("can't read last_id: %d\n", rc);
+                       CERROR("%s: can't read last_id: rc = %d\n",
+                               ofd_name(ofd), rc);
                        GOTO(cleanup, rc);
                }
                        GOTO(cleanup, rc);
                }
-               ofd->ofd_last_objids[group] = le64_to_cpu(lastid);
+               ofd_seq_last_oid_set(oseq, le64_to_cpu(lastid));
        } else {
        } else {
-               CERROR("corrupted size %Lu LAST_ID of group %u\n",
-                      (unsigned long long)info->fti_attr.la_size, group);
-               rc = -EINVAL;
+               CERROR("%s: corrupted size "LPU64" LAST_ID of seq "LPX64"\n",
+                       ofd_name(ofd), (__u64)info->fti_attr.la_size, seq);
+               GOTO(cleanup, rc = -EINVAL);
        }
 
        }
 
-       RETURN(0);
+       oseq = ofd_seq_add(env, ofd, oseq);
+       RETURN((oseq != NULL) ? oseq : ERR_PTR(-ENOENT));
 cleanup:
 cleanup:
-       ofd_group_fini(env, ofd, group);
-       RETURN(rc);
+       ofd_seq_put(env, oseq);
+       return ERR_PTR(rc);
 }
 
 }
 
-/* ofd groups managements */
-int ofd_groups_init(const struct lu_env *env, struct ofd_device *ofd)
+/**
+ * Initialize local FLDB server.
+ *
+ * \param[in] env      execution environment
+ * \param[in] uuid     unique name for this FLDB server
+ * \param[in] ofd      OFD device
+ *
+ * \retval             0 on successful initialization
+ * \retval             negative value on error
+ */
+static int ofd_fld_init(const struct lu_env *env, const char *uuid,
+                       struct ofd_device *ofd)
 {
 {
-       struct ofd_thread_info  *info = ofd_info(env);
-       unsigned long            groups_size;
-       obd_seq                  last_group;
-       int                      rc = 0;
-       int                      i;
+       struct seq_server_site *ss = &ofd->ofd_seq_site;
+       int rc;
 
        ENTRY;
 
 
        ENTRY;
 
-       cfs_spin_lock_init(&ofd->ofd_objid_lock);
-
-       rc = dt_attr_get(env, ofd->ofd_last_group_file,
-                        &info->fti_attr, BYPASS_CAPA);
-       if (rc)
-               GOTO(cleanup, rc);
-
-       groups_size = (unsigned long)info->fti_attr.la_size;
-
-       if (groups_size == sizeof(last_group)) {
-               info->fti_off = 0;
-               info->fti_buf.lb_buf = &last_group;
-               info->fti_buf.lb_len = sizeof(last_group);
-
-               rc = dt_record_read(env, ofd->ofd_last_group_file,
-                                   &info->fti_buf, &info->fti_off);
-               if (rc) {
-                       CERROR("can't read LAST_GROUP: %d\n", rc);
-                       GOTO(cleanup, rc);
-               }
-
-               ofd->ofd_max_group = le32_to_cpu(last_group);
-               LASSERT(ofd->ofd_max_group <= OFD_MAX_GROUPS);
-       } else if (groups_size == 0) {
-               ofd->ofd_max_group = 0;
-       } else {
-               CERROR("groups file is corrupted? size = %lu\n", groups_size);
-               GOTO(cleanup, rc = -EIO);
-       }
+       OBD_ALLOC_PTR(ss->ss_server_fld);
+       if (ss->ss_server_fld == NULL)
+               RETURN(rc = -ENOMEM);
 
 
-       for (i = 0; i <= ofd->ofd_max_group; i++) {
-               rc = ofd_group_load(env, ofd, i);
-               if (rc) {
-                       CERROR("can't load group %d: %d\n", i, rc);
-                       /* Clean all previously set groups */
-                       while (i > 0)
-                               ofd_group_fini(env, ofd, --i);
-                       GOTO(cleanup, rc);
-               }
+       rc = fld_server_init(env, ss->ss_server_fld, ofd->ofd_osd, uuid,
+                            LU_SEQ_RANGE_OST);
+       if (rc < 0) {
+               OBD_FREE_PTR(ss->ss_server_fld);
+               ss->ss_server_fld = NULL;
+               RETURN(rc);
        }
        }
-
-       CWARN("%s: %u groups initialized\n",
-             ofd_obd(ofd)->obd_name, ofd->ofd_max_group + 1);
-cleanup:
-       RETURN(rc);
+       RETURN(0);
 }
 
 }
 
-int ofd_clients_data_init(const struct lu_env *env, struct ofd_device *ofd,
-                         unsigned long fsize)
+/**
+ * Update local FLDB copy from master server.
+ *
+ * This callback is called when LWP is connected to the server.
+ * It retrieves its FLDB entries from MDT0, and it only happens
+ * when upgrading the existing file system to 2.6.
+ *
+ * \param[in] data     OFD device
+ *
+ * \retval             0 on successful FLDB update
+ * \retval             negative value in case of failure
+ */
+static int ofd_register_lwp_callback(void *data)
 {
 {
-       struct obd_device               *obd = ofd_obd(ofd);
-       struct lr_server_data           *lsd = &ofd->ofd_lut.lut_lsd;
-       struct lsd_client_data          *lcd = NULL;
-       struct filter_export_data       *fed;
-       int                              cl_idx;
-       int                              rc = 0;
-       loff_t                           off = lsd->lsd_client_start;
-
-       CLASSERT(offsetof(struct lsd_client_data, lcd_padding) +
-                sizeof(lcd->lcd_padding) == LR_CLIENT_SIZE);
-
-       OBD_ALLOC_PTR(lcd);
-       if (lcd == NULL)
-               RETURN(-ENOMEM);
+       struct lu_env           *env;
+       struct ofd_device       *ofd = data;
+       struct lu_server_fld    *fld = ofd->ofd_seq_site.ss_server_fld;
+       int                     rc;
 
 
-       for (cl_idx = 0; off < fsize; cl_idx++) {
-               struct obd_export       *exp;
-               __u64                    last_rcvd;
-
-               /* Don't assume off is incremented properly by
-                * fsfilt_read_record(), in case sizeof(*lcd)
-                * isn't the same as fsd->lsd_client_size.  */
-               off = lsd->lsd_client_start + cl_idx * lsd->lsd_client_size;
-               rc = lut_client_data_read(env, &ofd->ofd_lut, lcd, &off, cl_idx);
-               if (rc) {
-                       CERROR("error reading FILT %s idx %d off %llu: rc %d\n",
-                              LAST_RCVD, cl_idx, off, rc);
-                       rc = 0;
-                       break; /* read error shouldn't cause startup to fail */
-               }
-
-               if (lcd->lcd_uuid[0] == '\0') {
-                       CDEBUG(D_INFO, "skipping zeroed client at offset %d\n",
-                              cl_idx);
-                       continue;
-               }
-
-               last_rcvd = lcd->lcd_last_transno;
+       ENTRY;
 
 
-               /* These exports are cleaned up by ofd_disconnect(), so they
-                * need to be set up like real exports as ofd_connect() does.
-                */
-               exp = class_new_export(obd, (struct obd_uuid *)lcd->lcd_uuid);
+       if (!likely(fld->lsf_new))
+               RETURN(0);
 
 
-               CDEBUG(D_HA, "RCVRNG CLIENT uuid: %s idx: %d lr: "LPU64
-                      " srv lr: "LPU64"\n", lcd->lcd_uuid, cl_idx,
-                      last_rcvd, lsd->lsd_last_transno);
+       OBD_ALLOC_PTR(env);
+       if (env == NULL)
+               RETURN(-ENOMEM);
 
 
-               if (IS_ERR(exp)) {
-                       if (PTR_ERR(exp) == -EALREADY) {
-                               /* export already exists, zero out this one */
-                               CERROR("Duplicate export %s!\n", lcd->lcd_uuid);
-                               continue;
-                       }
-                       GOTO(err_out, rc = PTR_ERR(exp));
-               }
+       rc = lu_env_init(env, LCT_DT_THREAD);
+       if (rc < 0)
+               GOTO(out, rc);
 
 
-               fed = &exp->exp_filter_data;
-               *fed->fed_ted.ted_lcd = *lcd;
-
-               rc = lut_client_add(env, exp, cl_idx);
-               LASSERTF(rc == 0, "rc = %d\n", rc); /* can't fail existing */
-               /* VBR: set export last committed version */
-               exp->exp_last_committed = last_rcvd;
-               cfs_spin_lock(&exp->exp_lock);
-               exp->exp_connecting = 0;
-               exp->exp_in_recovery = 0;
-               cfs_spin_unlock(&exp->exp_lock);
-               obd->obd_max_recoverable_clients++;
-               class_export_put(exp);
-
-               /* Need to check last_rcvd even for duplicated exports. */
-               CDEBUG(D_OTHER, "client at idx %d has last_rcvd = "LPU64"\n",
-                      cl_idx, last_rcvd);
-
-               cfs_spin_lock(&ofd->ofd_lut.lut_translock);
-               if (last_rcvd > lsd->lsd_last_transno)
-                       lsd->lsd_last_transno = last_rcvd;
-               cfs_spin_unlock(&ofd->ofd_lut.lut_translock);
+       rc = fld_update_from_controller(env, fld);
+       if (rc < 0) {
+               CERROR("%s: cannot update controller: rc = %d\n",
+                      ofd_name(ofd), rc);
+               GOTO(out, rc);
        }
        }
-
-err_out:
-       OBD_FREE_PTR(lcd);
-       RETURN(rc);
+       EXIT;
+out:
+       lu_env_fini(env);
+       OBD_FREE_PTR(env);
+       return rc;
 }
 
 }
 
-int ofd_server_data_init(const struct lu_env *env, struct ofd_device *ofd)
+/**
+ * Get LWP exports from LWP connection for local FLDB server and SEQ client.
+ *
+ * This function is part of the setup process and initializes the FLDB server
+ * and SEQ client, so they may work with remote servers.
+ *
+ * \param[in] ofd      OFD device
+ *
+ * \retval             0 on successful export get
+ * \retval             negative value on error
+ */
+static int ofd_register_seq_exp(struct ofd_device *ofd)
 {
 {
-       struct ofd_thread_info  *info = ofd_info(env);
-       struct lr_server_data   *lsd = &ofd->ofd_lut.lut_lsd;
-       struct obd_device       *obd = ofd_obd(ofd);
-       unsigned long            last_rcvd_size;
-       int                      rc;
-
-       rc = dt_attr_get(env, ofd->ofd_lut.lut_last_rcvd, &info->fti_attr,
-                        BYPASS_CAPA);
-       if (rc)
-               RETURN(rc);
-
-       last_rcvd_size = (unsigned long)info->fti_attr.la_size;
+       struct seq_server_site  *ss = &ofd->ofd_seq_site;
+       char                    *lwp_name = NULL;
+       int                     rc;
+
+       OBD_ALLOC(lwp_name, MAX_OBD_NAME);
+       if (lwp_name == NULL)
+               GOTO(out_free, rc = -ENOMEM);
+
+       rc = tgt_name2lwp_name(ofd_name(ofd), lwp_name, MAX_OBD_NAME, 0);
+       if (rc != 0)
+               GOTO(out_free, rc);
+
+       rc = lustre_register_lwp_item(lwp_name, &ss->ss_client_seq->lcs_exp,
+                                     NULL, NULL);
+       if (rc != 0)
+               GOTO(out_free, rc);
+
+       rc = lustre_register_lwp_item(lwp_name,
+                                     &ss->ss_server_fld->lsf_control_exp,
+                                     ofd_register_lwp_callback, ofd);
+       if (rc != 0) {
+               lustre_deregister_lwp_item(&ss->ss_client_seq->lcs_exp);
+               ss->ss_client_seq->lcs_exp = NULL;
+               GOTO(out_free, rc);
+       }
+out_free:
+       if (lwp_name != NULL)
+               OBD_FREE(lwp_name, MAX_OBD_NAME);
 
 
-       /* ensure padding in the struct is the correct size */
-       CLASSERT (offsetof(struct lr_server_data, lsd_padding) +
-                 sizeof(lsd->lsd_padding) == LR_SERVER_SIZE);
+       return rc;
+}
 
 
-       if (last_rcvd_size == 0) {
-               LCONSOLE_WARN("%s: new disk, initializing\n", obd->obd_name);
+/**
+ * Initialize SEQ and FLD service on OFD.
+ *
+ * This is part of OFD setup process.
+ *
+ * \param[in] env      execution environment
+ * \param[in] ofd      OFD device
+ *
+ * \retval             0 on successful services initialization
+ * \retval             negative value on error
+ */
+int ofd_seqs_init(const struct lu_env *env, struct ofd_device *ofd)
+{
+       int rc;
 
 
-               memcpy(lsd->lsd_uuid, obd->obd_uuid.uuid,
-                      sizeof(lsd->lsd_uuid));
-               lsd->lsd_last_transno = 0;
-               lsd->lsd_mount_count = 0;
-               lsd->lsd_server_size = LR_SERVER_SIZE;
-               lsd->lsd_client_start = LR_CLIENT_START;
-               lsd->lsd_client_size = LR_CLIENT_SIZE;
-               lsd->lsd_subdir_count = FILTER_SUBDIR_COUNT;
-               lsd->lsd_feature_incompat = OBD_INCOMPAT_OST;
-       } else {
-               rc = lut_server_data_read(env, &ofd->ofd_lut);
-               if (rc) {
-                       CDEBUG(D_INODE,"OBD ofd: error reading %s: rc %d\n",
-                              LAST_RCVD, rc);
-                       GOTO(err_fsd, rc);
-               }
-               if (strcmp((char *)lsd->lsd_uuid,
-                          (char *)obd->obd_uuid.uuid)) {
-                       LCONSOLE_ERROR("Trying to start OBD %s using the wrong"
-                                      " disk %s. Were the /dev/ assignments "
-                                      "rearranged?\n",
-                                      obd->obd_uuid.uuid, lsd->lsd_uuid);
-                       GOTO(err_fsd, rc = -EINVAL);
-               }
+       rc = ofd_fid_init(env, ofd);
+       if (rc != 0) {
+               CERROR("%s: fid init error: rc = %d\n", ofd_name(ofd), rc);
+               return rc;
        }
 
        }
 
-       lsd->lsd_mount_count++;
-       obd->u.obt.obt_mount_count = lsd->lsd_mount_count;
-       obd->u.obt.obt_instance = (__u32)obd->u.obt.obt_mount_count;
-       ofd->ofd_subdir_count = lsd->lsd_subdir_count;
-
-       if (lsd->lsd_feature_incompat & ~OFD_INCOMPAT_SUPP) {
-               CERROR("%s: unsupported incompat filesystem feature(s) %x\n",
-                      obd->obd_name,
-                      lsd->lsd_feature_incompat & ~OFD_INCOMPAT_SUPP);
-               GOTO(err_fsd, rc = -EINVAL);
+       rc = ofd_fld_init(env, ofd_name(ofd), ofd);
+       if (rc < 0) {
+               CERROR("%s: Can't init fld, rc %d\n", ofd_name(ofd), rc);
+               return rc;
        }
        }
-       if (lsd->lsd_feature_rocompat & ~OFD_ROCOMPAT_SUPP) {
-               CERROR("%s: unsupported read-only filesystem feature(s) %x\n",
-                      obd->obd_name,
-                      lsd->lsd_feature_rocompat & ~OFD_ROCOMPAT_SUPP);
-               /* Do something like remount filesystem read-only */
-               GOTO(err_fsd, rc = -EINVAL);
-       }
-
-       CDEBUG(D_INODE, "%s: server last_transno : "LPU64"\n",
-              obd->obd_name, lsd->lsd_last_transno);
-       CDEBUG(D_INODE, "%s: server mount_count: "LPU64"\n",
-              obd->obd_name, lsd->lsd_mount_count);
-       CDEBUG(D_INODE, "%s: server data size: %u\n",
-              obd->obd_name, lsd->lsd_server_size);
-       CDEBUG(D_INODE, "%s: per-client data start: %u\n",
-              obd->obd_name, lsd->lsd_client_start);
-       CDEBUG(D_INODE, "%s: per-client data size: %u\n",
-              obd->obd_name, lsd->lsd_client_size);
-       CDEBUG(D_INODE, "%s: server subdir_count: %u\n",
-              obd->obd_name, lsd->lsd_subdir_count);
-       CDEBUG(D_INODE, "%s: last_rcvd clients: %lu\n", obd->obd_name,
-              last_rcvd_size <= lsd->lsd_client_start ? 0 :
-              (last_rcvd_size - lsd->lsd_client_start) /
-              lsd->lsd_client_size);
-
-       if (!obd->obd_replayable)
-               CWARN("%s: recovery support OFF\n", obd->obd_name);
-
-       rc = ofd_clients_data_init(env, ofd, last_rcvd_size);
-
-       cfs_spin_lock(&ofd->ofd_lut.lut_translock);
-       obd->obd_last_committed = lsd->lsd_last_transno;
-       cfs_spin_unlock(&ofd->ofd_lut.lut_translock);
-
-       /* save it, so mount count and last_transno is current */
-       rc = lut_server_data_update(env, &ofd->ofd_lut, 0);
-       if (rc)
-               GOTO(err_fsd, rc);
 
 
-       RETURN(0);
+       rc = ofd_register_seq_exp(ofd);
+       if (rc < 0) {
+               CERROR("%s: Can't init seq exp, rc %d\n", ofd_name(ofd), rc);
+               return rc;
+       }
 
 
-err_fsd:
-       class_disconnect_exports(obd);
-       RETURN(rc);
+       rwlock_init(&ofd->ofd_seq_list_lock);
+       INIT_LIST_HEAD(&ofd->ofd_seq_list);
+       ofd->ofd_seq_count = 0;
+       return rc;
 }
 
 }
 
+/**
+ * Initialize storage for the OFD.
+ *
+ * This function sets up service files for OFD. Currently, the only
+ * service file is "health_check".
+ *
+ * \param[in] env      execution environment
+ * \param[in] ofd      OFD device
+ * \param[in] obd      OBD device (unused now)
+ *
+ * \retval             0 on successful setup
+ * \retval             negative value on error
+ */
 int ofd_fs_setup(const struct lu_env *env, struct ofd_device *ofd,
                 struct obd_device *obd)
 {
 int ofd_fs_setup(const struct lu_env *env, struct ofd_device *ofd,
                 struct obd_device *obd)
 {
@@ -464,23 +619,13 @@ int ofd_fs_setup(const struct lu_env *env, struct ofd_device *ofd,
 
        ENTRY;
 
 
        ENTRY;
 
+       rc = ofd_seqs_init(env, ofd);
+       if (rc)
+               GOTO(out_hc, rc);
+
        if (OBD_FAIL_CHECK(OBD_FAIL_MDS_FS_SETUP))
                RETURN (-ENOENT);
 
        if (OBD_FAIL_CHECK(OBD_FAIL_MDS_FS_SETUP))
                RETURN (-ENOENT);
 
-       /* prepare transactions callbacks */
-       ofd->ofd_txn_cb.dtc_txn_start = NULL;
-       ofd->ofd_txn_cb.dtc_txn_stop = ofd_txn_stop_cb;
-       ofd->ofd_txn_cb.dtc_txn_commit = NULL;
-       ofd->ofd_txn_cb.dtc_cookie = ofd;
-       ofd->ofd_txn_cb.dtc_tag = LCT_DT_THREAD;
-       CFS_INIT_LIST_HEAD(&ofd->ofd_txn_cb.dtc_linkage);
-
-       dt_txn_callback_add(ofd->ofd_osd, &ofd->ofd_txn_cb);
-
-       rc = ofd_server_data_init(env, ofd);
-       if (rc)
-               GOTO(out, rc);
-
        lu_local_obj_fid(&info->fti_fid, OFD_HEALTH_CHECK_OID);
        memset(&info->fti_attr, 0, sizeof(info->fti_attr));
        info->fti_attr.la_valid = LA_MODE;
        lu_local_obj_fid(&info->fti_fid, OFD_HEALTH_CHECK_OID);
        memset(&info->fti_attr, 0, sizeof(info->fti_attr));
        info->fti_attr.la_valid = LA_MODE;
@@ -494,59 +639,33 @@ int ofd_fs_setup(const struct lu_env *env, struct ofd_device *ofd,
 
        ofd->ofd_health_check_file = fo;
 
 
        ofd->ofd_health_check_file = fo;
 
-       lu_local_obj_fid(&info->fti_fid, OFD_LAST_GROUP_OID);
-       memset(&info->fti_attr, 0, sizeof(info->fti_attr));
-       info->fti_attr.la_valid = LA_MODE;
-       info->fti_attr.la_mode = S_IFREG | S_IRUGO | S_IWUSR;
-       info->fti_dof.dof_type = dt_mode_to_dft(S_IFREG);
-
-       fo = dt_find_or_create(env, ofd->ofd_osd, &info->fti_fid,
-                              &info->fti_dof, &info->fti_attr);
-       if (IS_ERR(fo))
-               GOTO(out_hc, rc = PTR_ERR(fo));
-
-       ofd->ofd_last_group_file = fo;
-
-       rc = ofd_groups_init(env, ofd);
-       if (rc)
-               GOTO(out_lg, rc);
-
        RETURN(0);
        RETURN(0);
-out_lg:
-       lu_object_put(env, &ofd->ofd_last_group_file->do_lu);
 out_hc:
        lu_object_put(env, &ofd->ofd_health_check_file->do_lu);
 out:
 out_hc:
        lu_object_put(env, &ofd->ofd_health_check_file->do_lu);
 out:
-       dt_txn_callback_del(ofd->ofd_osd, &ofd->ofd_txn_cb);
        return rc;
 }
 
        return rc;
 }
 
+/**
+ * Cleanup service files on OFD.
+ *
+ * This function syncs the whole OFD device and closes the "health check" file.
+ *
+ * \param[in] env      execution environment
+ * \param[in] ofd      OFD device
+ */
 void ofd_fs_cleanup(const struct lu_env *env, struct ofd_device *ofd)
 {
 void ofd_fs_cleanup(const struct lu_env *env, struct ofd_device *ofd)
 {
-       int i;
+       int rc;
 
        ENTRY;
 
 
        ENTRY;
 
-       ofd_info_init(env, NULL);
+       ofd_seqs_fini(env, ofd);
 
 
-       for (i = 0; i <= ofd->ofd_max_group; i++) {
-               if (ofd->ofd_lastid_obj[i]) {
-                       ofd_last_id_write(env, ofd, i);
-                       ofd_group_fini(env, ofd, i);
-               }
-       }
-
-       i = dt_sync(env, ofd->ofd_osd);
-       if (i)
-               CERROR("can't sync: %d\n", i);
-
-       /* Remove transaction callback */
-       dt_txn_callback_del(ofd->ofd_osd, &ofd->ofd_txn_cb);
-
-       if (ofd->ofd_last_group_file) {
-               lu_object_put(env, &ofd->ofd_last_group_file->do_lu);
-               ofd->ofd_last_group_file = NULL;
-       }
+       rc = dt_sync(env, ofd->ofd_osd);
+       if (rc < 0)
+               CWARN("%s: can't sync OFD upon cleanup: %d\n",
+                     ofd_name(ofd), rc);
 
        if (ofd->ofd_health_check_file) {
                lu_object_put(env, &ofd->ofd_health_check_file->do_lu);
 
        if (ofd->ofd_health_check_file) {
                lu_object_put(env, &ofd->ofd_health_check_file->do_lu);