* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
- * Copyright (c) 2011, 2012, Intel, Inc.
+ * Copyright (c) 2012, 2013, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
#define _OSP_INTERNAL_H
#include <obd.h>
+#include <obd_class.h>
#include <dt_object.h>
+#include <md_object.h>
#include <lustre_fid.h>
+/*
+ * Infrastructure to support tracking of last committed llog record.
+ * A tracker is tied to one underlying device (otr_dev) and is shared by
+ * several users, counted in otr_refcount.
+ */
+struct osp_id_tracker {
+ spinlock_t otr_lock;
+ __u32 otr_next_id;
+ __u32 otr_committed_id;
+ /* callback is registered once per diskfs -- that's the whole point */
+ struct dt_txn_callback otr_tx_cb;
+ /* single node can run many clusters */
+ cfs_list_t otr_wakeup_list;
+ cfs_list_t otr_list;
+ /* underlying shared device */
+ struct dt_device *otr_dev;
+ /* how many users of this tracker */
+ cfs_atomic_t otr_refcount;
+};
+
struct osp_device {
struct dt_device opd_dt_dev;
/* corresponded OST index */
int opd_index;
+
+ /* corresponding MDT index, which will be used when connecting to OST
+ * for validating the connection (see ofd_parse_connect_data) */
+ int opd_group;
/* device used to store persistent state (llogs, last ids) */
struct obd_export *opd_storage_exp;
struct dt_device *opd_storage;
- struct dt_object *opd_last_used_file;
- /* protected by opd_pre_lock */
- volatile obd_id opd_last_used_id;
- obd_id opd_gap_start;
+ struct dt_object *opd_last_used_oid_file;
+ struct dt_object *opd_last_used_seq_file;
+
+ /* stored persistently in LE format, updated directly to/from disk
+ * and requires le64_to_cpu() conversion before use.
+ * Protected by opd_pre_lock.
+ * NOTE(review): osp_update_last_fid() in this header compares and
+ * assigns these fids without any byte-order conversion -- confirm
+ * which representation is really kept in memory. */
+ struct lu_fid opd_last_used_fid;
+ struct lu_fid opd_gap_start_fid;
int opd_gap_count;
/* connection to OST */
struct obd_device *opd_obd;
int opd_got_disconnected;
int opd_imp_connected;
int opd_imp_active;
- int opd_imp_seen_connected:1;
+ unsigned int opd_imp_seen_connected:1,
+ opd_connect_mdt:1;
/* whether local recovery is completed:
* reported via ->ldo_recovery_complete() */
int opd_recovery_completed;
+ /*
+ * Precreation pool
+ */
+ spinlock_t opd_pre_lock;
+
+ /* last fid to assign in creation */
+ struct lu_fid opd_pre_used_fid;
+ /* last created id OST reported, next-created - available id's */
+ struct lu_fid opd_pre_last_created_fid;
+ /* how many ids are reserved in declare, we shouldn't block in create */
+ __u64 opd_pre_reserved;
+ /* dedicated precreate thread */
+ struct ptlrpc_thread opd_pre_thread;
+ /* thread waits for signals about pool going empty */
+ cfs_waitq_t opd_pre_waitq;
+ /* consumers (who needs new ids) wait here */
+ cfs_waitq_t opd_pre_user_waitq;
+ /* current precreation status: working, failed, stopping? */
+ int opd_pre_status;
+ /* how many to precreate next time */
+ int opd_pre_grow_count;
+ int opd_pre_min_grow_count;
+ int opd_pre_max_grow_count;
+ /* whether to grow precreation window next time or not */
+ int opd_pre_grow_slow;
+ /* cleaning up orphans or recreating missing objects */
+ int opd_pre_recovering;
+
+ /*
+ * OST synchronization
+ */
+ spinlock_t opd_syn_lock;
+ /* unique generation, to recognize start of new records in the llog */
+ struct llog_gen opd_syn_generation;
+ /* number of changes to sync, used to wake up sync thread */
+ unsigned long opd_syn_changes;
+ /* processing of changes from previous mount is done? */
+ int opd_syn_prev_done;
+ /* found records
+ * NOTE(review): this comment does not describe the sync thread and
+ * wait queue declared right below it -- looks stale, confirm. */
+ struct ptlrpc_thread opd_syn_thread;
+ cfs_waitq_t opd_syn_waitq;
+ /* list of remotely committed rpc */
+ cfs_list_t opd_syn_committed_there;
+ /* number of changes being under sync */
+ int opd_syn_sync_in_progress;
+ /* number of RPCs in flight - flow control */
+ int opd_syn_rpc_in_flight;
+ int opd_syn_max_rpc_in_flight;
+ /* number of RPC in processing (including non-committed by OST) */
+ int opd_syn_rpc_in_progress;
+ int opd_syn_max_rpc_in_progress;
+ /* osd api's commit cb control structure */
+ struct dt_txn_callback opd_syn_txn_cb;
+ /* last used change number -- semantically similar to transno */
+ unsigned long opd_syn_last_used_id;
+ /* last committed change number -- semantically similar to
+ * last_committed */
+ unsigned long opd_syn_last_committed_id;
+ /* last processed (taken from llog) id */
+ unsigned long opd_syn_last_processed_id;
+ struct osp_id_tracker *opd_syn_tracker;
+ cfs_list_t opd_syn_ontrack;
+
+ /*
+ * statfs related fields: OSP maintains it on its own
+ */
+ struct obd_statfs opd_statfs;
+ cfs_time_t opd_statfs_fresh_till;
+ cfs_timer_t opd_statfs_timer;
+ int opd_statfs_update_in_progress;
+ /* how often to update statfs data */
+ int opd_statfs_maxage;
+
cfs_proc_dir_entry_t *opd_symlink;
};
-extern cfs_mem_cache_t *osp_object_kmem;
+extern struct kmem_cache *osp_object_kmem;
/* this is a top object */
struct osp_object {
- struct lu_object_header opo_header;
- struct dt_object opo_obj;
- int opo_reserved;
+ struct lu_object_header opo_header;
+ struct dt_object opo_obj;
+ unsigned int opo_reserved:1,
+ opo_new:1,
+ opo_empty:1;
+
+ /* read/write lock for md osp object */
+ struct rw_semaphore opo_sem;
+ const struct lu_env *opo_owner; /* NOTE(review): presumably the env currently holding opo_sem -- confirm against the lock/unlock callers */
};
extern struct lu_object_operations osp_lu_obj_ops;
+extern const struct dt_device_operations osp_dt_ops;
+extern struct dt_object_operations osp_md_obj_ops;
struct osp_thread_info {
struct lu_buf osi_lb;
+ struct lu_buf osi_lb2;
struct lu_fid osi_fid;
struct lu_attr osi_attr;
+ struct ost_id osi_oi; /* scratch id; osp_fid_end_seq() fills it via fid_to_ostid() */
+ struct ost_id osi_oi2;
obd_id osi_id;
loff_t osi_off;
+ union { /* llog record scratch space: the members share storage, so only one is usable at a time */
+ struct llog_rec_hdr osi_hdr;
+ struct llog_unlink64_rec osi_unlink;
+ struct llog_setattr64_rec osi_setattr;
+ struct llog_gen_rec osi_gen;
+ };
+ struct llog_cookie osi_cookie;
+ struct llog_catid osi_cid;
+ struct lu_seq_range osi_seq;
+ struct ldlm_res_id osi_resid;
+ struct obdo osi_obdo;
};
-static inline void osp_objid_buf_prep(struct osp_thread_info *osi,
- struct osp_device *d, int index)
+static inline void osp_objid_buf_prep(struct lu_buf *buf, loff_t *off,
+ __u32 *id, int index)
{
- osi->osi_lb.lb_buf = (void *)&d->opd_last_used_id;
- osi->osi_lb.lb_len = sizeof(d->opd_last_used_id);
- osi->osi_off = sizeof(d->opd_last_used_id) * index;
+ buf->lb_buf = (void *)id; /* buffer points at the caller's id storage, nothing is copied */
+ buf->lb_len = sizeof(obd_id); /* NOTE(review): length is sizeof(obd_id) although id is a __u32 * -- if obd_id is wider than 32 bits, I/O through buf spans past *id; confirm this width is intended and that callers pass storage that large */
+ *off = sizeof(obd_id) * index; /* byte offset of slot 'index', one obd_id-sized slot per index */
+}
+
+/*
+ * Describe the caller's sequence value as an I/O buffer: buf points at *seq
+ * with a length of sizeof(obd_id), and *off receives index * sizeof(obd_id).
+ * Nothing is copied; seq must stay valid for as long as buf is in use.
+ */
+static inline void osp_objseq_buf_prep(struct lu_buf *buf, loff_t *off,
+				       __u64 *seq, int index)
+{
+	*off = sizeof(obd_id) * index;
+	buf->lb_len = sizeof(obd_id);
+	buf->lb_buf = (void *)seq;
+}
+
+/*
+ * Fill in lb so that it describes the caller-owned memory region
+ * [buf, buf + buf_len). Only the two fields are assigned; no data is copied.
+ */
+static inline void osp_buf_prep(struct lu_buf *lb, void *buf, int buf_len)
+{
+	lb->lb_len = buf_len;
+	lb->lb_buf = buf;
}
extern struct lu_context_key osp_thread_key;
struct dt_object, do_lu);
}
+#define osp_init_rpc_lock(lck) mdc_init_rpc_lock(lck) /* osp_*_rpc_lock(): plain aliases that forward their arguments to the mdc_*_rpc_lock() helpers */
+#define osp_get_rpc_lock(lck, it) mdc_get_rpc_lock(lck, it)
+#define osp_put_rpc_lock(lck, it) mdc_put_rpc_lock(lck, it)
+
+/*
+ * Advance d->opd_last_used_fid to *fid when fid is ahead of it.
+ *
+ * If fid is more than one step ahead, the skipped range is remembered in
+ * opd_gap_start_fid / opd_gap_count before the last used fid is moved.
+ * A fid that is not ahead of the current one leaves the device untouched.
+ */
+static inline void osp_update_last_fid(struct osp_device *d, struct lu_fid *fid)
+{
+	int delta = lu_fid_diff(fid, &d->opd_last_used_fid);
+
+	if (delta <= 0)
+		return;
+
+	/*
+	 * precreated objects may have been lost due to VBR and precreate
+	 * orphans; this is the only place where the resulting gap in
+	 * objids can be calculated properly
+	 */
+	if (delta > 1) {
+		d->opd_gap_start_fid = d->opd_last_used_fid;
+		d->opd_gap_start_fid.f_oid++;
+		d->opd_gap_count = delta - 1;
+		CDEBUG(D_HA, "Gap in objids: start="DFID", count =%d\n",
+		       PFID(&d->opd_gap_start_fid), d->opd_gap_count);
+	}
+
+	d->opd_last_used_fid = *fid;
+}
+
+/*
+ * Check whether fid carries the highest object id its sequence allows.
+ *
+ * For an IDIF fid the id is extracted with fid_to_ostid() into the
+ * per-thread scratch osi_oi (so env must provide osp_thread_info) and
+ * compared with IDIF_MAX_OID; for any other fid the oid is compared with
+ * LUSTRE_DATA_SEQ_MAX_WIDTH.
+ *
+ * Declared "static inline" like the other helpers in this header: a plain
+ * "static" definition would be emitted as an unused function, with the
+ * matching compiler warning, in every file that includes the header
+ * without calling it.
+ *
+ * Returns non-zero when the end of the sequence is reached, 0 otherwise.
+ */
+static inline int osp_fid_end_seq(const struct lu_env *env,
+				  struct lu_fid *fid)
+{
+	if (fid_is_idif(fid)) {
+		struct osp_thread_info *info = osp_env_info(env);
+		struct ost_id *oi = &info->osi_oi;
+
+		fid_to_ostid(fid, oi);
+		return ostid_id(oi) == IDIF_MAX_OID;
+	} else {
+		return fid_oid(fid) == LUSTRE_DATA_SEQ_MAX_WIDTH;
+	}
+}
+
+/*
+ * Report whether the last precreated fid of osp is at the end of its
+ * sequence. Takes no lock itself; osp_precreate_end_seq() is the variant
+ * that wraps this call in opd_pre_lock.
+ */
+static inline int osp_precreate_end_seq_nolock(const struct lu_env *env,
+					       struct osp_device *osp)
+{
+	return osp_fid_end_seq(env, &osp->opd_pre_last_created_fid);
+}
+
+/*
+ * Locked variant of osp_precreate_end_seq_nolock(): the check runs with
+ * osp->opd_pre_lock held and its result is returned after the lock is
+ * dropped.
+ */
+static inline int osp_precreate_end_seq(const struct lu_env *env,
+					struct osp_device *osp)
+{
+	int at_end;
+
+	spin_lock(&osp->opd_pre_lock);
+	at_end = osp_precreate_end_seq_nolock(env, osp);
+	spin_unlock(&osp->opd_pre_lock);
+
+	return at_end;
+}
+
+/*
+ * Test the OBD_CONNECT_FID bit in the connect flags of the import behind
+ * osp->opd_obd. Returns the masked flag value (non-zero when the bit is
+ * set), not a normalized 0/1.
+ */
+static inline int osp_is_fid_client(struct osp_device *osp)
+{
+	return osp->opd_obd->u.cli.cl_import->imp_connect_data.ocd_connect_flags &
+	       OBD_CONNECT_FID;
+}
+
/* osp_dev.c */
void osp_update_last_id(struct osp_device *d, obd_id objid);
+extern struct llog_operations osp_mds_ost_orig_logops;
+
+/* osp_md_object.c */
+int osp_trans_start(const struct lu_env *env, struct dt_device *dt,
+ struct thandle *th);
+int osp_trans_stop(const struct lu_env *env, struct thandle *th);
+/* osp_precreate.c */
+int osp_init_precreate(struct osp_device *d);
+int osp_precreate_reserve(const struct lu_env *env, struct osp_device *d);
+__u64 osp_precreate_get_id(struct osp_device *d);
+int osp_precreate_get_fid(const struct lu_env *env, struct osp_device *d,
+ struct lu_fid *fid);
+void osp_precreate_fini(struct osp_device *d);
+int osp_object_truncate(const struct lu_env *env, struct dt_object *dt, __u64);
+void osp_pre_update_status(struct osp_device *d, int rc);
+void osp_statfs_need_now(struct osp_device *d);
+int osp_reset_last_used(const struct lu_env *env, struct osp_device *osp);
+int osp_write_last_oid_seq_files(struct lu_env *env, struct osp_device *osp,
+ struct lu_fid *fid, int sync);
/* lproc_osp.c */
void lprocfs_osp_init_vars(struct lprocfs_static_vars *lvars);
+void osp_lprocfs_init(struct osp_device *osp);
+
+/* osp_sync.c */
+int osp_sync_declare_add(const struct lu_env *env, struct osp_object *o,
+ llog_op_type type, struct thandle *th);
+int osp_sync_add(const struct lu_env *env, struct osp_object *o,
+ llog_op_type type, struct thandle *th,
+ const struct lu_attr *attr);
+int osp_sync_init(const struct lu_env *env, struct osp_device *d);
+int osp_sync_fini(struct osp_device *d);
+void __osp_sync_check_for_work(struct osp_device *d);
+
+/* lwp_dev.c */
+void lprocfs_lwp_init_vars(struct lprocfs_static_vars *lvars);
+extern struct obd_ops lwp_obd_device_ops;
+extern struct lu_device_type lwp_device_type;
#endif