* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
- * Copyright (c) 2011, 2012, Whamcloud, Inc.
+ * Copyright (c) 2012, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
#define OFD_ROCOMPAT_SUPP (0)
#define OFD_INCOMPAT_SUPP (OBD_INCOMPAT_GROUPS | OBD_INCOMPAT_OST | \
OBD_INCOMPAT_COMMON_LR)
-#define OFD_MAX_GROUPS 256
+/* Default precreate batch size: four times FILTER_SUBDIR_COUNT.
+ * NOTE(review): presumably the initial value of
+ * ofd_device::ofd_precreate_batch -- confirm against the setup code. */
+#define OFD_PRECREATE_BATCH_DEFAULT (FILTER_SUBDIR_COUNT * 4)
+
+/* On small filesystems we must not precreate too many objects in a single
+ * transaction, otherwise the transaction can overflow.
+ * OFD_PRECREATE_SMALL_FS is the "small" threshold, 1024^3 (presumably in
+ * bytes, i.e. 1 GiB -- confirm at the point of use), and
+ * OFD_PRECREATE_BATCH_SMALL is the reduced batch size, presumably applied
+ * to filesystems below that threshold. */
+#define OFD_PRECREATE_SMALL_FS (1024ULL * 1024 * 1024)
+#define OFD_PRECREATE_BATCH_SMALL 8
+
+/* Limit the returned fields marked valid to those that we actually might set */
+#define OFD_VALID_FLAGS (LA_TYPE | LA_MODE | LA_SIZE | LA_BLOCKS | \
+ LA_BLKSIZE | LA_ATIME | LA_MTIME | LA_CTIME)
/* per-client-per-object persistent state (LRU) */
struct ofd_mod_data {
LPROC_OFD_LAST,
};
+/*
+ * Operation codes for job stats; these are the values compared against the
+ * "opcode" argument of ofd_counter_incr().  The codes are consecutive and
+ * start at zero.
+ */
+enum {
+	LPROC_OFD_STATS_READ = 0,
+	LPROC_OFD_STATS_WRITE,		/* 1 */
+	LPROC_OFD_STATS_SETATTR,	/* 2 */
+	LPROC_OFD_STATS_PUNCH,		/* 3 */
+	LPROC_OFD_STATS_SYNC,		/* 4 */
+	LPROC_OFD_STATS_LAST,		/* 5: one past the last opcode */
+};
+
+/*
+ * Account @amount for operation @opcode on export @exp.
+ *
+ * The amount is logged to job stats under @jobid when the export's obd
+ * device has a job-stats hash and the export connected with
+ * OBD_CONNECT_JOBSTATS.  Independently of that, when the export has per-NID
+ * stats, read and write opcodes are added to the per-NID read/write byte
+ * counters; every other opcode leaves the per-NID stats untouched.
+ */
+static inline void ofd_counter_incr(struct obd_export *exp, int opcode,
+				    char *jobid, long amount)
+{
+	int nid_counter;
+
+	if (exp->exp_obd && exp->exp_obd->u.obt.obt_jobstats.ojs_hash &&
+	    (exp->exp_connect_flags & OBD_CONNECT_JOBSTATS))
+		lprocfs_job_stats_log(exp->exp_obd, jobid, opcode, amount);
+
+	/* no per-NID stats attached to this export: nothing more to do */
+	if (exp->exp_nid_stats == NULL ||
+	    exp->exp_nid_stats->nid_stats == NULL)
+		return;
+
+	/* only reads and writes have a per-NID byte counter */
+	switch (opcode) {
+	case LPROC_OFD_STATS_READ:
+		nid_counter = LPROC_OFD_READ_BYTES;
+		break;
+	case LPROC_OFD_STATS_WRITE:
+		nid_counter = LPROC_OFD_WRITE_BYTES;
+		break;
+	default:
+		return;
+	}
+
+	lprocfs_counter_add(exp->exp_nid_stats->nid_stats, nid_counter,
+			    amount);
+}
+
+/* Per-sequence state kept by an OFD device.
+ * NOTE(review): the field roles below are inferred from the field names and
+ * from the ofd_seq_*() prototypes in this header -- confirm against the
+ * implementation before relying on them. */
+struct ofd_seq {
+ /* list linkage; presumably chained on ofd_device::ofd_seq_list */
+ cfs_list_t os_list;
+ /* last object id of this sequence; presumably accessed through
+  * ofd_seq_last_oid() and ofd_seq_last_oid_set() */
+ obd_id os_last_oid;
+ /* sequence number; presumably the key looked up by ofd_seq_get() */
+ obd_seq os_seq;
+ /* presumably protects os_last_oid -- TODO confirm */
+ spinlock_t os_last_oid_lock;
+ /* presumably serializes object creation in this sequence -- TODO confirm */
+ struct mutex os_create_lock;
+ /* reference count; presumably taken by ofd_seq_get() and dropped by
+  * ofd_seq_put() */
+ cfs_atomic_t os_refc;
+ /* presumably the object that stores the last id persistently, cf.
+  * ofd_seq_last_oid_write() -- TODO confirm */
+ struct dt_object *os_lastid_obj;
+ /* one-bit flag */
+ unsigned long os_destroys_in_progress:1;
+};
+
struct ofd_device {
struct dt_device ofd_dt_dev;
struct dt_device *ofd_osd;
+ struct obd_export *ofd_osd_exp;
struct dt_device_param ofd_dt_conf;
/* DLM name-space for meta-data locks maintained by this server */
struct ldlm_namespace *ofd_namespace;
/* last_rcvd file */
struct lu_target ofd_lut;
- struct dt_object *ofd_last_group_file;
struct dt_object *ofd_health_check_file;
int ofd_subdir_count;
- int ofd_max_group;
- obd_id ofd_last_objids[OFD_MAX_GROUPS];
- cfs_mutex_t ofd_create_locks[OFD_MAX_GROUPS];
- struct dt_object *ofd_lastid_obj[OFD_MAX_GROUPS];
- cfs_spinlock_t ofd_objid_lock;
+ cfs_list_t ofd_seq_list;
+ rwlock_t ofd_seq_list_lock;
+ int ofd_seq_count;
+ int ofd_precreate_batch;
+ spinlock_t ofd_batch_lock;
/* protect all statfs-related counters */
- cfs_spinlock_t ofd_osfs_lock;
+ spinlock_t ofd_osfs_lock;
/* statfs optimization: we cache a bit */
struct obd_statfs ofd_osfs;
__u64 ofd_osfs_age;
/* grants: all values in bytes */
/* grant lock to protect all grant counters */
- cfs_spinlock_t ofd_grant_lock;
+ spinlock_t ofd_grant_lock;
/* total amount of dirty data reported by clients in incoming obdo */
obd_size ofd_tot_dirty;
/* sum of filesystem space granted to clients for async writes */
int ofd_fmd_max_num; /* per ofd ofd_mod_data */
cfs_duration_t ofd_fmd_max_age; /* time to fmd expiry */
- cfs_spinlock_t ofd_flags_lock;
+ spinlock_t ofd_flags_lock;
unsigned long ofd_raid_degraded:1,
/* sync journal on writes */
ofd_syncjournal:1,
/* shall we grant space to clients not
* supporting OBD_CONNECT_GRANT_PARAM? */
ofd_grant_compat_disable:1;
-
- struct lu_site ofd_site;
};
static inline struct ofd_device *ofd_dev(struct lu_device *d)
struct lu_fid fti_fid;
struct lu_attr fti_attr;
struct lu_attr fti_attr2;
+ struct ldlm_res_id fti_resid;
+ struct filter_fid fti_mds_fid;
struct filter_fid fti_mds_fid2;
struct ost_id fti_ostid;
struct ofd_object *fti_obj;
/* Space used by the I/O, used by grant code */
unsigned long fti_used;
+ struct ost_lvb fti_lvb;
};
extern void target_recovery_fini(struct obd_device *obd);
/* ofd_dev.c */
extern struct lu_context_key ofd_thread_key;
+int ofd_postrecov(const struct lu_env *env, struct ofd_device *ofd);
/* ofd_obd.c */
extern struct obd_ops ofd_obd_ops;
int *from_cache);
/* ofd_fs.c */
-obd_id ofd_last_id(struct ofd_device *ofd, obd_seq seq);
-void ofd_last_id_set(struct ofd_device *ofd, obd_id id, obd_seq seq);
-int ofd_group_load(const struct lu_env *env, struct ofd_device *ofd, int);
+obd_id ofd_seq_last_oid(struct ofd_seq *oseq);
+void ofd_seq_last_oid_set(struct ofd_seq *oseq, obd_id id);
+int ofd_seq_last_oid_write(const struct lu_env *env, struct ofd_device *ofd,
+ struct ofd_seq *oseq);
+int ofd_seqs_init(const struct lu_env *env, struct ofd_device *ofd);
+struct ofd_seq *ofd_seq_get(struct ofd_device *ofd, obd_seq seq);
+void ofd_seq_put(const struct lu_env *env, struct ofd_seq *oseq);
+
int ofd_fs_setup(const struct lu_env *env, struct ofd_device *ofd,
struct obd_device *obd);
void ofd_fs_cleanup(const struct lu_env *env, struct ofd_device *ofd);
+int ofd_precreate_batch(struct ofd_device *ofd, int batch);
+struct ofd_seq *ofd_seq_load(const struct lu_env *env, struct ofd_device *ofd,
+ obd_seq seq);
+void ofd_seqs_fini(const struct lu_env *env, struct ofd_device *ofd);
+
+/* ofd_io.c */
+int ofd_preprw(const struct lu_env *env,int cmd, struct obd_export *exp,
+ struct obdo *oa, int objcount, struct obd_ioobj *obj,
+ struct niobuf_remote *rnb, int *nr_local,
+ struct niobuf_local *lnb, struct obd_trans_info *oti,
+ struct lustre_capa *capa);
+int ofd_commitrw(const struct lu_env *env, int cmd, struct obd_export *exp,
+ struct obdo *oa, int objcount, struct obd_ioobj *obj,
+ struct niobuf_remote *rnb, int npages,
+ struct niobuf_local *lnb, struct obd_trans_info *oti,
+ int old_rc);
/* ofd_trans.c */
struct thandle *ofd_trans_create(const struct lu_env *env,
void *cookie);
/* lproc_ofd.c */
+#ifdef LPROCFS
void lprocfs_ofd_init_vars(struct lprocfs_static_vars *lvars);
int lproc_ofd_attach_seqstat(struct obd_device *dev);
extern struct file_operations ofd_per_nid_stats_fops;
+void ofd_stats_counter_init(struct lprocfs_stats *stats);
+#else
+/*
+ * Stubs for builds without LPROCFS.  All of them are static inline: a plain
+ * static function defined in a header triggers a "defined but not used"
+ * warning in every file that includes the header without calling it.
+ */
+
+/* No proc variables are available: hand back an all-zero variable set. */
+static inline void lprocfs_ofd_init_vars(struct lprocfs_static_vars *lvars)
+{
+	memset(lvars, 0, sizeof(*lvars));
+}
+
+/*
+ * Nothing to attach.  The function is declared to return int, so it must
+ * return a value; 0 reports success so callers carry on normally.
+ */
+static inline int lproc_ofd_attach_seqstat(struct obd_device *dev)
+{
+	return 0;
+}
+
+/* No counters to set up. */
+static inline void ofd_stats_counter_init(struct lprocfs_stats *stats)
+{
+}
+#endif
/* ofd_objects.c */
struct ofd_object *ofd_object_find(const struct lu_env *env,
const struct lu_fid *fid,
struct lu_attr *attr);
int ofd_object_ff_check(const struct lu_env *env, struct ofd_object *fo);
-int ofd_precreate_object(const struct lu_env *env, struct ofd_device *ofd,
- obd_id id, obd_seq seq);
+int ofd_precreate_objects(const struct lu_env *env, struct ofd_device *ofd,
+ obd_id id, struct ofd_seq *oseq, int nr);
void ofd_object_put(const struct lu_env *env, struct ofd_object *fo);
int ofd_attr_set(const struct lu_env *env, struct ofd_object *fo,
/* ofd_dlm.c */
int ofd_intent_policy(struct ldlm_namespace *ns, struct ldlm_lock **lockp,
- void *req_cookie, ldlm_mode_t mode, int flags,
+ void *req_cookie, ldlm_mode_t mode, __u64 flags,
void *data);
static inline struct ofd_thread_info * ofd_info(const struct lu_env *env)
}
}
+/*
+ * Copy the xid, transno and pre-version carried by @oti into the thread
+ * info @info.
+ */
+static inline void ofd_oti2info(struct ofd_thread_info *info,
+				struct obd_trans_info *oti)
+{
+	info->fti_pre_version = oti->oti_pre_version;
+	info->fti_transno = oti->oti_transno;
+	info->fti_xid = oti->oti_xid;
+}
+
+/*
+ * Copy the xid, transno and pre-version from the thread info @info back
+ * into @oti.
+ *
+ * If @oti already carries a positive transno it has to be equal to the one
+ * in @info, otherwise the assertion below fires.
+ */
+static inline void ofd_info2oti(struct ofd_thread_info *info,
+				struct obd_trans_info *oti)
+{
+	oti->oti_xid = info->fti_xid;
+
+	/* check the old transno before it is overwritten */
+	LASSERTF(ergo(oti->oti_transno > 0,
+		      oti->oti_transno == info->fti_transno),
+		 "Overwrite replay transno "LPX64" by "LPX64"\n",
+		 oti->oti_transno, info->fti_transno);
+
+	oti->oti_pre_version = info->fti_pre_version;
+	oti->oti_transno = info->fti_transno;
+}
+
/* sync on lock cancel is useless when we force a journal flush,
* and if we enable async journal commit, we should also turn on
* sync on lock cancel if it is not enabled already. */
ff->ff_seq = cpu_to_le64(oa->o_seq);
}
-/* niobuf_local has no rnb_ prefix in master */
+/* niobuf_remote has no rnb_ prefix in master */
#define rnb_offset offset
#define rnb_flags flags
#define rnb_len len
+/* likewise, niobuf_local fields have no lnb_ prefix in master */
+#define lnb_flags flags
+#define lnb_rc rc
#endif /* _OFD_INTERNAL_H */