1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * lustre/mdt/mdt_internal.h
5 * Lustre Metadata Target (mdt) request handler
7 * Copyright (c) 2006 Cluster File Systems, Inc.
8 * Author: Peter Braam <braam@clusterfs.com>
9 * Author: Andreas Dilger <adilger@clusterfs.com>
10 * Author: Phil Schwan <phil@clusterfs.com>
11 * Author: Mike Shaver <shaver@clusterfs.com>
12 * Author: Nikita Danilov <nikita@clusterfs.com>
13 * Author: Huang Hua <huanghua@clusterfs.com>
15 * This file is part of the Lustre file system, http://www.lustre.org
16 * Lustre is a trademark of Cluster File Systems, Inc.
18 * You may have signed or agreed to another license before downloading
19 * this software. If so, you are bound by the terms and conditions
20 * of that agreement, and the following does not apply to you. See the
21 * LICENSE file included with this distribution for more information.
23 * If you did not agree to a different license, then this copy of Lustre
24 * is open source software; you can redistribute it and/or modify it
25 * under the terms of version 2 of the GNU General Public License as
26 * published by the Free Software Foundation.
28 * In either case, Lustre is distributed in the hope that it will be
29 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
30 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
31 * license text for more details.
34 #ifndef _MDT_INTERNAL_H
35 #define _MDT_INTERNAL_H
37 #if defined(__KERNEL__)
40 * struct ptlrpc_client
42 #include <lustre_net.h>
45 * struct obd_connect_data
46 * struct lustre_handle
48 #include <lustre/lustre_idl.h>
49 #include <md_object.h>
50 #include <dt_object.h>
51 #include <lustre_fid.h>
52 #include <lustre_fld.h>
53 #include <lustre_req_layout.h>
54 /* LR_CLIENT_SIZE, etc. */
55 #include <lustre_disk.h>
56 #include <lustre_sec.h>
60 /* Data stored per client in the last_rcvd file. In le32 order. */
61 struct mdt_client_data {
62 __u8 mcd_uuid[40]; /* client UUID */
63 __u64 mcd_last_transno; /* last completed transaction ID */
64 __u64 mcd_last_xid; /* xid for the last transaction */
65 __u32 mcd_last_result; /* result from last RPC */
66 __u32 mcd_last_data; /* per-op data (disposition for open &c.) */
67 /* for MDS_CLOSE requests */
68 __u64 mcd_last_close_transno; /* last completed transaction ID */
69 __u64 mcd_last_close_xid; /* xid for the last transaction */
70 __u32 mcd_last_close_result; /* result from last RPC */
71 __u8 mcd_padding[LR_CLIENT_SIZE - 84];
74 static inline __u64 mcd_last_transno(struct mdt_client_data *mcd)
76 return max(mcd->mcd_last_transno, mcd->mcd_last_close_transno);
79 static inline __u64 mcd_last_xid(struct mdt_client_data *mcd)
81 return max(mcd->mcd_last_xid, mcd->mcd_last_close_xid);
84 /* check if request's xid is equal to last one or not*/
85 static inline int req_xid_is_last(struct ptlrpc_request *req)
87 struct mdt_client_data *mcd = req->rq_export->exp_mdt_data.med_mcd;
88 return (req->rq_xid == mcd->mcd_last_xid ||
89 req->rq_xid == mcd->mcd_last_close_xid);
92 /* copied from lr_server_data.
93 * mds data stored at the head of last_rcvd file. In le32 order. */
94 struct mdt_server_data {
95 __u8 msd_uuid[40]; /* server UUID */
96 __u64 msd_last_transno; /* last completed transaction ID */
97 __u64 msd_mount_count; /* incarnation number */
98 __u32 msd_feature_compat; /* compatible feature flags */
99 __u32 msd_feature_rocompat;/* read-only compatible feature flags */
100 __u32 msd_feature_incompat;/* incompatible feature flags */
101 __u32 msd_server_size; /* size of server data area */
102 __u32 msd_client_start; /* start of per-client data area */
103 __u16 msd_client_size; /* size of per-client data area */
104 //__u16 msd_subdir_count; /* number of subdirectories for objects */
105 //__u64 msd_catalog_oid; /* recovery catalog object id */
106 //__u32 msd_catalog_ogen; /* recovery catalog inode generation */
107 //__u8 msd_peeruuid[40]; /* UUID of MDS associated with this OST */
108 //__u32 msd_ost_index; /* index number of OST in LOV */
109 //__u32 msd_mdt_index; /* index number of MDT in LMV */
110 __u8 msd_padding[LR_SERVER_SIZE - 78];
114 /* file data for open files on MDS */
115 struct mdt_file_data {
116 struct portals_handle mfd_handle; /* must be first */
117 struct list_head mfd_list; /* protected by med_open_lock */
118 __u64 mfd_xid; /* xid of the open request */
119 int mfd_mode; /* open mode provided by client */
120 struct mdt_object *mfd_object; /* point to opened object */
125 struct md_device mdt_md_dev;
126 struct ptlrpc_service *mdt_regular_service;
127 struct ptlrpc_service *mdt_readpage_service;
128 struct ptlrpc_service *mdt_setattr_service;
129 struct ptlrpc_service *mdt_mdsc_service;
130 struct ptlrpc_service *mdt_mdss_service;
131 struct ptlrpc_service *mdt_dtss_service;
132 struct ptlrpc_service *mdt_fld_service;
133 /* DLM name-space for meta-data locks maintained by this server */
134 struct ldlm_namespace *mdt_namespace;
135 /* ptlrpc handle for MDS->client connections (for lock ASTs). */
136 struct ptlrpc_client *mdt_ldlm_client;
137 /* underlying device */
138 struct md_device *mdt_child;
139 struct dt_device *mdt_bottom;
141 * Options bit-fields.
144 signed int mo_user_xattr :1,
151 /* lock to protect epoch and write count */
152 spinlock_t mdt_ioepoch_lock;
155 /* Transaction related stuff here */
156 spinlock_t mdt_transno_lock;
157 __u64 mdt_last_transno;
159 /* transaction callbacks */
160 struct dt_txn_callback mdt_txn_cb;
162 struct dt_object *mdt_last_rcvd;
164 /* these values should be updated from lov if necessary.
165 * or should be placed somewhere else. */
167 int mdt_max_cookiesize;
168 __u64 mdt_mount_count;
171 struct mdt_server_data mdt_msd;
172 spinlock_t mdt_client_bitmap_lock;
173 unsigned long mdt_client_bitmap[(LR_MAX_CLIENTS >> 3) / sizeof(long)];
175 struct upcall_cache *mdt_identity_cache;
176 struct upcall_cache *mdt_rmtacl_cache;
179 struct rootsquash_info *mdt_rootsquash_info;
181 /* capability keys */
182 unsigned long mdt_capa_timeout;
184 struct dt_object *mdt_ck_obj;
185 unsigned long mdt_ck_timeout;
186 unsigned long mdt_ck_expiry;
187 struct timer_list mdt_ck_timer;
188 struct ptlrpc_thread mdt_ck_thread;
189 struct lustre_capa_key mdt_capa_keys[2];
190 unsigned int mdt_capa_conf:1;
/* XXX: copied from mds_internal.h */

/* obd_timeout multiplied by 1000. */
#define MDT_SERVICE_WATCHDOG_TIMEOUT (obd_timeout * 1000)
/* Read-only-compatible feature mask (presumably the set supported here). */
#define MDT_ROCOMPAT_SUPP            (OBD_ROCOMPAT_LOVOBJID)
/* Incompatible feature mask (presumably the set supported here). */
#define MDT_INCOMPAT_SUPP            (OBD_INCOMPAT_MDT | OBD_INCOMPAT_COMMON_LR)
199 struct lu_object_header mot_header;
200 struct md_object mot_obj;
207 struct mdt_lock_handle {
208 /* Lock type, reg for cross-ref use or pdo lock. */
212 struct lustre_handle mlh_reg_lh;
213 ldlm_mode_t mlh_reg_mode;
216 struct lustre_handle mlh_pdo_lh;
217 ldlm_mode_t mlh_pdo_mode;
218 unsigned int mlh_pdo_hash;
222 MDT_LH_PARENT, /* parent lockh */
223 MDT_LH_CHILD, /* child lockh */
224 MDT_LH_OLD, /* old lockh for rename */
225 MDT_LH_NEW, /* new lockh for rename */
226 MDT_LH_RMT, /* used for return lh to caller */
235 struct mdt_reint_record {
236 mdt_reint_t rr_opcode;
237 const struct lu_fid *rr_fid1;
238 const struct lu_fid *rr_fid2;
243 const void *rr_eadata;
246 const struct llog_cookie *rr_logcookies;
/* Bit flags for reint handling; each enumerator occupies its own bit. */
enum mdt_reint_flag {
        MRF_SETATTR_LOCKED = 1 << 0,
};
255 MDT_NONEED_TRANSNO = (1 << 0) /*Do not need transno for this req*/
259 * Common data shared by mdt-level handlers. This is allocated per-thread to
260 * reduce stack consumption.
262 struct mdt_thread_info {
265 * The following members will be filled explicitly
266 * with specific data in mdt_thread_info_init().
270 * for req-layout interface. This field should be first to be compatible
271 * with "struct com_thread_info" in seq and fld.
273 struct req_capsule mti_pill;
275 * number of buffers in reply message.
279 * sizes of reply buffers.
281 int mti_rep_buf_size[REQ_MAX_FIELD_NR];
283 * A couple of lock handles.
285 struct mdt_lock_handle mti_lh[MDT_LH_NR];
287 struct mdt_device *mti_mdt;
288 const struct lu_env *mti_env;
291 * Additional fail id that can be set by handler. Passed to
292 * target_send_reply().
296 /* transaction number of current request */
302 * The following members will be filled explicitly
303 * with zero in mdt_thread_info_init(). These members may be used
310 struct md_attr mti_attr;
312 * Body for "habeo corpus" operations.
314 const struct mdt_body *mti_body;
316 * Host object. This is released at the end of mdt_handler().
318 struct mdt_object *mti_object;
320 * Lock request for "habeo clavis" operations.
322 const struct ldlm_request *mti_dlm_req;
324 __u32 mti_has_trans:1, /* has txn already? */
327 /* opdata for mdt_reint_open(), has the same as
328 * ldlm_reply:lock_policy_res1. mdt_update_last_rcvd() stores this
329 * value onto disk for recovery when mdt_trans_stop_cb() is called.
335 * The following members will be filled explicitly
336 * with zero in mdt_reint_unpack(), because they are only used
337 * by reint requests (including mdt_reint_open()).
341 * reint record. contains information for reint operations.
343 struct mdt_reint_record mti_rr;
345 * Create specification
347 struct md_create_spec mti_spec;
352 * The following members will _NOT_ be initialized at all.
353 * DO NOT expect them to contain any valid value.
354 * They should be initialized explicitly by the user themselves.
357 /* XXX: If something is in a union, make sure they do not conflict */
359 struct lu_fid mti_tmp_fid1;
360 struct lu_fid mti_tmp_fid2;
361 ldlm_policy_data_t mti_policy; /* for mdt_object_lock() and
362 * mdt_rename_lock() */
363 struct ldlm_res_id mti_res_id; /* for mdt_object_lock() and
366 struct obd_uuid uuid[2]; /* for mdt_seq_init_cli() */
367 char ns_name[48]; /* for mdt_init0() */
368 struct lustre_cfg_bufs bufs; /* for mdt_stack_fini() */
369 struct kstatfs ksfs; /* for mdt_statfs() */
371 /* for mdt_readpage() */
372 struct lu_rdpg mti_rdpg;
373 /* for mdt_sendpage() */
374 struct l_wait_info mti_wait_info;
378 /* IO epoch related stuff. */
379 struct mdt_epoch *mti_epoch;
380 __u64 mti_replayepoch;
382 /* server and client data buffers */
383 struct mdt_server_data mti_msd;
384 struct mdt_client_data mti_mcd;
386 struct txn_param mti_txn_param;
387 struct lu_buf mti_buf;
388 struct lustre_capa_key mti_capa_key;
391 * Info allocated per-transaction.
393 struct mdt_txn_info {
397 static inline struct md_device_operations *mdt_child_ops(struct mdt_device * m)
399 LASSERT(m->mdt_child);
400 return m->mdt_child->md_ops;
403 static inline struct md_object *mdt_object_child(struct mdt_object *o)
405 return lu2md(lu_object_next(&o->mot_obj.mo_lu));
408 static inline struct ptlrpc_request *mdt_info_req(struct mdt_thread_info *info)
410 return info->mti_pill.rc_req;
413 static inline void mdt_object_get(const struct lu_env *env,
414 struct mdt_object *o)
417 lu_object_get(&o->mot_obj.mo_lu);
421 static inline void mdt_object_put(const struct lu_env *env,
422 struct mdt_object *o)
425 lu_object_put(env, &o->mot_obj.mo_lu);
429 static inline int mdt_object_exists(const struct mdt_object *o)
431 return lu_object_exists(&o->mot_obj.mo_lu);
434 static inline const struct lu_fid *mdt_object_fid(struct mdt_object *o)
436 return lu_object_fid(&o->mot_obj.mo_lu);
439 int mdt_get_disposition(struct ldlm_reply *rep, int flag);
440 void mdt_set_disposition(struct mdt_thread_info *info,
441 struct ldlm_reply *rep, int flag);
442 void mdt_clear_disposition(struct mdt_thread_info *info,
443 struct ldlm_reply *rep, int flag);
445 void mdt_lock_pdo_init(struct mdt_lock_handle *lh,
446 ldlm_mode_t lm, const char *name,
449 void mdt_lock_reg_init(struct mdt_lock_handle *lh,
452 int mdt_lock_setup(struct mdt_thread_info *info,
453 struct mdt_object *o,
454 struct mdt_lock_handle *lh);
456 int mdt_object_lock(struct mdt_thread_info *,
458 struct mdt_lock_handle *,
461 void mdt_object_unlock(struct mdt_thread_info *,
463 struct mdt_lock_handle *,
466 struct mdt_object *mdt_object_find(const struct lu_env *,
468 const struct lu_fid *);
469 struct mdt_object *mdt_object_find_lock(struct mdt_thread_info *,
470 const struct lu_fid *,
471 struct mdt_lock_handle *,
473 void mdt_object_unlock_put(struct mdt_thread_info *,
475 struct mdt_lock_handle *,
478 int mdt_close_unpack(struct mdt_thread_info *info);
479 int mdt_reint_unpack(struct mdt_thread_info *info, __u32 op);
480 int mdt_reint_rec(struct mdt_thread_info *, struct mdt_lock_handle *);
481 void mdt_pack_size2body(struct mdt_body *b, const struct lu_attr *attr,
482 struct mdt_object *o);
483 void mdt_pack_attr2body(struct mdt_thread_info *info, struct mdt_body *b,
484 const struct lu_attr *attr, const struct lu_fid *fid);
486 int mdt_getxattr(struct mdt_thread_info *info);
487 int mdt_setxattr(struct mdt_thread_info *info);
489 void mdt_lock_handle_init(struct mdt_lock_handle *lh);
490 void mdt_lock_handle_fini(struct mdt_lock_handle *lh);
492 void mdt_reconstruct(struct mdt_thread_info *, struct mdt_lock_handle *);
494 int mdt_fs_setup(const struct lu_env *, struct mdt_device *,
495 struct obd_device *);
496 void mdt_fs_cleanup(const struct lu_env *, struct mdt_device *);
498 int mdt_client_del(const struct lu_env *env,
499 struct mdt_device *mdt,
500 struct mdt_export_data *med);
501 int mdt_client_add(const struct lu_env *env,
502 struct mdt_device *mdt,
503 struct mdt_export_data *med,
505 int mdt_client_new(const struct lu_env *env,
506 struct mdt_device *mdt,
507 struct mdt_export_data *med);
509 int mdt_recovery_handle(struct ptlrpc_request *);
511 int mdt_pin(struct mdt_thread_info* info);
513 int mdt_lock_new_child(struct mdt_thread_info *info,
514 struct mdt_object *o,
515 struct mdt_lock_handle *child_lockh);
517 int mdt_reint_open(struct mdt_thread_info *info,
518 struct mdt_lock_handle *lhc);
520 struct mdt_file_data *mdt_handle2mfd(const struct lustre_handle *handle);
521 int mdt_epoch_open(struct mdt_thread_info *info, struct mdt_object *o);
522 void mdt_sizeonmds_enable(struct mdt_thread_info *info, struct mdt_object *mo);
523 int mdt_sizeonmds_enabled(struct mdt_object *mo);
524 int mdt_write_get(struct mdt_device *mdt, struct mdt_object *o);
525 struct mdt_file_data *mdt_mfd_new(void);
526 int mdt_mfd_close(struct mdt_thread_info *info, struct mdt_file_data *mfd);
527 void mdt_mfd_free(struct mdt_file_data *mfd);
528 int mdt_close(struct mdt_thread_info *info);
529 int mdt_attr_set(struct mdt_thread_info *info, struct mdt_object *mo,
531 int mdt_done_writing(struct mdt_thread_info *info);
532 void mdt_shrink_reply(struct mdt_thread_info *info, int offset,
533 int mdscapa, int osscapa);
534 int mdt_handle_last_unlink(struct mdt_thread_info *, struct mdt_object *,
535 const struct md_attr *);
536 void mdt_reconstruct_open(struct mdt_thread_info *, struct mdt_lock_handle *);
537 struct thandle* mdt_trans_start(const struct lu_env *env,
538 struct mdt_device *mdt, int credits);
539 void mdt_trans_stop(const struct lu_env *env,
540 struct mdt_device *mdt, struct thandle *th);
541 int mdt_record_write(const struct lu_env *env,
542 struct dt_object *dt, const struct lu_buf *buf,
543 loff_t *pos, struct thandle *th);
544 int mdt_record_read(const struct lu_env *env,
545 struct dt_object *dt, struct lu_buf *buf, loff_t *pos);
547 struct lu_buf *mdt_buf(const struct lu_env *env, void *area, ssize_t len);
548 const struct lu_buf *mdt_buf_const(const struct lu_env *env,
549 const void *area, ssize_t len);
551 void mdt_dump_lmm(int level, const struct lov_mds_md *lmm);
553 int mdt_init_ucred(struct mdt_thread_info *, struct mdt_body *);
555 int mdt_init_ucred_reint(struct mdt_thread_info *);
557 void mdt_exit_ucred(struct mdt_thread_info *);
559 int groups_from_list(struct group_info *, gid_t *);
561 void groups_sort(struct group_info *);
564 int mdt_init_idmap(struct mdt_thread_info *);
566 void mdt_cleanup_idmap(struct mdt_export_data *);
568 int mdt_handle_idmap(struct mdt_thread_info *);
570 int ptlrpc_user_desc_do_idmap(struct ptlrpc_request *,
571 struct ptlrpc_user_desc *);
573 void mdt_body_reverse_idmap(struct mdt_thread_info *,
576 int mdt_remote_perm_reverse_idmap(struct ptlrpc_request *,
577 struct mdt_remote_perm *);
579 int mdt_fix_attr_ucred(struct mdt_thread_info *, __u32);
581 static inline struct mdt_device *mdt_dev(struct lu_device *d)
583 // LASSERT(lu_device_is_mdt(d));
584 return container_of0(d, struct mdt_device, mdt_md_dev.md_lu_dev);
587 /* mdt/mdt_identity.c */
588 #define MDT_IDENTITY_UPCALL_PATH "/usr/sbin/l_getidentity"
590 extern struct upcall_cache_ops mdt_identity_upcall_cache_ops;
592 struct mdt_identity *mdt_identity_get(struct upcall_cache *, __u32);
594 void mdt_identity_put(struct upcall_cache *, struct mdt_identity *);
596 void mdt_flush_identity(struct upcall_cache *, __u32);
598 __u32 mdt_identity_get_setxid_perm(struct mdt_identity *, __u32, lnet_nid_t);
600 int mdt_pack_remote_perm(struct mdt_thread_info *, struct mdt_object *, void *);
602 /* mdt/mdt_rmtacl.c */
603 #define MDT_RMTACL_UPCALL_PATH "/usr/sbin/l_facl"
605 extern struct upcall_cache_ops mdt_rmtacl_upcall_cache_ops;
607 int mdt_rmtacl_upcall(struct mdt_thread_info *, unsigned long,
608 char *, struct lu_buf *);
610 extern struct lu_context_key mdt_thread_key;
611 /* debugging helpers start here */
612 static inline void mdt_fail_write(const struct lu_env *env,
613 struct dt_device *dd, int id)
615 if (OBD_FAIL_CHECK(id)) {
616 CERROR(LUSTRE_MDT_NAME": obd_fail_loc=%x, fail write ops\n",
618 dd->dd_ops->dt_ro(env, dd);
619 /* We set FAIL_ONCE because we never "un-fail" a device */
620 obd_fail_loc |= OBD_FAILED | OBD_FAIL_ONCE;
624 static inline struct mdt_export_data *mdt_req2med(struct ptlrpc_request *req)
626 return &req->rq_export->exp_mdt_data;
/*
 * Evaluate OBD_FAIL_CHECK(@id) exactly once, log an error naming @id
 * when the check fires, and yield the result of that single evaluation
 * as the value of the statement expression.
 *
 * Evaluating the check only once keeps the logged outcome and the
 * returned value consistent with each other.
 */
#define MDT_FAIL_CHECK(id)                                              \
({                                                                      \
        int _mdt_fail_rc_ = OBD_FAIL_CHECK(id);                         \
                                                                        \
        if (_mdt_fail_rc_)                                              \
                CERROR(LUSTRE_MDT_NAME": " #id " test failed\n");       \
        _mdt_fail_rc_;                                                  \
})
636 #define MDT_FAIL_CHECK_ONCE(id) \
638 if (OBD_FAIL_CHECK(id)) { \
639 CERROR(LUSTRE_MDT_NAME": *** obd_fail_loc=%x ***\n", id); \
640 obd_fail_loc |= OBD_FAILED; \
641 if ((id) & OBD_FAIL_ONCE) \
642 obd_fail_loc |= OBD_FAIL_ONCE; \
648 #define MDT_FAIL_RETURN(id, ret) \
650 if (MDT_FAIL_CHECK_ONCE(id)) { \
655 struct md_ucred *mdt_ucred(const struct mdt_thread_info *info);
657 static inline int is_identity_get_disabled(struct upcall_cache *cache)
659 return cache ? (strcmp(cache->uc_upcall, "NONE") == 0) : 1;
662 /* Issues a DLM lock on resource @res_id in namespace @ns and stores its lock handle into @lh. */
663 static inline int mdt_fid_lock(struct ldlm_namespace *ns,
664 struct lustre_handle *lh,
666 ldlm_policy_data_t *policy,
667 const struct ldlm_res_id *res_id,
675 rc = ldlm_cli_enqueue_local(ns, res_id, LDLM_IBITS, policy,
676 mode, &flags, ldlm_blocking_ast,
677 ldlm_completion_ast, NULL, NULL,
679 return rc == ELDLM_OK ? 0 : -EIO;
682 static inline void mdt_fid_unlock(struct lustre_handle *lh,
685 ldlm_lock_decref(lh, mode);
691 int mdt_ck_thread_start(struct mdt_device *mdt);
692 void mdt_ck_thread_stop(struct mdt_device *mdt);
693 void mdt_ck_timer_callback(unsigned long castmeharder);
694 int mdt_capa_keys_init(const struct lu_env *env, struct mdt_device *mdt);
696 static inline void mdt_set_capainfo(struct mdt_thread_info *info, int offset,
697 const struct lu_fid *fid,
698 struct lustre_capa *capa)
700 struct mdt_device *dev = info->mti_mdt;
701 struct md_capainfo *ci;
703 if (!dev->mdt_opts.mo_mds_capa)
706 ci = md_capainfo(info->mti_env);
708 ci->mc_fid[offset] = fid;
709 ci->mc_capa[offset] = capa;
712 #endif /* __KERNEL__ */