X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Finclude%2Flustre%2Flustre_idl.h;h=207fe8dc9d065dfd1852d59df17b598379de5912;hb=7e81f13c4a852cdba9fbebcc2b6385d6c2effa4b;hp=9271c00c269274a36683a98a3ff1c5204a1d7bba;hpb=05f69f5ee20eeffcc26f643333cedcfb53ba6669;p=fs%2Flustre-release.git diff --git a/lustre/include/lustre/lustre_idl.h b/lustre/include/lustre/lustre_idl.h index 9271c00..207fe8d 100644 --- a/lustre/include/lustre/lustre_idl.h +++ b/lustre/include/lustre/lustre_idl.h @@ -196,6 +196,12 @@ struct lu_seq_range { __u32 lsr_flags; }; +struct lu_seq_range_array { + __u32 lsra_count; + __u32 lsra_padding; + struct lu_seq_range lsra_lsr[0]; +}; + #define LU_SEQ_RANGE_MDT 0x0 #define LU_SEQ_RANGE_OST 0x1 #define LU_SEQ_RANGE_ANY 0x3 @@ -628,7 +634,7 @@ static inline obd_seq ostid_seq(const struct ost_id *ostid) if (fid_seq_is_mdt0(ostid->oi.oi_seq)) return FID_SEQ_OST_MDT0; - if (fid_seq_is_default(ostid->oi.oi_seq)) + if (unlikely(fid_seq_is_default(ostid->oi.oi_seq))) return FID_SEQ_LOV_DEFAULT; if (fid_is_idif(&ostid->oi_fid)) @@ -640,9 +646,12 @@ static inline obd_seq ostid_seq(const struct ost_id *ostid) /* extract OST objid from a wire ost_id (id/seq) pair */ static inline obd_id ostid_id(const struct ost_id *ostid) { - if (fid_seq_is_mdt0(ostid_seq(ostid))) + if (fid_seq_is_mdt0(ostid->oi.oi_seq)) return ostid->oi.oi_id & IDIF_OID_MASK; + if (unlikely(fid_seq_is_default(ostid->oi.oi_seq))) + return ostid->oi.oi_id; + if (fid_is_idif(&ostid->oi_fid)) return fid_idif_id(fid_seq(&ostid->oi_fid), fid_oid(&ostid->oi_fid), 0); @@ -685,13 +694,23 @@ static inline void ostid_set_seq_llog(struct ost_id *oi) */ static inline void ostid_set_id(struct ost_id *oi, __u64 oid) { - if (fid_seq_is_mdt0(ostid_seq(oi))) { + if (fid_seq_is_mdt0(oi->oi.oi_seq)) { if (oid >= IDIF_MAX_OID) { CERROR("Bad "LPU64" to set "DOSTID"\n", oid, POSTID(oi)); return; } oi->oi.oi_id = oid; + } else if (fid_is_idif(&oi->oi_fid)) { + if (oid >= IDIF_MAX_OID) { + CERROR("Bad 
"LPU64" to set "DOSTID"\n", + oid, POSTID(oi)); + return; + } + oi->oi_fid.f_seq = fid_idif_seq(oid, + fid_idif_ost_idx(&oi->oi_fid)); + oi->oi_fid.f_oid = oid; + oi->oi_fid.f_ver = oid >> 48; } else { if (oid > OBIF_MAX_OID) { CERROR("Bad "LPU64" to set "DOSTID"\n", @@ -702,25 +721,31 @@ static inline void ostid_set_id(struct ost_id *oi, __u64 oid) } } -static inline void ostid_inc_id(struct ost_id *oi) +static inline int fid_set_id(struct lu_fid *fid, __u64 oid) { - if (fid_seq_is_mdt0(ostid_seq(oi))) { - if (unlikely(ostid_id(oi) + 1 > IDIF_MAX_OID)) { - CERROR("Bad inc "DOSTID"\n", POSTID(oi)); - return; + if (unlikely(fid_seq_is_igif(fid->f_seq))) { + CERROR("bad IGIF, "DFID"\n", PFID(fid)); + return -EBADF; + } + + if (fid_is_idif(fid)) { + if (oid >= IDIF_MAX_OID) { + CERROR("Bad "LPU64" to set "DFID"\n", + oid, PFID(fid)); + return -EBADF; } - oi->oi.oi_id++; + fid->f_seq = fid_idif_seq(oid, fid_idif_ost_idx(fid)); + fid->f_oid = oid; + fid->f_ver = oid >> 48; } else { - oi->oi_fid.f_oid++; + if (oid > OBIF_MAX_OID) { + CERROR("Bad "LPU64" to set "DFID"\n", + oid, PFID(fid)); + return -EBADF; + } + fid->f_oid = oid; } -} - -static inline void ostid_dec_id(struct ost_id *oi) -{ - if (fid_seq_is_mdt0(ostid_seq(oi))) - oi->oi.oi_id--; - else - oi->oi_fid.f_oid--; + return 0; } /** @@ -732,36 +757,41 @@ static inline void ostid_dec_id(struct ost_id *oi) * struct lu_fid fields without loss. 
For reference see: * http://arch.lustre.org/index.php?title=Interoperability_fids_zfs */ -static inline int ostid_to_fid(struct lu_fid *fid, struct ost_id *ostid, +static inline int ostid_to_fid(struct lu_fid *fid, const struct ost_id *ostid, __u32 ost_idx) { + obd_seq seq = ostid_seq(ostid); + if (ost_idx > 0xffff) { CERROR("bad ost_idx, "DOSTID" ost_idx:%u\n", POSTID(ostid), ost_idx); return -EBADF; } - if (fid_seq_is_mdt0(ostid_seq(ostid))) { + if (fid_seq_is_mdt0(seq)) { + obd_id oid = ostid_id(ostid); + /* This is a "legacy" (old 1.x/2.early) OST object in "group 0" * that we map into the IDIF namespace. It allows up to 2^48 * objects per OST, as this is the object namespace that has * been in production for years. This can handle create rates * of 1M objects/s/OST for 9 years, or combinations thereof. */ - if (ostid_id(ostid) >= IDIF_MAX_OID) { + if (oid >= IDIF_MAX_OID) { CERROR("bad MDT0 id, "DOSTID" ost_idx:%u\n", POSTID(ostid), ost_idx); return -EBADF; } - fid->f_seq = fid_idif_seq(ostid_id(ostid), ost_idx); + fid->f_seq = fid_idif_seq(oid, ost_idx); /* truncate to 32 bits by assignment */ - fid->f_oid = ostid_id(ostid); + fid->f_oid = oid; /* in theory, not currently used */ - fid->f_ver = ostid_id(ostid) >> 48; - } else /* if (fid_seq_is_idif(seq) || fid_seq_is_norm(seq)) */ { - /* This is either an IDIF object, which identifies objects across - * all OSTs, or a regular FID. The IDIF namespace maps legacy - * OST objects into the FID namespace. In both cases, we just - * pass the FID through, no conversion needed. */ + fid->f_ver = oid >> 48; + } else if (likely(!fid_seq_is_default(seq))) + /* if (fid_seq_is_idif(seq) || fid_seq_is_norm(seq)) */ { + /* This is either an IDIF object, which identifies objects across + * all OSTs, or a regular FID. The IDIF namespace maps legacy + * OST objects into the FID namespace. In both cases, we just + * pass the FID through, no conversion needed. 
*/ if (ostid->oi_fid.f_ver != 0) { CERROR("bad MDT0 id, "DOSTID" ost_idx:%u\n", POSTID(ostid), ost_idx); @@ -770,7 +800,7 @@ static inline int ostid_to_fid(struct lu_fid *fid, struct ost_id *ostid, *fid = ostid->oi_fid; } - return 0; + return 0; } /* pack any OST FID into an ostid (id/seq) for the wire/disk */ @@ -1298,6 +1328,9 @@ extern void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb); #define OBD_CONNECT_SHORTIO 0x2000000000000ULL/* short io */ #define OBD_CONNECT_PINGLESS 0x4000000000000ULL/* pings not required */ #define OBD_CONNECT_FLOCK_DEAD 0x8000000000000ULL/* improved flock deadlock detection */ +#define OBD_CONNECT_DISP_STRIPE 0x10000000000000ULL/* create stripe disposition*/ +#define OBD_CONNECT_OPEN_BY_FID 0x20000000000000ULL /* open by fid won't pack + name in request */ /* XXX README XXX: * Please DO NOT add flag values here before first ensuring that this same @@ -1341,7 +1374,9 @@ extern void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb); OBD_CONNECT_LIGHTWEIGHT | OBD_CONNECT_UMASK | \ OBD_CONNECT_LVB_TYPE | OBD_CONNECT_LAYOUTLOCK |\ OBD_CONNECT_PINGLESS | OBD_CONNECT_MAX_EASIZE |\ - OBD_CONNECT_FLOCK_DEAD) + OBD_CONNECT_FLOCK_DEAD | \ + OBD_CONNECT_DISP_STRIPE) + #define OST_CONNECT_SUPPORTED (OBD_CONNECT_SRVLOCK | OBD_CONNECT_GRANT | \ OBD_CONNECT_REQPORTAL | OBD_CONNECT_VERSION | \ OBD_CONNECT_TRUNCLOCK | OBD_CONNECT_INDEX | \ @@ -1511,6 +1546,7 @@ enum obdo_flags { OBD_FL_RECOV_RESEND = 0x00080000, /* recoverable resent */ OBD_FL_NOSPC_BLK = 0x00100000, /* no more block space on OST */ OBD_FL_FLUSH = 0x00200000, /* flush pages on the OST */ + OBD_FL_SHORT_IO = 0x00400000, /* short io request */ /* Note that while these checksum values are currently separate bits, * in 2.x we can actually allow all values from 1-31 if we wanted. 
*/ @@ -1607,6 +1643,11 @@ static inline void lmm_oi_set_seq(struct ost_id *oi, __u64 seq) oi->oi.oi_seq = seq; } +static inline void lmm_oi_set_id(struct ost_id *oi, __u64 oid) +{ + oi->oi.oi_id = oid; +} + static inline __u64 lmm_oi_id(struct ost_id *oi) { return oi->oi.oi_id; @@ -1784,6 +1825,10 @@ extern void lustre_swab_obd_statfs (struct obd_statfs *os); #define OBD_BRW_MEMALLOC 0x800 /* Client runs in the "kswapd" context */ #define OBD_BRW_OVER_USRQUOTA 0x1000 /* Running out of user quota */ #define OBD_BRW_OVER_GRPQUOTA 0x2000 /* Running out of group quota */ +#define OBD_BRW_SOFT_SYNC 0x4000 /* This flag notifies the server + * that the client is running low on + * space for unstable pages; asking + * it to sync quickly */ #define OBD_OBJECT_EOF 0xffffffffffffffffULL @@ -2105,19 +2150,32 @@ extern void lustre_swab_generic_32s (__u32 *val); #define DISP_LOOKUP_POS 0x00000008 #define DISP_OPEN_CREATE 0x00000010 #define DISP_OPEN_OPEN 0x00000020 -#define DISP_ENQ_COMPLETE 0x00400000 +#define DISP_ENQ_COMPLETE 0x00400000 /* obsolete and unused */ #define DISP_ENQ_OPEN_REF 0x00800000 #define DISP_ENQ_CREATE_REF 0x01000000 #define DISP_OPEN_LOCK 0x02000000 #define DISP_OPEN_LEASE 0x04000000 +#define DISP_OPEN_STRIPE 0x08000000 /* INODE LOCK PARTS */ -#define MDS_INODELOCK_LOOKUP 0x000001 /* dentry, mode, owner, group */ -#define MDS_INODELOCK_UPDATE 0x000002 /* size, links, timestamps */ -#define MDS_INODELOCK_OPEN 0x000004 /* For opened files */ -#define MDS_INODELOCK_LAYOUT 0x000008 /* for layout */ -#define MDS_INODELOCK_PERM 0x000010 /* for permission */ -#define MDS_INODELOCK_XATTR 0x000020 /* extended attributes */ +#define MDS_INODELOCK_LOOKUP 0x000001 /* For namespace, dentry etc, and also + * was used to protect permission (mode, + * owner, group etc) before 2.4. 
*/ +#define MDS_INODELOCK_UPDATE 0x000002 /* size, links, timestamps */ +#define MDS_INODELOCK_OPEN 0x000004 /* For opened files */ +#define MDS_INODELOCK_LAYOUT 0x000008 /* for layout */ + +/* The PERM bit is added in 2.4, and it is used to protect permission(mode, + * owner, group, acl etc), so to separate the permission from LOOKUP lock. + * Because for remote directories(in DNE), these locks will be granted by + * different MDTs(different ldlm namespace). + * + * For local directory, MDT will always grant UPDATE_LOCK|PERM_LOCK together. + * For Remote directory, the master MDT, where the remote directory is, will + * grant UPDATE_LOCK|PERM_LOCK, and the remote MDT, where the name entry is, + * will grant LOOKUP_LOCK. */ +#define MDS_INODELOCK_PERM 0x000010 +#define MDS_INODELOCK_XATTR 0x000020 /* extended attributes */ #define MDS_INODELOCK_MAXSHIFT 5 /* This FULL lock is useful to take on unlink sort of operations */ @@ -2599,6 +2657,7 @@ struct mdt_rec_reint { extern void lustre_swab_mdt_rec_reint(struct mdt_rec_reint *rr); +/* lmv structures */ struct lmv_desc { __u32 ld_tgt_count; /* how many MDS's */ __u32 ld_active_tgt_count; /* how many active */ @@ -2615,31 +2674,75 @@ struct lmv_desc { extern void lustre_swab_lmv_desc (struct lmv_desc *ld); -/* TODO: lmv_stripe_md should contain mds capabilities for all slave fids */ -struct lmv_stripe_md { - __u32 mea_magic; - __u32 mea_count; - __u32 mea_master; - __u32 mea_padding; - char mea_pool_name[LOV_MAXPOOLNAME]; - struct lu_fid mea_ids[0]; -}; - -extern void lustre_swab_lmv_stripe_md(struct lmv_stripe_md *mea); - /* lmv structures */ -#define MEA_MAGIC_LAST_CHAR 0xb2221ca1 -#define MEA_MAGIC_ALL_CHARS 0xb222a11c -#define MEA_MAGIC_HASH_SEGMENT 0xb222a11b +#define LMV_MAGIC_V1 0x0CD10CD0 /* normal stripe lmv magic */ +#define LMV_USER_MAGIC 0x0CD20CD0 /* default lmv magic*/ +#define LMV_MAGIC LMV_MAGIC_V1 +struct lmv_mds_md_v1 { + __u32 lmv_magic; + __u32 lmv_stripe_count; /* stripe count */ + __u32 
lmv_master_mdt_index; /* master MDT index */ + __u32 lmv_hash_type; /* dir stripe policy, i.e. indicate + * which hash function to be used */ + __u32 lmv_layout_version; /* Used for directory restriping */ + __u32 lmv_padding; + char lmv_pool_name[LOV_MAXPOOLNAME]; /* pool name */ + struct lu_fid lmv_stripe_fids[0]; /* FIDs for each stripe */ +}; + +union lmv_mds_md { + __u32 lmv_magic; + struct lmv_mds_md_v1 lmv_md_v1; + struct lmv_user_md lmv_user_md; +}; + +static inline int lmv_mds_md_size(int stripe_count, unsigned int lmm_magic) +{ + switch (lmm_magic) { + case LMV_MAGIC_V1: { + struct lmv_mds_md_v1 *lmm1; + + return sizeof(*lmm1) + stripe_count * + sizeof(lmm1->lmv_stripe_fids[0]); + } + default: + return -EINVAL; + } +} -#define MAX_HASH_SIZE_32 0x7fffffffUL -#define MAX_HASH_SIZE 0x7fffffffffffffffULL -#define MAX_HASH_HIGHEST_BIT 0x1000000000000000ULL +static inline int lmv_mds_md_stripe_count_get(const union lmv_mds_md *lmm) +{ + switch (le32_to_cpu(lmm->lmv_magic)) { + case LMV_MAGIC_V1: + return le32_to_cpu(lmm->lmv_md_v1.lmv_stripe_count); + case LMV_USER_MAGIC: + return le32_to_cpu(lmm->lmv_user_md.lum_stripe_count); + default: + return -EINVAL; + } +} + +static inline int lmv_mds_md_stripe_count_set(union lmv_mds_md *lmm, + unsigned int stripe_count) +{ + switch (le32_to_cpu(lmm->lmv_magic)) { + case LMV_MAGIC_V1: + lmm->lmv_md_v1.lmv_stripe_count = cpu_to_le32(stripe_count); + break; + case LMV_USER_MAGIC: + lmm->lmv_user_md.lum_stripe_count = cpu_to_le32(stripe_count); + break; + default: + return -EINVAL; + } + return 0; +} enum fld_rpc_opc { - FLD_QUERY = 900, - FLD_LAST_OPC, - FLD_FIRST_OPC = FLD_QUERY + FLD_QUERY = 900, + FLD_READ = 901, + FLD_LAST_OPC, + FLD_FIRST_OPC = FLD_QUERY }; enum seq_rpc_opc { @@ -2653,6 +2756,12 @@ enum seq_op { SEQ_ALLOC_META = 1 }; +enum fld_op { + FLD_CREATE = 0, + FLD_DELETE = 1, + FLD_LOOKUP = 2, +}; + /* * LOV data structures */ @@ -3300,6 +3409,37 @@ struct obdo { #define o_cksum o_nlink #define o_grant_used 
o_data_version +struct lfsck_request { + __u32 lr_event; + __u32 lr_index; + __u32 lr_flags; + __u32 lr_valid; + union { + __u32 lr_speed; + __u32 lr_status; + }; + __u16 lr_version; + __u16 lr_active; + __u16 lr_param; + __u16 lr_async_windows; + __u32 lr_padding_1; + /* lr_fid is used on server-side only, and can be + * reused as others by client in the future. */ + struct lu_fid lr_fid; + __u64 lr_padding_2; + __u64 lr_padding_3; +}; + +void lustre_swab_lfsck_request(struct lfsck_request *lr); + +struct lfsck_reply { + __u32 lr_status; + __u32 lr_padding_1; + __u64 lr_padding_2; +}; + +void lustre_swab_lfsck_reply(struct lfsck_reply *lr); + static inline void lustre_set_wire_obdo(struct obd_connect_data *ocd, struct obdo *wobdo, const struct obdo *lobdo)