From: ericm Date: Mon, 27 Feb 2006 20:53:36 +0000 (+0000) Subject: branch: b1_5 X-Git-Tag: v1_7_140~1^12~3^2~27 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=e5e79bcf9417568dc1ba471b38a82650bef6e0d2;p=fs%2Flustre-release.git branch: b1_5 fix massive confliction during merge from b1_4. --- diff --git a/lustre/include/linux/lustre_user.h b/lustre/include/linux/lustre_user.h index dd4723d..7bbcca7 100644 --- a/lustre/include/linux/lustre_user.h +++ b/lustre/include/linux/lustre_user.h @@ -45,7 +45,8 @@ #include #endif -#if defined(__x86_64__) || defined(__ia64__) || defined(__ppc64__) +#if defined(__x86_64__) || defined(__ia64__) || defined(__ppc64__) || \ + defined(__craynv) typedef struct stat lstat_t; #define HAVE_LOV_USER_MDS_DATA #elif defined(__USE_LARGEFILE64) || defined(__KERNEL__) diff --git a/lustre/include/lprocfs_status.h b/lustre/include/lprocfs_status.h index 34b9d1b..c6b8005 100644 --- a/lustre/include/lprocfs_status.h +++ b/lustre/include/lprocfs_status.h @@ -219,6 +219,8 @@ extern int lprocfs_rd_server_uuid(char *page, char **start, off_t off, int count, int *eof, void *data); extern int lprocfs_rd_conn_uuid(char *page, char **start, off_t off, int count, int *eof, void *data); +extern int lprocfs_rd_connect_flags(char *page, char **start, off_t off, + int count, int *eof, void *data); extern int lprocfs_rd_num_exports(char *page, char **start, off_t off, int count, int *eof, void *data); extern int lprocfs_rd_numrefs(char *page, char **start, off_t off, @@ -316,6 +318,9 @@ static inline int lprocfs_rd_server_uuid(char *page, char **start, off_t off, static inline int lprocfs_rd_conn_uuid(char *page, char **start, off_t off, int count, int *eof, void *data) { return 0; } +static inline int lprocfs_rd_connect_flags(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ return 0; } static inline int lprocfs_rd_num_exports(char *page, char **start, off_t off, int count, int *eof, void *data) { return 0; } diff --git 
a/lustre/include/lustre/lustre_user.h b/lustre/include/lustre/lustre_user.h index 15abb53..85a0268 100644 --- a/lustre/include/lustre/lustre_user.h +++ b/lustre/include/lustre/lustre_user.h @@ -43,12 +43,18 @@ #define LL_IOC_QUOTACHECK _IOW ('f', 160, int) #define LL_IOC_POLL_QUOTACHECK _IOR ('f', 161, struct if_quotacheck *) #define LL_IOC_QUOTACTL _IOWR('f', 162, struct if_quotactl *) +#define LL_IOC_JOIN _IOW ('f', 163, long) +#define LL_IOC_OBD_STATFS _IOWR('f', 164, struct obd_statfs *) + +#define LL_STATFS_MDC 1 +#define LL_STATFS_LOV 2 #define IOC_MDC_TYPE 'i' #define IOC_MDC_GETSTRIPE _IOWR(IOC_MDC_TYPE, 21, struct lov_mds_md *) #define IOC_MDC_GETFILEINFO _IOWR(IOC_MDC_TYPE, 22, struct lov_mds_data *) #define O_LOV_DELAY_CREATE 0100000000 /* hopefully this does not conflict */ +#define O_JOIN_FILE 0400000000 /* hopefully this does not conflict */ #define LL_FILE_IGNORE_LOCK 0x00000001 #define LL_FILE_GROUP_LOCKED 0x00000002 @@ -57,6 +63,8 @@ #define LOV_USER_MAGIC_V1 0x0BD10BD0 #define LOV_USER_MAGIC LOV_USER_MAGIC_V1 +#define LOV_USER_MAGIC_JOIN 0x0BD20BD0 + #define LOV_PATTERN_RAID0 0x001 #define LOV_PATTERN_RAID1 0x002 #define LOV_PATTERN_FIRST 0x100 diff --git a/lustre/include/lustre_cfg.h b/lustre/include/lustre_cfg.h index 1290cde..cd13b97 100644 --- a/lustre/include/lustre_cfg.h +++ b/lustre/include/lustre_cfg.h @@ -47,7 +47,9 @@ enum lcfg_command_type { LCFG_ADD_CONN = 0x00cf00b, LCFG_DEL_CONN = 0x00cf00c, LCFG_LOV_ADD_OBD = 0x00cf00d, - LCFG_LOV_DEL_OBD = 0x00cf00e + LCFG_LOV_DEL_OBD = 0x00cf00e, + LCFG_PARAM = 0x00cf00f, + LCFG_MARKER = 0x00cf010 }; struct lustre_cfg_bufs { @@ -56,6 +58,9 @@ struct lustre_cfg_bufs { uint32_t lcfg_bufcount; }; +/* Mountconf transitional hack, should go away after 1.6 */ +#define LCFG_FLG_MOUNTCONF 0x400 + struct lustre_cfg { uint32_t lcfg_version; uint32_t lcfg_command; @@ -198,7 +203,6 @@ static inline void lustre_cfg_free(struct lustre_cfg *lcfg) { int len; - ENTRY; len = lustre_cfg_len(lcfg->lcfg_bufcount, 
lcfg->lcfg_buflens); OBD_FREE(lcfg, len); @@ -233,13 +237,46 @@ static inline int lustre_cfg_sanity_check(void *buf, int len) RETURN(0); } + +#define LMD_MAGIC 0xbdacbd03 +#define LMD_MAGIC_MASK (0xffffff00 & LMD_MAGIC) + +#define lmd_bad_magic(LMDP) \ +({ \ + struct lustre_mount_data *_lmd__ = (LMDP); \ + int _ret__ = 0; \ + if (!_lmd__) { \ + LCONSOLE_ERROR("Missing mount data: " \ + "check that /sbin/mount.lustre is installed.\n");\ + _ret__ = 1; \ + } else if (_lmd__->lmd_magic == LMD_MAGIC) { \ + _ret__ = 0; \ + } else if ((_lmd__->lmd_magic & LMD_MAGIC_MASK) == LMD_MAGIC_MASK) { \ + LCONSOLE_ERROR("You're using an old version of " \ + "/sbin/mount.lustre. Please install version " \ + "1.%d\n", LMD_MAGIC & 0xFF); \ + _ret__ = 1; \ + } else { \ + LCONSOLE_ERROR("Invalid mount data (%#x != %#x): " \ + "check that /sbin/mount.lustre is installed\n", \ + _lmd__->lmd_magic, LMD_MAGIC); \ + _ret__ = 1; \ + } \ + _ret__; \ +}) + +#define MAX_FAILOVER_NIDS 10 + /* Passed by mount */ +/* Any changes in the alignment of elements in this stuct require a change to + LMD_MAGIC */ struct lustre_mount_data { - uint32_t lmd_magic; - uint32_t lmd_flags; - uint64_t lmd_nid; - char lmd_mds[64]; - char lmd_profile[64]; + uint32_t lmd_magic; + uint32_t lmd_flags; + uint16_t lmd_nid_count; /* how many failover nids we have for the MDS */ + lnet_nid_t lmd_nid[MAX_FAILOVER_NIDS]; + char lmd_mds[64]; + char lmd_profile[64]; }; #define LMD_FLG_FLOCK 0x0001 diff --git a/lustre/include/lustre_dlm.h b/lustre/include/lustre_dlm.h index 7dbfb5f..d38e4a5 100644 --- a/lustre/include/lustre_dlm.h +++ b/lustre/include/lustre_dlm.h @@ -27,7 +27,7 @@ struct obd_device; #define OBD_LDLM_DEVICENAME "ldlm" -#define LDLM_DEFAULT_LRU_SIZE 100 +#define LDLM_DEFAULT_LRU_SIZE (100 * smp_num_cpus) typedef enum { ELDLM_OK = 0, @@ -128,15 +128,7 @@ typedef enum { #define LCK_COMPAT_NL (LCK_COMPAT_CR | LCK_EX) #define LCK_COMPAT_GROUP (LCK_GROUP | LCK_NL) -static ldlm_mode_t lck_compat_array[] = { - [LCK_EX] 
LCK_COMPAT_EX, - [LCK_PW] LCK_COMPAT_PW, - [LCK_PR] LCK_COMPAT_PR, - [LCK_CW] LCK_COMPAT_CW, - [LCK_CR] LCK_COMPAT_CR, - [LCK_NL] LCK_COMPAT_NL, - [LCK_GROUP] LCK_COMPAT_GROUP -}; +extern ldlm_mode_t lck_compat_array[]; static inline void lockmode_verify(ldlm_mode_t mode) { @@ -510,7 +502,7 @@ void ldlm_lock_decref(struct lustre_handle *lockh, __u32 mode); void ldlm_lock_decref_and_cancel(struct lustre_handle *lockh, __u32 mode); void ldlm_lock_allow_match(struct ldlm_lock *lock); int ldlm_lock_match(struct ldlm_namespace *ns, int flags, struct ldlm_res_id *, - __u32 type, ldlm_policy_data_t *, ldlm_mode_t mode, + ldlm_type_t type, ldlm_policy_data_t *, ldlm_mode_t mode, struct lustre_handle *); struct ldlm_resource *ldlm_lock_convert(struct ldlm_lock *lock, int new_mode, int *flags); @@ -535,7 +527,7 @@ static inline void ldlm_proc_cleanup(void) {} /* resource.c - internal */ struct ldlm_resource *ldlm_resource_get(struct ldlm_namespace *ns, struct ldlm_resource *parent, - struct ldlm_res_id, __u32 type, + struct ldlm_res_id, ldlm_type_t type, int create); struct ldlm_resource *ldlm_resource_getref(struct ldlm_resource *res); int ldlm_resource_putref(struct ldlm_resource *res); @@ -559,7 +551,7 @@ int ldlm_cli_enqueue(struct obd_export *exp, struct ptlrpc_request *req, struct ldlm_namespace *ns, struct ldlm_res_id, - __u32 type, + ldlm_type_t type, ldlm_policy_data_t *, ldlm_mode_t mode, int *flags, diff --git a/lustre/include/lustre_export.h b/lustre/include/lustre_export.h index 2e1ce6e..32f93ce 100644 --- a/lustre/include/lustre_export.h +++ b/lustre/include/lustre_export.h @@ -14,16 +14,21 @@ struct mds_export_data { struct list_head med_open_head; spinlock_t med_open_lock; /* lock med_open_head, mfd_list*/ struct mds_client_data *med_mcd; + __u64 med_ibits_known; loff_t med_lr_off; int med_lr_idx; }; struct osc_creator { spinlock_t oscc_lock; + struct list_head oscc_list; struct obd_device *oscc_obd; + obd_id oscc_last_id;//last available pre-created object + 
obd_id oscc_next_id;// what object id to give out next + int oscc_grow_count; + struct obdo oscc_oa; int oscc_flags; - obd_id oscc_next_id; - cfs_waitq_t oscc_waitq; + cfs_waitq_t oscc_waitq; /* creating procs wait on this */ }; struct ldlm_export_data { diff --git a/lustre/include/lustre_idl.h b/lustre/include/lustre_idl.h index 3c4882f..2f85102 100644 --- a/lustre/include/lustre_idl.h +++ b/lustre/include/lustre_idl.h @@ -67,7 +67,7 @@ //#define OSC_REQUEST_PORTAL 3 #define OSC_REPLY_PORTAL 4 //#define OSC_BULK_PORTAL 5 -#define OST_REQUEST_PORTAL 6 +#define OST_IO_PORTAL 6 #define OST_CREATE_PORTAL 7 #define OST_BULK_PORTAL 8 //#define MDC_REQUEST_PORTAL 9 @@ -80,15 +80,13 @@ #define LDLM_CB_REPLY_PORTAL 16 #define LDLM_CANCEL_REQUEST_PORTAL 17 #define LDLM_CANCEL_REPLY_PORTAL 18 -#define PTLBD_REQUEST_PORTAL 19 -#define PTLBD_REPLY_PORTAL 20 -#define PTLBD_BULK_PORTAL 21 +//#define PTLBD_REQUEST_PORTAL 19 +//#define PTLBD_REPLY_PORTAL 20 +//#define PTLBD_BULK_PORTAL 21 #define MDS_SETATTR_PORTAL 22 #define MDS_READPAGE_PORTAL 23 -#define MGMT_REQUEST_PORTAL 24 -#define MGMT_REPLY_PORTAL 25 -#define MGMT_CLI_REQUEST_PORTAL 26 -#define MGMT_CLI_REPLY_PORTAL 27 + +#define OST_REQUEST_PORTAL 28 #define SVC_KILLED 1 #define SVC_EVENT 2 @@ -112,13 +110,29 @@ #define LUSTRE_OST_VERSION 0x00030000 #define LUSTRE_DLM_VERSION 0x00040000 #define LUSTRE_LOG_VERSION 0x00050000 -#define LUSTRE_PBD_VERSION 0x00060000 struct lustre_handle { __u64 cookie; }; #define DEAD_HANDLE_MAGIC 0xdeadbeefcafebabeULL +static inline int lustre_handle_is_used(struct lustre_handle *lh) +{ + return lh->cookie != 0ull; +} + +static inline int lustre_handle_equal(struct lustre_handle *lh1, + struct lustre_handle *lh2) +{ + return lh1->cookie == lh2->cookie; +} + +static inline void lustre_handle_copy(struct lustre_handle *tgt, + struct lustre_handle *src) +{ + tgt->cookie = src->cookie; +} + /* we depend on this structure to be 8-byte aligned */ /* this type is only endian-adjusted in 
lustre_unpack_msg() */ struct lustre_msg { @@ -197,27 +211,52 @@ static inline void lustre_msg_set_op_flags(struct lustre_msg *msg, int flags) #define MSG_CONNECT_ASYNC 0x40 /* Connect flags */ -#define OBD_CONNECT_RDONLY 0x0001ULL -#define OBD_CONNECT_SRVLOCK 0x0010ULL /* server takes locks for client */ -#define OBD_CONNECT_ACL 0x0080ULL -#define OBD_CONNECT_USER_XATTR 0x0100ULL -#define OBD_CONNECT_CROW 0x0200ULL /* OST is CROW able */ - -#define MDS_CONNECT_SUPPORTED (OBD_CONNECT_RDONLY | \ - OBD_CONNECT_ACL | \ - OBD_CONNECT_USER_XATTR) -#define OST_CONNECT_SUPPORTED (OBD_CONNECT_SRVLOCK | OBD_CONNECT_CROW) +#define OBD_CONNECT_RDONLY 0x1ULL /* client allowed read-only access */ +#define OBD_CONNECT_INDEX 0x2ULL /* connect to specific LOV idx */ +#define OBD_CONNECT_GRANT 0x8ULL /* OSC acquires grant at connect */ +#define OBD_CONNECT_SRVLOCK 0x10ULL /* server takes locks for client */ +#define OBD_CONNECT_VERSION 0x20ULL /* Server supports versions in ocd */ +#define OBD_CONNECT_REQPORTAL 0x40ULL /* Separate portal for non-IO reqs */ +#define OBD_CONNECT_ACL 0x80ULL /* client using access control lists */ +#define OBD_CONNECT_XATTR 0x100ULL /* client using extended attributes*/ +#define OBD_CONNECT_CROW 0x200ULL /* MDS+OST do object create-on-write */ +#define OBD_CONNECT_TRUNCLOCK 0x400ULL /* server gets locks for punch b=9528 */ +#define OBD_CONNECT_TRANSNO 0x800ULL /* replay is sending initial transno */ +#define OBD_CONNECT_IBITS 0x1000ULL /* support for inodebits locks */ +#define OBD_CONNECT_JOIN 0x2000ULL /* files can be concatenated */ +/* also update obd_connect_names[] for lprocfs_rd_connect_flags() */ + +#define MDS_CONNECT_SUPPORTED (OBD_CONNECT_RDONLY | OBD_CONNECT_VERSION | \ + OBD_CONNECT_ACL | OBD_CONNECT_XATTR | \ + OBD_CONNECT_IBITS | OBD_CONNECT_JOIN) +#define OST_CONNECT_SUPPORTED (OBD_CONNECT_SRVLOCK | OBD_CONNECT_GRANT | \ + OBD_CONNECT_REQPORTAL | OBD_CONNECT_VERSION | \ + OBD_CONNECT_TRUNCLOCK | OBD_CONNECT_INDEX) #define 
ECHO_CONNECT_SUPPORTED (0) +#define OBD_OCD_VERSION(major,minor,patch,fix) (((major)<<24) + ((minor)<<16) +\ + ((patch)<<8) + (fix)) +#define OBD_OCD_VERSION_MAJOR(version) ((int)((version)>>24)&255) +#define OBD_OCD_VERSION_MINOR(version) ((int)((version)>>16)&255) +#define OBD_OCD_VERSION_PATCH(version) ((int)((version)>>8)&255) +#define OBD_OCD_VERSION_FIX(version) ((int)(version)&255) + /* This structure is used for both request and reply. * * If we eventually have separate connect data for different types, which we * almost certainly will, then perhaps we stick a union in here. */ struct obd_connect_data { - __u64 ocd_connect_flags; /* connection flags, server should return - * subset of what is asked for. */ - - __u64 padding[8]; + __u64 ocd_connect_flags; /* OBD_CONNECT_* per above */ + __u32 ocd_version; /* lustre release version number */ + __u32 ocd_grant; /* initial cache grant amount (bytes) */ + __u32 ocd_index; /* LOV index to connect to */ + __u32 ocd_unused; + __u64 ocd_ibits_known; /* inode bits this client understands */ + __u64 padding2; /* also fix lustre_swab_connect */ + __u64 padding3; /* also fix lustre_swab_connect */ + __u64 padding4; /* also fix lustre_swab_connect */ + __u64 padding5; /* also fix lustre_swab_connect */ + __u64 padding6; /* also fix lustre_swab_connect */ }; extern void lustre_swab_connect(struct obd_connect_data *ocd); @@ -275,7 +314,14 @@ typedef uint32_t obd_count; #define OBD_FL_DEBUG_CHECK (0x00000040) /* echo client/server debug check */ #define OBD_FL_NO_USRQUOTA (0x00000100) /* the object's owner is over quota */ #define OBD_FL_NO_GRPQUOTA (0x00000200) /* the object's group is over quota */ -#define OBD_FL_CREATE_CROW (0x00000400) /* object should be created with crow */ +#define OBD_FL_CREATE_CROW (0x00000400) /* object should be create on write */ + +/* + * set this to delegate DLM locking during obd_punch() to the OSTs. 
Only OSTs + * that declared OBD_CONNECT_TRUNCLOCK in their connect flags support this + * functionality. + */ +#define OBD_FL_TRUNCLOCK (0x00000800) /* this should be not smaller than sizeof(struct lustre_handle) + sizeof(struct * llog_cookie) + sizeof(ll_fid). Nevertheless struct ll_fid is not longer @@ -294,7 +340,7 @@ struct obdo { obd_time o_ctime; obd_blocks o_blocks; /* brw: cli sent cached bytes */ obd_size o_grant; - + /* 32-bit fields start here: keep an even number of them via padding */ obd_blksize o_blksize; /* optimal IO blocksize */ obd_mode o_mode; /* brw: cli sent cache remain */ @@ -316,15 +362,12 @@ struct obdo { #define o_dropped o_misc #define o_cksum o_nlink -#define OBDO_URGENT_CREATE(oa) \ - (!((oa)->o_valid & OBD_MD_FLFLAGS) || \ - !((oa)->o_flags & OBD_FL_CREATE_CROW) || \ - ((oa)->o_flags & OBD_FL_RECREATE_OBJS)) - extern void lustre_swab_obdo (struct obdo *o); + #define LOV_MAGIC_V1 0x0BD10BD0 #define LOV_MAGIC LOV_MAGIC_V1 +#define LOV_MAGIC_JOIN 0x0BD20BD0 #define LOV_PATTERN_RAID0 0x001 /* stripes are used round-robin */ #define LOV_PATTERN_RAID1 0x002 /* stripes are mirrors of each other */ @@ -350,6 +393,7 @@ struct lov_mds_md_v1 { /* LOV EA mds/wire data (little-endian) */ struct lov_ost_data_v1 lmm_objects[0]; /* per-stripe data */ }; + #define OBD_MD_FLID (0x00000001ULL) /* object ID */ #define OBD_MD_FLATIME (0x00000002ULL) /* access time */ #define OBD_MD_FLMTIME (0x00000004ULL) /* data modification time */ @@ -380,6 +424,7 @@ struct lov_mds_md_v1 { /* LOV EA mds/wire data (little-endian) */ #define OBD_MD_FLDIREA (0x10000000ULL) /* dir's extended attribute data */ #define OBD_MD_FLUSRQUOTA (0x20000000ULL) /* over quota flags sent from ost */ #define OBD_MD_FLGRPQUOTA (0x40000000ULL) /* over quota flags sent from ost */ +#define OBD_MD_FLMODEASIZE (0x80000000ULL) /* EA size will be changed */ #define OBD_MD_MDS (0x0000000100000000ULL) /* where an inode lives on */ #define OBD_MD_REINT (0x0000000200000000ULL) /* reintegrate oa */ 
@@ -419,7 +464,7 @@ struct obd_statfs { __u32 os_bsize; __u32 os_namelen; __u64 os_maxbytes; - __u32 os_state; + __u32 os_state; /* positive error code on server */ __u32 os_spare1; __u32 os_spare2; __u32 os_spare3; @@ -494,6 +539,14 @@ extern void lustre_swab_ost_lvb(struct ost_lvb *); * MDS REQ RECORDS */ +/* FIXME: this is different from HEAD, adjust it + * while merge GSS */ +#define MDS_REQ_REC_OFF 0 + +#define MDS_REQ_INTENT_LOCKREQ_OFF 0 +#define MDS_REQ_INTENT_IT_OFF 1 +#define MDS_REQ_INTENT_REC_OFF 2 + /* opcodes */ typedef enum { MDS_GETATTR = 33, @@ -557,7 +610,7 @@ typedef enum { struct ll_fid { __u64 id; /* holds object id */ __u32 generation; /* holds object generation */ - + __u32 f_type; /* holds object type or stripe idx when passing it to * OST for saving into EA. */ }; @@ -601,8 +654,8 @@ struct mds_body { __u32 suppgid; __u32 eadatasize; __u32 aclsize; - __u32 padding_2; /* also fix lustre_swab_mds_body */ - __u32 padding_3; /* also fix lustre_swab_mds_body */ + __u32 max_mdsize; + __u32 max_cookiesize; /* also fix lustre_swab_mds_body */ __u32 padding_4; /* also fix lustre_swab_mds_body */ }; @@ -672,6 +725,7 @@ extern void lustre_swab_mds_rec_setattr (struct mds_rec_setattr *sa); #define MDS_OPEN_DELAY_CREATE 0100000000 /* delay initial object create */ #define MDS_OPEN_OWNEROVERRIDE 0200000000 /* NFSD rw-reopen ro file for owner */ +#define MDS_OPEN_JOIN_FILE 0400000000 /* open for join file*/ #define MDS_OPEN_HAS_EA 010000000000 /* specify object create pattern */ #define MDS_OPEN_HAS_OBJS 020000000000 /* Just set the EA the obj exist */ @@ -696,6 +750,13 @@ struct mds_rec_create { extern void lustre_swab_mds_rec_create (struct mds_rec_create *cr); +struct mds_rec_join { + struct ll_fid jr_fid; + __u64 jr_headsize; +}; + +extern void lustre_swab_mds_rec_join (struct mds_rec_join *jr); + struct mds_rec_link { __u32 lk_opcode; __u32 lk_fsuid; @@ -771,10 +832,10 @@ struct lov_desc { __u32 ld_pattern; /* PATTERN_RAID0, PATTERN_RAID1 */ __u64 
ld_default_stripe_size; /* in bytes */ __u64 ld_default_stripe_offset; /* in bytes */ - __u32 ld_qos_threshold; /* in MB */ - __u32 ld_qos_maxage; /* in second */ __u32 ld_padding_1; /* also fix lustre_swab_lov_desc */ __u32 ld_padding_2; /* also fix lustre_swab_lov_desc */ + __u32 ld_padding_3; /* also fix lustre_swab_lov_desc */ + __u32 ld_padding_4; /* also fix lustre_swab_lov_desc */ struct obd_uuid ld_uuid; }; @@ -867,7 +928,7 @@ extern void lustre_swab_ldlm_intent (struct ldlm_intent *i); struct ldlm_resource_desc { ldlm_type_t lr_type; - __u32 lr_padding; + __u32 lr_padding; /* also fix lustre_swab_ldlm_resource_desc */ struct ldlm_res_id lr_name; }; @@ -884,7 +945,7 @@ extern void lustre_swab_ldlm_lock_desc (struct ldlm_lock_desc *l); struct ldlm_request { __u32 lock_flags; - __u32 lock_padding; + __u32 lock_padding; /* also fix lustre_swab_ldlm_request */ struct ldlm_lock_desc lock_desc; struct lustre_handle lock_handle1; struct lustre_handle lock_handle2; @@ -894,7 +955,7 @@ extern void lustre_swab_ldlm_request (struct ldlm_request *rq); struct ldlm_reply { __u32 lock_flags; - __u32 lock_padding; + __u32 lock_padding; /* also fix lustre_swab_ldlm_reply */ struct ldlm_lock_desc lock_desc; struct lustre_handle lock_handle; __u64 lock_policy_res1; @@ -904,57 +965,6 @@ struct ldlm_reply { extern void lustre_swab_ldlm_reply (struct ldlm_reply *r); /* - * ptlbd, portal block device requests - */ -typedef enum { - PTLBD_QUERY = 200, - PTLBD_READ = 201, - PTLBD_WRITE = 202, - PTLBD_FLUSH = 203, - PTLBD_CONNECT = 204, - PTLBD_DISCONNECT = 205, - PTLBD_LAST_OPC -} ptlbd_cmd_t; -#define PTLBD_FIRST_OPC PTLBD_QUERY - -struct ptlbd_op { - __u16 op_cmd; - __u16 op_lun; - __u16 op_niob_cnt; - __u16 op__padding; - __u32 op_block_cnt; -}; - -extern void lustre_swab_ptlbd_op (struct ptlbd_op *op); - -struct ptlbd_niob { - __u64 n_xid; - __u64 n_block_nr; - __u32 n_offset; - __u32 n_length; -}; - -extern void lustre_swab_ptlbd_niob (struct ptlbd_niob *n); - -struct 
ptlbd_rsp { - __u16 r_status; - __u16 r_error_cnt; -}; - -extern void lustre_swab_ptlbd_rsp (struct ptlbd_rsp *r); - -/* - * Opcodes for management/monitoring node. - */ -typedef enum { - MGMT_CONNECT = 250, - MGMT_DISCONNECT, - MGMT_EXCEPTION, /* node died, etc. */ - MGMT_LAST_OPC -} mgmt_cmd_t; -#define MGMT_FIRST_OPC MGMT_CONNECT - -/* * Opcodes for multiple servers. */ @@ -979,9 +989,19 @@ struct llog_logid { #define CATLIST "CATALOGS" struct llog_catid { struct llog_logid lci_logid; - __u32 lci_padding[3]; + __u32 lci_padding1; + __u32 lci_padding2; + __u32 lci_padding3; } __attribute__((packed)); +/*join file lov mds md*/ +struct lov_mds_md_join { + struct lov_mds_md lmmj_md; + /*join private info*/ + struct llog_logid lmmj_array_id; /*array object id*/ + __u32 lmmj_extent_count; /*array extent count*/ +}; + /* Log data record types - there is no specific reason that these need to * be related to the RPC opcodes, but no reason not to (may be handy later?) */ @@ -997,6 +1017,7 @@ typedef enum { OBD_CFG_REC = LLOG_OP_MAGIC | 0x20000, PTL_CFG_REC = LLOG_OP_MAGIC | 0x30000, /* obsolete */ LLOG_GEN_REC = LLOG_OP_MAGIC | 0x40000, + LLOG_JOIN_REC = LLOG_OP_MAGIC | 0x50000, LLOG_HDR_MAGIC = LLOG_OP_MAGIC | 0x45539, LLOG_LOGID_MAGIC = LLOG_OP_MAGIC | 0x4553b, } llog_op_type; @@ -1029,10 +1050,30 @@ struct llog_rec_tail { struct llog_logid_rec { struct llog_rec_hdr lid_hdr; struct llog_logid lid_id; - __u32 padding[5]; + __u32 padding1; + __u32 padding2; + __u32 padding3; + __u32 padding4; + __u32 padding5; struct llog_rec_tail lid_tail; } __attribute__((packed)); +/* MDS extent description + * It is for joined file extent info, each extent info for joined file + * just like (start, end, lmm). 
+ */ +struct mds_extent_desc { + __u64 med_start; /* extent start */ + __u64 med_len; /* extent length */ + struct lov_mds_md med_lmm; /* extent's lmm */ +}; +/*Joined file array extent log record*/ +struct llog_array_rec { + struct llog_rec_hdr lmr_hdr; + struct mds_extent_desc lmr_med; + struct llog_rec_tail lmr_tail; +}; + struct llog_create_rec { struct llog_rec_hdr lcr_hdr; struct ll_fid lcr_fid; @@ -1134,6 +1175,8 @@ enum llogd_rpc_ops { LLOG_ORIGIN_HANDLE_CLOSE = 505, LLOG_ORIGIN_CONNECT = 506, LLOG_CATINFO = 507, /* for lfs catinfo */ + LLOG_ORIGIN_HANDLE_PREV_BLOCK = 508, + LLOG_ORIGIN_HANDLE_DESTROY = 509, /* for destroy llog object*/ }; struct llogd_body { @@ -1152,8 +1195,32 @@ struct llogd_conn_body { __u32 lgdc_ctxt_idx; } __attribute__((packed)); +struct lov_user_ost_data_join { /* per-stripe data structure */ + __u64 l_extent_start; /* extent start*/ + __u64 l_extent_end; /* extent end*/ + __u64 l_object_id; /* OST object ID */ + __u64 l_object_gr; /* OST object group (creating MDS number) */ + __u32 l_ost_gen; /* generation of this OST index */ + __u32 l_ost_idx; /* OST index in LOV */ +} __attribute__((packed)); + +struct lov_user_md_join { /* LOV EA user data (host-endian) */ + __u32 lmm_magic; /* magic number = LOV_MAGIC_JOIN */ + __u32 lmm_pattern; /* LOV_PATTERN_RAID0, LOV_PATTERN_RAID1 */ + __u64 lmm_object_id; /* LOV object ID */ + __u64 lmm_object_gr; /* LOV object group */ + __u32 lmm_stripe_size; /* size of stripe in bytes */ + __u32 lmm_stripe_count; /* num stripes in use for this object */ + __u32 lmm_extent_count; /* extent count of lmm*/ + __u64 lmm_tree_id; /* mds tree object id */ + __u64 lmm_tree_gen; /* mds tree object gen */ + struct llog_logid lmm_array_id; /* mds extent desc llog object id */ + struct lov_user_ost_data_join lmm_objects[0]; /* per-stripe data */ +} __attribute__((packed)); + extern void lustre_swab_lov_user_md(struct lov_user_md *lum); extern void lustre_swab_lov_user_md_objects(struct lov_user_md *lum); +extern 
void lustre_swab_lov_user_md_join(struct lov_user_md_join *lumj); /* llog_swab.c */ extern void lustre_swab_llogd_body (struct llogd_body *d); @@ -1179,4 +1246,5 @@ typedef enum { QUOTA_DQREL = 602, } quota_cmd_t; +#define JOIN_FILE_ALIGN 4096 #endif diff --git a/lustre/include/lustre_import.h b/lustre/include/lustre_import.h index 5cd78ea..315dc01 100644 --- a/lustre/include/lustre_import.h +++ b/lustre/include/lustre_import.h @@ -38,6 +38,7 @@ enum obd_import_event { IMP_EVENT_INACTIVE = 0x808002, IMP_EVENT_INVALIDATE = 0x808003, IMP_EVENT_ACTIVE = 0x808004, + IMP_EVENT_OCD = 0x808005, }; struct obd_import_conn { @@ -53,7 +54,6 @@ struct obd_import { struct lustre_handle imp_dlm_handle; /* client's ldlm export */ struct ptlrpc_connection *imp_connection; struct ptlrpc_client *imp_client; - struct list_head imp_observers; struct list_head imp_pinger_chain; /* Lists of requests that are retained for replay, waiting for a reply, @@ -87,16 +87,14 @@ struct obd_import { /* flags */ unsigned int imp_invalid:1, imp_replayable:1, imp_dlm_fake:1, imp_server_timeout:1, - imp_initial_recov:1, imp_force_verify:1, - imp_pingable:1, imp_resend_replay:1, - imp_deactive:1; + imp_initial_recov:1, imp_initial_recov_bk:1, + imp_force_verify:1, imp_pingable:1, + imp_resend_replay:1, imp_deactive:1; __u32 imp_connect_op; struct obd_connect_data imp_connect_data; + __u64 imp_connect_flags_orig; }; -#define IMP_CROW_ABLE(imp) \ - ((imp)->imp_connect_data.ocd_connect_flags & OBD_CONNECT_CROW) - typedef void (*obd_import_callback)(struct obd_import *imp, void *closure, int event, void *event_arg, void *cb_data); diff --git a/lustre/include/lustre_lib.h b/lustre/include/lustre_lib.h index 70b6d04..bf2093c 100644 --- a/lustre/include/lustre_lib.h +++ b/lustre/include/lustre_lib.h @@ -38,6 +38,10 @@ #error Unsupported operating system. 
#endif +/* prng.c */ +unsigned int ll_rand(void); /* returns a random 32-bit integer */ +void ll_srand(unsigned int, unsigned int); /* seed the generator */ + /* target.c */ struct ptlrpc_request; struct recovd_data; @@ -66,7 +70,7 @@ int target_handle_dqacq_callback(struct ptlrpc_request *req); void target_cancel_recovery_timer(struct obd_device *obd); -#define OBD_RECOVERY_TIMEOUT (obd_timeout * 5 / 2) /* *waves hands* */ +#define OBD_RECOVERY_TIMEOUT (obd_timeout * 5 * HZ / 2) /* *waves hands* */ void target_start_recovery_timer(struct obd_device *obd, svc_handler_t handler); void target_abort_recovery(void *data); void target_cleanup_recovery(struct obd_device *obd); @@ -312,7 +316,7 @@ static inline int obd_ioctl_getdata(char **buf, int *len, void *arg) ENTRY; err = copy_from_user(&hdr, (void *)arg, sizeof(hdr)); - if ( err ) + if (err) RETURN(err); if (hdr.ioc_version != OBD_IOCTL_VERSION) { @@ -343,7 +347,7 @@ static inline int obd_ioctl_getdata(char **buf, int *len, void *arg) data = (struct obd_ioctl_data *)*buf; err = copy_from_user(*buf, (void *)arg, hdr.ioc_len); - if ( err ) { + if (err) { OBD_VFREE(*buf, hdr.ioc_len); RETURN(err); } @@ -373,8 +377,7 @@ static inline int obd_ioctl_getdata(char **buf, int *len, void *arg) data->ioc_inlbuf4 = &data->ioc_bulk[0] + offset; } - EXIT; - return 0; + RETURN(0); } static inline int obd_ioctl_popdata(void *arg, void *data, int len) @@ -395,77 +398,76 @@ static inline void obd_ioctl_freedata(char *buf, int len) return; } -#define OBD_IOC_CREATE _IOR ('f', 101, OBD_IOC_DATA_TYPE) -#define OBD_IOC_DESTROY _IOW ('f', 104, OBD_IOC_DATA_TYPE) -#define OBD_IOC_PREALLOCATE _IOWR('f', 105, OBD_IOC_DATA_TYPE) - -#define OBD_IOC_SETATTR _IOW ('f', 107, OBD_IOC_DATA_TYPE) -#define OBD_IOC_GETATTR _IOR ('f', 108, OBD_IOC_DATA_TYPE) -#define OBD_IOC_READ _IOWR('f', 109, OBD_IOC_DATA_TYPE) -#define OBD_IOC_WRITE _IOWR('f', 110, OBD_IOC_DATA_TYPE) +#define OBD_IOC_CREATE _IOR ('f', 101, long) +#define OBD_IOC_DESTROY _IOW ('f', 
104, long) +#define OBD_IOC_PREALLOCATE _IOWR('f', 105, long) +#define OBD_IOC_SETATTR _IOW ('f', 107, long) +#define OBD_IOC_GETATTR _IOR ('f', 108, long) +#define OBD_IOC_READ _IOWR('f', 109, long) +#define OBD_IOC_WRITE _IOWR('f', 110, long) -#define OBD_IOC_STATFS _IOWR('f', 113, OBD_IOC_DATA_TYPE) -#define OBD_IOC_SYNC _IOW ('f', 114, OBD_IOC_DATA_TYPE) -#define OBD_IOC_READ2 _IOWR('f', 115, OBD_IOC_DATA_TYPE) -#define OBD_IOC_FORMAT _IOWR('f', 116, OBD_IOC_DATA_TYPE) -#define OBD_IOC_PARTITION _IOWR('f', 117, OBD_IOC_DATA_TYPE) -#define OBD_IOC_COPY _IOWR('f', 120, OBD_IOC_DATA_TYPE) -#define OBD_IOC_MIGR _IOWR('f', 121, OBD_IOC_DATA_TYPE) -#define OBD_IOC_PUNCH _IOWR('f', 122, OBD_IOC_DATA_TYPE) -#define OBD_IOC_MODULE_DEBUG _IOWR('f', 124, OBD_IOC_DATA_TYPE) -#define OBD_IOC_BRW_READ _IOWR('f', 125, OBD_IOC_DATA_TYPE) -#define OBD_IOC_BRW_WRITE _IOWR('f', 126, OBD_IOC_DATA_TYPE) -#define OBD_IOC_NAME2DEV _IOWR('f', 127, OBD_IOC_DATA_TYPE) -#define OBD_IOC_UUID2DEV _IOWR('f', 130, OBD_IOC_DATA_TYPE) -#define OBD_IOC_GETNAME _IOR ('f', 131, OBD_IOC_DATA_TYPE) +#define OBD_IOC_STATFS _IOWR('f', 113, long) +#define OBD_IOC_SYNC _IOW ('f', 114, long) +#define OBD_IOC_READ2 _IOWR('f', 115, long) +#define OBD_IOC_FORMAT _IOWR('f', 116, long) +#define OBD_IOC_PARTITION _IOWR('f', 117, long) +#define OBD_IOC_COPY _IOWR('f', 120, long) +#define OBD_IOC_MIGR _IOWR('f', 121, long) +#define OBD_IOC_PUNCH _IOWR('f', 122, long) -#define OBD_IOC_LOV_GET_CONFIG _IOWR('f', 132, OBD_IOC_DATA_TYPE) -#define OBD_IOC_CLIENT_RECOVER _IOW ('f', 133, OBD_IOC_DATA_TYPE) +#define OBD_IOC_MODULE_DEBUG _IOWR('f', 124, long) +#define OBD_IOC_BRW_READ _IOWR('f', 125, long) +#define OBD_IOC_BRW_WRITE _IOWR('f', 126, long) +#define OBD_IOC_NAME2DEV _IOWR('f', 127, long) +#define OBD_IOC_UUID2DEV _IOWR('f', 130, long) +#define OBD_IOC_GETNAME _IOR ('f', 131, long) +#define OBD_IOC_LOV_GET_CONFIG _IOWR('f', 132, long) +#define OBD_IOC_CLIENT_RECOVER _IOW ('f', 133, long) #define 
OBD_IOC_DEC_FS_USE_COUNT _IO ('f', 139 ) -#define OBD_IOC_NO_TRANSNO _IOW ('f', 140, OBD_IOC_DATA_TYPE) -#define OBD_IOC_SET_READONLY _IOW ('f', 141, OBD_IOC_DATA_TYPE) -#define OBD_IOC_ABORT_RECOVERY _IOR ('f', 142, OBD_IOC_DATA_TYPE) +#define OBD_IOC_NO_TRANSNO _IOW ('f', 140, long) +#define OBD_IOC_SET_READONLY _IOW ('f', 141, long) +#define OBD_IOC_ABORT_RECOVERY _IOR ('f', 142, long) -#define OBD_GET_VERSION _IOWR ('f', 144, OBD_IOC_DATA_TYPE) +#define OBD_GET_VERSION _IOWR ('f', 144, long) -#define OBD_IOC_CLOSE_UUID _IOWR ('f', 147, OBD_IOC_DATA_TYPE) +#define OBD_IOC_CLOSE_UUID _IOWR ('f', 147, long) -#define OBD_IOC_GETDEVICE _IOWR ('f', 149, OBD_IOC_DATA_TYPE) +#define OBD_IOC_GETDEVICE _IOWR ('f', 149, long) -#define OBD_IOC_LOV_SETSTRIPE _IOW ('f', 154, OBD_IOC_DATA_TYPE) -#define OBD_IOC_LOV_GETSTRIPE _IOW ('f', 155, OBD_IOC_DATA_TYPE) -#define OBD_IOC_LOV_SETEA _IOW ('f', 156, OBD_IOC_DATA_TYPE) +#define OBD_IOC_LOV_SETSTRIPE _IOW ('f', 154, long) +#define OBD_IOC_LOV_GETSTRIPE _IOW ('f', 155, long) +#define OBD_IOC_LOV_SETEA _IOW ('f', 156, long) #define OBD_IOC_QUOTACHECK _IOW ('f', 160, int) #define OBD_IOC_POLL_QUOTACHECK _IOR ('f', 161, struct if_quotacheck *) #define OBD_IOC_QUOTACTL _IOWR('f', 162, struct if_quotactl *) -#define OBD_IOC_MOUNTOPT _IOWR('f', 170, OBD_IOC_DATA_TYPE) - -#define OBD_IOC_RECORD _IOWR('f', 180, OBD_IOC_DATA_TYPE) -#define OBD_IOC_ENDRECORD _IOWR('f', 181, OBD_IOC_DATA_TYPE) -#define OBD_IOC_PARSE _IOWR('f', 182, OBD_IOC_DATA_TYPE) -#define OBD_IOC_DORECORD _IOWR('f', 183, OBD_IOC_DATA_TYPE) -#define OBD_IOC_PROCESS_CFG _IOWR('f', 184, OBD_IOC_DATA_TYPE) -#define OBD_IOC_DUMP_LOG _IOWR('f', 185, OBD_IOC_DATA_TYPE) -#define OBD_IOC_CLEAR_LOG _IOWR('f', 186, OBD_IOC_DATA_TYPE) - -#define OBD_IOC_CATLOGLIST _IOWR('f', 190, OBD_IOC_DATA_TYPE) -#define OBD_IOC_LLOG_INFO _IOWR('f', 191, OBD_IOC_DATA_TYPE) -#define OBD_IOC_LLOG_PRINT _IOWR('f', 192, OBD_IOC_DATA_TYPE) -#define OBD_IOC_LLOG_CANCEL _IOWR('f', 193, 
OBD_IOC_DATA_TYPE) -#define OBD_IOC_LLOG_REMOVE _IOWR('f', 194, OBD_IOC_DATA_TYPE) -#define OBD_IOC_LLOG_CHECK _IOWR('f', 195, OBD_IOC_DATA_TYPE) -#define OBD_IOC_LLOG_CATINFO _IOWR('f', 196, OBD_IOC_DATA_TYPE) - -#define ECHO_IOC_GET_STRIPE _IOWR('f', 200, OBD_IOC_DATA_TYPE) -#define ECHO_IOC_SET_STRIPE _IOWR('f', 201, OBD_IOC_DATA_TYPE) -#define ECHO_IOC_ENQUEUE _IOWR('f', 202, OBD_IOC_DATA_TYPE) -#define ECHO_IOC_CANCEL _IOWR('f', 203, OBD_IOC_DATA_TYPE) +#define OBD_IOC_MOUNTOPT _IOWR('f', 170, long) + +#define OBD_IOC_RECORD _IOWR('f', 180, long) +#define OBD_IOC_ENDRECORD _IOWR('f', 181, long) +#define OBD_IOC_PARSE _IOWR('f', 182, long) +#define OBD_IOC_DORECORD _IOWR('f', 183, long) +#define OBD_IOC_PROCESS_CFG _IOWR('f', 184, long) +#define OBD_IOC_DUMP_LOG _IOWR('f', 185, long) +#define OBD_IOC_CLEAR_LOG _IOWR('f', 186, long) + +#define OBD_IOC_CATLOGLIST _IOWR('f', 190, long) +#define OBD_IOC_LLOG_INFO _IOWR('f', 191, long) +#define OBD_IOC_LLOG_PRINT _IOWR('f', 192, long) +#define OBD_IOC_LLOG_CANCEL _IOWR('f', 193, long) +#define OBD_IOC_LLOG_REMOVE _IOWR('f', 194, long) +#define OBD_IOC_LLOG_CHECK _IOWR('f', 195, long) +#define OBD_IOC_LLOG_CATINFO _IOWR('f', 196, long) + +#define ECHO_IOC_GET_STRIPE _IOWR('f', 200, long) +#define ECHO_IOC_SET_STRIPE _IOWR('f', 201, long) +#define ECHO_IOC_ENQUEUE _IOWR('f', 202, long) +#define ECHO_IOC_CANCEL _IOWR('f', 203, long) /* XXX _IOWR('f', 250, long) has been defined in * lnet/include/libcfs/kp30.h for debug, don't use it @@ -477,237 +479,273 @@ static inline void obd_ioctl_freedata(char *buf, int len) #define POISON_BULK 0 -static inline int ll_insecure_random_int(void) -{ - struct timeval t; - do_gettimeofday(&t); - return (int)(t.tv_usec); -} - /* * l_wait_event is a flexible sleeping function, permitting simple caller * configuration of interrupt and timeout sensitivity along with actions to * be performed in the event of either exception. 
* - * Common usage looks like this: + * The first form of usage looks like this: * * struct l_wait_info lwi = LWI_TIMEOUT_INTR(timeout, timeout_handler, * intr_handler, callback_data); * rc = l_wait_event(waitq, condition, &lwi); * - * (LWI_TIMEOUT and LWI_INTR macros are available for timeout- and - * interrupt-only variants, respectively.) + * l_wait_event() makes the current process wait on 'waitq' until 'condition' + * is TRUE or a "killable" signal (SIGTERM, SIGKILL, SIGINT) is pending. It + * returns 0 to signify 'condition' is TRUE, but if a signal wakes it before + * 'condition' becomes true, it optionally calls the specified 'intr_handler' + * if not NULL, and returns -EINTR. + * + * If a non-zero timeout is specified, signals are ignored until the timeout + * has expired. At this time, if 'timeout_handler' is not NULL it is called. + * If it returns FALSE l_wait_event() continues to wait as described above with + * signals enabled. Otherwise it returns -ETIMEDOUT. + * + * LWI_INTR(intr_handler, callback_data) is shorthand for + * LWI_TIMEOUT_INTR(0, NULL, intr_handler, callback_data) + * + * The second form of usage looks like this: + * + * struct l_wait_info lwi = LWI_TIMEOUT(timeout, timeout_handler, callback_data); + * rc = l_wait_event(waitq, condition, &lwi); + * + * This form is the same as the first except that it COMPLETELY IGNORES + * SIGNALS. The caller must therefore beware that if 'timeout' is zero, or if + * 'timeout_handler' is not NULL and returns FALSE, then the ONLY thing that + * can unblock the current process is 'condition' becoming TRUE. + * + * Another form of usage is: + * struct l_wait_info lwi = LWI_TIMEOUT_INTERVAL(timeout, interval, + * timeout_handler, callback_data); + * rc = l_wait_event(waitq, condition, &lwi); + * This is the same as previous case, but condition is checked once every + * 'interval' jiffies (if non-zero).
+ * + * Subtle synchronization point: this macro does *not* necessarily take + * wait-queue spin-lock before returning, and, hence, the following idiom is safe + * ONLY when caller provides some external locking: + * + * Thread1 Thread2 + * + * l_wait_event(&obj->wq, ....); (1) + * + * wake_up(&obj->wq): (2) + * spin_lock(&q->lock); (2.1) + * __wake_up_common(q, ...); (2.2) + * spin_unlock(&q->lock, flags); (2.3) + * + * OBD_FREE_PTR(obj); (3) + * + * As l_wait_event() may "short-cut" execution and return without taking + * wait-queue spin-lock, some additional synchronization is necessary to + * guarantee that step (3) can begin only after (2.3) finishes. * - * If a timeout is specified, the timeout_handler will be invoked in the event - * that the timeout expires before the process is awakened. (Note that any - * waking of the process will restart the timeout, even if the condition is - * not satisfied and the process immediately returns to sleep. This might be - * considered a bug.) If the timeout_handler returns non-zero, l_wait_event - * will return -ETIMEDOUT and the caller will continue. If the handler returns - * zero instead, the process will go back to sleep until it is awakened by the - * waitq or some similar mechanism, or an interrupt occurs (if the caller has - * asked for interrupts to be detected). The timeout will only fire once, so - * callers should take care that a timeout_handler which returns zero will take - * future steps to awaken the process. N.B. that these steps must include - * making the provided condition become true. + * XXX nikita: some ptlrpc daemon threads have races of that sort. * - * If the interrupt flag (lwi_signals) is non-zero, then the process will be - * interruptible, and will be awakened by any "killable" signal (SIGTERM, - * SIGKILL or SIGINT).
If a timeout is also specified, then the process will - * only become interruptible _after_ the timeout has expired, though it can be - * awakened by a signal that was delivered before the timeout and is still - * pending when the timeout expires. If a timeout is not specified, the process - * will be interruptible at all times during l_wait_event. */ +#define LWI_ON_SIGNAL_NOOP ((void (*)(void *))(-1)) + struct l_wait_info { cfs_duration_t lwi_timeout; + cfs_duration_t lwi_interval; int (*lwi_on_timeout)(void *); - long lwi_signals; void (*lwi_on_signal)(void *); void *lwi_cb_data; }; -#define LWI_TIMEOUT(time, cb, data) \ -((struct l_wait_info) { \ - lwi_timeout: time, \ - lwi_on_timeout: cb, \ - lwi_cb_data: data \ +/* NB: LWI_TIMEOUT ignores signals completely */ +#define LWI_TIMEOUT(time, cb, data) \ +((struct l_wait_info) { \ + .lwi_timeout = time, \ + .lwi_on_timeout = cb, \ + .lwi_cb_data = data, \ + .lwi_interval = 0 \ }) -#define LWI_INTR(cb, data) \ -((struct l_wait_info) { \ - lwi_signals: 1, \ - lwi_on_signal: cb, \ - lwi_cb_data: data \ +#define LWI_TIMEOUT_INTERVAL(time, interval, cb, data) \ +((struct l_wait_info) { \ + .lwi_timeout = time, \ + .lwi_on_timeout = cb, \ + .lwi_cb_data = data, \ + .lwi_interval = interval \ }) #define LWI_TIMEOUT_INTR(time, time_cb, sig_cb, data) \ ((struct l_wait_info) { \ - lwi_timeout: time, \ - lwi_on_timeout: time_cb, \ - lwi_signals: 1, \ - lwi_on_signal: sig_cb, \ - lwi_cb_data: data \ + .lwi_timeout = time, \ + .lwi_on_timeout = time_cb, \ + .lwi_on_signal = (sig_cb == NULL) ? LWI_ON_SIGNAL_NOOP : sig_cb, \ + .lwi_cb_data = data, \ + .lwi_interval = 0 \ }) +#define LWI_INTR(cb, data) LWI_TIMEOUT_INTR(0, NULL, cb, data) + +#define LUSTRE_FATAL_SIGS (sigmask(SIGKILL) | sigmask(SIGINT) | \ + sigmask(SIGTERM) | sigmask(SIGQUIT) | \ + sigmask(SIGALRM)) + #ifdef __KERNEL__ +/* + * wait for @condition to become true, but no longer than timeout, specified + * by @info. 
+ */ #define __l_wait_event(wq, condition, info, ret, excl) \ do { \ cfs_waitlink_t __wait; \ - cfs_duration_t __timed_out = 0; \ - cfs_sigset_t blocked; \ - cfs_time_t timeout_remaining; \ + cfs_duration_t __timeout = info->lwi_timeout; \ + unsigned long __irqflags; \ + cfs_sigset_t __blocked; \ + \ + ret = 0; \ + if (condition) \ + break; \ \ cfs_waitlink_init(&__wait); \ if (excl) \ - cfs_waitq_add_exclusive(&wq, &__wait); \ + cfs_waitq_add_exclusive(&wq, &__wait); \ else \ - cfs_waitq_add(&wq, &__wait); \ + cfs_waitq_add(&wq, &__wait); \ \ /* Block all signals (just the non-fatal ones if no timeout). */ \ - if (info->lwi_signals && !info->lwi_timeout) \ - blocked = l_w_e_set_sigs(LUSTRE_FATAL_SIGS); \ + if (info->lwi_on_signal != NULL && __timeout == 0) \ + __blocked = l_w_e_set_sigs(LUSTRE_FATAL_SIGS); \ else \ - blocked = l_w_e_set_sigs(0); \ - \ - timeout_remaining = info->lwi_timeout; \ + __blocked = l_w_e_set_sigs(0); \ \ for (;;) { \ - set_current_state(TASK_INTERRUPTIBLE); \ - if (condition) \ - break; \ - if (info->lwi_timeout && !__timed_out) { \ - timeout_remaining = cfs_waitq_timedwait(&__wait, \ - CFS_TASK_INTERRUPTIBLE,\ - timeout_remaining); \ - if (timeout_remaining == 0) { \ - __timed_out = 1; \ - if (!info->lwi_on_timeout || \ - info->lwi_on_timeout(info->lwi_cb_data)) { \ - ret = -ETIMEDOUT; \ + set_current_state(TASK_INTERRUPTIBLE); \ + \ + if (condition) \ break; \ - } \ - /* We'll take signals after a timeout. */ \ - if (info->lwi_signals) \ - (void)l_w_e_set_sigs(LUSTRE_FATAL_SIGS); \ - } \ - } else { \ - cfs_waitq_wait(&__wait, CFS_TASK_INTERRUPTIBLE);; \ - } \ - if (condition) \ - break; \ - if (cfs_signal_pending()) { \ - if (!info->lwi_timeout || __timed_out) { \ - break; \ - } else { \ - /* We have to do this here because some signals */ \ - /* are not blockable - ie from strace(1). 
*/ \ - /* In these cases we want to schedule_timeout() */ \ - /* again, because we don't want that to return */ \ - /* -EINTR when the RPC actually succeeded. */ \ - /* the RECALC_SIGPENDING below will deliver the */ \ - /* signal properly. */ \ - cfs_clear_sigpending(); \ - } \ - } \ - } \ \ - cfs_block_sigs(blocked); \ + if (__timeout == 0) { \ + schedule(); \ + } else { \ + unsigned long interval = info->lwi_interval? \ + min_t(unsigned long, \ + info->lwi_interval,__timeout):\ + __timeout; \ + __timeout -= interval - schedule_timeout(interval); \ + if (__timeout == 0) { \ + if (info->lwi_on_timeout == NULL || \ + info->lwi_on_timeout(info->lwi_cb_data)) { \ + ret = -ETIMEDOUT; \ + break; \ + } \ + /* Take signals after the timeout expires. */ \ + if (info->lwi_on_signal != NULL) \ + (void)l_w_e_set_sigs(LUSTRE_FATAL_SIGS); \ + } \ + } \ \ - if ((!info->lwi_timeout || __timed_out) && \ - cfs_signal_pending()) { \ - if (info->lwi_on_signal) \ - info->lwi_on_signal(info->lwi_cb_data); \ - ret = -EINTR; \ + if (condition) \ + break; \ + if (cfs_signal_pending()) { \ + if (info->lwi_on_signal != NULL && __timeout == 0) { \ + if (info->lwi_on_signal != LWI_ON_SIGNAL_NOOP) \ + info->lwi_on_signal(info->lwi_cb_data);\ + ret = -EINTR; \ + break; \ + } \ + /* We have to do this here because some signals */ \ + /* are not blockable - ie from strace(1). */ \ + /* In these cases we want to schedule_timeout() */ \ + /* again, because we don't want that to return */ \ + /* -EINTR when the RPC actually succeeded. */ \ + /* the RECALC_SIGPENDING below will deliver the */ \ + /* signal properly. 
*/ \ + cfs_sigmask_lock(__irqflags); \ + cfs_clear_sigpending(); \ + cfs_sigmask_unlock(__irqflags); \ + } \ } \ \ + cfs_sigmask_lock(__irqflags); \ + cfs_block_sigs(__blocked); \ + RECALC_SIGPENDING; /*XXX cfs_recalc_sigpending();*/ \ + cfs_sigmask_unlock(__irqflags); \ + \ set_current_state(TASK_RUNNING); \ cfs_waitq_del(&wq, &__wait); \ -} while(0) +} while (0) #else /* !__KERNEL__ */ #define __l_wait_event(wq, condition, info, ret, excl) \ -do { \ - long timeout = info->lwi_timeout, elapse, last = 0; \ - int __timed_out = 0; \ - \ - if (info->lwi_timeout == 0) \ - timeout = 1000000000; \ - else \ - last = time(NULL); \ - \ - for (;;) { \ - if (condition) \ - break; \ - if (liblustre_wait_event(timeout)) { \ - if (timeout == 0 || info->lwi_timeout == 0) \ - continue; \ - elapse = time(NULL) - last; \ - if (elapse) { \ - last += elapse; \ - timeout -= elapse; \ - if (timeout < 0) \ - timeout = 0; \ - } \ - continue; \ - } \ - if (info->lwi_timeout && !__timed_out) { \ - __timed_out = 1; \ - if (info->lwi_on_timeout == NULL || \ - info->lwi_on_timeout(info->lwi_cb_data)) { \ - ret = -ETIMEDOUT; \ - break; \ - } \ - } \ - } \ +do { \ + long __timeout = info->lwi_timeout; \ + long __now; \ + long __then = 0; \ + int __timed_out = 0; \ + \ + ret = 0; \ + if (condition) \ + break; \ + \ + if (__timeout == 0) \ + __timeout = 1000000000; \ + else \ + __then = time(NULL); \ + \ + while (!(condition)) { \ + if (liblustre_wait_event(info->lwi_interval?:__timeout) || \ + (info->lwi_interval && info->lwi_interval < __timeout)) {\ + if (__timeout != 0 && info->lwi_timeout != 0) { \ + __now = time(NULL); \ + __timeout -= __now - __then; \ + if (__timeout < 0) \ + __timeout = 0; \ + __then = __now; \ + } \ + continue; \ + } \ + \ + if (info->lwi_timeout != 0 && !__timed_out) { \ + __timed_out = 1; \ + if (info->lwi_on_timeout == NULL || \ + info->lwi_on_timeout(info->lwi_cb_data)) { \ + ret = -ETIMEDOUT; \ + break; \ + } \ + } \ + } \ } while (0) #endif /* __KERNEL__ */ 
-#define l_wait_event(wq, condition, info) \ -({ \ - int __ret = 0; \ - struct l_wait_info *__info = (info); \ - if (!(condition)) \ - __l_wait_event(wq, condition, __info, __ret, 0); \ - __ret; \ +#define l_wait_event(wq, condition, info) \ +({ \ + int __ret; \ + struct l_wait_info *__info = (info); \ + \ + __l_wait_event(wq, condition, __info, __ret, 0); \ + __ret; \ }) -#define l_wait_event_exclusive(wq, condition, info) \ -({ \ - int __ret = 0; \ - struct l_wait_info *__info = (info); \ - if (!(condition)) \ - __l_wait_event(wq, condition, __info, __ret, 1); \ - __ret; \ +#define l_wait_event_exclusive(wq, condition, info) \ +({ \ + int __ret; \ + struct l_wait_info *__info = (info); \ + \ + __l_wait_event(wq, condition, __info, __ret, 1); \ + __ret; \ }) -#define LMD_MAGIC_R1 0xbdacbdac -#define LMD_MAGIC 0xbdacbd02 - -#define lmd_bad_magic(LMDP) \ -({ \ - struct lustre_mount_data *_lmd__ = (LMDP); \ - int _ret__ = 0; \ - if (!_lmd__) { \ - LCONSOLE_ERROR("Missing mount data: " \ - "check that /sbin/mount.lustre is installed.\n");\ - _ret__ = 1; \ - } else if (_lmd__->lmd_magic == LMD_MAGIC_R1) { \ - LCONSOLE_ERROR("You're using an old version of " \ - "/sbin/mount.lustre. 
Please install version " \ - "1.%d\n", LMD_MAGIC & 0xFF); \ - _ret__ = 1; \ - } else if (_lmd__->lmd_magic != LMD_MAGIC) { \ - LCONSOLE_ERROR("Invalid mount data (%#x != %#x): " \ - "check that /sbin/mount.lustre is installed\n", \ - _lmd__->lmd_magic, LMD_MAGIC); \ - _ret__ = 1; \ - } \ - _ret__; \ -}) +#ifdef __KERNEL__ +/* initialize ost_lvb according to inode */ +static inline void inode_init_lvb(struct inode *inode, struct ost_lvb *lvb) +{ + lvb->lvb_size = inode->i_size; + lvb->lvb_blocks = inode->i_blocks; + lvb->lvb_mtime = LTIME_S(inode->i_mtime); + lvb->lvb_atime = LTIME_S(inode->i_atime); + lvb->lvb_ctime = LTIME_S(inode->i_ctime); +} +#else +/* defined in liblustre/llite_lib.h */ +#endif #ifdef __KERNEL__ #define LIBLUSTRE_CLIENT (0) diff --git a/lustre/include/lustre_lite.h b/lustre/include/lustre_lite.h index f35b918..db7ad6c 100644 --- a/lustre/include/lustre_lite.h +++ b/lustre/include/lustre_lite.h @@ -16,6 +16,7 @@ #endif #include +#include #include #include #include @@ -82,4 +83,56 @@ static inline void lustre_build_lock_params(int cmd, unsigned long open_flags, LDLM_FL_BLOCK_NOWAIT : 0; } +/* + * This is embedded into liblustre and llite super-blocks to keep track of + * connect flags (capabilities) supported by all imports given mount is + * connected to. + */ +struct lustre_client_ocd { + /* + * This is conjunction of connect_flags across all imports (LOVs) this + * mount is connected to. This field is updated by ll_ocd_update() + * under ->lco_lock. + */ + __u64 lco_flags; + spinlock_t lco_lock; +}; + +/* + * This function is used as an upcall-callback hooked by liblustre and llite + * clients into obd_notify() listeners chain to handle notifications about + * change of import connect_flags. See llu_fsswop_mount() and + * lustre_common_fill_super(). + * + * Again, it is dumped into this header for the lack of a better place. 
+ */ +static inline int ll_ocd_update(struct obd_device *host, + struct obd_device *watched, + enum obd_notify_event ev, void *owner) +{ + struct lustre_client_ocd *lco; + struct client_obd *cli; + __u64 flags; + int result; + + ENTRY; + if (!strcmp(watched->obd_type->typ_name, LUSTRE_OSC_NAME)) { + cli = &watched->u.cli; + lco = owner; + flags = cli->cl_import->imp_connect_data.ocd_connect_flags; + CDEBUG(D_SUPER, "Changing connect_flags: "LPX64" -> "LPX64"\n", + lco->lco_flags, flags); + spin_lock(&lco->lco_lock); + lco->lco_flags &= flags; + spin_unlock(&lco->lco_lock); + result = 0; + } else { + CERROR("unexpected notification of %s %s!\n", + watched->obd_type->typ_name, + watched->obd_name); + result = -EINVAL; + } + RETURN(result); +} + #endif diff --git a/lustre/include/lustre_log.h b/lustre/include/lustre_log.h index c1184a0..d305fb4 100644 --- a/lustre/include/lustre_log.h +++ b/lustre/include/lustre_log.h @@ -46,6 +46,7 @@ #endif #include +#include #include #define LOG_NAME_LIMIT(logname, name) \ @@ -87,6 +88,8 @@ int llog_init_handle(struct llog_handle *handle, int flags, extern void llog_free_handle(struct llog_handle *handle); int llog_process(struct llog_handle *loghandle, llog_cb_t cb, void *data, void *catdata); +int llog_reverse_process(struct llog_handle *loghandle, llog_cb_t cb, + void *data, void *catdata); extern int llog_cancel_rec(struct llog_handle *loghandle, int index); extern int llog_close(struct llog_handle *cathandle); @@ -108,6 +111,7 @@ int llog_cat_add_rec(struct llog_handle *cathandle, struct llog_rec_hdr *rec, int llog_cat_cancel_records(struct llog_handle *cathandle, int count, struct llog_cookie *cookies); int llog_cat_process(struct llog_handle *cat_llh, llog_cb_t cb, void *data); +int llog_cat_reverse_process(struct llog_handle *cat_llh, llog_cb_t cb, void *data); int llog_cat_set_first_idx(struct llog_handle *cathandle, int index); /* llog_obd.c */ @@ -165,6 +169,8 @@ struct llog_operations { int (*lop_destroy)(struct 
llog_handle *handle); int (*lop_next_block)(struct llog_handle *h, int *curr_idx, int next_idx, __u64 *offset, void *buf, int len); + int (*lop_prev_block)(struct llog_handle *h, + int prev_idx, void *buf, int len); int (*lop_create)(struct llog_ctxt *ctxt, struct llog_handle **, struct llog_logid *logid, char *name); int (*lop_close)(struct llog_handle *handle); @@ -195,7 +201,7 @@ struct llog_ctxt { int loc_idx; /* my index the obd array of ctxt's */ struct llog_gen loc_gen; struct obd_device *loc_obd; /* points back to the containing obd*/ - struct obd_export *loc_exp; + struct obd_export *loc_exp; /* parent "disk" export (e.g. MDS) */ struct obd_import *loc_imp; /* to use in RPC's: can be backward pointing import */ struct llog_operations *loc_logops; @@ -209,9 +215,9 @@ static inline void llog_gen_init(struct llog_ctxt *ctxt) { struct obd_device *obd = ctxt->loc_exp->exp_obd; - if (!strcmp(obd->obd_type->typ_name, "mds")) + if (!strcmp(obd->obd_type->typ_name, LUSTRE_MDS_NAME)) ctxt->loc_gen.mnt_cnt = obd->u.mds.mds_mount_count; - else if (!strstr(obd->obd_type->typ_name, "filter")) + else if (!strstr(obd->obd_type->typ_name, LUSTRE_FILTER_NAME)) ctxt->loc_gen.mnt_cnt = obd->u.filter.fo_mount_count; else ctxt->loc_gen.mnt_cnt = 0; @@ -226,8 +232,9 @@ static inline int llog_gen_lt(struct llog_gen a, struct llog_gen b) return(a.conn_cnt < b.conn_cnt ? 
1 : 0); } -#define LLOG_GEN_INC(gen) ((gen).conn_cnt) ++ +#define LLOG_GEN_INC(gen) ((gen).conn_cnt ++) #define LLOG_PROC_BREAK 0x0001 +#define LLOG_DEL_RECORD 0x0002 static inline int llog_obd2ops(struct llog_ctxt *ctxt, struct llog_operations **lop) @@ -362,6 +369,23 @@ static inline int llog_next_block(struct llog_handle *loghandle, int *cur_idx, RETURN(rc); } +static inline int llog_prev_block(struct llog_handle *loghandle, + int prev_idx, void *buf, int len) +{ + struct llog_operations *lop; + int rc; + ENTRY; + + rc = llog_handle2ops(loghandle, &lop); + if (rc) + RETURN(rc); + if (lop->lop_prev_block == NULL) + RETURN(-EOPNOTSUPP); + + rc = lop->lop_prev_block(loghandle, prev_idx, buf, len); + RETURN(rc); +} + static inline int llog_create(struct llog_ctxt *ctxt, struct llog_handle **res, struct llog_logid *logid, char *name) { diff --git a/lustre/include/lustre_mds.h b/lustre/include/lustre_mds.h index 3c59755..e824f0e 100644 --- a/lustre/include/lustre_mds.h +++ b/lustre/include/lustre_mds.h @@ -10,6 +10,10 @@ #ifndef _LUSTRE_MDS_H #define _LUSTRE_MDS_H +#define LUSTRE_MDS_NAME "mds" +#define LUSTRE_MDT_NAME "mdt" +#define LUSTRE_MDC_NAME "mdc" + #include #include #include @@ -37,14 +41,12 @@ struct ptlrpc_request; struct obd_device; struct ll_file_data; -#define LUSTRE_MDS_NAME "mds" -#define LUSTRE_MDT_NAME "mdt" -#define LUSTRE_MDC_NAME "mdc" - struct lustre_md { struct mds_body *body; struct lov_stripe_md *lsm; +#ifdef CONFIG_FS_POSIX_ACL struct posix_acl *posix_acl; +#endif }; struct mdc_op_data { @@ -78,51 +80,6 @@ struct mds_update_record { struct lvfs_grp_hash_entry *ur_grp_entry; }; -#define MDS_LR_SERVER_SIZE 512 - -#define MDS_LR_CLIENT_START 8192 -#define MDS_LR_CLIENT_SIZE 128 -#if MDS_LR_CLIENT_START < MDS_LR_SERVER_SIZE -#error "Can't have MDS_LR_CLIENT_START < MDS_LR_SERVER_SIZE" -#endif - -#define MDS_CLIENT_SLOTS 17 - -#define MDS_ROCOMPAT_LOVOBJID 0x00000001 -#define MDS_ROCOMPAT_SUPP (MDS_ROCOMPAT_LOVOBJID) - -#define MDS_INCOMPAT_SUPP 
(0) - -/* Data stored per server at the head of the last_rcvd file. In le32 order. - * Try to keep this the same as fsd_server_data so we might one day merge. */ -struct mds_server_data { - __u8 msd_uuid[40]; /* server UUID */ - __u64 msd_last_transno; /* last completed transaction ID */ - __u64 msd_mount_count; /* MDS incarnation number */ - __u64 msd_unused; - __u32 msd_feature_compat; /* compatible feature flags */ - __u32 msd_feature_rocompat;/* read-only compatible feature flags */ - __u32 msd_feature_incompat;/* incompatible feature flags */ - __u32 msd_server_size; /* size of server data area */ - __u32 msd_client_start; /* start of per-client data area */ - __u16 msd_client_size; /* size of per-client data area */ - __u16 msd_subdir_count; /* number of subdirectories for objects */ - __u64 msd_catalog_oid; /* recovery catalog object id */ - __u32 msd_catalog_ogen; /* recovery catalog inode generation */ - __u8 msd_peeruuid[40]; /* UUID of LOV/OSC associated with MDS */ - __u8 msd_padding[MDS_LR_SERVER_SIZE - 140]; -}; - -/* Data stored per client in the last_rcvd file. In le32 order. */ -struct mds_client_data { - __u8 mcd_uuid[40]; /* client UUID */ - __u64 mcd_last_transno; /* last completed transaction ID */ - __u64 mcd_last_xid; /* xid for the last transaction */ - __u32 mcd_last_result; /* result from last RPC */ - __u32 mcd_last_data; /* per-op data (disposition for open &c.) 
*/ - __u8 mcd_padding[MDS_LR_CLIENT_SIZE - 64]; -}; - /* file data for open files on MDS */ struct mds_file_data { struct portals_handle mfd_handle; /* must be first */ @@ -134,9 +91,13 @@ struct mds_file_data { }; /* ACL */ +#ifdef CONFIG_FS_POSIX_ACL #define LUSTRE_POSIX_ACL_MAX_ENTRIES (32) #define LUSTRE_POSIX_ACL_MAX_SIZE \ (xattr_acl_size(LUSTRE_POSIX_ACL_MAX_ENTRIES)) +#else +#define LUSTRE_POSIX_ACL_MAX_SIZE 0 +#endif /* mds/mds_reint.c */ int mds_reint_rec(struct mds_update_record *r, int offset, @@ -149,7 +110,7 @@ int it_disposition(struct lookup_intent *it, int flag); void it_set_disposition(struct lookup_intent *it, int flag); int it_open_error(int phase, struct lookup_intent *it); void mdc_set_lock_data(__u64 *lockh, void *data); -int mdc_change_cbdata(struct obd_export *exp, struct ll_fid *fid, +int mdc_change_cbdata(struct obd_export *exp, struct ll_fid *fid, ldlm_iterator_t it, void *data); int mdc_intent_lock(struct obd_export *exp, struct mdc_op_data *, @@ -179,7 +140,7 @@ int mdc_getattr(struct obd_export *exp, struct ll_fid *fid, obd_valid valid, unsigned int ea_size, struct ptlrpc_request **request); int mdc_getattr_name(struct obd_export *exp, struct ll_fid *fid, - char *filename, int namelen, unsigned long valid, + const char *filename, int namelen, unsigned long valid, unsigned int ea_size, struct ptlrpc_request **request); int mdc_setattr(struct obd_export *exp, struct mdc_op_data *data, struct iattr *iattr, void *ea, int ealen, void *ea2, int ea2len, diff --git a/lustre/include/lustre_net.h b/lustre/include/lustre_net.h index 86d9e814..bacef95 100644 --- a/lustre/include/lustre_net.h +++ b/lustre/include/lustre_net.h @@ -44,7 +44,7 @@ /* MD flags we _always_ use */ #define PTLRPC_MD_OPTIONS 0 -/* Define maxima for bulk I/O +/* Define maxima for bulk I/O * CAVEAT EMPTOR, with multinet (i.e. routers forwarding between networks) * these limits are system wide and not interface-local. 
*/ #define PTLRPC_MAX_BRW_SIZE LNET_MTU @@ -84,7 +84,7 @@ * considered full when less than ?_MAXREQSIZE is left in them. */ -#define LDLM_NUM_THREADS min((int)(smp_num_cpus * smp_num_cpus * 8), 64) +#define LDLM_NUM_THREADS min((int)(smp_num_cpus * smp_num_cpus * 8), 64) #define LDLM_NBUFS 64 #define LDLM_BUFSIZE (8 * 1024) #define LDLM_MAXREQSIZE (5 * 1024) @@ -105,16 +105,19 @@ * * MDS_MAXREQSIZE ~= 4736 bytes = * lustre_msg + ldlm_request + mds_body + mds_rec_create + FNAME_MAX + PATH_MAX + * MDS_MAXREPSIZE ~= 8300 bytes = lustre_msg + llog_header + * or, for mds_close() and mds_reint_unlink() on a many-OST filesystem: + * = 9210 bytes = lustre_msg + mds_body + 160 * (easize + cookiesize) * * Realistic size is about 512 bytes (20 character name + 128 char symlink), * except in the open case where there are a large number of OSTs in a LOV. */ #define MDS_MAXREQSIZE (5 * 1024) -#define MDS_MAXREPSIZE (9 * 1024) +#define MDS_MAXREPSIZE max(9 * 1024, 280 + LOV_MAX_STRIPE_COUNT * 56) -#define OST_MAX_THREADS 36UL -#define OST_NUM_THREADS max(min_t(unsigned long, num_physpages / 8192, \ - OST_MAX_THREADS), 2UL) +#define OST_MAX_THREADS 512UL +#define OST_DEF_THREADS max_t(unsigned long, 2, \ + (num_physpages >> (26-PAGE_SHIFT)) * smp_num_cpus) #define OST_NBUFS (64 * smp_num_cpus) #define OST_BUFSIZE (8 * 1024) /* OST_MAXREQSIZE ~= 4768 bytes = @@ -126,11 +129,6 @@ #define OST_MAXREQSIZE (5 * 1024) #define OST_MAXREPSIZE (9 * 1024) -#define PTLBD_NUM_THREADS 4 -#define PTLBD_NBUFS 64 -#define PTLBD_BUFSIZE (32 * 1024) -#define PTLBD_MAXREQSIZE 1024 - struct ptlrpc_connection { struct list_head c_link; lnet_nid_t c_self; @@ -281,7 +279,17 @@ struct ptlrpc_request { spinlock_t rq_lock; /* client-side flags */ unsigned int rq_intr:1, rq_replied:1, rq_err:1, - rq_timedout:1, rq_resend:1, rq_restart:1, rq_replay:1, + rq_timedout:1, rq_resend:1, rq_restart:1, + /* + * when ->rq_replay is set, request is kept by the client even + * after server commits corresponding 
transaction. This is + * used for operations that require sequence of multiple + * requests to be replayed. The only example currently is file + * open/close. When last request in such a sequence is + * committed, ->rq_replay is cleared on all requests in the + * sequence. + */ + rq_replay:1, rq_no_resend:1, rq_waiting:1, rq_receiving_reply:1, rq_no_delay:1, rq_net_err:1; enum rq_phase rq_phase; /* one of RQ_PHASE_* */ @@ -297,7 +305,7 @@ struct ptlrpc_request { int rq_reqlen; struct lustre_msg *rq_reqmsg; - int rq_timeout; /* seconds */ + int rq_timeout; /* time to wait for reply (seconds) */ int rq_replen; struct lustre_msg *rq_repmsg; __u64 rq_transno; @@ -321,7 +329,6 @@ struct ptlrpc_request { struct ptlrpc_reply_state *rq_reply_state; /* separated reply state */ struct ptlrpc_request_buffer_desc *rq_rqbd; /* incoming request buffer*/ #if CRAY_XT3 -# error "Need to get the uid from the event?" __u32 rq_uid; /* peer uid, used in MDS only */ #endif @@ -445,7 +452,7 @@ struct ptlrpc_bulk_desc { struct ptlrpc_cb_id bd_cbid; /* network callback info */ lnet_handle_md_t bd_md_h; /* associated MD */ - + #if defined(__KERNEL__) lnet_kiov_t bd_iov[0]; #else @@ -493,8 +500,8 @@ struct ptlrpc_service { int srv_num_threads; /* # threads to start/started */ unsigned srv_cpu_affinity:1; /* bind threads to CPUs */ - __u32 srv_req_portal; - __u32 srv_rep_portal; + __u32 srv_req_portal; + __u32 srv_rep_portal; int srv_n_queued_reqs; /* # reqs waiting to be served */ struct list_head srv_request_queue; /* reqs waiting for service */ @@ -515,12 +522,14 @@ struct ptlrpc_service { struct list_head srv_active_replies; /* all the active replies */ struct list_head srv_reply_queue; /* replies waiting for service */ - cfs_waitq_t srv_waitq; /* all threads sleep on this */ + cfs_waitq_t srv_waitq; /* all threads sleep on this. This + * wait-queue is signalled when new + * incoming request arrives and when + * difficult reply has to be handled. 
*/ struct list_head srv_threads; - struct obd_device *srv_obddev; svc_handler_t srv_handler; - + char *srv_name; /* only statically allocated strings here; we don't clean them */ spinlock_t srv_lock; @@ -529,9 +538,9 @@ struct ptlrpc_service { struct lprocfs_stats *srv_stats; /* List of free reply_states */ - struct list_head srv_free_rs_list; + struct list_head srv_free_rs_list; /* waitq to run, when adding stuff to srv_free_rs_list */ - cfs_waitq_t srv_free_rs_waitq; + cfs_waitq_t srv_free_rs_waitq; /* * if non-NULL called during thread creation (ptlrpc_start_thread()) @@ -549,7 +558,7 @@ struct ptlrpc_service { /* ptlrpc/events.c */ extern lnet_handle_eq_t ptlrpc_eq_h; -extern int ptlrpc_uuid_to_peer(struct obd_uuid *uuid, +extern int ptlrpc_uuid_to_peer(struct obd_uuid *uuid, lnet_process_id_t *peer, lnet_nid_t *self); extern void request_out_callback (lnet_event_t *ev); extern void reply_in_callback(lnet_event_t *ev); @@ -575,7 +584,7 @@ void ptlrpc_abort_bulk(struct ptlrpc_bulk_desc *desc); int ptlrpc_register_bulk(struct ptlrpc_request *req); void ptlrpc_unregister_bulk (struct ptlrpc_request *req); -static inline int ptlrpc_bulk_active (struct ptlrpc_bulk_desc *desc) +static inline int ptlrpc_bulk_active (struct ptlrpc_bulk_desc *desc) { unsigned long flags; int rc; @@ -590,8 +599,7 @@ int ptlrpc_send_reply(struct ptlrpc_request *req, int); int ptlrpc_reply(struct ptlrpc_request *req); int ptlrpc_error(struct ptlrpc_request *req); void ptlrpc_resend_req(struct ptlrpc_request *request); -int ptl_send_rpc(struct ptlrpc_request *request); -int ptl_send_rpc_nowait(struct ptlrpc_request *request); +int ptl_send_rpc(struct ptlrpc_request *request, int noreply); int ptlrpc_register_rqbd (struct ptlrpc_request_buffer_desc *rqbd); /* ptlrpc/client.c */ @@ -605,7 +613,7 @@ ptlrpc_client_receiving_reply (struct ptlrpc_request *req) { unsigned long flags; int rc; - + spin_lock_irqsave(&req->rq_lock, flags); rc = req->rq_receiving_reply; 
spin_unlock_irqrestore(&req->rq_lock, flags); @@ -617,7 +625,7 @@ ptlrpc_client_replied (struct ptlrpc_request *req) { unsigned long flags; int rc; - + spin_lock_irqsave(&req->rq_lock, flags); rc = req->rq_replied; spin_unlock_irqrestore(&req->rq_lock, flags); @@ -655,10 +663,11 @@ void ptlrpc_free_rq_pool(struct ptlrpc_request_pool *pool); void ptlrpc_add_rqs_to_pool(struct ptlrpc_request_pool *pool, int num_rq); struct ptlrpc_request_pool *ptlrpc_init_rq_pool(int, int, void (*populate_pool)(struct ptlrpc_request_pool *, int)); -struct ptlrpc_request *ptlrpc_prep_req(struct obd_import *imp, int opcode, - int count, int *lengths, char **bufs); -struct ptlrpc_request *ptlrpc_prep_req_pool(struct obd_import *imp, int opcode, - int count, int *lengths, +struct ptlrpc_request *ptlrpc_prep_req(struct obd_import *imp, __u32 version, + int opcode, int count, + int *lengths, char **bufs); +struct ptlrpc_request *ptlrpc_prep_req_pool(struct obd_import *imp, __u32 version, + int opcode, int count, int *lengths, char **bufs, struct ptlrpc_request_pool *pool); void ptlrpc_free_req(struct ptlrpc_request *request); @@ -679,7 +688,7 @@ __u64 ptlrpc_sample_next_xid(void); __u64 ptlrpc_req_xid(struct ptlrpc_request *request); /* ptlrpc/service.c */ -void ptlrpc_save_lock (struct ptlrpc_request *req, +void ptlrpc_save_lock (struct ptlrpc_request *req, struct lustre_handle *lock, int mode); void ptlrpc_commit_replies (struct obd_device *obd); void ptlrpc_schedule_difficult_reply (struct ptlrpc_reply_state *rs); @@ -717,6 +726,7 @@ int ptlrpc_import_recovery_state_machine(struct obd_import *imp); /* ptlrpc/pack_generic.c */ int lustre_msg_swabbed(struct lustre_msg *msg); +int lustre_msg_check_version(struct lustre_msg *msg, __u32 version); int lustre_pack_request(struct ptlrpc_request *, int count, int *lens, char **bufs); int lustre_pack_reply(struct ptlrpc_request *, int count, int *lens, @@ -782,6 +792,8 @@ static inline void ptlrpc_lprocfs_unregister_obd(struct obd_device *obd) {} 
/* ptlrpc/llog_server.c */ int llog_origin_handle_create(struct ptlrpc_request *req); +int llog_origin_handle_destroy(struct ptlrpc_request *req); +int llog_origin_handle_prev_block(struct ptlrpc_request *req); int llog_origin_handle_next_block(struct ptlrpc_request *req); int llog_origin_handle_read_header(struct ptlrpc_request *req); int llog_origin_handle_close(struct ptlrpc_request *req); diff --git a/lustre/include/lustre_quota.h b/lustre/include/lustre_quota.h index 544d57d..7e2f3b3 100644 --- a/lustre/include/lustre_quota.h +++ b/lustre/include/lustre_quota.h @@ -75,9 +75,8 @@ struct dquot_id { #define QFILE_RD_INFO 2 #define QFILE_WR_INFO 3 #define QFILE_INIT_INFO 4 -#define QFILE_GET_QIDS 5 -#define QFILE_RD_DQUOT 6 -#define QFILE_WR_DQUOT 7 +#define QFILE_RD_DQUOT 5 +#define QFILE_WR_DQUOT 6 /* admin quotafile operations */ #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0) @@ -87,7 +86,7 @@ int lustre_write_quota_info(struct lustre_quota_info *lqi, int type); int lustre_read_dquot(struct lustre_dquot *dquot); int lustre_commit_dquot(struct lustre_dquot *dquot); int lustre_init_quota_info(struct lustre_quota_info *lqi, int type); -int lustre_get_qids(struct lustre_quota_info *lqi, int type, +int lustre_get_qids(struct file *file, struct inode *inode, int type, struct list_head *list); #else diff --git a/lustre/include/obd.h b/lustre/include/obd.h index 61f7a46..04aaf8d 100644 --- a/lustre/include/obd.h +++ b/lustre/include/obd.h @@ -62,10 +62,7 @@ struct lov_oinfo { /* per-stripe data structure */ unsigned loi_kms_valid:1; __u64 loi_kms; /* known minimum size */ - __u64 loi_rss; /* recently seen size */ - __u64 loi_mtime; /* recently seen mtime */ - __u64 loi_blocks; /* recently seen blocks */ - + struct ost_lvb loi_lvb; struct osc_async_rc loi_ar; }; @@ -82,30 +79,61 @@ static inline void loi_init(struct lov_oinfo *loi) CFS_INIT_LIST_HEAD(&loi->loi_read_item); } +/*extent array item for describing the joined file extent info*/ +struct lov_extent { + __u64 
le_start; /* extent start */ + __u64 le_len; /* extent length */ + int le_loi_idx; /* extent #1 loi's index in lsm loi array */ + int le_stripe_count; /* extent stripe count*/ +}; + +/*Lov array info for describing joined file array EA info*/ +struct lov_array_info { + struct llog_logid lai_array_id; /* MDS med llog object id */ + unsigned lai_ext_count; /* number of extent count */ + struct lov_extent *lai_ext_array; /* extent desc array */ +}; + struct lov_stripe_md { spinlock_t lsm_lock; void *lsm_lock_owner; /* debugging */ - /* Public members. */ - __u64 lsm_object_id; /* lov object id */ - __u64 lsm_object_gr; /* lov object id */ - __u64 lsm_maxbytes; /* maximum possible file size */ - unsigned long lsm_xfersize; /* optimal transfer size */ - - /* LOV-private members start here -- only for use in lov/. */ - __u32 lsm_magic; - __u32 lsm_stripe_size; /* size of the stripe */ - __u32 lsm_pattern; /* striping pattern (RAID0, RAID1) */ - unsigned lsm_stripe_count; /* number of objects being striped over */ + struct { + /* Public members. */ + __u64 lw_object_id; /* lov object id */ + __u64 lw_object_gr; /* lov object id */ + __u64 lw_maxbytes; /* maximum possible file size */ + unsigned long lw_xfersize; /* optimal transfer size */ + + /* LOV-private members start here -- only for use in lov/. 
*/ + __u32 lw_magic; + __u32 lw_stripe_size; /* size of the stripe */ + __u32 lw_pattern; /* striping pattern (RAID0, RAID1) */ + unsigned lw_stripe_count; /* number of objects being striped over */ + } lsm_wire; + + struct lov_array_info *lsm_array; /*Only for joined file array info*/ struct lov_oinfo lsm_oinfo[0]; }; -/* compare all fields except for semaphore */ +#define lsm_object_id lsm_wire.lw_object_id +#define lsm_object_gr lsm_wire.lw_object_gr +#define lsm_maxbytes lsm_wire.lw_maxbytes +#define lsm_xfersize lsm_wire.lw_xfersize +#define lsm_magic lsm_wire.lw_magic +#define lsm_stripe_size lsm_wire.lw_stripe_size +#define lsm_pattern lsm_wire.lw_pattern +#define lsm_stripe_count lsm_wire.lw_stripe_count + +/* compare all relevant fields. */ static inline int lov_stripe_md_cmp(struct lov_stripe_md *m1, struct lov_stripe_md *m2) { - return memcmp(&m1->lsm_object_id, &m2->lsm_object_id, - (char *)&m2->lsm_oinfo[0] - (char *)&m2->lsm_object_id); + /* + * ->lsm_wire contains padding, but it should be zeroed out during + * allocation. + */ + return memcmp(&m1->lsm_wire, &m2->lsm_wire, sizeof m1->lsm_wire); } void lov_stripe_lock(struct lov_stripe_md *md); @@ -135,7 +163,7 @@ enum async_flags { or cancel the size of the io */ ASYNC_GROUP_SYNC = 0x8, /* ap_completion will not be called, instead the page is accounted for in the - obd_io_group given to + obd_io_group given to obd_queue_group_io */ }; @@ -163,7 +191,7 @@ struct obd_io_group { struct oig_callback_context { struct list_head occ_oig_item; /* called when the caller has received a signal while sleeping. - * callees of this method are encouraged to abort their state + * callees of this method are encouraged to abort their state * in the oig. This may be called multiple times. 
*/ void (*occ_interrupted)(struct oig_callback_context *occ); unsigned int interrupted:1; @@ -203,22 +231,16 @@ struct filter_obd { cfs_dentry_t *fo_dentry_O; cfs_dentry_t **fo_dentry_O_groups; cfs_dentry_t **fo_dentry_O_sub; - spinlock_t fo_objidlock; /* protect fo_lastobjid - * increment */ - - spinlock_t fo_translock; /* protect fsd_last_rcvd - * increment */ - + spinlock_t fo_objidlock; /* protect fo_lastobjid */ + spinlock_t fo_translock; /* protect fsd_last_transno */ struct file *fo_rcvd_filp; + struct file *fo_health_check_filp; struct filter_server_data *fo_fsd; unsigned long *fo_last_rcvd_slots; __u64 fo_mount_count; int fo_destroy_in_progress; - - struct file_operations *fo_fop; - struct inode_operations *fo_iop; - struct address_space_operations *fo_aops; + struct semaphore fo_create_lock; struct list_head fo_export_list; int fo_subdir_count; @@ -232,11 +254,9 @@ struct filter_obd { struct obd_import *fo_mdc_imp; struct obd_uuid fo_mdc_uuid; struct lustre_handle fo_mdc_conn; -#if 0 - struct ptlrpc_client fo_mdc_client; -#endif struct file **fo_last_objid_files; - __u64 *fo_last_objids; /* last created objid for groups */ + __u64 *fo_last_objids; /* last created objid for groups, + * protected by fo_objidlock */ struct semaphore fo_alloc_lock; @@ -255,11 +275,8 @@ struct filter_obd { * * Locking: none, each OST thread uses only one element, determined by * its "ordinal number", ->t_id. - * - * This is (void *) array, because 2.4 and 2.6 use different iobuf - * structures. */ - void **fo_iobuf_pool; + struct filter_iobuf **fo_iobuf_pool; int fo_iobuf_count; struct obd_histogram fo_r_pages; @@ -278,19 +295,14 @@ struct filter_obd { struct lustre_quota_ctxt fo_quota_ctxt; spinlock_t fo_quotacheck_lock; atomic_t fo_quotachecking; - - /* objids black list stuff. 
See for detailed comment in - * filter_clear_orphans() */ - struct filter_ext *fo_blacklist; - spinlock_t fo_blacklist_lock; }; struct mds_server_data; #define OSC_MAX_RIF_DEFAULT 8 -#define OSC_MAX_RIF_MAX 64 -#define OSC_MAX_DIRTY_DEFAULT 32 -#define OSC_MAX_DIRTY_MB_MAX 512 /* totally arbitrary */ +#define OSC_MAX_RIF_MAX 256 +#define OSC_MAX_DIRTY_DEFAULT (OSC_MAX_RIF_DEFAULT * 4) +#define OSC_MAX_DIRTY_MB_MAX 2048 /* totally arbitrary */ struct mdc_rpc_lock; struct client_obd { @@ -399,6 +411,7 @@ struct mds_obd { obd_id *mds_lov_objids; int mds_lov_nextid_set; struct file *mds_lov_objid_filp; + struct file *mds_health_check_filp; unsigned long *mds_client_bitmap; struct semaphore mds_orphan_recovery_sem; struct upcall_cache *mds_group_hash; @@ -419,37 +432,10 @@ struct echo_obd { atomic_t eo_prep; }; -/* - * this struct does double-duty acting as either a client or - * server instance .. maybe not wise. - */ -struct ptlbd_obd { - /* server's */ - struct ptlrpc_service *ptlbd_service; - struct file *filp; - /* client's */ - struct ptlrpc_client bd_client; - struct obd_import *bd_import; - struct obd_uuid bd_server_uuid; - struct obd_export *bd_exp; - int refcount; /* XXX sigh */ -}; - -struct recovd_obd { - spinlock_t recovd_lock; - struct list_head recovd_managed_items; /* items managed */ - struct list_head recovd_troubled_items; /* items in recovery */ - - cfs_waitq_t recovd_recovery_waitq; - cfs_waitq_t recovd_ctl_waitq; - cfs_waitq_t recovd_waitq; - cfs_task_t *recovd_thread; - __u32 recovd_state; -}; - struct ost_obd { struct ptlrpc_service *ost_service; struct ptlrpc_service *ost_create_service; + struct ptlrpc_service *ost_io_service; struct semaphore ost_health_sem; }; @@ -461,27 +447,23 @@ struct echo_client_obd { __u64 ec_unique; }; -struct cache_obd { - struct obd_export *cobd_target_exp;/* local connection to target obd */ - struct obd_export *cobd_cache_exp; /* local connection to cache obd */ -}; - struct lov_tgt_desc { struct obd_uuid uuid; 
__u32 ltd_gen; struct obd_export *ltd_exp; - int active; /* is this target up for requests */ - int index; /* index of target array in lov_obd */ - struct list_head qos_bavail_list; /* link entry to lov_obd */ + unsigned int active:1, /* is this target up for requests */ + reap:1; /* should this target be deleted */ }; struct lov_obd { - spinlock_t lov_lock; + struct semaphore lov_lock; + atomic_t refcount; struct lov_desc desc; int bufsize; - int refcount; + int connects; + int death_row; /* Do we have tgts scheduled to be deleted? + (Make this a linked list?) */ unsigned int lo_catalog_loaded:1; - struct list_head qos_bavail_list; /* tgts list, sorted by available space, protected by lov_lock */ struct lov_tgt_desc *tgts; }; @@ -513,9 +495,24 @@ struct obd_trans_info { int oti_numcookies; /* initial thread handling transaction */ - struct ptlrpc_thread *oti_thread; + int oti_thread_id; }; +static inline void oti_init(struct obd_trans_info *oti, + struct ptlrpc_request *req) +{ + if (oti == NULL) + return; + memset(oti, 0, sizeof *oti); + + if (req == NULL) + return; + + if (req->rq_repmsg && req->rq_reqmsg != 0) + oti->oti_transno = req->rq_repmsg->transno; + oti->oti_thread_id = req->rq_svc_thread ? req->rq_svc_thread->t_id : -1; +} + static inline void oti_alloc_cookies(struct obd_trans_info *oti,int num_cookies) { if (!oti) @@ -558,9 +555,33 @@ enum llog_ctxt_id { LLOG_RD1_REPL_CTXT = 9, LLOG_TEST_ORIG_CTXT = 10, LLOG_TEST_REPL_CTXT = 11, + LLOG_LOVEA_ORIG_CTXT = 12, + LLOG_LOVEA_REPL_CTXT = 13, LLOG_MAX_CTXTS }; +/* + * Events signalled through obd_notify() upcall-chain. + */ +enum obd_notify_event { + /* Device activated */ + OBD_NOTIFY_ACTIVE, + /* Device deactivated */ + OBD_NOTIFY_INACTIVE, + /* Connect data for import were changed */ + OBD_NOTIFY_OCD +}; + +/* + * Data structure used to pass obd_notify()-event to non-obd listeners (llite + * and liblustre being main examples). 
+ */ +struct obd_notify_upcall { + int (*onu_upcall)(struct obd_device *host, struct obd_device *watched, + enum obd_notify_event ev, void *owner); + /* Opaque datum supplied by upper layer listener */ + void *onu_owner; +}; /* corresponds to one of the obd's */ struct obd_device { @@ -592,6 +613,7 @@ struct obd_device { struct lvfs_run_ctxt obd_lvfs_ctxt; struct llog_ctxt *obd_llog_ctxt[LLOG_MAX_CTXTS]; struct obd_device *obd_observer; + struct obd_notify_upcall obd_upcall; struct obd_export *obd_self_export; /* list of exports in LRU order, for ping evictor, with obd_dev_lock */ struct list_head obd_exports_timed; @@ -624,12 +646,9 @@ struct obd_device { struct ost_obd ost; struct echo_client_obd echo_client; struct echo_obd echo; - struct recovd_obd recovd; struct lov_obd lov; - struct cache_obd cobd; - struct ptlbd_obd ptlbd; } u; - /* Fields used by LProcFS */ + /* Fields used by LProcFS */ unsigned int obd_cntr_base; struct lprocfs_stats *obd_stats; cfs_proc_dir_entry_t *obd_svc_procroot; @@ -641,6 +660,18 @@ struct obd_device { #define OBD_LLOG_FL_SENDNOW 0x0001 +/* Special case hack for MDS LOVs */ +#define OBD_CLEANUP_EARLY 0 +/* Precleanup stage 1, we must make sure all exports (other than the + self-export) get destroyed. */ +#define OBD_CLEANUP_EXPORTS 1 +/* Precleanup stage 2, do other type-specific cleanup requiring the + self-export. */ +#define OBD_CLEANUP_SELF_EXP 2 +/* FIXME we should eliminate the "precleanup" function and make them stages + of the "cleanup" function. */ +#define OBD_CLEANUP_OBD 3 + struct obd_ops { struct module *o_owner; int (*o_iocontrol)(unsigned int cmd, struct obd_export *exp, int len, @@ -666,6 +697,9 @@ struct obd_ops { * asked for. If @ocd == NULL, use default parameters. 
*/ int (*o_connect)(struct lustre_handle *conn, struct obd_device *src, struct obd_uuid *cluuid, struct obd_connect_data *ocd); + int (*o_reconnect)(struct obd_export *exp, struct obd_device *src, + struct obd_uuid *cluuid, + struct obd_connect_data *ocd); int (*o_disconnect)(struct obd_export *exp); int (*o_statfs)(struct obd_device *obd, struct obd_statfs *osfs, @@ -674,12 +708,15 @@ struct obd_ops { struct lov_stripe_md *mem_src); int (*o_unpackmd)(struct obd_export *exp,struct lov_stripe_md **mem_tgt, struct lov_mds_md *disk_src, int disk_len); + int (*o_checkmd)(struct obd_export *exp, struct obd_export *md_exp, + struct lov_stripe_md *mem_tgt); int (*o_preallocate)(struct lustre_handle *, obd_count *req, obd_id *ids); int (*o_create)(struct obd_export *exp, struct obdo *oa, struct lov_stripe_md **ea, struct obd_trans_info *oti); int (*o_destroy)(struct obd_export *exp, struct obdo *oa, - struct lov_stripe_md *ea, struct obd_trans_info *oti); + struct lov_stripe_md *ea, struct obd_trans_info *oti, + struct obd_export *md_exp); int (*o_setattr)(struct obd_export *exp, struct obdo *oa, struct lov_stripe_md *ea, struct obd_trans_info *oti); int (*o_setattr_async)(struct obd_export *exp, struct obdo *oa, @@ -696,26 +733,26 @@ struct obd_ops { struct lov_stripe_md *ea, obd_count oa_bufs, struct brw_page *pgarr, struct ptlrpc_request_set *, struct obd_trans_info *oti); - int (*o_prep_async_page)(struct obd_export *exp, + int (*o_prep_async_page)(struct obd_export *exp, struct lov_stripe_md *lsm, - struct lov_oinfo *loi, + struct lov_oinfo *loi, cfs_page_t *page, obd_off offset, struct obd_async_page_ops *ops, void *data, void **res); - int (*o_queue_async_io)(struct obd_export *exp, - struct lov_stripe_md *lsm, - struct lov_oinfo *loi, void *cookie, - int cmd, obd_off off, int count, + int (*o_queue_async_io)(struct obd_export *exp, + struct lov_stripe_md *lsm, + struct lov_oinfo *loi, void *cookie, + int cmd, obd_off off, int count, obd_flag brw_flags, obd_flag 
async_flags); - int (*o_queue_group_io)(struct obd_export *exp, - struct lov_stripe_md *lsm, - struct lov_oinfo *loi, - struct obd_io_group *oig, - void *cookie, int cmd, obd_off off, int count, + int (*o_queue_group_io)(struct obd_export *exp, + struct lov_stripe_md *lsm, + struct lov_oinfo *loi, + struct obd_io_group *oig, + void *cookie, int cmd, obd_off off, int count, obd_flag brw_flags, obd_flag async_flags); - int (*o_trigger_group_io)(struct obd_export *exp, - struct lov_stripe_md *lsm, - struct lov_oinfo *loi, + int (*o_trigger_group_io)(struct obd_export *exp, + struct lov_stripe_md *lsm, + struct lov_oinfo *loi, struct obd_io_group *oig); int (*o_set_async_flags)(struct obd_export *exp, struct lov_stripe_md *lsm, @@ -724,6 +761,8 @@ struct obd_ops { int (*o_teardown_async_page)(struct obd_export *exp, struct lov_stripe_md *lsm, struct lov_oinfo *loi, void *cookie); + int (*o_merge_lvb)(struct obd_export *exp, struct lov_stripe_md *lsm, + struct ost_lvb *lvb, int kms_only); int (*o_adjust_kms)(struct obd_export *exp, struct lov_stripe_md *lsm, obd_off size, int shrink); int (*o_punch)(struct obd_export *exp, struct obdo *oa, @@ -762,7 +801,7 @@ struct obd_ops { __u32 mode, struct lustre_handle *); int (*o_cancel_unused)(struct obd_export *, struct lov_stripe_md *, int flags, void *opaque); - int (*o_join_lru)(struct obd_export *, struct lov_stripe_md *, + int (*o_join_lru)(struct obd_export *, struct lov_stripe_md *, int join); int (*o_san_preprw)(int cmd, struct obd_export *exp, struct obdo *oa, int objcount, @@ -785,7 +824,7 @@ struct obd_ops { enum obd_import_event); int (*o_notify)(struct obd_device *obd, struct obd_device *watched, - int active); + enum obd_notify_event ev); int (*o_health_check)(struct obd_device *); @@ -793,7 +832,7 @@ struct obd_ops { int (*o_quotacheck)(struct obd_export *, struct obd_quotactl *); int (*o_quotactl)(struct obd_export *, struct obd_quotactl *); - /* + /* * NOTE: If adding ops, add another LPROCFS_OBD_OP_INIT() line 
* to lprocfs_alloc_obd_stats() in obdclass/lprocfs_status.c. * Also, add a wrapper function in include/linux/obd_class.h. @@ -804,6 +843,39 @@ struct obd_ops { */ }; +struct lsm_operations { + void (*lsm_free)(struct lov_stripe_md *); + int (*lsm_destroy)(struct lov_stripe_md *, struct obdo *oa, + struct obd_export *md_exp); + void (*lsm_stripe_by_index)(struct lov_stripe_md *, int *, obd_off *, + unsigned long *); + void (*lsm_stripe_by_offset)(struct lov_stripe_md *, int *, obd_off *, + unsigned long *); + obd_off (*lsm_stripe_offset_by_index)(struct lov_stripe_md *, int); + int (*lsm_stripe_index_by_offset)(struct lov_stripe_md *, obd_off); + int (*lsm_revalidate) (struct lov_stripe_md *, struct obd_device *obd); + int (*lsm_lmm_verify) (struct lov_mds_md *lmm, int lmm_bytes, + int *stripe_count); + int (*lsm_unpackmd) (struct lov_obd *lov, struct lov_stripe_md *lsm, + struct lov_mds_md *lmm); +}; + +extern struct lsm_operations lsm_plain_ops; +extern struct lsm_operations lsm_join_ops; +static inline struct lsm_operations *lsm_op_find(int magic) +{ + switch(magic) { + case LOV_MAGIC: + return &lsm_plain_ops; + case LOV_MAGIC_JOIN: + return &lsm_join_ops; + default: + CERROR("Cannot recognize lsm_magic %d", magic); + return NULL; + } +} + +int lvfs_check_io_health(struct obd_device *obd, struct file *file); static inline void obd_transno_commit_cb(struct obd_device *obd, __u64 transno, int error) diff --git a/lustre/include/obd_class.h b/lustre/include/obd_class.h index 0692bf0..af2be21 100644 --- a/lustre/include/obd_class.h +++ b/lustre/include/obd_class.h @@ -405,6 +405,20 @@ static inline int obd_free_memmd(struct obd_export *exp, return obd_unpackmd(exp, mem_tgt, NULL, 0); } +static inline int obd_checkmd(struct obd_export *exp, + struct obd_export *md_exp, + struct lov_stripe_md *mem_tgt) +{ + int rc; + ENTRY; + + EXP_CHECK_OP(exp, checkmd); + OBD_COUNTER_INCREMENT(exp->exp_obd, checkmd); + + rc = OBP(exp->exp_obd, checkmd)(exp, md_exp, mem_tgt); + 
RETURN(rc); +} + static inline int obd_create(struct obd_export *exp, struct obdo *obdo, struct lov_stripe_md **ea, struct obd_trans_info *oti) @@ -421,7 +435,8 @@ static inline int obd_create(struct obd_export *exp, struct obdo *obdo, static inline int obd_destroy(struct obd_export *exp, struct obdo *obdo, struct lov_stripe_md *ea, - struct obd_trans_info *oti) + struct obd_trans_info *oti, + struct obd_export *md_exp) { int rc; ENTRY; @@ -429,7 +444,7 @@ static inline int obd_destroy(struct obd_export *exp, struct obdo *obdo, EXP_CHECK_OP(exp, destroy); OBD_COUNTER_INCREMENT(exp->exp_obd, destroy); - rc = OBP(exp->exp_obd, destroy)(exp, obdo, ea, oti); + rc = OBP(exp->exp_obd, destroy)(exp, obdo, ea, oti, md_exp); RETURN(rc); } @@ -537,6 +552,26 @@ static inline int obd_connect(struct lustre_handle *conn, struct obd_device *obd RETURN(rc); } +static inline int obd_reconnect(struct obd_export *exp, + struct obd_device *obd, + struct obd_uuid *cluuid, + struct obd_connect_data *d) +{ + int rc; + __u64 ocf = d ? 
d->ocd_connect_flags : 0; /* for post-condition check */ + ENTRY; + + OBD_CHECK_DEV_ACTIVE(obd); + OBD_CHECK_OP(obd, reconnect, 0); + OBD_COUNTER_INCREMENT(obd, reconnect); + + rc = OBP(obd, reconnect)(exp, obd, cluuid, d); + /* check that only subset is granted */ + LASSERT(ergo(d != NULL, + (d->ocd_connect_flags & ocf) == d->ocd_connect_flags)); + RETURN(rc); +} + static inline int obd_disconnect(struct obd_export *exp) { int rc; @@ -820,10 +855,19 @@ static inline int obd_commitrw(int cmd, struct obd_export *exp, struct obdo *oa, RETURN(rc); } -/* b1_4_bug5047 has changes to make this an obd_merge_lvb() method */ -__u64 lov_merge_size(struct lov_stripe_md *lsm, int kms_only); -__u64 lov_merge_blocks(struct lov_stripe_md *lsm); -__u64 lov_merge_mtime(struct lov_stripe_md *lsm, __u64 current_time); +static inline int obd_merge_lvb(struct obd_export *exp, + struct lov_stripe_md *lsm, + struct ost_lvb *lvb, int kms_only) +{ + int rc; + ENTRY; + + OBD_CHECK_OP(exp->exp_obd, merge_lvb, -EOPNOTSUPP); + OBD_COUNTER_INCREMENT(exp->exp_obd, merge_lvb); + + rc = OBP(exp->exp_obd, merge_lvb)(exp, lsm, lvb, kms_only); + RETURN(rc); +} static inline int obd_adjust_kms(struct obd_export *exp, struct lov_stripe_md *lsm, obd_off size, @@ -1003,7 +1047,7 @@ static inline void obd_import_event(struct obd_device *obd, static inline int obd_notify(struct obd_device *obd, struct obd_device *watched, - int active) + enum obd_notify_event ev) { ENTRY; OBD_CHECK_DEV(obd); @@ -1018,9 +1062,34 @@ static inline int obd_notify(struct obd_device *obd, } OBD_COUNTER_INCREMENT(obd, notify); - RETURN(OBP(obd, notify)(obd, watched, active)); + RETURN(OBP(obd, notify)(obd, watched, ev)); } +static inline int obd_notify_observer(struct obd_device *observer, + struct obd_device *observed, + enum obd_notify_event ev) +{ + int rc1; + int rc2; + + struct obd_notify_upcall *onu; + + if (observer->obd_observer) + rc1 = obd_notify(observer->obd_observer, observed, ev); + else + rc1 = 0; + /* + * Also, 
call non-obd listener, if any + */ + onu = &observer->obd_upcall; + if (onu->onu_upcall != NULL) + rc2 = onu->onu_upcall(observer, observed, ev, onu->onu_owner); + else + rc2 = 0; + + return rc1 ?: rc2; + } + static inline int obd_quotacheck(struct obd_export *exp, struct obd_quotactl *oqctl) { diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index 467dbf3..a938d31 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -32,7 +32,7 @@ extern unsigned int obd_fail_loc; extern unsigned int obd_dump_on_timeout; extern unsigned int obd_timeout; /* seconds */ #define PING_INTERVAL max(obd_timeout / 4, 1U) -#define STATFS_INTERVAL max(obd_timeout / 20, 1U) +#define RECONNECT_INTERVAL max(obd_timeout / 10, 10U) extern unsigned int ldlm_timeout; extern unsigned int obd_health_check_timeout; extern char obd_lustre_upcall[128]; @@ -163,9 +163,6 @@ extern cfs_waitq_t obd_race_waitq; #define OBD_FAIL_MDC_REVALIDATE_PAUSE 0x800 -#define OBD_FAIL_OST_CROW_EIO 0x801 -#define OBD_FAIL_OST_CLEAR_ORPHANS_RACE 0x802 - /* preparation for a more advanced failure testbed (not functional yet) */ #define OBD_FAIL_MASK_SYS 0x0000FF00 #define OBD_FAIL_MASK_LOC (0x000000FF | OBD_FAIL_MASK_SYS) diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index 0357610..c7fd393 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -37,6 +37,7 @@ #include /* for LUSTRE_MDC_NAME */ #include #include +#include /* @priority: if non-zero, move the selected to the list head * @create: if zero, only search in existed connections @@ -200,7 +201,7 @@ int client_obd_setup(struct obd_device *obddev, obd_count len, void *buf) /* In a more perfect world, we would hang a ptlrpc_client off of * obd_type and just use the values from there. 
*/ if (!strcmp(name, LUSTRE_OSC_NAME)) { - rq_portal = OST_REQUEST_PORTAL; + rq_portal = OST_IO_PORTAL; rp_portal = OSC_REPLY_PORTAL; connect_op = OST_CONNECT; } else if (!strcmp(name, LUSTRE_MDC_NAME)) { @@ -261,7 +262,7 @@ int client_obd_setup(struct obd_device *obddev, obd_count len, void *buf) if (num_physpages >> (20 - PAGE_SHIFT) <= 128) { /* <= 128 MB */ cli->cl_max_pages_per_rpc = PTLRPC_MAX_BRW_PAGES / 4; cli->cl_max_rpcs_in_flight = OSC_MAX_RIF_DEFAULT / 4; - } else if (num_physpages >> (20 - PAGE_SHIFT) <= 512) { /* <= 512 MB */ + } else if (num_physpages >> (20 - PAGE_SHIFT) <= 256) { /* <= 256 MB */ cli->cl_max_pages_per_rpc = PTLRPC_MAX_BRW_PAGES / 2; cli->cl_max_rpcs_in_flight = OSC_MAX_RIF_DEFAULT / 2; } else { @@ -376,8 +377,10 @@ int client_connect_import(struct lustre_handle *dlm_handle, GOTO(out_ldlm, rc); ocd = &imp->imp_connect_data; - if (data) + if (data) { *ocd = *data; + imp->imp_connect_flags_orig = data->ocd_connect_flags; + } rc = ptlrpc_connect_import(imp, NULL); if (rc != 0) { @@ -413,8 +416,8 @@ out_sem: int client_disconnect_export(struct obd_export *exp) { struct obd_device *obd = class_exp2obd(exp); - struct client_obd *cli = &obd->u.cli; - struct obd_import *imp = cli->cl_import; + struct client_obd *cli; + struct obd_import *imp; int rc = 0, err; ENTRY; @@ -424,6 +427,9 @@ int client_disconnect_export(struct obd_export *exp) RETURN(-EINVAL); } + cli = &obd->u.cli; + imp = cli->cl_import; + mutex_down(&cli->cl_sem); if (!cli->cl_conn_count) { CERROR("disconnecting disconnected device (%s)\n", @@ -480,7 +486,8 @@ int target_handle_reconnect(struct lustre_handle *conn, struct obd_export *exp, CWARN("%s reconnecting\n", cluuid->uuid); conn->cookie = exp->exp_handle.h_cookie; /* target_handle_connect() treats EALREADY and - * -EALREADY differently */ + * -EALREADY differently. EALREADY means we are + * doing a valid reconnect from the same client. 
*/ RETURN(EALREADY); } else { CERROR("%s reconnecting from %s, " @@ -490,15 +497,15 @@ int target_handle_reconnect(struct lustre_handle *conn, struct obd_export *exp, hdl->cookie, conn->cookie); memset(conn, 0, sizeof *conn); /* target_handle_connect() treats EALREADY and - * -EALREADY differently */ + * -EALREADY differently. -EALREADY is an error + * (same UUID, different handle). */ RETURN(-EALREADY); } } conn->cookie = exp->exp_handle.h_cookie; - CDEBUG(D_INFO, "existing export for UUID '%s' at %p\n", - cluuid->uuid, exp); - CDEBUG(D_IOCTL, "connect: cookie "LPX64"\n", conn->cookie); + CDEBUG(D_HA, "connect export for UUID '%s' at %p, cookie "LPX64"\n", + cluuid->uuid, exp, conn->cookie); RETURN(0); } @@ -530,9 +537,8 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler) obd_str2uuid (&tgtuuid, str); target = class_uuid2obd(&tgtuuid); - if (!target) { + if (!target) target = class_name2obd(str); - } if (!target || target->obd_stopping || !target->obd_set_up) { DEBUG_REQ(D_ERROR, req, "UUID '%s' is not available " @@ -583,6 +589,31 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler) if (rc) GOTO(out, rc); + if (lustre_msg_get_op_flags(req->rq_reqmsg) & MSG_CONNECT_LIBCLIENT) { + if (!data) { + DEBUG_REQ(D_INFO, req, "Refusing old (unversioned) " + "libclient connection attempt\n"); + GOTO(out, rc = -EPROTO); + } else if (data->ocd_version < LUSTRE_VERSION_CODE - + LUSTRE_VERSION_ALLOWED_OFFSET) { + DEBUG_REQ(D_INFO, req, "Refusing old (%d.%d.%d.%d) " + "libclient connection attempt\n", + OBD_OCD_VERSION_MAJOR(data->ocd_version), + OBD_OCD_VERSION_MINOR(data->ocd_version), + OBD_OCD_VERSION_PATCH(data->ocd_version), + OBD_OCD_VERSION_FIX(data->ocd_version)); + data = lustre_msg_buf(req->rq_repmsg, 0, + offsetof(typeof(*data), + ocd_version) + + sizeof(data->ocd_version)); + if (data) { + data->ocd_connect_flags = OBD_CONNECT_VERSION; + data->ocd_version = LUSTRE_VERSION_CODE; + } + GOTO(out, rc = -EPROTO); + } 
+ } + /* lctl gets a backstage, all-access pass. */ if (obd_uuid_equals(&cluuid, &target->obd_uuid)) goto dont_check_exports; @@ -603,11 +634,18 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler) if (!export) { spin_unlock(&target->obd_dev_lock); } else if (req->rq_reqmsg->conn_cnt == 1) { - CERROR("%s reconnected with 1 conn_cnt; cookies not random?\n", - cluuid.uuid); + CERROR("%s: NID %s (%s) reconnected with 1 conn_cnt; " + "cookies not random?\n", target->obd_name, + libcfs_nid2str(req->rq_peer.nid), cluuid.uuid); GOTO(out, rc = -EALREADY); } + /* We indicate the reconnection in a flag, not an error code. */ + if (rc == EALREADY) { + lustre_msg_add_op_flags(req->rq_repmsg, MSG_CONNECT_RECONNECT); + rc = 0; + } + /* Tell the client if we're in recovery. */ /* If this is the first client, start the recovery timer */ if (target->obd_recovering) { @@ -621,9 +659,10 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler) if (export == NULL) { if (target->obd_recovering) { - CERROR("%s: denying connection for new client %s: " + CERROR("%s: denying connection for new client %s (%s): " "%d clients in recovery for %lds\n", - target->obd_name, cluuid.uuid, + target->obd_name, + libcfs_nid2str(req->rq_peer.nid), cluuid.uuid, target->obd_recoverable_clients, cfs_duration_sec(cfs_time_sub(cfs_timer_deadline(&target->obd_recovery_timer), cfs_time_current()))); @@ -632,8 +671,15 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler) dont_check_exports: rc = obd_connect(&conn, target, &cluuid, data); } + } else { + rc = obd_reconnect(export, target, &cluuid, data); } + /* we want to handle EALREADY but *not* -EALREADY from + * target_handle_reconnect() */ + if (rc && rc != EALREADY) + GOTO(out, rc); + /* Return only the parts of obd_connect_data that we understand, so the * client knows that we don't understand the rest. 
*/ if (data) @@ -643,13 +689,14 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler) /* If all else goes well, this is our RPC return code. */ req->rq_status = 0; - /* we want to handle EALREADY but *not* -EALREADY from - * target_handle_reconnect() */ - if (rc && rc != EALREADY) - GOTO(out, rc); - req->rq_repmsg->handle = conn; + /* ownership of this export ref transfers to the request AFTER we + * drop any previous reference the request had, but we don't want + * that to go to zero before we get our new export reference. */ + export = class_conn2export(&conn); + LASSERT(export != NULL); + /* If the client and the server are the same node, we will already * have an export that really points to the client's DLM export, * because we have a shared handles table. @@ -660,15 +707,13 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler) if (req->rq_export != NULL) class_export_put(req->rq_export); - /* ownership of this export ref transfers to the request */ - export = req->rq_export = class_conn2export(&conn); - LASSERT(export != NULL); + req->rq_export = export; spin_lock_irqsave(&export->exp_lock, flags); if (export->exp_conn_cnt >= req->rq_reqmsg->conn_cnt) { - CERROR("%s: already connected at a higher conn_cnt: %d > %d\n", - cluuid.uuid, export->exp_conn_cnt, - req->rq_reqmsg->conn_cnt); + CERROR("%s: %s already connected at higher conn_cnt: %d > %d\n", + cluuid.uuid, libcfs_nid2str(req->rq_peer.nid), + export->exp_conn_cnt, req->rq_reqmsg->conn_cnt); spin_unlock_irqrestore(&export->exp_lock, flags); GOTO(out, rc = -EALREADY); } @@ -688,11 +733,9 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler) export->exp_connection = ptlrpc_get_connection(req->rq_peer, req->rq_self, &remote_uuid); - if (rc == EALREADY) { - /* We indicate the reconnection in a flag, not an error code. 
*/ - lustre_msg_add_op_flags(req->rq_repmsg, MSG_CONNECT_RECONNECT); + + if (lustre_msg_get_op_flags(req->rq_repmsg) & MSG_CONNECT_RECONNECT) GOTO(out, rc = 0); - } if (target->obd_recovering) target->obd_connected_clients++; @@ -718,7 +761,6 @@ out: int target_handle_disconnect(struct ptlrpc_request *req) { - struct obd_export *exp; int rc; ENTRY; @@ -727,8 +769,7 @@ int target_handle_disconnect(struct ptlrpc_request *req) RETURN(rc); /* keep the rq_export around so we can send the reply */ - exp = class_export_get(req->rq_export); - req->rq_status = obd_disconnect(exp); + req->rq_status = obd_disconnect(class_export_get(req->rq_export)); RETURN(0); } @@ -765,7 +806,6 @@ static void target_release_saved_req(struct ptlrpc_request *req) static void target_finish_recovery(struct obd_device *obd) { struct list_head *tmp, *n; - int rc; CWARN("%s: sending delayed replies to recovered clients\n", obd->obd_name); @@ -774,12 +814,9 @@ static void target_finish_recovery(struct obd_device *obd) /* when recovery finished, cleanup orphans on mds and ost */ if (OBT(obd) && OBP(obd, postrecov)) { - rc = OBP(obd, postrecov)(obd); - if (rc >= 0) - CWARN("%s: all clients recovered, %d MDS " - "orphans deleted\n", obd->obd_name, rc); - else - CERROR("postrecov failed %d\n", rc); + int rc = OBP(obd, postrecov)(obd); + CWARN("%s: recovery %s: rc %d\n", obd->obd_name, + rc < 0 ? 
"failed" : "complete", rc); } list_for_each_safe(tmp, n, &obd->obd_delayed_reply_queue) { @@ -791,7 +828,6 @@ static void target_finish_recovery(struct obd_device *obd) target_release_saved_req(req); } obd->obd_recovery_end = CURRENT_SECONDS; - return; } static void abort_recovery_queue(struct obd_device *obd) @@ -1173,7 +1209,7 @@ int target_queue_final_reply(struct ptlrpc_request *req, int rc) OBD_ALLOC(reqmsg, req->rq_reqlen); if (!reqmsg) LBUG(); - memcpy(saved_req, req, sizeof *saved_req); + *saved_req = *req; memcpy(reqmsg, req->rq_reqmsg, req->rq_reqlen); /* Don't race cleanup */ @@ -1406,3 +1442,13 @@ int target_handle_dqacq_callback(struct ptlrpc_request *req) #endif /* !__KERNEL__ */ } #endif /* HAVE_QUOTA_SUPPORT */ + +ldlm_mode_t lck_compat_array[] = { + [LCK_EX] LCK_COMPAT_EX, + [LCK_PW] LCK_COMPAT_PW, + [LCK_PR] LCK_COMPAT_PR, + [LCK_CW] LCK_COMPAT_CW, + [LCK_CR] LCK_COMPAT_CR, + [LCK_NL] LCK_COMPAT_NL, + [LCK_GROUP] LCK_COMPAT_GROUP +}; diff --git a/lustre/ldlm/ldlm_request.c b/lustre/ldlm/ldlm_request.c index 8505f42..6da730d 100644 --- a/lustre/ldlm/ldlm_request.c +++ b/lustre/ldlm/ldlm_request.c @@ -255,22 +255,20 @@ static int ldlm_cli_enqueue_local(struct ldlm_namespace *ns, ldlm_lock_addref_internal(lock, mode); ldlm_lock2handle(lock, lockh); lock->l_flags |= LDLM_FL_LOCAL; - lock->l_flags |= *flags & LDLM_INHERIT_FLAGS; lock->l_lvb_swabber = lvb_swabber; if (policy != NULL) - memcpy(&lock->l_policy_data, policy, sizeof(*policy)); + lock->l_policy_data = *policy; if (type == LDLM_EXTENT) - memcpy(&lock->l_req_extent, &policy->l_extent, - sizeof(policy->l_extent)); + lock->l_req_extent = policy->l_extent; err = ldlm_lock_enqueue(ns, &lock, policy, flags); if (err != ELDLM_OK) GOTO(out, err); if (policy != NULL) - memcpy(policy, &lock->l_policy_data, sizeof(*policy)); + *policy = lock->l_policy_data; if ((*flags) & LDLM_FL_LOCK_CHANGED) - memcpy(&res_id, &lock->l_resource->lr_name, sizeof(res_id)); + res_id = lock->l_resource->lr_name; 
LDLM_DEBUG_NOLOCK("client-side local enqueue handler END (lock %p)", lock); @@ -325,7 +323,7 @@ int ldlm_cli_enqueue(struct obd_export *exp, struct ldlm_lock *lock; struct ldlm_request *body; struct ldlm_reply *reply; - int rc, size[2] = {sizeof(*body), lvb_len}, req_passed_in = 1; + int rc, size[] = {sizeof(*body), lvb_len}, req_passed_in = 1; int is_replay = *flags & LDLM_FL_REPLAY; int cleanup_phase = 0; ENTRY; @@ -354,11 +352,23 @@ int ldlm_cli_enqueue(struct obd_export *exp, ldlm_lock_addref_internal(lock, mode); ldlm_lock2handle(lock, lockh); lock->l_lvb_swabber = lvb_swabber; - if (policy != NULL) - memcpy(&lock->l_policy_data, policy, sizeof(*policy)); + if (policy != NULL) { + /* INODEBITS_INTEROP: If the server does not support + * inodebits, we will request a plain lock in the + * descriptor (ldlm_lock2desc() below) but use an + * inodebits lock internally with both bits set. + */ + if (type == LDLM_IBITS && !(exp->exp_connect_flags & + OBD_CONNECT_IBITS)) + lock->l_policy_data.l_inodebits.bits = + MDS_INODELOCK_LOOKUP | + MDS_INODELOCK_UPDATE; + else + lock->l_policy_data = *policy; + } + if (type == LDLM_EXTENT) - memcpy(&lock->l_req_extent, &policy->l_extent, - sizeof(policy->l_extent)); + lock->l_req_extent = policy->l_extent; LDLM_DEBUG(lock, "client-side enqueue START"); } @@ -366,33 +376,36 @@ int ldlm_cli_enqueue(struct obd_export *exp, cleanup_phase = 2; if (req == NULL) { - req = ptlrpc_prep_req(class_exp2cliimp(exp), LDLM_ENQUEUE, 1, - size, NULL); + req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_DLM_VERSION, + LDLM_ENQUEUE, 1, size, NULL); if (req == NULL) GOTO(cleanup, rc = -ENOMEM); req_passed_in = 0; - } else if (req->rq_reqmsg->buflens[0] != sizeof(*body)) - LBUG(); + } else { + LASSERTF(req->rq_reqmsg->buflens[MDS_REQ_INTENT_LOCKREQ_OFF] == + sizeof(*body), "buflen[%d] = %d, not %d\n", + MDS_REQ_INTENT_LOCKREQ_OFF, + req->rq_reqmsg->buflens[MDS_REQ_INTENT_LOCKREQ_OFF], + (int)sizeof(*body)); + } + + lock->l_conn_export = exp; + 
lock->l_export = NULL; + lock->l_blocking_ast = blocking; /* Dump lock data into the request buffer */ - body = lustre_msg_buf(req->rq_reqmsg, 0, sizeof (*body)); + body = lustre_msg_buf(req->rq_reqmsg, MDS_REQ_INTENT_LOCKREQ_OFF, + sizeof(*body)); ldlm_lock2desc(lock, &body->lock_desc); body->lock_flags = *flags; - memcpy(&body->lock_handle1, lockh, sizeof(*lockh)); + body->lock_handle1 = *lockh; /* Continue as normal. */ if (!req_passed_in) { - int buffers = 1; - if (lvb_len > 0) - buffers = 2; size[0] = sizeof(*reply); - req->rq_replen = lustre_msg_size(buffers, size); + req->rq_replen = lustre_msg_size(1 + (lvb_len > 0), size); } - lock->l_conn_export = exp; - lock->l_export = NULL; - lock->l_blocking_ast = blocking; - LDLM_DEBUG(lock, "sending request"); rc = ptlrpc_queue_wait(req); @@ -439,8 +452,7 @@ int ldlm_cli_enqueue(struct obd_export *exp, /* lock enqueued on the server */ cleanup_phase = 1; - memcpy(&lock->l_remote_handle, &reply->lock_handle, - sizeof(lock->l_remote_handle)); + lock->l_remote_handle = reply->lock_handle; *flags = reply->lock_flags; lock->l_flags |= reply->lock_flags & LDLM_INHERIT_FLAGS; @@ -475,9 +487,10 @@ int ldlm_cli_enqueue(struct obd_export *exp, LDLM_DEBUG(lock, "client-side enqueue, new resource"); } if (policy != NULL) - memcpy(&lock->l_policy_data, - &reply->lock_desc.l_policy_data, - sizeof(reply->lock_desc.l_policy_data)); + if (!(type == LDLM_IBITS && !(exp->exp_connect_flags & + OBD_CONNECT_IBITS))) + lock->l_policy_data = + reply->lock_desc.l_policy_data; if (type != LDLM_PLAIN) LDLM_DEBUG(lock,"client-side enqueue, new policy data"); } @@ -587,13 +600,12 @@ int ldlm_cli_convert(struct lustre_handle *lockh, int new_mode, int *flags) LDLM_DEBUG(lock, "client-side convert"); req = ptlrpc_prep_req(class_exp2cliimp(lock->l_conn_export), - LDLM_CONVERT, 1, &size, NULL); + LUSTRE_DLM_VERSION, LDLM_CONVERT, 1, &size, NULL); if (!req) GOTO(out, rc = -ENOMEM); body = lustre_msg_buf(req->rq_reqmsg, 0, sizeof (*body)); - 
memcpy(&body->lock_handle1, &lock->l_remote_handle, - sizeof(body->lock_handle1)); + body->lock_handle1 = lock->l_remote_handle; body->lock_desc.l_req_mode = new_mode; body->lock_flags = *flags; @@ -676,7 +688,8 @@ int ldlm_cli_cancel(struct lustre_handle *lockh) goto local_cancel; } - req = ptlrpc_prep_req(imp, LDLM_CANCEL, 1, &size, NULL); + req = ptlrpc_prep_req(imp, LUSTRE_DLM_VERSION, LDLM_CANCEL, + 1, &size, NULL); if (!req) GOTO(out, rc = -ENOMEM); req->rq_no_resend = 1; @@ -686,19 +699,25 @@ int ldlm_cli_cancel(struct lustre_handle *lockh) req->rq_reply_portal = LDLM_CANCEL_REPLY_PORTAL; body = lustre_msg_buf(req->rq_reqmsg, 0, sizeof (*body)); - memcpy(&body->lock_handle1, &lock->l_remote_handle, - sizeof(body->lock_handle1)); + body->lock_handle1 = lock->l_remote_handle; req->rq_replen = lustre_msg_size(0, NULL); rc = ptlrpc_queue_wait(req); if (rc == ESTALE) { - CERROR("client/server (nid %s) out of sync" - " -- not fatal, flags %d\n", - libcfs_nid2str(req->rq_import-> - imp_connection->c_peer.nid), - lock->l_flags); + /* For PLAIN (inodebits) locks on liblustre clients + this is a valid race between us cancelling a lock + from lru and sending notification and server + cancelling our lock at the same time */ +#ifndef __KERNEL__ + if (lock->l_resource->lr_type != LDLM_PLAIN /* IBITS */) +#endif + CERROR("client/server (nid %s) out of sync" + " -- not fatal, flags %d\n", + libcfs_nid2str(req->rq_import-> + imp_connection->c_peer.nid), + lock->l_flags); } else if (rc == -ETIMEDOUT) { ptlrpc_req_finished(req); GOTO(restart, rc); @@ -753,6 +772,13 @@ int ldlm_cancel_lru(struct ldlm_namespace *ns, ldlm_sync_t sync) list_for_each_entry_safe(lock, next, &ns->ns_unused_list, l_lru) { LASSERT(!lock->l_readers && !lock->l_writers); + /* If we have chosen to canecl this lock voluntarily, we better + send cancel notification to server, so that it frees + appropriate state. 
This might lead to a race where while + we are doing cancel here, server is also silently + cancelling this lock. */ + lock->l_flags &= ~LDLM_FL_CANCEL_ON_BLOCK; + /* Setting the CBPENDING flag is a little misleading, but * prevents an important race; namely, once CBPENDING is set, * the lock can accumulate no more readers/writers. Since @@ -959,13 +985,14 @@ int ldlm_resource_foreach(struct ldlm_resource *res, ldlm_iterator_t iter, struct list_head *tmp, *next; struct ldlm_lock *lock; int rc = LDLM_ITER_CONTINUE; - struct ldlm_namespace *ns = res->lr_namespace; + struct ldlm_namespace *ns; ENTRY; if (!res) RETURN(LDLM_ITER_CONTINUE); + ns = res->lr_namespace; l_lock(&ns->ns_lock); list_for_each_safe(tmp, next, &res->lr_granted) { lock = list_entry(tmp, struct ldlm_lock, l_res_link); @@ -1099,8 +1126,7 @@ static int replay_lock_interpret(struct ptlrpc_request *req, GOTO (out, rc = -EPROTO); } - memcpy(&lock->l_remote_handle, &reply->lock_handle, - sizeof(lock->l_remote_handle)); + lock->l_remote_handle = reply->lock_handle; LDLM_DEBUG(lock, "replayed lock:"); ptlrpc_import_recovery_state_machine(req->rq_import); out: @@ -1119,8 +1145,16 @@ static int replay_one_lock(struct obd_import *imp, struct ldlm_lock *lock) int buffers = 1; int size[2]; int flags; - ENTRY; + + /* If this is reply-less callback lock, we cannot replay it, since + * server might have long dropped it, but notification of that event was + * lost by network. (and server granted conflicting lock already) */ + if (lock->l_flags & LDLM_FL_CANCEL_ON_BLOCK) { + LDLM_DEBUG(lock, "Not replaying reply-less lock:"); + ldlm_lock_cancel(lock); + RETURN(0); + } /* * If granted mode matches the requested mode, this lock is granted. 
* @@ -1145,7 +1179,8 @@ static int replay_one_lock(struct obd_import *imp, struct ldlm_lock *lock) flags = LDLM_FL_REPLAY; size[0] = sizeof(*body); - req = ptlrpc_prep_req(imp, LDLM_ENQUEUE, 1, size, NULL); + req = ptlrpc_prep_req(imp, LUSTRE_DLM_VERSION, LDLM_ENQUEUE, + 1, size, NULL); if (!req) RETURN(-ENOMEM); diff --git a/lustre/llite/dcache.c b/lustre/llite/dcache.c index b8500d4..5fc4197 100644 --- a/lustre/llite/dcache.c +++ b/lustre/llite/dcache.c @@ -27,6 +27,7 @@ #define DEBUG_SUBSYSTEM S_LLITE #include +#include #include #include #include diff --git a/lustre/llite/llite_internal.h b/lustre/llite/llite_internal.h index 00a1f8c..3d904c7 100644 --- a/lustre/llite/llite_internal.h +++ b/lustre/llite/llite_internal.h @@ -5,8 +5,13 @@ #ifndef LLITE_INTERNAL_H #define LLITE_INTERNAL_H +#ifdef CONFIG_FS_POSIX_ACL +# include +# include +#endif + #include -#include +#include /* struct lustre_intent_data { @@ -79,6 +84,8 @@ struct ll_inode_info { struct posix_acl *lli_posix_acl; + struct list_head lli_dead_list; + #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) struct inode lli_vfs_inode; #endif @@ -136,6 +143,7 @@ struct ll_ra_info { #define LL_SBI_FLOCK 0x04 #define LL_SBI_USER_XATTR 0x08 /* support user xattr */ #define LL_SBI_ACL 0x10 /* support ACL */ +#define LL_SBI_JOIN 0x20 /* support JOIN */ struct ll_sb_info { struct list_head ll_list; @@ -152,7 +160,7 @@ struct ll_sb_info { int ll_flags; struct list_head ll_conn_chain; /* per-conn chain of SBs */ - __u64 ll_connect_flags; + struct lustre_client_ocd ll_lco; struct hlist_head ll_orphan_dentry_list; /*please don't ask -p*/ struct ll_close_queue *ll_lcq; @@ -167,6 +175,9 @@ struct ll_sb_info { struct ll_ra_info ll_ra_info; unsigned int ll_namelen; struct file_operations *ll_fop; + + struct list_head ll_deathrow; /* inodes to be destroyed (b1443) */ + spinlock_t ll_deathrow_lock; }; struct ll_ra_read { @@ -240,10 +251,6 @@ extern spinlock_t inode_lock; extern struct proc_dir_entry *proc_lustre_fs_root; -#if 
(LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -# define hlist_del_init list_del_init -#endif - static inline struct inode *ll_info2i(struct ll_inode_info *lli) { #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) @@ -389,6 +396,7 @@ extern void ll_set_dd(struct dentry *de); void ll_unhash_aliases(struct inode *); void ll_frob_intent(struct lookup_intent **itp, struct lookup_intent *deft); void ll_lookup_finish_locks(struct lookup_intent *it, struct dentry *dentry); +int ll_dcompare(struct dentry *parent, struct qstr *d_name, struct qstr *name); /* llite/llite_lib.c */ @@ -420,6 +428,8 @@ void lustre_dump_dentry(struct dentry *, int recur); void lustre_dump_inode(struct inode *); struct ll_async_page *llite_pglist_next_llap(struct ll_sb_info *sbi, struct list_head *list); +int ll_obd_statfs(struct inode *inode, void *arg); +int ll_get_max_mdsize(struct ll_sb_info *sbi, int *max_mdsize); /* llite/llite_nfs.c */ __u32 get_uuid2int(const char *name, int len); diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c index ca474c8..7dee781 100644 --- a/lustre/llite/llite_lib.c +++ b/lustre/llite/llite_lib.c @@ -83,6 +83,7 @@ struct ll_sb_info *lustre_init_sbi(struct super_block *sb) RETURN(NULL); spin_lock_init(&sbi->ll_lock); + spin_lock_init(&sbi->ll_lco.lco_lock); INIT_LIST_HEAD(&sbi->ll_pglist); sbi->ll_pglist_gen = 0; if (num_physpages >> (20 - PAGE_SHIFT) < 512) @@ -103,6 +104,9 @@ struct ll_sb_info *lustre_init_sbi(struct super_block *sb) spin_lock(&ll_sb_lock); list_add_tail(&sbi->ll_list, &ll_super_blocks); spin_unlock(&ll_sb_lock); + + INIT_LIST_HEAD(&sbi->ll_deathrow); + spin_lock_init(&sbi->ll_deathrow_lock); RETURN(sbi); } @@ -121,6 +125,10 @@ void lustre_free_sbi(struct super_block *sb) EXIT; } +static struct dentry_operations ll_d_root_ops = { + .d_compare = ll_dcompare, +}; + int lustre_common_fill_super(struct super_block *sb, char *mdc, char *osc) { struct inode *root = 0; @@ -153,12 +161,15 @@ int lustre_common_fill_super(struct super_block *sb, 
char *mdc, char *osc) CERROR("could not register mount in /proc/lustre"); } + /* indicate that inodebits locking is supported by this client */ + data->ocd_connect_flags |= OBD_CONNECT_IBITS; + data->ocd_ibits_known = MDS_INODELOCK_FULL; + if (sb->s_flags & MS_RDONLY) data->ocd_connect_flags |= OBD_CONNECT_RDONLY; if (sbi->ll_flags & LL_SBI_USER_XATTR) - data->ocd_connect_flags |= OBD_CONNECT_USER_XATTR; - if (sbi->ll_flags & LL_SBI_ACL) - data->ocd_connect_flags |= OBD_CONNECT_ACL; + data->ocd_connect_flags |= OBD_CONNECT_XATTR; + data->ocd_connect_flags |= OBD_CONNECT_ACL | OBD_CONNECT_JOIN; if (sbi->ll_flags & LL_SBI_FLOCK) { sbi->ll_fop = &ll_file_operations_flock; @@ -166,6 +177,9 @@ int lustre_common_fill_super(struct super_block *sb, char *mdc, char *osc) sbi->ll_fop = &ll_file_operations; } + data->ocd_connect_flags |= OBD_CONNECT_VERSION; + data->ocd_version = LUSTRE_VERSION_CODE; + err = obd_connect(&mdc_conn, obd, &sbi->ll_sb_uuid, data); if (err == -EBUSY) { CERROR("An MDS (mdc %s) is performing recovery, of which this" @@ -193,17 +207,22 @@ int lustre_common_fill_super(struct super_block *sb, char *mdc, char *osc) sbi->ll_namelen = osfs.os_namelen; if ((sbi->ll_flags & LL_SBI_USER_XATTR) && - !(data->ocd_connect_flags & OBD_CONNECT_USER_XATTR)) { + !(data->ocd_connect_flags & OBD_CONNECT_XATTR)) { LCONSOLE_INFO("Disabling user_xattr feature because " "it is not supported on the server\n"); sbi->ll_flags &= ~LL_SBI_USER_XATTR; } - if (((sbi->ll_flags & LL_SBI_ACL) == 0) != - ((data->ocd_connect_flags & OBD_CONNECT_ACL) == 0)) { - CERROR("Server return unexpected ACL flags\n"); - GOTO(out_mdc, err = -EBADE); - } + if (data->ocd_connect_flags & OBD_CONNECT_ACL) { +#ifdef MS_POSIXACL + sb->s_flags |= MS_POSIXACL; +#endif + sbi->ll_flags |= LL_SBI_ACL; + } else + sbi->ll_flags &= ~LL_SBI_ACL; + + if (data->ocd_connect_flags & OBD_CONNECT_JOIN) + sbi->ll_flags |= LL_SBI_JOIN; #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)) /* We set sb->s_dev equal on all 
lustre clients in order to support @@ -221,6 +240,16 @@ int lustre_common_fill_super(struct super_block *sb, char *mdc, char *osc) GOTO(out_mdc, err); } + data->ocd_connect_flags = + OBD_CONNECT_GRANT|OBD_CONNECT_VERSION|OBD_CONNECT_REQPORTAL; + + CDEBUG(D_RPCTRACE, "ocd_connect_flags: "LPX64" ocd_version: %d " + "ocd_grant: %d\n", data->ocd_connect_flags, + data->ocd_version, data->ocd_grant); + + obd->obd_upcall.onu_owner = &sbi->ll_lco; + obd->obd_upcall.onu_upcall = ll_ocd_update; + err = obd_connect(&osc_conn, obd, &sbi->ll_sb_uuid, data); if (err == -EBUSY) { CERROR("An OST (osc %s) is performing recovery, of which this" @@ -232,7 +261,9 @@ int lustre_common_fill_super(struct super_block *sb, char *mdc, char *osc) GOTO(out_mdc, err); } sbi->ll_osc_exp = class_conn2export(&osc_conn); - sbi->ll_connect_flags = data->ocd_connect_flags; + spin_lock(&sbi->ll_lco.lco_lock); + sbi->ll_lco.lco_flags = data->ocd_connect_flags; + spin_unlock(&sbi->ll_lco.lco_lock); mdc_init_ea_size(sbi->ll_mdc_exp, sbi->ll_osc_exp); @@ -261,7 +292,8 @@ int lustre_common_fill_super(struct super_block *sb, char *mdc, char *osc) /* make root inode * XXX: move this to after cbd setup? */ err = mdc_getattr(sbi->ll_mdc_exp, &rootfid, - OBD_MD_FLGETATTR | OBD_MD_FLBLOCKS | OBD_MD_FLACL, + OBD_MD_FLGETATTR | OBD_MD_FLBLOCKS | + (sbi->ll_flags & LL_SBI_ACL ? 
OBD_MD_FLACL : 0), 0, &request); if (err) { CERROR("mdc_getattr failed for root: rc = %d\n", err); @@ -304,6 +336,7 @@ int lustre_common_fill_super(struct super_block *sb, char *mdc, char *osc) sb->s_root = d_alloc_root(root); if (data != NULL) OBD_FREE(data, sizeof(*data)); + sb->s_root->d_op = &ll_d_root_ops; RETURN(err); out_root: @@ -320,6 +353,20 @@ out: RETURN(err); } +int ll_get_max_mdsize(struct ll_sb_info *sbi, int *lmmsize) +{ + int size, rc; + + *lmmsize = obd_size_diskmd(sbi->ll_osc_exp, NULL); + size = sizeof(int); + rc = obd_get_info(sbi->ll_mdc_exp, strlen("max_easize"), "max_easize", + &size, lmmsize); + if (rc) + CERROR("Get max mdsize error rc %d \n", rc); + + RETURN(rc); +} + void ll_dump_inode(struct inode *inode) { struct list_head *tmp; @@ -363,14 +410,76 @@ void lustre_dump_dentry(struct dentry *dentry, int recur) } } +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) +void lustre_throw_orphan_dentries(struct super_block *sb) +{ + struct hlist_node *tmp, *next; + struct ll_sb_info *sbi = ll_s2sbi(sb); + + /* Do this to get rid of orphaned dentries. That is not really trw. 
*/ + hlist_for_each_safe(tmp, next, &sbi->ll_orphan_dentry_list) { + struct dentry *dentry = hlist_entry(tmp, struct dentry, d_hash); + CWARN("found orphan dentry %.*s (%p->%p) at unmount, dumping " + "before and after shrink_dcache_parent\n", + dentry->d_name.len, dentry->d_name.name, dentry, next); + lustre_dump_dentry(dentry, 1); + shrink_dcache_parent(dentry); + lustre_dump_dentry(dentry, 1); + } +} +#else +#define lustre_throw_orphan_dentries(sb) +#endif + +static void prune_deathrow(struct ll_sb_info *sbi, int try) +{ + LIST_HEAD(throw_away); + int locked = 0; + ENTRY; + + if (try) { + locked = spin_trylock(&sbi->ll_deathrow_lock); + } else { + spin_lock(&sbi->ll_deathrow_lock); + locked = 1; + } + + if (!locked) { + EXIT; + return; + } + + list_splice_init(&sbi->ll_deathrow, &throw_away); + spin_unlock(&sbi->ll_deathrow_lock); + + while (!list_empty(&throw_away)) { + struct ll_inode_info *lli; + struct inode *inode; + + lli = list_entry(throw_away.next, struct ll_inode_info, + lli_dead_list); + list_del_init(&lli->lli_dead_list); + + inode = ll_info2i(lli); + d_prune_aliases(inode); + + CDEBUG(D_INODE, "prune duplicate inode %p inum %lu count %u\n", + inode, inode->i_ino, atomic_read(&inode->i_count)); + iput(inode); + } + EXIT; +} + void lustre_common_put_super(struct super_block *sb) { struct ll_sb_info *sbi = ll_s2sbi(sb); - struct hlist_node *tmp, *next; ENTRY; ll_close_thread_shutdown(sbi->ll_lcq); + /* destroy inodes in deathrow */ + prune_deathrow(sbi, 0); + list_del(&sbi->ll_conn_chain); obd_disconnect(sbi->ll_osc_exp); @@ -382,16 +491,7 @@ void lustre_common_put_super(struct super_block *sb) obd_disconnect(sbi->ll_mdc_exp); - // We do this to get rid of orphaned dentries. That is not really trw. 
- hlist_for_each_safe(tmp, next, &sbi->ll_orphan_dentry_list) { - struct dentry *dentry = hlist_entry(tmp, struct dentry, d_hash); - CWARN("found orphan dentry %.*s (%p->%p) at unmount, dumping " - "before and after shrink_dcache_parent\n", - dentry->d_name.len, dentry->d_name.name, dentry, next); - lustre_dump_dentry(dentry, 1); - shrink_dcache_parent(dentry); - lustre_dump_dentry(dentry, 1); - } + lustre_throw_orphan_dentries(sb); EXIT; } @@ -453,9 +553,9 @@ void ll_options(char *options, char **ost, char **mdc, int *flags) #endif { CDEBUG(D_SUPER, "this_char %s\n", this_char); - if (!*ost && (*ost = ll_read_opt("osc", this_char))) + if (!*ost && (*ost = ll_read_opt(LUSTRE_OSC_NAME, this_char))) continue; - if (!*mdc && (*mdc = ll_read_opt("mdc", this_char))) + if (!*mdc && (*mdc = ll_read_opt(LUSTRE_MDC_NAME, this_char))) continue; tmp = ll_set_opt("nolock", this_char, LL_SBI_NOLCK); if (tmp) { @@ -484,12 +584,13 @@ void ll_options(char *options, char **ost, char **mdc, int *flags) } tmp = ll_set_opt("acl", this_char, LL_SBI_ACL); if (tmp) { - *flags |= tmp; + /* Ignore deprecated mount option. The client will + * always try to mount with ACL support, whether this + * is used depends on whether server supports it. 
*/ continue; } tmp = ll_set_opt("noacl", this_char, LL_SBI_ACL); if (tmp) { - *flags &= ~tmp; continue; } } @@ -505,6 +606,7 @@ void ll_lli_init(struct ll_inode_info *lli) spin_lock_init(&lli->lli_lock); INIT_LIST_HEAD(&lli->lli_pending_write_llaps); lli->lli_inode_magic = LLI_INODE_MAGIC; + INIT_LIST_HEAD(&lli->lli_dead_list); } int ll_fill_super(struct super_block *sb, void *data, int silent) @@ -546,12 +648,31 @@ out: RETURN(err); } /* ll_read_super */ -int lustre_process_log(struct lustre_mount_data *lmd, char * profile, - struct config_llog_instance *cfg, int allow_recov) +static int do_lcfg(char *cfgname, lnet_nid_t nid, int cmd, + char *s1, char *s2) { - struct lustre_cfg *lcfg = NULL; struct lustre_cfg_bufs bufs; - char * peer = "MDS_PEER_UUID"; + struct lustre_cfg * lcfg = NULL; + int err; + + CDEBUG(D_TRACE, "lcfg %s %#x %s %s\n", cfgname, cmd, s1, s2); + + lustre_cfg_bufs_reset(&bufs, cfgname); + if (s1) + lustre_cfg_bufs_set_string(&bufs, 1, s1); + if (s2) + lustre_cfg_bufs_set_string(&bufs, 2, s2); + + lcfg = lustre_cfg_new(cmd, &bufs); + lcfg->lcfg_nid = nid; + err = class_process_config(lcfg); + lustre_cfg_free(lcfg); + return(err); +} + +static int lustre_process_log(struct lustre_mount_data *lmd, char * profile, + struct config_llog_instance *cfg) +{ struct obd_device *obd; struct lustre_handle mdc_conn = {0, }; struct obd_export *exp; @@ -559,8 +680,9 @@ int lustre_process_log(struct lustre_mount_data *lmd, char * profile, class_uuid_t uuid; struct obd_uuid mdc_uuid; struct llog_ctxt *ctxt; - struct obd_connect_data *ocd = NULL; - int rc = 0; + struct obd_connect_data ocd = { 0 }; + lnet_nid_t nid; + int i, rc = 0, recov_bk = 1; int err; ENTRY; @@ -570,35 +692,18 @@ int lustre_process_log(struct lustre_mount_data *lmd, char * profile, lustre_generate_random_uuid(uuid); class_uuid_unparse(uuid, &mdc_uuid); CDEBUG(D_HA, "generated uuid: %s\n", mdc_uuid.uuid); - - lustre_cfg_bufs_reset(&bufs, name); - lustre_cfg_bufs_set_string(&bufs, 1, peer); - - 
lcfg = lustre_cfg_new(LCFG_ADD_UUID, &bufs); - lcfg->lcfg_nid = lmd->lmd_nid; - LASSERT(lcfg->lcfg_nid != LNET_NID_ANY); - rc = class_process_config(lcfg); - lustre_cfg_free(lcfg); + + nid = lmd->lmd_nid[0]; + LASSERT(nid != LNET_NID_ANY); + rc = do_lcfg(name, nid, LCFG_ADD_UUID, libcfs_nid2str(nid), 0); if (rc < 0) GOTO(out, rc); - lustre_cfg_bufs_reset(&bufs, name); - lustre_cfg_bufs_set_string(&bufs, 1, LUSTRE_MDC_NAME); - lustre_cfg_bufs_set_string(&bufs, 2, mdc_uuid.uuid); - - lcfg = lustre_cfg_new(LCFG_ATTACH, &bufs); - rc = class_process_config(lcfg); - lustre_cfg_free(lcfg); + rc = do_lcfg(name, 0, LCFG_ATTACH, LUSTRE_MDC_NAME, mdc_uuid.uuid); if (rc < 0) GOTO(out_del_uuid, rc); - lustre_cfg_bufs_reset(&bufs, name); - lustre_cfg_bufs_set_string(&bufs, 1, lmd->lmd_mds); - lustre_cfg_bufs_set_string(&bufs, 2, peer); - - lcfg = lustre_cfg_new(LCFG_SETUP, &bufs); - rc = class_process_config(lcfg); - lustre_cfg_free(lcfg); + rc = do_lcfg(name, 0, LCFG_SETUP, lmd->lmd_mds, libcfs_nid2str(nid)); if (rc < 0) { LCONSOLE_ERROR("I couldn't establish a connection with the MDS." " Check that the MDS host NID is correct and the" @@ -610,21 +715,31 @@ int lustre_process_log(struct lustre_mount_data *lmd, char * profile, if (obd == NULL) GOTO(out_cleanup, rc = -EINVAL); - /* Disable initial recovery on this import */ + /* Add the redundant MDS nids */ + for (i = 1; i < lmd->lmd_nid_count; i++) { + nid = lmd->lmd_nid[i]; + rc = do_lcfg(name, nid, LCFG_ADD_UUID, libcfs_nid2str(nid), 0); + if (rc) { + CERROR("Add uuid for %s failed %d\n", + libcfs_nid2str(nid), rc); + continue; + } + rc = do_lcfg(name, 0, LCFG_ADD_CONN, libcfs_nid2str(nid), 0); + if (rc) + CERROR("Add conn for %s failed %d\n", + libcfs_nid2str(nid), rc); + } + + /* Try all connections, but only once. 
*/ rc = obd_set_info(obd->obd_self_export, - strlen("initial_recov"), "initial_recov", - sizeof(allow_recov), &allow_recov); + strlen("init_recov_bk"), "init_recov_bk", + sizeof(recov_bk), &recov_bk); if (rc) GOTO(out_cleanup, rc); - if (lmd->lmd_flags & LMD_FLG_ACL) { - OBD_ALLOC(ocd, sizeof(*ocd)); - if (ocd == NULL) - GOTO(out_cleanup, rc = -ENOMEM); - ocd->ocd_connect_flags |= OBD_CONNECT_ACL; - } + ocd.ocd_connect_flags = OBD_CONNECT_ACL; - rc = obd_connect(&mdc_conn, obd, &mdc_uuid, ocd); + rc = obd_connect(&mdc_conn, obd, &mdc_uuid, &ocd); if (rc) { CERROR("cannot connect to %s: rc = %d\n", lmd->lmd_mds, rc); GOTO(out_cleanup, rc); @@ -663,33 +778,27 @@ int lustre_process_log(struct lustre_mount_data *lmd, char * profile, CERROR("obd_disconnect failed: rc = %d\n", err); out_cleanup: - lustre_cfg_bufs_reset(&bufs, name); - lcfg = lustre_cfg_new(LCFG_CLEANUP, &bufs); - err = class_process_config(lcfg); - lustre_cfg_free(lcfg); + err = do_lcfg(name, 0, LCFG_CLEANUP, 0, 0); if (err) CERROR("mdc_cleanup failed: rc = %d\n", err); out_detach: - lustre_cfg_bufs_reset(&bufs, name); - lcfg = lustre_cfg_new(LCFG_DETACH, &bufs); - err = class_process_config(lcfg); - lustre_cfg_free(lcfg); + err = do_lcfg(name, 0, LCFG_DETACH, 0, 0); if (err) CERROR("mdc_detach failed: rc = %d\n", err); out_del_uuid: - lustre_cfg_bufs_reset(&bufs, name); - lustre_cfg_bufs_set_string(&bufs, 1, peer); - lcfg = lustre_cfg_new(LCFG_DEL_UUID, &bufs); - err = class_process_config(lcfg); - lustre_cfg_free(lcfg); - if (err) - CERROR("del MDC UUID failed: rc = %d\n", err); - + /* class_add_uuid adds a nid even if the same uuid exists; we might + delete any copy here. So they all better match. 
*/ + for (i = 0; i < lmd->lmd_nid_count; i++) { + nid = lmd->lmd_nid[i]; + err = do_lcfg(name, nid, LCFG_DEL_UUID, libcfs_nid2str(nid), 0); + if (err) + CERROR("del MDC UUID %s failed: rc = %d\n", + libcfs_nid2str(nid), err); + } + /* class_import_put will get rid of the additional connections */ out: - if (ocd) - OBD_FREE(ocd, sizeof(*ocd)); RETURN(rc); } @@ -700,7 +809,7 @@ static void lustre_manual_cleanup(struct ll_sb_info *sbi) while ((obd = class_devices_in_group(&sbi->ll_sb_uuid, &next)) !=NULL) { class_manual_cleanup(obd); - } + } if (sbi->ll_lmd != NULL) class_del_profile(sbi->ll_lmd->lmd_profile); @@ -741,8 +850,6 @@ int lustre_fill_super(struct super_block *sb, void *data, int silent) sbi->ll_flags |= LL_SBI_FLOCK; if (lmd->lmd_flags & LMD_FLG_USER_XATTR) sbi->ll_flags |= LL_SBI_USER_XATTR; - if (lmd->lmd_flags & LMD_FLG_ACL) - sbi->ll_flags |= LL_SBI_ACL; /* generate a string unique to this super, let's try the address of the super itself.*/ @@ -750,7 +857,7 @@ int lustre_fill_super(struct super_block *sb, void *data, int silent) cfg.cfg_instance = ll_instance; cfg.cfg_uuid = sbi->ll_sb_uuid; - err = lustre_process_log(lmd, lmd->lmd_profile, &cfg, 0); + err = lustre_process_log(lmd, lmd->lmd_profile, &cfg); if (err < 0) { CERROR("Unable to process log: %s\n", lmd->lmd_profile); GOTO(out_free, err); @@ -818,13 +925,13 @@ void lustre_put_super(struct super_block *sb) obd = class_exp2obd(sbi->ll_mdc_exp); if (obd) { int next = 0; - /* We need to set force before the lov_disconnect in + /* We need to set force before the lov_disconnect in lustre_common_put_super, since l_d cleans up osc's as well. 
*/ force = obd->obd_no_recov; - while ((obd = class_devices_in_group(&sbi->ll_sb_uuid, &next)) + while ((obd = class_devices_in_group(&sbi->ll_sb_uuid, &next)) !=NULL) { obd->obd_force = force; - } + } } lustre_common_put_super(sb); @@ -926,14 +1033,20 @@ void ll_clear_inode(struct inode *inode) lli->lli_symlink_name = NULL; } +#ifdef CONFIG_FS_POSIX_ACL if (lli->lli_posix_acl) { LASSERT(atomic_read(&lli->lli_posix_acl->a_refcount) == 1); posix_acl_release(lli->lli_posix_acl); lli->lli_posix_acl = NULL; } +#endif lli->lli_inode_magic = LLI_INODE_DEAD; + spin_lock(&sbi->ll_deathrow_lock); + list_del_init(&lli->lli_dead_list); + spin_unlock(&sbi->ll_deathrow_lock); + EXIT; } @@ -1034,14 +1147,14 @@ int ll_setattr_raw(struct inode *inode, struct iattr *attr) * above to avoid invoking vmtruncate, otherwise it is important * to call vmtruncate in inode_setattr to update inode->i_size * (bug 6196) */ - inode_setattr(inode, attr); + rc = inode_setattr(inode, attr); ll_update_inode(inode, &md); ptlrpc_req_finished(request); if (!lsm || !S_ISREG(inode->i_mode)) { CDEBUG(D_INODE, "no lsm: not setting attrs on OST\n"); - RETURN(0); + RETURN(rc); } } else { /* The OST doesn't check permissions, but the alternative is @@ -1063,7 +1176,7 @@ int ll_setattr_raw(struct inode *inode, struct iattr *attr) } /* Won't invoke vmtruncate, as we already cleared ATTR_SIZE */ - inode_setattr(inode, attr); + rc = inode_setattr(inode, attr); } /* We really need to get our PW lock before we change inode->i_size. 
@@ -1255,6 +1368,23 @@ void ll_inode_size_unlock(struct inode *inode, int unlock_lsm) up(&lli->lli_size_sem); } +static void ll_replace_lsm(struct inode *inode, struct lov_stripe_md *lsm) +{ + struct ll_inode_info *lli = ll_i2info(inode); + + dump_lsm(D_INODE, lsm); + dump_lsm(D_INODE, lli->lli_smd); + LASSERTF(lsm->lsm_magic == LOV_MAGIC_JOIN, + "lsm must be joined lsm %p\n", lsm); + obd_free_memmd(ll_i2obdexp(inode), &lli->lli_smd); + CDEBUG(D_INODE, "replace lsm %p to lli_smd %p for inode %lu%u(%p)\n", + lsm, lli->lli_smd, inode->i_ino, inode->i_generation, inode); + lli->lli_smd = lsm; + lli->lli_maxbytes = lsm->lsm_maxbytes; + if (lli->lli_maxbytes > PAGE_CACHE_MAXBYTES) + lli->lli_maxbytes = PAGE_CACHE_MAXBYTES; +} + void ll_update_inode(struct inode *inode, struct lustre_md *md) { struct ll_inode_info *lli = ll_i2info(inode); @@ -1264,7 +1394,8 @@ void ll_update_inode(struct inode *inode, struct lustre_md *md) LASSERT ((lsm != NULL) == ((body->valid & OBD_MD_FLEASIZE) != 0)); if (lsm != NULL) { if (lli->lli_smd == NULL) { - if (lsm->lsm_magic != LOV_MAGIC) { + if (lsm->lsm_magic != LOV_MAGIC && + lsm->lsm_magic != LOV_MAGIC_JOIN) { dump_lsm(D_ERROR, lsm); LBUG(); } @@ -1278,15 +1409,20 @@ void ll_update_inode(struct inode *inode, struct lustre_md *md) if (lli->lli_maxbytes > PAGE_CACHE_MAXBYTES) lli->lli_maxbytes = PAGE_CACHE_MAXBYTES; } else { - if (lov_stripe_md_cmp(lli->lli_smd, lsm)) { - CERROR("lsm mismatch for inode %ld\n", - inode->i_ino); - CERROR("lli_smd:\n"); - dump_lsm(D_ERROR, lli->lli_smd); - CERROR("lsm:\n"); - dump_lsm(D_ERROR, lsm); - LBUG(); - } + if (lli->lli_smd->lsm_magic == lsm->lsm_magic && + lli->lli_smd->lsm_stripe_count == + lsm->lsm_stripe_count) { + if (lov_stripe_md_cmp(lli->lli_smd, lsm)) { + CERROR("lsm mismatch for inode %ld\n", + inode->i_ino); + CERROR("lli_smd:\n"); + dump_lsm(D_ERROR, lli->lli_smd); + CERROR("lsm:\n"); + dump_lsm(D_ERROR, lsm); + LBUG(); + } + } else + ll_replace_lsm(inode, lsm); } /* bug 2844 - limit 
i_blksize for broken user-space apps */ LASSERTF(lsm->lsm_xfersize != 0, "%lu\n", lsm->lsm_xfersize); @@ -1298,6 +1434,7 @@ void ll_update_inode(struct inode *inode, struct lustre_md *md) inode->i_sb->s_blocksize); } +#ifdef CONFIG_FS_POSIX_ACL LASSERT(!md->posix_acl || (body->valid & OBD_MD_FLACL)); if (body->valid & OBD_MD_FLACL) { spin_lock(&lli->lli_lock); @@ -1306,10 +1443,12 @@ void ll_update_inode(struct inode *inode, struct lustre_md *md) lli->lli_posix_acl = md->posix_acl; spin_unlock(&lli->lli_lock); } +#endif if (body->valid & OBD_MD_FLID) inode->i_ino = body->ino; - if (body->valid & OBD_MD_FLATIME) + if (body->valid & OBD_MD_FLATIME && + body->atime > LTIME_S(inode->i_atime)) LTIME_S(inode->i_atime) = body->atime; if (body->valid & OBD_MD_FLMTIME && body->mtime > LTIME_S(inode->i_mtime)) { @@ -1592,7 +1731,13 @@ int ll_prep_inode(struct obd_export *exp, struct inode **inode, struct ptlrpc_request *req, int offset,struct super_block *sb) { struct lustre_md md; + struct ll_sb_info *sbi = NULL; int rc = 0; + ENTRY; + + LASSERT(*inode || sb); + sbi = sb ? 
ll_s2sbi(sb) : ll_i2sbi(*inode); + prune_deathrow(sbi, 1); rc = mdc_req2lustre_md(req, offset, exp, &md); if (rc) @@ -1607,9 +1752,13 @@ int ll_prep_inode(struct obd_export *exp, struct inode **inode, mdc_free_lustre_md(exp, &md); rc = -ENOMEM; CERROR("new_inode -fatal: rc %d\n", rc); + GOTO(out, rc); } } + rc = obd_checkmd(exp, ll_i2mdcexp(*inode), + ll_i2info(*inode)->lli_smd); +out: RETURN(rc); } @@ -1638,3 +1787,69 @@ struct ll_async_page *llite_pglist_next_llap(struct ll_sb_info *sbi, LBUG(); return NULL; } + +int ll_obd_statfs(struct inode *inode, void *arg) +{ + struct ll_sb_info *sbi = NULL; + struct obd_device *client_obd = NULL, *lov_obd = NULL; + struct lov_obd *lov = NULL; + struct obd_import *client_imp = NULL; + struct obd_statfs stat_buf = {0}; + char *buf = NULL; + struct obd_ioctl_data *data = NULL; + __u32 type, index; + int len, rc; + + if (!inode || !(sbi = ll_i2sbi(inode))) + GOTO(out_statfs, rc = -EINVAL); + + rc = obd_ioctl_getdata(&buf, &len, arg); + if (rc) + GOTO(out_statfs, rc); + + data = (void*)buf; + if (!data->ioc_inlbuf1 || !data->ioc_inlbuf2 || + !data->ioc_pbuf1 || !data->ioc_pbuf2) + GOTO(out_statfs, rc = -EINVAL); + + memcpy(&type, data->ioc_inlbuf1, sizeof(__u32)); + memcpy(&index, data->ioc_inlbuf2, sizeof(__u32)); + + if (type == LL_STATFS_MDC) { + if (index > 0) + GOTO(out_statfs, rc = -ENODEV); + client_obd = class_exp2obd(sbi->ll_mdc_exp); + client_imp = class_exp2cliimp(sbi->ll_mdc_exp); + } else if (type == LL_STATFS_LOV) { + lov_obd = class_exp2obd(sbi->ll_osc_exp); + lov = &lov_obd->u.lov; + + if (index >= lov->desc.ld_tgt_count) + GOTO(out_statfs, rc = -ENODEV); + + client_obd = class_exp2obd(lov->tgts[index].ltd_exp); + client_imp = class_exp2cliimp(lov->tgts[index].ltd_exp); + if (!lov->tgts[index].active) + GOTO(out_uuid, rc = -ENODATA); + } + + if (!client_obd || !client_imp) + GOTO(out_statfs, rc = -EINVAL); + + rc = obd_statfs(client_obd, &stat_buf, jiffies - 1); + if (rc) + GOTO(out_statfs, rc); + + if 
(copy_to_user(data->ioc_pbuf1, &stat_buf, data->ioc_plen1)) + GOTO(out_statfs, rc = -EFAULT); + +out_uuid: + if (copy_to_user(data->ioc_pbuf2, &client_imp->imp_target_uuid, + data->ioc_plen2)) + rc = -EFAULT; + +out_statfs: + if (buf) + obd_ioctl_freedata(buf, len); + return rc; +} diff --git a/lustre/lov/lov_ea.c b/lustre/lov/lov_ea.c index c08020d..ffc71de 100755 --- a/lustre/lov/lov_ea.c +++ b/lustre/lov/lov_ea.c @@ -30,14 +30,16 @@ #ifdef __KERNEL__ #include +#include #else #include #endif -#include -#include -#include -#include +#include +#include +#include +#include +#include #include "lov_internal.h" diff --git a/lustre/lov/lov_internal.h b/lustre/lov/lov_internal.h index 5829fa9..2af645e 100644 --- a/lustre/lov/lov_internal.h +++ b/lustre/lov/lov_internal.h @@ -132,8 +132,8 @@ int lov_stripe_number(struct lov_stripe_md *lsm, obd_off lov_off); /* lov_qos.c */ void qos_shrink_lsm(struct lov_request_set *set); -int qos_prep_create(struct obd_export *exp, struct lov_request_set *set); -void qos_update(struct lov_obd *lov, int idx, struct obd_statfs *osfs); +int qos_prep_create(struct lov_obd *lov, struct lov_request_set *set, + int newea); int qos_remedy_create(struct lov_request_set *set, struct lov_request *req); /* lov_request.c */ diff --git a/lustre/lov/lov_obd.c b/lustre/lov/lov_obd.c index 0ebda37..f0a409f 100644 --- a/lustre/lov/lov_obd.c +++ b/lustre/lov/lov_obd.c @@ -49,7 +49,44 @@ #include "lov_internal.h" -/* obd methods */ + +/* FIXME add lov_get/putrefs around every access to lov->tgts for on-line non- + quiescent ost removal */ +/* Keep a refcount of lov->tgt usage to prevent racing with deletion */ +static void lov_getref(struct obd_device *obd) +{ + struct lov_obd *lov = &obd->u.lov; + + /* nobody gets through here until lov_putref is done */ + down(&lov->lov_lock); + atomic_inc(&lov->refcount); + up(&lov->lov_lock); + return; +} + +static void __lov_del_obd(struct obd_device *obd, struct lov_tgt_desc *tgt); + +static void lov_putref(struct 
obd_device *obd) +{ + struct lov_obd *lov = &obd->u.lov; + down(&lov->lov_lock); + /* ok to dec to 0 more than once -- ltd_exp's will be null */ + if (atomic_dec_and_test(&lov->refcount) && lov->death_row) { + struct lov_tgt_desc *tgt; + int i; + CDEBUG(D_CONFIG, "destroying %d lov targets\n", lov->death_row); + for (i = 0, tgt = lov->tgts; i < lov->desc.ld_tgt_count; + i++, tgt++) { + if (!tgt->reap) + continue; + /* Disconnect and delete from list */ + __lov_del_obd(obd, tgt); + lov->death_row--; + } + } + up(&lov->lov_lock); +} + #define MAX_STRING_SIZE 128 static int lov_connect_obd(struct obd_device *obd, struct lov_tgt_desc *tgt, int activate, struct obd_connect_data *data) @@ -118,6 +155,7 @@ static int lov_connect_obd(struct obd_device *obd, struct lov_tgt_desc *tgt, } tgt->active = 1; + tgt->reap = 0; lov->desc.ld_active_tgt_count++; #ifdef __KERNEL__ @@ -167,8 +205,8 @@ static int lov_connect(struct lustre_handle *conn, struct obd_device *obd, /* We don't want to actually do the underlying connections more than * once, so keep track. 
*/ - lov->refcount++; - if (lov->refcount > 1) { + lov->connects++; + if (lov->connects > 1) { class_export_put(exp); RETURN(0); } @@ -176,6 +214,8 @@ static int lov_connect(struct lustre_handle *conn, struct obd_device *obd, for (i = 0, tgt = lov->tgts; i < lov->desc.ld_tgt_count; i++, tgt++) { if (obd_uuid_empty(&tgt->uuid)) continue; + if (connect_flags & OBD_CONNECT_INDEX) + data->ocd_index = i; rc = lov_connect_obd(obd, tgt, 0, data); if (rc) GOTO(out_disc, rc); @@ -259,43 +299,38 @@ static int lov_disconnect_obd(struct obd_device *obd, struct lov_tgt_desc *tgt) RETURN(0); } -static int -lov_del_obd(struct obd_device *obd, struct obd_uuid *uuidp, int index, int gen); +static int lov_del_obd(struct obd_device *obd, struct obd_uuid *uuidp, + int index, int gen); static int lov_disconnect(struct obd_export *exp) { struct obd_device *obd = class_exp2obd(exp); - struct obd_device *osc_obd; struct lov_obd *lov = &obd->u.lov; struct lov_tgt_desc *tgt; - int rc, i; + int i, rc; ENTRY; - rc = class_disconnect(exp); - if (!lov->tgts) - RETURN(rc); + goto out; /* Only disconnect the underlying layers on the final disconnect. */ - lov->refcount--; - if (lov->refcount != 0) - RETURN(rc); + lov->connects--; + if (lov->connects != 0) + goto out; + /* Let's hold another reference so lov_del_obd doesn't spin through + putref every time */ + lov_getref(obd); for (i = 0, tgt = lov->tgts; i < lov->desc.ld_tgt_count; i++, tgt++) { if (tgt->ltd_exp) { - osc_obd = class_exp2obd(tgt->ltd_exp); - /* Disconnect and delete from list */ + /* Disconnection is the last we know about an obd */ lov_del_obd(obd, &tgt->uuid, i, tgt->ltd_gen); - /* Cleanup the osc now - can't do it from - lov_cleanup because we just lost our only reference - to it. */ - /* Use lov's force/fail flags. 
*/ - osc_obd->obd_force = obd->obd_force; - osc_obd->obd_fail = obd->obd_fail; - class_manual_cleanup(osc_obd); } } + lov_putref(obd); +out: + rc = class_disconnect(exp); RETURN(rc); } @@ -315,7 +350,6 @@ static int lov_set_osc_active(struct lov_obd *lov, struct obd_uuid *uuid, CDEBUG(D_INFO, "Searching in lov %p for uuid %s (activate=%d)\n", lov, uuid->uuid, activate); - spin_lock(&lov->lov_lock); for (i = 0, tgt = lov->tgts; i < lov->desc.ld_tgt_count; i++, tgt++) { if (tgt->ltd_exp == NULL) continue; @@ -346,18 +380,17 @@ static int lov_set_osc_active(struct lov_obd *lov, struct obd_uuid *uuid, EXIT; out: - spin_unlock(&lov->lov_lock); return rc; } static int lov_notify(struct obd_device *obd, struct obd_device *watched, - int active) + enum obd_notify_event ev) { - int rc; struct obd_uuid *uuid; - + int rc; ENTRY; - if (strcmp(watched->obd_type->typ_name, "osc")) { + + if (strcmp(watched->obd_type->typ_name, LUSTRE_OSC_NAME)) { CERROR("unexpected notification of %s %s!\n", watched->obd_type->typ_name, watched->obd_name); @@ -365,19 +398,24 @@ static int lov_notify(struct obd_device *obd, struct obd_device *watched, } uuid = &watched->u.cli.cl_import->imp_target_uuid; - /* Set OSC as active before notifying the observer, so the - * observer can use the OSC normally. - */ - rc = lov_set_osc_active(&obd->u.lov, uuid, active); - if (rc) { - CERROR("%sactivation of %s failed: %d\n", - active ? "" : "de", uuid->uuid, rc); - RETURN(rc); + if (ev == OBD_NOTIFY_ACTIVE || ev == OBD_NOTIFY_INACTIVE) { + /* Set OSC as active before notifying the observer, so the + * observer can use the OSC normally. + */ + lov_getref(obd); + rc = lov_set_osc_active(&obd->u.lov, uuid, + ev == OBD_NOTIFY_ACTIVE); + lov_putref(obd); + if (rc) { + CERROR("%sactivation of %s failed: %d\n", + (ev == OBD_NOTIFY_ACTIVE) ? "" : "de", + uuid->uuid, rc); + RETURN(rc); + } } - if (obd->obd_observer) - /* Pass the notification up the chain. 
*/ - rc = obd_notify(obd->obd_observer, watched, active); + /* Pass the notification up the chain. */ + rc = obd_notify_observer(obd, watched, ev); RETURN(rc); } @@ -387,8 +425,9 @@ lov_add_obd(struct obd_device *obd, struct obd_uuid *uuidp, int index, int gen) { struct lov_obd *lov = &obd->u.lov; struct lov_tgt_desc *tgt; + obd_id params[2]; int rc, old_count; - __u32 bufsize; + __u32 bufsize, size = 2; ENTRY; CDEBUG(D_CONFIG, "uuid: %s idx: %d gen: %d\n", @@ -415,15 +454,9 @@ lov_add_obd(struct obd_device *obd, struct obd_uuid *uuidp, int index, int gen) RETURN(-ENOMEM); } + memset(tgt, 0, bufsize); if (lov->tgts) { - int i; memcpy(tgt, lov->tgts, lov->bufsize); - LASSERT(index == lov->desc.ld_tgt_count); - for (i = 0; i < index; i++) { - INIT_LIST_HEAD(&tgt[i].qos_bavail_list); - list_splice(&lov->tgts[i].qos_bavail_list, - &tgt[i].qos_bavail_list); - } OBD_FREE(lov->tgts, lov->bufsize); } @@ -443,8 +476,6 @@ lov_add_obd(struct obd_device *obd, struct obd_uuid *uuidp, int index, int gen) tgt->uuid = *uuidp; /* XXX - add a sanity check on the generation number. */ tgt->ltd_gen = gen; - tgt->index = index; - INIT_LIST_HEAD(&tgt->qos_bavail_list); old_count = lov->desc.ld_tgt_count; if (index >= lov->desc.ld_tgt_count) @@ -453,7 +484,7 @@ lov_add_obd(struct obd_device *obd, struct obd_uuid *uuidp, int index, int gen) CDEBUG(D_CONFIG, "idx=%d ltd_gen=%d ld_tgt_count=%d\n", index, tgt->ltd_gen, lov->desc.ld_tgt_count); - if (lov->refcount == 0) + if (lov->connects == 0) /* lov_connect hasn't been called yet. So we'll do the lov_connect_obd on this obd when that fn first runs. 
*/ RETURN(0); @@ -475,7 +506,18 @@ lov_add_obd(struct obd_device *obd, struct obd_uuid *uuidp, int index, int gen) obd_llog_finish(obd->obd_observer, old_count); llog_cat_initialize(obd->obd_observer, lov->desc.ld_tgt_count); - rc = lov_notify(obd, tgt->ltd_exp->exp_obd, 1); + params[0] = index; + rc = obd_get_info(tgt->ltd_exp, strlen("last_id"), "last_id", &size, + &params[1]); + if (rc) + GOTO(out, rc); + + rc = obd_set_info(obd->obd_observer->obd_self_export, + strlen("next_id"),"next_id", 2, params); + if (rc) + GOTO(out, rc); + + rc = lov_notify(obd, tgt->ltd_exp->exp_obd, OBD_NOTIFY_ACTIVE); GOTO(out, rc); out: if (rc && tgt->ltd_exp != NULL) @@ -483,6 +525,7 @@ lov_add_obd(struct obd_device *obd, struct obd_uuid *uuidp, int index, int gen) return rc; } +/* Schedule a target for deletion */ static int lov_del_obd(struct obd_device *obd, struct obd_uuid *uuidp, int index, int gen) { @@ -492,9 +535,6 @@ lov_del_obd(struct obd_device *obd, struct obd_uuid *uuidp, int index, int gen) int rc = 0; ENTRY; - CDEBUG(D_CONFIG, "uuid: %s idx: %d gen: %d\n", - uuidp->uuid, index, gen); - if (index >= count) { CERROR("LOV target index %d >= number of LOV OBDs %d.\n", index, count); @@ -514,6 +554,25 @@ lov_del_obd(struct obd_device *obd, struct obd_uuid *uuidp, int index, int gen) RETURN(-EINVAL); } + CDEBUG(D_CONFIG, "uuid: %s idx: %d gen: %d exp: %p active: %d\n", + tgt->uuid.uuid, index, tgt->ltd_gen, tgt->ltd_exp, tgt->active); + + lov_getref(obd); + tgt->reap = 1; + lov->death_row++; + /* we really delete it from lov_putref */ + lov_putref(obd); + + RETURN(rc); +} + +static void __lov_del_obd(struct obd_device *obd, struct lov_tgt_desc *tgt) +{ + struct obd_device *osc_obd; + + LASSERT(tgt->reap); + osc_obd = class_exp2obd(tgt->ltd_exp); + if (tgt->ltd_exp) lov_disconnect_obd(obd, tgt); @@ -524,10 +583,15 @@ lov_del_obd(struct obd_device *obd, struct obd_uuid *uuidp, int index, int gen) /* lt_gen = 0 will mean it will not match the gen of any valid loi */ memset(tgt, 0,
sizeof(*tgt)); - CDEBUG(D_CONFIG, "uuid: %s idx: %d gen: %d exp: %p active: %d\n", - tgt->uuid.uuid, index, tgt->ltd_gen, tgt->ltd_exp, tgt->active); - - RETURN(rc); + /* Manual cleanup - no cleanup logs to clean up the osc's. We must + do it ourselves. And we can't do it from lov_cleanup, + because we just lost our only reference to it. */ + if (osc_obd) { + /* Use lov's force/fail flags. */ + osc_obd->obd_force = obd->obd_force; + osc_obd->obd_fail = obd->obd_fail; + class_manual_cleanup(osc_obd); + } } static int lov_setup(struct obd_device *obd, obd_count len, void *buf) @@ -536,8 +600,7 @@ static int lov_setup(struct obd_device *obd, obd_count len, void *buf) struct lustre_cfg *lcfg = buf; struct lov_desc *desc; struct lov_obd *lov = &obd->u.lov; - struct lov_tgt_desc *tgts; - int count, i; + int count; ENTRY; if (LUSTRE_CFG_BUFLEN(lcfg, 1) < 1) { @@ -602,15 +665,12 @@ static int lov_setup(struct obd_device *obd, obd_count len, void *buf) CERROR("Out of memory\n"); RETURN(-EINVAL); } - for (i = 0, tgts = lov->tgts; i < max(count, 1); i++, tgts++) { - tgts->index = i; - INIT_LIST_HEAD(&tgts->qos_bavail_list); - } + memset(lov->tgts, 0, lov->bufsize); desc->ld_active_tgt_count = 0; lov->desc = *desc; - spin_lock_init(&lov->lov_lock); - INIT_LIST_HEAD(&lov->qos_bavail_list); + sema_init(&lov->lov_lock, 1); + atomic_set(&lov->refcount, 0); lprocfs_init_vars(lov, &lvars); lprocfs_obd_setup(obd, lvars.obd_vars); @@ -635,13 +695,23 @@ static int lov_precleanup(struct obd_device *obd, int stage) int rc = 0; ENTRY; - if (stage < 2) - RETURN(0); - - rc = obd_llog_finish(obd, 0); - if (rc != 0) - CERROR("failed to cleanup llogging subsystems\n"); - + switch (stage) { + case OBD_CLEANUP_EARLY: { + struct lov_obd *lov = &obd->u.lov; + int i; + for (i = 0; i < lov->desc.ld_tgt_count; i++) { + if (!lov->tgts[i].active) + continue; + obd_precleanup(class_exp2obd(lov->tgts[i].ltd_exp), + OBD_CLEANUP_EARLY); + } + break; + } + case OBD_CLEANUP_SELF_EXP: + rc = 
obd_llog_finish(obd, 0); + if (rc != 0) + CERROR("failed to cleanup llogging subsystems\n"); + } RETURN(rc); } @@ -649,15 +719,18 @@ static int lov_cleanup(struct obd_device *obd) { struct lov_obd *lov = &obd->u.lov; - ENTRY; lprocfs_obd_cleanup(obd); if (lov->tgts) { int i; struct lov_tgt_desc *tgt; for (i = 0, tgt = lov->tgts; i < lov->desc.ld_tgt_count; i++, tgt++) { - if (!obd_uuid_empty(&tgt->uuid)) + /* We should never get here - these should have + been removed in the disconnect. */ + if (!obd_uuid_empty(&tgt->uuid)) { + CERROR("lov tgt %d not cleaned!\n", i); lov_del_obd(obd, &tgt->uuid, i, 0); + } } OBD_FREE(lov->tgts, lov->bufsize); } @@ -747,8 +820,6 @@ static int lov_clear_orphans(struct obd_export *export, struct obdo *src_oa, continue; memcpy(tmp_oa, src_oa, sizeof(*tmp_oa)); - tmp_oa->o_valid |= OBD_MD_FLID; - tmp_oa->o_id = oti->oti_objid[i]; LASSERT(lov->tgts[i].ltd_exp); /* XXX: LOV STACKING: use real "obj_mdp" sub-data */ @@ -766,16 +837,52 @@ static int lov_clear_orphans(struct obd_export *export, struct obdo *src_oa, RETURN(rc); } +static int lov_recreate(struct obd_export *exp, struct obdo *src_oa, + struct lov_stripe_md **ea, struct obd_trans_info *oti) +{ + struct lov_stripe_md *obj_mdp, *lsm; + struct lov_obd *lov = &exp->exp_obd->u.lov; + unsigned ost_idx; + int rc, i; + ENTRY; + + LASSERT(src_oa->o_valid & OBD_MD_FLFLAGS && + src_oa->o_flags & OBD_FL_RECREATE_OBJS); + + OBD_ALLOC(obj_mdp, sizeof(*obj_mdp)); + if (obj_mdp == NULL) + RETURN(-ENOMEM); + + ost_idx = src_oa->o_nlink; + lsm = *ea; + if (lsm == NULL) + GOTO(out, rc = -EINVAL); + if (ost_idx >= lov->desc.ld_tgt_count) + GOTO(out, rc = -EINVAL); + + for (i = 0; i < lsm->lsm_stripe_count; i++) { + if (lsm->lsm_oinfo[i].loi_ost_idx == ost_idx) { + if (lsm->lsm_oinfo[i].loi_id != src_oa->o_id) + GOTO(out, rc = -EINVAL); + break; + } + } + if (i == lsm->lsm_stripe_count) + GOTO(out, rc = -EINVAL); + + rc = obd_create(lov->tgts[ost_idx].ltd_exp, src_oa, &obj_mdp, oti); +out: + 
OBD_FREE(obj_mdp, sizeof(*obj_mdp)); + RETURN(rc); +} + /* the LOV expects oa->o_id to be set to the LOV object id */ -static int -lov_create(struct obd_export *exp, struct obdo *src_oa, - struct lov_stripe_md **ea, struct obd_trans_info *oti) +static int lov_create(struct obd_export *exp, struct obdo *src_oa, + struct lov_stripe_md **ea, struct obd_trans_info *oti) { - struct lov_request_set *set = NULL; struct lov_obd *lov; - struct obd_statfs osfs; - cfs_time_t maxage; - struct lov_request *req; + struct lov_request_set *set = NULL; + struct list_head *pos; int rc = 0; ENTRY; @@ -789,22 +896,25 @@ lov_create(struct obd_export *exp, struct obdo *src_oa, RETURN(rc); } - LASSERT(ergo(src_oa->o_valid & OBD_MD_FLFLAGS, - !!(src_oa->o_flags & OBD_FL_CREATE_CROW) != - !!(src_oa->o_flags & OBD_FL_RECREATE_OBJS))); - lov = &exp->exp_obd->u.lov; if (!lov->desc.ld_active_tgt_count) RETURN(-EIO); - - maxage = cfs_time_shift(-lov->desc.ld_qos_maxage); - obd_statfs(exp->exp_obd, &osfs, maxage); + + /* Recreate a specific object id at the given OST index */ + if ((src_oa->o_valid & OBD_MD_FLFLAGS) && + (src_oa->o_flags & OBD_FL_RECREATE_OBJS)) { + rc = lov_recreate(exp, src_oa, ea, oti); + RETURN(rc); + } rc = lov_prep_create_set(exp, ea, src_oa, oti, &set); if (rc) RETURN(rc); - list_for_each_entry(req, &set->set_list, rq_link) { + list_for_each (pos, &set->set_list) { + struct lov_request *req = + list_entry(pos, struct lov_request, rq_link); + /* XXX: LOV STACKING: use real "obj_mdp" sub-data */ rc = obd_create(lov->tgts[req->rq_idx].ltd_exp, req->rq_oa, &req->rq_md, oti); @@ -814,15 +924,17 @@ lov_create(struct obd_export *exp, struct obdo *src_oa, RETURN(rc); } -#define ASSERT_LSM_MAGIC(lsmp) \ -do { \ - LASSERT((lsmp) != NULL); \ - LASSERTF((lsmp)->lsm_magic == LOV_MAGIC, "%p->lsm_magic=%x\n", \ - (lsmp), (lsmp)->lsm_magic); \ +#define ASSERT_LSM_MAGIC(lsmp) \ +do { \ + LASSERT((lsmp) != NULL); \ + LASSERTF(((lsmp)->lsm_magic == LOV_MAGIC || \ + (lsmp)->lsm_magic == 
LOV_MAGIC_JOIN), "%p->lsm_magic=%x\n", \ + (lsmp), (lsmp)->lsm_magic); \ } while (0) static int lov_destroy(struct obd_export *exp, struct obdo *oa, - struct lov_stripe_md *lsm, struct obd_trans_info *oti) + struct lov_stripe_md *lsm, struct obd_trans_info *oti, + struct obd_export *md_exp) { struct lov_request_set *set; struct lov_request *req; @@ -848,7 +960,7 @@ static int lov_destroy(struct obd_export *exp, struct obdo *oa, /* XXX update the cookie position */ oti->oti_logcookies = set->set_cookies + req->rq_stripe; rc = obd_destroy(lov->tgts[req->rq_idx].ltd_exp, req->rq_oa, - NULL, oti); + NULL, oti, NULL); err = lov_update_common_set(set, req, rc); if (rc) { CERROR("error: destroying objid "LPX64" subobj " @@ -860,6 +972,10 @@ static int lov_destroy(struct obd_export *exp, struct obdo *oa, } } lov_fini_destroy_set(set); + if (rc == 0) { + LASSERT(lsm_op_find(lsm->lsm_magic) != NULL); + rc = lsm_op_find(lsm->lsm_magic)->lsm_destroy(lsm, oa, md_exp); + } RETURN(rc); } @@ -1001,7 +1117,7 @@ static int lov_setattr(struct obd_export *exp, struct obdo *src_oa, OBD_MD_FLUID | OBD_MD_FLGID | OBD_MD_FLINLINE | OBD_MD_FLFID | OBD_MD_FLGENER))); lov = &exp->exp_obd->u.lov; - rc = lov_prep_setattr_set(exp, src_oa, lsm, NULL, &set); + rc = lov_prep_setattr_set(exp, src_oa, lsm, oti, &set); if (rc) RETURN(rc); @@ -1046,7 +1162,7 @@ static int lov_setattr_async(struct obd_export *exp, struct obdo *src_oa, LASSERT(!(src_oa->o_valid & ~(OBD_MD_FLID | OBD_MD_FLUID | OBD_MD_FLGID| OBD_MD_FLCOOKIE | - OBD_MD_FLFID | OBD_MD_FLGENER))); + OBD_MD_FLFID | OBD_MD_FLGENER))); lov = &exp->exp_obd->u.lov; loi = lsm->lsm_oinfo; @@ -1393,7 +1509,6 @@ static int lov_queue_async_io(struct obd_export *exp, struct lov_async_page *lap; int rc; - ENTRY; LASSERT(loi == NULL); ASSERT_LSM_MAGIC(lsm); @@ -1417,7 +1532,6 @@ static int lov_set_async_flags(struct obd_export *exp, struct lov_async_page *lap; int rc; - ENTRY; LASSERT(loi == NULL); ASSERT_LSM_MAGIC(lsm); @@ -1442,7 +1556,6 @@ static int 
lov_queue_group_io(struct obd_export *exp, struct lov_async_page *lap; int rc; - ENTRY; LASSERT(loi == NULL); ASSERT_LSM_MAGIC(lsm); @@ -1468,7 +1581,6 @@ static int lov_trigger_group_io(struct obd_export *exp, struct lov_obd *lov = &exp->exp_obd->u.lov; int rc = 0, i, err; - ENTRY; LASSERT(loi == NULL); ASSERT_LSM_MAGIC(lsm); @@ -1496,7 +1608,6 @@ static int lov_teardown_async_page(struct obd_export *exp, struct lov_async_page *lap; int rc; - ENTRY; LASSERT(loi == NULL); ASSERT_LSM_MAGIC(lsm); @@ -1771,7 +1882,7 @@ static int lov_join_lru(struct obd_export *exp, } while(0) static int lov_statfs(struct obd_device *obd, struct obd_statfs *osfs, - cfs_time_t max_age) + unsigned long max_age) { struct lov_obd *lov = &obd->u.lov; struct obd_statfs lov_sfs; @@ -1796,7 +1907,6 @@ static int lov_statfs(struct obd_device *obd, struct obd_statfs *osfs, rc = err; continue; } - qos_update(lov, i, &lov_sfs); if (!set) { memcpy(osfs, &lov_sfs, sizeof(lov_sfs)); @@ -1804,7 +1914,7 @@ static int lov_statfs(struct obd_device *obd, struct obd_statfs *osfs, } else { #ifdef MIN_DF /* Sandia requested that df (and so, statfs) only - returned minimal available space on + returned minimal available space on a single OST, so people would be able to write this much data guaranteed. 
*/ if (osfs->os_bavail > lov_sfs.os_bavail) { @@ -1956,12 +2066,14 @@ static int lov_get_info(struct obd_export *exp, __u32 keylen, { struct obd_device *obddev = class_exp2obd(exp); struct lov_obd *lov = &obddev->u.lov; - int i; + int i, rc; ENTRY; if (!vallen || !val) RETURN(-EFAULT); + lov_getref(obddev); + if (keylen > strlen("lock_to_stripe") && strcmp(key, "lock_to_stripe") == 0) { struct { @@ -1969,11 +2081,12 @@ static int lov_get_info(struct obd_export *exp, __u32 keylen, struct ldlm_lock *lock; struct lov_stripe_md *lsm; } *data = key; + struct ldlm_res_id *res_id = &data->lock->l_resource->lr_name; struct lov_oinfo *loi; __u32 *stripe = val; if (*vallen < sizeof(*stripe)) - RETURN(-EFAULT); + GOTO(out, rc = -EFAULT); *vallen = sizeof(*stripe); /* XXX This is another one of those bits that will need to @@ -1985,49 +2098,40 @@ static int lov_get_info(struct obd_export *exp, __u32 keylen, for (i = 0, loi = data->lsm->lsm_oinfo; i < data->lsm->lsm_stripe_count; i++, loi++) { - if (lov->tgts[loi->loi_ost_idx].ltd_exp == - data->lock->l_conn_export) { + if (lov->tgts[loi->loi_ost_idx].ltd_exp == + data->lock->l_conn_export && + loi->loi_id == res_id->name[0] && + loi->loi_gr == res_id->name[2]) { *stripe = i; - RETURN(0); + GOTO(out, rc = 0); } } - LDLM_ERROR(data->lock, "lock on inode without such object\n"); + LDLM_ERROR(data->lock, "lock on inode without such object"); dump_lsm(D_ERROR, data->lsm); - RETURN(-ENXIO); - } else if (keylen >= strlen("size_to_stripe") && - strcmp(key, "size_to_stripe") == 0) { - struct { - int stripe_number; - __u64 size; - struct lov_stripe_md *lsm; - } *data = val; - - if (*vallen < sizeof(*data)) - RETURN(-EFAULT); - - data->size = lov_size_to_stripe(data->lsm, data->size, - data->stripe_number); - RETURN(0); + GOTO(out, rc = -ENXIO); } else if (keylen >= strlen("last_id") && strcmp(key, "last_id") == 0) { obd_id *ids = val; - int rc, size = sizeof(obd_id); + int size = sizeof(obd_id); for (i = 0; i < lov->desc.ld_tgt_count; 
i++) { if (!lov->tgts[i].active) continue; rc = obd_get_info(lov->tgts[i].ltd_exp, keylen, key, &size, &(ids[i])); if (rc != 0) - RETURN(rc); + GOTO(out, rc); } - RETURN(0); + GOTO(out, rc = 0); } else if (keylen >= strlen("lovdesc") && strcmp(key, "lovdesc") == 0) { struct lov_desc *desc_ret = val; *desc_ret = lov->desc; - RETURN(0); + GOTO(out, rc = 0); } - RETURN(-EINVAL); + rc = -EINVAL; +out: + lov_putref(obddev); + RETURN(rc); } static int lov_set_info(struct obd_export *exp, obd_count keylen, @@ -2038,7 +2142,15 @@ static int lov_set_info(struct obd_export *exp, obd_count keylen, int i, rc = 0, err; ENTRY; - if (KEY_IS("checksum")) { + if (KEY_IS("next_id")) { + if (vallen != lov->desc.ld_tgt_count) + RETURN(-EINVAL); + vallen = sizeof(obd_id); + } + + lov_getref(obddev); + + if (KEY_IS("next_id") || KEY_IS("checksum")) { for (i = 0; i < lov->desc.ld_tgt_count; i++) { /* OST was disconnected */ if (!lov->tgts[i].ltd_exp) @@ -2050,7 +2162,7 @@ static int lov_set_info(struct obd_export *exp, obd_count keylen, if (!rc) rc = err; } - RETURN(rc); + GOTO(out, rc); } if (KEY_IS("evict_by_nid")) { @@ -2064,14 +2176,14 @@ static int lov_set_info(struct obd_export *exp, obd_count keylen, if (!rc) rc = err; } - RETURN(rc); + GOTO(out, rc); } if (KEY_IS("mds_conn") || KEY_IS("unlinked")) { if (vallen != 0) - RETURN(-EINVAL); + GOTO(out, rc = -EINVAL); } else { - RETURN(-EINVAL); + GOTO(out, rc = -EINVAL); } for (i = 0; i < lov->desc.ld_tgt_count; i++) { @@ -2090,6 +2202,23 @@ static int lov_set_info(struct obd_export *exp, obd_count keylen, if (!rc) rc = err; } +out: + lov_putref(obddev); + RETURN(rc); +} + +static int lov_checkmd(struct obd_export *exp, struct obd_export *md_exp, + struct lov_stripe_md *lsm) +{ + int rc; + ENTRY; + + if (!lsm) + RETURN(0); + LASSERT(md_exp); + LASSERT(lsm_op_find(lsm->lsm_magic) != NULL); + rc = lsm_op_find(lsm->lsm_magic)->lsm_revalidate(lsm, md_exp->exp_obd); + RETURN(rc); } @@ -2226,6 +2355,7 @@ struct obd_ops lov_obd_ops = { 
.o_statfs = lov_statfs, .o_packmd = lov_packmd, .o_unpackmd = lov_unpackmd, + .o_checkmd = lov_checkmd, .o_create = lov_create, .o_destroy = lov_destroy, .o_getattr = lov_getattr, @@ -2240,6 +2370,7 @@ struct obd_ops lov_obd_ops = { .o_queue_group_io = lov_queue_group_io, .o_trigger_group_io = lov_trigger_group_io, .o_teardown_async_page = lov_teardown_async_page, + .o_merge_lvb = lov_merge_lvb, .o_adjust_kms = lov_adjust_kms, .o_punch = lov_punch, .o_sync = lov_sync, @@ -2257,7 +2388,7 @@ struct obd_ops lov_obd_ops = { .o_notify = lov_notify, }; -static quota_interface_t *quota_interface = NULL; +static quota_interface_t *quota_interface; extern quota_interface_t lov_quota_interface; int __init lov_init(void) @@ -2270,7 +2401,7 @@ int __init lov_init(void) quota_interface = PORTAL_SYMBOL_GET(lov_quota_interface); init_obd_quota_ops(quota_interface, &lov_obd_ops); - + rc = class_register_type(&lov_obd_ops, lvars.module_vars, OBD_LOV_DEVICENAME); if (rc && quota_interface) @@ -2292,5 +2423,6 @@ MODULE_AUTHOR("Cluster File Systems, Inc. 
"); MODULE_DESCRIPTION("Lustre Logical Object Volume OBD driver"); MODULE_LICENSE("GPL"); -cfs_module(lov, "1.0.0", lov_init, lov_exit); +module_init(lov_init); +module_exit(lov_exit); #endif diff --git a/lustre/lov/lov_qos.c b/lustre/lov/lov_qos.c index ab73a1b..a3b9b42 100644 --- a/lustre/lov/lov_qos.c +++ b/lustre/lov/lov_qos.c @@ -84,21 +84,23 @@ int qos_remedy_create(struct lov_request_set *set, struct lov_request *req) int stripe, i, rc = -EIO; ENTRY; - ost_idx = (req->rq_idx + 1) % ost_count; + ost_idx = (req->rq_idx + lsm->lsm_stripe_count) % ost_count; for (i = 0; i < ost_count; i++, ost_idx = (ost_idx + 1) % ost_count) { if (lov->tgts[ost_idx].active == 0) { CDEBUG(D_HA, "lov idx %d inactive\n", ost_idx); continue; } /* check if objects has been created on this ost */ - for (stripe = req->rq_stripe; stripe >= 0; stripe--) { + for (stripe = 0; stripe < lsm->lsm_stripe_count; stripe++) { + if (stripe == req->rq_stripe) + continue; if (ost_idx == lsm->lsm_oinfo[stripe].loi_ost_idx) break; } - if (stripe < 0) { + if (stripe >= lsm->lsm_stripe_count) { req->rq_idx = ost_idx; - rc = obd_create(lov->tgts[ost_idx].ltd_exp, req->rq_oa, + rc = obd_create(lov->tgts[ost_idx].ltd_exp, req->rq_oa, &req->rq_md, set->set_oti); if (!rc) break; @@ -109,343 +111,73 @@ int qos_remedy_create(struct lov_request_set *set, struct lov_request *req) #define LOV_CREATE_RESEED_MULT 4 #define LOV_CREATE_RESEED_MIN 1000 -/* alloc objects on osts with round-robin algorithm */ -static int alloc_rr(struct lov_obd *lov, int *idx_arr, int *stripe_cnt) +/* FIXME use real qos data to prepare the lov create request */ +int qos_prep_create(struct lov_obd *lov, struct lov_request_set *set, int newea) { - static int ost_start_count, ost_start_idx; + static int ost_start_idx, ost_start_count; unsigned ost_idx, ost_count = lov->desc.ld_tgt_count; unsigned ost_active_count = lov->desc.ld_active_tgt_count; - int i, *idx_pos = idx_arr; - ENTRY; - - if (--ost_start_count <= 0) { - ost_start_idx = 
ll_insecure_random_int(); - ost_start_count = - (LOV_CREATE_RESEED_MIN / max(ost_active_count, 1U) + - LOV_CREATE_RESEED_MULT) * max(ost_active_count, 1U); - } else if (*stripe_cnt >= lov->desc.ld_active_tgt_count) { - /* If we allocate from all of the stripes, make the - * next file start on the next OST. */ - ++ost_start_idx; - } - ost_idx = ost_start_idx % ost_count; - - for (i = 0; i < ost_count; i++, ost_idx = (ost_idx + 1) % ost_count) { - ++ost_start_idx; - - if (lov->tgts[ost_idx].active == 0) { - CDEBUG(D_HA, "lov idx %d inactive\n", ost_idx); - continue; - } - - *idx_pos = ost_idx; - idx_pos++; - /* got enough ost */ - if (idx_pos - idx_arr == *stripe_cnt) - RETURN(0); - } - *stripe_cnt = idx_pos - idx_arr; - RETURN(0); -} - -/* alloc objects on osts with specific stripe offset */ -static int alloc_specific(struct lov_obd *lov, struct lov_stripe_md *lsm, - int *idx_arr) -{ - unsigned ost_idx, ost_count = lov->desc.ld_tgt_count; - int i, *idx_pos = idx_arr; + struct lov_stripe_md *lsm = set->set_md; + struct obdo *src_oa = set->set_oa; + int i, rc = 0; ENTRY; - ost_idx = lsm->lsm_oinfo[0].loi_ost_idx; - for (i = 0; i < ost_count; i++, ost_idx = (ost_idx + 1) % ost_count) { - if (lov->tgts[ost_idx].active == 0) { - CDEBUG(D_HA, "lov idx %d inactive\n", ost_idx); - continue; - } - *idx_pos = ost_idx; - idx_pos++; - /* got enough ost */ - if (idx_pos - idx_arr == lsm->lsm_stripe_count) - RETURN(0); - } - /* If we were passed specific striping params, then a failure to - * meet those requirements is an error, since we can't reallocate - * that memory (it might be part of a larger array or something). - * - * We can only get here if lsm_stripe_count was originally > 1. - */ - CERROR("can't lstripe objid "LPX64": have %u want %u\n", - lsm->lsm_object_id, idx_pos - idx_arr, lsm->lsm_stripe_count); - RETURN(-EFBIG); -} - -/* free space OST must have to be used for object allocation. 
*/ -#define QOS_MIN (lov->desc.ld_qos_threshold << 20) - -#define TGT_BAVAIL(tgt) (tgt->ltd_exp->exp_obd->obd_osfs.os_bavail * \ - tgt->ltd_exp->exp_obd->obd_osfs.os_bsize) -#define TGT_FFREE(tgt) (tgt->ltd_exp->exp_obd->obd_osfs.os_ffree) + LASSERT(src_oa->o_valid & OBD_MD_FLID); -/* alloc objects on osts with free space weighted algorithm */ -static int alloc_qos(struct obd_export *exp, int *idx_arr, int *stripe_cnt) -{ - struct lov_obd *lov = &exp->exp_obd->u.lov; - unsigned ost_count = lov->desc.ld_tgt_count; - __u64 cur_bavail, rand, *availspace, total_bavail = 0; - int *indexes, nfound, good_osts, i, warn = 0, rc = 0; - struct lov_tgt_desc *tgt; - int shift, require_stripes = *stripe_cnt; - static time_t last_warn = 0; - time_t now = cfs_time_current_sec(); - ENTRY; - - availspace = NULL; - indexes = NULL; - OBD_ALLOC(availspace, sizeof(__u64) * ost_count); - OBD_ALLOC(indexes, sizeof(int) * require_stripes); - if (!availspace || !indexes) - GOTO(out_free, rc = -EAGAIN); - - spin_lock(&lov->lov_lock); - - /* if free space is below some threshold, just go - * to do round-robin allocation */ - total_bavail = (exp->exp_obd->obd_osfs.os_bavail * \ - exp->exp_obd->obd_osfs.os_bsize); - if (ost_count < 2 || total_bavail <= QOS_MIN) { - spin_unlock(&lov->lov_lock); - GOTO(out_free, rc = -EAGAIN); + lsm->lsm_object_id = src_oa->o_id; + if (!lsm->lsm_stripe_size) + lsm->lsm_stripe_size = lov->desc.ld_default_stripe_size; + if (!lsm->lsm_pattern) { + lsm->lsm_pattern = lov->desc.ld_pattern ? + lov->desc.ld_pattern : LOV_PATTERN_RAID0; } - /* if each ost has almost same free space, go to - * do rr allocation for better creation performance */ - if (!list_empty(&lov->qos_bavail_list)) { - __u64 max, min, val; - tgt = list_entry(lov->qos_bavail_list.next, - struct lov_tgt_desc, qos_bavail_list); - max = TGT_BAVAIL(tgt); - tgt = list_entry(lov->qos_bavail_list.prev, - struct lov_tgt_desc, qos_bavail_list); - min = TGT_BAVAIL(tgt); - - val = (max >= min) ? 
(max - min) : (min - max); - min = (min * 13) >> 8; /* less than 5% of gap */ - - if (val < min) { - spin_unlock(&lov->lov_lock); - GOTO(out_free, rc = -EAGAIN); + if (newea || lsm->lsm_oinfo[0].loi_ost_idx >= ost_count) { + if (--ost_start_count <= 0) { + ost_start_idx = ll_rand(); + ost_start_count = + (LOV_CREATE_RESEED_MIN / max(ost_active_count, 1U) + + LOV_CREATE_RESEED_MULT) * max(ost_active_count, 1U); + } else if (lsm->lsm_stripe_count >= ost_active_count) { + /* If we allocate from all of the stripes, make the + * next file start on the next OST. */ + ++ost_start_idx; } + ost_idx = ost_start_idx % ost_count; } else { - spin_unlock(&lov->lov_lock); - GOTO(out_free, rc = -EAGAIN); - } - - total_bavail = 0; - good_osts = 0; - /* warn zero available space/inode every 30 min */ - if (cfs_time_sub(now, last_warn) > 60 * 30) - warn = 1; - list_for_each_entry(tgt, &lov->qos_bavail_list, qos_bavail_list) { - if (!tgt->active) - continue; - if (!TGT_BAVAIL(tgt)) { - if (warn) { - CWARN("avail space on %s is zero\n", - tgt->uuid.uuid); - last_warn = now; - } - continue; - } - if (!TGT_FFREE(tgt)) { - if (warn) { - CWARN("free inode on %s is zero\n", - tgt->uuid.uuid); - last_warn = now; - } - continue; - } - if ((TGT_BAVAIL(tgt) <= QOS_MIN) && (good_osts >= *stripe_cnt)) - break; - availspace[good_osts] = TGT_BAVAIL(tgt); - indexes[good_osts] = tgt->index; - total_bavail += availspace[good_osts]; - good_osts++; - } - - spin_unlock(&lov->lov_lock); - - if (!total_bavail) - GOTO(out_free, rc = -ENOSPC); - - /* if we don't have enough good OSTs, we reduce the stripe count. 
*/ - if (good_osts < *stripe_cnt) - *stripe_cnt = good_osts; - - if (!*stripe_cnt) - GOTO(out_free, rc = -EAGAIN); - - nfound = shift = 0; - while ((total_bavail >> shift) > 0) - shift++; - shift++; - /* search enough OSTs with free space weighted random allocation */ - while (nfound < *stripe_cnt) { - cur_bavail = 0; - - get_random_bytes(&rand, sizeof(rand)); - if (shift < 64) - rand &= ((1 << shift) - 1); - while (rand > total_bavail) - rand -= total_bavail; - - for (i = 0; i < good_osts; i++) { - cur_bavail += availspace[i]; - if (cur_bavail >= rand) { - total_bavail -= availspace[i]; - availspace[i] = 0; - idx_arr[nfound] = indexes[i]; - nfound++; - break; - } - } - /* should never satisfy below condition */ - if (cur_bavail == 0) - break; + ost_idx = lsm->lsm_oinfo[0].loi_ost_idx; } - LASSERT(nfound == *stripe_cnt); - -out_free: - if (availspace) - OBD_FREE(availspace, sizeof(__u64) * ost_count); - if (indexes) - OBD_FREE(indexes, sizeof(int) * require_stripes); - if (rc != -EAGAIN) - RETURN(rc); - - rc = alloc_rr(lov, idx_arr, stripe_cnt); - RETURN(rc); -} - -/* return new alloced stripe count in success */ -static int alloc_idx_array(struct obd_export *exp, struct lov_stripe_md *lsm, - int newea, int **idx_arr, int *arr_cnt) -{ - struct lov_obd *lov = &exp->exp_obd->u.lov; - int stripe_cnt = lsm->lsm_stripe_count; - int i, rc = 0; - int *tmp_arr = NULL; - ENTRY; - - *arr_cnt = stripe_cnt; - OBD_ALLOC(tmp_arr, *arr_cnt * sizeof(int)); - if (tmp_arr == NULL) - RETURN(-ENOMEM); - for (i = 0; i < *arr_cnt; i++) - tmp_arr[i] = -1; - - if (newea || - lsm->lsm_oinfo[0].loi_ost_idx >= lov->desc.ld_tgt_count) - rc = alloc_qos(exp, tmp_arr, &stripe_cnt); - else - rc = alloc_specific(lov, lsm, tmp_arr); - if (rc) - GOTO(out_arr, rc); + CDEBUG(D_INODE, "allocating %d subobjs for objid "LPX64" at idx %d\n", + lsm->lsm_stripe_count, lsm->lsm_object_id, ost_idx); - *idx_arr = tmp_arr; - RETURN(stripe_cnt); -out_arr: - OBD_FREE(tmp_arr, *arr_cnt * sizeof(int)); - *arr_cnt = 
0; - RETURN(rc); -} - -static void free_idx_array(int *idx_arr, int arr_cnt) -{ - if (arr_cnt) - OBD_FREE(idx_arr, arr_cnt * sizeof(int)); -} - -int qos_prep_create(struct obd_export *exp, struct lov_request_set *set) -{ - struct lov_obd *lov = &exp->exp_obd->u.lov; - struct lov_stripe_md *lsm; - struct obdo *src_oa = set->set_oa; - struct obd_trans_info *oti = set->set_oti; - int i, stripes, rc = 0, newea = 0; - int *idx_arr, idx_cnt = 0; - ENTRY; - - LASSERT(src_oa->o_valid & OBD_MD_FLID); - - if (set->set_md == NULL) { - int stripe_cnt = lov_get_stripecnt(lov, 0); - - /* If the MDS file was truncated up to some size, stripe over - * enough OSTs to allow the file to be created at that size. */ - if (src_oa->o_valid & OBD_MD_FLSIZE) { - struct lov_tgt_desc *tgt; - stripes = 1; - - spin_lock(&lov->lov_lock); - list_for_each_entry(tgt, &lov->qos_bavail_list, - qos_bavail_list) { - if (!tgt->active) - continue; - if (TGT_BAVAIL(tgt) * stripes > src_oa->o_size) - break; - stripes++; - } - spin_unlock(&lov->lov_lock); + for (i = 0; i < ost_count; i++, ost_idx = (ost_idx + 1) % ost_count) { + struct lov_request *req; - if (stripes < stripe_cnt) - stripes = stripe_cnt; - } else { - stripes = stripe_cnt; + ++ost_start_idx; + if (lov->tgts[ost_idx].active == 0) { + CDEBUG(D_HA, "lov idx %d inactive\n", ost_idx); + continue; } - rc = lov_alloc_memmd(&set->set_md, stripes, - lov->desc.ld_pattern ? - lov->desc.ld_pattern : LOV_PATTERN_RAID0); - if (rc < 0) - GOTO(out_err, rc); - rc = 0; - newea = 1; - } - lsm = set->set_md; - - lsm->lsm_object_id = src_oa->o_id; - if (!lsm->lsm_stripe_size) - lsm->lsm_stripe_size = lov->desc.ld_default_stripe_size; - if (!lsm->lsm_pattern) { - LASSERT(lov->desc.ld_pattern); - lsm->lsm_pattern = lov->desc.ld_pattern; - } - - stripes = alloc_idx_array(exp, lsm, newea, &idx_arr, &idx_cnt); - LASSERT(stripes <= lsm->lsm_stripe_count); - if (stripes <= 0) - GOTO(out_err, rc = stripes ? 
stripes : -EIO); - - for (i = 0; i < stripes; i++) { - struct lov_request *req; - int ost_idx = idx_arr[i]; - LASSERT(ost_idx >= 0); - OBD_ALLOC(req, sizeof(*req)); if (req == NULL) - GOTO(out_err, rc = -ENOMEM); - lov_set_add_req(req, set); + GOTO(out, rc = -ENOMEM); req->rq_buflen = sizeof(*req->rq_md); OBD_ALLOC(req->rq_md, req->rq_buflen); - if (req->rq_md == NULL) - GOTO(out_err, rc = -ENOMEM); - + if (req->rq_md == NULL) { + OBD_FREE_PTR(req); + GOTO(out, rc = -ENOMEM); + } + req->rq_oa = obdo_alloc(); - if (req->rq_oa == NULL) - GOTO(out_err, rc = -ENOMEM); - + if (req->rq_oa == NULL) { + OBD_FREE_PTR(req->rq_md); + OBD_FREE_PTR(req); + GOTO(out, rc = -ENOMEM); + } + req->rq_idx = ost_idx; req->rq_stripe = i; /* create data objects with "parent" OA */ @@ -456,74 +188,41 @@ int qos_prep_create(struct obd_export *exp, struct lov_request_set *set) * stripe which holds the existing file size. */ if (src_oa->o_valid & OBD_MD_FLSIZE) { - req->rq_oa->o_size = - lov_size_to_stripe(lsm, src_oa->o_size, i); + if (lov_stripe_offset(lsm, src_oa->o_size, i, + &req->rq_oa->o_size) < 0 && + req->rq_oa->o_size) + req->rq_oa->o_size--; CDEBUG(D_INODE, "stripe %d has size "LPU64"/"LPU64"\n", i, req->rq_oa->o_size, src_oa->o_size); } + lov_set_add_req(req, set); + + /* If we have allocated enough objects, we are OK */ + if (set->set_count == lsm->lsm_stripe_count) + GOTO(out, rc = 0); } - LASSERT(set->set_count == stripes); - if (stripes < lsm->lsm_stripe_count) - qos_shrink_lsm(set); + if (set->set_count == 0) + GOTO(out, rc = -EIO); - if (oti && (src_oa->o_valid & OBD_MD_FLCOOKIE)) { - oti_alloc_cookies(oti, set->set_count); - if (!oti->oti_logcookies) - GOTO(out_err, rc = -ENOMEM); - set->set_cookies = oti->oti_logcookies; + /* If we were passed specific striping params, then a failure to + * meet those requirements is an error, since we can't reallocate + * that memory (it might be part of a larger array or something). 
+ * + * We can only get here if lsm_stripe_count was originally > 1. + */ + if (!newea) { + CERROR("can't lstripe objid "LPX64": have %u want %u, rc %d\n", + lsm->lsm_object_id, set->set_count, + lsm->lsm_stripe_count, rc); + rc = rc ? rc : -EFBIG; + } else { + qos_shrink_lsm(set); + rc = 0; } -out_err: - if (newea && rc) - obd_free_memmd(exp, &set->set_md); - free_idx_array(idx_arr, idx_cnt); - EXIT; - return rc; -} - -/* An caveat here is don't use list_move() on same list */ -#define list_adjust(tgt, lov, list_name, value) \ -{ \ - struct list_head *element; \ - struct lov_tgt_desc *tmp; \ - if (list_empty(&(tgt)->list_name)) \ - list_add(&(tgt)->list_name, &(lov)->list_name); \ - element = (tgt)->list_name.next; \ - while((element != &(lov)->list_name) && \ - (tmp = list_entry(element, struct lov_tgt_desc, list_name)) && \ - (value(tgt) < value(tmp))) \ - element = element->next; \ - if (element != (tgt)->list_name.next) { \ - list_del_init(&(tgt)->list_name); \ - list_add(&(tgt)->list_name, element->prev); \ - } \ - element = (tgt)->list_name.prev; \ - while ((element != &(lov)->list_name) && \ - (tmp = list_entry(element, struct lov_tgt_desc, list_name)) && \ - (value(tgt) > value(tmp))) \ - element = element->prev; \ - if (element != (tgt)->list_name.prev) { \ - list_del_init(&(tgt)->list_name); \ - list_add_tail(&(tgt)->list_name, element->prev); \ - } \ -} +out: -void qos_update(struct lov_obd *lov, int idx, struct obd_statfs *osfs) -{ - struct lov_tgt_desc *tgt = &lov->tgts[idx]; - __u64 bavail; - ENTRY; - - bavail = osfs->os_bavail * osfs->os_bsize; - if (!bavail) - CWARN("ost %d has zero avail space!\n", idx); - - CDEBUG(D_OTHER, "QOS: bfree now "LPU64"\n", bavail); - - spin_lock(&lov->lov_lock); - list_adjust(tgt, lov, qos_bavail_list, TGT_BAVAIL); - spin_unlock(&lov->lov_lock); + RETURN(rc); } - diff --git a/lustre/lov/lov_request.c b/lustre/lov/lov_request.c index 975bb9c..4403eda 100644 --- a/lustre/lov/lov_request.c +++ b/lustre/lov/lov_request.c 
@@ -129,26 +129,24 @@ int lov_update_enqueue_set(struct lov_request_set *set, * can be addressed then. */ if (rc == ELDLM_OK) { struct ldlm_lock *lock = ldlm_handle2lock(lov_lockhp); - __u64 tmp = req->rq_md->lsm_oinfo->loi_rss; + __u64 tmp = req->rq_md->lsm_oinfo->loi_lvb.lvb_size; LASSERT(lock != NULL); lov_stripe_lock(set->set_md); - loi->loi_rss = tmp; - loi->loi_mtime = req->rq_md->lsm_oinfo->loi_mtime; - loi->loi_blocks = req->rq_md->lsm_oinfo->loi_blocks; + loi->loi_lvb = req->rq_md->lsm_oinfo->loi_lvb; /* Extend KMS up to the end of this lock and no further * A lock on [x,y] means a KMS of up to y + 1 bytes! */ if (tmp > lock->l_policy_data.l_extent.end) tmp = lock->l_policy_data.l_extent.end + 1; if (tmp >= loi->loi_kms) { - LDLM_DEBUG(lock, "lock acquired, setting rss=" - LPU64", kms="LPU64, loi->loi_rss, tmp); + LDLM_DEBUG(lock, "lock acquired, setting rss="LPU64 + ", kms="LPU64, loi->loi_lvb.lvb_size, tmp); loi->loi_kms = tmp; loi->loi_kms_valid = 1; } else { LDLM_DEBUG(lock, "lock acquired, setting rss=" LPU64"; leaving kms="LPU64", end="LPU64, - loi->loi_rss, loi->loi_kms, + loi->loi_lvb.lvb_size, loi->loi_kms, lock->l_policy_data.l_extent.end); } lov_stripe_unlock(set->set_md); @@ -157,12 +155,10 @@ int lov_update_enqueue_set(struct lov_request_set *set, } else if (rc == ELDLM_LOCK_ABORTED && flags & LDLM_FL_HAS_INTENT) { memset(lov_lockhp, 0, sizeof(*lov_lockhp)); lov_stripe_lock(set->set_md); - loi->loi_rss = req->rq_md->lsm_oinfo->loi_rss; - loi->loi_mtime = req->rq_md->lsm_oinfo->loi_mtime; - loi->loi_blocks = req->rq_md->lsm_oinfo->loi_blocks; + loi->loi_lvb = req->rq_md->lsm_oinfo->loi_lvb; lov_stripe_unlock(set->set_md); CDEBUG(D_INODE, "glimpsed, setting rss="LPU64"; leaving" - " kms="LPU64"\n", loi->loi_rss, loi->loi_kms); + " kms="LPU64"\n", loi->loi_lvb.lvb_size, loi->loi_kms); rc = ELDLM_OK; } else { struct obd_export *exp = set->set_exp; @@ -202,7 +198,7 @@ static int enqueue_done(struct lov_request_set *set, __u32 mode) lov_lockhp = 
set->set_lockh->llh_handles + req->rq_stripe; LASSERT(lov_lockhp); - if (lov_lockhp->cookie == 0) + if (!lustre_handle_is_used(lov_lockhp)) continue; rc = obd_cancel(lov->tgts[req->rq_idx].ltd_exp, req->rq_md, @@ -293,10 +289,8 @@ int lov_prep_enqueue_set(struct obd_export *exp, struct lov_stripe_md *lsm, req->rq_md->lsm_object_id = loi->loi_id; req->rq_md->lsm_stripe_count = 0; req->rq_md->lsm_oinfo->loi_kms_valid = loi->loi_kms_valid; - req->rq_md->lsm_oinfo->loi_rss = loi->loi_rss; req->rq_md->lsm_oinfo->loi_kms = loi->loi_kms; - req->rq_md->lsm_oinfo->loi_blocks = loi->loi_blocks; - req->rq_md->lsm_oinfo->loi_mtime = loi->loi_mtime; + req->rq_md->lsm_oinfo->loi_lvb = loi->loi_lvb; lov_set_add_req(req, set); } @@ -417,10 +411,10 @@ int lov_fini_cancel_set(struct lov_request_set *set) int rc = 0; ENTRY; - LASSERT(set->set_exp); if (set == NULL) RETURN(0); + LASSERT(set->set_exp); if (set->set_lockh) lov_llh_put(set->set_lockh); @@ -458,7 +452,7 @@ int lov_prep_cancel_set(struct obd_export *exp, struct lov_stripe_md *lsm, struct lustre_handle *lov_lockhp; lov_lockhp = set->set_lockh->llh_handles + i; - if (lov_lockhp->cookie == 0) { + if (!lustre_handle_is_used(lov_lockhp)) { CDEBUG(D_HA, "lov idx %d subobj "LPX64" no lock?\n", loi->loi_ost_idx, loi->loi_id); continue; @@ -567,7 +561,7 @@ cleanup: continue; sub_exp = lov->tgts[req->rq_idx].ltd_exp; - err = obd_destroy(sub_exp, req->rq_oa, NULL, oti); + err = obd_destroy(sub_exp, req->rq_oa, NULL, oti, NULL); if (err) CERROR("Failed to uncreate objid "LPX64" subobj " LPX64" on OST idx %d: rc = %d\n", @@ -594,11 +588,13 @@ int lov_fini_create_set(struct lov_request_set *set,struct lov_stripe_md **lsmp) int rc = 0; ENTRY; - LASSERT(set->set_exp); if (set == NULL) RETURN(0); - if (set->set_completes) + LASSERT(set->set_exp); + if (set->set_completes) { rc = create_done(set->set_exp, set, lsmp); + /* FIXME update qos data here */ + } if (atomic_dec_and_test(&set->set_refcount)) lov_finish_set(set); @@ -653,8 +649,9 @@ 
int lov_prep_create_set(struct obd_export *exp, struct lov_stripe_md **lsmp, struct obdo *src_oa, struct obd_trans_info *oti, struct lov_request_set **reqset) { + struct lov_obd *lov = &exp->exp_obd->u.lov; struct lov_request_set *set; - int rc = 0; + int rc = 0, newea = 0; ENTRY; OBD_ALLOC(set, sizeof(*set)); @@ -666,14 +663,54 @@ int lov_prep_create_set(struct obd_export *exp, struct lov_stripe_md **lsmp, set->set_md = *lsmp; set->set_oa = src_oa; set->set_oti = oti; - - rc = qos_prep_create(exp, set); + + if (set->set_md == NULL) { + int stripes, stripe_cnt; + stripe_cnt = lov_get_stripecnt(lov, 0); + + /* If the MDS file was truncated up to some size, stripe over + * enough OSTs to allow the file to be created at that size. */ + if (src_oa->o_valid & OBD_MD_FLSIZE) { + stripes=((src_oa->o_size+LUSTRE_STRIPE_MAXBYTES)>>12)-1; + do_div(stripes, (__u32)(LUSTRE_STRIPE_MAXBYTES >> 12)); + + if (stripes > lov->desc.ld_active_tgt_count) + GOTO(out_set, rc = -EFBIG); + if (stripes < stripe_cnt) + stripes = stripe_cnt; + } else { + stripes = stripe_cnt; + } + + rc = lov_alloc_memmd(&set->set_md, stripes, + lov->desc.ld_pattern ? 
+ lov->desc.ld_pattern : LOV_PATTERN_RAID0, + LOV_MAGIC); + if (rc < 0) + goto out_set; + newea = 1; + } + + rc = qos_prep_create(lov, set, newea); if (rc) - lov_fini_create_set(set, lsmp); - else - *reqset = set; + goto out_lsm; + + if (oti && (src_oa->o_valid & OBD_MD_FLCOOKIE)) { + oti_alloc_cookies(oti, set->set_count); + if (!oti->oti_logcookies) + goto out_lsm; + set->set_cookies = oti->oti_logcookies; + } + *reqset = set; + RETURN(rc); + +out_lsm: + if (*lsmp == NULL) + obd_free_memmd(exp, &set->set_md); +out_set: + lov_fini_create_set(set, lsmp); RETURN(rc); -} +} static int common_attr_done(struct lov_request_set *set) { @@ -733,7 +770,7 @@ static int brw_done(struct lov_request_set *set) loi = &lsm->lsm_oinfo[req->rq_stripe]; if (req->rq_oa->o_valid & OBD_MD_FLBLOCKS) - loi->loi_blocks = req->rq_oa->o_blocks; + loi->loi_lvb.lvb_blocks = req->rq_oa->o_blocks; } RETURN(0); @@ -744,9 +781,9 @@ int lov_fini_brw_set(struct lov_request_set *set) int rc = 0; ENTRY; - LASSERT(set->set_exp); if (set == NULL) RETURN(0); + LASSERT(set->set_exp); if (set->set_completes) { rc = brw_done(set); /* FIXME update qos data here */ @@ -874,9 +911,9 @@ int lov_fini_getattr_set(struct lov_request_set *set) int rc = 0; ENTRY; - LASSERT(set->set_exp); if (set == NULL) RETURN(0); + LASSERT(set->set_exp); if (set->set_completes) rc = common_attr_done(set); @@ -942,9 +979,9 @@ int lov_fini_destroy_set(struct lov_request_set *set) { ENTRY; - LASSERT(set->set_exp); if (set == NULL) RETURN(0); + LASSERT(set->set_exp); if (set->set_completes) { /* FIXME update qos data here */ } @@ -1021,9 +1058,9 @@ int lov_fini_setattr_set(struct lov_request_set *set) int rc = 0; ENTRY; - LASSERT(set->set_exp); if (set == NULL) RETURN(0); + LASSERT(set->set_exp); if (set->set_completes) { rc = common_attr_done(set); /* FIXME update qos data here */ @@ -1098,6 +1135,7 @@ int lov_update_setattr_set(struct lov_request_set *set, struct lov_request *req, int rc) { struct lov_obd *lov = 
&set->set_exp->exp_obd->u.lov; + struct lov_stripe_md *lsm = set->set_md; ENTRY; lov_update_set(set, req, rc); @@ -1108,10 +1146,17 @@ int lov_update_setattr_set(struct lov_request_set *set, /* FIXME: LOV STACKING update loi data should be done by OSC * * when this is gone we can go back to using lov_update_common_set() */ - if (rc == 0 && req->rq_oa->o_valid & OBD_MD_FLMTIME) - set->set_md->lsm_oinfo[req->rq_stripe].loi_mtime = - req->rq_oa->o_mtime; - /* ditto loi_atime, loi_ctime when available */ + if (rc == 0) { + if (req->rq_oa->o_valid & OBD_MD_FLMTIME) + lsm->lsm_oinfo[req->rq_stripe].loi_lvb.lvb_ctime = + req->rq_oa->o_ctime; + if (req->rq_oa->o_valid & OBD_MD_FLMTIME) + lsm->lsm_oinfo[req->rq_stripe].loi_lvb.lvb_mtime = + req->rq_oa->o_mtime; + if (req->rq_oa->o_valid & OBD_MD_FLATIME) + lsm->lsm_oinfo[req->rq_stripe].loi_lvb.lvb_atime = + req->rq_oa->o_atime; + } RETURN(rc); } @@ -1134,9 +1179,9 @@ int lov_fini_punch_set(struct lov_request_set *set) int rc = 0; ENTRY; - LASSERT(set->set_exp); if (set == NULL) RETURN(0); + LASSERT(set->set_exp); if (set->set_completes) { if (!set->set_success) rc = -EIO; @@ -1215,9 +1260,9 @@ int lov_fini_sync_set(struct lov_request_set *set) int rc = 0; ENTRY; - LASSERT(set->set_exp); if (set == NULL) RETURN(0); + LASSERT(set->set_exp); if (set->set_completes) { if (!set->set_success) rc = -EIO; diff --git a/lustre/lov/lproc_lov.c b/lustre/lov/lproc_lov.c index 5ae9f62..5fc85fe 100644 --- a/lustre/lov/lproc_lov.c +++ b/lustre/lov/lproc_lov.c @@ -118,68 +118,6 @@ static int lov_rd_desc_uuid(char *page, char **start, off_t off, int count, return snprintf(page, count, "%s\n", lov->desc.ld_uuid.uuid); } -static int lov_rd_qos_threshold(char *page, char **start, off_t off, int count, - int *eof, void *data) -{ - struct obd_device *dev = (struct obd_device*) data; - struct lov_obd *lov; - - LASSERT(dev != NULL); - lov = &dev->u.lov; - *eof = 1; - return snprintf(page, count, "%u MB\n", lov->desc.ld_qos_threshold); -} - -static 
int lov_wr_qos_threshold(struct file *file, const char *buffer, - unsigned long count, void *data) -{ - struct obd_device *dev = (struct obd_device *)data; - struct lov_obd *lov; - int val, rc; - LASSERT(dev != NULL); - - lov = &dev->u.lov; - rc = lprocfs_write_helper(buffer, count, &val); - if (rc) - return rc; - - if (val <= 0) - return -EINVAL; - lov->desc.ld_qos_threshold = val; - return count; -} - -static int lov_rd_qos_maxage(char *page, char **start, off_t off, int count, - int *eof, void *data) -{ - struct obd_device *dev = (struct obd_device*) data; - struct lov_obd *lov; - - LASSERT(dev != NULL); - lov = &dev->u.lov; - *eof = 1; - return snprintf(page, count, "%u Sec\n", lov->desc.ld_qos_maxage); -} - -static int lov_wr_qos_maxage(struct file *file, const char *buffer, - unsigned long count, void *data) -{ - struct obd_device *dev = (struct obd_device *)data; - struct lov_obd *lov; - int val, rc; - LASSERT(dev != NULL); - - lov = &dev->u.lov; - rc = lprocfs_write_helper(buffer, count, &val); - if (rc) - return rc; - - if (val <= 0) - return -EINVAL; - lov->desc.ld_qos_maxage = val; - return count; -} - static void *lov_tgt_seq_start(struct seq_file *p, loff_t *pos) { struct obd_device *dev = p->private; @@ -250,8 +188,6 @@ struct lprocfs_vars lprocfs_obd_vars[] = { { "kbytesfree", lprocfs_rd_kbytesfree, 0, 0 }, { "kbytesavail", lprocfs_rd_kbytesavail, 0, 0 }, { "desc_uuid", lov_rd_desc_uuid, 0, 0 }, - { "qos_threshold",lov_rd_qos_threshold, lov_wr_qos_threshold, 0 }, - { "qos_maxage", lov_rd_qos_maxage, lov_wr_qos_maxage, 0 }, { 0 } }; diff --git a/lustre/mdc/mdc_internal.h b/lustre/mdc/mdc_internal.h index 4d1f154..41162e6 100644 --- a/lustre/mdc/mdc_internal.h +++ b/lustre/mdc/mdc_internal.h @@ -1,11 +1,12 @@ #include -void mdc_pack_req_body(struct ptlrpc_request *); +void mdc_pack_req_body(struct ptlrpc_request *req, int offset, + __u64 valid, struct ll_fid *fid, int ea_size); void mdc_pack_rep_body(struct ptlrpc_request *); -void 
mdc_readdir_pack(struct ptlrpc_request *req, __u64 offset, __u32 size, - struct ll_fid *mdc_fid); +void mdc_readdir_pack(struct ptlrpc_request *req, int pos, __u64 offset, + __u32 size, struct ll_fid *mdc_fid); void mdc_getattr_pack(struct ptlrpc_request *req, int valid, int offset, int flags, struct mdc_op_data *data); -void mdc_setattr_pack(struct ptlrpc_request *req, +void mdc_setattr_pack(struct ptlrpc_request *req, int offset, struct mdc_op_data *data, struct iattr *iattr, void *ea, int ealen, void *ea2, int ea2len); @@ -16,6 +17,8 @@ void mdc_create_pack(struct ptlrpc_request *req, int offset, void mdc_open_pack(struct ptlrpc_request *req, int offset, struct mdc_op_data *op_data, __u32 mode, __u64 rdev, __u32 flags, const void *data, int datalen); +void mdc_join_pack(struct ptlrpc_request *req, int offset, + struct mdc_op_data *op_data, __u64 head_size); void mdc_unlink_pack(struct ptlrpc_request *req, int offset, struct mdc_op_data *data); void mdc_link_pack(struct ptlrpc_request *req, int offset, diff --git a/lustre/mdc/mdc_request.c b/lustre/mdc/mdc_request.c index 72f8a7c..4b4eeba 100644 --- a/lustre/mdc/mdc_request.c +++ b/lustre/mdc/mdc_request.c @@ -53,25 +53,26 @@ static int send_getstatus(struct obd_import *imp, struct ll_fid *rootfid, int level, int msg_flags) { struct ptlrpc_request *req; - struct mds_body *body; - int rc, size = sizeof(*body); + int rc, size[] = { [MDS_REQ_REC_OFF] = sizeof(struct mds_body) }; ENTRY; - req = ptlrpc_prep_req(imp, MDS_GETSTATUS, 1, &size, NULL); + req = ptlrpc_prep_req(imp, LUSTRE_MDS_VERSION, MDS_GETSTATUS, + 1, size, NULL); if (!req) GOTO(out, rc = -ENOMEM); - body = lustre_msg_buf(req->rq_reqmsg, 0, sizeof (*body)); req->rq_send_state = level; - req->rq_replen = lustre_msg_size(1, &size); + req->rq_replen = lustre_msg_size(1, size); - mdc_pack_req_body(req); + mdc_pack_req_body(req, MDS_REQ_REC_OFF, 0, NULL, 0); req->rq_reqmsg->flags |= msg_flags; rc = ptlrpc_queue_wait(req); if (!rc) { - body = 
lustre_swab_repbuf (req, 0, sizeof (*body), - lustre_swab_mds_body); + struct mds_body *body; + + body = lustre_swab_repbuf(req, 0, sizeof(*body), + lustre_swab_mds_body); if (body == NULL) { CERROR ("Can't extract mds_body\n"); GOTO (out, rc = -EPROTO); @@ -147,6 +148,16 @@ int mdc_getattr_common(struct obd_export *exp, unsigned int ea_size, RETURN (-EPROTO); } } + + if (body->valid & OBD_MD_FLMODEASIZE) { + if (exp->exp_obd->u.cli.cl_max_mds_easize < body->max_mdsize) + exp->exp_obd->u.cli.cl_max_mds_easize = + body->max_mdsize; + if (exp->exp_obd->u.cli.cl_max_mds_cookiesize < + body->max_cookiesize) + exp->exp_obd->u.cli.cl_max_mds_cookiesize = + body->max_cookiesize; + } RETURN (0); } @@ -164,16 +175,12 @@ int mdc_getattr(struct obd_export *exp, struct ll_fid *fid, /* XXX do we need to make another request here? We just did a getattr * to do the lookup in the first place. */ - req = ptlrpc_prep_req(class_exp2cliimp(exp), MDS_GETATTR, 1, &size, - NULL); + req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_MDS_VERSION, + MDS_GETATTR, 1, &size, NULL); if (!req) GOTO(out, rc = -ENOMEM); - body = lustre_msg_buf(req->rq_reqmsg, 0, sizeof (*body)); - memcpy(&body->fid1, fid, sizeof(*fid)); - body->valid = valid; - body->eadatasize = ea_size; - mdc_pack_req_body(req); + mdc_pack_req_body(req, MDS_REQ_REC_OFF, valid, fid, ea_size); /* currently only root inode will call us with FLACL */ if (valid & OBD_MD_FLACL) @@ -190,29 +197,24 @@ int mdc_getattr(struct obd_export *exp, struct ll_fid *fid, } int mdc_getattr_name(struct obd_export *exp, struct ll_fid *fid, - char *filename, int namelen, unsigned long valid, - unsigned int ea_size, struct ptlrpc_request **request) + const char *filename, int namelen, unsigned long valid, + unsigned int ea_len, struct ptlrpc_request **request) { struct ptlrpc_request *req; - struct mds_body *body; - int rc, size[2] = {sizeof(*body), namelen}; + int rc, size[] = { sizeof(struct mds_body), namelen }; ENTRY; - req = 
ptlrpc_prep_req(class_exp2cliimp(exp), MDS_GETATTR_NAME, 2, - size, NULL); + req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_MDS_VERSION, + MDS_GETATTR_NAME, 2, size, NULL); if (!req) GOTO(out, rc = -ENOMEM); - body = lustre_msg_buf(req->rq_reqmsg, 0, sizeof (*body)); - memcpy(&body->fid1, fid, sizeof(*fid)); - body->valid = valid; - body->eadatasize = ea_size; - mdc_pack_req_body(req); - + mdc_pack_req_body(req, MDS_REQ_REC_OFF, valid, fid, ea_len); + LASSERT (strnlen (filename, namelen) == namelen - 1); memcpy(lustre_msg_buf(req->rq_reqmsg, 1, namelen), filename, namelen); - rc = mdc_getattr_common(exp, ea_size, 0, req); + rc = mdc_getattr_common(exp, ea_len, 0, req); if (rc != 0) { ptlrpc_req_finished (req); req = NULL; @@ -244,18 +246,15 @@ int mdc_xattr_common(struct obd_export *exp, struct ll_fid *fid, size[bufcnt++] = input_size; } - req = ptlrpc_prep_req(class_exp2cliimp(exp), opcode, + req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_MDS_VERSION, opcode, bufcnt, size, NULL); if (!req) GOTO(out, rc = -ENOMEM); /* request data */ - body = lustre_msg_buf(req->rq_reqmsg, 0, sizeof (*body)); - memcpy(&body->fid1, fid, sizeof(*fid)); - body->valid = valid; - body->eadatasize = output_size; + body = lustre_msg_buf(req->rq_reqmsg, MDS_REQ_REC_OFF, sizeof(*body)); + mdc_pack_req_body(req, MDS_REQ_REC_OFF, valid, fid, output_size); body->flags = flags; - mdc_pack_req_body(req); if (xattr_name) { tmp = lustre_msg_buf(req->rq_reqmsg, 1, xattr_namelen); @@ -330,10 +329,10 @@ int mdc_getxattr(struct obd_export *exp, struct ll_fid *fid, void mdc_store_inode_generation(struct ptlrpc_request *req, int reqoff, int repoff) { - struct mds_rec_create *rec = - lustre_msg_buf(req->rq_reqmsg, reqoff, sizeof(*rec)); - struct mds_body *body = - lustre_msg_buf(req->rq_repmsg, repoff, sizeof(*body)); + struct mds_rec_create *rec = lustre_msg_buf(req->rq_reqmsg, reqoff, + sizeof(*rec)); + struct mds_body *body = lustre_msg_buf(req->rq_repmsg, repoff, + sizeof(*body)); LASSERT 
(rec != NULL); LASSERT (body != NULL); @@ -349,6 +348,7 @@ void mdc_store_inode_generation(struct ptlrpc_request *req, int reqoff, rec->cr_replayfid.generation, rec->cr_replayfid.id); } +#ifdef CONFIG_FS_POSIX_ACL static int mdc_unpack_acl(struct obd_export *exp, struct ptlrpc_request *req, struct lustre_md *md, unsigned int offset) @@ -387,6 +387,9 @@ int mdc_unpack_acl(struct obd_export *exp, struct ptlrpc_request *req, md->posix_acl = acl; return 0; } +#else +#define mdc_unpack_acl(exp, req, md, offset) 0 +#endif int mdc_req2lustre_md(struct ptlrpc_request *req, int offset, struct obd_export *exp, @@ -452,10 +455,12 @@ void mdc_free_lustre_md(struct obd_export *exp, struct lustre_md *md) if (md->lsm) obd_free_memmd(exp, &md->lsm); +#ifdef CONFIG_FS_POSIX_ACL if (md->posix_acl) { posix_acl_release(md->posix_acl); md->posix_acl = NULL; } +#endif } static void mdc_commit_open(struct ptlrpc_request *req) @@ -479,7 +484,7 @@ static void mdc_replay_open(struct ptlrpc_request *req) struct mdc_open_data *mod = req->rq_cb_data; struct obd_client_handle *och; struct ptlrpc_request *close_req; - struct lustre_handle old; + struct lustre_handle old; struct mds_body *body; ENTRY; @@ -495,20 +500,21 @@ static void mdc_replay_open(struct ptlrpc_request *req) och = mod->mod_och; if (och != NULL) { - struct lustre_handle *file_fh; + struct lustre_handle *file_fh; LASSERT(och->och_magic == OBD_CLIENT_HANDLE_MAGIC); file_fh = &och->och_fh; CDEBUG(D_HA, "updating handle from "LPX64" to "LPX64"\n", file_fh->cookie, body->handle.cookie); memcpy(&old, file_fh, sizeof(old)); memcpy(file_fh, &body->handle, sizeof(*file_fh)); - } + } close_req = mod->mod_close_req; if (close_req != NULL) { struct mds_body *close_body; LASSERT(close_req->rq_reqmsg->opc == MDS_CLOSE); - close_body = lustre_msg_buf(close_req->rq_reqmsg, 0, + close_body = lustre_msg_buf(close_req->rq_reqmsg, + MDS_REQ_REC_OFF, sizeof(*close_body)); if (och != NULL) LASSERT(!memcmp(&old, &close_body->handle, sizeof old)); @@ 
-524,15 +530,16 @@ void mdc_set_open_replay_data(struct obd_client_handle *och, struct ptlrpc_request *open_req) { struct mdc_open_data *mod; - struct mds_rec_create *rec = - lustre_msg_buf(open_req->rq_reqmsg, 2, sizeof(*rec)); - struct mds_body *body = - lustre_msg_buf(open_req->rq_repmsg, 1, sizeof(*body)); + struct mds_rec_create *rec = lustre_msg_buf(open_req->rq_reqmsg, + MDS_REQ_INTENT_REC_OFF, + sizeof(*rec)); + struct mds_body *body = lustre_msg_buf(open_req->rq_repmsg, 1, + sizeof(*body)); - LASSERT(rec != NULL); - /* outgoing messages always in my byte order */ LASSERT(body != NULL); /* incoming message in my byte order (it's been swabbed) */ + LASSERT(rec != NULL); + /* outgoing messages always in my byte order */ LASSERT_REPSWABBED(open_req, 1); OBD_ALLOC(mod, sizeof(*mod)); @@ -602,58 +609,20 @@ static void mdc_commit_close(struct ptlrpc_request *req) spin_unlock(&open_req->rq_lock); } -static int mdc_close_interpret(struct ptlrpc_request *req, void *data, int rc) -{ - union ptlrpc_async_args *aa = data; - struct mdc_rpc_lock *rpc_lock; - struct obd_device *obd = aa->pointer_arg[1]; - unsigned long flags; - - spin_lock_irqsave(&req->rq_lock, flags); - rpc_lock = aa->pointer_arg[0]; - aa->pointer_arg[0] = NULL; - spin_unlock_irqrestore(&req->rq_lock, flags); - - if (rpc_lock == NULL) { - CERROR("called with NULL rpc_lock\n"); - } else { - LASSERTF(rpc_lock == obd->u.cli.cl_rpc_lock, "%p != %p\n", - rpc_lock, obd->u.cli.cl_rpc_lock); - mdc_put_rpc_lock(rpc_lock, NULL); - } - cfs_waitq_signal(&req->rq_reply_waitq); - RETURN(rc); -} - -/* We can't use ptlrpc_check_reply, because we don't want to wake up for - * anything but a reply or an error. 
*/ -static int mdc_close_check_reply(struct ptlrpc_request *req) -{ - int rc = 0; - unsigned long flags; - - spin_lock_irqsave(&req->rq_lock, flags); - if (req->rq_async_args.pointer_arg[0] == NULL) - rc = 1; - spin_unlock_irqrestore (&req->rq_lock, flags); - return rc; -} - int mdc_close(struct obd_export *exp, struct obdo *oa, struct obd_client_handle *och, struct ptlrpc_request **request) { struct obd_device *obd = class_exp2obd(exp); - int reqsize = sizeof(struct mds_body); - int rc, repsize[3] = {sizeof(struct mds_body), + int size[] = { sizeof(struct mds_body) }; + int rc, repsize[] = { sizeof(struct mds_body), obd->u.cli.cl_max_mds_easize, obd->u.cli.cl_max_mds_cookiesize}; struct ptlrpc_request *req; struct mdc_open_data *mod; - struct l_wait_info lwi; ENTRY; - req = ptlrpc_prep_req(class_exp2cliimp(exp), MDS_CLOSE, 1, &reqsize, - NULL); + req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_MDS_VERSION, + MDS_CLOSE, 1, size, NULL); if (req == NULL) GOTO(out, rc = -ENOMEM); @@ -665,7 +634,7 @@ int mdc_close(struct obd_export *exp, struct obdo *oa, mod->mod_close_req = req; if (mod->mod_open_req->rq_type == LI_POISON) { /* FIXME This should be an ASSERT, but until we - figure out why it can be poisoned here, give + figure out why it can be poisoned here, give a reasonable return. bug 6155 */ CERROR("LBUG POISONED open %p!\n", mod->mod_open_req); ptlrpc_req_finished(req); @@ -676,24 +645,17 @@ int mdc_close(struct obd_export *exp, struct obdo *oa, CDEBUG(D_HA, "couldn't find open req; expecting close error\n"); } - mdc_close_pack(req, 0, oa, oa->o_valid, och); + mdc_close_pack(req, MDS_REQ_REC_OFF, oa, oa->o_valid, och); req->rq_replen = lustre_msg_size(3, repsize); req->rq_commit_cb = mdc_commit_close; LASSERT(req->rq_cb_data == NULL); req->rq_cb_data = mod; - /* We hand a ref to the rpcd here, so we need another one of our own. 
*/ - ptlrpc_request_addref(req); - mdc_get_rpc_lock(obd->u.cli.cl_rpc_lock, NULL); - req->rq_interpret_reply = mdc_close_interpret; - req->rq_async_args.pointer_arg[0] = obd->u.cli.cl_rpc_lock; - req->rq_async_args.pointer_arg[1] = obd; - ptlrpcd_add_req(req); - lwi = LWI_TIMEOUT_INTR(MAX(req->rq_timeout * HZ, 1), NULL, NULL, NULL); - rc = l_wait_event(req->rq_reply_waitq, mdc_close_check_reply(req), - &lwi); + rc = ptlrpc_queue_wait(req); + mdc_put_rpc_lock(obd->u.cli.cl_rpc_lock, NULL); + if (req->rq_repmsg == NULL) { CDEBUG(D_HA, "request failed to send: %p, %d\n", req, req->rq_status); @@ -716,14 +678,10 @@ int mdc_close(struct obd_export *exp, struct obdo *oa, rc = -EPROTO; } } - if (req->rq_async_args.pointer_arg[0] != NULL) { - CERROR("returned without dropping rpc_lock: rc %d\n", rc); - mdc_close_interpret(req, &req->rq_async_args, rc); - } EXIT; - out: *request = req; + out: return rc; } @@ -731,15 +689,15 @@ int mdc_done_writing(struct obd_export *exp, struct obdo *obdo) { struct ptlrpc_request *req; struct mds_body *body; - int rc, size = sizeof(*body); + int rc, size[] = { [MDS_REQ_REC_OFF] = sizeof(*body) }; ENTRY; - req = ptlrpc_prep_req(class_exp2cliimp(exp), MDS_DONE_WRITING, 1, - &size, NULL); + req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_MDS_VERSION, + MDS_DONE_WRITING, 1, size, NULL); if (req == NULL) RETURN(-ENOMEM); - body = lustre_msg_buf(req->rq_reqmsg, 0, sizeof(*body)); + body = lustre_msg_buf(req->rq_reqmsg, MDS_REQ_REC_OFF, sizeof(*body)); mdc_pack_fid(&body->fid1, obdo->o_id, 0, obdo->o_mode); body->size = obdo->o_size; body->blocks = obdo->o_blocks; @@ -747,28 +705,30 @@ int mdc_done_writing(struct obd_export *exp, struct obdo *obdo) body->valid = obdo->o_valid; // memcpy(&body->handle, &och->och_fh, sizeof(body->handle)); - req->rq_replen = lustre_msg_size(1, &size); + req->rq_replen = lustre_msg_size(1, size); rc = ptlrpc_queue_wait(req); ptlrpc_req_finished(req); RETURN(rc); } -int mdc_readpage(struct obd_export *exp, struct 
ll_fid *mdc_fid, __u64 offset, +int mdc_readpage(struct obd_export *exp, struct ll_fid *fid, __u64 offset, struct page *page, struct ptlrpc_request **request) { struct obd_import *imp = class_exp2cliimp(exp); struct ptlrpc_request *req = NULL; struct ptlrpc_bulk_desc *desc = NULL; struct mds_body *body; - int rc, size = sizeof(*body); + int rc, size[] = { sizeof(*body) }; ENTRY; - CDEBUG(D_INODE, "inode: %ld\n", (long)mdc_fid->id); + CDEBUG(D_INODE, "inode: "LPU64"\n", fid->id); - req = ptlrpc_prep_req(imp, MDS_READPAGE, 1, &size, NULL); + req = ptlrpc_prep_req(imp, LUSTRE_MDS_VERSION, MDS_READPAGE, + 1, size, NULL); if (req == NULL) GOTO(out, rc = -ENOMEM); + /* XXX FIXME bug 249 */ req->rq_request_portal = MDS_READPAGE_PORTAL; @@ -779,9 +739,9 @@ int mdc_readpage(struct obd_export *exp, struct ll_fid *mdc_fid, __u64 offset, ptlrpc_prep_bulk_page(desc, page, 0, PAGE_CACHE_SIZE); - mdc_readdir_pack(req, offset, PAGE_CACHE_SIZE, mdc_fid); + mdc_readdir_pack(req, MDS_REQ_REC_OFF, offset, PAGE_CACHE_SIZE, fid); - req->rq_replen = lustre_msg_size(1, &size); + req->rq_replen = lustre_msg_size(1, size); rc = ptlrpc_queue_wait(req); if (rc == 0) { @@ -873,8 +833,7 @@ int mdc_set_info(struct obd_export *exp, obd_count keylen, struct obd_import *imp = class_exp2cliimp(exp); int rc = -EINVAL; - if (keylen == strlen("initial_recov") && - memcmp(key, "initial_recov", strlen("initial_recov")) == 0) { + if (KEY_IS("initial_recov")) { if (vallen != sizeof(int)) RETURN(-EINVAL); imp->imp_initial_recov = *(int *)val; @@ -882,8 +841,18 @@ int mdc_set_info(struct obd_export *exp, obd_count keylen, exp->exp_obd->obd_name, imp->imp_initial_recov); RETURN(0); } - if (keylen == strlen("read-only") && - memcmp(key, "read-only", strlen("read-only")) == 0) { + /* Turn off initial_recov after we try all backup servers once */ + if (KEY_IS("init_recov_bk")) { + if (vallen != sizeof(int)) + RETURN(-EINVAL); + imp->imp_initial_recov_bk = *(int *)val; + if (imp->imp_initial_recov_bk) + 
imp->imp_initial_recov = 1; + CDEBUG(D_HA, "%s: set imp_initial_recov_bk = %d\n", + exp->exp_obd->obd_name, imp->imp_initial_recov_bk); + RETURN(0); + } + if (KEY_IS("read-only")) { struct ptlrpc_request *req; int size[2] = {keylen, vallen}; char *bufs[2] = {key, val}; @@ -899,7 +868,8 @@ int mdc_set_info(struct obd_export *exp, obd_count keylen, ~OBD_CONNECT_RDONLY; } - req = ptlrpc_prep_req(imp, MDS_SET_INFO, 2, size, bufs); + req = ptlrpc_prep_req(imp, LUSTRE_MDS_VERSION, + MDS_SET_INFO, 2, size, bufs); if (req == NULL) RETURN(-ENOMEM); @@ -908,7 +878,28 @@ int mdc_set_info(struct obd_export *exp, obd_count keylen, ptlrpc_req_finished(req); RETURN(rc); } - + + RETURN(rc); +} + +int mdc_get_info(struct obd_export *exp, __u32 keylen, void *key, + __u32 *vallen, void *val) +{ + int rc = -EINVAL; + + if (keylen == strlen("max_easize") && + memcmp(key, "max_easize", strlen("max_easize")) == 0) { + int mdsize, *max_easize; + + if (*vallen != sizeof(int)) + RETURN(-EINVAL); + mdsize = *(int*)val; + if (mdsize > exp->exp_obd->u.cli.cl_max_mds_easize) + exp->exp_obd->u.cli.cl_max_mds_easize = mdsize; + max_easize = val; + *max_easize = exp->exp_obd->u.cli.cl_max_mds_easize; + RETURN(0); + } RETURN(rc); } @@ -926,7 +917,8 @@ static int mdc_statfs(struct obd_device *obd, struct obd_statfs *osfs, * during mount that would help a bit). Having relative timestamps * is not so great if request processing is slow, while absolute * timestamps are not ideal because they need time synchronization. 
*/ - req = ptlrpc_prep_req(obd->u.cli.cl_import, MDS_STATFS, 0, NULL, NULL); + req = ptlrpc_prep_req(obd->u.cli.cl_import, LUSTRE_MDS_VERSION, + MDS_STATFS, 0, NULL, NULL); if (!req) RETURN(-ENOMEM); @@ -958,18 +950,19 @@ static int mdc_pin(struct obd_export *exp, obd_id ino, __u32 gen, int type, { struct ptlrpc_request *req; struct mds_body *body; - int rc, size = sizeof(*body); + int rc, size[] = { [MDS_REQ_REC_OFF] = sizeof(struct mds_body) }; ENTRY; - req = ptlrpc_prep_req(class_exp2cliimp(exp), MDS_PIN, 1, &size, NULL); + req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_MDS_VERSION, + MDS_PIN, 1, size, NULL); if (req == NULL) RETURN(-ENOMEM); - body = lustre_msg_buf(req->rq_reqmsg, 0, sizeof (*body)); + body = lustre_msg_buf(req->rq_reqmsg, MDS_REQ_REC_OFF, sizeof (*body)); mdc_pack_fid(&body->fid1, ino, gen, type); body->flags = flag; - req->rq_replen = lustre_msg_size(1, &size); + req->rq_replen = lustre_msg_size(1, size); mdc_get_rpc_lock(exp->exp_obd->u.cli.cl_rpc_lock, NULL); rc = ptlrpc_queue_wait(req); @@ -1004,17 +997,18 @@ static int mdc_unpin(struct obd_export *exp, { struct ptlrpc_request *req; struct mds_body *body; - int rc, size = sizeof(*body); + int rc, size[] = { [MDS_REQ_REC_OFF] = sizeof(struct mds_body) }; ENTRY; if (handle->och_magic != OBD_CLIENT_HANDLE_MAGIC) RETURN(0); - req = ptlrpc_prep_req(class_exp2cliimp(exp), MDS_CLOSE, 1, &size, NULL); + req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_MDS_VERSION, + MDS_CLOSE, 1, size, NULL); if (req == NULL) RETURN(-ENOMEM); - body = lustre_msg_buf(req->rq_reqmsg, 0, sizeof(*body)); + body = lustre_msg_buf(req->rq_reqmsg, 1, sizeof(*body)); memcpy(&body->handle, &handle->och_fh, sizeof(body->handle)); body->flags = flag; @@ -1036,22 +1030,17 @@ int mdc_sync(struct obd_export *exp, struct ll_fid *fid, struct ptlrpc_request **request) { struct ptlrpc_request *req; - struct mds_body *body; - int size = sizeof(*body); - int rc; + int rc, size[] = { [MDS_REQ_REC_OFF] = sizeof(struct mds_body) 
}; ENTRY; - req = ptlrpc_prep_req(class_exp2cliimp(exp), MDS_SYNC, 1,&size,NULL); + req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_MDS_VERSION, + MDS_SYNC, 1, size, NULL); if (!req) RETURN(rc = -ENOMEM); - if (fid) { - body = lustre_msg_buf(req->rq_reqmsg, 0, sizeof (*body)); - memcpy(&body->fid1, fid, sizeof(*fid)); - mdc_pack_req_body(req); - } + mdc_pack_req_body(req, MDS_REQ_REC_OFF, 0, fid, 0); - req->rq_replen = lustre_msg_size(1, &size); + req->rq_replen = lustre_msg_size(1, size); rc = ptlrpc_queue_wait(req); if (rc || request == NULL) @@ -1062,8 +1051,7 @@ int mdc_sync(struct obd_export *exp, struct ll_fid *fid, RETURN(rc); } -static int mdc_import_event(struct obd_device *obd, - struct obd_import *imp, +static int mdc_import_event(struct obd_device *obd, struct obd_import *imp, enum obd_import_event event) { int rc = 0; @@ -1075,8 +1063,7 @@ static int mdc_import_event(struct obd_device *obd, break; } case IMP_EVENT_INACTIVE: { - if (obd->obd_observer) - rc = obd_notify(obd->obd_observer, obd, 0); + rc = obd_notify_observer(obd, obd, OBD_NOTIFY_INACTIVE); break; } case IMP_EVENT_INVALIDATE: { @@ -1087,12 +1074,14 @@ static int mdc_import_event(struct obd_device *obd, break; } case IMP_EVENT_ACTIVE: { - if (obd->obd_observer) - rc = obd_notify(obd->obd_observer, obd, 1); + rc = obd_notify_observer(obd, obd, OBD_NOTIFY_ACTIVE); break; } + case IMP_EVENT_OCD: + break; + default: - CERROR("Unknown import event %d\n", event); + CERROR("Unknown import event %x\n", event); LBUG(); } RETURN(rc); @@ -1150,28 +1139,35 @@ int mdc_init_ea_size(struct obd_export *mdc_exp, struct obd_export *lov_exp) struct lov_stripe_md lsm = { .lsm_magic = LOV_MAGIC }; struct lov_desc desc; __u32 valsize = sizeof(desc); + __u32 stripes; int rc, size; ENTRY; - size = obd_size_diskmd(lov_exp, NULL); - if (cli->cl_max_mds_easize < size) - cli->cl_max_mds_easize = size; - rc = obd_get_info(lov_exp, strlen("lovdesc") + 1, "lovdesc", &valsize, &desc); if (rc) RETURN(rc); + stripes = 
min(desc.ld_tgt_count, (__u32)LOV_MAX_STRIPE_COUNT); + lsm.lsm_stripe_count = stripes; + size = obd_size_diskmd(lov_exp, &lsm); + + if (cli->cl_max_mds_easize < size) + cli->cl_max_mds_easize = size; + lsm.lsm_stripe_count = desc.ld_default_stripe_count; size = obd_size_diskmd(lov_exp, &lsm); if (cli->cl_default_mds_easize < size) cli->cl_default_mds_easize = size; - size = desc.ld_tgt_count * sizeof(struct llog_cookie); + size = stripes * sizeof(struct llog_cookie); if (cli->cl_max_mds_cookiesize < size) cli->cl_max_mds_cookiesize = size; + CDEBUG(D_HA, "updating max_mdsize/max_cookiesize: %d/%d\n", + cli->cl_max_mds_easize, cli->cl_max_mds_cookiesize); + RETURN(0); } @@ -1179,8 +1175,8 @@ static int mdc_precleanup(struct obd_device *obd, int stage) { int rc = 0; ENTRY; - - if (stage < 2) + + if (stage < OBD_CLEANUP_SELF_EXP) RETURN(0); rc = obd_llog_finish(obd, 0); @@ -1218,6 +1214,13 @@ static int mdc_llog_init(struct obd_device *obd, struct obd_device *tgt, ctxt->loc_imp = obd->u.cli.cl_import; } + rc = llog_setup(obd, LLOG_LOVEA_REPL_CTXT, tgt, 0, NULL, + &llog_client_ops); + if (rc == 0) { + ctxt = llog_get_context(obd, LLOG_LOVEA_REPL_CTXT); + ctxt->loc_imp = obd->u.cli.cl_import; + } + RETURN(rc); } @@ -1226,6 +1229,10 @@ static int mdc_llog_finish(struct obd_device *obd, int count) int rc; ENTRY; + rc = llog_cleanup(llog_get_context(obd, LLOG_LOVEA_REPL_CTXT)); + if (rc) { + CERROR("can not cleanup LLOG_CONFIG_REPL_CTXT rc %d\n", rc); + } rc = llog_cleanup(llog_get_context(obd, LLOG_CONFIG_REPL_CTXT)); RETURN(rc); } @@ -1241,6 +1248,7 @@ struct obd_ops mdc_obd_ops = { .o_disconnect = client_disconnect_export, .o_iocontrol = mdc_iocontrol, .o_set_info = mdc_set_info, + .o_get_info = mdc_get_info, .o_statfs = mdc_statfs, .o_pin = mdc_pin, .o_unpin = mdc_unpin, @@ -1249,7 +1257,7 @@ struct obd_ops mdc_obd_ops = { .o_llog_finish = mdc_llog_finish, }; -static quota_interface_t *quota_interface = NULL; +static quota_interface_t *quota_interface; extern 
quota_interface_t mdc_quota_interface; int __init mdc_init(void) @@ -1257,14 +1265,14 @@ int __init mdc_init(void) int rc; struct lprocfs_static_vars lvars; lprocfs_init_vars(mdc, &lvars); - + quota_interface = PORTAL_SYMBOL_GET(mdc_quota_interface); init_obd_quota_ops(quota_interface, &mdc_obd_ops); - + rc = class_register_type(&mdc_obd_ops, lvars.module_vars, LUSTRE_MDC_NAME); if (rc && quota_interface) - PORTAL_SYMBOL_PUT(osc_quota_interface); + PORTAL_SYMBOL_PUT(mdc_quota_interface); RETURN(rc); } diff --git a/lustre/mds/handler.c b/lustre/mds/handler.c index a1a287d..759a836 100644 --- a/lustre/mds/handler.c +++ b/lustre/mds/handler.c @@ -57,6 +57,7 @@ #include #include #include +#include #include "mds_internal.h" @@ -248,6 +249,55 @@ struct dentry *mds_fid2dentry(struct mds_obd *mds, struct ll_fid *fid, RETURN(result); } +static int mds_connect_internal(struct obd_export *exp, + struct obd_connect_data *data) +{ + struct obd_device *obd = exp->exp_obd; + if (data != NULL) { + data->ocd_connect_flags &= MDS_CONNECT_SUPPORTED; + data->ocd_ibits_known &= MDS_INODELOCK_FULL; + + /* If no known bits (which should not happen, probably, + as everybody should support LOOKUP and UPDATE bits at least) + revert to compat mode with plain locks. 
*/ + if (!data->ocd_ibits_known && + data->ocd_connect_flags & OBD_CONNECT_IBITS) + data->ocd_connect_flags &= ~OBD_CONNECT_IBITS; + + if (!obd->u.mds.mds_fl_acl) + data->ocd_connect_flags &= ~OBD_CONNECT_ACL; + + if (!obd->u.mds.mds_fl_user_xattr) + data->ocd_connect_flags &= ~OBD_CONNECT_XATTR; + + exp->exp_connect_flags = data->ocd_connect_flags; + data->ocd_version = LUSTRE_VERSION_CODE; + exp->exp_mds_data.med_ibits_known = data->ocd_ibits_known; + } + + if (obd->u.mds.mds_fl_acl && + ((exp->exp_connect_flags & OBD_CONNECT_ACL) == 0)) { + CWARN("%s: MDS requires ACL support but client does not\n", + obd->obd_name); + return -EBADE; + } + return 0; +} + +static int mds_reconnect(struct obd_export *exp, struct obd_device *obd, + struct obd_uuid *cluuid, + struct obd_connect_data *data) +{ + int rc; + ENTRY; + + if (exp == NULL || obd == NULL || cluuid == NULL) + RETURN(-EINVAL); + + rc = mds_connect_internal(exp, data); + + RETURN(rc); +} /* Establish a connection to the MDS. * @@ -291,28 +341,13 @@ static int mds_connect(struct lustre_handle *conn, struct obd_device *obd, LASSERT(exp); med = &exp->exp_mds_data; - if (data != NULL) { - data->ocd_connect_flags &= MDS_CONNECT_SUPPORTED; - - if (!obd->u.mds.mds_fl_user_xattr) - data->ocd_connect_flags &= ~OBD_CONNECT_USER_XATTR; - - exp->exp_connect_flags = data->ocd_connect_flags; - } - - if ((obd->u.mds.mds_fl_acl == 0) != - ((exp->exp_connect_flags & OBD_CONNECT_ACL) == 0)) { - CWARN("%s require ACL support but %s doesn't\n", - obd->u.mds.mds_fl_acl ? "MDS" : "client", - obd->u.mds.mds_fl_acl ? 
"client" : "MDS"); - GOTO(out, rc = -EBADE); - } + rc = mds_connect_internal(exp, data); + if (rc) + GOTO(out, rc); OBD_ALLOC(mcd, sizeof(*mcd)); - if (!mcd) { - CERROR("mds: out of memory for client data\n"); + if (!mcd) GOTO(out, rc = -ENOMEM); - } memcpy(mcd->mcd_uuid, cluuid, sizeof(mcd->mcd_uuid)); med->med_mcd = mcd; @@ -334,7 +369,7 @@ out: RETURN(rc); } -static int mds_init_export(struct obd_export *exp) +static int mds_init_export(struct obd_export *exp) { struct mds_export_data *med = &exp->exp_mds_data; @@ -379,7 +414,7 @@ static int mds_destroy_export(struct obd_export *export) /* child orphan sem protects orphan_dec_test and * is_orphan race, mds_mfd_close drops it */ MDS_DOWN_WRITE_ORPHAN_SEM(dentry->d_inode); - rc = mds_mfd_close(NULL, obd, mfd, + rc = mds_mfd_close(NULL, MDS_REQ_REC_OFF, obd, mfd, !(export->exp_flags & OBD_OPT_FAILOVER)); if (rc) @@ -457,7 +492,7 @@ int mds_get_md(struct obd_device *obd, struct inode *inode, void *md, if (lock) down(&inode->i_sem); - rc = fsfilt_get_md(obd, inode, md, *size); + rc = fsfilt_get_md(obd, inode, md, *size, "lov"); if (rc < 0) { CERROR("Error %d reading eadata for ino %lu\n", @@ -472,6 +507,8 @@ int mds_get_md(struct obd_device *obd, struct inode *inode, void *md, } else if (rc > 0) { *size = rc; } + } else { + *size = 0; } if (lock) up(&inode->i_sem); @@ -510,7 +547,7 @@ int mds_pack_md(struct obd_device *obd, struct lustre_msg *msg, int offset, inode->i_ino, lmm_size, mds->mds_max_mdsize); // RETURN(-EINVAL); } - + rc = mds_get_md(obd, inode, lmm, &lmm_size, lock); if (rc > 0) { if (S_ISDIR(inode->i_mode)) @@ -524,6 +561,7 @@ int mds_pack_md(struct obd_device *obd, struct lustre_msg *msg, int offset, RETURN(rc); } +#ifdef CONFIG_FS_POSIX_ACL static int mds_pack_posix_acl(struct inode *inode, struct lustre_msg *repmsg, struct mds_body *repbody, int repoff) @@ -559,6 +597,9 @@ out: repbody->valid |= OBD_MD_FLACL; return 0; } +#else +#define mds_pack_posix_acl(inode, repmsg, repbody, repoff) 0 +#endif int 
mds_pack_acl(struct mds_export_data *med, struct inode *inode, struct lustre_msg *repmsg, struct mds_body *repbody, @@ -592,7 +633,7 @@ static int mds_getattr_internal(struct obd_device *obd, struct dentry *dentry, inode, 1); /* If we have LOV EA data, the OST holds size, atime, mtime */ - if (!(body->valid & OBD_MD_FLEASIZE) && + if (!(body->valid & OBD_MD_FLEASIZE) && !(body->valid & OBD_MD_FLDIREA)) body->valid |= (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS | OBD_MD_FLATIME | OBD_MD_FLMTIME); @@ -625,9 +666,17 @@ static int mds_getattr_internal(struct obd_device *obd, struct dentry *dentry, reply_off++; } + if (reqbody->valid & OBD_MD_FLMODEASIZE) { + struct mds_obd *mds = mds_req2mds(req); + body->max_cookiesize = mds->mds_max_cookiesize; + body->max_mdsize = mds->mds_max_mdsize; + body->valid |= OBD_MD_FLMODEASIZE; + } + if (rc) RETURN(rc); +#ifdef CONFIG_FS_POSIX_ACL if ((req->rq_export->exp_connect_flags & OBD_CONNECT_ACL) && (reqbody->valid & OBD_MD_FLACL)) { rc = mds_pack_acl(&req->rq_export->exp_mds_data, @@ -638,6 +687,7 @@ static int mds_getattr_internal(struct obd_device *obd, struct dentry *dentry, if (body->aclsize) reply_off++; } +#endif RETURN(rc); } @@ -657,7 +707,8 @@ static int mds_getattr_pack_msg(struct ptlrpc_request *req, struct inode *inode, if ((S_ISREG(inode->i_mode) && (body->valid & OBD_MD_FLEASIZE)) || (S_ISDIR(inode->i_mode) && (body->valid & OBD_MD_FLDIREA))) { down(&inode->i_sem); - rc = fsfilt_get_md(req->rq_export->exp_obd, inode, NULL, 0); + rc = fsfilt_get_md(req->rq_export->exp_obd, inode, NULL, 0, + "lov"); up(&inode->i_sem); CDEBUG(D_INODE, "got %d bytes MD data for inode %lu\n", rc, inode->i_ino); @@ -686,6 +737,7 @@ static int mds_getattr_pack_msg(struct ptlrpc_request *req, struct inode *inode, inode->i_size + 1, body->eadatasize); } +#ifdef CONFIG_FS_POSIX_ACL if ((req->rq_export->exp_connect_flags & OBD_CONNECT_ACL) && (body->valid & OBD_MD_FLACL)) { struct dentry de = { .d_inode = inode }; @@ -707,6 +759,7 @@ static int 
mds_getattr_pack_msg(struct ptlrpc_request *req, struct inode *inode, } bufcount++; } +#endif if (OBD_FAIL_CHECK(OBD_FAIL_MDS_GETATTR_PACK)) { CERROR("failed MDS_GETATTR_PACK test\n"); @@ -740,7 +793,7 @@ static int mds_getattr_name(int offset, struct ptlrpc_request *req, char *name; ENTRY; - LASSERT(!strcmp(obd->obd_type->typ_name, "mds")); + LASSERT(!strcmp(obd->obd_type->typ_name, LUSTRE_MDS_NAME)); /* Swab now, before anyone looks inside the request */ @@ -763,9 +816,9 @@ static int mds_getattr_name(int offset, struct ptlrpc_request *req, if (rc) GOTO(cleanup, rc); - LASSERT (offset == 0 || offset == 2); + LASSERT (offset == MDS_REQ_REC_OFF || offset == MDS_REQ_INTENT_REC_OFF); /* if requests were at offset 2, the getattr reply goes back at 1 */ - if (offset) { + if (offset == MDS_REQ_INTENT_REC_OFF) { rep = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*rep)); offset = 1; } @@ -797,7 +850,7 @@ static int mds_getattr_name(int offset, struct ptlrpc_request *req, } #endif - if (child_lockh->cookie != 0) { + if (lustre_handle_is_used(child_lockh)) { LASSERT(lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT); resent_req = 1; } @@ -886,7 +939,7 @@ static int mds_getattr_name(int offset, struct ptlrpc_request *req, return rc; } -static int mds_getattr(int offset, struct ptlrpc_request *req) +static int mds_getattr(struct ptlrpc_request *req, int offset) { struct mds_obd *mds = mds_req2mds(req); struct obd_device *obd = req->rq_export->exp_obd; @@ -979,7 +1032,7 @@ out: return 0; } -static int mds_sync(struct ptlrpc_request *req) +static int mds_sync(struct ptlrpc_request *req, int offset) { struct obd_device *obd = req->rq_export->exp_obd; struct mds_obd *mds = &obd->u.mds; @@ -1030,7 +1083,7 @@ out: * * If we were to take another one here, a deadlock will result, if another * thread is already waiting for a PW lock. 
*/ -static int mds_readpage(struct ptlrpc_request *req) +static int mds_readpage(struct ptlrpc_request *req, int offset) { struct obd_device *obd = req->rq_export->exp_obd; struct mds_obd *mds = &obd->u.mds; @@ -1052,7 +1105,8 @@ static int mds_readpage(struct ptlrpc_request *req) GOTO(out, rc); } - body = lustre_swab_reqbuf(req, 0, sizeof(*body), lustre_swab_mds_body); + body = lustre_swab_reqbuf(req, offset, sizeof(*body), + lustre_swab_mds_body); if (body == NULL) GOTO (out, rc = -EFAULT); @@ -1246,6 +1300,74 @@ static int mds_handle_quotactl(struct ptlrpc_request *req) RETURN(0); } +static int mds_msg_check_version(struct lustre_msg *msg) +{ + int rc; + + /* TODO: enable the below check while really introducing msg version. + * it's disabled because it will break compatibility with b1_4. + */ + return (0); + + switch (msg->opc) { + case MDS_CONNECT: + case MDS_DISCONNECT: + case OBD_PING: + rc = lustre_msg_check_version(msg, LUSTRE_OBD_VERSION); + if (rc) + CERROR("bad opc %u version %08x, expecting %08x\n", + msg->opc, msg->version, LUSTRE_OBD_VERSION); + break; + case MDS_GETSTATUS: + case MDS_GETATTR: + case MDS_GETATTR_NAME: + case MDS_STATFS: + case MDS_READPAGE: + case MDS_REINT: + case MDS_CLOSE: + case MDS_DONE_WRITING: + case MDS_PIN: + case MDS_SYNC: + case MDS_GETXATTR: + case MDS_SETXATTR: + case MDS_SET_INFO: + case MDS_QUOTACHECK: + case MDS_QUOTACTL: + case QUOTA_DQACQ: + case QUOTA_DQREL: + rc = lustre_msg_check_version(msg, LUSTRE_MDS_VERSION); + if (rc) + CERROR("bad opc %u version %08x, expecting %08x\n", + msg->opc, msg->version, LUSTRE_MDS_VERSION); + break; + case LDLM_ENQUEUE: + case LDLM_CONVERT: + case LDLM_BL_CALLBACK: + case LDLM_CP_CALLBACK: + rc = lustre_msg_check_version(msg, LUSTRE_DLM_VERSION); + if (rc) + CERROR("bad opc %u version %08x, expecting %08x\n", + msg->opc, msg->version, LUSTRE_DLM_VERSION); + break; + case OBD_LOG_CANCEL: + case LLOG_ORIGIN_HANDLE_CREATE: + case LLOG_ORIGIN_HANDLE_NEXT_BLOCK: + case 
LLOG_ORIGIN_HANDLE_PREV_BLOCK: + case LLOG_ORIGIN_HANDLE_READ_HEADER: + case LLOG_ORIGIN_HANDLE_CLOSE: + case LLOG_CATINFO: + rc = lustre_msg_check_version(msg, LUSTRE_LOG_VERSION); + if (rc) + CERROR("bad opc %u version %08x, expecting %08x\n", + msg->opc, msg->version, LUSTRE_LOG_VERSION); + break; + default: + CERROR("MDS unknown opcode %d\n", msg->opc); + rc = -ENOTSUPP; + } + return rc; +} + int mds_handle(struct ptlrpc_request *req) { int should_process, fail = OBD_FAIL_MDS_ALL_REPLY_NET; @@ -1257,6 +1379,13 @@ int mds_handle(struct ptlrpc_request *req) OBD_FAIL_RETURN(OBD_FAIL_MDS_ALL_REQUEST_NET | OBD_FAIL_ONCE, 0); LASSERT(current->journal_info == NULL); + + rc = mds_msg_check_version(req->rq_reqmsg); + if (rc) { + CERROR("MDS drop mal-formed request\n"); + RETURN(rc); + } + /* XXX identical to OST */ if (req->rq_reqmsg->opc != MDS_CONNECT) { struct mds_export_data *med; @@ -1332,7 +1461,7 @@ int mds_handle(struct ptlrpc_request *req) case MDS_GETATTR: DEBUG_REQ(D_INODE, req, "getattr"); OBD_FAIL_RETURN(OBD_FAIL_MDS_GETATTR_NET, 0); - rc = mds_getattr(0, req); + rc = mds_getattr(req, MDS_REQ_REC_OFF); break; case MDS_SETXATTR: @@ -1348,7 +1477,7 @@ int mds_handle(struct ptlrpc_request *req) break; case MDS_GETATTR_NAME: { - struct lustre_handle lockh; + struct lustre_handle lockh = { 0 }; DEBUG_REQ(D_INODE, req, "getattr_name"); OBD_FAIL_RETURN(OBD_FAIL_MDS_GETATTR_NAME_NET, 0); @@ -1356,11 +1485,11 @@ int mds_handle(struct ptlrpc_request *req) * acquiring any new locks in mds_getattr_name, so we don't * want to cancel. 
*/ - lockh.cookie = 0; - rc = mds_getattr_name(0, req, MDS_INODELOCK_UPDATE, &lockh); + rc = mds_getattr_name(MDS_REQ_REC_OFF, req, + MDS_INODELOCK_UPDATE, &lockh); /* this non-intent call (from an ioctl) is special */ req->rq_status = rc; - if (rc == 0 && lockh.cookie) + if (rc == 0 && lustre_handle_is_used(&lockh)) ldlm_lock_decref(&lockh, LCK_CR); break; } @@ -1373,7 +1502,7 @@ int mds_handle(struct ptlrpc_request *req) case MDS_READPAGE: DEBUG_REQ(D_INODE, req, "readpage"); OBD_FAIL_RETURN(OBD_FAIL_MDS_READPAGE_NET, 0); - rc = mds_readpage(req); + rc = mds_readpage(req, MDS_REQ_REC_OFF); if (OBD_FAIL_CHECK_ONCE(OBD_FAIL_MDS_SENDPAGE)) { RETURN(0); @@ -1382,9 +1511,10 @@ int mds_handle(struct ptlrpc_request *req) break; case MDS_REINT: { - __u32 *opcp = lustre_msg_buf(req->rq_reqmsg, 0, sizeof (*opcp)); + __u32 *opcp = lustre_msg_buf(req->rq_reqmsg, MDS_REQ_REC_OFF, + sizeof (*opcp)); __u32 opc; - int size[3] = {sizeof(struct mds_body), mds->mds_max_mdsize, + int size[] = { sizeof(struct mds_body), mds->mds_max_mdsize, mds->mds_max_cookiesize}; int bufcount; @@ -1416,7 +1546,7 @@ int mds_handle(struct ptlrpc_request *req) if (rc) break; - rc = mds_reint(req, 0, NULL); + rc = mds_reint(req, MDS_REQ_REC_OFF, NULL); fail = OBD_FAIL_MDS_REINT_NET_REP; break; } @@ -1424,25 +1554,25 @@ int mds_handle(struct ptlrpc_request *req) case MDS_CLOSE: DEBUG_REQ(D_INODE, req, "close"); OBD_FAIL_RETURN(OBD_FAIL_MDS_CLOSE_NET, 0); - rc = mds_close(req); + rc = mds_close(req, MDS_REQ_REC_OFF); break; case MDS_DONE_WRITING: DEBUG_REQ(D_INODE, req, "done_writing"); OBD_FAIL_RETURN(OBD_FAIL_MDS_DONE_WRITING_NET, 0); - rc = mds_done_writing(req); + rc = mds_done_writing(req, MDS_REQ_REC_OFF); break; case MDS_PIN: DEBUG_REQ(D_INODE, req, "pin"); OBD_FAIL_RETURN(OBD_FAIL_MDS_PIN_NET, 0); - rc = mds_pin(req); + rc = mds_pin(req, MDS_REQ_REC_OFF); break; case MDS_SYNC: DEBUG_REQ(D_INODE, req, "sync"); OBD_FAIL_RETURN(OBD_FAIL_MDS_SYNC_NET, 0); - rc = mds_sync(req); + rc = mds_sync(req, 
MDS_REQ_REC_OFF); break; case MDS_SET_INFO: @@ -1497,11 +1627,21 @@ int mds_handle(struct ptlrpc_request *req) OBD_FAIL_RETURN(OBD_FAIL_OBD_LOGD_NET, 0); rc = llog_origin_handle_create(req); break; + case LLOG_ORIGIN_HANDLE_DESTROY: + DEBUG_REQ(D_INODE, req, "llog_init"); + OBD_FAIL_RETURN(OBD_FAIL_OBD_LOGD_NET, 0); + rc = llog_origin_handle_destroy(req); + break; case LLOG_ORIGIN_HANDLE_NEXT_BLOCK: DEBUG_REQ(D_INODE, req, "llog next block"); OBD_FAIL_RETURN(OBD_FAIL_OBD_LOGD_NET, 0); rc = llog_origin_handle_next_block(req); break; + case LLOG_ORIGIN_HANDLE_PREV_BLOCK: + DEBUG_REQ(D_INODE, req, "llog prev block"); + OBD_FAIL_RETURN(OBD_FAIL_OBD_LOGD_NET, 0); + rc = llog_origin_handle_prev_block(req); + break; case LLOG_ORIGIN_HANDLE_READ_HEADER: DEBUG_REQ(D_INODE, req, "llog read header"); OBD_FAIL_RETURN(OBD_FAIL_OBD_LOGD_NET, 0); @@ -1581,20 +1721,29 @@ int mds_update_server_data(struct obd_device *obd, int force_sync) } static -void fsoptions_to_mds_flags(struct mds_obd *mds, const char *options) +void fsoptions_to_mds_flags(struct mds_obd *mds, char *options) { - const char *p = options; + char *p = options; while (*options) { + int len; + while (*p && *p != ',') p++; - if ((p - options == sizeof("user_xattr") - 1) && - !memcmp(options, "user_xattr", sizeof("user_xattr") - 1)) + len = p - options; + if (len == sizeof("user_xattr") - 1 && + memcmp(options, "user_xattr", len) == 0) { mds->mds_fl_user_xattr = 1; - else if ((p - options == sizeof("acl") - 1) && - !memcmp(options, "acl", sizeof("acl") - 1)) + } else if (len == sizeof("acl") - 1 && + memcmp(options, "acl", len) == 0) { +#ifdef CONFIG_FS_POSIX_ACL mds->mds_fl_acl = 1; +#else + CWARN("ignoring unsupported acl mount option\n"); + memmove(options, p, strlen(p) + 1); +#endif + } options = ++p; } @@ -1610,9 +1759,11 @@ static int mds_setup(struct obd_device *obd, obd_count len, void *buf) { struct lprocfs_static_vars lvars; struct lustre_cfg* lcfg = buf; - char *options = NULL; struct mds_obd *mds = 
&obd->u.mds; struct vfsmount *mnt; + struct obd_uuid uuid; + __u8 *uuid_ptr; + char *options, *str, *label; char ns_name[48]; unsigned long page; int rc = 0; @@ -1641,7 +1792,7 @@ static int mds_setup(struct obd_device *obd, obd_count len, void *buf) /* here we use "iopen_nopriv" hardcoded, because it affects MDS utility * and the rest of options are passed by mount options. Probably this * should be moved to somewhere else like startup scripts or lconf. */ - sprintf(options, "iopen_nopriv"); + strcpy(options, "iopen_nopriv"); if (LUSTRE_CFG_BUFLEN(lcfg, 4) > 0 && lustre_cfg_buf(lcfg, 4)) { sprintf(options + strlen(options), ",%s", @@ -1662,7 +1813,7 @@ static int mds_setup(struct obd_device *obd, obd_count len, void *buf) CDEBUG(D_SUPER, "%s: mnt = %p\n", lustre_cfg_string(lcfg, 1), mnt); LASSERT(!lvfs_check_rdonly(lvfs_sbdev(mnt->mnt_sb))); - + sema_init(&mds->mds_orphan_recovery_sem, 1); sema_init(&mds->mds_epoch_sem, 1); spin_lock_init(&mds->mds_transno_lock); @@ -1710,7 +1861,7 @@ static int mds_setup(struct obd_device *obd, obd_count len, void *buf) rc = lquota_setup(quota_interface, obd, lcfg); if (rc) GOTO(err_fs, rc); - + mds->mds_group_hash = upcall_cache_init(obd->obd_name); if (IS_ERR(mds->mds_group_hash)) { rc = PTR_ERR(mds->mds_group_hash); @@ -1718,8 +1869,8 @@ static int mds_setup(struct obd_device *obd, obd_count len, void *buf) GOTO(err_qctxt, rc); } - /* Wait for mds_postrecov trying to clear orphans until 9439 is fixed */ - obd->obd_async_recov = 0; + /* Don't wait for mds_postrecov trying to clear orphans */ + obd->obd_async_recov = 1; rc = mds_postsetup(obd); if (rc) GOTO(err_qctxt, rc); @@ -1728,25 +1879,34 @@ static int mds_setup(struct obd_device *obd, obd_count len, void *buf) lprocfs_init_vars(mds, &lvars); lprocfs_obd_setup(obd, lvars.obd_vars); + uuid_ptr = fsfilt_uuid(obd, obd->u.obt.obt_sb); + if (uuid_ptr != NULL) { + class_uuid_unparse(uuid_ptr, &uuid); + str = uuid.uuid; + } else { + str = "no UUID"; + } + + label = 
fsfilt_label(obd, obd->u.obt.obt_sb); if (obd->obd_recovering) { - LCONSOLE_WARN("MDT %s now serving %s, but will be in recovery " - "until %d %s reconnect, or if no clients " - "reconnect for %d:%.02d; during that time new " + LCONSOLE_WARN("MDT %s now serving %s (%s%s%s), but will be in " + "recovery until %d %s reconnect, or if no clients" + " reconnect for %d:%.02d; during that time new " "clients will not be allowed to connect. " "Recovery progress can be monitored by watching " "/proc/fs/lustre/mds/%s/recovery_status.\n", - obd->obd_name, - lustre_cfg_string(lcfg, 1), + obd->obd_name, lustre_cfg_string(lcfg, 1), + label ?: "", label ? "/" : "", str, obd->obd_recoverable_clients, - (obd->obd_recoverable_clients == 1) + (obd->obd_recoverable_clients == 1) ? "client" : "clients", (int)(OBD_RECOVERY_TIMEOUT) / 60, (int)(OBD_RECOVERY_TIMEOUT) % 60, obd->obd_name); } else { - LCONSOLE_INFO("MDT %s now serving %s with recovery %s.\n", - obd->obd_name, - lustre_cfg_string(lcfg, 1), + LCONSOLE_INFO("MDT %s now serving %s (%s%s%s) with recovery " + "%s\n", obd->obd_name, lustre_cfg_string(lcfg, 1), + label ?: "", label ? "/" : "", str, obd->obd_replayable ? 
"enabled" : "disabled"); } @@ -1756,7 +1916,7 @@ static int mds_setup(struct obd_device *obd, obd_count len, void *buf) RETURN(0); err_qctxt: - lquota_cleanup(quota_interface, obd); + lquota_cleanup(quota_interface, obd); err_fs: /* No extra cleanup needed for llog_init_commit_thread() */ mds_fs_cleanup(obd); @@ -1775,6 +1935,36 @@ err_ops: return rc; } +static int mds_lov_clean(struct obd_device *obd) +{ + struct mds_obd *mds = &obd->u.mds; + struct obd_device *osc = mds->mds_osc_obd; + ENTRY; + + if (mds->mds_profile) { + class_del_profile(mds->mds_profile); + OBD_FREE(mds->mds_profile, strlen(mds->mds_profile) + 1); + mds->mds_profile = NULL; + } + + /* There better be a lov */ + if (!osc) + RETURN(0); + + obd_register_observer(osc, NULL); + + /* Give lov our same shutdown flags */ + osc->obd_force = obd->obd_force; + osc->obd_fail = obd->obd_fail; + + /* Cleanup the lov */ + obd_disconnect(mds->mds_osc_exp); + class_manual_cleanup(osc); + mds->mds_osc_exp = NULL; + + RETURN(0); +} + static int mds_postsetup(struct obd_device *obd) { struct mds_obd *mds = &obd->u.mds; @@ -1786,6 +1976,11 @@ static int mds_postsetup(struct obd_device *obd) if (rc) RETURN(rc); + rc = llog_setup(obd, LLOG_LOVEA_ORIG_CTXT, obd, 0, NULL, + &llog_lvfs_ops); + if (rc) + RETURN(rc); + if (mds->mds_profile) { struct lvfs_run_ctxt saved; struct lustre_profile *lprof; @@ -1830,27 +2025,33 @@ err_cleanup: mds_lov_clean(obd); err_llog: llog_cleanup(llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT)); + llog_cleanup(llog_get_context(obd, LLOG_LOVEA_ORIG_CTXT)); RETURN(rc); } int mds_postrecov(struct obd_device *obd) { - int rc, item = 0; + int rc; ENTRY; - if (obd->obd_fail) + if (obd->obd_fail) RETURN(0); LASSERT(!obd->obd_recovering); LASSERT(llog_get_context(obd, LLOG_MDS_OST_ORIG_CTXT) != NULL); + /* set nextid first, so we are sure it happens */ + rc = mds_lov_set_nextid(obd); + if (rc) { + CERROR("%s: mds_lov_set_nextid failed\n", + obd->obd_name); + GOTO(out, rc); + } + /* clean PENDING dir 
*/ rc = mds_cleanup_pending(obd); - if (rc < 0) { + if (rc < 0) GOTO(out, rc); - } else { - item = rc; - } /* Does anyone need this to be synchronous ever? */ mds_lov_start_synchronize(obd, NULL, obd->obd_async_recov); @@ -1859,37 +2060,20 @@ int mds_postrecov(struct obd_device *obd) lquota_recovery(quota_interface, obd); out: - RETURN(rc < 0 ? rc : item); + RETURN(rc); } -int mds_lov_clean(struct obd_device *obd) +/* We need to be able to stop an mds_lov_synchronize */ +static int mds_lov_early_clean(struct obd_device *obd) { struct mds_obd *mds = &obd->u.mds; struct obd_device *osc = mds->mds_osc_obd; - ENTRY; - - if (mds->mds_profile) { - class_del_profile(mds->mds_profile); - OBD_FREE(mds->mds_profile, strlen(mds->mds_profile) + 1); - mds->mds_profile = NULL; - } - /* There better be a lov */ - if (!osc) - RETURN(0); - - obd_register_observer(osc, NULL); - - /* Give lov our same shutdown flags */ - osc->obd_force = obd->obd_force; - osc->obd_fail = obd->obd_fail; - - /* Cleanup the lov */ - obd_disconnect(mds->mds_osc_exp); - class_manual_cleanup(osc); - mds->mds_osc_exp = NULL; + if (!osc || (!obd->obd_force && !obd->obd_fail)) + return(0); - RETURN(0); + CDEBUG(D_HA, "abort inflight\n"); + return (obd_precleanup(osc, OBD_CLEANUP_EARLY)); } static int mds_precleanup(struct obd_device *obd, int stage) @@ -1898,14 +2082,15 @@ static int mds_precleanup(struct obd_device *obd, int stage) ENTRY; switch (stage) { - case 1: - mds_lov_set_cleanup_flags(obd); + case OBD_CLEANUP_EXPORTS: target_cleanup_recovery(obd); + mds_lov_early_clean(obd); break; - case 2: + case OBD_CLEANUP_SELF_EXP: mds_lov_disconnect(obd); mds_lov_clean(obd); llog_cleanup(llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT)); + llog_cleanup(llog_get_context(obd, LLOG_LOVEA_ORIG_CTXT)); rc = obd_llog_finish(obd, 0); } RETURN(rc); @@ -1951,7 +2136,7 @@ static int mds_cleanup(struct obd_device *obd) /* We can only unlock kernel if we are in the context of sys_ioctl, otherwise we never called lock_kernel */ 
- if (kernel_locked()) { + if (ll_kernel_locked()) { unlock_kernel(); must_relock++; } @@ -1980,7 +2165,7 @@ static int mds_cleanup(struct obd_device *obd) RETURN(0); } -static void fixup_handle_for_resent_req(struct ptlrpc_request *req, +static void fixup_handle_for_resent_req(struct ptlrpc_request *req, int offset, struct ldlm_lock *new_lock, struct ldlm_lock **old_lock, struct lustre_handle *lockh) @@ -1988,7 +2173,7 @@ static void fixup_handle_for_resent_req(struct ptlrpc_request *req, struct obd_export *exp = req->rq_export; struct obd_device *obd = exp->exp_obd; struct ldlm_request *dlmreq = - lustre_msg_buf(req->rq_reqmsg, 0, sizeof (*dlmreq)); + lustre_msg_buf(req->rq_reqmsg, offset, sizeof (*dlmreq)); struct lustre_handle remote_hdl = dlmreq->lock_handle1; struct list_head *iter; @@ -2017,7 +2202,7 @@ static void fixup_handle_for_resent_req(struct ptlrpc_request *req, /* If the xid matches, then we know this is a resent request, * and allow it. (It's probably an OPEN, for which we don't * send a lock */ - if (req->rq_xid == + if (req->rq_xid == le64_to_cpu(exp->exp_mds_data.med_mcd->mcd_last_xid)) return; @@ -2057,15 +2242,16 @@ static int mds_intent_policy(struct ldlm_namespace *ns, struct lustre_handle lockh = { 0 }; struct ldlm_lock *new_lock = NULL; int getattr_part = MDS_INODELOCK_UPDATE; - int rc, offset = 2; - int repbufcnt = 3, repsize[4] = {sizeof(struct ldlm_reply), - sizeof(struct mds_body), - mds->mds_max_mdsize}; + int repsize[4] = {sizeof(*rep), + sizeof(struct mds_body), + mds->mds_max_mdsize}; + int repbufcnt = 3, offset = MDS_REQ_INTENT_REC_OFF; + int rc; ENTRY; LASSERT(req != NULL); - if (req->rq_reqmsg->bufcount <= 1) { + if (req->rq_reqmsg->bufcount <= MDS_REQ_INTENT_IT_OFF) { /* No intent was provided */ int size = sizeof(struct ldlm_reply); rc = lustre_pack_reply(req, 1, &size, NULL); @@ -2073,7 +2259,8 @@ static int mds_intent_policy(struct ldlm_namespace *ns, RETURN(0); } - it = lustre_swab_reqbuf(req, 1, sizeof(*it), 
lustre_swab_ldlm_intent); + it = lustre_swab_reqbuf(req, MDS_REQ_INTENT_IT_OFF, sizeof(*it), + lustre_swab_ldlm_intent); if (it == NULL) { CERROR("Intent missing\n"); RETURN(req->rq_status = -EFAULT); @@ -2083,6 +2270,7 @@ static int mds_intent_policy(struct ldlm_namespace *ns, if ((req->rq_export->exp_connect_flags & OBD_CONNECT_ACL) && (it->opc & (IT_OPEN | IT_GETATTR | IT_LOOKUP))) + /* we should never allow OBD_CONNECT_ACL if not configured */ repsize[repbufcnt++] = LUSTRE_POSIX_ACL_MAX_SIZE; else if (it->opc & IT_UNLINK) repsize[repbufcnt++] = mds->mds_max_cookiesize; @@ -2099,7 +2287,8 @@ static int mds_intent_policy(struct ldlm_namespace *ns, switch ((long)it->opc) { case IT_OPEN: case IT_CREAT|IT_OPEN: - fixup_handle_for_resent_req(req, lock, NULL, &lockh); + fixup_handle_for_resent_req(req, MDS_REQ_INTENT_LOCKREQ_OFF, + lock, NULL, &lockh); /* XXX swab here to assert that an mds_open reint * packet is following */ rep->lock_policy_res2 = mds_reint(req, offset, &lockh); @@ -2110,7 +2299,7 @@ static int mds_intent_policy(struct ldlm_namespace *ns, RETURN(ELDLM_LOCK_ABORTED); if (intent_disposition(rep, DISP_LOOKUP_NEG) && !intent_disposition(rep, DISP_OPEN_OPEN)) -#endif +#endif RETURN(ELDLM_LOCK_ABORTED); break; case IT_LOOKUP: @@ -2118,10 +2307,20 @@ static int mds_intent_policy(struct ldlm_namespace *ns, case IT_GETATTR: getattr_part |= MDS_INODELOCK_LOOKUP; case IT_READDIR: - fixup_handle_for_resent_req(req, lock, &new_lock, &lockh); + fixup_handle_for_resent_req(req, MDS_REQ_INTENT_LOCKREQ_OFF, + lock, &new_lock, &lockh); + + /* INODEBITS_INTEROP: if this lock was converted from a + * plain lock (client does not support inodebits), then + * child lock must be taken with both lookup and update + * bits set for all operations. 
+ */ + if (!(req->rq_export->exp_connect_flags & OBD_CONNECT_IBITS)) + getattr_part = MDS_INODELOCK_LOOKUP | + MDS_INODELOCK_UPDATE; + rep->lock_policy_res2 = mds_getattr_name(offset, req, getattr_part, &lockh); - /* FIXME: LDLM can set req->rq_status. MDS sets policy_res{1,2} with disposition and status. - replay: returns 0 & req->status is old status @@ -2216,8 +2415,8 @@ static int mdt_setup(struct obd_device *obd, obd_count len, void *buf) ptlrpc_init_svc(MDS_NBUFS, MDS_BUFSIZE, MDS_MAXREQSIZE, MDS_MAXREPSIZE, MDS_REQUEST_PORTAL, MDC_REPLY_PORTAL, MDS_SERVICE_WATCHDOG_TIMEOUT, - mds_handle, "mds", obd->obd_proc_entry, NULL, - MDT_NUM_THREADS); + mds_handle, LUSTRE_MDS_NAME, + obd->obd_proc_entry, NULL, MDT_NUM_THREADS); if (!mds->mds_service) { CERROR("failed to start service\n"); @@ -2300,7 +2499,7 @@ static int mdt_health_check(struct obd_device *obd) { struct mds_obd *mds = &obd->u.mds; int rc = 0; - + down(&mds->mds_health_sem); rc |= ptlrpc_service_health_check(mds->mds_readpage_service); rc |= ptlrpc_service_health_check(mds->mds_setattr_service); @@ -2313,11 +2512,10 @@ static int mdt_health_check(struct obd_device *obd) */ if(rc != 0) rc = 1; - + return rc; } - static struct dentry *mds_lvfs_fid2dentry(__u64 id, __u32 gen, __u64 gr, void *data) { @@ -2328,6 +2526,21 @@ static struct dentry *mds_lvfs_fid2dentry(__u64 id, __u32 gen, __u64 gr, return mds_fid2dentry(&obd->u.mds, &fid, NULL); } +static int mds_health_check(struct obd_device *obd) +{ + struct obd_device_target *odt = &obd->u.obt; + struct mds_obd *mds = &obd->u.mds; + int rc = 0; + + if (odt->obt_sb->s_flags & MS_RDONLY) + rc = 1; + + LASSERT(mds->mds_health_check_filp != NULL); + rc |= !!lvfs_check_io_health(obd, mds->mds_health_check_filp); + + return rc; +} + struct lvfs_callback_ops mds_lvfs_ops = { l_fid2dentry: mds_lvfs_fid2dentry, }; @@ -2336,6 +2549,7 @@ struct lvfs_callback_ops mds_lvfs_ops = { static struct obd_ops mds_obd_ops = { .o_owner = THIS_MODULE, .o_connect = mds_connect, + 
.o_reconnect = mds_reconnect, .o_init_export = mds_init_export, .o_destroy_export = mds_destroy_export, .o_disconnect = mds_disconnect, @@ -2350,6 +2564,7 @@ static struct obd_ops mds_obd_ops = { .o_llog_init = mds_llog_init, .o_llog_finish = mds_llog_finish, .o_notify = mds_notify, + .o_health_check = mds_health_check, }; static struct obd_ops mdt_obd_ops = { @@ -2359,8 +2574,8 @@ static struct obd_ops mdt_obd_ops = { .o_health_check = mdt_health_check, }; -quota_interface_t *quota_interface = NULL; -extern quota_interface_t mds_quota_interface; +quota_interface_t *quota_interface; +quota_interface_t mds_quota_interface; static int __init mds_init(void) { diff --git a/lustre/mds/mds_internal.h b/lustre/mds/mds_internal.h index 8d0565d..4cf39d6 100644 --- a/lustre/mds/mds_internal.h +++ b/lustre/mds/mds_internal.h @@ -5,8 +5,45 @@ #ifndef _MDS_INTERNAL_H #define _MDS_INTERNAL_H +#include /* XXX */ #include +#define MDT_ROCOMPAT_SUPP (OBD_ROCOMPAT_LOVOBJID) + +#define MDT_INCOMPAT_SUPP (OBD_INCOMPAT_MDT) + +/* Data stored per server at the head of the last_rcvd file. In le32 order. + * Try to keep this the same as fsd_server_data so we might one day merge. 
*/ +struct mds_server_data { + __u8 msd_uuid[40]; /* server UUID */ + __u64 msd_last_transno; /* last completed transaction ID */ + __u64 msd_mount_count; /* MDS incarnation number */ + __u64 msd_mount_count_new; /* future MDS incarnation number */ + __u32 msd_feature_compat; /* compatible feature flags */ + __u32 msd_feature_rocompat;/* read-only compatible feature flags */ + __u32 msd_feature_incompat;/* incompatible feature flags */ + __u32 msd_server_size; /* size of server data area */ + __u32 msd_client_start; /* start of per-client data area */ + __u16 msd_client_size; /* size of per-client data area */ + __u16 msd_subdir_count; /* number of subdirectories for objects */ + __u64 msd_catalog_oid; /* recovery catalog object id */ + __u32 msd_catalog_ogen; /* recovery catalog inode generation */ + __u8 msd_peeruuid[40]; /* UUID of LOV/OSC associated with MDS */ + __u32 msd_ost_index; /* index number of OST in LOV */ + __u32 msd_mds_index; /* index number of MDS in LMV */ + __u8 msd_padding[LR_SERVER_SIZE - 148]; +}; + +/* Data stored per client in the last_rcvd file. In le32 order. */ +struct mds_client_data { + __u8 mcd_uuid[40]; /* client UUID */ + __u64 mcd_last_transno; /* last completed transaction ID */ + __u64 mcd_last_xid; /* xid for the last transaction */ + __u32 mcd_last_result; /* result from last RPC */ + __u32 mcd_last_data; /* per-op data (disposition for open &c.) 
*/ + __u8 mcd_padding[LR_CLIENT_SIZE - 64]; +}; + #define MDS_SERVICE_WATCHDOG_TIMEOUT (obd_timeout * 1000) #define MAX_ATIME_DIFF 60 @@ -127,6 +164,23 @@ int mds_osc_setattr_async(struct obd_device *obd, struct inode *inode, struct lov_mds_md *lmm, int lmm_size, struct llog_cookie *logcookies, struct ll_fid *fid); +int mds_get_parents_children_locked(struct obd_device *obd, + struct mds_obd *mds, + struct ll_fid *p1_fid, + struct dentry **de_srcdirp, + struct ll_fid *p2_fid, + struct dentry **de_tgtdirp, + int parent_mode, + const char *old_name, int old_len, + struct dentry **de_oldp, + const char *new_name, int new_len, + struct dentry **de_newp, + struct lustre_handle *dlm_handles, + int child_mode); + +void mds_shrink_reply(struct obd_device *obd, struct ptlrpc_request *req, + struct mds_body *body); +int mds_get_cookie_size(struct obd_device *obd, struct lov_mds_md *lmm); /* mds/mds_lib.c */ int mds_update_unpack(struct ptlrpc_request *, int offset, struct mds_update_record *); @@ -152,7 +206,6 @@ int mds_llog_finish(struct obd_device *obd, int count); /* mds/mds_lov.c */ int mds_lov_connect(struct obd_device *obd, char * lov_name); int mds_lov_disconnect(struct obd_device *obd); -void mds_lov_set_cleanup_flags(struct obd_device *); int mds_lov_write_objids(struct obd_device *obd); void mds_lov_update_objids(struct obd_device *obd, obd_id *ids); int mds_lov_clear_orphans(struct mds_obd *mds, struct obd_uuid *ost_uuid); @@ -160,23 +213,28 @@ int mds_lov_set_nextid(struct obd_device *obd); int mds_lov_start_synchronize(struct obd_device *obd, struct obd_uuid *uuid, int nonblock); int mds_post_mds_lovconf(struct obd_device *obd); -int mds_notify(struct obd_device *obd, struct obd_device *watched, int active); +int mds_notify(struct obd_device *obd, struct obd_device *watched, + enum obd_notify_event ev); int mds_convert_lov_ea(struct obd_device *obd, struct inode *inode, struct lov_mds_md *lmm, int lmm_size); void mds_objids_from_lmm(obd_id *ids, struct 
lov_mds_md *lmm, struct lov_desc *desc); +int mds_init_lov_desc(struct obd_device *obd, struct obd_export *osc_exp); /* mds/mds_open.c */ int mds_query_write_access(struct inode *inode); int mds_open(struct mds_update_record *rec, int offset, struct ptlrpc_request *req, struct lustre_handle *); -int mds_pin(struct ptlrpc_request *req); +int mds_pin(struct ptlrpc_request *req, int offset); void mds_mfd_unlink(struct mds_file_data *mfd, int decref); -int mds_mfd_close(struct ptlrpc_request *req, struct obd_device *obd, +int mds_mfd_close(struct ptlrpc_request *req, int offset, struct obd_device *obd, struct mds_file_data *mfd, int unlink_orphan); -int mds_close(struct ptlrpc_request *req); -int mds_done_writing(struct ptlrpc_request *req); +int mds_close(struct ptlrpc_request *req, int offset); +int mds_done_writing(struct ptlrpc_request *req, int offset); +/*mds/mds_join.c*/ +int mds_join_file(struct mds_update_record *rec, struct ptlrpc_request *req, + struct dentry *dchild, struct lustre_handle *lockh); /* mds/mds_fs.c */ int mds_client_add(struct obd_device *obd, struct mds_obd *mds, @@ -185,11 +243,11 @@ int mds_client_free(struct obd_export *exp); int mds_obd_create(struct obd_export *exp, struct obdo *oa, struct lov_stripe_md **ea, struct obd_trans_info *oti); int mds_obd_destroy(struct obd_export *exp, struct obdo *oa, - struct lov_stripe_md *ea, struct obd_trans_info *oti); + struct lov_stripe_md *ea, struct obd_trans_info *oti, + struct obd_export *md_exp); /* mds/handler.c */ extern struct lvfs_callback_ops mds_lvfs_ops; -int mds_lov_clean(struct obd_device *obd); extern int mds_iocontrol(unsigned int cmd, struct obd_export *exp, int len, void *karg, void *uarg); int mds_postrecov(struct obd_device *obd); @@ -206,6 +264,7 @@ int mds_pack_acl(struct mds_export_data *med, struct inode *inode, int repoff); /* quota stuff */ +extern quota_interface_t mds_quota_interface; extern quota_interface_t *quota_interface; /* mds/mds_xattr.c */ diff --git 
a/lustre/mds/mds_join.c b/lustre/mds/mds_join.c index fdc3189..6432379 100644 --- a/lustre/mds/mds_join.c +++ b/lustre/mds/mds_join.c @@ -30,18 +30,18 @@ #include #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include "mds_internal.h" -#include struct mdsea_cb_data { struct llog_handle *mc_llh; diff --git a/lustre/mds/mds_log.c b/lustre/mds/mds_log.c index b14ad93..a76be7d 100644 --- a/lustre/mds/mds_log.c +++ b/lustre/mds/mds_log.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include diff --git a/lustre/mds/mds_lov.c b/lustre/mds/mds_lov.c index a39e33a..b392f14 100644 --- a/lustre/mds/mds_lov.c +++ b/lustre/mds/mds_lov.c @@ -38,6 +38,7 @@ #include #include #include +#include #include "mds_internal.h" @@ -106,40 +107,82 @@ int mds_lov_write_objids(struct obd_device *obd) int mds_lov_clear_orphans(struct mds_obd *mds, struct obd_uuid *ost_uuid) { + int rc; + struct obdo oa; + struct obd_trans_info oti = {0}; struct lov_stripe_md *empty_ea = NULL; - struct obd_trans_info oti = { 0 }; - struct obdo *oa; + ENTRY; + + LASSERT(mds->mds_lov_objids != NULL); + + /* This create will in fact either create or destroy: If the OST is + * missing objects below this ID, they will be created. If it finds + * objects above this ID, they will be removed. 
*/ + memset(&oa, 0, sizeof(oa)); + oa.o_valid = OBD_MD_FLFLAGS; + oa.o_flags = OBD_FL_DELORPHAN; + if (ost_uuid != NULL) { + memcpy(&oa.o_inline, ost_uuid, sizeof(*ost_uuid)); + oa.o_valid |= OBD_MD_FLINLINE; + } + rc = obd_create(mds->mds_osc_exp, &oa, &empty_ea, &oti); + + RETURN(rc); +} + +/* update the LOV-OSC knowledge of the last used object id's */ +int mds_lov_set_nextid(struct obd_device *obd) +{ + struct mds_obd *mds = &obd->u.mds; int rc; ENTRY; + LASSERT(!obd->obd_recovering); + LASSERT(mds->mds_lov_objids != NULL); - oa = obdo_alloc(); - if (oa == NULL) - RETURN(-ENOMEM); + rc = obd_set_info(mds->mds_osc_exp, strlen("next_id"), "next_id", + mds->mds_lov_desc.ld_tgt_count, mds->mds_lov_objids); + RETURN(rc); +} - oa->o_valid = OBD_MD_FLFLAGS; - oa->o_flags = OBD_FL_DELORPHAN; +int mds_init_lov_desc(struct obd_device *obd, struct obd_export *osc_exp) +{ + struct mds_obd *mds = &obd->u.mds; + int valsize, rc, tgt_count; + __u32 stripes; + ENTRY; - if (ost_uuid != NULL) { - memcpy(&oa->o_inline, ost_uuid, sizeof(*ost_uuid)); - oa->o_valid |= OBD_MD_FLINLINE; + mds->mds_has_lov_desc = 0; + valsize = sizeof(mds->mds_lov_desc); + rc = obd_get_info(mds->mds_osc_exp, strlen("lovdesc") + 1, + "lovdesc", &valsize, &mds->mds_lov_desc); + if (rc) { + CERROR("can't get lov_desc, rc %d\n", rc); + RETURN(rc); } - oti.oti_objid = mds->mds_lov_objids; - rc = obd_create(mds->mds_osc_exp, oa, &empty_ea, &oti); + mds->mds_has_lov_desc = 1; + tgt_count = mds->mds_lov_desc.ld_tgt_count; + stripes = min(tgt_count, LOV_MAX_STRIPE_COUNT); - obdo_free(oa); - RETURN(rc); + mds->mds_max_mdsize = lov_mds_md_size(stripes); + mds->mds_max_cookiesize = stripes * sizeof(struct llog_cookie); + + CDEBUG(D_HA, "updated lov_desc, tgt_count: %d\n", tgt_count); + + CDEBUG(D_HA, "updating max_mdsize/max_cookiesize: %d/%d\n", + mds->mds_max_mdsize, mds->mds_max_cookiesize); + + RETURN(0); } /* update the LOV-OSC knowledge of the last used object id's */ int mds_lov_connect(struct obd_device 
*obd, char * lov_name) { - struct obd_connect_data *data = NULL; struct mds_obd *mds = &obd->u.mds; struct lustre_handle conn = {0,}; - int valsize; + struct obd_connect_data *data; int rc, i; ENTRY; @@ -156,15 +199,14 @@ int mds_lov_connect(struct obd_device *obd, char * lov_name) RETURN(-ENOTCONN); } - OBD_ALLOC_PTR(data); - if (!data) + OBD_ALLOC(data, sizeof(*data)); + if (data == NULL) RETURN(-ENOMEM); - data->ocd_connect_flags = OBD_CONNECT_CROW; - - rc = obd_connect(&conn, mds->mds_osc_obd, &obd->obd_uuid, - data); - OBD_FREE_PTR(data); - + data->ocd_connect_flags = OBD_CONNECT_VERSION | OBD_CONNECT_INDEX; + data->ocd_version = LUSTRE_VERSION_CODE; + /* NB: lov_connect() needs to fill in .ocd_index for each OST */ + rc = obd_connect(&conn, mds->mds_osc_obd, &obd->obd_uuid, data); + OBD_FREE(data, sizeof(*data)); if (rc) { CERROR("MDS cannot connect to LOV %s (%d)\n", lov_name, rc); mds->mds_osc_obd = ERR_PTR(rc); @@ -179,16 +221,11 @@ int mds_lov_connect(struct obd_device *obd, char * lov_name) GOTO(err_discon, rc); } - valsize = sizeof(mds->mds_lov_desc); - rc = obd_get_info(mds->mds_osc_exp, strlen("lovdesc") + 1, "lovdesc", - &valsize, &mds->mds_lov_desc); + /* init lov_desc + easize */ + rc = mds_init_lov_desc(obd, mds->mds_osc_exp); if (rc) GOTO(err_reg, rc); - mds->mds_max_mdsize = lov_mds_md_size(mds->mds_lov_desc.ld_tgt_count); - mds->mds_max_cookiesize = mds->mds_lov_desc.ld_tgt_count* - sizeof(struct llog_cookie); - mds->mds_has_lov_desc = 1; rc = mds_lov_read_objids(obd); if (rc) { CERROR("cannot read %s: rc = %d\n", "lov_objids", rc); @@ -258,34 +295,6 @@ int mds_lov_disconnect(struct obd_device *obd) RETURN(rc); } -/* for consistency, let's make the lov and the lov's - * osc's see the same cleanup flags as our mds */ -void mds_lov_set_cleanup_flags(struct obd_device *obd) -{ - struct mds_obd *mds = &obd->u.mds; - struct lov_obd *lov; - - if (IS_ERR(mds->mds_osc_obd) || (mds->mds_osc_exp == NULL)) - return; - - lov = &mds->mds_osc_obd->u.lov; - 
mds->mds_osc_obd->obd_force = obd->obd_force; - mds->mds_osc_obd->obd_fail = obd->obd_fail; - if (lov->tgts) { - struct obd_export *osc_exp; - int i; - spin_lock(&lov->lov_lock); - for (i = 0; i < lov->desc.ld_tgt_count; i++) { - if (lov->tgts[i].ltd_exp != NULL) { - osc_exp = lov->tgts[i].ltd_exp; - osc_exp->exp_obd->obd_force = obd->obd_force; - osc_exp->exp_obd->obd_fail = obd->obd_fail; - } - } - spin_unlock(&lov->lov_lock); - } -} - int mds_iocontrol(unsigned int cmd, struct obd_export *exp, int len, void *karg, void *uarg) { @@ -517,7 +526,10 @@ static int __mds_lov_syncronize(void *data) CWARN("MDS %s: %s now active, resetting orphans\n", obd->obd_name, uuid ? (char *)uuid->uuid : "All OSC's"); - + + if (obd->obd_stopping) + GOTO(out, rc = -ENODEV); + rc = mds_lov_clear_orphans(&obd->u.mds, uuid); if (rc != 0) { CERROR("%s: failed at mds_lov_clear_orphans: %d\n", @@ -525,9 +537,10 @@ static int __mds_lov_syncronize(void *data) GOTO(out, rc); } + EXIT; out: - class_export_put(obd->obd_self_export); - RETURN(rc); + class_decref(obd); + return rc; } int mds_lov_synchronize(void *data) @@ -560,9 +573,16 @@ int mds_lov_start_synchronize(struct obd_device *obd, struct obd_uuid *uuid, mlsi->mlsi_obd = obd; mlsi->mlsi_uuid = uuid; - - /* We need to lock the mds in place for our new thread context. */ - class_export_get(obd->obd_self_export); + + /* Although class_export_get(obd->obd_self_export) would lock + the MDS in place, since it's only a self-export + it doesn't lock the LOV in place. The LOV can be disconnected + during MDS precleanup, leaving nothing for __mds_lov_syncronize. + Simply taking an export ref on the LOV doesn't help, because it's + still disconnected. Taking an obd reference insures that we don't + disconnect the LOV. This of course means a cleanup won't + finish for as long as the sync is blocking. 
*/ + atomic_inc(&obd->obd_refcount); if (nonblock) { /* Syncronize in the background */ @@ -570,7 +590,7 @@ int mds_lov_start_synchronize(struct obd_device *obd, struct obd_uuid *uuid, if (rc < 0) { CERROR("%s: error starting mds_lov_synchronize: %d\n", obd->obd_name, rc); - class_export_put(obd->obd_self_export); + class_decref(obd); } else { CDEBUG(D_HA, "%s: mds_lov_synchronize thread: %d\n", obd->obd_name, rc); @@ -583,16 +603,18 @@ int mds_lov_start_synchronize(struct obd_device *obd, struct obd_uuid *uuid, RETURN(rc); } -int mds_notify(struct obd_device *obd, struct obd_device *watched, int active) +int mds_notify(struct obd_device *obd, struct obd_device *watched, + enum obd_notify_event ev) { + struct mds_obd *mds = &obd->u.mds; struct obd_uuid *uuid; int rc = 0; ENTRY; - if (!active) + if (ev != OBD_NOTIFY_ACTIVE) RETURN(0); - if (strcmp(watched->obd_type->typ_name, "osc")) { + if (strcmp(watched->obd_type->typ_name, LUSTRE_OSC_NAME)) { CERROR("unexpected notification of %s %s!\n", watched->obd_type->typ_name, watched->obd_name); RETURN(-EINVAL); @@ -600,14 +622,19 @@ int mds_notify(struct obd_device *obd, struct obd_device *watched, int active) uuid = &watched->u.cli.cl_import->imp_target_uuid; if (obd->obd_recovering) { + /* in the case OBD is in recovery we do not reinit desc and + * easize, as that will be done in mds_lov_connect() after + * recovery is finished. */ CWARN("MDS %s: in recovery, not resetting orphans on %s\n", obd->obd_name, uuid->uuid); } else { LASSERT(llog_get_context(obd, LLOG_MDS_OST_ORIG_CTXT) != NULL); - - rc = obd_set_info(obd->u.mds.mds_osc_exp, strlen("mds_conn"), - "mds_conn", 0, uuid); - if (rc != 0) + + /* this may be called also in case of adding new OST, thus, we + * have to update MDS lov_desc and re-init MDS easize. The same + * should be done on clients. 
*/ + rc = mds_init_lov_desc(obd, mds->mds_osc_exp); + if (rc) RETURN(rc); rc = mds_lov_start_synchronize(obd, uuid, 1); @@ -636,12 +663,14 @@ int mds_convert_lov_ea(struct obd_device *obd, struct inode *inode, int rc, err; ENTRY; - if (le32_to_cpu(lmm->lmm_magic) == LOV_MAGIC) + if (le32_to_cpu(lmm->lmm_magic) == LOV_MAGIC || + le32_to_cpu(lmm->lmm_magic) == LOV_MAGIC_JOIN) RETURN(0); CDEBUG(D_INODE, "converting LOV EA on %lu/%u from %#08x to %#08x\n", inode->i_ino, inode->i_generation, le32_to_cpu(lmm->lmm_magic), LOV_MAGIC); + rc = obd_unpackmd(obd->u.mds.mds_osc_exp, &lsm, lmm, lmm_size); if (rc < 0) GOTO(conv_end, rc); @@ -657,7 +686,7 @@ int mds_convert_lov_ea(struct obd_device *obd, struct inode *inode, GOTO(conv_free, rc); } - rc = fsfilt_set_md(obd, inode, handle, lmm, lmm_size); + rc = fsfilt_set_md(obd, inode, handle, lmm, lmm_size, "lov"); err = fsfilt_commit(obd, inode, handle, 0); if (!rc) diff --git a/lustre/mds/mds_unlink_open.c b/lustre/mds/mds_unlink_open.c index 9e15740..b877e69 100644 --- a/lustre/mds/mds_unlink_open.c +++ b/lustre/mds/mds_unlink_open.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include diff --git a/lustre/obdclass/Makefile.in b/lustre/obdclass/Makefile.in index 6cdb442..ff70e59 100644 --- a/lustre/obdclass/Makefile.in +++ b/lustre/obdclass/Makefile.in @@ -23,7 +23,7 @@ obdclass-all-objs := llog.o llog_cat.o llog_lvfs.o llog_obd.o llog_swab.o obdclass-all-objs += class_obd.o obdclass-all-objs += debug.o genops.o uuid.o llog_ioctl.o obdclass-all-objs += lprocfs_status.o lustre_handles.o lustre_peer.o -obdclass-all-objs += statfs_pack.o obdo.o obd_config.o +obdclass-all-objs += statfs_pack.o obdo.o obd_config.o prng.o obdclass-objs := $(obdclass-linux-objs) $(obdclass-all-objs) diff --git a/lustre/obdclass/class_obd.c b/lustre/obdclass/class_obd.c index 391c339..f144fda 100644 --- a/lustre/obdclass/class_obd.c +++ b/lustre/obdclass/class_obd.c @@ -41,6 +41,7 @@ #include #ifdef __KERNEL__ #include +#include 
#endif #include #include "llog_internal.h" @@ -323,9 +324,8 @@ int class_handle_ioctl(unsigned int cmd, unsigned long arg) CERROR("Device %d not attached\n", obd->obd_minor); GOTO(out, err = -ENODEV); } - CDEBUG(D_IOCTL, - "disabling committed-transno notifications on %d\n", - obd->obd_minor); + CDEBUG(D_HA, "%s: disabling committed-transno notification\n", + obd->obd_name); obd->obd_no_transno = 1; GOTO(out, err = 0); } @@ -414,6 +414,7 @@ EXPORT_SYMBOL(class_handle_unhash); EXPORT_SYMBOL(class_handle2object); /* config.c */ +EXPORT_SYMBOL(class_decref); EXPORT_SYMBOL(class_get_profile); EXPORT_SYMBOL(class_del_profile); EXPORT_SYMBOL(class_process_config); diff --git a/lustre/obdclass/genops.c b/lustre/obdclass/genops.c index 47f4bd8..aeefaf9 100644 --- a/lustre/obdclass/genops.c +++ b/lustre/obdclass/genops.c @@ -119,12 +119,12 @@ int class_register_type(struct obd_ops *ops, struct lprocfs_vars *vars, #ifdef LPROCFS type->typ_procroot = lprocfs_register(type->typ_name, proc_lustre_root, vars, type); -#endif if (IS_ERR(type->typ_procroot)) { rc = PTR_ERR(type->typ_procroot); type->typ_procroot = NULL; GOTO (failed, rc); } +#endif spin_lock(&obd_types_lock); list_add(&type->typ_chain, &obd_types); @@ -1201,8 +1201,8 @@ search_again: list_for_each(p, &obd->obd_exports) { doomed_exp[num_to_evict] = list_entry(p, struct obd_export, exp_obd_chain); - if (strcmp(obd_export_nid2str(doomed_exp[num_to_evict]), nid) - == 0) { + if (strcmp(obd_export_nid2str(doomed_exp[num_to_evict]), + nid) == 0) { class_export_get(doomed_exp[num_to_evict]); if (++num_to_evict == EVICT_BATCH) break; @@ -1212,8 +1212,8 @@ search_again: for (i = 0; i < num_to_evict; i++) { exports_evicted++; - CERROR("evicting NID '%s' (%s) #%d at adminstrative request\n", - nid, doomed_exp[i]->exp_client_uuid.uuid, + CWARN("%s: evict NID '%s' (%s) #%d at adminstrative request\n", + obd->obd_name, nid, doomed_exp[i]->exp_client_uuid.uuid, exports_evicted); class_fail_export(doomed_exp[i]); 
class_export_put(doomed_exp[i]); @@ -1224,7 +1224,8 @@ search_again: } if (!exports_evicted) - CERROR("can't disconnect NID '%s': no exports found\n", nid); + CDEBUG(D_HA,"%s: can't disconnect NID '%s': no exports found\n", + obd->obd_name, nid); return exports_evicted; } EXPORT_SYMBOL(obd_export_evict_by_nid); @@ -1251,10 +1252,11 @@ int obd_export_evict_by_uuid(struct obd_device *obd, char *uuid) spin_unlock(&obd->obd_dev_lock); if (doomed_exp == NULL) { - CERROR("can't disconnect %s: no exports found\n", uuid); + CERROR("%s: can't disconnect %s: no exports found\n", + obd->obd_name, uuid); } else { - CERROR("evicting %s at adminstrative request\n", - doomed_exp->exp_client_uuid.uuid); + CWARN("%s: evicting %s at adminstrative request\n", + obd->obd_name, doomed_exp->exp_client_uuid.uuid); class_fail_export(doomed_exp); class_export_put(doomed_exp); exports_evicted++; diff --git a/lustre/obdclass/llog.c b/lustre/obdclass/llog.c index a6edbb7..a364bec 100644 --- a/lustre/obdclass/llog.c +++ b/lustre/obdclass/llog.c @@ -40,6 +40,7 @@ #endif #include +#include #include #include diff --git a/lustre/obdclass/llog_cat.c b/lustre/obdclass/llog_cat.c index 55039cc..28e47c1 100644 --- a/lustre/obdclass/llog_cat.c +++ b/lustre/obdclass/llog_cat.c @@ -40,6 +40,7 @@ #endif #include +#include #include #include diff --git a/lustre/obdclass/llog_ioctl.c b/lustre/obdclass/llog_ioctl.c index f9c1ec0..0cafada 100644 --- a/lustre/obdclass/llog_ioctl.c +++ b/lustre/obdclass/llog_ioctl.c @@ -29,6 +29,7 @@ #endif #include +#include #include #include #include "llog_internal.h" diff --git a/lustre/obdclass/llog_lvfs.c b/lustre/obdclass/llog_lvfs.c index 594a00f..7ea246d 100644 --- a/lustre/obdclass/llog_lvfs.c +++ b/lustre/obdclass/llog_lvfs.c @@ -41,6 +41,7 @@ #include #include +#include #include #include #include diff --git a/lustre/obdclass/llog_obd.c b/lustre/obdclass/llog_obd.c index c987642..c8c2cf1 100644 --- a/lustre/obdclass/llog_obd.c +++ b/lustre/obdclass/llog_obd.c @@ -33,6 
+33,7 @@ #endif #include +#include #include #include #include "llog_internal.h" diff --git a/lustre/obdclass/llog_swab.c b/lustre/obdclass/llog_swab.c index e12003f..303505b 100644 --- a/lustre/obdclass/llog_swab.c +++ b/lustre/obdclass/llog_swab.c @@ -32,6 +32,7 @@ #include #endif +#include #include static void print_llogd_body(struct llogd_body *d) diff --git a/lustre/obdclass/llog_test.c b/lustre/obdclass/llog_test.c index 89dac0a..68f0d6a 100644 --- a/lustre/obdclass/llog_test.c +++ b/lustre/obdclass/llog_test.c @@ -34,8 +34,8 @@ #include #include -#include #include /* for LUSTRE_MDC_NAME */ +#include static int llog_test_rand; static struct obd_uuid uuid = { .uuid = "test_uuid" }; diff --git a/lustre/obdclass/lustre_handles.c b/lustre/obdclass/lustre_handles.c index 1d4fa3a..21d6f50 100644 --- a/lustre/obdclass/lustre_handles.c +++ b/lustre/obdclass/lustre_handles.c @@ -26,7 +26,7 @@ #define DEBUG_SUBSYSTEM S_CLASS #ifndef __KERNEL__ # include -#endif +#endif #include #include @@ -40,6 +40,10 @@ static int handle_count = 0; #define HANDLE_HASH_SIZE (1 << 14) #define HANDLE_HASH_MASK (HANDLE_HASH_SIZE - 1) +/* + * Generate a unique 64bit cookie (hash) for a handle and insert it into + * global (per-node) hash-table. + */ void class_handle_hash(struct portals_handle *h, portals_handle_addref_cb cb) { struct list_head *bucket; @@ -49,19 +53,33 @@ void class_handle_hash(struct portals_handle *h, portals_handle_addref_cb cb) LASSERT(list_empty(&h->h_link)); spin_lock(&handle_lock); + + /* + * This is fast, but simplistic cookie generation algorithm, it will + * need a re-do at some point in the future for security. 
+ */ h->h_cookie = handle_base; handle_base += HANDLE_INCR; - spin_unlock(&handle_lock); - h->h_addref = cb; bucket = handle_hash + (h->h_cookie & HANDLE_HASH_MASK); - CDEBUG(D_INFO, "adding object %p with handle "LPX64" to hash\n", - h, h->h_cookie); - - spin_lock(&handle_lock); list_add(&h->h_link, bucket); handle_count++; + + if (unlikely(handle_base == 0)) { + /* + * Cookie of zero is "dangerous", because in many places it's + * assumed that 0 means "unassigned" handle, not bound to any + * object. + */ + CWARN("The universe has been exhausted: cookie wrap-around.\n"); + handle_base += HANDLE_INCR; + } + spin_unlock(&handle_lock); + + h->h_addref = cb; + CDEBUG(D_INFO, "added object %p with handle "LPX64" to hash\n", + h, h->h_cookie); EXIT; } diff --git a/lustre/obdclass/obd_config.c b/lustre/obdclass/obd_config.c index ca38953..67082af 100644 --- a/lustre/obdclass/obd_config.c +++ b/lustre/obdclass/obd_config.c @@ -33,6 +33,7 @@ #include #include #endif +#include #include #include #include diff --git a/lustre/obdecho/echo_client.c b/lustre/obdecho/echo_client.c index 0dac6d4..a95eb77 100644 --- a/lustre/obdecho/echo_client.c +++ b/lustre/obdecho/echo_client.c @@ -194,7 +194,7 @@ static int echo_create_object(struct obd_device *obd, int on_target, if (lsm->lsm_stripe_size == 0) lsm->lsm_stripe_size = CFS_PAGE_SIZE; - idx = ll_insecure_random_int(); + idx = ll_rand(); /* setup stripes: indices + default ids if required */ for (i = 0; i < lsm->lsm_stripe_count; i++) { @@ -239,7 +239,7 @@ static int echo_create_object(struct obd_device *obd, int on_target, oa->o_id, on_target ? 
" (undoing create)" : ""); if (on_target) - obd_destroy(ec->ec_exp, oa, lsm, oti); + obd_destroy(ec->ec_exp, oa, lsm, oti, NULL); rc = -EEXIST; goto failed; @@ -280,11 +280,11 @@ echo_get_object (struct ec_object **ecop, struct obd_device *obd, spin_lock (&ec->ec_lock); eco = echo_find_object_locked (obd, oa->o_id); if (eco != NULL) { - if (eco->eco_deleted) { /* being deleted */ - spin_unlock (&ec->ec_lock); - return (-EAGAIN); /* (see comment in cleanup) */ + if (eco->eco_deleted) { /* being deleted */ + spin_unlock(&ec->ec_lock); /* (see comment in cleanup) */ + return (-EAGAIN); } - + eco->eco_refcount++; spin_unlock (&ec->ec_lock); *ecop = eco; @@ -509,11 +509,11 @@ static int echo_client_kbrw(struct obd_device *obd, int rw, struct obdo *oa, gfp_mask = ((oa->o_id & 2) == 0) ? CFS_ALLOC_STD : CFS_ALLOC_HIGHUSER; LASSERT(rw == OBD_BRW_WRITE || rw == OBD_BRW_READ); + LASSERT(lsm != NULL); + LASSERT(lsm->lsm_object_id == oa->o_id); if (count <= 0 || - (count & (CFS_PAGE_SIZE - 1)) != 0 || - (lsm != NULL && - lsm->lsm_object_id != oa->o_id)) + (count & (CFS_PAGE_SIZE - 1)) != 0) return (-EINVAL); /* XXX think again with misaligned I/O */ @@ -936,9 +936,8 @@ static int echo_client_prep_commit(struct obd_export *exp, int rw, rnb[i].len = CFS_PAGE_SIZE; } - /* XXX this can't be the best.. 
*/ - memset(oti, 0, sizeof(*oti)); ioo.ioo_bufcnt = npages; + oti->oti_transno = 0; ret = obd_preprw(rw, exp, oa, 1, &ioo, npages, rnb, lnb, oti); if (ret != 0) @@ -986,7 +985,7 @@ int echo_client_brw_ioctl(int rw, struct obd_export *exp, { struct obd_device *obd = class_exp2obd(exp); struct echo_client_obd *ec = &obd->u.echo_client; - struct obd_trans_info dummy_oti; + struct obd_trans_info dummy_oti = { .oti_thread_id = -1 }; struct ec_object *eco; int rc; ENTRY; @@ -995,8 +994,6 @@ int echo_client_brw_ioctl(int rw, struct obd_export *exp, if (rc) RETURN(rc); - memset(&dummy_oti, 0, sizeof(dummy_oti)); - data->ioc_obdo1.o_valid &= ~OBD_MD_FLHANDLE; data->ioc_obdo1.o_valid |= OBD_MD_FLGROUP; data->ioc_obdo1.o_gr = FILTER_GROUP_ECHO; @@ -1223,7 +1220,7 @@ echo_client_iocontrol(unsigned int cmd, struct obd_export *exp, oa->o_gr = FILTER_GROUP_ECHO; oa->o_valid |= OBD_MD_FLGROUP; rc = obd_destroy(ec->ec_exp, oa, eco->eco_lsm, - &dummy_oti); + &dummy_oti, NULL); if (rc == 0) eco->eco_deleted = 1; echo_put_object(eco); diff --git a/lustre/obdfilter/filter.c b/lustre/obdfilter/filter.c index 6c80076..214b1ad 100644 --- a/lustre/obdfilter/filter.c +++ b/lustre/obdfilter/filter.c @@ -52,10 +52,12 @@ #include #include #include +#include #include #include #include #include +#include #include "filter_internal.h" @@ -160,15 +162,14 @@ static int filter_client_add(struct obd_device *obd, struct filter_obd *filter, * there's no need for extra complication here */ if (new_client) { - cl_idx = find_first_zero_bit(bitmap, FILTER_LR_MAX_CLIENTS); + cl_idx = find_first_zero_bit(bitmap, LR_MAX_CLIENTS); repeat: - if (cl_idx >= FILTER_LR_MAX_CLIENTS) { - CERROR("no client slots - fix FILTER_LR_MAX_CLIENTS\n"); + if (cl_idx >= LR_MAX_CLIENTS) { + CERROR("no client slots - fix LR_MAX_CLIENTS\n"); RETURN(-EOVERFLOW); } if (test_and_set_bit(cl_idx, bitmap)) { - cl_idx = find_next_zero_bit(bitmap, - FILTER_LR_MAX_CLIENTS, + cl_idx = find_next_zero_bit(bitmap, LR_MAX_CLIENTS, cl_idx); goto 
repeat; } @@ -302,7 +303,7 @@ static int filter_free_server_data(struct filter_obd *filter) { OBD_FREE(filter->fo_fsd, sizeof(*filter->fo_fsd)); filter->fo_fsd = NULL; - OBD_FREE(filter->fo_last_rcvd_slots, FILTER_LR_MAX_CLIENTS / 8); + OBD_FREE(filter->fo_last_rcvd_slots, LR_MAX_CLIENTS / 8); filter->fo_last_rcvd_slots = NULL; return 0; } @@ -369,17 +370,17 @@ static int filter_init_server_data(struct obd_device *obd, struct file * filp) int rc; /* ensure padding in the struct is the correct size */ - LASSERT (offsetof(struct filter_server_data, fsd_padding) + - sizeof(fsd->fsd_padding) == FILTER_LR_SERVER_SIZE); - LASSERT (offsetof(struct filter_client_data, fcd_padding) + - sizeof(fcd->fcd_padding) == FILTER_LR_CLIENT_SIZE); + CLASSERT(offsetof(struct filter_server_data, fsd_padding) + + sizeof(fsd->fsd_padding) == LR_SERVER_SIZE); + CLASSERT(offsetof(struct filter_client_data, fcd_padding) + + sizeof(fcd->fcd_padding) == LR_CLIENT_SIZE); OBD_ALLOC(fsd, sizeof(*fsd)); if (!fsd) RETURN(-ENOMEM); filter->fo_fsd = fsd; - OBD_ALLOC(filter->fo_last_rcvd_slots, FILTER_LR_MAX_CLIENTS / 8); + OBD_ALLOC(filter->fo_last_rcvd_slots, LR_MAX_CLIENTS / 8); if (filter->fo_last_rcvd_slots == NULL) { OBD_FREE(fsd, sizeof(*fsd)); RETURN(-ENOMEM); @@ -391,9 +392,9 @@ static int filter_init_server_data(struct obd_device *obd, struct file * filp) memcpy(fsd->fsd_uuid, obd->obd_uuid.uuid,sizeof(fsd->fsd_uuid)); fsd->fsd_last_transno = 0; mount_count = fsd->fsd_mount_count = 0; - fsd->fsd_server_size = cpu_to_le32(FILTER_LR_SERVER_SIZE); - fsd->fsd_client_start = cpu_to_le32(FILTER_LR_CLIENT_START); - fsd->fsd_client_size = cpu_to_le16(FILTER_LR_CLIENT_SIZE); + fsd->fsd_server_size = cpu_to_le32(LR_SERVER_SIZE); + fsd->fsd_client_start = cpu_to_le32(LR_CLIENT_START); + fsd->fsd_client_size = cpu_to_le16(LR_CLIENT_SIZE); fsd->fsd_subdir_count = cpu_to_le16(FILTER_SUBDIR_COUNT); filter->fo_subdir_count = FILTER_SUBDIR_COUNT; } else { @@ -413,14 +414,14 @@ static int 
filter_init_server_data(struct obd_device *obd, struct file * filp) } if (fsd->fsd_feature_incompat & ~cpu_to_le32(FILTER_INCOMPAT_SUPP)) { - CERROR("unsupported feature %x\n", - le32_to_cpu(fsd->fsd_feature_incompat) & + CERROR("%s: unsupported incompat filesystem feature(s) %x\n", + obd->obd_name, le32_to_cpu(fsd->fsd_feature_incompat) & ~FILTER_INCOMPAT_SUPP); GOTO(err_fsd, rc = -EINVAL); } if (fsd->fsd_feature_rocompat & ~cpu_to_le32(FILTER_ROCOMPAT_SUPP)) { - CERROR("read-only feature %x\n", - le32_to_cpu(fsd->fsd_feature_rocompat) & + CERROR("%s: unsupported read-only filesystem feature(s) %x\n", + obd->obd_name, le32_to_cpu(fsd->fsd_feature_rocompat) & ~FILTER_ROCOMPAT_SUPP); /* Do something like remount filesystem read-only */ GOTO(err_fsd, rc = -EINVAL); @@ -529,7 +530,7 @@ static int filter_init_server_data(struct obd_device *obd, struct file * filp) obd->obd_recovery_start = CURRENT_SECONDS; /* Only used for lprocfs_status */ obd->obd_recovery_end = obd->obd_recovery_start + - OBD_RECOVERY_TIMEOUT; + OBD_RECOVERY_TIMEOUT / HZ; } out: @@ -558,12 +559,6 @@ static int filter_cleanup_groups(struct obd_device *obd) int i; ENTRY; - if (filter->fo_blacklist != NULL) { - OBD_FREE(filter->fo_blacklist, - FILTER_GROUPS * sizeof(struct filter_ext)); - filter->fo_blacklist = NULL; - } - if (filter->fo_dentry_O_groups != NULL) { for (i = 0; i < FILTER_GROUPS; i++) { dentry = filter->fo_dentry_O_groups[i]; @@ -616,11 +611,6 @@ static int filter_prep_groups(struct obd_device *obd) int i, rc = 0, cleanup_phase = 0; ENTRY; - OBD_ALLOC(filter->fo_blacklist, - FILTER_GROUPS * sizeof(struct filter_ext)); - if (!filter->fo_blacklist) - GOTO(cleanup, rc = -ENOMEM); - O_dentry = simple_mkdir(current->fs->pwd, "O", 0700, 1); CDEBUG(D_INODE, "got/created O: %p\n", O_dentry); if (IS_ERR(O_dentry)) { @@ -664,7 +654,7 @@ static int filter_prep_groups(struct obd_device *obd) GOTO(cleanup_O0, rc); } filter->fo_fsd->fsd_feature_incompat |= - cpu_to_le32(FILTER_INCOMPAT_GROUPS); + 
cpu_to_le32(OBD_INCOMPAT_GROUPS); rc = filter_update_server_data(obd, filter->fo_rcvd_filp, filter->fo_fsd, 1); GOTO(cleanup_O0, rc); @@ -716,15 +706,7 @@ static int filter_prep_groups(struct obd_device *obd) filter->fo_last_objid_files[i] = filp; if (filp->f_dentry->d_inode->i_size == 0) { - if (i == 0 && filter->fo_fsd->fsd_unused != 0) { - /* OST conversion, remove sometime post 1.0 */ - filter->fo_last_objids[0] = - le64_to_cpu(filter->fo_fsd->fsd_unused); - CWARN("saving old objid "LPU64" to LAST_ID\n", - filter->fo_last_objids[0]); - } else { - filter->fo_last_objids[i] = FILTER_INIT_OBJID; - } + filter->fo_last_objids[i] = FILTER_INIT_OBJID; rc = filter_update_last_objid(obd, i, 1); if (rc) GOTO(cleanup, rc); @@ -791,30 +773,47 @@ static int filter_prep(struct obd_device *obd) LAST_RCVD, rc); GOTO(out, rc); } - + filter->fo_rcvd_filp = file; if (!S_ISREG(file->f_dentry->d_inode->i_mode)) { CERROR("%s is not a regular file!: mode = %o\n", LAST_RCVD, file->f_dentry->d_inode->i_mode); GOTO(err_filp, rc = -ENOENT); } - /* steal operations */ - inode = file->f_dentry->d_inode; - filter->fo_fop = file->f_op; - filter->fo_iop = inode->i_op; - filter->fo_aops = inode->i_mapping->a_ops; + inode = file->f_dentry->d_parent->d_inode; + /* We use i_op->unlink directly in filter_vfs_unlink() */ + if (!inode->i_op || !inode->i_op->create || !inode->i_op->unlink) { + CERROR("%s: filesystem does not support create/unlink ops\n", + obd->obd_name); + GOTO(err_filp, rc = -EOPNOTSUPP); + } rc = filter_init_server_data(obd, file); if (rc) { CERROR("cannot read %s: rc = %d\n", LAST_RCVD, rc); GOTO(err_filp, rc); } - filter->fo_rcvd_filp = file; + /* open and create health check io file*/ + file = filp_open(HEALTH_CHECK, O_RDWR | O_CREAT, 0644); + if (IS_ERR(file)) { + rc = PTR_ERR(file); + CERROR("OBD filter: cannot open/create %s rc = %d\n", + HEALTH_CHECK, rc); + GOTO(err_filp, rc); + } + filter->fo_health_check_filp = file; + if (!S_ISREG(file->f_dentry->d_inode->i_mode)) { + 
CERROR("%s is not a regular file!: mode = %o\n", HEALTH_CHECK, + file->f_dentry->d_inode->i_mode); + GOTO(err_health_check, rc = -ENOENT); + } + rc = lvfs_check_io_health(obd, file); + if (rc) + GOTO(err_health_check, rc); rc = filter_prep_groups(obd); if (rc) GOTO(err_server_data, rc); - out: pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); @@ -823,8 +822,12 @@ static int filter_prep(struct obd_device *obd) err_server_data: //class_disconnect_exports(obd, 0); filter_free_server_data(filter); + err_health_check: + if (filp_close(filter->fo_health_check_filp, 0)) + CERROR("can't close %s after error\n", HEALTH_CHECK); + filter->fo_health_check_filp = NULL; err_filp: - if (filp_close(file, 0)) + if (filp_close(filter->fo_rcvd_filp, 0)) CERROR("can't close %s after error\n", LAST_RCVD); filter->fo_rcvd_filp = NULL; goto out; @@ -859,40 +862,44 @@ static void filter_post(struct obd_device *obd) if (rc) CERROR("error closing %s: rc = %d\n", LAST_RCVD, rc); + rc = filp_close(filter->fo_health_check_filp, 0); + filter->fo_health_check_filp = NULL; + if (rc) + CERROR("error closing %s: rc = %d\n", HEALTH_CHECK, rc); + filter_cleanup_groups(obd); filter_free_server_data(filter); pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); } -static void filter_set_last_id(struct filter_obd *filter, - int group, obd_id id) +static void filter_set_last_id(struct filter_obd *filter, struct obdo *oa, + obd_id id) { + obd_gr group = 0; LASSERT(filter->fo_fsd != NULL); - LASSERT(group <= FILTER_GROUPS); - spin_lock(&filter->fo_objidlock); - filter->fo_last_objids[group] = id; - spin_unlock(&filter->fo_objidlock); -} - -static void filter_grow_last_id(struct filter_obd *filter, - int group, obd_id id) -{ - LASSERT(filter->fo_fsd != NULL); - LASSERT(group <= FILTER_GROUPS); + if (oa != NULL) { + LASSERT(oa->o_gr <= FILTER_GROUPS); + group = oa->o_gr; + } spin_lock(&filter->fo_objidlock); - if (id > filter->fo_last_objids[group]) filter->fo_last_objids[group] = id; spin_unlock(&filter->fo_objidlock); } 
-__u64 filter_last_id(struct filter_obd *filter, int group) +__u64 filter_last_id(struct filter_obd *filter, struct obdo *oa) { obd_id id; + obd_gr group = 0; LASSERT(filter->fo_fsd != NULL); - LASSERT(group < FILTER_GROUPS); + if (oa != NULL) { + LASSERT(oa->o_gr <= FILTER_GROUPS); + group = oa->o_gr; + } + + /* FIXME: object groups */ spin_lock(&filter->fo_objidlock); id = filter->fo_last_objids[group]; spin_unlock(&filter->fo_objidlock); @@ -900,46 +907,12 @@ __u64 filter_last_id(struct filter_obd *filter, int group) return id; } -static void filter_lock_dentry(struct obd_device *obd, - struct dentry *dparent) +static int filter_lock_dentry(struct obd_device *obd, struct dentry *dparent) { down(&dparent->d_inode->i_sem); + return 0; } -static void filter_unlock_dentry(struct obd_device *obd, - struct dentry *dparent) -{ - up(&dparent->d_inode->i_sem); -} - -static void filter_parents_access(struct obd_device *obd, - obd_gr group, int lock) -{ - void (*access_func) (struct obd_device *, struct dentry *); - struct filter_obd *filter = &obd->u.filter; - struct dentry *dparent; - int i = 0; - - access_func = lock ? 
filter_lock_dentry : - filter_unlock_dentry; - - if (group > 0 || filter->fo_subdir_count == 0) { - dparent = filter->fo_dentry_O_groups[group]; - access_func(obd, dparent); - } else { - for (i = 0; i < filter->fo_subdir_count; i++) { - dparent = filter->fo_dentry_O_sub[i]; - access_func(obd, dparent); - } - } -} - -#define LOCK_PARENTS(obd, group) \ - filter_parents_access(obd, group, 1) - -#define UNLOCK_PARENTS(obd, group) \ - filter_parents_access(obd, group, 0) - /* We never dget the object parent, so DON'T dput it either */ struct dentry *filter_parent(struct obd_device *obd, obd_gr group, obd_id objid) { @@ -956,22 +929,22 @@ struct dentry *filter_parent(struct obd_device *obd, obd_gr group, obd_id objid) struct dentry *filter_parent_lock(struct obd_device *obd, obd_gr group, obd_id objid) { - struct dentry *dparent = filter_parent(obd, group, objid); unsigned long now = jiffies; + struct dentry *dparent = filter_parent(obd, group, objid); + int rc; if (IS_ERR(dparent)) return dparent; - filter_lock_dentry(obd, dparent); + rc = filter_lock_dentry(obd, dparent); fsfilt_check_slow(now, obd_timeout, "parent lock"); - return dparent; + return rc ? ERR_PTR(rc) : dparent; } -/* we never dget the object parent, so DON'T dput it either */ -static void filter_parent_unlock(struct obd_device *obd, - struct dentry *dparent) +/* We never dget the object parent, so DON'T dput it either */ +static void filter_parent_unlock(struct dentry *dparent) { - filter_unlock_dentry(obd, dparent); + up(&dparent->d_inode->i_sem); } /* How to get files, dentries, inodes from object id's. 
@@ -991,10 +964,8 @@ struct dentry *filter_fid2dentry(struct obd_device *obd, int len; ENTRY; - if (OBD_FAIL_CHECK(OBD_FAIL_OST_ENOENT)) { - CERROR("test case OBD_FAIL_OST_ENOENT\n"); + if (OBD_FAIL_CHECK(OBD_FAIL_OST_ENOENT)) RETURN(ERR_PTR(-ENOENT)); - } if (id == 0) { CERROR("fatal: invalid object id 0\n"); @@ -1015,7 +986,7 @@ struct dentry *filter_fid2dentry(struct obd_device *obd, dparent->d_name.len, dparent->d_name.name, name); dchild = /*ll_*/lookup_one_len(name, dparent, len); if (dir_dentry == NULL) - filter_parent_unlock(obd, dparent); + filter_parent_unlock(dparent); if (IS_ERR(dchild)) { CERROR("%s: child lookup error %ld\n", obd->obd_name, PTR_ERR(dchild)); @@ -1059,14 +1030,67 @@ static int filter_prepare_destroy(struct obd_device *obd, obd_id objid) RETURN(rc); } +/* This is vfs_unlink() without down(i_sem). If we call regular vfs_unlink() + * we have 2.6 lock ordering issues with filter_commitrw_write() as it takes + * i_sem before starting a handle, while filter_destroy() + vfs_unlink do the + * reverse. Caller must take i_sem before starting the transaction and we + * drop it here before the inode is removed from the dentry. 
bug 4180/6984 */ +int filter_vfs_unlink(struct inode *dir, struct dentry *dentry) +{ + int rc; + ENTRY; + + /* don't need dir->i_zombie for 2.4, it is for rename/unlink of dir + * itself we already hold dir->i_sem for child create/unlink ops */ + LASSERT(down_trylock(&dir->i_sem) != 0); + LASSERT(down_trylock(&dentry->d_inode->i_sem) != 0); + + /* may_delete() */ + if (!dentry->d_inode || dentry->d_parent->d_inode != dir) + GOTO(out, rc = -ENOENT); + + rc = ll_permission(dir, MAY_WRITE | MAY_EXEC, NULL); + if (rc) + GOTO(out, rc); + + if (IS_APPEND(dir)) + GOTO(out, rc = -EPERM); + + /* check_sticky() */ + if ((dentry->d_inode->i_uid != current->fsuid && !capable(CAP_FOWNER))|| + IS_APPEND(dentry->d_inode) || IS_IMMUTABLE(dentry->d_inode)) + GOTO(out, rc = -EPERM); + + /* NOTE: This might need to go outside i_sem, though it isn't clear if + * that was done because of journal_start (which is already done + * here) or some other ordering issue. */ + DQUOT_INIT(dir); + +#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) + rc = security_inode_unlink(dir, dentry); + if (rc) + GOTO(out, rc); +#endif + + rc = dir->i_op->unlink(dir, dentry); +out: + /* need to drop i_sem before we lose inode reference */ + up(&dentry->d_inode->i_sem); + if (rc == 0) + d_delete(dentry); + + RETURN(rc); +} + /* Caller must hold LCK_PW on parent and push us into kernel context. + * Caller must hold child i_sem, we drop it always. * Caller is also required to ensure that dchild->d_inode exists. 
*/ -static int filter_unlink(struct obd_device *obd, obd_id objid, - struct dentry *dparent, struct dentry *dchild) +static int filter_destroy_internal(struct obd_device *obd, obd_id objid, + struct dentry *dparent, + struct dentry *dchild) { struct inode *inode = dchild->d_inode; int rc; - ENTRY; if (inode->i_nlink != 1 || atomic_read(&inode->i_count) != 1) { CERROR("destroying objid %.*s ino %lu nlink %lu count %d\n", @@ -1075,11 +1099,11 @@ static int filter_unlink(struct obd_device *obd, obd_id objid, atomic_read(&inode->i_count)); } - rc = vfs_unlink(dparent->d_inode, dchild); + rc = filter_vfs_unlink(dparent->d_inode, dchild); if (rc) CERROR("error unlinking objid %.*s: rc %d\n", dchild->d_name.len, dchild->d_name.name, rc); - RETURN(rc); + return(rc); } static int filter_intent_policy(struct ldlm_namespace *ns, @@ -1220,6 +1244,7 @@ static int filter_intent_policy(struct ldlm_namespace *ns, } RETURN(ELDLM_LOCK_ABORTED); } + /* * This check is for lock taken in filter_prepare_destroy() that does * not have l_glimpse_ast set. So the logic is: if there is a lock @@ -1268,7 +1293,7 @@ static int filter_intent_policy(struct ldlm_namespace *ns, * unknown at the time of OST thread creation. * * Instead array of iobuf's is attached to struct filter_obd (->fo_iobuf_pool - * field). This array has size OST_NUM_THREADS, so that each OST thread uses + * field). This array has size OST_MAX_THREADS, so that each OST thread uses * it's very own iobuf. 
* * Functions below @@ -1288,18 +1313,18 @@ static int filter_intent_policy(struct ldlm_namespace *ns, */ static void filter_iobuf_pool_done(struct filter_obd *filter) { - void **pool; + struct filter_iobuf **pool; int i; ENTRY; pool = filter->fo_iobuf_pool; if (pool != NULL) { - for (i = 0; i < OST_NUM_THREADS; ++ i) { + for (i = 0; i < filter->fo_iobuf_count; ++ i) { if (pool[i] != NULL) filter_free_iobuf(pool[i]); } - OBD_FREE(pool, OST_NUM_THREADS * sizeof pool[0]); + OBD_FREE(pool, filter->fo_iobuf_count * sizeof pool[0]); filter->fo_iobuf_pool = NULL; } EXIT; @@ -1308,48 +1333,45 @@ static void filter_iobuf_pool_done(struct filter_obd *filter) /* * pre-allocate pool of iobuf's to be used by filter_{prep,commit}rw_write(). */ -static int filter_iobuf_pool_init(struct filter_obd *filter, int count) +static int filter_iobuf_pool_init(struct filter_obd *filter) { void **pool; - int i; - int result = 0; ENTRY; - LASSERT(count <= OST_NUM_THREADS); - - OBD_ALLOC_GFP(pool, OST_NUM_THREADS * sizeof pool[0], CFS_ALLOC_STD); - if (pool == NULL) + OBD_ALLOC_GFP(filter->fo_iobuf_pool, OST_MAX_THREADS * sizeof(*pool), + GFP_KERNEL); + if (filter->fo_iobuf_pool == NULL) RETURN(-ENOMEM); - filter->fo_iobuf_pool = pool; - filter->fo_iobuf_count = count; - for (i = 0; i < count; ++ i) { - /* - * allocate kiobuf to be used by i-th OST thread. - */ - result = filter_alloc_iobuf(filter, OBD_BRW_WRITE, - PTLRPC_MAX_BRW_PAGES, - &pool[i]); - if (result != 0) { - filter_iobuf_pool_done(filter); - break; - } - } - RETURN(result); + filter->fo_iobuf_count = OST_MAX_THREADS; + + RETURN(0); } -/* - * return iobuf preallocated by filter_iobuf_pool_init() for @thread. - */ -void *filter_iobuf_get(struct ptlrpc_thread *thread, struct filter_obd *filter) +/* Return iobuf allocated for @thread_id. We don't know in advance how + * many threads there will be so we allocate a large empty array and only + * fill in those slots that are actually in use. 
+ * If we haven't allocated a pool entry for this thread before, do so now. */ +void *filter_iobuf_get(struct filter_obd *filter, struct obd_trans_info *oti) { - void *kio; + int thread_id = oti ? oti->oti_thread_id : -1; + struct filter_iobuf *pool = NULL; + struct filter_iobuf **pool_place = NULL; + + if (thread_id >= 0) { + LASSERT(thread_id < filter->fo_iobuf_count); + pool = *(pool_place = &filter->fo_iobuf_pool[thread_id]); + } - LASSERT(thread->t_id < filter->fo_iobuf_count); - kio = filter->fo_iobuf_pool[thread->t_id]; - LASSERT(kio != NULL); - return kio; + if (unlikely(pool == NULL)) { + pool = filter_alloc_iobuf(filter, OBD_BRW_WRITE, + PTLRPC_MAX_BRW_PAGES); + if (pool_place != NULL) + *pool_place = pool; + } + + return pool; } /* mount the file system (secretly). lustre_cfg parameters are: @@ -1364,7 +1386,9 @@ int filter_common_setup(struct obd_device *obd, obd_count len, void *buf, struct lustre_cfg* lcfg = buf; struct filter_obd *filter = &obd->u.filter; struct vfsmount *mnt; - char *str; + struct obd_uuid uuid; + __u8 *uuid_ptr; + char *str, *label; char ns_name[48]; int rc; ENTRY; @@ -1378,7 +1402,7 @@ int filter_common_setup(struct obd_device *obd, obd_count len, void *buf, if (IS_ERR(obd->obd_fsops)) RETURN(PTR_ERR(obd->obd_fsops)); - rc = filter_iobuf_pool_init(filter, OST_NUM_THREADS); + rc = filter_iobuf_pool_init(filter); if (rc != 0) GOTO(err_ops, rc); @@ -1421,8 +1445,7 @@ int filter_common_setup(struct obd_device *obd, obd_count len, void *buf, GOTO(err_mntput, rc); filter->fo_destroy_in_progress = 0; - - spin_lock_init(&filter->fo_blacklist_lock); + sema_init(&filter->fo_create_lock, 1); spin_lock_init(&filter->fo_translock); spin_lock_init(&filter->fo_objidlock); spin_lock_init(&filter->fo_stats_lock); @@ -1463,25 +1486,34 @@ int filter_common_setup(struct obd_device *obd, obd_count len, void *buf, if (rc) GOTO(err_post, rc); + uuid_ptr = fsfilt_uuid(obd, obd->u.obt.obt_sb); + if (uuid_ptr != NULL) { + class_uuid_unparse(uuid_ptr, 
&uuid); + str = uuid.uuid; + } else { + str = "no UUID"; + } + label = fsfilt_label(obd, obd->u.obt.obt_sb); + if (obd->obd_recovering) { - LCONSOLE_WARN("OST %s now serving %s, but will be in recovery " - "until %d %s reconnect, or if no clients " - "reconnect for %d:%.02d; during that time new " + LCONSOLE_WARN("OST %s now serving %s (%s%s%s), but will be in" + "recovery until %d %s reconnect, or if no clients" + " reconnect for %d:%.02d; during that time new " "clients will not be allowed to connect. " "Recovery progress can be monitored by watching " "/proc/fs/lustre/obdfilter/%s/recovery_status.\n", - obd->obd_name, - lustre_cfg_string(lcfg, 1), + obd->obd_name, lustre_cfg_string(lcfg, 1), + label ?: "", label ? "/" : "", str, obd->obd_recoverable_clients, (obd->obd_recoverable_clients == 1) ? "client" : "clients", - (int)(OBD_RECOVERY_TIMEOUT) / 60, - (int)(OBD_RECOVERY_TIMEOUT) % 60, + (int)(OBD_RECOVERY_TIMEOUT / HZ) / 60, + (int)(OBD_RECOVERY_TIMEOUT / HZ) % 60, obd->obd_name); } else { - LCONSOLE_INFO("OST %s now serving %s with recovery %s.\n", - obd->obd_name, - lustre_cfg_string(lcfg, 1), + LCONSOLE_INFO("OST %s now serving %s (%s%s%s) with recovery " + "%s\n", obd->obd_name, lustre_cfg_string(lcfg, 1), + label ?: "", label ? "/" : "", str, obd->obd_replayable ? 
"enabled" : "disabled"); } @@ -1600,10 +1632,10 @@ static int filter_precleanup(struct obd_device *obd, int stage) ENTRY; switch(stage) { - case 1: + case OBD_CLEANUP_EXPORTS: target_cleanup_recovery(obd); break; - case 2: + case OBD_CLEANUP_SELF_EXP: rc = filter_llog_finish(obd, 0); } RETURN(rc); @@ -1655,7 +1687,7 @@ static int filter_cleanup(struct obd_device *obd) /* We can only unlock kernel if we are in the context of sys_ioctl, otherwise we never called lock_kernel */ - if (kernel_locked()) { + if (ll_kernel_locked()) { unlock_kernel(); must_relock++; } @@ -1678,9 +1710,85 @@ static int filter_cleanup(struct obd_device *obd) RETURN(0); } +static int filter_connect_internal(struct obd_export *exp, + struct obd_connect_data *data) +{ + if (!data) + RETURN(0); + + CDEBUG(D_RPCTRACE, "%s: cli %s/%p ocd_connect_flags: "LPX64 + " ocd_version: %x ocd_grant: %d\n", + exp->exp_obd->obd_name, exp->exp_client_uuid.uuid, exp, + data->ocd_connect_flags, data->ocd_version, + data->ocd_grant); + + data->ocd_connect_flags &= OST_CONNECT_SUPPORTED; + exp->exp_connect_flags = data->ocd_connect_flags; + data->ocd_version = LUSTRE_VERSION_CODE; + + if (exp->exp_connect_flags & OBD_CONNECT_GRANT) { + obd_size left, want; + + spin_lock(&exp->exp_obd->obd_osfs_lock); + left = filter_grant_space_left(exp); + want = data->ocd_grant; + data->ocd_grant = filter_grant(exp, 0, want, left); + spin_unlock(&exp->exp_obd->obd_osfs_lock); + + CDEBUG(D_CACHE, "%s: cli %s/%p ocd_grant: %d want: " + "%lld left: %lld\n", exp->exp_obd->obd_name, + exp->exp_client_uuid.uuid, exp, + data->ocd_grant, want, left); + } + + if (data->ocd_connect_flags & OBD_CONNECT_INDEX) { + struct filter_obd *filter = &exp->exp_obd->u.filter; + struct filter_server_data *fsd = filter->fo_fsd; + int index = le32_to_cpu(fsd->fsd_ost_index); + + if (!(fsd->fsd_feature_compat & + cpu_to_le32(OBD_COMPAT_OST))) { + /* this will only happen on the first connect */ + fsd->fsd_ost_index = le32_to_cpu(data->ocd_index); + 
fsd->fsd_feature_compat |= cpu_to_le32(OBD_COMPAT_OST); + filter_update_server_data(exp->exp_obd, + filter->fo_rcvd_filp, fsd, 1); + } else if (index != data->ocd_index) { + LCONSOLE_ERROR("Connection from %s to index " + "%u doesn't match actual OST " + "index %u, bad configuration?\n", + obd_export_nid2str(exp), index, + data->ocd_index); + RETURN(-EBADF); + } + } + /* FIXME: Do the same with the MDS UUID and fsd_peeruuid. + * FIXME: We don't strictly need the COMPAT flag for that, + * FIXME: as fsd_peeruuid[0] will tell us if that is set. + * FIXME: We needed it for the index, as index 0 is valid. */ + + RETURN(0); +} + +static int filter_reconnect(struct obd_export *exp, struct obd_device *obd, + struct obd_uuid *cluuid, + struct obd_connect_data *data) +{ + int rc; + ENTRY; + + if (exp == NULL || obd == NULL || cluuid == NULL) + RETURN(-EINVAL); + + rc = filter_connect_internal(exp, data); + + RETURN(rc); +} + /* nearly identical to mds_connect */ static int filter_connect(struct lustre_handle *conn, struct obd_device *obd, - struct obd_uuid *cluuid, struct obd_connect_data *data) + struct obd_uuid *cluuid, + struct obd_connect_data *data) { struct obd_export *exp; struct filter_export_data *fed; @@ -1700,13 +1808,12 @@ static int filter_connect(struct lustre_handle *conn, struct obd_device *obd, fed = &exp->exp_filter_data; - if (data != NULL) { - data->ocd_connect_flags &= OST_CONNECT_SUPPORTED; - exp->exp_connect_flags = data->ocd_connect_flags; - } - spin_lock_init(&fed->fed_lock); + rc = filter_connect_internal(exp, data); + if (rc) + GOTO(cleanup, rc); + if (!obd->obd_replayable) GOTO(cleanup, rc = 0); @@ -1720,6 +1827,7 @@ static int filter_connect(struct lustre_handle *conn, struct obd_device *obd, fed->fed_fcd = fcd; rc = filter_client_add(obd, filter, fed, -1); + GOTO(cleanup, rc); cleanup: @@ -1831,6 +1939,7 @@ static void filter_grant_discard(struct obd_export *exp) "%s: tot_pending "LPU64" cli %s/%p fed_pending %ld\n", obd->obd_name, 
filter->fo_tot_pending, exp->exp_client_uuid.uuid, exp, fed->fed_pending); + /* fo_tot_pending is handled in filter_grant_commit as bulk finishes */ LASSERTF(filter->fo_tot_dirty >= fed->fed_dirty, "%s: tot_dirty "LPU64" cli %s/%p fed_dirty %ld\n", obd->obd_name, filter->fo_tot_dirty, @@ -1855,6 +1964,8 @@ static int filter_destroy_export(struct obd_export *exp) if (exp->exp_obd->obd_replayable) filter_client_free(exp); + else + fsfilt_sync(exp->exp_obd, exp->exp_obd->u.obt.obt_sb); filter_grant_discard(exp); @@ -1949,6 +2060,45 @@ static int filter_getattr(struct obd_export *exp, struct obdo *oa, RETURN(rc); } +/* this should be enabled/disabled in condition to enabled/disabled large + * inodes (fast EAs) in backing store FS. */ +int filter_update_fidea(struct obd_export *exp, struct inode *inode, + void *handle, struct obdo *oa) +{ + struct obd_device *obd = exp->exp_obd; + int rc = 0; + ENTRY; + + if (oa->o_valid & OBD_MD_FLFID) { + struct filter_fid ff; + obd_gr group = 0; + + if (oa->o_valid & OBD_MD_FLGROUP) + group = oa->o_gr; + + /* packing fid and converting it to LE for storing into EA. + * Here ->o_stripe_idx should be filled by LOV and rest of + * fields - by client. */ + ff.ff_fid.id = cpu_to_le64(oa->o_fid); + ff.ff_fid.f_type = cpu_to_le32(oa->o_stripe_idx); + ff.ff_fid.generation = cpu_to_le32(oa->o_generation); + ff.ff_objid = cpu_to_le64(oa->o_id); + ff.ff_group = cpu_to_le64(group); + + CDEBUG(D_INODE, "storing filter fid EA ("LPU64"/%u/%u" + LPU64"/"LPU64")\n", oa->o_fid, oa->o_stripe_idx, + oa->o_generation, oa->o_id, group); + + rc = fsfilt_set_md(obd, inode, handle, &ff, sizeof(ff), "fid"); + if (rc) + CERROR("store fid in object failed! 
rc: %d\n", rc); + } else { + CDEBUG(D_HA, "OSS object without fid info!\n"); + } + + RETURN(rc); +} + /* this is called from filter_truncate() until we have filter_punch() */ int filter_setattr_internal(struct obd_export *exp, struct dentry *dentry, struct obdo *oa, struct obd_trans_info *oti) @@ -1956,17 +2106,22 @@ int filter_setattr_internal(struct obd_export *exp, struct dentry *dentry, unsigned int orig_ids[MAXQUOTAS] = {0, 0}; struct llog_cookie *fcc = NULL; struct filter_obd *filter; + int rc, err, locked = 0; + unsigned int ia_valid; + struct inode *inode; struct iattr iattr; void *handle; - int rc, err; ENTRY; LASSERT(dentry != NULL); LASSERT(!IS_ERR(dentry)); - LASSERT(dentry->d_inode != NULL); + + inode = dentry->d_inode; + LASSERT(inode != NULL); filter = &exp->exp_obd->u.filter; iattr_from_obdo(&iattr, oa, oa->o_valid); + ia_valid = iattr.ia_valid; if (oa->o_valid & OBD_MD_FLCOOKIE) { OBD_ALLOC(fcc, sizeof(*fcc)); @@ -1974,16 +2129,48 @@ int filter_setattr_internal(struct obd_export *exp, struct dentry *dentry, memcpy(fcc, obdo_logcookie(oa), sizeof(*fcc)); } - if (iattr.ia_valid & ATTR_SIZE) - down(&dentry->d_inode->i_sem); + if (ia_valid & ATTR_SIZE || ia_valid & (ATTR_UID | ATTR_GID)) { + down(&inode->i_sem); + locked = 1; + } + + /* If the inode still has SUID+SGID bits set (see filter_precreate()) + * then we will accept the UID+GID sent by the client during write for + * initializing the ownership of this inode. We only allow this to + * happen once so clear these bits in setattr. In 2.6 kernels it is + * possible to get ATTR_UID and ATTR_GID separately, so we only clear + * the flags that are actually being set. 
*/ + if (ia_valid & (ATTR_UID | ATTR_GID)) { + CDEBUG(D_INODE, "update UID/GID to %lu/%lu\n", + (unsigned long)oa->o_uid, (unsigned long)oa->o_gid); + + if ((inode->i_mode & S_ISUID) && (ia_valid & ATTR_UID)) { + if (!(ia_valid & ATTR_MODE)) { + iattr.ia_mode = inode->i_mode; + iattr.ia_valid |= ATTR_MODE; + } + iattr.ia_mode &= ~S_ISUID; + } + if ((inode->i_mode & S_ISGID) && (ia_valid & ATTR_GID)) { + if (!(iattr.ia_valid & ATTR_MODE)) { + iattr.ia_mode = inode->i_mode; + iattr.ia_valid |= ATTR_MODE; + } + iattr.ia_mode &= ~S_ISGID; + } - if (iattr.ia_valid & (ATTR_UID | ATTR_GID)) { - orig_ids[USRQUOTA] = dentry->d_inode->i_uid; - orig_ids[GRPQUOTA] = dentry->d_inode->i_gid; - handle = fsfilt_start_log(exp->exp_obd, dentry->d_inode, + orig_ids[USRQUOTA] = inode->i_uid; + orig_ids[GRPQUOTA] = inode->i_gid; + handle = fsfilt_start_log(exp->exp_obd, inode, FSFILT_OP_SETATTR, oti, 1); + + /* update inode EA only once when inode is suid bit marked. As + * on 2.6.x UID and GID may be set separately, we check here + * only one of them to avoid double setting. 
*/ + if (inode->i_mode & S_ISUID) + filter_update_fidea(exp, inode, handle, oa); } else { - handle = fsfilt_start(exp->exp_obd, dentry->d_inode, + handle = fsfilt_start(exp->exp_obd, inode, FSFILT_OP_SETATTR, oti); } @@ -1991,9 +2178,8 @@ int filter_setattr_internal(struct obd_export *exp, struct dentry *dentry, GOTO(out_unlock, rc = PTR_ERR(handle)); if (oa->o_valid & OBD_MD_FLFLAGS) { - rc = fsfilt_iocontrol(exp->exp_obd, dentry->d_inode, - NULL, EXT3_IOC_SETFLAGS, - (long)&iattr.ia_attr_flags); + rc = fsfilt_iocontrol(exp->exp_obd, inode, NULL, + EXT3_IOC_SETFLAGS, (long)&oa->o_flags); } else { rc = fsfilt_setattr(exp->exp_obd, dentry, handle, &iattr, 1); if (fcc != NULL) @@ -2004,9 +2190,14 @@ int filter_setattr_internal(struct obd_export *exp, struct dentry *dentry, fcc); } + if (locked) { + up(&inode->i_sem); + locked = 0; + } + rc = filter_finish_transno(exp, oti, rc); - - err = fsfilt_commit(exp->exp_obd, dentry->d_inode, handle, 0); + + err = fsfilt_commit(exp->exp_obd, inode, handle, 0); if (err) { CERROR("error on commit, err = %d\n", err); if (!rc) @@ -2014,14 +2205,14 @@ int filter_setattr_internal(struct obd_export *exp, struct dentry *dentry, } EXIT; out_unlock: - if (iattr.ia_valid & ATTR_SIZE) - up(&dentry->d_inode->i_sem); + if (locked) + up(&inode->i_sem); /* trigger quota release */ - if (iattr.ia_valid & (ATTR_SIZE | ATTR_UID | ATTR_GID)) { + if (ia_valid & (ATTR_SIZE | ATTR_UID | ATTR_GID)) { unsigned int cur_ids[MAXQUOTAS] = {oa->o_uid, oa->o_gid}; - int rc2= lquota_adjust(quota_interface, exp->exp_obd, cur_ids, - orig_ids, rc, FSFILT_OP_SETATTR); + int rc2 = lquota_adjust(quota_interface, exp->exp_obd, cur_ids, + orig_ids, rc, FSFILT_OP_SETATTR); CDEBUG(rc2 ? D_ERROR : D_QUOTA, "filter adjust qunit. 
(rc:%d)\n", rc2); } @@ -2041,16 +2232,13 @@ int filter_setattr(struct obd_export *exp, struct obdo *oa, int rc; ENTRY; - LASSERT(oti != NULL); + dentry = __filter_oa2dentry(exp->exp_obd, oa, + __FUNCTION__, 1); + if (IS_ERR(dentry)) + RETURN(PTR_ERR(dentry)); filter = &exp->exp_obd->u.filter; push_ctxt(&saved, &exp->exp_obd->obd_lvfs_ctxt, NULL); - - /* make sure that object is allocated. */ - dentry = filter_crow_object(exp->exp_obd, oa); - if (IS_ERR(dentry)) - GOTO(out_pop, rc = PTR_ERR(dentry)); - lock_kernel(); /* setting objects attributes (including owner/group) */ @@ -2060,7 +2248,7 @@ int filter_setattr(struct obd_export *exp, struct obdo *oa, res = ldlm_resource_get(exp->exp_obd->obd_namespace, NULL, res_id, LDLM_EXTENT, 0); - + if (res != NULL) { ns_lvbo = res->lr_namespace->ns_lvbo; if (ns_lvbo && ns_lvbo->lvbo_update) @@ -2069,7 +2257,7 @@ int filter_setattr(struct obd_export *exp, struct obdo *oa, } oa->o_valid = OBD_MD_FLID; - + /* Quota release need uid/gid info */ obdo_from_inode(oa, dentry->d_inode, FILTER_VALID_FLAGS | OBD_MD_FLUID | OBD_MD_FLGID); @@ -2078,7 +2266,6 @@ int filter_setattr(struct obd_export *exp, struct obdo *oa, out_unlock: unlock_kernel(); f_dput(dentry); -out_pop: pop_ctxt(&saved, &exp->exp_obd->obd_lvfs_ctxt, NULL); return rc; } @@ -2133,6 +2320,96 @@ static int filter_unpackmd(struct obd_export *exp, struct lov_stripe_md **lsmp, RETURN(lsm_size); } +static void filter_destroy_precreated(struct obd_export *exp, struct obdo *oa, + struct filter_obd *filter) +{ + struct obdo doa; /* XXX obdo on stack */ + __u64 last, id; + ENTRY; + LASSERT(oa); + + memset(&doa, 0, sizeof(doa)); + if (oa->o_valid & OBD_MD_FLGROUP) { + doa.o_valid |= OBD_MD_FLGROUP; + doa.o_gr = oa->o_gr; + } else { + doa.o_gr = 0; + } + doa.o_mode = S_IFREG; + + filter->fo_destroy_in_progress = 1; + down(&filter->fo_create_lock); + if (!filter->fo_destroy_in_progress) { + CERROR("%s: destroy_in_progress already cleared\n", + exp->exp_obd->obd_name); + 
up(&filter->fo_create_lock); + EXIT; + return; + } + + last = filter_last_id(filter, &doa); + CWARN("%s: deleting orphan objects from "LPU64" to "LPU64"\n", + exp->exp_obd->obd_name, oa->o_id + 1, last); + for (id = oa->o_id + 1; id <= last; id++) { + doa.o_id = id; + filter_destroy(exp, &doa, NULL, NULL, NULL); + } + + CDEBUG(D_HA, "%s: after destroy: set last_objids["LPU64"] = "LPU64"\n", + exp->exp_obd->obd_name, doa.o_gr, oa->o_id); + + spin_lock(&filter->fo_objidlock); + filter->fo_last_objids[doa.o_gr] = oa->o_id; + spin_unlock(&filter->fo_objidlock); + + filter->fo_destroy_in_progress = 0; + up(&filter->fo_create_lock); + + EXIT; +} + +/* returns a negative error or a nonnegative number of files to create */ +static int filter_should_precreate(struct obd_export *exp, struct obdo *oa, + obd_gr group) +{ + struct obd_device *obd = exp->exp_obd; + struct filter_obd *filter = &obd->u.filter; + int diff, rc; + ENTRY; + + diff = oa->o_id - filter_last_id(filter, oa); + CDEBUG(D_INFO, "filter_last_id() = "LPU64" -> diff = %d\n", + filter_last_id(filter, oa), diff); + + /* delete orphans request */ + if ((oa->o_valid & OBD_MD_FLFLAGS) && + (oa->o_flags & OBD_FL_DELORPHAN)) { + if (diff >= 0) + RETURN(diff); + if (-diff > OST_MAX_PRECREATE) { + CERROR("%s: ignoring bogus orphan destroy request: " + "obdid "LPU64" last_id "LPU64"\n", obd->obd_name, + oa->o_id, filter_last_id(filter, oa)); + RETURN(-EINVAL); + } + filter_destroy_precreated(exp, oa, filter); + rc = filter_update_last_objid(obd, group, 0); + if (rc) + CERROR("%s: unable to write lastobjid, but orphans" + "were deleted\n", obd->obd_name); + RETURN(0); + } else { + /* only precreate if group == 0 and o_id is specfied */ + if (!(oa->o_valid & OBD_FL_DELORPHAN) && + (group != 0 || oa->o_id == 0)) + RETURN(1); + + LASSERTF(diff >= 0,"%s: "LPU64" - "LPU64" = %d\n",obd->obd_name, + oa->o_id, filter_last_id(filter, oa), diff); + RETURN(diff); + } +} + static int filter_statfs(struct obd_device *obd, struct 
obd_statfs *osfs, unsigned long max_age) { @@ -2157,9 +2434,9 @@ static int filter_statfs(struct obd_device *obd, struct obd_statfs *osfs, filter_grant_sanity_check(obd, __FUNCTION__); - osfs->os_bavail -= min(osfs->os_bavail, - (filter->fo_tot_dirty + filter->fo_tot_pending + - osfs->os_bsize - 1) >> blockbits); + osfs->os_bavail -= min(osfs->os_bavail, GRANT_FOR_LLOG(obd) + + ((filter->fo_tot_dirty + filter->fo_tot_pending + + osfs->os_bsize - 1) >> blockbits)); /* set EROFS to state field if FS is mounted as RDONLY. The goal is to * stop creating files on MDS if OST is not good shape to create @@ -2169,187 +2446,243 @@ static int filter_statfs(struct obd_device *obd, struct obd_statfs *osfs, RETURN(rc); } -struct dentry * -filter_create_object(struct obd_device *obd, struct obdo *oa) +/* We rely on the fact that only one thread will be creating files in a given + * group at a time, which is why we don't need an atomic filter_get_new_id. + * Even if we had that atomic function, the following race would exist: + * + * thread 1: gets id x from filter_next_id + * thread 2: gets id (x + 1) from filter_next_id + * thread 2: creates object (x + 1) + * thread 1: tries to create object x, gets -ENOSPC + */ +static int filter_precreate(struct obd_device *obd, struct obdo *oa, + obd_gr group, int *num) { - struct dentry *dparent = NULL; - struct dentry *dchild = NULL; - struct lvfs_ucred uc = {0,}; - struct lvfs_run_ctxt saved; + struct dentry *dchild = NULL, *dparent = NULL; struct filter_obd *filter; - int cleanup_phase = 0; - int err = 0, rc = 0; + struct obd_statfs *osfs; + int err = 0, rc = 0, recreate_obj = 0, i; + unsigned long enough_time = jiffies + (obd_timeout * HZ) / 4; + __u64 next_id; void *handle = NULL; - obd_gr group = 0; ENTRY; filter = &obd->u.filter; - CDEBUG(D_INFO, "create objid "LPU64"\n", oa->o_id); - - if (oa->o_valid & OBD_MD_FLGROUP) - group = oa->o_gr; - - dparent = filter_parent_lock(obd, group, oa->o_id); - if (IS_ERR(dparent)) - GOTO(cleanup, 
dchild = dparent); - cleanup_phase = 1; - - /* check if object is in blacklist. This should be done under parent - * lock. */ - spin_lock(&filter->fo_blacklist_lock); - if (oa->o_id > filter->fo_blacklist[group].fe_start && - oa->o_id <= filter->fo_blacklist[group].fe_end) { - spin_unlock(&filter->fo_blacklist_lock); - GOTO(cleanup, dchild = ERR_PTR(-ENOENT)); + if ((oa->o_valid & OBD_MD_FLFLAGS) && + (oa->o_flags & OBD_FL_RECREATE_OBJS)) { + recreate_obj = 1; + } else { + OBD_ALLOC(osfs, sizeof(*osfs)); + if (osfs == NULL) + RETURN(-ENOMEM); + rc = filter_statfs(obd, osfs, jiffies - HZ); + if (rc == 0 && osfs->os_bavail < (osfs->os_blocks >> 10)) { + CDEBUG(D_HA, "OST out of space! avail "LPU64"\n", + osfs->os_bavail << + filter->fo_obt.obt_sb->s_blocksize_bits); + *num = 0; + rc = -ENOSPC; + } + OBD_FREE(osfs, sizeof(*osfs)); + if (rc) { + RETURN(rc); + } } - spin_unlock(&filter->fo_blacklist_lock); - /* check if object is already allocated */ - dchild = filter_fid2dentry(obd, dparent, - group, oa->o_id); - if (IS_ERR(dchild)) - GOTO(cleanup, dchild); + CDEBUG(D_HA, "%s: precreating %d objects in group "LPU64" at "LPU64"\n", + obd->obd_name, *num, group, oa->o_id); - if (dchild->d_inode) - GOTO(cleanup, dchild); + down(&filter->fo_create_lock); - /* create new object */ - handle = fsfilt_start_log(obd, dparent->d_inode, - FSFILT_OP_CREATE, NULL, 1); - if (IS_ERR(handle)) - GOTO(cleanup, dchild = handle); - cleanup_phase = 2; + for (i = 0; i < *num && err == 0; i++) { + int cleanup_phase = 0; - uc.luc_fsuid = oa->o_valid & OBD_MD_FLUID ? - oa->o_uid : 0; - uc.luc_fsgid = oa->o_valid & OBD_MD_FLGID ? 
- oa->o_gid : 0; - uc.luc_cap = current->cap_effective; - - cap_raise(uc.luc_cap, CAP_SYS_RESOURCE); + if (filter->fo_destroy_in_progress) { + CWARN("%s: precreate aborted by destroy\n", + obd->obd_name); + break; + } - push_ctxt(&saved, &obd->obd_lvfs_ctxt, &uc); - rc = ll_vfs_create(dparent->d_inode, dchild, S_IFREG, NULL); - pop_ctxt(&saved, &obd->obd_lvfs_ctxt, &uc); + if (recreate_obj) { + __u64 last_id; + next_id = oa->o_id; + last_id = filter_last_id(filter, oa); + if (next_id > last_id) { + CERROR("Error: Trying to recreate obj greater" + "than last id "LPD64" > "LPD64"\n", + next_id, last_id); + GOTO(cleanup, rc = -EINVAL); + } + } else + next_id = filter_last_id(filter, oa) + 1; + + CDEBUG(D_INFO, "precreate objid "LPU64"\n", next_id); + + dparent = filter_parent_lock(obd, group, next_id); + if (IS_ERR(dparent)) + GOTO(cleanup, rc = PTR_ERR(dparent)); + cleanup_phase = 1; + + dchild = filter_fid2dentry(obd, dparent, group, next_id); + if (IS_ERR(dchild)) + GOTO(cleanup, rc = PTR_ERR(dchild)); + cleanup_phase = 2; + + if (dchild->d_inode != NULL) { + /* This would only happen if lastobjid was bad on disk*/ + /* Could also happen if recreating missing obj but + * already exists + */ + if (recreate_obj) { + CERROR("%s: recreating existing object %.*s?\n", + obd->obd_name, dchild->d_name.len, + dchild->d_name.name); + } else { + CERROR("%s: Serious error: objid %.*s already " + "exists; is this filesystem corrupt?\n", + obd->obd_name, dchild->d_name.len, + dchild->d_name.name); + LBUG(); + } + GOTO(cleanup, rc = -EEXIST); + } - if (rc) { - CERROR("create failed rc = %d\n", rc); - f_dput(dchild); - GOTO(cleanup, dchild = ERR_PTR(rc)); - } + handle = fsfilt_start_log(obd, dparent->d_inode, + FSFILT_OP_CREATE, NULL, 1); + if (IS_ERR(handle)) + GOTO(cleanup, rc = PTR_ERR(handle)); + cleanup_phase = 3; + + /* We mark object SUID+SGID to flag it for accepting UID+GID + * from client on first write. 
Currently the permission bits + * on the OST are never used, so this is OK. */ + rc = ll_vfs_create(dparent->d_inode, dchild, + S_IFREG | S_ISUID | S_ISGID | 0666, NULL); + if (rc) { + CERROR("create failed rc = %d\n", rc); + GOTO(cleanup, rc); + } - /* grow last created object id. */ - filter_grow_last_id(filter, group, oa->o_id); - rc = filter_update_last_objid(obd, group, 0); - if (rc) { - CERROR("unable to write lastobjid, but " - "object is created, err = %d\n", - rc); - rc = 0; - } + if (!recreate_obj) { + filter_set_last_id(filter, oa, next_id); + err = filter_update_last_objid(obd, group, 0); + if (err) + CERROR("unable to write lastobjid " + "but file created\n"); + } - /* nobody else is touching this newly created object */ - LASSERT(dchild->d_inode); - - if (oa->o_valid & OBD_MD_FLFID) { - struct ll_fid fid; - - /* packing fid and converting it to LE for storing into EA. Here - * oa->o_stripe_idx should be filled by LOV and rest of fields - - * by client. */ - fid.id = cpu_to_le64(oa->o_fid); - fid.f_type = cpu_to_le32(oa->o_stripe_idx); - fid.generation = cpu_to_le32(oa->o_generation); - - down(&dchild->d_inode->i_sem); - rc = fsfilt_set_md(obd, dchild->d_inode, handle, - &fid, sizeof(struct ll_fid)); - up(&dchild->d_inode->i_sem); - if (rc) { - CERROR("store fid in object failed! 
rc:%d\n", rc); + cleanup: + switch(cleanup_phase) { + case 3: + err = fsfilt_commit(obd, dparent->d_inode, handle, 0); + if (err) { + CERROR("error on commit, err = %d\n", err); + if (!rc) + rc = err; + } + case 2: f_dput(dchild); - GOTO(cleanup, dchild = ERR_PTR(rc)); + case 1: + filter_parent_unlock(dparent); + case 0: + break; } - } else { - CDEBUG(D_HA, "create OSS object without fid!\n"); - } -cleanup: - switch(cleanup_phase) { - case 2: - err = fsfilt_commit(obd, dparent->d_inode, handle, 0); - if (err) { - CERROR("error on commit, err = %d\n", err); - if (!rc) { - rc = err; - f_dput(dchild); - dchild = ERR_PTR(rc); - } + if (rc) + break; + if (time_after(jiffies, enough_time)) { + CDEBUG(D_HA, "%s: precreate slow - want %d got %d \n", + obd->obd_name, *num, i); + break; } - case 1: - filter_parent_unlock(obd, dparent); - case 0: - break; } + *num = i; - RETURN(dchild); + up(&filter->fo_create_lock); + + CDEBUG(D_HA, "%s: created %d objects for group "LPU64": "LPU64"\n", + obd->obd_name, i, group, filter->fo_last_objids[group]); + + RETURN(rc); } -struct dentry * -filter_crow_object(struct obd_device *obd, struct obdo *oa) +static int filter_create(struct obd_export *exp, struct obdo *oa, + struct lov_stripe_md **ea, struct obd_trans_info *oti) { - struct filter_obd *filter; - struct dentry *dentry; + struct obd_device *obd = NULL; + struct lvfs_run_ctxt saved; + struct lov_stripe_md *lsm = NULL; obd_gr group = 0; + int rc = 0, diff; ENTRY; - if (OBD_FAIL_CHECK_ONCE(OBD_FAIL_OST_CROW_EIO)) - RETURN(ERR_PTR(-EIO)); - - filter = &obd->u.filter; - if (oa->o_valid & OBD_MD_FLGROUP) group = oa->o_gr; - /* try to create new object (if it is not yet) */ - dentry = filter_create_object(obd, oa); - if (IS_ERR(dentry)) { - CERROR("cannot create OSS object "LPU64"/"LPU64 - ", err = %d\n", oa->o_id, group, - (int)PTR_ERR(dentry)); - RETURN(dentry); + CDEBUG(D_INFO, "filter_create(od->o_gr="LPU64",od->o_id="LPU64")\n", + group, oa->o_id); + if (ea != NULL) { + lsm = *ea; 
+ if (lsm == NULL) { + rc = obd_alloc_memmd(exp, &lsm); + if (rc < 0) + RETURN(rc); + } + } + + obd = exp->exp_obd; + push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); + + if ((oa->o_valid & OBD_MD_FLFLAGS) && + (oa->o_flags & OBD_FL_RECREATE_OBJS)) { + if (oa->o_id > filter_last_id(&obd->u.filter, oa)) { + CERROR("recreate objid "LPU64" > last id "LPU64"\n", + oa->o_id, filter_last_id(&obd->u.filter, oa)); + rc = -EINVAL; + } else { + diff = 1; + rc = filter_precreate(obd, oa, group, &diff); + } + } else { + diff = filter_should_precreate(exp, oa, group); + if (diff > 0) { + oa->o_id = filter_last_id(&obd->u.filter, oa); + rc = filter_precreate(obd, oa, group, &diff); + oa->o_id = filter_last_id(&obd->u.filter, oa); + oa->o_valid = OBD_MD_FLID; + } + } + + pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); + if (rc && ea != NULL && *ea != lsm) { + obd_free_memmd(exp, &lsm); + } else if (rc == 0 && ea != NULL) { + /* XXX LOV STACKING: the lsm that is passed to us from + * LOV does not have valid lsm_oinfo data structs, so + * don't go touching that. This needs to be fixed in a + * big way. */ + lsm->lsm_object_id = oa->o_id; + *ea = lsm; } - RETURN(dentry); + RETURN(rc); } -/* destroys object @oa. Takes care of locking if @lock says that parent is not - * yet locked. Also drops parent lock before taking ldlm PW lock to avoid - * deadlocks in lock retraction related paths. - * - * This function does not change locking and does not imply hiden locking - * knowladge. After this fucntion is finished, all parents stay at the same - * locking state. - - * If @lock == 1, this means that parent of @oa is not locked and should be - * locked for destroy operation. However, after operation is finished, parent - * will be unlocked. The same is true about opposite case, when parent is - * already locked and filter_destroy_internal() does not need to lock it. 
*/ -static int -filter_destroy_internal(struct obd_export *exp, struct obdo *oa, - struct lov_stripe_md *md, struct obd_trans_info *oti, - int lock) +int filter_destroy(struct obd_export *exp, struct obdo *oa, + struct lov_stripe_md *md, struct obd_trans_info *oti, + struct obd_export *md_exp) { + unsigned int qcids[MAXQUOTAS] = {0, 0}; struct obd_device *obd; struct filter_obd *filter; - struct dentry *dchild = NULL, *dparent = NULL; + struct dentry *dchild = NULL, *dparent; struct lvfs_run_ctxt saved; void *handle = NULL; struct llog_cookie *fcc = NULL; - int rc, rc2, cleanup_phase = 0, have_prepared = 0; - unsigned int qcids[MAXQUOTAS] = {0, 0}; + int rc, rc2, cleanup_phase = 0; obd_gr group = 0; + struct iattr iattr; ENTRY; if (oa->o_valid & OBD_MD_FLGROUP) @@ -2359,16 +2692,9 @@ filter_destroy_internal(struct obd_export *exp, struct obdo *oa, filter = &obd->u.filter; push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); - - acquire_locks: - dparent = lock ? - filter_parent_lock(obd, group, oa->o_id): - filter_parent(obd, group, oa->o_id); - if (IS_ERR(dparent)) - GOTO(cleanup, rc = PTR_ERR(dparent)); cleanup_phase = 1; - dchild = filter_fid2dentry(obd, dparent, group, oa->o_id); + dchild = filter_fid2dentry(obd, NULL, group, oa->o_id); if (IS_ERR(dchild)) GOTO(cleanup, rc = PTR_ERR(dchild)); cleanup_phase = 2; @@ -2385,37 +2711,7 @@ filter_destroy_internal(struct obd_export *exp, struct obdo *oa, GOTO(cleanup, rc = -ENOENT); } - if (!have_prepared) { - /* If we're really going to destroy the object, get ready by - * getting the clients to discard their cached data. - * - * We have to drop the parent lock, because - * filter_prepare_destroy() will acquire a PW on the object, and - * we don't want to deadlock with an incoming write to the - * object, which has the extent PW and then wants to get the - * parent dentry to do the lookup. 
- * - * We dput the child because it's not worth the extra - * complication of condition the above code to skip it on the - * second time through. */ - f_dput(dchild); - - filter_unlock_dentry(obd, dparent); - filter_prepare_destroy(obd, oa->o_id); - - /* lock parent dentry again, to keep locking state the same as - * before calling this function. */ - if (!lock) - filter_lock_dentry(obd, dparent); - - have_prepared = 1; - goto acquire_locks; - } - - handle = fsfilt_start_log(obd, dparent->d_inode,FSFILT_OP_UNLINK,oti,1); - if (IS_ERR(handle)) - GOTO(cleanup, rc = PTR_ERR(handle)); - cleanup_phase = 3; + filter_prepare_destroy(obd, oa->o_id); /* Our MDC connection is established by the MDS to us */ if (oa->o_valid & OBD_MD_FLCOOKIE) { @@ -2424,13 +2720,58 @@ filter_destroy_internal(struct obd_export *exp, struct obdo *oa, memcpy(fcc, obdo_logcookie(oa), sizeof(*fcc)); } + /* we're gonna truncate it first in order to avoid possible deadlock: + * P1 P2 + * open trasaction open transaction + * down(i_zombie) down(i_zombie) + * restart transaction + * (see BUG 4180) -bzzz + */ + down(&dchild->d_inode->i_sem); + handle = fsfilt_start_log(obd, dchild->d_inode, FSFILT_OP_SETATTR, + NULL, 1); + if (IS_ERR(handle)) { + up(&dchild->d_inode->i_sem); + GOTO(cleanup, rc = PTR_ERR(handle)); + } + + iattr.ia_valid = ATTR_SIZE; + iattr.ia_size = 0; + rc = fsfilt_setattr(obd, dchild, handle, &iattr, 1); + rc2 = fsfilt_commit(obd, dchild->d_inode, handle, 0); + up(&dchild->d_inode->i_sem); + if (rc) + GOTO(cleanup, rc); + if (rc2) + GOTO(cleanup, rc = rc2); + + /* We don't actually need to lock the parent until we are unlinking + * here, and not while truncating above. That avoids holding the + * parent lock for a long time during truncate, which can block other + * threads from doing anything to objects in that directory. 
bug 7171 */ + dparent = filter_parent_lock(obd, group, oa->o_id); + if (IS_ERR(dparent)) + GOTO(cleanup, rc = PTR_ERR(dparent)); + cleanup_phase = 3; /* filter_parent_unlock */ + + down(&dchild->d_inode->i_sem); + handle = fsfilt_start_log(obd, dparent->d_inode,FSFILT_OP_UNLINK,oti,1); + if (IS_ERR(handle)) { + up(&dchild->d_inode->i_sem); + GOTO(cleanup, rc = PTR_ERR(handle)); + } + cleanup_phase = 4; /* fsfilt_commit */ + /* Quota release need uid/gid of inode */ obdo_from_inode(oa, dchild->d_inode, OBD_MD_FLUID|OBD_MD_FLGID); - rc = filter_unlink(obd, oa->o_id, dparent, dchild); + /* this drops dchild->d_inode->i_sem unconditionally */ + rc = filter_destroy_internal(obd, oa->o_id, dparent, dchild); + + EXIT; cleanup: switch(cleanup_phase) { - case 3: + case 4: if (fcc != NULL) { fsfilt_add_journal_cb(obd, 0, oti ? oti->oti_handle : handle, @@ -2443,12 +2784,11 @@ cleanup: if (!rc) rc = rc2; } + case 3: + filter_parent_unlock(dparent); case 2: f_dput(dchild); case 1: - if (lock) - filter_parent_unlock(obd, dparent); - case 0: pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); break; default: @@ -2463,206 +2803,7 @@ cleanup: FSFILT_OP_UNLINK); CDEBUG(rc2 ? D_ERROR : D_QUOTA, "filter adjust qunit! (rc:%d)\n", rc2); - - RETURN(rc); -} - -/* destroy oject with taking lock on parent first. 
*/ -int filter_destroy(struct obd_export *exp, struct obdo *oa, - struct lov_stripe_md *md, struct obd_trans_info *oti) -{ - int rc; - - ENTRY; - rc = filter_destroy_internal(exp, oa, md, oti, 1); - RETURN(rc); -} - -static int -filter_clear_orphans(struct obd_export *exp, struct obdo *oa) -{ - struct filter_obd *filter; - struct obd_device *obd; - struct obdo *doa; - obd_gr group = 0; - int rc, orphans; - __u64 last, id; - ENTRY; - - LASSERT(oa); - - OBD_RACE(OBD_FAIL_OST_CLEAR_ORPHANS_RACE); - - obd = exp->exp_obd; - filter = &obd->u.filter; - - if (oa->o_valid & OBD_MD_FLGROUP) - group = oa->o_gr; - - filter->fo_destroy_in_progress = 1; - - LOCK_PARENTS(obd, group); - if (!filter->fo_destroy_in_progress) { - UNLOCK_PARENTS(obd, group); - CDEBUG(D_HA, "cleanup orphans is already canceled\n"); - RETURN(0); - } - - last = filter_last_id(filter, group); - orphans = last - oa->o_id; - - if (orphans <= 0) { - filter->fo_destroy_in_progress = 0; - UNLOCK_PARENTS(obd, group); - CDEBUG(D_HA, "nothing to cleanup, MDS objid "LPU64 - " is not bigger than OST one "LPU64"\n", - oa->o_id, last); - RETURN(0); - } - - CDEBUG(D_HA, "adding orphans extent "LPU64":"LPU64"-"LPU64 - " to blacklist\n", group, oa->o_id, last); - - /* making all orphans entries in blacklist, that will deny to re-create - * them by CROW in filter_create_object(). This is done for case when - * orphans already exist on client and will be tried to write something - * and we want to stop them. - * - * In fact the issue is even worse, as we want to put in blacklist not - * only the objects which we just destroed, but also those which not yet - * created on OST (and OST has no idea about) but possibly existing on - * clients. 
*/ - spin_lock(&filter->fo_blacklist_lock); - filter->fo_blacklist[group].fe_start = oa->o_id; - filter->fo_blacklist[group].fe_end = last; - spin_unlock(&filter->fo_blacklist_lock); - - doa = obdo_alloc(); - if (doa == NULL) { - filter->fo_destroy_in_progress = 0; - UNLOCK_PARENTS(obd, group); - RETURN(-ENOMEM); - } - - doa->o_gr = group; - doa->o_mode = S_IFREG; - doa->o_valid = oa->o_valid & (OBD_MD_FLGROUP | OBD_MD_FLID); - - CDEBUG(D_ERROR, "%s:["LPU64"] deleting orphan objects from "LPU64" to " - LPU64"\n", exp->exp_obd->obd_name, doa->o_gr, oa->o_id, last); - - for (id = last; id > oa->o_id; id--) { - doa->o_id = id; - - /* remove object @doa. It will not lock parent as parents - * already locked. */ - filter_destroy_internal(exp, doa, NULL, NULL, 0); - - /* update last id just for case when OST will down in cleanup - * orphans time. */ - filter_set_last_id(filter, group, id); - - /* update last_id on disk periodicaly */ - if ((id & 1023) == 0) - filter_update_last_objid(obd, group, 0); - } - - UNLOCK_PARENTS(obd, group); - - /* return next free id to be used as a new start of sequence. As we - * return last id from OST, this will make sure that MDS will start new - * sequence from object id which is far from existing and there will not - * be object id sharing. 
*/ - oa->o_id = last + 1; - filter_set_last_id(filter, group, oa->o_id); - - CDEBUG(D_ERROR, "%s:["LPU64"] after destroy: set last_objids = " - LPU64"\n", exp->exp_obd->obd_name, doa->o_gr, oa->o_id); - - rc = filter_update_last_objid(obd, group, 1); - filter->fo_destroy_in_progress = 0; - - obdo_free(doa); - RETURN(rc); -} - -static int filter_create(struct obd_export *exp, struct obdo *oa, - struct lov_stripe_md **ea, struct obd_trans_info *oti) -{ - struct filter_export_data *fed; - struct lvfs_run_ctxt saved; - struct filter_obd *filter; - obd_gr group = oa->o_gr; - struct obd_device *obd; - int rc = 0; - ENTRY; - - obd = exp->exp_obd; - fed = &exp->exp_filter_data; - filter = &obd->u.filter; - - CDEBUG(D_INFO, "filter_create(od->o_gr="LPU64",od->o_id="LPU64")\n", - group, oa->o_id); - - if (oa->o_valid & OBD_MD_FLFLAGS && oa->o_flags == OBD_FL_DELORPHAN) { - push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); - - rc = filter_clear_orphans(exp, oa); - if (rc) { - CERROR("cannot clear orphans starting from " - LPU64", err = %d\n", oa->o_id, rc); - } - pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); - RETURN(rc); - } - - LASSERT(ergo(oa->o_valid & OBD_MD_FLFLAGS, - !!(oa->o_flags & OBD_FL_CREATE_CROW) != - !!(oa->o_flags & OBD_FL_RECREATE_OBJS))); - - /* echo, llog and other "create asap" cases. */ - if (OBDO_URGENT_CREATE(oa)) { - struct obd_statfs *osfs; - struct dentry *dentry; - - /* check space first. As this is real create and client does not - * have yet file created, this is good place to check space. */ - OBD_ALLOC_PTR(osfs); - if (!osfs) - RETURN(-ENOMEM); - - rc = filter_statfs(obd, osfs, jiffies - HZ); - if (rc == 0 && osfs->os_bavail < (osfs->os_blocks >> 10)) { - CDEBUG(D_HA, "OST out of space! 
avail "LPU64"\n", - osfs->os_bavail << filter->fo_obt.obt_sb->s_blocksize_bits); - rc = -ENOSPC; - } - - OBD_FREE_PTR(osfs); - if (rc) - RETURN(rc); - - dentry = filter_create_object(obd, oa); - if (!IS_ERR(dentry)) { - f_dput(dentry); - if (ea != NULL) { - struct lov_stripe_md *lsm = *ea; - if (lsm == NULL) { - rc = obd_alloc_memmd(exp, &lsm); - if (rc) - RETURN(rc); - } - lsm->lsm_object_id = oa->o_id; - *ea = lsm; - rc = 0; - } - } - } else { - CERROR("wrong @oa flags detected 0x%lx. Not an urgent " - "create and not recovery.\n", (unsigned long)oa->o_flags); - LBUG(); - } - RETURN(rc); + return rc; } /* NB start and end are used for punch, but not truncate */ @@ -2673,9 +2814,11 @@ static int filter_truncate(struct obd_export *exp, struct obdo *oa, int rc; ENTRY; - if (end != OBD_OBJECT_EOF) + if (end != OBD_OBJECT_EOF) { CERROR("PUNCH not supported, only truncate: end = "LPX64"\n", end); + RETURN(-EFAULT); + } CDEBUG(D_INODE, "calling truncate for object "LPU64", valid = "LPX64 ", o_size = "LPD64"\n", oa->o_id, oa->o_valid, start); @@ -2875,15 +3018,19 @@ int filter_iocontrol(unsigned int cmd, struct obd_export *exp, static int filter_health_check(struct obd_device *obd) { + struct filter_obd *filter = &obd->u.filter; int rc = 0; /* * health_check to return 0 on healthy * and 1 on unhealthy. 
*/ - if(obd->u.obt.obt_sb->s_flags & MS_RDONLY) + if (obd->u.obt.obt_sb->s_flags & MS_RDONLY) rc = 1; + LASSERT(filter->fo_health_check_filp != NULL); + rc |= !!lvfs_check_io_health(obd, filter->fo_health_check_filp); + return rc; } @@ -2905,6 +3052,7 @@ static struct obd_ops filter_obd_ops = { .o_precleanup = filter_precleanup, .o_cleanup = filter_cleanup, .o_connect = filter_connect, + .o_reconnect = filter_reconnect, .o_disconnect = filter_disconnect, .o_statfs = filter_statfs, .o_getattr = filter_getattr, @@ -2932,6 +3080,7 @@ static struct obd_ops filter_sanobd_ops = { .o_precleanup = filter_precleanup, .o_cleanup = filter_cleanup, .o_connect = filter_connect, + .o_reconnect = filter_reconnect, .o_disconnect = filter_disconnect, .o_statfs = filter_statfs, .o_getattr = filter_getattr, @@ -2951,7 +3100,7 @@ static struct obd_ops filter_sanobd_ops = { .o_iocontrol = filter_iocontrol, }; -quota_interface_t *quota_interface = NULL; +quota_interface_t *quota_interface; extern quota_interface_t filter_quota_interface; static int __init obdfilter_init(void) diff --git a/lustre/obdfilter/filter_internal.h b/lustre/obdfilter/filter_internal.h index b562df5..8de82d1 100644 --- a/lustre/obdfilter/filter_internal.h +++ b/lustre/obdfilter/filter_internal.h @@ -10,6 +10,7 @@ #endif #include #include +#include #include #define FILTER_LAYOUT_VERSION "2" @@ -22,34 +23,26 @@ # define OBD_FILTER_SAN_DEVICENAME "sanobdfilter" #endif -#define LAST_RCVD "last_rcvd" +#define HEALTH_CHECK "health_check" #define FILTER_INIT_OBJID 0 -#define FILTER_LR_SERVER_SIZE 512 - -#define FILTER_LR_CLIENT_START 8192 -#define FILTER_LR_CLIENT_SIZE 128 - -/* This limit is arbitrary, but for now we fit it in 1 page (32k clients) */ -#define FILTER_LR_MAX_CLIENTS (PAGE_SIZE * 8) - #define FILTER_SUBDIR_COUNT 32 /* set to zero for no subdirs */ #define FILTER_GROUPS 3 /* must be at least 3; not dynamic yet */ -#define FILTER_RECOVERY_TIMEOUT (obd_timeout * 5 * HZ / 2) /* *waves hands* */ +#define 
FILTER_ROCOMPAT_SUPP (0) -#define FILTER_ROCOMPAT_SUPP (0) +#define FILTER_RECOVERY_TIMEOUT (obd_timeout * 5 * HZ / 2) /* *waves hands* */ -#define FILTER_INCOMPAT_GROUPS 0x00000001 -#define FILTER_INCOMPAT_SUPP (FILTER_INCOMPAT_GROUPS) +#define FILTER_INCOMPAT_SUPP (OBD_INCOMPAT_GROUPS) #define FILTER_GRANT_CHUNK (2ULL * PTLRPC_MAX_BRW_SIZE) +#define GRANT_FOR_LLOG(obd) 16 /* Data stored per server at the head of the last_rcvd file. In le32 order. * Try to keep this the same as mds_server_data so we might one day merge. */ struct filter_server_data { __u8 fsd_uuid[40]; /* server UUID */ - __u64 fsd_unused; /* was fsd_last_objid - don't use for now */ + __u64 fsd_last_transno_new;/* future last completed transaction ID */ __u64 fsd_last_transno; /* last completed transaction ID */ __u64 fsd_mount_count; /* FILTER incarnation number */ __u32 fsd_feature_compat; /* compatible feature flags */ @@ -62,7 +55,9 @@ struct filter_server_data { __u64 fsd_catalog_oid; /* recovery catalog object id */ __u32 fsd_catalog_ogen; /* recovery catalog inode generation */ __u8 fsd_peeruuid[40]; /* UUID of MDS associated with this OST */ - __u8 fsd_padding[FILTER_LR_SERVER_SIZE - 140]; + __u32 fsd_ost_index; /* index number of OST in LOV */ + __u32 fsd_mds_index; /* index number of MDS in LMV */ + __u8 fsd_padding[LR_SERVER_SIZE - 148]; }; /* Data stored per client in the last_rcvd file. In le32 order. 
*/ @@ -70,17 +65,20 @@ struct filter_client_data { __u8 fcd_uuid[40]; /* client UUID */ __u64 fcd_last_rcvd; /* last completed transaction ID */ __u64 fcd_last_xid; /* client RPC xid for the last transaction */ - __u8 fcd_padding[FILTER_LR_CLIENT_SIZE - 56]; + __u8 fcd_padding[LR_CLIENT_SIZE - 56]; }; -#define FILTER_DENTRY_MAGIC 0x9efba101 -#define FILTER_FLAG_DESTROY 0x0001 /* destroy dentry on last file close */ - /* Limit the returned fields marked valid to those that we actually might set */ #define FILTER_VALID_FLAGS (OBD_MD_FLTYPE | OBD_MD_FLMODE | OBD_MD_FLGENER |\ OBD_MD_FLSIZE | OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ|\ OBD_MD_FLATIME | OBD_MD_FLMTIME | OBD_MD_FLCTIME) +struct filter_fid { + struct ll_fid ff_fid; + __u64 ff_objid; + __u64 ff_group; +}; + enum { LPROC_FILTER_READ_BYTES = 0, LPROC_FILTER_WRITE_BYTES = 1, @@ -102,19 +100,20 @@ struct dentry *filter_fid2dentry(struct obd_device *, struct dentry *dir, struct dentry *__filter_oa2dentry(struct obd_device *obd, struct obdo *oa, const char *what, int quiet); #define filter_oa2dentry(obd, oa) __filter_oa2dentry(obd, oa, __FUNCTION__, 0) -#define filter_oa2dentry_quiet(obd, oa) __filter_oa2dentry(obd, oa, __FUNCTION__, 1) int filter_finish_transno(struct obd_export *, struct obd_trans_info *, int rc); -__u64 filter_last_id(struct filter_obd *, int group); +__u64 filter_next_id(struct filter_obd *, struct obdo *); +__u64 filter_last_id(struct filter_obd *, struct obdo *); +int filter_update_fidea(struct obd_export *exp, struct inode *inode, + void *handle, struct obdo *oa); int filter_update_server_data(struct obd_device *, struct file *, struct filter_server_data *, int force_sync); int filter_update_last_objid(struct obd_device *, obd_gr, int force_sync); int filter_common_setup(struct obd_device *, obd_count len, void *buf, void *option); int filter_destroy(struct obd_export *exp, struct obdo *oa, - struct lov_stripe_md *md, struct obd_trans_info *); -struct dentry *filter_crow_object(struct obd_device 
*obd, struct obdo *oa); - + struct lov_stripe_md *md, struct obd_trans_info *, + struct obd_export *); int filter_setattr_internal(struct obd_export *exp, struct dentry *dentry, struct obdo *oa, struct obd_trans_info *oti); int filter_setattr(struct obd_export *exp, struct obdo *oa, @@ -139,6 +138,7 @@ int filter_brw(int cmd, struct obd_export *, struct obdo *, void flip_into_page_cache(struct inode *inode, struct page *new_page); /* filter_io_*.c */ +struct filter_iobuf; int filter_commitrw_write(struct obd_export *exp, struct obdo *oa, int objcount, struct obd_ioobj *obj, int niocount, struct niobuf_local *res, struct obd_trans_info *oti, @@ -148,13 +148,15 @@ long filter_grant(struct obd_export *exp, obd_size current_grant, obd_size want, obd_size fs_space_left); void filter_grant_commit(struct obd_export *exp, int niocount, struct niobuf_local *res); -int filter_alloc_iobuf(struct filter_obd *, int rw, int num_pages, void **ret); -void filter_free_iobuf(void *iobuf); -int filter_iobuf_add_page(struct obd_device *obd, void *iobuf, +struct filter_iobuf *filter_alloc_iobuf(struct filter_obd *, int rw, + int num_pages); +void filter_free_iobuf(struct filter_iobuf *iobuf); +int filter_iobuf_add_page(struct obd_device *obd, struct filter_iobuf *iobuf, struct inode *inode, struct page *page); -void *filter_iobuf_get(struct ptlrpc_thread *thread, struct filter_obd *filter); -void filter_iobuf_put(void *iobuf); -int filter_direct_io(int rw, struct dentry *dchild, void *iobuf, +void *filter_iobuf_get(struct filter_obd *filter, struct obd_trans_info *oti); +void filter_iobuf_put(struct filter_obd *filter, struct filter_iobuf *iobuf, + struct obd_trans_info *oti); +int filter_direct_io(int rw, struct dentry *dchild, struct filter_iobuf *iobuf, struct obd_export *exp, struct iattr *attr, struct obd_trans_info *oti, void **wait_handle); diff --git a/lustre/obdfilter/filter_log.c b/lustre/obdfilter/filter_log.c index 4a797c9..5421993 100644 --- a/lustre/obdfilter/filter_log.c 
+++ b/lustre/obdfilter/filter_log.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include "filter_internal.h" diff --git a/lustre/osc/osc_create.c b/lustre/osc/osc_create.c index 557e036..a823d98 100644 --- a/lustre/osc/osc_create.c +++ b/lustre/osc/osc_create.c @@ -48,87 +48,224 @@ #include #include "osc_internal.h" -int oscc_recovering(struct osc_creator *oscc) +static int osc_interpret_create(struct ptlrpc_request *req, void *data, int rc) { - int recov = 0; + struct osc_creator *oscc; + struct ost_body *body = NULL; + ENTRY; + if (req->rq_repmsg) { + body = lustre_swab_repbuf(req, 0, sizeof(*body), + lustre_swab_ost_body); + if (body == NULL && rc == 0) + rc = -EPROTO; + } + + oscc = req->rq_async_args.pointer_arg[0]; + LASSERT(oscc && (oscc->oscc_obd != LP_POISON)); + spin_lock(&oscc->oscc_lock); - recov = oscc->oscc_flags & OSCC_FLAG_RECOVERING; - spin_unlock(&oscc->oscc_lock); + oscc->oscc_flags &= ~OSCC_FLAG_CREATING; + if (rc == -ENOSPC || rc == -EROFS) { + oscc->oscc_flags |= OSCC_FLAG_NOSPC; + if (body && rc == -ENOSPC) { + oscc->oscc_grow_count = OST_MIN_PRECREATE; + oscc->oscc_last_id = body->oa.o_id; + } + spin_unlock(&oscc->oscc_lock); + DEBUG_REQ(D_INODE, req, "OST out of space, flagging"); + } else if (rc != 0 && rc != -EIO) { + oscc->oscc_flags |= OSCC_FLAG_RECOVERING; + oscc->oscc_grow_count = OST_MIN_PRECREATE; + spin_unlock(&oscc->oscc_lock); + DEBUG_REQ(D_ERROR, req, + "unknown rc %d from async create: failing oscc", rc); + ptlrpc_fail_import(req->rq_import, req->rq_import_generation); + } else { + if (rc == 0) { + oscc->oscc_flags &= ~OSCC_FLAG_LOW; + if (body) { + int diff = body->oa.o_id - oscc->oscc_last_id; + if (diff != oscc->oscc_grow_count) + oscc->oscc_grow_count = + max(diff/3, OST_MIN_PRECREATE); + oscc->oscc_last_id = body->oa.o_id; + } + } + spin_unlock(&oscc->oscc_lock); + } - return recov; + CDEBUG(D_HA, "preallocated through id "LPU64" (last used "LPU64")\n", + oscc->oscc_last_id, oscc->oscc_next_id); + + 
wake_up(&oscc->oscc_waitq); + RETURN(rc); } -static int osc_check_state(struct obd_export *exp) +static int oscc_internal_create(struct osc_creator *oscc) { - int rc; + struct ptlrpc_request *request; + struct ost_body *body; + int size = sizeof(*body); ENTRY; - /* ->os_state contains positive error code on remote OST. To convert it - * to usual errno form we have to make an sign inversion. */ - spin_lock(&exp->exp_obd->obd_osfs_lock); - rc = -exp->exp_obd->obd_osfs.os_state; - spin_unlock(&exp->exp_obd->obd_osfs_lock); - - RETURN(rc); + spin_lock(&oscc->oscc_lock); + if (oscc->oscc_grow_count < OST_MAX_PRECREATE && + !(oscc->oscc_flags & (OSCC_FLAG_LOW | OSCC_FLAG_RECOVERING)) && + (__s64)(oscc->oscc_last_id - oscc->oscc_next_id) <= + (oscc->oscc_grow_count / 4 + 1)) { + oscc->oscc_flags |= OSCC_FLAG_LOW; + oscc->oscc_grow_count *= 2; + } + + if (oscc->oscc_grow_count > OST_MAX_PRECREATE / 2) + oscc->oscc_grow_count = OST_MAX_PRECREATE / 2; + + if (oscc->oscc_flags & OSCC_FLAG_CREATING || + oscc->oscc_flags & OSCC_FLAG_RECOVERING) { + spin_unlock(&oscc->oscc_lock); + RETURN(0); + } + oscc->oscc_flags |= OSCC_FLAG_CREATING; + spin_unlock(&oscc->oscc_lock); + + request = ptlrpc_prep_req(oscc->oscc_obd->u.cli.cl_import, + LUSTRE_OST_VERSION, OST_CREATE, 1, + &size, NULL); + if (request == NULL) { + spin_lock(&oscc->oscc_lock); + oscc->oscc_flags &= ~OSCC_FLAG_CREATING; + spin_unlock(&oscc->oscc_lock); + RETURN(-ENOMEM); + } + + request->rq_request_portal = OST_CREATE_PORTAL; //XXX FIXME bug 249 + body = lustre_msg_buf(request->rq_reqmsg, 0, sizeof(*body)); + + spin_lock(&oscc->oscc_lock); + body->oa.o_id = oscc->oscc_last_id + oscc->oscc_grow_count; + body->oa.o_valid |= OBD_MD_FLID; + spin_unlock(&oscc->oscc_lock); + CDEBUG(D_HA, "preallocating through id "LPU64" (last used "LPU64")\n", + body->oa.o_id, oscc->oscc_next_id); + + request->rq_replen = lustre_msg_size(1, &size); + + request->rq_async_args.pointer_arg[0] = oscc; + request->rq_interpret_reply = 
osc_interpret_create; + ptlrpcd_add_req(request); + + RETURN(0); +} + +static int oscc_has_objects(struct osc_creator *oscc, int count) +{ + int have_objs; + spin_lock(&oscc->oscc_lock); + have_objs = ((__s64)(oscc->oscc_last_id - oscc->oscc_next_id) >= count); + spin_unlock(&oscc->oscc_lock); + + if (!have_objs) + oscc_internal_create(oscc); + + return have_objs; } -static int osc_check_nospc(struct obd_export *exp) +static int oscc_wait_for_objects(struct osc_creator *oscc, int count) { - __u64 blocks, bavail; + int have_objs; + int ost_full; + int osc_invalid; + + have_objs = oscc_has_objects(oscc, count); + + spin_lock(&oscc->oscc_lock); + ost_full = (oscc->oscc_flags & OSCC_FLAG_NOSPC); + spin_unlock(&oscc->oscc_lock); + + osc_invalid = oscc->oscc_obd->u.cli.cl_import->imp_invalid; + + return have_objs || ost_full || osc_invalid; +} + +static int oscc_precreate(struct osc_creator *oscc, int wait) +{ + struct l_wait_info lwi = { 0 }; int rc = 0; ENTRY; - spin_lock(&exp->exp_obd->obd_osfs_lock); - blocks = exp->exp_obd->obd_osfs.os_blocks; - bavail = exp->exp_obd->obd_osfs.os_bavail; - spin_unlock(&exp->exp_obd->obd_osfs_lock); - - /* return 1 if available space smaller then (blocks >> 10) of all space - * on OST. The main point of this water mark is to stop create files at - * some point, to let all created and opened files finish possible - * writes. 
*/ - if (blocks > 0 && bavail < (blocks >> 10)) - rc = 1; + if (oscc_has_objects(oscc, oscc->oscc_grow_count / 2)) + RETURN(0); + + if (!wait) + RETURN(0); + + /* no rc check -- a no-INTR, no-TIMEOUT wait can't fail */ + l_wait_event(oscc->oscc_waitq, oscc_wait_for_objects(oscc, 1), &lwi); + + if (!oscc_has_objects(oscc, 1) && (oscc->oscc_flags & OSCC_FLAG_NOSPC)) + rc = -ENOSPC; + + if (oscc->oscc_obd->u.cli.cl_import->imp_invalid) + rc = -EIO; RETURN(rc); } +int oscc_recovering(struct osc_creator *oscc) +{ + int recov = 0; + + spin_lock(&oscc->oscc_lock); + recov = oscc->oscc_flags & OSCC_FLAG_RECOVERING; + spin_unlock(&oscc->oscc_lock); + + return recov; +} + int osc_create(struct obd_export *exp, struct obdo *oa, struct lov_stripe_md **ea, struct obd_trans_info *oti) { + struct lov_stripe_md *lsm; struct osc_creator *oscc = &exp->exp_obd->u.cli.cl_oscc; int try_again = 1, rc = 0; ENTRY; + LASSERT(oa); + LASSERT(ea); + + if ((oa->o_valid & OBD_MD_FLGROUP) && (oa->o_gr != 0)) + RETURN(osc_real_create(exp, oa, ea, oti)); + + if ((oa->o_valid & OBD_MD_FLFLAGS) && + oa->o_flags == OBD_FL_RECREATE_OBJS) { + RETURN(osc_real_create(exp, oa, ea, oti)); + } - LASSERT(oa != NULL); - LASSERT(ea != NULL); - /* this is the special case where create removes orphans */ - if (oa->o_valid & OBD_MD_FLFLAGS && oa->o_flags == OBD_FL_DELORPHAN) { + if ((oa->o_valid & OBD_MD_FLFLAGS) && + oa->o_flags == OBD_FL_DELORPHAN) { spin_lock(&oscc->oscc_lock); if (oscc->oscc_flags & OSCC_FLAG_SYNC_IN_PROGRESS) { spin_unlock(&oscc->oscc_lock); - return -EBUSY; + RETURN(-EBUSY); } if (!(oscc->oscc_flags & OSCC_FLAG_RECOVERING)) { spin_unlock(&oscc->oscc_lock); - return 0; + RETURN(0); } oscc->oscc_flags |= OSCC_FLAG_SYNC_IN_PROGRESS; spin_unlock(&oscc->oscc_lock); CDEBUG(D_HA, "%s: oscc recovery started\n", oscc->oscc_obd->obd_name); - LASSERT(oscc->oscc_flags & OSCC_FLAG_RECOVERING); + + /* delete from next_id on up */ + oa->o_valid |= OBD_MD_FLID; + oa->o_id = oscc->oscc_next_id - 1; 
CDEBUG(D_HA, "%s: deleting to next_id: "LPU64"\n", oscc->oscc_obd->obd_name, oa->o_id); rc = osc_real_create(exp, oa, ea, NULL); - if (oscc->oscc_obd == NULL) { - CWARN("the obd for oscc %p has been freed\n", oscc); - RETURN(rc); - } spin_lock(&oscc->oscc_lock); oscc->oscc_flags &= ~OSCC_FLAG_SYNC_IN_PROGRESS; @@ -136,44 +273,31 @@ int osc_create(struct obd_export *exp, struct obdo *oa, if (rc == -ENOSPC) oscc->oscc_flags |= OSCC_FLAG_NOSPC; oscc->oscc_flags &= ~OSCC_FLAG_RECOVERING; - CDEBUG(D_HA, "%s: oscc recovery finished: %d\n", - oscc->oscc_obd->obd_name, rc); + oscc->oscc_last_id = oa->o_id; + CDEBUG(D_HA, "%s: oscc recovery finished, last_id: " + LPU64", rc: %d\n", oscc->oscc_obd->obd_name, + oscc->oscc_last_id, rc); cfs_waitq_signal(&oscc->oscc_waitq); } else { CDEBUG(D_ERROR, "%s: oscc recovery failed: %d\n", oscc->oscc_obd->obd_name, rc); } spin_unlock(&oscc->oscc_lock); - RETURN(rc); - } - LASSERT(ergo(oa->o_valid & OBD_MD_FLFLAGS, - !!(oa->o_flags & OBD_FL_CREATE_CROW) != - !!(oa->o_flags & OBD_FL_RECREATE_OBJS))); - - /* perform urgent create if asked or import is not crow capable or - * ENOSPC case if detected. */ - if (OBDO_URGENT_CREATE(oa) || !IMP_CROW_ABLE(class_exp2cliimp(exp)) || - osc_check_nospc(exp)) { - CDEBUG(D_HA, "perform urgent create\n"); - oa->o_flags &= ~OBD_FL_CREATE_CROW; - if (!oa->o_flags) - oa->o_valid &= ~OBD_MD_FLFLAGS; - rc = osc_real_create(exp, oa, ea, oti); + RETURN(rc); } - /* check OST fs state. */ - rc = osc_check_state(exp); - if (rc) { - CDEBUG(D_HA,"OST is in bad shape to create objects, err %d\n", - rc); - RETURN(rc); + lsm = *ea; + if (lsm == NULL) { + rc = obd_alloc_memmd(exp, &lsm); + if (rc < 0) + RETURN(rc); } - + while (try_again) { - /* if orphans are being recovered, then we must wait until it is - * finished before we can continue with create. */ + /* If orphans are being recovered, then we must wait until + it is finished before we can continue with create. 
*/ if (oscc_recovering(oscc)) { struct l_wait_info lwi; @@ -186,7 +310,7 @@ int osc_create(struct obd_export *exp, struct obdo *oa, !oscc_recovering(oscc), &lwi); LASSERT(rc == 0 || rc == -ETIMEDOUT); if (rc == -ETIMEDOUT) { - CDEBUG(D_HA, "%p: timeout waiting on recovery\n", + CDEBUG(D_HA,"%p: timeout waiting on recovery\n", oscc); RETURN(rc); } @@ -200,22 +324,33 @@ int osc_create(struct obd_export *exp, struct obdo *oa, break; } - if (oscc->oscc_flags & OSCC_FLAG_NOSPC) { + if (oscc->oscc_last_id >= oscc->oscc_next_id) { + memcpy(oa, &oscc->oscc_oa, sizeof(*oa)); + oa->o_id = oscc->oscc_next_id; + lsm->lsm_object_id = oscc->oscc_next_id; + *ea = lsm; + oscc->oscc_next_id++; + try_again = 0; + + CDEBUG(D_HA, "%s: set oscc_next_id = "LPU64"\n", + exp->exp_obd->obd_name, oscc->oscc_next_id); + } else if (oscc->oscc_flags & OSCC_FLAG_NOSPC) { rc = -ENOSPC; spin_unlock(&oscc->oscc_lock); break; } - - oscc->oscc_next_id++; - oa->o_id = oscc->oscc_next_id; - try_again = 0; spin_unlock(&oscc->oscc_lock); + rc = oscc_precreate(oscc, try_again); + if (rc) + break; + } + if (rc == 0) CDEBUG(D_HA, "%s: returning objid "LPU64"\n", oscc->oscc_obd->u.cli.cl_import->imp_target_uuid.uuid, - oa->o_id); - } - + lsm->lsm_object_id); + else if (*ea == NULL) + obd_free_memmd(exp, &lsm); RETURN(rc); } @@ -227,10 +362,18 @@ void oscc_init(struct obd_device *obd) return; oscc = &obd->u.cli.cl_oscc; - memset(oscc, 0, sizeof(*oscc)); - oscc->oscc_obd = obd; + memset(oscc, 0, sizeof(*oscc)); + INIT_LIST_HEAD(&oscc->oscc_list); + init_waitqueue_head(&oscc->oscc_waitq); spin_lock_init(&oscc->oscc_lock); + oscc->oscc_obd = obd; + oscc->oscc_grow_count = OST_MIN_PRECREATE; + + oscc->oscc_next_id = 2; + oscc->oscc_last_id = 1; oscc->oscc_flags |= OSCC_FLAG_RECOVERING; cfs_waitq_init(&oscc->oscc_waitq); + /* XXX the export handle should give the oscc the last object */ + /* oed->oed_oscc.oscc_last_id = exph->....; */ } diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c index 
0f80243..23f535d 100644 --- a/lustre/osc/osc_request.c +++ b/lustre/osc/osc_request.c @@ -53,6 +53,7 @@ #include #include +#include #include #include #include "osc_internal.h" @@ -176,8 +177,8 @@ static int osc_getattr_async(struct obd_export *exp, struct obdo *oa, struct osc_getattr_async_args *aa; ENTRY; - request = ptlrpc_prep_req(class_exp2cliimp(exp), OST_GETATTR, 1, - &size, NULL); + request = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_OST_VERSION, + OST_GETATTR, 1, &size, NULL); if (!request) RETURN(-ENOMEM); @@ -203,8 +204,8 @@ static int osc_getattr(struct obd_export *exp, struct obdo *oa, int rc, size = sizeof(*body); ENTRY; - request = ptlrpc_prep_req(class_exp2cliimp(exp), OST_GETATTR, 1, - &size, NULL); + request = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_OST_VERSION, + OST_GETATTR, 1, &size, NULL); if (!request) RETURN(-ENOMEM); @@ -247,8 +248,8 @@ static int osc_setattr(struct obd_export *exp, struct obdo *oa, int rc, size = sizeof(*body); ENTRY; - request = ptlrpc_prep_req(class_exp2cliimp(exp), OST_SETATTR, 1, &size, - NULL); + request = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_OST_VERSION, + OST_SETATTR, 1, &size, NULL); if (!request) RETURN(-ENOMEM); @@ -285,8 +286,8 @@ static int osc_setattr_async(struct obd_export *exp, struct obdo *oa, LASSERT(oti); - request = ptlrpc_prep_req(class_exp2cliimp(exp), OST_SETATTR, 1, - &size, NULL); + request = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_OST_VERSION, + OST_SETATTR, 1, &size, NULL); if (!request) RETURN(-ENOMEM); @@ -307,7 +308,6 @@ static int osc_setattr_async(struct obd_export *exp, struct obdo *oa, int osc_real_create(struct obd_export *exp, struct obdo *oa, struct lov_stripe_md **ea, struct obd_trans_info *oti) { - struct osc_creator *oscc = &exp->exp_obd->u.cli.cl_oscc; struct ptlrpc_request *request; struct ost_body *body; struct lov_stripe_md *lsm; @@ -324,8 +324,8 @@ int osc_real_create(struct obd_export *exp, struct obdo *oa, RETURN(rc); } - request = 
ptlrpc_prep_req(class_exp2cliimp(exp), OST_CREATE, - 1, &size, NULL); + request = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_OST_VERSION, + OST_CREATE, 1, &size, NULL); if (!request) GOTO(out, rc = -ENOMEM); @@ -353,16 +353,6 @@ int osc_real_create(struct obd_export *exp, struct obdo *oa, GOTO (out_req, rc = -EPROTO); } - if ((oa->o_valid & OBD_MD_FLFLAGS) && oa->o_flags == OBD_FL_DELORPHAN) { - struct obd_import *imp = class_exp2cliimp(exp); - /* MDS declares last known object, OSS responses - * with next possible object -bzzz */ - spin_lock(&oscc->oscc_lock); - oscc->oscc_next_id = body->oa.o_id; - spin_unlock(&oscc->oscc_lock); - CDEBUG(D_HA, "%s: set nextid "LPD64" after recovery\n", - imp->imp_target_uuid.uuid, oa->o_id); - } memcpy(oa, &body->oa, sizeof(*oa)); /* This should really be sent by the OST */ @@ -411,8 +401,8 @@ static int osc_punch(struct obd_export *exp, struct obdo *oa, RETURN(-EINVAL); } - request = ptlrpc_prep_req(class_exp2cliimp(exp), OST_PUNCH, 1, &size, - NULL); + request = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_OST_VERSION, + OST_PUNCH, 1, &size, NULL); if (!request) RETURN(-ENOMEM); @@ -458,8 +448,8 @@ static int osc_sync(struct obd_export *exp, struct obdo *oa, RETURN(-EINVAL); } - request = ptlrpc_prep_req(class_exp2cliimp(exp), OST_SYNC, 1, &size, - NULL); + request = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_OST_VERSION, + OST_SYNC, 1, &size, NULL); if (!request) RETURN(-ENOMEM); @@ -493,7 +483,8 @@ static int osc_sync(struct obd_export *exp, struct obdo *oa, } static int osc_destroy(struct obd_export *exp, struct obdo *oa, - struct lov_stripe_md *ea, struct obd_trans_info *oti) + struct lov_stripe_md *ea, struct obd_trans_info *oti, + struct obd_export *md_export) { struct ptlrpc_request *request; struct ost_body *body; @@ -505,8 +496,8 @@ static int osc_destroy(struct obd_export *exp, struct obdo *oa, RETURN(-EINVAL); } - request = ptlrpc_prep_req(class_exp2cliimp(exp), OST_DESTROY, 1, - &size, NULL); + request = 
ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_OST_VERSION, + OST_DESTROY, 1, &size, NULL); if (!request) RETURN(-ENOMEM); @@ -628,6 +619,17 @@ void osc_wake_cache_waiters(struct client_obd *cli) EXIT; } +static void osc_init_grant(struct client_obd *cli, struct obd_connect_data *ocd) +{ + spin_lock(&cli->cl_loi_list_lock); + cli->cl_avail_grant = ocd->ocd_grant; + spin_unlock(&cli->cl_loi_list_lock); + + CDEBUG(D_CACHE, "setting cl_avail_grant: %ld cl_lost_grant: %ld\n", + cli->cl_avail_grant, cli->cl_lost_grant); + LASSERT(cli->cl_avail_grant >= 0); +} + static void osc_update_grant(struct client_obd *cli, struct ost_body *body) { client_obd_list_lock(&cli->cl_loi_list_lock); @@ -783,10 +785,15 @@ static int osc_brw_prep_request(int cmd, struct obd_import *imp,struct obdo *oa, size[2] = niocount * sizeof(*niobuf); OBD_FAIL_RETURN(OBD_FAIL_OSC_BRW_PREP_REQ, -ENOMEM); - req = ptlrpc_prep_req_pool(imp, opc, 3, size, NULL, pool); + req = ptlrpc_prep_req_pool(imp, LUSTRE_OST_VERSION, opc, 3, + size, NULL, pool); if (req == NULL) RETURN (-ENOMEM); + /* FIXME bug 249. 
Also see bug 7198 */ + if (imp->imp_connect_data.ocd_connect_flags & OBD_CONNECT_REQPORTAL) + req->rq_request_portal = OST_IO_PORTAL; + if (opc == OST_WRITE) desc = ptlrpc_prep_bulk_imp (req, page_count, BULK_GET_SOURCE, OST_BULK_PORTAL); @@ -1141,9 +1148,9 @@ static obd_count max_unfragmented_pages(struct brw_page *pg, obd_count pages) LASSERT (pages > 0); offset = pg->off & (CFS_PAGE_SIZE - 1); - for (;;) { - pages--; - if (pages == 0) /* that's all */ + for (;;) { + pages--; + if (pages == 0) /* that's all */ return count; if (offset + pg->count < CFS_PAGE_SIZE) @@ -1154,14 +1161,16 @@ static obd_count max_unfragmented_pages(struct brw_page *pg, obd_count pages) if (offset != 0) /* doesn't start on page boundary */ return count; - count++; - } + count++; + } } static int osc_brw(int cmd, struct obd_export *exp, struct obdo *oa, struct lov_stripe_md *md, obd_count page_count, struct brw_page *pga, struct obd_trans_info *oti) { + struct obdo *saved_oa = NULL; + int rc; ENTRY; if (cmd & OBD_BRW_CHECK) { @@ -1174,9 +1183,10 @@ static int osc_brw(int cmd, struct obd_export *exp, struct obdo *oa, RETURN(0); } + rc = 0; + while (page_count) { obd_count pages_per_brw; - int rc; if (page_count > PTLRPC_MAX_BRW_PAGES) pages_per_brw = PTLRPC_MAX_BRW_PAGES; @@ -1186,15 +1196,32 @@ static int osc_brw(int cmd, struct obd_export *exp, struct obdo *oa, sort_brw_pages(pga, pages_per_brw); pages_per_brw = max_unfragmented_pages(pga, pages_per_brw); + if (saved_oa != NULL) { + /* restore previously saved oa */ + *oa = *saved_oa; + } else if (page_count > pages_per_brw) { + /* save a copy of oa (brw will clobber it) */ + OBD_ALLOC(saved_oa, sizeof(*saved_oa)); + if (saved_oa == NULL) { + CERROR("Can't save oa (ENOMEM)\n"); + RETURN(-ENOMEM); + } + *saved_oa = *oa; + } + rc = osc_brw_internal(cmd, exp, oa, md, pages_per_brw, pga); if (rc != 0) - RETURN(rc); + break; page_count -= pages_per_brw; pga += pages_per_brw; } - RETURN(0); + + if (saved_oa != NULL) + OBD_FREE(saved_oa, 
sizeof(*saved_oa)); + + RETURN(rc); } static int osc_brw_async(int cmd, struct obd_export *exp, struct obdo *oa, @@ -1241,6 +1268,9 @@ static void osc_check_rpcs(struct client_obd *cli); static void osc_exit_cache(struct client_obd *cli, struct osc_async_page *oap, int sent); +/* This maintains the lists of pending pages to read/write for a given object + * (lop). This is used by osc_check_rpcs->osc_next_loi() and loi_list_maint() + * to quickly find objects that are ready to send an RPC. */ static int lop_makes_rpc(struct client_obd *cli, struct loi_oap_pages *lop, int cmd) { @@ -1410,9 +1440,13 @@ static void osc_ap_completion(struct client_obd *cli, struct obdo *oa, if (rc == 0 && oa != NULL) { if (oa->o_valid & OBD_MD_FLBLOCKS) - oap->oap_loi->loi_blocks = oa->o_blocks; + oap->oap_loi->loi_lvb.lvb_blocks = oa->o_blocks; if (oa->o_valid & OBD_MD_FLMTIME) - oap->oap_loi->loi_mtime = oa->o_mtime; + oap->oap_loi->loi_lvb.lvb_mtime = oa->o_mtime; + if (oa->o_valid & OBD_MD_FLATIME) + oap->oap_loi->loi_lvb.lvb_atime = oa->o_atime; + if (oa->o_valid & OBD_MD_FLCTIME) + oap->oap_loi->loi_lvb.lvb_ctime = oa->o_ctime; } if (oap->oap_oig) { @@ -1771,6 +1805,8 @@ static int osc_send_oap_rpc(struct client_obd *cli, struct lov_oinfo *loi, !list_empty(&(LOI)->loi_read_lop.lop_urgent), \ args) \ +/* This is called by osc_check_rpcs() to find which objects have pages that + * we could be sending. These lists are maintained by lop_makes_rpc(). 
*/ struct lov_oinfo *osc_next_loi(struct client_obd *cli) { ENTRY; @@ -2042,7 +2078,7 @@ static int osc_queue_async_io(struct obd_export *exp, struct lov_stripe_md *lsm, #ifdef HAVE_QUOTA_SUPPORT if ((cmd & OBD_BRW_WRITE) && !(cmd & OBD_BRW_NOQUOTA)){ struct obd_async_page_ops *ops; - struct obdo *oa = NULL; + struct obdo *oa; oa = obdo_alloc(); if (oa == NULL) @@ -2313,6 +2349,7 @@ static int sanosc_brw_read(struct obd_export *exp, struct obdo *oa, struct niobuf_remote *nioptr; struct obd_ioobj *iooptr; int rc, size[3] = {sizeof(*body)}, mapped = 0; + struct obd_import *imp = class_exp2cliimp(exp); int swab; ENTRY; @@ -2321,11 +2358,16 @@ static int sanosc_brw_read(struct obd_export *exp, struct obdo *oa, size[1] = sizeof(struct obd_ioobj); size[2] = page_count * sizeof(*nioptr); - request = ptlrpc_prep_req(class_exp2cliimp(exp), OST_SAN_READ, 3, - size, NULL); + request = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_OST_VERSION, + OST_SAN_READ, 3, size, NULL); if (!request) RETURN(-ENOMEM); + /* FIXME bug 249 */ + /* See bug 7198 */ + if (imp->imp_connect_data.ocd_connect_flags & OBD_CONNECT_REQPORTAL) + request->rq_request_portal = OST_IO_PORTAL; + body = lustre_msg_buf(request->rq_reqmsg, 0, sizeof(*body)); iooptr = lustre_msg_buf(request->rq_reqmsg, 1, sizeof(*iooptr)); nioptr = lustre_msg_buf(request->rq_reqmsg, 2, @@ -2443,6 +2485,7 @@ static int sanosc_brw_write(struct obd_export *exp, struct obdo *oa, struct ost_body *body; struct niobuf_remote *nioptr; struct obd_ioobj *iooptr; + struct obd_import *imp = class_exp2cliimp(exp); int rc, size[3] = {sizeof(*body)}, mapped = 0; int swab; ENTRY; @@ -2450,11 +2493,17 @@ static int sanosc_brw_write(struct obd_export *exp, struct obdo *oa, size[1] = sizeof(struct obd_ioobj); size[2] = page_count * sizeof(*nioptr); - request = ptlrpc_prep_req_pool(class_exp2cliimp(exp), OST_SAN_WRITE, + request = ptlrpc_prep_req_pool(class_exp2cliimp(exp), + LUSTRE_OST_VERSION, OST_SAN_WRITE, 3, size, NULL, cli->cl_rq_pool); if 
(!request) RETURN(-ENOMEM); + /* FIXME bug 249 */ + /* See bug 7198 */ + if (imp->imp_connect_data.ocd_connect_flags & OBD_CONNECT_REQPORTAL) + request->rq_request_portal = OST_IO_PORTAL; + body = lustre_msg_buf(request->rq_reqmsg, 0, sizeof (*body)); iooptr = lustre_msg_buf(request->rq_reqmsg, 1, sizeof (*iooptr)); nioptr = lustre_msg_buf(request->rq_reqmsg, 2, @@ -2687,7 +2736,8 @@ static int osc_enqueue(struct obd_export *exp, struct lov_stripe_md *lsm, if (*flags & LDLM_FL_HAS_INTENT) { int size[2] = {sizeof(struct ldlm_request), sizeof(lvb)}; - req = ptlrpc_prep_req(class_exp2cliimp(exp), LDLM_ENQUEUE, 1, + req = ptlrpc_prep_req(class_exp2cliimp(exp), + LUSTRE_DLM_VERSION, LDLM_ENQUEUE, 1, size, NULL); if (req == NULL) RETURN(-ENOMEM); @@ -2715,9 +2765,7 @@ static int osc_enqueue(struct obd_export *exp, struct lov_stripe_md *lsm, if ((*flags & LDLM_FL_HAS_INTENT && rc == ELDLM_LOCK_ABORTED) || !rc) { CDEBUG(D_INODE,"got kms "LPU64" blocks "LPU64" mtime "LPU64"\n", lvb.lvb_size, lvb.lvb_blocks, lvb.lvb_mtime); - lsm->lsm_oinfo->loi_rss = lvb.lvb_size; - lsm->lsm_oinfo->loi_mtime = lvb.lvb_mtime; - lsm->lsm_oinfo->loi_blocks = lvb.lvb_blocks; + lsm->lsm_oinfo->loi_lvb = lvb; } RETURN(rc); @@ -2811,8 +2859,8 @@ static int osc_statfs(struct obd_device *obd, struct obd_statfs *osfs, * during mount that would help a bit). Having relative timestamps * is not so great if request processing is slow, while absolute * timestamps are not ideal because they need time synchronization. 
*/ - request = ptlrpc_prep_req(obd->u.cli.cl_import, OST_STATFS, 0, - NULL, NULL); + request = ptlrpc_prep_req(obd->u.cli.cl_import, LUSTRE_OST_VERSION, + OST_STATFS,0,NULL,NULL); if (!request) RETURN(-ENOMEM); @@ -2994,8 +3042,8 @@ static int osc_get_info(struct obd_export *exp, obd_count keylen, obd_id *reply; char *bufs[1] = {key}; int rc; - req = ptlrpc_prep_req(class_exp2cliimp(exp), OST_GET_INFO, 1, - &keylen, bufs); + req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_OST_VERSION, + OST_GET_INFO, 1, &keylen, bufs); if (req == NULL) RETURN(-ENOMEM); @@ -3031,6 +3079,17 @@ static int osc_set_info(struct obd_export *exp, obd_count keylen, OBD_FAIL_TIMEOUT(OBD_FAIL_OSC_SHUTDOWN, 10); + if (KEY_IS("next_id")) { + if (vallen != sizeof(obd_id)) + RETURN(-EINVAL); + obd->u.cli.cl_oscc.oscc_next_id = *((obd_id*)val) + 1; + CDEBUG(D_HA, "%s: set oscc_next_id = "LPU64"\n", + exp->exp_obd->obd_name, + obd->u.cli.cl_oscc.oscc_next_id); + + RETURN(0); + } + if (KEY_IS("unlinked")) { struct osc_creator *oscc = &obd->u.cli.cl_oscc; spin_lock(&oscc->oscc_lock); @@ -3039,7 +3098,6 @@ static int osc_set_info(struct obd_export *exp, obd_count keylen, RETURN(0); } - if (KEY_IS("initial_recov")) { struct obd_import *imp = exp->exp_obd->u.cli.cl_import; if (vallen != sizeof(int)) @@ -3062,7 +3120,8 @@ static int osc_set_info(struct obd_export *exp, obd_count keylen, RETURN(-EINVAL); - req = ptlrpc_prep_req(imp, OST_SET_INFO, 2, size, bufs); + req = ptlrpc_prep_req(imp, LUSTRE_OST_VERSION, OST_SET_INFO, + 2, size, bufs); if (req == NULL) RETURN(-ENOMEM); @@ -3133,6 +3192,33 @@ static int osc_llog_finish(struct obd_device *obd, int count) RETURN(rc); } +static int osc_reconnect(struct obd_export *exp, struct obd_device *obd, + struct obd_uuid *cluuid, + struct obd_connect_data *data) +{ + struct client_obd *cli = &obd->u.cli; + + if (data != NULL && (data->ocd_connect_flags & OBD_CONNECT_GRANT)) { + long lost_grant; + + spin_lock(&cli->cl_loi_list_lock); + data->ocd_grant = 
cli->cl_avail_grant ?: + 2 * cli->cl_max_pages_per_rpc << PAGE_SHIFT; + lost_grant = cli->cl_lost_grant; + cli->cl_lost_grant = 0; + spin_unlock(&cli->cl_loi_list_lock); + + CDEBUG(D_CACHE, "request ocd_grant: %d cl_avail_grant: %ld " + "cl_lost_grant: %ld\n", data->ocd_grant, + cli->cl_avail_grant, lost_grant); + CDEBUG(D_RPCTRACE, "ocd_connect_flags: "LPX64" ocd_version: %d" + " ocd_grant: %d\n", data->ocd_connect_flags, + data->ocd_version, data->ocd_grant); + } + + RETURN(0); +} + static int osc_disconnect(struct obd_export *exp) { struct obd_device *obd = class_exp2obd(exp); @@ -3171,8 +3257,7 @@ static int osc_import_event(struct obd_device *obd, break; } case IMP_EVENT_INACTIVE: { - if (obd->obd_observer) - rc = obd_notify(obd->obd_observer, obd, 0); + rc = obd_notify_observer(obd, obd, OBD_NOTIFY_INACTIVE); break; } case IMP_EVENT_INVALIDATE: { @@ -3200,8 +3285,20 @@ static int osc_import_event(struct obd_device *obd, oscc->oscc_flags &= ~OSCC_FLAG_NOSPC; spin_unlock(&oscc->oscc_lock); } - if (obd->obd_observer) - rc = obd_notify(obd->obd_observer, obd, 1); + rc = obd_notify_observer(obd, obd, OBD_NOTIFY_ACTIVE); + break; + } + case IMP_EVENT_OCD: { + struct obd_connect_data *ocd = &imp->imp_connect_data; + + if (ocd->ocd_connect_flags & OBD_CONNECT_GRANT) + osc_init_grant(&obd->u.cli, ocd); + + /* See bug 7198 */ + if (ocd->ocd_connect_flags & OBD_CONNECT_REQPORTAL) + imp->imp_client->cli_request_portal =OST_REQUEST_PORTAL; + + rc = obd_notify_observer(obd, obd, OBD_NOTIFY_OCD); break; } default: @@ -3252,13 +3349,20 @@ static int osc_precleanup(struct obd_device *obd, int stage) int rc = 0; ENTRY; - if (stage < 2) - RETURN(0); - - rc = obd_llog_finish(obd, 0); - if (rc != 0) - CERROR("failed to cleanup llogging subsystems\n"); - + switch (stage) { + case OBD_CLEANUP_EARLY: { + struct obd_import *imp; + imp = obd->u.cli.cl_import; + CDEBUG(D_HA, "Deactivating import %s\n", obd->obd_name); + /* ptlrpc_abort_inflight to stop an mds_lov_synchronize */ + 
ptlrpc_deactivate_import(imp); + break; + } + case OBD_CLEANUP_SELF_EXP: + rc = obd_llog_finish(obd, 0); + if (rc != 0) + CERROR("failed to cleanup llogging subsystems\n"); + } RETURN(rc); } @@ -3280,9 +3384,10 @@ int osc_cleanup(struct obd_device *obd) /* free memory of osc quota cache */ lquota_cleanup(quota_interface, obd); + rc = client_obd_cleanup(obd); + ptlrpc_free_rq_pool(cli->cl_rq_pool); - rc = client_obd_cleanup(obd); ptlrpcd_decref(); RETURN(rc); } @@ -3296,6 +3401,7 @@ struct obd_ops osc_obd_ops = { .o_add_conn = client_import_add_conn, .o_del_conn = client_import_del_conn, .o_connect = client_connect_import, + .o_reconnect = osc_reconnect, .o_disconnect = osc_disconnect, .o_statfs = osc_statfs, .o_packmd = osc_packmd, @@ -3337,6 +3443,7 @@ struct obd_ops sanosc_obd_ops = { .o_add_conn = client_import_add_conn, .o_del_conn = client_import_del_conn, .o_connect = client_connect_import, + .o_reconnect = osc_reconnect, .o_disconnect = client_disconnect_export, .o_statfs = osc_statfs, .o_packmd = osc_packmd, @@ -3363,6 +3470,9 @@ struct obd_ops sanosc_obd_ops = { }; #endif +static quota_interface_t *quota_interface; +extern quota_interface_t osc_quota_interface; + int __init osc_init(void) { struct lprocfs_static_vars lvars; diff --git a/lustre/ost/ost_handler.c b/lustre/ost/ost_handler.c index 562eb09..c81868d 100644 --- a/lustre/ost/ost_handler.c +++ b/lustre/ost/ost_handler.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include #include diff --git a/lustre/ptlrpc/import.c b/lustre/ptlrpc/import.c index 0c8ce7b..45abef3 100644 --- a/lustre/ptlrpc/import.c +++ b/lustre/ptlrpc/import.c @@ -35,6 +35,7 @@ #include #include #include +#include #include "ptlrpc_internal.h" @@ -126,14 +127,13 @@ int ptlrpc_set_import_discon(struct obd_import *imp) "service will %s.\n", target_len, target_start, libcfs_nid2str(imp->imp_connection->c_peer.nid), - imp->imp_replayable - ? "wait for recovery to complete" - : "fail"); + imp->imp_replayable ? 
+ "wait for recovery to complete" : "fail"); if (obd_dump_on_timeout) libcfs_debug_dumplog(); - CWARN("%s: connection lost to %s@%s\n", + CDEBUG(D_HA, "%s: connection lost to %s@%s\n", imp->imp_obd->obd_name, imp->imp_target_uuid.uuid, imp->imp_connection->c_remote_uuid.uuid); @@ -330,11 +330,10 @@ int ptlrpc_connect_import(struct obd_import *imp, char * new_uuid) imp->imp_conn_cnt++; imp->imp_resend_replay = 0; - if (imp->imp_remote_handle.cookie == 0) { + if (!lustre_handle_is_used(&imp->imp_remote_handle)) initial_connect = 1; - } else { + else committed_before_reconnect = imp->imp_peer_committed_transno; - } spin_unlock_irqrestore(&imp->imp_lock, flags); @@ -351,7 +350,25 @@ int ptlrpc_connect_import(struct obd_import *imp, char * new_uuid) if (rc) GOTO(out, rc); - request = ptlrpc_prep_req(imp, imp->imp_connect_op, 4, size, tmp); + if (imp->imp_initial_recov_bk && initial_connect && + /* last in list */ + (imp->imp_conn_current->oic_item.next == &imp->imp_conn_list)) { + CDEBUG(D_HA, "Last connection attempt (%d) for %s\n", + imp->imp_conn_cnt, imp->imp_target_uuid.uuid); + /* Don't retry if connect fails */ + rc = 0; + obd_set_info(obd->obd_self_export, + strlen("initial_recov"), "initial_recov", + sizeof(rc), &rc); + } + + rc = obd_reconnect(imp->imp_obd->obd_self_export, obd, + &obd->obd_uuid, &imp->imp_connect_data); + if (rc) + GOTO(out, rc); + + request = ptlrpc_prep_req(imp, LUSTRE_OBD_VERSION, imp->imp_connect_op, + 4, size, tmp); if (!request) GOTO(out, rc = -ENOMEM); @@ -365,7 +382,7 @@ int ptlrpc_connect_import(struct obd_import *imp, char * new_uuid) request->rq_replen = lustre_msg_size(1, size); request->rq_interpret_reply = ptlrpc_connect_interpret; - LASSERT (sizeof (*aa) <= sizeof (request->rq_async_args)); + CLASSERT(sizeof (*aa) <= sizeof (request->rq_async_args)); aa = (struct ptlrpc_connect_async_args *)&request->rq_async_args; memset(aa, 0, sizeof *aa); @@ -374,7 +391,7 @@ int ptlrpc_connect_import(struct obd_import *imp, char * new_uuid) 
if (aa->pcaa_initial_connect) { imp->imp_replayable = 1; - /* On an initial connect, we don't know which one of a + /* On an initial connect, we don't know which one of a failover server pair is up. Don't wait long. */ request->rq_timeout = max((int)(obd_timeout / 20), 5); } @@ -537,6 +554,7 @@ finish: } } else { struct obd_connect_data *ocd; + struct obd_export *exp; ocd = lustre_swab_repbuf(request, 0, sizeof *ocd, lustre_swab_connect); @@ -555,11 +573,39 @@ finish: ocd->ocd_connect_flags); imp->imp_connect_data = *ocd; - - if (IMP_CROW_ABLE(imp)) { - CDEBUG(D_HA, "connected to CROW capable target: %s\n", - imp->imp_target_uuid.uuid); + if (!ocd->ocd_ibits_known && + ocd->ocd_connect_flags & OBD_CONNECT_IBITS) + CERROR("Inodebits aware server returned zero compatible" + " bits?\n"); + + exp = class_conn2export(&imp->imp_dlm_handle); + LASSERT(exp); + exp->exp_connect_flags = ocd->ocd_connect_flags; + class_export_put(exp); + + obd_import_event(imp->imp_obd, imp, IMP_EVENT_OCD); + + if ((ocd->ocd_connect_flags & OBD_CONNECT_VERSION) && + (ocd->ocd_version > LUSTRE_VERSION_CODE + + LUSTRE_VERSION_OFFSET_WARN)) { + /* Sigh, some compilers do not like #ifdef in the middle + of macro arguments */ +#ifdef __KERNEL__ + char *action = "upgrading this client"; +#else + char *action = "recompiling this application"; +#endif + + CWARN("Server %s version (%d.%d.%d.%d) is much newer. 
" + "Consider %s (%s).\n", + imp->imp_target_uuid.uuid, + OBD_OCD_VERSION_MAJOR(ocd->ocd_version), + OBD_OCD_VERSION_MINOR(ocd->ocd_version), + OBD_OCD_VERSION_PATCH(ocd->ocd_version), + OBD_OCD_VERSION_FIX(ocd->ocd_version), + action, LUSTRE_VERSION_STRING); } + if (imp->imp_conn_current != NULL) { list_del(&imp->imp_conn_current->oic_item); list_add(&imp->imp_conn_current->oic_item, @@ -579,8 +625,33 @@ finish: out: if (rc != 0) { IMPORT_SET_STATE(imp, LUSTRE_IMP_DISCON); - if (aa->pcaa_initial_connect && !imp->imp_initial_recov) { + if (aa->pcaa_initial_connect && !imp->imp_initial_recov) ptlrpc_deactivate_import(imp); + + if (rc == -EPROTO) { + struct obd_connect_data *ocd; + ocd = lustre_swab_repbuf(request, 0, + sizeof *ocd, + lustre_swab_connect); + if (ocd && + (ocd->ocd_connect_flags & OBD_CONNECT_VERSION) && + (ocd->ocd_version != LUSTRE_VERSION_CODE)) { + /* Actually servers are only supposed to refuse + connection from liblustre clients, so we should + never see this from VFS context */ + CERROR("Server %s version (%d.%d.%d.%d) " + "refused connection from this client " + "as too old version (%s). 
Client must " + "be recompiled\n", + imp->imp_target_uuid.uuid, + OBD_OCD_VERSION_MAJOR(ocd->ocd_version), + OBD_OCD_VERSION_MINOR(ocd->ocd_version), + OBD_OCD_VERSION_PATCH(ocd->ocd_version), + OBD_OCD_VERSION_FIX(ocd->ocd_version), + LUSTRE_VERSION_STRING); + IMPORT_SET_STATE(imp, LUSTRE_IMP_CLOSED); + } + RETURN(-EPROTO); } ptlrpc_maybe_ping_import_soon(imp); @@ -619,7 +690,8 @@ static int signal_completed_replay(struct obd_import *imp) LASSERT(atomic_read(&imp->imp_replay_inflight) == 0); atomic_inc(&imp->imp_replay_inflight); - req = ptlrpc_prep_req(imp, OBD_PING, 0, NULL, NULL); + req = ptlrpc_prep_req(imp, LUSTRE_OBD_VERSION, OBD_PING, + 0, NULL, NULL); if (!req) { atomic_dec(&imp->imp_replay_inflight); RETURN(-ENOMEM); @@ -726,8 +798,6 @@ int ptlrpc_import_recovery_state_machine(struct obd_import *imp) } if (imp->imp_state == LUSTRE_IMP_RECOVER) { - char *nidstr; - CDEBUG(D_HA, "reconnected to %s@%s\n", imp->imp_target_uuid.uuid, imp->imp_connection->c_remote_uuid.uuid); @@ -740,15 +810,10 @@ int ptlrpc_import_recovery_state_machine(struct obd_import *imp) deuuidify(imp->imp_target_uuid.uuid, NULL, &target_start, &target_len); - nidstr = libcfs_nid2str(imp->imp_connection->c_peer.nid); - - LCONSOLE_INFO("Connection restored to service %.*s using nid " - "%s.\n", target_len, target_start, nidstr); - - CWARN("%s: connection restored to %s@%s\n", - imp->imp_obd->obd_name, - imp->imp_target_uuid.uuid, - imp->imp_connection->c_remote_uuid.uuid); + LCONSOLE_INFO("%s: Connection restored to service %.*s " + "using nid %s.\n", imp->imp_obd->obd_name, + target_len, target_start, + libcfs_nid2str(imp->imp_connection->c_peer.nid)); } if (imp->imp_state == LUSTRE_IMP_FULL) { @@ -797,7 +862,8 @@ int ptlrpc_disconnect_import(struct obd_import *imp) spin_unlock_irqrestore(&imp->imp_lock, flags); - request = ptlrpc_prep_req(imp, rq_opc, 0, NULL, NULL); + request = ptlrpc_prep_req(imp, LUSTRE_OBD_VERSION, rq_opc, + 0, NULL, NULL); if (request) { /* We are disconnecting, do 
not retry a failed DISCONNECT rpc if * it fails. We can get through the above with a down server diff --git a/lustre/ptlrpc/llog_client.c b/lustre/ptlrpc/llog_client.c index d714a84..b250f0c 100644 --- a/lustre/ptlrpc/llog_client.c +++ b/lustre/ptlrpc/llog_client.c @@ -39,6 +39,7 @@ #endif #include +#include #include #include #include diff --git a/lustre/ptlrpc/llog_net.c b/lustre/ptlrpc/llog_net.c index 735ed31..0d21734 100644 --- a/lustre/ptlrpc/llog_net.c +++ b/lustre/ptlrpc/llog_net.c @@ -42,6 +42,7 @@ #endif #include +#include #include #include #include diff --git a/lustre/ptlrpc/llog_server.c b/lustre/ptlrpc/llog_server.c index cf588d3..1d81f2e 100644 --- a/lustre/ptlrpc/llog_server.c +++ b/lustre/ptlrpc/llog_server.c @@ -37,6 +37,7 @@ #endif #include +#include #include #include #include diff --git a/lustre/ptlrpc/pack_generic.c b/lustre/ptlrpc/pack_generic.c index dd4063f..774021b 100644 --- a/lustre/ptlrpc/pack_generic.c +++ b/lustre/ptlrpc/pack_generic.c @@ -46,6 +46,14 @@ int lustre_msg_swabbed(struct lustre_msg *msg) return (msg->magic == __swab32(PTLRPC_MSG_MAGIC)); } +int lustre_msg_check_version(struct lustre_msg *msg, __u32 version) +{ + if (lustre_msg_swabbed(msg)) + return (__swab32(msg->version) & LUSTRE_VERSION_MASK) != version; + + return (msg->version & LUSTRE_VERSION_MASK) != version; +} + static void lustre_init_msg (struct lustre_msg *msg, int count, int *lens, char **bufs) { @@ -321,7 +329,7 @@ int lustre_unpack_msg(struct lustre_msg *m, int len) RETURN (-EINVAL); } - if (m->version != PTLRPC_MSG_VERSION) { + if ((m->version & ~LUSTRE_VERSION_MASK) != PTLRPC_MSG_VERSION) { CERROR("wrong lustre_msg version %#08x\n", m->version); RETURN (-EINVAL); } @@ -494,6 +502,16 @@ void *lustre_swab_repbuf(struct ptlrpc_request *req, int index, int min_size, void lustre_swab_connect(struct obd_connect_data *ocd) { __swab64s (&ocd->ocd_connect_flags); + __swab32s (&ocd->ocd_version); + __swab32s (&ocd->ocd_grant); + __swab32s (&ocd->ocd_index); + 
__swab32s (&ocd->ocd_unused); + __swab64s (&ocd->ocd_ibits_known); + CLASSERT(offsetof(typeof(*ocd), padding2) != 0); + CLASSERT(offsetof(typeof(*ocd), padding3) != 0); + CLASSERT(offsetof(typeof(*ocd), padding4) != 0); + CLASSERT(offsetof(typeof(*ocd), padding5) != 0); + CLASSERT(offsetof(typeof(*ocd), padding6) != 0); } void lustre_swab_obdo (struct obdo *o) @@ -531,7 +549,7 @@ void lustre_swab_obd_statfs (struct obd_statfs *os) __swab64s (&os->os_bavail); __swab64s (&os->os_files); __swab64s (&os->os_ffree); - /* no need to swap os_fsid */ + /* no need to swab os_fsid */ __swab32s (&os->os_bsize); __swab32s (&os->os_namelen); __swab64s (&os->os_maxbytes); @@ -605,8 +623,8 @@ void lustre_swab_mds_body (struct mds_body *b) __swab32s (&b->suppgid); __swab32s (&b->eadatasize); __swab32s (&b->aclsize); - __swab32s (&b->padding_2); - __swab32s (&b->padding_3); + __swab32s (&b->max_mdsize); + __swab32s (&b->max_cookiesize); __swab32s (&b->padding_4); } @@ -629,7 +647,7 @@ static void lustre_swab_obd_dqblk (struct obd_dqblk *b) __swab64s (&b->dqb_btime); __swab64s (&b->dqb_itime); __swab32s (&b->dqb_valid); - __swab32s (&b->padding); + CLASSERT(offsetof(typeof(*b), padding) != 0); } void lustre_swab_obd_quotactl (struct obd_quotactl *q) @@ -659,7 +677,13 @@ void lustre_swab_mds_rec_setattr (struct mds_rec_setattr *sa) __swab32s (&sa->sa_uid); __swab32s (&sa->sa_gid); __swab32s (&sa->sa_attr_flags); - __swab32s (&sa->sa_padding); + CLASSERT(offsetof(typeof(*sa), sa_padding) != 0); +} + +void lustre_swab_mds_rec_join (struct mds_rec_join *jr) +{ + __swab64s(&jr->jr_headsize); + lustre_swab_ll_fid(&jr->jr_fid); } void lustre_swab_mds_rec_create (struct mds_rec_create *cr) @@ -675,11 +699,11 @@ void lustre_swab_mds_rec_create (struct mds_rec_create *cr) __swab64s (&cr->cr_time); __swab64s (&cr->cr_rdev); __swab32s (&cr->cr_suppgid); - __swab32s (&cr->cr_padding_1); - __swab32s (&cr->cr_padding_2); - __swab32s (&cr->cr_padding_3); - __swab32s (&cr->cr_padding_4); - __swab32s 
(&cr->cr_padding_5); + CLASSERT(offsetof(typeof(*cr), cr_padding_1) != 0); + CLASSERT(offsetof(typeof(*cr), cr_padding_2) != 0); + CLASSERT(offsetof(typeof(*cr), cr_padding_3) != 0); + CLASSERT(offsetof(typeof(*cr), cr_padding_4) != 0); + CLASSERT(offsetof(typeof(*cr), cr_padding_5) != 0); } void lustre_swab_mds_rec_link (struct mds_rec_link *lk) @@ -693,10 +717,10 @@ void lustre_swab_mds_rec_link (struct mds_rec_link *lk) lustre_swab_ll_fid (&lk->lk_fid1); lustre_swab_ll_fid (&lk->lk_fid2); __swab64s (&lk->lk_time); - __swab32s (&lk->lk_padding_1); - __swab32s (&lk->lk_padding_2); - __swab32s (&lk->lk_padding_3); - __swab32s (&lk->lk_padding_4); + CLASSERT(offsetof(typeof(*lk), lk_padding_1) != 0); + CLASSERT(offsetof(typeof(*lk), lk_padding_2) != 0); + CLASSERT(offsetof(typeof(*lk), lk_padding_3) != 0); + CLASSERT(offsetof(typeof(*lk), lk_padding_4) != 0); } void lustre_swab_mds_rec_unlink (struct mds_rec_unlink *ul) @@ -710,10 +734,10 @@ void lustre_swab_mds_rec_unlink (struct mds_rec_unlink *ul) lustre_swab_ll_fid (&ul->ul_fid1); lustre_swab_ll_fid (&ul->ul_fid2); __swab64s (&ul->ul_time); - __swab32s (&ul->ul_padding_1); - __swab32s (&ul->ul_padding_2); - __swab32s (&ul->ul_padding_3); - __swab32s (&ul->ul_padding_4); + CLASSERT(offsetof(typeof(*ul), ul_padding_1) != 0); + CLASSERT(offsetof(typeof(*ul), ul_padding_2) != 0); + CLASSERT(offsetof(typeof(*ul), ul_padding_3) != 0); + CLASSERT(offsetof(typeof(*ul), ul_padding_4) != 0); } void lustre_swab_mds_rec_rename (struct mds_rec_rename *rn) @@ -727,10 +751,10 @@ void lustre_swab_mds_rec_rename (struct mds_rec_rename *rn) lustre_swab_ll_fid (&rn->rn_fid1); lustre_swab_ll_fid (&rn->rn_fid2); __swab64s (&rn->rn_time); - __swab32s (&rn->rn_padding_1); - __swab32s (&rn->rn_padding_2); - __swab32s (&rn->rn_padding_3); - __swab32s (&rn->rn_padding_4); + CLASSERT(offsetof(typeof(*rn), rn_padding_1) != 0); + CLASSERT(offsetof(typeof(*rn), rn_padding_2) != 0); + CLASSERT(offsetof(typeof(*rn), rn_padding_3) != 0); + 
CLASSERT(offsetof(typeof(*rn), rn_padding_4) != 0); } void lustre_swab_lov_desc (struct lov_desc *ld) @@ -741,8 +765,6 @@ void lustre_swab_lov_desc (struct lov_desc *ld) __swab64s (&ld->ld_default_stripe_size); __swab64s (&ld->ld_default_stripe_offset); __swab32s (&ld->ld_pattern); - __swab32s (&ld->ld_qos_threshold); - __swab32s (&ld->ld_qos_maxage); /* uuid endian insensitive */ } @@ -773,6 +795,33 @@ void lustre_swab_lov_user_md(struct lov_user_md *lum) EXIT; } +static void print_lumj (struct lov_user_md_join *lumj) +{ + CDEBUG(D_OTHER, "lov_user_md %p:\n", lumj); + CDEBUG(D_OTHER, "\tlmm_magic: %#x\n", lumj->lmm_magic); + CDEBUG(D_OTHER, "\tlmm_pattern: %#x\n", lumj->lmm_pattern); + CDEBUG(D_OTHER, "\tlmm_object_id: "LPU64"\n", lumj->lmm_object_id); + CDEBUG(D_OTHER, "\tlmm_object_gr: "LPU64"\n", lumj->lmm_object_gr); + CDEBUG(D_OTHER, "\tlmm_stripe_size: %#x\n", lumj->lmm_stripe_size); + CDEBUG(D_OTHER, "\tlmm_stripe_count: %#x\n", lumj->lmm_stripe_count); + CDEBUG(D_OTHER, "\tlmm_extent_count: %#x\n", lumj->lmm_extent_count); +} + +void lustre_swab_lov_user_md_join(struct lov_user_md_join *lumj) +{ + ENTRY; + CDEBUG(D_IOCTL, "swabbing lov_user_md_join\n"); + __swab32s(&lumj->lmm_magic); + __swab32s(&lumj->lmm_pattern); + __swab64s(&lumj->lmm_object_id); + __swab64s(&lumj->lmm_object_gr); + __swab32s(&lumj->lmm_stripe_size); + __swab32s(&lumj->lmm_stripe_count); + __swab32s(&lumj->lmm_extent_count); + print_lumj(lumj); + EXIT; +} + static void print_lum_objs(struct lov_user_md *lum) { struct lov_user_ost_data *lod; @@ -834,6 +883,7 @@ void lustre_swab_ldlm_intent (struct ldlm_intent *i) void lustre_swab_ldlm_resource_desc (struct ldlm_resource_desc *r) { __swab32s (&r->lr_type); + CLASSERT(offsetof(typeof(*r), lr_padding) != 0); lustre_swab_ldlm_res_id (&r->lr_name); } @@ -848,6 +898,7 @@ void lustre_swab_ldlm_lock_desc (struct ldlm_lock_desc *l) void lustre_swab_ldlm_request (struct ldlm_request *rq) { __swab32s (&rq->lock_flags); + 
CLASSERT(offsetof(typeof(*rq), lock_padding) != 0); lustre_swab_ldlm_lock_desc (&rq->lock_desc); /* lock_handle1 opaque */ /* lock_handle2 opaque */ @@ -856,35 +907,13 @@ void lustre_swab_ldlm_request (struct ldlm_request *rq) void lustre_swab_ldlm_reply (struct ldlm_reply *r) { __swab32s (&r->lock_flags); + CLASSERT(offsetof(typeof(*r), lock_padding) != 0); lustre_swab_ldlm_lock_desc (&r->lock_desc); /* lock_handle opaque */ __swab64s (&r->lock_policy_res1); __swab64s (&r->lock_policy_res2); } -void lustre_swab_ptlbd_op (struct ptlbd_op *op) -{ - __swab16s (&op->op_cmd); - __swab16s (&op->op_lun); - __swab16s (&op->op_niob_cnt); - /* ignore op__padding */ - __swab32s (&op->op_block_cnt); -} - -void lustre_swab_ptlbd_niob (struct ptlbd_niob *n) -{ - __swab64s (&n->n_xid); - __swab64s (&n->n_block_nr); - __swab32s (&n->n_offset); - __swab32s (&n->n_length); -} - -void lustre_swab_ptlbd_rsp (struct ptlbd_rsp *r) -{ - __swab16s (&r->r_status); - __swab16s (&r->r_error_cnt); -} - /* no one calls this */ int llog_log_swabbed(struct llog_log_hdr *hdr) { @@ -906,8 +935,8 @@ void lustre_swab_qdata(struct qunit_data *d) void lustre_assert_wire_constants(void) { /* Wire protocol assertions generated by 'wirecheck' - * running on Linux mustang 2.6.12-1.1456_FC4smp #1 SMP Thu Sep 22 02:22:14 EDT 2005 i686 i68 - * with gcc version 4.0.1 20050727 (Red Hat 4.0.1-5) */ + * running on Linux schatzie.adilger.int 2.6.12-1.1381_FC3 #1 Fri Oct 21 03:46:55 EDT 2005 i6 + * with gcc version 3.3.4 20040817 (Red Hat Linux 3.3.4-2) */ /* Constants... 
*/ @@ -1039,8 +1068,6 @@ void lustre_assert_wire_constants(void) (long long)MDS_STATUS_CONN); LASSERTF(MDS_STATUS_LOV == 2, " found %lld\n", (long long)MDS_STATUS_LOV); - LASSERTF(MDS_OPEN_HAS_EA == 1073741824, " found %lld\n", - (long long)MDS_OPEN_HAS_EA); LASSERTF(LDLM_ENQUEUE == 101, " found %lld\n", (long long)LDLM_ENQUEUE); LASSERTF(LDLM_CONVERT == 102, " found %lld\n", @@ -1051,6 +1078,8 @@ void lustre_assert_wire_constants(void) (long long)LDLM_BL_CALLBACK); LASSERTF(LDLM_CP_CALLBACK == 105, " found %lld\n", (long long)LDLM_CP_CALLBACK); + LASSERTF(LDLM_GL_CALLBACK == 106, " found %lld\n", + (long long)LDLM_GL_CALLBACK); LASSERTF(LDLM_LAST_OPC == 107, " found %lld\n", (long long)LDLM_LAST_OPC); LASSERTF(LCK_EX == 1, " found %lld\n", @@ -1065,26 +1094,14 @@ void lustre_assert_wire_constants(void) (long long)LCK_CR); LASSERTF(LCK_NL == 32, " found %lld\n", (long long)LCK_NL); - LASSERTF(PTLBD_QUERY == 200, " found %lld\n", - (long long)PTLBD_QUERY); - LASSERTF(PTLBD_READ == 201, " found %lld\n", - (long long)PTLBD_READ); - LASSERTF(PTLBD_WRITE == 202, " found %lld\n", - (long long)PTLBD_WRITE); - LASSERTF(PTLBD_FLUSH == 203, " found %lld\n", - (long long)PTLBD_FLUSH); - LASSERTF(PTLBD_CONNECT == 204, " found %lld\n", - (long long)PTLBD_CONNECT); - LASSERTF(PTLBD_DISCONNECT == 205, " found %lld\n", - (long long)PTLBD_DISCONNECT); - LASSERTF(PTLBD_LAST_OPC == 206, " found %lld\n", - (long long)PTLBD_LAST_OPC); - LASSERTF(MGMT_CONNECT == 250, " found %lld\n", - (long long)MGMT_CONNECT); - LASSERTF(MGMT_DISCONNECT == 251, " found %lld\n", - (long long)MGMT_DISCONNECT); - LASSERTF(MGMT_EXCEPTION == 252, " found %lld\n", - (long long)MGMT_EXCEPTION); + LASSERTF(LCK_GROUP == 64, " found %lld\n", + (long long)LCK_GROUP); + LASSERTF(LCK_MAXMODE == 65, " found %lld\n", + (long long)LCK_MAXMODE); + CLASSERT(LDLM_PLAIN == 10); + CLASSERT(LDLM_EXTENT == 11); + CLASSERT(LDLM_FLOCK == 12); + CLASSERT(LDLM_IBITS == 13); LASSERTF(OBD_PING == 400, " found %lld\n", (long 
long)OBD_PING); LASSERTF(OBD_LOG_CANCEL == 401, " found %lld\n", @@ -1097,6 +1114,19 @@ void lustre_assert_wire_constants(void) (long long)QUOTA_DQACQ); LASSERTF(QUOTA_DQREL == 602, " found %lld\n", (long long)QUOTA_DQREL); + CLASSERT(OBD_CONNECT_RDONLY == 0x1ULL); + CLASSERT(OBD_CONNECT_INDEX == 0x2ULL); + CLASSERT(OBD_CONNECT_GRANT == 0x8ULL); + CLASSERT(OBD_CONNECT_SRVLOCK == 0x10ULL); + CLASSERT(OBD_CONNECT_VERSION == 0x20ULL); + CLASSERT(OBD_CONNECT_REQPORTAL == 0x40ULL); + CLASSERT(OBD_CONNECT_ACL == 0x80ULL); + CLASSERT(OBD_CONNECT_XATTR == 0x100ULL); + CLASSERT(OBD_CONNECT_CROW == 0x200ULL); + CLASSERT(OBD_CONNECT_TRUNCLOCK == 0x400ULL); + CLASSERT(OBD_CONNECT_TRANSNO == 0x800ULL); + CLASSERT(OBD_CONNECT_IBITS == 0x1000ULL); + CLASSERT(OBD_CONNECT_JOIN == 0x2000ULL); /* Sizes and Offsets */ @@ -1243,92 +1273,66 @@ void lustre_assert_wire_constants(void) (long long)(int)offsetof(struct obdo, o_mds)); LASSERTF((int)sizeof(((struct obdo *)0)->o_mds) == 4, " found %lld\n", (long long)(int)sizeof(((struct obdo *)0)->o_mds)); + LASSERTF((int)offsetof(struct obdo, o_stripe_idx) == 120, " found %lld\n", + (long long)(int)offsetof(struct obdo, o_stripe_idx)); + LASSERTF((int)sizeof(((struct obdo *)0)->o_stripe_idx) == 4, " found %lld\n", + (long long)(int)sizeof(((struct obdo *)0)->o_stripe_idx)); + LASSERTF((int)offsetof(struct obdo, o_padding_1) == 124, " found %lld\n", + (long long)(int)offsetof(struct obdo, o_padding_1)); + LASSERTF((int)sizeof(((struct obdo *)0)->o_padding_1) == 4, " found %lld\n", + (long long)(int)sizeof(((struct obdo *)0)->o_padding_1)); LASSERTF((int)offsetof(struct obdo, o_inline) == 128, " found %lld\n", (long long)(int)offsetof(struct obdo, o_inline)); LASSERTF((int)sizeof(((struct obdo *)0)->o_inline) == 80, " found %lld\n", (long long)(int)sizeof(((struct obdo *)0)->o_inline)); LASSERTF(OBD_INLINESZ == 80, " found %lld\n", (long long)OBD_INLINESZ); - LASSERTF(OBD_MD_FLID == 1, " found %lld\n", - (long long)OBD_MD_FLID); - 
LASSERTF(OBD_MD_FLATIME == 2, " found %lld\n", - (long long)OBD_MD_FLATIME); - LASSERTF(OBD_MD_FLMTIME == 4, " found %lld\n", - (long long)OBD_MD_FLMTIME); - LASSERTF(OBD_MD_FLCTIME == 8, " found %lld\n", - (long long)OBD_MD_FLCTIME); - LASSERTF(OBD_MD_FLSIZE == 16, " found %lld\n", - (long long)OBD_MD_FLSIZE); - LASSERTF(OBD_MD_FLBLOCKS == 32, " found %lld\n", - (long long)OBD_MD_FLBLOCKS); - LASSERTF(OBD_MD_FLBLKSZ == 64, " found %lld\n", - (long long)OBD_MD_FLBLKSZ); - LASSERTF(OBD_MD_FLMODE == 128, " found %lld\n", - (long long)OBD_MD_FLMODE); - LASSERTF(OBD_MD_FLTYPE == 256, " found %lld\n", - (long long)OBD_MD_FLTYPE); - LASSERTF(OBD_MD_FLUID == 512, " found %lld\n", - (long long)OBD_MD_FLUID); - LASSERTF(OBD_MD_FLGID == 1024, " found %lld\n", - (long long)OBD_MD_FLGID); - LASSERTF(OBD_MD_FLFLAGS == 2048, " found %lld\n", - (long long)OBD_MD_FLFLAGS); - LASSERTF(OBD_MD_FLNLINK == 8192, " found %lld\n", - (long long)OBD_MD_FLNLINK); - LASSERTF(OBD_MD_FLGENER == 16384, " found %lld\n", - (long long)OBD_MD_FLGENER); - LASSERTF(OBD_MD_FLINLINE == 32768, " found %lld\n", - (long long)OBD_MD_FLINLINE); - LASSERTF(OBD_MD_FLRDEV == 65536, " found %lld\n", - (long long)OBD_MD_FLRDEV); - LASSERTF(OBD_MD_FLEASIZE == 131072, " found %lld\n", - (long long)OBD_MD_FLEASIZE); - LASSERTF(OBD_MD_LINKNAME == 262144, " found %lld\n", - (long long)OBD_MD_LINKNAME); - LASSERTF(OBD_MD_FLHANDLE == 524288, " found %lld\n", - (long long)OBD_MD_FLHANDLE); - LASSERTF(OBD_MD_FLCKSUM == 1048576, " found %lld\n", - (long long)OBD_MD_FLCKSUM); - LASSERTF(OBD_MD_FLQOS == 2097152, " found %lld\n", - (long long)OBD_MD_FLQOS); - LASSERTF(OBD_MD_FLCOOKIE == 8388608, " found %lld\n", - (long long)OBD_MD_FLCOOKIE); - LASSERTF(OBD_MD_FLGROUP == 16777216, " found %lld\n", - (long long)OBD_MD_FLGROUP); - LASSERTF(OBD_MD_FLFID == 33554432, " found %lld\n", - (long long)OBD_MD_FLFID); - LASSERTF(OBD_MD_FLEPOCH == 67108864, " found %lld\n", - (long long)OBD_MD_FLEPOCH); - LASSERTF(OBD_MD_FLGRANT == 
134217728, " found %lld\n", - (long long)OBD_MD_FLGRANT); - LASSERTF(OBD_MD_FLDIREA == 268435456, " found %lld\n", - (long long)OBD_MD_FLDIREA); - LASSERTF(OBD_MD_FLUSRQUOTA == 536870912, " found %lld\n", - (long long)OBD_MD_FLUSRQUOTA); - LASSERTF(OBD_MD_FLGRPQUOTA == 1073741824, " found %lld\n", - (long long)OBD_MD_FLGRPQUOTA); - LASSERTF(OBD_MD_MDS == 4294967296ULL, " found %lld\n", - (long long)OBD_MD_MDS); - LASSERTF(OBD_MD_REINT == 8589934592ULL, " found %lld\n", - (long long)OBD_MD_REINT); - LASSERTF(OBD_FL_INLINEDATA == 1, " found %lld\n", - (long long)OBD_FL_INLINEDATA); - LASSERTF(OBD_FL_OBDMDEXISTS == 2, " found %lld\n", - (long long)OBD_FL_OBDMDEXISTS); - LASSERTF(OBD_FL_DELORPHAN == 4, " found %lld\n", - (long long)OBD_FL_DELORPHAN); - LASSERTF(OBD_FL_NORPC == 8, " found %lld\n", - (long long)OBD_FL_NORPC); - LASSERTF(OBD_FL_IDONLY == 16, " found %lld\n", - (long long)OBD_FL_IDONLY); - LASSERTF(OBD_FL_RECREATE_OBJS == 32, " found %lld\n", - (long long)OBD_FL_RECREATE_OBJS); - LASSERTF(OBD_FL_DEBUG_CHECK == 64, " found %lld\n", - (long long)OBD_FL_DEBUG_CHECK); - LASSERTF(OBD_FL_NO_USRQUOTA == 256, " found %lld\n", - (long long)OBD_FL_NO_USRQUOTA); - LASSERTF(OBD_FL_NO_GRPQUOTA == 512, " found %lld\n", - (long long)OBD_FL_NO_GRPQUOTA); + CLASSERT(OBD_MD_FLID == (0x00000001ULL)); + CLASSERT(OBD_MD_FLATIME == (0x00000002ULL)); + CLASSERT(OBD_MD_FLMTIME == (0x00000004ULL)); + CLASSERT(OBD_MD_FLCTIME == (0x00000008ULL)); + CLASSERT(OBD_MD_FLSIZE == (0x00000010ULL)); + CLASSERT(OBD_MD_FLBLOCKS == (0x00000020ULL)); + CLASSERT(OBD_MD_FLBLKSZ == (0x00000040ULL)); + CLASSERT(OBD_MD_FLMODE == (0x00000080ULL)); + CLASSERT(OBD_MD_FLTYPE == (0x00000100ULL)); + CLASSERT(OBD_MD_FLUID == (0x00000200ULL)); + CLASSERT(OBD_MD_FLGID == (0x00000400ULL)); + CLASSERT(OBD_MD_FLFLAGS == (0x00000800ULL)); + CLASSERT(OBD_MD_FLNLINK == (0x00002000ULL)); + CLASSERT(OBD_MD_FLGENER == (0x00004000ULL)); + CLASSERT(OBD_MD_FLINLINE == (0x00008000ULL)); + CLASSERT(OBD_MD_FLRDEV == 
(0x00010000ULL)); + CLASSERT(OBD_MD_FLEASIZE == (0x00020000ULL)); + CLASSERT(OBD_MD_LINKNAME == (0x00040000ULL)); + CLASSERT(OBD_MD_FLHANDLE == (0x00080000ULL)); + CLASSERT(OBD_MD_FLCKSUM == (0x00100000ULL)); + CLASSERT(OBD_MD_FLQOS == (0x00200000ULL)); + CLASSERT(OBD_MD_FLCOOKIE == (0x00800000ULL)); + CLASSERT(OBD_MD_FLGROUP == (0x01000000ULL)); + CLASSERT(OBD_MD_FLFID == (0x02000000ULL)); + CLASSERT(OBD_MD_FLEPOCH == (0x04000000ULL)); + CLASSERT(OBD_MD_FLGRANT == (0x08000000ULL)); + CLASSERT(OBD_MD_FLDIREA == (0x10000000ULL)); + CLASSERT(OBD_MD_FLUSRQUOTA == (0x20000000ULL)); + CLASSERT(OBD_MD_FLGRPQUOTA == (0x40000000ULL)); + CLASSERT(OBD_MD_FLMODEASIZE == (0x80000000ULL)); + CLASSERT(OBD_MD_MDS == (0x0000000100000000ULL)); + CLASSERT(OBD_MD_REINT == (0x0000000200000000ULL)); + CLASSERT(OBD_MD_FLXATTR == (0x0000001000000000ULL)); + CLASSERT(OBD_MD_FLXATTRLS == (0x0000002000000000ULL)); + CLASSERT(OBD_MD_FLXATTRRM == (0x0000004000000000ULL)); + CLASSERT(OBD_MD_FLACL == (0x0000008000000000ULL)); + CLASSERT(OBD_FL_INLINEDATA == (0x00000001)); + CLASSERT(OBD_FL_OBDMDEXISTS == (0x00000002)); + CLASSERT(OBD_FL_DELORPHAN == (0x00000004)); + CLASSERT(OBD_FL_NORPC == (0x00000008)); + CLASSERT(OBD_FL_IDONLY == (0x00000010)); + CLASSERT(OBD_FL_RECREATE_OBJS == (0x00000020)); + CLASSERT(OBD_FL_DEBUG_CHECK == (0x00000040)); + CLASSERT(OBD_FL_NO_USRQUOTA == (0x00000100)); + CLASSERT(OBD_FL_NO_GRPQUOTA == (0x00000200)); + CLASSERT(OBD_FL_CREATE_CROW == (0x00000400)); /* Checks for struct lov_mds_md_v1 */ LASSERTF((int)sizeof(struct lov_mds_md_v1) == 32, " found %lld\n", @@ -1381,13 +1385,29 @@ void lustre_assert_wire_constants(void) (long long)(int)offsetof(struct lov_ost_data_v1, l_ost_idx)); LASSERTF((int)sizeof(((struct lov_ost_data_v1 *)0)->l_ost_idx) == 4, " found %lld\n", (long long)(int)sizeof(((struct lov_ost_data_v1 *)0)->l_ost_idx)); - LASSERTF(LOV_MAGIC_V1 == 198249424, " found %lld\n", - (long long)LOV_MAGIC_V1); + CLASSERT(LOV_MAGIC_V1 == 0x0BD10BD0); + 
CLASSERT(LOV_MAGIC_JOIN == 0x0BD20BD0); LASSERTF(LOV_PATTERN_RAID0 == 1, " found %lld\n", (long long)LOV_PATTERN_RAID0); LASSERTF(LOV_PATTERN_RAID1 == 2, " found %lld\n", (long long)LOV_PATTERN_RAID1); + /* Checks for struct lov_mds_md_join */ + LASSERTF((int)sizeof(struct lov_mds_md_join) == 56, " found %lld\n", + (long long)(int)sizeof(struct lov_mds_md_join)); + LASSERTF((int)offsetof(struct lov_mds_md_join, lmmj_md) == 0, " found %lld\n", + (long long)(int)offsetof(struct lov_mds_md_join, lmmj_md)); + LASSERTF((int)sizeof(((struct lov_mds_md_join *)0)->lmmj_md) == 32, " found %lld\n", + (long long)(int)sizeof(((struct lov_mds_md_join *)0)->lmmj_md)); + LASSERTF((int)offsetof(struct lov_mds_md_join, lmmj_array_id) == 32, " found %lld\n", + (long long)(int)offsetof(struct lov_mds_md_join, lmmj_array_id)); + LASSERTF((int)sizeof(((struct lov_mds_md_join *)0)->lmmj_array_id) == 20, " found %lld\n", + (long long)(int)sizeof(((struct lov_mds_md_join *)0)->lmmj_array_id)); + LASSERTF((int)offsetof(struct lov_mds_md_join, lmmj_extent_count) == 52, " found %lld\n", + (long long)(int)offsetof(struct lov_mds_md_join, lmmj_extent_count)); + LASSERTF((int)sizeof(((struct lov_mds_md_join *)0)->lmmj_extent_count) == 4, " found %lld\n", + (long long)(int)sizeof(((struct lov_mds_md_join *)0)->lmmj_extent_count)); + /* Checks for struct obd_statfs */ LASSERTF((int)sizeof(struct obd_statfs) == 144, " found %lld\n", (long long)(int)sizeof(struct obd_statfs)); @@ -1427,6 +1447,42 @@ void lustre_assert_wire_constants(void) (long long)(int)offsetof(struct obd_statfs, os_state)); LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_state) == 4, " found %lld\n", (long long)(int)sizeof(((struct obd_statfs *)0)->os_state)); + LASSERTF((int)offsetof(struct obd_statfs, os_spare1) == 108, " found %lld\n", + (long long)(int)offsetof(struct obd_statfs, os_spare1)); + LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare1) == 4, " found %lld\n", + (long long)(int)sizeof(((struct obd_statfs 
*)0)->os_spare1)); + LASSERTF((int)offsetof(struct obd_statfs, os_spare2) == 112, " found %lld\n", + (long long)(int)offsetof(struct obd_statfs, os_spare2)); + LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare2) == 4, " found %lld\n", + (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare2)); + LASSERTF((int)offsetof(struct obd_statfs, os_spare3) == 116, " found %lld\n", + (long long)(int)offsetof(struct obd_statfs, os_spare3)); + LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare3) == 4, " found %lld\n", + (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare3)); + LASSERTF((int)offsetof(struct obd_statfs, os_spare4) == 120, " found %lld\n", + (long long)(int)offsetof(struct obd_statfs, os_spare4)); + LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare4) == 4, " found %lld\n", + (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare4)); + LASSERTF((int)offsetof(struct obd_statfs, os_spare5) == 124, " found %lld\n", + (long long)(int)offsetof(struct obd_statfs, os_spare5)); + LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare5) == 4, " found %lld\n", + (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare5)); + LASSERTF((int)offsetof(struct obd_statfs, os_spare6) == 128, " found %lld\n", + (long long)(int)offsetof(struct obd_statfs, os_spare6)); + LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare6) == 4, " found %lld\n", + (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare6)); + LASSERTF((int)offsetof(struct obd_statfs, os_spare7) == 132, " found %lld\n", + (long long)(int)offsetof(struct obd_statfs, os_spare7)); + LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare7) == 4, " found %lld\n", + (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare7)); + LASSERTF((int)offsetof(struct obd_statfs, os_spare8) == 136, " found %lld\n", + (long long)(int)offsetof(struct obd_statfs, os_spare8)); + LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare8) == 4, " found %lld\n", + (long long)(int)sizeof(((struct 
obd_statfs *)0)->os_spare8)); + LASSERTF((int)offsetof(struct obd_statfs, os_spare9) == 140, " found %lld\n", + (long long)(int)offsetof(struct obd_statfs, os_spare9)); + LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare9) == 4, " found %lld\n", + (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare9)); /* Checks for struct obd_ioobj */ LASSERTF((int)sizeof(struct obd_ioobj) == 24, " found %lld\n", @@ -1535,6 +1591,18 @@ void lustre_assert_wire_constants(void) (long long)(int)offsetof(struct obd_dqblk, dqb_valid)); LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_valid) == 4, " found %lld\n", (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_valid)); + LASSERTF((int)offsetof(struct obd_dqblk, padding) == 68, " found %lld\n", + (long long)(int)offsetof(struct obd_dqblk, padding)); + LASSERTF((int)sizeof(((struct obd_dqblk *)0)->padding) == 4, " found %lld\n", + (long long)(int)sizeof(((struct obd_dqblk *)0)->padding)); + LASSERTF(Q_QUOTACHECK == 0x800100," found %lld\n", + (long long)Q_QUOTACHECK); + LASSERTF(Q_INITQUOTA == 0x800101," found %lld\n", + (long long)Q_INITQUOTA); + LASSERTF(Q_GETOINFO == 0x800102," found %lld\n", + (long long)Q_GETOINFO); + LASSERTF(Q_GETOQUOTA == 0x800103," found %lld\n", + (long long)Q_GETOQUOTA); /* Checks for struct niobuf_remote */ LASSERTF((int)sizeof(struct niobuf_remote) == 16, " found %lld\n", @@ -1697,14 +1765,14 @@ void lustre_assert_wire_constants(void) (long long)(int)offsetof(struct mds_body, aclsize)); LASSERTF((int)sizeof(((struct mds_body *)0)->aclsize) == 4, " found %lld\n", (long long)(int)sizeof(((struct mds_body *)0)->aclsize)); - LASSERTF((int)offsetof(struct mds_body, padding_2) == 156, " found %lld\n", - (long long)(int)offsetof(struct mds_body, padding_2)); - LASSERTF((int)sizeof(((struct mds_body *)0)->padding_2) == 4, " found %lld\n", - (long long)(int)sizeof(((struct mds_body *)0)->padding_2)); - LASSERTF((int)offsetof(struct mds_body, padding_3) == 160, " found %lld\n", - (long 
long)(int)offsetof(struct mds_body, padding_3)); - LASSERTF((int)sizeof(((struct mds_body *)0)->padding_3) == 4, " found %lld\n", - (long long)(int)sizeof(((struct mds_body *)0)->padding_3)); + LASSERTF((int)offsetof(struct mds_body, max_mdsize) == 156, " found %lld\n", + (long long)(int)offsetof(struct mds_body, max_mdsize)); + LASSERTF((int)sizeof(((struct mds_body *)0)->max_mdsize) == 4, " found %lld\n", + (long long)(int)sizeof(((struct mds_body *)0)->max_mdsize)); + LASSERTF((int)offsetof(struct mds_body, max_cookiesize) == 160, " found %lld\n", + (long long)(int)offsetof(struct mds_body, max_cookiesize)); + LASSERTF((int)sizeof(((struct mds_body *)0)->max_cookiesize) == 4, " found %lld\n", + (long long)(int)sizeof(((struct mds_body *)0)->max_cookiesize)); LASSERTF((int)offsetof(struct mds_body, padding_4) == 164, " found %lld\n", (long long)(int)offsetof(struct mds_body, padding_4)); LASSERTF((int)sizeof(((struct mds_body *)0)->padding_4) == 4, " found %lld\n", @@ -1715,22 +1783,20 @@ void lustre_assert_wire_constants(void) (long long)FMODE_WRITE); LASSERTF(FMODE_EXEC == 4, " found %lld\n", (long long)FMODE_EXEC); - LASSERTF(MDS_OPEN_CREAT == 64, " found %lld\n", - (long long)MDS_OPEN_CREAT); - LASSERTF(MDS_OPEN_EXCL == 128, " found %lld\n", - (long long)MDS_OPEN_EXCL); - LASSERTF(MDS_OPEN_TRUNC == 512, " found %lld\n", - (long long)MDS_OPEN_TRUNC); - LASSERTF(MDS_OPEN_APPEND == 1024, " found %lld\n", - (long long)MDS_OPEN_APPEND); - LASSERTF(MDS_OPEN_SYNC == 4096, " found %lld\n", - (long long)MDS_OPEN_SYNC); - LASSERTF(MDS_OPEN_DIRECTORY == 65536, " found %lld\n", - (long long)MDS_OPEN_DIRECTORY); - LASSERTF(MDS_OPEN_DELAY_CREATE == 16777216, " found %lld\n", - (long long)MDS_OPEN_DELAY_CREATE); - LASSERTF(MDS_OPEN_HAS_EA == 1073741824, " found %lld\n", - (long long)MDS_OPEN_HAS_EA); + CLASSERT(MDS_OPEN_CREAT == 00000100); + CLASSERT(MDS_OPEN_EXCL == 00000200); + CLASSERT(MDS_OPEN_TRUNC == 00001000); + CLASSERT(MDS_OPEN_APPEND == 00002000); + 
CLASSERT(MDS_OPEN_SYNC == 00010000); + CLASSERT(MDS_OPEN_DIRECTORY == 00200000); + CLASSERT(MDS_OPEN_DELAY_CREATE == 0100000000); + CLASSERT(MDS_OPEN_OWNEROVERRIDE == 0200000000); + CLASSERT(MDS_OPEN_JOIN_FILE == 0400000000); + CLASSERT(MDS_OPEN_HAS_EA == 010000000000); + CLASSERT(MDS_OPEN_HAS_OBJS == 020000000000); + CLASSERT(MDS_INODELOCK_LOOKUP == 0x000001); + CLASSERT(MDS_INODELOCK_UPDATE == 0x000002); + CLASSERT(MDS_INODELOCK_OPEN == 0x000004); /* Checks for struct mds_rec_setattr */ LASSERTF((int)sizeof(struct mds_rec_setattr) == 96, " found %lld\n", @@ -1964,6 +2030,18 @@ void lustre_assert_wire_constants(void) LASSERTF((int)sizeof(((struct mds_rec_rename *)0)->rn_time) == 8, " found %lld\n", (long long)(int)sizeof(((struct mds_rec_rename *)0)->rn_time)); + /* Checks for struct mds_rec_join */ + LASSERTF((int)sizeof(struct mds_rec_join) == 24, " found %lld\n", + (long long)(int)sizeof(struct mds_rec_join)); + LASSERTF((int)offsetof(struct mds_rec_join, jr_fid) == 0, " found %lld\n", + (long long)(int)offsetof(struct mds_rec_join, jr_fid)); + LASSERTF((int)sizeof(((struct mds_rec_join *)0)->jr_fid) == 16, " found %lld\n", + (long long)(int)sizeof(((struct mds_rec_join *)0)->jr_fid)); + LASSERTF((int)offsetof(struct mds_rec_join, jr_headsize) == 16, " found %lld\n", + (long long)(int)offsetof(struct mds_rec_join, jr_headsize)); + LASSERTF((int)sizeof(((struct mds_rec_join *)0)->jr_headsize) == 8, " found %lld\n", + (long long)(int)sizeof(((struct mds_rec_join *)0)->jr_headsize)); + /* Checks for struct lov_desc */ LASSERTF((int)sizeof(struct lov_desc) == 88, " found %lld\n", (long long)(int)sizeof(struct lov_desc)); @@ -1991,14 +2069,26 @@ void lustre_assert_wire_constants(void) (long long)(int)offsetof(struct lov_desc, ld_default_stripe_offset)); LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_default_stripe_offset) == 8, " found %lld\n", (long long)(int)sizeof(((struct lov_desc *)0)->ld_default_stripe_offset)); - LASSERTF((int)offsetof(struct lov_desc, 
ld_qos_threshold) == 32, " found %lld\n", - (long long)(int)offsetof(struct lov_desc, ld_qos_threshold)); - LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_qos_threshold) == 4, " found %lld\n", - (long long)(int)sizeof(((struct lov_desc *)0)->ld_qos_threshold)); - LASSERTF((int)offsetof(struct lov_desc, ld_qos_maxage) == 36, " found %lld\n", - (long long)(int)offsetof(struct lov_desc, ld_qos_maxage)); - LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_qos_maxage) == 4, " found %lld\n", - (long long)(int)sizeof(((struct lov_desc *)0)->ld_qos_maxage)); + LASSERTF((int)offsetof(struct lov_desc, ld_default_stripe_offset) == 24, " found %lld\n", + (long long)(int)offsetof(struct lov_desc, ld_default_stripe_offset)); + LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_default_stripe_offset) == 8, " found %lld\n", + (long long)(int)sizeof(((struct lov_desc *)0)->ld_default_stripe_offset)); + LASSERTF((int)offsetof(struct lov_desc, ld_padding_1) == 32, " found %lld\n", + (long long)(int)offsetof(struct lov_desc, ld_padding_1)); + LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_padding_1) == 4, " found %lld\n", + (long long)(int)sizeof(((struct lov_desc *)0)->ld_padding_1)); + LASSERTF((int)offsetof(struct lov_desc, ld_padding_2) == 36, " found %lld\n", + (long long)(int)offsetof(struct lov_desc, ld_padding_2)); + LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_padding_2) == 4, " found %lld\n", + (long long)(int)sizeof(((struct lov_desc *)0)->ld_padding_2)); + LASSERTF((int)offsetof(struct lov_desc, ld_padding_3) == 40, " found %lld\n", + (long long)(int)offsetof(struct lov_desc, ld_padding_3)); + LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_padding_3) == 4, " found %lld\n", + (long long)(int)sizeof(((struct lov_desc *)0)->ld_padding_3)); + LASSERTF((int)offsetof(struct lov_desc, ld_padding_4) == 44, " found %lld\n", + (long long)(int)offsetof(struct lov_desc, ld_padding_4)); + LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_padding_4) == 4, " found %lld\n", + (long 
long)(int)sizeof(((struct lov_desc *)0)->ld_padding_4)); LASSERTF((int)offsetof(struct lov_desc, ld_uuid) == 48, " found %lld\n", (long long)(int)offsetof(struct lov_desc, ld_uuid)); LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_uuid) == 40, " found %lld\n", @@ -2048,6 +2138,14 @@ void lustre_assert_wire_constants(void) LASSERTF((int)sizeof(((struct ldlm_flock *)0)->pid) == 4, " found %lld\n", (long long)(int)sizeof(((struct ldlm_flock *)0)->pid)); + /* Checks for struct ldlm_inodebits */ + LASSERTF((int)sizeof(struct ldlm_inodebits) == 8, " found %lld\n", + (long long)(int)sizeof(struct ldlm_inodebits)); + LASSERTF((int)offsetof(struct ldlm_inodebits, bits) == 0, " found %lld\n", + (long long)(int)offsetof(struct ldlm_inodebits, bits)); + LASSERTF((int)sizeof(((struct ldlm_inodebits *)0)->bits) == 8, " found %lld\n", + (long long)(int)sizeof(((struct ldlm_inodebits *)0)->bits)); + /* Checks for struct ldlm_intent */ LASSERTF((int)sizeof(struct ldlm_intent) == 8, " found %lld\n", (long long)(int)sizeof(struct ldlm_intent)); @@ -2063,6 +2161,10 @@ void lustre_assert_wire_constants(void) (long long)(int)offsetof(struct ldlm_resource_desc, lr_type)); LASSERTF((int)sizeof(((struct ldlm_resource_desc *)0)->lr_type) == 4, " found %lld\n", (long long)(int)sizeof(((struct ldlm_resource_desc *)0)->lr_type)); + LASSERTF((int)offsetof(struct ldlm_resource_desc, lr_padding) == 4, " found %lld\n", + (long long)(int)offsetof(struct ldlm_resource_desc, lr_padding)); + LASSERTF((int)sizeof(((struct ldlm_resource_desc *)0)->lr_padding) == 4, " found %lld\n", + (long long)(int)sizeof(((struct ldlm_resource_desc *)0)->lr_padding)); LASSERTF((int)offsetof(struct ldlm_resource_desc, lr_name) == 8, " found %lld\n", (long long)(int)offsetof(struct ldlm_resource_desc, lr_name)); LASSERTF((int)sizeof(((struct ldlm_resource_desc *)0)->lr_name) == 32, " found %lld\n", @@ -2095,6 +2197,10 @@ void lustre_assert_wire_constants(void) (long long)(int)offsetof(struct ldlm_request, lock_flags)); 
LASSERTF((int)sizeof(((struct ldlm_request *)0)->lock_flags) == 4, " found %lld\n", (long long)(int)sizeof(((struct ldlm_request *)0)->lock_flags)); + LASSERTF((int)offsetof(struct ldlm_request, lock_padding) == 4, " found %lld\n", + (long long)(int)offsetof(struct ldlm_request, lock_padding)); + LASSERTF((int)sizeof(((struct ldlm_request *)0)->lock_padding) == 4, " found %lld\n", + (long long)(int)sizeof(((struct ldlm_request *)0)->lock_padding)); LASSERTF((int)offsetof(struct ldlm_request, lock_desc) == 8, " found %lld\n", (long long)(int)offsetof(struct ldlm_request, lock_desc)); LASSERTF((int)sizeof(((struct ldlm_request *)0)->lock_desc) == 80, " found %lld\n", @@ -2115,6 +2221,10 @@ void lustre_assert_wire_constants(void) (long long)(int)offsetof(struct ldlm_reply, lock_flags)); LASSERTF((int)sizeof(((struct ldlm_reply *)0)->lock_flags) == 4, " found %lld\n", (long long)(int)sizeof(((struct ldlm_reply *)0)->lock_flags)); + LASSERTF((int)offsetof(struct ldlm_request, lock_padding) == 4, " found %lld\n", + (long long)(int)offsetof(struct ldlm_request, lock_padding)); + LASSERTF((int)sizeof(((struct ldlm_request *)0)->lock_padding) == 4, " found %lld\n", + (long long)(int)sizeof(((struct ldlm_request *)0)->lock_padding)); LASSERTF((int)offsetof(struct ldlm_request, lock_desc) == 8, " found %lld\n", (long long)(int)offsetof(struct ldlm_request, lock_desc)); LASSERTF((int)sizeof(((struct ldlm_request *)0)->lock_desc) == 80, " found %lld\n", @@ -2156,62 +2266,6 @@ void lustre_assert_wire_constants(void) LASSERTF((int)sizeof(((struct ost_lvb *)0)->lvb_blocks) == 8, " found %lld\n", (long long)(int)sizeof(((struct ost_lvb *)0)->lvb_blocks)); - /* Checks for struct ptlbd_op */ - LASSERTF((int)sizeof(struct ptlbd_op) == 12, " found %lld\n", - (long long)(int)sizeof(struct ptlbd_op)); - LASSERTF((int)offsetof(struct ptlbd_op, op_cmd) == 0, " found %lld\n", - (long long)(int)offsetof(struct ptlbd_op, op_cmd)); - LASSERTF((int)sizeof(((struct ptlbd_op *)0)->op_cmd) == 2, " 
found %lld\n", - (long long)(int)sizeof(((struct ptlbd_op *)0)->op_cmd)); - LASSERTF((int)offsetof(struct ptlbd_op, op_lun) == 2, " found %lld\n", - (long long)(int)offsetof(struct ptlbd_op, op_lun)); - LASSERTF((int)sizeof(((struct ptlbd_op *)0)->op_lun) == 2, " found %lld\n", - (long long)(int)sizeof(((struct ptlbd_op *)0)->op_lun)); - LASSERTF((int)offsetof(struct ptlbd_op, op_niob_cnt) == 4, " found %lld\n", - (long long)(int)offsetof(struct ptlbd_op, op_niob_cnt)); - LASSERTF((int)sizeof(((struct ptlbd_op *)0)->op_niob_cnt) == 2, " found %lld\n", - (long long)(int)sizeof(((struct ptlbd_op *)0)->op_niob_cnt)); - LASSERTF((int)offsetof(struct ptlbd_op, op__padding) == 6, " found %lld\n", - (long long)(int)offsetof(struct ptlbd_op, op__padding)); - LASSERTF((int)sizeof(((struct ptlbd_op *)0)->op__padding) == 2, " found %lld\n", - (long long)(int)sizeof(((struct ptlbd_op *)0)->op__padding)); - LASSERTF((int)offsetof(struct ptlbd_op, op_block_cnt) == 8, " found %lld\n", - (long long)(int)offsetof(struct ptlbd_op, op_block_cnt)); - LASSERTF((int)sizeof(((struct ptlbd_op *)0)->op_block_cnt) == 4, " found %lld\n", - (long long)(int)sizeof(((struct ptlbd_op *)0)->op_block_cnt)); - - /* Checks for struct ptlbd_niob */ - LASSERTF((int)sizeof(struct ptlbd_niob) == 24, " found %lld\n", - (long long)(int)sizeof(struct ptlbd_niob)); - LASSERTF((int)offsetof(struct ptlbd_niob, n_xid) == 0, " found %lld\n", - (long long)(int)offsetof(struct ptlbd_niob, n_xid)); - LASSERTF((int)sizeof(((struct ptlbd_niob *)0)->n_xid) == 8, " found %lld\n", - (long long)(int)sizeof(((struct ptlbd_niob *)0)->n_xid)); - LASSERTF((int)offsetof(struct ptlbd_niob, n_block_nr) == 8, " found %lld\n", - (long long)(int)offsetof(struct ptlbd_niob, n_block_nr)); - LASSERTF((int)sizeof(((struct ptlbd_niob *)0)->n_block_nr) == 8, " found %lld\n", - (long long)(int)sizeof(((struct ptlbd_niob *)0)->n_block_nr)); - LASSERTF((int)offsetof(struct ptlbd_niob, n_offset) == 16, " found %lld\n", - (long 
long)(int)offsetof(struct ptlbd_niob, n_offset)); - LASSERTF((int)sizeof(((struct ptlbd_niob *)0)->n_offset) == 4, " found %lld\n", - (long long)(int)sizeof(((struct ptlbd_niob *)0)->n_offset)); - LASSERTF((int)offsetof(struct ptlbd_niob, n_length) == 20, " found %lld\n", - (long long)(int)offsetof(struct ptlbd_niob, n_length)); - LASSERTF((int)sizeof(((struct ptlbd_niob *)0)->n_length) == 4, " found %lld\n", - (long long)(int)sizeof(((struct ptlbd_niob *)0)->n_length)); - - /* Checks for struct ptlbd_rsp */ - LASSERTF((int)sizeof(struct ptlbd_rsp) == 4, " found %lld\n", - (long long)(int)sizeof(struct ptlbd_rsp)); - LASSERTF((int)offsetof(struct ptlbd_rsp, r_status) == 0, " found %lld\n", - (long long)(int)offsetof(struct ptlbd_rsp, r_status)); - LASSERTF((int)sizeof(((struct ptlbd_rsp *)0)->r_status) == 2, " found %lld\n", - (long long)(int)sizeof(((struct ptlbd_rsp *)0)->r_status)); - LASSERTF((int)offsetof(struct ptlbd_rsp, r_error_cnt) == 2, " found %lld\n", - (long long)(int)offsetof(struct ptlbd_rsp, r_error_cnt)); - LASSERTF((int)sizeof(((struct ptlbd_rsp *)0)->r_error_cnt) == 2, " found %lld\n", - (long long)(int)sizeof(((struct ptlbd_rsp *)0)->r_error_cnt)); - /* Checks for struct llog_logid */ LASSERTF((int)sizeof(struct llog_logid) == 20, " found %lld\n", (long long)(int)sizeof(struct llog_logid)); @@ -2227,22 +2281,16 @@ void lustre_assert_wire_constants(void) (long long)(int)offsetof(struct llog_logid, lgl_ogen)); LASSERTF((int)sizeof(((struct llog_logid *)0)->lgl_ogen) == 4, " found %lld\n", (long long)(int)sizeof(((struct llog_logid *)0)->lgl_ogen)); - LASSERTF(OST_SZ_REC == 274730752, " found %lld\n", - (long long)OST_SZ_REC); - LASSERTF(OST_RAID1_REC == 274731008, " found %lld\n", - (long long)OST_RAID1_REC); - LASSERTF(MDS_UNLINK_REC == 274801668, " found %lld\n", - (long long)MDS_UNLINK_REC); - LASSERTF(MDS_SETATTR_REC == 274801665, " found %lld\n", - (long long)MDS_SETATTR_REC); - LASSERTF(OBD_CFG_REC == 274857984, " found %lld\n", - (long 
long)OBD_CFG_REC); - LASSERTF(LLOG_GEN_REC == 274989056, " found %lld\n", - (long long)LLOG_GEN_REC); - LASSERTF(LLOG_HDR_MAGIC == 275010873, " found %lld\n", - (long long)LLOG_HDR_MAGIC); - LASSERTF(LLOG_LOGID_MAGIC == 275010875, " found %lld\n", - (long long)LLOG_LOGID_MAGIC); + CLASSERT(OST_SZ_REC == 274730752); + CLASSERT(OST_RAID1_REC == 274731008); + CLASSERT(MDS_UNLINK_REC == 274801668); + CLASSERT(MDS_SETATTR_REC == 274801665); + CLASSERT(OBD_CFG_REC == 274857984); + CLASSERT(PTL_CFG_REC == 274923520); + CLASSERT(LLOG_GEN_REC == 274989056); + CLASSERT(LLOG_JOIN_REC == 275054592); + CLASSERT(LLOG_HDR_MAGIC == 275010873); + CLASSERT(LLOG_LOGID_MAGIC == 275010875); /* Checks for struct llog_catid */ LASSERTF((int)sizeof(struct llog_catid) == 32, " found %lld\n", @@ -2251,6 +2299,18 @@ void lustre_assert_wire_constants(void) (long long)(int)offsetof(struct llog_catid, lci_logid)); LASSERTF((int)sizeof(((struct llog_catid *)0)->lci_logid) == 20, " found %lld\n", (long long)(int)sizeof(((struct llog_catid *)0)->lci_logid)); + LASSERTF((int)offsetof(struct llog_catid, lci_padding1) == 20, " found %lld\n", + (long long)(int)offsetof(struct llog_catid, lci_padding1)); + LASSERTF((int)sizeof(((struct llog_catid *)0)->lci_padding1) == 4, " found %lld\n", + (long long)(int)sizeof(((struct llog_catid *)0)->lci_padding1)); + LASSERTF((int)offsetof(struct llog_catid, lci_padding2) == 24, " found %lld\n", + (long long)(int)offsetof(struct llog_catid, lci_padding2)); + LASSERTF((int)sizeof(((struct llog_catid *)0)->lci_padding2) == 4, " found %lld\n", + (long long)(int)sizeof(((struct llog_catid *)0)->lci_padding2)); + LASSERTF((int)offsetof(struct llog_catid, lci_padding3) == 28, " found %lld\n", + (long long)(int)offsetof(struct llog_catid, lci_padding3)); + LASSERTF((int)sizeof(((struct llog_catid *)0)->lci_padding3) == 4, " found %lld\n", + (long long)(int)sizeof(((struct llog_catid *)0)->lci_padding3)); /* Checks for struct llog_rec_hdr */ LASSERTF((int)sizeof(struct 
llog_rec_hdr) == 16, " found %lld\n", @@ -2267,6 +2327,10 @@ void lustre_assert_wire_constants(void) (long long)(int)offsetof(struct llog_rec_hdr, lrh_type)); LASSERTF((int)sizeof(((struct llog_rec_hdr *)0)->lrh_type) == 4, " found %lld\n", (long long)(int)sizeof(((struct llog_rec_hdr *)0)->lrh_type)); + LASSERTF((int)offsetof(struct llog_rec_hdr, padding) == 12, " found %lld\n", + (long long)(int)offsetof(struct llog_rec_hdr, padding)); + LASSERTF((int)sizeof(((struct llog_rec_hdr *)0)->padding) == 4, " found %lld\n", + (long long)(int)sizeof(((struct llog_rec_hdr *)0)->padding)); /* Checks for struct llog_rec_tail */ LASSERTF((int)sizeof(struct llog_rec_tail) == 8, " found %lld\n", @@ -2291,6 +2355,26 @@ void lustre_assert_wire_constants(void) (long long)(int)offsetof(struct llog_logid_rec, lid_id)); LASSERTF((int)sizeof(((struct llog_logid_rec *)0)->lid_id) == 20, " found %lld\n", (long long)(int)sizeof(((struct llog_logid_rec *)0)->lid_id)); + LASSERTF((int)offsetof(struct llog_logid_rec, padding1) == 36, " found %lld\n", + (long long)(int)offsetof(struct llog_logid_rec, padding1)); + LASSERTF((int)sizeof(((struct llog_logid_rec *)0)->padding1) == 4, " found %lld\n", + (long long)(int)sizeof(((struct llog_logid_rec *)0)->padding1)); + LASSERTF((int)offsetof(struct llog_logid_rec, padding2) == 40, " found %lld\n", + (long long)(int)offsetof(struct llog_logid_rec, padding2)); + LASSERTF((int)sizeof(((struct llog_logid_rec *)0)->padding2) == 4, " found %lld\n", + (long long)(int)sizeof(((struct llog_logid_rec *)0)->padding2)); + LASSERTF((int)offsetof(struct llog_logid_rec, padding3) == 44, " found %lld\n", + (long long)(int)offsetof(struct llog_logid_rec, padding3)); + LASSERTF((int)sizeof(((struct llog_logid_rec *)0)->padding3) == 4, " found %lld\n", + (long long)(int)sizeof(((struct llog_logid_rec *)0)->padding3)); + LASSERTF((int)offsetof(struct llog_logid_rec, padding4) == 48, " found %lld\n", + (long long)(int)offsetof(struct llog_logid_rec, padding4)); + 
LASSERTF((int)sizeof(((struct llog_logid_rec *)0)->padding4) == 4, " found %lld\n", + (long long)(int)sizeof(((struct llog_logid_rec *)0)->padding4)); + LASSERTF((int)offsetof(struct llog_logid_rec, padding5) == 52, " found %lld\n", + (long long)(int)offsetof(struct llog_logid_rec, padding5)); + LASSERTF((int)sizeof(((struct llog_logid_rec *)0)->padding5) == 4, " found %lld\n", + (long long)(int)sizeof(((struct llog_logid_rec *)0)->padding5)); LASSERTF((int)offsetof(struct llog_logid_rec, lid_tail) == 56, " found %lld\n", (long long)(int)offsetof(struct llog_logid_rec, lid_tail)); LASSERTF((int)sizeof(((struct llog_logid_rec *)0)->lid_tail) == 8, " found %lld\n", @@ -2315,6 +2399,10 @@ void lustre_assert_wire_constants(void) (long long)(int)offsetof(struct llog_create_rec, lcr_ogen)); LASSERTF((int)sizeof(((struct llog_create_rec *)0)->lcr_ogen) == 4, " found %lld\n", (long long)(int)sizeof(((struct llog_create_rec *)0)->lcr_ogen)); + LASSERTF((int)offsetof(struct llog_create_rec, padding) == 44, " found %lld\n", + (long long)(int)offsetof(struct llog_create_rec, padding)); + LASSERTF((int)sizeof(((struct llog_create_rec *)0)->padding) == 4, " found %lld\n", + (long long)(int)sizeof(((struct llog_create_rec *)0)->padding)); /* Checks for struct llog_orphan_rec */ LASSERTF((int)sizeof(struct llog_orphan_rec) == 40, " found %lld\n", @@ -2331,6 +2419,10 @@ void lustre_assert_wire_constants(void) (long long)(int)offsetof(struct llog_orphan_rec, lor_ogen)); LASSERTF((int)sizeof(((struct llog_orphan_rec *)0)->lor_ogen) == 4, " found %lld\n", (long long)(int)sizeof(((struct llog_orphan_rec *)0)->lor_ogen)); + LASSERTF((int)offsetof(struct llog_orphan_rec, padding) == 28, " found %lld\n", + (long long)(int)offsetof(struct llog_orphan_rec, padding)); + LASSERTF((int)sizeof(((struct llog_orphan_rec *)0)->padding) == 4, " found %lld\n", + (long long)(int)sizeof(((struct llog_orphan_rec *)0)->padding)); LASSERTF((int)offsetof(struct llog_orphan_rec, lor_tail) == 32, " found 
%lld\n", (long long)(int)offsetof(struct llog_orphan_rec, lor_tail)); LASSERTF((int)sizeof(((struct llog_orphan_rec *)0)->lor_tail) == 8, " found %lld\n", @@ -2351,11 +2443,47 @@ void lustre_assert_wire_constants(void) (long long)(int)offsetof(struct llog_unlink_rec, lur_ogen)); LASSERTF((int)sizeof(((struct llog_unlink_rec *)0)->lur_ogen) == 4, " found %lld\n", (long long)(int)sizeof(((struct llog_unlink_rec *)0)->lur_ogen)); + LASSERTF((int)offsetof(struct llog_unlink_rec, padding) == 28, " found %lld\n", + (long long)(int)offsetof(struct llog_unlink_rec, padding)); + LASSERTF((int)sizeof(((struct llog_unlink_rec *)0)->padding) == 4, " found %lld\n", + (long long)(int)sizeof(((struct llog_unlink_rec *)0)->padding)); LASSERTF((int)offsetof(struct llog_unlink_rec, lur_tail) == 32, " found %lld\n", (long long)(int)offsetof(struct llog_unlink_rec, lur_tail)); LASSERTF((int)sizeof(((struct llog_unlink_rec *)0)->lur_tail) == 8, " found %lld\n", (long long)(int)sizeof(((struct llog_unlink_rec *)0)->lur_tail)); + /* Checks for struct llog_setattr_rec */ + LASSERTF((int)sizeof(struct llog_setattr_rec) == 48, " found %lld\n", + (long long)(int)sizeof(struct llog_setattr_rec)); + LASSERTF((int)offsetof(struct llog_setattr_rec, lsr_hdr) == 0, " found %lld\n", + (long long)(int)offsetof(struct llog_setattr_rec, lsr_hdr)); + LASSERTF((int)sizeof(((struct llog_setattr_rec *)0)->lsr_hdr) == 16, " found %lld\n", + (long long)(int)sizeof(((struct llog_setattr_rec *)0)->lsr_hdr)); + LASSERTF((int)offsetof(struct llog_setattr_rec, lsr_oid) == 16, " found %lld\n", + (long long)(int)offsetof(struct llog_setattr_rec, lsr_oid)); + LASSERTF((int)sizeof(((struct llog_setattr_rec *)0)->lsr_oid) == 8, " found %lld\n", + (long long)(int)sizeof(((struct llog_setattr_rec *)0)->lsr_oid)); + LASSERTF((int)offsetof(struct llog_setattr_rec, lsr_ogen) == 24, " found %lld\n", + (long long)(int)offsetof(struct llog_setattr_rec, lsr_ogen)); + LASSERTF((int)sizeof(((struct llog_setattr_rec 
*)0)->lsr_ogen) == 4, " found %lld\n", + (long long)(int)sizeof(((struct llog_setattr_rec *)0)->lsr_ogen)); + LASSERTF((int)offsetof(struct llog_setattr_rec, lsr_uid) == 28, " found %lld\n", + (long long)(int)offsetof(struct llog_setattr_rec, lsr_uid)); + LASSERTF((int)sizeof(((struct llog_setattr_rec *)0)->lsr_uid) == 4, " found %lld\n", + (long long)(int)sizeof(((struct llog_setattr_rec *)0)->lsr_uid)); + LASSERTF((int)offsetof(struct llog_setattr_rec, lsr_gid) == 32, " found %lld\n", + (long long)(int)offsetof(struct llog_setattr_rec, lsr_gid)); + LASSERTF((int)sizeof(((struct llog_setattr_rec *)0)->lsr_gid) == 4, " found %lld\n", + (long long)(int)sizeof(((struct llog_setattr_rec *)0)->lsr_gid)); + LASSERTF((int)offsetof(struct llog_setattr_rec, padding) == 36, " found %lld\n", + (long long)(int)offsetof(struct llog_setattr_rec, padding)); + LASSERTF((int)sizeof(((struct llog_setattr_rec *)0)->padding) == 4, " found %lld\n", + (long long)(int)sizeof(((struct llog_setattr_rec *)0)->padding)); + LASSERTF((int)offsetof(struct llog_setattr_rec, lsr_tail) == 40, " found %lld\n", + (long long)(int)offsetof(struct llog_setattr_rec, lsr_tail)); + LASSERTF((int)sizeof(((struct llog_setattr_rec *)0)->lsr_tail) == 8, " found %lld\n", + (long long)(int)sizeof(((struct llog_setattr_rec *)0)->lsr_tail)); + /* Checks for struct llog_size_change_rec */ LASSERTF((int)sizeof(struct llog_size_change_rec) == 48, " found %lld\n", (long long)(int)sizeof(struct llog_size_change_rec)); @@ -2371,6 +2499,10 @@ void lustre_assert_wire_constants(void) (long long)(int)offsetof(struct llog_size_change_rec, lsc_io_epoch)); LASSERTF((int)sizeof(((struct llog_size_change_rec *)0)->lsc_io_epoch) == 4, " found %lld\n", (long long)(int)sizeof(((struct llog_size_change_rec *)0)->lsc_io_epoch)); + LASSERTF((int)offsetof(struct llog_size_change_rec, padding) == 36, " found %lld\n", + (long long)(int)offsetof(struct llog_size_change_rec, padding)); + LASSERTF((int)sizeof(((struct llog_size_change_rec 
*)0)->padding) == 4, " found %lld\n", + (long long)(int)sizeof(((struct llog_size_change_rec *)0)->padding)); LASSERTF((int)offsetof(struct llog_size_change_rec, lsc_tail) == 40, " found %lld\n", (long long)(int)offsetof(struct llog_size_change_rec, lsc_tail)); LASSERTF((int)sizeof(((struct llog_size_change_rec *)0)->lsc_tail) == 8, " found %lld\n", @@ -2467,6 +2599,10 @@ void lustre_assert_wire_constants(void) (long long)(int)offsetof(struct llog_cookie, lgc_index)); LASSERTF((int)sizeof(((struct llog_cookie *)0)->lgc_index) == 4, " found %lld\n", (long long)(int)sizeof(((struct llog_cookie *)0)->lgc_index)); + LASSERTF((int)offsetof(struct llog_cookie, lgc_padding) == 28, " found %lld\n", + (long long)(int)offsetof(struct llog_cookie, lgc_padding)); + LASSERTF((int)sizeof(((struct llog_cookie *)0)->lgc_padding) == 4, " found %lld\n", + (long long)(int)sizeof(((struct llog_cookie *)0)->lgc_padding)); /* Checks for struct llogd_body */ LASSERTF((int)sizeof(struct llogd_body) == 48, " found %lld\n", @@ -2499,20 +2635,15 @@ void lustre_assert_wire_constants(void) (long long)(int)offsetof(struct llogd_body, lgd_cur_offset)); LASSERTF((int)sizeof(((struct llogd_body *)0)->lgd_cur_offset) == 8, " found %lld\n", (long long)(int)sizeof(((struct llogd_body *)0)->lgd_cur_offset)); - LASSERTF(LLOG_ORIGIN_HANDLE_CREATE == 501, " found %lld\n", - (long long)LLOG_ORIGIN_HANDLE_CREATE); - LASSERTF(LLOG_ORIGIN_HANDLE_NEXT_BLOCK == 502, " found %lld\n", - (long long)LLOG_ORIGIN_HANDLE_NEXT_BLOCK); - LASSERTF(LLOG_ORIGIN_HANDLE_READ_HEADER == 503, " found %lld\n", - (long long)LLOG_ORIGIN_HANDLE_READ_HEADER); - LASSERTF(LLOG_ORIGIN_HANDLE_WRITE_REC == 504, " found %lld\n", - (long long)LLOG_ORIGIN_HANDLE_WRITE_REC); - LASSERTF(LLOG_ORIGIN_HANDLE_CLOSE == 505, " found %lld\n", - (long long)LLOG_ORIGIN_HANDLE_CLOSE); - LASSERTF(LLOG_ORIGIN_CONNECT == 506, " found %lld\n", - (long long)LLOG_ORIGIN_CONNECT); - LASSERTF(LLOG_CATINFO == 507, " found %lld\n", - (long long)LLOG_CATINFO); + 
CLASSERT(LLOG_ORIGIN_HANDLE_CREATE == 501); + CLASSERT(LLOG_ORIGIN_HANDLE_NEXT_BLOCK == 502); + CLASSERT(LLOG_ORIGIN_HANDLE_READ_HEADER == 503); + CLASSERT(LLOG_ORIGIN_HANDLE_WRITE_REC == 504); + CLASSERT(LLOG_ORIGIN_HANDLE_CLOSE == 505); + CLASSERT(LLOG_ORIGIN_CONNECT == 506); + CLASSERT(LLOG_CATINFO == 507); + CLASSERT(LLOG_ORIGIN_HANDLE_PREV_BLOCK == 508); + CLASSERT(LLOG_ORIGIN_HANDLE_DESTROY == 509); /* Checks for struct llogd_conn_body */ LASSERTF((int)sizeof(struct llogd_conn_body) == 40, " found %lld\n", @@ -2530,6 +2661,38 @@ void lustre_assert_wire_constants(void) LASSERTF((int)sizeof(((struct llogd_conn_body *)0)->lgdc_ctxt_idx) == 4, " found %lld\n", (long long)(int)sizeof(((struct llogd_conn_body *)0)->lgdc_ctxt_idx)); + /* Checks for struct llog_array_rec */ + LASSERTF((int)sizeof(struct llog_array_rec) == 72, " found %lld\n", + (long long)(int)sizeof(struct llog_array_rec)); + LASSERTF((int)offsetof(struct llog_array_rec, lmr_hdr) == 0, " found %lld\n", + (long long)(int)offsetof(struct llog_array_rec, lmr_hdr)); + LASSERTF((int)sizeof(((struct llog_array_rec *)0)->lmr_hdr) == 16, " found %lld\n", + (long long)(int)sizeof(((struct llog_array_rec *)0)->lmr_hdr)); + LASSERTF((int)offsetof(struct llog_array_rec, lmr_med) == 16, " found %lld\n", + (long long)(int)offsetof(struct llog_array_rec, lmr_med)); + LASSERTF((int)sizeof(((struct llog_array_rec *)0)->lmr_med) == 48, " found %lld\n", + (long long)(int)sizeof(((struct llog_array_rec *)0)->lmr_med)); + LASSERTF((int)offsetof(struct llog_array_rec, lmr_tail) == 64, " found %lld\n", + (long long)(int)offsetof(struct llog_array_rec, lmr_tail)); + LASSERTF((int)sizeof(((struct llog_array_rec *)0)->lmr_tail) == 8, " found %lld\n", + (long long)(int)sizeof(((struct llog_array_rec *)0)->lmr_tail)); + + /* Checks for struct mds_extent_desc */ + LASSERTF((int)sizeof(struct mds_extent_desc) == 48, " found %lld\n", + (long long)(int)sizeof(struct mds_extent_desc)); + LASSERTF((int)offsetof(struct 
mds_extent_desc, med_start) == 0, " found %lld\n", + (long long)(int)offsetof(struct mds_extent_desc, med_start)); + LASSERTF((int)sizeof(((struct mds_extent_desc *)0)->med_start) == 8, " found %lld\n", + (long long)(int)sizeof(((struct mds_extent_desc *)0)->med_start)); + LASSERTF((int)offsetof(struct mds_extent_desc, med_len) == 8, " found %lld\n", + (long long)(int)offsetof(struct mds_extent_desc, med_len)); + LASSERTF((int)sizeof(((struct mds_extent_desc *)0)->med_len) == 8, " found %lld\n", + (long long)(int)sizeof(((struct mds_extent_desc *)0)->med_len)); + LASSERTF((int)offsetof(struct mds_extent_desc, med_lmm) == 16, " found %lld\n", + (long long)(int)offsetof(struct mds_extent_desc, med_lmm)); + LASSERTF((int)sizeof(((struct mds_extent_desc *)0)->med_lmm) == 32, " found %lld\n", + (long long)(int)sizeof(((struct mds_extent_desc *)0)->med_lmm)); + /* Checks for struct qunit_data */ LASSERTF((int)sizeof(struct qunit_data) == 16, " found %lld\n", (long long)(int)sizeof(struct qunit_data)); diff --git a/lustre/ptlrpc/pinger.c b/lustre/ptlrpc/pinger.c index 05c4a96..c897628 100644 --- a/lustre/ptlrpc/pinger.c +++ b/lustre/ptlrpc/pinger.c @@ -45,8 +45,7 @@ int ptlrpc_ping(struct obd_import *imp) int rc = 0; ENTRY; - req = ptlrpc_prep_req(imp, OBD_PING, 0, NULL, - NULL); + req = ptlrpc_prep_req(imp, LUSTRE_OBD_VERSION, OBD_PING, 0, NULL, NULL); if (req) { DEBUG_REQ(D_INFO, req, "pinging %s->%s", imp->imp_obd->obd_uuid.uuid, @@ -64,70 +63,11 @@ int ptlrpc_ping(struct obd_import *imp) RETURN(rc); } -static int ptlrpc_statfs_interpret(struct ptlrpc_request *req, - void *data, int rc) -{ - struct obd_statfs *msfs; - struct obd_device *obd; - ENTRY; - - if (rc) - RETURN(rc); - - if (!req->rq_repmsg) - RETURN(-EPROTO); - - msfs = lustre_swab_repbuf(req, 0, sizeof(*msfs), - lustre_swab_obd_statfs); - if (msfs == NULL) - RETURN(-EPROTO); - - obd = req->rq_import->imp_obd; - - spin_lock(&obd->obd_osfs_lock); - obd->obd_osfs = *msfs; - obd->obd_osfs_age = 
cfs_time_current(); - spin_unlock(&obd->obd_osfs_lock); - - RETURN(0); -} - -int ptlrpc_statfs(struct obd_import *imp) -{ - int size = sizeof(struct obd_statfs); - struct ptlrpc_request *req; - ENTRY; - - req = ptlrpc_prep_req(imp, OST_STATFS, 0, - NULL, NULL); - if (!req) { - CERROR("OOM trying to ping %s->%s\n", - imp->imp_obd->obd_uuid.uuid, - imp->imp_target_uuid.uuid); - RETURN(-ENOMEM); - } - - DEBUG_REQ(D_INFO, req, "pinging %s->%s", - imp->imp_obd->obd_uuid.uuid, - imp->imp_target_uuid.uuid); - - req->rq_interpret_reply = ptlrpc_statfs_interpret; - req->rq_replen = lustre_msg_size(1, &size); - req->rq_no_resend = req->rq_no_delay = 1; - ptlrpcd_add_req(req); - - RETURN(0); -} - static void ptlrpc_update_next_ping(struct obd_import *imp) { - cfs_duration_t interval; - - interval = IMP_CROW_ABLE(imp) ? - STATFS_INTERVAL : PING_INTERVAL; - imp->imp_next_ping = cfs_time_shift(cfs_time_seconds( - (imp->imp_state == LUSTRE_IMP_DISCON ? 10 : interval))); + (imp->imp_state == LUSTRE_IMP_DISCON ? RECONNECT_INTERVAL : + PING_INTERVAL))); } void ptlrpc_ping_import_soon(struct obd_import *imp) @@ -159,8 +99,6 @@ static int ptlrpc_pinger_main(void *arg) /* And now, loop forever, pinging as needed. 
*/ while (1) { - unsigned long sleep_interval = PING_INTERVAL; - unsigned long update_interval = 0; cfs_time_t this_ping = cfs_time_current(); struct l_wait_info lwi; cfs_duration_t time_to_next_ping; @@ -174,9 +112,6 @@ static int ptlrpc_pinger_main(void *arg) int force, level; unsigned long flags; - if (IMP_CROW_ABLE(imp)) - sleep_interval = STATFS_INTERVAL; - spin_lock_irqsave(&imp->imp_lock, flags); level = imp->imp_state; force = imp->imp_force_verify; @@ -210,10 +145,7 @@ static int ptlrpc_pinger_main(void *arg) imp->imp_deactive, imp->imp_obd->obd_no_recov); } else if (imp->imp_pingable || force) { - if (IMP_CROW_ABLE(imp)) - ptlrpc_statfs(imp); - else - ptlrpc_ping(imp); + ptlrpc_ping(imp); } } else { if (!imp->imp_pingable) @@ -225,37 +157,28 @@ static int ptlrpc_pinger_main(void *arg) imp->imp_next_ping, this_ping); } - /* using here new calculated @update_interval, as - * sleep_interval holds minimal of possible intervals - * over pingable imports. */ - update_interval = IMP_CROW_ABLE(imp) ? - STATFS_INTERVAL : PING_INTERVAL; - /* obd_timeout might have changed */ if (cfs_time_after(imp->imp_next_ping, cfs_time_add(this_ping, - cfs_time_seconds(update_interval)))) + cfs_time_seconds(PING_INTERVAL)))) ptlrpc_update_next_ping(imp); } mutex_up(&pinger_sem); - /* Wait until the next ping time, or until we're stopped. We - * sleep here smaller interval of two possible (ping or - * statfs). If one of imports is CROW capable we'll sleep - * STATFS_INTERVAL and PING_INTERVAL otherwise. */ + /* Wait until the next ping time, or until we're stopped. */ time_to_next_ping = cfs_time_sub(cfs_time_add(this_ping, - cfs_time_seconds(sleep_interval)), + cfs_time_seconds(PING_INTERVAL)), cfs_time_current()); /* The ping sent by ptlrpc_send_rpc may get sent out say .01 second after this. 
- ptlrpc_pinger_sending_on_import will then set the + ptlrpc_pinger_eending_on_import will then set the next ping time to next_ping + .01 sec, which means we will SKIP the next ping at next_ping, and the ping will get sent 2 timeouts from now! Beware. */ CDEBUG(D_INFO, "next ping in "CFS_DURATION_T" ("CFS_TIME_T")\n", time_to_next_ping, - cfs_time_add(this_ping, cfs_time_seconds(sleep_interval))); + cfs_time_add(this_ping, cfs_time_seconds(PING_INTERVAL))); if (time_to_next_ping > 0) { lwi = LWI_TIMEOUT(max_t(cfs_duration_t, time_to_next_ping, cfs_time_seconds(1)), NULL, NULL); @@ -309,12 +232,13 @@ int ptlrpc_start_pinger(void) if (rc < 0) { CERROR("cannot start thread: %d\n", rc); OBD_FREE(pinger_thread, sizeof(*pinger_thread)); + pinger_thread = NULL; RETURN(rc); } l_wait_event(pinger_thread->t_ctl_waitq, pinger_thread->t_flags & SVC_RUNNING, &lwi); - RETURN(rc); + RETURN(0); } int ptlrpc_stop_pinger(void) @@ -395,6 +319,7 @@ void ptlrpc_pinger_wake_up() * the current implementation of pinger in liblustre is not optimized */ +#ifdef ENABLE_PINGER static struct pinger_data { int pd_recursion; cfs_time_t pd_this_ping; /* jiffies */ @@ -461,8 +386,8 @@ static int pinger_check_rpcs(void *arg) continue; } - req = ptlrpc_prep_req(imp, OBD_PING, 0, NULL, - NULL); + req = ptlrpc_prep_req(imp, LUSTRE_OBD_VERSION, OBD_PING, + 0, NULL, NULL); if (!req) { CERROR("out of memory\n"); break; @@ -493,7 +418,7 @@ static int pinger_check_rpcs(void *arg) DEBUG_REQ(D_HA, req, "pinging %s->%s", req->rq_import->imp_obd->obd_uuid.uuid, req->rq_import->imp_target_uuid.uuid); - (void)ptl_send_rpc(req); + (void)ptl_send_rpc(req, 0); } do_check_set: @@ -544,13 +469,14 @@ out: } static void *pinger_callback = NULL; +#endif /* ENABLE_PINGER */ int ptlrpc_start_pinger(void) { - memset(&pinger_args, 0, sizeof(pinger_args)); #ifdef ENABLE_PINGER - pinger_callback = - liblustre_register_wait_callback(&pinger_check_rpcs, &pinger_args); + memset(&pinger_args, 0, sizeof(pinger_args)); + 
pinger_callback = liblustre_register_wait_callback(&pinger_check_rpcs, + &pinger_args); #endif return 0; } @@ -566,6 +492,7 @@ int ptlrpc_stop_pinger(void) void ptlrpc_pinger_sending_on_import(struct obd_import *imp) { +#ifdef ENABLE_PINGER mutex_down(&pinger_sem); ptlrpc_update_next_ping(imp); if (pinger_args.pd_set == NULL && @@ -575,6 +502,7 @@ void ptlrpc_pinger_sending_on_import(struct obd_import *imp) pinger_args.pd_next_ping = imp->imp_next_ping; } mutex_up(&pinger_sem); +#endif } int ptlrpc_pinger_add_import(struct obd_import *imp) diff --git a/lustre/ptlrpc/ptlrpcd.c b/lustre/ptlrpc/ptlrpcd.c index 7a0419b..eef6d39 100644 --- a/lustre/ptlrpc/ptlrpcd.c +++ b/lustre/ptlrpc/ptlrpcd.c @@ -72,6 +72,8 @@ void ptlrpcd_wake(struct ptlrpc_request *req) cfs_waitq_signal(&pc->pc_waitq); } +/* requests that are added to the ptlrpcd queue are sent via + * ptlrpcd_check->ptlrpc_check_set() */ void ptlrpcd_add_req(struct ptlrpc_request *req) { struct ptlrpcd_ctl *pc; @@ -187,8 +189,11 @@ int ptlrpcd_check_async_rpcs(void *arg) /* single threaded!! 
*/ pc->pc_recurred++; - if (pc->pc_recurred == 1) + if (pc->pc_recurred == 1) { rc = ptlrpcd_check(pc); + if (!rc) + ptlrpc_expired_set(pc->pc_set); + } pc->pc_recurred--; return rc; @@ -197,7 +202,7 @@ int ptlrpcd_check_async_rpcs(void *arg) static int ptlrpcd_start(char *name, struct ptlrpcd_ctl *pc) { - int rc = 0; + int rc; ENTRY; memset(pc, 0, sizeof(*pc)); @@ -211,21 +216,22 @@ static int ptlrpcd_start(char *name, struct ptlrpcd_ctl *pc) pc->pc_set = ptlrpc_prep_set(); if (pc->pc_set == NULL) - GOTO(out, rc = -ENOMEM); + RETURN(-ENOMEM); #ifdef __KERNEL__ - if (cfs_kernel_thread(ptlrpcd, pc, 0) < 0) { + rc = cfs_kernel_thread(ptlrpcd, pc, 0); + if (rc < 0) { ptlrpc_set_destroy(pc->pc_set); - GOTO(out, rc = -ECHILD); + RETURN(rc); } wait_for_completion(&pc->pc_starting); #else pc->pc_callback = liblustre_register_wait_callback(&ptlrpcd_check_async_rpcs, pc); + (void)rc; #endif -out: - RETURN(rc); + RETURN(0); } static void ptlrpcd_stop(struct ptlrpcd_ctl *pc) diff --git a/lustre/ptlrpc/recov_thread.c b/lustre/ptlrpc/recov_thread.c index 2a1164c..58f253c 100644 --- a/lustre/ptlrpc/recov_thread.c +++ b/lustre/ptlrpc/recov_thread.c @@ -44,6 +44,7 @@ #include #include +#include #include #include #include diff --git a/lustre/ptlrpc/service.c b/lustre/ptlrpc/service.c index 10ea8ae..e258b20 100644 --- a/lustre/ptlrpc/service.c +++ b/lustre/ptlrpc/service.c @@ -324,7 +324,7 @@ ptlrpc_init_svc(int nbufs, int bufsize, int max_req_size, int max_reply_size, /* Now allocate pool of reply buffers */ /* Increase max reply size to next power of two */ service->srv_max_reply_size = 1; - while(service->srv_max_reply_size < max_reply_size) + while (service->srv_max_reply_size < max_reply_size) service->srv_max_reply_size <<= 1; if (proc_entry != NULL) @@ -570,17 +570,23 @@ put_conn: timediff = timeval_sub(&work_end, &work_start); if (timediff / 1000000 > (long)obd_timeout) - CERROR("request "LPU64" opc %u from %s processed in %lds\n", + CERROR("request "LPU64" opc %u from %s 
processed in %lds " + "trans "LPU64" rc %d/%d\n", request->rq_xid, request->rq_reqmsg->opc, libcfs_id2str(request->rq_peer), timeval_sub(&work_end, - &request->rq_arrival_time) / 1000000); + &request->rq_arrival_time) / 1000000, + request->rq_repmsg ? request->rq_repmsg->transno : + request->rq_transno, request->rq_status, + request->rq_repmsg ? request->rq_repmsg->status : -999); else - CDEBUG(D_HA,"request "LPU64" opc %u from %s processed in %ldus" - " (%ldus total)\n", request->rq_xid, - request->rq_reqmsg->opc, + CDEBUG(D_HA, "request "LPU64" opc %u from %s processed in " + "%ldus (%ldus total) trans "LPU64" rc %d/%d\n", + request->rq_xid, request->rq_reqmsg->opc, libcfs_id2str(request->rq_peer), timediff, - timeval_sub(&work_end, &request->rq_arrival_time)); + timeval_sub(&work_end, &request->rq_arrival_time), + request->rq_transno, request->rq_status, + request->rq_repmsg ? request->rq_repmsg->status : -999); if (svc->srv_stats != NULL) { int opc = opcode_offset(request->rq_reqmsg->opc); @@ -625,7 +631,7 @@ ptlrpc_server_handle_reply (struct ptlrpc_service *svc) list_del_init (&rs->rs_list); - /* Disengage from notifiers carefully (lock ordering!) 
*/ + /* Disengage from notifiers carefully (lock order - irqrestore below!)*/ spin_unlock(&svc->srv_lock); spin_lock (&obd->obd_uncommitted_replies_lock); @@ -917,17 +923,16 @@ out_srv_init: svc->srv_done(thread); out: - spin_lock_irqsave(&svc->srv_lock, flags); + CDEBUG(D_NET, "service thread %d exiting: rc %d\n", thread->t_id, rc); + spin_lock_irqsave(&svc->srv_lock, flags); svc->srv_nthreads--; /* must know immediately */ + thread->t_id = rc; thread->t_flags = SVC_STOPPED; - cfs_waitq_signal(&thread->t_ctl_waitq); + cfs_waitq_signal(&thread->t_ctl_waitq); spin_unlock_irqrestore(&svc->srv_lock, flags); - CDEBUG(D_NET, "service thread %d exiting: rc %d\n", thread->t_id, rc); - thread->t_id = rc; - return rc; } @@ -970,13 +975,14 @@ void ptlrpc_stop_all_threads(struct ptlrpc_service *svc) spin_unlock_irqrestore(&svc->srv_lock, flags); } -/* @base_name should be 12 characters or less - 3 will be added on */ +/* @base_name should be 11 characters or less - 3 will be added on */ int ptlrpc_start_threads(struct obd_device *dev, struct ptlrpc_service *svc, char *base_name) { int i, rc = 0; ENTRY; + LASSERT(svc->srv_num_threads > 0); for (i = 0; i < svc->srv_num_threads; i++) { char name[32]; sprintf(name, "%s_%02d", base_name, i); diff --git a/lustre/utils/lustre_cfg.c b/lustre/utils/lustre_cfg.c index cb717b5..3c5544e 100644 --- a/lustre/utils/lustre_cfg.c +++ b/lustre/utils/lustre_cfg.c @@ -398,8 +398,6 @@ int jt_lcfg_lov_setup(int argc, char **argv) jt_cmdname(argv[0]), argv[5]); return CMD_HELP; } - desc.ld_qos_threshold = QOS_DEFAULT_THRESHOLD; - desc.ld_qos_maxage = QOS_DEFAULT_MAXAGE; if (argc == 7) { desc.ld_tgt_count = strtoul(argv[6], &end, 0); diff --git a/lustre/utils/wirecheck.c b/lustre/utils/wirecheck.c index 4409bc4..2b56b83 100644 --- a/lustre/utils/wirecheck.c +++ b/lustre/utils/wirecheck.c @@ -20,6 +20,13 @@ do { \ #define STRINGIFY(a) #a + +#define CHECK_CDEFINE(a) \ + printf(" CLASSERT("#a" == "STRINGIFY(a) ");\n") + +#define CHECK_CVALUE(a) \ + 
printf(" CLASSERT("#a" == %lld);\n", (long long)a) + #define CHECK_DEFINE(a) \ do { \ printf(" LASSERTF("#a" == "STRINGIFY(a) \ @@ -64,16 +71,7 @@ do { \ } while(0) - -void check1(void) -{ -#define VALUE 1234567 - - CHECK_VALUE(VALUE); - CHECK_DEFINE(VALUE); -} - -void +static void check_lustre_handle(void) { BLANK_LINE(); @@ -81,7 +79,7 @@ check_lustre_handle(void) CHECK_MEMBER(lustre_handle, cookie); } -void +static void check_lustre_msg(void) { BLANK_LINE(); @@ -100,7 +98,7 @@ check_lustre_msg(void) CHECK_MEMBER(lustre_msg, buflens[7]); } -void +static void check_obdo(void) { BLANK_LINE(); @@ -125,54 +123,62 @@ check_obdo(void) CHECK_MEMBER(obdo, o_misc); CHECK_MEMBER(obdo, o_easize); CHECK_MEMBER(obdo, o_mds); + CHECK_MEMBER(obdo, o_stripe_idx); + CHECK_MEMBER(obdo, o_padding_1); CHECK_MEMBER(obdo, o_inline); CHECK_VALUE(OBD_INLINESZ); - CHECK_VALUE(OBD_MD_FLID); - CHECK_VALUE(OBD_MD_FLATIME); - CHECK_VALUE(OBD_MD_FLMTIME); - CHECK_VALUE(OBD_MD_FLCTIME); - CHECK_VALUE(OBD_MD_FLSIZE); - CHECK_VALUE(OBD_MD_FLBLOCKS); - CHECK_VALUE(OBD_MD_FLBLKSZ); - CHECK_VALUE(OBD_MD_FLMODE); - CHECK_VALUE(OBD_MD_FLTYPE); - CHECK_VALUE(OBD_MD_FLUID); - CHECK_VALUE(OBD_MD_FLGID); - CHECK_VALUE(OBD_MD_FLFLAGS); - CHECK_VALUE(OBD_MD_FLNLINK); - CHECK_VALUE(OBD_MD_FLGENER); - CHECK_VALUE(OBD_MD_FLINLINE); - CHECK_VALUE(OBD_MD_FLRDEV); - CHECK_VALUE(OBD_MD_FLEASIZE); - CHECK_VALUE(OBD_MD_LINKNAME); - CHECK_VALUE(OBD_MD_FLHANDLE); - CHECK_VALUE(OBD_MD_FLCKSUM); - CHECK_VALUE(OBD_MD_FLQOS); - CHECK_VALUE(OBD_MD_FLCOOKIE); - CHECK_VALUE(OBD_MD_FLGROUP); - CHECK_VALUE(OBD_MD_FLFID); - CHECK_VALUE(OBD_MD_FLEPOCH); - CHECK_VALUE(OBD_MD_FLGRANT); - CHECK_VALUE(OBD_MD_FLDIREA); - CHECK_VALUE(OBD_MD_FLUSRQUOTA); - CHECK_VALUE(OBD_MD_FLGRPQUOTA); - CHECK_VALUE_64(OBD_MD_MDS); - CHECK_VALUE_64(OBD_MD_REINT); - - CHECK_VALUE(OBD_FL_INLINEDATA); - CHECK_VALUE(OBD_FL_OBDMDEXISTS); - CHECK_VALUE(OBD_FL_DELORPHAN); - CHECK_VALUE(OBD_FL_NORPC); - CHECK_VALUE(OBD_FL_IDONLY); - 
CHECK_VALUE(OBD_FL_RECREATE_OBJS); - CHECK_VALUE(OBD_FL_DEBUG_CHECK); - CHECK_VALUE(OBD_FL_NO_USRQUOTA); - CHECK_VALUE(OBD_FL_NO_GRPQUOTA); -} - -void + CHECK_CDEFINE(OBD_MD_FLID); + CHECK_CDEFINE(OBD_MD_FLATIME); + CHECK_CDEFINE(OBD_MD_FLMTIME); + CHECK_CDEFINE(OBD_MD_FLCTIME); + CHECK_CDEFINE(OBD_MD_FLSIZE); + CHECK_CDEFINE(OBD_MD_FLBLOCKS); + CHECK_CDEFINE(OBD_MD_FLBLKSZ); + CHECK_CDEFINE(OBD_MD_FLMODE); + CHECK_CDEFINE(OBD_MD_FLTYPE); + CHECK_CDEFINE(OBD_MD_FLUID); + CHECK_CDEFINE(OBD_MD_FLGID); + CHECK_CDEFINE(OBD_MD_FLFLAGS); + CHECK_CDEFINE(OBD_MD_FLNLINK); + CHECK_CDEFINE(OBD_MD_FLGENER); + CHECK_CDEFINE(OBD_MD_FLINLINE); + CHECK_CDEFINE(OBD_MD_FLRDEV); + CHECK_CDEFINE(OBD_MD_FLEASIZE); + CHECK_CDEFINE(OBD_MD_LINKNAME); + CHECK_CDEFINE(OBD_MD_FLHANDLE); + CHECK_CDEFINE(OBD_MD_FLCKSUM); + CHECK_CDEFINE(OBD_MD_FLQOS); + CHECK_CDEFINE(OBD_MD_FLCOOKIE); + CHECK_CDEFINE(OBD_MD_FLGROUP); + CHECK_CDEFINE(OBD_MD_FLFID); + CHECK_CDEFINE(OBD_MD_FLEPOCH); + CHECK_CDEFINE(OBD_MD_FLGRANT); + CHECK_CDEFINE(OBD_MD_FLDIREA); + CHECK_CDEFINE(OBD_MD_FLUSRQUOTA); + CHECK_CDEFINE(OBD_MD_FLGRPQUOTA); + CHECK_CDEFINE(OBD_MD_FLMODEASIZE); + CHECK_CDEFINE(OBD_MD_MDS); + CHECK_CDEFINE(OBD_MD_REINT); + CHECK_CDEFINE(OBD_MD_FLXATTR); + CHECK_CDEFINE(OBD_MD_FLXATTRLS); + CHECK_CDEFINE(OBD_MD_FLXATTRRM); + CHECK_CDEFINE(OBD_MD_FLACL); + + CHECK_CDEFINE(OBD_FL_INLINEDATA); + CHECK_CDEFINE(OBD_FL_OBDMDEXISTS); + CHECK_CDEFINE(OBD_FL_DELORPHAN); + CHECK_CDEFINE(OBD_FL_NORPC); + CHECK_CDEFINE(OBD_FL_IDONLY); + CHECK_CDEFINE(OBD_FL_RECREATE_OBJS); + CHECK_CDEFINE(OBD_FL_DEBUG_CHECK); + CHECK_CDEFINE(OBD_FL_NO_USRQUOTA); + CHECK_CDEFINE(OBD_FL_NO_GRPQUOTA); + CHECK_CDEFINE(OBD_FL_CREATE_CROW); +} + +static void check_lov_mds_md_v1(void) { BLANK_LINE(); @@ -192,13 +198,24 @@ check_lov_mds_md_v1(void) CHECK_MEMBER(lov_ost_data_v1, l_ost_gen); CHECK_MEMBER(lov_ost_data_v1, l_ost_idx); - CHECK_VALUE(LOV_MAGIC_V1); + CHECK_CDEFINE(LOV_MAGIC_V1); + CHECK_CDEFINE(LOV_MAGIC_JOIN); 
CHECK_VALUE(LOV_PATTERN_RAID0); CHECK_VALUE(LOV_PATTERN_RAID1); } -void +static void +check_lov_mds_md_join(void) +{ + BLANK_LINE(); + CHECK_STRUCT(lov_mds_md_join); + CHECK_MEMBER(lov_mds_md_join, lmmj_md); + CHECK_MEMBER(lov_mds_md_join, lmmj_array_id); + CHECK_MEMBER(lov_mds_md_join, lmmj_extent_count); +} + +static void check_obd_statfs(void) { BLANK_LINE(); @@ -212,9 +229,18 @@ check_obd_statfs(void) CHECK_MEMBER(obd_statfs, os_bsize); CHECK_MEMBER(obd_statfs, os_namelen); CHECK_MEMBER(obd_statfs, os_state); + CHECK_MEMBER(obd_statfs, os_spare1); + CHECK_MEMBER(obd_statfs, os_spare2); + CHECK_MEMBER(obd_statfs, os_spare3); + CHECK_MEMBER(obd_statfs, os_spare4); + CHECK_MEMBER(obd_statfs, os_spare5); + CHECK_MEMBER(obd_statfs, os_spare6); + CHECK_MEMBER(obd_statfs, os_spare7); + CHECK_MEMBER(obd_statfs, os_spare8); + CHECK_MEMBER(obd_statfs, os_spare9); } -void +static void check_obd_ioobj(void) { BLANK_LINE(); @@ -225,7 +251,7 @@ check_obd_ioobj(void) CHECK_MEMBER(obd_ioobj, ioo_bufcnt); } -void +static void check_obd_quotactl(void) { BLANK_LINE(); @@ -255,9 +281,15 @@ check_obd_quotactl(void) CHECK_MEMBER(obd_dqblk, dqb_btime); CHECK_MEMBER(obd_dqblk, dqb_itime); CHECK_MEMBER(obd_dqblk, dqb_valid); + CHECK_MEMBER(obd_dqblk, padding); + + CHECK_DEFINE(Q_QUOTACHECK); + CHECK_DEFINE(Q_INITQUOTA); + CHECK_DEFINE(Q_GETOINFO); + CHECK_DEFINE(Q_GETOQUOTA); } -void +static void check_niobuf_remote(void) { BLANK_LINE(); @@ -273,7 +305,7 @@ check_niobuf_remote(void) CHECK_VALUE(OBD_BRW_NOQUOTA); } -void +static void check_ost_body(void) { BLANK_LINE(); @@ -281,7 +313,7 @@ check_ost_body(void) CHECK_MEMBER(ost_body, oa); } -void +static void check_ll_fid(void) { BLANK_LINE(); @@ -291,7 +323,7 @@ check_ll_fid(void) CHECK_MEMBER(ll_fid, f_type); } -void +static void check_mds_status_req(void) { BLANK_LINE(); @@ -300,7 +332,7 @@ check_mds_status_req(void) CHECK_MEMBER(mds_status_req, repbuf); } -void +static void check_mds_body(void) { BLANK_LINE(); @@ -329,24 +361,32 @@ 
check_mds_body(void) CHECK_MEMBER(mds_body, suppgid); CHECK_MEMBER(mds_body, eadatasize); CHECK_MEMBER(mds_body, aclsize); - CHECK_MEMBER(mds_body, padding_2); - CHECK_MEMBER(mds_body, padding_3); + CHECK_MEMBER(mds_body, max_mdsize); + CHECK_MEMBER(mds_body, max_cookiesize); CHECK_MEMBER(mds_body, padding_4); CHECK_VALUE(FMODE_READ); CHECK_VALUE(FMODE_WRITE); CHECK_VALUE(FMODE_EXEC); - CHECK_VALUE(MDS_OPEN_CREAT); - CHECK_VALUE(MDS_OPEN_EXCL); - CHECK_VALUE(MDS_OPEN_TRUNC); - CHECK_VALUE(MDS_OPEN_APPEND); - CHECK_VALUE(MDS_OPEN_SYNC); - CHECK_VALUE(MDS_OPEN_DIRECTORY); - CHECK_VALUE(MDS_OPEN_DELAY_CREATE); - CHECK_VALUE(MDS_OPEN_HAS_EA); + + CHECK_CDEFINE(MDS_OPEN_CREAT); + CHECK_CDEFINE(MDS_OPEN_EXCL); + CHECK_CDEFINE(MDS_OPEN_TRUNC); + CHECK_CDEFINE(MDS_OPEN_APPEND); + CHECK_CDEFINE(MDS_OPEN_SYNC); + CHECK_CDEFINE(MDS_OPEN_DIRECTORY); + CHECK_CDEFINE(MDS_OPEN_DELAY_CREATE); + CHECK_CDEFINE(MDS_OPEN_OWNEROVERRIDE); + CHECK_CDEFINE(MDS_OPEN_JOIN_FILE); + CHECK_CDEFINE(MDS_OPEN_HAS_EA); + CHECK_CDEFINE(MDS_OPEN_HAS_OBJS); + + CHECK_CDEFINE(MDS_INODELOCK_LOOKUP); + CHECK_CDEFINE(MDS_INODELOCK_UPDATE); + CHECK_CDEFINE(MDS_INODELOCK_OPEN); } -void +static void check_mds_rec_setattr(void) { BLANK_LINE(); @@ -368,7 +408,7 @@ check_mds_rec_setattr(void) CHECK_MEMBER(mds_rec_setattr, sa_attr_flags); } -void +static void check_mds_rec_create(void) { BLANK_LINE(); @@ -386,7 +426,7 @@ check_mds_rec_create(void) CHECK_MEMBER(mds_rec_create, cr_suppgid); } -void +static void check_mds_rec_link(void) { BLANK_LINE(); @@ -402,7 +442,7 @@ check_mds_rec_link(void) CHECK_MEMBER(mds_rec_link, lk_time); } -void +static void check_mds_rec_unlink(void) { BLANK_LINE(); @@ -418,7 +458,7 @@ check_mds_rec_unlink(void) CHECK_MEMBER(mds_rec_unlink, ul_time); } -void +static void check_mds_rec_rename(void) { BLANK_LINE(); @@ -434,7 +474,16 @@ check_mds_rec_rename(void) CHECK_MEMBER(mds_rec_rename, rn_time); } -void +static void +check_mds_rec_join(void) +{ + BLANK_LINE(); + 
CHECK_STRUCT(mds_rec_join); + CHECK_MEMBER(mds_rec_join, jr_fid); + CHECK_MEMBER(mds_rec_join, jr_headsize); +} + +static void check_lov_desc(void) { BLANK_LINE(); @@ -445,12 +494,14 @@ check_lov_desc(void) CHECK_MEMBER(lov_desc, ld_pattern); CHECK_MEMBER(lov_desc, ld_default_stripe_size); CHECK_MEMBER(lov_desc, ld_default_stripe_offset); - CHECK_MEMBER(lov_desc, ld_qos_threshold); - CHECK_MEMBER(lov_desc, ld_qos_maxage); + CHECK_MEMBER(lov_desc, ld_padding_1); + CHECK_MEMBER(lov_desc, ld_padding_2); + CHECK_MEMBER(lov_desc, ld_padding_3); + CHECK_MEMBER(lov_desc, ld_padding_4); CHECK_MEMBER(lov_desc, ld_uuid); } -void +static void check_ldlm_res_id(void) { BLANK_LINE(); @@ -458,7 +509,7 @@ check_ldlm_res_id(void) CHECK_MEMBER(ldlm_res_id, name[RES_NAME_SIZE]); } -void +static void check_ldlm_extent(void) { BLANK_LINE(); @@ -468,7 +519,15 @@ check_ldlm_extent(void) CHECK_MEMBER(ldlm_extent, gid); } -void +static void +check_ldlm_inodebits(void) +{ + BLANK_LINE(); + CHECK_STRUCT(ldlm_inodebits); + CHECK_MEMBER(ldlm_inodebits, bits); +} + +static void check_ldlm_flock(void) { BLANK_LINE(); @@ -479,7 +538,7 @@ check_ldlm_flock(void) CHECK_MEMBER(ldlm_flock, pid); } -void +static void check_ldlm_intent(void) { BLANK_LINE(); @@ -487,16 +546,17 @@ check_ldlm_intent(void) CHECK_MEMBER(ldlm_intent, opc); } -void +static void check_ldlm_resource_desc(void) { BLANK_LINE(); CHECK_STRUCT(ldlm_resource_desc); CHECK_MEMBER(ldlm_resource_desc, lr_type); + CHECK_MEMBER(ldlm_resource_desc, lr_padding); CHECK_MEMBER(ldlm_resource_desc, lr_name); } -void +static void check_ldlm_lock_desc(void) { BLANK_LINE(); @@ -507,30 +567,32 @@ check_ldlm_lock_desc(void) CHECK_MEMBER(ldlm_lock_desc, l_policy_data); } -void +static void check_ldlm_request(void) { BLANK_LINE(); CHECK_STRUCT(ldlm_request); CHECK_MEMBER(ldlm_request, lock_flags); + CHECK_MEMBER(ldlm_request, lock_padding); CHECK_MEMBER(ldlm_request, lock_desc); CHECK_MEMBER(ldlm_request, lock_handle1); CHECK_MEMBER(ldlm_request, 
lock_handle2); } -void +static void check_ldlm_reply(void) { BLANK_LINE(); CHECK_STRUCT(ldlm_reply); CHECK_MEMBER(ldlm_reply, lock_flags); + CHECK_MEMBER(ldlm_request, lock_padding); CHECK_MEMBER(ldlm_request, lock_desc); CHECK_MEMBER(ldlm_reply, lock_handle); CHECK_MEMBER(ldlm_reply, lock_policy_res1); CHECK_MEMBER(ldlm_reply, lock_policy_res2); } -void +static void check_ldlm_lvb(void) { BLANK_LINE(); @@ -542,39 +604,8 @@ check_ldlm_lvb(void) CHECK_MEMBER(ost_lvb, lvb_blocks); } -void -check_ptlbd_op(void) -{ - BLANK_LINE(); - CHECK_STRUCT(ptlbd_op); - CHECK_MEMBER(ptlbd_op, op_cmd); - CHECK_MEMBER(ptlbd_op, op_lun); - CHECK_MEMBER(ptlbd_op, op_niob_cnt); - CHECK_MEMBER(ptlbd_op, op__padding); - CHECK_MEMBER(ptlbd_op, op_block_cnt); -} - -void -check_ptlbd_niob(void) -{ - BLANK_LINE(); - CHECK_STRUCT(ptlbd_niob); - CHECK_MEMBER(ptlbd_niob, n_xid); - CHECK_MEMBER(ptlbd_niob, n_block_nr); - CHECK_MEMBER(ptlbd_niob, n_offset); - CHECK_MEMBER(ptlbd_niob, n_length); -} -void -check_ptlbd_rsp(void) -{ - BLANK_LINE(); - CHECK_STRUCT(ptlbd_rsp); - CHECK_MEMBER(ptlbd_rsp, r_status); - CHECK_MEMBER(ptlbd_rsp, r_error_cnt); -} - -void +static void check_llog_logid(void) { BLANK_LINE(); @@ -583,26 +614,30 @@ check_llog_logid(void) CHECK_MEMBER(llog_logid, lgl_ogr); CHECK_MEMBER(llog_logid, lgl_ogen); - CHECK_VALUE(OST_SZ_REC); - CHECK_VALUE(OST_RAID1_REC); - CHECK_VALUE(MDS_UNLINK_REC); - CHECK_VALUE(MDS_SETATTR_REC); - CHECK_VALUE(OBD_CFG_REC); - CHECK_VALUE(PTL_CFG_REC); - CHECK_VALUE(LLOG_GEN_REC); - CHECK_VALUE(LLOG_HDR_MAGIC); - CHECK_VALUE(LLOG_LOGID_MAGIC); + CHECK_CVALUE(OST_SZ_REC); + CHECK_CVALUE(OST_RAID1_REC); + CHECK_CVALUE(MDS_UNLINK_REC); + CHECK_CVALUE(MDS_SETATTR_REC); + CHECK_CVALUE(OBD_CFG_REC); + CHECK_CVALUE(PTL_CFG_REC); + CHECK_CVALUE(LLOG_GEN_REC); + CHECK_CVALUE(LLOG_JOIN_REC); + CHECK_CVALUE(LLOG_HDR_MAGIC); + CHECK_CVALUE(LLOG_LOGID_MAGIC); } -void +static void check_llog_catid(void) { BLANK_LINE(); CHECK_STRUCT(llog_catid); 
CHECK_MEMBER(llog_catid, lci_logid); + CHECK_MEMBER(llog_catid, lci_padding1); + CHECK_MEMBER(llog_catid, lci_padding2); + CHECK_MEMBER(llog_catid, lci_padding3); } -void +static void check_llog_rec_hdr(void) { BLANK_LINE(); @@ -610,9 +645,10 @@ check_llog_rec_hdr(void) CHECK_MEMBER(llog_rec_hdr, lrh_len); CHECK_MEMBER(llog_rec_hdr, lrh_index); CHECK_MEMBER(llog_rec_hdr, lrh_type); + CHECK_MEMBER(llog_rec_hdr, padding); } -void +static void check_llog_rec_tail(void) { BLANK_LINE(); @@ -621,17 +657,22 @@ check_llog_rec_tail(void) CHECK_MEMBER(llog_rec_tail, lrt_index); } -void +static void check_llog_logid_rec(void) { BLANK_LINE(); CHECK_STRUCT(llog_logid_rec); CHECK_MEMBER(llog_logid_rec, lid_hdr); CHECK_MEMBER(llog_logid_rec, lid_id); + CHECK_MEMBER(llog_logid_rec, padding1); + CHECK_MEMBER(llog_logid_rec, padding2); + CHECK_MEMBER(llog_logid_rec, padding3); + CHECK_MEMBER(llog_logid_rec, padding4); + CHECK_MEMBER(llog_logid_rec, padding5); CHECK_MEMBER(llog_logid_rec, lid_tail); } -void +static void check_llog_create_rec(void) { BLANK_LINE(); @@ -640,9 +681,10 @@ check_llog_create_rec(void) CHECK_MEMBER(llog_create_rec, lcr_fid); CHECK_MEMBER(llog_create_rec, lcr_oid); CHECK_MEMBER(llog_create_rec, lcr_ogen); + CHECK_MEMBER(llog_create_rec, padding); } -void +static void check_llog_orphan_rec(void) { BLANK_LINE(); @@ -650,10 +692,11 @@ check_llog_orphan_rec(void) CHECK_MEMBER(llog_orphan_rec, lor_hdr); CHECK_MEMBER(llog_orphan_rec, lor_oid); CHECK_MEMBER(llog_orphan_rec, lor_ogen); + CHECK_MEMBER(llog_orphan_rec, padding); CHECK_MEMBER(llog_orphan_rec, lor_tail); } -void +static void check_llog_unlink_rec(void) { BLANK_LINE(); @@ -661,10 +704,25 @@ check_llog_unlink_rec(void) CHECK_MEMBER(llog_unlink_rec, lur_hdr); CHECK_MEMBER(llog_unlink_rec, lur_oid); CHECK_MEMBER(llog_unlink_rec, lur_ogen); + CHECK_MEMBER(llog_unlink_rec, padding); CHECK_MEMBER(llog_unlink_rec, lur_tail); } -void +static void +check_llog_setattr_rec(void) +{ + BLANK_LINE(); + 
CHECK_STRUCT(llog_setattr_rec); + CHECK_MEMBER(llog_setattr_rec, lsr_hdr); + CHECK_MEMBER(llog_setattr_rec, lsr_oid); + CHECK_MEMBER(llog_setattr_rec, lsr_ogen); + CHECK_MEMBER(llog_setattr_rec, lsr_uid); + CHECK_MEMBER(llog_setattr_rec, lsr_gid); + CHECK_MEMBER(llog_setattr_rec, padding); + CHECK_MEMBER(llog_setattr_rec, lsr_tail); +} + +static void check_llog_size_change_rec(void) { BLANK_LINE(); @@ -672,10 +730,11 @@ check_llog_size_change_rec(void) CHECK_MEMBER(llog_size_change_rec, lsc_hdr); CHECK_MEMBER(llog_size_change_rec, lsc_fid); CHECK_MEMBER(llog_size_change_rec, lsc_io_epoch); + CHECK_MEMBER(llog_size_change_rec, padding); CHECK_MEMBER(llog_size_change_rec, lsc_tail); } -void +static void check_llog_gen(void) { BLANK_LINE(); @@ -684,7 +743,7 @@ check_llog_gen(void) CHECK_MEMBER(llog_gen, conn_cnt); } -void +static void check_llog_gen_rec(void) { BLANK_LINE(); @@ -694,7 +753,7 @@ check_llog_gen_rec(void) CHECK_MEMBER(llog_gen_rec, lgr_tail); } -void +static void check_llog_log_hdr(void) { BLANK_LINE(); @@ -712,7 +771,7 @@ check_llog_log_hdr(void) CHECK_MEMBER(llog_log_hdr, llh_tail); } -void +static void check_llog_cookie(void) { BLANK_LINE(); @@ -720,9 +779,10 @@ check_llog_cookie(void) CHECK_MEMBER(llog_cookie, lgc_lgl); CHECK_MEMBER(llog_cookie, lgc_subsys); CHECK_MEMBER(llog_cookie, lgc_index); + CHECK_MEMBER(llog_cookie, lgc_padding); } -void +static void check_llogd_body(void) { BLANK_LINE(); @@ -735,16 +795,18 @@ check_llogd_body(void) CHECK_MEMBER(llogd_body, lgd_len); CHECK_MEMBER(llogd_body, lgd_cur_offset); - CHECK_VALUE(LLOG_ORIGIN_HANDLE_CREATE); - CHECK_VALUE(LLOG_ORIGIN_HANDLE_NEXT_BLOCK); - CHECK_VALUE(LLOG_ORIGIN_HANDLE_READ_HEADER); - CHECK_VALUE(LLOG_ORIGIN_HANDLE_WRITE_REC); - CHECK_VALUE(LLOG_ORIGIN_HANDLE_CLOSE); - CHECK_VALUE(LLOG_ORIGIN_CONNECT); - CHECK_VALUE(LLOG_CATINFO); + CHECK_CVALUE(LLOG_ORIGIN_HANDLE_CREATE); + CHECK_CVALUE(LLOG_ORIGIN_HANDLE_NEXT_BLOCK); + CHECK_CVALUE(LLOG_ORIGIN_HANDLE_READ_HEADER); + 
CHECK_CVALUE(LLOG_ORIGIN_HANDLE_WRITE_REC); + CHECK_CVALUE(LLOG_ORIGIN_HANDLE_CLOSE); + CHECK_CVALUE(LLOG_ORIGIN_CONNECT); + CHECK_CVALUE(LLOG_CATINFO); + CHECK_CVALUE(LLOG_ORIGIN_HANDLE_PREV_BLOCK); + CHECK_CVALUE(LLOG_ORIGIN_HANDLE_DESTROY); } -void +static void check_llogd_conn_body(void) { BLANK_LINE(); @@ -754,7 +816,27 @@ check_llogd_conn_body(void) CHECK_MEMBER(llogd_conn_body, lgdc_ctxt_idx); } -void +static void +check_mds_extent_desc(void) +{ + BLANK_LINE(); + CHECK_STRUCT(mds_extent_desc); + CHECK_MEMBER(mds_extent_desc, med_start); + CHECK_MEMBER(mds_extent_desc, med_len); + CHECK_MEMBER(mds_extent_desc, med_lmm); +} + +static void +check_llog_array_rec(void) +{ + BLANK_LINE(); + CHECK_STRUCT(llog_array_rec); + CHECK_MEMBER(llog_array_rec, lmr_hdr); + CHECK_MEMBER(llog_array_rec, lmr_med); + CHECK_MEMBER(llog_array_rec, lmr_tail); +} + +static void check_qunit_data(void) { BLANK_LINE(); @@ -765,7 +847,7 @@ check_qunit_data(void) CHECK_MEMBER(qunit_data, qd_isblk); } -void +static void system_string (char *cmdline, char *str, int len) { int fds[2]; @@ -909,13 +991,12 @@ main(int argc, char **argv) CHECK_VALUE(MDS_STATUS_CONN); CHECK_VALUE(MDS_STATUS_LOV); - CHECK_VALUE(MDS_OPEN_HAS_EA); - CHECK_VALUE(LDLM_ENQUEUE); CHECK_VALUE(LDLM_CONVERT); CHECK_VALUE(LDLM_CANCEL); CHECK_VALUE(LDLM_BL_CALLBACK); CHECK_VALUE(LDLM_CP_CALLBACK); + CHECK_VALUE(LDLM_GL_CALLBACK); CHECK_VALUE(LDLM_LAST_OPC); CHECK_VALUE(LCK_EX); @@ -924,18 +1005,13 @@ main(int argc, char **argv) CHECK_VALUE(LCK_CW); CHECK_VALUE(LCK_CR); CHECK_VALUE(LCK_NL); + CHECK_VALUE(LCK_GROUP); + CHECK_VALUE(LCK_MAXMODE); - CHECK_VALUE(PTLBD_QUERY); - CHECK_VALUE(PTLBD_READ); - CHECK_VALUE(PTLBD_WRITE); - CHECK_VALUE(PTLBD_FLUSH); - CHECK_VALUE(PTLBD_CONNECT); - CHECK_VALUE(PTLBD_DISCONNECT); - CHECK_VALUE(PTLBD_LAST_OPC); - - CHECK_VALUE(MGMT_CONNECT); - CHECK_VALUE(MGMT_DISCONNECT); - CHECK_VALUE(MGMT_EXCEPTION); + CHECK_CVALUE(LDLM_PLAIN); + CHECK_CVALUE(LDLM_EXTENT); + CHECK_CVALUE(LDLM_FLOCK); + 
CHECK_CVALUE(LDLM_IBITS); CHECK_VALUE(OBD_PING); CHECK_VALUE(OBD_LOG_CANCEL); @@ -945,12 +1021,27 @@ main(int argc, char **argv) CHECK_VALUE(QUOTA_DQACQ); CHECK_VALUE(QUOTA_DQREL); + CHECK_CDEFINE(OBD_CONNECT_RDONLY); + CHECK_CDEFINE(OBD_CONNECT_INDEX); + CHECK_CDEFINE(OBD_CONNECT_GRANT); + CHECK_CDEFINE(OBD_CONNECT_SRVLOCK); + CHECK_CDEFINE(OBD_CONNECT_VERSION); + CHECK_CDEFINE(OBD_CONNECT_REQPORTAL); + CHECK_CDEFINE(OBD_CONNECT_ACL); + CHECK_CDEFINE(OBD_CONNECT_XATTR); + CHECK_CDEFINE(OBD_CONNECT_CROW); + CHECK_CDEFINE(OBD_CONNECT_TRUNCLOCK); + CHECK_CDEFINE(OBD_CONNECT_TRANSNO); + CHECK_CDEFINE(OBD_CONNECT_IBITS); + CHECK_CDEFINE(OBD_CONNECT_JOIN); + COMMENT("Sizes and Offsets"); BLANK_LINE(); check_lustre_handle(); check_lustre_msg(); check_obdo(); check_lov_mds_md_v1(); + check_lov_mds_md_join(); check_obd_statfs(); check_obd_ioobj(); check_obd_quotactl(); @@ -964,19 +1055,18 @@ main(int argc, char **argv) check_mds_rec_link(); check_mds_rec_unlink(); check_mds_rec_rename(); + check_mds_rec_join(); check_lov_desc(); check_ldlm_res_id(); check_ldlm_extent(); check_ldlm_flock(); + check_ldlm_inodebits(); check_ldlm_intent(); check_ldlm_resource_desc(); check_ldlm_lock_desc(); check_ldlm_request(); check_ldlm_reply(); check_ldlm_lvb(); - check_ptlbd_op(); - check_ptlbd_niob(); - check_ptlbd_rsp(); check_llog_logid(); check_llog_catid(); check_llog_rec_hdr(); @@ -985,6 +1075,7 @@ main(int argc, char **argv) check_llog_create_rec(); check_llog_orphan_rec(); check_llog_unlink_rec(); + check_llog_setattr_rec(); check_llog_size_change_rec(); check_llog_gen(); check_llog_gen_rec(); @@ -992,6 +1083,8 @@ main(int argc, char **argv) check_llog_cookie(); check_llogd_body(); check_llogd_conn_body(); + check_llog_array_rec(); + check_mds_extent_desc(); check_qunit_data(); printf("}\n\n"); diff --git a/lustre/utils/wiretest.c b/lustre/utils/wiretest.c index 345bf82..5845795 100644 --- a/lustre/utils/wiretest.c +++ b/lustre/utils/wiretest.c @@ -5,6 +5,7 @@ #undef LASSERT 
#undef LASSERTF +#define CLASSERT(cond) ({ switch(42) { case (cond): case 0: break; } }) #define LASSERT(cond) if (!(cond)) { printf("failed " #cond "\n"); ret = 1; } #define LASSERTF(cond, fmt, arg) if (!(cond)) { printf("failed '" #cond "'" fmt, arg);ret = 1;} @@ -25,8 +26,8 @@ int main() void lustre_assert_wire_constants(void) { /* Wire protocol assertions generated by 'wirecheck' - * running on Linux localhost.localdomain 2.6.9-1.667 #1 Tue Nov 2 14:41:25 EST 2004 i686 i68 - * with gcc version 3.4.3 20050227 (Red Hat 3.4.3-22.fc3) */ + * running on Linux schatzie.adilger.int 2.6.12-1.1381_FC3 #1 Fri Oct 21 03:46:55 EDT 2005 i6 + * with gcc version 3.3.4 20040817 (Red Hat Linux 3.3.4-2) */ /* Constants... */ @@ -158,8 +159,6 @@ void lustre_assert_wire_constants(void) (long long)MDS_STATUS_CONN); LASSERTF(MDS_STATUS_LOV == 2, " found %lld\n", (long long)MDS_STATUS_LOV); - LASSERTF(MDS_OPEN_HAS_EA == 1073741824, " found %lld\n", - (long long)MDS_OPEN_HAS_EA); LASSERTF(LDLM_ENQUEUE == 101, " found %lld\n", (long long)LDLM_ENQUEUE); LASSERTF(LDLM_CONVERT == 102, " found %lld\n", @@ -170,6 +169,8 @@ void lustre_assert_wire_constants(void) (long long)LDLM_BL_CALLBACK); LASSERTF(LDLM_CP_CALLBACK == 105, " found %lld\n", (long long)LDLM_CP_CALLBACK); + LASSERTF(LDLM_GL_CALLBACK == 106, " found %lld\n", + (long long)LDLM_GL_CALLBACK); LASSERTF(LDLM_LAST_OPC == 107, " found %lld\n", (long long)LDLM_LAST_OPC); LASSERTF(LCK_EX == 1, " found %lld\n", @@ -184,26 +185,14 @@ void lustre_assert_wire_constants(void) (long long)LCK_CR); LASSERTF(LCK_NL == 32, " found %lld\n", (long long)LCK_NL); - LASSERTF(PTLBD_QUERY == 200, " found %lld\n", - (long long)PTLBD_QUERY); - LASSERTF(PTLBD_READ == 201, " found %lld\n", - (long long)PTLBD_READ); - LASSERTF(PTLBD_WRITE == 202, " found %lld\n", - (long long)PTLBD_WRITE); - LASSERTF(PTLBD_FLUSH == 203, " found %lld\n", - (long long)PTLBD_FLUSH); - LASSERTF(PTLBD_CONNECT == 204, " found %lld\n", - (long long)PTLBD_CONNECT); - 
LASSERTF(PTLBD_DISCONNECT == 205, " found %lld\n", - (long long)PTLBD_DISCONNECT); - LASSERTF(PTLBD_LAST_OPC == 206, " found %lld\n", - (long long)PTLBD_LAST_OPC); - LASSERTF(MGMT_CONNECT == 250, " found %lld\n", - (long long)MGMT_CONNECT); - LASSERTF(MGMT_DISCONNECT == 251, " found %lld\n", - (long long)MGMT_DISCONNECT); - LASSERTF(MGMT_EXCEPTION == 252, " found %lld\n", - (long long)MGMT_EXCEPTION); + LASSERTF(LCK_GROUP == 64, " found %lld\n", + (long long)LCK_GROUP); + LASSERTF(LCK_MAXMODE == 65, " found %lld\n", + (long long)LCK_MAXMODE); + CLASSERT(LDLM_PLAIN == 10); + CLASSERT(LDLM_EXTENT == 11); + CLASSERT(LDLM_FLOCK == 12); + CLASSERT(LDLM_IBITS == 13); LASSERTF(OBD_PING == 400, " found %lld\n", (long long)OBD_PING); LASSERTF(OBD_LOG_CANCEL == 401, " found %lld\n", @@ -216,6 +205,19 @@ void lustre_assert_wire_constants(void) (long long)QUOTA_DQACQ); LASSERTF(QUOTA_DQREL == 602, " found %lld\n", (long long)QUOTA_DQREL); + CLASSERT(OBD_CONNECT_RDONLY == 0x1ULL); + CLASSERT(OBD_CONNECT_INDEX == 0x2ULL); + CLASSERT(OBD_CONNECT_GRANT == 0x8ULL); + CLASSERT(OBD_CONNECT_SRVLOCK == 0x10ULL); + CLASSERT(OBD_CONNECT_VERSION == 0x20ULL); + CLASSERT(OBD_CONNECT_REQPORTAL == 0x40ULL); + CLASSERT(OBD_CONNECT_ACL == 0x80ULL); + CLASSERT(OBD_CONNECT_XATTR == 0x100ULL); + CLASSERT(OBD_CONNECT_CROW == 0x200ULL); + CLASSERT(OBD_CONNECT_TRUNCLOCK == 0x400ULL); + CLASSERT(OBD_CONNECT_TRANSNO == 0x800ULL); + CLASSERT(OBD_CONNECT_IBITS == 0x1000ULL); + CLASSERT(OBD_CONNECT_JOIN == 0x2000ULL); /* Sizes and Offsets */ @@ -362,92 +364,66 @@ void lustre_assert_wire_constants(void) (long long)(int)offsetof(struct obdo, o_mds)); LASSERTF((int)sizeof(((struct obdo *)0)->o_mds) == 4, " found %lld\n", (long long)(int)sizeof(((struct obdo *)0)->o_mds)); + LASSERTF((int)offsetof(struct obdo, o_stripe_idx) == 120, " found %lld\n", + (long long)(int)offsetof(struct obdo, o_stripe_idx)); + LASSERTF((int)sizeof(((struct obdo *)0)->o_stripe_idx) == 4, " found %lld\n", + (long 
long)(int)sizeof(((struct obdo *)0)->o_stripe_idx)); + LASSERTF((int)offsetof(struct obdo, o_padding_1) == 124, " found %lld\n", + (long long)(int)offsetof(struct obdo, o_padding_1)); + LASSERTF((int)sizeof(((struct obdo *)0)->o_padding_1) == 4, " found %lld\n", + (long long)(int)sizeof(((struct obdo *)0)->o_padding_1)); LASSERTF((int)offsetof(struct obdo, o_inline) == 128, " found %lld\n", (long long)(int)offsetof(struct obdo, o_inline)); LASSERTF((int)sizeof(((struct obdo *)0)->o_inline) == 80, " found %lld\n", (long long)(int)sizeof(((struct obdo *)0)->o_inline)); LASSERTF(OBD_INLINESZ == 80, " found %lld\n", (long long)OBD_INLINESZ); - LASSERTF(OBD_MD_FLID == 1, " found %lld\n", - (long long)OBD_MD_FLID); - LASSERTF(OBD_MD_FLATIME == 2, " found %lld\n", - (long long)OBD_MD_FLATIME); - LASSERTF(OBD_MD_FLMTIME == 4, " found %lld\n", - (long long)OBD_MD_FLMTIME); - LASSERTF(OBD_MD_FLCTIME == 8, " found %lld\n", - (long long)OBD_MD_FLCTIME); - LASSERTF(OBD_MD_FLSIZE == 16, " found %lld\n", - (long long)OBD_MD_FLSIZE); - LASSERTF(OBD_MD_FLBLOCKS == 32, " found %lld\n", - (long long)OBD_MD_FLBLOCKS); - LASSERTF(OBD_MD_FLBLKSZ == 64, " found %lld\n", - (long long)OBD_MD_FLBLKSZ); - LASSERTF(OBD_MD_FLMODE == 128, " found %lld\n", - (long long)OBD_MD_FLMODE); - LASSERTF(OBD_MD_FLTYPE == 256, " found %lld\n", - (long long)OBD_MD_FLTYPE); - LASSERTF(OBD_MD_FLUID == 512, " found %lld\n", - (long long)OBD_MD_FLUID); - LASSERTF(OBD_MD_FLGID == 1024, " found %lld\n", - (long long)OBD_MD_FLGID); - LASSERTF(OBD_MD_FLFLAGS == 2048, " found %lld\n", - (long long)OBD_MD_FLFLAGS); - LASSERTF(OBD_MD_FLNLINK == 8192, " found %lld\n", - (long long)OBD_MD_FLNLINK); - LASSERTF(OBD_MD_FLGENER == 16384, " found %lld\n", - (long long)OBD_MD_FLGENER); - LASSERTF(OBD_MD_FLINLINE == 32768, " found %lld\n", - (long long)OBD_MD_FLINLINE); - LASSERTF(OBD_MD_FLRDEV == 65536, " found %lld\n", - (long long)OBD_MD_FLRDEV); - LASSERTF(OBD_MD_FLEASIZE == 131072, " found %lld\n", - (long 
long)OBD_MD_FLEASIZE); - LASSERTF(OBD_MD_LINKNAME == 262144, " found %lld\n", - (long long)OBD_MD_LINKNAME); - LASSERTF(OBD_MD_FLHANDLE == 524288, " found %lld\n", - (long long)OBD_MD_FLHANDLE); - LASSERTF(OBD_MD_FLCKSUM == 1048576, " found %lld\n", - (long long)OBD_MD_FLCKSUM); - LASSERTF(OBD_MD_FLQOS == 2097152, " found %lld\n", - (long long)OBD_MD_FLQOS); - LASSERTF(OBD_MD_FLCOOKIE == 8388608, " found %lld\n", - (long long)OBD_MD_FLCOOKIE); - LASSERTF(OBD_MD_FLGROUP == 16777216, " found %lld\n", - (long long)OBD_MD_FLGROUP); - LASSERTF(OBD_MD_FLFID == 33554432, " found %lld\n", - (long long)OBD_MD_FLFID); - LASSERTF(OBD_MD_FLEPOCH == 67108864, " found %lld\n", - (long long)OBD_MD_FLEPOCH); - LASSERTF(OBD_MD_FLGRANT == 134217728, " found %lld\n", - (long long)OBD_MD_FLGRANT); - LASSERTF(OBD_MD_FLDIREA == 268435456, " found %lld\n", - (long long)OBD_MD_FLDIREA); - LASSERTF(OBD_MD_FLUSRQUOTA == 536870912, " found %lld\n", - (long long)OBD_MD_FLUSRQUOTA); - LASSERTF(OBD_MD_FLGRPQUOTA == 1073741824, " found %lld\n", - (long long)OBD_MD_FLGRPQUOTA); - LASSERTF(OBD_MD_MDS == 4294967296ULL, " found %lld\n", - (long long)OBD_MD_MDS); - LASSERTF(OBD_MD_REINT == 8589934592ULL, " found %lld\n", - (long long)OBD_MD_REINT); - LASSERTF(OBD_FL_INLINEDATA == 1, " found %lld\n", - (long long)OBD_FL_INLINEDATA); - LASSERTF(OBD_FL_OBDMDEXISTS == 2, " found %lld\n", - (long long)OBD_FL_OBDMDEXISTS); - LASSERTF(OBD_FL_DELORPHAN == 4, " found %lld\n", - (long long)OBD_FL_DELORPHAN); - LASSERTF(OBD_FL_NORPC == 8, " found %lld\n", - (long long)OBD_FL_NORPC); - LASSERTF(OBD_FL_IDONLY == 16, " found %lld\n", - (long long)OBD_FL_IDONLY); - LASSERTF(OBD_FL_RECREATE_OBJS == 32, " found %lld\n", - (long long)OBD_FL_RECREATE_OBJS); - LASSERTF(OBD_FL_DEBUG_CHECK == 64, " found %lld\n", - (long long)OBD_FL_DEBUG_CHECK); - LASSERTF(OBD_FL_NO_USRQUOTA == 256, " found %lld\n", - (long long)OBD_FL_NO_USRQUOTA); - LASSERTF(OBD_FL_NO_GRPQUOTA == 512, " found %lld\n", - (long long)OBD_FL_NO_GRPQUOTA); 
+ CLASSERT(OBD_MD_FLID == (0x00000001ULL)); + CLASSERT(OBD_MD_FLATIME == (0x00000002ULL)); + CLASSERT(OBD_MD_FLMTIME == (0x00000004ULL)); + CLASSERT(OBD_MD_FLCTIME == (0x00000008ULL)); + CLASSERT(OBD_MD_FLSIZE == (0x00000010ULL)); + CLASSERT(OBD_MD_FLBLOCKS == (0x00000020ULL)); + CLASSERT(OBD_MD_FLBLKSZ == (0x00000040ULL)); + CLASSERT(OBD_MD_FLMODE == (0x00000080ULL)); + CLASSERT(OBD_MD_FLTYPE == (0x00000100ULL)); + CLASSERT(OBD_MD_FLUID == (0x00000200ULL)); + CLASSERT(OBD_MD_FLGID == (0x00000400ULL)); + CLASSERT(OBD_MD_FLFLAGS == (0x00000800ULL)); + CLASSERT(OBD_MD_FLNLINK == (0x00002000ULL)); + CLASSERT(OBD_MD_FLGENER == (0x00004000ULL)); + CLASSERT(OBD_MD_FLINLINE == (0x00008000ULL)); + CLASSERT(OBD_MD_FLRDEV == (0x00010000ULL)); + CLASSERT(OBD_MD_FLEASIZE == (0x00020000ULL)); + CLASSERT(OBD_MD_LINKNAME == (0x00040000ULL)); + CLASSERT(OBD_MD_FLHANDLE == (0x00080000ULL)); + CLASSERT(OBD_MD_FLCKSUM == (0x00100000ULL)); + CLASSERT(OBD_MD_FLQOS == (0x00200000ULL)); + CLASSERT(OBD_MD_FLCOOKIE == (0x00800000ULL)); + CLASSERT(OBD_MD_FLGROUP == (0x01000000ULL)); + CLASSERT(OBD_MD_FLFID == (0x02000000ULL)); + CLASSERT(OBD_MD_FLEPOCH == (0x04000000ULL)); + CLASSERT(OBD_MD_FLGRANT == (0x08000000ULL)); + CLASSERT(OBD_MD_FLDIREA == (0x10000000ULL)); + CLASSERT(OBD_MD_FLUSRQUOTA == (0x20000000ULL)); + CLASSERT(OBD_MD_FLGRPQUOTA == (0x40000000ULL)); + CLASSERT(OBD_MD_FLMODEASIZE == (0x80000000ULL)); + CLASSERT(OBD_MD_MDS == (0x0000000100000000ULL)); + CLASSERT(OBD_MD_REINT == (0x0000000200000000ULL)); + CLASSERT(OBD_MD_FLXATTR == (0x0000001000000000ULL)); + CLASSERT(OBD_MD_FLXATTRLS == (0x0000002000000000ULL)); + CLASSERT(OBD_MD_FLXATTRRM == (0x0000004000000000ULL)); + CLASSERT(OBD_MD_FLACL == (0x0000008000000000ULL)); + CLASSERT(OBD_FL_INLINEDATA == (0x00000001)); + CLASSERT(OBD_FL_OBDMDEXISTS == (0x00000002)); + CLASSERT(OBD_FL_DELORPHAN == (0x00000004)); + CLASSERT(OBD_FL_NORPC == (0x00000008)); + CLASSERT(OBD_FL_IDONLY == (0x00000010)); + CLASSERT(OBD_FL_RECREATE_OBJS == 
(0x00000020)); + CLASSERT(OBD_FL_DEBUG_CHECK == (0x00000040)); + CLASSERT(OBD_FL_NO_USRQUOTA == (0x00000100)); + CLASSERT(OBD_FL_NO_GRPQUOTA == (0x00000200)); + CLASSERT(OBD_FL_CREATE_CROW == (0x00000400)); /* Checks for struct lov_mds_md_v1 */ LASSERTF((int)sizeof(struct lov_mds_md_v1) == 32, " found %lld\n", @@ -500,13 +476,29 @@ void lustre_assert_wire_constants(void) (long long)(int)offsetof(struct lov_ost_data_v1, l_ost_idx)); LASSERTF((int)sizeof(((struct lov_ost_data_v1 *)0)->l_ost_idx) == 4, " found %lld\n", (long long)(int)sizeof(((struct lov_ost_data_v1 *)0)->l_ost_idx)); - LASSERTF(LOV_MAGIC_V1 == 198249424, " found %lld\n", - (long long)LOV_MAGIC_V1); + CLASSERT(LOV_MAGIC_V1 == 0x0BD10BD0); + CLASSERT(LOV_MAGIC_JOIN == 0x0BD20BD0); LASSERTF(LOV_PATTERN_RAID0 == 1, " found %lld\n", (long long)LOV_PATTERN_RAID0); LASSERTF(LOV_PATTERN_RAID1 == 2, " found %lld\n", (long long)LOV_PATTERN_RAID1); + /* Checks for struct lov_mds_md_join */ + LASSERTF((int)sizeof(struct lov_mds_md_join) == 56, " found %lld\n", + (long long)(int)sizeof(struct lov_mds_md_join)); + LASSERTF((int)offsetof(struct lov_mds_md_join, lmmj_md) == 0, " found %lld\n", + (long long)(int)offsetof(struct lov_mds_md_join, lmmj_md)); + LASSERTF((int)sizeof(((struct lov_mds_md_join *)0)->lmmj_md) == 32, " found %lld\n", + (long long)(int)sizeof(((struct lov_mds_md_join *)0)->lmmj_md)); + LASSERTF((int)offsetof(struct lov_mds_md_join, lmmj_array_id) == 32, " found %lld\n", + (long long)(int)offsetof(struct lov_mds_md_join, lmmj_array_id)); + LASSERTF((int)sizeof(((struct lov_mds_md_join *)0)->lmmj_array_id) == 20, " found %lld\n", + (long long)(int)sizeof(((struct lov_mds_md_join *)0)->lmmj_array_id)); + LASSERTF((int)offsetof(struct lov_mds_md_join, lmmj_extent_count) == 52, " found %lld\n", + (long long)(int)offsetof(struct lov_mds_md_join, lmmj_extent_count)); + LASSERTF((int)sizeof(((struct lov_mds_md_join *)0)->lmmj_extent_count) == 4, " found %lld\n", + (long long)(int)sizeof(((struct 
lov_mds_md_join *)0)->lmmj_extent_count)); + /* Checks for struct obd_statfs */ LASSERTF((int)sizeof(struct obd_statfs) == 144, " found %lld\n", (long long)(int)sizeof(struct obd_statfs)); @@ -546,6 +538,42 @@ void lustre_assert_wire_constants(void) (long long)(int)offsetof(struct obd_statfs, os_state)); LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_state) == 4, " found %lld\n", (long long)(int)sizeof(((struct obd_statfs *)0)->os_state)); + LASSERTF((int)offsetof(struct obd_statfs, os_spare1) == 108, " found %lld\n", + (long long)(int)offsetof(struct obd_statfs, os_spare1)); + LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare1) == 4, " found %lld\n", + (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare1)); + LASSERTF((int)offsetof(struct obd_statfs, os_spare2) == 112, " found %lld\n", + (long long)(int)offsetof(struct obd_statfs, os_spare2)); + LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare2) == 4, " found %lld\n", + (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare2)); + LASSERTF((int)offsetof(struct obd_statfs, os_spare3) == 116, " found %lld\n", + (long long)(int)offsetof(struct obd_statfs, os_spare3)); + LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare3) == 4, " found %lld\n", + (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare3)); + LASSERTF((int)offsetof(struct obd_statfs, os_spare4) == 120, " found %lld\n", + (long long)(int)offsetof(struct obd_statfs, os_spare4)); + LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare4) == 4, " found %lld\n", + (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare4)); + LASSERTF((int)offsetof(struct obd_statfs, os_spare5) == 124, " found %lld\n", + (long long)(int)offsetof(struct obd_statfs, os_spare5)); + LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare5) == 4, " found %lld\n", + (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare5)); + LASSERTF((int)offsetof(struct obd_statfs, os_spare6) == 128, " found %lld\n", + (long long)(int)offsetof(struct 
obd_statfs, os_spare6)); + LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare6) == 4, " found %lld\n", + (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare6)); + LASSERTF((int)offsetof(struct obd_statfs, os_spare7) == 132, " found %lld\n", + (long long)(int)offsetof(struct obd_statfs, os_spare7)); + LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare7) == 4, " found %lld\n", + (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare7)); + LASSERTF((int)offsetof(struct obd_statfs, os_spare8) == 136, " found %lld\n", + (long long)(int)offsetof(struct obd_statfs, os_spare8)); + LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare8) == 4, " found %lld\n", + (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare8)); + LASSERTF((int)offsetof(struct obd_statfs, os_spare9) == 140, " found %lld\n", + (long long)(int)offsetof(struct obd_statfs, os_spare9)); + LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare9) == 4, " found %lld\n", + (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare9)); /* Checks for struct obd_ioobj */ LASSERTF((int)sizeof(struct obd_ioobj) == 24, " found %lld\n", @@ -654,6 +682,18 @@ void lustre_assert_wire_constants(void) (long long)(int)offsetof(struct obd_dqblk, dqb_valid)); LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_valid) == 4, " found %lld\n", (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_valid)); + LASSERTF((int)offsetof(struct obd_dqblk, padding) == 68, " found %lld\n", + (long long)(int)offsetof(struct obd_dqblk, padding)); + LASSERTF((int)sizeof(((struct obd_dqblk *)0)->padding) == 4, " found %lld\n", + (long long)(int)sizeof(((struct obd_dqblk *)0)->padding)); + LASSERTF(Q_QUOTACHECK == 0x800100," found %lld\n", + (long long)Q_QUOTACHECK); + LASSERTF(Q_INITQUOTA == 0x800101," found %lld\n", + (long long)Q_INITQUOTA); + LASSERTF(Q_GETOINFO == 0x800102," found %lld\n", + (long long)Q_GETOINFO); + LASSERTF(Q_GETOQUOTA == 0x800103," found %lld\n", + (long long)Q_GETOQUOTA); /* Checks for struct 
niobuf_remote */ LASSERTF((int)sizeof(struct niobuf_remote) == 16, " found %lld\n", @@ -816,14 +856,14 @@ void lustre_assert_wire_constants(void) (long long)(int)offsetof(struct mds_body, aclsize)); LASSERTF((int)sizeof(((struct mds_body *)0)->aclsize) == 4, " found %lld\n", (long long)(int)sizeof(((struct mds_body *)0)->aclsize)); - LASSERTF((int)offsetof(struct mds_body, padding_2) == 156, " found %lld\n", - (long long)(int)offsetof(struct mds_body, padding_2)); - LASSERTF((int)sizeof(((struct mds_body *)0)->padding_2) == 4, " found %lld\n", - (long long)(int)sizeof(((struct mds_body *)0)->padding_2)); - LASSERTF((int)offsetof(struct mds_body, padding_3) == 160, " found %lld\n", - (long long)(int)offsetof(struct mds_body, padding_3)); - LASSERTF((int)sizeof(((struct mds_body *)0)->padding_3) == 4, " found %lld\n", - (long long)(int)sizeof(((struct mds_body *)0)->padding_3)); + LASSERTF((int)offsetof(struct mds_body, max_mdsize) == 156, " found %lld\n", + (long long)(int)offsetof(struct mds_body, max_mdsize)); + LASSERTF((int)sizeof(((struct mds_body *)0)->max_mdsize) == 4, " found %lld\n", + (long long)(int)sizeof(((struct mds_body *)0)->max_mdsize)); + LASSERTF((int)offsetof(struct mds_body, max_cookiesize) == 160, " found %lld\n", + (long long)(int)offsetof(struct mds_body, max_cookiesize)); + LASSERTF((int)sizeof(((struct mds_body *)0)->max_cookiesize) == 4, " found %lld\n", + (long long)(int)sizeof(((struct mds_body *)0)->max_cookiesize)); LASSERTF((int)offsetof(struct mds_body, padding_4) == 164, " found %lld\n", (long long)(int)offsetof(struct mds_body, padding_4)); LASSERTF((int)sizeof(((struct mds_body *)0)->padding_4) == 4, " found %lld\n", @@ -834,22 +874,20 @@ void lustre_assert_wire_constants(void) (long long)FMODE_WRITE); LASSERTF(FMODE_EXEC == 4, " found %lld\n", (long long)FMODE_EXEC); - LASSERTF(MDS_OPEN_CREAT == 64, " found %lld\n", - (long long)MDS_OPEN_CREAT); - LASSERTF(MDS_OPEN_EXCL == 128, " found %lld\n", - (long long)MDS_OPEN_EXCL); - 
LASSERTF(MDS_OPEN_TRUNC == 512, " found %lld\n", - (long long)MDS_OPEN_TRUNC); - LASSERTF(MDS_OPEN_APPEND == 1024, " found %lld\n", - (long long)MDS_OPEN_APPEND); - LASSERTF(MDS_OPEN_SYNC == 4096, " found %lld\n", - (long long)MDS_OPEN_SYNC); - LASSERTF(MDS_OPEN_DIRECTORY == 65536, " found %lld\n", - (long long)MDS_OPEN_DIRECTORY); - LASSERTF(MDS_OPEN_DELAY_CREATE == 16777216, " found %lld\n", - (long long)MDS_OPEN_DELAY_CREATE); - LASSERTF(MDS_OPEN_HAS_EA == 1073741824, " found %lld\n", - (long long)MDS_OPEN_HAS_EA); + CLASSERT(MDS_OPEN_CREAT == 00000100); + CLASSERT(MDS_OPEN_EXCL == 00000200); + CLASSERT(MDS_OPEN_TRUNC == 00001000); + CLASSERT(MDS_OPEN_APPEND == 00002000); + CLASSERT(MDS_OPEN_SYNC == 00010000); + CLASSERT(MDS_OPEN_DIRECTORY == 00200000); + CLASSERT(MDS_OPEN_DELAY_CREATE == 0100000000); + CLASSERT(MDS_OPEN_OWNEROVERRIDE == 0200000000); + CLASSERT(MDS_OPEN_JOIN_FILE == 0400000000); + CLASSERT(MDS_OPEN_HAS_EA == 010000000000); + CLASSERT(MDS_OPEN_HAS_OBJS == 020000000000); + CLASSERT(MDS_INODELOCK_LOOKUP == 0x000001); + CLASSERT(MDS_INODELOCK_UPDATE == 0x000002); + CLASSERT(MDS_INODELOCK_OPEN == 0x000004); /* Checks for struct mds_rec_setattr */ LASSERTF((int)sizeof(struct mds_rec_setattr) == 96, " found %lld\n", @@ -1083,6 +1121,18 @@ void lustre_assert_wire_constants(void) LASSERTF((int)sizeof(((struct mds_rec_rename *)0)->rn_time) == 8, " found %lld\n", (long long)(int)sizeof(((struct mds_rec_rename *)0)->rn_time)); + /* Checks for struct mds_rec_join */ + LASSERTF((int)sizeof(struct mds_rec_join) == 24, " found %lld\n", + (long long)(int)sizeof(struct mds_rec_join)); + LASSERTF((int)offsetof(struct mds_rec_join, jr_fid) == 0, " found %lld\n", + (long long)(int)offsetof(struct mds_rec_join, jr_fid)); + LASSERTF((int)sizeof(((struct mds_rec_join *)0)->jr_fid) == 16, " found %lld\n", + (long long)(int)sizeof(((struct mds_rec_join *)0)->jr_fid)); + LASSERTF((int)offsetof(struct mds_rec_join, jr_headsize) == 16, " found %lld\n", + (long 
long)(int)offsetof(struct mds_rec_join, jr_headsize)); + LASSERTF((int)sizeof(((struct mds_rec_join *)0)->jr_headsize) == 8, " found %lld\n", + (long long)(int)sizeof(((struct mds_rec_join *)0)->jr_headsize)); + /* Checks for struct lov_desc */ LASSERTF((int)sizeof(struct lov_desc) == 88, " found %lld\n", (long long)(int)sizeof(struct lov_desc)); @@ -1110,14 +1160,26 @@ void lustre_assert_wire_constants(void) (long long)(int)offsetof(struct lov_desc, ld_default_stripe_offset)); LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_default_stripe_offset) == 8, " found %lld\n", (long long)(int)sizeof(((struct lov_desc *)0)->ld_default_stripe_offset)); - LASSERTF((int)offsetof(struct lov_desc, ld_qos_threshold) == 32, " found %lld\n", - (long long)(int)offsetof(struct lov_desc, ld_qos_threshold)); - LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_qos_threshold) == 4, " found %lld\n", - (long long)(int)sizeof(((struct lov_desc *)0)->ld_qos_threshold)); - LASSERTF((int)offsetof(struct lov_desc, ld_qos_maxage) == 36, " found %lld\n", - (long long)(int)offsetof(struct lov_desc, ld_qos_maxage)); - LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_qos_maxage) == 4, " found %lld\n", - (long long)(int)sizeof(((struct lov_desc *)0)->ld_qos_maxage)); + LASSERTF((int)offsetof(struct lov_desc, ld_default_stripe_offset) == 24, " found %lld\n", + (long long)(int)offsetof(struct lov_desc, ld_default_stripe_offset)); + LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_default_stripe_offset) == 8, " found %lld\n", + (long long)(int)sizeof(((struct lov_desc *)0)->ld_default_stripe_offset)); + LASSERTF((int)offsetof(struct lov_desc, ld_padding_1) == 32, " found %lld\n", + (long long)(int)offsetof(struct lov_desc, ld_padding_1)); + LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_padding_1) == 4, " found %lld\n", + (long long)(int)sizeof(((struct lov_desc *)0)->ld_padding_1)); + LASSERTF((int)offsetof(struct lov_desc, ld_padding_2) == 36, " found %lld\n", + (long long)(int)offsetof(struct lov_desc, 
ld_padding_2)); + LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_padding_2) == 4, " found %lld\n", + (long long)(int)sizeof(((struct lov_desc *)0)->ld_padding_2)); + LASSERTF((int)offsetof(struct lov_desc, ld_padding_3) == 40, " found %lld\n", + (long long)(int)offsetof(struct lov_desc, ld_padding_3)); + LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_padding_3) == 4, " found %lld\n", + (long long)(int)sizeof(((struct lov_desc *)0)->ld_padding_3)); + LASSERTF((int)offsetof(struct lov_desc, ld_padding_4) == 44, " found %lld\n", + (long long)(int)offsetof(struct lov_desc, ld_padding_4)); + LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_padding_4) == 4, " found %lld\n", + (long long)(int)sizeof(((struct lov_desc *)0)->ld_padding_4)); LASSERTF((int)offsetof(struct lov_desc, ld_uuid) == 48, " found %lld\n", (long long)(int)offsetof(struct lov_desc, ld_uuid)); LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_uuid) == 40, " found %lld\n", @@ -1167,6 +1229,14 @@ void lustre_assert_wire_constants(void) LASSERTF((int)sizeof(((struct ldlm_flock *)0)->pid) == 4, " found %lld\n", (long long)(int)sizeof(((struct ldlm_flock *)0)->pid)); + /* Checks for struct ldlm_inodebits */ + LASSERTF((int)sizeof(struct ldlm_inodebits) == 8, " found %lld\n", + (long long)(int)sizeof(struct ldlm_inodebits)); + LASSERTF((int)offsetof(struct ldlm_inodebits, bits) == 0, " found %lld\n", + (long long)(int)offsetof(struct ldlm_inodebits, bits)); + LASSERTF((int)sizeof(((struct ldlm_inodebits *)0)->bits) == 8, " found %lld\n", + (long long)(int)sizeof(((struct ldlm_inodebits *)0)->bits)); + /* Checks for struct ldlm_intent */ LASSERTF((int)sizeof(struct ldlm_intent) == 8, " found %lld\n", (long long)(int)sizeof(struct ldlm_intent)); @@ -1182,6 +1252,10 @@ void lustre_assert_wire_constants(void) (long long)(int)offsetof(struct ldlm_resource_desc, lr_type)); LASSERTF((int)sizeof(((struct ldlm_resource_desc *)0)->lr_type) == 4, " found %lld\n", (long long)(int)sizeof(((struct ldlm_resource_desc 
*)0)->lr_type)); + LASSERTF((int)offsetof(struct ldlm_resource_desc, lr_padding) == 4, " found %lld\n", + (long long)(int)offsetof(struct ldlm_resource_desc, lr_padding)); + LASSERTF((int)sizeof(((struct ldlm_resource_desc *)0)->lr_padding) == 4, " found %lld\n", + (long long)(int)sizeof(((struct ldlm_resource_desc *)0)->lr_padding)); LASSERTF((int)offsetof(struct ldlm_resource_desc, lr_name) == 8, " found %lld\n", (long long)(int)offsetof(struct ldlm_resource_desc, lr_name)); LASSERTF((int)sizeof(((struct ldlm_resource_desc *)0)->lr_name) == 32, " found %lld\n", @@ -1214,6 +1288,10 @@ void lustre_assert_wire_constants(void) (long long)(int)offsetof(struct ldlm_request, lock_flags)); LASSERTF((int)sizeof(((struct ldlm_request *)0)->lock_flags) == 4, " found %lld\n", (long long)(int)sizeof(((struct ldlm_request *)0)->lock_flags)); + LASSERTF((int)offsetof(struct ldlm_request, lock_padding) == 4, " found %lld\n", + (long long)(int)offsetof(struct ldlm_request, lock_padding)); + LASSERTF((int)sizeof(((struct ldlm_request *)0)->lock_padding) == 4, " found %lld\n", + (long long)(int)sizeof(((struct ldlm_request *)0)->lock_padding)); LASSERTF((int)offsetof(struct ldlm_request, lock_desc) == 8, " found %lld\n", (long long)(int)offsetof(struct ldlm_request, lock_desc)); LASSERTF((int)sizeof(((struct ldlm_request *)0)->lock_desc) == 80, " found %lld\n", @@ -1234,6 +1312,10 @@ void lustre_assert_wire_constants(void) (long long)(int)offsetof(struct ldlm_reply, lock_flags)); LASSERTF((int)sizeof(((struct ldlm_reply *)0)->lock_flags) == 4, " found %lld\n", (long long)(int)sizeof(((struct ldlm_reply *)0)->lock_flags)); + LASSERTF((int)offsetof(struct ldlm_request, lock_padding) == 4, " found %lld\n", + (long long)(int)offsetof(struct ldlm_request, lock_padding)); + LASSERTF((int)sizeof(((struct ldlm_request *)0)->lock_padding) == 4, " found %lld\n", + (long long)(int)sizeof(((struct ldlm_request *)0)->lock_padding)); LASSERTF((int)offsetof(struct ldlm_request, lock_desc) == 8, " 
found %lld\n", (long long)(int)offsetof(struct ldlm_request, lock_desc)); LASSERTF((int)sizeof(((struct ldlm_request *)0)->lock_desc) == 80, " found %lld\n", @@ -1275,62 +1357,6 @@ void lustre_assert_wire_constants(void) LASSERTF((int)sizeof(((struct ost_lvb *)0)->lvb_blocks) == 8, " found %lld\n", (long long)(int)sizeof(((struct ost_lvb *)0)->lvb_blocks)); - /* Checks for struct ptlbd_op */ - LASSERTF((int)sizeof(struct ptlbd_op) == 12, " found %lld\n", - (long long)(int)sizeof(struct ptlbd_op)); - LASSERTF((int)offsetof(struct ptlbd_op, op_cmd) == 0, " found %lld\n", - (long long)(int)offsetof(struct ptlbd_op, op_cmd)); - LASSERTF((int)sizeof(((struct ptlbd_op *)0)->op_cmd) == 2, " found %lld\n", - (long long)(int)sizeof(((struct ptlbd_op *)0)->op_cmd)); - LASSERTF((int)offsetof(struct ptlbd_op, op_lun) == 2, " found %lld\n", - (long long)(int)offsetof(struct ptlbd_op, op_lun)); - LASSERTF((int)sizeof(((struct ptlbd_op *)0)->op_lun) == 2, " found %lld\n", - (long long)(int)sizeof(((struct ptlbd_op *)0)->op_lun)); - LASSERTF((int)offsetof(struct ptlbd_op, op_niob_cnt) == 4, " found %lld\n", - (long long)(int)offsetof(struct ptlbd_op, op_niob_cnt)); - LASSERTF((int)sizeof(((struct ptlbd_op *)0)->op_niob_cnt) == 2, " found %lld\n", - (long long)(int)sizeof(((struct ptlbd_op *)0)->op_niob_cnt)); - LASSERTF((int)offsetof(struct ptlbd_op, op__padding) == 6, " found %lld\n", - (long long)(int)offsetof(struct ptlbd_op, op__padding)); - LASSERTF((int)sizeof(((struct ptlbd_op *)0)->op__padding) == 2, " found %lld\n", - (long long)(int)sizeof(((struct ptlbd_op *)0)->op__padding)); - LASSERTF((int)offsetof(struct ptlbd_op, op_block_cnt) == 8, " found %lld\n", - (long long)(int)offsetof(struct ptlbd_op, op_block_cnt)); - LASSERTF((int)sizeof(((struct ptlbd_op *)0)->op_block_cnt) == 4, " found %lld\n", - (long long)(int)sizeof(((struct ptlbd_op *)0)->op_block_cnt)); - - /* Checks for struct ptlbd_niob */ - LASSERTF((int)sizeof(struct ptlbd_niob) == 24, " found %lld\n", - (long 
long)(int)sizeof(struct ptlbd_niob)); - LASSERTF((int)offsetof(struct ptlbd_niob, n_xid) == 0, " found %lld\n", - (long long)(int)offsetof(struct ptlbd_niob, n_xid)); - LASSERTF((int)sizeof(((struct ptlbd_niob *)0)->n_xid) == 8, " found %lld\n", - (long long)(int)sizeof(((struct ptlbd_niob *)0)->n_xid)); - LASSERTF((int)offsetof(struct ptlbd_niob, n_block_nr) == 8, " found %lld\n", - (long long)(int)offsetof(struct ptlbd_niob, n_block_nr)); - LASSERTF((int)sizeof(((struct ptlbd_niob *)0)->n_block_nr) == 8, " found %lld\n", - (long long)(int)sizeof(((struct ptlbd_niob *)0)->n_block_nr)); - LASSERTF((int)offsetof(struct ptlbd_niob, n_offset) == 16, " found %lld\n", - (long long)(int)offsetof(struct ptlbd_niob, n_offset)); - LASSERTF((int)sizeof(((struct ptlbd_niob *)0)->n_offset) == 4, " found %lld\n", - (long long)(int)sizeof(((struct ptlbd_niob *)0)->n_offset)); - LASSERTF((int)offsetof(struct ptlbd_niob, n_length) == 20, " found %lld\n", - (long long)(int)offsetof(struct ptlbd_niob, n_length)); - LASSERTF((int)sizeof(((struct ptlbd_niob *)0)->n_length) == 4, " found %lld\n", - (long long)(int)sizeof(((struct ptlbd_niob *)0)->n_length)); - - /* Checks for struct ptlbd_rsp */ - LASSERTF((int)sizeof(struct ptlbd_rsp) == 4, " found %lld\n", - (long long)(int)sizeof(struct ptlbd_rsp)); - LASSERTF((int)offsetof(struct ptlbd_rsp, r_status) == 0, " found %lld\n", - (long long)(int)offsetof(struct ptlbd_rsp, r_status)); - LASSERTF((int)sizeof(((struct ptlbd_rsp *)0)->r_status) == 2, " found %lld\n", - (long long)(int)sizeof(((struct ptlbd_rsp *)0)->r_status)); - LASSERTF((int)offsetof(struct ptlbd_rsp, r_error_cnt) == 2, " found %lld\n", - (long long)(int)offsetof(struct ptlbd_rsp, r_error_cnt)); - LASSERTF((int)sizeof(((struct ptlbd_rsp *)0)->r_error_cnt) == 2, " found %lld\n", - (long long)(int)sizeof(((struct ptlbd_rsp *)0)->r_error_cnt)); - /* Checks for struct llog_logid */ LASSERTF((int)sizeof(struct llog_logid) == 20, " found %lld\n", (long long)(int)sizeof(struct 
llog_logid)); @@ -1346,24 +1372,16 @@ void lustre_assert_wire_constants(void) (long long)(int)offsetof(struct llog_logid, lgl_ogen)); LASSERTF((int)sizeof(((struct llog_logid *)0)->lgl_ogen) == 4, " found %lld\n", (long long)(int)sizeof(((struct llog_logid *)0)->lgl_ogen)); - LASSERTF(OST_SZ_REC == 274730752, " found %lld\n", - (long long)OST_SZ_REC); - LASSERTF(OST_RAID1_REC == 274731008, " found %lld\n", - (long long)OST_RAID1_REC); - LASSERTF(MDS_UNLINK_REC == 274801668, " found %lld\n", - (long long)MDS_UNLINK_REC); - LASSERTF(MDS_SETATTR_REC == 274801665, " found %lld\n", - (long long)MDS_SETATTR_REC); - LASSERTF(OBD_CFG_REC == 274857984, " found %lld\n", - (long long)OBD_CFG_REC); - LASSERTF(PTL_CFG_REC == 274923520, " found %lld\n", - (long long)PTL_CFG_REC); - LASSERTF(LLOG_GEN_REC == 274989056, " found %lld\n", - (long long)LLOG_GEN_REC); - LASSERTF(LLOG_HDR_MAGIC == 275010873, " found %lld\n", - (long long)LLOG_HDR_MAGIC); - LASSERTF(LLOG_LOGID_MAGIC == 275010875, " found %lld\n", - (long long)LLOG_LOGID_MAGIC); + CLASSERT(OST_SZ_REC == 274730752); + CLASSERT(OST_RAID1_REC == 274731008); + CLASSERT(MDS_UNLINK_REC == 274801668); + CLASSERT(MDS_SETATTR_REC == 274801665); + CLASSERT(OBD_CFG_REC == 274857984); + CLASSERT(PTL_CFG_REC == 274923520); + CLASSERT(LLOG_GEN_REC == 274989056); + CLASSERT(LLOG_JOIN_REC == 275054592); + CLASSERT(LLOG_HDR_MAGIC == 275010873); + CLASSERT(LLOG_LOGID_MAGIC == 275010875); /* Checks for struct llog_catid */ LASSERTF((int)sizeof(struct llog_catid) == 32, " found %lld\n", @@ -1372,6 +1390,18 @@ void lustre_assert_wire_constants(void) (long long)(int)offsetof(struct llog_catid, lci_logid)); LASSERTF((int)sizeof(((struct llog_catid *)0)->lci_logid) == 20, " found %lld\n", (long long)(int)sizeof(((struct llog_catid *)0)->lci_logid)); + LASSERTF((int)offsetof(struct llog_catid, lci_padding1) == 20, " found %lld\n", + (long long)(int)offsetof(struct llog_catid, lci_padding1)); + LASSERTF((int)sizeof(((struct llog_catid 
*)0)->lci_padding1) == 4, " found %lld\n", + (long long)(int)sizeof(((struct llog_catid *)0)->lci_padding1)); + LASSERTF((int)offsetof(struct llog_catid, lci_padding2) == 24, " found %lld\n", + (long long)(int)offsetof(struct llog_catid, lci_padding2)); + LASSERTF((int)sizeof(((struct llog_catid *)0)->lci_padding2) == 4, " found %lld\n", + (long long)(int)sizeof(((struct llog_catid *)0)->lci_padding2)); + LASSERTF((int)offsetof(struct llog_catid, lci_padding3) == 28, " found %lld\n", + (long long)(int)offsetof(struct llog_catid, lci_padding3)); + LASSERTF((int)sizeof(((struct llog_catid *)0)->lci_padding3) == 4, " found %lld\n", + (long long)(int)sizeof(((struct llog_catid *)0)->lci_padding3)); /* Checks for struct llog_rec_hdr */ LASSERTF((int)sizeof(struct llog_rec_hdr) == 16, " found %lld\n", @@ -1388,6 +1418,10 @@ void lustre_assert_wire_constants(void) (long long)(int)offsetof(struct llog_rec_hdr, lrh_type)); LASSERTF((int)sizeof(((struct llog_rec_hdr *)0)->lrh_type) == 4, " found %lld\n", (long long)(int)sizeof(((struct llog_rec_hdr *)0)->lrh_type)); + LASSERTF((int)offsetof(struct llog_rec_hdr, padding) == 12, " found %lld\n", + (long long)(int)offsetof(struct llog_rec_hdr, padding)); + LASSERTF((int)sizeof(((struct llog_rec_hdr *)0)->padding) == 4, " found %lld\n", + (long long)(int)sizeof(((struct llog_rec_hdr *)0)->padding)); /* Checks for struct llog_rec_tail */ LASSERTF((int)sizeof(struct llog_rec_tail) == 8, " found %lld\n", @@ -1412,6 +1446,26 @@ void lustre_assert_wire_constants(void) (long long)(int)offsetof(struct llog_logid_rec, lid_id)); LASSERTF((int)sizeof(((struct llog_logid_rec *)0)->lid_id) == 20, " found %lld\n", (long long)(int)sizeof(((struct llog_logid_rec *)0)->lid_id)); + LASSERTF((int)offsetof(struct llog_logid_rec, padding1) == 36, " found %lld\n", + (long long)(int)offsetof(struct llog_logid_rec, padding1)); + LASSERTF((int)sizeof(((struct llog_logid_rec *)0)->padding1) == 4, " found %lld\n", + (long long)(int)sizeof(((struct 
llog_logid_rec *)0)->padding1)); + LASSERTF((int)offsetof(struct llog_logid_rec, padding2) == 40, " found %lld\n", + (long long)(int)offsetof(struct llog_logid_rec, padding2)); + LASSERTF((int)sizeof(((struct llog_logid_rec *)0)->padding2) == 4, " found %lld\n", + (long long)(int)sizeof(((struct llog_logid_rec *)0)->padding2)); + LASSERTF((int)offsetof(struct llog_logid_rec, padding3) == 44, " found %lld\n", + (long long)(int)offsetof(struct llog_logid_rec, padding3)); + LASSERTF((int)sizeof(((struct llog_logid_rec *)0)->padding3) == 4, " found %lld\n", + (long long)(int)sizeof(((struct llog_logid_rec *)0)->padding3)); + LASSERTF((int)offsetof(struct llog_logid_rec, padding4) == 48, " found %lld\n", + (long long)(int)offsetof(struct llog_logid_rec, padding4)); + LASSERTF((int)sizeof(((struct llog_logid_rec *)0)->padding4) == 4, " found %lld\n", + (long long)(int)sizeof(((struct llog_logid_rec *)0)->padding4)); + LASSERTF((int)offsetof(struct llog_logid_rec, padding5) == 52, " found %lld\n", + (long long)(int)offsetof(struct llog_logid_rec, padding5)); + LASSERTF((int)sizeof(((struct llog_logid_rec *)0)->padding5) == 4, " found %lld\n", + (long long)(int)sizeof(((struct llog_logid_rec *)0)->padding5)); LASSERTF((int)offsetof(struct llog_logid_rec, lid_tail) == 56, " found %lld\n", (long long)(int)offsetof(struct llog_logid_rec, lid_tail)); LASSERTF((int)sizeof(((struct llog_logid_rec *)0)->lid_tail) == 8, " found %lld\n", @@ -1436,6 +1490,10 @@ void lustre_assert_wire_constants(void) (long long)(int)offsetof(struct llog_create_rec, lcr_ogen)); LASSERTF((int)sizeof(((struct llog_create_rec *)0)->lcr_ogen) == 4, " found %lld\n", (long long)(int)sizeof(((struct llog_create_rec *)0)->lcr_ogen)); + LASSERTF((int)offsetof(struct llog_create_rec, padding) == 44, " found %lld\n", + (long long)(int)offsetof(struct llog_create_rec, padding)); + LASSERTF((int)sizeof(((struct llog_create_rec *)0)->padding) == 4, " found %lld\n", + (long long)(int)sizeof(((struct llog_create_rec 
*)0)->padding)); /* Checks for struct llog_orphan_rec */ LASSERTF((int)sizeof(struct llog_orphan_rec) == 40, " found %lld\n", @@ -1452,6 +1510,10 @@ void lustre_assert_wire_constants(void) (long long)(int)offsetof(struct llog_orphan_rec, lor_ogen)); LASSERTF((int)sizeof(((struct llog_orphan_rec *)0)->lor_ogen) == 4, " found %lld\n", (long long)(int)sizeof(((struct llog_orphan_rec *)0)->lor_ogen)); + LASSERTF((int)offsetof(struct llog_orphan_rec, padding) == 28, " found %lld\n", + (long long)(int)offsetof(struct llog_orphan_rec, padding)); + LASSERTF((int)sizeof(((struct llog_orphan_rec *)0)->padding) == 4, " found %lld\n", + (long long)(int)sizeof(((struct llog_orphan_rec *)0)->padding)); LASSERTF((int)offsetof(struct llog_orphan_rec, lor_tail) == 32, " found %lld\n", (long long)(int)offsetof(struct llog_orphan_rec, lor_tail)); LASSERTF((int)sizeof(((struct llog_orphan_rec *)0)->lor_tail) == 8, " found %lld\n", @@ -1472,11 +1534,47 @@ void lustre_assert_wire_constants(void) (long long)(int)offsetof(struct llog_unlink_rec, lur_ogen)); LASSERTF((int)sizeof(((struct llog_unlink_rec *)0)->lur_ogen) == 4, " found %lld\n", (long long)(int)sizeof(((struct llog_unlink_rec *)0)->lur_ogen)); + LASSERTF((int)offsetof(struct llog_unlink_rec, padding) == 28, " found %lld\n", + (long long)(int)offsetof(struct llog_unlink_rec, padding)); + LASSERTF((int)sizeof(((struct llog_unlink_rec *)0)->padding) == 4, " found %lld\n", + (long long)(int)sizeof(((struct llog_unlink_rec *)0)->padding)); LASSERTF((int)offsetof(struct llog_unlink_rec, lur_tail) == 32, " found %lld\n", (long long)(int)offsetof(struct llog_unlink_rec, lur_tail)); LASSERTF((int)sizeof(((struct llog_unlink_rec *)0)->lur_tail) == 8, " found %lld\n", (long long)(int)sizeof(((struct llog_unlink_rec *)0)->lur_tail)); + /* Checks for struct llog_setattr_rec */ + LASSERTF((int)sizeof(struct llog_setattr_rec) == 48, " found %lld\n", + (long long)(int)sizeof(struct llog_setattr_rec)); + LASSERTF((int)offsetof(struct 
llog_setattr_rec, lsr_hdr) == 0, " found %lld\n", + (long long)(int)offsetof(struct llog_setattr_rec, lsr_hdr)); + LASSERTF((int)sizeof(((struct llog_setattr_rec *)0)->lsr_hdr) == 16, " found %lld\n", + (long long)(int)sizeof(((struct llog_setattr_rec *)0)->lsr_hdr)); + LASSERTF((int)offsetof(struct llog_setattr_rec, lsr_oid) == 16, " found %lld\n", + (long long)(int)offsetof(struct llog_setattr_rec, lsr_oid)); + LASSERTF((int)sizeof(((struct llog_setattr_rec *)0)->lsr_oid) == 8, " found %lld\n", + (long long)(int)sizeof(((struct llog_setattr_rec *)0)->lsr_oid)); + LASSERTF((int)offsetof(struct llog_setattr_rec, lsr_ogen) == 24, " found %lld\n", + (long long)(int)offsetof(struct llog_setattr_rec, lsr_ogen)); + LASSERTF((int)sizeof(((struct llog_setattr_rec *)0)->lsr_ogen) == 4, " found %lld\n", + (long long)(int)sizeof(((struct llog_setattr_rec *)0)->lsr_ogen)); + LASSERTF((int)offsetof(struct llog_setattr_rec, lsr_uid) == 28, " found %lld\n", + (long long)(int)offsetof(struct llog_setattr_rec, lsr_uid)); + LASSERTF((int)sizeof(((struct llog_setattr_rec *)0)->lsr_uid) == 4, " found %lld\n", + (long long)(int)sizeof(((struct llog_setattr_rec *)0)->lsr_uid)); + LASSERTF((int)offsetof(struct llog_setattr_rec, lsr_gid) == 32, " found %lld\n", + (long long)(int)offsetof(struct llog_setattr_rec, lsr_gid)); + LASSERTF((int)sizeof(((struct llog_setattr_rec *)0)->lsr_gid) == 4, " found %lld\n", + (long long)(int)sizeof(((struct llog_setattr_rec *)0)->lsr_gid)); + LASSERTF((int)offsetof(struct llog_setattr_rec, padding) == 36, " found %lld\n", + (long long)(int)offsetof(struct llog_setattr_rec, padding)); + LASSERTF((int)sizeof(((struct llog_setattr_rec *)0)->padding) == 4, " found %lld\n", + (long long)(int)sizeof(((struct llog_setattr_rec *)0)->padding)); + LASSERTF((int)offsetof(struct llog_setattr_rec, lsr_tail) == 40, " found %lld\n", + (long long)(int)offsetof(struct llog_setattr_rec, lsr_tail)); + LASSERTF((int)sizeof(((struct llog_setattr_rec *)0)->lsr_tail) == 8, " 
found %lld\n", + (long long)(int)sizeof(((struct llog_setattr_rec *)0)->lsr_tail)); + /* Checks for struct llog_size_change_rec */ LASSERTF((int)sizeof(struct llog_size_change_rec) == 48, " found %lld\n", (long long)(int)sizeof(struct llog_size_change_rec)); @@ -1492,6 +1590,10 @@ void lustre_assert_wire_constants(void) (long long)(int)offsetof(struct llog_size_change_rec, lsc_io_epoch)); LASSERTF((int)sizeof(((struct llog_size_change_rec *)0)->lsc_io_epoch) == 4, " found %lld\n", (long long)(int)sizeof(((struct llog_size_change_rec *)0)->lsc_io_epoch)); + LASSERTF((int)offsetof(struct llog_size_change_rec, padding) == 36, " found %lld\n", + (long long)(int)offsetof(struct llog_size_change_rec, padding)); + LASSERTF((int)sizeof(((struct llog_size_change_rec *)0)->padding) == 4, " found %lld\n", + (long long)(int)sizeof(((struct llog_size_change_rec *)0)->padding)); LASSERTF((int)offsetof(struct llog_size_change_rec, lsc_tail) == 40, " found %lld\n", (long long)(int)offsetof(struct llog_size_change_rec, lsc_tail)); LASSERTF((int)sizeof(((struct llog_size_change_rec *)0)->lsc_tail) == 8, " found %lld\n", @@ -1588,6 +1690,10 @@ void lustre_assert_wire_constants(void) (long long)(int)offsetof(struct llog_cookie, lgc_index)); LASSERTF((int)sizeof(((struct llog_cookie *)0)->lgc_index) == 4, " found %lld\n", (long long)(int)sizeof(((struct llog_cookie *)0)->lgc_index)); + LASSERTF((int)offsetof(struct llog_cookie, lgc_padding) == 28, " found %lld\n", + (long long)(int)offsetof(struct llog_cookie, lgc_padding)); + LASSERTF((int)sizeof(((struct llog_cookie *)0)->lgc_padding) == 4, " found %lld\n", + (long long)(int)sizeof(((struct llog_cookie *)0)->lgc_padding)); /* Checks for struct llogd_body */ LASSERTF((int)sizeof(struct llogd_body) == 48, " found %lld\n", @@ -1620,20 +1726,15 @@ void lustre_assert_wire_constants(void) (long long)(int)offsetof(struct llogd_body, lgd_cur_offset)); LASSERTF((int)sizeof(((struct llogd_body *)0)->lgd_cur_offset) == 8, " found %lld\n", (long 
long)(int)sizeof(((struct llogd_body *)0)->lgd_cur_offset)); - LASSERTF(LLOG_ORIGIN_HANDLE_CREATE == 501, " found %lld\n", - (long long)LLOG_ORIGIN_HANDLE_CREATE); - LASSERTF(LLOG_ORIGIN_HANDLE_NEXT_BLOCK == 502, " found %lld\n", - (long long)LLOG_ORIGIN_HANDLE_NEXT_BLOCK); - LASSERTF(LLOG_ORIGIN_HANDLE_READ_HEADER == 503, " found %lld\n", - (long long)LLOG_ORIGIN_HANDLE_READ_HEADER); - LASSERTF(LLOG_ORIGIN_HANDLE_WRITE_REC == 504, " found %lld\n", - (long long)LLOG_ORIGIN_HANDLE_WRITE_REC); - LASSERTF(LLOG_ORIGIN_HANDLE_CLOSE == 505, " found %lld\n", - (long long)LLOG_ORIGIN_HANDLE_CLOSE); - LASSERTF(LLOG_ORIGIN_CONNECT == 506, " found %lld\n", - (long long)LLOG_ORIGIN_CONNECT); - LASSERTF(LLOG_CATINFO == 507, " found %lld\n", - (long long)LLOG_CATINFO); + CLASSERT(LLOG_ORIGIN_HANDLE_CREATE == 501); + CLASSERT(LLOG_ORIGIN_HANDLE_NEXT_BLOCK == 502); + CLASSERT(LLOG_ORIGIN_HANDLE_READ_HEADER == 503); + CLASSERT(LLOG_ORIGIN_HANDLE_WRITE_REC == 504); + CLASSERT(LLOG_ORIGIN_HANDLE_CLOSE == 505); + CLASSERT(LLOG_ORIGIN_CONNECT == 506); + CLASSERT(LLOG_CATINFO == 507); + CLASSERT(LLOG_ORIGIN_HANDLE_PREV_BLOCK == 508); + CLASSERT(LLOG_ORIGIN_HANDLE_DESTROY == 509); /* Checks for struct llogd_conn_body */ LASSERTF((int)sizeof(struct llogd_conn_body) == 40, " found %lld\n", @@ -1651,6 +1752,38 @@ void lustre_assert_wire_constants(void) LASSERTF((int)sizeof(((struct llogd_conn_body *)0)->lgdc_ctxt_idx) == 4, " found %lld\n", (long long)(int)sizeof(((struct llogd_conn_body *)0)->lgdc_ctxt_idx)); + /* Checks for struct llog_array_rec */ + LASSERTF((int)sizeof(struct llog_array_rec) == 72, " found %lld\n", + (long long)(int)sizeof(struct llog_array_rec)); + LASSERTF((int)offsetof(struct llog_array_rec, lmr_hdr) == 0, " found %lld\n", + (long long)(int)offsetof(struct llog_array_rec, lmr_hdr)); + LASSERTF((int)sizeof(((struct llog_array_rec *)0)->lmr_hdr) == 16, " found %lld\n", + (long long)(int)sizeof(((struct llog_array_rec *)0)->lmr_hdr)); + LASSERTF((int)offsetof(struct 
llog_array_rec, lmr_med) == 16, " found %lld\n", + (long long)(int)offsetof(struct llog_array_rec, lmr_med)); + LASSERTF((int)sizeof(((struct llog_array_rec *)0)->lmr_med) == 48, " found %lld\n", + (long long)(int)sizeof(((struct llog_array_rec *)0)->lmr_med)); + LASSERTF((int)offsetof(struct llog_array_rec, lmr_tail) == 64, " found %lld\n", + (long long)(int)offsetof(struct llog_array_rec, lmr_tail)); + LASSERTF((int)sizeof(((struct llog_array_rec *)0)->lmr_tail) == 8, " found %lld\n", + (long long)(int)sizeof(((struct llog_array_rec *)0)->lmr_tail)); + + /* Checks for struct mds_extent_desc */ + LASSERTF((int)sizeof(struct mds_extent_desc) == 48, " found %lld\n", + (long long)(int)sizeof(struct mds_extent_desc)); + LASSERTF((int)offsetof(struct mds_extent_desc, med_start) == 0, " found %lld\n", + (long long)(int)offsetof(struct mds_extent_desc, med_start)); + LASSERTF((int)sizeof(((struct mds_extent_desc *)0)->med_start) == 8, " found %lld\n", + (long long)(int)sizeof(((struct mds_extent_desc *)0)->med_start)); + LASSERTF((int)offsetof(struct mds_extent_desc, med_len) == 8, " found %lld\n", + (long long)(int)offsetof(struct mds_extent_desc, med_len)); + LASSERTF((int)sizeof(((struct mds_extent_desc *)0)->med_len) == 8, " found %lld\n", + (long long)(int)sizeof(((struct mds_extent_desc *)0)->med_len)); + LASSERTF((int)offsetof(struct mds_extent_desc, med_lmm) == 16, " found %lld\n", + (long long)(int)offsetof(struct mds_extent_desc, med_lmm)); + LASSERTF((int)sizeof(((struct mds_extent_desc *)0)->med_lmm) == 32, " found %lld\n", + (long long)(int)sizeof(((struct mds_extent_desc *)0)->med_lmm)); + /* Checks for struct qunit_data */ LASSERTF((int)sizeof(struct qunit_data) == 16, " found %lld\n", (long long)(int)sizeof(struct qunit_data));