From de7fe0875bf906416bbcbc650cd30895cb3aaffd Mon Sep 17 00:00:00 2001 From: Aurelien Degremont Date: Tue, 10 Jan 2012 15:27:33 +0100 Subject: [PATCH] LU-827 lov: Implement a per file data_version. This patch extends getattr RPC to also send in its reply a data version for the file content. This only tracks data object changes. This does not track chown/chmod on OST object. This value is intended to be only compared for equality. A new call in liblustreapi is available and through 'lfs data_version'. Signed-off-by: Aurelien Degremont Signed-off-by: Johann Lombardi Change-Id: I5d748e096b11627c05f86ef6f3f6906bce120893 Reviewed-on: http://review.whamcloud.com/1671 Tested-by: Hudson Tested-by: Maloo Reviewed-by: Oleg Drokin --- lustre/doc/lfs.1 | 12 +++++++ lustre/include/lustre/liblustreapi.h | 2 ++ lustre/include/lustre/lustre_user.h | 8 +++++ lustre/include/lustre_lib.h | 1 + lustre/llite/file.c | 65 ++++++++++++++++++++++++++++++++++-- lustre/lov/lov_internal.h | 2 +- lustre/lov/lov_merge.c | 9 ++++- lustre/obdfilter/filter.c | 8 +++++ lustre/ptlrpc/wiretest.c | 2 ++ lustre/tests/sanityn.sh | 48 ++++++++++++++++++++++++++ lustre/utils/lfs.c | 50 +++++++++++++++++++++++++++ lustre/utils/liblustreapi.c | 30 +++++++++++++++++ lustre/utils/wiretest.c | 2 ++ 13 files changed, 235 insertions(+), 4 deletions(-) diff --git a/lustre/doc/lfs.1 b/lustre/doc/lfs.1 index 5953ad4..d5d8d44 100644 --- a/lustre/doc/lfs.1 +++ b/lustre/doc/lfs.1 @@ -74,6 +74,8 @@ lfs \- Lustre utility to create a file with specific striping pattern, find the \fB[-b ] [-i ] \fB\fR .br +.B lfs data_version [-n] \fB\fR +.br .B lfs help .SH DESCRIPTION .B lfs @@ -227,6 +229,16 @@ To set filesystem quotas for users or groups. Limits can be specified with -b, - .B setquota -t [-u|-g] [--block-grace ] [--inode-grace ] To set filesystem quota grace times for users or groups. 
Grace time is specified in "XXwXXdXXhXXmXXs" format or as an integer seconds value, see EXAMPLES .TP +.B data_version [-n] filename +Display current version of file data. If -n is specified, data version is read +without taking a lock. As a consequence, data version could be outdated if there +are dirty caches on filesystem clients, but this will not force data flushes and +has less impact on the filesystem. + +Even without -n, race conditions are possible and data version should be +checked before and after an operation to be confident the data did not change +during it. +.TP .B help Provides brief help on the various arguments .TP diff --git a/lustre/include/lustre/liblustreapi.h b/lustre/include/lustre/liblustreapi.h index a528c55..331d3ee 100644 --- a/lustre/include/lustre/liblustreapi.h +++ b/lustre/include/lustre/liblustreapi.h @@ -223,6 +223,8 @@ extern int llapi_fid2path(const char *device, const char *fidstr, char *path, extern int llapi_path2fid(const char *path, lustre_fid *fid); extern int llapi_get_version(char *buffer, int buffer_size, char **version); +extern int llapi_get_data_version(int fd, __u64 *data_version, __u64 flags); + /* Changelog interface. 
priv is private state, managed internally by these functions */ #define CHANGELOG_FLAG_FOLLOW 0x01 /* Not yet implemented */ diff --git a/lustre/include/lustre/lustre_user.h b/lustre/include/lustre/lustre_user.h index be69c18..51a6857 100644 --- a/lustre/include/lustre/lustre_user.h +++ b/lustre/include/lustre/lustre_user.h @@ -155,6 +155,7 @@ struct obd_statfs { #define LL_IOC_HSM_CT_START _IOW ('f', 176,struct lustre_kernelcomm) /* see for ioctl numbers 177-210 */ +#define LL_IOC_DATA_VERSION _IOR ('f', 218, struct ioc_data_version) #define LL_STATFS_LMV 1 #define LL_STATFS_LOV 2 @@ -632,6 +633,13 @@ enum changelog_message_type { /********* Misc **********/ +struct ioc_data_version { + __u64 idv_version; + __u64 idv_flags; /* See LL_DV_xxx */ +}; +#define LL_DV_NOFLUSH 0x01 /* Do not take READ EXTENT LOCK before sampling + version. Dirty caches are left unchanged. */ + #ifndef offsetof # define offsetof(typ,memb) ((unsigned long)((char *)&(((typ *)0)->memb))) #endif diff --git a/lustre/include/lustre_lib.h b/lustre/include/lustre_lib.h index a60389f..6f4cf15 100644 --- a/lustre/include/lustre_lib.h +++ b/lustre/include/lustre_lib.h @@ -559,6 +559,7 @@ static inline void obd_ioctl_freedata(char *buf, int len) #define OBD_IOC_GET_OBJ_VERSION _IOR('f', 210, OBD_IOC_DATA_TYPE) +/* defines ioctl number 218 */ #define OBD_IOC_GET_MNTOPT _IOW('f', 220, mntopt_t) #define OBD_IOC_ECHO_MD _IOR('f', 221, struct obd_ioctl_data) diff --git a/lustre/llite/file.c b/lustre/llite/file.c index 231f2f3..80dcdb8 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -708,7 +708,8 @@ static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp, OBD_MD_FLSIZE | OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ | OBD_MD_FLATIME | OBD_MD_FLMTIME | OBD_MD_FLCTIME | - OBD_MD_FLGROUP | OBD_MD_FLEPOCH; + OBD_MD_FLGROUP | OBD_MD_FLEPOCH | + OBD_MD_FLDATAVERSION; oinfo.oi_capa = capa; if (sync) { oinfo.oi_oa->o_valid |= OBD_MD_FLFLAGS; @@ -728,7 +729,8 @@ static int ll_lsm_getattr(struct 
lov_stripe_md *lsm, struct obd_export *exp, if (rc == 0) oinfo.oi_oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ | OBD_MD_FLATIME | OBD_MD_FLMTIME | - OBD_MD_FLCTIME | OBD_MD_FLSIZE); + OBD_MD_FLCTIME | OBD_MD_FLSIZE | + OBD_MD_FLDATAVERSION); RETURN(rc); } @@ -1746,6 +1748,48 @@ error: RETURN(rc); } +/* + * Read the data_version for inode. + * + * This value is computed using stripe object version on OST. + * Version is computed using server side locking. + * + * @param extent_lock Take extent lock. Not needed if a process is already + * holding the OST object group locks. + */ +static int ll_data_version(struct inode *inode, __u64 *data_version, + int extent_lock) +{ + struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd; + struct ll_sb_info *sbi = ll_i2sbi(inode); + struct obdo *obdo = NULL; + int rc; + ENTRY; + + /* If no stripe, we consider version is 0. */ + if (!lsm) { + *data_version = 0; + CDEBUG(D_INODE, "No object for inode\n"); + RETURN(0); + } + + OBD_ALLOC_PTR(obdo); + if (obdo == NULL) + RETURN(-ENOMEM); + + rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, obdo, 0, extent_lock); + if (!rc) { + if (!(obdo->o_valid & OBD_MD_FLDATAVERSION)) + rc = -EOPNOTSUPP; + else + *data_version = obdo->o_data_version; + } + + OBD_FREE_PTR(obdo); + + RETURN(rc); +} + #ifdef HAVE_UNLOCKED_IOCTL long ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { @@ -1836,6 +1880,23 @@ int ll_file_ioctl(struct inode *inode, struct file *file, unsigned int cmd, } case OBD_IOC_FID2PATH: RETURN(ll_fid2path(ll_i2mdexp(inode), (void *)arg)); + case LL_IOC_DATA_VERSION: { + struct ioc_data_version idv; + int rc; + + if (cfs_copy_from_user(&idv, (char *)arg, sizeof(idv))) + RETURN(-EFAULT); + + rc = ll_data_version(inode, &idv.idv_version, + !(idv.idv_flags & LL_DV_NOFLUSH)); + + if (rc == 0 && + cfs_copy_to_user((char *) arg, &idv, sizeof(idv))) + RETURN(-EFAULT); + + RETURN(rc); + } + case LL_IOC_GET_MDTIDX: { int mdtidx; diff --git a/lustre/lov/lov_internal.h 
b/lustre/lov/lov_internal.h index 1a6b34a..e602606 100644 --- a/lustre/lov/lov_internal.h +++ b/lustre/lov/lov_internal.h @@ -157,7 +157,7 @@ static inline void lov_llh_put(struct lov_lock_handles *llh) (char *)((lv)->lov_tgts[index]->ltd_uuid.uuid) /* lov_merge.c */ -void lov_merge_attrs(struct obdo *tgt, struct obdo *src, obd_flag valid, +void lov_merge_attrs(struct obdo *tgt, struct obdo *src, obd_valid valid, struct lov_stripe_md *lsm, int stripeno, int *set); int lov_merge_lvb(struct obd_export *exp, struct lov_stripe_md *lsm, struct ost_lvb *lvb, int kms_only); diff --git a/lustre/lov/lov_merge.c b/lustre/lov/lov_merge.c index ad759c3..2f943de 100644 --- a/lustre/lov/lov_merge.c +++ b/lustre/lov/lov_merge.c @@ -176,7 +176,7 @@ int lov_adjust_kms(struct obd_export *exp, struct lov_stripe_md *lsm, RETURN(0); } -void lov_merge_attrs(struct obdo *tgt, struct obdo *src, obd_flag valid, +void lov_merge_attrs(struct obdo *tgt, struct obdo *src, obd_valid valid, struct lov_stripe_md *lsm, int stripeno, int *set) { valid &= src->o_valid; @@ -198,11 +198,18 @@ void lov_merge_attrs(struct obdo *tgt, struct obdo *src, obd_flag valid, tgt->o_ctime = src->o_ctime; if (valid & OBD_MD_FLMTIME && tgt->o_mtime < src->o_mtime) tgt->o_mtime = src->o_mtime; + if (valid & OBD_MD_FLDATAVERSION) + tgt->o_data_version += src->o_data_version; } else { memcpy(tgt, src, sizeof(*tgt)); tgt->o_id = lsm->lsm_object_id; if (valid & OBD_MD_FLSIZE) tgt->o_size = lov_stripe_size(lsm,src->o_size,stripeno); } + + /* data_version needs to be valid on all stripes to be correct! 
*/ + if (!(valid & OBD_MD_FLDATAVERSION)) + tgt->o_valid &= ~OBD_MD_FLDATAVERSION; + *set += 1; } diff --git a/lustre/obdfilter/filter.c b/lustre/obdfilter/filter.c index 9dfef42..93b6edc3 100644 --- a/lustre/obdfilter/filter.c +++ b/lustre/obdfilter/filter.c @@ -3158,6 +3158,7 @@ static int filter_getattr(struct obd_export *exp, struct obd_info *oinfo) { struct dentry *dentry = NULL; struct obd_device *obd; + __u64 curr_version; int rc = 0; ENTRY; @@ -3180,6 +3181,13 @@ static int filter_getattr(struct obd_export *exp, struct obd_info *oinfo) oinfo->oi_oa->o_valid = OBD_MD_FLID; obdo_from_inode(oinfo->oi_oa, dentry->d_inode, NULL, FILTER_VALID_FLAGS); + /* Store inode version in reply */ + curr_version = fsfilt_get_version(exp->exp_obd, dentry->d_inode); + if ((__s64)curr_version != -EOPNOTSUPP) { + oinfo->oi_oa->o_valid |= OBD_MD_FLDATAVERSION; + oinfo->oi_oa->o_data_version = curr_version; + } + f_dput(dentry); RETURN(rc); } diff --git a/lustre/ptlrpc/wiretest.c b/lustre/ptlrpc/wiretest.c index 9aca930..7208df9 100644 --- a/lustre/ptlrpc/wiretest.c +++ b/lustre/ptlrpc/wiretest.c @@ -904,6 +904,8 @@ void lustre_assert_wire_constants(void) LASSERTF((int)sizeof(((struct obd_connect_data *)0)->paddingE) == 8, "found %lld\n", (long long)(int)sizeof(((struct obd_connect_data *)0)->paddingE)); LASSERTF((int)offsetof(struct obd_connect_data, paddingF) == 184, "found %lld\n", + (long long)(int)offsetof(struct obd_connect_data, paddingF)); + LASSERTF((int)sizeof(((struct obd_connect_data *)0)->paddingF) == 8, "found %lld\n", (long long)(int)sizeof(((struct obd_connect_data *)0)->paddingF)); LASSERTF(OBD_CONNECT_RDONLY == 0x1ULL, "found 0x%.16llxULL\n", diff --git a/lustre/tests/sanityn.sh b/lustre/tests/sanityn.sh index df7e017..56eac96 100644 --- a/lustre/tests/sanityn.sh +++ b/lustre/tests/sanityn.sh @@ -31,6 +31,7 @@ export TMP=${TMP:-/tmp} MOUNT_2=${MOUNT_2:-"yes"} CHECK_GRANT=${CHECK_GRANT:-"yes"} GRANT_CHECK_LIST=${GRANT_CHECK_LIST:-""} 
+TSTUSR=${TSTUSR:-"quota_usr"} SAVE_PWD=$PWD @@ -1877,6 +1878,53 @@ test_50() { } run_test 50 "osc lvb attrs: enqueue vs. CP AST ==============" +test_60() { + # Create a file + mkdir -p $DIR1/$tdir + file1=$DIR1/$tdir/file + file2=$DIR2/$tdir/file + + echo orig > $file2 || error "Could not create $file2" + version=$($LFS data_version $file1) + + # Append data + echo append >> $file2 || error "Could not append to $file2" + version2=$($LFS data_version $file1) + [ "$version" != "$version2" ] || + error "append did not change data version: $version" + + # Overwrite data + echo overwrite > $file2 || error "Could not overwrite $file2" + version3=$($LFS data_version $file1) + [ "$version2" != "$version3" ] || + error "overwrite did not change data version: $version2" + + # Truncate before EOF + $TRUNCATE $file2 3 || error "Could not truncate $file2" + version4=$($LFS data_version $file1) + [ "$version3" != "$version4" ] || + error "truncate did not change data version: $version3" + + # Truncate after EOF + $TRUNCATE $file2 123456 || error "Could not truncate $file2" + version5=$($LFS data_version $file1) + [ "$version4" != "$version5" ] || + error "truncate did not change data version: $version4" + + # Chmod do not change version + chmod 400 $file2 || error "Could not chmod 400 $file2" + version6=$($LFS data_version $file1) + [ "$version5" == "$version6" ] || + error "chmod should not change data version: $version5 != $version6" + + # Chown do not change version + chown $TSTUSR $file2 || error "Could not chown $TSTUSR $file2" + version7=$($LFS data_version $file1) + [ "$version5" == "$version7" ] || + error "chown should not change data version: $version5 != $version7" +} +run_test 60 "Verify data_version behaviour" + log "cleanup: ======================================================" [ "$(mount | grep $MOUNT2)" ] && umount $MOUNT2 diff --git a/lustre/utils/lfs.c b/lustre/utils/lfs.c index 07e3ed7..a52bd08 100644 --- a/lustre/utils/lfs.c +++ b/lustre/utils/lfs.c @@ 
-114,6 +114,7 @@ static int lfs_changelog(int argc, char **argv); static int lfs_changelog_clear(int argc, char **argv); static int lfs_fid2path(int argc, char **argv); static int lfs_path2fid(int argc, char **argv); +static int lfs_data_version(int argc, char **argv); /* all avaialable commands */ command_t cmdlist[] = { @@ -252,6 +253,8 @@ command_t cmdlist[] = { /*[--rec ]*/}, {"path2fid", lfs_path2fid, 0, "Display the fid for a given path.\n" "usage: path2fid "}, + {"data_version", lfs_data_version, 0, "Display file data version for " + "a given path.\n" "usage: data_version [-n] "}, {"help", Parser_help, 0, "help"}, {"exit", Parser_quit, 0, "quit"}, {"quit", Parser_quit, 0, "quit"}, @@ -2621,6 +2624,53 @@ static int lfs_path2fid(int argc, char **argv) return 0; } +static int lfs_data_version(int argc, char **argv) +{ + char *path; + __u64 data_version; + int fd; + int rc; + int c; + int nolock = 0; + + if (argc < 2) + return CMD_HELP; + + optind = 0; + while ((c = getopt(argc, argv, "n")) != -1) { + switch (c) { + case 'n': + nolock = LL_DV_NOFLUSH; + break; + default: + return CMD_HELP; + } + } + if (optind == argc) + return CMD_HELP; + + path = argv[optind]; + fd = open(path, O_RDONLY); + if (fd < 0) { + fprintf(stderr, "can't open %s: %s\n", path, + strerror(errno)); + return errno; + } + + rc = llapi_get_data_version(fd, &data_version, nolock); + if (rc) { + fprintf(stderr, "can't get version for %s: %s\n", path, + strerror(errno = -rc)); + close(fd); /* do not leak the descriptor on failure */ + return rc; + } + + printf("%llu\n", (unsigned long long)data_version); + close(fd); + + return 0; +} + int main(int argc, char **argv) { int rc; diff --git a/lustre/utils/liblustreapi.c b/lustre/utils/liblustreapi.c index e9937a9..0584319 100644 --- a/lustre/utils/liblustreapi.c +++ b/lustre/utils/liblustreapi.c @@ -3941,3 +3941,33 @@ int llapi_get_version(char *buffer, int buffer_size, *version = data->ioc_bulk; return 0; } + +/** + * Get a 64-bit value representing the version of file data pointed by fd. 
+ * + * Each write or truncate, flushed on OST, will change this value. You can use + * this value to verify if file data was modified. This only checks the file + * data, not metadata. + * + * \param flags If set to LL_DV_NOFLUSH, the data version will be read + * directly from OST without regard to possible dirty cache on + * client nodes. + * + * \retval 0 on success. + * \retval -errno on error. + */ +int llapi_get_data_version(int fd, __u64 *data_version, __u64 flags) +{ + int rc; + struct ioc_data_version idv; + + idv.idv_flags = flags; + + rc = ioctl(fd, LL_IOC_DATA_VERSION, &idv); + if (rc) + rc = -errno; + else + *data_version = idv.idv_version; + + return rc; +} diff --git a/lustre/utils/wiretest.c b/lustre/utils/wiretest.c index 76dcca4..eceaf35 100644 --- a/lustre/utils/wiretest.c +++ b/lustre/utils/wiretest.c @@ -912,6 +912,8 @@ void lustre_assert_wire_constants(void) LASSERTF((int)sizeof(((struct obd_connect_data *)0)->paddingE) == 8, "found %lld\n", (long long)(int)sizeof(((struct obd_connect_data *)0)->paddingE)); LASSERTF((int)offsetof(struct obd_connect_data, paddingF) == 184, "found %lld\n", + (long long)(int)offsetof(struct obd_connect_data, paddingF)); + LASSERTF((int)sizeof(((struct obd_connect_data *)0)->paddingF) == 8, "found %lld\n", (long long)(int)sizeof(((struct obd_connect_data *)0)->paddingF)); LASSERTF(OBD_CONNECT_RDONLY == 0x1ULL, "found 0x%.16llxULL\n", -- 1.8.3.1