struct list_head l_exp_list;
};
+/**
+ * Describe the overlap between two locks. itree_overlap_cb data.
+ */
+struct ldlm_match_data {
+ struct ldlm_lock *lmd_old;
+ struct ldlm_lock *lmd_lock;
+ enum ldlm_mode *lmd_mode;
+ union ldlm_policy_data *lmd_policy;
+ __u64 lmd_flags;
+ __u64 lmd_skip_flags;
+ int lmd_unref;
+ bool lmd_has_ast_data;
+};
+
/** For uncommitted cross-MDT lock, store transno this lock belongs to */
#define l_transno l_client_cookie
return ldlm_lock_match_with_skip(ns, flags, 0, res_id, type, policy,
mode, lh, unref);
}
-
+struct ldlm_lock *search_itree(struct ldlm_resource *res,
+ struct ldlm_match_data *data);
enum ldlm_mode ldlm_revalidate_lock_handle(const struct lustre_handle *lockh,
__u64 *bits);
void ldlm_lock_mode_downgrade(struct ldlm_lock *lock, enum ldlm_mode new_mode);
#define OBD_FAIL_OSC_DELAY_SETTIME 0x412
#define OBD_FAIL_OSC_CONNECT_GRANT_PARAM 0x413
#define OBD_FAIL_OSC_DELAY_IO 0x414
+#define OBD_FAIL_OSC_NO_SIZE_DATA 0x415
#define OBD_FAIL_PTLRPC 0x500
#define OBD_FAIL_PTLRPC_ACK 0x501
}
/**
- * Describe the overlap between two locks. itree_overlap_cb data.
- */
-struct lock_match_data {
- struct ldlm_lock *lmd_old;
- struct ldlm_lock *lmd_lock;
- enum ldlm_mode *lmd_mode;
- union ldlm_policy_data *lmd_policy;
- __u64 lmd_flags;
- __u64 lmd_skip_flags;
- int lmd_unref;
-};
-
-/**
* Check if the given @lock meets the criteria for a match.
* A reference on the lock is taken if matched.
*
* \param lock test-against this lock
* \param data parameters
*/
-static int lock_matches(struct ldlm_lock *lock, struct lock_match_data *data)
+static int lock_matches(struct ldlm_lock *lock, struct ldlm_match_data *data)
{
union ldlm_policy_data *lpol = &lock->l_policy_data;
- enum ldlm_mode match;
+ enum ldlm_mode match = LCK_MINMODE;
if (lock == data->lmd_old)
return INTERVAL_ITER_STOP;
if (!(lock->l_req_mode & *data->lmd_mode))
return INTERVAL_ITER_CONT;
+
+ /* When we search for ast_data, we are not doing a traditional match,
+ * so we don't worry about IBITS or extent matching.
+ */
+ if (data->lmd_has_ast_data) {
+ if (!lock->l_ast_data)
+ return INTERVAL_ITER_CONT;
+
+ goto matched;
+ }
+
match = lock->l_req_mode;
switch (lock->l_resource->lr_type) {
if (data->lmd_skip_flags & lock->l_flags)
return INTERVAL_ITER_CONT;
+matched:
if (data->lmd_flags & LDLM_FL_TEST_LOCK) {
LDLM_LOCK_GET(lock);
ldlm_lock_touch_in_lru(lock);
static unsigned int itree_overlap_cb(struct interval_node *in, void *args)
{
struct ldlm_interval *node = to_ldlm_interval(in);
- struct lock_match_data *data = args;
+ struct ldlm_match_data *data = args;
struct ldlm_lock *lock;
int rc;
*
* \retval a referenced lock or NULL.
*/
-static struct ldlm_lock *search_itree(struct ldlm_resource *res,
- struct lock_match_data *data)
+struct ldlm_lock *search_itree(struct ldlm_resource *res,
+ struct ldlm_match_data *data)
{
struct interval_node_extent ext = {
.start = data->lmd_policy->l_extent.start,
return NULL;
}
+EXPORT_SYMBOL(search_itree);
/**
* \retval a referenced lock or NULL.
*/
static struct ldlm_lock *search_queue(struct list_head *queue,
- struct lock_match_data *data)
+ struct ldlm_match_data *data)
{
struct ldlm_lock *lock;
int rc;
enum ldlm_mode mode,
struct lustre_handle *lockh, int unref)
{
- struct lock_match_data data = {
+ struct ldlm_match_data data = {
.lmd_old = NULL,
.lmd_lock = NULL,
.lmd_mode = &mode,
.lmd_flags = flags,
.lmd_skip_flags = skip_flags,
.lmd_unref = unref,
+ .lmd_has_ast_data = false,
};
struct ldlm_resource *res;
struct ldlm_lock *lock;
struct ost_lvb *lvb;
struct req_capsule *cap;
struct cl_object *obj = NULL;
+ struct ldlm_resource *res = dlmlock->l_resource;
+ struct ldlm_match_data matchdata = { 0 };
+ union ldlm_policy_data policy;
+ enum ldlm_mode mode = LCK_PW | LCK_GROUP | LCK_PR;
int result;
__u16 refcheck;
if (IS_ERR(env))
GOTO(out, result = PTR_ERR(env));
+ policy.l_extent.start = 0;
+ policy.l_extent.end = LUSTRE_EOF;
- lock_res_and_lock(dlmlock);
- if (dlmlock->l_ast_data != NULL) {
- obj = osc2cl(dlmlock->l_ast_data);
- cl_object_get(obj);
+ matchdata.lmd_mode = &mode;
+ matchdata.lmd_policy = &policy;
+ matchdata.lmd_flags = LDLM_FL_TEST_LOCK | LDLM_FL_CBPENDING;
+ matchdata.lmd_unref = 1;
+ matchdata.lmd_has_ast_data = true;
+
+ LDLM_LOCK_GET(dlmlock);
+
+ /* If any dlmlock has l_ast_data set, we must find it or we risk
+ * missing a size update done under a different lock.
+ */
+ while (dlmlock) {
+ lock_res_and_lock(dlmlock);
+ if (dlmlock->l_ast_data) {
+ obj = osc2cl(dlmlock->l_ast_data);
+ cl_object_get(obj);
+ }
+ unlock_res_and_lock(dlmlock);
+ LDLM_LOCK_PUT(dlmlock);
+
+ dlmlock = NULL;
+
+ if (obj == NULL && res->lr_type == LDLM_EXTENT) {
+ if (OBD_FAIL_CHECK(OBD_FAIL_OSC_NO_SIZE_DATA))
+ break;
+
+ lock_res(res);
+ dlmlock = search_itree(res, &matchdata);
+ unlock_res(res);
+ }
}
- unlock_res_and_lock(dlmlock);
if (obj != NULL) {
/* Do not grab the mutex of cl_lock for glimpse.
CFS_FAIL_TIMEOUT(OBD_FAIL_OST_BRW_PAUSE_BULK, cfs_fail_val > 0 ?
cfs_fail_val : (obd_timeout + 1) / 4);
+ /* Delay write commit to show stale size information */
+ CFS_FAIL_TIMEOUT(OBD_FAIL_OSC_NO_SIZE_DATA, cfs_fail_val);
+
/* There must be big cache in current thread to process this request
* if it is NULL then something went wrong and it wasn't allocated,
* report -ENOMEM in that case */
/* Name of file/directory. Will be set once and will not change. */
static char mainpath[PATH_MAX];
+/* Path to same file/directory on second mount */
+static char mainpath2[PATH_MAX];
static char *mainfile;
static char fsmountdir[PATH_MAX]; /* Lustre mountpoint */
static char *lustre_dir; /* Test directory inside Lustre */
+static char *lustre_dir2; /* Same dir but on second mountpoint */
static int single_test; /* Number of a single test to execute*/
/* Cleanup our test file. */
return 0;
}
+/* Do lockahead requests from two mount points & sanity check size
+ *
+ * The key thing here is that client2 updates the size by writing, then asks
+ * for another lock beyond that. That next lock is never used.
+ * The bug (LU-11670) is that the glimpse for client1 will only check the
+ * highest lock and miss the size update made by the lower lock.
+ */
+static int test23(void)
+{
+ struct llapi_lu_ladvise advice;
+ size_t write_size = 1024 * 1024;
+ char buf[write_size];
+ const int count = 1;
+ struct stat sb;
+ struct stat sb2;
+ int fd;
+ /* On second mount */
+ int fd2;
+ int rc;
+ int i;
+
+ fd = open(mainpath, O_CREAT | O_RDWR, S_IRUSR | S_IWUSR);
+ ASSERTF(fd >= 0, "open failed for '%s': %s",
+ mainpath, strerror(errno));
+
+ /* mainpath2 is a different Lustre mount */
+ fd2 = open(mainpath2, O_RDWR, S_IRUSR | S_IWUSR);
+ ASSERTF(fd2 >= 0, "open failed for '%s': %s",
+ mainpath2, strerror(errno));
+
+ /* Lock + write MiB 1 from second client */
+ setup_ladvise_lockahead(&advice, MODE_WRITE_USER, 0, 0,
+ write_size - 1, true);
+
+ /* Manually set the result so we can verify it's being modified */
+ advice.lla_lockahead_result = 345678;
+
+ rc = llapi_ladvise(fd2, 0, count, &advice);
+ ASSERTF(rc == 0,
+ "cannot lockahead '%s': %s", mainpath2, strerror(errno));
+ ASSERTF(advice.lla_lockahead_result == 0,
+ "unexpected extent result: %d",
+ advice.lla_lockahead_result);
+
+ /* Ask again until we get the lock (status 1). */
+ for (i = 1; i < 100; i++) {
+ usleep(100000); /* 0.1 second */
+ advice.lla_lockahead_result = 456789;
+ rc = llapi_ladvise(fd2, 0, count, &advice);
+ ASSERTF(rc == 0, "cannot lockahead '%s': %s",
+ mainpath2, strerror(errno));
+
+ if (advice.lla_lockahead_result > 0)
+ break;
+ }
+
+ ASSERTF(advice.lla_lockahead_result == LLA_RESULT_SAME,
+ "unexpected extent result: %d",
+ advice.lla_lockahead_result);
+
+ rc = write(fd2, buf, write_size);
+ ASSERTF(rc == sizeof(buf), "write failed for '%s': %s",
+ mainpath2, strerror(errno));
+
+ /* Lock (but don't write) MiB 2 from second client */
+ setup_ladvise_lockahead(&advice, MODE_WRITE_USER, 0, write_size,
+ 2*write_size - 1, true);
+
+ /* Manually set the result so we can verify it's being modified */
+ advice.lla_lockahead_result = 345678;
+
+ rc = llapi_ladvise(fd2, 0, count, &advice);
+ ASSERTF(rc == 0,
+ "cannot lockahead '%s': %s", mainpath2, strerror(errno));
+ ASSERTF(advice.lla_lockahead_result == 0,
+ "unexpected extent result: %d",
+ advice.lla_lockahead_result);
+
+ /* Ask again until we get the lock (status 1). */
+ for (i = 1; i < 100; i++) {
+ usleep(100000); /* 0.1 second */
+ advice.lla_lockahead_result = 456789;
+ rc = llapi_ladvise(fd2, 0, count, &advice);
+ ASSERTF(rc == 0, "cannot lockahead '%s': %s",
+ mainpath2, strerror(errno));
+
+ if (advice.lla_lockahead_result > 0)
+ break;
+ }
+
+ ASSERTF(advice.lla_lockahead_result == LLA_RESULT_SAME,
+ "unexpected extent result: %d",
+ advice.lla_lockahead_result);
+
+ rc = fstat(fd, &sb);
+ ASSERTF(!rc, "stat failed for '%s': %s",
+ mainpath, strerror(errno));
+ rc = fstat(fd2, &sb2);
+ ASSERTF(!rc, "stat failed for '%s': %s",
+ mainpath2, strerror(errno));
+
+ ASSERTF(sb.st_size == sb2.st_size,
+ "size on %s and %s differs: %lu vs %lu",
+ mainpath, mainpath2, sb.st_size, sb2.st_size);
+
+ ASSERTF(sb.st_size == write_size, "size %lu != bytes written (%lu)",
+ sb.st_size, write_size);
+
+ close(fd);
+ close(fd2);
+
+ return 0;
+}
+
static void usage(char *prog)
{
- fprintf(stderr, "Usage: %s [-d lustre_dir], [-t single_test]\n", prog);
+ fprintf(stderr,
+ "Usage: %s [-d lustre_dir], [-D lustre_dir2] [-t test]\n",
+ prog);
exit(-1);
}
{
int c;
- while ((c = getopt(argc, argv, "d:f:t:")) != -1) {
+ while ((c = getopt(argc, argv, "d:D:f:t:")) != -1) {
switch (c) {
case 'f':
mainfile = optarg;
case 'd':
lustre_dir = optarg;
break;
+ case 'D':
+ lustre_dir2 = optarg;
+ break;
case 't':
single_test = atoi(optarg);
break;
return -1;
}
+ if (lustre_dir2) {
+ rc = llapi_search_mounts(lustre_dir2, 0, fsmountdir, fsname);
+ if (rc != 0) {
+ fprintf(stderr,
+ "Error: '%s': not a Lustre filesystem\n",
+ lustre_dir2);
+ return -1;
+ }
+ }
+
/* Play nice with Lustre test scripts. Non-line buffered output
* stream under I/O redirection may appear incorrectly. */
setvbuf(stdout, NULL, _IOLBF, 0);
rc = snprintf(mainpath, sizeof(mainpath), "%s/%s", lustre_dir,
mainfile);
ASSERTF(rc > 0 && rc < sizeof(mainpath), "invalid name for mainpath");
+
+ if (lustre_dir2) {
+ rc = snprintf(mainpath2, sizeof(mainpath2), "%s/%s",
+ lustre_dir2, mainfile);
+ ASSERTF(rc > 0 && rc < sizeof(mainpath2),
+ "invalid name for mainpath2");
+ }
+
cleanup();
switch (single_test) {
PERFORM(test20);
PERFORM(test21);
PERFORM(test22);
+ /* Some tests require a second mount point */
+ if (lustre_dir2)
+ PERFORM(test23);
/* When running all the test cases, we can't use the return
* from the last test case, as it might be non-zero to return
* info, rather than for an error. Test cases assert and exit
case 22:
PERFORM(test22);
break;
+ case 23:
+ ASSERTF(lustre_dir2,
+ "must provide second mount point for test 23");
+ PERFORM(test23);
+ break;
default:
fprintf(stderr, "impossible value of single_test %d\n",
single_test);
test_255c() {
[ $OST1_VERSION -lt $(version_code 2.10.50) ] &&
- skip "lustre < 2.10.53 does not support lockahead"
+ skip "lustre < 2.10.50 does not support lockahead"
local count
local new_count
}
run_test 102 "Test open by handle of unlinked file"
+# Compare file size between first & second mount, ensuring the client correctly
+# glimpses even with unused speculative locks - LU-11670
+test_103() {
+ [ $(lustre_version_code $ost1) -lt $(version_code 2.10.50) ] &&
+ skip "Lockahead needs OST version at least 2.10.50"
+
+ local testnum=23
+
+ test_mkdir -p $DIR/$tdir
+
+ # Force file on to OST0
+ $LFS setstripe -i 0 $DIR/$tdir
+
+ # Do not check multiple locks on glimpse
+ # OBD_FAIL_OSC_NO_SIZE_DATA 0x415
+ $LCTL set_param fail_loc=0x415
+
+ # Delay write commit by 2 seconds to guarantee glimpse wins race
+ # The same fail_loc is used on client & server so it can work in the
+ # single node sanity setup
+ do_facet ost1 $LCTL set_param fail_loc=0x415 fail_val=2
+
+ echo "Incorrect size expected (no glimpse fix):"
+ lockahead_test -d $DIR/$tdir -D $DIR2/$tdir -t $testnum -f $tfile
+ rc=$?
+ if [ $rc -eq 0 ]; then
+ error "Lockahead test $testnum passed with fail_loc set, ${rc}"
+ fi
+
+ # guarantee write commit timeout has expired
+ sleep 2
+
+ # Clear fail_loc on client
+ $LCTL set_param fail_loc=0
+
+ # Delay write commit by 2 seconds to guarantee glimpse wins race
+ # OBD_FAIL_OST_BRW_PAUSE_BULK 0x214
+ do_facet ost1 $LCTL set_param fail_loc=0x214 fail_val=2
+
+ # Write commit is still delayed by 2 seconds
+ lockahead_test -d $DIR/$tdir -D $DIR2/$tdir -t $testnum -f $tfile
+ rc=$?
+ [ $rc -eq 0 ] || error "Lockahead test${testnum} failed, ${rc}"
+
+ # guarantee write commit timeout has expired
+ sleep 2
+
+ rm -f $DIR/$tfile || error "unable to delete $DIR/$tfile"
+}
+run_test 103 "Test size correctness with lockahead"
+
log "cleanup: ======================================================"
# kill and wait in each test only guarentee script finish, but command in script