ofd_seq_put(env, oseq);
*vallen = sizeof(*last_id);
} else if (KEY_IS(KEY_FIEMAP)) {
- struct ofd_thread_info *info;
struct ofd_device *ofd = ofd_exp(exp);
struct ofd_object *fo;
struct ll_fiemap_info_key *fm_key = key;
+ struct lu_fid fid;
if (val == NULL) {
*vallen = fiemap_count_to_size(
GOTO(out, rc = 0);
}
- info = ofd_info_init(env, exp);
- rc = ostid_to_fid(&info->fti_fid, &fm_key->oa.o_oi, 0);
+ rc = ostid_to_fid(&fid, &fm_key->oa.o_oi, 0);
if (rc != 0)
GOTO(out, rc);
CDEBUG(D_INODE, "get FIEMAP of object "DFID"\n",
- PFID(&info->fti_fid));
+ PFID(&fid));
- fo = ofd_object_find(env, ofd, &info->fti_fid);
+ fo = ofd_object_find(env, ofd, &fid);
if (IS_ERR(fo)) {
CERROR("%s: error finding object "DFID"\n",
- exp->exp_obd->obd_name, PFID(&info->fti_fid));
+ exp->exp_obd->obd_name, PFID(&fid));
rc = PTR_ERR(fo);
} else {
struct ll_user_fiemap *fiemap = val;
ptlrpc_req_finished(req);
RETURN(rc);
} else if (KEY_IS(KEY_FIEMAP)) {
- struct ptlrpc_request *req;
- struct ll_user_fiemap *reply;
- char *tmp;
- int rc;
+ struct ll_fiemap_info_key *fm_key =
+ (struct ll_fiemap_info_key *)key;
+ struct ldlm_res_id res_id;
+ ldlm_policy_data_t policy;
+ struct lustre_handle lockh;
+ ldlm_mode_t mode = 0;
+ struct ptlrpc_request *req;
+ struct ll_user_fiemap *reply;
+ char *tmp;
+ int rc;
+
+ if (!(fm_key->fiemap.fm_flags & FIEMAP_FLAG_SYNC))
+ goto skip_locking;
+
+ policy.l_extent.start = fm_key->fiemap.fm_start &
+ CFS_PAGE_MASK;
+
+ if (OBD_OBJECT_EOF - fm_key->fiemap.fm_length <=
+ fm_key->fiemap.fm_start + CFS_PAGE_SIZE - 1)
+ policy.l_extent.end = OBD_OBJECT_EOF;
+ else
+ policy.l_extent.end = (fm_key->fiemap.fm_start +
+ fm_key->fiemap.fm_length +
+ CFS_PAGE_SIZE - 1) & CFS_PAGE_MASK;
+
+ ostid_build_res_name(&fm_key->oa.o_oi, &res_id);
+ mode = ldlm_lock_match(exp->exp_obd->obd_namespace,
+ LDLM_FL_BLOCK_GRANTED |
+ LDLM_FL_LVB_READY,
+ &res_id, LDLM_EXTENT, &policy,
+ LCK_PR | LCK_PW, &lockh, 0);
+ if (mode) { /* lock is cached on client */
+ if (mode != LCK_PR) {
+ ldlm_lock_addref(&lockh, LCK_PR);
+ ldlm_lock_decref(&lockh, LCK_PW);
+ }
+ } else { /* no cached lock, needs acquire lock on server side */
+ fm_key->oa.o_valid |= OBD_MD_FLFLAGS;
+ fm_key->oa.o_flags |= OBD_FL_SRVLOCK;
+ }
+skip_locking:
req = ptlrpc_request_alloc(class_exp2cliimp(exp),
&RQF_OST_GET_INFO_FIEMAP);
if (req == NULL)
- RETURN(-ENOMEM);
+ GOTO(drop_lock, rc = -ENOMEM);
req_capsule_set_size(&req->rq_pill, &RMF_FIEMAP_KEY,
RCL_CLIENT, keylen);
rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_GET_INFO);
if (rc) {
ptlrpc_request_free(req);
- RETURN(rc);
+ GOTO(drop_lock, rc);
}
tmp = req_capsule_client_get(&req->rq_pill, &RMF_FIEMAP_KEY);
ptlrpc_request_set_replen(req);
rc = ptlrpc_queue_wait(req);
if (rc)
- GOTO(out1, rc);
+ GOTO(fini_req, rc);
reply = req_capsule_server_get(&req->rq_pill, &RMF_FIEMAP_VAL);
if (reply == NULL)
- GOTO(out1, rc = -EPROTO);
+ GOTO(fini_req, rc = -EPROTO);
memcpy(val, reply, *vallen);
- out1:
+fini_req:
ptlrpc_req_finished(req);
-
+drop_lock:
+ if (mode)
+ ldlm_lock_decref(&lockh, LCK_PR);
RETURN(rc);
}
RETURN(rc);
}
+struct locked_region {	/* bookkeeping for one lock taken by lock_region() */
+ cfs_list_t list;	/* linkage on the list handed to lock_region() */
+ struct lustre_handle lh;	/* handle passed to ost_lock_get() and ost_lock_put() */
+};
+
+/*
+ * Take an LCK_PR lock through ost_lock_get(), starting at \a begin with a
+ * length of \a end - \a begin, and remember it on \a locked.
+ *
+ * On success a newly allocated struct locked_region holding the lock handle
+ * is added to \a locked; the caller drops it with unlock_zero_regions().
+ *
+ * Returns 0 on success, -ENOMEM if the tracking entry cannot be allocated,
+ * or the error returned by ost_lock_get().
+ */
+static int lock_region(struct obd_export *exp, struct obdo *oa,
+		       unsigned long long begin, unsigned long long end,
+		       cfs_list_t *locked)
+{
+	struct locked_region *region = NULL;
+	int rc;
+
+	LASSERT(begin <= end);
+	OBD_ALLOC_PTR(region);
+	if (region == NULL)
+		return -ENOMEM;
+
+	rc = ost_lock_get(exp, oa, begin, end - begin, &region->lh, LCK_PR, 0);
+	if (rc) {
+		/* the entry was never linked onto \a locked, so nobody else
+		 * would free it: release it here to avoid leaking it */
+		OBD_FREE_PTR(region);
+		return rc;
+	}
+
+	CDEBUG(D_OTHER, "ost lock [%llu,%llu], lh=%p\n",
+	       begin, end, &region->lh);
+	cfs_list_add(&region->list, locked);
+
+	return 0;
+}
+
+/*
+ * Lock every part of the range described by \a fiemap (fm_start, fm_length)
+ * that is not covered by one of its fm_mapped_extents extents: the gap in
+ * front of each extent and the gap after the last one.
+ *
+ * Each lock is taken with lock_region() and recorded on \a locked.  Locks
+ * taken before a failure stay on \a locked; this function does not undo them.
+ *
+ * Returns 0 on success or the first error returned by lock_region().
+ *
+ * NOTE(review): fm_start + fm_length is computed without an overflow check;
+ * if the sum can wrap the trailing gap would be skipped -- confirm whether
+ * callers can pass such a range.
+ */
+static int lock_zero_regions(struct obd_export *exp, struct obdo *oa,
+			     struct ll_user_fiemap *fiemap,
+			     cfs_list_t *locked)
+{
+	__u64 hole_start = fiemap->fm_start;
+	unsigned int idx = 0;
+	int rc = 0;
+	ENTRY;
+
+	CDEBUG(D_OTHER, "extents count %u\n", fiemap->fm_mapped_extents);
+	while (idx < fiemap->fm_mapped_extents) {
+		struct ll_fiemap_extent *ext = &fiemap->fm_extents[idx++];
+
+		/* a gap exists only when this extent starts past the cursor */
+		if (ext->fe_logical > hole_start) {
+			CDEBUG(D_OTHER, "ost lock [%llu,%llu]\n",
+			       hole_start, ext->fe_logical);
+			rc = lock_region(exp, oa, hole_start,
+					 ext->fe_logical, locked);
+			if (rc)
+				RETURN(rc);
+		}
+
+		/* the next gap can start no earlier than this extent's end */
+		hole_start = ext->fe_logical + ext->fe_length;
+	}
+
+	/* nothing left between the last extent and the end of the range */
+	if (hole_start >= (fiemap->fm_start + fiemap->fm_length))
+		RETURN(rc);
+
+	CDEBUG(D_OTHER, "ost lock [%llu,%llu]\n",
+	       hole_start, fiemap->fm_start + fiemap->fm_length);
+	rc = lock_region(exp, oa, hole_start,
+			 fiemap->fm_start + fiemap->fm_length, locked);
+
+	RETURN(rc);
+}
+
+/*
+ * Undo lock_region(): for every entry on \a locked, release its LCK_PR lock
+ * with ost_lock_put(), unlink the entry and free it.
+ */
+static void unlock_zero_regions(struct obd_export *exp, cfs_list_t *locked)
+{
+	struct locked_region *region;
+	struct locked_region *next;
+
+	/* the _safe iterator is needed because each entry is freed in the loop */
+	cfs_list_for_each_entry_safe(region, next, locked, list) {
+		CDEBUG(D_OTHER, "ost unlock lh=%p\n", &region->lh);
+		ost_lock_put(exp, &region->lh, LCK_PR);
+		cfs_list_del(&region->list);
+		OBD_FREE_PTR(region);
+	}
+}
+
static int ost_get_info(struct obd_export *exp, struct ptlrpc_request *req)
{
void *key, *reply;
int keylen, replylen, rc = 0;
struct req_capsule *pill = &req->rq_pill;
+ cfs_list_t locked = CFS_LIST_HEAD_INIT(locked);
+ struct ll_fiemap_info_key *fm_key = NULL;
+ struct ll_user_fiemap *fiemap;
ENTRY;
/* this common part for get_info rpc */
keylen = req_capsule_get_size(pill, &RMF_SETINFO_KEY, RCL_CLIENT);
if (KEY_IS(KEY_FIEMAP)) {
- struct ll_fiemap_info_key *fm_key = key;
- int rc;
-
+ fm_key = key;
rc = ost_validate_obdo(exp, &fm_key->oa, NULL);
if (rc)
RETURN(rc);
- }
+ }
rc = obd_get_info(req->rq_svc_thread->t_env, exp, keylen, key,
&replylen, NULL, NULL);
if (rc)
- RETURN(rc);
+ RETURN(rc);
req_capsule_set_size(pill, &RMF_GENERIC_DATA,
RCL_SERVER, replylen);
rc = req_capsule_server_pack(pill);
if (rc)
- RETURN(rc);
+ RETURN(rc);
reply = req_capsule_server_get(pill, &RMF_GENERIC_DATA);
if (reply == NULL)
- RETURN(-ENOMEM);
+ RETURN(-ENOMEM);
if (KEY_IS(KEY_LAST_FID)) {
void *val;
} else {
CERROR("%s: invalid req val %p vallen %d replylen %d\n",
exp->exp_obd->obd_name, val, vallen, replylen);
- GOTO(out, rc = -EINVAL);
+ RETURN(-EINVAL);
}
}
- /* call again to fill in the reply buffer */
- rc = obd_get_info(req->rq_svc_thread->t_env, exp, keylen, key,
- &replylen, reply, NULL);
-out:
- lustre_msg_set_status(req->rq_repmsg, 0);
+ /* call again to fill in the reply buffer */
+ rc = obd_get_info(req->rq_svc_thread->t_env, exp, keylen, key,
+ &replylen, reply, NULL);
+
+ /* LU-3219: Lock the sparse areas to make sure dirty data is flushed
+ * back from the client, then call fiemap again. */
+ if (KEY_IS(KEY_FIEMAP) && (fm_key->oa.o_valid & OBD_MD_FLFLAGS) &&
+ (fm_key->oa.o_flags & OBD_FL_SRVLOCK)) {
+ fiemap = (struct ll_user_fiemap *)reply;
+ fm_key = key;
+
+ rc = lock_zero_regions(exp, &fm_key->oa, fiemap, &locked);
+ if (rc == 0 && !cfs_list_empty(&locked))
+ rc = obd_get_info(req->rq_svc_thread->t_env, exp,
+ keylen, key, &replylen, reply, NULL);
+ unlock_zero_regions(exp, &locked);
+ if (rc)
+ RETURN(rc);
+ }
+
+ lustre_msg_set_status(req->rq_repmsg, 0);
+
RETURN(rc);
}
noinst_PROGRAMS += ll_sparseness_write mrename ll_dirstripe_verify mkdirmany
noinst_PROGRAMS += openfilleddirunlink rename_many memhog
noinst_PROGRAMS += mmap_sanity writemany reads flocks_test
-noinst_PROGRAMS += write_time_limit rwv copytool lgetxattr_size_check
+noinst_PROGRAMS += write_time_limit rwv copytool lgetxattr_size_check checkfiemap
bin_PROGRAMS = mcreate munlink
testdir = $(libdir)/lustre/tests
--- /dev/null
+/* GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see http://www.gnu.org/licenses
+ *
+ * Please visit http://www.xyratex.com/contact if you need additional
+ * information or have any questions.
+ *
+ * GPL HEADER END
+ */
+
+/*
+ * Copyright 2013 Xyratex Technology Limited
+ *
+ * Author: Artem Blagodarenko <Artem_Blagodarenko@xyratex.com>
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <unistd.h>
+#include <getopt.h>
+
+#ifndef HAVE_FIEMAP
+# include <linux/fiemap.h>
+#endif
+
+#ifdef __linux__
+# ifndef FS_IOC_FIEMAP
+# define FS_IOC_FIEMAP (_IOWR('f', 11, struct fiemap))
+# endif
+#endif
+
+#define ONEMB 1048576
+
+/* This test executes the FIEMAP ioctl (with FIEMAP_FLAG_SYNC) on \a fd and
+ * checks that
+ * a) there are no file ranges marked with FIEMAP_EXTENT_UNWRITTEN
+ * b) the sum of the extent lengths is equal to \a orig_size
+ *
+ * Returns 0 when both checks pass, 1 when the summed length differs from
+ * \a orig_size, -1 when the ioctl fails and -2 on an unwritten extent. */
+int check_fiemap(int fd, long long orig_size)
+{
+	/* 4096-byte buffer: the fiemap header followed by as many extent
+	 * records as fit in the remaining space */
+	union { struct fiemap f; char c[4096]; } fiemap_buf;
+	struct fiemap *fiemap = &fiemap_buf.f;
+	struct fiemap_extent *fm_extents = &fiemap->fm_extents[0];
+	unsigned int count = (sizeof(fiemap_buf) - sizeof(*fiemap)) /
+			     sizeof(*fm_extents);
+	unsigned int i = 0;
+	long long file_size = 0;
+
+	memset(&fiemap_buf, 0, sizeof(fiemap_buf));
+
+	/* map the whole file, starting at offset 0 */
+	fiemap->fm_start = 0;
+	fiemap->fm_flags = FIEMAP_FLAG_SYNC;
+	fiemap->fm_extent_count = count;
+	fiemap->fm_length = FIEMAP_MAX_OFFSET;
+
+	if (ioctl(fd, FS_IOC_FIEMAP, fiemap) < 0) {
+		fprintf(stderr, "error while ioctl %i\n", errno);
+		return -1;
+	}
+
+	for (i = 0; i < fiemap->fm_mapped_extents; i++) {
+		/* fe_logical/fe_length are 64-bit: print them as unsigned
+		 * long long so they are not truncated where long is 32-bit */
+		printf("extent in "
+		       "offset %llu, length %llu\n"
+		       "flags: %x\n",
+		       (unsigned long long)fm_extents[i].fe_logical,
+		       (unsigned long long)fm_extents[i].fe_length,
+		       fm_extents[i].fe_flags);
+
+		if (fm_extents[i].fe_flags & FIEMAP_EXTENT_UNWRITTEN) {
+			fprintf(stderr, "Unwritten extent\n");
+			return -2;
+		}
+
+		file_size += fm_extents[i].fe_length;
+	}
+
+	printf("No unwritten extents, extents number %u, "
+	       "file size %lli, original size %lli\n",
+	       fiemap->fm_mapped_extents,
+	       file_size, orig_size);
+	return file_size != orig_size;
+}
+
+/* Usage: checkfiemap [--test|-t] <filename> <filesize>
+ *
+ * With --test the program exits 0 immediately, so callers can probe that
+ * the binary is runnable.  Otherwise <filename> is opened read-only and
+ * check_fiemap() is run against it with <filesize> as the expected sum of
+ * extent lengths; its result becomes the return value.  Usage errors, an
+ * unparsable <filesize> and open() failures return -1. */
+int main(int argc, char **argv)
+{
+	int c;
+	struct option long_opts[] = {
+		{"test", no_argument, 0, 't'},
+		{NULL, 0, NULL, 0}
+	};
+	long long filesize;
+	char *end;
+	int fd;
+	int rc;
+
+	optind = 0;
+	while ((c = getopt_long(argc, argv, "t", long_opts, NULL)) != -1) {
+		switch (c) {
+		case 't':
+			return 0;
+		default:
+			fprintf(stderr, "error: %s: option '%s' unrecognized\n",
+				argv[0], argv[optind - 1]);
+			return -1;
+		}
+	}
+
+	if (optind != argc - 2) {
+		fprintf(stderr, "Usage: %s <filename> <filesize>\n", argv[0]);
+		return -1;
+	}
+
+	/* parse the size before opening the file; unlike atoll(), strtoll()
+	 * lets a malformed size be reported instead of being read as 0 */
+	errno = 0;
+	filesize = strtoll(argv[optind + 1], &end, 10);
+	if (errno != 0 || end == argv[optind + 1] || *end != '\0') {
+		fprintf(stderr, "error: %s: invalid file size '%s'\n",
+			argv[0], argv[optind + 1]);
+		return -1;
+	}
+
+	fd = open(argv[optind], O_RDONLY);
+	if (fd < 0) {
+		fprintf(stderr, "cannot open %s for reading, error %i\n",
+			argv[optind], errno);
+		return -1;
+	}
+
+	fprintf(stderr, "fd: %i\n", fd);
+
+	rc = check_fiemap(fd, filesize);
+
+	/* a close() failure is reported but does not change the result */
+	if (close(fd) < 0)
+		fprintf(stderr, "closing %s, error %i\n", argv[optind], errno);
+
+	return rc;
+}
}
run_test 70b "remove files after calling rm_entry"
+# Check that FIEMAP reports all written data right after the writes finish:
+# first through the second mount ($DIR2), then through the mount that did
+# the writing ($DIR1), where the ldlm_bl_callback count must not change.
+test_71() {
+	checkfiemap --test ||
+		{ skip "checkfiemap not runnable: $?" && return; }
+	# write data this way: hole - data - hole - data
+	dd if=/dev/urandom of=$DIR1/$tfile bs=40K seek=1 count=1
+	[ "$(facet_fstype ost$(($($GETSTRIPE -i $DIR1/$tfile) + 1)))" = \
+		"zfs" ] &&
+		skip "ORI-366/LU-1941: FIEMAP unimplemented on ZFS" && return 0
+	dd if=/dev/urandom of=$DIR1/$tfile bs=40K seek=3 count=1
+	GET_STAT="lctl get_param -n ldlm.services.ldlm_cbd.stats"
+	stat $DIR2/$tfile
+	local can1=$($GET_STAT | awk '/ldlm_bl_callback/ {print $2}')
+	echo $can1
+	checkfiemap $DIR2/$tfile 81920 ||
+		error "data is not flushed from client"
+	local can2=$($GET_STAT | awk '/ldlm_bl_callback/ {print $2}')
+	echo $can2
+
+	# common case of "create file, copy file" on a single node
+	# should not flush data from ost
+	dd if=/dev/urandom of=$DIR1/$tfile bs=40K seek=1 count=1
+	dd if=/dev/urandom of=$DIR1/$tfile bs=40K seek=3 count=1
+	stat $DIR1/$tfile
+	local can3=$($GET_STAT | awk '/ldlm_bl_callback/ {print $2}')
+	echo $can3
+	checkfiemap $DIR1/$tfile 81920 ||
+		error "wrong file map on the mount that wrote the data"
+	local can4=$($GET_STAT | awk '/ldlm_bl_callback/ {print $2}')
+	# report the counter that is actually compared below
+	echo $can4
+	[ $can3 -eq $can4 ] || error $((can4-can3)) "cancel RPC occurred."
+}
+run_test 71 "correct file map just after write operation is finished"
+
log "cleanup: ======================================================"
[ "$(mount | grep $MOUNT2)" ] && umount $MOUNT2