Whamcloud - gitweb
b=10555
author kalpak <kalpak>
Thu, 7 Aug 2008 21:33:35 +0000 (21:33 +0000)
committer kalpak <kalpak>
Thu, 7 Aug 2008 21:33:35 +0000 (21:33 +0000)
i=adilger
i=green
i=girish

add ldiskfs patches and lustre support for FIEMAP ioctl

14 files changed:
lustre/ChangeLog
lustre/autoconf/lustre-core.m4
lustre/include/lustre/lustre_idl.h
lustre/include/lustre/lustre_user.h
lustre/include/obd.h
lustre/llite/file.c
lustre/llite/llite_internal.h
lustre/lov/lov_obd.c
lustre/obdfilter/filter.c
lustre/osc/osc_request.c
lustre/ptlrpc/pack_generic.c
lustre/ptlrpc/ptlrpc_module.c
lustre/tests/sanity.sh
lustre/utils/wirecheck.c

index dcfca10..0178624 100644 (file)
@@ -449,6 +449,13 @@ Bugzilla   : 16140
 Description: journal_dev option does not work in b1_6
 Details    : pass mount option during pre-mount.
 
+Severity   : enhancement
+Bugzilla   : 10555
+Description: Add a FIEMAP(FIle Extent MAP) ioctl for ldiskfs
+Details    : FIEMAP ioctl will allow an application to efficiently fetch the
+             extent information of a file. It can be used to map logical blocks
+             in a file to physical blocks in the block device.
+
 -------------------------------------------------------------------------------
 
 
index fb55e1d..c729a0e 100644 (file)
@@ -551,6 +551,28 @@ AC_DEFUN([LC_XATTR_ACL],
 [])
 ])
 
+#
+# LC_LINUX_FIEMAP_H
+#
+# Check whether the kernel tree at $LINUX ships include/linux/fiemap.h
+# and whether that header compiles on its own.  When both hold,
+# HAVE_LINUX_FIEMAP_H is defined; otherwise nothing is defined.
+# Original note: after 2.6.27 use fiemap.h in include/linux.
+#
+AC_DEFUN([LC_LINUX_FIEMAP_H],
+[LB_CHECK_FILE([$LINUX/include/linux/fiemap.h],[
+        AC_MSG_CHECKING([if fiemap.h can be compiled])
+        LB_LINUX_TRY_COMPILE([
+                #include <linux/fiemap.h>
+        ],[],[
+                AC_MSG_RESULT([yes])
+                AC_DEFINE(HAVE_LINUX_FIEMAP_H, 1, [Kernel has fiemap.h])
+        ],[
+                AC_MSG_RESULT([no])
+        ])
+],
+[])
+])
+
+
 AC_DEFUN([LC_STRUCT_INTENT_FILE],
 [AC_MSG_CHECKING([if struct open_intent has a file field])
 LB_LINUX_TRY_COMPILE([
index 3cfccc6..ba2ca51 100644 (file)
@@ -96,6 +96,7 @@
 /* Defn's shared with user-space. */
 #include <lustre/lustre_user.h>
 #include <lustre_ver.h>
+#include <lustre/ll_fiemap.h>
 
 /*
  * this file contains all data structures used in Lustre interfaces:
@@ -1546,9 +1547,16 @@ struct ost_body {
         struct  obdo oa;
 };
 
+/*
+ * Key for FIEMAP to be used in get_info calls.
+ *
+ * The whole structure is handed over as the "key" of a get_info call;
+ * the mapped extents come back through the separate value buffer.
+ */
+struct ll_fiemap_info_key {
+        char    name[8];        /* key name; callers initialise it to KEY_FIEMAP */
+        struct  obdo oa;        /* object to map (callers fill in id and size) */
+        struct  ll_user_fiemap fiemap; /* request header: range, flags, counts */
+};
 
 extern void lustre_swab_ost_body (struct ost_body *b);
 extern void lustre_swab_ost_last_id(obd_id *id);
+extern void lustre_swab_fiemap(struct ll_user_fiemap *fiemap);
 
 extern void lustre_swab_lov_user_md(struct lov_user_md *lum);
 extern void lustre_swab_lov_user_md_objects(struct lov_user_md *lum);
index 7c04e18..5c1d4d8 100644 (file)
@@ -41,6 +41,7 @@
 #ifndef _LUSTRE_USER_H
 #define _LUSTRE_USER_H
 
+#include <lustre/ll_fiemap.h>
 #if defined(__linux__)
 #include <linux/lustre_user.h>
 #elif defined(__APPLE__)
 #define EXT3_IOC_SETVERSION             _IOW('f', 4, long)
 #define EXT3_IOC_GETVERSION_OLD         _IOR('v', 1, long)
 #define EXT3_IOC_SETVERSION_OLD         _IOW('v', 2, long)
+#define EXT3_IOC_FIEMAP                 _IOWR('f', 10, struct ll_user_fiemap)
 #endif
 
+/* FIEMAP flags supported by Lustre */
+#define LUSTRE_FIEMAP_FLAGS_COMPAT (FIEMAP_FLAG_SYNC | FIEMAP_FLAG_DEVICE_ORDER)
+
 struct obd_statfs;
 
 /* 
index 2d67acb..cc338bf 100644 (file)
@@ -938,6 +938,7 @@ enum obd_cleanup_stage {
 #define KEY_BLOCKSIZE           "blocksize"
 #define KEY_BLOCKSIZE_BITS      "blocksize_bits"
 #define KEY_MAX_EASIZE          "max_ea_size"
+#define KEY_FIEMAP              "FIEMAP"
 /* XXX unused */
 #define KEY_ASYNC               "async"
 
index 7f92fb8..226983c 100644 (file)
@@ -49,6 +49,7 @@
 #include <linux/lustre_compat25.h>
 #endif
 #include "llite_internal.h"
+#include <lustre/ll_fiemap.h>
 
 /* also used by llite/special.c:ll_special_open() */
 struct ll_file_data *ll_file_data_get(void)
@@ -2455,6 +2456,43 @@ int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it)
         RETURN(rc);
 }
 
+/*
+ * Fetch the extent mapping of @inode via obd_get_info(KEY_FIEMAP).
+ *
+ * @inode:     file to map
+ * @fiemap:    kernel copy of the user request; it is both the request
+ *             header and the buffer the mapped extents are written to
+ * @num_bytes: size in bytes of the buffer @fiemap points to
+ *
+ * Returns 0 on success, -EOPNOTSUPP when the file has more than one
+ * stripe and the caller did not pass FIEMAP_FLAG_DEVICE_ORDER, or the
+ * error returned by obd_get_info().
+ */
+int ll_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap,
+              int num_bytes)
+{
+        struct obd_export *exp = ll_i2obdexp(inode);
+        struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
+        struct ll_fiemap_info_key fm_key = { .name = KEY_FIEMAP, };
+        /* obd_get_info() takes a __u32 length pointer, so keep the
+         * length in a variable of exactly that type */
+        __u32 vallen = num_bytes;
+        int rc;
+        ENTRY;
+
+        /* Without striping information there is nothing to dereference
+         * below and nothing to map.
+         * NOTE(review): assumes a NULL lli_smd means no objects have been
+         * allocated for this file yet - confirm. */
+        if (lsm == NULL) {
+                fiemap->fm_mapped_extents = 0;
+                RETURN(0);
+        }
+
+        /* If the stripe_count > 1 and the application does not understand
+         * DEVICE_ORDER flag, then it cannot interpret the extents correctly.
+         */
+        if (lsm->lsm_stripe_count > 1 &&
+            !(fiemap->fm_flags & FIEMAP_FLAG_DEVICE_ORDER))
+                RETURN(-EOPNOTSUPP);
+
+        fm_key.oa.o_id = lsm->lsm_object_id;
+        fm_key.oa.o_valid = OBD_MD_FLID;
+
+        obdo_from_inode(&fm_key.oa, inode, OBD_MD_FLFID | OBD_MD_FLSIZE);
+
+        /* If filesize is 0, then there would be no objects for mapping */
+        if (fm_key.oa.o_size == 0) {
+                fiemap->fm_mapped_extents = 0;
+                RETURN(0);
+        }
+
+        /* Only the fixed-size header travels in the key; the first extent
+         * (continuation state) stays in @fiemap, which is also the value
+         * buffer passed below. */
+        memcpy(&fm_key.fiemap, fiemap, sizeof(*fiemap));
+
+        rc = obd_get_info(exp, sizeof(fm_key), &fm_key, &vallen, fiemap, lsm);
+        if (rc)
+                CERROR("obd_get_info failed: rc = %d\n", rc);
+
+        RETURN(rc);
+}
+
 int ll_file_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
                   unsigned long arg)
 {
@@ -2504,6 +2542,72 @@ int ll_file_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
                 RETURN(ll_lov_getstripe(inode, arg));
         case LL_IOC_RECREATE_OBJ:
                 RETURN(ll_lov_recreate_obj(inode, file, arg));
+        /* FIEMAP: copy the request in, run the mapping, copy the header
+         * plus the mapped extents back out to user space. */
+        case EXT3_IOC_FIEMAP: {
+                struct ll_user_fiemap *fiemap_s;
+                size_t num_bytes, ret_bytes;
+                unsigned int extent_count;
+                int rc = 0;
+
+                /* Get the extent count so we can calculate the size of
+                 * required fiemap buffer */
+                if (get_user(extent_count,
+                    &((struct ll_user_fiemap __user *)arg)->fm_extent_count))
+                        RETURN(-EFAULT);
+
+                /* The buffer size is passed to ll_fiemap() as an int, so
+                 * reject counts whose buffer would not fit; this also rules
+                 * out wrap-around in the size computation below. */
+                if (extent_count > (INT_MAX - sizeof(*fiemap_s)) /
+                                   sizeof(struct ll_fiemap_extent))
+                        RETURN(-EINVAL);
+
+                num_bytes = sizeof(*fiemap_s) + (extent_count *
+                                                 sizeof(struct ll_fiemap_extent));
+                OBD_VMALLOC(fiemap_s, num_bytes);
+                if (fiemap_s == NULL)
+                        RETURN(-ENOMEM);
+
+                if (copy_from_user(fiemap_s,(struct ll_user_fiemap __user *)arg,
+                                   sizeof(*fiemap_s)))
+                        GOTO(error, rc = -EFAULT);
+
+                /* The header was fetched from user space a second time and
+                 * may have been modified in between; pin the count to the
+                 * value the buffer was sized for. */
+                fiemap_s->fm_extent_count = extent_count;
+
+                /* Report unsupported flags back to the caller */
+                if (fiemap_s->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
+                        fiemap_s->fm_flags = fiemap_s->fm_flags &
+                                                    ~LUSTRE_FIEMAP_FLAGS_COMPAT;
+                        if (copy_to_user((char *)arg, fiemap_s,
+                                         sizeof(*fiemap_s)))
+                                GOTO(error, rc = -EFAULT);
+
+                        GOTO(error, rc = -EBADR);
+                }
+
+                /* If fm_extent_count is non-zero, read the first extent since
+                 * it is used to calculate end_offset and device from previous
+                 * fiemap call. */
+                if (extent_count) {
+                        if (copy_from_user(&fiemap_s->fm_extents[0],
+                            (char __user *)arg + sizeof(*fiemap_s),
+                            sizeof(struct ll_fiemap_extent)))
+                                GOTO(error, rc = -EFAULT);
+                }
+
+                if (fiemap_s->fm_flags & FIEMAP_FLAG_SYNC) {
+                        /* Use the outer rc: a shadowing local here would be
+                         * lost at the error label and 0 would be returned. */
+                        rc = filemap_fdatawrite(inode->i_mapping);
+                        if (rc)
+                                GOTO(error, rc);
+                }
+
+                rc = ll_fiemap(inode, fiemap_s, num_bytes);
+                if (rc)
+                        GOTO(error, rc);
+
+                ret_bytes = sizeof(struct ll_user_fiemap);
+
+                if (extent_count != 0) {
+                        /* Never copy out more extents than were allocated */
+                        if (fiemap_s->fm_mapped_extents > extent_count)
+                                GOTO(error, rc = -EPROTO);
+
+                        ret_bytes += (fiemap_s->fm_mapped_extents *
+                                         sizeof(struct ll_fiemap_extent));
+                }
+
+                if (copy_to_user((void *)arg, fiemap_s, ret_bytes))
+                        rc = -EFAULT;
+
+error:
+                OBD_VFREE(fiemap_s, num_bytes);
+                RETURN(rc);
+        }
         case EXT3_IOC_GETFLAGS:
         case EXT3_IOC_SETFLAGS:
                 RETURN(ll_iocontrol(inode, file, cmd, arg));
index 084ce21..ab2ade9 100644 (file)
@@ -677,6 +677,9 @@ int ll_dir_setstripe(struct inode *inode, struct lov_user_md *lump,
                      int set_default);
 int ll_dir_getstripe(struct inode *inode, struct lov_mds_md **lmm, 
                      int *lmm_size, struct ptlrpc_request **request);
+int ll_fsync(struct file *file, struct dentry *dentry, int data);
+int ll_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap,
+              int num_bytes);
 
 /* llite/dcache.c */
 extern struct dentry_operations ll_init_d_ops;
index a57b125..75673d4 100644 (file)
@@ -64,6 +64,7 @@
 #include <lprocfs_status.h>
 #include <lustre_param.h>
 #include <lustre_cache.h>
+#include <lustre/ll_fiemap.h>
 
 #include "lov_internal.h"
 
@@ -2487,6 +2488,326 @@ static int lov_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
         RETURN(rc);
 }
 
+#define FIEMAP_BUFFER_SIZE 4096
+
+/* Non-zero fe_logical indicates that this is a continuation FIEMAP
+ * call. The local end offset and the device are sent in the first
+ * fm_extent. This function calculates the stripe number from the index
+ * and stores in *start_stripe the stripe on which mapping is to be
+ * restarted.
+ *
+ * The return value is fm_end_offset, the in-OST offset at which mapping
+ * should be restarted. If 0 is returned then the caller will re-calculate
+ * the proper offset itself. 0 is also returned, with *start_stripe left
+ * untouched, when this is not a continuation call or when the device
+ * recorded in the first extent is not one of the file's stripes.
+ * Note that the first extent is passed to lov_get_info via the value field */
+obd_size fiemap_calc_fm_end_offset(struct ll_user_fiemap *fiemap,
+                                   struct lov_stripe_md *lsm, obd_size fm_start,
+                                   obd_size fm_end, int *start_stripe)
+{
+        obd_size local_end;
+        obd_off lun_start, lun_end;
+        obd_size fm_end_offset;
+        int stripe_no = -1, i;
+
+        /* fm_extents[0] is only backed by memory when the caller asked for
+         * at least one extent, so test the count before reading it */
+        if (fiemap->fm_extent_count == 0 ||
+            fiemap->fm_extents[0].fe_logical == 0)
+                return 0;
+
+        local_end = fiemap->fm_extents[0].fe_logical;
+
+        /* Find out stripe_no from ost_index saved in the fe_device */
+        for (i = 0; i < lsm->lsm_stripe_count; i++) {
+                if (lsm->lsm_oinfo[i]->loi_ost_idx ==
+                                        fiemap->fm_extents[0].fe_device) {
+                        stripe_no = i;
+                        break;
+                }
+        }
+
+        /* The recorded device does not belong to this file: do not use -1
+         * as a stripe index, let the caller start from its own stripe */
+        if (stripe_no == -1)
+                return 0;
+
+        /* If we have finished mapping on previous device, shift logical
+         * offset to start of next device */
+        if ((lov_stripe_intersects(lsm, stripe_no, fm_start, fm_end,
+                                   &lun_start, &lun_end)) != 0 &&
+                                   local_end < lun_end) {
+                fm_end_offset = local_end;
+                *start_stripe = stripe_no;
+        } else {
+                /* This is a special value to indicate that caller should
+                 * calculate offset in next stripe. */
+                fm_end_offset = 0;
+                *start_stripe = (stripe_no + 1) % lsm->lsm_stripe_count;
+        }
+
+        return fm_end_offset;
+}
+
+/* Work out on which stripe the requested mapping ends.
+ *
+ * A range longer than one full stripe row (stripe_size * stripe_count)
+ * touches every stripe, so the last stripe is the one right before
+ * start_stripe. Otherwise the stripes are walked from start_stripe
+ * onwards until one does not intersect the range.
+ *
+ * Returns the index of the last stripe and stores in *stripe_count the
+ * number of stripes the mapping is spread over. */
+int fiemap_calc_last_stripe(struct lov_stripe_md *lsm, obd_size fm_start,
+                            obd_size fm_end, int start_stripe,
+                            int *stripe_count)
+{
+        obd_off ost_start, ost_end;
+        int stripe, covered;
+
+        /* Range wider than a full row: all stripes take part */
+        if (fm_end - fm_start > lsm->lsm_stripe_size * lsm->lsm_stripe_count) {
+                *stripe_count = lsm->lsm_stripe_count;
+                if (start_stripe < 1)
+                        return lsm->lsm_stripe_count - 1;
+                return start_stripe - 1;
+        }
+
+        /* Count consecutive intersecting stripes, wrapping around */
+        stripe = start_stripe;
+        covered = 0;
+        while (covered < lsm->lsm_stripe_count) {
+                if (lov_stripe_intersects(lsm, stripe, fm_start, fm_end,
+                                          &ost_start, &ost_end) == 0)
+                        break;
+                stripe = (stripe + 1) % lsm->lsm_stripe_count;
+                covered++;
+        }
+
+        *stripe_count = covered;
+        return (start_stripe + covered - 1) % lsm->lsm_stripe_count;
+}
+
+/* Stamp each of the first ext_count local extents with the OST index and
+ * the FIEMAP_EXTENT_NET flag, then append them to the return buffer at
+ * extent position current_extent */
+void fiemap_prepare_and_copy_exts(struct ll_user_fiemap *fiemap,
+                                  struct ll_fiemap_extent *lcl_fm_ext,
+                                  int ost_index, unsigned int ext_count,
+                                  int current_extent)
+{
+        struct ll_fiemap_extent *cur;
+        struct ll_fiemap_extent *stop = lcl_fm_ext + ext_count;
+        char *dest;
+
+        for (cur = lcl_fm_ext; cur < stop; cur++) {
+                cur->fe_flags |= FIEMAP_EXTENT_NET;
+                cur->fe_device = ost_index;
+        }
+
+        /* Destination is the slot just past the extents copied so far */
+        dest = (char *)fiemap + fiemap_count_to_size(current_extent);
+        memcpy(dest, lcl_fm_ext, ext_count * sizeof(*lcl_fm_ext));
+}
+
+/* Serve a KEY_FIEMAP get_info request for a striped file.
+ *
+ * @key is a struct ll_fiemap_info_key holding the request header and the
+ * object attributes; @val is the caller's struct ll_user_fiemap buffer that
+ * receives the header and the mapped extents. Each stripe that intersects
+ * the requested range is queried through obd_get_info() on its target, using
+ * a local buffer of at most FIEMAP_BUFFER_SIZE bytes, and the returned
+ * extents are appended to @val. When fm_extent_count is 0 only the number
+ * of extents is accumulated.
+ *
+ * Returns 0 on success (including lsm == NULL, in which case nothing is
+ * done), -ENOMEM, -EINVAL for an out-of-range OST index, or the error of
+ * the per-target obd_get_info() call. */
+static int lov_fiemap(struct lov_obd *lov, __u32 keylen, void *key,
+                      __u32 *vallen, void *val, struct lov_stripe_md *lsm)
+{
+        struct ll_fiemap_info_key *fm_key = key;
+        struct ll_user_fiemap *fiemap = val;
+        struct ll_user_fiemap *fm_local = NULL;
+        struct ll_fiemap_extent *lcl_fm_ext;
+        int count_local;
+        unsigned int get_num_extents = 0;
+        int ost_index = 0, actual_start_stripe, start_stripe;
+        obd_size fm_start, fm_end, fm_length, fm_end_offset = 0;
+        obd_size curr_loc;
+        int current_extent = 0, rc = 0, i;
+        int ost_eof = 0; /* EOF for object */
+        int ost_done = 0; /* done with required mapping for this OST? */
+        int last_stripe;
+        int cur_stripe = 0, cur_stripe_wrap = 0, stripe_count;
+        unsigned int buffer_size = FIEMAP_BUFFER_SIZE;
+
+        if (lsm == NULL)
+                GOTO(out, rc = 0);
+
+        if (fiemap_count_to_size(fm_key->fiemap.fm_extent_count) < buffer_size)
+                buffer_size = fiemap_count_to_size(fm_key->fiemap.fm_extent_count);
+
+        OBD_ALLOC(fm_local, buffer_size);
+        if (fm_local == NULL)
+                GOTO(out, rc = -ENOMEM);
+        lcl_fm_ext = &fm_local->fm_extents[0];
+
+        count_local = fiemap_size_to_count(buffer_size);
+
+        memcpy(fiemap, &fm_key->fiemap, sizeof(*fiemap));
+        fm_start = fiemap->fm_start;
+        fm_length = fiemap->fm_length;
+        /* Calculate start stripe, last stripe and length of mapping */
+        actual_start_stripe = start_stripe = lov_stripe_number(lsm, fm_start);
+        fm_end = (fm_length == ~0ULL ? fm_key->oa.o_size :
+                                                fm_start + fm_length - 1);
+        /* If fm_length != ~0ULL but fm_start+fm_length-1 exceeds file size */
+        if (fm_end > fm_key->oa.o_size)
+                fm_end = fm_key->oa.o_size;
+
+        last_stripe = fiemap_calc_last_stripe(lsm, fm_start, fm_end,
+                                              actual_start_stripe, &stripe_count);
+
+        fm_end_offset = fiemap_calc_fm_end_offset(fiemap, lsm, fm_start, fm_end,
+                                                  &start_stripe);
+
+        /* A zero extent count means the caller only wants the number of
+         * extents; no extent is stored in that mode */
+        if (fiemap->fm_extent_count == 0) {
+                get_num_extents = 1;
+                count_local = 0;
+        }
+
+        /* Check each stripe */
+        for (cur_stripe = start_stripe, i = 0; i < stripe_count;
+             i++, cur_stripe = (cur_stripe + 1) % lsm->lsm_stripe_count) {
+                obd_size req_fm_len; /* Stores length of required mapping */
+                obd_size len_mapped_single_call;
+                obd_off lun_start, lun_end, obd_object_end;
+                unsigned int ext_count;
+
+                cur_stripe_wrap = cur_stripe;
+
+                /* Find out range of mapping on this stripe */
+                if ((lov_stripe_intersects(lsm, cur_stripe, fm_start, fm_end,
+                                           &lun_start, &obd_object_end)) == 0)
+                        continue;
+
+                /* If this is a continuation FIEMAP call and we are on
+                 * starting stripe then lun_start needs to be set to
+                 * fm_end_offset */
+                if (fm_end_offset != 0 && cur_stripe == start_stripe)
+                        lun_start = fm_end_offset;
+
+                if (fm_length != ~0ULL) {
+                        /* Handle fm_start + fm_length overflow */
+                        if (fm_start + fm_length < fm_start)
+                                fm_length = ~0ULL - fm_start;
+                        lun_end = lov_size_to_stripe(lsm, fm_start + fm_length,
+                                                     cur_stripe);
+                } else {
+                        lun_end = ~0ULL;
+                }
+
+                if (lun_start == lun_end)
+                        continue;
+
+                req_fm_len = obd_object_end - lun_start;
+                fm_local->fm_length = 0;
+                len_mapped_single_call = 0;
+
+                /* If the output buffer is very large and the objects have many
+                 * extents we may need to loop on a single OST repeatedly */
+                ost_eof = 0;
+                ost_done = 0;
+                do {
+                        if (get_num_extents == 0) {
+                                /* Don't get too many extents. */
+                                if (current_extent + count_local >
+                                    fiemap->fm_extent_count)
+                                        count_local = fiemap->fm_extent_count -
+                                                                 current_extent;
+                        }
+
+                        lun_start += len_mapped_single_call;
+                        fm_local->fm_length = req_fm_len - len_mapped_single_call;
+                        req_fm_len = fm_local->fm_length;
+                        fm_local->fm_extent_count = count_local;
+                        fm_local->fm_mapped_extents = 0;
+                        fm_local->fm_flags = fiemap->fm_flags;
+
+                        fm_key->oa.o_id = lsm->lsm_oinfo[cur_stripe]->loi_id;
+                        ost_index = lsm->lsm_oinfo[cur_stripe]->loi_ost_idx;
+
+                        if (ost_index < 0 || ost_index >=lov->desc.ld_tgt_count)
+                                GOTO(out, rc = -EINVAL);
+
+                        /* If OST is inactive, return extent with UNKNOWN flag.
+                         * An empty target slot is handled the same way rather
+                         * than being dereferenced. */
+                        if (lov->lov_tgts[ost_index] == NULL ||
+                            !lov->lov_tgts[ost_index]->ltd_active) {
+                                /* NOTE(review): FIEMAP_EXTENT_LAST is an
+                                 * extent flag but is set on the request flags
+                                 * here - kept as in the original, confirm. */
+                                fm_local->fm_flags |= FIEMAP_EXTENT_LAST;
+                                fm_local->fm_mapped_extents = 1;
+
+                                /* In count-only mode the local buffer has no
+                                 * room for an extent, so only count it */
+                                if (count_local > 0) {
+                                        lcl_fm_ext[0].fe_logical = lun_start;
+                                        lcl_fm_ext[0].fe_length =
+                                                     obd_object_end - lun_start;
+                                        lcl_fm_ext[0].fe_flags |=
+                                                          FIEMAP_EXTENT_UNKNOWN;
+                                }
+
+                                goto inactive_tgt;
+                        }
+
+                        fm_local->fm_start = lun_start;
+                        fm_local->fm_flags &= ~FIEMAP_FLAG_DEVICE_ORDER;
+                        memcpy(&fm_key->fiemap, fm_local, sizeof(*fm_local));
+                        *vallen=fiemap_count_to_size(fm_local->fm_extent_count);
+                        rc = obd_get_info(lov->lov_tgts[ost_index]->ltd_exp,
+                                          keylen, key, vallen, fm_local, lsm);
+                        if (rc != 0)
+                                GOTO(out, rc);
+
+inactive_tgt:
+                        ext_count = fm_local->fm_mapped_extents;
+                        if (ext_count == 0) {
+                                ost_done = 1;
+                                /* If last stripe has hole at the end,
+                                 * then we need to return */
+                                if (cur_stripe_wrap == last_stripe) {
+                                        fiemap->fm_mapped_extents = 0;
+                                        goto finish;
+                                }
+                                break;
+                        }
+
+                        /* If we just need num of extents then go to next device */
+                        if (get_num_extents) {
+                                current_extent += ext_count;
+                                break;
+                        }
+
+                        len_mapped_single_call = lcl_fm_ext[ext_count-1].fe_logical -
+                                  lun_start + lcl_fm_ext[ext_count - 1].fe_length;
+
+                        /* Have we finished mapping on this device? */
+                        if (req_fm_len <= len_mapped_single_call)
+                                ost_done = 1;
+
+                        /* Clear the EXTENT_LAST flag which can be present on
+                         * last extent */
+                        if (lcl_fm_ext[ext_count-1].fe_flags & FIEMAP_EXTENT_LAST)
+                                lcl_fm_ext[ext_count - 1].fe_flags &=
+                                                            ~FIEMAP_EXTENT_LAST;
+
+                        curr_loc = lov_stripe_size(lsm,
+                                           lcl_fm_ext[ext_count - 1].fe_logical+
+                                           lcl_fm_ext[ext_count - 1].fe_length,
+                                           cur_stripe);
+                        if (curr_loc >= fm_key->oa.o_size)
+                                ost_eof = 1;
+
+                        fiemap_prepare_and_copy_exts(fiemap, lcl_fm_ext,
+                                                     ost_index, ext_count,
+                                                     current_extent);
+
+                        current_extent += ext_count;
+
+                        /* Ran out of available extents? */
+                        if (current_extent >= fiemap->fm_extent_count)
+                                goto finish;
+                } while (ost_done == 0 && ost_eof == 0);
+
+                if (cur_stripe_wrap == last_stripe)
+                        goto finish;
+        }
+
+finish:
+        /* Indicate that we are returning device offsets unless file just has
+         * single stripe */
+        if (lsm->lsm_stripe_count > 1)
+                fiemap->fm_flags |= FIEMAP_FLAG_DEVICE_ORDER;
+
+        if (get_num_extents)
+                goto skip_last_device_calc;
+
+        /* Check if we have reached the last stripe and whether mapping for that
+         * stripe is done. Only flag an extent that was actually returned:
+         * with no extents there is no fm_extents[current_extent - 1]. */
+        if (cur_stripe_wrap == last_stripe) {
+                if ((ost_done || ost_eof) && current_extent > 0)
+                        fiemap->fm_extents[current_extent - 1].fe_flags |=
+                                                             FIEMAP_EXTENT_LAST;
+        }
+
+skip_last_device_calc:
+        fiemap->fm_mapped_extents = current_extent;
+
+out:
+        /* The lsm == NULL and allocation-failure paths arrive here with
+         * nothing allocated, so free only what exists */
+        if (fm_local != NULL)
+                OBD_FREE(fm_local, buffer_size);
+        return rc;
+}
+
 static int lov_get_info(struct obd_export *exp, __u32 keylen,
                         void *key, __u32 *vallen, void *val,
                         struct lov_stripe_md *lsm)
@@ -2561,6 +2882,9 @@ static int lov_get_info(struct obd_export *exp, __u32 keylen,
                         if (tgt && obd_uuid_equals(val, &tgt->ltd_uuid))
                                 GOTO(out, rc = i);
                 }
+        } else if (KEY_IS(KEY_FIEMAP)) {
+                rc = lov_fiemap(lov, keylen, key, vallen, val, lsm);
+                GOTO(out, rc);
         }
 
         rc = -EINVAL;
index 0010819..958bee6 100644 (file)
@@ -76,6 +76,7 @@
 #include <lustre_quota.h>
 #include <linux/slab.h>
 #include <lustre_param.h>
+#include <lustre/ll_fiemap.h>
 
 #include "filter_internal.h"
 
@@ -3548,6 +3549,38 @@ static int filter_get_info(struct obd_export *exp, __u32 keylen,
                 RETURN(0);
         }
 
+        /* FIEMAP: run the backing filesystem's FIEMAP ioctl on the object
+         * named in the key and return the result in the value buffer */
+        if (KEY_IS(KEY_FIEMAP)) {
+                struct ll_fiemap_info_key *fm_key = key;
+                struct dentry *dentry;
+                struct ll_user_fiemap *fiemap = val;
+                struct lvfs_run_ctxt saved;
+                int rc;
+
+                /* No value buffer: only report the size a reply for the
+                 * requested extent count would need */
+                if (fiemap == NULL) {
+                        *vallen = fiemap_count_to_size(
+                                                fm_key->fiemap.fm_extent_count);
+                        RETURN(0);
+                }
+
+                dentry = __filter_oa2dentry(exp->exp_obd, &fm_key->oa,
+                                            __FUNCTION__, 1);
+                if (IS_ERR(dentry))
+                        RETURN(PTR_ERR(dentry));
+
+                /* Seed the reply buffer with the request header */
+                memcpy(fiemap, &fm_key->fiemap, sizeof(*fiemap));
+                push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+                rc = fsfilt_iocontrol(obd, dentry->d_inode, NULL,
+                                      EXT3_IOC_FIEMAP, (long)fiemap);
+                /* Undo push_ctxt() on every path, including ioctl failure */
+                pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+
+                f_dput(dentry);
+                RETURN(rc);
+        }
+
         CDEBUG(D_IOCTL, "invalid key\n");
         RETURN(-EINVAL);
 }
index 003d06f..a27e83a 100644 (file)
@@ -3494,7 +3494,39 @@ static int osc_get_info(struct obd_export *exp, obd_count keylen,
         out:
                 ptlrpc_req_finished(req);
                 RETURN(rc);
+        } else if (KEY_IS(KEY_FIEMAP)) {
+                /* FIEMAP: send the key in an OST_GET_INFO request and copy
+                 * the reply record into the caller's value buffer */
+                struct ptlrpc_request *req;
+                struct ll_user_fiemap *reply;
+                /* Request buffer 1 carries the key; buffer 0 is the body */
+                char *bufs[2] = { NULL, key };
+                int size[2] = { sizeof(struct ptlrpc_body), keylen };
+                int rc;
+
+                req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_OST_VERSION,
+                                      OST_GET_INFO, 2, size, bufs);
+                if (req == NULL)
+                        RETURN(-ENOMEM);
+
+                /* Reuse size[] for the reply: the record is *vallen bytes */
+                size[REPLY_REC_OFF] = *vallen;
+                ptlrpc_req_set_repsize(req, 2, size);
+
+                rc = ptlrpc_queue_wait(req);
+                if (rc)
+                        GOTO(out1, rc);
+                /* Fetch the reply record, using lustre_swab_fiemap as the
+                 * swab handler */
+                reply = lustre_swab_repbuf(req, REPLY_REC_OFF, *vallen,
+                                           lustre_swab_fiemap);
+                if (reply == NULL) {
+                        CERROR("Can't unpack FIEMAP reply.\n");
+                        GOTO(out1, rc = -EPROTO);
+                }
+
+                memcpy(val, reply, *vallen);
+
+        out1:
+                /* Reached on success and on failure; drops the request */
+                ptlrpc_req_finished(req);
+
+                RETURN(rc);
+
         RETURN(-EINVAL);
 }
 
index abc6e2f..6d4a45d 100644 (file)
@@ -52,6 +52,7 @@
 #include <obd_support.h>
 #include <obd_class.h>
 #include <lustre_net.h>
+#include <lustre/ll_fiemap.h>
 
 #if LUSTRE_VERSION_CODE > OBD_OCD_VERSION(1,8,0,0)
 #error "lustre_msg_v1 has been deprecated since 1.6.0, please remove it"
@@ -2181,6 +2182,30 @@ void lustre_swab_mds_rec_unlink (struct mds_rec_unlink *ul)
         CLASSERT(offsetof(typeof(*ul), ul_padding_4) != 0);
 }
 
+/* Byte-swap every field of one FIEMAP extent in place */
+void lustre_swab_fiemap_extent(struct ll_fiemap_extent *fm_extent)
+{
+        /* 32-bit fields */
+        __swab32s(&fm_extent->fe_device);
+        __swab32s(&fm_extent->fe_flags);
+
+        /* 64-bit fields */
+        __swab64s(&fm_extent->fe_length);
+        __swab64s(&fm_extent->fe_physical);
+        __swab64s(&fm_extent->fe_logical);
+}
+
+/* Byte-swap a FIEMAP header in place, followed by the fm_mapped_extents
+ * extents that trail it.
+ *
+ * NOTE(review): fm_mapped_extents is taken from the buffer itself and is
+ * not checked against the buffer length here - callers presumably
+ * guarantee the buffer is large enough; confirm. */
+void lustre_swab_fiemap(struct ll_user_fiemap *fiemap)
+{
+        /* Same width and signedness as the count it is compared against */
+        __u32 i;
+
+        __swab64s(&fiemap->fm_start);
+        __swab64s(&fiemap->fm_length);
+        __swab32s(&fiemap->fm_flags);
+        __swab32s(&fiemap->fm_mapped_extents);
+        __swab32s(&fiemap->fm_extent_count);
+        __swab32s(&fiemap->fm_reserved);
+
+        /* The count was swabbed above, so it is in host order here */
+        for (i = 0; i < fiemap->fm_mapped_extents; i++)
+                lustre_swab_fiemap_extent(&fiemap->fm_extents[i]);
+}
+
 void lustre_swab_mds_rec_rename (struct mds_rec_rename *rn)
 {
         __swab32s (&rn->rn_opcode);
index eaa0740..be33fef 100644 (file)
@@ -281,6 +281,7 @@ EXPORT_SYMBOL(lustre_msg_set_transno);
 EXPORT_SYMBOL(lustre_msg_set_status);
 EXPORT_SYMBOL(lustre_msg_set_conn_cnt);
 EXPORT_SYMBOL(lustre_swab_mgs_target_info);
+EXPORT_SYMBOL(lustre_swab_fiemap);
 
 /* recover.c */
 EXPORT_SYMBOL(ptlrpc_disconnect_import);
index db67fd4..ad9da30 100644 (file)
@@ -5147,6 +5147,224 @@ test_129() {
 }
 run_test 129 "test directory size limit ========================"
 
+test_130a() {
+       filefrag_op=$(filefrag -e 2>&1 | grep "invalid option")
+       [ -n "$filefrag_op" ] && skip '"filefrag does not support FIEMAP" && return'
+
+       local fm_file=$DIR/$tfile
+       lfs setstripe -s 65536 -c 1 $fm_file || error "setstripe failed on $fm_file"
+       dd if=/dev/zero of=$fm_file bs=65536 count=1 || error "dd failed for $fm_file"
+
+       filefrag -ves $fm_file || error "filefrag $fm_file failed"
+       filefrag_op=`filefrag -ve $fm_file | grep -A 100 "ext:" | grep -v "ext:" | grep -v "found"`
+
+       lun=`$GETSTRIPE $fm_file  | grep -A 10 obdidx | awk '{print $1}' | grep -v "obdidx"`
+
+       start_blk=`echo $filefrag_op | cut -d: -f2 | cut -d. -f1`
+       IFS=$'\n'
+       tot_len=0
+       for line in $filefrag_op
+       do
+               frag_lun=`echo $line | cut -d: -f5`
+               ext_len=`echo $line | cut -d: -f4`
+               if (( $frag_lun != $lun )); then
+                       error "FIEMAP on 1-stripe file($fm_file) failed"
+                       return
+               fi
+               (( tot_len += ext_len ))
+       done
+
+       if (( lun != frag_lun || start_blk != 0 || tot_len != 64 )); then
+               error "FIEMAP on 1-stripe file($fm_file) failed;"
+               return
+       fi
+       echo "FIEMAP on single striped file succeeded"
+}
+run_test 130a "FIEMAP (1-stripe file)"
+
+test_130b() {
+       [ "$OSTCOUNT" -lt "2" ] && skip "skipping FIEMAP on 2-stripe file test" && return
+
+       filefrag_op=$(filefrag -e 2>&1 | grep "invalid option")
+       [ -n "$filefrag_op" ] && skip '"filefrag does not support FIEMAP" && return'
+
+       local fm_file=$DIR/$tfile
+       lfs setstripe -s 65536 -c 2 $fm_file || error "setstripe failed on $fm_file"
+       dd if=/dev/zero of=$fm_file bs=1M count=2 || error "dd failed on $fm_file"
+
+       filefrag -ves $fm_file || error "filefrag $fm_file failed"
+       filefrag_op=`filefrag -ve $fm_file | grep -A 100 "ext:" | grep -v "ext:" | grep -v "found"`
+
+       last_lun=`echo $filefrag_op | cut -d: -f5`
+
+       IFS=$'\n'
+       tot_len=0
+       num_luns=1
+       for line in $filefrag_op
+       do
+               frag_lun=`echo $line | cut -d: -f5`
+               ext_len=`echo $line | cut -d: -f4`
+               if (( $frag_lun != $last_lun )); then
+                       if (( tot_len != 1024 )); then
+                               error "FIEMAP on $fm_file failed; returned len $tot_len for OST $last_lun instead of 256"
+                               return
+                       else
+                               (( num_luns += 1 ))
+                               tot_len=0
+                       fi
+               fi
+               (( tot_len += ext_len ))
+               last_lun=$frag_lun
+       done
+       if (( num_luns != 2 || tot_len != 1024 )); then
+               error "FIEMAP on $fm_file failed; returned wrong number of luns or wrong len for OST $last_lun"
+               return
+       fi
+
+       echo "FIEMAP on 2-stripe file succeeded"
+}
+run_test 130b "FIEMAP (2-stripe file)"
+
+test_130c() {
+       [ "$OSTCOUNT" -lt "2" ] && skip "skipping FIEMAP on 2-stripe file with hole test" && return
+
+       filefrag_op=$(filefrag -e 2>&1 | grep "invalid option")
+       [ -n "$filefrag_op" ] && skip '"filefrag does not support FIEMAP" && return'
+
+       local fm_file=$DIR/$tfile
+       lfs setstripe -s 65536 -c 2 $fm_file || error "setstripe failed on $fm_file"
+       dd if=/dev/zero of=$fm_file seek=1 bs=1M count=1 || error "dd failed on $fm_file"
+
+       filefrag -ves $fm_file || error "filefrag $fm_file failed"
+       filefrag_op=`filefrag -ve $fm_file | grep -A 100 "ext:" | grep -v "ext:" | grep -v "found"`
+
+       last_lun=`echo $filefrag_op | cut -d: -f5`
+
+       IFS=$'\n'
+       tot_len=0
+       num_luns=1
+       for line in $filefrag_op
+       do
+               frag_lun=`echo $line | cut -d: -f5`
+               ext_len=`echo $line | cut -d: -f4`
+               if (( $frag_lun != $last_lun )); then
+                       logical=`echo $line | cut -d: -f2 | cut -d. -f1`
+                       if (( logical != 512 )); then
+                               error "FIEMAP on $fm_file failed; returned logical start for lun $logical instead of 512"
+                               return
+                       fi
+                       if (( tot_len != 512 )); then
+                               error "FIEMAP on $fm_file failed; returned len $tot_len for OST $last_lun instead of 1024"
+                               return
+                       else
+                               (( num_luns += 1 ))
+                               tot_len=0
+                       fi
+               fi
+               (( tot_len += ext_len ))
+               last_lun=$frag_lun
+       done
+       if (( num_luns != 2 || tot_len != 512 )); then
+               error "FIEMAP on $fm_file failed; returned wrong number of luns or wrong len for OST $last_lun"
+               return
+       fi
+
+       echo "FIEMAP on 2-stripe file with hole succeeded"
+}
+run_test 130c "FIEMAP (2-stripe file with hole)"
+
+test_130d() {
+       [ "$OSTCOUNT" -lt "3" ] && skip "skipping FIEMAP on N-stripe file test" && return
+
+       filefrag_op=$(filefrag -e 2>&1 | grep "invalid option")
+       [ -n "$filefrag_op" ] && skip '"filefrag does not support FIEMAP" && return'
+
+       local fm_file=$DIR/$tfile
+       lfs setstripe -s 65536 -c $OSTCOUNT $fm_file || error "setstripe failed on $fm_file"
+       dd if=/dev/zero of=$fm_file bs=1M count=$OSTCOUNT || error "dd failed on $fm_file"
+
+       filefrag -ves $fm_file || error "filefrag $fm_file failed"
+       filefrag_op=`filefrag -ve $fm_file | grep -A 100 "ext:" | grep -v "ext:" | grep -v "found"`
+
+       last_lun=`echo $filefrag_op | cut -d: -f5`
+
+       IFS=$'\n'
+       tot_len=0
+       num_luns=1
+       for line in $filefrag_op
+       do
+               frag_lun=`echo $line | cut -d: -f5`
+               ext_len=`echo $line | cut -d: -f4`
+               if (( $frag_lun != $last_lun )); then
+                       if (( tot_len != 1024 )); then
+                               error "FIEMAP on $fm_file failed; returned len $tot_len for OST $last_lun instead of 1024"
+                               return
+                       else
+                               (( num_luns += 1 ))
+                               tot_len=0
+                       fi
+               fi
+               (( tot_len += ext_len ))
+               last_lun=$frag_lun
+       done
+       if (( num_luns != OSTCOUNT || tot_len != 1024 )); then
+               error "FIEMAP on $fm_file failed; returned wrong number of luns or wrong len for OST $last_lun"
+               return
+       fi
+
+       echo "FIEMAP on N-stripe file succeeded"
+}
+run_test 130d "FIEMAP (N-stripe file)"
+
+test_130e() {
+       [ "$OSTCOUNT" -lt "2" ] && skip "skipping continuation FIEMAP test" && return
+
+       filefrag_op=$(filefrag -e 2>&1 | grep "invalid option")
+       [ -n "$filefrag_op" ] && skip '"filefrag does not support FIEMAP" && return'
+
+       local fm_file=$DIR/$tfile
+       lfs setstripe -s 65536 -c 2 $fm_file || error "setstripe failed on $fm_file"
+       NUM_BLKS=512
+       EXPECTED_LEN=$(( (NUM_BLKS / 2) * 4 ))
+       for ((i = 0; i < $NUM_BLKS; i++))
+       do
+               dd if=/dev/zero of=$fm_file count=1 bs=4096 seek=$((2*$i)) conv=notrunc > /dev/null 2>&1
+       done
+
+       filefrag -ves $fm_file || error "filefrag $fm_file failed"
+       filefrag_op=`filefrag -ve $fm_file | grep -A 750 "ext:" | grep -v "ext:" | grep -v "found"`
+
+       last_lun=`echo $filefrag_op | cut -d: -f5`
+
+       IFS=$'\n'
+       tot_len=0
+       num_luns=1
+       for line in $filefrag_op
+       do
+               frag_lun=`echo $line | cut -d: -f5`
+               ext_len=`echo $line | cut -d: -f4`
+               if (( $frag_lun != $last_lun )); then
+                       if (( tot_len != $EXPECTED_LEN )); then
+                               error "FIEMAP on $fm_file failed; returned len $tot_len for OST $last_lun instead of $EXPECTED_LEN"
+                               return
+                       else
+                               (( num_luns += 1 ))
+                               tot_len=0
+                       fi
+               fi
+               (( tot_len += ext_len ))
+               last_lun=$frag_lun
+       done
+       if (( num_luns != 2 || tot_len != $EXPECTED_LEN )); then
+               echo "$num_luns $tot_len"
+               error "FIEMAP on $fm_file failed; returned wrong number of luns or wrong len for OST $last_lun"
+               return
+       fi
+
+       echo "FIEMAP with continuation calls succeeded"
+}
+run_test 130e "FIEMAP (test continuation FIEMAP calls)"
+
 TMPDIR=$OLDTMPDIR
 TMP=$OLDTMP
 HOME=$OLDHOME
index 3fcf95c..4570cb6 100644 (file)
@@ -1111,6 +1111,50 @@ check_quota_adjust_qunit(void)
 }
 
 static void
+check_ll_user_fiemap(void)
+{       /* Runs the wirecheck macros for struct ll_user_fiemap: the struct
+         * itself, each of its members, then the FIEMAP_FLAG_* constants.
+         * NOTE(review): the CHECK_* macros are defined outside this hunk;
+         * presumably each one emits an assertion into the generated output,
+         * so the call order here would be the output order -- confirm. */
+        BLANK_LINE();
+        CHECK_STRUCT_TYPEDEF(ll_user_fiemap);
+        CHECK_MEMBER_TYPEDEF(ll_user_fiemap, fm_start);
+        CHECK_MEMBER_TYPEDEF(ll_user_fiemap, fm_length);
+        CHECK_MEMBER_TYPEDEF(ll_user_fiemap, fm_flags);
+        CHECK_MEMBER_TYPEDEF(ll_user_fiemap, fm_mapped_extents);
+        CHECK_MEMBER_TYPEDEF(ll_user_fiemap, fm_extent_count);
+        CHECK_MEMBER_TYPEDEF(ll_user_fiemap, fm_reserved);
+        CHECK_MEMBER_TYPEDEF(ll_user_fiemap, fm_extents);
+
+        CHECK_CDEFINE(FIEMAP_FLAG_SYNC); /* FIEMAP_FLAG_* constants follow */
+        CHECK_CDEFINE(FIEMAP_FLAG_XATTR);
+        CHECK_CDEFINE(FIEMAP_FLAG_DEVICE_ORDER);
+}
+
+static void
+check_ll_fiemap_extent(void)
+{       /* Runs the wirecheck macros for struct ll_fiemap_extent: the struct
+         * itself, each of its members, then the FIEMAP_EXTENT_* constants.
+         * NOTE(review): the CHECK_* macros are defined outside this hunk;
+         * presumably each one emits an assertion into the generated output,
+         * so the call order here would be the output order -- confirm. */
+        BLANK_LINE();
+        CHECK_STRUCT_TYPEDEF(ll_fiemap_extent);
+        CHECK_MEMBER_TYPEDEF(ll_fiemap_extent, fe_logical);
+        CHECK_MEMBER_TYPEDEF(ll_fiemap_extent, fe_physical);
+        CHECK_MEMBER_TYPEDEF(ll_fiemap_extent, fe_length);
+        CHECK_MEMBER_TYPEDEF(ll_fiemap_extent, fe_flags);
+        CHECK_MEMBER_TYPEDEF(ll_fiemap_extent, fe_device);
+
+        CHECK_CDEFINE(FIEMAP_EXTENT_LAST); /* FIEMAP_EXTENT_* constants follow */
+        CHECK_CDEFINE(FIEMAP_EXTENT_UNKNOWN);
+        CHECK_CDEFINE(FIEMAP_EXTENT_DELALLOC);
+        CHECK_CDEFINE(FIEMAP_EXTENT_NO_DIRECT);
+        CHECK_CDEFINE(FIEMAP_EXTENT_SECONDARY);
+        CHECK_CDEFINE(FIEMAP_EXTENT_NET);
+        CHECK_CDEFINE(FIEMAP_EXTENT_DATA_COMPRESSED);
+        CHECK_CDEFINE(FIEMAP_EXTENT_DATA_ENCRYPTED);
+        CHECK_CDEFINE(FIEMAP_EXTENT_NOT_ALIGNED);
+        CHECK_CDEFINE(FIEMAP_EXTENT_DATA_INLINE);
+        CHECK_CDEFINE(FIEMAP_EXTENT_DATA_TAIL);
+        CHECK_CDEFINE(FIEMAP_EXTENT_UNWRITTEN);
+        CHECK_CDEFINE(FIEMAP_EXTENT_MERGED);
+}
+
+static void
 system_string (char *cmdline, char *str, int len)
 {
         int   fds[2];
@@ -1369,6 +1413,8 @@ main(int argc, char **argv)
 #endif
         check_posix_acl_xattr_entry();
         check_posix_acl_xattr_header();
+        check_ll_user_fiemap();
+        check_ll_fiemap_extent();
         printf("#endif\n");