../lipe_version.h \
../list.h \
ls3_debug.h \
+ ls3_fid2path.c \
+ ls3_fid2path.h \
ls3_main.c \
ls3_object_attrs.c \
ls3_object_attrs.h \
--- /dev/null
+#include "ls3_fid2path.h"
+#include <assert.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <linux/lustre/lustre_fid.h>
+#include <linux/lustre/lustre_idl.h>
+#include <linux/lustre/lustre_ioctl.h>
+#include "list.h"
+#include "ls3_debug.h"
+
+/* Thread local fid2path cache. Created on demand on first call to
+ * ls3_fid2path(). XXX Must be destroyed by explicit call to
+ * ls3_fid2path_cache_fini(). We assume that it is only used for
+ * directories since we only cache one path per FID. This is used to
+ * speed up normal fid2path on MDTs by using the link xattr to get the
+ * parent FIDs and names and then using the cache to get the paths of
+ * the parent FIDs.
+ *
+ * XXX Not for caching paths of arbitrary files. It won't work
+ * properly and will defeat the benefits by polluting the cache.
+ *
+ * We assume that it is only used for a single FS. This is how we use
+ * it now.
+ *
+ * TODO Move this out of thread local storage and into scanning thread
+ * info.
+ */
+static __thread struct f2p_cache *ls3_fid2path_cache;
+
+/* Sizing of the per-thread fid2path cache. */
+enum {
+	/* log2 of the number of hash buckets (1 << 10 = 1024 buckets). */
+	LS3_FID2PATH_CACHE_HASH_SHIFT = 10,
+	/* Node count above which the least recently used nodes are evicted. */
+	LS3_FID2PATH_CACHE_COUNT_MAX = 1UL << LS3_FID2PATH_CACHE_HASH_SHIFT,
+};
+
+/* Given @ptr, a pointer to the field @member embedded in a struct of
+ * type @type, yield a pointer to the containing struct. Relies on the
+ * GNU C statement-expression and typeof extensions; the __mptr
+ * temporary type-checks @ptr against the member's type. */
+#define container_of(ptr, type, member) ({ \
+	const typeof(((type *) 0)->member) * __mptr = (ptr); \
+	(type *) ((char *) __mptr - offsetof(type, member)); })
+
+/* One cached fid2path result (either a path or an error). */
+struct f2p_node {
+	/* Linkage on f2p_cache::fc_lru_list. */
+	struct lipe_list_head fn_lru_link;
+	/* Linkage on one of the f2p_cache::fc_heads hash chains. */
+	struct hlist_node fn_node;
+	/* Lookup key. */
+	struct lu_fid fn_fid;
+	/* 0 when fn_path is valid, otherwise the negative errno recorded
+	 * when the OBD_IOC_FID2PATH ioctl failed. */
+	int fn_rc;
+	/* NUL terminated path; the empty string when fn_rc < 0. */
+	char fn_path[];
+};
+
+/* Hash table of f2p_node keyed by FID, with LRU eviction. */
+struct f2p_cache {
+	/* All cached nodes; least recently used first, most recently
+	 * used last. */
+	struct lipe_list_head fc_lru_list;
+	/* Array of (fc_hash_mask + 1) hash chain heads. */
+	struct hlist_head *fc_heads;
+	/* Bucket count minus one; ANDed with the FID hash to pick a chain. */
+	size_t fc_hash_mask;
+	/* Number of nodes currently in the cache. */
+	size_t fc_node_count;
+	/* Eviction starts once fc_node_count exceeds this. */
+	size_t fc_node_count_max;
+};
+
+/* Allocate an unlinked cache node for @fid that records either a
+ * path (@rc == 0, @path != NULL) or an error (@rc != 0, @path ==
+ * NULL). Any other combination trips the assertion. */
+static struct f2p_node *f2p_node_create(const struct lu_fid *fid, int rc, const char *path)
+{
+	struct f2p_node *node;
+	size_t path_size;
+
+	assert((rc == 0) == (path != NULL));
+
+	/* An error node still stores an empty string, hence the
+	 * unconditional + 1 for the terminating '\0'. */
+	path_size = (path != NULL ? strlen(path) : 0) + 1;
+
+	node = xcalloc(1, sizeof(*node) + path_size);
+	node->fn_fid = *fid;
+	node->fn_rc = rc;
+	LIPE_INIT_LIST_HEAD(&node->fn_lru_link);
+	INIT_HLIST_NODE(&node->fn_node);
+
+	if (path != NULL)
+		memcpy(node->fn_path, path, path_size);
+
+	return node;
+}
+
+/* Free every node of @fc, then its bucket array and @fc itself.
+ * Passing NULL is a no-op. */
+static void f2p_cache_free(struct f2p_cache *fc)
+{
+	if (fc == NULL)
+		return;
+
+	/* f2p_cache_find() puts every node it caches on the LRU list,
+	 * so draining that list releases all of them; the hash chains
+	 * go away together with fc_heads below. */
+	while (!lipe_list_empty(&fc->fc_lru_list)) {
+		struct f2p_node *node;
+
+		node = lipe_list_entry(fc->fc_lru_list.next, struct f2p_node, fn_lru_link);
+		lipe_list_del(&node->fn_lru_link);
+		free(node);
+	}
+
+	free(fc->fc_heads);
+	free(fc);
+}
+
+/* Allocate an empty cache with (1 << @hash_shift) hash buckets that
+ * holds at most @count_max nodes and store it in *@pfc.
+ *
+ * Both @hash_shift and @count_max must be greater than 1.
+ *
+ * Currently always returns 0; the int return type is kept so that
+ * callers are ready for a version that can fail. */
+static int f2p_cache_create(struct f2p_cache **pfc, size_t hash_shift, size_t count_max)
+{
+	struct f2p_cache *fc;
+	size_t head_count;
+
+	assert(hash_shift > 1);
+	assert(count_max > 1);
+
+	fc = xcalloc(1, sizeof(*fc));
+
+	LIPE_INIT_LIST_HEAD(&fc->fc_lru_list);
+
+	head_count = (1UL << hash_shift);
+	fc->fc_hash_mask = head_count - 1;
+
+	fc->fc_heads = xcalloc(head_count, sizeof(fc->fc_heads[0]));
+	fc->fc_node_count_max = count_max;
+
+	/* Ownership passes to the caller; there is no failure path and
+	 * therefore nothing to clean up here. */
+	*pfc = fc;
+
+	return 0;
+}
+
+/* Look up the path of @fid in @fc. On a miss, ask the client mount
+ * open at @client_mount_fd via the OBD_IOC_FID2PATH ioctl and cache
+ * the outcome, whether it is a path or an error.
+ *
+ * Returns 0 and stores a newly allocated copy of the path in *@ppath,
+ * or returns the recorded negative errno and leaves *@ppath NULL.
+ *
+ * This does not handle paths longer than PATH_MAX (4096). It would
+ * not be easy to do so since ll_fid2path() fails with -EINVAL when
+ * given a buffer size (pathlen/gf_pathlen) greater than PATH_MAX. One
+ * option would be to resolve the path piecewise by working up the
+ * directory hierarchy. */
+static int f2p_cache_find(char **ppath, struct f2p_cache *fc, int client_mount_fd, const struct lu_fid *fid)
+{
+	struct hlist_head *bucket;
+	struct hlist_node *pos;
+	struct f2p_node *fn;
+	struct f2p_node *hit = NULL;
+	size_t hash;
+
+	*ppath = NULL;
+
+	hash = lustre_hash_fnv_1a_64(fid, sizeof(*fid));
+	bucket = &fc->fc_heads[hash & fc->fc_hash_mask];
+
+	hlist_for_each_entry(fn, pos, bucket, fn_node) {
+		if (lu_fid_eq(fid, &fn->fn_fid)) {
+			hit = fn;
+			break;
+		}
+	}
+
+	if (hit == NULL) {
+		const unsigned int pathlen = PATH_MAX;
+		struct getinfo_fid2path *gf;
+
+		gf = xcalloc(1, sizeof(*gf) + pathlen);
+		gf->gf_fid = *fid;
+		gf->gf_pathlen = pathlen;
+
+		/* XXX The path returned by OBD_IOC_FID2PATH may contain
+		 * double slashes due to some quirks in fid2path for
+		 * striped directories. (It's still correct.)
+		 *
+		 * We strip extra slashes in lipe_object_attrs_add_path(). */
+		if (ioctl(client_mount_fd, OBD_IOC_FID2PATH, gf) < 0)
+			hit = f2p_node_create(fid, -errno, NULL);
+		else
+			hit = f2p_node_create(fid, 0, gf->gf_u.gf_path);
+
+		/* The node holds its own copy of the path. */
+		free(gf);
+
+		hlist_add_head(&hit->fn_node, bucket);
+		fc->fc_node_count++;
+
+		assert(fc->fc_node_count_max > 1);
+
+		/* Evict from the head (least recently used end) of the
+		 * LRU list. The new node is not on that list yet, so it
+		 * cannot be the one evicted. */
+		while (fc->fc_node_count > fc->fc_node_count_max) {
+			struct f2p_node *victim;
+
+			assert(!lipe_list_empty(&fc->fc_lru_list));
+			victim = lipe_list_entry(fc->fc_lru_list.next, struct f2p_node, fn_lru_link);
+			lipe_list_del(&victim->fn_lru_link);
+			hlist_del(&victim->fn_node);
+			free(victim);
+			fc->fc_node_count--;
+		}
+	}
+
+	/* Hit or freshly inserted: make it the most recently used. */
+	lipe_list_move_tail(&hit->fn_lru_link, &fc->fc_lru_list);
+
+	if (hit->fn_rc < 0)
+		return hit->fn_rc;
+
+	*ppath = xstrdup(hit->fn_path);
+
+	return hit->fn_rc;
+}
+
+/* Resolve @fid to a path through the calling thread's fid2path
+ * cache, creating that cache on first use. The result and return
+ * value are those of f2p_cache_find(). */
+int ls3_fid2path(char **path, int client_mount_fd, const struct lu_fid *fid)
+{
+	struct f2p_cache *cache = ls3_fid2path_cache;
+
+	if (cache == NULL) {
+		int rc = f2p_cache_create(&ls3_fid2path_cache,
+					  LS3_FID2PATH_CACHE_HASH_SHIFT,
+					  LS3_FID2PATH_CACHE_COUNT_MAX);
+
+		/* f2p_cache_create() cannot fail as written. Should
+		 * that ever change (tuning, optional disabling, ...)
+		 * this must be reworked so that the caller falls back
+		 * to the usual fid2path. */
+		assert(rc == 0);
+
+		cache = ls3_fid2path_cache;
+	}
+
+	assert(cache != NULL);
+
+	return f2p_cache_find(path, cache, client_mount_fd, fid);
+}
+
+/* Destroy the calling thread's fid2path cache, if it has one. The
+ * next ls3_fid2path() call creates a fresh cache. */
+void ls3_fid2path_cache_fini(void)
+{
+	struct f2p_cache *cache = ls3_fid2path_cache;
+
+	ls3_fid2path_cache = NULL;
+	f2p_cache_free(cache);
+}
--- /dev/null
+#ifndef _LS3_FID2PATH_H_
+#define _LS3_FID2PATH_H_
+
+#include <stddef.h>
+
+struct lu_fid;
+
+/* Resolve @fid to a path using the client mount open at
+ * @client_mount_fd, going through a per-thread cache.
+ *
+ * Returns 0 and stores a heap allocated string in *@path, which the
+ * caller must free(). On failure returns a negative errno and sets
+ * *@path to NULL. Intended for directory FIDs only, since a single
+ * path is cached per FID. */
+int ls3_fid2path(char **path, int client_mount_fd, const struct lu_fid *fid);
+
+/* Free the calling thread's cache. Must be called explicitly by each
+ * thread that used ls3_fid2path(); harmless if no cache exists. */
+void ls3_fid2path_cache_fini(void);
+
+#endif /* _LS3_FID2PATH_H_ */
while ((c = getopt_long(argc, argv, "hil:s:v", options, NULL)) != EOF) {
switch (c) {
+ case LS3_OPT_ALL_PATHS:
+ print_all_paths = true;
+ break;
case LS3_OPT_CLIENT_MOUNT:
ls3_client_mount_path = optarg;
break;
return 0;
}
-int lipe_object_attrs_add_path(struct ls3_object_attrs *attrs,
- const char *path)
-{
- struct lipe_path_entry *lpe = NULL;
-
- lpe = xcalloc(1, sizeof(*lpe));
- lpe->lpe_path = xstrdup(path);
-
- lipe_list_add_tail(&lpe->lpe_linkage, &attrs->loa_paths);
-
- return 0;
-}
-
/* Fixup DNE striped directory path with '//'. Root => "". Does not
* return "/" for root. See also copy_strip_dne_path(). */
static void lipe_fid2path_fixup(char *path)
{
- char *d, *s;
+ char *d = path, *s = path;
+
+ while (*s == '/')
+ s++;
- for (d = path, s = path; *s != '\0'; s++) {
+ for (; *s != '\0'; s++) {
if (*s == '/' && *(s + 1) == '/')
continue;
*d = '\0';
}
+/* Append a private copy of @path, cleaned up by
+ * lipe_fid2path_fixup(), to the tail of @attrs->loa_paths. Always
+ * returns 0. */
+int lipe_object_attrs_add_path(struct ls3_object_attrs *attrs,
+			       const char *path)
+{
+	struct lipe_path_entry *entry = xcalloc(1, sizeof(*entry));
+
+	entry->lpe_path = xstrdup(path);
+	lipe_fid2path_fixup(entry->lpe_path);
+
+	lipe_list_add_tail(&entry->lpe_linkage, &attrs->loa_paths);
+
+	return 0;
+}
+
int lipe_object_attrs_set_paths(struct ls3_object_attrs *loa,
int client_mount_fd)
{
goto out;
}
- lipe_fid2path_fixup(gf->gf_u.gf_path);
-
rc = lipe_object_attrs_add_path(loa, gf->gf_u.gf_path);
if (rc < 0)
goto out;
int64_t loa_projid;
struct lu_fid loa_file_fid;
struct lu_fid loa_self_fid;
-
+ uint32_t loa_lma_compat;
+ uint32_t loa_lma_incompat;
char loa_leh_buf[XATTR_SIZE_MAX];
char loa_lmv_buf[XATTR_SIZE_MAX];
struct lov_user_md *loa_lum;
*/
#include "ls3_scan.h"
#include <stdbool.h>
+#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <com_err.h>
#include <ext2fs/ext2fs.h>
#include "ls3_debug.h"
+#include "ls3_fid2path.h"
#include "ls3_object_attrs.h"
/* XXX We are mixing libext2fs errcode_t (long), pthread positive rcs,
lma = (struct lustre_mdt_attrs *)buf;
fid_le_to_cpu(&loa->loa_self_fid, &lma->lma_self_fid);
+ loa->loa_lma_compat = ext2fs_le32_to_cpu(lma->lma_compat);
+ loa->loa_lma_incompat = ext2fs_le32_to_cpu(lma->lma_incompat);
loa->loa_attr_bits |= LS3_OBJECT_ATTR_SELF_FID;
return 0;
if (loa->loa_attr_bits & LS3_OBJECT_ATTR_LINKS)
return 0;
+ assert(lipe_list_empty(&loa->loa_links));
+
+	/* We use the link xattr plus caching to speed up paths, but
+	 * this is tricky because the link xattr is not always updated
+	 * properly after unlink. This is why we test
+	 * LMAI_ORPHAN. */
+
+ rc = ldiskfs_read_attr_self_fid(li, lo, loa);
+ if (rc < 0)
+ return rc;
+
+ if (loa->loa_lma_incompat & LMAI_ORPHAN)
+ goto out_ok;
+
rc = ext2fs_attr_get(fs, (struct ext2_inode *)inode,
EXT2_ATTR_INDEX_TRUSTED,
XATTR_NAME_LINK + strlen("trusted."),
LS3_ERROR_OBJ(lo, "cannot decode link xattr: rc = %ld\n", rc);
return rc;
}
-
+out_ok:
loa->loa_attr_bits |= LS3_OBJECT_ATTR_LINKS;
return 0;
}
+/* printf-style formatting into a newly allocated string, which the
+ * caller must free(). Never returns NULL: if vasprintf() fails the
+ * process is aborted. */
+static __attribute__((format(printf, 1, 2)))
+char *xsprintf(const char *fmt, ...)
+{
+	char *str = NULL;
+	va_list ap;
+	int rc;
+
+	va_start(ap, fmt);
+	rc = vasprintf(&str, fmt, ap);
+	va_end(ap);
+
+	/* Do not rely on assert() for this: it compiles away under
+	 * NDEBUG, and the contents of str are undefined after a failed
+	 * vasprintf(). */
+	if (rc < 0 || str == NULL) {
+		fprintf(stderr, "xsprintf: cannot format string\n");
+		abort();
+	}
+
+	return str;
+}
+
static int
ldiskfs_read_attr_paths(struct ls3_instance *li,
struct lipe_object *lo,
struct ls3_object_attrs *loa)
{
+ struct lipe_link_entry *lle;
int rc;
if (loa->loa_attr_bits & LS3_OBJECT_ATTR_PATHS)
return 0;
- rc = ldiskfs_read_attr_file_fid(li, lo, loa);
- if (rc < 0)
- return rc;
+ assert(lipe_list_empty(&loa->loa_paths));
- if (li->li_device_is_mdt) {
- /* We cannot use links to make paths faster because
- * the linkea is not updated properly after
- * unlink. But requiring a link xattr before fid2path
- * prevents MDT crashes when we pass fids of OI_scrub
- * or other internal files. */
- rc = ldiskfs_read_attr_links(li, lo, loa);
+ if (li->li_device_is_ost) {
+ rc = ldiskfs_read_attr_file_fid(li, lo, loa);
if (rc < 0)
return rc;
+
+ return lipe_object_attrs_set_paths(loa, li->li_client_mount_fd);
+ }
+
+ /* Requiring a link xattr before fid2path prevents MDT crashes
+ * when we pass fids of OI_scrub or other internal files. */
+
+ rc = ldiskfs_read_attr_links(li, lo, loa);
+ if (rc < 0)
+ return rc;
+
+ if (lipe_list_empty(&loa->loa_links)) {
+ /* This must be a pending/orphan file. */
+ goto out_ok;
}
- return lipe_object_attrs_set_paths(loa, li->li_client_mount_fd);
+ /* TODO Add a way to tune or disable fid2path caching. */
+
+ rc = 0;
+ lipe_list_for_each_entry(lle, &loa->loa_links, lle_linkage) {
+ char *parent_path = NULL;
+ char *path = NULL;
+ int rc2;
+
+ rc2 = ls3_fid2path(&parent_path, li->li_client_mount_fd, &lle->lle_parent_fid);
+ if (rc2 < 0) {
+ assert(parent_path == NULL);
+ rc = rc2;
+ continue;
+ }
+
+ path = xsprintf("%s/%s", parent_path, lle->lle_name);
+ lipe_object_attrs_add_path(loa, path);
+ free(parent_path);
+ free(path);
+ }
+
+ if (lipe_list_empty(&loa->loa_paths)) {
+ assert(rc < 0);
+ return rc;
+ }
+out_ok:
+ loa->loa_attr_bits |= LS3_OBJECT_ATTR_PATHS;
+
+ return 0;
}
static int ldiskfs_copy_xattr(char *name, char *value, size_t value_len,
out_free_attrs:
lipe_object_attrs_fini(&loa);
out:
+ ls3_fid2path_cache_fini();
+
LS3_DEBUG_D(rc);
return (void *)rc;
+
#!/bin/bash
#
# Tests for lipe_find and lipe_scan.
init_lipe_scan3_env_file "$file"
fid=$($LFS path2fid "$file")
-lipe_scan3 "$device" --print-file-fid
+ lipe_scan3 "$device" --print-file-fid
out=$(lipe_scan3 "$device" --print-file-fid)
[[ "$out" == "$fid" ]] || error "--print-file-fid should print '$fid'"
out=$(lipe_scan3 "$device" --print-relative-path)
[[ "$out" == "$tfile" ]] || error "--print-relative-path should print relative path"
- # TODO --all-paths
# TODO --null
# TODO --delim
}
run_test 302 "--print-*-path options work"
+# Build a tree of striped directories and check that scanning MDT0
+# with --print-absolute-path reports the same set of paths as
+# 'lfs find --mdt-index 0' run on the client mount.
+test_303() {
+	local facet=mds1
+	local device="$(facet_device $facet)"
+	local tmp1=$(mktemp)
+	local tmp2=$(mktemp)
+
+	init_lipe_scan3_env
+
+	$LFS mkdir -c $MDSCOUNT $MOUNT/$tdir
+	$LFS mkdir -c $MDSCOUNT $MOUNT/$tdir/d{0..3}
+	$LFS mkdir -c 1 -i 0 $MOUNT/$tdir/d{0..3}/d{0..3}
+	touch $MOUNT/$tdir/d{0..3}/d{0..3}/f{0..3}
+	sync
+
+	# XXX lipe_scan3 does not return the ROOT
+	(echo "$MOUNT"; lipe_scan3 "${device}" --print-absolute-path) | sort > "$tmp1"
+	$LFS find "$MOUNT" --mdt-index 0 | sort > "$tmp2"
+	diff "$tmp1" "$tmp2" || error "--print-absolute-path should print the right paths"
+
+	# Clean up the temporary files.
+	rm -f "$tmp1" "$tmp2"
+}
+run_test 303 "--print-absolute-path prints the right paths"
+
+# Create a file with two extra hard links and check the absolute and
+# relative path output, both for the first path only and with
+# --all-paths.
+test_304() {
+	local facet=mds1
+	local device="$(facet_device $facet)"
+	local file=$MOUNT/$tfile
+	local out
+	local expect
+
+	init_lipe_scan3_env_file "$file"
+
+	ln "$file" "$file-1"
+	ln "$file" "$file-2"
+	sync
+
+	out=$(lipe_scan3 "${device}" --print-absolute-path)
+	[[ "$out" == "$file" ]] || error "print-absolute-path got '$out', expected '$file'"
+
+	out=$(lipe_scan3 "${device}" --print-absolute-path --all-paths | sort)
+	expect=$(ls "$file"*)
+
+	[[ "$out" == "$expect" ]] || error "print-absolute-path got '$out', expected '$expect'"
+
+	out=$(lipe_scan3 "${device}" --print-relative-path)
+	[[ "$out" == "$tfile" ]] || error "print-relative-path got '$out', expected '$tfile'"
+
+	# The relative paths of all links are compared against a listing
+	# of the mount point.
+	out=$(lipe_scan3 "${device}" --print-relative-path --all-paths | sort)
+	expect=$(ls "$MOUNT")
+	[[ "$out" == "$expect" ]] || error "print-relative-path got '$out', expected '$expect'"
+}
+run_test 304 "print-*-paths with multiple links"
+
+# Check the "paths" array of the JSON output for a file on MDT0:
+# first with a single link, then entry by entry after two more hard
+# links have been added.
+test_305() {
+	local facet=mds1
+	local device="$(facet_device $facet)"
+	local file
+	local fid
+	local mdt_index
+	local out
+	local suffix
+	local idx
+
+	init_lipe_scan3_env
+
+	$LFS mkdir -c $MDSCOUNT $MOUNT/$tdir
+	$LFS mkdir -c $MDSCOUNT $MOUNT/$tdir/d0
+	$LFS mkdir -c 1 -i 0 $MOUNT/$tdir/d0/d0
+	file=$MOUNT/$tdir/d0/d0/f0
+	touch $file
+	fid=$($LFS path2fid $file)
+	mdt_index=$($LFS getstripe --mdt-index $file)
+	((mdt_index == 0)) || error "expected MDT index 0, got '$mdt_index'"
+	sync
+
+	# Single link: the whole array is exactly one path.
+	out=$(lipe_scan3 "${device}" --print-json=file_fid,paths |
+		jq --raw-output --arg FID "$fid" 'select(.file_fid == $FID) | .paths[]')
+	[[ "$out" == "$tdir/d0/d0/f0" ]] || error "JSON got '$out', expected '$tdir/d0/d0/f0'"
+
+	ln "$file" "$file"-1
+	ln "$file" "$file"-2
+	sync
+
+	# Three links: paths[0], paths[1] and paths[2] are checked one
+	# scan at a time, in the order the links were created.
+	idx=0
+	for suffix in "" "-1" "-2"; do
+		out=$(lipe_scan3 "${device}" --print-json=file_fid,paths |
+			jq --raw-output --arg FID "$fid" "select(.file_fid == \$FID) | .paths[$idx]")
+		[[ "$out" == "$tdir/d0/d0/f0$suffix" ]] || error "JSON got '$out', expected '$tdir/d0/d0/f0$suffix'"
+		idx=$((idx + 1))
+	done
+}
+run_test 305 "print-json prints the right paths"
+
# loading and scripts
test_400() {