RETURN(rc);
}
+/*
+ * Hand the capability key array down to the child md device.  The child
+ * must implement mdo_init_capa_keys; its return code is passed back.
+ */
+static int cmm_init_capa_keys(struct md_device *md,
+                              struct lustre_capa_key *keys)
+{
+        struct cmm_device *cmm_dev = md2cmm_dev(md);
+        int result;
+        ENTRY;
+
+        LASSERT(cmm_child_ops(cmm_dev)->mdo_init_capa_keys);
+
+        result = cmm_child_ops(cmm_dev)->mdo_init_capa_keys(cmm_dev->cmm_child,
+                                                            keys);
+        RETURN(result);
+}
+
+/*
+ * Pass a capability key update down to the child md device and return the
+ * child's result.
+ */
+static int cmm_update_capa_key(const struct lu_context *ctxt,
+                               struct md_device *md,
+                               struct lustre_capa_key *key)
+{
+        struct cmm_device *cmm_dev = md2cmm_dev(md);
+        int rc;
+        ENTRY;
+
+        /* same guard as cmm_init_capa_keys(): fail with a clear assertion
+         * if the child lacks the method instead of calling through NULL */
+        LASSERT(cmm_child_ops(cmm_dev)->mdo_update_capa_key);
+        rc = cmm_child_ops(cmm_dev)->mdo_update_capa_key(ctxt,
+                                                         cmm_dev->cmm_child,
+                                                         key);
+        RETURN(rc);
+}
+
static struct md_device_operations cmm_md_ops = {
.mdo_statfs = cmm_statfs,
.mdo_root_get = cmm_root_get,
.mdo_maxsize_get = cmm_maxsize_get,
+ .mdo_init_capa_keys = cmm_init_capa_keys,
+ .mdo_update_capa_key= cmm_update_capa_key,
};
extern struct lu_device_type mdc_device_type;
{
struct cmm_device *m = lu2cmm_dev(d);
- LASSERT(atomic_read(&d->ld_ref) == 0);
LASSERT(m->cmm_tgt_count == 0);
LASSERT(list_empty(&m->cmm_targets));
md_device_fini(&m->cmm_md_dev);
RETURN(rc);
}
+/*
+ * Local-object capability fetch: delegates to mo_capa_get() on the object
+ * returned by md_object_next() and returns its result.
+ */
+static int cml_capa_get(const struct lu_context *ctxt, struct md_object *mo,
+                        struct lustre_capa *capa)
+{
+        int result;
+        ENTRY;
+
+        result = mo_capa_get(ctxt, md_object_next(mo), capa);
+
+        RETURN(result);
+}
+
static struct md_object_operations cml_mo_ops = {
.moo_permission = cml_permission,
.moo_attr_get = cml_attr_get,
.moo_open = cml_open,
.moo_close = cml_close,
.moo_readpage = cml_readpage,
- .moo_readlink = cml_readlink
+ .moo_readlink = cml_readlink,
+ .moo_capa_get = cml_capa_get
};
/* md_dir operations */
/* rename is split to local/remote by location of new parent dir */
struct md_object *md_object_find(const struct lu_context *ctx,
- struct md_device *md,
- const struct lu_fid *f)
+ struct md_device *md,
+ const struct lu_fid *f)
{
struct lu_object *o;
struct md_object *m;
ENTRY;
- o = lu_object_find(ctx, md2lu_dev(md)->ld_site, f);
+ o = lu_object_find(ctx, md2lu_dev(md)->ld_site, f, BYPASS_CAPA);
if (IS_ERR(o))
m = (struct md_object *)o;
else {
RETURN(-EREMOTE);
}
+/*
+ * Remote-object capability fetch: always fails with -EFAULT; none of the
+ * arguments are used.
+ */
+static int cmr_capa_get(const struct lu_context *ctxt, struct md_object *mo,
+                        struct lustre_capa *capa)
+{
+        int rc = -EFAULT;
+
+        RETURN(rc);
+}
+
static struct md_object_operations cmr_mo_ops = {
.moo_permission = cmr_permission,
.moo_attr_get = cmr_attr_get,
.moo_open = cmr_open,
.moo_close = cmr_close,
.moo_readpage = cmr_readpage,
- .moo_readlink = cmr_readlink
+ .moo_readlink = cmr_readlink,
+ .moo_capa_get = cmr_capa_get
};
/* remote part of md_dir operations */
memset(&mci->mci_opdata, 0, sizeof(mci->mci_opdata));
- rc = md_getattr(mc->mc_desc.cl_exp, lu_object_fid(&mo->mo_lu),
+ /* FIXME: split capability */
+ rc = md_getattr(mc->mc_desc.cl_exp, lu_object_fid(&mo->mo_lu), NULL,
OBD_MD_FLMODE | OBD_MD_FLUID | OBD_MD_FLGID |
OBD_MD_FLFLAGS,
0, &mci->mci_req);
mci = mdc_info_init(ctx);
+ /* FIXME: capability for split! */
rc = md_is_subdir(mc->mc_desc.cl_exp, lu_object_fid(&mo->mo_lu),
- fid, &mci->mci_req);
-
+ fid, NULL, NULL, &mci->mci_req);
if (rc)
GOTO(out, rc);
obd_cache.h obd_class.h obd_echo.h obd.h obd_lov.h \
obd_ost.h obd_support.h lustre_ver.h lu_object.h \
md_object.h dt_object.h lustre_param.h lustre_mdt.h \
- lustre_fid.h lustre_fld.h lustre_req_layout.h
+ lustre_fid.h lustre_fld.h lustre_req_layout.h lustre_capa.h
* consistent.
*/
int (*loo_object_invariant)(const struct lu_object *o);
+ /*
+ * Called to authorize action by capability.
+ */
+ int (*loo_object_auth)(const struct lu_context *ctx,
+ const struct lu_object *o,
+ struct lustre_capa *capa,
+ __u64 opc);
};
/*
*/
struct lu_fid loh_fid;
/*
+ * Fid capability.
+ */
+ unsigned int loh_capa_bypass:1; /* bypass capability check */
+ struct lustre_capa loh_capa; /* capability sent by client */
+ /*
* Common object attributes, cached for efficiency. From enum
* lu_object_header_attr.
*/
__u32 s_cache_race;
__u32 s_lru_purged;
} ls_stats;
+
+ /* Capability */
+ struct lustre_capa_key *ls_capa_keys;
+ unsigned long ls_capa_timeout;
+ __u32 ls_capa_alg;
};
/*
* any case, additional reference is acquired on the returned object.
*/
struct lu_object *lu_object_find(const struct lu_context *ctxt,
- struct lu_site *s, const struct lu_fid *f);
+ struct lu_site *s, const struct lu_fid *f,
+ struct lustre_capa *c);
+
+/*
+ * Auth lu_object capability.
+ */
+int lu_object_auth(const struct lu_context *ctxt, const struct lu_object *o,
+ struct lustre_capa *capa, __u64 opc);
/*
* Helpers.
}
/*
+ * Pointer to the fid capability of this object.
+ */
+static inline struct lustre_capa *
+lu_object_capa(const struct lu_object *o)
+{
+ return &o->lo_header->loh_capa;
+}
+
+/* Read the loh_capa_bypass bit of the object's header. */
+static inline int lu_object_capa_bypass(const struct lu_object *o)
+{
+ return o->lo_header->loh_capa_bypass;
+}
+
+/*
* return device operations vector for this object
*/
static inline struct lu_device_operations *
return o->lo_header->loh_attr;
}
+/* Set the loh_capa_bypass bit in the object's header. */
+static inline void lu_object_bypass_capa(struct lu_object *o)
+{
+ o->lo_header->loh_capa_bypass = 1;
+}
+
struct lu_rdpg {
/* input params, should be filled out by mdt */
__u32 rp_hash; /* hash */
#define DFID "[%16.16"LPF64"x/%8.8x:%8.8x]"
-#define PFID(fid) \
- fid_seq((fid)), \
- fid_oid((fid)), \
- fid_ver((fid))
+#define PFID(fid) \
+ fid_seq(fid), \
+ fid_oid(fid), \
+ fid_ver(fid)
extern void lustre_swab_lu_fid(struct lu_fid *fid);
extern void lustre_swab_lu_range(struct lu_range *range);
#define MEA_MAGIC_HASH_SEGMENT 0xb222a11b
#define MAX_HASH_SIZE 0x7fffffff
+/* TODO: lmv_stripe_md should contain mds capabilities for all slave fids */
struct lmv_stripe_md {
__u32 mea_magic;
__u32 mea_count;
__u32 lm_buflens[0];
};
-/* without security, ptlrpc_body is put in the first buffer. */
+/* without gss, ptlrpc_body is put at the first buffer. */
struct ptlrpc_body {
struct lustre_handle pb_handle;
__u32 pb_type;
#define OBD_CONNECT_RMT_CLIENT 0x40000ULL /* Remote 1.8 client */
#define OBD_CONNECT_BRW_SIZE 0x80000ULL /* Max bytes per rpc */
#define OBD_CONNECT_QUOTA64 0x100000ULL /* 64bit qunit_data.qd_count b=10707*/
-#define OBD_CONNECT_FID_CAPA 0x200000ULL /* fid capability */
+#define OBD_CONNECT_MDS_CAPA 0x200000ULL /* MDS capability */
#define OBD_CONNECT_OSS_CAPA 0x400000ULL /* OSS capability */
/* also update obd_connect_names[] for lprocfs_rd_connect_flags()
* and lustre/utils/wirecheck.c */
/* Size-on-MDS epoch and flags. */
__u64 ioepoch;
__u32 flags;
+
+ struct obd_capa *mod_capa1;
+ struct obd_capa *mod_capa2;
};
#define MDS_MODE_DONT_LOCK (1 << 30)
#define OBD_MD_FLXATTRRM (0x0000004000000000ULL) /* xattr remove */
#define OBD_MD_FLACL (0x0000008000000000ULL) /* ACL */
#define OBD_MD_FLRMTPERM (0x0000010000000000ULL) /* remote permission */
+#define OBD_MD_FLMDSCAPA (0x0000020000000000ULL) /* MDS capability */
+#define OBD_MD_FLOSSCAPA (0x0000040000000000ULL) /* OSS capability */
#define OBD_MD_FLGETATTR (OBD_MD_FLID | OBD_MD_FLATIME | OBD_MD_FLMTIME | \
OBD_MD_FLCTIME | OBD_MD_FLSIZE | OBD_MD_FLBLKSZ | \
MDS_SETXATTR = 50,
MDS_WRITEPAGE = 51,
MDS_IS_SUBDIR = 52,
+ MDS_RENEW_CAPA = 53,
MDS_LAST_OPC
} mds_cmd_t;
#ifdef CONFIG_FS_POSIX_ACL
struct posix_acl *posix_acl;
#endif
- struct mdt_remote_perm *remote_perm;
+ struct mdt_remote_perm *remote_perm;
+ struct obd_capa *mds_capa;
+ struct obd_capa *oss_capa;
};
#define Q_QUOTACHECK 0x800100
__u32 sa_uid;
__u32 sa_gid;
__u32 sa_attr_flags;
- __u32 sa_padding; /* also fix lustre_swab_mdt_rec_setattr */
+ __u32 sa_padding; /* also fix lustre_swab_mds_rec_setattr */
};
extern void lustre_swab_mdt_rec_setattr (struct mdt_rec_setattr *sa);
SEC_LAST_OPC
} sec_cmd_t;
+/*
+ * capa related definitions
+ */
+#define CAPA_HMAC_MAX_LEN 64
+#define CAPA_HMAC_KEY_MAX_LEN 56
+
+/* NB: take care when changing the order of the members of this struct,
+ * because the offset info is used in find_capa() */
+struct lustre_capa {
+ struct lu_fid lc_fid; /* fid */
+ __u64 lc_opc; /* operations allowed */
+ __u32 lc_flags; /* HMAC algorithm & flags */
+ __u32 lc_keyid; /* key used for the capability */
+ __u64 lc_expiry; /* expiry time (sec) */
+ __u8 lc_hmac[CAPA_HMAC_MAX_LEN]; /* HMAC */
+} __attribute__((packed));
+
+extern void lustre_swab_lustre_capa(struct lustre_capa *c);
+
+/* lustre_capa.lc_opc */
+enum {
+ /* MDS only fid capability */
+ CAPA_OPC_BODY_WRITE = 1, /* write fid data */
+ CAPA_OPC_BODY_READ = 1<<1, /* read fid data */
+ CAPA_OPC_INDEX_LOOKUP = 1<<2, /* lookup fid */
+ CAPA_OPC_INDEX_INSERT = 1<<3, /* insert fid */
+ CAPA_OPC_INDEX_DELETE = 1<<4, /* delete fid */
+ /* OSS only fid capability */
+ CAPA_OPC_OSS_WRITE = 1<<5, /* write oss object data */
+ CAPA_OPC_OSS_READ = 1<<6, /* read oss object data */
+ CAPA_OPC_OSS_TRUNC = 1<<7, /* truncate oss object */
+ /* MDS & OSS both might have */
+ CAPA_OPC_META_WRITE = 1<<8, /* write fid meta data */
+ CAPA_OPC_META_READ = 1<<9, /* read fid meta data */
+
+};
+
+#define CAPA_OPC_MDS_ONLY \
+ (CAPA_OPC_BODY_WRITE | CAPA_OPC_BODY_READ | \
+ CAPA_OPC_INDEX_LOOKUP | CAPA_OPC_INDEX_INSERT | CAPA_OPC_INDEX_DELETE)
+#define CAPA_OPC_OSS_ONLY \
+ (CAPA_OPC_OSS_WRITE | CAPA_OPC_OSS_READ | CAPA_OPC_OSS_TRUNC)
+/* Default opcode sets.  The replacement lists are fully parenthesized so
+ * the macros expand safely inside any surrounding expression. */
+#define CAPA_OPC_MDS_DEFAULT (~CAPA_OPC_OSS_ONLY)
+#define CAPA_OPC_OSS_DEFAULT (~(CAPA_OPC_MDS_ONLY | CAPA_OPC_OSS_ONLY))
+
+/* Non-zero when lc_opc contains at least one CAPA_OPC_MDS_ONLY bit. */
+static inline int capa_for_mds(struct lustre_capa *c)
+{
+        return !!(c->lc_opc & CAPA_OPC_MDS_ONLY);
+}
+
+/* Non-zero when lc_opc contains at least one CAPA_OPC_OSS_ONLY bit. */
+static inline int capa_for_oss(struct lustre_capa *c)
+{
+        return !!(c->lc_opc & CAPA_OPC_OSS_ONLY);
+}
+
+/* lustre_capa.lc_flags */
+enum {
+ CAPA_FL_SHORT_EXPIRY = 1, /* short capa expiry */
+ CAPA_FL_ROOT = 2, /* root fid capa, will always renew */
+};
+
+/* HMAC algorithm id, kept in the top 8 bits of lustre_capa.lc_flags */
+enum {
+ CAPA_HMAC_ALG_SHA1 = 1, /* sha1 algorithm */
+ CAPA_HMAC_ALG_MAX,
+};
+
+#define CAPA_FL_MASK 0x00ffffff
+#define CAPA_HMAC_ALG_MASK 0xff000000
+
+struct lustre_capa_key {
+ __u64 lk_mdsid; /* mds# */
+ __u32 lk_keyid; /* key# */
+ __u32 lk_padding;
+ __u8 lk_key[CAPA_HMAC_KEY_MAX_LEN]; /* key */
+} __attribute__((packed));
+
+extern void lustre_swab_lustre_capa_key(struct lustre_capa_key *k);
+
+typedef int (* renew_capa_cb_t)(struct obd_capa *, struct lustre_capa *);
+
#endif
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2005 Cluster File Systems, Inc.
+ * Author: Lai Siyao <lsy@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Lustre capability support.
+ */
+
+#ifndef __LINUX_CAPA_H_
+#define __LINUX_CAPA_H_
+
+/*
+ * capability
+ */
+#ifdef __KERNEL__
+#include <linux/crypto.h>
+#endif
+#include <lustre/lustre_idl.h>
+
+#define NR_CAPAHASH 32
+#define CAPA_HASH_SIZE 3000 /* for MDS & OSS */
+
+#define CAPA_TIMEOUT 1800 /* sec, == 30 min */
+#define CAPA_KEY_TIMEOUT (24 * 60 * 60) /* sec, == 1 day */
+
+struct capa_hmac_alg {
+ const char *ha_name;
+ int ha_len;
+ int ha_keylen;
+};
+
+#define DEF_CAPA_HMAC_ALG(name, type, len, keylen) \
+[CAPA_HMAC_ALG_ ## type] = { \
+ .ha_name = name, \
+ .ha_len = len, \
+ .ha_keylen = keylen, \
+}
+
+struct client_capa {
+ struct inode *inode;
+ struct list_head lli_list; /* link to lli_oss_capas */
+ atomic_t open_count; /* open count */
+};
+
+struct target_capa {
+ struct hlist_node c_hash; /* link to capa hash */
+};
+
+struct obd_capa {
+ struct list_head c_list; /* link to capa_list */
+
+ struct lustre_capa c_capa; /* capa */
+ atomic_t c_refc; /* ref count */
+ cfs_time_t c_expiry; /* jiffies */
+ spinlock_t c_lock; /* protect capa content */
+ int c_site;
+ int c_flags;
+
+ union {
+ struct client_capa cli;
+ struct target_capa tgt;
+ } u;
+};
+
+enum {
+ CAPA_SITE_CLIENT = 0,
+ CAPA_SITE_SERVER,
+ CAPA_SITE_MAX
+};
+
+enum {
+ OBD_CAPA_FL_NEW = 1,
+ OBD_CAPA_FL_EXPIRED = 1<<1,
+ OBD_CAPA_FL_ROOT = 1<<2,
+ OBD_CAPA_FL_SPLIT = 1<<3
+};
+
+/* Accessor: the lc_opc field of \a capa. */
+static inline __u64 capa_opc(struct lustre_capa *capa)
+{
+ return capa->lc_opc;
+}
+
+/* Accessor: pointer to the lc_fid embedded in \a capa (not a copy). */
+static inline struct lu_fid *capa_fid(struct lustre_capa *capa)
+{
+ return &capa->lc_fid;
+}
+
+/* Accessor: the lc_keyid field of \a capa. */
+static inline __u32 capa_keyid(struct lustre_capa *capa)
+{
+ return capa->lc_keyid;
+}
+
+/* Accessor: the lc_expiry field of \a capa. */
+static inline __u64 capa_expiry(struct lustre_capa *capa)
+{
+ return capa->lc_expiry;
+}
+
+/*
+ * Flag portion of lc_flags: the bits selected by CAPA_FL_MASK.  The named
+ * mask is used (instead of a repeated literal) so this accessor cannot
+ * drift from the definition in lustre_idl.h.
+ */
+static inline __u32 capa_flags(struct lustre_capa *capa)
+{
+        return capa->lc_flags & CAPA_FL_MASK;
+}
+
+/* Value held in the bits of lc_flags above bit 23 (lc_flags >> 24). */
+static inline __u32 capa_alg(struct lustre_capa *capa)
+{
+        return capa->lc_flags >> 24;
+}
+
+/* Accessor: the lk_mdsid field of \a key. */
+static inline __u64 capa_key_mdsid(struct lustre_capa_key *key)
+{
+ return key->lk_mdsid;
+}
+
+/* Accessor: the lk_keyid field of \a key. */
+static inline __u32 capa_key_keyid(struct lustre_capa_key *key)
+{
+ return key->lk_keyid;
+}
+
+#define DEBUG_CAPA(level, c, fmt, args...) \
+do { \
+CDEBUG(level, fmt " capability@%p opc "LPX64" fid "DFID" keyid %u expiry "LPU64\
+ " flags %u alg %d\n", \
+ ##args, c, capa_opc(c), PFID(capa_fid(c)), capa_keyid(c), \
+ capa_expiry(c), capa_flags(c), capa_alg(c)); \
+} while (0)
+
+#define DEBUG_CAPA_KEY(level, k, fmt, args...) \
+do { \
+CDEBUG(level, fmt " capability key@%p mdsid "LPU64" keyid %u\n", \
+ ##args, k, capa_key_mdsid(k), capa_key_keyid(k)); \
+} while (0)
+
+/* obdclass/capa.c */
+extern struct list_head capa_list[];
+extern spinlock_t capa_lock;
+extern int capa_count[];
+extern cfs_mem_cache_t *capa_cachep;
+
+struct obd_capa *capa_add(struct lustre_capa *capa);
+struct obd_capa *capa_lookup(struct lustre_capa *capa);
+
+int capa_hmac(__u8 *hmac, struct lustre_capa *capa, __u8 *key);
+void capa_cpy(void *dst, struct obd_capa *ocapa);
+
+void cleanup_capas(int site);
+void dump_capa_hmac(char *buf, char *key);
+
+/* 1 if OBD_CAPA_FL_NEW is set in c_flags, 0 otherwise. */
+static inline int obd_capa_is_new(struct obd_capa *oc)
+{
+        return (oc->c_flags & OBD_CAPA_FL_NEW) != 0;
+}
+
+/* 1 if OBD_CAPA_FL_EXPIRED is set in c_flags, 0 otherwise. */
+static inline int obd_capa_is_expired(struct obd_capa *oc)
+{
+        return (oc->c_flags & OBD_CAPA_FL_EXPIRED) != 0;
+}
+
+/*
+ * A capa is valid when neither OBD_CAPA_FL_NEW nor OBD_CAPA_FL_EXPIRED is
+ * set, i.e. exactly the state obd_capa_set_valid() establishes.  Returns 1
+ * when valid, 0 otherwise.
+ */
+static inline int obd_capa_is_valid(struct obd_capa *oc)
+{
+        return !(oc->c_flags & (OBD_CAPA_FL_NEW | OBD_CAPA_FL_EXPIRED));
+}
+
+/* Set OBD_CAPA_FL_NEW in c_flags; other flag bits are left as they are. */
+static inline void obd_capa_set_new(struct obd_capa *oc)
+{
+ oc->c_flags |= OBD_CAPA_FL_NEW;
+}
+
+/* Set OBD_CAPA_FL_EXPIRED in c_flags; other flag bits are left as they are. */
+static inline void obd_capa_set_expired(struct obd_capa *oc)
+{
+ oc->c_flags |= OBD_CAPA_FL_EXPIRED;
+}
+
+/* Mark the capa valid by clearing both OBD_CAPA_FL_NEW and
+ * OBD_CAPA_FL_EXPIRED in c_flags. */
+static inline void obd_capa_set_valid(struct obd_capa *oc)
+{
+ oc->c_flags &= ~(OBD_CAPA_FL_NEW | OBD_CAPA_FL_EXPIRED);
+}
+
+/* Clear OBD_CAPA_FL_NEW in c_flags. */
+static inline void obd_capa_clear_new(struct obd_capa *oc)
+{
+ oc->c_flags &= ~OBD_CAPA_FL_NEW;
+}
+
+/* Clear OBD_CAPA_FL_EXPIRED in c_flags. */
+static inline void obd_capa_clear_expired(struct obd_capa *oc)
+{
+ oc->c_flags &= ~OBD_CAPA_FL_EXPIRED;
+}
+
+/* 1 if OBD_CAPA_FL_ROOT is set in c_flags, 0 otherwise. */
+static inline int obd_capa_is_root(struct obd_capa *oc)
+{
+        return (oc->c_flags & OBD_CAPA_FL_ROOT) != 0;
+}
+
+/* Set OBD_CAPA_FL_ROOT in c_flags. */
+static inline void obd_capa_set_root(struct obd_capa *oc)
+{
+ oc->c_flags |= OBD_CAPA_FL_ROOT;
+}
+
+/* 1 if OBD_CAPA_FL_SPLIT is set in c_flags, 0 otherwise. */
+static inline int obd_capa_is_split(struct obd_capa *oc)
+{
+        return (oc->c_flags & OBD_CAPA_FL_SPLIT) != 0;
+}
+
+/* Set OBD_CAPA_FL_SPLIT in c_flags. */
+static inline void obd_capa_set_split(struct obd_capa *oc)
+{
+ oc->c_flags |= OBD_CAPA_FL_SPLIT;
+}
+
+/*
+ * Allocate an obd_capa from capa_cachep for the given site.  On success
+ * the refcount is 0, the lock and list head are initialised, the NEW flag
+ * is set and capa_count[site] is incremented.  Returns NULL if the
+ * allocation fails, and always NULL outside the kernel build.
+ */
+static inline struct obd_capa *alloc_capa(int site)
+{
+#ifdef __KERNEL__
+        struct obd_capa *ocapa;
+
+        OBD_SLAB_ALLOC(ocapa, capa_cachep, SLAB_KERNEL, sizeof(*ocapa));
+        if (!ocapa)
+                return ocapa;
+
+        atomic_set(&ocapa->c_refc, 0);
+        spin_lock_init(&ocapa->c_lock);
+        INIT_LIST_HEAD(&ocapa->c_list);
+        ocapa->c_site = site;
+        obd_capa_set_new(ocapa);
+        capa_count[site]++;
+
+        return ocapa;
+#else
+        return NULL;
+#endif
+}
+
+/*
+ * Return an obd_capa to capa_cachep and decrement its site's counter.
+ * LBUGs if the capa still has references or if the per-site count would
+ * drop below zero.  Does nothing outside the kernel build.
+ */
+static inline void free_capa(struct obd_capa *ocapa)
+{
+#ifdef __KERNEL__
+        if (atomic_read(&ocapa->c_refc) != 0) {
+                DEBUG_CAPA(D_ERROR, &ocapa->c_capa, "refc %d for",
+                           atomic_read(&ocapa->c_refc));
+                LBUG();
+        }
+
+        if (--capa_count[ocapa->c_site] < 0) {
+                DEBUG_CAPA(D_ERROR, &ocapa->c_capa, "total count %d",
+                           capa_count[ocapa->c_site]);
+                LBUG();
+        }
+
+        OBD_SLAB_FREE(ocapa, capa_cachep, sizeof(*ocapa));
+#endif
+}
+
+/* Take a reference on \a ocapa (NULL is tolerated) and return it. */
+static inline struct obd_capa *capa_get(struct obd_capa *ocapa)
+{
+        if (ocapa != NULL)
+                atomic_inc(&ocapa->c_refc);
+
+        return ocapa;
+}
+
+/* Drop a reference on \a ocapa (NULL is tolerated).  Only the counter is
+ * decremented here; nothing is freed. */
+static inline void capa_put(struct obd_capa *ocapa)
+{
+        if (ocapa != NULL)
+                atomic_dec(&ocapa->c_refc);
+}
+
+/*
+ * Convert open(2) flags into an access mode value: the flags are bumped
+ * by one when that leaves an O_ACCMODE bit set, and O_TRUNC additionally
+ * turns on bit value 2.
+ * NOTE(review): bits 1 and 2 presumably correspond to FMODE_READ and
+ * FMODE_WRITE -- confirm against the callers.
+ */
+static inline int open_flags_to_accmode(int flags)
+{
+        int accmode = flags;
+
+        if (((flags + 1) & O_ACCMODE) != 0)
+                accmode = flags + 1;
+
+        if ((accmode & O_TRUNC) != 0)
+                accmode |= 2;
+
+        return accmode;
+}
+
+/* Pick the OSS opcode for an open mode: write if FMODE_WRITE is set in
+ * \a mode, read otherwise. */
+static inline __u64 capa_open_opc(int mode)
+{
+        if (mode & FMODE_WRITE)
+                return CAPA_OPC_OSS_WRITE;
+
+        return CAPA_OPC_OSS_READ;
+}
+
+/*
+ * Derive the jiffies-based c_expiry from the capability's lc_expiry, which
+ * is taken relative to CURRENT_SECONDS.
+ */
+static inline void set_capa_expiry(struct obd_capa *ocapa)
+{
+ time_t expiry = (time_t)ocapa->c_capa.lc_expiry;
+
+ /* the remaining lifetime is scaled by HZ and added to jiffies; with
+ * integer arithmetic, dividing by HZ here and multiplying back below
+ * leaves c_expiry a multiple of HZ */
+ expiry = (jiffies + (expiry - CURRENT_SECONDS) * HZ) / HZ;
+ ocapa->c_expiry = expiry * HZ;
+}
+
+/*
+ * Time at which the capa should be renewed: c_expiry minus a lead of 40
+ * seconds for CAPA_FL_SHORT_EXPIRY capas and 1200 seconds otherwise
+ * (scaled by HZ).
+ */
+static inline unsigned long capa_renewal_time(struct obd_capa *ocapa)
+{
+        /* NB, by default dirty_expire_centisecs is 30*100, that is 30 sec,
+         * the following values guarantee that client cache will be flushed
+         * to OSS before capability expires.
+         */
+        int lead_sec;
+
+        if (ocapa->c_capa.lc_flags & CAPA_FL_SHORT_EXPIRY)
+                lead_sec = 40;
+        else
+                lead_sec = 1200;
+
+        return ocapa->c_expiry - lead_sec * HZ;
+}
+
+#ifdef __KERNEL__
+/* Non-zero once jiffies has reached capa_renewal_time(ocapa). */
+static inline int capa_is_to_expire(struct obd_capa *ocapa)
+{
+        return time_after_eq(jiffies, capa_renewal_time(ocapa));
+}
+
+/* Non-zero once jiffies has reached the capa's c_expiry. */
+static inline int capa_is_expired(struct obd_capa *ocapa)
+{
+        return time_after_eq(jiffies, ocapa->c_expiry);
+}
+#endif
+
+/* Non-zero when every bit of \a opc is also set in capa->lc_opc. */
+static inline int capa_opc_supported(struct lustre_capa *capa, __u64 opc)
+{
+        __u64 missing = opc & ~capa->lc_opc;
+
+        return missing == 0;
+}
+
+/*
+ * Fetch the lustre_capa held in buffer \a offset of \a msg through
+ * lustre_swab_buf(), with lustre_swab_lustre_capa as the swab routine.
+ * Returns the capability, or NULL after logging the message's buffer
+ * count and the size of the requested buffer (-1 when \a offset is past
+ * the last buffer).
+ */
+static inline struct lustre_capa *
+lustre_unpack_capa(struct lustre_msg *msg, unsigned int offset)
+{
+        struct lustre_capa *capa;
+
+        capa = lustre_swab_buf(msg, offset, sizeof(*capa),
+                               lustre_swab_lustre_capa);
+        if (capa == NULL)
+                /* bufsize uses %d so the -1 "no such buffer" marker is
+                 * printed as -1 rather than as a huge unsigned value */
+                CERROR("bufcount %u, bufsize %d\n",
+                       lustre_msg_bufcount(msg),
+                       (lustre_msg_bufcount(msg) <= offset) ?
+                       -1 : lustre_msg_buflen(msg, offset));
+
+        return capa;
+}
+
+struct filter_capa_key {
+ struct list_head k_list;
+ struct lustre_capa_key k_key;
+};
+
+/* Sentinel capability pointer (an ERR_PTR, never dereferenceable).  The
+ * whole cast expression is parenthesized so the macro is safe inside any
+ * surrounding expression. */
+#define BYPASS_CAPA ((struct lustre_capa *)ERR_PTR(-ENOENT))
+
+#endif /* __LINUX_CAPA_H_ */
#define LAST_RCVD "last_received"
#define LOV_OBJID "lov_objid"
#define HEALTH_CHECK "health_check"
-
+#define CAPA_KEYS "capa_keys"
/****************** persistent mount data *********************/
[MDT_IDMAP_HASHSIZE];
};
-/* remote perm */
-extern int mdc_get_remote_perm(struct obd_export *exp, const struct lu_fid *fid,
- struct ptlrpc_request **request);
-
#endif
extern const struct req_format RQF_MDS_WRITEPAGE;
extern const struct req_format RQF_MDS_IS_SUBDIR;
extern const struct req_format RQF_MDS_DONE_WRITING;
+extern const struct req_format RQF_MDS_RENEW_CAPA;
/*
* This is format of direct (non-intent) MDS_GETATTR_NAME request.
extern const struct req_msg_field RMF_ACL;
extern const struct req_msg_field RMF_LOGCOOKIES;
extern const struct req_msg_field RMF_REINT_OPC;
+extern const struct req_msg_field RMF_CAPA1;
+extern const struct req_msg_field RMF_CAPA2;
/* seq-mgr fields */
extern const struct req_msg_field RMF_SEQ_OPC;
struct md_object *obj,
struct md_attr *ma,
struct md_ucred *uc);
+ int (*moo_capa_get)(const struct lu_context *, struct md_object *,
+ struct lustre_capa *);
};
/*
struct md_device *m,
struct kstatfs *sfs,
struct md_ucred *uc);
+
+ int (*mdo_init_capa_keys)(struct md_device *m,
+ struct lustre_capa_key *keys);
+
+ int (*mdo_update_capa_key)(const struct lu_context *ctx,
+ struct md_device *m,
+ struct lustre_capa_key *key);
};
enum md_upcall_event {
return m->mo_ops->moo_ref_del(cx, m, ma, uc);
}
+/*
+ * Dispatch to the object's moo_capa_get method, which must be present,
+ * and return its result.
+ */
+static inline int mo_capa_get(const struct lu_context *cx,
+                              struct md_object *m,
+                              struct lustre_capa *c)
+{
+        int rc;
+
+        LASSERT(m->mo_ops->moo_capa_get);
+        rc = m->mo_ops->moo_capa_get(cx, m, c);
+
+        return rc;
+}
+
static inline int mdo_lookup(const struct lu_context *cx,
struct md_object *p,
const char *name,
#include <lustre_export.h>
#include <lustre_quota.h>
#include <lustre_fld.h>
+#include <lustre_capa.h>
#define MAX_OBD_DEVICES 8192
* level. E.g. it is used for update lsm->lsm_oinfo at every recieved
* request in osc level for enqueue requests. It is also possible to
* update some caller data from LOV layer if needed. */
- obd_enqueue_update_f oi_cb_up;
+ obd_enqueue_update_f oi_cb_up;
+ /* OSS capability: on the client this points to an obd_capa (to avoid
+ * a copy); on the OSS it points to a lustre_capa. */
+ void *oi_capa;
};
/* compare all relevant fields. */
void (*ap_update_obdo)(void *data, int cmd, struct obdo *oa,
obd_valid valid);
int (*ap_completion)(void *data, int cmd, struct obdo *oa, int rc);
+ struct obd_capa *(*ap_lookup_capa)(void *data, int cmd);
};
/* the `oig' is passed down from a caller of obd rw methods. the callee
int fo_fmd_max_num; /* per exp filter_mod_data */
int fo_fmd_max_age; /* jiffies to fmd expiry */
+
+ /* capability related */
+ unsigned int fo_fl_oss_capa;
+ struct list_head fo_capa_keys;
};
#define OSC_MAX_RIF_DEFAULT 8
mds_fl_user_xattr:1,
mds_fl_acl:1;
+
/* For CMD add mds_num */
int mds_num;
/* root squash */
struct rootsquash_info *mds_rootsquash_info;
+
+ /* for capability keys update */
+ struct lustre_capa_key *mds_capa_keys;
};
struct echo_obd {
#define KEY_INIT_RECOV "initial_recov"
#define KEY_INIT_RECOV_BACKUP "init_recov_bk"
#define KEY_FLUSH_CTX "flush_ctx"
+#define KEY_CAPA_KEY "capa_key"
struct lu_context;
struct lov_stripe_md **ea, struct obd_trans_info *oti);
int (*o_destroy)(struct obd_export *exp, struct obdo *oa,
struct lov_stripe_md *ea, struct obd_trans_info *oti,
- struct obd_export *md_exp);
+ struct obd_export *md_exp, void *capa);
int (*o_setattr)(struct obd_export *exp, struct obd_info *oinfo,
struct obd_trans_info *oti);
int (*o_setattr_async)(struct obd_export *exp, struct obd_info *oinfo,
struct obd_trans_info *oti,
struct ptlrpc_request_set *rqset);
int (*o_sync)(struct obd_export *exp, struct obdo *oa,
- struct lov_stripe_md *ea, obd_size start, obd_size end);
+ struct lov_stripe_md *ea, obd_size start, obd_size end,
+ void *capa);
int (*o_migrate)(struct lustre_handle *conn, struct lov_stripe_md *dst,
struct lov_stripe_md *src, obd_size start,
obd_size end, struct obd_trans_info *oti);
int (*o_preprw)(int cmd, struct obd_export *exp, struct obdo *oa,
int objcount, struct obd_ioobj *obj,
int niocount, struct niobuf_remote *remote,
- struct niobuf_local *local, struct obd_trans_info *oti);
+ struct niobuf_local *local, struct obd_trans_info *oti,
+ struct lustre_capa *capa);
int (*o_commitrw)(int cmd, struct obd_export *exp, struct obdo *oa,
int objcount, struct obd_ioobj *obj,
int niocount, struct niobuf_local *local,
/* metadata-only methods */
int (*o_pin)(struct obd_export *, const struct lu_fid *fid,
- struct obd_client_handle *, int flag);
+ struct obd_capa *, struct obd_client_handle *, int flag);
int (*o_unpin)(struct obd_export *, struct obd_client_handle *, int);
int (*o_import_event)(struct obd_device *, struct obd_import *,
};
struct md_ops {
- int (*m_getstatus)(struct obd_export *, struct lu_fid *);
+ int (*m_getstatus)(struct obd_export *, struct lu_fid *,
+ struct obd_capa **);
int (*m_change_cbdata)(struct obd_export *, const struct lu_fid *,
ldlm_iterator_t, void *);
int (*m_close)(struct obd_export *, struct md_op_data *,
void *, int, ldlm_completion_callback,
ldlm_blocking_callback, void *, int);
int (*m_getattr)(struct obd_export *, const struct lu_fid *,
- obd_valid, int, struct ptlrpc_request **);
+ struct obd_capa *, obd_valid, int,
+ struct ptlrpc_request **);
int (*m_getattr_name)(struct obd_export *, const struct lu_fid *,
- const char *, int, obd_valid,
+ struct obd_capa *, const char *, int, obd_valid,
int, struct ptlrpc_request **);
int (*m_intent_lock)(struct obd_export *, struct md_op_data *,
void *, int, struct lookup_intent *, int,
const char *, int, const char *, int,
struct ptlrpc_request **);
int (*m_is_subdir)(struct obd_export *, const struct lu_fid *,
- const struct lu_fid *, struct ptlrpc_request **);
+ const struct lu_fid *,
+ struct obd_capa *, struct obd_capa *,
+ struct ptlrpc_request **);
int (*m_setattr)(struct obd_export *, struct md_op_data *, void *,
int , void *, int, struct ptlrpc_request **);
int (*m_sync)(struct obd_export *, const struct lu_fid *,
- struct ptlrpc_request **);
+ struct obd_capa *, struct ptlrpc_request **);
int (*m_readpage)(struct obd_export *, const struct lu_fid *,
- __u64, struct page *, struct ptlrpc_request **);
+ struct obd_capa *, __u64, struct page *,
+ struct ptlrpc_request **);
int (*m_unlink)(struct obd_export *, struct md_op_data *,
struct ptlrpc_request **);
int (*m_setxattr)(struct obd_export *, const struct lu_fid *,
- obd_valid, const char *, const char *,
- int, int, int, struct ptlrpc_request **);
+ struct obd_capa *, obd_valid, const char *,
+ const char *, int, int, int,
+ struct ptlrpc_request **);
int (*m_getxattr)(struct obd_export *, const struct lu_fid *,
- obd_valid, const char *, const char *,
- int, int, int, struct ptlrpc_request **);
+ struct obd_capa *, obd_valid, const char *,
+ const char *, int, int, int,
+ struct ptlrpc_request **);
int (*m_init_ea_size)(struct obd_export *, int, int, int);
int (*m_cancel_unused)(struct obd_export *, const struct lu_fid *,
int flags, void *opaque);
+ int (*m_renew_capa)(struct obd_export *, struct obd_capa *oc,
+ renew_capa_cb_t cb);
int (*m_get_remote_perm)(struct obd_export *, const struct lu_fid *,
- struct ptlrpc_request **);
+ struct obd_capa *, struct ptlrpc_request **);
/*
* NOTE: If adding ops, add another LPROCFS_MD_OP_INIT() line to
obd_ops->o_quotactl = QUOTA_OP(interface, ctl);
}
+/* The obdo's o_gr taken relative to FILTER_GROUP_MDS0. */
+static inline __u64 oinfo_mdsno(struct obd_info *oinfo)
+{
+        __u64 mdsno = oinfo->oi_oa->o_gr - FILTER_GROUP_MDS0;
+
+        return mdsno;
+}
+
+/* View oi_capa as a lustre_capa pointer.  oi_capa is an untyped pointer,
+ * so the caller must know that it really holds a lustre_capa. */
+static inline struct lustre_capa *oinfo_capa(struct obd_info *oinfo)
+{
+        struct lustre_capa *capa = oinfo->oi_capa;
+
+        return capa;
+}
+
#endif /* __OBD_H */
static inline int obd_destroy(struct obd_export *exp, struct obdo *obdo,
struct lov_stripe_md *ea,
struct obd_trans_info *oti,
- struct obd_export *md_exp)
+ struct obd_export *md_exp, void *capa)
{
int rc;
ENTRY;
EXP_CHECK_DT_OP(exp, destroy);
OBD_COUNTER_INCREMENT(exp->exp_obd, destroy);
- rc = OBP(exp->exp_obd, destroy)(exp, obdo, ea, oti, md_exp);
+ rc = OBP(exp->exp_obd, destroy)(exp, obdo, ea, oti, md_exp, capa);
RETURN(rc);
}
static inline int obd_sync(struct obd_export *exp, struct obdo *oa,
struct lov_stripe_md *ea, obd_size start,
- obd_size end)
+ obd_size end, void *capa)
{
int rc;
ENTRY;
OBD_CHECK_DT_OP(exp->exp_obd, sync, -EOPNOTSUPP);
OBD_COUNTER_INCREMENT(exp->exp_obd, sync);
- rc = OBP(exp->exp_obd, sync)(exp, oa, ea, start, end);
+ rc = OBP(exp->exp_obd, sync)(exp, oa, ea, start, end, capa);
RETURN(rc);
}
static inline int obd_brw_rqset(int cmd, struct obd_export *exp,
struct obdo *oa, struct lov_stripe_md *lsm,
obd_count oa_bufs, struct brw_page *pg,
- struct obd_trans_info *oti)
+ struct obd_trans_info *oti,
+ struct obd_capa *ocapa)
{
struct ptlrpc_request_set *set = NULL;
struct obd_info oinfo = { { { 0 } } };
oinfo.oi_oa = oa;
oinfo.oi_md = lsm;
+ oinfo.oi_capa = ocapa;
rc = obd_brw_async(cmd, exp, &oinfo, oa_bufs, pg, oti, set);
if (rc == 0) {
rc = ptlrpc_set_wait(set);
int objcount, struct obd_ioobj *obj,
int niocount, struct niobuf_remote *remote,
struct niobuf_local *local,
- struct obd_trans_info *oti)
+ struct obd_trans_info *oti,
+ struct lustre_capa *capa)
{
int rc;
ENTRY;
OBD_COUNTER_INCREMENT(exp->exp_obd, preprw);
rc = OBP(exp->exp_obd, preprw)(cmd, exp, oa, objcount, obj, niocount,
- remote, local, oti);
+ remote, local, oti, capa);
RETURN(rc);
}
}
static inline int obd_pin(struct obd_export *exp, const struct lu_fid *fid,
- struct obd_client_handle *handle, int flag)
+ struct obd_capa *oc, struct obd_client_handle *handle,
+ int flag)
{
int rc;
EXP_CHECK_DT_OP(exp, pin);
OBD_COUNTER_INCREMENT(exp->exp_obd, pin);
- rc = OBP(exp->exp_obd, pin)(exp, fid, handle, flag);
+ rc = OBP(exp->exp_obd, pin)(exp, fid, oc, handle, flag);
return(rc);
}
/* metadata helpers */
static inline int md_getstatus(struct obd_export *exp,
- struct lu_fid *fid)
+ struct lu_fid *fid, struct obd_capa **pc)
{
int rc;
ENTRY;
EXP_CHECK_MD_OP(exp, getstatus);
MD_COUNTER_INCREMENT(exp->exp_obd, getstatus);
- rc = MDP(exp->exp_obd, getstatus)(exp, fid);
+ rc = MDP(exp->exp_obd, getstatus)(exp, fid, pc);
RETURN(rc);
}
-static inline int md_getattr(struct obd_export *exp,
- const struct lu_fid *fid,
- obd_valid valid, int ea_size,
+static inline int md_getattr(struct obd_export *exp, const struct lu_fid *fid,
+ struct obd_capa *oc, obd_valid valid, int ea_size,
struct ptlrpc_request **request)
{
int rc;
ENTRY;
EXP_CHECK_MD_OP(exp, getattr);
MD_COUNTER_INCREMENT(exp->exp_obd, getattr);
- rc = MDP(exp->exp_obd, getattr)(exp, fid, valid,
+ rc = MDP(exp->exp_obd, getattr)(exp, fid, oc, valid,
ea_size, request);
RETURN(rc);
}
RETURN(rc);
}
-static inline int md_close(struct obd_export *exp,
- struct md_op_data *op_data,
+static inline int md_close(struct obd_export *exp, struct md_op_data *op_data,
struct obd_client_handle *och,
struct ptlrpc_request **request)
{
}
static inline int md_create(struct obd_export *exp, struct md_op_data *op_data,
- const void *data, int datalen, int mode,
- __u32 uid, __u32 gid, __u32 cap_effective, __u64 rdev,
+ const void *data, int datalen, int mode, __u32 uid,
+ __u32 gid, __u32 cap_effective, __u64 rdev,
struct ptlrpc_request **request)
{
int rc;
}
static inline int md_getattr_name(struct obd_export *exp,
- const struct lu_fid *fid,
+ const struct lu_fid *fid, struct obd_capa *oc,
const char *name, int namelen,
obd_valid valid, int ea_size,
struct ptlrpc_request **request)
ENTRY;
EXP_CHECK_MD_OP(exp, getattr_name);
MD_COUNTER_INCREMENT(exp->exp_obd, getattr_name);
- rc = MDP(exp->exp_obd, getattr_name)(exp, fid, name, namelen,
+ rc = MDP(exp->exp_obd, getattr_name)(exp, fid, oc, name, namelen,
valid, ea_size, request);
RETURN(rc);
}
static inline int md_intent_lock(struct obd_export *exp,
- struct md_op_data *op_data,
- void *lmm, int lmmsize,
- struct lookup_intent *it,
+ struct md_op_data *op_data, void *lmm,
+ int lmmsize, struct lookup_intent *it,
int flags, struct ptlrpc_request **reqp,
ldlm_blocking_callback cb_blocking,
int extra_lock_flags)
RETURN(rc);
}
-static inline int md_link(struct obd_export *exp,
- struct md_op_data *op_data,
+static inline int md_link(struct obd_export *exp, struct md_op_data *op_data,
struct ptlrpc_request **request)
{
int rc;
RETURN(rc);
}
-static inline int md_rename(struct obd_export *exp,
- struct md_op_data *op_data,
- const char *old, int oldlen,
- const char *new, int newlen,
- struct ptlrpc_request **request)
+static inline int md_rename(struct obd_export *exp, struct md_op_data *op_data,
+ const char *old, int oldlen, const char *new,
+ int newlen, struct ptlrpc_request **request)
{
int rc;
ENTRY;
static inline int md_is_subdir(struct obd_export *exp,
const struct lu_fid *pfid,
const struct lu_fid *cfid,
+ struct obd_capa *pc, struct obd_capa *cc,
struct ptlrpc_request **request)
{
int rc;
ENTRY;
EXP_CHECK_MD_OP(exp, is_subdir);
MD_COUNTER_INCREMENT(exp->exp_obd, is_subdir);
- rc = MDP(exp->exp_obd, is_subdir)(exp, pfid, cfid, request);
+ rc = MDP(exp->exp_obd, is_subdir)(exp, pfid, cfid, pc, cc, request);
RETURN(rc);
}
RETURN(rc);
}
-static inline int md_sync(struct obd_export *exp,
- const struct lu_fid *fid,
- struct ptlrpc_request **request)
+static inline int md_sync(struct obd_export *exp, const struct lu_fid *fid,
+ struct obd_capa *oc, struct ptlrpc_request **request)
{
int rc;
ENTRY;
EXP_CHECK_MD_OP(exp, sync);
MD_COUNTER_INCREMENT(exp->exp_obd, sync);
- rc = MDP(exp->exp_obd, sync)(exp, fid, request);
+ rc = MDP(exp->exp_obd, sync)(exp, fid, oc, request);
RETURN(rc);
}
-static inline int md_readpage(struct obd_export *exp,
- const struct lu_fid *fid,
- __u64 offset, struct page *page,
+static inline int md_readpage(struct obd_export *exp, const struct lu_fid *fid,
+ struct obd_capa *oc, __u64 offset,
+ struct page *page,
struct ptlrpc_request **request)
{
int rc;
ENTRY;
EXP_CHECK_MD_OP(exp, readpage);
MD_COUNTER_INCREMENT(exp->exp_obd, readpage);
- rc = MDP(exp->exp_obd, readpage)(exp, fid, offset, page, request);
+ rc = MDP(exp->exp_obd, readpage)(exp, fid, oc, offset, page, request);
RETURN(rc);
}
}
static inline int md_setxattr(struct obd_export *exp,
- const struct lu_fid *fid,
+ const struct lu_fid *fid, struct obd_capa *oc,
obd_valid valid, const char *name,
const char *input, int input_size,
int output_size, int flags,
ENTRY;
EXP_CHECK_MD_OP(exp, setxattr);
MD_COUNTER_INCREMENT(exp->exp_obd, setxattr);
- RETURN(MDP(exp->exp_obd, setxattr)(exp, fid, valid, name, input,
+ RETURN(MDP(exp->exp_obd, setxattr)(exp, fid, oc, valid, name, input,
input_size, output_size, flags,
request));
}
static inline int md_getxattr(struct obd_export *exp,
- const struct lu_fid *fid,
+ const struct lu_fid *fid, struct obd_capa *oc,
obd_valid valid, const char *name,
const char *input, int input_size,
int output_size, int flags,
ENTRY;
EXP_CHECK_MD_OP(exp, getxattr);
MD_COUNTER_INCREMENT(exp->exp_obd, getxattr);
- RETURN(MDP(exp->exp_obd, getxattr)(exp, fid, valid, name, input,
+ RETURN(MDP(exp->exp_obd, getxattr)(exp, fid, oc, valid, name, input,
input_size, output_size, flags,
request));
}
policy, mode, lockh));
}
-static inline int md_init_ea_size(struct obd_export *exp,
- int easize, int def_asize,
- int cookiesize)
+static inline int md_init_ea_size(struct obd_export *exp, int easize,
+ int def_asize, int cookiesize)
{
ENTRY;
EXP_CHECK_MD_OP(exp, init_ea_size);
MD_COUNTER_INCREMENT(exp->exp_obd, init_ea_size);
- RETURN(MDP(exp->exp_obd, init_ea_size)(exp, easize,
- def_asize,
+ RETURN(MDP(exp->exp_obd, init_ea_size)(exp, easize, def_asize,
cookiesize));
}
static inline int md_get_remote_perm(struct obd_export *exp,
const struct lu_fid *fid,
+ struct obd_capa *oc,
struct ptlrpc_request **request)
{
ENTRY;
EXP_CHECK_MD_OP(exp, get_remote_perm);
MD_COUNTER_INCREMENT(exp->exp_obd, get_remote_perm);
- RETURN(MDP(exp->exp_obd, get_remote_perm)(exp, fid, request));
+ RETURN(MDP(exp->exp_obd, get_remote_perm)(exp, fid, oc, request));
+}
+/*
+ * Forward a capability renewal request for @ocapa to the renew_capa MD
+ * operation of exp->exp_obd, passing @cb through unchanged, and return that
+ * operation's result.
+ */
+static inline int md_renew_capa(struct obd_export *exp, struct obd_capa *ocapa,
+ renew_capa_cb_t cb)
+{
+ int rc;
+ ENTRY;
+ EXP_CHECK_MD_OP(exp, renew_capa);
+ MD_COUNTER_INCREMENT(exp->exp_obd, renew_capa);
+ rc = MDP(exp->exp_obd, renew_capa)(exp, ocapa, cb);
+ RETURN(rc);
}
/* OBD Metadata Support */
OBD_SLAB_FREE(oa, obdo_cachep, sizeof(*oa));
}
-static inline void obdo2fid(struct obdo *oa,
- struct lu_fid *fid)
+static inline void obdo2fid(struct obdo *oa, struct lu_fid *fid)
{
/* something here */
}
-static inline void fid2obdo(struct lu_fid *fid,
- struct obdo *oa)
+static inline void fid2obdo(struct lu_fid *fid, struct obdo *oa)
{
/* something here */
}
#define OBD_FAIL_MDS_WRITEPAGE_PACK 0x136
#define OBD_FAIL_MDS_IS_SUBDIR_NET 0x137
#define OBD_FAIL_MDS_IS_SUBDIR_PACK 0x138
+#define OBD_FAIL_MDS_RENEW_CAPA_NET 0x139
+#define OBD_FAIL_MDS_RENEW_CAPA_PACK 0x13a
#define OBD_FAIL_OST 0x200
#define OBD_FAIL_OST_CONNECT_NET 0x201
ldlm_lock_dump_handle(D_OTHER, &lockh);
offset = page->index << PAGE_SHIFT;
- rc = md_readpage(sbi->ll_md_exp, &lli->lli_fid,
+ rc = md_readpage(sbi->ll_md_exp, &lli->lli_fid, NULL,
offset, page, &request);
if (!rc) {
body = lustre_msg_buf(request->rq_repmsg, REPLY_REC_OFF,
}
}
- rc = obd_destroy(llu_i2obdexp(dir), oa, lsm, &oti, NULL);
+ rc = obd_destroy(llu_i2obdexp(dir), oa, lsm, &oti, NULL, NULL);
obdo_free(oa);
if (rc)
CERROR("obd destroy objid 0x"LPX64" error %d\n",
valid |= OBD_MD_FLEASIZE;
}
rc = md_getattr(sbi->ll_md_exp, ll_inode2fid(inode),
- valid, ealen, &req);
+ NULL, valid, ealen, &req);
if (rc) {
CERROR("failure %d inode %llu\n", rc,
(long long)llu_i2stat(inode)->st_ino);
RETURN(0);
}
- rc = md_getattr(sbi->ll_md_exp, ll_inode2fid(inode),
+ rc = md_getattr(sbi->ll_md_exp, ll_inode2fid(inode), NULL,
OBD_MD_LINKNAME, symlen, request);
if (rc) {
CERROR("inode %llu: rc = %d\n", (long long)st->st_ino, rc);
llu_init_ea_size(sbi->ll_md_exp, sbi->ll_dt_exp);
- err = md_getstatus(sbi->ll_md_exp, &rootfid);
+ err = md_getstatus(sbi->ll_md_exp, &rootfid, NULL);
if (err) {
CERROR("cannot mds_connect: rc = %d\n", err);
GOTO(out_dt_fid, err);
sbi->ll_root_fid = rootfid;
/* fetch attr of root inode */
- err = md_getattr(sbi->ll_md_exp, &rootfid,
+ err = md_getattr(sbi->ll_md_exp, &rootfid, NULL,
OBD_MD_FLGETATTR | OBD_MD_FLBLOCKS, 0, &request);
if (err) {
CERROR("md_getattr failed for root: rc = %d\n", err);
MODULES := lustre
-lustre-objs := dcache.o dir.o file.o llite_close.o llite_lib.o llite_nfs.o llite_fid.o rw.o lproc_llite.o namei.o symlink.o llite_mmap.o xattr.o remote_perm.o
+lustre-objs := dcache.o dir.o file.o llite_close.o llite_lib.o llite_nfs.o
+lustre-objs += llite_fid.o rw.o lproc_llite.o namei.o symlink.o llite_mmap.o
+lustre-objs += xattr.o remote_perm.o llite_capa.o
ifeq ($(PATCHLEVEL),4)
lustre-objs += rw24.o super.o
struct ll_sb_info *sbi = ll_i2sbi(inode);
struct ll_dentry_data *ldd = ll_d2d(de);
struct obd_client_handle *handle;
+ struct obd_capa *oc;
int rc = 0;
ENTRY;
LASSERT(ldd);
unlock_kernel();
handle = (flag) ? &ldd->lld_mnt_och : &ldd->lld_cwd_och;
- rc = obd_pin(sbi->ll_md_exp, &ll_i2info(inode)->lli_fid,
- handle, flag);
-
+ oc = ll_i2mdscapa(inode);
+ rc = obd_pin(sbi->ll_md_exp, ll_inode2fid(inode), oc, handle, flag);
+ capa_put(oc);
if (rc) {
lock_kernel();
memset(handle, 0, sizeof(*handle));
struct inode *inode = page->mapping->host;
struct ptlrpc_request *request;
struct mdt_body *body;
+ struct obd_capa *oc;
__u64 hash;
int rc;
ENTRY;
CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p) off %lu\n",
inode->i_ino, inode->i_generation, inode, (unsigned long)hash);
+ oc = ll_i2mdscapa(inode);
rc = md_readpage(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode),
- hash, page, &request);
+ oc, hash, page, &request);
+ capa_put(oc);
if (!rc) {
body = lustre_msg_buf(request->rq_repmsg, REPLY_REC_OFF,
sizeof(*body));
int namelen, rc, len = 0;
char *buf = NULL;
char *filename;
+ struct obd_capa *oc;
rc = obd_ioctl_getdata(&buf, &len, (void *)arg);
if (rc)
GOTO(out, rc = -EINVAL);
}
- rc = md_getattr_name(sbi->ll_md_exp, ll_inode2fid(inode),
+ oc = ll_i2mdscapa(inode);
+ rc = md_getattr_name(sbi->ll_md_exp, ll_inode2fid(inode), oc,
filename, namelen, OBD_MD_FLID, 0,
&request);
+ capa_put(oc);
if (rc < 0) {
CDEBUG(D_INFO, "md_getattr_name: %d\n", rc);
GOTO(out, rc);
if (op_data == NULL)
RETURN(-ENOMEM);
- ll_prepare_md_op_data(op_data, inode,
- NULL, NULL, 0, 0);
-
LASSERT(sizeof(lum) == sizeof(*lump));
LASSERT(sizeof(lum.lmm_objects[0]) ==
sizeof(lump->lmm_objects[0]));
lustre_swab_lov_user_md(&lum);
/* swabbing is done in lov_setstripe() on server side */
+ ll_prepare_md_op_data(op_data, inode, NULL, NULL, 0, 0);
rc = md_setattr(sbi->ll_md_exp, op_data, &lum,
sizeof(lum), NULL, 0, &request);
+ ll_finish_md_op_data(op_data);
if (rc) {
if (rc != -EPERM && rc != -EACCES)
CERROR("md_setattr fails: rc = %d\n", rc);
struct lov_mds_md *lmm = NULL;
struct mdt_body *body;
char *filename = NULL;
+ struct obd_capa *oc;
int rc, lmmsize;
rc = ll_get_max_mdsize(sbi, &lmmsize);
if (IS_ERR(filename))
RETURN(PTR_ERR(filename));
- rc = md_getattr_name(sbi->ll_md_exp, ll_inode2fid(inode),
+ oc = ll_i2mdscapa(inode);
+ rc = md_getattr_name(sbi->ll_md_exp,
+ ll_inode2fid(inode), oc,
filename, strlen(filename) + 1,
OBD_MD_FLEASIZE | OBD_MD_FLDIREA,
lmmsize, &request);
+ capa_put(oc);
if (rc < 0) {
CDEBUG(D_INFO, "md_getattr_name failed "
"on %s: rc %d\n", filename, rc);
GOTO(out_name, rc);
}
} else {
- rc = md_getattr(sbi->ll_md_exp, ll_inode2fid(inode),
+ oc = ll_i2mdscapa(inode);
+ rc = md_getattr(sbi->ll_md_exp, ll_inode2fid(inode), oc,
OBD_MD_FLEASIZE | OBD_MD_FLDIREA,
lmmsize, &request);
+ capa_put(oc);
if (rc < 0) {
CDEBUG(D_INFO, "md_getattr failed on inode "
"%lu/%u: rc %d\n", inode->i_ino,
((struct ll_iattr *)&op_data->attr)->ia_attr_flags = inode->i_flags;
op_data->ioepoch = ll_i2info(inode)->lli_ioepoch;
memcpy(&op_data->handle, fh, sizeof(op_data->handle));
+ op_data->mod_capa1 = ll_i2mdscapa(inode);
}
static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data,
epoch_close = (op_data->flags & MF_EPOCH_CLOSE) ||
!S_ISREG(inode->i_mode);
rc = md_close(md_exp, op_data, och, &req);
+
+ ll_finish_md_op_data(op_data);
+ OBD_FREE_PTR(op_data);
if (rc == -EAGAIN) {
/* This close must have closed the epoch. */
LASSERT(epoch_close);
if (!epoch_close)
ll_init_done_writing(inode);
- OBD_FREE_PTR(op_data);
-
if (rc == 0) {
rc = ll_objects_destroy(req, inode);
if (rc)
LUSTRE_FPRIVATE(file) = NULL;
ll_file_data_put(fd);
+ ll_oss_capa_close(inode, file);
RETURN(rc);
}
rc = md_enqueue(sbi->ll_md_exp, LDLM_IBITS, itp, LCK_PW, op_data,
&lockh, lmm, lmmsize, ldlm_completion_ast,
ll_md_blocking_ast, NULL, 0);
+
+ ll_finish_md_op_data(op_data);
OBD_FREE_PTR(op_data);
if (rc < 0) {
CERROR("lock enqueue: err: %d\n", rc);
if (!S_ISREG(inode->i_mode))
GOTO(out, rc);
+ ll_oss_capa_open(inode, file);
+
lsm = lli->lli_smd;
if (lsm == NULL) {
if (file->f_flags & O_LOV_DELAY_CREATE ||
OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
OBD_MD_FLBLKSZ | OBD_MD_FLMTIME |
OBD_MD_FLCTIME | OBD_MD_FLGROUP;
+ oinfo.oi_capa = ll_i2mdscapa(inode);
set = ptlrpc_prep_set();
if (set == NULL) {
rc = ptlrpc_set_wait(set);
ptlrpc_set_destroy(set);
}
+ capa_put(oinfo.oi_capa);
if (rc)
RETURN(rc);
struct ll_inode_info *lli = ll_i2info(inode);
struct lov_stripe_md *lsm = lli->lli_smd;
struct ptlrpc_request *req;
+ struct obd_capa *oc;
int rc, err;
ENTRY;
CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
rc = err;
}
- err = md_sync(ll_i2sbi(inode)->ll_md_exp,
- ll_inode2fid(inode), &req);
+ oc = ll_i2mdscapa(inode);
+ err = md_sync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), oc,
+ &req);
+ capa_put(oc);
if (!rc)
rc = err;
if (!err)
if (data && lsm) {
struct obdo *oa = obdo_alloc();
+ struct obd_capa *ocapa;
if (!oa)
RETURN(rc ? rc : -ENOMEM);
OBD_MD_FLMTIME | OBD_MD_FLCTIME |
OBD_MD_FLGROUP);
+ ocapa = ll_lookup_oss_capa(inode, CAPA_OPC_OSS_WRITE);
err = obd_sync(ll_i2sbi(inode)->ll_dt_exp, oa, lsm,
- 0, OBD_OBJECT_EOF);
+ 0, OBD_OBJECT_EOF, ocapa);
+ capa_put(ocapa);
if (!rc)
rc = err;
obdo_free(oa);
struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
obd_valid valid = OBD_MD_FLGETATTR;
int ealen = 0;
+ struct obd_capa *oc;
if (S_ISREG(inode->i_mode)) {
rc = ll_get_max_mdsize(sbi, &ealen);
RETURN(rc);
valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE;
}
- rc = md_getattr(sbi->ll_md_exp, ll_inode2fid(inode), valid, ealen, &req);
+ oc = ll_i2mdscapa(inode);
+ rc = md_getattr(sbi->ll_md_exp, ll_inode2fid(inode), oc, valid,
+ ealen, &req);
+ capa_put(oc);
if (rc) {
rc = ll_inode_revalidate_fini(inode, rc);
RETURN(rc);
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2005 Cluster File Systems, Inc.
+ *
+ * Author: Lai Siyao <lsy@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#define DEBUG_SUBSYSTEM S_LLITE
+
+#include <linux/fs.h>
+#include <linux/version.h>
+#include <asm/uaccess.h>
+#include <linux/file.h>
+#include <linux/kmod.h>
+
+#include <lustre_lite.h>
+#include "llite_internal.h"
+
+/* for obd_capa.c_list, client capa might stay in three places:
+ * 1. ll_capa_list.
+ * 2. ll_idle_capas.
+ * 3. stand alone: just allocated.
+ */
+
+/* capas for oss writeback and those failed to renew; entries are inserted
+ * with sort_add_capa(), i.e. ordered by c_expiry */
+static LIST_HEAD(ll_idle_capas);
+static struct ptlrpc_thread ll_capa_thread; /* t_flags and t_ctl_waitq of the
+                                             * capa_thread_main() thread */
+static struct list_head *ll_capa_list = &capa_list[CAPA_SITE_CLIENT]; /* client
+                                             * capas that are candidates for
+                                             * renewal */
+
+/* llite capa renewal timer */
+cfs_timer_t ll_capa_timer;
+/* for debug: indicate whether capa on llite is enabled or not; set to 1 by
+ * ll_add_capa() and reset to 0 after the first "no capa" report so the
+ * message is not repeated */
+static atomic_t ll_capa_debug = ATOMIC_INIT(0);
+
+/* Arm ll_capa_timer for @expiry when @expiry is earlier than the timer's
+ * current deadline or when the timer is not armed at all; otherwise leave
+ * the timer alone. @ocapa is only used for the debug message. */
+static inline void update_capa_timer(struct obd_capa *ocapa, cfs_time_t expiry)
+{
+        /* an armed timer whose deadline is not later than @expiry stays */
+        if (!cfs_time_before(expiry, cfs_timer_deadline(&ll_capa_timer)) &&
+            cfs_timer_is_armed(&ll_capa_timer))
+                return;
+
+        cfs_timer_arm(&ll_capa_timer, expiry);
+        DEBUG_CAPA(D_SEC, &ocapa->c_capa,
+                   "ll_capa_timer update: %lu/%lu by",
+                   expiry, cfs_time_current());
+}
+/* Wake-up condition of the capa thread.
+ *
+ * Under capa_lock, inspects the first entry of ll_capa_list (tested with
+ * capa_is_to_expire()) or, only when that list is empty, the first entry of
+ * ll_idle_capas (tested with capa_is_expired()). When the inspected capa is
+ * not due yet, update_capa_timer() is called for it instead.
+ *
+ * Returns the result of the expiry test, or 0 when both lists are empty.
+ */
+static inline int have_expired_capa(void)
+{
+ struct obd_capa *ocapa = NULL;
+ int expired = 0;
+
+ /* if ll_capa_list has client capa to expire or ll_idle_capas has
+ * expired capa, return 1.
+ */
+ spin_lock(&capa_lock);
+ if (!list_empty(ll_capa_list)) {
+ ocapa = list_entry(ll_capa_list->next, struct obd_capa, c_list);
+ expired = capa_is_to_expire(ocapa);
+ if (!expired)
+ update_capa_timer(ocapa, capa_renewal_time(ocapa));
+ } else if (!list_empty(&ll_idle_capas)) {
+ ocapa = list_entry(ll_idle_capas.next, struct obd_capa, c_list);
+ expired = capa_is_expired(ocapa);
+ if (!expired)
+ update_capa_timer(ocapa, ocapa->c_expiry);
+ }
+ spin_unlock(&capa_lock);
+
+ if (expired) /* NOTE(review): ocapa is read below after capa_lock was
+               * dropped -- confirm it cannot be freed meanwhile */
+ DEBUG_CAPA(D_SEC, &ocapa->c_capa, "expired");
+ return expired;
+}
+
+/* Returns 1 once SVC_STOPPING is set in the capa thread's flags, else 0. */
+static inline int ll_capa_check_stop(void)
+{
+        if (ll_capa_thread.t_flags & SVC_STOPPING)
+                return 1;
+
+        return 0;
+}
+
+/* Insert @ocapa into the list @head, keeping the list ordered by c_expiry.
+ *
+ * The list is scanned from its tail; @ocapa is linked right after the first
+ * entry found whose expiry is strictly earlier, or at the front of the list
+ * when no such entry exists.
+ */
+static void sort_add_capa(struct obd_capa *ocapa, struct list_head *head)
+{
+        struct list_head *after = NULL;
+        struct obd_capa *cur;
+
+        /* TODO: client capa is sorted by expiry, this could be optimized */
+        list_for_each_entry_reverse(cur, head, c_list) {
+                if (!cfs_time_after(ocapa->c_expiry, cur->c_expiry))
+                        continue;
+
+                after = &cur->c_list;
+                break;
+        }
+
+        LASSERT(&ocapa->c_list != after);
+        list_add(&ocapa->c_list, after != NULL ? after : head);
+}
+
+/* Test whether a granted inodebits lock covering @inodebits exists for
+ * @inode in the namespace of its MD export.
+ *
+ * The resource is named by (i_ino, i_generation) and the match is made with
+ * LDLM_FL_TEST_LOCK in modes CR, CW or PR. Returns the result of
+ * ldlm_lock_match().
+ */
+static int inode_have_md_lock(struct inode *inode, __u64 inodebits)
+{
+        ldlm_policy_data_t policy = { .l_inodebits = {inodebits}};
+        struct ldlm_res_id res_id = { .name = {0} };
+        struct obd_export *exp = ll_i2mdexp(inode);
+        struct lustre_handle lockh;
+        int flags, rc;
+        ENTRY;
+
+        res_id.name[0] = inode->i_ino;
+        res_id.name[1] = inode->i_generation;
+
+        CDEBUG(D_SEC, "trying to match res "LPU64"\n", res_id.name[0]);
+
+        flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
+        rc = ldlm_lock_match(exp->exp_obd->obd_namespace, flags, &res_id,
+                             LDLM_IBITS, &policy,
+                             LCK_CR|LCK_CW|LCK_PR, &lockh);
+        RETURN(rc);
+}
+/* Detach a client capa from its inode and from the c_list it is on, then
+ * free it.
+ *
+ * For an MDS capa one reference is dropped with capa_put() and
+ * lli->lli_mds_capa is cleared; for an OSS capa the entry is unlinked from
+ * the inode's lli_oss_capas list.
+ * Every caller in this file invokes it with capa_lock held.
+ * NOTE(review): presumably the capa_put() balances the capa_get() taken
+ * when the capa was stored in lli_mds_capa -- confirm.
+ */
+static void ll_delete_capa(struct obd_capa *ocapa)
+{
+ struct ll_inode_info *lli = ll_i2info(ocapa->u.cli.inode);
+
+ if (capa_for_mds(&ocapa->c_capa)) {
+ capa_put(ocapa);
+ LASSERT(lli->lli_mds_capa == ocapa);
+ lli->lli_mds_capa = NULL;
+ } else if (capa_for_oss(&ocapa->c_capa)) {
+ list_del_init(&ocapa->u.cli.lli_list);
+ }
+
+ DEBUG_CAPA(D_SEC, &ocapa->c_capa, "free client");
+ list_del(&ocapa->c_list);
+ free_capa(ocapa);
+}
+
+/* three places where client capa is deleted:
+ * 1. capa_thread_main(), main place to delete expired capa.
+ * 2. ll_clear_inode_capas() in ll_clear_inode().
+ * 3. ll_truncate_free_capa() delete truncate capa explicitly in ll_truncate().
+ *
+ * capa_thread_main() is the body of the "ll_capa" kernel thread. It sleeps
+ * until have_expired_capa() is true or a stop is requested, then:
+ *  (a) walks ll_capa_list; every capa that is about to expire is either
+ *      moved to ll_idle_capas (MDS capa without LOOKUP lock and not root, or
+ *      OSS capa with open_count == 0), deleted (igrab() failed), or sent for
+ *      renewal with md_renew_capa() and ll_update_capa() as callback;
+ *  (b) walks ll_idle_capas and deletes expired capas with no references;
+ *      expired capas that are still referenced are marked expired and
+ *      unlinked from the list instead.
+ * The thread sets SVC_RUNNING on start and SVC_STOPPED on exit, waking
+ * t_ctl_waitq each time. Always returns 0.
+ */
+static int capa_thread_main(void *unused)
+{
+ struct obd_capa *ocapa, *tmp, *next;
+ struct inode *inode = NULL;
+ struct l_wait_info lwi = { 0 };
+ int rc;
+ ENTRY;
+
+ cfs_daemonize("ll_capa");
+
+ ll_capa_thread.t_flags = SVC_RUNNING;
+ wake_up(&ll_capa_thread.t_ctl_waitq);
+
+ while (1) {
+ l_wait_event(ll_capa_thread.t_ctl_waitq,
+ (ll_capa_check_stop() || have_expired_capa()),
+ &lwi);
+
+ if (ll_capa_check_stop())
+ break;
+
+ spin_lock(&capa_lock);
+ next = NULL;
+ list_for_each_entry_safe(ocapa, tmp, ll_capa_list, c_list) {
+ LASSERT(ocapa->c_capa.lc_opc != CAPA_OPC_OSS_TRUNC);
+
+ if (!capa_is_to_expire(ocapa)) {
+ next = ocapa; /* first capa not yet due: used below to
+                * re-arm the timer, and ends the scan */
+ break;
+ }
+
+ if (capa_for_mds(&ocapa->c_capa) &&
+ !ll_have_md_lock(ocapa->u.cli.inode,
+ MDS_INODELOCK_LOOKUP) &&
+ !obd_capa_is_root(ocapa)) {
+ /* fid capa without LOOKUP lock won't renew,
+ * move to idle list (except root fid) */
+ DEBUG_CAPA(D_SEC, &ocapa->c_capa,
+ "skip renewal for");
+ list_del_init(&ocapa->c_list);
+ sort_add_capa(ocapa, &ll_idle_capas);
+ continue;
+ }
+
+ if (capa_for_oss(&ocapa->c_capa) &&
+ atomic_read(&ocapa->u.cli.open_count) == 0) {
+ /* oss capa with open_count == 0 won't renew,
+ * move to idle list */
+ list_del_init(&ocapa->c_list);
+ sort_add_capa(ocapa, &ll_idle_capas);
+ continue;
+ }
+
+ /* NB iput() is in ll_update_capa() */
+ inode = igrab(ocapa->u.cli.inode);
+ if (inode == NULL) {
+ DEBUG_CAPA(D_SEC, &ocapa->c_capa,
+ "igrab failed for");
+ ll_delete_capa(ocapa);
+ continue;
+ }
+
+ list_del_init(&ocapa->c_list);
+ capa_get(ocapa);
+ spin_unlock(&capa_lock); /* NOTE(review): capa_lock is dropped around
+                           * the renewal call while the _safe iterator
+                           * still holds 'tmp' -- confirm 'tmp' cannot be
+                           * unlinked or freed meanwhile */
+
+ rc = md_renew_capa(ll_i2mdexp(inode), ocapa,
+ ll_update_capa);
+ spin_lock(&capa_lock);
+ if (rc) /* the renewal call itself failed: park the capa on the idle
+          * list. NOTE(review): the igrab()/capa_get() references taken
+          * above are not dropped on this path here -- confirm who does */
+ sort_add_capa(ocapa, &ll_idle_capas);
+ }
+
+ if (next)
+ update_capa_timer(next, capa_renewal_time(next));
+
+ list_for_each_entry_safe(ocapa, tmp, &ll_idle_capas, c_list) {
+ LASSERT(atomic_read(&ocapa->u.cli.open_count) == 0);
+
+ if (!capa_is_expired(ocapa)) {
+ if (!next)
+ update_capa_timer(ocapa, ocapa->c_expiry);
+ break;
+ }
+
+ if (atomic_read(&ocapa->c_refc)) {
+ DEBUG_CAPA(D_SEC, &ocapa->c_capa,
+ "expired(c_refc %d), don't release",
+ atomic_read(&ocapa->c_refc));
+ obd_capa_set_expired(ocapa);
+ /* don't try to renew any more */
+ list_del_init(&ocapa->c_list);
+ continue;
+ }
+
+ /* expired capa is released. */
+ DEBUG_CAPA(D_SEC, &ocapa->c_capa, "release expired");
+ ll_delete_capa(ocapa);
+ }
+
+ spin_unlock(&capa_lock);
+ }
+
+ ll_capa_thread.t_flags = SVC_STOPPED;
+ wake_up(&ll_capa_thread.t_ctl_waitq);
+ RETURN(0);
+}
+/* Timer callback: wakes whoever sleeps on the capa thread's control wait
+ * queue so that the thread re-evaluates its wake-up condition.
+ * NOTE(review): presumably installed as the handler of ll_capa_timer by the
+ * module setup code -- confirm; @unused is ignored.
+ */
+void ll_capa_timer_callback(unsigned long unused)
+{
+ wake_up(&ll_capa_thread.t_ctl_waitq);
+}
+
+/* Start the capa renewal thread and wait until it reports SVC_RUNNING.
+ *
+ * Returns 0 on success, or the negative value returned by kernel_thread()
+ * when the thread could not be created.
+ */
+int ll_capa_thread_start(void)
+{
+        int pid;
+        ENTRY;
+
+        init_waitqueue_head(&ll_capa_thread.t_ctl_waitq);
+
+        pid = kernel_thread(capa_thread_main, NULL, 0);
+        if (pid >= 0) {
+                /* capa_thread_main() sets SVC_RUNNING and wakes us */
+                wait_event(ll_capa_thread.t_ctl_waitq,
+                           ll_capa_thread.t_flags & SVC_RUNNING);
+
+                RETURN(0);
+        }
+
+        CERROR("cannot start expired capa thread: rc %d\n", pid);
+        RETURN(pid);
+}
+/* Ask the capa thread to stop and block until it has done so: sets
+ * SVC_STOPPING, wakes the thread, then waits for capa_thread_main() to set
+ * SVC_STOPPED.
+ */
+void ll_capa_thread_stop(void)
+{
+ ll_capa_thread.t_flags = SVC_STOPPING;
+ wake_up(&ll_capa_thread.t_ctl_waitq);
+ wait_event(ll_capa_thread.t_ctl_waitq,
+ ll_capa_thread.t_flags & SVC_STOPPED);
+}
+
+/* Find a valid OSS capa of @inode whose opcode mask covers every bit set in
+ * @opc.
+ *
+ * Walks lli_oss_capas and returns the first valid entry for which
+ * (capa_opc & opc) == opc, or NULL when there is none. No reference is taken
+ * on the returned capa. The callers in this file hold capa_lock.
+ */
+static struct obd_capa *do_lookup_oss_capa(struct inode *inode, int opc)
+{
+        struct ll_inode_info *lli = ll_i2info(inode);
+        struct obd_capa *ocapa;
+
+        /* inside capa_lock */
+        list_for_each_entry(ocapa, &lli->lli_oss_capas, u.cli.lli_list) {
+                if (!obd_capa_is_valid(ocapa))
+                        continue;
+                /* skip capas that do NOT grant all requested operations
+                 * (the test used to be inverted and skipped the matches) */
+                if ((capa_opc(&ocapa->c_capa) & opc) != opc)
+                        continue;
+
+                LASSERT(lu_fid_eq(capa_fid(&ocapa->c_capa),
+                                  ll_inode2fid(inode)));
+                LASSERT(ocapa->c_site == CAPA_SITE_CLIENT);
+
+                DEBUG_CAPA(D_SEC, &ocapa->c_capa, "found client");
+                return ocapa;
+        }
+
+        return NULL;
+}
+
+/* Look up a valid OSS capa of @inode that supports @opc and return it with
+ * an extra reference.
+ *
+ * @opc must be CAPA_OPC_OSS_WRITE, CAPA_OPC_OSS_WRITE | CAPA_OPC_OSS_READ or
+ * CAPA_OPC_OSS_TRUNC.
+ *
+ * Returns NULL when OSS capabilities are not enabled for the superblock or
+ * when no matching capa exists; otherwise the capa, which the caller must
+ * release with capa_put().
+ */
+struct obd_capa *ll_lookup_oss_capa(struct inode *inode, __u64 opc)
+{
+        struct ll_inode_info *lli = ll_i2info(inode);
+        struct obd_capa *ocapa;
+        int found = 0;
+
+        if ((ll_i2sbi(inode)->ll_flags & LL_SBI_OSS_CAPA) == 0)
+                return NULL;
+        ENTRY;
+        LASSERT(opc == CAPA_OPC_OSS_WRITE ||
+                opc == (CAPA_OPC_OSS_WRITE | CAPA_OPC_OSS_READ) ||
+                opc == CAPA_OPC_OSS_TRUNC);
+
+        spin_lock(&capa_lock);
+        list_for_each_entry(ocapa, &lli->lli_oss_capas, u.cli.lli_list) {
+                if (!obd_capa_is_valid(ocapa))
+                        continue;
+                /* the write, read and trunc cases all apply the same test */
+                if ((opc & (CAPA_OPC_OSS_WRITE | CAPA_OPC_OSS_READ |
+                            CAPA_OPC_OSS_TRUNC)) &&
+                    capa_opc_supported(&ocapa->c_capa, opc)) {
+                        found = 1;
+                        break;
+                }
+        }
+
+        if (found) {
+                LASSERT(lu_fid_eq(capa_fid(&ocapa->c_capa),
+                                  ll_inode2fid(inode)));
+                LASSERT(ocapa->c_site == CAPA_SITE_CLIENT);
+
+                capa_get(ocapa);
+
+                DEBUG_CAPA(D_SEC, &ocapa->c_capa, "found client");
+        } else {
+                /* after a full traversal the iterator does not point at a
+                 * real capa; never hand it back to the caller */
+                ocapa = NULL;
+
+                if (atomic_read(&ll_capa_debug)) {
+                        CERROR("no capability for "DFID" opc "LPX64"\n",
+                               PFID(&lli->lli_fid), opc);
+                        atomic_set(&ll_capa_debug, 0);
+                }
+        }
+        spin_unlock(&capa_lock);
+        RETURN(ocapa);
+}
+/* Return the MDS capa attached to @inode with an extra reference, or NULL.
+ *
+ * NULL is returned when MDS capabilities are not enabled for the superblock
+ * (LL_SBI_MDS_CAPA clear), when the inode has no MDS capa, or when the capa
+ * is not valid (the reference just taken is dropped again in that case).
+ * Callers in this patch release the result with capa_put().
+ *
+ * When no capa is returned and ll_capa_debug is set, the miss is logged once
+ * and the function LBUG()s if the inode nevertheless holds a LOOKUP lock.
+ */
+struct obd_capa *ll_i2mdscapa(struct inode *inode)
+{
+ struct obd_capa *ocapa;
+
+ LASSERT(inode);
+ if ((ll_i2sbi(inode)->ll_flags & LL_SBI_MDS_CAPA) == 0)
+ return NULL;
+
+ spin_lock(&capa_lock);
+ ocapa = capa_get(ll_i2info(inode)->lli_mds_capa);
+ spin_unlock(&capa_lock);
+ if (ocapa && !obd_capa_is_valid(ocapa)) {
+ DEBUG_CAPA(D_ERROR, &ocapa->c_capa, "invalid");
+ capa_put(ocapa);
+ ocapa = NULL;
+ }
+
+ if (!ocapa && atomic_read(&ll_capa_debug)) {
+ CDEBUG(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ?
+ D_ERROR : D_SEC, "no MDS capa for (ino %lu)\n",
+ inode->i_ino);
+ if (inode_have_md_lock(inode, MDS_INODELOCK_LOOKUP))
+ LBUG();
+ atomic_set(&ll_capa_debug, 0);
+ }
+
+ return ocapa;
+}
+/* Attach the MDS capa *@pcapa to @inode, or refresh the one already there.
+ *
+ * - No capa on the inode yet: *@pcapa is bound to the inode, stored in
+ *   lli_mds_capa with a reference, its "new" flag cleared and "valid" set.
+ * - Capa already present: if both carry the same lc_expiry nothing is copied
+ *   and -EEXIST is returned; otherwise the new lustre_capa is copied into
+ *   the existing obd_capa under its c_lock and it is marked valid. In both
+ *   of these cases the passed-in capa is freed and *@pcapa is redirected to
+ *   the existing one.
+ *
+ * Returns 0 or -EEXIST. Called by ll_add_capa() with capa_lock held.
+ */
+static inline int do_add_mds_capa(struct inode *inode, struct obd_capa **pcapa)
+{
+ struct ll_inode_info *lli = ll_i2info(inode);
+ struct obd_capa *old = lli->lli_mds_capa;
+ struct obd_capa *ocapa = *pcapa;
+ int rc = 0;
+
+ if (!old) {
+ ocapa->u.cli.inode = inode;
+ lli->lli_mds_capa = capa_get(ocapa);
+ obd_capa_clear_new(ocapa);
+ obd_capa_set_valid(ocapa);
+
+ DEBUG_CAPA(D_SEC, &ocapa->c_capa, "add fid");
+ } else {
+ if (ocapa->c_capa.lc_expiry == old->c_capa.lc_expiry) {
+ rc = -EEXIST;
+ } else {
+ spin_lock(&old->c_lock);
+ old->c_capa = ocapa->c_capa;
+ obd_capa_set_valid(old);
+ spin_unlock(&old->c_lock);
+
+ DEBUG_CAPA(D_SEC, &old->c_capa, "update fid");
+ }
+
+ free_capa(ocapa);
+ *pcapa = old;
+ }
+
+ return rc;
+}
+
+/* (Re)position @ocapa in the lli_oss_capas list of @inode.
+ *
+ * The capa is moved in front of the first entry whose expiry is earlier than
+ * its own, or to the tail of the list when there is no such entry, so that a
+ * lookup meets the latest capa first.
+ */
+static inline void inode_add_oss_capa(struct inode *inode,
+                                      struct obd_capa *ocapa)
+{
+        struct list_head *oss_capas = &ll_i2info(inode)->lli_oss_capas;
+        struct list_head *insert_before = oss_capas;
+        struct obd_capa *cur;
+
+        /* capa is sorted in lli_oss_capas so lookup can always find the
+         * latest one */
+        list_for_each_entry(cur, oss_capas, u.cli.lli_list) {
+                if (!cfs_time_after(ocapa->c_expiry, cur->c_expiry))
+                        continue;
+
+                insert_before = &cur->u.cli.lli_list;
+                break;
+        }
+
+        list_move_tail(&ocapa->u.cli.lli_list, insert_before);
+}
+/* Attach the OSS capa *@pcapa to @inode, or refresh a matching existing one.
+ *
+ * An existing capa is searched with do_lookup_oss_capa() using the OSS-only
+ * bits of the new capa's lc_opc.
+ * - None found: *@pcapa is bound to the inode, its open_count reset to 0,
+ *   its lli_list initialised and it is marked valid.
+ * - Found: if both carry the same lc_expiry nothing is copied and -EEXIST is
+ *   returned; otherwise the new lustre_capa is copied into the existing
+ *   obd_capa under its c_lock and it is marked valid. In both of these cases
+ *   the passed-in capa is freed and *@pcapa is redirected to the existing
+ *   one.
+ * On success the resulting capa is (re)inserted into lli_oss_capas.
+ *
+ * Asserts that @inode is a regular file. Returns 0 or -EEXIST. Called by
+ * ll_add_capa() with capa_lock held.
+ */
+static inline int do_add_oss_capa(struct inode *inode, struct obd_capa **pcapa)
+{
+ struct obd_capa *old, *ocapa = *pcapa;
+ struct lustre_capa *capa = &ocapa->c_capa;
+ int rc = 0;
+
+ LASSERTF(S_ISREG(inode->i_mode),
+ "inode has oss capa, but not regular file, mode: %d\n",
+ inode->i_mode);
+
+ /* FIXME: can't replace it so easily with fine-grained opc */
+ old = do_lookup_oss_capa(inode, capa->lc_opc & CAPA_OPC_OSS_ONLY);
+ if (!old) {
+ ocapa->u.cli.inode = inode;
+ atomic_set(&ocapa->u.cli.open_count, 0);
+ INIT_LIST_HEAD(&ocapa->u.cli.lli_list);
+ obd_capa_set_valid(ocapa);
+
+ DEBUG_CAPA(D_SEC, capa, "add oss");
+ } else {
+ if (old->c_capa.lc_expiry == capa->lc_expiry) {
+ rc = -EEXIST;
+ } else {
+ spin_lock(&old->c_lock);
+ old->c_capa = *capa;
+ obd_capa_set_valid(old);
+ spin_unlock(&old->c_lock);
+
+ DEBUG_CAPA(D_SEC, capa, "update oss");
+ }
+
+ free_capa(ocapa); /* 'capa' points into ocapa and is dead from here */
+ *pcapa = old;
+ }
+
+ if (!rc)
+ inode_add_oss_capa(inode, *pcapa);
+ return rc;
+}
+
+/* Register the freshly received capa @ocapa for @inode.
+ *
+ * Dispatches to do_add_mds_capa() or do_add_oss_capa(), either of which may
+ * free @ocapa and substitute the capa already attached to the inode. Unless
+ * nothing changed or the capa is a truncate capa, the resulting capa is
+ * (re)queued on ll_capa_list, its expiry recomputed and the renewal timer
+ * updated. ll_capa_debug is switched on.
+ *
+ * Returns the capa now attached to the inode.
+ */
+struct obd_capa *ll_add_capa(struct inode *inode, struct obd_capa *ocapa)
+{
+        int rc;
+
+        spin_lock(&capa_lock);
+        if (capa_for_mds(&ocapa->c_capa))
+                rc = do_add_mds_capa(inode, &ocapa);
+        else
+                rc = do_add_oss_capa(inode, &ocapa);
+
+        /* truncate capa won't renew, or no existed capa changed, don't update
+         * capa timer. */
+        if (rc == 0 && ocapa->c_capa.lc_opc != CAPA_OPC_OSS_TRUNC) {
+                list_del_init(&ocapa->c_list);
+                sort_add_capa(ocapa, ll_capa_list);
+
+                spin_lock(&ocapa->c_lock);
+                set_capa_expiry(ocapa);
+                spin_unlock(&ocapa->c_lock);
+                update_capa_timer(ocapa, capa_renewal_time(ocapa));
+        }
+
+        atomic_set(&ll_capa_debug, 1);
+        spin_unlock(&capa_lock);
+
+        return ocapa;
+}
+
+
+/* Renewal completion callback, passed to md_renew_capa() by the capa thread.
+ *
+ * @ocapa: the client capa that was sent for renewal; must not be NULL.
+ * @capa:  the renewed capability, or an ERR_PTR() describing the failure.
+ *
+ * On failure with -EIO the capa is put back on ll_capa_list and the timer is
+ * set to retry in 60 seconds; on any other failure the capa goes to
+ * ll_idle_capas. On success the new capability is copied into @ocapa, its
+ * expiry recomputed, and it is re-queued on ll_capa_list (and re-sorted in
+ * the inode's OSS list for read/write capas).
+ *
+ * Always drops one reference on @ocapa and one on its inode before
+ * returning. Returns 0 on success or the error carried by @capa.
+ */
+int ll_update_capa(struct obd_capa *ocapa, struct lustre_capa *capa)
+{
+        struct inode *inode;
+        cfs_time_t expiry;
+        int rc = 0;
+
+        /* check before the first dereference, not after it */
+        LASSERT(ocapa);
+        inode = ocapa->u.cli.inode;
+
+        if (IS_ERR(capa)) {
+                /* set error code */
+                rc = PTR_ERR(capa);
+                /* failed capa won't be renewed any longer, but if -EIO, client
+                 * might be doing recovery, retry in 1 min. */
+                spin_lock(&capa_lock);
+                if (rc == -EIO) {
+                        expiry = cfs_time_current() + cfs_time_seconds(60);
+                        DEBUG_CAPA(D_SEC, &ocapa->c_capa,
+                                   "renewal failed: -EIO, retry in 1 min");
+                        /* capa_lock stays held; released after 'retry' */
+                        goto retry;
+                } else {
+                        sort_add_capa(ocapa, &ll_idle_capas);
+                }
+                spin_unlock(&capa_lock);
+
+                DEBUG_CAPA(rc == -ENOENT ? D_SEC : D_ERROR, &ocapa->c_capa,
+                           "renewal failed(rc: %d) for", rc);
+                goto out;
+        }
+
+        LASSERT(!memcmp(&ocapa->c_capa, capa,
+                        offsetof(struct lustre_capa, lc_flags)));
+
+        spin_lock(&ocapa->c_lock);
+        ocapa->c_capa = *capa;
+        set_capa_expiry(ocapa);
+        spin_unlock(&ocapa->c_lock);
+
+        spin_lock(&capa_lock);
+        if (capa->lc_opc & (CAPA_OPC_OSS_READ | CAPA_OPC_OSS_WRITE))
+                inode_add_oss_capa(inode, ocapa);
+        DEBUG_CAPA(D_SEC, capa, "renew");
+
+        expiry = capa_renewal_time(ocapa);
+retry:
+        sort_add_capa(ocapa, ll_capa_list);
+        update_capa_timer(ocapa, expiry);
+        spin_unlock(&capa_lock);
+
+out:
+        capa_put(ocapa);
+        iput(inode);
+        return rc;
+}
+
+/* Account an open of @file on the OSS capa of @inode that matches the open
+ * mode: the capa's open_count is incremented.
+ *
+ * Does nothing when OSS capabilities are disabled for the superblock or the
+ * inode is not a regular file. A missing capa is reported once while
+ * ll_capa_debug is set.
+ */
+void ll_oss_capa_open(struct inode *inode, struct file *file)
+{
+        int opc = capa_open_opc(open_flags_to_accmode(file->f_flags));
+        struct obd_capa *ocapa;
+
+        if ((ll_i2sbi(inode)->ll_flags & LL_SBI_OSS_CAPA) == 0 ||
+            !S_ISREG(inode->i_mode))
+                return;
+
+        spin_lock(&capa_lock);
+        ocapa = do_lookup_oss_capa(inode, opc);
+        if (ocapa != NULL) {
+                atomic_inc(&ocapa->u.cli.open_count);
+                spin_unlock(&capa_lock);
+
+                DEBUG_CAPA(D_SEC, &ocapa->c_capa, "open (count: %d)",
+                           atomic_read(&ocapa->u.cli.open_count));
+                return;
+        }
+
+        if (atomic_read(&ll_capa_debug)) {
+                CDEBUG(D_ERROR, "no capa for (uid %u op %d ino %lu)\n",
+                       (unsigned)current->uid, opc, inode->i_ino);
+                atomic_set(&ll_capa_debug, 0);
+        }
+        spin_unlock(&capa_lock);
+}
+
+/* Account a close of @file on the OSS capa of @inode that matches the open
+ * mode: the capa's open_count is decremented.
+ *
+ * Does nothing when OSS capabilities are disabled for the superblock, the
+ * inode is not a regular file, or no matching capa is found.
+ */
+void ll_oss_capa_close(struct inode *inode, struct file *file)
+{
+        int opc = capa_open_opc(open_flags_to_accmode(file->f_flags));
+        struct obd_capa *ocapa;
+
+        if ((ll_i2sbi(inode)->ll_flags & LL_SBI_OSS_CAPA) == 0 ||
+            !S_ISREG(inode->i_mode))
+                return;
+
+        spin_lock(&capa_lock);
+        ocapa = do_lookup_oss_capa(inode, opc);
+        if (ocapa != NULL)
+                atomic_dec(&ocapa->u.cli.open_count);
+        spin_unlock(&capa_lock);
+
+        if (ocapa == NULL)
+                return;
+
+        DEBUG_CAPA(D_SEC, &ocapa->c_capa, "close (count: %d)",
+                   atomic_read(&ocapa->u.cli.open_count));
+}
+
+/* delete CAPA_OPC_OSS_TRUNC only
+ *
+ * Drops the caller's reference on the truncate capa @ocapa and deletes the
+ * capa, both under capa_lock. A NULL @ocapa is accepted and ignored;
+ * passing a capa without CAPA_OPC_OSS_TRUNC in lc_opc trips an assertion.
+ */
+void ll_truncate_free_capa(struct obd_capa *ocapa)
+{
+        if (!ocapa)
+                return;
+
+        LASSERT(ocapa->c_capa.lc_opc & CAPA_OPC_OSS_TRUNC);
+        DEBUG_CAPA(D_SEC, &ocapa->c_capa, "release truncate");
+
+        spin_lock(&capa_lock);
+        capa_put(ocapa);
+        ll_delete_capa(ocapa);
+        spin_unlock(&capa_lock);
+}
+/* Delete every capa attached to @inode: the MDS capa, if any, and then all
+ * entries of lli_oss_capas, each via ll_delete_capa() under capa_lock.
+ * The _safe iterator is needed because ll_delete_capa() unlinks and frees
+ * the current entry.
+ */
+void ll_clear_inode_capas(struct inode *inode)
+{
+ struct ll_inode_info *lli = ll_i2info(inode);
+ struct obd_capa *ocapa, *tmp;
+
+ spin_lock(&capa_lock);
+ ocapa = lli->lli_mds_capa;
+ if (ocapa)
+ ll_delete_capa(ocapa);
+
+ list_for_each_entry_safe(ocapa, tmp, &lli->lli_oss_capas,
+ u.cli.lli_list)
+ ll_delete_capa(ocapa);
+ spin_unlock(&capa_lock);
+}
/* identifying fields for both metadata and data stacks. */
struct lu_fid lli_fid;
struct lov_stripe_md *lli_smd;
+
+ /* fid capability */
+ struct obd_capa *lli_mds_capa;
+ /* oss capability list */
+ struct list_head lli_oss_capas;
};
/*
};
/* flags for sbi->ll_flags */
-#define LL_SBI_NOLCK 0x01 /* DLM locking disabled (directio-only) */
-#define LL_SBI_CHECKSUM 0x02 /* checksum each page as it's written */
-#define LL_SBI_FLOCK 0x04
-#define LL_SBI_USER_XATTR 0x08 /* support user xattr */
-#define LL_SBI_ACL 0x10 /* support ACL */
-#define LL_SBI_JOIN 0x20 /* support JOIN */
-#define LL_SBI_RMT_CLIENT 0x40 /* remote client */
+#define LL_SBI_NOLCK 0x01 /* DLM locking disabled (directio-only) */
+#define LL_SBI_CHECKSUM 0x02 /* checksum each page as it's written */
+#define LL_SBI_FLOCK 0x04
+#define LL_SBI_USER_XATTR 0x08 /* support user xattr */
+#define LL_SBI_ACL 0x10 /* support ACL */
+#define LL_SBI_JOIN 0x20 /* support JOIN */
+#define LL_SBI_RMT_CLIENT 0x40 /* remote client */
+#define LL_SBI_MDS_CAPA 0x80 /* support mds capa */
+#define LL_SBI_OSS_CAPA 0x100 /* support oss capa */
struct ll_sb_info {
struct list_head ll_list;
struct dentry *ll_find_alias(struct inode *, struct dentry *);
int ll_md_blocking_ast(struct ldlm_lock *, struct ldlm_lock_desc *,
void *data, int flag);
-void ll_prepare_md_op_data(struct md_op_data *op_data, struct inode *i1,
- struct inode *i2, const char *name, int namelen,
- int mode);
int ll_md_cancel_unused(struct lustre_handle *, struct inode *, int flags,
void *opaque);
#ifndef LUSTRE_KERNEL_VERSION
int ll_process_config(struct lustre_cfg *lcfg);
int ll_ioctl_getfacl(struct inode *inode, struct rmtacl_ioctl_data *ioc);
int ll_ioctl_setfacl(struct inode *inode, struct rmtacl_ioctl_data *ioc);
+void ll_prepare_md_op_data(struct md_op_data *op_data, struct inode *i1,
+ struct inode *i2, const char *name, int namelen,
+ int mode);
+void ll_finish_md_op_data(struct md_op_data *op_data);
/* llite/llite_nfs.c */
extern struct export_operations lustre_export_operations;
ino_t ll_fid_build_ino(struct ll_sb_info *sbi, struct lu_fid *fid);
+/* llite/llite_capa.c */
+extern cfs_timer_t ll_capa_timer;
+
+int ll_capa_thread_start(void);
+void ll_capa_thread_stop(void);
+void ll_capa_timer_callback(unsigned long unused);
+struct obd_capa *ll_lookup_oss_capa(struct inode *inode, __u64 opc);
+struct obd_capa *ll_add_capa(struct inode *inode, struct obd_capa *ocapa);
+void ll_oss_capa_open(struct inode *inode, struct file *file);
+void ll_oss_capa_close(struct inode *inode, struct file *file);
+int ll_update_capa(struct obd_capa *ocapa, struct lustre_capa *capa);
+void ll_truncate_free_capa(struct obd_capa *ocapa);
+void ll_clear_inode_capas(struct inode *inode);
+struct obd_capa *ll_i2mdscapa(struct inode *inode);
+
#endif /* LLITE_INTERNAL_H */
struct ll_sb_info *sbi = ll_s2sbi(sb);
struct obd_device *obd;
struct lu_fid rootfid;
+ struct obd_capa *pc = NULL;
struct obd_statfs osfs;
struct ptlrpc_request *request = NULL;
struct lustre_handle dt_conn = {0, };
/* indicate the features supported by this client */
data->ocd_connect_flags = OBD_CONNECT_IBITS | OBD_CONNECT_NODEVOH |
OBD_CONNECT_ACL | OBD_CONNECT_JOIN |
- OBD_CONNECT_ATTRFID | OBD_CONNECT_VERSION;
+ OBD_CONNECT_ATTRFID | OBD_CONNECT_VERSION;/* |
+ OBD_CONNECT_MDS_CAPA | OBD_CONNECT_OSS_CAPA;*/
data->ocd_ibits_known = MDS_INODELOCK_FULL;
data->ocd_version = LUSTRE_VERSION_CODE;
sbi->ll_flags &= ~LL_SBI_RMT_CLIENT;
}
+ if (data->ocd_connect_flags & OBD_CONNECT_MDS_CAPA) {
+ CDEBUG(D_SEC, "client enabled fid capa!\n");
+ sbi->ll_flags |= LL_SBI_MDS_CAPA;
+ }
+
+ if (data->ocd_connect_flags & OBD_CONNECT_OSS_CAPA) {
+ CDEBUG(D_SEC, "client enabled oss capa!\n");
+ sbi->ll_flags |= LL_SBI_OSS_CAPA;
+ }
+
#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0))
/* We set sb->s_dev equal on all lustre clients in order to support
* NFS export clustering. NFSD requires that the FSID be the same
data->ocd_connect_flags = OBD_CONNECT_GRANT | OBD_CONNECT_VERSION |
OBD_CONNECT_REQPORTAL | OBD_CONNECT_BRW_SIZE;
+ if (sbi->ll_flags & LL_SBI_OSS_CAPA)
+ data->ocd_connect_flags |= OBD_CONNECT_OSS_CAPA;
CDEBUG(D_RPCTRACE, "ocd_connect_flags: "LPX64" ocd_version: %d "
"ocd_grant: %d\n", data->ocd_connect_flags,
GOTO(out_dt, err);
}
- err = md_getstatus(sbi->ll_md_exp, &rootfid);
+ err = md_getstatus(sbi->ll_md_exp, &rootfid, &pc);
if (err) {
CERROR("cannot mds_connect: rc = %d\n", err);
GOTO(out_dt_fid, err);
/* make root inode
* XXX: move this to after cbd setup? */
- err = md_getattr(sbi->ll_md_exp, &rootfid,
+ err = md_getattr(sbi->ll_md_exp, &rootfid, pc,
OBD_MD_FLGETATTR | OBD_MD_FLBLOCKS |
(sbi->ll_flags & LL_SBI_ACL ? OBD_MD_FLACL : 0),
0, &request);
if (err) {
CERROR("md_getattr failed for root: rc = %d\n", err);
+ if (pc)
+ free_capa(pc);
GOTO(out_dt, err);
}
&lmd);
if (err) {
CERROR("failed to understand root inode md: rc = %d\n", err);
+ if (pc)
+ free_capa(pc);
ptlrpc_req_finished (request);
GOTO(out_dt, err);
}
+ if (pc) {
+ obd_capa_set_root(pc);
+ lmd.mds_capa = pc;
+ lmd.body->valid |= OBD_MD_FLMDSCAPA;
+ }
LASSERT(fid_is_sane(&sbi->ll_root_fid));
root = ll_iget(sb, ll_fid_build_ino(sbi, &sbi->ll_root_fid), &lmd);
switch(cmd) {
case EXT3_IOC_GETFLAGS: {
struct mdt_body *body;
+ struct obd_capa *oc;
- rc = md_getattr(sbi->ll_md_exp, ll_inode2fid(inode),
+ oc = ll_i2mdscapa(inode);
+ rc = md_getattr(sbi->ll_md_exp, ll_inode2fid(inode), oc,
OBD_MD_FLFLAGS, 0, &req);
+ capa_put(oc);
if (rc) {
CERROR("failure %d inode %lu\n", rc, inode->i_ino);
RETURN(-abs(rc));
return(rc);
}
+/* This function prepares the md_op_data hint for passing it down to the MD
+ * stack.
+ *
+ * It fills @op_data with the supplementary gids computed by ll_i2gids(), the
+ * fid and MDS capability of @i1 (and of @i2 when @i2 is given), the entry
+ * @name/@namelen, the create @mode, the current time and the fsuid, fsgid and
+ * effective capability set of the current task.
+ *
+ * ->mod_capa1 and ->mod_capa2 are later handed to capa_put() by
+ * ll_finish_md_op_data().
+ *
+ * NOTE(review): when @i2 is NULL ->mod_capa2 is not written here, yet
+ * ll_finish_md_op_data() passes it to capa_put() unconditionally; presumably
+ * callers zero @op_data before calling this - confirm. */
+void ll_prepare_md_op_data(struct md_op_data *op_data, struct inode *i1,
+ struct inode *i2, const char *name, int namelen,
+ int mode)
+{
+ LASSERT(i1 != NULL);
+ LASSERT(op_data != NULL);
+
+ ll_i2gids(op_data->suppgids, i1, i2);
+ op_data->fid1 = ll_i2info(i1)->lli_fid;
+ op_data->mod_capa1 = ll_i2mdscapa(i1);
+
+ /* @i2 may be NULL. In this case caller itself has to initialize ->fid2
+ * if needed. */
+ if (i2) {
+ op_data->fid2 = *ll_inode2fid(i2);
+ op_data->mod_capa2 = ll_i2mdscapa(i2);
+ }
+
+ op_data->name = name;
+ op_data->namelen = namelen;
+ op_data->create_mode = mode;
+ op_data->mod_time = CURRENT_SECONDS;
+ op_data->fsuid = current->fsuid;
+ op_data->fsgid = current->fsgid;
+ op_data->cap = current->cap_effective;
+}
+
+/* Counterpart of ll_prepare_md_op_data(): hands both MDS capabilities stored
+ * in @op_data (->mod_capa1 first, then ->mod_capa2) to capa_put(). */
+void ll_finish_md_op_data(struct md_op_data *op_data)
+{
+        struct obd_capa *first = op_data->mod_capa1;
+        struct obd_capa *second = op_data->mod_capa2;
+
+        capa_put(first);
+        capa_put(second);
+}
+
int ll_ioctl_getfacl(struct inode *inode, struct rmtacl_ioctl_data *ioc)
{
struct ptlrpc_request *req = NULL;
struct mds_body *body;
char *cmd, *buf;
+ struct obd_capa *oc;
int rc, buflen;
ENTRY;
if (copy_from_user(cmd, ioc->cmd, ioc->cmd_len))
GOTO(out, rc = -EFAULT);
- rc = md_getxattr(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode),
+ oc = ll_i2mdscapa(inode);
+ rc = md_getxattr(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), oc,
OBD_MD_FLXATTR, XATTR_NAME_LUSTRE_ACL, cmd,
ioc->cmd_len, ioc->res_len, 0, &req);
+ capa_put(oc);
if (rc < 0) {
CERROR("mdc_getxattr %s [%s] failed: %d\n",
XATTR_NAME_LUSTRE_ACL, cmd, rc);
{
struct ptlrpc_request *req = NULL;
char *cmd, *buf;
+ struct obd_capa *oc;
int buflen, rc;
ENTRY;
if (copy_from_user(cmd, ioc->cmd, ioc->cmd_len))
GOTO(out, rc = -EFAULT);
- rc = md_setxattr(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode),
+ oc = ll_i2mdscapa(inode);
+ rc = md_setxattr(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), oc,
OBD_MD_FLXATTR, XATTR_NAME_LUSTRE_ACL, cmd,
ioc->cmd_len, ioc->res_len, 0, &req);
+ capa_put(oc);
if (rc) {
CERROR("mdc_setxattr %s [%s] failed: %d\n",
XATTR_NAME_LUSTRE_ACL, cmd, rc);
static struct inode *search_inode_for_lustre(struct super_block *sb,
struct lu_fid *fid,
+ struct lustre_capa *capa,
int mode)
{
struct ll_sb_info *sbi = ll_s2sbi(sb);
+ struct obd_capa *ocapa = NULL;
struct ptlrpc_request *req = NULL;
struct inode *inode = NULL;
unsigned long valid = 0;
valid |= OBD_MD_FLEASIZE;
}
- rc = md_getattr(sbi->ll_md_exp, fid, valid, eadatalen, &req);
+ if (capa) {
+ ocapa = alloc_capa(CAPA_SITE_CLIENT);
+ if (!ocapa)
+ return ERR_PTR(-ENOMEM);
+ ocapa->c_capa = *capa;
+ }
+
+ rc = md_getattr(sbi->ll_md_exp, fid, (struct obd_capa *)ocapa,
+ valid, eadatalen, &req);
+ free_capa(ocapa);
if (rc) {
CERROR("can't get object attrs, fid "DFID", rc %d\n",
PFID(fid), rc);
extern struct dentry_operations ll_d_ops;
static struct dentry *ll_iget_for_nfs(struct super_block *sb,
- struct lu_fid *fid, umode_t mode)
+ struct lu_fid *fid,
+ struct lustre_capa *capa,
+ umode_t mode)
{
struct inode *inode;
struct dentry *result;
if (!fid_is_sane(fid))
return ERR_PTR(-ESTALE);
- inode = search_inode_for_lustre(sb, fid, mode);
+ inode = search_inode_for_lustre(sb, fid, capa, mode);
if (IS_ERR(inode))
return ERR_PTR(PTR_ERR(inode));
return result;
}
+#if 0
static void ll_fh_to_fid(struct lu_fid *fid, __u32 *mode, __u32 *datap)
{
/* unpacking ->f_seq */
*lenp = 5;
return 1;
}
+#endif
#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
+/*
+ * Validate an NFS file handle and resolve it to a dentry.
+ *
+ * A handle is accepted only if it has type 1 and is at least long enough
+ * (counted in 4-byte units, rounded up) to carry a struct lu_fid followed by
+ * a struct lustre_capa.  Anything else yields ERR_PTR(-ESTALE).  Accepted
+ * handles are passed on to ->find_exported_dentry() together with
+ * @acceptable and @context.
+ */
+struct dentry *ll_decode_fh(struct super_block *sb, __u32 *fh, int fh_len,
+                            int fh_type,
+                            int (*acceptable)(void *, struct dentry *),
+                            void *context)
+{
+        int min_len = (sizeof(struct lu_fid) +
+                       sizeof(struct lustre_capa) + 3) / 4;
+
+        /* wrong handle type, or too short to hold fid + capability */
+        if (fh_type != 1 || fh_len < min_len)
+                return ERR_PTR(-ESTALE);
+
+        return sb->s_export_op->find_exported_dentry(sb, fh, NULL, acceptable,
+                                                     context);
+}
+
+/*
+ * Encode an NFS file handle for @de into @fh.
+ *
+ * The handle consists of the inode's struct lu_fid immediately followed by
+ * a struct lustre_capa slot; its length is counted in 4-byte units, rounded
+ * up.
+ *
+ * @plen: on entry the room available in @fh, on success the length used.
+ *
+ * Returns 1 on success (ll_decode_fh() accepts only handle type 1), or 255
+ * when @fh is too small, in which case nothing is written.
+ */
+int ll_encode_fh(struct dentry *de, __u32 *fh, int *plen, int connectable)
+{
+        struct inode *inode = de->d_inode;
+        struct lu_fid *fid = ll_inode2fid(inode);
+        struct obd_capa *ocapa;
+        int len = (sizeof(*fid) + sizeof(struct lustre_capa) + 3)/4;
+        char *p = (char *)fh;
+
+        if (*plen < len)
+                return 255;
+
+        memcpy(p, fid, sizeof(*fid));
+        p += sizeof(*fid);
+
+        /* Look the capability up only after the size check, so that the
+         * early return above cannot leak the reference. */
+        ocapa = ll_i2mdscapa(inode);
+        if (ocapa) {
+                capa_cpy(p, ocapa);
+                capa_put(ocapa);
+        } else {
+                /* No capability: clear the slot instead of exporting
+                 * whatever the buffer held, since ll_get_dentry() reads
+                 * ->lc_opc from it. */
+                memset(p, 0, sizeof(struct lustre_capa));
+        }
+        *plen = len;
+        return 1;
+}
+
struct dentry *ll_get_dentry(struct super_block *sb, void *data)
{
- __u32 *inump = (__u32*)data;
- struct lu_fid fid;
-
- /* FIXME: seems this is not enough */
- fid.f_seq = inump[0];
- fid.f_oid = inump[1];
+ char *p = (char *)data;
+ struct lu_fid *fid;
+ struct lustre_capa *capa;
+ /* @data is interpreted in place, without copying, as a struct lu_fid
+ * immediately followed by a struct lustre_capa - the same layout that
+ * ll_encode_fh() writes.
+ *
+ * NOTE(review): the capability is forwarded to ll_iget_for_nfs() only
+ * when its lc_opc field is 0, and NULL is passed otherwise; confirm
+ * this test is not inverted. */
+ fid = (struct lu_fid *)p;
+ capa = (struct lustre_capa *)(p + sizeof(*fid));
- return ll_iget_for_nfs(sb, &fid, S_IFREG);
+ return ll_iget_for_nfs(sb, fid, (capa->lc_opc == 0) ? capa : NULL,
+ S_IFREG);
}
struct dentry *ll_get_parent(struct dentry *dchild)
{
struct ptlrpc_request *req = NULL;
struct inode *dir = dchild->d_inode;
+ struct obd_capa *oc;
struct ll_sb_info *sbi;
struct dentry *result = NULL;
struct mdt_body *body;
sbi = ll_s2sbi(dir->i_sb);
- rc = md_getattr_name(sbi->ll_md_exp, ll_inode2fid(dir),
- dotdot, strlen(dotdot) + 1,
- 0, 0, &req);
+ oc = ll_i2mdscapa(dir);
+ rc = md_getattr_name(sbi->ll_md_exp, ll_inode2fid(dir), oc,
+ dotdot, strlen(dotdot) + 1, 0, 0, &req);
if (rc) {
+ capa_put(oc);
CERROR("failure %d inode %lu get parent\n", rc, dir->i_ino);
- return ERR_PTR(rc);
+ RETURN(ERR_PTR(rc));
}
- body = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, sizeof (*body));
+ body = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, sizeof(*body));
LASSERT((body->valid & OBD_MD_FLGENER) && (body->valid & OBD_MD_FLID));
- result = ll_iget_for_nfs(dir->i_sb, ll_inode2fid(dir), S_IFDIR);
+ result = ll_iget_for_nfs(dir->i_sb, ll_inode2fid(dir),
+ oc ? &oc->c_capa : NULL, S_IFDIR);
+ capa_put(oc);
if (IS_ERR(result))
rc = PTR_ERR(result);
}
}
-/* this function prepares md_op_data hint for passing ot down to MD stack. */
-void ll_prepare_md_op_data(struct md_op_data *op_data, struct inode *i1,
- struct inode *i2, const char *name, int namelen,
- int mode)
-{
- LASSERT(i1 != NULL);
- LASSERT(op_data != NULL);
-
- ll_i2gids(op_data->suppgids, i1, i2);
- op_data->fid1 = ll_i2info(i1)->lli_fid;
-
- /* @i2 may be NULL. In this case caller itself has to initialize ->fid2
- * if needed. */
- if (i2)
- op_data->fid2 = ll_i2info(i2)->lli_fid;
-
- op_data->name = name;
- op_data->namelen = namelen;
- op_data->create_mode = mode;
- op_data->mod_time = CURRENT_SECONDS;
- op_data->fsuid = current->fsuid;
- op_data->fsgid = current->fsgid;
- op_data->cap = current->cap_effective;
-}
-
static void ll_d_add(struct dentry *de, struct inode *inode)
{
CDEBUG(D_DENTRY, "adding inode %p to dentry %p\n", inode, de);
struct lov_stripe_md *lsm = NULL;
struct obd_trans_info oti = { 0 };
struct obdo *oa;
+ struct obd_capa *oc;
int rc;
ENTRY;
}
}
- rc = obd_destroy(ll_i2dtexp(dir), oa, lsm, &oti, ll_i2mdexp(dir));
+ /* FIXME: parent mds capability is the only one can find! */
+ oc = ll_i2mdscapa(dir);
+ rc = obd_destroy(ll_i2dtexp(dir), oa, lsm, &oti, ll_i2mdexp(dir), oc);
+ capa_put(oc);
obdo_free(oa);
if (rc)
CERROR("obd destroy objid "LPX64" error %d\n",
struct ll_sb_info *sbi = ll_i2sbi(inode);
struct ptlrpc_request *req = NULL;
struct mdt_remote_perm *perm;
+ struct obd_capa *oc;
int i = 0, rc;
ENTRY;
LBUG();
}
- rc = md_get_remote_perm(sbi->ll_md_exp, ll_inode2fid(inode), &req);
+ oc = ll_i2mdscapa(inode);
+ rc = md_get_remote_perm(sbi->ll_md_exp, ll_inode2fid(inode), oc, &req);
+ capa_put(oc);
if (rc) {
up(&lli->lli_rmtperm_sem);
RETURN(rc);
struct lov_stripe_md *lsm = lli->lli_smd;
struct obd_info oinfo = { { { 0 } } };
struct brw_page pg;
- int rc;
+ int opc, rc;
ENTRY;
pg.pg = page;
LPROC_LL_BRW_READ, pg.count);
oinfo.oi_oa = oa;
oinfo.oi_md = lsm;
+ /* NB partial write, so we might not have CAPA_OPC_OSS_READ capa */
+ opc = cmd & OBD_BRW_WRITE ? CAPA_OPC_OSS_WRITE :
+ CAPA_OPC_OSS_WRITE | CAPA_OPC_OSS_READ;
+ oinfo.oi_capa = ll_lookup_oss_capa(inode, opc);
rc = obd_brw(cmd, ll_i2dtexp(inode), &oinfo, 1, &pg, NULL);
+ capa_put(oinfo.oi_capa);
if (rc == 0)
obdo_to_inode(inode, oa, OBD_MD_FLBLOCKS);
else if (rc != -EIO)
ll_inode_size_unlock(inode, 0);
+ oinfo.oi_capa = ll_lookup_oss_capa(inode, CAPA_OPC_OSS_TRUNC);
rc = obd_punch_rqset(ll_i2dtexp(inode), &oinfo, NULL);
+ ll_truncate_free_capa(oinfo.oi_capa);
if (rc)
CERROR("obd_truncate fails (%d) ino %lu\n", rc, inode->i_ino);
else
EXIT;
}
+/*
+ * ->ap_lookup_capa() callback for llite async pages.
+ *
+ * Recovers the ll_async_page from the cookie @data and looks up an OSS
+ * capability for the inode owning the page.  A write (@cmd has
+ * OBD_BRW_WRITE set) asks for CAPA_OPC_OSS_WRITE only; anything else asks
+ * for CAPA_OPC_OSS_WRITE | CAPA_OPC_OSS_READ.
+ */
+static struct obd_capa *ll_ap_lookup_capa(void *data, int cmd)
+{
+        struct ll_async_page *llap = LLAP_FROM_COOKIE(data);
+        int opc = CAPA_OPC_OSS_WRITE;
+
+        if (!(cmd & OBD_BRW_WRITE))
+                opc |= CAPA_OPC_OSS_READ;
+
+        return ll_lookup_oss_capa(llap->llap_page->mapping->host, opc);
+}
+
static struct obd_async_page_ops ll_async_page_ops = {
.ap_make_ready = ll_ap_make_ready,
.ap_refresh_count = ll_ap_refresh_count,
.ap_fill_obdo = ll_ap_fill_obdo,
.ap_update_obdo = ll_ap_update_obdo,
.ap_completion = ll_ap_completion,
+ .ap_lookup_capa = ll_ap_lookup_capa,
};
struct ll_async_page *llap_cast_private(struct page *page)
{
struct brw_page *pga;
struct obdo oa;
- int i, rc = 0;
+ int opc, i, rc = 0;
size_t length;
+ struct obd_capa *ocapa;
ENTRY;
OBD_ALLOC(pga, sizeof(*pga) * page_count);
if (rw == WRITE) {
lprocfs_counter_add(ll_i2sbi(inode)->ll_stats,
LPROC_LL_DIRECT_WRITE, size);
+ opc = CAPA_OPC_OSS_WRITE;
llap_write_pending(inode, NULL);
} else {
lprocfs_counter_add(ll_i2sbi(inode)->ll_stats,
LPROC_LL_DIRECT_READ, size);
+ opc = CAPA_OPC_OSS_READ | CAPA_OPC_OSS_WRITE;
}
+ ocapa = ll_lookup_oss_capa(inode, opc);
rc = obd_brw_rqset(rw == WRITE ? OBD_BRW_WRITE : OBD_BRW_READ,
- ll_i2dtexp(inode), &oa, lsm, page_count, pga, NULL);
+ ll_i2dtexp(inode), &oa, lsm, page_count, pga, NULL,
+ ocapa);
+ capa_put(ocapa);
if (rc == 0) {
rc = size;
if (rw == WRITE) {
static int __init init_lustre_lite(void)
{
int rc, seed[2];
+
printk(KERN_INFO "Lustre: Lustre Client File System; "
"info@clusterfs.com\n");
rc = ll_init_inodecache();
get_random_bytes(seed, sizeof(seed));
ll_srand(seed[0], seed[1]);
+ init_timer(&ll_capa_timer);
+ ll_capa_timer.function = ll_capa_timer_callback;
+ rc = ll_capa_thread_start();
return rc;
}
{
int rc;
+ del_timer(&ll_capa_timer);
+ ll_capa_thread_stop();
+ LASSERTF(capa_count[CAPA_SITE_CLIENT] == 0,
+ "client remaining capa count %d\n",
+ capa_count[CAPA_SITE_CLIENT]);
+
lustre_register_client_fill_super(NULL);
lustre_register_client_process_config(NULL);
struct ll_sb_info *sbi = ll_i2sbi(inode);
int rc, symlen = inode->i_size + 1;
struct mdt_body *body;
+ struct obd_capa *oc;
ENTRY;
*request = NULL;
RETURN(0);
}
- rc = md_getattr(sbi->ll_md_exp, ll_inode2fid(inode),
+ oc = ll_i2mdscapa(inode);
+ rc = md_getattr(sbi->ll_md_exp, ll_inode2fid(inode), oc,
OBD_MD_LINKNAME, symlen, request);
+ capa_put(oc);
if (rc) {
if (rc != -ENOENT)
CERROR("inode %lu: rc = %d\n", inode->i_ino, rc);
struct ll_sb_info *sbi = ll_i2sbi(inode);
struct ptlrpc_request *req;
int xattr_type, rc;
+ struct obd_capa *oc;
ENTRY;
lprocfs_counter_incr(sbi->ll_stats, LPROC_LL_SETXATTR);
if (xattr_type == XATTR_TRUSTED_T && strcmp(name, "trusted.lov") == 0)
RETURN(0);
- rc = md_setxattr(sbi->ll_md_exp, ll_inode2fid(inode), valid,
- name, value, size, 0, flags, &req);
+ oc = ll_i2mdscapa(inode);
+ rc = md_setxattr(sbi->ll_md_exp, ll_inode2fid(inode), oc, valid, name,
+ value, size, 0, flags, &req);
+ capa_put(oc);
if (rc) {
if (rc == -EOPNOTSUPP && xattr_type == XATTR_USER_T) {
LCONSOLE_INFO("Disabling user_xattr feature because "
struct mdt_body *body;
int xattr_type, rc;
void *xdata;
+ struct obd_capa *oc;
ENTRY;
CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n",
#endif
do_getxattr:
- rc = md_getxattr(sbi->ll_md_exp, ll_inode2fid(inode), valid,
- name, NULL, 0, size, 0, &req);
+ oc = ll_i2mdscapa(inode);
+ rc = md_getxattr(sbi->ll_md_exp, ll_inode2fid(inode), oc, valid, name,
+ NULL, 0, size, 0, &req);
+ capa_put(oc);
if (rc) {
if (rc == -EOPNOTSUPP && xattr_type == XATTR_USER_T) {
LCONSOLE_INFO("Disabling user_xattr feature because "
{
struct obd_device *obd = exp->exp_obd;
struct lu_fid rpid = op_data->fid1;
+ struct obd_capa *oc = op_data->mod_capa1;
struct lmv_obd *lmv = &obd->u.lmv;
struct mdt_body *body = NULL;
struct md_op_data *sop_data;
(char *)op_data->name, op_data->namelen);
rpid = obj->lo_inodes[mds].li_fid;
+ oc = obj->lo_inodes[mds].li_capa;
rc = lmv_fld_lookup(lmv, &rpid, &mds);
lmv_obj_put(obj);
if (rc)
}
sop_data->fid1 = rpid;
+ sop_data->mod_capa1 = oc;
rc = md_intent_lock(lmv->tgts[mds].ltd_exp, sop_data,
lmm, lmmsize, it, flags, reqp,
* the request with proper MDS.
*/
LASSERT(lu_fid_eq(&op_data->fid1, &rpid));
- rc = lmv_handle_split(exp, &rpid);
+ rc = lmv_handle_split(exp, &rpid, oc);
if (rc == 0) {
ptlrpc_req_finished(*reqp);
/* We shoudld reallocate the FID for the object */
obj = lmv_obj_grab(obd, &body->fid1);
if (!obj && (mea = lmv_get_mea(*reqp, DLM_REPLY_REC_OFF))) {
+
+ /* FIXME: capability for remote! */
/* wow! this is split dir, we'd like to handle it */
- obj = lmv_obj_create(exp, &body->fid1, mea);
+ obj = lmv_obj_create(exp, &body->fid1, NULL, mea);
if (IS_ERR(obj))
GOTO(out_free_sop_data, rc = (int)PTR_ERR(obj));
}
struct lmv_obj *obj = NULL, *obj2 = NULL;
struct obd_device *obd = exp->exp_obd;
struct lu_fid rpid = op_data->fid1;
+ struct obd_capa *oc = op_data->mod_capa1;
struct lmv_obd *lmv = &obd->u.lmv;
struct mdt_body *body = NULL;
struct md_op_data *sop_data;
op_data->namelen);
rpid = obj->lo_inodes[mds].li_fid;
+ oc = obj->lo_inodes[mds].li_capa;
rc = lmv_fld_lookup(lmv, &rpid, &mds);
if (rc) {
lmv_obj_put(obj);
}
sop_data->fid1 = rpid;
+ sop_data->mod_capa1 = oc;
rc = md_intent_lock(lmv->tgts[mds].ltd_exp, sop_data, lmm,
lmmsize, it, flags, reqp, cb_blocking,
obj2 = lmv_obj_grab(obd, &body->fid1);
if (!obj2 && (mea = lmv_get_mea(*reqp, DLM_REPLY_REC_OFF))) {
+
+ /* FIXME remote capability! */
/* wow! this is split dir, we'd like to handle it. */
- obj2 = lmv_obj_create(exp, &body->fid1, mea);
+ obj2 = lmv_obj_create(exp, &body->fid1, NULL, mea);
if (IS_ERR(obj2))
GOTO(out_free_sop_data, rc = (int)PTR_ERR(obj2));
}
for (i = 0; i < obj->lo_objcount; i++) {
struct lu_fid fid = obj->lo_inodes[i].li_fid;
+ struct obd_capa *oc= obj->lo_inodes[i].li_capa;
struct ptlrpc_request *req = NULL;
struct obd_export *tgt_exp;
struct lookup_intent it;
memset(op_data, 0, sizeof(*op_data));
op_data->fid1 = fid;
op_data->fid2 = fid;
+ op_data->mod_capa1 = oc;
+ op_data->mod_capa2 = oc;
tgt_exp = lmv_get_export(lmv, &fid);
if (IS_ERR(tgt_exp))
{
struct obd_device *obd = exp->exp_obd;
struct lu_fid rpid = op_data->fid1;
+ struct obd_capa *oc = op_data->mod_capa1;
struct lmv_obd *lmv = &obd->u.lmv;
struct mdt_body *body = NULL;
struct md_op_data *sop_data;
(char *)op_data->name,
op_data->namelen);
rpid = obj->lo_inodes[mds].li_fid;
+ oc = obj->lo_inodes[mds].li_capa;
lmv_obj_put(obj);
}
rc = lmv_fld_lookup(lmv, &rpid, &mds);
(char *)op_data->name,
op_data->namelen);
rpid = obj->lo_inodes[mds].li_fid;
+ oc = obj->lo_inodes[mds].li_capa;
rc = lmv_fld_lookup(lmv, &rpid, &mds);
if (rc) {
lmv_obj_put(obj);
}
sop_data->fid1 = rpid;
+ sop_data->mod_capa1 = oc;
rc = md_intent_lock(lmv->tgts[mds].ltd_exp, sop_data, lmm, lmmsize,
it, flags, reqp, cb_blocking, extra_lock_flags);
CWARN("we haven't knew about directory splitting!\n");
LASSERT(obj == NULL);
- obj = lmv_obj_create(exp, &rpid, NULL);
+ obj = lmv_obj_create(exp, &rpid, oc, NULL);
if (IS_ERR(obj))
RETURN((int)PTR_ERR(obj));
lmv_obj_put(obj);
LASSERT(body != NULL);
LASSERT((body->valid & OBD_MD_FLID) != 0);
+ body = lustre_msg_buf((*reqp)->rq_repmsg,
+ DLM_REPLY_REC_OFF, sizeof(*body));
+ LASSERT(body != NULL);
+ LASSERT((body->valid & OBD_MD_FLID) != 0);
+
obj = lmv_obj_grab(obd, &body->fid1);
if (!obj) {
- obj = lmv_obj_create(exp, &body->fid1, mea);
+ /* FIXME: remote capability */
+ obj = lmv_obj_create(exp, &body->fid1, NULL, mea);
if (IS_ERR(obj))
GOTO(out_free_sop_data, rc = (int)PTR_ERR(obj));
}
for (i = 0; i < obj->lo_objcount; i++) {
struct lu_fid fid = obj->lo_inodes[i].li_fid;
+ struct obd_capa *oc = obj->lo_inodes[i].li_capa;
struct lustre_handle *lockh = NULL;
struct ptlrpc_request *req = NULL;
ldlm_blocking_callback cb;
op_data->fid1 = fid;
op_data->fid2 = fid;
+ op_data->mod_capa1 = oc;
+ op_data->mod_capa2 = oc;
/* is obj valid? */
tgt_exp = lmv_get_export(lmv, &fid);
struct lmv_inode {
struct lu_fid li_fid; /* id of dirobj */
+ struct obd_capa *li_capa; /* fid capability */
unsigned long li_size; /* slave size value */
int li_flags;
};
struct lmv_obj *lmv_obj_create(struct obd_export *exp,
const struct lu_fid *fid,
+ struct obd_capa *oc,
struct lmv_stripe_md *mea);
int lmv_obj_delete(struct obd_export *exp,
ldlm_blocking_callback cb_blocking,
int extra_lock_flags);
-int lmv_handle_split(struct obd_export *, const struct lu_fid *);
+int lmv_handle_split(struct obd_export *, const struct lu_fid *,
+ struct obd_capa *oc);
int lmv_blocking_ast(struct ldlm_lock *, struct ldlm_lock_desc *,
void *, int);
int lmv_fld_lookup(struct lmv_obd *lmv, const struct lu_fid *fid,
}
static int lmv_getstatus(struct obd_export *exp,
- struct lu_fid *fid)
+ struct lu_fid *fid,
+ struct obd_capa **pc)
{
struct obd_device *obd = exp->exp_obd;
struct lmv_obd *lmv = &obd->u.lmv;
if (rc)
RETURN(rc);
- rc = md_getstatus(lmv->tgts[0].ltd_exp, fid);
+ rc = md_getstatus(lmv->tgts[0].ltd_exp, fid, pc);
RETURN(rc);
}
static int lmv_getxattr(struct obd_export *exp, const struct lu_fid *fid,
- obd_valid valid, const char *name, const char *input,
- int input_size, int output_size, int flags,
- struct ptlrpc_request **request)
+ struct obd_capa *oc, obd_valid valid, const char *name,
+ const char *input, int input_size, int output_size,
+ int flags, struct ptlrpc_request **request)
{
struct obd_device *obd = exp->exp_obd;
struct lmv_obd *lmv = &obd->u.lmv;
if (IS_ERR(tgt_exp))
RETURN(PTR_ERR(tgt_exp));
- rc = md_getxattr(tgt_exp, fid, valid, name, input, input_size,
+ rc = md_getxattr(tgt_exp, fid, oc, valid, name, input, input_size,
output_size, flags, request);
RETURN(rc);
}
static int lmv_setxattr(struct obd_export *exp, const struct lu_fid *fid,
- obd_valid valid, const char *name, const char *input,
- int input_size, int output_size, int flags,
- struct ptlrpc_request **request)
+ struct obd_capa *oc, obd_valid valid, const char *name,
+ const char *input, int input_size, int output_size,
+ int flags, struct ptlrpc_request **request)
{
struct obd_device *obd = exp->exp_obd;
struct lmv_obd *lmv = &obd->u.lmv;
if (IS_ERR(tgt_exp))
RETURN(PTR_ERR(tgt_exp));
- rc = md_setxattr(tgt_exp, fid, valid, name,
+ rc = md_setxattr(tgt_exp, fid, oc, valid, name,
input, input_size, output_size, flags, request);
RETURN(rc);
}
static int lmv_getattr(struct obd_export *exp, const struct lu_fid *fid,
- obd_valid valid, int ea_size,
+ struct obd_capa *oc, obd_valid valid, int ea_size,
struct ptlrpc_request **request)
{
struct obd_device *obd = exp->exp_obd;
if (IS_ERR(tgt_exp))
RETURN(PTR_ERR(tgt_exp));
- rc = md_getattr(tgt_exp, fid, valid, ea_size, request);
+ rc = md_getattr(tgt_exp, fid, oc, valid, ea_size, request);
if (rc)
RETURN(rc);
RETURN(rc);
}
- body = lustre_msg_buf((*request)->rq_repmsg, REQ_REC_OFF,
+ body = lustre_msg_buf((*request)->rq_repmsg, REPLY_REC_OFF,
sizeof(*body));
LASSERT(body != NULL);
RETURN(rc);
}
-static int lmv_change_cbdata(struct obd_export *exp,
- const struct lu_fid *fid,
- ldlm_iterator_t it,
- void *data)
+static int lmv_change_cbdata(struct obd_export *exp, const struct lu_fid *fid,
+ ldlm_iterator_t it, void *data)
{
struct obd_device *obd = exp->exp_obd;
struct lmv_obd *lmv = &obd->u.lmv;
/* called in the case MDS returns -ERESTART on create on open, what means that
* directory is split and its LMV presentation object has to be updated. */
-int lmv_handle_split(struct obd_export *exp, const struct lu_fid *fid)
+int lmv_handle_split(struct obd_export *exp, const struct lu_fid *fid,
+ struct obd_capa *oc)
{
struct obd_device *obd = exp->exp_obd;
struct lmv_obd *lmv = &obd->u.lmv;
RETURN(PTR_ERR(tgt_exp));
/* time to update mea of parent fid */
- rc = md_getattr(tgt_exp, fid, valid, mealen, &req);
+ rc = md_getattr(tgt_exp, fid, oc, valid, mealen, &req);
if (rc) {
CERROR("md_getattr() failed, error %d\n", rc);
GOTO(cleanup, rc);
if (md.mea == NULL)
GOTO(cleanup, rc = -ENODATA);
- obj = lmv_obj_create(exp, fid, md.mea);
+ obj = lmv_obj_create(exp, fid, oc, md.mea);
if (IS_ERR(obj))
rc = PTR_ERR(obj);
else
mds = raw_name2idx(obj->lo_hashtype, obj->lo_objcount,
op_data->name, op_data->namelen);
- op_data->fid1 = obj->lo_inodes[mds].li_fid;
+ op_data->fid1 = obj->lo_inodes[mds].li_fid;
+ op_data->mod_capa1 = obj->lo_inodes[mds].li_capa;
lmv_obj_put(obj);
}
if (rc == 0) {
if (*request == NULL)
RETURN(rc);
- CDEBUG(D_OTHER, "created. "DFID"\n", PFID(&op_data->fid1));
+ CDEBUG(D_OTHER, "created. "DFID"\n",
+ PFID(&op_data->fid1));
} else if (rc == -ERESTART) {
/*
* Directory got split. time to update local object and repeat
* the request with proper MDS.
*/
- rc = lmv_handle_split(exp, &op_data->fid1);
+ rc = lmv_handle_split(exp, &op_data->fid1, op_data->mod_capa1);
if (rc == 0) {
ptlrpc_req_finished(*request);
rc = lmv_alloc_fid_for_split(obd, &op_data->fid1,
* name */
mds = raw_name2idx(obj->lo_hashtype, obj->lo_objcount,
(char *)op_data->name, op_data->namelen);
- op_data->fid1 = obj->lo_inodes[mds].li_fid;
+ op_data->fid1 = obj->lo_inodes[mds].li_fid;
+ op_data->mod_capa1 = obj->lo_inodes[mds].li_capa;
lmv_obj_put(obj);
}
}
static int
lmv_getattr_name(struct obd_export *exp, const struct lu_fid *fid,
- const char *filename, int namelen, obd_valid valid,
- int ea_size, struct ptlrpc_request **request)
+ struct obd_capa *oc, const char *filename, int namelen,
+ obd_valid valid, int ea_size, struct ptlrpc_request **request)
{
struct obd_device *obd = exp->exp_obd;
struct lmv_obd *lmv = &obd->u.lmv;
- struct obd_export *tgt_exp;
struct lu_fid rid = *fid;
+ struct obd_capa *rcapa = oc;
+ struct obd_export *tgt_exp;
struct mdt_body *body;
struct lmv_obj *obj;
int rc, loop = 0;
repeat:
LASSERT(++loop <= 2);
- obj = lmv_obj_grab(obd, fid);
+ obj = lmv_obj_grab(obd, &rid);
if (obj) {
/* directory is split. look for right mds for this name */
mds = raw_name2idx(obj->lo_hashtype, obj->lo_objcount,
filename, namelen - 1);
rid = obj->lo_inodes[mds].li_fid;
+ rcapa = obj->lo_inodes[mds].li_capa;
lmv_obj_put(obj);
}
if (IS_ERR(tgt_exp))
RETURN(PTR_ERR(tgt_exp));
- rc = md_getattr_name(tgt_exp, &rid, filename, namelen, valid,
+ rc = md_getattr_name(tgt_exp, &rid, rcapa, filename, namelen, valid,
ea_size, request);
if (rc == 0) {
body = lustre_msg_buf((*request)->rq_repmsg,
RETURN(PTR_ERR(tgt_exp));
}
- rc = md_getattr_name(tgt_exp, &rid, NULL, 1, valid,
- ea_size, &req);
+ rc = md_getattr_name(tgt_exp, &rid, rcapa, NULL, 1,
+ valid, ea_size, &req);
ptlrpc_req_finished(*request);
*request = req;
}
} else if (rc == -ERESTART) {
/* directory got split. time to update local object and repeat
* the request with proper MDS */
- rc = lmv_handle_split(exp, &rid);
+ rc = lmv_handle_split(exp, &rid, rcapa);
if (rc == 0) {
ptlrpc_req_finished(*request);
goto repeat;
if (obj) {
rc = raw_name2idx(obj->lo_hashtype, obj->lo_objcount,
op_data->name, op_data->namelen);
- op_data->fid2 = obj->lo_inodes[rc].li_fid;
+ op_data->fid2 = obj->lo_inodes[rc].li_fid;
+ op_data->mod_capa2 = obj->lo_inodes[rc].li_capa;
lmv_obj_put(obj);
}
ENTRY;
CDEBUG(D_OTHER, "rename %*s in "DFID" to %*s in "DFID"\n",
- oldlen, old, PFID(&op_data->fid1), newlen, new,
- PFID(&op_data->fid2));
+ oldlen, old, PFID(&op_data->fid1),
+ newlen, new, PFID(&op_data->fid2));
rc = lmv_check_connect(obd);
if (rc)
if (obj) {
mds = raw_name2idx(obj->lo_hashtype, obj->lo_objcount,
(char *)new, newlen);
- op_data->fid2 = obj->lo_inodes[mds].li_fid;
+ op_data->fid2 = obj->lo_inodes[mds].li_fid;
+ op_data->mod_capa2 = obj->lo_inodes[mds].li_capa;
CDEBUG(D_OTHER, "forward to MDS #"LPU64" ("DFID")\n", mds,
PFID(&op_data->fid2));
lmv_obj_put(obj);
*/
mds = raw_name2idx(obj->lo_hashtype, obj->lo_objcount,
(char *)old, oldlen);
- op_data->fid1 = obj->lo_inodes[mds].li_fid;
+ op_data->fid1 = obj->lo_inodes[mds].li_fid;
+ op_data->mod_capa1 = obj->lo_inodes[mds].li_capa;
CDEBUG(D_OTHER, "forward to MDS #"LPU64" ("DFID")\n", mds,
PFID(&op_data->fid1));
lmv_obj_put(obj);
(char *)new, newlen);
op_data->fid2 = obj->lo_inodes[mds].li_fid;
+ op_data->mod_capa2 = obj->lo_inodes[mds].li_capa;
CDEBUG(D_OTHER, "forward to MDS #"LPU64" ("DFID")\n", mds,
PFID(&op_data->fid2));
lmv_obj_put(obj);
if (mds != mds2) {
CDEBUG(D_OTHER,"cross-node rename "DFID"/%*s to "DFID"/%*s\n",
- PFID(&op_data->fid1), oldlen, old, PFID(&op_data->fid2),
- newlen, new);
+ PFID(&op_data->fid1), oldlen, old,
+ PFID(&op_data->fid2), newlen, new);
}
op_data->fsuid = current->fsuid;
op_data->fsgid = current->fsgid;
if (obj) {
for (i = 0; i < obj->lo_objcount; i++) {
- op_data->fid1 = obj->lo_inodes[i].li_fid;
+ op_data->fid1 = obj->lo_inodes[i].li_fid;
+ op_data->mod_capa1 = obj->lo_inodes[i].li_capa;
tgt_exp = lmv_get_export(lmv, &op_data->fid1);
if (IS_ERR(tgt_exp)) {
}
static int lmv_sync(struct obd_export *exp, const struct lu_fid *fid,
- struct ptlrpc_request **request)
+ struct obd_capa *oc, struct ptlrpc_request **request)
{
struct obd_device *obd = exp->exp_obd;
struct lmv_obd *lmv = &obd->u.lmv;
if (IS_ERR(tgt_exp))
RETURN(PTR_ERR(tgt_exp));
- rc = md_sync(tgt_exp, fid, request);
+ rc = md_sync(tgt_exp, fid, oc, request);
RETURN(rc);
}
struct page *page = NULL;
struct lu_dirpage *next_dp;
struct obd_export *tgt_exp;
- struct lu_fid rid = *fid;
+ struct lu_fid rid;
+ struct obd_capa *rcapa;
__u32 seg_end, max_hash = MAX_HASH_SIZE;
int rc = 0;
/* Get start offset from next segment */
rid = obj->lo_inodes[index].li_fid;
+ rcapa = obj->lo_inodes[index].li_capa;
tgt_exp = lmv_get_export(lmv, &rid);
if (IS_ERR(tgt_exp))
GOTO(cleanup, PTR_ERR(tgt_exp));
if (!page)
GOTO(cleanup, rc = -ENOMEM);
- rc = md_readpage(tgt_exp, &rid, seg_end, page, &tmp_req);
+ rc = md_readpage(tgt_exp, &rid, rcapa, seg_end, page, &tmp_req);
if (rc) {
/* E2BIG means it already reached the end of the dir,
* no need reset the hash segment end */
RETURN(rc);
}
-static int lmv_readpage(struct obd_export *exp,
- const struct lu_fid *fid,
- __u64 offset, struct page *page,
+static int lmv_readpage(struct obd_export *exp, const struct lu_fid *fid,
+ struct obd_capa *oc, __u64 offset, struct page *page,
struct ptlrpc_request **request)
{
struct obd_device *obd = exp->exp_obd;
struct lmv_obd *lmv = &obd->u.lmv;
struct obd_export *tgt_exp;
struct lu_fid rid = *fid;
+ struct obd_capa *rcapa = oc;
struct lmv_obj *obj;
int i = 0, rc;
ENTRY;
do_div(index, seg);
i = (int)index;
rid = obj->lo_inodes[i].li_fid;
+ rcapa = obj->lo_inodes[i].li_capa;
lmv_obj_unlock(obj);
if (IS_ERR(tgt_exp))
GOTO(cleanup, PTR_ERR(tgt_exp));
- rc = md_readpage(tgt_exp, &rid, offset, page, request);
+ rc = md_readpage(tgt_exp, &rid, rcapa, offset, page, request);
if (rc)
GOTO(cleanup, rc);
if (obj) {
i = raw_name2idx(obj->lo_hashtype, obj->lo_objcount,
op_data->name, op_data->namelen);
- op_data->fid1 = obj->lo_inodes[i].li_fid;
+ op_data->fid1 = obj->lo_inodes[i].li_fid;
+ op_data->mod_capa1 = obj->lo_inodes[i].li_capa;
lmv_obj_put(obj);
CDEBUG(D_OTHER, "unlink '%*s' in "DFID" -> %u\n",
op_data->namelen, op_data->name,
}
static int lmv_get_remote_perm(struct obd_export *exp, const struct lu_fid *fid,
+ struct obd_capa *oc,
struct ptlrpc_request **request)
{
struct obd_device *obd = exp->exp_obd;
if (IS_ERR(tgt_exp))
RETURN(PTR_ERR(tgt_exp));
- rc = md_get_remote_perm(tgt_exp, fid, request);
+ rc = md_get_remote_perm(tgt_exp, fid, oc, request);
+
+ RETURN(rc);
+}
+/* Renew the capability @ocapa through the MD target that serves its fid.
+ *
+ * After lmv_check_connect() succeeds, the target export is looked up with
+ * lmv_get_export() using the fid stored in the capability
+ * (ocapa->c_capa.lc_fid), and the request, together with the callback @cb,
+ * is forwarded to md_renew_capa() on that export.
+ *
+ * Returns the non-zero result of lmv_check_connect(), PTR_ERR() of a failed
+ * export lookup, or otherwise whatever md_renew_capa() returns. */
+static int lmv_renew_capa(struct obd_export *exp, struct obd_capa *ocapa,
+ renew_capa_cb_t cb)
+{
+ struct obd_device *obd = exp->exp_obd;
+ struct lmv_obd *lmv = &obd->u.lmv;
+ struct obd_export *tgt_exp;
+ int rc;
+ ENTRY;
+
+ rc = lmv_check_connect(obd);
+ if (rc)
+ RETURN(rc);
+
+ tgt_exp = lmv_get_export(lmv, &ocapa->c_capa.lc_fid);
+ if (IS_ERR(tgt_exp))
+ RETURN(PTR_ERR(tgt_exp));
+ rc = md_renew_capa(tgt_exp, ocapa, cb);
RETURN(rc);
}
.m_free_lustre_md = lmv_free_lustre_md,
.m_set_open_replay_data = lmv_set_open_replay_data,
.m_clear_open_replay_data = lmv_clear_open_replay_data,
- .m_get_remote_perm = lmv_get_remote_perm
+ .m_get_remote_perm = lmv_get_remote_perm,
+ .m_renew_capa = lmv_renew_capa
};
int __init lmv_init(void)
* obtained from correct MDT and used for constructing the object. */
struct lmv_obj *
lmv_obj_create(struct obd_export *exp, const struct lu_fid *fid,
- struct lmv_stripe_md *mea)
+ struct obd_capa *oc, struct lmv_stripe_md *mea)
{
struct obd_device *obd = exp->exp_obd;
struct lmv_obd *lmv = &obd->u.lmv;
if (IS_ERR(tgt_exp))
GOTO(cleanup, obj = (void *)tgt_exp);
- rc = md_getattr(tgt_exp, fid, valid, mealen, &req);
+ rc = md_getattr(tgt_exp, fid, oc, valid, mealen, &req);
if (rc) {
CERROR("md_getattr() failed, error %d\n", rc);
GOTO(cleanup, obj = ERR_PTR(rc));
static int lov_destroy(struct obd_export *exp, struct obdo *oa,
struct lov_stripe_md *lsm, struct obd_trans_info *oti,
- struct obd_export *md_exp)
+ struct obd_export *md_exp, void *capa)
{
struct lov_request_set *set;
struct obd_info oinfo;
oti->oti_logcookies = set->set_cookies + req->rq_stripe;
err = obd_destroy(lov->lov_tgts[req->rq_idx]->ltd_exp,
- req->rq_oi.oi_oa, NULL, oti, NULL);
+ req->rq_oi.oi_oa, NULL, oti, NULL, capa);
err = lov_update_common_set(set, req, err);
if (err) {
CERROR("error: destroying objid "LPX64" subobj "
}
static int lov_sync(struct obd_export *exp, struct obdo *oa,
- struct lov_stripe_md *lsm, obd_off start, obd_off end)
+ struct lov_stripe_md *lsm, obd_off start, obd_off end,
+ void *capa)
{
struct lov_request_set *set;
struct obd_info oinfo;
rc = obd_sync(lov->lov_tgts[req->rq_idx]->ltd_exp,
req->rq_oi.oi_oa, NULL,
req->rq_oi.oi_policy.l_extent.start,
- req->rq_oi.oi_policy.l_extent.end);
+ req->rq_oi.oi_policy.l_extent.end, capa);
err = lov_update_common_set(set, req, rc);
if (err) {
CERROR("error: fsync objid "LPX64" subobj "LPX64
return rc;
}
+/*
+ * ->ap_lookup_capa() callback for LOV async pages: recover the
+ * lov_async_page from the cookie @data and delegate to the
+ * ->ap_lookup_capa() of the caller's ops, passing the caller's own data
+ * and @cmd through unchanged.
+ */
+static struct obd_capa *lov_ap_lookup_capa(void *data, int cmd)
+{
+        struct lov_async_page *lap;
+
+        lap = LAP_FROM_COOKIE(data);
+        return lap->lap_caller_ops->ap_lookup_capa(lap->lap_caller_data, cmd);
+}
+
static struct obd_async_page_ops lov_async_page_ops = {
.ap_make_ready = lov_ap_make_ready,
.ap_refresh_count = lov_ap_refresh_count,
.ap_fill_obdo = lov_ap_fill_obdo,
.ap_update_obdo = lov_ap_update_obdo,
.ap_completion = lov_ap_completion,
+ .ap_lookup_capa = lov_ap_lookup_capa, /* capa lookup, delegated to the caller's ops */
};
int lov_prep_async_page(struct obd_export *exp, struct lov_stripe_md *lsm,
GOTO(out, rc);
}
- if (KEY_IS("evict_by_nid")) {
+ if (KEY_IS("evict_by_nid") || KEY_IS(KEY_CAPA_KEY)) {
for (i = 0; i < lov->desc.ld_tgt_count; i++) {
/* OST was disconnected or is inactive */
if (!lov->lov_tgts[i] || !lov->lov_tgts[i]->ltd_active)
continue;
sub_exp = lov->lov_tgts[req->rq_idx]->ltd_exp;
- err = obd_destroy(sub_exp, req->rq_oi.oi_oa, NULL, oti, NULL);
+ err = obd_destroy(sub_exp, req->rq_oi.oi_oa, NULL, oti, NULL,
+ NULL);
if (err)
CERROR("Failed to uncreate objid "LPX64" subobj "
LPX64" on OST idx %d: rc = %d\n",
void mdc_pack_req_body(struct ptlrpc_request *req, int offset,
__u64 valid, const struct lu_fid *fid,
- int ea_size, int flags);
+ struct obd_capa *oc, int ea_size, int flags);
+void mdc_pack_capa(struct ptlrpc_request *req, int offset, struct obd_capa *oc);
void mdc_pack_rep_body(struct ptlrpc_request *);
void mdc_is_subdir_pack(struct ptlrpc_request *req, int offset,
- const struct lu_fid *pfid,
- const struct lu_fid *cfid, int flags);
+ const struct lu_fid *pfid, const struct lu_fid *cfid,
+ struct obd_capa *pc, struct obd_capa *cc,
+ int flags);
void mdc_readdir_pack(struct ptlrpc_request *req, int pos, __u64 offset,
- __u32 size, const struct lu_fid *fid);
+ __u32 size, const struct lu_fid *fid,
+ struct obd_capa *oc);
void mdc_getattr_pack(struct ptlrpc_request *req, int offset, __u64 valid,
int flags, struct md_op_data *data);
void mdc_setattr_pack(struct ptlrpc_request *req, int offset,
int mdc_init_ea_size(struct obd_export *exp, int easize, int def_easzie,
int cookiesize);
-int mdc_getstatus(struct obd_export *exp, struct lu_fid *rootfid);
-int mdc_getattr(struct obd_export *exp, const struct lu_fid *fid,
- obd_valid valid, int ea_size,
- struct ptlrpc_request **request);
-int mdc_getattr_name(struct obd_export *exp, const struct lu_fid *fid,
- const char *filename, int namelen, obd_valid valid,
- int ea_size, struct ptlrpc_request **request);
-int mdc_setattr(struct obd_export *exp, struct md_op_data *op_data,
- void *ea, int ealen, void *ea2, int ea2len,
- struct ptlrpc_request **request);
-int mdc_setxattr(struct obd_export *exp, const struct lu_fid *fid,
- obd_valid valid, const char *xattr_name,
- const char *input, int input_size,
- int output_size, int flags,
- struct ptlrpc_request **request);
-int mdc_getxattr(struct obd_export *exp, const struct lu_fid *fid,
- obd_valid valid, const char *xattr_name,
- const char *input, int input_size,
- int output_size, int flags, struct ptlrpc_request **request);
int mdc_open(struct obd_export *exp, obd_id ino, int type, int flags,
struct lov_mds_md *lmm, int lmm_size, struct lustre_handle *fh,
struct ptlrpc_request **);
int mdc_clear_open_replay_data(struct obd_export *exp,
struct obd_client_handle *och);
-int mdc_close(struct obd_export *, struct md_op_data *,
- struct obd_client_handle *och, struct ptlrpc_request **);
-
-int mdc_readpage(struct obd_export *exp, const struct lu_fid *fid,
- __u64 offset, struct page *, struct ptlrpc_request **);
-
int mdc_create(struct obd_export *exp, struct md_op_data *op_data,
- const void *data, int datalen, int mode, __u32 uid,
- __u32 gid, __u32 cap_effective, __u64 rdev,
- struct ptlrpc_request **request);
-
-int mdc_unlink(struct obd_export *exp, struct md_op_data *op_data,
- struct ptlrpc_request **request);
-
+ const void *data, int datalen, int mode, __u32 uid, __u32 gid,
+ __u32 cap_effective, __u64 rdev, struct ptlrpc_request **request);
int mdc_link(struct obd_export *exp, struct md_op_data *op_data,
- struct ptlrpc_request **);
-
+ struct ptlrpc_request **request);
int mdc_rename(struct obd_export *exp, struct md_op_data *op_data,
const char *old, int oldlen, const char *new, int newlen,
struct ptlrpc_request **request);
-
-int mdc_is_subdir(struct obd_export *exp, const struct lu_fid *pfid,
- const struct lu_fid *cfid, struct ptlrpc_request **request);
-
-int mdc_sync(struct obd_export *exp, const struct lu_fid *fid,
- struct ptlrpc_request **);
-
+int mdc_setattr(struct obd_export *exp, struct md_op_data *op_data,
+ void *ea, int ealen, void *ea2, int ea2len,
+ struct ptlrpc_request **request);
+int mdc_unlink(struct obd_export *exp, struct md_op_data *op_data,
+ struct ptlrpc_request **request);
+int mdc_cancel_unused(struct obd_export *exp, const struct lu_fid *fid,
+ int flags, void *opaque);
int mdc_lock_match(struct obd_export *exp, int flags,
const struct lu_fid *fid, ldlm_type_t type,
ldlm_policy_data_t *policy, ldlm_mode_t mode,
struct lustre_handle *lockh);
-
-int mdc_cancel_unused(struct obd_export *exp, const struct lu_fid *fid,
- int flags, void *opaque);
-
-int mdc_done_writing(struct obd_export *exp, struct md_op_data *op_data,
- struct obd_client_handle *och);
-
#endif
#endif
#endif
-void mdc_readdir_pack(struct ptlrpc_request *req, int pos, __u64 offset,
- __u32 size, const struct lu_fid *fid)
+static void mdc_pack_body(struct mdt_body *b)
{
- struct mdt_body *b;
+ LASSERT (b != NULL); /* the lines below stamp @b with the calling task's fsuid, fsgid and cap_effective */
- b = lustre_msg_buf(req->rq_reqmsg, pos, sizeof(*b));
b->fsuid = current->fsuid;
b->fsgid = current->fsgid;
b->capability = current->cap_effective;
- b->fid1 = *fid;
- b->size = offset; /* !! */
- b->suppgid = -1;
- b->nlink = size; /* !! */
+}
+
+void mdc_pack_capa(struct ptlrpc_request *req, int offset, struct obd_capa *oc)
+{
+ struct lustre_capa *c;
+ /* Pack capability @oc into request buffer @offset.  When @oc is NULL the
+ * buffer must have been sized to zero and nothing is written. */
+ if (!oc) {
+ LASSERT(lustre_msg_buflen(req->rq_reqmsg, offset) == 0);
+ return;
+ }
+ /* capa_cpy() presumably copies the wire capability out of @oc into the
+ * request buffer -- TODO confirm against its definition. */
+ c = lustre_msg_buf(req->rq_reqmsg, offset, sizeof(*c));
+ LASSERT(c);
+ capa_cpy(c, oc);
+ DEBUG_CAPA(D_SEC, c, "pack");
}
void mdc_is_subdir_pack(struct ptlrpc_request *req, int offset,
const struct lu_fid *pfid,
- const struct lu_fid *cfid, int flags)
+ const struct lu_fid *cfid,
+ struct obd_capa *pc, /* capability for @pfid; packed only when @pfid is set */
+ struct obd_capa *cc, int flags) /* @cc: capability for @cfid; packed only when @cfid is set */
{
struct mdt_body *b = lustre_msg_buf(req->rq_reqmsg, offset, sizeof(*b));
- if (pfid)
+ if (pfid) {
b->fid1 = *pfid;
- if (cfid)
+ mdc_pack_capa(req, offset + 1, pc); /* parent capa: first buffer after the body */
+ }
+ if (cfid) {
b->fid2 = *cfid;
+ mdc_pack_capa(req, offset + 2, cc); /* child capa: second buffer after the body */
+ }
b->valid = OBD_MD_FLID;
b->flags = flags;
}
-static void mdc_pack_body(struct mdt_body *b)
-{
- LASSERT (b != NULL);
-
- b->fsuid = current->fsuid;
- b->fsgid = current->fsgid;
- b->capability = current->cap_effective;
-}
-
void mdc_pack_req_body(struct ptlrpc_request *req, int offset,
__u64 valid, const struct lu_fid *fid,
- int ea_size, int flags)
+ struct obd_capa *oc, int ea_size, int flags)
{
struct mdt_body *b = lustre_msg_buf(req->rq_reqmsg, offset, sizeof(*b));
- if (fid)
- b->fid1 = *fid;
b->valid = valid;
b->eadatasize = ea_size;
b->flags = flags;
mdc_pack_body(b);
+ if (fid) { /* fid and capability are packed together; with a NULL @fid, @oc is ignored */
+ b->fid1 = *fid;
+ mdc_pack_capa(req, offset + 1, oc); /* capa buffer directly follows the body */
+ }
+}
+
+void mdc_readdir_pack(struct ptlrpc_request *req, int offset, __u64 pgoff,
+ __u32 size, const struct lu_fid *fid,
+ struct obd_capa *oc)
+{
+ struct mdt_body *b;
+ /* Fill the mdt_body in request buffer @offset for a readdir request.  Two
+ * body fields are borrowed (the "!!" markers below): size carries @pgoff
+ * and nlink carries @size.  The capability @oc goes in the next buffer. */
+ b = lustre_msg_buf(req->rq_reqmsg, offset, sizeof(*b));
+ b->fid1 = *fid;
+ b->size = pgoff; /* !! */
+ b->suppgid = -1;
+ b->nlink = size; /* !! */
+ mdc_pack_body(b);
+ mdc_pack_capa(req, offset + 1, oc);
}
/* packing of MDS records */
rec->cr_time = op_data->mod_time;
rec->cr_suppgid = op_data->suppgids[0];
rec->cr_flags = op_data->flags;
-
- tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1, op_data->namelen + 1);
+
+ mdc_pack_capa(req, offset + 1, op_data->mod_capa1);
+
+ tmp = lustre_msg_buf(req->rq_reqmsg, offset + 2, op_data->namelen + 1);
LOGL0(op_data->name, op_data->namelen, tmp);
if (data) {
- tmp = lustre_msg_buf(req->rq_reqmsg, offset + 2, datalen);
+ tmp = lustre_msg_buf(req->rq_reqmsg, offset + 3, datalen);
memcpy (tmp, data, datalen);
}
}
rec->cr_time = op_data->mod_time;
rec->cr_suppgid = op_data->suppgids[0];
+ mdc_pack_capa(req, offset + 1, op_data->mod_capa1);
+ /* the next buffer holds the child capa, which is used for replay
+ * only; it is filled in later from the data in the reply message. */
+
if (op_data->name) {
- tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1,
+ tmp = lustre_msg_buf(req->rq_reqmsg, offset + 3,
op_data->namelen + 1);
LOGL0(op_data->name, op_data->namelen, tmp);
}
/*XXX a hack for liblustre to set EA (LL_IOC_LOV_SETSTRIPE) */
rec->cr_fid2 = op_data->fid2;
#endif
- tmp = lustre_msg_buf(req->rq_reqmsg, offset + 2, lmmlen);
+ tmp = lustre_msg_buf(req->rq_reqmsg, offset + 4, lmmlen);
memcpy (tmp, lmm, lmmlen);
}
}
rec = lustre_msg_buf(req->rq_reqmsg, offset, sizeof (*rec));
mdc_setattr_pack_rec(rec, op_data);
+ mdc_pack_capa(req, offset + 1, op_data->mod_capa1);
+
if (op_data->flags & (MF_SOM_CHANGE | MF_EPOCH_OPEN)) {
- epoch = lustre_msg_buf(req->rq_reqmsg, offset + 1,
+ epoch = lustre_msg_buf(req->rq_reqmsg, offset + 2,
sizeof(*epoch));
mdc_epoch_pack(epoch, op_data);
}
if (ealen == 0)
return;
- memcpy(lustre_msg_buf(req->rq_reqmsg, offset + 2, ealen), ea, ealen);
+ memcpy(lustre_msg_buf(req->rq_reqmsg, offset + 3, ealen), ea, ealen);
if (ea2len == 0)
return;
- memcpy(lustre_msg_buf(req->rq_reqmsg, offset + 3, ea2len), ea2, ea2len);
+ memcpy(lustre_msg_buf(req->rq_reqmsg, offset + 4, ea2len), ea2, ea2len);
}
void mdc_unlink_pack(struct ptlrpc_request *req, int offset,
rec->ul_fid2 = op_data->fid2;
rec->ul_time = op_data->mod_time;
- tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1, op_data->namelen + 1);
+ mdc_pack_capa(req, offset + 1, op_data->mod_capa1);
+
+ tmp = lustre_msg_buf(req->rq_reqmsg, offset + 2, op_data->namelen + 1);
LASSERT (tmp != NULL);
LOGL0(op_data->name, op_data->namelen, tmp);
}
rec->lk_fid2 = op_data->fid2;
rec->lk_time = op_data->mod_time;
- tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1, op_data->namelen + 1);
+ mdc_pack_capa(req, offset + 1, op_data->mod_capa1);
+ mdc_pack_capa(req, offset + 2, op_data->mod_capa2);
+
+ tmp = lustre_msg_buf(req->rq_reqmsg, offset + 3, op_data->namelen + 1);
LOGL0(op_data->name, op_data->namelen, tmp);
}
rec->rn_time = op_data->mod_time;
rec->rn_mode = op_data->create_mode;
- tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1, oldlen + 1);
+ mdc_pack_capa(req, offset + 1, op_data->mod_capa1);
+ mdc_pack_capa(req, offset + 2, op_data->mod_capa2);
+
+ tmp = lustre_msg_buf(req->rq_reqmsg, offset + 3, oldlen + 1);
LOGL0(old, oldlen, tmp);
if (new) {
- tmp = lustre_msg_buf(req->rq_reqmsg, offset + 2, newlen + 1);
+ tmp = lustre_msg_buf(req->rq_reqmsg, offset + 4, newlen + 1);
LOGL0(new, newlen, tmp);
}
}
b->fid1 = op_data->fid1;
b->fid2 = op_data->fid2;
+
+ mdc_pack_capa(req, offset + 1, op_data->mod_capa1);
+
if (op_data->name) {
char *tmp;
- tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1,
+ tmp = lustre_msg_buf(req->rq_reqmsg, offset + 2,
op_data->namelen + 1);
LOGL0(op_data->name, op_data->namelen, tmp);
}
rec = lustre_msg_buf(req->rq_reqmsg, offset + 1, sizeof(*rec));
mdc_setattr_pack_rec(rec, op_data);
+ mdc_pack_capa(req, offset + 2, op_data->mod_capa1);
mdc_epoch_pack(epoch, op_data);
}
* but this is incredibly unlikely, and questionable whether the client
* could do MDS recovery under OOM anyways... */
static void mdc_realloc_openmsg(struct ptlrpc_request *req,
- struct mdt_body *body, int size[6])
+ struct mdt_body *body, int size[9])
{
int new_size, old_size;
struct lustre_msg *new_msg;
/* save old size */
- old_size = lustre_msg_size(lustre_request_magic(req), 6, size);
+ old_size = lustre_msg_size(lustre_request_magic(req), 9, size);
- size[DLM_INTENT_REC_OFF + 2] = body->eadatasize;
- new_size = lustre_msg_size(lustre_request_magic(req), 6, size);
+ size[DLM_INTENT_REC_OFF + 4] = body->eadatasize;
+ new_size = lustre_msg_size(lustre_request_magic(req), 9, size);
OBD_ALLOC(new_msg, new_size);
if (new_msg != NULL) {
struct lustre_msg *old_msg = req->rq_reqmsg;
DEBUG_REQ(D_INFO, req, "replace reqmsg for larger EA %u\n",
body->eadatasize);
memcpy(new_msg, old_msg, old_size);
- lustre_msg_set_buflen(new_msg, DLM_INTENT_REC_OFF + 2,
+ lustre_msg_set_buflen(new_msg, DLM_INTENT_REC_OFF + 4,
body->eadatasize);
spin_lock(&req->rq_lock);
struct ldlm_request *lockreq;
struct ldlm_intent *lit;
struct ldlm_reply *lockrep;
- int size[7] = { [MSG_PTLRPC_BODY_OFF] = sizeof(struct ptlrpc_body),
+ int size[9] = { [MSG_PTLRPC_BODY_OFF] = sizeof(struct ptlrpc_body),
[DLM_LOCKREQ_OFF] = sizeof(*lockreq),
[DLM_INTENT_IT_OFF] = sizeof(*lit) };
- int repsize[5] = { [MSG_PTLRPC_BODY_OFF] = sizeof(struct ptlrpc_body),
+ int repsize[7] = { [MSG_PTLRPC_BODY_OFF] = sizeof(struct ptlrpc_body),
[DLM_LOCKREPLY_OFF] = sizeof(*lockrep),
[DLM_REPLY_REC_OFF] = sizeof(struct mdt_body),
[DLM_REPLY_REC_OFF+1] = obddev->u.cli.
cl_max_mds_easize };
int flags = extra_lock_flags | LDLM_FL_HAS_INTENT;
- int repbufcnt = 4, rc;
+ int repbufcnt = 4, ea_off, rc;
void *eadata;
ENTRY;
// ldlm_it2str(it->it_op), it_name, it_inode->i_ino);
if (it->it_op & IT_OPEN) {
+ int do_join = !!(it->it_flags & O_JOIN_FILE);
+
it->it_create_mode = (it->it_create_mode & ~S_IFMT) | S_IFREG;
size[DLM_INTENT_REC_OFF] = sizeof(struct mdt_rec_create);
- size[DLM_INTENT_REC_OFF + 1] = op_data->namelen + 1;
+ /* parent capability */
+ size[DLM_INTENT_REC_OFF + 1] = op_data->mod_capa1 ?
+ sizeof(struct lustre_capa) : 0;
+ /* child capability, used for replay only */
+ size[DLM_INTENT_REC_OFF + 2] = op_data->mod_capa1 ?
+ sizeof(struct lustre_capa) : 0;
+ size[DLM_INTENT_REC_OFF + 3] = op_data->namelen + 1;
/* As an optimization, we allocate an RPC request buffer for
* at least a default-sized LOV EA even if we aren't sending
* one. We grow the whole request to the next power-of-two
* size since we get that much from a slab allocation anyways.
* This avoids an allocation below in the common case where
* we need to save a default-sized LOV EA for open replay. */
- size[DLM_INTENT_REC_OFF + 2] = max(lmmsize,
- obddev->u.cli.cl_default_mds_easize);
- rc = lustre_msg_size(class_exp2cliimp(exp)->imp_msg_magic, 6,
- size);
+ ea_off = DLM_INTENT_REC_OFF + 4;
+ size[ea_off] = max(lmmsize,
+ obddev->u.cli.cl_default_mds_easize);
+ if (do_join)
+ size[DLM_INTENT_REC_OFF + 5] =
+ sizeof(struct mds_rec_join);
+ rc = lustre_msg_size(class_exp2cliimp(exp)->imp_msg_magic,
+ 8 + do_join, size);
if (rc & (rc - 1))
- size[DLM_INTENT_REC_OFF + 2] =
- min(size[DLM_INTENT_REC_OFF+2]+round_up(rc)-rc,
- obddev->u.cli.cl_max_mds_easize);
+ size[ea_off] = min(size[ea_off] + round_up(rc) - rc,
+ obddev->u.cli.cl_max_mds_easize);
+
+ req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_DLM_VERSION,
+ LDLM_ENQUEUE, 8 + do_join, size, NULL);
+ if (!req)
+ RETURN(-ENOMEM);
if (it->it_flags & O_JOIN_FILE) {
__u64 head_size = *(__u32*)cb_data;
/* join is like an unlink of the tail */
policy.l_inodebits.bits = MDS_INODELOCK_UPDATE;
- size[DLM_INTENT_REC_OFF + 3] =
- sizeof(struct mdt_rec_join);
- req = ptlrpc_prep_req(class_exp2cliimp(exp),
- LUSTRE_DLM_VERSION, LDLM_ENQUEUE,
- 7, size, NULL);
/* when joining file, cb_data and lmm args together
* indicate the head file size*/
- mdc_join_pack(req, DLM_INTENT_REC_OFF + 3, op_data,
+ mdc_join_pack(req, DLM_INTENT_REC_OFF + 5, op_data,
(head_size << 32) | tsize);
cb_data = NULL;
lmm = NULL;
- } else {
- req = ptlrpc_prep_req(class_exp2cliimp(exp),
- LUSTRE_DLM_VERSION, LDLM_ENQUEUE,
- 6, size, NULL);
}
- if (!req)
- RETURN(-ENOMEM);
-
spin_lock(&req->rq_lock);
req->rq_replay = 1;
spin_unlock(&req->rq_lock);
repsize[repbufcnt++] = client_is_remote(exp) ?
sizeof(struct mdt_remote_perm) :
LUSTRE_POSIX_ACL_MAX_SIZE;
+ repsize[repbufcnt++] = sizeof(struct lustre_capa);
+ repsize[repbufcnt++] = sizeof(struct lustre_capa);
} else if (it->it_op & IT_UNLINK) {
size[DLM_INTENT_REC_OFF] = sizeof(struct mdt_rec_unlink);
- size[DLM_INTENT_REC_OFF + 1] = op_data->namelen + 1;
+ size[DLM_INTENT_REC_OFF + 1] = op_data->mod_capa1 ?
+ sizeof(struct lustre_capa) : 0;
+ size[DLM_INTENT_REC_OFF + 2] = op_data->namelen + 1;
policy.l_inodebits.bits = MDS_INODELOCK_UPDATE;
req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_DLM_VERSION,
- LDLM_ENQUEUE, 5, size, NULL);
+ LDLM_ENQUEUE, 6, size, NULL);
if (!req)
RETURN(-ENOMEM);
valid |= client_is_remote(exp) ? OBD_MD_FLRMTPERM :
OBD_MD_FLACL;
size[DLM_INTENT_REC_OFF] = sizeof(struct mdt_body);
- size[DLM_INTENT_REC_OFF + 1] = op_data->namelen + 1;
+ size[DLM_INTENT_REC_OFF + 1] = op_data->mod_capa1 ?
+ sizeof(struct lustre_capa) : 0;
+ size[DLM_INTENT_REC_OFF + 2] = op_data->namelen + 1;
if (it->it_op & IT_GETATTR)
policy.l_inodebits.bits = MDS_INODELOCK_UPDATE;
req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_DLM_VERSION,
- LDLM_ENQUEUE, 5, size, NULL);
+ LDLM_ENQUEUE, 6, size, NULL);
if (!req)
RETURN(-ENOMEM);
repsize[repbufcnt++] = client_is_remote(exp) ?
sizeof(struct mdt_remote_perm) :
LUSTRE_POSIX_ACL_MAX_SIZE;
+ repsize[repbufcnt++] = sizeof(struct lustre_capa);
} else if (it->it_op == IT_READDIR) {
policy.l_inodebits.bits = MDS_INODELOCK_UPDATE;
req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_DLM_VERSION,
it->it_op,it->d.lustre.it_disposition,it->d.lustre.it_status);
/* We know what to expect, so we do any byte flipping required here */
- LASSERT(repbufcnt == 5 || repbufcnt == 2);
- if (repbufcnt == 5) {
+ LASSERT(repbufcnt == 7 || repbufcnt == 6 || repbufcnt == 2);
+ if (repbufcnt >= 6) {
struct mdt_body *body;
- int offset = DLM_REPLY_REC_OFF;
+ int reply_off = DLM_REPLY_REC_OFF;
- body = lustre_swab_repbuf(req, offset++, sizeof(*body),
+ body = lustre_swab_repbuf(req, reply_off++, sizeof(*body),
lustre_swab_mdt_body);
if (body == NULL) {
CERROR ("Can't swab mdt_body\n");
if ((body->valid & OBD_MD_FLDIREA) != 0) {
if (body->eadatasize) {
- eadata = lustre_swab_repbuf(req, offset++,
+ eadata = lustre_swab_repbuf(req, reply_off++,
body->eadatasize, NULL);
if (eadata == NULL) {
CERROR ("Missing/short eadata\n");
if ((body->valid & OBD_MD_FLEASIZE)) {
/* The eadata is opaque; just check that it is there.
* Eventually, obd_unpackmd() will check the contents */
- eadata = lustre_swab_repbuf(req, offset++,
+ eadata = lustre_swab_repbuf(req, reply_off++,
body->eadatasize, NULL);
if (eadata == NULL) {
CERROR ("Missing/short eadata\n");
* reallocate it here to hold the actual LOV EA. */
if (it->it_op & IT_OPEN) {
if (lustre_msg_buflen(req->rq_reqmsg,
- DLM_INTENT_REC_OFF + 2) <
+ DLM_INTENT_REC_OFF + 4) <
body->eadatasize)
mdc_realloc_openmsg(req, body, size);
lmm = lustre_msg_buf(req->rq_reqmsg,
- DLM_INTENT_REC_OFF + 2,
+ DLM_INTENT_REC_OFF + 4,
body->eadatasize);
if (lmm)
memcpy(lmm, eadata, body->eadatasize);
struct mdt_remote_perm *perm;
LASSERT(client_is_remote(exp));
- perm = lustre_swab_repbuf(req, offset++, sizeof(*perm),
+ perm = lustre_swab_repbuf(req, reply_off++,
+ sizeof(*perm),
lustre_swab_mdt_remote_perm);
if (perm == NULL) {
CERROR("missing remote permission!\n");
RETURN(-EPROTO);
}
+ } else if ((body->valid & OBD_MD_FLACL) && body->aclsize) {
+ reply_off++;
+ }
+ if (body->valid & OBD_MD_FLMDSCAPA) {
+ struct lustre_capa *capa, *p;
+
+ capa = lustre_unpack_capa(req->rq_repmsg, reply_off++);
+ if (capa == NULL) {
+ CERROR("Missing/short client fid capa\n");
+ RETURN(-EPROTO);
+ }
+
+ if (it->it_op & IT_OPEN) {
+ /* client fid capa will be checked in replay */
+ p = lustre_msg_buf(req->rq_reqmsg,
+ DLM_INTENT_REC_OFF + 2,
+ sizeof(*p));
+ LASSERT(p);
+ *p = *capa;
+ }
+ }
+ if (body->valid & OBD_MD_FLOSSCAPA) {
+ struct lustre_capa *capa;
+
+ capa = lustre_unpack_capa(req->rq_repmsg, reply_off++);
+ if (capa == NULL) {
+ CERROR("Missing/short client oss capa\n");
+ RETURN(-EPROTO);
+ }
}
}
op_data->namelen, op_data->name, PFID(&op_data->fid2),
PFID(&op_data->fid1), ldlm_it2str(it->it_op), it->it_flags);
- if (fid_is_sane(&op_data->fid2) &&
+ if (fid_is_sane((struct lu_fid *)&op_data->fid2) &&
(it->it_op & (IT_LOOKUP | IT_GETATTR))) {
/* We could just return 1 immediately, but since we should only
* be called in revalidate_it if we already have a lock, let's
struct mdt_rec_setattr *rec;
struct mdc_rpc_lock *rpc_lock;
struct obd_device *obd = exp->exp_obd;
- int size[5] = { sizeof(struct ptlrpc_body),
- sizeof(*rec), 0, ealen, ea2len };
- int bufcount = 3, rc;
+ int size[6] = { sizeof(struct ptlrpc_body),
+ sizeof(*rec), 0, 0, ealen, ea2len };
+ int bufcount = 4, rc;
ENTRY;
LASSERT(op_data != NULL);
+ if (op_data->mod_capa1)
+ size[REQ_REC_OFF + 1] = sizeof(struct lustre_capa);
+
if (op_data->flags & (MF_SOM_CHANGE | MF_EPOCH_OPEN))
- size[2] = sizeof(struct mdt_epoch);
+ size[REQ_REC_OFF + 2] = sizeof(struct mdt_epoch);
if (ealen > 0) {
bufcount++;
mdc_setattr_pack(req, REQ_REC_OFF, op_data, ea, ealen, ea2, ea2len);
size[REPLY_REC_OFF] = sizeof(struct mdt_body);
- ptlrpc_req_set_repsize(req, 2, size);
+ size[REPLY_REC_OFF + 1] = sizeof(struct lustre_capa);
+ ptlrpc_req_set_repsize(req, 3, size);
rc = mdc_reint(req, rpc_lock, LUSTRE_IMP_FULL);
*request = req;
{
struct obd_device *obd = exp->exp_obd;
struct ptlrpc_request *req;
- int level, bufcount = 3, rc;
- int size[4] = { sizeof(struct ptlrpc_body),
+ int size[5] = { sizeof(struct ptlrpc_body),
sizeof(struct mdt_rec_create),
- op_data->namelen + 1 };
+ 0, op_data->namelen + 1 };
+ int level, bufcount = 4, rc;
ENTRY;
+ if (op_data->mod_capa1)
+ size[REQ_REC_OFF + 1] = sizeof(struct lustre_capa);
+
if (data && datalen) {
size[bufcount] = datalen;
bufcount++;
struct ptlrpc_request *req = *request;
int size[4] = { sizeof(struct ptlrpc_body),
sizeof(struct mdt_rec_unlink),
- op_data->namelen + 1 };
+ 0, op_data->namelen + 1 };
int rc;
ENTRY;
LASSERT(req == NULL);
+
+ if (op_data->mod_capa1)
+ size[REQ_REC_OFF + 1] = sizeof(struct lustre_capa);
+
req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_MDS_VERSION,
- MDS_REINT, 3, size, NULL);
+ MDS_REINT, 4, size, NULL);
if (req == NULL)
RETURN(-ENOMEM);
*request = req;
{
struct obd_device *obd = exp->exp_obd;
struct ptlrpc_request *req;
- int size[3] = { sizeof(struct ptlrpc_body),
+ int size[5] = { sizeof(struct ptlrpc_body),
sizeof(struct mdt_rec_link),
- op_data->namelen + 1 };
+ 0, 0, op_data->namelen + 1 };
int rc;
ENTRY;
+ if (op_data->mod_capa1)
+ size[REQ_REC_OFF + 1] = sizeof(struct lustre_capa);
+ if (op_data->mod_capa2)
+ size[REQ_REC_OFF + 2] = sizeof(struct lustre_capa);
+
req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_MDS_VERSION,
- MDS_REINT, 3, size, NULL);
+ MDS_REINT, 5, size, NULL);
if (req == NULL)
RETURN(-ENOMEM);
{
struct obd_device *obd = exp->exp_obd;
struct ptlrpc_request *req;
- int size[4] = { sizeof(struct ptlrpc_body),
+ int size[6] = { sizeof(struct ptlrpc_body),
sizeof(struct mdt_rec_rename),
- oldlen + 1, newlen + 1 };
+ 0, 0, oldlen + 1, newlen + 1 };
int rc;
ENTRY;
+ if (op_data->mod_capa1)
+ size[REQ_REC_OFF + 1] = sizeof(struct lustre_capa);
+ if (op_data->mod_capa2)
+ size[REQ_REC_OFF + 2] = sizeof(struct lustre_capa);
+
req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_MDS_VERSION,
- MDS_REINT, 4, size, NULL);
+ MDS_REINT, 6, size, NULL);
if (req == NULL)
RETURN(-ENOMEM);
static int mdc_cleanup(struct obd_device *obd);
extern int mds_queue_req(struct ptlrpc_request *);
+
+/*
+ * Build a client-side obd_capa from the lustre_capa held in reply buffer
+ * @offset of @req.
+ *
+ * Returns the newly allocated obd_capa on success, ERR_PTR(-EFAULT) when
+ * lustre_msg_buf() finds no usable capa buffer at @offset, or
+ * ERR_PTR(-ENOMEM) when alloc_capa() fails.  The result is never NULL, so
+ * callers must test it with IS_ERR().
+ *
+ * NOTE(review): the capa is copied as-is, relying on it having been swabbed
+ * in mdc_enqueue.  send_getstatus() also calls this helper on a reply that
+ * does not visibly pass through mdc_enqueue -- confirm byte-order handling
+ * on that path.
+ */
+static inline struct obd_capa *mdc_unpack_capa(struct ptlrpc_request *req,
+					       unsigned int offset)
+{
+	struct lustre_capa *capa;
+	struct obd_capa *oc;
+
+	/* swabbed already in mdc_enqueue */
+	capa = lustre_msg_buf(req->rq_repmsg, offset, sizeof(*capa));
+	if (capa == NULL) {
+		/* offset is unsigned, so print it with %u rather than %d */
+		CERROR("missing capa at offset %u!\n", offset);
+		return ERR_PTR(-EFAULT);
+	}
+
+	oc = alloc_capa(CAPA_SITE_CLIENT);
+	if (!oc) {
+		CERROR("alloc capa failed!\n");
+		return ERR_PTR(-ENOMEM);
+	}
+	/* keep a private copy; the reply buffer belongs to the request */
+	oc->c_capa = *capa;
+
+	return oc;
+}
+
/* Helper that implements most of mdc_getstatus and signal_completed_replay. */
/* XXX this should become mdc_get_info("key"), sending MDS_GET_INFO RPC */
static int send_getstatus(struct obd_import *imp, struct lu_fid *rootfid,
- int level, int msg_flags)
+ struct obd_capa **pc, int level, int msg_flags)
{
struct ptlrpc_request *req;
struct mdt_body *body;
- int rc, size[2] = { sizeof(struct ptlrpc_body), sizeof(*body) };
+ int rc, size[3] = { sizeof(struct ptlrpc_body),
+ sizeof(*body),
+ sizeof(struct lustre_capa) };
ENTRY;
req = ptlrpc_prep_req(imp, LUSTRE_MDS_VERSION, MDS_GETSTATUS, 2, size,
GOTO(out, rc = -ENOMEM);
req->rq_send_state = level;
- ptlrpc_req_set_repsize(req, 2, size);
+ ptlrpc_req_set_repsize(req, 3, size);
- mdc_pack_req_body(req, REQ_REC_OFF, 0, NULL, 0, 0);
+ mdc_pack_req_body(req, REQ_REC_OFF, 0, NULL, NULL, 0, 0);
lustre_msg_add_flags(req->rq_reqmsg, msg_flags);
rc = ptlrpc_queue_wait(req);
*rootfid = body->fid1;
+ if (body->valid & OBD_MD_FLMDSCAPA) {
+ struct obd_capa *oc;
+
+ oc = mdc_unpack_capa(req, REPLY_REC_OFF + 1);
+ if (IS_ERR(oc))
+ GOTO(out, rc = PTR_ERR(oc));
+ *pc = oc;
+ }
+
CDEBUG(D_NET, "root fid="DFID", last_committed="LPU64
", last_xid="LPU64"\n",
PFID(rootfid),
}
/* This should be mdc_get_info("rootfid") */
-int mdc_getstatus(struct obd_export *exp, struct lu_fid *rootfid)
+int mdc_getstatus(struct obd_export *exp, struct lu_fid *rootfid,
+ struct obd_capa **pc)
{
- return send_getstatus(class_exp2cliimp(exp), rootfid,
+ return send_getstatus(class_exp2cliimp(exp), rootfid, pc,
LUSTRE_IMP_FULL, 0);
}
{
struct mdt_body *body;
void *eadata;
- int size[4] = { sizeof(struct ptlrpc_body), sizeof(*body) };
+ int size[5] = { sizeof(struct ptlrpc_body),
+ sizeof(*body) };
int bufcount = 2, rc;
ENTRY;
/* Request message already built. */
if (ea_size != 0) {
- size[bufcount] = ea_size;
+ size[bufcount++] = ea_size;
CDEBUG(D_INODE, "reserved %u bytes for MD/symlink in packet\n",
ea_size);
}
- bufcount++;
if (acl_size) {
- size[bufcount] = acl_size;
+ size[bufcount++] = acl_size;
CDEBUG(D_INODE, "reserved %u bytes for ACL\n", acl_size);
}
- bufcount++;
+ size[bufcount++] = sizeof(struct lustre_capa);
ptlrpc_req_set_repsize(req, bufcount, size);
rc = ptlrpc_queue_wait(req);
}
int mdc_getattr(struct obd_export *exp, const struct lu_fid *fid,
- obd_valid valid, int ea_size, struct ptlrpc_request **request)
+ struct obd_capa *oc, obd_valid valid, int ea_size,
+ struct ptlrpc_request **request)
{
struct ptlrpc_request *req;
- int size[2] = { sizeof(struct ptlrpc_body), sizeof(struct mdt_body) };
+ int size[3] = { sizeof(struct ptlrpc_body), sizeof(struct mdt_body) };
int acl_size = 0, rc;
ENTRY;
+ if (oc)
+ size[REQ_REC_OFF + 1] = sizeof(struct lustre_capa);
+
/*
* XXX do we need to make another request here? We just did a getattr
* to do the lookup in the first place.
*/
req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_MDS_VERSION,
- MDS_GETATTR, 2, size, NULL);
+ MDS_GETATTR, 3, size, NULL);
if (!req)
GOTO(out, rc = -ENOMEM);
- mdc_pack_req_body(req, REQ_REC_OFF, valid, fid, ea_size,
+ mdc_pack_req_body(req, REQ_REC_OFF, valid, fid, oc, ea_size,
MDS_BFLAG_EXT_FLAGS/*request "new" flags(bug 9486)*/);
/* currently only root inode will call us with FLACL */
}
int mdc_getattr_name(struct obd_export *exp, const struct lu_fid *fid,
- const char *filename, int namelen, obd_valid valid,
- int ea_size, struct ptlrpc_request **request)
+ struct obd_capa *oc, const char *filename, int namelen,
+ obd_valid valid, int ea_size,
+ struct ptlrpc_request **request)
{
struct ptlrpc_request *req;
struct mdt_body *body;
- int rc, size[3] = { sizeof(struct ptlrpc_body), sizeof(*body), namelen};
+ int size[4] = { sizeof(struct ptlrpc_body), sizeof(*body), 0, namelen};
+ int rc;
ENTRY;
+ if (oc)
+ size[REQ_REC_OFF + 1] = sizeof(struct lustre_capa);
+
req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_MDS_VERSION,
- MDS_GETATTR_NAME, 3, size, NULL);
+ MDS_GETATTR_NAME, 4, size, NULL);
if (!req)
GOTO(out, rc = -ENOMEM);
- mdc_pack_req_body(req, REQ_REC_OFF, valid, fid, ea_size,
+ mdc_pack_req_body(req, REQ_REC_OFF, valid, fid, oc, ea_size,
MDS_BFLAG_EXT_FLAGS/*request "new" flags(bug 9486)*/);
LASSERT(strnlen(filename, namelen) == namelen - 1);
- memcpy(lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF + 1, namelen),
+ memcpy(lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF + 2, namelen),
filename, namelen);
rc = mdc_getattr_common(exp, ea_size, 0, req);
}
int mdc_is_subdir(struct obd_export *exp, const struct lu_fid *pfid,
- const struct lu_fid *cfid, struct ptlrpc_request **request)
+ const struct lu_fid *cfid,
+ struct obd_capa *pc, struct obd_capa *cc,
+ struct ptlrpc_request **request)
{
- int size[2] = { sizeof(struct ptlrpc_body), sizeof(struct mdt_body) };
+ int size[4] = { sizeof(struct ptlrpc_body),
+ sizeof(struct mdt_body) };
struct ptlrpc_request *req;
struct mdt_body *body;
int rc;
ENTRY;
+ if (pc)
+ size[REQ_REC_OFF + 1] = sizeof(struct lustre_capa);
+ if (cc)
+ size[REQ_REC_OFF + 2] = sizeof(struct lustre_capa);
+
req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_MDS_VERSION,
- MDS_IS_SUBDIR, 2, size, NULL);
+ MDS_IS_SUBDIR, 4, size, NULL);
if (!req)
GOTO(out, rc = -ENOMEM);
- mdc_is_subdir_pack(req, REQ_REC_OFF, pfid, cfid, 0);
+ mdc_is_subdir_pack(req, REQ_REC_OFF, pfid, cfid, pc, cc, 0);
ptlrpc_req_set_repsize(req, 2, size);
rc = ptlrpc_queue_wait(req);
static
int mdc_xattr_common(struct obd_export *exp, const struct lu_fid *fid,
+ struct obd_capa *oc,
int opcode, obd_valid valid, const char *xattr_name,
const char *input, int input_size, int output_size,
int flags, struct ptlrpc_request **request)
{
struct ptlrpc_request *req;
- int size[4] = { sizeof(struct ptlrpc_body), sizeof(struct mdt_body) };
- // int size[3] = {sizeof(struct mdt_body)}, bufcnt = 1;
- int rc, xattr_namelen = 0, bufcnt = 2, offset, remote_acl = 0;
+ int size[5] = { sizeof(struct ptlrpc_body), sizeof(struct mdt_body) };
+ int bufcnt = 3, offset = REQ_REC_OFF + 2;
+ int rc, xattr_namelen = 0, remote_acl = 0;
void *tmp;
ENTRY;
+ if (oc)
+ size[REQ_REC_OFF + 1] = sizeof(struct lustre_capa);
if (xattr_name) {
xattr_namelen = strlen(xattr_name) + 1;
size[bufcnt++] = xattr_namelen;
GOTO(out, rc = -ENOMEM);
/* request data */
- mdc_pack_req_body(req, REQ_REC_OFF, valid, fid, output_size, flags);
+ mdc_pack_req_body(req, REQ_REC_OFF, valid, fid, oc, output_size, flags);
- offset = REQ_REC_OFF + 1;
if (xattr_name) {
tmp = lustre_msg_buf(req->rq_reqmsg, offset++, xattr_namelen);
}
int mdc_setxattr(struct obd_export *exp, const struct lu_fid *fid,
- obd_valid valid, const char *xattr_name,
- const char *input, int input_size,
- int output_size, int flags,
+ struct obd_capa *oc, obd_valid valid, const char *xattr_name,
+ const char *input, int input_size, int output_size, int flags,
struct ptlrpc_request **request)
{
- return mdc_xattr_common(exp, fid, MDS_SETXATTR, valid, xattr_name,
+ return mdc_xattr_common(exp, fid, oc, MDS_SETXATTR, valid, xattr_name,
input, input_size, output_size, flags, request);
}
int mdc_getxattr(struct obd_export *exp, const struct lu_fid *fid,
- obd_valid valid, const char *xattr_name,
- const char *input, int input_size,
- int output_size, int flags, struct ptlrpc_request **request)
+ struct obd_capa *oc, obd_valid valid, const char *xattr_name,
+ const char *input, int input_size, int output_size, int flags,
+ struct ptlrpc_request **request)
{
- return mdc_xattr_common(exp, fid, MDS_GETXATTR, valid, xattr_name,
+ return mdc_xattr_common(exp, fid, oc, MDS_GETXATTR, valid, xattr_name,
input, input_size, output_size, flags, request);
}
if ((md->body->valid & OBD_MD_FLACL) && md->body->aclsize) {
rc = mdc_unpack_acl(dt_exp, req, md, offset);
if (rc)
- GOTO(err_out, rc);
+ GOTO(out, rc);
offset++;
}
LASSERT(md->remote_perm);
offset++;
}
-out:
- RETURN(rc);
-err_out:
- if (md->lsm)
- obd_free_memmd(dt_exp, &md->lsm);
- goto out;
+ if (md->body->valid & OBD_MD_FLMDSCAPA) {
+ struct obd_capa *oc = mdc_unpack_capa(req, offset);
+
+ if (IS_ERR(oc))
+ GOTO(out, rc = PTR_ERR(oc));
+ md->mds_capa = oc;
+ offset++;
+ }
+
+ if (md->body->valid & OBD_MD_FLOSSCAPA) {
+ struct obd_capa *oc = mdc_unpack_capa(req, offset);
+
+ if (IS_ERR(oc))
+ GOTO(out, rc = PTR_ERR(oc));
+ md->oss_capa = oc;
+ offset++;
+ }
+
+ EXIT;
+out:
+ if (rc) {
+ if (md->oss_capa)
+ free_capa(md->oss_capa);
+ if (md->mds_capa)
+ free_capa(md->mds_capa);
+#ifdef CONFIG_FS_POSIX_ACL
+ posix_acl_release(md->posix_acl);
+#endif
+ if (md->lsm)
+ obd_free_memmd(dt_exp, &md->lsm);
+ }
+ return rc;
}
int mdc_free_lustre_md(struct obd_export *exp, struct lustre_md *md)
struct obd_client_handle *och, struct ptlrpc_request **request)
{
struct obd_device *obd = class_exp2obd(exp);
- int reqsize[3] = { sizeof(struct ptlrpc_body),
+ int reqsize[4] = { sizeof(struct ptlrpc_body),
sizeof(struct mdt_epoch),
sizeof(struct mdt_rec_setattr)};
- int rc, repsize[4] = { sizeof(struct ptlrpc_body),
- sizeof(struct mdt_body),
- obd->u.cli.cl_max_mds_easize,
- obd->u.cli.cl_max_mds_cookiesize };
+ int repsize[4] = { sizeof(struct ptlrpc_body),
+ sizeof(struct mdt_body),
+ obd->u.cli.cl_max_mds_easize,
+ obd->u.cli.cl_max_mds_cookiesize };
struct ptlrpc_request *req;
struct mdc_open_data *mod;
+ int rc;
ENTRY;
+ if (op_data->mod_capa1)
+ reqsize[REQ_REC_OFF + 2] = sizeof(struct lustre_capa);
req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_MDS_VERSION,
- MDS_CLOSE, 3, reqsize, NULL);
+ MDS_CLOSE, 4, reqsize, NULL);
if (req == NULL)
GOTO(out, rc = -ENOMEM);
struct obd_client_handle *och)
{
struct ptlrpc_request *req;
- int rc, size[3] = { sizeof(struct ptlrpc_body),
- sizeof(struct mdt_epoch),
- sizeof(struct mdt_rec_setattr)};
+ int size[4] = { sizeof(struct ptlrpc_body),
+ sizeof(struct mdt_epoch),
+ sizeof(struct mdt_rec_setattr)};
int repsize[2] = { sizeof(struct ptlrpc_body),
sizeof(struct mdt_body)};
-
+ int rc;
ENTRY;
+
+ if (op_data->mod_capa1)
+ size[REQ_REC_OFF + 2] = sizeof(struct lustre_capa);
req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_MDS_VERSION,
- MDS_DONE_WRITING, 3, size, NULL);
+ MDS_DONE_WRITING, 4, size, NULL);
if (req == NULL)
RETURN(-ENOMEM);
#endif
int mdc_readpage(struct obd_export *exp, const struct lu_fid *fid,
- __u64 offset, struct page *page,
+ struct obd_capa *oc, __u64 offset, struct page *page,
struct ptlrpc_request **request)
{
struct obd_import *imp = class_exp2cliimp(exp);
struct ptlrpc_request *req = NULL;
struct ptlrpc_bulk_desc *desc = NULL;
struct mdt_body *body;
- int rc, size[2] = { sizeof(struct ptlrpc_body), sizeof(*body) };
+ int rc, size[3] = { sizeof(struct ptlrpc_body), sizeof(*body) };
ENTRY;
CDEBUG(D_INODE, "object: "DFID"\n", PFID(fid));
- req = ptlrpc_prep_req(imp, LUSTRE_MDS_VERSION, MDS_READPAGE, 2, size,
+ if (oc)
+ size[REQ_REC_OFF + 1] = sizeof(struct lustre_capa);
+ req = ptlrpc_prep_req(imp, LUSTRE_MDS_VERSION, MDS_READPAGE, 3, size,
NULL);
if (req == NULL)
GOTO(out, rc = -ENOMEM);
ptlrpc_prep_bulk_page(desc, page, 0, PAGE_CACHE_SIZE);
- mdc_readdir_pack(req, REQ_REC_OFF, offset, PAGE_CACHE_SIZE, fid);
+ mdc_readdir_pack(req, REQ_REC_OFF, offset, PAGE_CACHE_SIZE, fid, oc);
ptlrpc_req_set_repsize(req, 2, size);
rc = ptlrpc_queue_wait(req);
return rc;
}
-
static int mdc_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
void *karg, void *uarg)
{
}
static int mdc_pin(struct obd_export *exp, const struct lu_fid *fid,
+ struct obd_capa *oc,
struct obd_client_handle *handle, int flag)
{
struct ptlrpc_request *req;
struct mdt_body *body;
- int rc, size[2] = { sizeof(struct ptlrpc_body), sizeof(*body) };
+ int rc, size[3] = { sizeof(struct ptlrpc_body), sizeof(*body) };
ENTRY;
+ if (oc)
+ size[REQ_REC_OFF + 1] = sizeof(struct lustre_capa);
req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_MDS_VERSION,
- MDS_PIN, 2, size, NULL);
+ MDS_PIN, 3, size, NULL);
if (req == NULL)
RETURN(-ENOMEM);
body = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, sizeof (*body));
body->fid1 = *fid;
body->flags = flag;
+ mdc_pack_capa(req, REQ_REC_OFF + 1, oc);
ptlrpc_req_set_repsize(req, 2, size);
}
int mdc_sync(struct obd_export *exp, const struct lu_fid *fid,
+ struct obd_capa *oc,
struct ptlrpc_request **request)
{
struct ptlrpc_request *req;
- int size[2] = { sizeof(struct ptlrpc_body), sizeof(struct mdt_body) };
+ int size[3] = { sizeof(struct ptlrpc_body), sizeof(struct mdt_body) };
int rc;
ENTRY;
+ if (oc)
+ size[REQ_REC_OFF + 1] = sizeof(struct lustre_capa);
req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_MDS_VERSION,
- MDS_SYNC, 2, size, NULL);
+ MDS_SYNC, 3, size, NULL);
if (!req)
RETURN(rc = -ENOMEM);
- mdc_pack_req_body(req, REQ_REC_OFF, 0, fid, 0, 0);
+ mdc_pack_req_body(req, REQ_REC_OFF, 0, fid, oc, 0, 0);
ptlrpc_req_set_repsize(req, 2, size);
/* get remote permission for current user on fid */
int mdc_get_remote_perm(struct obd_export *exp, const struct lu_fid *fid,
- struct ptlrpc_request **request)
+ struct obd_capa *oc, struct ptlrpc_request **request)
{
struct ptlrpc_request *req;
struct mdt_body *body;
struct mdt_remote_perm *perm;
- int size[3] = { sizeof(struct ptlrpc_body),
- sizeof(*body),
- sizeof(*perm) };
+ int size[3] = { sizeof(struct ptlrpc_body), sizeof(*body) };
int rc;
ENTRY;
+ if (oc)
+ size[REQ_REC_OFF + 1] = sizeof(struct lustre_capa);
+
*request = NULL;
req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_MDS_VERSION,
- MDS_GETATTR, 2, size, NULL);
+ MDS_GETATTR, 3, size, NULL);
if (!req)
RETURN(-ENOMEM);
- mdc_pack_req_body(req, REQ_REC_OFF, OBD_MD_FLRMTPERM, fid, 0, 0);
+ mdc_pack_req_body(req, REQ_REC_OFF, OBD_MD_FLRMTPERM, fid, oc, 0, 0);
+ size[REPLY_REC_OFF + 1] = sizeof(*perm);
ptlrpc_req_set_repsize(req, 3, size);
rc = ptlrpc_queue_wait(req);
if (rc) {
RETURN(0);
}
+/*
+ * Queue an asynchronous MDS_RENEW_CAPA request for capability @oc on @exp.
+ *
+ * The request carries the capability in its first record buffer and asks for
+ * a reply made of a ptlrpc body, an mdt_body and a lustre_capa.  @cb is
+ * installed as the request's reply interpreter and the request is handed to
+ * ptlrpcd_add_req(); this function does not wait for the reply.
+ *
+ * Returns 0 once the request is queued, -ENOMEM if it cannot be allocated.
+ */
+static int mdc_renew_capa(struct obd_export *exp, struct obd_capa *oc,
+                          renew_capa_cb_t cb)
+{
+        int req_bufs[2] = { sizeof(struct ptlrpc_body),
+                            sizeof(struct lustre_capa) };
+        int rep_bufs[3] = { sizeof(struct ptlrpc_body),
+                            sizeof(struct mdt_body),
+                            sizeof(struct lustre_capa) };
+        struct ptlrpc_request *renew;
+        ENTRY;
+
+        renew = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_MDS_VERSION,
+                                MDS_RENEW_CAPA, 2, req_bufs, NULL);
+        if (renew == NULL)
+                RETURN(-ENOMEM);
+
+        mdc_pack_capa(renew, REQ_REC_OFF, oc);
+        ptlrpc_req_set_repsize(renew, 3, rep_bufs);
+
+        /* the reply is consumed by @cb, not by the caller */
+        renew->rq_interpret_reply = cb;
+        ptlrpcd_add_req(renew);
+
+        RETURN(0);
+}
+
struct obd_ops mdc_obd_ops = {
.o_owner = THIS_MODULE,
.o_setup = mdc_setup,
.m_free_lustre_md = mdc_free_lustre_md,
.m_set_open_replay_data = mdc_set_open_replay_data,
.m_clear_open_replay_data = mdc_clear_open_replay_data,
- .m_get_remote_perm = mdc_get_remote_perm
+ .m_get_remote_perm = mdc_get_remote_perm,
+ .m_renew_capa = mdc_renew_capa
};
extern quota_interface_t mdc_quota_interface;
#include <linux/ldiskfs_fs.h>
#include <lustre_mds.h>
+#include <lustre/lustre_idl.h>
#include "mdd_internal.h"
struct mdd_object *m;
ENTRY;
- o = lu_object_find(ctxt, mdd2lu_dev(d)->ld_site, f);
+ o = lu_object_find(ctxt, mdd2lu_dev(d)->ld_site, f, BYPASS_CAPA);
if (IS_ERR(o))
m = (struct mdd_object *)o;
else {
struct mdd_device *mdd = cookie;
struct obd_device *obd = mdd2obd_dev(mdd);
+ LASSERT(obd);
return mds_lov_write_objids(obd);
}
GOTO(out, rc);
dt->dd_ops->dt_conf_get(ctxt, dt, &m->mdd_dt_conf);
- rc = mdd_mount(ctxt, m);
- if (rc)
- GOTO(out, rc);
rc = mdd_init_obd(ctxt, m, cfg);
if (rc) {
CERROR("lov init error %d \n", rc);
GOTO(out, rc);
}
+ rc = mdd_mount(ctxt, m);
+ if (rc)
+ GOTO(out, rc);
break;
case LCFG_CLEANUP:
mdd_device_shutdown(ctxt, m);
{
struct mdd_object *mdd_obj = md2mdd_obj(pobj);
struct dt_object *dir = mdd_object_child(mdd_obj);
- struct dt_rec *rec = (struct dt_rec *)fid;
+ struct dt_rec *rec = (struct dt_rec *)fid;
const struct dt_key *key = (const struct dt_key *)name;
int rc;
ENTRY;
struct mdd_device *mdd = lu2mdd_dev(&m->md_lu_dev);
ENTRY;
- *md_size = mdd_lov_mdsize(ctx, mdd);
+ *md_size = mdd_lov_mdsize(ctx, mdd);
*cookie_size = mdd_lov_cookiesize(ctx, mdd);
RETURN(0);
}
+/*
+ * Record @keys as the capability key array of the mds obd that backs @m.
+ * Only the pointer is stored; the array itself is not copied.
+ * Always returns 0.
+ */
+static int mdd_init_capa_keys(struct md_device *m,
+                              struct lustre_capa_key *keys)
+{
+        struct obd_device *obd = mdd2obd_dev(lu2mdd_dev(&m->md_lu_dev));
+        ENTRY;
+
+        obd->u.mds.mds_capa_keys = keys;
+        RETURN(0);
+}
+
+/*
+ * Send capability key @key through the mds_osc_exp export of the obd behind
+ * @m, using obd_set_info_async() with the KEY_CAPA_KEY info key.
+ * @ctx is not used here.
+ *
+ * Returns whatever obd_set_info_async() returns.
+ */
+static int mdd_update_capa_key(const struct lu_context *ctx,
+                               struct md_device *m,
+                               struct lustre_capa_key *key)
+{
+        struct obd_device *obd = mdd2obd_dev(lu2mdd_dev(&m->md_lu_dev));
+        int result;
+        ENTRY;
+
+        result = obd_set_info_async(obd->u.mds.mds_osc_exp,
+                                    strlen(KEY_CAPA_KEY), KEY_CAPA_KEY,
+                                    sizeof(*key), key, NULL);
+        RETURN(result);
+}
+
static void __mdd_ref_add(const struct lu_context *ctxt, struct mdd_object *obj,
struct thandle *handle)
{
RETURN(rc);
}
+/*
+ * Fill @capa with a capability for @obj.
+ *
+ * The fid and the flag word are derived from the object and from the site
+ * settings (ls_capa_alg, ls_capa_timeout).  capa_lookup() is then consulted;
+ * if it returns an entry, that entry is copied into @capa with capa_cpy().
+ * Otherwise the key id and expiry are filled in, the HMAC is computed by
+ * capa_hmac() with the site key at index 1 of ls_capa_keys, and @capa is
+ * passed to capa_add().
+ *
+ * The object must exist (asserted).
+ *
+ * Returns 0 on success, or the non-zero result of capa_hmac().
+ */
+static int mdd_capa_get(const struct lu_context *ctxt, struct md_object *obj,
+                        struct lustre_capa *capa)
+{
+        struct mdd_object *mdd_obj = md2mdd_obj(obj);
+        struct mdd_device *mdd = mdo2mdd(obj);
+        struct lu_site *ls = mdd->mdd_md_dev.md_lu_dev.ld_site;
+        struct lustre_capa_key *key = &ls->ls_capa_keys[1];
+        struct obd_capa *ocapa;
+        int rc;
+        ENTRY;
+
+        LASSERT(lu_object_exists(mdd2lu_obj(mdd_obj)));
+
+        capa->lc_fid = *mdo2fid(mdd_obj);
+
+        /*
+         * Store the algorithm field first.  It is a plain assignment, so
+         * doing it after the |= below would silently discard the
+         * SHORT_EXPIRY and ROOT flag bits.
+         */
+        capa->lc_flags = ls->ls_capa_alg << 23;
+        if (ls->ls_capa_timeout < CAPA_TIMEOUT)
+                capa->lc_flags |= CAPA_FL_SHORT_EXPIRY;
+        if (lu_fid_eq(&capa->lc_fid, &mdd->mdd_root_fid))
+                capa->lc_flags |= CAPA_FL_ROOT;
+
+        /* TODO: get right permission here after remote uid landing */
+        ocapa = capa_lookup(capa);
+        if (ocapa) {
+                LASSERT(!capa_is_expired(ocapa));
+                capa_cpy(capa, ocapa);
+                capa_put(ocapa);
+                RETURN(0);
+        }
+
+        /* nothing found by capa_lookup(): sign a fresh capability */
+        capa->lc_keyid = key->lk_keyid;
+        capa->lc_expiry = CURRENT_SECONDS + ls->ls_capa_timeout;
+        rc = capa_hmac(capa->lc_hmac, capa, key->lk_key);
+        if (rc)
+                RETURN(rc);
+
+        capa_add(capa);
+        RETURN(0);
+}
+
struct md_device_operations mdd_ops = {
.mdo_statfs = mdd_statfs,
.mdo_root_get = mdd_root_get,
.mdo_maxsize_get = mdd_maxsize_get,
+ .mdo_init_capa_keys = mdd_init_capa_keys,
+ .mdo_update_capa_key= mdd_update_capa_key,
};
static struct md_dir_operations mdd_dir_ops = {
.moo_open = mdd_open,
.moo_close = mdd_close,
.moo_readpage = mdd_readpage,
- .moo_readlink = mdd_readlink
+ .moo_readlink = mdd_readlink,
+ .moo_capa_get = mdd_capa_get
};
static struct obd_ops mdd_obd_device_ops = {
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
- * linux/mds/mds_lov.c
+ * lustre/mdd/mdd_lov.c
* Lustre Metadata Server (mds) handling of striped file data
*
* Copyright (C) 2001-2006 Cluster File Systems, Inc.
GOTO(out_pop, rc);
}
- req->rq_status = mds_getattr_internal(obd, de, req, body,REPLY_REC_OFF);
+ req->rq_status = mds_getattr_internal(obd, de, req, body,
+ REPLY_REC_OFF);
l_dput(de);
GOTO(out_pop, rc);
GOTO(err_thread3, rc);
ping_evictor_start();
-
RETURN(0);
err_thread3:
int mds_obd_destroy(struct obd_export *exp, struct obdo *oa,
struct lov_stripe_md *ea, struct obd_trans_info *oti,
- struct obd_export *md_exp)
+ struct obd_export *md_exp, void *unused)
{
struct mds_obd *mds = &exp->exp_obd->u.mds;
struct inode *parent_inode = mds->mds_objects_dir->d_inode;
struct lov_stripe_md **ea, struct obd_trans_info *oti);
int mds_obd_destroy(struct obd_export *exp, struct obdo *oa,
struct lov_stripe_md *ea, struct obd_trans_info *oti,
- struct obd_export *md_exp);
+ struct obd_export *md_exp, void *unused);
void mds_init_ctxt(struct obd_device *obd, struct vfsmount *mnt);
/* mds/handler.c */
__u32 mlsi_index; /* index of target */
};
+/*
+ * Send both entries of mds->mds_capa_keys through mds->mds_osc_exp with
+ * obd_set_info_async() and the KEY_CAPA_KEY info key.
+ *
+ * Stops at the first failure and returns its error code; returns 0 when
+ * both keys were sent.
+ */
+static int mds_propagate_capa_keys(struct mds_obd *mds)
+{
+        int idx;
+        ENTRY;
+
+        for (idx = 0; idx < 2; idx++) {
+                struct lustre_capa_key *ck = &mds->mds_capa_keys[idx];
+                int err;
+
+                DEBUG_CAPA_KEY(D_SEC, ck, "propagate");
+
+                err = obd_set_info_async(mds->mds_osc_exp,
+                                         strlen(KEY_CAPA_KEY), KEY_CAPA_KEY,
+                                         sizeof(*ck), ck, NULL);
+                if (err == 0)
+                        continue;
+
+                DEBUG_CAPA_KEY(D_ERROR, ck,
+                               "propagate failed (rc = %d) for", err);
+                RETURN(err);
+        }
+
+        RETURN(0);
+}
+
/* We only sync one osc at a time, so that we don't have to hold
any kind of lock on the whole mds_lov_desc, which may change
(grow) as a result of mds_lov_add_ost. This also avoids any
if (rc != 0)
GOTO(out, rc);
+ /* propagate capability keys */
+ rc = mds_propagate_capa_keys(mds);
+ if (rc)
+ GOTO(out, rc);
+
rc = llog_connect(llog_get_context(obd, LLOG_MDS_OST_ORIG_CTXT),
mds->mds_lov_desc.ld_tgt_count,
NULL, NULL, uuid);
oa->o_valid |= OBD_MD_FLCOOKIE;
oti.oti_logcookies = logcookies;
}
- rc = obd_destroy(mds->mds_osc_exp, oa, lsm, &oti, obd->obd_self_export);
+ rc = obd_destroy(mds->mds_osc_exp, oa, lsm, &oti, obd->obd_self_export,
+ NULL);
obdo_free(oa);
if (rc)
CDEBUG(D_INODE, "destroy orphan objid 0x"LPX64" on ost error "
MODULES := mdt
-mdt-objs := mdt_handler.o mdt_lib.o mdt_reint.o mdt_xattr.o mdt_recovery.o mdt_open.o
-mdt-objs += mdt_idmap.o mdt_identity.o mdt_rmtacl.o
+mdt-objs := mdt_handler.o mdt_lib.o mdt_reint.o mdt_xattr.o mdt_recovery.o
+mdt-objs += mdt_open.o mdt_idmap.o mdt_identity.o mdt_rmtacl.o mdt_capa.o
@INCLUDE_RULES@
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * lustre/mdt/mdt_capa.c
+ * Lustre Metadata Target (mdt) capability key read/write/update.
+ *
+ * Copyright (C) 2005 Cluster File Systems, Inc.
+ * Author: Lai Siyao <lsy@clusterfs.com>
+ *
+ * This file is part of the Lustre file system, http://www.lustre.org
+ * Lustre is a trademark of Cluster File Systems, Inc.
+ *
+ * You may have signed or agreed to another license before downloading
+ * this software. If so, you are bound by the terms and conditions
+ * of that agreement, and the following does not apply to you. See the
+ * LICENSE file included with this distribution for more information.
+ *
+ * If you did not agree to a different license, then this copy of Lustre
+ * is open source software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * In either case, Lustre is distributed in the hope that it will be
+ * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * license text for more details.
+ */
+
+#ifndef EXPORT_SYMTAB
+# define EXPORT_SYMTAB
+#endif
+#define DEBUG_SUBSYSTEM S_MDS
+
+#include "mdt_internal.h"
+
+/*
+ * Set mdt_ck_expiry to mdt_ck_timeout seconds from now, in jiffies.
+ */
+static inline void set_capa_key_expiry(struct mdt_device *mdt)
+{
+        unsigned long lifetime = mdt->mdt_ck_timeout * HZ;
+
+        mdt->mdt_ck_expiry = jiffies + lifetime;
+}
+
+/*
+ * Build a new capability key in @key: random key material, MDS id @mdsnum
+ * and a key id one greater than @keyid.  lk_padding is left untouched.
+ */
+static void make_capa_key(struct lustre_capa_key *key,
+                          mdsno_t mdsnum, int keyid)
+{
+        get_random_bytes(key->lk_key, sizeof(key->lk_key));
+        key->lk_keyid = keyid + 1;
+        key->lk_mdsid = mdsnum;
+}
+
+enum {
+ MDT_TXN_CAPA_KEYS_WRITE_CREDITS = 1 /* credits passed to mdt_trans_start() by write_capa_keys() */
+};
+
+/*
+ * Convert capability key @src from CPU byte order into little-endian form
+ * in @tgt.  The key bytes themselves are copied verbatim.
+ */
+static inline void lck_cpu_to_le(struct lustre_capa_key *tgt,
+                                 struct lustre_capa_key *src)
+{
+        memcpy(tgt->lk_key, src->lk_key, sizeof(src->lk_key));
+        tgt->lk_padding = cpu_to_le32(src->lk_padding);
+        tgt->lk_keyid   = cpu_to_le32(src->lk_keyid);
+        tgt->lk_mdsid   = cpu_to_le64(src->lk_mdsid);
+}
+
+/*
+ * Convert little-endian capability key @src into CPU byte order in @tgt.
+ * The key bytes themselves are copied verbatim.
+ */
+static inline void lck_le_to_cpu(struct lustre_capa_key *tgt,
+                                 struct lustre_capa_key *src)
+{
+        memcpy(tgt->lk_key, src->lk_key, sizeof(src->lk_key));
+        tgt->lk_padding = le32_to_cpu(src->lk_padding);
+        tgt->lk_keyid   = le32_to_cpu(src->lk_keyid);
+        tgt->lk_mdsid   = le64_to_cpu(src->lk_mdsid);
+}
+
+/*
+ * Write the two capability keys in @keys to mdt->mdt_ck_obj, starting at
+ * offset 0, inside one transaction.
+ *
+ * Each key is converted to little-endian in the per-thread scratch buffer
+ * mti_capa_key of @ctx before being written, so the previous contents of
+ * that buffer are lost.  Writing stops at the first failure; the transaction
+ * is stopped in either case.
+ *
+ * Returns 0 on success, the mdt_trans_start() error if the transaction
+ * cannot be started, or the failing mdt_record_write() result.
+ */
+static int write_capa_keys(const struct lu_context *ctx,
+                           struct mdt_device *mdt,
+                           struct lustre_capa_key *keys)
+{
+        struct mdt_thread_info *info;
+        struct lustre_capa_key *disk;
+        struct thandle *handle;
+        loff_t pos = 0;
+        int idx = 0, rc;
+
+        info = lu_context_key_get(ctx, &mdt_thread_key);
+
+        handle = mdt_trans_start(ctx, mdt, MDT_TXN_CAPA_KEYS_WRITE_CREDITS);
+        if (IS_ERR(handle))
+                RETURN(PTR_ERR(handle));
+
+        disk = &info->mti_capa_key;
+
+        /* keys are stored back to back; pos is advanced by the write */
+        do {
+                lck_cpu_to_le(disk, &keys[idx]);
+
+                rc = mdt_record_write(ctx, mdt->mdt_ck_obj, disk,
+                                      sizeof(*disk), &pos, handle);
+        } while (rc == 0 && ++idx < 2);
+
+        mdt_trans_stop(ctx, mdt, handle);
+
+        CDEBUG(D_INFO, "write capability keys rc = %d:\n", rc);
+        return rc;
+}
+
+/*
+ * Read two capability keys from mdt->mdt_ck_obj, starting at offset 0, and
+ * store them in CPU byte order in @keys[0] and @keys[1].
+ *
+ * The per-thread scratch buffer mti_capa_key of @ctx is used as the read
+ * target and is overwritten.
+ *
+ * Returns 0 on success or the first non-zero mdt_record_read() result.
+ */
+static int read_capa_keys(const struct lu_context *ctx,
+                          struct mdt_device *mdt,
+                          struct lustre_capa_key *keys)
+{
+        struct mdt_thread_info *info = lu_context_key_get(ctx,
+                                                          &mdt_thread_key);
+        struct lustre_capa_key *disk = &info->mti_capa_key;
+        loff_t pos = 0;
+        int idx = 0;
+
+        while (idx < 2) {
+                int rc = mdt_record_read(ctx, mdt->mdt_ck_obj, disk,
+                                         sizeof(*disk), &pos);
+
+                if (rc != 0)
+                        return rc;
+
+                lck_le_to_cpu(&keys[idx], disk);
+
+                DEBUG_CAPA_KEY(D_INFO, &keys[idx], "read");
+                idx++;
+        }
+
+        return 0;
+}
+
+/*
+ * Load or create the capability keys of @mdt and arm the key timer.
+ *
+ * The size of mdt->mdt_ck_obj is queried under its read lock.  If the object
+ * is empty, two fresh keys are generated and written out; otherwise the two
+ * stored keys are read into mdt->mdt_capa_keys.  On success the key expiry
+ * is set and mdt_ck_timer is armed for it.
+ *
+ * Returns 0 on success, or the error from do_attr_get(), write_capa_keys()
+ * or read_capa_keys().
+ */
+int mdt_capa_keys_init(const struct lu_context *ctx, struct mdt_device *mdt)
+{
+        struct lustre_capa_key *ck = mdt->mdt_capa_keys;
+        struct mdt_thread_info *mti;
+        struct dt_object       *ckobj;
+        struct lu_attr         *attr;
+        unsigned long           nbytes;
+        mdsno_t                 node;
+        int                     idx, rc;
+        ENTRY;
+
+        node = mdt->mdt_md_dev.md_lu_dev.ld_site->ls_node_id;
+
+        mti = lu_context_key_get(ctx, &mdt_thread_key);
+        LASSERT(mti != NULL);
+        attr = &mti->mti_attr.ma_attr;
+
+        ckobj = mdt->mdt_ck_obj;
+        ckobj->do_ops->do_read_lock(ctx, ckobj);
+        rc = ckobj->do_ops->do_attr_get(ctx, ckobj, attr);
+        ckobj->do_ops->do_read_unlock(ctx, ckobj);
+        if (rc)
+                RETURN(rc);
+
+        nbytes = (unsigned long)attr->la_size;
+        if (nbytes != 0) {
+                /* keys already stored: load them */
+                rc = read_capa_keys(ctx, mdt, ck);
+                if (rc) {
+                        CERROR("error reading MDS %s: rc %d\n", CAPA_KEYS, rc);
+                        RETURN(rc);
+                }
+        } else {
+                /* empty key object: generate both keys and store them */
+                for (idx = 0; idx < 2; idx++) {
+                        make_capa_key(&ck[idx], node, idx);
+                        DEBUG_CAPA_KEY(D_SEC, &ck[idx], "initializing");
+                }
+
+                rc = write_capa_keys(ctx, mdt, ck);
+                if (rc) {
+                        CERROR("error writing MDS %s: rc %d\n", CAPA_KEYS, rc);
+                        RETURN(rc);
+                }
+        }
+
+        set_capa_key_expiry(mdt);
+        mod_timer(&mdt->mdt_ck_timer, mdt->mdt_ck_expiry);
+        CDEBUG(D_SEC, "mds_ck_timer %lu\n", mdt->mdt_ck_expiry);
+        RETURN(0);
+}
+
+/*
+ * Timer callback for mdt_ck_timer.  @castmeharder is the mdt_device pointer
+ * cast to unsigned long.  Flags SVC_EVENT on the device's key thread and
+ * wakes whoever sleeps on its control wait queue.
+ */
+void mdt_ck_timer_callback(unsigned long castmeharder)
+{
+        struct ptlrpc_thread *ck_thread;
+
+        ENTRY;
+        ck_thread = &((struct mdt_device *)castmeharder)->mdt_ck_thread;
+        ck_thread->t_flags |= SVC_EVENT;
+        wake_up(&ck_thread->t_ctl_waitq);
+        EXIT;
+}
+
+/*
+ * Capability key rotation thread for the mdt_device passed in @args.
+ *
+ * The thread sleeps until SVC_EVENT (set by mdt_ck_timer_callback()) or
+ * SVC_STOPPING is flagged.  On an event it generates a successor to the
+ * current "red" key (mdt_capa_keys[1]), pushes it to the child device with
+ * mdo_update_capa_key(), installs it (old red key moves to slot 0) and
+ * persists the new pair with write_capa_keys().  If persisting fails the
+ * previous pair is restored.  Any failure schedules a retry in 300 seconds.
+ *
+ * The new pair is installed BEFORE write_capa_keys() is called for two
+ * reasons: write_capa_keys() uses the same per-thread mti_capa_key buffer
+ * that holds the freshly generated key and overwrites it, and it must be
+ * handed the new pair, not the old one, to store.
+ *
+ * Returns 0 on normal exit, or the lu_context_init() error.
+ */
+static int mdt_ck_thread_main(void *args)
+{
+        struct mdt_device      *mdt = args;
+        struct ptlrpc_thread   *thread = &mdt->mdt_ck_thread;
+        struct lustre_capa_key *bkey = &mdt->mdt_capa_keys[0];
+        struct lustre_capa_key *tmp, *key = red_capa_key(mdt);
+        struct lustre_capa_key  saved;
+        struct lu_context       ctx;
+        struct mdt_thread_info *info;
+        struct md_device       *next;
+        struct l_wait_info      lwi = { 0 };
+        mdsno_t                 mdsnum;
+        int                     rc;
+        ENTRY;
+
+        ptlrpc_daemonize("mdt_ck");
+        cfs_block_allsigs();
+
+        thread->t_flags = SVC_RUNNING;
+        cfs_waitq_signal(&thread->t_ctl_waitq);
+
+        /*
+         * NOTE(review): on failure the thread exits with SVC_RUNNING still
+         * set and never flags SVC_STOPPED, so mdt_ck_thread_stop() would
+         * presumably wait forever - confirm and handle.
+         */
+        rc = lu_context_init(&ctx, LCT_MD_THREAD);
+        if (rc)
+                RETURN(rc);
+
+        thread->t_ctx = &ctx;
+        ctx.lc_thread = thread;
+
+        lu_context_enter(&ctx);
+        info = lu_context_key_get(&ctx, &mdt_thread_key);
+        LASSERT(info != NULL);
+
+        tmp = &info->mti_capa_key;
+        mdsnum = mdt->mdt_md_dev.md_lu_dev.ld_site->ls_node_id;
+        while (1) {
+                l_wait_event(thread->t_ctl_waitq,
+                             thread->t_flags & (SVC_STOPPING | SVC_EVENT),
+                             &lwi);
+
+                if (thread->t_flags & SVC_STOPPING)
+                        break;
+                thread->t_flags &= ~SVC_EVENT;
+
+                /*
+                 * NOTE(review): an event that arrives before the expiry
+                 * time terminates the thread; confirm that "break" rather
+                 * than "continue" is intended here.
+                 */
+                if (time_after(mdt->mdt_ck_expiry, jiffies))
+                        break;
+
+                *tmp = *key;
+                make_capa_key(tmp, mdsnum, key->lk_keyid);
+
+                next = mdt->mdt_child;
+                rc = next->md_ops->mdo_update_capa_key(&ctx, next, tmp);
+                if (!rc) {
+                        /* install the new pair, remembering the old slot 0 */
+                        spin_lock(&capa_lock);
+                        saved = *bkey;
+                        *bkey = *key;
+                        *key = *tmp;
+                        spin_unlock(&capa_lock);
+
+                        /* from here on *tmp is scratch for the writer */
+                        rc = write_capa_keys(&ctx, mdt, mdt->mdt_capa_keys);
+                        if (rc) {
+                                /* could not persist: go back to old pair */
+                                spin_lock(&capa_lock);
+                                *key = *bkey;
+                                *bkey = saved;
+                                spin_unlock(&capa_lock);
+                        } else {
+                                set_capa_key_expiry(mdt);
+
+                                DEBUG_CAPA_KEY(D_SEC, key, "new");
+                        }
+                }
+                if (rc) {
+                        DEBUG_CAPA_KEY(D_ERROR, key, "update failed for");
+                        /* next retry is in 300 sec */
+                        mdt->mdt_ck_expiry = jiffies + 300 * HZ;
+                }
+
+                mod_timer(&mdt->mdt_ck_timer, mdt->mdt_ck_expiry);
+                CDEBUG(D_SEC, "mdt_ck_timer %lu\n", mdt->mdt_ck_expiry);
+        }
+        lu_context_exit(&ctx);
+        lu_context_fini(&ctx);
+
+        thread->t_flags = SVC_STOPPED;
+        cfs_waitq_signal(&thread->t_ctl_waitq);
+        RETURN(0);
+}
+
+/*
+ * Start the capability key thread of @mdt and wait until it has flagged
+ * SVC_RUNNING.
+ *
+ * Returns 0 on success or the negative kernel_thread() result.
+ */
+int mdt_ck_thread_start(struct mdt_device *mdt)
+{
+        struct ptlrpc_thread *ck = &mdt->mdt_ck_thread;
+        int pid;
+
+        cfs_waitq_init(&ck->t_ctl_waitq);
+        pid = kernel_thread(mdt_ck_thread_main, mdt,
+                            (CLONE_VM | CLONE_FILES));
+        if (pid >= 0) {
+                wait_event(ck->t_ctl_waitq, ck->t_flags & SVC_RUNNING);
+                return 0;
+        }
+
+        CERROR("cannot start mdt_ck thread, rc = %d\n", pid);
+        return pid;
+}
+
+/*
+ * Ask the capability key thread of @mdt to stop and wait until it has
+ * flagged SVC_STOPPED.  Does nothing if the thread is not flagged
+ * SVC_RUNNING.
+ */
+void mdt_ck_thread_stop(struct mdt_device *mdt)
+{
+        struct ptlrpc_thread *ck = &mdt->mdt_ck_thread;
+
+        if (ck->t_flags & SVC_RUNNING) {
+                ck->t_flags = SVC_STOPPING;
+                cfs_waitq_signal(&ck->t_ctl_waitq);
+                wait_event(ck->t_ctl_waitq, ck->t_flags & SVC_STOPPED);
+        }
+}
struct mdt_object *o)
{
struct md_object *next = mdt_object_child(o);
+ struct mdt_device *mdt = info->mti_mdt;
const struct mdt_body *reqbody = info->mti_body;
struct ptlrpc_request *req = mdt_info_req(info);
struct md_attr *ma = &info->mti_attr;
repbody->aclsize = sizeof(struct mdt_remote_perm);
}
}
-
#ifdef CONFIG_FS_POSIX_ACL
else if ((req->rq_export->exp_connect_flags & OBD_CONNECT_ACL) &&
(reqbody->valid & OBD_MD_FLACL)) {
}
#endif
+ if (mdt->mdt_opts.mo_mds_capa) {
+ struct lustre_capa *capa;
+
+ spin_lock(&capa_lock);
+ info->mti_capa_key = *red_capa_key(mdt);
+ spin_unlock(&capa_lock);
+
+ capa = req_capsule_server_get(&info->mti_pill, &RMF_CAPA1);
+ LASSERT(capa);
+ capa->lc_opc = CAPA_OPC_MDS_DEFAULT;
+ rc = mo_capa_get(ctxt, next, capa);
+ if (rc)
+ RETURN(rc);
+ else
+ repbody->valid |= OBD_MD_FLMDSCAPA;
+ }
+
RETURN(rc);
}
}
rc = mdt_getattr_internal(info, obj);
- mdt_shrink_reply(info, REPLY_REC_OFF + 1);
+ mdt_shrink_reply(info, REPLY_REC_OFF + 1, 1, 0);
if (reqbody->valid & OBD_MD_FLRMTPERM)
mdt_exit_ucred(info);
RETURN(rc);
*step 3: find the child object by fid & lock it.
* regardless if it is local or remote.
*/
- child = mdt_object_find(info->mti_ctxt, info->mti_mdt, child_fid);
+ child = mdt_object_find(info->mti_ctxt, info->mti_mdt, child_fid,
+ BYPASS_CAPA);
if (IS_ERR(child))
GOTO(out_parent, rc = PTR_ERR(child));
if (is_resent) {
ldlm_lock_decref(&lhc->mlh_lh, lhc->mlh_mode);
lhc->mlh_lh.cookie = 0;
}
- mdt_shrink_reply(info, REPLY_REC_OFF + 1);
+ mdt_shrink_reply(info, REPLY_REC_OFF + 1, 1, 0);
mdt_exit_ucred(info);
RETURN(rc);
}
return -EOPNOTSUPP;
}
+/*
+ * Handler for RENEW_CAPA requests.
+ *
+ * Snapshots the current red capability key into info->mti_capa_key, looks
+ * up the object named by the capability's fid and asks the child layer,
+ * through mo_capa_get(), to fill in the capability held in the RMF_CAPA1
+ * reply buffer.
+ *
+ * Returns 0 on success, the mdt_object_find() error if the object cannot
+ * be found, or the mo_capa_get() error.
+ */
+static int mdt_renew_capa(struct mdt_thread_info *info)
+{
+        struct mdt_device  *mdt = info->mti_mdt;
+        struct mdt_object  *obj;
+        struct mdt_body    *body;
+        struct lustre_capa *capa;
+        int                 rc;
+        ENTRY;
+
+        body = req_capsule_server_get(&info->mti_pill, &RMF_MDT_BODY);
+        LASSERT(body);
+
+        /*
+         * NOTE(review): the fid used for the lookup below is read from the
+         * reply-side buffer; confirm that it has been populated from the
+         * client's capability by this point.
+         */
+        capa = req_capsule_server_get(&info->mti_pill, &RMF_CAPA1);
+        LASSERT(capa);
+
+        spin_lock(&capa_lock);
+        info->mti_capa_key = *red_capa_key(mdt);
+        spin_unlock(&capa_lock);
+
+        obj = mdt_object_find(info->mti_ctxt, mdt, &capa->lc_fid, capa);
+        /* bail out on lookup failure: obj is an error pointer, not usable */
+        if (IS_ERR(obj))
+                RETURN(PTR_ERR(obj));
+
+        /* TODO: add capa check */
+        rc = mo_capa_get(info->mti_ctxt, mdt_object_child(obj), capa);
+
+        /* drop the reference taken by mdt_object_find() */
+        mdt_object_put(info->mti_ctxt, obj);
+        RETURN(rc);
+}
+
/*
* OBD PING and other handlers.
*/
struct mdt_object *mdt_object_find(const struct lu_context *ctxt,
struct mdt_device *d,
- const struct lu_fid *f)
+ const struct lu_fid *f,
+ struct lustre_capa *c)
{
struct lu_object *o;
struct mdt_object *m;
ENTRY;
- o = lu_object_find(ctxt, d->mdt_md_dev.md_lu_dev.ld_site, f);
+ if (!d->mdt_opts.mo_mds_capa)
+ c = BYPASS_CAPA;
+
+ o = lu_object_find(ctxt, d->mdt_md_dev.md_lu_dev.ld_site, f, c);
if (IS_ERR(o))
m = (struct mdt_object *)o;
else
struct mdt_object *mdt_object_find_lock(struct mdt_thread_info *info,
const struct lu_fid *f,
struct mdt_lock_handle *lh,
- __u64 ibits)
+ __u64 ibits,
+ struct lustre_capa *capa)
{
struct mdt_object *o;
- o = mdt_object_find(info->mti_ctxt, info->mti_mdt, f);
+ o = mdt_object_find(info->mti_ctxt, info->mti_mdt, f, capa);
if (!IS_ERR(o)) {
int rc;
*/
static int mdt_body_unpack(struct mdt_thread_info *info, __u32 flags)
{
- const struct mdt_body *body;
- struct mdt_object *obj;
- const struct lu_context *ctx;
- struct req_capsule *pill;
- int rc;
+ const struct mdt_body *body;
+ struct lustre_capa *capa = NULL;
+ struct mdt_object *obj;
+ const struct lu_context *ctx;
+ struct req_capsule *pill;
+ int rc;
ctx = info->mti_ctxt;
pill = &info->mti_pill;
body = info->mti_body = req_capsule_client_get(pill, &RMF_MDT_BODY);
- if (body != NULL) {
- if (fid_is_sane(&body->fid1)) {
- obj = mdt_object_find(ctx, info->mti_mdt, &body->fid1);
- if (!IS_ERR(obj)) {
- if ((flags & HABEO_CORPUS) &&
- !lu_object_exists(&obj->mot_obj.mo_lu)) {
- mdt_object_put(ctx, obj);
- rc = -ENOENT;
- } else {
- info->mti_object = obj;
- rc = 0;
- }
- } else
- rc = PTR_ERR(obj);
+ if (body == NULL)
+ return -EFAULT;
+
+ if (!fid_is_sane(&body->fid1)) {
+ CERROR("Invalid fid: "DFID"\n", PFID(&body->fid1));
+ return -EINVAL;
+ }
+
+ if (req_capsule_get_size(pill, &RMF_CAPA1, RCL_CLIENT))
+ capa = req_capsule_client_get(pill, &RMF_CAPA1);
+ obj = mdt_object_find(ctx, info->mti_mdt, &body->fid1, capa);
+ if (!IS_ERR(obj)) {
+ if ((flags & HABEO_CORPUS) &&
+ !lu_object_exists(&obj->mot_obj.mo_lu)) {
+ mdt_object_put(ctx, obj);
+ rc = -ENOENT;
} else {
- CERROR("Invalid fid: "DFID"\n", PFID(&body->fid1));
- rc = -EINVAL;
+ info->mti_object = obj;
+ rc = 0;
}
} else
- rc = -EFAULT;
+ rc = PTR_ERR(obj);
+
return rc;
}
ldlm_rep->lock_policy_res2 =
mdt_getattr_name_lock(info, lhc, child_bits, ldlm_rep);
- mdt_shrink_reply(info, DLM_REPLY_REC_OFF + 1);
+ mdt_shrink_reply(info, DLM_REPLY_REC_OFF + 1, 1, 0);
if (mdt_get_disposition(ldlm_rep, DISP_LOOKUP_NEG))
ldlm_rep->lock_policy_res2 = 0;
m->mdt_rootsquash_info = NULL;
}
+ cleanup_capas(CAPA_SITE_SERVER);
+ del_timer(&m->mdt_ck_timer);
+ mdt_ck_thread_stop(m);
+
mdt_fs_cleanup(ctx, m);
/* finish the stack */
m->mdt_opts.mo_user_xattr = 0;
m->mdt_opts.mo_acl = 0;
m->mdt_opts.mo_compat_resname = 0;
+ m->mdt_opts.mo_mds_capa = 0;
+ m->mdt_opts.mo_oss_capa = 0;
+ m->mdt_capa_alg = CAPA_HMAC_ALG_SHA1;
+ m->mdt_capa_timeout = CAPA_TIMEOUT;
+ m->mdt_ck_timeout = CAPA_KEY_TIMEOUT;
obd->obd_replayable = 1;
spin_lock_init(&m->mdt_client_bitmap_lock);
GOTO(err_free_ns, rc);
}
- rc = mdt_start_ptlrpc_service(m);
+ rc = mdt_ck_thread_start(m);
if (rc)
GOTO(err_free_ns, rc);
+ m->mdt_ck_timer.function = mdt_ck_timer_callback;
+ m->mdt_ck_timer.data = (unsigned long)m;
+ init_timer(&m->mdt_ck_timer);
+
+ s->ls_capa_keys = m->mdt_capa_keys;
+ s->ls_capa_timeout = m->mdt_capa_timeout;
+ s->ls_capa_alg = m->mdt_capa_alg;
+
+ rc = mdt_start_ptlrpc_service(m);
+ if (rc)
+ GOTO(err_capa, rc);
ping_evictor_start();
rc = mdt_fs_setup(ctx, m, obd);
err_stop_service:
mdt_stop_ptlrpc_service(m);
+err_capa:
+ del_timer(&m->mdt_ck_timer);
+ mdt_ck_thread_stop(m);
err_free_ns:
upcall_cache_cleanup(m->mdt_rmtacl_cache);
m->mdt_rmtacl_cache = NULL;
if (!mdt->mdt_opts.mo_user_xattr)
data->ocd_connect_flags &= ~OBD_CONNECT_XATTR;
+ if (!mdt->mdt_opts.mo_mds_capa)
+ data->ocd_connect_flags &= ~OBD_CONNECT_MDS_CAPA;
+
+ if (!mdt->mdt_opts.mo_oss_capa)
+ data->ocd_connect_flags &= ~OBD_CONNECT_OSS_CAPA;
+
exp->exp_connect_flags = data->ocd_connect_flags;
data->ocd_version = LUSTRE_VERSION_CODE;
exp->exp_mdt_data.med_ibits_known = data->ocd_ibits_known;
return -EBADE;
}
+ if (mdt->mdt_opts.mo_mds_capa &&
+ ((exp->exp_connect_flags & OBD_CONNECT_MDS_CAPA) == 0)) {
+ CWARN("%s: MDS requires capability support, but client not\n",
+ mdt->mdt_md_dev.md_lu_dev.ld_obd->obd_name);
+ return -EBADE;
+ }
+
+ if (mdt->mdt_opts.mo_oss_capa &&
+ ((exp->exp_connect_flags & OBD_CONNECT_OSS_CAPA) == 0)) {
+ CWARN("%s: MDS requires OSS capability support, "
+ "but client not\n",
+ mdt->mdt_md_dev.md_lu_dev.ld_obd->obd_name);
+ return -EBADE;
+ }
+
return 0;
}
rc = class_register_type(&mdt_obd_device_ops, NULL,
lvars.module_vars, LUSTRE_MDT_NAME,
&mdt_device_type);
+
return rc;
}
DEF_MDT_HNDL_0(0, SYNC, mdt_sync),
DEF_MDT_HNDL_F(HABEO_CORPUS|HABEO_REFERO, IS_SUBDIR, mdt_is_subdir),
DEF_MDT_HNDL_0(0, QUOTACHECK, mdt_quotacheck_handle),
-DEF_MDT_HNDL_0(0, QUOTACTL, mdt_quotactl_handle)
+DEF_MDT_HNDL_0(0, QUOTACTL, mdt_quotactl_handle),
+DEF_MDT_HNDL_0(0 |HABEO_REFERO, RENEW_CAPA, mdt_renew_capa)
};
#define DEF_OBD_HNDL(flags, name, fn) \
* Options bit-fields.
*/
struct {
- signed int mo_user_xattr :1;
- signed int mo_acl :1;
- signed int mo_compat_resname:1;
+ signed int mo_user_xattr :1,
+ mo_acl :1,
+ mo_compat_resname:1,
+ mo_mds_capa :1,
+ mo_oss_capa :1;
} mdt_opts;
/* lock to pretect epoch and write count */
/* root squash */
struct rootsquash_info *mdt_rootsquash_info;
int no_gss_support;
+
+ /* capability */
+ __u32 mdt_capa_alg;
+ unsigned long mdt_capa_timeout;
+ unsigned long mdt_ck_timeout;
+ struct dt_object *mdt_ck_obj;
+ unsigned long mdt_ck_expiry;
+ struct timer_list mdt_ck_timer;
+ struct ptlrpc_thread mdt_ck_thread;
+ struct lustre_capa_key mdt_capa_keys[2];
};
/*XXX copied from mds_internal.h */
int rr_logcookielen;
const struct llog_cookie *rr_logcookies;
__u32 rr_flags;
+ struct lustre_capa *rr_capa1;
+ struct lustre_capa *rr_capa2;
};
enum mdt_reint_flag {
struct mdt_client_data mti_mcd;
loff_t mti_off;
struct txn_param mti_txn_param;
+ struct lustre_capa_key mti_capa_key;
};
/*
* Info allocated per-transaction.
struct mdt_object *mdt_object_find(const struct lu_context *,
struct mdt_device *,
- const struct lu_fid *);
+ const struct lu_fid *,
+ struct lustre_capa *);
struct mdt_object *mdt_object_find_lock(struct mdt_thread_info *,
const struct lu_fid *,
struct mdt_lock_handle *,
- __u64);
+ __u64 ibits,
+ struct lustre_capa *);
void mdt_object_unlock_put(struct mdt_thread_info *,
struct mdt_object *,
struct mdt_lock_handle *,
int mdt_attr_set(struct mdt_thread_info *info, struct mdt_object *mo,
int flags);
int mdt_done_writing(struct mdt_thread_info *info);
-void mdt_shrink_reply(struct mdt_thread_info *info, int offset);
+void mdt_shrink_reply(struct mdt_thread_info *info, int offset,
+ int mdscapa, int osscapa);
int mdt_handle_last_unlink(struct mdt_thread_info *, struct mdt_object *,
const struct md_attr *);
void mdt_reconstruct_open(struct mdt_thread_info *, struct mdt_lock_handle *);
+struct thandle* mdt_trans_start(const struct lu_context *ctx,
+ struct mdt_device *mdt, int credits);
+void mdt_trans_stop(const struct lu_context *ctx,
+ struct mdt_device *mdt, struct thandle *th);
+int mdt_record_write(const struct lu_context *ctx,
+ struct dt_object *dt, const void *buf,
+ size_t count, loff_t *pos, struct thandle *th);
+int mdt_record_read(const struct lu_context *ctx,
+ struct dt_object *dt, void *buf,
+ size_t count, loff_t *pos);
void mdt_dump_lmm(int level, const struct lov_mds_md *lmm);
} \
} while(0)
+/*
+ * fid Capability
+ */
+int mdt_ck_thread_start(struct mdt_device *mdt);
+void mdt_ck_thread_stop(struct mdt_device *mdt);
+void mdt_ck_timer_callback(unsigned long castmeharder);
+int mdt_capa_keys_init(const struct lu_context *ctx, struct mdt_device *mdt);
+
+/*
+ * Return the "red" capability key of @mdt, i.e. the entry at index 1 of
+ * mdt_capa_keys.
+ */
+static inline struct lustre_capa_key *red_capa_key(struct mdt_device *mdt)
+{
+        return mdt->mdt_capa_keys + 1;
+}
+
#endif /* __KERNEL__ */
#endif /* _MDT_H */
}
}
-void mdt_shrink_reply(struct mdt_thread_info *info, int offset)
+void mdt_shrink_reply(struct mdt_thread_info *info, int offset,
+ int mdscapa, int osscapa)
{
struct ptlrpc_request *req = mdt_info_req(info);
struct mdt_body *body;
- int acl_size;
- int md_size;
+ int acl_size, md_size;
body = req_capsule_server_get(&info->mti_pill, &RMF_MDT_BODY);
LASSERT(body != NULL);
md_size, acl_size);
lustre_shrink_reply(req, offset, md_size, 1);
- lustre_shrink_reply(req, md_size ? offset + 1 : offset, acl_size, 0);
+ offset += !!md_size;
+ lustre_shrink_reply(req, offset, acl_size, 1);
+ offset += !!acl_size;
+ if (mdscapa && !(body->valid & OBD_MD_FLMDSCAPA))
+ lustre_shrink_reply(req, offset, 0, 0);
+ offset += mdscapa;
+ if (osscapa && !(body->valid & OBD_MD_FLOSSCAPA))
+ lustre_shrink_reply(req, offset, 0, 0);
+ offset += osscapa;
}
la->la_atime = rec->sa_atime;
la->la_mtime = rec->sa_mtime;
ma->ma_valid = MA_INODE;
+
+ if (req_capsule_get_size(pill, &RMF_CAPA1, RCL_CLIENT))
+ rr->rr_capa1 = req_capsule_client_get(pill, &RMF_CAPA1);
+
RETURN(0);
}
RCL_CLIENT);
ma->ma_valid |= MA_LOV;
}
+
if (req_capsule_field_present(pill, &RMF_LOGCOOKIES, RCL_CLIENT)) {
ma->ma_cookie = req_capsule_client_get(pill,
&RMF_LOGCOOKIES);
struct lu_attr *attr = &info->mti_attr.ma_attr;
struct mdt_reint_record *rr = &info->mti_rr;
struct req_capsule *pill = &info->mti_pill;
- int result = 0;
ENTRY;
rec = req_capsule_client_get(pill, &RMF_REC_CREATE);
- if (rec != NULL) {
- uc->mu_fsuid = rec->cr_fsuid;
- uc->mu_fsgid = rec->cr_fsgid;
- uc->mu_cap = rec->cr_cap;
- uc->mu_suppgids[0] = rec->cr_suppgid;
- uc->mu_suppgids[1] = -1;
-
- rr->rr_fid1 = &rec->cr_fid1;
- rr->rr_fid2 = &rec->cr_fid2;
- attr->la_mode = rec->cr_mode;
- attr->la_rdev = rec->cr_rdev;
- attr->la_uid = rec->cr_fsuid;
- attr->la_gid = rec->cr_fsgid;
- attr->la_ctime = rec->cr_time;
- attr->la_mtime = rec->cr_time;
- attr->la_atime = rec->cr_time;
- attr->la_valid = LA_MODE | LA_RDEV | LA_UID | LA_GID |
- LA_CTIME | LA_MTIME | LA_ATIME;
- info->mti_spec.sp_cr_flags = rec->cr_flags;
-
- rr->rr_name = req_capsule_client_get(pill, &RMF_NAME);
- if (S_ISDIR(attr->la_mode)) {
- struct md_create_spec *sp = &info->mti_spec;
- /* pass parent fid for cross-ref cases */
- sp->u.sp_pfid = rr->rr_fid1;
- if (info->mti_spec.sp_cr_flags & MDS_CREATE_SLAVE_OBJ) {
- /* create salve object req, need
- * unpack split ea here
- */
- req_capsule_extend(pill,
- &RQF_MDS_REINT_CREATE_SLAVE);
- LASSERT(req_capsule_field_present(pill,
- &RMF_EADATA, RCL_CLIENT));
- sp->u.sp_ea.eadata = req_capsule_client_get(pill,
- &RMF_EADATA);
- sp->u.sp_ea.eadatalen =req_capsule_get_size(pill,
- &RMF_EADATA, RCL_CLIENT);
- sp->u.sp_ea.fid = rr->rr_fid1;
- }
- } else if (S_ISLNK(attr->la_mode)) {
- const char *tgt = NULL;
- req_capsule_extend(pill, &RQF_MDS_REINT_CREATE_SYM);
- if (req_capsule_field_present(pill, &RMF_SYMTGT,
- RCL_CLIENT)) {
- tgt = req_capsule_client_get(pill,
- &RMF_SYMTGT);
- info->mti_spec.u.sp_symname = tgt;
- }
- if (tgt == NULL)
- result = -EFAULT;
+ if (rec == NULL)
+ RETURN(-EFAULT);
+
+ uc->mu_fsuid = rec->cr_fsuid;
+ uc->mu_fsgid = rec->cr_fsgid;
+ uc->mu_cap = rec->cr_cap;
+ uc->mu_suppgids[0] = rec->cr_suppgid;
+ uc->mu_suppgids[1] = -1;
+
+ rr->rr_fid1 = &rec->cr_fid1;
+ rr->rr_fid2 = &rec->cr_fid2;
+ attr->la_mode = rec->cr_mode;
+ attr->la_rdev = rec->cr_rdev;
+ attr->la_uid = rec->cr_fsuid;
+ attr->la_gid = rec->cr_fsgid;
+ attr->la_ctime = rec->cr_time;
+ attr->la_mtime = rec->cr_time;
+ attr->la_atime = rec->cr_time;
+ attr->la_valid = LA_MODE | LA_RDEV | LA_UID | LA_GID |
+ LA_CTIME | LA_MTIME | LA_ATIME;
+ info->mti_spec.sp_cr_flags = rec->cr_flags;
+
+ if (req_capsule_get_size(pill, &RMF_CAPA1, RCL_CLIENT))
+ rr->rr_capa1 = req_capsule_client_get(pill, &RMF_CAPA1);
+
+ rr->rr_name = req_capsule_client_get(pill, &RMF_NAME);
+ if (S_ISDIR(attr->la_mode)) {
+ struct md_create_spec *sp = &info->mti_spec;
+
+ /* pass parent fid for cross-ref cases */
+ sp->u.sp_pfid = rr->rr_fid1;
+ if (info->mti_spec.sp_cr_flags & MDS_CREATE_SLAVE_OBJ) {
+ /* create slave object request: need to
+ * unpack the split EA here
+ */
+ req_capsule_extend(pill, &RQF_MDS_REINT_CREATE_SLAVE);
+ LASSERT(req_capsule_field_present(pill, &RMF_EADATA,
+ RCL_CLIENT));
+ sp->u.sp_ea.eadata = req_capsule_client_get(pill,
+ &RMF_EADATA);
+ sp->u.sp_ea.eadatalen = req_capsule_get_size(pill,
+ &RMF_EADATA,
+ RCL_CLIENT);
+ sp->u.sp_ea.fid = rr->rr_fid1;
+ }
+ } else if (S_ISLNK(attr->la_mode)) {
+ const char *tgt = NULL;
+
+ req_capsule_extend(pill, &RQF_MDS_REINT_CREATE_SYM);
+ if (req_capsule_field_present(pill, &RMF_SYMTGT, RCL_CLIENT)) {
+ tgt = req_capsule_client_get(pill, &RMF_SYMTGT);
+ info->mti_spec.u.sp_symname = tgt;
}
- } else
- result = -EFAULT;
- RETURN(result);
+ if (tgt == NULL)
+ RETURN(-EFAULT);
+ }
+ RETURN(0);
}
static int mdt_link_unpack(struct mdt_thread_info *info)
struct lu_attr *attr = &info->mti_attr.ma_attr;
struct mdt_reint_record *rr = &info->mti_rr;
struct req_capsule *pill = &info->mti_pill;
- int result = 0;
ENTRY;
rec = req_capsule_client_get(pill, &RMF_REC_LINK);
- if (rec != NULL) {
- uc->mu_fsuid = rec->lk_fsuid;
- uc->mu_fsgid = rec->lk_fsgid;
- uc->mu_cap = rec->lk_cap;
- uc->mu_suppgids[0] = rec->lk_suppgid1;
- uc->mu_suppgids[1] = rec->lk_suppgid2;
-
- attr->la_uid = rec->lk_fsuid;
- attr->la_gid = rec->lk_fsgid;
- rr->rr_fid1 = &rec->lk_fid1;
- rr->rr_fid2 = &rec->lk_fid2;
- attr->la_ctime = rec->lk_time;
- attr->la_mtime = rec->lk_time;
- attr->la_valid = LA_UID | LA_GID | LA_CTIME | LA_MTIME;
- rr->rr_name = req_capsule_client_get(pill, &RMF_NAME);
- if (rr->rr_name == NULL)
- result = -EFAULT;
- } else
- result = -EFAULT;
- RETURN(result);
+ if (rec == NULL)
+ RETURN(-EFAULT);
+
+ uc->mu_fsuid = rec->lk_fsuid;
+ uc->mu_fsgid = rec->lk_fsgid;
+ uc->mu_cap = rec->lk_cap;
+ uc->mu_suppgids[0] = rec->lk_suppgid1;
+ uc->mu_suppgids[1] = rec->lk_suppgid2;
+
+ attr->la_uid = rec->lk_fsuid;
+ attr->la_gid = rec->lk_fsgid;
+ rr->rr_fid1 = &rec->lk_fid1;
+ rr->rr_fid2 = &rec->lk_fid2;
+ attr->la_ctime = rec->lk_time;
+ attr->la_mtime = rec->lk_time;
+ attr->la_valid = LA_UID | LA_GID | LA_CTIME | LA_MTIME;
+
+ if (req_capsule_get_size(pill, &RMF_CAPA1, RCL_CLIENT))
+ rr->rr_capa1 = req_capsule_client_get(pill, &RMF_CAPA1);
+ if (req_capsule_get_size(pill, &RMF_CAPA2, RCL_CLIENT))
+ rr->rr_capa2 = req_capsule_client_get(pill, &RMF_CAPA2);
+
+ rr->rr_name = req_capsule_client_get(pill, &RMF_NAME);
+ if (rr->rr_name == NULL)
+ RETURN(-EFAULT);
+
+ RETURN(0);
}
static int mdt_unlink_unpack(struct mdt_thread_info *info)
struct lu_attr *attr = &info->mti_attr.ma_attr;
struct mdt_reint_record *rr = &info->mti_rr;
struct req_capsule *pill = &info->mti_pill;
- int result = 0;
ENTRY;
rec = req_capsule_client_get(pill, &RMF_REC_UNLINK);
- if (rec != NULL) {
- uc->mu_fsuid = rec->ul_fsuid;
- uc->mu_fsgid = rec->ul_fsgid;
- uc->mu_cap = rec->ul_cap;
- uc->mu_suppgids[0] = rec->ul_suppgid;
- uc->mu_suppgids[1] = -1;
+ if (rec == NULL)
+ RETURN(-EFAULT);
+
+ uc->mu_fsuid = rec->ul_fsuid;
+ uc->mu_fsgid = rec->ul_fsgid;
+ uc->mu_cap = rec->ul_cap;
+ uc->mu_suppgids[0] = rec->ul_suppgid;
+ uc->mu_suppgids[1] = -1;
- attr->la_uid = rec->ul_fsuid;
- attr->la_gid = rec->ul_fsgid;
- rr->rr_fid1 = &rec->ul_fid1;
- rr->rr_fid2 = &rec->ul_fid2;
- attr->la_ctime = rec->ul_time;
- attr->la_mtime = rec->ul_time;
- attr->la_mode = rec->ul_mode;
-
- attr->la_valid = LA_UID | LA_GID | LA_CTIME |
- LA_MTIME | LA_MODE;
- rr->rr_name = req_capsule_client_get(pill, &RMF_NAME);
- if (rr->rr_name == NULL)
- result = -EFAULT;
- } else
- result = -EFAULT;
- RETURN(result);
+ attr->la_uid = rec->ul_fsuid;
+ attr->la_gid = rec->ul_fsgid;
+ rr->rr_fid1 = &rec->ul_fid1;
+ rr->rr_fid2 = &rec->ul_fid2;
+ attr->la_ctime = rec->ul_time;
+ attr->la_mtime = rec->ul_time;
+ attr->la_mode = rec->ul_mode;
+ attr->la_valid = LA_UID | LA_GID | LA_CTIME | LA_MTIME | LA_MODE;
+
+ if (req_capsule_get_size(pill, &RMF_CAPA1, RCL_CLIENT))
+ rr->rr_capa1 = req_capsule_client_get(pill, &RMF_CAPA1);
+
+ rr->rr_name = req_capsule_client_get(pill, &RMF_NAME);
+ if (rr->rr_name == NULL)
+ RETURN(-EFAULT);
+
+ RETURN(0);
}
static int mdt_rename_unpack(struct mdt_thread_info *info)
struct lu_attr *attr = &info->mti_attr.ma_attr;
struct mdt_reint_record *rr = &info->mti_rr;
struct req_capsule *pill = &info->mti_pill;
- int result = 0;
ENTRY;
rec = req_capsule_client_get(pill, &RMF_REC_RENAME);
- if (rec != NULL) {
- uc->mu_fsuid = rec->rn_fsuid;
- uc->mu_fsgid = rec->rn_fsgid;
- uc->mu_cap = rec->rn_cap;
- uc->mu_suppgids[0] = rec->rn_suppgid1;
- uc->mu_suppgids[1] = rec->rn_suppgid2;
+ if (rec == NULL)
+ RETURN(-EFAULT);
+
+ uc->mu_fsuid = rec->rn_fsuid;
+ uc->mu_fsgid = rec->rn_fsgid;
+ uc->mu_cap = rec->rn_cap;
+ uc->mu_suppgids[0] = rec->rn_suppgid1;
+ uc->mu_suppgids[1] = rec->rn_suppgid2;
- attr->la_uid = rec->rn_fsuid;
- attr->la_gid = rec->rn_fsgid;
- rr->rr_fid1 = &rec->rn_fid1;
- rr->rr_fid2 = &rec->rn_fid2;
- attr->la_ctime = rec->rn_time;
- attr->la_mtime = rec->rn_time;
- /* rename_tgt contains the mode already */
- attr->la_mode = rec->rn_mode;
- attr->la_valid = LA_UID | LA_GID | LA_CTIME |
- LA_MTIME | LA_MODE;
- rr->rr_name = req_capsule_client_get(pill, &RMF_NAME);
- rr->rr_tgt = req_capsule_client_get(pill, &RMF_SYMTGT);
- if (rr->rr_name == NULL || rr->rr_tgt == NULL)
- result = -EFAULT;
- } else
- result = -EFAULT;
- RETURN(result);
+ attr->la_uid = rec->rn_fsuid;
+ attr->la_gid = rec->rn_fsgid;
+ rr->rr_fid1 = &rec->rn_fid1;
+ rr->rr_fid2 = &rec->rn_fid2;
+ attr->la_ctime = rec->rn_time;
+ attr->la_mtime = rec->rn_time;
+ /* rename_tgt contains the mode already */
+ attr->la_mode = rec->rn_mode;
+ attr->la_valid = LA_UID | LA_GID | LA_CTIME | LA_MTIME | LA_MODE;
+
+ if (req_capsule_get_size(pill, &RMF_CAPA1, RCL_CLIENT))
+ rr->rr_capa1 = req_capsule_client_get(pill, &RMF_CAPA1);
+ if (req_capsule_get_size(pill, &RMF_CAPA2, RCL_CLIENT))
+ rr->rr_capa2 = req_capsule_client_get(pill, &RMF_CAPA2);
+
+ rr->rr_name = req_capsule_client_get(pill, &RMF_NAME);
+ rr->rr_tgt = req_capsule_client_get(pill, &RMF_SYMTGT);
+ if (rr->rr_name == NULL || rr->rr_tgt == NULL)
+ RETURN(-EFAULT);
+
+ RETURN(0);
}
static int mdt_open_unpack(struct mdt_thread_info *info)
struct lu_attr *attr = &info->mti_attr.ma_attr;
struct req_capsule *pill = &info->mti_pill;
struct mdt_reint_record *rr = &info->mti_rr;
- int result;
ENTRY;
rec = req_capsule_client_get(pill, &RMF_REC_CREATE);
- if (rec != NULL) {
- uc->mu_fsuid = rec->cr_fsuid;
- uc->mu_fsgid = rec->cr_fsgid;
- uc->mu_cap = rec->cr_cap;
- uc->mu_suppgids[0] = rec->cr_suppgid;
- uc->mu_suppgids[1] = -1;
-
- rr->rr_fid1 = &rec->cr_fid1;
- rr->rr_fid2 = &rec->cr_fid2;
- attr->la_mode = rec->cr_mode;
- attr->la_rdev = rec->cr_rdev;
- attr->la_uid = rec->cr_fsuid;
- attr->la_gid = rec->cr_fsgid;
- attr->la_ctime = rec->cr_time;
- attr->la_mtime = rec->cr_time;
- attr->la_atime = rec->cr_time;
- attr->la_valid = LA_MODE | LA_RDEV | LA_UID | LA_GID |
- LA_CTIME | LA_MTIME | LA_ATIME;
- info->mti_spec.sp_cr_flags = rec->cr_flags;
- rr->rr_name = req_capsule_client_get(pill, &RMF_NAME);
- if (rr->rr_name == NULL)
- result = -EFAULT;
- else
- result = 0;
- } else
- result = -EFAULT;
+ if (rec == NULL)
+ RETURN(-EFAULT);
+
+ uc->mu_fsuid = rec->cr_fsuid;
+ uc->mu_fsgid = rec->cr_fsgid;
+ uc->mu_cap = rec->cr_cap;
+ uc->mu_suppgids[0] = rec->cr_suppgid;
+ uc->mu_suppgids[1] = -1;
+
+ rr->rr_fid1 = &rec->cr_fid1;
+ rr->rr_fid2 = &rec->cr_fid2;
+ attr->la_mode = rec->cr_mode;
+ attr->la_rdev = rec->cr_rdev;
+ attr->la_uid = rec->cr_fsuid;
+ attr->la_gid = rec->cr_fsgid;
+ attr->la_ctime = rec->cr_time;
+ attr->la_mtime = rec->cr_time;
+ attr->la_atime = rec->cr_time;
+ attr->la_valid = LA_MODE | LA_RDEV | LA_UID | LA_GID |
+ LA_CTIME | LA_MTIME | LA_ATIME;
+ info->mti_spec.sp_cr_flags = rec->cr_flags;
+
+ if (req_capsule_get_size(pill, &RMF_CAPA1, RCL_CLIENT))
+ rr->rr_capa1 = req_capsule_client_get(pill, &RMF_CAPA1);
+ if (req_capsule_get_size(pill, &RMF_CAPA2, RCL_CLIENT))
+ rr->rr_capa2 = req_capsule_client_get(pill, &RMF_CAPA2);
+
+ rr->rr_name = req_capsule_client_get(pill, &RMF_NAME);
+ if (rr->rr_name == NULL)
+ RETURN(-EFAULT);
if (req_capsule_field_present(pill, &RMF_EADATA, RCL_CLIENT)) {
struct md_create_spec *sp = &info->mti_spec;
sp->u.sp_ea.no_lov_create = 1;
}
- RETURN(result);
+ RETURN(0);
}
typedef int (*reint_unpacker)(struct mdt_thread_info *info);
{
struct ptlrpc_request *req = mdt_info_req(info);
struct mdt_export_data *med = &req->rq_export->exp_mdt_data;
+ struct mdt_device *mdt = info->mti_mdt;
struct md_attr *ma = &info->mti_attr;
struct lu_attr *la = &ma->ma_attr;
struct mdt_file_data *mfd;
}
}
+ spin_lock(&capa_lock);
+ info->mti_capa_key = *red_capa_key(mdt);
+ spin_unlock(&capa_lock);
+
+ if (mdt->mdt_opts.mo_mds_capa) {
+ struct lustre_capa *capa;
+
+ capa = req_capsule_server_get(&info->mti_pill, &RMF_CAPA1);
+ LASSERT(capa);
+ capa->lc_opc = CAPA_OPC_MDS_DEFAULT;
+ rc = mo_capa_get(info->mti_ctxt, mdt_object_child(o), capa);
+ if (rc)
+ RETURN(rc);
+ repbody->valid |= OBD_MD_FLMDSCAPA;
+ }
+ if (mdt->mdt_opts.mo_oss_capa) {
+ struct lustre_capa *capa;
+
+ capa = req_capsule_server_get(&info->mti_pill, &RMF_CAPA2);
+ LASSERT(capa);
+ capa->lc_opc = CAPA_OPC_OSS_DEFAULT;
+ rc = mo_capa_get(info->mti_ctxt, mdt_object_child(o), capa);
+ if (rc)
+ RETURN(rc);
+ repbody->valid |= OBD_MD_FLOSSCAPA;
+ }
+
/* if we are following a symlink, don't open; and
* do not return open handle for special nodes as client required
*/
if (mdt_get_disposition(ldlm_rep, DISP_OPEN_CREATE) &&
req->rq_status != 0) {
/* We did not create successfully, return error to client. */
- mdt_shrink_reply(info, DLM_REPLY_REC_OFF + 1);
+ mdt_shrink_reply(info, DLM_REPLY_REC_OFF + 1, 1, 1);
GOTO(out, rc = req->rq_status);
}
* We failed after creation, but we do not know in which step
* we failed. So try to check the child object.
*/
- parent = mdt_object_find(ctxt, mdt, rr->rr_fid1);
+ parent = mdt_object_find(ctxt, mdt, rr->rr_fid1, rr->rr_capa1);
LASSERT(!IS_ERR(parent));
- child = mdt_object_find(ctxt, mdt, rr->rr_fid2);
+ child = mdt_object_find(ctxt, mdt, rr->rr_fid2, rr->rr_capa2);
LASSERT(!IS_ERR(child));
rc = lu_object_exists(&child->mot_obj.mo_lu);
}
mdt_object_put(ctxt, parent);
mdt_object_put(ctxt, child);
- mdt_shrink_reply(info, DLM_REPLY_REC_OFF + 1);
+ mdt_shrink_reply(info, DLM_REPLY_REC_OFF + 1, 1, 1);
GOTO(out, rc);
} else {
regular_open:
int rc;
ENTRY;
- o = mdt_object_find(info->mti_ctxt, info->mti_mdt, rr->rr_fid2);
+ o = mdt_object_find(info->mti_ctxt, info->mti_mdt, rr->rr_fid2,
+ rr->rr_capa2);
if (IS_ERR(o))
RETURN(rc = PTR_ERR(o));
rc = lu_object_exists(&o->mot_obj.mo_lu);
-
if (rc > 0) {
const struct lu_context *ctxt = info->mti_ctxt;
int rc;
ENTRY;
- o = mdt_object_find(info->mti_ctxt, info->mti_mdt, fid);
+ o = mdt_object_find(info->mti_ctxt, info->mti_mdt, fid, BYPASS_CAPA);
if (IS_ERR(o))
RETURN(rc = PTR_ERR(o));
if (rr->rr_name[0] == 0) {
/* this is cross-ref open */
mdt_set_disposition(info, ldlm_rep, DISP_LOOKUP_POS);
- result = mdt_cross_open(info, rr->rr_fid1, ldlm_rep, create_flags);
+ result = mdt_cross_open(info, rr->rr_fid1, ldlm_rep,
+ create_flags);
GOTO(out, result);
}
else
lh->mlh_mode = LCK_EX;
parent = mdt_object_find_lock(info, rr->rr_fid1, lh,
- MDS_INODELOCK_UPDATE);
+ MDS_INODELOCK_UPDATE, rr->rr_capa1);
if (IS_ERR(parent))
GOTO(out, result = PTR_ERR(parent));
mdt_set_disposition(info, ldlm_rep, DISP_LOOKUP_POS);
}
- child = mdt_object_find(info->mti_ctxt, mdt, child_fid);
+ child = mdt_object_find(info->mti_ctxt, mdt, child_fid, BYPASS_CAPA);
if (IS_ERR(child))
GOTO(out_parent, result = PTR_ERR(child));
out_parent:
mdt_object_unlock_put(info, parent, lh, result);
out:
- mdt_shrink_reply(info, DLM_REPLY_REC_OFF + 1);
+ mdt_shrink_reply(info, DLM_REPLY_REC_OFF + 1, 1, 1);
if (result)
lustre_msg_set_transno(req->rq_repmsg, 0);
return result;
mdt_object_put(info->mti_ctxt, o);
}
if (repbody != NULL)
- mdt_shrink_reply(info, REPLY_REC_OFF + 1);
+ mdt_shrink_reply(info, REPLY_REC_OFF + 1, 0, 0);
if (MDT_FAIL_CHECK(OBD_FAIL_MDS_CLOSE_PACK))
RETURN(-ENOMEM);
struct mdt_device *mdt);
/* TODO: maybe this pair should be defined in dt_object.c */
-static int mdt_record_read(const struct lu_context *ctx,
- struct dt_object *dt, void *buf,
- size_t count, loff_t *pos)
+int mdt_record_read(const struct lu_context *ctx,
+ struct dt_object *dt, void *buf,
+ size_t count, loff_t *pos)
{
int rc;
return rc;
}
-static int mdt_record_write(const struct lu_context *ctx,
- struct dt_object *dt, const void *buf,
- size_t count, loff_t *pos, struct thandle *th)
+int mdt_record_write(const struct lu_context *ctx,
+ struct dt_object *dt, const void *buf,
+ size_t count, loff_t *pos, struct thandle *th)
{
int rc;
MDT_TXN_LAST_RCVD_WRITE_CREDITS = 3
};
-static struct thandle* mdt_trans_start(const struct lu_context *ctx,
- struct mdt_device *mdt, int credits)
+struct thandle* mdt_trans_start(const struct lu_context *ctx,
+ struct mdt_device *mdt, int credits)
{
struct mdt_thread_info *mti;
struct txn_param *p;
return mdt->mdt_bottom->dd_ops->dt_trans_start(ctx, mdt->mdt_bottom, p);
}
-static void mdt_trans_stop(const struct lu_context *ctx,
- struct mdt_device *mdt, struct thandle *th)
+void mdt_trans_stop(const struct lu_context *ctx,
+ struct mdt_device *mdt, struct thandle *th)
{
mdt->mdt_bottom->dd_ops->dt_trans_stop(ctx, th);
}
int mdt_fs_setup(const struct lu_context *ctx, struct mdt_device *mdt,
struct obd_device *obd)
{
- struct lu_fid last_fid;
- struct dt_object *last;
+ struct lu_fid fid; /* receives the fid from each dt_store_open() call */
+ struct dt_object *o; /* reused for the LAST_RCVD and CAPA_KEYS objects */
int rc = 0;
ENTRY;
dt_txn_callback_add(mdt->mdt_bottom, &mdt->mdt_txn_cb);
- last = dt_store_open(ctx, mdt->mdt_bottom,
- LAST_RCVD, &last_fid);
- if(!IS_ERR(last)) {
- mdt->mdt_last_rcvd = last;
+ o = dt_store_open(ctx, mdt->mdt_bottom, LAST_RCVD, &fid); /* step 1: LAST_RCVD object */
+ if(!IS_ERR(o)) {
+ mdt->mdt_last_rcvd = o;
rc = mdt_server_data_init(ctx, mdt);
if (rc) {
- lu_object_put(ctx, &last->do_lu);
+ lu_object_put(ctx, &o->do_lu); /* init failed: drop the object again */
mdt->mdt_last_rcvd = NULL;
}
} else {
- rc = PTR_ERR(last);
+ rc = PTR_ERR(o);
CERROR("cannot open %s: rc = %d\n", LAST_RCVD, rc);
}
+ if (rc) /* LAST_RCVD open or server data init failed */
+ RETURN(rc);
+
+ o = dt_store_open(ctx, mdt->mdt_bottom, CAPA_KEYS, &fid); /* step 2: CAPA_KEYS object */
+ if(!IS_ERR(o)) {
+ struct md_device *next = mdt->mdt_child;
+ mdt->mdt_ck_obj = o;
+ rc = mdt_capa_keys_init(ctx, mdt);
+ if (rc) {
+ lu_object_put(ctx, &o->do_lu); /* key init failed: drop the object again */
+ mdt->mdt_ck_obj = NULL;
+ RETURN(rc); /* NOTE(review): mdt_last_rcvd stays set here; presumably released by the caller's cleanup - confirm */
+ }
+ rc = next->md_ops->mdo_init_capa_keys(next, mdt->mdt_capa_keys); /* pass the key array to the child md device */
+ } else {
+ rc = PTR_ERR(o);
+ CERROR("cannot open %s: rc = %d\n", CAPA_KEYS, rc);
+ }
+
+ if (rc) /* CAPA_KEYS open or mdo_init_capa_keys() failed */
+ RETURN(rc);
+
OBD_SET_CTXT_MAGIC(&obd->obd_lvfs_ctxt);
obd->obd_lvfs_ctxt.pwdmnt = current->fs->pwdmnt;
obd->obd_lvfs_ctxt.pwd = current->fs->pwd;
obd->obd_lvfs_ctxt.fs = get_ds();
- RETURN (rc);
+ RETURN(0); /* every failure has already returned above */
}
if (mdt->mdt_last_rcvd)
lu_object_put(ctx, &mdt->mdt_last_rcvd->do_lu);
mdt->mdt_last_rcvd = NULL;
+ if (mdt->mdt_ck_obj)
+ lu_object_put(ctx, &mdt->mdt_ck_obj->do_lu);
+ mdt->mdt_ck_obj = NULL;
}
/* reconstruction code */
return;
/* if no error, so child was created with requested fid */
- child = mdt_object_find(mti->mti_ctxt, mdt, mti->mti_rr.rr_fid2);
+ child = mdt_object_find(mti->mti_ctxt, mdt, mti->mti_rr.rr_fid2,
+ mti->mti_rr.rr_capa2);
LASSERT(!IS_ERR(child));
body = req_capsule_server_get(&mti->mti_pill, &RMF_MDT_BODY);
return;
body = req_capsule_server_get(&mti->mti_pill, &RMF_MDT_BODY);
- obj = mdt_object_find(mti->mti_ctxt, mdt, mti->mti_rr.rr_fid1);
+ obj = mdt_object_find(mti->mti_ctxt, mdt, mti->mti_rr.rr_fid1,
+ mti->mti_rr.rr_capa1);
LASSERT(!IS_ERR(obj));
mo_attr_get(mti->mti_ctxt, mdt_object_child(obj),
&mti->mti_attr, NULL);
struct mdt_lock_handle *lhc)
{
mdt_reconstruct_generic(mti, lhc);
- mdt_shrink_reply(mti, REPLY_REC_OFF + 1);
+ mdt_shrink_reply(mti, REPLY_REC_OFF + 1, 0, 0);
}
typedef void (*mdt_reconstructor)(struct mdt_thread_info *mti,
lh->mlh_mode = LCK_EX;
parent = mdt_object_find_lock(info, rr->rr_fid1,
- lh, MDS_INODELOCK_UPDATE);
+ lh, MDS_INODELOCK_UPDATE,
+ rr->rr_capa1);
if (IS_ERR(parent))
RETURN(PTR_ERR(parent));
- child = mdt_object_find(info->mti_ctxt, mdt, rr->rr_fid2);
+ child = mdt_object_find(info->mti_ctxt, mdt, rr->rr_fid2, BYPASS_CAPA);
if (!IS_ERR(child)) {
struct md_object *next = mdt_object_child(parent);
repbody = req_capsule_server_get(&info->mti_pill, &RMF_MDT_BODY);
- o = mdt_object_find(info->mti_ctxt, mdt, info->mti_rr.rr_fid2);
+ o = mdt_object_find(info->mti_ctxt, mdt, info->mti_rr.rr_fid2,
+ BYPASS_CAPA);
if (!IS_ERR(o)) {
struct md_object *next = mdt_object_child(o);
static int mdt_reint_setattr(struct mdt_thread_info *info,
struct mdt_lock_handle *lhc)
{
+ struct mdt_device *mdt = info->mti_mdt;
struct md_attr *ma = &info->mti_attr;
struct mdt_reint_record *rr = &info->mti_rr;
struct ptlrpc_request *req = mdt_info_req(info);
(unsigned int)ma->ma_attr.la_valid);
repbody = req_capsule_server_get(&info->mti_pill, &RMF_MDT_BODY);
- mo = mdt_object_find(info->mti_ctxt, info->mti_mdt, rr->rr_fid1);
+ mo = mdt_object_find(info->mti_ctxt, info->mti_mdt, rr->rr_fid1,
+ rr->rr_capa1);
if (IS_ERR(mo))
RETURN(rc = PTR_ERR(mo));
GOTO(out, rc);
mdt_pack_attr2body(repbody, &ma->ma_attr, mdt_object_fid(mo));
+
+        if (mdt->mdt_opts.mo_oss_capa) {
+                struct lustre_capa *capa;
+
+                /* fill the reply capability with the default OSS opcodes
+                 * plus truncate for the object that was just modified */
+                capa = req_capsule_server_get(&info->mti_pill, &RMF_CAPA1);
+                LASSERT(capa);
+                capa->lc_opc = CAPA_OPC_OSS_DEFAULT | CAPA_OPC_OSS_TRUNC;
+                rc = mo_capa_get(info->mti_ctxt, mdt_object_child(mo), capa);
+                /* mo was obtained with mdt_object_find() above, so leave
+                 * through the common "out" label like the preceding error
+                 * path instead of returning directly.
+                 * NOTE(review): the body of "out" is outside this hunk;
+                 * presumably it releases mo - confirm. */
+                if (rc)
+                        GOTO(out, rc);
+                repbody->valid |= OBD_MD_FLOSSCAPA;
+        }
+
mdt_body_reverse_idmap(info, repbody);
EXIT;
out:
parent_lh = &info->mti_lh[MDT_LH_PARENT];
parent_lh->mlh_mode = LCK_EX;
mp = mdt_object_find_lock(info, rr->rr_fid1, parent_lh,
- MDS_INODELOCK_UPDATE);
+ MDS_INODELOCK_UPDATE, rr->rr_capa1);
if (IS_ERR(mp))
GOTO(out, rc = PTR_ERR(mp));
GOTO(out_unlock_parent, rc);
/* we will lock the child regardless it is local or remote. No harm. */
- mc = mdt_object_find(info->mti_ctxt, info->mti_mdt, child_fid);
+ mc = mdt_object_find(info->mti_ctxt, info->mti_mdt, child_fid,
+ BYPASS_CAPA);
if (IS_ERR(mc))
GOTO(out_unlock_parent, rc = PTR_ERR(mc));
child_lh = &info->mti_lh[MDT_LH_CHILD];
out_unlock_parent:
mdt_object_unlock_put(info, mp, parent_lh, rc);
out:
- mdt_shrink_reply(info, REPLY_REC_OFF + 1);
+ mdt_shrink_reply(info, REPLY_REC_OFF + 1, 0, 0);
return rc;
}
lhs = &info->mti_lh[MDT_LH_PARENT];
lhs->mlh_mode = LCK_EX;
ms = mdt_object_find_lock(info, rr->rr_fid1, lhs,
- MDS_INODELOCK_UPDATE);
+ MDS_INODELOCK_UPDATE, rr->rr_capa1);
if (IS_ERR(ms))
RETURN(PTR_ERR(ms));
lhp = &info->mti_lh[MDT_LH_CHILD];
lhp->mlh_mode = LCK_EX;
mp = mdt_object_find_lock(info, rr->rr_fid2, lhp,
- MDS_INODELOCK_UPDATE);
+ MDS_INODELOCK_UPDATE, rr->rr_capa2);
if (IS_ERR(mp))
GOTO(out_unlock_source, rc = PTR_ERR(mp));
lh_tgtdir = &info->mti_lh[MDT_LH_PARENT];
lh_tgtdir->mlh_mode = LCK_EX;
mtgtdir = mdt_object_find_lock(info, rr->rr_fid1, lh_tgtdir,
- MDS_INODELOCK_UPDATE);
+ MDS_INODELOCK_UPDATE, rr->rr_capa1);
if (IS_ERR(mtgtdir))
GOTO(out, rc = PTR_ERR(mtgtdir));
lh_tgt->mlh_mode = LCK_EX;
mtgt = mdt_object_find_lock(info, tgt_fid, lh_tgt,
- MDS_INODELOCK_LOOKUP);
+ MDS_INODELOCK_LOOKUP, BYPASS_CAPA);
if (IS_ERR(mtgt))
GOTO(out_unlock_tgtdir, rc = PTR_ERR(mtgt));
out_unlock_tgtdir:
mdt_object_unlock_put(info, mtgtdir, lh_tgtdir, rc);
out:
- mdt_shrink_reply(info, REPLY_REC_OFF + 1);
+ mdt_shrink_reply(info, REPLY_REC_OFF + 1, 0, 0);
return rc;
}
ENTRY;
do {
- dst = mdt_object_find(info->mti_ctxt, info->mti_mdt, &dst_fid);
+ dst = mdt_object_find(info->mti_ctxt, info->mti_mdt, &dst_fid,
+ BYPASS_CAPA);
if (!IS_ERR(dst)) {
rc = mdo_is_subdir(info->mti_ctxt,
mdt_object_child(dst),
lh_srcdirp = &info->mti_lh[MDT_LH_PARENT];
lh_srcdirp->mlh_mode = LCK_EX;
msrcdir = mdt_object_find_lock(info, rr->rr_fid1, lh_srcdirp,
- MDS_INODELOCK_UPDATE);
+ MDS_INODELOCK_UPDATE, rr->rr_capa1);
if (IS_ERR(msrcdir))
GOTO(out, rc = PTR_ERR(msrcdir));
mdt_object_get(info->mti_ctxt, msrcdir);
mtgtdir = msrcdir;
} else {
- mtgtdir = mdt_object_find(info->mti_ctxt,
- info->mti_mdt, rr->rr_fid2);
+ mtgtdir = mdt_object_find(info->mti_ctxt, info->mti_mdt,
+ rr->rr_fid2, rr->rr_capa2);
if (IS_ERR(mtgtdir))
GOTO(out_unlock_source, rc = PTR_ERR(mtgtdir));
lh_oldp = &info->mti_lh[MDT_LH_OLD];
lh_oldp->mlh_mode = LCK_EX;
mold = mdt_object_find_lock(info, old_fid, lh_oldp,
- MDS_INODELOCK_LOOKUP);
+ MDS_INODELOCK_LOOKUP, BYPASS_CAPA);
if (IS_ERR(mold))
GOTO(out_unlock_target, rc = PTR_ERR(mold));
GOTO(out_unlock_old, rc = -EINVAL);
lh_newp->mlh_mode = LCK_EX;
- mnew = mdt_object_find(info->mti_ctxt, info->mti_mdt, new_fid);
+ mnew = mdt_object_find(info->mti_ctxt, info->mti_mdt, new_fid,
+ BYPASS_CAPA);
if (IS_ERR(mnew))
GOTO(out_unlock_old, rc = PTR_ERR(mnew));
mdt_object_unlock_put(info, msrcdir, lh_srcdirp, rc);
out:
mdt_rename_unlock(&rename_lh);
- mdt_shrink_reply(info, REPLY_REC_OFF + 1);
+ mdt_shrink_reply(info, REPLY_REC_OFF + 1, 0, 0);
return rc;
}
obdclass-all-objs += debug.o genops.o uuid.o llog_ioctl.o
obdclass-all-objs += lprocfs_status.o lustre_handles.o lustre_peer.o
obdclass-all-objs += statfs_pack.o obdo.o obd_config.o obd_mount.o prng.o mea.o
-obdclass-all-objs += lu_object.o dt_object.o hash.o
+obdclass-all-objs += lu_object.o dt_object.o hash.o capa.o
obdclass-objs := $(obdclass-linux-objs) $(obdclass-all-objs)
liblustreclass_a_SOURCES = class_obd.c debug.c genops.c statfs_pack.c mea.c uuid.c
liblustreclass_a_SOURCES += lustre_handles.c lustre_peer.c lprocfs_status.c
liblustreclass_a_SOURCES += obdo.c obd_config.c llog.c llog_obd.c llog_cat.c
-liblustreclass_a_SOURCES += llog_lvfs.c llog_swab.c
+liblustreclass_a_SOURCES += llog_lvfs.c llog_swab.c capa.c
liblustreclass_a_SOURCES += prng.c #llog_ioctl.c rbtree.c
liblustreclass_a_CPPFLAGS = $(LLCPPFLAGS) -DLUSTRE_VERSION=\"32\" -DBUILD_VERSION=\"1\"
liblustreclass_a_CFLAGS = $(LLCFLAGS)
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * lustre/obdclass/capa.c
+ * Lustre Capability Hash Management
+ *
+ * Copyright (c) 2005 Cluster File Systems, Inc.
+ * Author: Lai Siyao<lsy@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#ifndef EXPORT_SYMTAB
+# define EXPORT_SYMTAB
+#endif
+
+#define DEBUG_SUBSYSTEM S_SEC
+
+#ifdef __KERNEL__
+#include <linux/version.h>
+#include <linux/fs.h>
+#include <asm/unistd.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/init.h>
+
+#include <obd_class.h>
+#include <lustre_debug.h>
+#include <lustre/lustre_idl.h>
+#else
+#include <liblustre.h>
+#endif
+
+#include <libcfs/list.h>
+#include <lustre_capa.h>
+
+cfs_mem_cache_t *capa_cachep = NULL; /* memory cache for capability objects; NULL until created */
+
+#ifdef __KERNEL__
+struct list_head capa_list[CAPA_SITE_MAX]; /* one capability list per site, set up in init_capa_hash() */
+spinlock_t capa_lock = SPIN_LOCK_UNLOCKED; /* lock for capa_hash/capa_list */
+
+static struct hlist_head *capa_hash; /* bucket array, allocated in init_capa_hash() */
+#endif
+/* capability count, one counter per site; cleanup_capas() asserts it is zero */
+int capa_count[CAPA_SITE_MAX] = { 0, };
+
+static struct capa_hmac_alg capa_hmac_algs[] = { /* indexed by capa_alg() in capa_hmac() */
+ DEF_CAPA_HMAC_ALG("sha1", SHA1, 20, 20), /* presumably digest and key length in bytes - confirm against the macro */
+};
+
+static const char *capa_site_name[] = { /* site names for the cleanup_capas() assertion message */
+ [CAPA_SITE_CLIENT] = "client",
+ [CAPA_SITE_SERVER] = "server",
+ [CAPA_SITE_MAX] = "error"
+};
+
+EXPORT_SYMBOL(capa_cachep);
+EXPORT_SYMBOL(capa_list);
+EXPORT_SYMBOL(capa_lock);
+EXPORT_SYMBOL(capa_count);
+
+/*
+ * Allocate one page for the capability hash table, then initialise the
+ * NR_CAPAHASH bucket heads and the per-site capability lists.
+ * Outside of a kernel build this is a no-op.
+ *
+ * Returns 0 on success or -ENOMEM when the table cannot be allocated.
+ */
+int init_capa_hash(void)
+{
+#ifdef __KERNEL__
+        int nr_hash;
+        int idx;
+
+        OBD_ALLOC(capa_hash, PAGE_SIZE);
+        if (capa_hash == NULL)
+                return -ENOMEM;
+
+        /* the page must hold more bucket heads than are actually used */
+        nr_hash = PAGE_SIZE / sizeof(struct hlist_head);
+        LASSERT(nr_hash > NR_CAPAHASH);
+
+        for (idx = 0; idx < NR_CAPAHASH; idx++)
+                INIT_HLIST_HEAD(&capa_hash[idx]);
+
+        idx = CAPA_SITE_CLIENT;
+        while (idx < CAPA_SITE_MAX) {
+                INIT_LIST_HEAD(&capa_list[idx]);
+                idx++;
+        }
+#endif
+        return 0;
+}
+
+#ifdef __KERNEL__
+/*
+ * Tear down the capability hash: assert that every hash bucket and every
+ * per-site capability list is empty, then free the table allocated by
+ * init_capa_hash().
+ */
+void cleanup_capa_hash(void)
+{
+        int i;
+
+        for (i = 0; i < NR_CAPAHASH; i++)
+                LASSERTF(hlist_empty(capa_hash + i),
+                         "capa hash %d not empty\n", i);
+        /* walk the same site range init_capa_hash() initialised; starting
+         * at CAPA_SITE_MAX made this check a loop that never ran */
+        for (i = CAPA_SITE_CLIENT; i < CAPA_SITE_MAX; i++)
+                LASSERTF(list_empty(&capa_list[i]),
+                         "capa list %d not empty\n", i);
+        OBD_FREE(capa_hash, PAGE_SIZE);
+}
+
+/*
+ * Map a fid to a hash bucket index: the XOR of object id and version is
+ * multiplied by (sequence + 1) and reduced modulo NR_CAPAHASH.
+ */
+static inline int const capa_hashfn(struct lu_fid *fid)
+{
+        unsigned long seq_factor = (unsigned long)(fid_seq(fid) + 1);
+
+        return (fid_oid(fid) ^ fid_ver(fid)) * seq_factor % NR_CAPAHASH;
+}
+
+/* Return 1 when the capability's site is CAPA_SITE_SERVER, 0 otherwise. */
+static inline int capa_on_server(struct obd_capa *ocapa)
+{
+        if (ocapa->c_site != CAPA_SITE_SERVER)
+                return 0;
+        return 1;
+}
+
+/*
+ * Search one hash bucket for a cached capability matching @capa.
+ *
+ * Normally the bytes of struct lustre_capa preceding lc_hmac are compared.
+ * When capa->lc_expiry is 0 (the "MDS get capa" case) only the bytes
+ * preceding lc_keyid are compared, and entries for which
+ * capa_is_to_expire() is true are skipped.
+ *
+ * Returns the matching entry, or NULL. No reference is taken here; both
+ * callers in this file hold capa_lock around the call.
+ */
+static struct obd_capa *find_capa(struct lustre_capa *capa,
+                                  struct hlist_head *head)
+{
+        struct hlist_node *pos;
+        struct obd_capa *ocapa;
+        int mds_get = (capa->lc_expiry == 0);
+        int len;
+
+        if (mds_get)
+                len = offsetof(struct lustre_capa, lc_keyid);
+        else
+                len = offsetof(struct lustre_capa, lc_hmac);
+
+        hlist_for_each_entry(ocapa, pos, head, u.tgt.c_hash) {
+                if (memcmp(&ocapa->c_capa, capa, len) != 0)
+                        continue;
+
+                /* an MDS lookup must not hand back an entry about to expire */
+                if (mds_get && capa_is_to_expire(ocapa))
+                        continue;
+
+                LASSERT(capa_on_server(ocapa));
+
+                DEBUG_CAPA(D_SEC, &ocapa->c_capa, "found");
+                return ocapa;
+        }
+
+        return NULL;
+}
+
+/*
+ * Unlink a server-side capability from both its capability list and its
+ * hash bucket, then release it with free_capa().
+ */
+static inline void capa_delete(struct obd_capa *ocapa)
+{
+        LASSERT(capa_on_server(ocapa));
+
+        list_del(&ocapa->c_list);
+        hlist_del(&ocapa->u.tgt.c_hash);
+
+        free_capa(ocapa);
+}
+
+/*
+ * Walk @head from the front and delete up to 12 capabilities whose
+ * reference count is zero; entries still referenced are left in place.
+ */
+static inline void free_capa_lru(struct list_head *head)
+{
+        struct list_head *cur, *next;
+        struct obd_capa *ocapa;
+        int freed = 0;
+
+        for (cur = head->next; cur != head && freed < 12; cur = next) {
+                /* remember the successor first: cur may be deleted below */
+                next = cur->next;
+                ocapa = list_entry(cur, struct obd_capa, c_list);
+
+                LASSERT(capa_on_server(ocapa));
+                if (atomic_read(&ocapa->c_refc) != 0)
+                        continue;
+
+                DEBUG_CAPA(D_SEC, &ocapa->c_capa, "free unused");
+                capa_delete(ocapa);
+                freed++;
+        }
+}
+
+/* add or update */
+/*
+ * Add @capa to the server-side capability hash, or update the matching
+ * entry if find_capa() locates one.
+ *
+ * A new entry is put at the head of its hash bucket and at the tail of
+ * the server list; if capa_count[CAPA_SITE_SERVER] then exceeds
+ * CAPA_HASH_SIZE, free_capa_lru() is run. An existing entry gets its
+ * contents and expiry refreshed and is moved to the tail of the list.
+ *
+ * Returns the cached entry, or NULL when no entry could be allocated.
+ */
+struct obd_capa *capa_add(struct lustre_capa *capa)
+{
+        struct hlist_head *bucket;
+        struct obd_capa *fresh, *cached;
+
+        bucket = capa_hash + capa_hashfn(&capa->lc_fid);
+
+        /* allocate up front, before capa_lock is taken */
+        fresh = alloc_capa(CAPA_SITE_SERVER);
+        if (fresh == NULL)
+                return NULL;
+
+        spin_lock(&capa_lock);
+
+        cached = find_capa(capa, bucket);
+        if (cached != NULL) {
+                /* update in place; the spare allocation is dropped below */
+                spin_lock(&cached->c_lock);
+                cached->c_capa = *capa;
+                set_capa_expiry(cached);
+                spin_unlock(&cached->c_lock);
+
+                list_move_tail(&cached->c_list, &capa_list[CAPA_SITE_SERVER]);
+
+                spin_unlock(&capa_lock);
+
+                DEBUG_CAPA(D_SEC, &cached->c_capa, "update");
+
+                free_capa(fresh);
+                return cached;
+        }
+
+        fresh->c_capa = *capa;
+        set_capa_expiry(fresh);
+        hlist_add_head(&fresh->u.tgt.c_hash, bucket);
+        list_add_tail(&fresh->c_list, &capa_list[CAPA_SITE_SERVER]);
+
+        if (capa_count[CAPA_SITE_SERVER] > CAPA_HASH_SIZE)
+                free_capa_lru(&capa_list[CAPA_SITE_SERVER]);
+
+        DEBUG_CAPA(D_SEC, &fresh->c_capa, "new");
+
+        spin_unlock(&capa_lock);
+        return fresh;
+}
+
+/*
+ * Look up the cached capability matching @capa under capa_lock.
+ * On a hit capa_get() is called on the entry before it is returned;
+ * returns NULL when nothing matches.
+ */
+struct obd_capa *capa_lookup(struct lustre_capa *capa)
+{
+        struct hlist_head *bucket = capa_hash + capa_hashfn(&capa->lc_fid);
+        struct obd_capa *found;
+
+        spin_lock(&capa_lock);
+        found = find_capa(capa, bucket);
+        if (found != NULL)
+                capa_get(found);
+        spin_unlock(&capa_lock);
+
+        return found;
+}
+
+/*
+ * Compute the HMAC of a capability.
+ *
+ * @hmac: output buffer handed to crypto_hmac() for the digest
+ * @capa: capability to sign; the bytes preceding lc_hmac are hashed
+ * @key:  key material; the algorithm's ha_keylen is used as its length
+ *
+ * Returns 0 on success, -EFAULT when the capability does not use
+ * CAPA_HMAC_ALG_SHA1, or -ENOMEM when the transform cannot be allocated.
+ */
+int capa_hmac(__u8 *hmac, struct lustre_capa *capa, __u8 *key)
+{
+        struct crypto_tfm *tfm;
+        struct capa_hmac_alg *alg;
+        int keylen;
+        /* single scatterlist entry covering the capability up to lc_hmac */
+        struct scatterlist sl = {
+                .page   = virt_to_page(capa),
+                .offset = (unsigned long)(capa) % PAGE_SIZE,
+                .length = offsetof(struct lustre_capa, lc_hmac),
+        };
+
+        /* plain return: this function has no ENTRY to pair RETURN() with,
+         * and every other exit below already uses return */
+        if (capa_alg(capa) != CAPA_HMAC_ALG_SHA1)
+                return -EFAULT;
+
+        alg = &capa_hmac_algs[capa_alg(capa)];
+
+        tfm = crypto_alloc_tfm(alg->ha_name, 0);
+        if (!tfm)
+                return -ENOMEM;
+        keylen = alg->ha_keylen;
+
+        crypto_hmac(tfm, key, &keylen, &sl, 1, hmac);
+        crypto_free_tfm(tfm);
+
+        return 0;
+}
+
+/*
+ * Delete every capability on the list for @site whose c_site equals
+ * @site, then assert that the site's capability count has reached zero.
+ */
+void cleanup_capas(int site)
+{
+        struct obd_capa *ocapa, *next;
+
+        spin_lock(&capa_lock);
+        list_for_each_entry_safe(ocapa, next, &capa_list[site], c_list) {
+                if (ocapa->c_site != site)
+                        continue;
+                capa_delete(ocapa);
+        }
+        spin_unlock(&capa_lock);
+
+        LASSERTF(capa_count[site] == 0, "%s capability count is %d\n",
+                 capa_site_name[site], capa_count[site]);
+}
+#endif
+
+/*
+ * Copy the capability held in @ocapa into the buffer at @capa, which is
+ * treated as a struct lustre_capa. The copy is made under ocapa->c_lock.
+ */
+void capa_cpy(void *capa, struct obd_capa *ocapa)
+{
+        struct lustre_capa *dst = capa;
+
+        spin_lock(&ocapa->c_lock);
+        *dst = ocapa->c_capa;
+        spin_unlock(&ocapa->c_lock);
+}
+
+/*
+ * Write the first CAPA_HMAC_MAX_LEN bytes of @key into @buf as two
+ * lowercase hex digits per byte. @buf must have room for
+ * 2 * CAPA_HMAC_MAX_LEN characters plus the terminating NUL.
+ */
+void dump_capa_hmac(char *buf, char *key)
+{
+        char *cursor = buf;
+        int i;
+
+        for (i = 0; i < CAPA_HMAC_MAX_LEN; i++)
+                cursor += sprintf(cursor, "%02x", (unsigned char) key[i]);
+}
+
+EXPORT_SYMBOL(capa_add);
+EXPORT_SYMBOL(capa_lookup);
+
+EXPORT_SYMBOL(capa_hmac);
+EXPORT_SYMBOL(capa_cpy);
+
+EXPORT_SYMBOL(cleanup_capas);
+EXPORT_SYMBOL(dump_capa_hmac);
#define obd_init_checks() do {} while(0)
#endif
+extern int init_capa_hash(void);
+extern void cleanup_capa_hash(void);
+
extern spinlock_t obd_types_lock;
extern spinlock_t handle_lock;
extern int class_procfs_init(void);
CDEBUG(D_INFO, " Build Version: "BUILD_VERSION"\n");
#endif
+ err = init_capa_hash();
+ if (err)
+ return err;
+
spin_lock_init(&obd_types_lock);
spin_lock_init(&handle_lock);
cfs_waitq_init(&obd_race_waitq);
}
lu_global_fini();
+ cleanup_capa_hash();
obd_cleanup_caches();
obd_sysctl_clean();
struct lu_object *obj;
struct dt_object *dt;
- obj = lu_object_find(ctx, dev->dd_lu_dev.ld_site, fid);
+ obj = lu_object_find(ctx, dev->dd_lu_dev.ld_site, fid, BYPASS_CAPA);
if (!IS_ERR(obj)) {
obj = lu_object_locate(obj->lo_header, dev->dd_lu_dev.ld_type);
LASSERT(obj != NULL);
if (result == 0) {
root = dt_locate(ctx, dt, fid);
if (!IS_ERR(root)) {
+ lu_object_bypass_capa(&root->do_lu);
result = dt_lookup(ctx, root, name, fid);
if (result == 0)
child = dt_locate(ctx, dt, fid);
LASSERTF(rc == 0, "Cannot destory ll_import_cache\n");
import_cachep = NULL;
}
+ if (capa_cachep) {
+ rc = cfs_mem_cache_destroy(capa_cachep);
+ LASSERTF(rc == 0, "Cannot destory capa_cache\n");
+ capa_cachep = NULL;
+ }
EXIT;
}
if (!import_cachep)
GOTO(out, -ENOMEM);
+ LASSERT(capa_cachep == NULL);
+ capa_cachep = cfs_mem_cache_create("capa_cache",
+ sizeof(struct obd_capa), 0, 0);
+ if (!capa_cachep)
+ GOTO(out, -ENOMEM);
+
RETURN(0);
out:
obd_cleanup_caches();
if (rc)
GOTO(out, rc);
- rc = obd_destroy(handle->lgh_ctxt->loc_exp, oa, NULL, NULL, NULL);
+ rc = obd_destroy(handle->lgh_ctxt->loc_exp, oa, NULL, NULL, NULL,
+ NULL);
out:
obdo_free(oa);
RETURN(rc);
"remote_client",
"max_byte_per_rpc",
"64bit_qdata",
- "fid_capability",
+ "mds_capability",
"oss_capability",
NULL
};
*/
static struct lu_object *lu_object_alloc(const struct lu_context *ctxt,
struct lu_site *s,
- const struct lu_fid *f)
+ const struct lu_fid *f,
+ const struct lustre_capa *capa)
{
struct lu_object *scan;
struct lu_object *top;
* This is the only place where object fid is assigned. It's constant
* after this point.
*/
- top->lo_header->loh_fid = *f;
+ top->lo_header->loh_fid = *f;
+ if (capa == BYPASS_CAPA)
+ lu_object_bypass_capa(top);
+ else
+ top->lo_header->loh_capa = *capa;
layers = &top->lo_header->loh_layers;
do {
/*
* any case, additional reference is acquired on the returned object.
*/
struct lu_object *lu_object_find(const struct lu_context *ctxt,
- struct lu_site *s, const struct lu_fid *f)
+ struct lu_site *s, const struct lu_fid *f,
+ struct lustre_capa *capa)
{
struct lu_object *o;
struct lu_object *shadow;
struct hlist_head *bucket;
+ int rc;
/*
* This uses standard index maintenance protocol:
o = htable_lookup(s, bucket, f);
spin_unlock(&s->ls_guard);
- if (o != NULL)
+ if (o != NULL) {
+ if (capa == BYPASS_CAPA) {
+ o->lo_header->loh_capa_bypass = 1;
+ } else {
+ rc = lu_object_auth(ctxt, o, capa,
+ CAPA_OPC_INDEX_LOOKUP);
+ if (rc)
+ return ERR_PTR(rc);
+ o->lo_header->loh_capa = *capa;
+ }
return o;
+ }
+
/*
* Allocate new object. This may result in rather complicated
* operations, including fld queries, inode loading, etc.
*/
- o = lu_object_alloc(ctxt, s, f);
+ o = lu_object_alloc(ctxt, s, f, capa);
if (IS_ERR(o))
return o;
}
EXPORT_SYMBOL(lu_object_find);
+/*
+ * Run the loo_object_auth hook of every layer of the compound object that
+ * o belongs to, passing capa and opc through unchanged.
+ *
+ * Layers that do not define the hook are skipped.  The walk stops at the
+ * first hook that returns non-zero and that value is returned; 0 is returned
+ * when no hook objects.
+ */
+int lu_object_auth(const struct lu_context *ctxt, const struct lu_object *o,
+                   struct lustre_capa *capa, __u64 opc)
+{
+        struct lu_object_header *hdr = o->lo_header;
+        const struct lu_object *layer;
+        int rc = 0;
+
+        list_for_each_entry(layer, &hdr->loh_layers, lo_linkage) {
+                if (layer->lo_ops->loo_object_auth == NULL)
+                        continue;
+
+                rc = layer->lo_ops->loo_object_auth(ctxt, layer, capa, opc);
+                if (rc != 0)
+                        break;
+        }
+
+        return rc;
+}
+EXPORT_SYMBOL(lu_object_auth);
+
enum {
LU_SITE_HTABLE_BITS = 8,
LU_SITE_HTABLE_SIZE = (1 << LU_SITE_HTABLE_BITS),
goto invalid;
clear++;
}
-
/* Linux 2.4 doesn't pass the device, so we stuck it at the
end of the options. */
else if (strncmp(s1, "device=", 7) == 0) {
int echo_destroy(struct obd_export *exp, struct obdo *oa,
struct lov_stripe_md *ea, struct obd_trans_info *oti,
- struct obd_export *md_exp)
+ struct obd_export *md_exp, void *unused)
{
struct obd_device *obd = class_exp2obd(exp);
int echo_preprw(int cmd, struct obd_export *export, struct obdo *oa,
int objcount, struct obd_ioobj *obj, int niocount,
struct niobuf_remote *nb, struct niobuf_local *res,
- struct obd_trans_info *oti)
+ struct obd_trans_info *oti, struct lustre_capa *unused)
{
struct obd_device *obd;
struct niobuf_local *r = res;
oa->o_id, on_target ? " (undoing create)" : "");
if (on_target)
- obd_destroy(ec->ec_exp, oa, lsm, oti, NULL);
+ obd_destroy(ec->ec_exp, oa, lsm, oti, NULL, NULL);
rc = -EEXIST;
goto failed;
ioo.ioo_bufcnt = npages;
oti->oti_transno = 0;
- ret = obd_preprw(rw, exp, oa, 1, &ioo, npages, rnb, lnb, oti);
+ ret = obd_preprw(rw, exp, oa, 1, &ioo, npages, rnb, lnb, oti,
+ NULL);
if (ret != 0)
GOTO(out, ret);
oa->o_gr = FILTER_GROUP_ECHO;
oa->o_valid |= OBD_MD_FLGROUP;
rc = obd_destroy(ec->ec_exp, oa, eco->eco_lsm,
- &dummy_oti, NULL);
+ &dummy_oti, NULL, NULL);
if (rc == 0)
eco->eco_deleted = 1;
echo_put_object(eco);
MODULES := obdfilter
obdfilter-objs := filter.o filter_io.o filter_log.o
-obdfilter-objs += lproc_obdfilter.o filter_lvb.o
+obdfilter-objs += lproc_obdfilter.o filter_lvb.o filter_capa.o
ifeq ($(PATCHLEVEL),4)
obdfilter-objs += filter_io_24.o
filter_cleanup_groups(obd);
filter_free_server_data(filter);
pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+
+ filter_free_capa_keys(filter);
}
static void filter_set_last_id(struct filter_obd *filter,
obd->obd_replayable ? "enabled" : "disabled");
}
+ filter->fo_fl_oss_capa = 0;
+ INIT_LIST_HEAD(&filter->fo_capa_keys);
+
RETURN(0);
err_post:
int rc = 0;
ENTRY;
+ rc = filter_verify_capa(exp, NULL, oinfo_mdsno(oinfo),
+ oinfo_capa(oinfo), CAPA_OPC_META_READ);
+ if (rc)
+ RETURN(rc);
+
obd = class_exp2obd(exp);
if (obd == NULL) {
CDEBUG(D_IOCTL, "invalid client export %p\n", exp);
int rc;
ENTRY;
+ rc = filter_verify_capa(exp, NULL, oinfo_mdsno(oinfo),
+ oinfo_capa(oinfo), CAPA_OPC_META_WRITE);
+ if (rc)
+ RETURN(rc);
+
dentry = __filter_oa2dentry(exp->exp_obd, oinfo->oi_oa,
__FUNCTION__, 1);
if (IS_ERR(dentry))
exp->exp_obd->obd_name, oa->o_id + 1, last);
for (id = last; id > oa->o_id; id--) {
doa.o_id = id;
- rc = filter_destroy(exp, &doa, NULL, NULL, NULL);
+ rc = filter_destroy(exp, &doa, NULL, NULL, NULL, NULL);
if (rc && rc != -ENOENT) /* this is pretty fatal... */
CEMERG("error destroying precreate objid "LPU64": %d\n",
id, rc);
int filter_destroy(struct obd_export *exp, struct obdo *oa,
struct lov_stripe_md *md, struct obd_trans_info *oti,
- struct obd_export *md_exp)
+ struct obd_export *md_exp, void *capa)
{
unsigned int qcids[MAXQUOTAS] = {0, 0};
struct obd_device *obd;
ENTRY;
LASSERT(oa->o_valid & OBD_MD_FLGROUP);
+
+#if 0 /* some places don't support capability yet */
+ rc = filter_verify_capa(exp, NULL, obdo_mdsno(oa),
+ (struct lustre_capa *)capa,
+ CAPA_OPC_INDEX_LOOKUP);
+ if (rc)
+ RETURN(rc);
+#endif
+
#if 0
if (!(oa->o_valid & OBD_MD_FLGROUP))
oa->o_gr = 0;
", o_size = "LPD64"\n", oinfo->oi_oa->o_id,
oinfo->oi_oa->o_valid, oinfo->oi_policy.l_extent.start);
+ rc = filter_verify_capa(exp, NULL, oinfo_mdsno(oinfo),
+ oinfo_capa(oinfo), CAPA_OPC_OSS_TRUNC);
+ if (rc)
+ RETURN(rc);
+
oinfo->oi_oa->o_size = oinfo->oi_policy.l_extent.start;
rc = filter_setattr(exp, oinfo, oti);
RETURN(rc);
}
static int filter_sync(struct obd_export *exp, struct obdo *oa,
- struct lov_stripe_md *lsm, obd_off start, obd_off end)
+ struct lov_stripe_md *lsm, obd_off start, obd_off end,
+ void *capa)
{
struct lvfs_run_ctxt saved;
struct filter_obd *filter;
int rc, rc2;
ENTRY;
+ rc = filter_verify_capa(exp, NULL, obdo_mdsno(oa),
+ (struct lustre_capa *)capa, CAPA_OPC_OSS_WRITE);
+ if (rc)
+ RETURN(rc);
+
filter = &exp->exp_obd->u.filter;
/* an objid of zero is taken to mean "sync whole filesystem" */
RETURN(-EINVAL);
}
+ if (KEY_IS(KEY_CAPA_KEY)) {
+ rc = filter_update_capa_key(obd, (struct lustre_capa_key *)val);
+ if (rc)
+ CERROR("filter update capability key failed: %d\n", rc);
+ RETURN(rc);
+ }
+
if (keylen < strlen(KEY_MDS_CONN) ||
memcmp(key, KEY_MDS_CONN, keylen) != 0)
RETURN(-EINVAL);
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2005 Cluster File Systems, Inc.
+ *
+ * Author: Lai Siyao <lsy@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#define DEBUG_SUBSYSTEM S_FILTER
+
+#include <linux/fs.h>
+#include <linux/version.h>
+#include <asm/uaccess.h>
+#include <linux/file.h>
+#include <linux/kmod.h>
+
+#include <lustre_fsfilt.h>
+#include <lustre_capa.h>
+
+#include "filter_internal.h"
+
+/* Return the key id (lk_keyid) of the capability key wrapped by key. */
+static inline __u32 filter_ck_keyid(struct filter_capa_key *key)
+{
+        const struct lustre_capa_key *ck = &key->k_key;
+
+        return ck->lk_keyid;
+}
+
+/*
+ * Store capability key key in the filter's fo_capa_keys list.
+ *
+ * The list is scanned for entries whose lk_mdsid equals key->lk_mdsid.  When
+ * at least two such entries exist, the one with the lower key id is
+ * overwritten in place; otherwise a new entry is allocated and added.
+ *
+ * Returns 0 on success, -ENOMEM when a new entry cannot be allocated.
+ */
+int filter_update_capa_key(struct obd_device *obd, struct lustre_capa_key *key)
+{
+        struct filter_obd *filter = &obd->u.filter;
+        struct filter_capa_key *k, *rkey = NULL, *bkey = NULL;
+        ENTRY;
+
+        spin_lock(&capa_lock);
+        list_for_each_entry(k, &filter->fo_capa_keys, k_list) {
+                if (k->k_key.lk_mdsid != key->lk_mdsid)
+                        continue;
+
+                /* remember the first match in rkey, the latest other one
+                 * in bkey */
+                if (rkey)
+                        bkey = k;
+                else
+                        rkey = k;
+        }
+
+        if (bkey) {
+                /* pick the entry with the lower key id and overwrite it
+                 * without dropping capa_lock, so the chosen entry cannot
+                 * change or go away between selection and update */
+                if (filter_ck_keyid(rkey) < filter_ck_keyid(bkey))
+                        bkey = rkey;
+                bkey->k_key = *key;
+                spin_unlock(&capa_lock);
+        } else {
+                /* the allocation is done outside the spinlock */
+                spin_unlock(&capa_lock);
+
+                OBD_ALLOC_PTR(k);
+                if (!k)
+                        RETURN(-ENOMEM);
+                INIT_LIST_HEAD(&k->k_list);
+
+                spin_lock(&capa_lock);
+                k->k_key = *key;
+                list_add(&k->k_list, &filter->fo_capa_keys);
+                spin_unlock(&capa_lock);
+        }
+
+        DEBUG_CAPA_KEY(D_SEC, key, "new");
+        RETURN(0);
+}
+
+int filter_verify_capa(struct obd_export *exp, struct lu_fid *fid, __u64 mdsid,
+ struct lustre_capa *capa, __u64 opc)
+{ /*
+   * Decide whether capability capa permits operation opc on this filter.
+   *
+   * Returns 0 when the check passes or is skipped (capabilities disabled,
+   * or no key for mdsid is stored yet); -EACCES when capa is NULL, does
+   * not cover opc, or fails the comparison against the cached copy or the
+   * recomputed HMAC; -ESTALE when the cached copy is expired or no stored
+   * key has the capability's key id; -ENOMEM or the capa_hmac() error when
+   * the HMAC cannot be computed.
+   *
+   * fid is not used by the code below (see the #warning).
+   */
+ struct obd_device *obd = exp->exp_obd;
+ struct filter_obd *filter = &obd->u.filter;
+ struct filter_capa_key *k;
+ struct lustre_capa_key key;
+ struct obd_capa *c;
+ __u8 *hmac;
+ int keys_ready = 0, key_found = 0, rc = 0;
+ ENTRY;
+
+ /* capability is disabled */
+ if (!filter->fo_fl_oss_capa)
+ RETURN(0);
+
+ if (capa == NULL) {
+ CERROR("no capa has been passed\n");
+ RETURN(-EACCES);
+ }
+
+#warning "enable fid check in filter_verify_capa when fid ready"
+
+ if (!capa_opc_supported(capa, opc)) {
+ DEBUG_CAPA(D_ERROR, capa, "opc "LPX64" not supported by", opc);
+ RETURN(-EACCES);
+ }
+
+ c = capa_lookup(capa); /* first consult the capability hash */
+ if (c) {
+ spin_lock(&c->c_lock);
+ if (memcmp(&c->c_capa, capa, sizeof(*capa))) { /* whole struct is compared with the cached copy */
+ DEBUG_CAPA(D_ERROR, capa, "HMAC mismatch");
+ rc = -EACCES;
+ } else if (capa_is_expired(c)) {
+ DEBUG_CAPA(D_ERROR, capa, "expired");
+ rc = -ESTALE;
+ }
+ spin_unlock(&c->c_lock);
+
+ capa_put(c); /* drop the reference capa_lookup() took */
+ RETURN(rc); /* rc keeps its initial 0 when neither branch above fired */
+ }
+
+ spin_lock(&capa_lock); /* fo_capa_keys is walked under capa_lock */
+ list_for_each_entry(k, &filter->fo_capa_keys, k_list)
+ if (k->k_key.lk_mdsid == mdsid) {
+ keys_ready = 1; /* at least one key is stored for this mdsid */
+ if (k->k_key.lk_keyid == capa_keyid(capa)) {
+ key = k->k_key; /* copy out so the key is usable after unlock */
+ key_found = 1;
+ break;
+ }
+ }
+ spin_unlock(&capa_lock);
+
+ if (!keys_ready) {
+ CDEBUG(D_SEC, "MDS hasn't propagated capability keys yet, "
+ "ignore check!\n");
+ RETURN(0);
+ }
+
+ if (!key_found) {
+ DEBUG_CAPA(D_ERROR, capa, "no matched capability key for");
+ RETURN(-ESTALE);
+ }
+
+ OBD_ALLOC(hmac, CAPA_HMAC_MAX_LEN); /* scratch buffer for the recomputed HMAC */
+ if (hmac == NULL)
+ RETURN(-ENOMEM);
+
+ rc = capa_hmac(hmac, capa, key.lk_key);
+ if (rc) {
+ DEBUG_CAPA(D_ERROR, capa, "HMAC failed: rc %d", rc);
+ OBD_FREE(hmac, CAPA_HMAC_MAX_LEN);
+ RETURN(rc);
+ }
+
+ rc = memcmp(hmac, capa->lc_hmac, CAPA_HMAC_MAX_LEN); /* recomputed vs. presented HMAC */
+ OBD_FREE(hmac, CAPA_HMAC_MAX_LEN);
+ if (rc) {
+ DEBUG_CAPA(D_ERROR, capa, "HMAC mismatch");
+ RETURN(-EACCES);
+ }
+
+ /* store in capa hash */
+ capa_add(capa);
+ RETURN(0);
+}
+
+/*
+ * Unlink and free every entry on filter->fo_capa_keys.  The whole teardown
+ * runs under capa_lock.
+ */
+void filter_free_capa_keys(struct filter_obd *filter)
+{
+        struct list_head *head = &filter->fo_capa_keys;
+        struct filter_capa_key *victim;
+
+        spin_lock(&capa_lock);
+        /* pop from the front until nothing is left */
+        while (!list_empty(head)) {
+                victim = list_entry(head->next, struct filter_capa_key, k_list);
+                list_del_init(&victim->k_list);
+                OBD_FREE(victim, sizeof(*victim));
+        }
+        spin_unlock(&capa_lock);
+}
void *option);
int filter_destroy(struct obd_export *exp, struct obdo *oa,
struct lov_stripe_md *md, struct obd_trans_info *,
- struct obd_export *);
+ struct obd_export *, void *capa);
int filter_setattr_internal(struct obd_export *exp, struct dentry *dentry,
struct obdo *oa, struct obd_trans_info *oti);
int filter_setattr(struct obd_export *exp, struct obd_info *oinfo,
/* filter_io.c */
int filter_preprw(int cmd, struct obd_export *, struct obdo *, int objcount,
struct obd_ioobj *, int niocount, struct niobuf_remote *,
- struct niobuf_local *, struct obd_trans_info *);
+ struct niobuf_local *, struct obd_trans_info *,
+ struct lustre_capa *);
int filter_commitrw(int cmd, struct obd_export *, struct obdo *, int objcount,
struct obd_ioobj *, int niocount, struct niobuf_local *,
struct obd_trans_info *, int rc);
/* Quota stuff */
extern quota_interface_t *quota_interface;
+/* Capability */
+/* Return the object's group number (o_gr) as an offset from FILTER_GROUP_MDS0. */
+static inline __u64 obdo_mdsno(struct obdo *oa)
+{
+        __u64 mdsno = oa->o_gr - FILTER_GROUP_MDS0;
+
+        return mdsno;
+}
+
+int filter_update_capa_key(struct obd_device *obd, struct lustre_capa_key *key);
+int filter_verify_capa(struct obd_export *exp, struct lu_fid *fid, __u64 mdsid,
+ struct lustre_capa *capa, __u64 opc);
+void filter_free_capa_keys(struct filter_obd *filter);
#endif /* _FILTER_INTERNAL_H */
int objcount, struct obd_ioobj *obj,
int niocount, struct niobuf_remote *nb,
struct niobuf_local *res,
- struct obd_trans_info *oti)
+ struct obd_trans_info *oti,
+ struct lustre_capa *capa)
{
struct obd_device *obd = exp->exp_obd;
struct lvfs_run_ctxt saved;
LASSERTF(objcount == 1, "%d\n", objcount);
LASSERTF(obj->ioo_bufcnt > 0, "%d\n", obj->ioo_bufcnt);
+ rc = filter_verify_capa(exp, NULL, obdo_mdsno(oa), capa,
+ CAPA_OPC_OSS_READ);
+ if (rc)
+ RETURN(rc);
+
if (oa && oa->o_valid & OBD_MD_FLGRANT) {
spin_lock(&obd->obd_osfs_lock);
filter_grant_incoming(exp, oa);
int objcount, struct obd_ioobj *obj,
int niocount, struct niobuf_remote *nb,
struct niobuf_local *res,
- struct obd_trans_info *oti)
+ struct obd_trans_info *oti,
+ struct lustre_capa *capa)
{
struct lvfs_run_ctxt saved;
struct niobuf_remote *rnb;
LASSERT(objcount == 1);
LASSERT(obj->ioo_bufcnt > 0);
+ rc = filter_verify_capa(exp, NULL, obdo_mdsno(oa), capa,
+ CAPA_OPC_OSS_WRITE);
+ if (rc)
+ RETURN(rc);
+
push_ctxt(&saved, &exp->exp_obd->obd_lvfs_ctxt, NULL);
iobuf = filter_iobuf_get(&exp->exp_obd->u.filter, oti);
if (IS_ERR(iobuf))
int filter_preprw(int cmd, struct obd_export *exp, struct obdo *oa,
int objcount, struct obd_ioobj *obj, int niocount,
struct niobuf_remote *nb, struct niobuf_local *res,
- struct obd_trans_info *oti)
+ struct obd_trans_info *oti, struct lustre_capa *capa)
{
if (cmd == OBD_BRW_WRITE)
return filter_preprw_write(cmd, exp, oa, objcount, obj,
- niocount, nb, res, oti);
+ niocount, nb, res, oti, capa);
if (cmd == OBD_BRW_READ)
return filter_preprw_read(cmd, exp, oa, objcount, obj,
- niocount, nb, res, oti);
+ niocount, nb, res, oti, capa);
LBUG();
return -EPROTO;
}
ioo.ioo_bufcnt = oa_bufs;
ret = filter_preprw(cmd, exp, oinfo->oi_oa, 1, &ioo,
- oa_bufs, rnb, lnb, oti);
+ oa_bufs, rnb, lnb, oti, oinfo_capa(oinfo));
if (ret != 0)
GOTO(out, ret);
memcpy(obdo_logcookie(oa), cookie, sizeof(*cookie));
oid = oa->o_id;
- rc = filter_destroy(exp, oa, NULL, NULL, NULL);
+ rc = filter_destroy(exp, oa, NULL, NULL, NULL, NULL);
obdo_free(oa);
if (rc == -ENOENT) {
CDEBUG(D_HA, "object already removed, send cookie\n");
RETURN(lsm_size);
}
+/*
+ * Copy the capability held by capa (treated as a struct obd_capa) into
+ * request buffer number offset of req and flag its presence in body by
+ * setting OBD_MD_FLOSSCAPA in oa.o_valid.  Does nothing when capa is NULL.
+ */
+static inline void osc_pack_capa(struct ptlrpc_request *req, int offset,
+                                 struct ost_body *body, void *capa)
+{
+        struct lustre_capa *dst;
+
+        if (capa == NULL)
+                return;
+
+        dst = lustre_msg_buf(req->rq_reqmsg, offset, sizeof(*dst));
+        capa_cpy(dst, (struct obd_capa *)capa);
+        body->oa.o_valid |= OBD_MD_FLOSSCAPA;
+        DEBUG_CAPA(D_SEC, dst, "pack");
+}
+
+/*
+ * Fill request buffer number offset of req with a copy of oinfo->oi_oa, then
+ * pack oinfo->oi_capa (if any) into the buffer that follows it.
+ */
+static inline void osc_pack_req_body(struct ptlrpc_request *req, int offset,
+                                     struct obd_info *oinfo)
+{
+        struct ost_body *ost;
+
+        ost = lustre_msg_buf(req->rq_reqmsg, offset, sizeof(*ost));
+        ost->oa = *oinfo->oi_oa;
+
+        /* the capability lives in the buffer right after the body */
+        osc_pack_capa(req, offset + 1, ost, oinfo->oi_capa);
+}
+
static int osc_getattr_interpret(struct ptlrpc_request *req,
struct osc_async_args *aa, int rc)
{
{
struct ptlrpc_request *req;
struct ost_body *body;
- int size[2] = { sizeof(struct ptlrpc_body), sizeof(*body) };
+ int size[3] = { sizeof(struct ptlrpc_body), sizeof(*body) };
struct osc_async_args *aa;
ENTRY;
+ size[REQ_REC_OFF + 1] = oinfo->oi_capa ? sizeof(*oinfo->oi_capa) : 0;
req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_OST_VERSION,
- OST_GETATTR, 2, size,NULL);
+ OST_GETATTR, 3, size,NULL);
if (!req)
RETURN(-ENOMEM);
- body = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, sizeof(*body));
- memcpy(&body->oa, oinfo->oi_oa, sizeof(*oinfo->oi_oa));
+ osc_pack_req_body(req, REQ_REC_OFF, oinfo);
ptlrpc_req_set_repsize(req, 2, size);
req->rq_interpret_reply = osc_getattr_interpret;
{
struct ptlrpc_request *req;
struct ost_body *body;
- int rc, size[2] = { sizeof(struct ptlrpc_body), sizeof(*body) };
+ int rc, size[3] = { sizeof(struct ptlrpc_body), sizeof(*body) };
ENTRY;
+ size[REQ_REC_OFF + 1] = oinfo->oi_capa ? sizeof(*oinfo->oi_capa) : 0;
req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_OST_VERSION,
- OST_GETATTR, 2, size, NULL);
+ OST_GETATTR, 3, size, NULL);
if (!req)
RETURN(-ENOMEM);
- body = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, sizeof(*body));
- memcpy(&body->oa, oinfo->oi_oa, sizeof(*oinfo->oi_oa));
+ osc_pack_req_body(req, REQ_REC_OFF, oinfo);
ptlrpc_req_set_repsize(req, 2, size);
{
struct ptlrpc_request *req;
struct ost_body *body;
- int rc, size[2] = { sizeof(struct ptlrpc_body), sizeof(*body) };
+ int rc, size[3] = { sizeof(struct ptlrpc_body), sizeof(*body) };
ENTRY;
LASSERT(!(oinfo->oi_oa->o_valid & OBD_MD_FLGROUP) ||
oinfo->oi_oa->o_gr > 0);
+ size[REQ_REC_OFF + 1] = oinfo->oi_capa ? sizeof(*oinfo->oi_capa) : 0;
req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_OST_VERSION,
- OST_SETATTR, 2, size, NULL);
+ OST_SETATTR, 3, size, NULL);
if (!req)
RETURN(-ENOMEM);
- body = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, sizeof(*body));
- memcpy(&body->oa, oinfo->oi_oa, sizeof(*oinfo->oi_oa));
+ osc_pack_req_body(req, REQ_REC_OFF, oinfo);
ptlrpc_req_set_repsize(req, 2, size);
{
struct ptlrpc_request *req;
struct ost_body *body;
- int size[2] = { sizeof(struct ptlrpc_body), sizeof(*body) };
+ int size[3] = { sizeof(struct ptlrpc_body), sizeof(*body) };
struct osc_async_args *aa;
ENTRY;
+ size[REQ_REC_OFF + 1] = oinfo->oi_capa ? sizeof(*oinfo->oi_capa) : 0;
req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_OST_VERSION,
- OST_SETATTR, 2, size, NULL);
+ OST_SETATTR, 3, size, NULL);
if (!req)
RETURN(-ENOMEM);
- body = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, sizeof(*body));
-
+ osc_pack_req_body(req, REQ_REC_OFF, oinfo);
if (oinfo->oi_oa->o_valid & OBD_MD_FLCOOKIE) {
LASSERT(oti);
+ body = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF,
+ sizeof(*body));
memcpy(obdo_logcookie(oinfo->oi_oa), oti->oti_logcookies,
sizeof(*oti->oti_logcookies));
}
- memcpy(&body->oa, oinfo->oi_oa, sizeof(*oinfo->oi_oa));
ptlrpc_req_set_repsize(req, 2, size);
/* do mds to ost setattr asynchronouly */
if (!rqset) {
struct ptlrpc_request *req;
struct ost_body *body;
struct lov_stripe_md *lsm;
- int rc, size[2] = { sizeof(struct ptlrpc_body), sizeof(*body) };
+ int rc, size[3] = { sizeof(struct ptlrpc_body), sizeof(*body) };
ENTRY;
LASSERT(oa);
RETURN(rc);
}
+ /* FIXME: how to find one OSS WRITE capability? */
req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_OST_VERSION,
- OST_CREATE, 2, size, NULL);
+ OST_CREATE, 3, size, NULL);
if (!req)
GOTO(out, rc = -ENOMEM);
body = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, sizeof(*body));
- memcpy(&body->oa, oa, sizeof(body->oa));
+ body->oa = *oa;
ptlrpc_req_set_repsize(req, 2, size);
if (oa->o_valid & OBD_MD_FLINLINE) {
struct ptlrpc_request *req;
struct osc_async_args *aa;
struct ost_body *body;
- int size[2] = { sizeof(struct ptlrpc_body), sizeof(*body) };
+ struct lustre_capa *capa = oinfo->oi_capa;
+ int size[3] = { sizeof(struct ptlrpc_body), sizeof(*body) };
ENTRY;
if (!oinfo->oi_oa) {
RETURN(-EINVAL);
}
+ size[REQ_REC_OFF + 1] = capa ? sizeof(*capa) : 0;
req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_OST_VERSION,
- OST_PUNCH, 2, size, NULL);
+ OST_PUNCH, 3, size, NULL);
if (!req)
RETURN(-ENOMEM);
OBD_CONNECT_REQPORTAL)
req->rq_request_portal = OST_IO_PORTAL;
- body = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, sizeof(*body));
- memcpy(&body->oa, oinfo->oi_oa, sizeof(*oinfo->oi_oa));
-
+ osc_pack_req_body(req, REQ_REC_OFF, oinfo);
/* overload the size and blocks fields in the oa with start/end */
+ body = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, sizeof(*body));
body->oa.o_size = oinfo->oi_policy.l_extent.start;
body->oa.o_blocks = oinfo->oi_policy.l_extent.end;
body->oa.o_valid |= (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS);
+ if (capa) {
+ struct lustre_capa *c;
+
+ c = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF + 1, sizeof(*c));
+ /* setattr_raw is protected by i_sem, no need to lock here */
+ *c = *capa;
+ body->oa.o_valid |= OBD_MD_FLOSSCAPA;
+ }
+
ptlrpc_req_set_repsize(req, 2, size);
req->rq_interpret_reply = osc_punch_interpret;
}
static int osc_sync(struct obd_export *exp, struct obdo *oa,
- struct lov_stripe_md *md, obd_size start, obd_size end)
+ struct lov_stripe_md *md, obd_size start, obd_size end,
+ void *capa)
{
struct ptlrpc_request *req;
struct ost_body *body;
- int rc, size[2] = { sizeof(struct ptlrpc_body), sizeof(*body) };
+ int rc, size[3] = { sizeof(struct ptlrpc_body), sizeof(*body) };
ENTRY;
if (!oa) {
RETURN(-EINVAL);
}
+ if (capa)
+ size[REQ_REC_OFF + 1] = sizeof(struct lustre_capa);
+
req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_OST_VERSION,
- OST_SYNC, 2, size, NULL);
+ OST_SYNC, 3, size, NULL);
if (!req)
RETURN(-ENOMEM);
- body = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, sizeof(*body));
- memcpy(&body->oa, oa, sizeof(*oa));
-
/* overload the size and blocks fields in the oa with start/end */
+ body = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, sizeof(*body));
+ body->oa = *oa;
body->oa.o_size = start;
body->oa.o_blocks = end;
body->oa.o_valid |= (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS);
+ osc_pack_capa(req, REQ_REC_OFF + 1, body, capa);
+
ptlrpc_req_set_repsize(req, 2, size);
rc = ptlrpc_queue_wait(req);
* cookies to the MDS after committing destroy transactions. */
static int osc_destroy(struct obd_export *exp, struct obdo *oa,
struct lov_stripe_md *ea, struct obd_trans_info *oti,
- struct obd_export *md_export)
+ struct obd_export *md_export, void *capa)
{
struct ptlrpc_request *req;
struct ost_body *body;
- int size[2] = { sizeof(struct ptlrpc_body), sizeof(*body) };
+ int size[3] = { sizeof(struct ptlrpc_body), sizeof(*body) };
ENTRY;
if (!oa) {
RETURN(-EINVAL);
}
+ if (capa)
+ size[REQ_REC_OFF + 1] = sizeof(struct lustre_capa);
req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_OST_VERSION,
- OST_DESTROY, 2, size, NULL);
+ OST_DESTROY, 3, size, NULL);
if (!req)
RETURN(-ENOMEM);
req->rq_request_portal = OST_IO_PORTAL;
body = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, sizeof(*body));
-
- if (oti != NULL && oa->o_valid & OBD_MD_FLCOOKIE) {
+ body->oa = *oa;
+ if (oti != NULL && oa->o_valid & OBD_MD_FLCOOKIE)
memcpy(obdo_logcookie(oa), oti->oti_logcookies,
sizeof(*oti->oti_logcookies));
- }
- memcpy(&body->oa, oa, sizeof(*oa));
+ osc_pack_capa(req, REQ_REC_OFF + 1, body, capa);
+
ptlrpc_req_set_repsize(req, 2, size);
ptlrpcd_add_req(req);
static int osc_brw_prep_request(int cmd, struct obd_import *imp,struct obdo *oa,
struct lov_stripe_md *lsm, obd_count page_count,
struct brw_page **pga, int *requested_nobp,
- int *niocountp, struct ptlrpc_request **reqp)
+ int *niocountp, struct ptlrpc_request **reqp,
+ struct obd_capa *ocapa)
{
struct ptlrpc_request *req;
struct ptlrpc_bulk_desc *desc;
struct ost_body *body;
struct obd_ioobj *ioobj;
struct niobuf_remote *niobuf;
- int size[4] = { sizeof(struct ptlrpc_body), sizeof(*body) };
+ int size[5] = { sizeof(struct ptlrpc_body), sizeof(*body) };
int niocount, i, requested_nob, opc, rc;
struct ptlrpc_request_pool *pool;
+ struct lustre_capa *capa;
ENTRY;
opc = ((cmd & OBD_BRW_WRITE) != 0) ? OST_WRITE : OST_READ;
size[REQ_REC_OFF + 1] = sizeof(*ioobj);
size[REQ_REC_OFF + 2] = niocount * sizeof(*niobuf);
+ if (ocapa)
+ size[REQ_REC_OFF + 3] = sizeof(*capa);
OBD_FAIL_RETURN(OBD_FAIL_OSC_BRW_PREP_REQ, -ENOMEM);
- req = ptlrpc_prep_req_pool(imp, LUSTRE_OST_VERSION, opc, 4, size, NULL,
+ req = ptlrpc_prep_req_pool(imp, LUSTRE_OST_VERSION, opc, 5, size, NULL,
pool, NULL);
if (req == NULL)
RETURN (-ENOMEM);
niobuf = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF + 2,
niocount * sizeof(*niobuf));
- memcpy(&body->oa, oa, sizeof(*oa));
+ body->oa = *oa;
obdo_to_ioobj(oa, ioobj);
ioobj->ioo_bufcnt = niocount;
+ if (ocapa) {
+ capa = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF + 3,
+ sizeof(*capa));
+ capa_cpy(capa, ocapa);
+ body->oa.o_valid |= OBD_MD_FLOSSCAPA;
+ }
LASSERT (page_count > 0);
for (requested_nob = i = 0; i < page_count; i++, niobuf++) {
static int osc_brw_internal(int cmd, struct obd_export *exp,struct obdo *oa,
struct lov_stripe_md *lsm,
- obd_count page_count, struct brw_page **pga)
+ obd_count page_count, struct brw_page **pga,
+ struct obd_capa *ocapa)
{
int requested_nob;
int niocount;
restart_bulk:
rc = osc_brw_prep_request(cmd, class_exp2cliimp(exp), oa, lsm,
page_count, pga, &requested_nob, &niocount,
- &req);
+ &req, ocapa);
if (rc != 0)
return (rc);
static int async_internal(int cmd, struct obd_export *exp, struct obdo *oa,
struct lov_stripe_md *lsm, obd_count page_count,
- struct brw_page **pga, struct ptlrpc_request_set *set)
+ struct brw_page **pga, struct ptlrpc_request_set *set,
+ struct obd_capa *ocapa)
{
struct ptlrpc_request *req;
int requested_nob;
rc = osc_brw_prep_request(cmd, class_exp2cliimp(exp), oa, lsm,
page_count, pga, &requested_nob, &nio_count,
- &req);
+ &req, ocapa);
if (rc == 0) {
LASSERT(sizeof(*aa) <= sizeof(req->rq_async_args));
}
rc = osc_brw_internal(cmd, exp, oinfo->oi_oa, oinfo->oi_md,
- pages_per_brw, ppga);
+ pages_per_brw, ppga, oinfo->oi_capa);
if (rc != 0)
break;
copy = ppga;
rc = async_internal(cmd, exp, oinfo->oi_oa, oinfo->oi_md,
- pages_per_brw, copy, set);
+ pages_per_brw, copy, set, oinfo->oi_capa);
if (rc != 0)
break;
struct obd_async_page_ops *ops = NULL;
void *caller_data = NULL;
struct list_head *pos;
+ struct obd_capa *ocapa;
int i, rc;
ENTRY;
/* always get the data for the obdo for the rpc */
LASSERT(ops != NULL);
ops->ap_fill_obdo(caller_data, cmd, oa);
+ ocapa = ops->ap_lookup_capa(caller_data, cmd);
sort_brw_pages(pga, page_count);
rc = osc_brw_prep_request(cmd, cli->cl_import, oa, NULL, page_count,
- pga, &requested_nob, &nio_count, &req);
+ pga, &requested_nob, &nio_count, &req, ocapa);
+ capa_put(ocapa);
if (rc != 0) {
CERROR("prep_req failed: %d\n", rc);
GOTO(out, req = ERR_PTR(rc));
no_match:
if (intent) {
- int size[3] = {
+ int size[2] = {
[MSG_PTLRPC_BODY_OFF] = sizeof(struct ptlrpc_body),
[DLM_LOCKREQ_OFF] = sizeof(struct ldlm_request) };
req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_DLM_VERSION,
- LDLM_ENQUEUE, 2, size, NULL);
+ LDLM_ENQUEUE, 3, size, NULL);
if (req == NULL)
RETURN(-ENOMEM);
EXIT;
}
-
static struct dt_device_operations osd_dt_ops = {
.dt_root_get = osd_root_get,
.dt_statfs = osd_statfs,
up_write(&obj->oo_sem);
}
-static int osd_attr_get(const struct lu_context *ctxt, struct dt_object *dt,
+static inline int osd_object_auth(const struct lu_context *ctx,
+ const struct lu_object *o,
+ __u64 opc)
+{ /*
+   * Call o's own loo_object_auth hook for operation opc, passing the
+   * capability returned by lu_object_capa(o), and return the hook's result.
+   *
+   * NOTE(review): the hook pointer is called without a NULL check; this
+   * presumably relies on the osd layer always providing it - confirm.
+   */
+ return o->lo_ops->loo_object_auth(ctx, o, lu_object_capa(o), opc);
+}
+
+static int osd_attr_get(const struct lu_context *ctxt,
+ struct dt_object *dt,
struct lu_attr *attr)
{
struct osd_object *obj = osd_dt_obj(dt);
+
LASSERT(dt_object_exists(dt));
LASSERT(osd_invariant(obj));
LASSERT(osd_read_locked(ctxt, obj) || osd_write_locked(ctxt, obj));
+ if (osd_object_auth(ctxt, &dt->do_lu, CAPA_OPC_META_READ))
+ return -EACCES;
+
return osd_inode_getattr(ctxt, obj->oo_inode, attr);
}
LASSERT(osd_invariant(obj));
LASSERT(osd_write_locked(ctxt, obj));
+ if (osd_object_auth(ctxt, &dt->do_lu, CAPA_OPC_META_WRITE))
+ return -EACCES;
+
return osd_inode_setattr(ctxt, obj->oo_inode, attr);
}
/*
* XXX missing: permission checks.
*/
+ if (osd_object_auth(ctx, &dt->do_lu, CAPA_OPC_INDEX_INSERT))
+ RETURN(-EACCES);
/*
* XXX missing: sanity checks (valid ->la_mode, etc.)
LASSERT(osd_write_locked(ctxt, obj));
LASSERT(th != NULL);
+ if (osd_object_auth(ctxt, &dt->do_lu, CAPA_OPC_META_WRITE)) {
+ LU_OBJECT_DEBUG(D_ERROR, ctxt, &dt->do_lu,
+ "no capability to link!\n");
+ return;
+ }
+
if (inode->i_nlink < LDISKFS_LINK_MAX) {
inode->i_nlink ++;
mark_inode_dirty(inode);
LASSERT(osd_write_locked(ctxt, obj));
LASSERT(th != NULL);
+ if (osd_object_auth(ctxt, &dt->do_lu, CAPA_OPC_META_WRITE)) {
+ LU_OBJECT_DEBUG(D_ERROR, ctxt, &dt->do_lu,
+ "no capability to unlink!\n");
+ return;
+ }
+
if (inode->i_nlink > 0) {
inode->i_nlink --;
mark_inode_dirty(inode);
LASSERT(inode->i_op != NULL && inode->i_op->getxattr != NULL);
LASSERT(osd_read_locked(ctxt, obj) || osd_write_locked(ctxt, obj));
+ if (osd_object_auth(ctxt, &dt->do_lu, CAPA_OPC_META_READ))
+ return -EACCES;
+
dentry->d_inode = inode;
return inode->i_op->getxattr(dentry, name, buf, size);
}
LASSERT(osd_write_locked(ctxt, obj));
LASSERT(handle != NULL);
+ if (osd_object_auth(ctxt, &dt->do_lu, CAPA_OPC_META_WRITE))
+ return -EACCES;
+
dentry->d_inode = inode;
fs_flags = 0;
LASSERT(inode->i_op != NULL && inode->i_op->listxattr != NULL);
LASSERT(osd_read_locked(ctxt, obj) || osd_write_locked(ctxt, obj));
+ if (osd_object_auth(ctxt, &dt->do_lu, CAPA_OPC_META_READ))
+ return -EACCES;
+
dentry->d_inode = inode;
return inode->i_op->listxattr(dentry, buf, size);
}
LASSERT(osd_write_locked(ctxt, obj));
LASSERT(handle != NULL);
+ if (osd_object_auth(ctxt, &dt->do_lu, CAPA_OPC_META_WRITE))
+ return -EACCES;
+
dentry->d_inode = inode;
return inode->i_op->removexattr(dentry, name);
}
LASSERT(rdpg->rp_pages != NULL);
+ if (osd_object_auth(ctxt, &dt->do_lu, CAPA_OPC_BODY_READ))
+ return -EACCES;
+
if (rdpg->rp_count <= 0)
return -EFAULT;
LASSERT(osd_invariant(obj));
LASSERT(dt_object_exists(dt));
+ if (osd_object_auth(ctx, &dt->do_lu, CAPA_OPC_INDEX_LOOKUP))
+ RETURN(-EACCES);
+
if (osd_sb(osd_obj2dev(obj))->s_root->d_inode == obj->oo_inode) {
dt->do_index_ops = &osd_index_compat_ops;
result = 0;
LASSERT(obj->oo_ipd != NULL);
LASSERT(handle != NULL);
+ if (osd_object_auth(ctxt, &dt->do_lu, CAPA_OPC_INDEX_DELETE))
+ RETURN(-EACCES);
+
oh = container_of0(handle, struct osd_thandle, ot_super);
LASSERT(oh->ot_handle != NULL);
LASSERT(obj->oo_container.ic_object == obj->oo_inode);
LASSERT(obj->oo_ipd != NULL);
+ if (osd_object_auth(ctxt, &dt->do_lu, CAPA_OPC_INDEX_LOOKUP))
+ return -EACCES;
+
rc = iam_lookup(&obj->oo_container, (const struct iam_key *)key,
(struct iam_rec *)rec, obj->oo_ipd);
RETURN(rc);
}
-
static int osd_index_insert(const struct lu_context *ctx, struct dt_object *dt,
const struct dt_rec *rec, const struct dt_key *key,
struct thandle *th)
LASSERT(obj->oo_ipd != NULL);
LASSERT(th != NULL);
+ if (osd_object_auth(ctx, &dt->do_lu, CAPA_OPC_INDEX_INSERT))
+ return -EACCES;
+
oh = container_of0(th, struct osd_thandle, ot_super);
LASSERT(oh->ot_handle != NULL);
rc = iam_insert(oh->ot_handle, &obj->oo_container,
static void osd_it_put(const struct lu_context *ctx, struct dt_it *di)
{
struct osd_it *it = (struct osd_it *)di;
+
iam_it_put(&it->oi_it);
}
static int osd_it_next(const struct lu_context *ctx, struct dt_it *di)
{
struct osd_it *it = (struct osd_it *)di;
+
return iam_it_next(&it->oi_it);
}
struct osd_thandle *oh;
LASSERT(th != NULL);
+
oh = container_of0(th, struct osd_thandle, ot_super);
LASSERT(oh->ot_handle != NULL);
const struct dt_it *di)
{
struct osd_it *it = (struct osd_it *)di;
+
return (struct dt_key *)iam_it_key_get(&it->oi_it);
}
static int osd_it_key_size(const struct lu_context *ctx, const struct dt_it *di)
{
struct osd_it *it = (struct osd_it *)di;
+
return iam_it_key_size(&it->oi_it);
}
const struct dt_it *di)
{
struct osd_it *it = (struct osd_it *)di;
+
return (struct dt_rec *)iam_it_rec_get(&it->oi_it);
}
static __u32 osd_it_store(const struct lu_context *ctxt, const struct dt_it *di)
{
struct osd_it *it = (struct osd_it *)di;
+
return iam_it_store(&it->oi_it);
}
const struct dt_it *di, __u32 hash)
{
struct osd_it *it = (struct osd_it *)di;
+
return iam_it_load(&it->oi_it, hash);
}
LASSERT(handle != NULL);
LASSERT(S_ISDIR(obj->oo_inode->i_mode));
ENTRY;
+
RETURN(-EOPNOTSUPP);
}
LASSERT(osd_invariant(obj));
LASSERT(th != NULL);
- luch = lu_object_find(ctx, ludev->ld_site, fid);
+ luch = lu_object_find(ctx, ludev->ld_site, fid, BYPASS_CAPA);
if (!IS_ERR(luch)) {
if (lu_object_exists(luch)) {
struct osd_object *child;
return osd_invariant(osd_obj(l));
}
+/*
+ * Verify capability @capa against the capability keys @keys.
+ *
+ * If capa_lookup() already knows this capability, it is only compared
+ * byte-for-byte with the stored copy and checked for expiry.  Otherwise
+ * the key whose lk_keyid matches capa->lc_keyid is copied out of @keys,
+ * the HMAC is recomputed with it and compared with capa->lc_hmac, and on
+ * success the capability is handed to capa_add().
+ *
+ * Returns 0 when the capability is acceptable, -EACCES on a mismatch,
+ * -ESTALE when it is expired or no key matches, or the error reported by
+ * capa_hmac().  Note the errno-style result: 0 means "sane".
+ */
+static int capa_is_sane(const struct lu_context *ctx,
+                        struct lustre_capa *capa,
+                        struct lustre_capa_key *keys)
+{
+        struct obd_capa *c;
+        struct osd_thread_info *oti = lu_context_key_get(ctx, &osd_key);
+        /* rc must start at 0: neither branch below assigns it when the
+         * stored capability matches and has not expired. */
+        int i, rc = 0;
+        ENTRY;
+
+        c = capa_lookup(capa);
+        if (c) {
+                spin_lock(&c->c_lock);
+                if (memcmp(&c->c_capa, capa, sizeof(*capa))) {
+                        DEBUG_CAPA(D_ERROR, capa, "HMAC mismatch");
+                        rc = -EACCES;
+                } else if (capa_is_expired(c)) {
+                        DEBUG_CAPA(D_ERROR, capa, "expired");
+                        rc = -ESTALE;
+                }
+                spin_unlock(&c->c_lock);
+
+                capa_put(c);
+                RETURN(rc);
+        }
+
+        /* Copy the matching key into per-thread storage under capa_lock so
+         * the HMAC can be computed after the lock is dropped.
+         * NOTE(review): only keys[0] and keys[1] are examined -- confirm
+         * that @keys always holds exactly two entries. */
+        spin_lock(&capa_lock);
+        for (i = 0; i < 2; i++) {
+                if (keys[i].lk_keyid == capa->lc_keyid) {
+                        oti->oti_capa_key = keys[i];
+                        break;
+                }
+        }
+        spin_unlock(&capa_lock);
+
+        if (i == 2) {
+                DEBUG_CAPA(D_ERROR, capa, "no matched capa key");
+                RETURN(-ESTALE);
+        }
+
+        rc = capa_hmac(oti->oti_capa_hmac, capa, oti->oti_capa_key.lk_key);
+        if (rc)
+                RETURN(rc);
+        if (memcmp(oti->oti_capa_hmac, capa->lc_hmac, sizeof(capa->lc_hmac))) {
+                DEBUG_CAPA(D_ERROR, capa, "HMAC mismatch");
+                RETURN(-EACCES);
+        }
+
+        capa_add(capa);
+
+        RETURN(0);
+}
+
+/*
+ * loo_object_auth method: decide whether capability @capa authorizes
+ * operation @opc on object @obj.
+ *
+ * Returns 0 when access is granted and -EACCES otherwise.
+ *
+ * NOTE(review): the unconditional "return 0" below grants every request
+ * and makes the remaining checks unreachable; presumably capability
+ * enforcement is switched off on purpose for now -- confirm before
+ * removing it.
+ */
+static int osd_object_capa_auth(const struct lu_context *ctx,
+                                const struct lu_object *obj,
+                                struct lustre_capa *capa,
+                                __u64 opc)
+{
+        const struct lu_fid *fid = lu_object_fid(obj);
+
+        return 0;
+
+        if (lu_object_capa_bypass(obj))
+                return 0;
+
+        if (!capa) {
+                CERROR("no capability is provided for fid "DFID"\n", PFID(fid));
+                return -EACCES;
+        }
+
+        if (!lu_fid_eq(fid, &capa->lc_fid)) {
+                DEBUG_CAPA(D_ERROR, capa, "fid "DFID" mismatch with",
+                           PFID(fid));
+                return -EACCES;
+        }
+
+        if (!capa_opc_supported(capa, opc)) {
+                DEBUG_CAPA(D_ERROR, capa, "opc "LPX64" not supported by", opc);
+                return -EACCES;
+        }
+
+        /* capa_is_sane() is errno-style: 0 means the capability is valid,
+         * so any non-zero result is the failure case. */
+        if (capa_is_sane(ctx, capa, obj->lo_dev->ld_site->ls_capa_keys) != 0) {
+                DEBUG_CAPA(D_ERROR, capa, "insane");
+                return -EACCES;
+        }
+
+        return 0;
+}
+
static struct lu_object_operations osd_lu_obj_ops = {
.loo_object_init = osd_object_init,
.loo_object_delete = osd_object_delete,
.loo_object_release = osd_object_release,
.loo_object_free = osd_object_free,
.loo_object_print = osd_object_print,
- .loo_object_invariant = osd_object_invariant
+ .loo_object_invariant = osd_object_invariant,
+ .loo_object_auth = osd_object_capa_auth
};
static struct lu_device_operations osd_lu_ops = {
int oti_r_locks;
int oti_w_locks;
int oti_txns;
+ /*
+ *XXX temporary: for capa operations.
+ */
+ char oti_capa_hmac[CAPA_HMAC_KEY_MAX_LEN];
+ struct lustre_capa_key oti_capa_key;
};
#endif /* __KERNEL__ */
struct obd_trans_info *oti)
{
struct ost_body *body, *repbody;
+ /* must start as NULL: it is passed to obd_destroy() even when the
+ * request carries no capability (OBD_MD_FLOSSCAPA not set) */
+ struct lustre_capa *capa = NULL;
int rc, size[2] = { sizeof(struct ptlrpc_body), sizeof(*body) };
ENTRY;
repbody = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF,
sizeof(*repbody));
memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
- req->rq_status = obd_destroy(exp, &body->oa, NULL, oti, NULL);
+ /* the capability arrives in the request message, not in the reply
+ * being built here (ost_sync unpacks it from rq_reqmsg as well) */
+ if (body->oa.o_valid & OBD_MD_FLOSSCAPA)
+ capa = lustre_unpack_capa(req->rq_reqmsg, REQ_REC_OFF + 1);
+ req->rq_status = obd_destroy(exp, &body->oa, NULL, oti, NULL, capa);
RETURN(0);
}
repbody = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF,
sizeof(*repbody));
- memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
+ repbody->oa = body->oa;
oinfo.oi_oa = &repbody->oa;
+ /* the capability is sent by the client, so it is unpacked from the
+ * request message rather than from the reply being filled in */
+ if (oinfo.oi_oa->o_valid & OBD_MD_FLOSSCAPA)
+ oinfo.oi_capa = lustre_unpack_capa(req->rq_reqmsg,
+ REQ_REC_OFF + 1);
req->rq_status = obd_getattr(exp, &oinfo);
RETURN(0);
}
*/
oinfo.oi_oa->o_valid &= ~OBD_MD_FLFLAGS;
+ /* the capability is sent by the client, so it is unpacked from the
+ * request message rather than from the reply being filled in */
+ if (oinfo.oi_oa->o_valid & OBD_MD_FLOSSCAPA)
+ oinfo.oi_capa = lustre_unpack_capa(req->rq_reqmsg,
+ REQ_REC_OFF + 1);
req->rq_status = obd_punch(exp, &oinfo, oti, NULL);
ost_punch_lock_put(exp, oinfo.oi_oa, &lh);
}
static int ost_sync(struct obd_export *exp, struct ptlrpc_request *req)
{
struct ost_body *body, *repbody;
+ struct lustre_capa *capa = NULL;
int rc, size[2] = { sizeof(struct ptlrpc_body), sizeof(*repbody) };
ENTRY;
if (body == NULL)
RETURN(-EFAULT);
+ if (body->oa.o_valid & OBD_MD_FLOSSCAPA)
+ capa = lustre_unpack_capa(req->rq_reqmsg, REQ_REC_OFF + 1);
+
rc = lustre_pack_reply(req, 2, size, NULL);
if (rc)
RETURN(rc);
sizeof(*repbody));
memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
req->rq_status = obd_sync(exp, &repbody->oa, NULL, repbody->oa.o_size,
- repbody->oa.o_blocks);
+ repbody->oa.o_blocks, capa);
RETURN(0);
}
repbody = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF,
sizeof(*repbody));
- memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
+ repbody->oa = body->oa;
oinfo.oi_oa = &repbody->oa;
+ /* the capability is sent by the client, so it is unpacked from the
+ * request message rather than from the reply being filled in */
+ if (oinfo.oi_oa->o_valid & OBD_MD_FLOSSCAPA)
+ oinfo.oi_capa = lustre_unpack_capa(req->rq_reqmsg,
+ REQ_REC_OFF + 1);
req->rq_status = obd_setattr(exp, &oinfo, oti);
RETURN(0);
}
struct niobuf_local *local_nb;
struct obd_ioobj *ioo;
struct ost_body *body, *repbody;
+ struct lustre_capa *capa = NULL;
struct l_wait_info lwi;
struct lustre_handle lockh = { 0 };
int size[2] = { sizeof(struct ptlrpc_body), sizeof(*body) };
lustre_swab_niobuf_remote (&remote_nb[i]);
}
+ if (body->oa.o_valid & OBD_MD_FLOSSCAPA)
+ capa = lustre_unpack_capa(req->rq_reqmsg, REQ_REC_OFF + 3);
+
rc = lustre_pack_reply(req, 2, size, NULL);
if (rc)
GOTO(out, rc);
GOTO(out_bulk, rc);
rc = obd_preprw(OBD_BRW_READ, req->rq_export, &body->oa, 1,
- ioo, npages, pp_rnb, local_nb, oti);
+ ioo, npages, pp_rnb, local_nb, oti, capa);
if (rc != 0)
GOTO(out_lock, rc);
struct ost_body *body, *repbody;
struct l_wait_info lwi;
struct lustre_handle lockh = {0};
+ struct lustre_capa *capa = NULL;
__u32 *rcs;
int size[3] = { sizeof(struct ptlrpc_body), sizeof(*body) };
int objcount, niocount, npages, comms_error = 0;
lustre_swab_niobuf_remote (&remote_nb[i]);
}
+ if (body->oa.o_valid & OBD_MD_FLOSSCAPA)
+ capa = lustre_unpack_capa(req->rq_reqmsg, REQ_REC_OFF + 3);
+
size[REPLY_REC_OFF + 1] = niocount * sizeof(*rcs);
rc = lustre_pack_reply(req, 3, size, NULL);
if (rc != 0)
do_checksum = (body->oa.o_valid & OBD_MD_FLCKSUM);
rc = obd_preprw(OBD_BRW_WRITE, req->rq_export, &body->oa, objcount,
- ioo, npages, pp_rnb, local_nb, oti);
+ ioo, npages, pp_rnb, local_nb, oti, capa);
if (rc != 0)
GOTO(out_lock, rc);
&RMF_MDT_BODY
};
+static const struct req_msg_field *mdt_renew_capa_client[] = { /* fields: ptlrpc body, one capability */
+ &RMF_PTLRPC_BODY,
+ &RMF_CAPA1 /* the only payload field of this layout */
+};
+
+static const struct req_msg_field *mdt_body_capa[] = { /* fields: ptlrpc body, mdt body, one capability */
+ &RMF_PTLRPC_BODY,
+ &RMF_MDT_BODY,
+ &RMF_CAPA1 /* capability follows the mdt body */
+};
+
static const struct req_msg_field *mdt_close_client[] = {
&RMF_PTLRPC_BODY,
&RMF_MDT_EPOCH,
- &RMF_REC_SETATTR
+ &RMF_REC_SETATTR,
+ &RMF_CAPA1
};
static const struct req_msg_field *mds_statfs_server[] = {
static const struct req_msg_field *mds_getattr_name_client[] = {
&RMF_PTLRPC_BODY,
&RMF_MDT_BODY,
+ &RMF_CAPA1,
&RMF_NAME
};
static const struct req_msg_field *mds_reint_create_client[] = {
&RMF_PTLRPC_BODY,
&RMF_REC_CREATE,
+ &RMF_CAPA1,
&RMF_NAME,
};
static const struct req_msg_field *mds_reint_create_sym_client[] = {
&RMF_PTLRPC_BODY,
&RMF_REC_CREATE,
+ &RMF_CAPA1,
&RMF_NAME,
&RMF_SYMTGT
};
static const struct req_msg_field *mds_reint_create_slave_client[] = {
&RMF_PTLRPC_BODY,
&RMF_REC_CREATE,
+ &RMF_CAPA1,
&RMF_NAME,
&RMF_EADATA
};
static const struct req_msg_field *mds_reint_open_client[] = {
&RMF_PTLRPC_BODY,
&RMF_REC_CREATE,
+ &RMF_CAPA1,
+ &RMF_CAPA2,
&RMF_NAME,
&RMF_EADATA
};
&RMF_PTLRPC_BODY,
&RMF_MDT_BODY,
&RMF_MDT_MD,
- &RMF_ACL
+ &RMF_ACL,
+ &RMF_CAPA1,
+ &RMF_CAPA2
};
static const struct req_msg_field *mds_reint_unlink_client[] = {
&RMF_PTLRPC_BODY,
&RMF_REC_UNLINK,
+ &RMF_CAPA1,
&RMF_NAME
};
static const struct req_msg_field *mds_reint_link_client[] = {
&RMF_PTLRPC_BODY,
&RMF_REC_LINK,
+ &RMF_CAPA1,
+ &RMF_CAPA2,
&RMF_NAME
};
static const struct req_msg_field *mds_reint_rename_client[] = {
&RMF_PTLRPC_BODY,
&RMF_REC_RENAME,
+ &RMF_CAPA1,
+ &RMF_CAPA2,
&RMF_NAME,
&RMF_SYMTGT
};
static const struct req_msg_field *mds_reint_setattr_client[] = {
&RMF_PTLRPC_BODY,
&RMF_REC_SETATTR,
+ &RMF_CAPA1,
&RMF_MDT_EPOCH,
&RMF_EADATA,
&RMF_LOGCOOKIES
&RMF_DLM_REP,
&RMF_MDT_BODY,
&RMF_MDT_MD,
- &RMF_ACL
+ &RMF_ACL,
+ &RMF_CAPA1,
+ &RMF_CAPA2
};
static const struct req_msg_field *ldlm_intent_getattr_client[] = {
&RMF_DLM_REQ,
&RMF_LDLM_INTENT,
&RMF_MDT_BODY, /* coincides with mds_getattr_name_client[] */
+ &RMF_CAPA1,
&RMF_NAME
};
&RMF_DLM_REQ,
&RMF_LDLM_INTENT,
&RMF_REC_CREATE, /* coincides with mds_reint_create_client[] */
+ &RMF_CAPA1,
&RMF_NAME,
&RMF_EADATA
};
&RMF_DLM_REQ,
&RMF_LDLM_INTENT,
&RMF_REC_CREATE, /* coincides with mds_reint_open_client[] */
+ &RMF_CAPA1,
+ &RMF_CAPA2,
&RMF_NAME,
&RMF_EADATA
};
&RMF_DLM_REQ,
&RMF_LDLM_INTENT,
&RMF_REC_UNLINK, /* coincides with mds_reint_unlink_client[] */
+ &RMF_CAPA1,
&RMF_NAME
};
static const struct req_msg_field *mds_getxattr_client[] = {
&RMF_PTLRPC_BODY,
&RMF_MDT_BODY,
+ &RMF_CAPA1,
&RMF_NAME,
&RMF_EADATA
};
static const struct req_msg_field *mds_setxattr_client[] = {
&RMF_PTLRPC_BODY,
&RMF_MDT_BODY,
+ &RMF_CAPA1,
&RMF_NAME,
&RMF_EADATA
};
&RMF_PTLRPC_BODY,
&RMF_MDT_BODY,
&RMF_MDT_MD,
- &RMF_ACL
+ &RMF_ACL,
+ &RMF_CAPA1
};
static const struct req_format *req_formats[] = {
DEFINE_MSGF("reint_opc", 0, sizeof(__u32), lustre_swab_generic_32s);
EXPORT_SYMBOL(RMF_REINT_OPC);
+const struct req_msg_field RMF_CAPA1 = /* capability field: one struct lustre_capa */
+ DEFINE_MSGF("capa", 0, sizeof(struct lustre_capa),
+ lustre_swab_lustre_capa); /* swabbed with lustre_swab_lustre_capa */
+EXPORT_SYMBOL(RMF_CAPA1);
+
+const struct req_msg_field RMF_CAPA2 = /* second capability field, listed after RMF_CAPA1 in layouts that carry two */
+ DEFINE_MSGF("capa", 0, sizeof(struct lustre_capa), /* NOTE(review): same name string as RMF_CAPA1 -- confirm intended */
+ lustre_swab_lustre_capa);
+EXPORT_SYMBOL(RMF_CAPA2);
+
/*
* Request formats.
*/
EXPORT_SYMBOL(RQF_FLD_QUERY);
const struct req_format RQF_MDS_GETSTATUS =
- DEFINE_REQ_FMT0("MDS_GETSTATUS", empty, mdt_body_only);
+ DEFINE_REQ_FMT0("MDS_GETSTATUS", empty, mdt_body_capa);
EXPORT_SYMBOL(RQF_MDS_GETSTATUS);
const struct req_format RQF_MDS_STATFS =
EXPORT_SYMBOL(RQF_MDS_STATFS);
const struct req_format RQF_MDS_SYNC =
- DEFINE_REQ_FMT0("MDS_SYNC", mdt_body_only, mdt_body_only);
+ DEFINE_REQ_FMT0("MDS_SYNC", mdt_body_capa, mdt_body_only);
EXPORT_SYMBOL(RQF_MDS_SYNC);
const struct req_format RQF_MDS_GETATTR =
- DEFINE_REQ_FMT0("MDS_GETATTR", mdt_body_only, mds_getattr_server);
+ DEFINE_REQ_FMT0("MDS_GETATTR", mdt_body_capa, mds_getattr_server);
EXPORT_SYMBOL(RQF_MDS_GETATTR);
const struct req_format RQF_MDS_GETXATTR =
const struct req_format RQF_MDS_REINT_SETATTR =
DEFINE_REQ_FMT0("MDS_REINT_SETATTR",
- mds_reint_setattr_client, mdt_body_only);
+ mds_reint_setattr_client, mdt_body_capa);
EXPORT_SYMBOL(RQF_MDS_REINT_SETATTR);
const struct req_format RQF_MDS_CONNECT =
const struct req_format RQF_MDS_PIN =
DEFINE_REQ_FMT0("MDS_PIN",
- mdt_body_only, mdt_body_only);
+ mdt_body_capa, mdt_body_only);
EXPORT_SYMBOL(RQF_MDS_PIN);
const struct req_format RQF_MDS_DONE_WRITING =
const struct req_format RQF_MDS_READPAGE =
DEFINE_REQ_FMT0("MDS_READPAGE",
- mdt_body_only, mdt_body_only);
+ mdt_body_capa, mdt_body_only);
EXPORT_SYMBOL(RQF_MDS_READPAGE);
const struct req_format RQF_MDS_WRITEPAGE =
mdt_body_only, mdt_body_only);
EXPORT_SYMBOL(RQF_MDS_IS_SUBDIR);
+const struct req_format RQF_MDS_RENEW_CAPA = /* message format for the MDS_RENEW_CAPA RPC */
+ DEFINE_REQ_FMT0("MDS_RENEW_CAPA",
+ mdt_renew_capa_client, mdt_body_capa); /* presumably (request fields, reply fields) -- confirm */
+EXPORT_SYMBOL(RQF_MDS_RENEW_CAPA);
+
#if !defined(__REQ_LAYOUT_USER__)
int req_layout_init(void)
{ MDS_SETXATTR, "mds_setxattr" },
{ MDS_WRITEPAGE, "mds_writepage" },
{ MDS_IS_SUBDIR, "mds_is_subdir" },
+ { MDS_RENEW_CAPA, "mds_renew_capa" },
{ LDLM_ENQUEUE, "ldlm_enqueue" },
{ LDLM_CONVERT, "ldlm_convert" },
{ LDLM_CANCEL, "ldlm_cancel" },
req->rq_repmsg ? lustre_msg_get_status(req->rq_repmsg) : 0);
}
EXPORT_SYMBOL(debug_req);
+
+/*
+ * Byte-swap a capability in place: the fid (through lustre_swab_lu_fid)
+ * and the lc_opc, lc_flags, lc_keyid and lc_expiry integers.  Any other
+ * member of struct lustre_capa (e.g. lc_hmac) is left untouched.
+ */
+void lustre_swab_lustre_capa(struct lustre_capa *c)
+{
+        lustre_swab_lu_fid(&c->lc_fid);
+        __swab64s(&c->lc_opc);
+        __swab32s(&c->lc_flags);
+        __swab32s(&c->lc_keyid);
+        __swab64s(&c->lc_expiry);
+}
+
+/*
+ * Byte-swap a capability key in place: lk_mdsid, lk_keyid and lk_padding.
+ * Any other member of struct lustre_capa_key (e.g. lk_key) is left
+ * untouched.
+ */
+void lustre_swab_lustre_capa_key(struct lustre_capa_key *k)
+{
+        __swab64s(&k->lk_mdsid);
+        __swab32s(&k->lk_keyid);
+        __swab32s(&k->lk_padding);
+}
EXPORT_SYMBOL(lustre_swab_mgs_target_info);
EXPORT_SYMBOL(lustre_swab_md_fld);
EXPORT_SYMBOL(lustre_swab_generic_32s);
+EXPORT_SYMBOL(lustre_swab_lustre_capa);
+EXPORT_SYMBOL(lustre_swab_lustre_capa_key);
/* recover.c */
EXPORT_SYMBOL(ptlrpc_disconnect_import);
goto out_umount;
}
+ snprintf(filepnm, sizeof(filepnm) - 1, "%s/%s", mntpt, CAPA_KEYS);
+ ret = iam_creat(filepnm, FMT_LFIX, L_BLOCK_SIZE, 1, 1, 4);
+ if (ret) {
+ goto out_umount;
+ }
+
umount(mntpt);
ret = mount(source, mntpt, fstype, 0, NULL);
if (ret) {
#define lustre_swab_mdt_rec_rename NULL
#define lustre_swab_mdt_rec_create NULL
#define lustre_swab_mdt_rec_setattr NULL
+#define lustre_swab_lustre_capa NULL
+#define lustre_swab_lustre_capa_key NULL
/*
* Yes, include .c file.