From be9858c5c257ba2442f8813e16b870e811849cef Mon Sep 17 00:00:00 2001 From: lsy Date: Wed, 27 Sep 2006 06:34:44 +0000 Subject: [PATCH] land capability. --- lustre/cmm/cmm_device.c | 28 +- lustre/cmm/cmm_object.c | 27 +- lustre/cmm/mdc_object.c | 7 +- lustre/include/Makefile.am | 2 +- lustre/include/lu_object.h | 45 ++- lustre/include/lustre/lustre_idl.h | 106 +++++- lustre/include/lustre_capa.h | 357 ++++++++++++++++++++ lustre/include/lustre_disk.h | 2 +- lustre/include/lustre_mdt.h | 4 - lustre/include/lustre_req_layout.h | 3 + lustre/include/md_object.h | 17 + lustre/include/obd.h | 67 +++- lustre/include/obd_class.h | 114 ++++--- lustre/include/obd_support.h | 2 + lustre/liblustre/dir.c | 2 +- lustre/liblustre/file.c | 2 +- lustre/liblustre/super.c | 8 +- lustre/llite/Makefile.in | 4 +- lustre/llite/dcache.c | 7 +- lustre/llite/dir.c | 25 +- lustre/llite/file.c | 31 +- lustre/llite/llite_capa.c | 651 +++++++++++++++++++++++++++++++++++++ lustre/llite/llite_internal.h | 43 ++- lustre/llite/llite_lib.c | 78 ++++- lustre/llite/llite_nfs.c | 88 ++++- lustre/llite/namei.c | 31 +- lustre/llite/remote_perm.c | 5 +- lustre/llite/rw.c | 19 +- lustre/llite/rw26.c | 10 +- lustre/llite/super25.c | 10 + lustre/llite/symlink.c | 5 +- lustre/llite/xattr.c | 14 +- lustre/lmv/lmv_intent.c | 36 +- lustre/lmv/lmv_internal.h | 5 +- lustre/lmv/lmv_obd.c | 139 +++++--- lustre/lmv/lmv_object.c | 4 +- lustre/lov/lov_obd.c | 19 +- lustre/lov/lov_request.c | 3 +- lustre/mdc/mdc_internal.h | 69 +--- lustre/mdc/mdc_lib.c | 115 +++++-- lustre/mdc/mdc_locks.c | 127 +++++--- lustre/mdc/mdc_reint.c | 53 ++- lustre/mdc/mdc_request.c | 268 ++++++++++----- lustre/mdd/mdd_handler.c | 83 ++++- lustre/mdd/mdd_lov.c | 2 +- lustre/mds/handler.c | 4 +- lustre/mds/mds_fs.c | 2 +- lustre/mds/mds_internal.h | 2 +- lustre/mds/mds_lov.c | 27 ++ lustre/mds/mds_unlink_open.c | 3 +- lustre/mdt/Makefile.in | 4 +- lustre/mdt/mdt_capa.c | 298 +++++++++++++++++ lustre/mdt/mdt_handler.c | 170 ++++++++-- 
lustre/mdt/mdt_internal.h | 53 ++- lustre/mdt/mdt_lib.c | 339 ++++++++++--------- lustre/mdt/mdt_open.c | 53 ++- lustre/mdt/mdt_recovery.c | 70 ++-- lustre/mdt/mdt_reint.c | 58 ++-- lustre/obdclass/Makefile.in | 2 +- lustre/obdclass/autoMakefile.am | 2 +- lustre/obdclass/capa.c | 298 +++++++++++++++++ lustre/obdclass/class_obd.c | 8 + lustre/obdclass/dt_object.c | 3 +- lustre/obdclass/genops.c | 11 + lustre/obdclass/llog_lvfs.c | 3 +- lustre/obdclass/lprocfs_status.c | 2 +- lustre/obdclass/lu_object.c | 46 ++- lustre/obdclass/obd_mount.c | 1 - lustre/obdecho/echo.c | 4 +- lustre/obdecho/echo_client.c | 7 +- lustre/obdfilter/Makefile.in | 2 +- lustre/obdfilter/filter.c | 48 ++- lustre/obdfilter/filter_capa.c | 184 +++++++++++ lustre/obdfilter/filter_internal.h | 15 +- lustre/obdfilter/filter_io.c | 24 +- lustre/obdfilter/filter_log.c | 2 +- lustre/osc/osc_request.c | 161 ++++++--- lustre/osd/osd_handler.c | 162 ++++++++- lustre/osd/osd_internal.h | 5 + lustre/ost/ost_handler.c | 36 +- lustre/ptlrpc/layout.c | 71 +++- lustre/ptlrpc/lproc_ptlrpc.c | 1 + lustre/ptlrpc/pack_generic.c | 16 + lustre/ptlrpc/ptlrpc_module.c | 2 + lustre/utils/mkfs_lustre.c | 6 + lustre/utils/req-layout.c | 2 + 86 files changed, 4113 insertions(+), 831 deletions(-) create mode 100644 lustre/include/lustre_capa.h create mode 100644 lustre/llite/llite_capa.c create mode 100644 lustre/mdt/mdt_capa.c create mode 100644 lustre/obdclass/capa.c create mode 100644 lustre/obdfilter/filter_capa.c diff --git a/lustre/cmm/cmm_device.c b/lustre/cmm/cmm_device.c index 8a996aa..d6be418 100644 --- a/lustre/cmm/cmm_device.c +++ b/lustre/cmm/cmm_device.c @@ -85,10 +85,37 @@ static int cmm_maxsize_get(const struct lu_context *ctxt, struct md_device *md, RETURN(rc); } +static int cmm_init_capa_keys(struct md_device *md, + struct lustre_capa_key *keys) +{ + struct cmm_device *cmm_dev = md2cmm_dev(md); + int rc; + ENTRY; + LASSERT(cmm_child_ops(cmm_dev)->mdo_init_capa_keys); + rc = 
cmm_child_ops(cmm_dev)->mdo_init_capa_keys(cmm_dev->cmm_child, + keys); + RETURN(rc); +} + +static int cmm_update_capa_key(const struct lu_context *ctxt, + struct md_device *md, + struct lustre_capa_key *key) +{ + struct cmm_device *cmm_dev = md2cmm_dev(md); + int rc; + ENTRY; + rc = cmm_child_ops(cmm_dev)->mdo_update_capa_key(ctxt, + cmm_dev->cmm_child, + key); + RETURN(rc); +} + static struct md_device_operations cmm_md_ops = { .mdo_statfs = cmm_statfs, .mdo_root_get = cmm_root_get, .mdo_maxsize_get = cmm_maxsize_get, + .mdo_init_capa_keys = cmm_init_capa_keys, + .mdo_update_capa_key= cmm_update_capa_key, }; extern struct lu_device_type mdc_device_type; @@ -295,7 +322,6 @@ static void cmm_device_free(const struct lu_context *ctx, struct lu_device *d) { struct cmm_device *m = lu2cmm_dev(d); - LASSERT(atomic_read(&d->ld_ref) == 0); LASSERT(m->cmm_tgt_count == 0); LASSERT(list_empty(&m->cmm_targets)); md_device_fini(&m->cmm_md_dev); diff --git a/lustre/cmm/cmm_object.c b/lustre/cmm/cmm_object.c index 77f8115..26981bf 100644 --- a/lustre/cmm/cmm_object.c +++ b/lustre/cmm/cmm_object.c @@ -337,6 +337,15 @@ static int cml_readpage(const struct lu_context *ctxt, struct md_object *mo, RETURN(rc); } +static int cml_capa_get(const struct lu_context *ctxt, struct md_object *mo, + struct lustre_capa *capa) +{ + int rc; + ENTRY; + rc = mo_capa_get(ctxt, md_object_next(mo), capa); + RETURN(rc); +} + static struct md_object_operations cml_mo_ops = { .moo_permission = cml_permission, .moo_attr_get = cml_attr_get, @@ -351,7 +360,8 @@ static struct md_object_operations cml_mo_ops = { .moo_open = cml_open, .moo_close = cml_close, .moo_readpage = cml_readpage, - .moo_readlink = cml_readlink + .moo_readlink = cml_readlink, + .moo_capa_get = cml_capa_get }; /* md_dir operations */ @@ -422,14 +432,14 @@ static int cml_unlink(const struct lu_context *ctx, struct md_object *mo_p, /* rename is split to local/remote by location of new parent dir */ struct md_object *md_object_find(const 
struct lu_context *ctx, - struct md_device *md, - const struct lu_fid *f) + struct md_device *md, + const struct lu_fid *f) { struct lu_object *o; struct md_object *m; ENTRY; - o = lu_object_find(ctx, md2lu_dev(md)->ld_site, f); + o = lu_object_find(ctx, md2lu_dev(md)->ld_site, f, BYPASS_CAPA); if (IS_ERR(o)) m = (struct md_object *)o; else { @@ -724,6 +734,12 @@ static int cmr_readpage(const struct lu_context *ctxt, struct md_object *mo, RETURN(-EREMOTE); } +static int cmr_capa_get(const struct lu_context *ctxt, struct md_object *mo, + struct lustre_capa *capa) +{ + RETURN(-EFAULT); +} + static struct md_object_operations cmr_mo_ops = { .moo_permission = cmr_permission, .moo_attr_get = cmr_attr_get, @@ -738,7 +754,8 @@ static struct md_object_operations cmr_mo_ops = { .moo_open = cmr_open, .moo_close = cmr_close, .moo_readpage = cmr_readpage, - .moo_readlink = cmr_readlink + .moo_readlink = cmr_readlink, + .moo_capa_get = cmr_capa_get }; /* remote part of md_dir operations */ diff --git a/lustre/cmm/mdc_object.c b/lustre/cmm/mdc_object.c index a26370a..58fea39 100644 --- a/lustre/cmm/mdc_object.c +++ b/lustre/cmm/mdc_object.c @@ -215,7 +215,8 @@ static int mdc_attr_get(const struct lu_context *ctx, struct md_object *mo, memset(&mci->mci_opdata, 0, sizeof(mci->mci_opdata)); - rc = md_getattr(mc->mc_desc.cl_exp, lu_object_fid(&mo->mo_lu), + /* FIXME: split capability */ + rc = md_getattr(mc->mc_desc.cl_exp, lu_object_fid(&mo->mo_lu), NULL, OBD_MD_FLMODE | OBD_MD_FLUID | OBD_MD_FLGID | OBD_MD_FLFLAGS, 0, &mci->mci_req); @@ -463,9 +464,9 @@ static int mdc_is_subdir(const struct lu_context *ctx, struct md_object *mo, mci = mdc_info_init(ctx); + /* FIXME: capability for split! 
*/ rc = md_is_subdir(mc->mc_desc.cl_exp, lu_object_fid(&mo->mo_lu), - fid, &mci->mci_req); - + fid, NULL, NULL, &mci->mci_req); if (rc) GOTO(out, rc); diff --git a/lustre/include/Makefile.am b/lustre/include/Makefile.am index 3bbaac1..c10de60 100644 --- a/lustre/include/Makefile.am +++ b/lustre/include/Makefile.am @@ -15,5 +15,5 @@ EXTRA_DIST = ioctl.h liblustre.h lprocfs_status.h lustre_cfg.h \ obd_cache.h obd_class.h obd_echo.h obd.h obd_lov.h \ obd_ost.h obd_support.h lustre_ver.h lu_object.h \ md_object.h dt_object.h lustre_param.h lustre_mdt.h \ - lustre_fid.h lustre_fld.h lustre_req_layout.h + lustre_fid.h lustre_fld.h lustre_req_layout.h lustre_capa.h diff --git a/lustre/include/lu_object.h b/lustre/include/lu_object.h index 8f3018a..8e77057 100644 --- a/lustre/include/lu_object.h +++ b/lustre/include/lu_object.h @@ -207,6 +207,13 @@ struct lu_object_operations { * consistent. */ int (*loo_object_invariant)(const struct lu_object *o); + /* + * Called to authorize action by capability. + */ + int (*loo_object_auth)(const struct lu_context *ctx, + const struct lu_object *o, + struct lustre_capa *capa, + __u64 opc); }; /* @@ -448,6 +455,11 @@ struct lu_object_header { */ struct lu_fid loh_fid; /* + * Fid capability. + */ + unsigned int loh_capa_bypass:1; /* bypass capability check */ + struct lustre_capa loh_capa; /* capability sent by client */ + /* * Common object attributes, cached for efficiency. From enum * lu_object_header_attr. */ @@ -568,6 +580,11 @@ struct lu_site { __u32 s_cache_race; __u32 s_lru_purged; } ls_stats; + + /* Capability */ + struct lustre_capa_key *ls_capa_keys; + unsigned long ls_capa_timeout; + __u32 ls_capa_alg; }; /* @@ -681,7 +698,14 @@ void lu_site_purge(const struct lu_context *ctx, * any case, additional reference is acquired on the returned object. 
*/ struct lu_object *lu_object_find(const struct lu_context *ctxt, - struct lu_site *s, const struct lu_fid *f); + struct lu_site *s, const struct lu_fid *f, + struct lustre_capa *c); + +/* + * Auth lu_object capability. + */ +int lu_object_auth(const struct lu_context *ctxt, const struct lu_object *o, + struct lustre_capa *capa, __u64 opc); /* * Helpers. @@ -713,6 +737,20 @@ static inline const struct lu_fid *lu_object_fid(const struct lu_object *o) } /* + * Pointer to the fid capability of this object. + */ +static inline struct lustre_capa * +lu_object_capa(const struct lu_object *o) +{ + return &o->lo_header->loh_capa; +} + +static inline int lu_object_capa_bypass(const struct lu_object *o) +{ + return o->lo_header->loh_capa_bypass; +} + +/* * return device operations vector for this object */ static inline struct lu_device_operations * @@ -805,6 +843,11 @@ static inline const __u32 lu_object_attr(const struct lu_object *o) return o->lo_header->loh_attr; } +static inline void lu_object_bypass_capa(struct lu_object *o) +{ + o->lo_header->loh_capa_bypass = 1; +} + struct lu_rdpg { /* input params, should be filled out by mdt */ __u32 rp_hash; /* hash */ diff --git a/lustre/include/lustre/lustre_idl.h b/lustre/include/lustre/lustre_idl.h index 22986a2..98883d0 100644 --- a/lustre/include/lustre/lustre_idl.h +++ b/lustre/include/lustre/lustre_idl.h @@ -244,10 +244,10 @@ static inline int fid_is_sane(const struct lu_fid *fid) #define DFID "[%16.16"LPF64"x/%8.8x:%8.8x]" -#define PFID(fid) \ - fid_seq((fid)), \ - fid_oid((fid)), \ - fid_ver((fid)) +#define PFID(fid) \ + fid_seq(fid), \ + fid_oid(fid), \ + fid_ver(fid) extern void lustre_swab_lu_fid(struct lu_fid *fid); extern void lustre_swab_lu_range(struct lu_range *range); @@ -301,6 +301,7 @@ static inline struct lu_dirent *lu_dirent_next(struct lu_dirent *ent) #define MEA_MAGIC_HASH_SEGMENT 0xb222a11b #define MAX_HASH_SIZE 0x7fffffff +/* TODO: lmv_stripe_md should contain mds capabilities for all slave fids */ 
struct lmv_stripe_md { __u32 mea_magic; __u32 mea_count; @@ -359,7 +360,7 @@ struct lustre_msg_v2 { __u32 lm_buflens[0]; }; -/* without security, ptlrpc_body is put in the first buffer. */ +/* without gss, ptlrpc_body is put at the first buffer. */ struct ptlrpc_body { struct lustre_handle pb_handle; __u32 pb_type; @@ -441,7 +442,7 @@ extern void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb); #define OBD_CONNECT_RMT_CLIENT 0x40000ULL /* Remote 1.8 client */ #define OBD_CONNECT_BRW_SIZE 0x80000ULL /* Max bytes per rpc */ #define OBD_CONNECT_QUOTA64 0x100000ULL /* 64bit qunit_data.qd_count b=10707*/ -#define OBD_CONNECT_FID_CAPA 0x200000ULL /* fid capability */ +#define OBD_CONNECT_MDS_CAPA 0x200000ULL /* MDS capability */ #define OBD_CONNECT_OSS_CAPA 0x400000ULL /* OSS capability */ /* also update obd_connect_names[] for lprocfs_rd_connect_flags() * and lustre/utils/wirecheck.c */ @@ -620,6 +621,9 @@ struct md_op_data { /* Size-on-MDS epoch and flags. */ __u64 ioepoch; __u32 flags; + + struct obd_capa *mod_capa1; + struct obd_capa *mod_capa2; }; #define MDS_MODE_DONT_LOCK (1 << 30) @@ -704,6 +708,8 @@ struct lov_mds_md_v1 { /* LOV EA mds/wire data (little-endian) */ #define OBD_MD_FLXATTRRM (0x0000004000000000ULL) /* xattr remove */ #define OBD_MD_FLACL (0x0000008000000000ULL) /* ACL */ #define OBD_MD_FLRMTPERM (0x0000010000000000ULL) /* remote permission */ +#define OBD_MD_FLMDSCAPA (0x0000020000000000ULL) /* MDS capability */ +#define OBD_MD_FLOSSCAPA (0x0000040000000000ULL) /* OSS capability */ #define OBD_MD_FLGETATTR (OBD_MD_FLID | OBD_MD_FLATIME | OBD_MD_FLMTIME | \ OBD_MD_FLCTIME | OBD_MD_FLSIZE | OBD_MD_FLBLKSZ | \ @@ -833,6 +839,7 @@ typedef enum { MDS_SETXATTR = 50, MDS_WRITEPAGE = 51, MDS_IS_SUBDIR = 52, + MDS_RENEW_CAPA = 53, MDS_LAST_OPC } mds_cmd_t; @@ -1028,7 +1035,9 @@ struct lustre_md { #ifdef CONFIG_FS_POSIX_ACL struct posix_acl *posix_acl; #endif - struct mdt_remote_perm *remote_perm; + struct mdt_remote_perm *remote_perm; + struct obd_capa 
*mds_capa; + struct obd_capa *oss_capa; }; #define Q_QUOTACHECK 0x800100 @@ -1118,7 +1127,7 @@ struct mdt_rec_setattr { __u32 sa_uid; __u32 sa_gid; __u32 sa_attr_flags; - __u32 sa_padding; /* also fix lustre_swab_mdt_rec_setattr */ + __u32 sa_padding; /* also fix lustre_swab_mdt_rec_setattr */ }; extern void lustre_swab_mdt_rec_setattr (struct mdt_rec_setattr *sa); @@ -1878,4 +1887,85 @@ typedef enum { SEC_LAST_OPC } sec_cmd_t; +/* + * capa related definitions + */ +#define CAPA_HMAC_MAX_LEN 64 +#define CAPA_HMAC_KEY_MAX_LEN 56 + +/* NB take care when changing the sequence of elements in this struct, + * because the offset info is used in find_capa() */ +struct lustre_capa { + struct lu_fid lc_fid; /* fid */ + __u64 lc_opc; /* operations allowed */ + __u32 lc_flags; /* HMAC algorithm & flags */ + __u32 lc_keyid; /* key used for the capability */ + __u64 lc_expiry; /* expiry time (sec) */ + __u8 lc_hmac[CAPA_HMAC_MAX_LEN]; /* HMAC */ +} __attribute__((packed)); + +extern void lustre_swab_lustre_capa(struct lustre_capa *c); + +/* lustre_capa.lc_opc */ +enum { + /* MDS only fid capability */ + CAPA_OPC_BODY_WRITE = 1, /* write fid data */ + CAPA_OPC_BODY_READ = 1<<1, /* read fid data */ + CAPA_OPC_INDEX_LOOKUP = 1<<2, /* lookup fid */ + CAPA_OPC_INDEX_INSERT = 1<<3, /* insert fid */ + CAPA_OPC_INDEX_DELETE = 1<<4, /* delete fid */ + /* OSS only fid capability */ + CAPA_OPC_OSS_WRITE = 1<<5, /* write oss object data */ + CAPA_OPC_OSS_READ = 1<<6, /* read oss object data */ + CAPA_OPC_OSS_TRUNC = 1<<7, /* truncate oss object */ + /* MDS & OSS both might have */ + CAPA_OPC_META_WRITE = 1<<8, /* write fid meta data */ + CAPA_OPC_META_READ = 1<<9, /* read fid meta data */ + +}; + +#define CAPA_OPC_MDS_ONLY \ + (CAPA_OPC_BODY_WRITE | CAPA_OPC_BODY_READ | \ + CAPA_OPC_INDEX_LOOKUP | CAPA_OPC_INDEX_INSERT | CAPA_OPC_INDEX_DELETE) +#define CAPA_OPC_OSS_ONLY \ + (CAPA_OPC_OSS_WRITE | CAPA_OPC_OSS_READ | CAPA_OPC_OSS_TRUNC) +#define CAPA_OPC_MDS_DEFAULT ~CAPA_OPC_OSS_ONLY +#define 
CAPA_OPC_OSS_DEFAULT ~(CAPA_OPC_MDS_ONLY | CAPA_OPC_OSS_ONLY) + +static inline int capa_for_mds(struct lustre_capa *c) +{ + return (c->lc_opc & CAPA_OPC_MDS_ONLY) != 0; +} + +static inline int capa_for_oss(struct lustre_capa *c) +{ + return (c->lc_opc & CAPA_OPC_OSS_ONLY) != 0; +} + +/* lustre_capa.lc_flags */ +enum { + CAPA_FL_SHORT_EXPIRY = 1, /* short capa expiry */ + CAPA_FL_ROOT = 2, /* root fid capa, will always renew */ +}; + +/* HMAC algorithm id, kept in the top 8 bits of lustre_capa.lc_flags */ +enum { + CAPA_HMAC_ALG_SHA1 = 1, /* sha1 algorithm */ + CAPA_HMAC_ALG_MAX, +}; + +#define CAPA_FL_MASK 0x00ffffff +#define CAPA_HMAC_ALG_MASK 0xff000000 + +struct lustre_capa_key { + __u64 lk_mdsid; /* mds# */ + __u32 lk_keyid; /* key# */ + __u32 lk_padding; + __u8 lk_key[CAPA_HMAC_KEY_MAX_LEN]; /* key */ +} __attribute__((packed)); + +extern void lustre_swab_lustre_capa_key(struct lustre_capa_key *k); + +typedef int (* renew_capa_cb_t)(struct obd_capa *, struct lustre_capa *); + #endif diff --git a/lustre/include/lustre_capa.h b/lustre/include/lustre_capa.h new file mode 100644 index 0000000..555ee5f --- /dev/null +++ b/lustre/include/lustre_capa.h @@ -0,0 +1,357 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2005 Cluster File Systems, Inc. + * Author: Lai Siyao + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Lustre capability support. + */ + +#ifndef __LINUX_CAPA_H_ +#define __LINUX_CAPA_H_ + +/* + * capability + */ +#ifdef __KERNEL__ +#include +#endif +#include + +#define NR_CAPAHASH 32 +#define CAPA_HASH_SIZE 3000 /* for MDS & OSS */ + +#define CAPA_TIMEOUT 1800 /* sec, == 30 min */ +#define CAPA_KEY_TIMEOUT (24 * 60 * 60) /* sec, == 1 days */ + +struct capa_hmac_alg { + const char *ha_name; + int ha_len; + int ha_keylen; +}; + +#define DEF_CAPA_HMAC_ALG(name, type, len, keylen) \ +[CAPA_HMAC_ALG_ ## type] = { \ + .ha_name = name, \ + .ha_len = len, \ + .ha_keylen = keylen, \ +} + +struct client_capa { + struct inode *inode; + struct list_head lli_list; /* link to lli_oss_capas */ + atomic_t open_count; /* open count */ +}; + +struct target_capa { + struct hlist_node c_hash; /* link to capa hash */ +}; + +struct obd_capa { + struct list_head c_list; /* link to capa_list */ + + struct lustre_capa c_capa; /* capa */ + atomic_t c_refc; /* ref count */ + cfs_time_t c_expiry; /* jiffies */ + spinlock_t c_lock; /* protect capa content */ + int c_site; + int c_flags; + + union { + struct client_capa cli; + struct target_capa tgt; + } u; +}; + +enum { + CAPA_SITE_CLIENT = 0, + CAPA_SITE_SERVER, + CAPA_SITE_MAX +}; + +enum { + OBD_CAPA_FL_NEW = 1, + OBD_CAPA_FL_EXPIRED = 1<<1, + OBD_CAPA_FL_ROOT = 1<<2, + OBD_CAPA_FL_SPLIT = 1<<3 +}; + +static inline __u64 capa_opc(struct lustre_capa *capa) +{ + return capa->lc_opc; +} + +static inline struct lu_fid *capa_fid(struct lustre_capa *capa) +{ + return &capa->lc_fid; +} + +static inline __u32 capa_keyid(struct lustre_capa *capa) +{ + return capa->lc_keyid; +} + +static inline __u64 capa_expiry(struct lustre_capa *capa) +{ + return capa->lc_expiry; +} + +static inline __u32 capa_flags(struct lustre_capa *capa) +{ + return 
capa->lc_flags & 0xffffff; +} + +static inline __u32 capa_alg(struct lustre_capa *capa) +{ + __u32 alg = capa->lc_flags; + + return alg >> 24; +} + +static inline __u64 capa_key_mdsid(struct lustre_capa_key *key) +{ + return key->lk_mdsid; +} + +static inline __u32 capa_key_keyid(struct lustre_capa_key *key) +{ + return key->lk_keyid; +} + +#define DEBUG_CAPA(level, c, fmt, args...) \ +do { \ +CDEBUG(level, fmt " capability@%p opc "LPX64" fid "DFID" keyid %u expiry "LPU64\ + " flags %u alg %d\n", \ + ##args, c, capa_opc(c), PFID(capa_fid(c)), capa_keyid(c), \ + capa_expiry(c), capa_flags(c), capa_alg(c)); \ +} while (0) + +#define DEBUG_CAPA_KEY(level, k, fmt, args...) \ +do { \ +CDEBUG(level, fmt " capability key@%p mdsid "LPU64" keyid %u\n", \ + ##args, k, capa_key_mdsid(k), capa_key_keyid(k)); \ +} while (0) + +/* obdclass/capa.c */ +extern struct list_head capa_list[]; +extern spinlock_t capa_lock; +extern int capa_count[]; +extern cfs_mem_cache_t *capa_cachep; + +struct obd_capa *capa_add(struct lustre_capa *capa); +struct obd_capa *capa_lookup(struct lustre_capa *capa); + +int capa_hmac(__u8 *hmac, struct lustre_capa *capa, __u8 *key); +void capa_cpy(void *dst, struct obd_capa *ocapa); + +void cleanup_capas(int site); +void dump_capa_hmac(char *buf, char *key); + +static inline int obd_capa_is_new(struct obd_capa *oc) +{ + return !!((oc)->c_flags & OBD_CAPA_FL_NEW); +} + +static inline int obd_capa_is_expired(struct obd_capa *oc) +{ + return !!((oc)->c_flags & OBD_CAPA_FL_EXPIRED); +} + +static inline int obd_capa_is_valid(struct obd_capa *oc) +{ + return !((oc)->c_flags & (OBD_CAPA_FL_NEW | OBD_CAPA_FL_EXPIRED)); /* valid: neither NEW nor EXPIRED, mirroring obd_capa_set_valid() */ +} + +static inline void obd_capa_set_new(struct obd_capa *oc) +{ + oc->c_flags |= OBD_CAPA_FL_NEW; +} + +static inline void obd_capa_set_expired(struct obd_capa *oc) +{ + oc->c_flags |= OBD_CAPA_FL_EXPIRED; +} + +static inline void obd_capa_set_valid(struct obd_capa *oc) +{ + oc->c_flags &= ~(OBD_CAPA_FL_NEW | OBD_CAPA_FL_EXPIRED); +} + +static 
inline void obd_capa_clear_new(struct obd_capa *oc) +{ + oc->c_flags &= ~OBD_CAPA_FL_NEW; +} + +static inline void obd_capa_clear_expired(struct obd_capa *oc) +{ + oc->c_flags &= ~OBD_CAPA_FL_EXPIRED; +} + +static inline int obd_capa_is_root(struct obd_capa *oc) +{ + return !!((oc)->c_flags & OBD_CAPA_FL_ROOT); +} + +static inline void obd_capa_set_root(struct obd_capa *oc) +{ + oc->c_flags |= OBD_CAPA_FL_ROOT; +} + +static inline int obd_capa_is_split(struct obd_capa *oc) +{ + return !!((oc)->c_flags & OBD_CAPA_FL_SPLIT); +} + +static inline void obd_capa_set_split(struct obd_capa *oc) +{ + oc->c_flags |= OBD_CAPA_FL_SPLIT; +} + +static inline struct obd_capa *alloc_capa(int site) +{ +#ifdef __KERNEL__ + struct obd_capa *ocapa; + + OBD_SLAB_ALLOC(ocapa, capa_cachep, SLAB_KERNEL, sizeof(*ocapa)); + if (ocapa) { + atomic_set(&ocapa->c_refc, 0); + spin_lock_init(&ocapa->c_lock); + INIT_LIST_HEAD(&ocapa->c_list); + ocapa->c_site = site; + obd_capa_set_new(ocapa); + capa_count[site]++; + } + return ocapa; +#else + return NULL; +#endif +} + +static inline void free_capa(struct obd_capa *ocapa) +{ +#ifdef __KERNEL__ + if (atomic_read(&ocapa->c_refc)) { + DEBUG_CAPA(D_ERROR, &ocapa->c_capa, "refc %d for", + atomic_read(&ocapa->c_refc)); + LBUG(); + } + + capa_count[ocapa->c_site]--; + if (capa_count[ocapa->c_site] < 0) { + DEBUG_CAPA(D_ERROR, &ocapa->c_capa, "total count %d", + capa_count[ocapa->c_site]); + LBUG(); + } + OBD_SLAB_FREE(ocapa, capa_cachep, sizeof(*ocapa)); +#else +#endif +} + +static inline struct obd_capa *capa_get(struct obd_capa *ocapa) +{ + if (!ocapa) + return NULL; + + atomic_inc(&ocapa->c_refc); + return ocapa; +} + +static inline void capa_put(struct obd_capa *ocapa) +{ + if (!ocapa) + return; + + atomic_dec(&ocapa->c_refc); +} + +static inline int open_flags_to_accmode(int flags) +{ + int mode = flags; + + if ((mode + 1) & O_ACCMODE) + mode++; + if (mode & O_TRUNC) + mode |= 2; + + return mode; +} + +static inline __u64 capa_open_opc(int mode) +{ + 
return mode & FMODE_WRITE ? CAPA_OPC_OSS_WRITE : CAPA_OPC_OSS_READ; +} + +static inline void set_capa_expiry(struct obd_capa *ocapa) +{ + time_t expiry = (time_t)ocapa->c_capa.lc_expiry; + + expiry = (jiffies + (expiry - CURRENT_SECONDS) * HZ) / HZ; + ocapa->c_expiry = expiry * HZ; +} + +static inline unsigned long capa_renewal_time(struct obd_capa *ocapa) +{ + /* NB, by default dirty_expire_centisecs is 30*100, that is 30 sec, + * the following values guarantee that client cache will be flushed + * to OSS before capability expires. + */ + return ocapa->c_expiry - + ((ocapa->c_capa.lc_flags & CAPA_FL_SHORT_EXPIRY) ? 40:1200) * HZ; +} + +#ifdef __KERNEL__ +static inline int capa_is_to_expire(struct obd_capa *ocapa) +{ + return time_before_eq(capa_renewal_time(ocapa), jiffies); +} + +static inline int capa_is_expired(struct obd_capa *ocapa) +{ + return time_before_eq(ocapa->c_expiry, jiffies); +} +#endif + +static inline int capa_opc_supported(struct lustre_capa *capa, __u64 opc) +{ + return (capa->lc_opc & opc) == opc; +} + +static inline struct lustre_capa * +lustre_unpack_capa(struct lustre_msg *msg, unsigned int offset) +{ + struct lustre_capa *capa; + + capa = lustre_swab_buf(msg, offset, sizeof(*capa), + lustre_swab_lustre_capa); + if (capa == NULL) + CERROR("bufcount %u, bufsize %u\n", + lustre_msg_bufcount(msg), + (lustre_msg_bufcount(msg) <= offset) ? 
+ -1 : lustre_msg_buflen(msg, offset)); + + return capa; +} + +struct filter_capa_key { + struct list_head k_list; + struct lustre_capa_key k_key; +}; + +#define BYPASS_CAPA (struct lustre_capa *)ERR_PTR(-ENOENT) + +#endif /* __LINUX_CAPA_H_ */ diff --git a/lustre/include/lustre_disk.h b/lustre/include/lustre_disk.h index e445bf4..9e71c09 100644 --- a/lustre/include/lustre_disk.h +++ b/lustre/include/lustre_disk.h @@ -37,7 +37,7 @@ #define LAST_RCVD "last_received" #define LOV_OBJID "lov_objid" #define HEALTH_CHECK "health_check" - +#define CAPA_KEYS "capa_keys" /****************** persistent mount data *********************/ diff --git a/lustre/include/lustre_mdt.h b/lustre/include/lustre_mdt.h index 911c8b4..8dc2d79 100644 --- a/lustre/include/lustre_mdt.h +++ b/lustre/include/lustre_mdt.h @@ -58,8 +58,4 @@ struct mdt_idmap_table { [MDT_IDMAP_HASHSIZE]; }; -/* remote perm */ -extern int mdc_get_remote_perm(struct obd_export *exp, const struct lu_fid *fid, - struct ptlrpc_request **request); - #endif diff --git a/lustre/include/lustre_req_layout.h b/lustre/include/lustre_req_layout.h index fe92be7..dd548ac 100644 --- a/lustre/include/lustre_req_layout.h +++ b/lustre/include/lustre_req_layout.h @@ -112,6 +112,7 @@ extern const struct req_format RQF_MDS_READPAGE; extern const struct req_format RQF_MDS_WRITEPAGE; extern const struct req_format RQF_MDS_IS_SUBDIR; extern const struct req_format RQF_MDS_DONE_WRITING; +extern const struct req_format RQF_MDS_RENEW_CAPA; /* * This is format of direct (non-intent) MDS_GETATTR_NAME request. 
@@ -159,6 +160,8 @@ extern const struct req_msg_field RMF_EADATA; extern const struct req_msg_field RMF_ACL; extern const struct req_msg_field RMF_LOGCOOKIES; extern const struct req_msg_field RMF_REINT_OPC; +extern const struct req_msg_field RMF_CAPA1; +extern const struct req_msg_field RMF_CAPA2; /* seq-mgr fields */ extern const struct req_msg_field RMF_SEQ_OPC; diff --git a/lustre/include/md_object.h b/lustre/include/md_object.h index 403f2f2..6b76a32 100644 --- a/lustre/include/md_object.h +++ b/lustre/include/md_object.h @@ -194,6 +194,8 @@ struct md_object_operations { struct md_object *obj, struct md_attr *ma, struct md_ucred *uc); + int (*moo_capa_get)(const struct lu_context *, struct md_object *, + struct lustre_capa *); }; /* @@ -290,6 +292,13 @@ struct md_device_operations { struct md_device *m, struct kstatfs *sfs, struct md_ucred *uc); + + int (*mdo_init_capa_keys)(struct md_device *m, + struct lustre_capa_key *keys); + + int (*mdo_update_capa_key)(const struct lu_context *ctx, + struct md_device *m, + struct lustre_capa_key *key); }; enum md_upcall_event { @@ -493,6 +502,14 @@ static inline int mo_ref_del(const struct lu_context *cx, return m->mo_ops->moo_ref_del(cx, m, ma, uc); } +static inline int mo_capa_get(const struct lu_context *cx, + struct md_object *m, + struct lustre_capa *c) +{ + LASSERT(m->mo_ops->moo_capa_get); + return m->mo_ops->moo_capa_get(cx, m, c); +} + static inline int mdo_lookup(const struct lu_context *cx, struct md_object *p, const char *name, diff --git a/lustre/include/obd.h b/lustre/include/obd.h index 65cd41a..12ddad3 100644 --- a/lustre/include/obd.h +++ b/lustre/include/obd.h @@ -33,6 +33,7 @@ #include #include #include +#include #define MAX_OBD_DEVICES 8192 @@ -168,7 +169,10 @@ struct obd_info { * level. E.g. it is used for update lsm->lsm_oinfo at every recieved * request in osc level for enqueue requests. It is also possible to * update some caller data from LOV layer if needed. 
*/ - obd_enqueue_update_f oi_cb_up; + obd_enqueue_update_f oi_cb_up; + /* oss capability, its type is obd_capa in client to avoid copy. + * in contrary its type is lustre_capa in OSS. */ + void *oi_capa; }; /* compare all relevant fields. */ @@ -223,6 +227,7 @@ struct obd_async_page_ops { void (*ap_update_obdo)(void *data, int cmd, struct obdo *oa, obd_valid valid); int (*ap_completion)(void *data, int cmd, struct obdo *oa, int rc); + struct obd_capa *(*ap_lookup_capa)(void *data, int cmd); }; /* the `oig' is passed down from a caller of obd rw methods. the callee @@ -397,6 +402,10 @@ struct filter_obd { int fo_fmd_max_num; /* per exp filter_mod_data */ int fo_fmd_max_age; /* jiffies to fmd expiry */ + + /* capability related */ + unsigned int fo_fl_oss_capa; + struct list_head fo_capa_keys; }; #define OSC_MAX_RIF_DEFAULT 8 @@ -563,6 +572,7 @@ struct mds_obd { mds_fl_user_xattr:1, mds_fl_acl:1; + /* For CMD add mds_num */ int mds_num; @@ -571,6 +581,9 @@ struct mds_obd { /* root squash */ struct rootsquash_info *mds_rootsquash_info; + + /* for capability keys update */ + struct lustre_capa_key *mds_capa_keys; }; struct echo_obd { @@ -953,6 +966,7 @@ enum obd_cleanup_stage { #define KEY_INIT_RECOV "initial_recov" #define KEY_INIT_RECOV_BACKUP "init_recov_bk" #define KEY_FLUSH_CTX "flush_ctx" +#define KEY_CAPA_KEY "capa_key" struct lu_context; @@ -1014,7 +1028,7 @@ struct obd_ops { struct lov_stripe_md **ea, struct obd_trans_info *oti); int (*o_destroy)(struct obd_export *exp, struct obdo *oa, struct lov_stripe_md *ea, struct obd_trans_info *oti, - struct obd_export *md_exp); + struct obd_export *md_exp, void *capa); int (*o_setattr)(struct obd_export *exp, struct obd_info *oinfo, struct obd_trans_info *oti); int (*o_setattr_async)(struct obd_export *exp, struct obd_info *oinfo, @@ -1066,7 +1080,8 @@ struct obd_ops { struct obd_trans_info *oti, struct ptlrpc_request_set *rqset); int (*o_sync)(struct obd_export *exp, struct obdo *oa, - struct lov_stripe_md *ea, 
obd_size start, obd_size end); + struct lov_stripe_md *ea, obd_size start, obd_size end, + void *capa); int (*o_migrate)(struct lustre_handle *conn, struct lov_stripe_md *dst, struct lov_stripe_md *src, obd_size start, obd_size end, struct obd_trans_info *oti); @@ -1079,7 +1094,8 @@ struct obd_ops { int (*o_preprw)(int cmd, struct obd_export *exp, struct obdo *oa, int objcount, struct obd_ioobj *obj, int niocount, struct niobuf_remote *remote, - struct niobuf_local *local, struct obd_trans_info *oti); + struct niobuf_local *local, struct obd_trans_info *oti, + struct lustre_capa *capa); int (*o_commitrw)(int cmd, struct obd_export *exp, struct obdo *oa, int objcount, struct obd_ioobj *obj, int niocount, struct niobuf_local *local, @@ -1111,7 +1127,7 @@ struct obd_ops { /* metadata-only methods */ int (*o_pin)(struct obd_export *, const struct lu_fid *fid, - struct obd_client_handle *, int flag); + struct obd_capa *, struct obd_client_handle *, int flag); int (*o_unpin)(struct obd_export *, struct obd_client_handle *, int); int (*o_import_event)(struct obd_device *, struct obd_import *, @@ -1134,7 +1150,8 @@ struct obd_ops { }; struct md_ops { - int (*m_getstatus)(struct obd_export *, struct lu_fid *); + int (*m_getstatus)(struct obd_export *, struct lu_fid *, + struct obd_capa **); int (*m_change_cbdata)(struct obd_export *, const struct lu_fid *, ldlm_iterator_t, void *); int (*m_close)(struct obd_export *, struct md_op_data *, @@ -1149,9 +1166,10 @@ struct md_ops { void *, int, ldlm_completion_callback, ldlm_blocking_callback, void *, int); int (*m_getattr)(struct obd_export *, const struct lu_fid *, - obd_valid, int, struct ptlrpc_request **); + struct obd_capa *, obd_valid, int, + struct ptlrpc_request **); int (*m_getattr_name)(struct obd_export *, const struct lu_fid *, - const char *, int, obd_valid, + struct obd_capa *, const char *, int, obd_valid, int, struct ptlrpc_request **); int (*m_intent_lock)(struct obd_export *, struct md_op_data *, void *, int, 
struct lookup_intent *, int, @@ -1163,24 +1181,29 @@ struct md_ops { const char *, int, const char *, int, struct ptlrpc_request **); int (*m_is_subdir)(struct obd_export *, const struct lu_fid *, - const struct lu_fid *, struct ptlrpc_request **); + const struct lu_fid *, + struct obd_capa *, struct obd_capa *, + struct ptlrpc_request **); int (*m_setattr)(struct obd_export *, struct md_op_data *, void *, int , void *, int, struct ptlrpc_request **); int (*m_sync)(struct obd_export *, const struct lu_fid *, - struct ptlrpc_request **); + struct obd_capa *, struct ptlrpc_request **); int (*m_readpage)(struct obd_export *, const struct lu_fid *, - __u64, struct page *, struct ptlrpc_request **); + struct obd_capa *, __u64, struct page *, + struct ptlrpc_request **); int (*m_unlink)(struct obd_export *, struct md_op_data *, struct ptlrpc_request **); int (*m_setxattr)(struct obd_export *, const struct lu_fid *, - obd_valid, const char *, const char *, - int, int, int, struct ptlrpc_request **); + struct obd_capa *, obd_valid, const char *, + const char *, int, int, int, + struct ptlrpc_request **); int (*m_getxattr)(struct obd_export *, const struct lu_fid *, - obd_valid, const char *, const char *, - int, int, int, struct ptlrpc_request **); + struct obd_capa *, obd_valid, const char *, + const char *, int, int, int, + struct ptlrpc_request **); int (*m_init_ea_size)(struct obd_export *, int, int, int); @@ -1203,9 +1226,11 @@ struct md_ops { int (*m_cancel_unused)(struct obd_export *, const struct lu_fid *, int flags, void *opaque); + int (*m_renew_capa)(struct obd_export *, struct obd_capa *oc, + renew_capa_cb_t cb); int (*m_get_remote_perm)(struct obd_export *, const struct lu_fid *, - struct ptlrpc_request **); + struct obd_capa *, struct ptlrpc_request **); /* * NOTE: If adding ops, add another LPROCFS_MD_OP_INIT() line to @@ -1280,4 +1305,14 @@ static inline void init_obd_quota_ops(quota_interface_t *interface, obd_ops->o_quotactl = QUOTA_OP(interface, ctl); } 
+static inline __u64 oinfo_mdsno(struct obd_info *oinfo) +{ + return oinfo->oi_oa->o_gr - FILTER_GROUP_MDS0; +} + +static inline struct lustre_capa *oinfo_capa(struct obd_info *oinfo) +{ + return oinfo->oi_capa; +} + #endif /* __OBD_H */ diff --git a/lustre/include/obd_class.h b/lustre/include/obd_class.h index 94b3743..d5afcd1 100644 --- a/lustre/include/obd_class.h +++ b/lustre/include/obd_class.h @@ -609,7 +609,7 @@ static inline int obd_create(struct obd_export *exp, struct obdo *obdo, static inline int obd_destroy(struct obd_export *exp, struct obdo *obdo, struct lov_stripe_md *ea, struct obd_trans_info *oti, - struct obd_export *md_exp) + struct obd_export *md_exp, void *capa) { int rc; ENTRY; @@ -617,7 +617,7 @@ static inline int obd_destroy(struct obd_export *exp, struct obdo *obdo, EXP_CHECK_DT_OP(exp, destroy); OBD_COUNTER_INCREMENT(exp->exp_obd, destroy); - rc = OBP(exp->exp_obd, destroy)(exp, obdo, ea, oti, md_exp); + rc = OBP(exp->exp_obd, destroy)(exp, obdo, ea, oti, md_exp, capa); RETURN(rc); } @@ -995,7 +995,7 @@ static inline int obd_statfs(struct obd_device *obd, struct obd_statfs *osfs, static inline int obd_sync(struct obd_export *exp, struct obdo *oa, struct lov_stripe_md *ea, obd_size start, - obd_size end) + obd_size end, void *capa) { int rc; ENTRY; @@ -1003,7 +1003,7 @@ static inline int obd_sync(struct obd_export *exp, struct obdo *oa, OBD_CHECK_DT_OP(exp->exp_obd, sync, -EOPNOTSUPP); OBD_COUNTER_INCREMENT(exp->exp_obd, sync); - rc = OBP(exp->exp_obd, sync)(exp, oa, ea, start, end); + rc = OBP(exp->exp_obd, sync)(exp, oa, ea, start, end, capa); RETURN(rc); } @@ -1086,7 +1086,8 @@ static inline int obd_brw_async(int cmd, struct obd_export *exp, static inline int obd_brw_rqset(int cmd, struct obd_export *exp, struct obdo *oa, struct lov_stripe_md *lsm, obd_count oa_bufs, struct brw_page *pg, - struct obd_trans_info *oti) + struct obd_trans_info *oti, + struct obd_capa *ocapa) { struct ptlrpc_request_set *set = NULL; struct obd_info oinfo = { 
{ { 0 } } }; @@ -1099,6 +1100,7 @@ static inline int obd_brw_rqset(int cmd, struct obd_export *exp, oinfo.oi_oa = oa; oinfo.oi_md = lsm; + oinfo.oi_capa = ocapa; rc = obd_brw_async(cmd, exp, &oinfo, oa_bufs, pg, oti, set); if (rc == 0) { rc = ptlrpc_set_wait(set); @@ -1217,7 +1219,8 @@ static inline int obd_preprw(int cmd, struct obd_export *exp, struct obdo *oa, int objcount, struct obd_ioobj *obj, int niocount, struct niobuf_remote *remote, struct niobuf_local *local, - struct obd_trans_info *oti) + struct obd_trans_info *oti, + struct lustre_capa *capa) { int rc; ENTRY; @@ -1226,7 +1229,7 @@ static inline int obd_preprw(int cmd, struct obd_export *exp, struct obdo *oa, OBD_COUNTER_INCREMENT(exp->exp_obd, preprw); rc = OBP(exp->exp_obd, preprw)(cmd, exp, oa, objcount, obj, niocount, - remote, local, oti); + remote, local, oti, capa); RETURN(rc); } @@ -1394,14 +1397,15 @@ static inline int obd_join_lru(struct obd_export *exp, } static inline int obd_pin(struct obd_export *exp, const struct lu_fid *fid, - struct obd_client_handle *handle, int flag) + struct obd_capa *oc, struct obd_client_handle *handle, + int flag) { int rc; EXP_CHECK_DT_OP(exp, pin); OBD_COUNTER_INCREMENT(exp->exp_obd, pin); - rc = OBP(exp->exp_obd, pin)(exp, fid, handle, flag); + rc = OBP(exp->exp_obd, pin)(exp, fid, oc, handle, flag); return(rc); } @@ -1561,27 +1565,26 @@ static inline int obd_register_observer(struct obd_device *obd, /* metadata helpers */ static inline int md_getstatus(struct obd_export *exp, - struct lu_fid *fid) + struct lu_fid *fid, struct obd_capa **pc) { int rc; ENTRY; EXP_CHECK_MD_OP(exp, getstatus); MD_COUNTER_INCREMENT(exp->exp_obd, getstatus); - rc = MDP(exp->exp_obd, getstatus)(exp, fid); + rc = MDP(exp->exp_obd, getstatus)(exp, fid, pc); RETURN(rc); } -static inline int md_getattr(struct obd_export *exp, - const struct lu_fid *fid, - obd_valid valid, int ea_size, +static inline int md_getattr(struct obd_export *exp, const struct lu_fid *fid, + struct obd_capa *oc, 
obd_valid valid, int ea_size, struct ptlrpc_request **request) { int rc; ENTRY; EXP_CHECK_MD_OP(exp, getattr); MD_COUNTER_INCREMENT(exp->exp_obd, getattr); - rc = MDP(exp->exp_obd, getattr)(exp, fid, valid, + rc = MDP(exp->exp_obd, getattr)(exp, fid, oc, valid, ea_size, request); RETURN(rc); } @@ -1598,8 +1601,7 @@ static inline int md_change_cbdata(struct obd_export *exp, RETURN(rc); } -static inline int md_close(struct obd_export *exp, - struct md_op_data *op_data, +static inline int md_close(struct obd_export *exp, struct md_op_data *op_data, struct obd_client_handle *och, struct ptlrpc_request **request) { @@ -1612,8 +1614,8 @@ static inline int md_close(struct obd_export *exp, } static inline int md_create(struct obd_export *exp, struct md_op_data *op_data, - const void *data, int datalen, int mode, - __u32 uid, __u32 gid, __u32 cap_effective, __u64 rdev, + const void *data, int datalen, int mode, __u32 uid, + __u32 gid, __u32 cap_effective, __u64 rdev, struct ptlrpc_request **request) { int rc; @@ -1658,7 +1660,7 @@ static inline int md_enqueue(struct obd_export *exp, int lock_type, } static inline int md_getattr_name(struct obd_export *exp, - const struct lu_fid *fid, + const struct lu_fid *fid, struct obd_capa *oc, const char *name, int namelen, obd_valid valid, int ea_size, struct ptlrpc_request **request) @@ -1667,15 +1669,14 @@ static inline int md_getattr_name(struct obd_export *exp, ENTRY; EXP_CHECK_MD_OP(exp, getattr_name); MD_COUNTER_INCREMENT(exp->exp_obd, getattr_name); - rc = MDP(exp->exp_obd, getattr_name)(exp, fid, name, namelen, + rc = MDP(exp->exp_obd, getattr_name)(exp, fid, oc, name, namelen, valid, ea_size, request); RETURN(rc); } static inline int md_intent_lock(struct obd_export *exp, - struct md_op_data *op_data, - void *lmm, int lmmsize, - struct lookup_intent *it, + struct md_op_data *op_data, void *lmm, + int lmmsize, struct lookup_intent *it, int flags, struct ptlrpc_request **reqp, ldlm_blocking_callback cb_blocking, int 
extra_lock_flags) @@ -1690,8 +1691,7 @@ static inline int md_intent_lock(struct obd_export *exp, RETURN(rc); } -static inline int md_link(struct obd_export *exp, - struct md_op_data *op_data, +static inline int md_link(struct obd_export *exp, struct md_op_data *op_data, struct ptlrpc_request **request) { int rc; @@ -1702,11 +1702,9 @@ static inline int md_link(struct obd_export *exp, RETURN(rc); } -static inline int md_rename(struct obd_export *exp, - struct md_op_data *op_data, - const char *old, int oldlen, - const char *new, int newlen, - struct ptlrpc_request **request) +static inline int md_rename(struct obd_export *exp, struct md_op_data *op_data, + const char *old, int oldlen, const char *new, + int newlen, struct ptlrpc_request **request) { int rc; ENTRY; @@ -1720,13 +1718,14 @@ static inline int md_rename(struct obd_export *exp, static inline int md_is_subdir(struct obd_export *exp, const struct lu_fid *pfid, const struct lu_fid *cfid, + struct obd_capa *pc, struct obd_capa *cc, struct ptlrpc_request **request) { int rc; ENTRY; EXP_CHECK_MD_OP(exp, is_subdir); MD_COUNTER_INCREMENT(exp->exp_obd, is_subdir); - rc = MDP(exp->exp_obd, is_subdir)(exp, pfid, cfid, request); + rc = MDP(exp->exp_obd, is_subdir)(exp, pfid, cfid, pc, cc, request); RETURN(rc); } @@ -1743,28 +1742,27 @@ static inline int md_setattr(struct obd_export *exp, struct md_op_data *op_data, RETURN(rc); } -static inline int md_sync(struct obd_export *exp, - const struct lu_fid *fid, - struct ptlrpc_request **request) +static inline int md_sync(struct obd_export *exp, const struct lu_fid *fid, + struct obd_capa *oc, struct ptlrpc_request **request) { int rc; ENTRY; EXP_CHECK_MD_OP(exp, sync); MD_COUNTER_INCREMENT(exp->exp_obd, sync); - rc = MDP(exp->exp_obd, sync)(exp, fid, request); + rc = MDP(exp->exp_obd, sync)(exp, fid, oc, request); RETURN(rc); } -static inline int md_readpage(struct obd_export *exp, - const struct lu_fid *fid, - __u64 offset, struct page *page, +static inline int 
md_readpage(struct obd_export *exp, const struct lu_fid *fid, + struct obd_capa *oc, __u64 offset, + struct page *page, struct ptlrpc_request **request) { int rc; ENTRY; EXP_CHECK_MD_OP(exp, readpage); MD_COUNTER_INCREMENT(exp->exp_obd, readpage); - rc = MDP(exp->exp_obd, readpage)(exp, fid, offset, page, request); + rc = MDP(exp->exp_obd, readpage)(exp, fid, oc, offset, page, request); RETURN(rc); } @@ -1802,7 +1800,7 @@ static inline int md_free_lustre_md(struct obd_export *exp, } static inline int md_setxattr(struct obd_export *exp, - const struct lu_fid *fid, + const struct lu_fid *fid, struct obd_capa *oc, obd_valid valid, const char *name, const char *input, int input_size, int output_size, int flags, @@ -1811,13 +1809,13 @@ static inline int md_setxattr(struct obd_export *exp, ENTRY; EXP_CHECK_MD_OP(exp, setxattr); MD_COUNTER_INCREMENT(exp->exp_obd, setxattr); - RETURN(MDP(exp->exp_obd, setxattr)(exp, fid, valid, name, input, + RETURN(MDP(exp->exp_obd, setxattr)(exp, fid, oc, valid, name, input, input_size, output_size, flags, request)); } static inline int md_getxattr(struct obd_export *exp, - const struct lu_fid *fid, + const struct lu_fid *fid, struct obd_capa *oc, obd_valid valid, const char *name, const char *input, int input_size, int output_size, int flags, @@ -1826,7 +1824,7 @@ static inline int md_getxattr(struct obd_export *exp, ENTRY; EXP_CHECK_MD_OP(exp, getxattr); MD_COUNTER_INCREMENT(exp->exp_obd, getxattr); - RETURN(MDP(exp->exp_obd, getxattr)(exp, fid, valid, name, input, + RETURN(MDP(exp->exp_obd, getxattr)(exp, fid, oc, valid, name, input, input_size, output_size, flags, request)); } @@ -1885,26 +1883,36 @@ static inline int md_lock_match(struct obd_export *exp, int flags, policy, mode, lockh)); } -static inline int md_init_ea_size(struct obd_export *exp, - int easize, int def_asize, - int cookiesize) +static inline int md_init_ea_size(struct obd_export *exp, int easize, + int def_asize, int cookiesize) { ENTRY; EXP_CHECK_MD_OP(exp, 
init_ea_size); MD_COUNTER_INCREMENT(exp->exp_obd, init_ea_size); - RETURN(MDP(exp->exp_obd, init_ea_size)(exp, easize, - def_asize, + RETURN(MDP(exp->exp_obd, init_ea_size)(exp, easize, def_asize, cookiesize)); } static inline int md_get_remote_perm(struct obd_export *exp, const struct lu_fid *fid, + struct obd_capa *oc, struct ptlrpc_request **request) { ENTRY; EXP_CHECK_MD_OP(exp, get_remote_perm); MD_COUNTER_INCREMENT(exp->exp_obd, get_remote_perm); - RETURN(MDP(exp->exp_obd, get_remote_perm)(exp, fid, request)); + RETURN(MDP(exp->exp_obd, get_remote_perm)(exp, fid, oc, request)); +} + +static inline int md_renew_capa(struct obd_export *exp, struct obd_capa *ocapa, + renew_capa_cb_t cb) +{ + int rc; + ENTRY; + EXP_CHECK_MD_OP(exp, renew_capa); + MD_COUNTER_INCREMENT(exp->exp_obd, renew_capa); + rc = MDP(exp->exp_obd, renew_capa)(exp, ocapa, cb); + RETURN(rc); } /* OBD Metadata Support */ @@ -1925,14 +1933,12 @@ static inline void obdo_free(struct obdo *oa) OBD_SLAB_FREE(oa, obdo_cachep, sizeof(*oa)); } -static inline void obdo2fid(struct obdo *oa, - struct lu_fid *fid) +static inline void obdo2fid(struct obdo *oa, struct lu_fid *fid) { /* something here */ } -static inline void fid2obdo(struct lu_fid *fid, - struct obdo *oa) +static inline void fid2obdo(struct lu_fid *fid, struct obdo *oa) { /* something here */ } diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index 2ec7f9b..6a93eda 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -97,6 +97,8 @@ extern int obd_race_state; #define OBD_FAIL_MDS_WRITEPAGE_PACK 0x136 #define OBD_FAIL_MDS_IS_SUBDIR_NET 0x137 #define OBD_FAIL_MDS_IS_SUBDIR_PACK 0x138 +#define OBD_FAIL_MDS_RENEW_CAPA_NET 0x139 +#define OBD_FAIL_MDS_RENEW_CAPA_PACK 0x13a #define OBD_FAIL_OST 0x200 #define OBD_FAIL_OST_CONNECT_NET 0x201 diff --git a/lustre/liblustre/dir.c b/lustre/liblustre/dir.c index ca1f72e..6e206c3 100644 --- a/lustre/liblustre/dir.c +++ b/lustre/liblustre/dir.c @@ -102,7 
+102,7 @@ static int llu_dir_do_readpage(struct inode *inode, struct page *page) ldlm_lock_dump_handle(D_OTHER, &lockh); offset = page->index << PAGE_SHIFT; - rc = md_readpage(sbi->ll_md_exp, &lli->lli_fid, + rc = md_readpage(sbi->ll_md_exp, &lli->lli_fid, NULL, offset, page, &request); if (!rc) { body = lustre_msg_buf(request->rq_repmsg, REPLY_REC_OFF, diff --git a/lustre/liblustre/file.c b/lustre/liblustre/file.c index 6779f1b..8a74c18 100644 --- a/lustre/liblustre/file.c +++ b/lustre/liblustre/file.c @@ -300,7 +300,7 @@ int llu_objects_destroy(struct ptlrpc_request *request, struct inode *dir) } } - rc = obd_destroy(llu_i2obdexp(dir), oa, lsm, &oti, NULL); + rc = obd_destroy(llu_i2obdexp(dir), oa, lsm, &oti, NULL, NULL); obdo_free(oa); if (rc) CERROR("obd destroy objid 0x"LPX64" error %d\n", diff --git a/lustre/liblustre/super.c b/lustre/liblustre/super.c index 89a14b5..1950081 100644 --- a/lustre/liblustre/super.c +++ b/lustre/liblustre/super.c @@ -451,7 +451,7 @@ static int llu_inode_revalidate(struct inode *inode) valid |= OBD_MD_FLEASIZE; } rc = md_getattr(sbi->ll_md_exp, ll_inode2fid(inode), - valid, ealen, &req); + NULL, valid, ealen, &req); if (rc) { CERROR("failure %d inode %llu\n", rc, (long long)llu_i2stat(inode)->st_ino); @@ -982,7 +982,7 @@ static int llu_readlink_internal(struct inode *inode, RETURN(0); } - rc = md_getattr(sbi->ll_md_exp, ll_inode2fid(inode), + rc = md_getattr(sbi->ll_md_exp, ll_inode2fid(inode), NULL, OBD_MD_LINKNAME, symlen, request); if (rc) { CERROR("inode %llu: rc = %d\n", (long long)st->st_ino, rc); @@ -2124,7 +2124,7 @@ llu_fsswop_mount(const char *source, llu_init_ea_size(sbi->ll_md_exp, sbi->ll_dt_exp); - err = md_getstatus(sbi->ll_md_exp, &rootfid); + err = md_getstatus(sbi->ll_md_exp, &rootfid, NULL); if (err) { CERROR("cannot mds_connect: rc = %d\n", err); GOTO(out_dt_fid, err); @@ -2133,7 +2133,7 @@ llu_fsswop_mount(const char *source, sbi->ll_root_fid = rootfid; /* fetch attr of root inode */ - err = 
md_getattr(sbi->ll_md_exp, &rootfid, + err = md_getattr(sbi->ll_md_exp, &rootfid, NULL, OBD_MD_FLGETATTR | OBD_MD_FLBLOCKS, 0, &request); if (err) { CERROR("md_getattr failed for root: rc = %d\n", err); diff --git a/lustre/llite/Makefile.in b/lustre/llite/Makefile.in index 2a671f3..957e5f0 100644 --- a/lustre/llite/Makefile.in +++ b/lustre/llite/Makefile.in @@ -1,5 +1,7 @@ MODULES := lustre -lustre-objs := dcache.o dir.o file.o llite_close.o llite_lib.o llite_nfs.o llite_fid.o rw.o lproc_llite.o namei.o symlink.o llite_mmap.o xattr.o remote_perm.o +lustre-objs := dcache.o dir.o file.o llite_close.o llite_lib.o llite_nfs.o +lustre-objs += llite_fid.o rw.o lproc_llite.o namei.o symlink.o llite_mmap.o +lustre-objs += xattr.o remote_perm.o llite_capa.o ifeq ($(PATCHLEVEL),4) lustre-objs += rw24.o super.o diff --git a/lustre/llite/dcache.c b/lustre/llite/dcache.c index 590010d..bbdefd6 100644 --- a/lustre/llite/dcache.c +++ b/lustre/llite/dcache.c @@ -616,6 +616,7 @@ do_lookup: struct ll_sb_info *sbi = ll_i2sbi(inode); struct ll_dentry_data *ldd = ll_d2d(de); struct obd_client_handle *handle; + struct obd_capa *oc; int rc = 0; ENTRY; LASSERT(ldd); @@ -639,9 +640,9 @@ do_lookup: unlock_kernel(); handle = (flag) ? 
&ldd->lld_mnt_och : &ldd->lld_cwd_och; - rc = obd_pin(sbi->ll_md_exp, &ll_i2info(inode)->lli_fid, - handle, flag); - + oc = ll_i2mdscapa(inode); + rc = obd_pin(sbi->ll_md_exp, ll_inode2fid(inode), oc, handle, flag); + capa_put(oc); if (rc) { lock_kernel(); memset(handle, 0, sizeof(*handle)); diff --git a/lustre/llite/dir.c b/lustre/llite/dir.c index 81f0dc7..3f97fcf 100644 --- a/lustre/llite/dir.c +++ b/lustre/llite/dir.c @@ -142,6 +142,7 @@ static int ll_dir_readpage(struct file *file, struct page *page) struct inode *inode = page->mapping->host; struct ptlrpc_request *request; struct mdt_body *body; + struct obd_capa *oc; __u64 hash; int rc; ENTRY; @@ -150,8 +151,10 @@ static int ll_dir_readpage(struct file *file, struct page *page) CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p) off %lu\n", inode->i_ino, inode->i_generation, inode, (unsigned long)hash); + oc = ll_i2mdscapa(inode); rc = md_readpage(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), - hash, page, &request); + oc, hash, page, &request); + capa_put(oc); if (!rc) { body = lustre_msg_buf(request->rq_repmsg, REPLY_REC_OFF, sizeof(*body)); @@ -579,6 +582,7 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file, int namelen, rc, len = 0; char *buf = NULL; char *filename; + struct obd_capa *oc; rc = obd_ioctl_getdata(&buf, &len, (void *)arg); if (rc) @@ -593,9 +597,11 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file, GOTO(out, rc = -EINVAL); } - rc = md_getattr_name(sbi->ll_md_exp, ll_inode2fid(inode), + oc = ll_i2mdscapa(inode); + rc = md_getattr_name(sbi->ll_md_exp, ll_inode2fid(inode), oc, filename, namelen, OBD_MD_FLID, 0, &request); + capa_put(oc); if (rc < 0) { CDEBUG(D_INFO, "md_getattr_name: %d\n", rc); GOTO(out, rc); @@ -618,9 +624,6 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file, if (op_data == NULL) RETURN(-ENOMEM); - ll_prepare_md_op_data(op_data, inode, - NULL, NULL, 0, 0); - LASSERT(sizeof(lum) == sizeof(*lump)); LASSERT(sizeof(lum.lmm_objects[0]) == 
sizeof(lump->lmm_objects[0])); @@ -640,8 +643,10 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file, lustre_swab_lov_user_md(&lum); /* swabbing is done in lov_setstripe() on server side */ + ll_prepare_md_op_data(op_data, inode, NULL, NULL, 0, 0); rc = md_setattr(sbi->ll_md_exp, op_data, &lum, sizeof(lum), NULL, 0, &request); + ll_finish_md_op_data(op_data); if (rc) { if (rc != -EPERM && rc != -EACCES) CERROR("md_setattr fails: rc = %d\n", rc); @@ -661,6 +666,7 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file, struct lov_mds_md *lmm = NULL; struct mdt_body *body; char *filename = NULL; + struct obd_capa *oc; int rc, lmmsize; rc = ll_get_max_mdsize(sbi, &lmmsize); @@ -673,19 +679,24 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file, if (IS_ERR(filename)) RETURN(PTR_ERR(filename)); - rc = md_getattr_name(sbi->ll_md_exp, ll_inode2fid(inode), + oc = ll_i2mdscapa(inode); + rc = md_getattr_name(sbi->ll_md_exp, + ll_inode2fid(inode), oc, filename, strlen(filename) + 1, OBD_MD_FLEASIZE | OBD_MD_FLDIREA, lmmsize, &request); + capa_put(oc); if (rc < 0) { CDEBUG(D_INFO, "md_getattr_name failed " "on %s: rc %d\n", filename, rc); GOTO(out_name, rc); } } else { - rc = md_getattr(sbi->ll_md_exp, ll_inode2fid(inode), + oc = ll_i2mdscapa(inode); + rc = md_getattr(sbi->ll_md_exp, ll_inode2fid(inode), oc, OBD_MD_FLEASIZE | OBD_MD_FLDIREA, lmmsize, &request); + capa_put(oc); if (rc < 0) { CDEBUG(D_INFO, "md_getattr failed on inode " "%lu/%u: rc %d\n", inode->i_ino, diff --git a/lustre/llite/file.c b/lustre/llite/file.c index 0f55396..bbc56b5 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -60,6 +60,7 @@ void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data, ((struct ll_iattr *)&op_data->attr)->ia_attr_flags = inode->i_flags; op_data->ioepoch = ll_i2info(inode)->lli_ioepoch; memcpy(&op_data->handle, fh, sizeof(op_data->handle)); + op_data->mod_capa1 = ll_i2mdscapa(inode); } static void 
ll_prepare_close(struct inode *inode, struct md_op_data *op_data, @@ -138,6 +139,9 @@ static int ll_close_inode_openhandle(struct obd_export *md_exp, epoch_close = (op_data->flags & MF_EPOCH_CLOSE) || !S_ISREG(inode->i_mode); rc = md_close(md_exp, op_data, och, &req); + + ll_finish_md_op_data(op_data); + OBD_FREE_PTR(op_data); if (rc == -EAGAIN) { /* This close must have closed the epoch. */ LASSERT(epoch_close); @@ -157,8 +161,6 @@ static int ll_close_inode_openhandle(struct obd_export *md_exp, if (!epoch_close) ll_init_done_writing(inode); - OBD_FREE_PTR(op_data); - if (rc == 0) { rc = ll_objects_destroy(req, inode); if (rc) @@ -282,6 +284,7 @@ int ll_md_close(struct obd_export *md_exp, struct inode *inode, LUSTRE_FPRIVATE(file) = NULL; ll_file_data_put(fd); + ll_oss_capa_close(inode, file); RETURN(rc); } @@ -360,6 +363,8 @@ static int ll_intent_file_open(struct file *file, void *lmm, rc = md_enqueue(sbi->ll_md_exp, LDLM_IBITS, itp, LCK_PW, op_data, &lockh, lmm, lmmsize, ldlm_completion_ast, ll_md_blocking_ast, NULL, 0); + + ll_finish_md_op_data(op_data); OBD_FREE_PTR(op_data); if (rc < 0) { CERROR("lock enqueue: err: %d\n", rc); @@ -590,6 +595,8 @@ int ll_file_open(struct inode *inode, struct file *file) if (!S_ISREG(inode->i_mode)) GOTO(out, rc); + ll_oss_capa_open(inode, file); + lsm = lli->lli_smd; if (lsm == NULL) { if (file->f_flags & O_LOV_DELAY_CREATE || @@ -639,6 +646,7 @@ int ll_inode_getattr(struct inode *inode, struct obdo *obdo) OBD_MD_FLSIZE | OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ | OBD_MD_FLMTIME | OBD_MD_FLCTIME | OBD_MD_FLGROUP; + oinfo.oi_capa = ll_i2mdscapa(inode); set = ptlrpc_prep_set(); if (set == NULL) { @@ -650,6 +658,7 @@ int ll_inode_getattr(struct inode *inode, struct obdo *obdo) rc = ptlrpc_set_wait(set); ptlrpc_set_destroy(set); } + capa_put(oinfo.oi_capa); if (rc) RETURN(rc); @@ -2215,6 +2224,7 @@ int ll_fsync(struct file *file, struct dentry *dentry, int data) struct ll_inode_info *lli = ll_i2info(inode); struct lov_stripe_md *lsm = 
lli->lli_smd; struct ptlrpc_request *req; + struct obd_capa *oc; int rc, err; ENTRY; CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino, @@ -2238,8 +2248,10 @@ int ll_fsync(struct file *file, struct dentry *dentry, int data) rc = err; } - err = md_sync(ll_i2sbi(inode)->ll_md_exp, - ll_inode2fid(inode), &req); + oc = ll_i2mdscapa(inode); + err = md_sync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), oc, + &req); + capa_put(oc); if (!rc) rc = err; if (!err) @@ -2247,6 +2259,7 @@ int ll_fsync(struct file *file, struct dentry *dentry, int data) if (data && lsm) { struct obdo *oa = obdo_alloc(); + struct obd_capa *ocapa; if (!oa) RETURN(rc ? rc : -ENOMEM); @@ -2257,8 +2270,10 @@ int ll_fsync(struct file *file, struct dentry *dentry, int data) OBD_MD_FLMTIME | OBD_MD_FLCTIME | OBD_MD_FLGROUP); + ocapa = ll_lookup_oss_capa(inode, CAPA_OPC_OSS_WRITE); err = obd_sync(ll_i2sbi(inode)->ll_dt_exp, oa, lsm, - 0, OBD_OBJECT_EOF); + 0, OBD_OBJECT_EOF, ocapa); + capa_put(ocapa); if (!rc) rc = err; obdo_free(oa); @@ -2464,6 +2479,7 @@ int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it) struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode); obd_valid valid = OBD_MD_FLGETATTR; int ealen = 0; + struct obd_capa *oc; if (S_ISREG(inode->i_mode)) { rc = ll_get_max_mdsize(sbi, &ealen); @@ -2471,7 +2487,10 @@ int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it) RETURN(rc); valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE; } - rc = md_getattr(sbi->ll_md_exp, ll_inode2fid(inode), valid, ealen, &req); + oc = ll_i2mdscapa(inode); + rc = md_getattr(sbi->ll_md_exp, ll_inode2fid(inode), oc, valid, + ealen, &req); + capa_put(oc); if (rc) { rc = ll_inode_revalidate_fini(inode, rc); RETURN(rc); diff --git a/lustre/llite/llite_capa.c b/lustre/llite/llite_capa.c new file mode 100644 index 0000000..5bbae3f --- /dev/null +++ b/lustre/llite/llite_capa.c @@ -0,0 +1,651 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * 
vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2005 Cluster File Systems, Inc. + * + * Author: Lai Siyao + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#define DEBUG_SUBSYSTEM S_LLITE + +#include +#include +#include +#include +#include + +#include +#include "llite_internal.h" + +/* for obd_capa.c_list, client capa might stay in three places: + * 1. ll_capa_list. + * 2. ll_idle_capas. + * 3. stand alone: just allocated. 
+ */ + +/* capas for oss writeback and those failed to renew */ +static LIST_HEAD(ll_idle_capas); +static struct ptlrpc_thread ll_capa_thread; +static struct list_head *ll_capa_list = &capa_list[CAPA_SITE_CLIENT]; + +/* llite capa renewal timer */ +cfs_timer_t ll_capa_timer; +/* for debug: indicate whether capa on llite is enabled or not */ +static atomic_t ll_capa_debug = ATOMIC_INIT(0); +/* arm ll_capa_timer at @expiry when that is earlier than its deadline or it is unarmed; @ocapa is only logged */ +static inline void update_capa_timer(struct obd_capa *ocapa, cfs_time_t expiry) +{ + if (cfs_time_before(expiry, cfs_timer_deadline(&ll_capa_timer)) || + !cfs_timer_is_armed(&ll_capa_timer)) { + cfs_timer_arm(&ll_capa_timer, expiry); + DEBUG_CAPA(D_SEC, &ocapa->c_capa, + "ll_capa_timer update: %lu/%lu by", + expiry, cfs_time_current()); + } +} +/* wake-up condition of the capa thread; re-arms the timer as a side effect when nothing is due yet */ +static inline int have_expired_capa(void) +{ + struct obd_capa *ocapa = NULL; + int expired = 0; + + /* return 1 if the head of ll_capa_list is about to expire; only when + * ll_capa_list is empty is the head of ll_idle_capas checked instead. + */ + spin_lock(&capa_lock); + if (!list_empty(ll_capa_list)) { + ocapa = list_entry(ll_capa_list->next, struct obd_capa, c_list); + expired = capa_is_to_expire(ocapa); + if (!expired) + update_capa_timer(ocapa, capa_renewal_time(ocapa)); + } else if (!list_empty(&ll_idle_capas)) { + ocapa = list_entry(ll_idle_capas.next, struct obd_capa, c_list); + expired = capa_is_expired(ocapa); + if (!expired) + update_capa_timer(ocapa, ocapa->c_expiry); + } + spin_unlock(&capa_lock); + /* NOTE(review): ocapa is dereferenced below after capa_lock was dropped -- confirm it cannot be freed here */ + if (expired) + DEBUG_CAPA(D_SEC, &ocapa->c_capa, "expired"); + return expired; +} +/* test whether SVC_STOPPING has been set in ll_capa_thread.t_flags */ +static inline int ll_capa_check_stop(void) +{ + return (ll_capa_thread.t_flags & SVC_STOPPING) ?
1: 0; +} +/* link @ocapa right after the last entry of @head that expires before it, or at the front if none does */ +static void sort_add_capa(struct obd_capa *ocapa, struct list_head *head) +{ + struct obd_capa *tmp; + struct list_head *before = NULL; + + /* TODO: client capa is sorted by expiry, this could be optimized */ + list_for_each_entry_reverse(tmp, head, c_list) { + if (cfs_time_after(ocapa->c_expiry, tmp->c_expiry)) { + before = &tmp->c_list; + break; + } + } + + LASSERT(&ocapa->c_list != before); + list_add(&ocapa->c_list, before ?: head); +} +/* test-match (LDLM_FL_TEST_LOCK) an IBITS lock with @inodebits on the resource named by i_ino/i_generation; returns ldlm_lock_match()'s result */ +static int inode_have_md_lock(struct inode *inode, __u64 inodebits) +{ + struct obd_export *exp = ll_i2mdexp(inode); + struct lustre_handle lockh; + struct ldlm_res_id res_id = { .name = {0} }; + ldlm_policy_data_t policy = { .l_inodebits = {inodebits}}; + int flags, rc; + ENTRY; + + res_id.name[0] = inode->i_ino; + res_id.name[1] = inode->i_generation; + + CDEBUG(D_SEC, "trying to match res "LPU64"\n", res_id.name[0]); + + flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK; + rc = ldlm_lock_match(exp->exp_obd->obd_namespace, flags, &res_id, + LDLM_IBITS, &policy, LCK_CR|LCK_CW|LCK_PR, &lockh); + RETURN(rc); +} +/* detach @ocapa from its inode (lli_mds_capa slot or lli_oss_capas list) and from c_list, then free it */ +static void ll_delete_capa(struct obd_capa *ocapa) +{ + struct ll_inode_info *lli = ll_i2info(ocapa->u.cli.inode); + + if (capa_for_mds(&ocapa->c_capa)) { + capa_put(ocapa); + LASSERT(lli->lli_mds_capa == ocapa); + lli->lli_mds_capa = NULL; + } else if (capa_for_oss(&ocapa->c_capa)) { + list_del_init(&ocapa->u.cli.lli_list); + } + + DEBUG_CAPA(D_SEC, &ocapa->c_capa, "free client"); + list_del(&ocapa->c_list); + free_capa(ocapa); +} + +/* three places where client capa is deleted: + * 1. capa_thread_main(), main place to delete expired capa. + * 2. ll_clear_inode_capas() in ll_clear_inode(). + * 3. ll_truncate_free_capa() delete truncate capa explicitly in ll_truncate().
+ */ +static int capa_thread_main(void *unused) +{ + struct obd_capa *ocapa, *tmp, *next; + struct inode *inode = NULL; + struct l_wait_info lwi = { 0 }; + int rc; + ENTRY; + /* capa renewal thread body: runs until ll_capa_check_stop() reports a stop request */ + cfs_daemonize("ll_capa"); + /* announce startup; ll_capa_thread_start() waits for SVC_RUNNING */ + ll_capa_thread.t_flags = SVC_RUNNING; + wake_up(&ll_capa_thread.t_ctl_waitq); + /* each pass: renew capas that are about to expire, then walk the idle list */ + while (1) { + l_wait_event(ll_capa_thread.t_ctl_waitq, + (ll_capa_check_stop() || have_expired_capa()), + &lwi); + + if (ll_capa_check_stop()) + break; + /* pass 1: ll_capa_list, stopping at the first capa that is not yet due (kept in next) */ + spin_lock(&capa_lock); + next = NULL; + list_for_each_entry_safe(ocapa, tmp, ll_capa_list, c_list) { + LASSERT(ocapa->c_capa.lc_opc != CAPA_OPC_OSS_TRUNC); + + if (!capa_is_to_expire(ocapa)) { + next = ocapa; + break; + } + + if (capa_for_mds(&ocapa->c_capa) && + !ll_have_md_lock(ocapa->u.cli.inode, + MDS_INODELOCK_LOOKUP) && + !obd_capa_is_root(ocapa)) { + /* fid capa without LOOKUP lock won't renew, + * move to idle list (except root fid) */ + DEBUG_CAPA(D_SEC, &ocapa->c_capa, + "skip renewal for"); + list_del_init(&ocapa->c_list); + sort_add_capa(ocapa, &ll_idle_capas); + continue; + } + + if (capa_for_oss(&ocapa->c_capa) && + atomic_read(&ocapa->u.cli.open_count) == 0) { + /* oss capa with open_count == 0 won't renew, + * move to idle list */ + list_del_init(&ocapa->c_list); + sort_add_capa(ocapa, &ll_idle_capas); + continue; + } + + /* NB iput() is in ll_update_capa() */ + inode = igrab(ocapa->u.cli.inode); + if (inode == NULL) { + DEBUG_CAPA(D_SEC, &ocapa->c_capa, + "igrab failed for"); + ll_delete_capa(ocapa); + continue; + } + /* NOTE(review): capa_lock is dropped around md_renew_capa() while the _safe iterator caches tmp -- confirm tmp cannot be freed meanwhile */ + list_del_init(&ocapa->c_list); + capa_get(ocapa); + spin_unlock(&capa_lock); + + rc = md_renew_capa(ll_i2mdexp(inode), ocapa, + ll_update_capa); + spin_lock(&capa_lock); + if (rc) + sort_add_capa(ocapa, &ll_idle_capas); + } + /* re-arm the timer for the first capa that was not yet due */ + if (next) + update_capa_timer(next, capa_renewal_time(next)); + /* pass 2: walk ll_idle_capas, still under capa_lock */ + list_for_each_entry_safe(ocapa, tmp, &ll_idle_capas, c_list) { + LASSERT(atomic_read(&ocapa->u.cli.open_count) == 0); + + if (!capa_is_expired(ocapa)) { + if (!next) + update_capa_timer(ocapa,
ocapa->c_expiry); + break; + } + + if (atomic_read(&ocapa->c_refc)) { + DEBUG_CAPA(D_SEC, &ocapa->c_capa, + "expired(c_refc %d), don't release", + atomic_read(&ocapa->c_refc)); + obd_capa_set_expired(ocapa); + /* don't try to renew any more */ + list_del_init(&ocapa->c_list); + continue; + } + + /* expired capa is released. */ + DEBUG_CAPA(D_SEC, &ocapa->c_capa, "release expired"); + ll_delete_capa(ocapa); + } + + spin_unlock(&capa_lock); + } + /* the loop is only left on a stop request: acknowledge it to ll_capa_thread_stop() */ + ll_capa_thread.t_flags = SVC_STOPPED; + wake_up(&ll_capa_thread.t_ctl_waitq); + RETURN(0); +} +/* wake the capa thread; presumably the ll_capa_timer handler -- its registration is not in view */ +void ll_capa_timer_callback(unsigned long unused) +{ + wake_up(&ll_capa_thread.t_ctl_waitq); +} +/* spawn capa_thread_main() and wait until it sets SVC_RUNNING; returns 0 or the negative rc of kernel_thread() */ +int ll_capa_thread_start(void) +{ + int rc; + ENTRY; + + init_waitqueue_head(&ll_capa_thread.t_ctl_waitq); + + rc = kernel_thread(capa_thread_main, NULL, 0); + if (rc < 0) { + CERROR("cannot start expired capa thread: rc %d\n", rc); + RETURN(rc); + } + wait_event(ll_capa_thread.t_ctl_waitq, + ll_capa_thread.t_flags & SVC_RUNNING); + + RETURN(0); +} +/* set SVC_STOPPING, wake the capa thread and wait until it reports SVC_STOPPED */ +void ll_capa_thread_stop(void) +{ + ll_capa_thread.t_flags = SVC_STOPPING; + wake_up(&ll_capa_thread.t_ctl_waitq); + wait_event(ll_capa_thread.t_ctl_waitq, + ll_capa_thread.t_flags & SVC_STOPPED); +} +/* return the first valid OSS capa of @inode whose opc covers every bit of @opc, or NULL; no reference is taken */ +static struct obd_capa *do_lookup_oss_capa(struct inode *inode, int opc) +{ + struct ll_inode_info *lli = ll_i2info(inode); + struct obd_capa *ocapa; + + /* inside capa_lock */ + list_for_each_entry(ocapa, &lli->lli_oss_capas, u.cli.lli_list) { + if (!obd_capa_is_valid(ocapa)) + continue; + if ((capa_opc(&ocapa->c_capa) & opc) != opc) + continue; + /* ocapa grants all requested opc bits (the test used to be inverted and skipped exactly these) */ + LASSERT(lu_fid_eq(capa_fid(&ocapa->c_capa), + ll_inode2fid(inode))); + LASSERT(ocapa->c_site == CAPA_SITE_CLIENT); + + DEBUG_CAPA(D_SEC, &ocapa->c_capa, "found client"); + return ocapa; + } + + return NULL; +} + +struct obd_capa *ll_lookup_oss_capa(struct inode *inode, __u64 opc) +{ + struct ll_inode_info *lli = ll_i2info(inode); + struct obd_capa *ocapa; + int found = 0; + + if ((ll_i2sbi(inode)->ll_flags & LL_SBI_OSS_CAPA) == 0) +
return NULL; + ENTRY; + LASSERT(opc == CAPA_OPC_OSS_WRITE || + opc == (CAPA_OPC_OSS_WRITE | CAPA_OPC_OSS_READ) || + opc == CAPA_OPC_OSS_TRUNC); + + spin_lock(&capa_lock); + list_for_each_entry(ocapa, &lli->lli_oss_capas, u.cli.lli_list) { + if (!obd_capa_is_valid(ocapa)) + continue; + if ((opc & CAPA_OPC_OSS_WRITE) && + capa_opc_supported(&ocapa->c_capa, opc)) { + found = 1; break; + } else if ((opc & CAPA_OPC_OSS_READ) && + capa_opc_supported(&ocapa->c_capa, opc)) { + found = 1; break; + } else if ((opc & CAPA_OPC_OSS_TRUNC) && + capa_opc_supported(&ocapa->c_capa, opc)) { + found = 1; break; + } + } + + if (found) { + LASSERT(lu_fid_eq(capa_fid(&ocapa->c_capa), + ll_inode2fid(inode))); + LASSERT(ocapa->c_site == CAPA_SITE_CLIENT); + + capa_get(ocapa); + + DEBUG_CAPA(D_SEC, &ocapa->c_capa, "found client"); + } else { + /* after an exhausted list walk the cursor does not point at a + * real capa, so never hand it back to the caller */ + ocapa = NULL; + + /* complain only once until ll_add_capa() re-arms the flag */ + if (atomic_read(&ll_capa_debug)) { + CERROR("no capability for "DFID" opc "LPX64"\n", + PFID(&lli->lli_fid), opc); + atomic_set(&ll_capa_debug, 0); + } + } + spin_unlock(&capa_lock); + RETURN(ocapa); +} + +struct obd_capa *ll_i2mdscapa(struct inode *inode) +{ + struct obd_capa *ocapa; + + LASSERT(inode); + if ((ll_i2sbi(inode)->ll_flags & LL_SBI_MDS_CAPA) == 0) + return NULL; + + spin_lock(&capa_lock); + ocapa = capa_get(ll_i2info(inode)->lli_mds_capa); + spin_unlock(&capa_lock); + if (ocapa && !obd_capa_is_valid(ocapa)) { + DEBUG_CAPA(D_ERROR, &ocapa->c_capa, "invalid"); + capa_put(ocapa); + ocapa = NULL; + } + + if (!ocapa && atomic_read(&ll_capa_debug)) { + CDEBUG(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ? 
+ D_ERROR : D_SEC, "no MDS capa for (ino %lu)\n", + inode->i_ino); + if (inode_have_md_lock(inode, MDS_INODELOCK_LOOKUP)) + LBUG(); + atomic_set(&ll_capa_debug, 0); + } + + return ocapa; +} + +static inline int do_add_mds_capa(struct inode *inode, struct obd_capa **pcapa) +{ + struct ll_inode_info *lli = ll_i2info(inode); + struct obd_capa *old = lli->lli_mds_capa; + struct obd_capa *ocapa = *pcapa; + int rc = 0; + + if (!old) { + ocapa->u.cli.inode = inode; + lli->lli_mds_capa = capa_get(ocapa); + obd_capa_clear_new(ocapa); + obd_capa_set_valid(ocapa); + + DEBUG_CAPA(D_SEC, &ocapa->c_capa, "add fid"); + } else { + if (ocapa->c_capa.lc_expiry == old->c_capa.lc_expiry) { + rc = -EEXIST; + } else { + spin_lock(&old->c_lock); + old->c_capa = ocapa->c_capa; + obd_capa_set_valid(old); + spin_unlock(&old->c_lock); + + DEBUG_CAPA(D_SEC, &old->c_capa, "update fid"); + } + + free_capa(ocapa); + *pcapa = old; + } + + return rc; +} + +static inline void inode_add_oss_capa(struct inode *inode, + struct obd_capa *ocapa) +{ + struct ll_inode_info *lli = ll_i2info(inode); + struct obd_capa *tmp; + struct list_head *next = NULL; + + /* capa is sorted in lli_oss_capas so lookup can always find the + * latest one */ + list_for_each_entry(tmp, &lli->lli_oss_capas, u.cli.lli_list) { + if (cfs_time_after(ocapa->c_expiry, tmp->c_expiry)) { + next = &tmp->u.cli.lli_list; + break; + } + } + list_move_tail(&ocapa->u.cli.lli_list, next ?: &lli->lli_oss_capas); +} + +static inline int do_add_oss_capa(struct inode *inode, struct obd_capa **pcapa) +{ + struct obd_capa *old, *ocapa = *pcapa; + struct lustre_capa *capa = &ocapa->c_capa; + int rc = 0; + + LASSERTF(S_ISREG(inode->i_mode), + "inode has oss capa, but not regular file, mode: %d\n", + inode->i_mode); + + /* FIXME: can't replace it so easily with fine-grained opc */ + old = do_lookup_oss_capa(inode, capa->lc_opc & CAPA_OPC_OSS_ONLY); + if (!old) { + ocapa->u.cli.inode = inode; + atomic_set(&ocapa->u.cli.open_count, 0); + 
INIT_LIST_HEAD(&ocapa->u.cli.lli_list); + obd_capa_set_valid(ocapa); + + DEBUG_CAPA(D_SEC, capa, "add oss"); + } else { + if (old->c_capa.lc_expiry == capa->lc_expiry) { + rc = -EEXIST; + } else { + spin_lock(&old->c_lock); + old->c_capa = *capa; + obd_capa_set_valid(old); + spin_unlock(&old->c_lock); + + DEBUG_CAPA(D_SEC, capa, "update oss"); + } + + free_capa(ocapa); + *pcapa = old; + } + + if (!rc) + inode_add_oss_capa(inode, *pcapa); + return rc; +} + +struct obd_capa *ll_add_capa(struct inode *inode, struct obd_capa *ocapa) +{ + struct obd_capa **pcapa = &ocapa; + int rc; + + spin_lock(&capa_lock); + rc = capa_for_mds(&ocapa->c_capa) ? do_add_mds_capa(inode, pcapa) : + do_add_oss_capa(inode, pcapa); + + ocapa = *pcapa; + /* truncate capa won't renew, or no existed capa changed, don't update + * capa timer. */ + if (!rc && ocapa->c_capa.lc_opc != CAPA_OPC_OSS_TRUNC) { + list_del_init(&ocapa->c_list); + sort_add_capa(ocapa, ll_capa_list); + + spin_lock(&ocapa->c_lock); + set_capa_expiry(ocapa); + spin_unlock(&ocapa->c_lock); + update_capa_timer(ocapa, capa_renewal_time(ocapa)); + } + + atomic_set(&ll_capa_debug, 1); + spin_unlock(&capa_lock); + + return ocapa; +} + + +int ll_update_capa(struct obd_capa *ocapa, struct lustre_capa *capa) +{ + struct inode *inode = ocapa->u.cli.inode; + cfs_time_t expiry; + int rc = 0; + + LASSERT(ocapa); + + if (IS_ERR(capa)) { + /* set error code */ + rc = PTR_ERR(capa); + /* failed capa won't be renewed any longer, but if -EIO, client + * might be doing recovery, retry in 1 min. */ + spin_lock(&capa_lock); + if (rc == -EIO) { + expiry = cfs_time_current() + cfs_time_seconds(60); + DEBUG_CAPA(D_SEC, &ocapa->c_capa, + "renewal failed: -EIO, retry in 1 min"); + goto retry; + } else { + sort_add_capa(ocapa, &ll_idle_capas); + } + spin_unlock(&capa_lock); + + DEBUG_CAPA(rc == -ENOENT ? 
D_SEC : D_ERROR, &ocapa->c_capa, + "renewal failed(rc: %d) for", rc); + goto out; + } + + LASSERT(!memcmp(&ocapa->c_capa, capa, + offsetof(struct lustre_capa, lc_flags))); + + spin_lock(&ocapa->c_lock); + ocapa->c_capa = *capa; + set_capa_expiry(ocapa); + spin_unlock(&ocapa->c_lock); + + spin_lock(&capa_lock); + if (capa->lc_opc & (CAPA_OPC_OSS_READ | CAPA_OPC_OSS_WRITE)) + inode_add_oss_capa(inode, ocapa); + DEBUG_CAPA(D_SEC, capa, "renew"); + + expiry = capa_renewal_time(ocapa); +retry: + sort_add_capa(ocapa, ll_capa_list); + update_capa_timer(ocapa, expiry); + spin_unlock(&capa_lock); + +out: + capa_put(ocapa); + iput(inode); + return rc; +} + +void ll_oss_capa_open(struct inode *inode, struct file *file) +{ + struct obd_capa *ocapa; + int opc = capa_open_opc(open_flags_to_accmode(file->f_flags)); + + if ((ll_i2sbi(inode)->ll_flags & LL_SBI_OSS_CAPA) == 0) + return; + + if (!S_ISREG(inode->i_mode)) + return; + + spin_lock(&capa_lock); + ocapa = do_lookup_oss_capa(inode, opc); + if (!ocapa) { + if (atomic_read(&ll_capa_debug)) { + CDEBUG(D_ERROR, "no capa for (uid %u op %d ino %lu)\n", + (unsigned)current->uid, opc, inode->i_ino); + atomic_set(&ll_capa_debug, 0); + } + spin_unlock(&capa_lock); + return; + } + atomic_inc(&ocapa->u.cli.open_count); + spin_unlock(&capa_lock); + + DEBUG_CAPA(D_SEC, &ocapa->c_capa, "open (count: %d)", + atomic_read(&ocapa->u.cli.open_count)); +} + +void ll_oss_capa_close(struct inode *inode, struct file *file) +{ + struct obd_capa *ocapa; + int opc = capa_open_opc(open_flags_to_accmode(file->f_flags)); + + if ((ll_i2sbi(inode)->ll_flags & LL_SBI_OSS_CAPA) == 0) + return; + + if (!S_ISREG(inode->i_mode)) + return; + + spin_lock(&capa_lock); + ocapa = do_lookup_oss_capa(inode, opc); + if (!ocapa) { + spin_unlock(&capa_lock); + return; + } + atomic_dec(&ocapa->u.cli.open_count); + spin_unlock(&capa_lock); + + DEBUG_CAPA(D_SEC, &ocapa->c_capa, "close (count: %d)", + atomic_read(&ocapa->u.cli.open_count)); +} + +/* delete 
CAPA_OPC_OSS_TRUNC only */ +void ll_truncate_free_capa(struct obd_capa *ocapa) +{ + struct inode *inode; + + if (!ocapa) + return; + + LASSERT(ocapa->c_capa.lc_opc & CAPA_OPC_OSS_TRUNC); + DEBUG_CAPA(D_SEC, &ocapa->c_capa, "release truncate"); + + inode = ocapa->u.cli.inode; + + spin_lock(&capa_lock); + capa_put(ocapa); + ll_delete_capa(ocapa); + spin_unlock(&capa_lock); +} + +void ll_clear_inode_capas(struct inode *inode) +{ + struct ll_inode_info *lli = ll_i2info(inode); + struct obd_capa *ocapa, *tmp; + + spin_lock(&capa_lock); + ocapa = lli->lli_mds_capa; + if (ocapa) + ll_delete_capa(ocapa); + + list_for_each_entry_safe(ocapa, tmp, &lli->lli_oss_capas, + u.cli.lli_list) + ll_delete_capa(ocapa); + spin_unlock(&capa_lock); +} diff --git a/lustre/llite/llite_internal.h b/lustre/llite/llite_internal.h index 8eff9fa..13442da 100644 --- a/lustre/llite/llite_internal.h +++ b/lustre/llite/llite_internal.h @@ -135,6 +135,11 @@ struct ll_inode_info { /* identifying fields for both metadata and data stacks. 
*/ struct lu_fid lli_fid; struct lov_stripe_md *lli_smd; + + /* fid capability */ + struct obd_capa *lli_mds_capa; + /* oss capability list */ + struct list_head lli_oss_capas; }; /* @@ -218,13 +223,15 @@ struct ll_rw_process_info { }; /* flags for sbi->ll_flags */ -#define LL_SBI_NOLCK 0x01 /* DLM locking disabled (directio-only) */ -#define LL_SBI_CHECKSUM 0x02 /* checksum each page as it's written */ -#define LL_SBI_FLOCK 0x04 -#define LL_SBI_USER_XATTR 0x08 /* support user xattr */ -#define LL_SBI_ACL 0x10 /* support ACL */ -#define LL_SBI_JOIN 0x20 /* support JOIN */ -#define LL_SBI_RMT_CLIENT 0x40 /* remote client */ +#define LL_SBI_NOLCK 0x01 /* DLM locking disabled (directio-only) */ +#define LL_SBI_CHECKSUM 0x02 /* checksum each page as it's written */ +#define LL_SBI_FLOCK 0x04 +#define LL_SBI_USER_XATTR 0x08 /* support user xattr */ +#define LL_SBI_ACL 0x10 /* support ACL */ +#define LL_SBI_JOIN 0x20 /* support JOIN */ +#define LL_SBI_RMT_CLIENT 0x40 /* remote client */ +#define LL_SBI_MDS_CAPA 0x80 /* support mds capa */ +#define LL_SBI_OSS_CAPA 0x100 /* support oss capa */ struct ll_sb_info { struct list_head ll_list; @@ -451,9 +458,6 @@ struct inode *ll_iget(struct super_block *sb, ino_t hash, struct dentry *ll_find_alias(struct inode *, struct dentry *); int ll_md_blocking_ast(struct ldlm_lock *, struct ldlm_lock_desc *, void *data, int flag); -void ll_prepare_md_op_data(struct md_op_data *op_data, struct inode *i1, - struct inode *i2, const char *name, int namelen, - int mode); int ll_md_cancel_unused(struct lustre_handle *, struct inode *, int flags, void *opaque); #ifndef LUSTRE_KERNEL_VERSION @@ -569,6 +573,10 @@ int ll_get_max_mdsize(struct ll_sb_info *sbi, int *max_mdsize); int ll_process_config(struct lustre_cfg *lcfg); int ll_ioctl_getfacl(struct inode *inode, struct rmtacl_ioctl_data *ioc); int ll_ioctl_setfacl(struct inode *inode, struct rmtacl_ioctl_data *ioc); +void ll_prepare_md_op_data(struct md_op_data *op_data, struct inode *i1, + 
struct inode *i2, const char *name, int namelen, + int mode); +void ll_finish_md_op_data(struct md_op_data *op_data); /* llite/llite_nfs.c */ extern struct export_operations lustre_export_operations; @@ -731,4 +739,19 @@ int ll_fid_dt_alloc(struct ll_sb_info *sbi, struct lu_fid *fid, ino_t ll_fid_build_ino(struct ll_sb_info *sbi, struct lu_fid *fid); +/* llite/llite_capa.c */ +extern cfs_timer_t ll_capa_timer; + +int ll_capa_thread_start(void); +void ll_capa_thread_stop(void); +void ll_capa_timer_callback(unsigned long unused); +struct obd_capa *ll_lookup_oss_capa(struct inode *inode, __u64 opc); +struct obd_capa *ll_add_capa(struct inode *inode, struct obd_capa *ocapa); +void ll_oss_capa_open(struct inode *inode, struct file *file); +void ll_oss_capa_close(struct inode *inode, struct file *file); +int ll_update_capa(struct obd_capa *ocapa, struct lustre_capa *capa); +void ll_truncate_free_capa(struct obd_capa *ocapa); +void ll_clear_inode_capas(struct inode *inode); +struct obd_capa *ll_i2mdscapa(struct inode *inode); + #endif /* LLITE_INTERNAL_H */ diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c index aebc9cb..2ece804 100644 --- a/lustre/llite/llite_lib.c +++ b/lustre/llite/llite_lib.c @@ -156,6 +156,7 @@ static int client_common_fill_super(struct super_block *sb, struct ll_sb_info *sbi = ll_s2sbi(sb); struct obd_device *obd; struct lu_fid rootfid; + struct obd_capa *pc = NULL; struct obd_statfs osfs; struct ptlrpc_request *request = NULL; struct lustre_handle dt_conn = {0, }; @@ -185,7 +186,8 @@ static int client_common_fill_super(struct super_block *sb, /* indicate the features supported by this client */ data->ocd_connect_flags = OBD_CONNECT_IBITS | OBD_CONNECT_NODEVOH | OBD_CONNECT_ACL | OBD_CONNECT_JOIN | - OBD_CONNECT_ATTRFID | OBD_CONNECT_VERSION; + OBD_CONNECT_ATTRFID | OBD_CONNECT_VERSION;/* | + OBD_CONNECT_MDS_CAPA | OBD_CONNECT_OSS_CAPA;*/ data->ocd_ibits_known = MDS_INODELOCK_FULL; data->ocd_version = LUSTRE_VERSION_CODE; @@ -262,6 
+264,16 @@ static int client_common_fill_super(struct super_block *sb, sbi->ll_flags &= ~LL_SBI_RMT_CLIENT; } + if (data->ocd_connect_flags & OBD_CONNECT_MDS_CAPA) { + CDEBUG(D_SEC, "client enabled fid capa!\n"); + sbi->ll_flags |= LL_SBI_MDS_CAPA; + } + + if (data->ocd_connect_flags & OBD_CONNECT_OSS_CAPA) { + CDEBUG(D_SEC, "client enabled oss capa!\n"); + sbi->ll_flags |= LL_SBI_OSS_CAPA; + } + #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)) /* We set sb->s_dev equal on all lustre clients in order to support * NFS export clustering. NFSD requires that the FSID be the same @@ -289,6 +301,8 @@ static int client_common_fill_super(struct super_block *sb, data->ocd_connect_flags = OBD_CONNECT_GRANT | OBD_CONNECT_VERSION | OBD_CONNECT_REQPORTAL | OBD_CONNECT_BRW_SIZE; + if (sbi->ll_flags & LL_SBI_OSS_CAPA) + data->ocd_connect_flags |= OBD_CONNECT_OSS_CAPA; CDEBUG(D_RPCTRACE, "ocd_connect_flags: "LPX64" ocd_version: %d " "ocd_grant: %d\n", data->ocd_connect_flags, @@ -343,7 +357,7 @@ static int client_common_fill_super(struct super_block *sb, GOTO(out_dt, err); } - err = md_getstatus(sbi->ll_md_exp, &rootfid); + err = md_getstatus(sbi->ll_md_exp, &rootfid, &pc); if (err) { CERROR("cannot mds_connect: rc = %d\n", err); GOTO(out_dt_fid, err); @@ -358,12 +372,14 @@ static int client_common_fill_super(struct super_block *sb, /* make root inode * XXX: move this to after cbd setup? */ - err = md_getattr(sbi->ll_md_exp, &rootfid, + err = md_getattr(sbi->ll_md_exp, &rootfid, pc, OBD_MD_FLGETATTR | OBD_MD_FLBLOCKS | (sbi->ll_flags & LL_SBI_ACL ? 
OBD_MD_FLACL : 0), 0, &request); if (err) { CERROR("md_getattr failed for root: rc = %d\n", err); + if (pc) + free_capa(pc); GOTO(out_dt, err); } @@ -372,9 +388,16 @@ static int client_common_fill_super(struct super_block *sb, &lmd); if (err) { CERROR("failed to understand root inode md: rc = %d\n", err); + if (pc) + free_capa(pc); ptlrpc_req_finished (request); GOTO(out_dt, err); } + if (pc) { + obd_capa_set_root(pc); + lmd.mds_capa = pc; + lmd.body->valid |= OBD_MD_FLMDSCAPA; + } LASSERT(fid_is_sane(&sbi->ll_root_fid)); root = ll_iget(sb, ll_fid_build_ino(sbi, &sbi->ll_root_fid), &lmd); @@ -1862,9 +1885,12 @@ int ll_iocontrol(struct inode *inode, struct file *file, switch(cmd) { case EXT3_IOC_GETFLAGS: { struct mdt_body *body; + struct obd_capa *oc; - rc = md_getattr(sbi->ll_md_exp, ll_inode2fid(inode), + oc = ll_i2mdscapa(inode); + rc = md_getattr(sbi->ll_md_exp, ll_inode2fid(inode), oc, OBD_MD_FLFLAGS, 0, &req); + capa_put(oc); if (rc) { CERROR("failure %d inode %lu\n", rc, inode->i_ino); RETURN(-abs(rc)); @@ -2184,11 +2210,46 @@ int ll_process_config(struct lustre_cfg *lcfg) return(rc); } +/* this function prepares md_op_data hint for passing ot down to MD stack. */ +void ll_prepare_md_op_data(struct md_op_data *op_data, struct inode *i1, + struct inode *i2, const char *name, int namelen, + int mode) +{ + LASSERT(i1 != NULL); + LASSERT(op_data != NULL); + + ll_i2gids(op_data->suppgids, i1, i2); + op_data->fid1 = ll_i2info(i1)->lli_fid; + op_data->mod_capa1 = ll_i2mdscapa(i1); + + /* @i2 may be NULL. In this case caller itself has to initialize ->fid2 + * if needed. 
*/ + if (i2) { + op_data->fid2 = *ll_inode2fid(i2); + op_data->mod_capa2 = ll_i2mdscapa(i2); + } + + op_data->name = name; + op_data->namelen = namelen; + op_data->create_mode = mode; + op_data->mod_time = CURRENT_SECONDS; + op_data->fsuid = current->fsuid; + op_data->fsgid = current->fsgid; + op_data->cap = current->cap_effective; +} + +void ll_finish_md_op_data(struct md_op_data *op_data) +{ + capa_put(op_data->mod_capa1); + capa_put(op_data->mod_capa2); +} + int ll_ioctl_getfacl(struct inode *inode, struct rmtacl_ioctl_data *ioc) { struct ptlrpc_request *req = NULL; struct mds_body *body; char *cmd, *buf; + struct obd_capa *oc; int rc, buflen; ENTRY; @@ -2200,9 +2261,11 @@ int ll_ioctl_getfacl(struct inode *inode, struct rmtacl_ioctl_data *ioc) if (copy_from_user(cmd, ioc->cmd, ioc->cmd_len)) GOTO(out, rc = -EFAULT); - rc = md_getxattr(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), + oc = ll_i2mdscapa(inode); + rc = md_getxattr(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), oc, OBD_MD_FLXATTR, XATTR_NAME_LUSTRE_ACL, cmd, ioc->cmd_len, ioc->res_len, 0, &req); + capa_put(oc); if (rc < 0) { CERROR("mdc_getxattr %s [%s] failed: %d\n", XATTR_NAME_LUSTRE_ACL, cmd, rc); @@ -2230,6 +2293,7 @@ int ll_ioctl_setfacl(struct inode *inode, struct rmtacl_ioctl_data *ioc) { struct ptlrpc_request *req = NULL; char *cmd, *buf; + struct obd_capa *oc; int buflen, rc; ENTRY; @@ -2241,9 +2305,11 @@ int ll_ioctl_setfacl(struct inode *inode, struct rmtacl_ioctl_data *ioc) if (copy_from_user(cmd, ioc->cmd, ioc->cmd_len)) GOTO(out, rc = -EFAULT); - rc = md_setxattr(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), + oc = ll_i2mdscapa(inode); + rc = md_setxattr(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), oc, OBD_MD_FLXATTR, XATTR_NAME_LUSTRE_ACL, cmd, ioc->cmd_len, ioc->res_len, 0, &req); + capa_put(oc); if (rc) { CERROR("mdc_setxattr %s [%s] failed: %d\n", XATTR_NAME_LUSTRE_ACL, cmd, rc); diff --git a/lustre/llite/llite_nfs.c b/lustre/llite/llite_nfs.c index 0cc0e6d..62845d9 
100644 --- a/lustre/llite/llite_nfs.c +++ b/lustre/llite/llite_nfs.c @@ -59,9 +59,11 @@ static int ll_nfs_test_inode(struct inode *inode, void *opaque) static struct inode *search_inode_for_lustre(struct super_block *sb, struct lu_fid *fid, + struct lustre_capa *capa, int mode) { struct ll_sb_info *sbi = ll_s2sbi(sb); + struct obd_capa *ocapa = NULL; struct ptlrpc_request *req = NULL; struct inode *inode = NULL; unsigned long valid = 0; @@ -79,7 +81,16 @@ static struct inode *search_inode_for_lustre(struct super_block *sb, valid |= OBD_MD_FLEASIZE; } - rc = md_getattr(sbi->ll_md_exp, fid, valid, eadatalen, &req); + if (capa) { + ocapa = alloc_capa(CAPA_SITE_CLIENT); + if (!ocapa) + return ERR_PTR(-ENOMEM); + ocapa->c_capa = *capa; + } + + rc = md_getattr(sbi->ll_md_exp, fid, (struct obd_capa *)ocapa, + valid, eadatalen, &req); + free_capa(ocapa); if (rc) { CERROR("can't get object attrs, fid "DFID", rc %d\n", PFID(fid), rc); @@ -99,7 +110,9 @@ static struct inode *search_inode_for_lustre(struct super_block *sb, extern struct dentry_operations ll_d_ops; static struct dentry *ll_iget_for_nfs(struct super_block *sb, - struct lu_fid *fid, umode_t mode) + struct lu_fid *fid, + struct lustre_capa *capa, + umode_t mode) { struct inode *inode; struct dentry *result; @@ -110,7 +123,7 @@ static struct dentry *ll_iget_for_nfs(struct super_block *sb, if (!fid_is_sane(fid)) return ERR_PTR(-ESTALE); - inode = search_inode_for_lustre(sb, fid, mode); + inode = search_inode_for_lustre(sb, fid, capa, mode); if (IS_ERR(inode)) return ERR_PTR(PTR_ERR(inode)); @@ -160,6 +173,7 @@ static struct dentry *ll_iget_for_nfs(struct super_block *sb, return result; } +#if 0 static void ll_fh_to_fid(struct lu_fid *fid, __u32 *mode, __u32 *datap) { /* unpacking ->f_seq */ @@ -246,24 +260,63 @@ int ll_dentry_to_fh(struct dentry *dentry, __u32 *datap, int *lenp, *lenp = 5; return 1; } +#endif #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) +struct dentry *ll_decode_fh(struct super_block *sb, __u32 
*fh, int fh_len, + int fh_type, + int (*acceptable)(void *, struct dentry *), + void *context) +{ + int len = (sizeof(struct lu_fid) + sizeof(struct lustre_capa) + 3)/4; + + if (fh_type != 1) + return ERR_PTR(-ESTALE); + if (fh_len < len) + return ERR_PTR(-ESTALE); + return sb->s_export_op->find_exported_dentry(sb, fh, NULL, acceptable, + context); +} + +/* + * Pack the fid of @de's inode followed by a struct lustre_capa slot into @fh. + * The slot is zero-filled when the inode has no MDS capa, so the decoder can + * tell "no capa" from lc_opc == 0. Returns 255 when *plen (in 32-bit words) + * is too small, otherwise 1 with *plen set to the handle length. + */ +int ll_encode_fh(struct dentry *de, __u32 *fh, int *plen, int connectable) +{ + struct inode *inode = de->d_inode; + struct lu_fid *fid = ll_inode2fid(inode); + struct obd_capa *ocapa; + int len = (sizeof(*fid) + sizeof(struct lustre_capa) + 3)/4; + char *p = (char *)fh; + + if (*plen < len) + return 255; + + memcpy(p, fid, sizeof(*fid)); + p += sizeof(*fid); + /* take the capa reference only after the size check, so that the + * early return above cannot leak it */ + ocapa = ll_i2mdscapa(inode); + if (ocapa) { + capa_cpy(p, ocapa); + capa_put(ocapa); + } else { + memset(p, 0, sizeof(struct lustre_capa)); + } + *plen = len; + return 1; +} + struct dentry *ll_get_dentry(struct super_block *sb, void *data) { - __u32 *inump = (__u32*)data; - struct lu_fid fid; - - /* FIXME: seems this is not enough */ - fid.f_seq = inump[0]; - fid.f_oid = inump[1]; + char *p = (char *)data; + struct lu_fid *fid; + struct lustre_capa *capa; + + fid = (struct lu_fid *)p; + capa = (struct lustre_capa *)(p + sizeof(*fid)); - return ll_iget_for_nfs(sb, &fid, S_IFREG); + /* ll_encode_fh() zero-fills the slot when there is no capa, so only + * a non-zero lc_opc is taken to mean a capability is present */ + return ll_iget_for_nfs(sb, fid, (capa->lc_opc != 0) ? 
capa : NULL, + S_IFREG); } struct dentry *ll_get_parent(struct dentry *dchild) { struct ptlrpc_request *req = NULL; struct inode *dir = dchild->d_inode; + struct obd_capa *oc; struct ll_sb_info *sbi; struct dentry *result = NULL; struct mdt_body *body; @@ -275,18 +328,21 @@ struct dentry *ll_get_parent(struct dentry *dchild) sbi = ll_s2sbi(dir->i_sb); - rc = md_getattr_name(sbi->ll_md_exp, ll_inode2fid(dir), - dotdot, strlen(dotdot) + 1, - 0, 0, &req); + oc = ll_i2mdscapa(dir); + rc = md_getattr_name(sbi->ll_md_exp, ll_inode2fid(dir), oc, + dotdot, strlen(dotdot) + 1, 0, 0, &req); if (rc) { + capa_put(oc); CERROR("failure %d inode %lu get parent\n", rc, dir->i_ino); - return ERR_PTR(rc); + RETURN(ERR_PTR(rc)); } - body = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, sizeof (*body)); + body = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, sizeof(*body)); LASSERT((body->valid & OBD_MD_FLGENER) && (body->valid & OBD_MD_FLID)); - result = ll_iget_for_nfs(dir->i_sb, ll_inode2fid(dir), S_IFDIR); + result = ll_iget_for_nfs(dir->i_sb, ll_inode2fid(dir), + oc ? &oc->c_capa : NULL, S_IFDIR); + capa_put(oc); if (IS_ERR(result)) rc = PTR_ERR(result); diff --git a/lustre/llite/namei.c b/lustre/llite/namei.c index d9f4c15..a2cc955 100644 --- a/lustre/llite/namei.c +++ b/lustre/llite/namei.c @@ -295,31 +295,6 @@ void ll_i2gids(__u32 *suppgids, struct inode *i1, struct inode *i2) } } -/* this function prepares md_op_data hint for passing ot down to MD stack. */ -void ll_prepare_md_op_data(struct md_op_data *op_data, struct inode *i1, - struct inode *i2, const char *name, int namelen, - int mode) -{ - LASSERT(i1 != NULL); - LASSERT(op_data != NULL); - - ll_i2gids(op_data->suppgids, i1, i2); - op_data->fid1 = ll_i2info(i1)->lli_fid; - - /* @i2 may be NULL. In this case caller itself has to initialize ->fid2 - * if needed. 
*/ - if (i2) - op_data->fid2 = ll_i2info(i2)->lli_fid; - - op_data->name = name; - op_data->namelen = namelen; - op_data->create_mode = mode; - op_data->mod_time = CURRENT_SECONDS; - op_data->fsuid = current->fsuid; - op_data->fsgid = current->fsgid; - op_data->cap = current->cap_effective; -} - static void ll_d_add(struct dentry *de, struct inode *inode) { CDEBUG(D_DENTRY, "adding inode %p to dentry %p\n", inode, de); @@ -1071,6 +1046,7 @@ int ll_objects_destroy(struct ptlrpc_request *request, struct inode *dir) struct lov_stripe_md *lsm = NULL; struct obd_trans_info oti = { 0 }; struct obdo *oa; + struct obd_capa *oc; int rc; ENTRY; @@ -1129,7 +1105,10 @@ int ll_objects_destroy(struct ptlrpc_request *request, struct inode *dir) } } - rc = obd_destroy(ll_i2dtexp(dir), oa, lsm, &oti, ll_i2mdexp(dir)); + /* FIXME: parent mds capability is the only one can find! */ + oc = ll_i2mdscapa(dir); + rc = obd_destroy(ll_i2dtexp(dir), oa, lsm, &oti, ll_i2mdexp(dir), oc); + capa_put(oc); obdo_free(oa); if (rc) CERROR("obd destroy objid "LPX64" error %d\n", diff --git a/lustre/llite/remote_perm.c b/lustre/llite/remote_perm.c index 6eaf01e..4183fa6 100644 --- a/lustre/llite/remote_perm.c +++ b/lustre/llite/remote_perm.c @@ -216,6 +216,7 @@ int lustre_check_remote_perm(struct inode *inode, int mask) struct ll_sb_info *sbi = ll_i2sbi(inode); struct ptlrpc_request *req = NULL; struct mdt_remote_perm *perm; + struct obd_capa *oc; int i = 0, rc; ENTRY; @@ -239,7 +240,9 @@ check: LBUG(); } - rc = md_get_remote_perm(sbi->ll_md_exp, ll_inode2fid(inode), &req); + oc = ll_i2mdscapa(inode); + rc = md_get_remote_perm(sbi->ll_md_exp, ll_inode2fid(inode), oc, &req); + capa_put(oc); if (rc) { up(&lli->lli_rmtperm_sem); RETURN(rc); diff --git a/lustre/llite/rw.c b/lustre/llite/rw.c index 38dfca5..250e6a9 100644 --- a/lustre/llite/rw.c +++ b/lustre/llite/rw.c @@ -65,7 +65,7 @@ static int ll_brw(int cmd, struct inode *inode, struct obdo *oa, struct lov_stripe_md *lsm = lli->lli_smd; struct 
obd_info oinfo = { { { 0 } } }; struct brw_page pg; - int rc; + int opc, rc; ENTRY; pg.pg = page; @@ -96,7 +96,12 @@ static int ll_brw(int cmd, struct inode *inode, struct obdo *oa, LPROC_LL_BRW_READ, pg.count); oinfo.oi_oa = oa; oinfo.oi_md = lsm; + /* NB partial write, so we might not have CAPA_OPC_OSS_READ capa */ + opc = cmd & OBD_BRW_WRITE ? CAPA_OPC_OSS_WRITE : + CAPA_OPC_OSS_WRITE | CAPA_OPC_OSS_READ; + oinfo.oi_capa = ll_lookup_oss_capa(inode, opc); rc = obd_brw(cmd, ll_i2dtexp(inode), &oinfo, 1, &pg, NULL); + capa_put(oinfo.oi_capa); if (rc == 0) obdo_to_inode(inode, oa, OBD_MD_FLBLOCKS); else if (rc != -EIO) @@ -182,7 +187,9 @@ void ll_truncate(struct inode *inode) ll_inode_size_unlock(inode, 0); + oinfo.oi_capa = ll_lookup_oss_capa(inode, CAPA_OPC_OSS_TRUNC); rc = obd_punch_rqset(ll_i2dtexp(inode), &oinfo, NULL); + ll_truncate_free_capa(oinfo.oi_capa); if (rc) CERROR("obd_truncate fails (%d) ino %lu\n", rc, inode->i_ino); else @@ -403,12 +410,22 @@ static void ll_ap_update_obdo(void *data, int cmd, struct obdo *oa, EXIT; } +static struct obd_capa *ll_ap_lookup_capa(void *data, int cmd) +{ + struct ll_async_page *llap = LLAP_FROM_COOKIE(data); + int opc = cmd & OBD_BRW_WRITE ? 
CAPA_OPC_OSS_WRITE : + CAPA_OPC_OSS_WRITE | CAPA_OPC_OSS_READ; + + return ll_lookup_oss_capa(llap->llap_page->mapping->host, opc); +} + static struct obd_async_page_ops ll_async_page_ops = { .ap_make_ready = ll_ap_make_ready, .ap_refresh_count = ll_ap_refresh_count, .ap_fill_obdo = ll_ap_fill_obdo, .ap_update_obdo = ll_ap_update_obdo, .ap_completion = ll_ap_completion, + .ap_lookup_capa = ll_ap_lookup_capa, }; struct ll_async_page *llap_cast_private(struct page *page) diff --git a/lustre/llite/rw26.c b/lustre/llite/rw26.c index bb7287e..1c15af3 100644 --- a/lustre/llite/rw26.c +++ b/lustre/llite/rw26.c @@ -141,8 +141,9 @@ static ssize_t ll_direct_IO_26_seg(int rw, struct file *file, { struct brw_page *pga; struct obdo oa; - int i, rc = 0; + int opc, i, rc = 0; size_t length; + struct obd_capa *ocapa; ENTRY; OBD_ALLOC(pga, sizeof(*pga) * page_count); @@ -166,13 +167,18 @@ static ssize_t ll_direct_IO_26_seg(int rw, struct file *file, if (rw == WRITE) { lprocfs_counter_add(ll_i2sbi(inode)->ll_stats, LPROC_LL_DIRECT_WRITE, size); + opc = CAPA_OPC_OSS_WRITE; llap_write_pending(inode, NULL); } else { lprocfs_counter_add(ll_i2sbi(inode)->ll_stats, LPROC_LL_DIRECT_READ, size); + opc = CAPA_OPC_OSS_READ | CAPA_OPC_OSS_WRITE; } + ocapa = ll_lookup_oss_capa(inode, opc); rc = obd_brw_rqset(rw == WRITE ? 
OBD_BRW_WRITE : OBD_BRW_READ, - ll_i2dtexp(inode), &oa, lsm, page_count, pga, NULL); + ll_i2dtexp(inode), &oa, lsm, page_count, pga, NULL, + ocapa); + capa_put(ocapa); if (rc == 0) { rc = size; if (rw == WRITE) { diff --git a/lustre/llite/super25.c b/lustre/llite/super25.c index 1e6052c..ddf5b8f 100644 --- a/lustre/llite/super25.c +++ b/lustre/llite/super25.c @@ -103,6 +103,7 @@ void lustre_register_client_process_config(int (*cpc)(struct lustre_cfg *lcfg)); static int __init init_lustre_lite(void) { int rc, seed[2]; + printk(KERN_INFO "Lustre: Lustre Client File System; " "info@clusterfs.com\n"); rc = ll_init_inodecache(); @@ -150,6 +151,9 @@ static int __init init_lustre_lite(void) get_random_bytes(seed, sizeof(seed)); ll_srand(seed[0], seed[1]); + init_timer(&ll_capa_timer); + ll_capa_timer.function = ll_capa_timer_callback; + rc = ll_capa_thread_start(); return rc; } @@ -157,6 +161,12 @@ static void __exit exit_lustre_lite(void) { int rc; + del_timer(&ll_capa_timer); + ll_capa_thread_stop(); + LASSERTF(capa_count[CAPA_SITE_CLIENT] == 0, + "client remaining capa count %d\n", + capa_count[CAPA_SITE_CLIENT]); + lustre_register_client_fill_super(NULL); lustre_register_client_process_config(NULL); diff --git a/lustre/llite/symlink.c b/lustre/llite/symlink.c index e44dcbf..b23d6b8 100644 --- a/lustre/llite/symlink.c +++ b/lustre/llite/symlink.c @@ -36,6 +36,7 @@ static int ll_readlink_internal(struct inode *inode, struct ll_sb_info *sbi = ll_i2sbi(inode); int rc, symlen = inode->i_size + 1; struct mdt_body *body; + struct obd_capa *oc; ENTRY; *request = NULL; @@ -46,8 +47,10 @@ static int ll_readlink_internal(struct inode *inode, RETURN(0); } - rc = md_getattr(sbi->ll_md_exp, ll_inode2fid(inode), + oc = ll_i2mdscapa(inode); + rc = md_getattr(sbi->ll_md_exp, ll_inode2fid(inode), oc, OBD_MD_LINKNAME, symlen, request); + capa_put(oc); if (rc) { if (rc != -ENOENT) CERROR("inode %lu: rc = %d\n", inode->i_ino, rc); diff --git a/lustre/llite/xattr.c b/lustre/llite/xattr.c 
index 1f4b2dc..6038ef8 100644 --- a/lustre/llite/xattr.c +++ b/lustre/llite/xattr.c @@ -111,6 +111,7 @@ int ll_setxattr_common(struct inode *inode, const char *name, struct ll_sb_info *sbi = ll_i2sbi(inode); struct ptlrpc_request *req; int xattr_type, rc; + struct obd_capa *oc; ENTRY; lprocfs_counter_incr(sbi->ll_stats, LPROC_LL_SETXATTR); @@ -124,8 +125,10 @@ int ll_setxattr_common(struct inode *inode, const char *name, if (xattr_type == XATTR_TRUSTED_T && strcmp(name, "trusted.lov") == 0) RETURN(0); - rc = md_setxattr(sbi->ll_md_exp, ll_inode2fid(inode), valid, - name, value, size, 0, flags, &req); + oc = ll_i2mdscapa(inode); + rc = md_setxattr(sbi->ll_md_exp, ll_inode2fid(inode), oc, valid, name, + value, size, 0, flags, &req); + capa_put(oc); if (rc) { if (rc == -EOPNOTSUPP && xattr_type == XATTR_USER_T) { LCONSOLE_INFO("Disabling user_xattr feature because " @@ -177,6 +180,7 @@ int ll_getxattr_common(struct inode *inode, const char *name, struct mdt_body *body; int xattr_type, rc; void *xdata; + struct obd_capa *oc; ENTRY; CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", @@ -221,8 +225,10 @@ int ll_getxattr_common(struct inode *inode, const char *name, #endif do_getxattr: - rc = md_getxattr(sbi->ll_md_exp, ll_inode2fid(inode), valid, - name, NULL, 0, size, 0, &req); + oc = ll_i2mdscapa(inode); + rc = md_getxattr(sbi->ll_md_exp, ll_inode2fid(inode), oc, valid, name, + NULL, 0, size, 0, &req); + capa_put(oc); if (rc) { if (rc == -EOPNOTSUPP && xattr_type == XATTR_USER_T) { LCONSOLE_INFO("Disabling user_xattr feature because " diff --git a/lustre/lmv/lmv_intent.c b/lustre/lmv/lmv_intent.c index e8f5219..edd0b30 100644 --- a/lustre/lmv/lmv_intent.c +++ b/lustre/lmv/lmv_intent.c @@ -185,6 +185,7 @@ int lmv_intent_open(struct obd_export *exp, struct md_op_data *op_data, { struct obd_device *obd = exp->exp_obd; struct lu_fid rpid = op_data->fid1; + struct obd_capa *oc = op_data->mod_capa1; struct lmv_obd *lmv = &obd->u.lmv; struct mdt_body *body = NULL; struct 
md_op_data *sop_data; @@ -217,6 +218,7 @@ repeat: (char *)op_data->name, op_data->namelen); rpid = obj->lo_inodes[mds].li_fid; + oc = obj->lo_inodes[mds].li_capa; rc = lmv_fld_lookup(lmv, &rpid, &mds); lmv_obj_put(obj); if (rc) @@ -227,6 +229,7 @@ repeat: } sop_data->fid1 = rpid; + sop_data->mod_capa1 = oc; rc = md_intent_lock(lmv->tgts[mds].ltd_exp, sop_data, lmm, lmmsize, it, flags, reqp, @@ -238,7 +241,7 @@ repeat: * the request with proper MDS. */ LASSERT(lu_fid_eq(&op_data->fid1, &rpid)); - rc = lmv_handle_split(exp, &rpid); + rc = lmv_handle_split(exp, &rpid, oc); if (rc == 0) { ptlrpc_req_finished(*reqp); /* We shoudld reallocate the FID for the object */ @@ -301,8 +304,10 @@ repeat: obj = lmv_obj_grab(obd, &body->fid1); if (!obj && (mea = lmv_get_mea(*reqp, DLM_REPLY_REC_OFF))) { + + /* FIXME: capability for remote! */ /* wow! this is split dir, we'd like to handle it */ - obj = lmv_obj_create(exp, &body->fid1, mea); + obj = lmv_obj_create(exp, &body->fid1, NULL, mea); if (IS_ERR(obj)) GOTO(out_free_sop_data, rc = (int)PTR_ERR(obj)); } @@ -337,6 +342,7 @@ int lmv_intent_getattr(struct obd_export *exp, struct md_op_data *op_data, struct lmv_obj *obj = NULL, *obj2 = NULL; struct obd_device *obd = exp->exp_obd; struct lu_fid rpid = op_data->fid1; + struct obd_capa *oc = op_data->mod_capa1; struct lmv_obd *lmv = &obd->u.lmv; struct mdt_body *body = NULL; struct md_op_data *sop_data; @@ -397,6 +403,7 @@ int lmv_intent_getattr(struct obd_export *exp, struct md_op_data *op_data, op_data->namelen); rpid = obj->lo_inodes[mds].li_fid; + oc = obj->lo_inodes[mds].li_capa; rc = lmv_fld_lookup(lmv, &rpid, &mds); if (rc) { lmv_obj_put(obj); @@ -410,6 +417,7 @@ int lmv_intent_getattr(struct obd_export *exp, struct md_op_data *op_data, } sop_data->fid1 = rpid; + sop_data->mod_capa1 = oc; rc = md_intent_lock(lmv->tgts[mds].ltd_exp, sop_data, lmm, lmmsize, it, flags, reqp, cb_blocking, @@ -468,8 +476,10 @@ int lmv_intent_getattr(struct obd_export *exp, struct md_op_data 
*op_data, obj2 = lmv_obj_grab(obd, &body->fid1); if (!obj2 && (mea = lmv_get_mea(*reqp, DLM_REPLY_REC_OFF))) { + + /* FIXME remote capability! */ /* wow! this is split dir, we'd like to handle it. */ - obj2 = lmv_obj_create(exp, &body->fid1, mea); + obj2 = lmv_obj_create(exp, &body->fid1, NULL, mea); if (IS_ERR(obj2)) GOTO(out_free_sop_data, rc = (int)PTR_ERR(obj2)); } @@ -542,6 +552,7 @@ int lmv_lookup_slaves(struct obd_export *exp, struct ptlrpc_request **reqp) for (i = 0; i < obj->lo_objcount; i++) { struct lu_fid fid = obj->lo_inodes[i].li_fid; + struct obd_capa *oc= obj->lo_inodes[i].li_capa; struct ptlrpc_request *req = NULL; struct obd_export *tgt_exp; struct lookup_intent it; @@ -559,6 +570,8 @@ int lmv_lookup_slaves(struct obd_export *exp, struct ptlrpc_request **reqp) memset(op_data, 0, sizeof(*op_data)); op_data->fid1 = fid; op_data->fid2 = fid; + op_data->mod_capa1 = oc; + op_data->mod_capa2 = oc; tgt_exp = lmv_get_export(lmv, &fid); if (IS_ERR(tgt_exp)) @@ -620,6 +633,7 @@ int lmv_intent_lookup(struct obd_export *exp, struct md_op_data *op_data, { struct obd_device *obd = exp->exp_obd; struct lu_fid rpid = op_data->fid1; + struct obd_capa *oc = op_data->mod_capa1; struct lmv_obd *lmv = &obd->u.lmv; struct mdt_body *body = NULL; struct md_op_data *sop_data; @@ -655,6 +669,7 @@ int lmv_intent_lookup(struct obd_export *exp, struct md_op_data *op_data, (char *)op_data->name, op_data->namelen); rpid = obj->lo_inodes[mds].li_fid; + oc = obj->lo_inodes[mds].li_capa; lmv_obj_put(obj); } rc = lmv_fld_lookup(lmv, &rpid, &mds); @@ -684,6 +699,7 @@ repeat: (char *)op_data->name, op_data->namelen); rpid = obj->lo_inodes[mds].li_fid; + oc = obj->lo_inodes[mds].li_capa; rc = lmv_fld_lookup(lmv, &rpid, &mds); if (rc) { lmv_obj_put(obj); @@ -696,6 +712,7 @@ repeat: } sop_data->fid1 = rpid; + sop_data->mod_capa1 = oc; rc = md_intent_lock(lmv->tgts[mds].ltd_exp, sop_data, lmm, lmmsize, it, flags, reqp, cb_blocking, extra_lock_flags); @@ -730,7 +747,7 @@ repeat: CWARN("we 
haven't knew about directory splitting!\n"); LASSERT(obj == NULL); - obj = lmv_obj_create(exp, &rpid, NULL); + obj = lmv_obj_create(exp, &rpid, oc, NULL); if (IS_ERR(obj)) RETURN((int)PTR_ERR(obj)); lmv_obj_put(obj); @@ -754,9 +771,15 @@ repeat: LASSERT(body != NULL); LASSERT((body->valid & OBD_MD_FLID) != 0); + body = lustre_msg_buf((*reqp)->rq_repmsg, + DLM_REPLY_REC_OFF, sizeof(*body)); + LASSERT(body != NULL); + LASSERT((body->valid & OBD_MD_FLID) != 0); + obj = lmv_obj_grab(obd, &body->fid1); if (!obj) { - obj = lmv_obj_create(exp, &body->fid1, mea); + /* FIXME: remote capability */ + obj = lmv_obj_create(exp, &body->fid1, NULL, mea); if (IS_ERR(obj)) GOTO(out_free_sop_data, rc = (int)PTR_ERR(obj)); } @@ -844,6 +867,7 @@ int lmv_revalidate_slaves(struct obd_export *exp, struct ptlrpc_request **reqp, for (i = 0; i < obj->lo_objcount; i++) { struct lu_fid fid = obj->lo_inodes[i].li_fid; + struct obd_capa *oc = obj->lo_inodes[i].li_capa; struct lustre_handle *lockh = NULL; struct ptlrpc_request *req = NULL; ldlm_blocking_callback cb; @@ -883,6 +907,8 @@ int lmv_revalidate_slaves(struct obd_export *exp, struct ptlrpc_request **reqp, op_data->fid1 = fid; op_data->fid2 = fid; + op_data->mod_capa1 = oc; + op_data->mod_capa2 = oc; /* is obj valid? 
*/ tgt_exp = lmv_get_export(lmv, &fid); diff --git a/lustre/lmv/lmv_internal.h b/lustre/lmv/lmv_internal.h index af1454e..ad45117 100644 --- a/lustre/lmv/lmv_internal.h +++ b/lustre/lmv/lmv_internal.h @@ -44,6 +44,7 @@ struct qstr { struct lmv_inode { struct lu_fid li_fid; /* id of dirobj */ + struct obd_capa *li_capa; /* fid capability */ unsigned long li_size; /* slave size value */ int li_flags; }; @@ -97,6 +98,7 @@ struct lmv_obj *lmv_obj_alloc(struct obd_device *obd, struct lmv_obj *lmv_obj_create(struct obd_export *exp, const struct lu_fid *fid, + struct obd_capa *oc, struct lmv_stripe_md *mea); int lmv_obj_delete(struct obd_export *exp, @@ -133,7 +135,8 @@ int lmv_revalidate_slaves(struct obd_export *, struct ptlrpc_request **, ldlm_blocking_callback cb_blocking, int extra_lock_flags); -int lmv_handle_split(struct obd_export *, const struct lu_fid *); +int lmv_handle_split(struct obd_export *, const struct lu_fid *, + struct obd_capa *oc); int lmv_blocking_ast(struct ldlm_lock *, struct ldlm_lock_desc *, void *, int); int lmv_fld_lookup(struct lmv_obd *lmv, const struct lu_fid *fid, diff --git a/lustre/lmv/lmv_obd.c b/lustre/lmv/lmv_obd.c index 83ed667..3148f81 100644 --- a/lustre/lmv/lmv_obd.c +++ b/lustre/lmv/lmv_obd.c @@ -1050,7 +1050,8 @@ out_free_temp: } static int lmv_getstatus(struct obd_export *exp, - struct lu_fid *fid) + struct lu_fid *fid, + struct obd_capa **pc) { struct obd_device *obd = exp->exp_obd; struct lmv_obd *lmv = &obd->u.lmv; @@ -1061,15 +1062,15 @@ static int lmv_getstatus(struct obd_export *exp, if (rc) RETURN(rc); - rc = md_getstatus(lmv->tgts[0].ltd_exp, fid); + rc = md_getstatus(lmv->tgts[0].ltd_exp, fid, pc); RETURN(rc); } static int lmv_getxattr(struct obd_export *exp, const struct lu_fid *fid, - obd_valid valid, const char *name, const char *input, - int input_size, int output_size, int flags, - struct ptlrpc_request **request) + struct obd_capa *oc, obd_valid valid, const char *name, + const char *input, int input_size, int 
output_size, + int flags, struct ptlrpc_request **request) { struct obd_device *obd = exp->exp_obd; struct lmv_obd *lmv = &obd->u.lmv; @@ -1085,16 +1086,16 @@ static int lmv_getxattr(struct obd_export *exp, const struct lu_fid *fid, if (IS_ERR(tgt_exp)) RETURN(PTR_ERR(tgt_exp)); - rc = md_getxattr(tgt_exp, fid, valid, name, input, input_size, + rc = md_getxattr(tgt_exp, fid, oc, valid, name, input, input_size, output_size, flags, request); RETURN(rc); } static int lmv_setxattr(struct obd_export *exp, const struct lu_fid *fid, - obd_valid valid, const char *name, const char *input, - int input_size, int output_size, int flags, - struct ptlrpc_request **request) + struct obd_capa *oc, obd_valid valid, const char *name, + const char *input, int input_size, int output_size, + int flags, struct ptlrpc_request **request) { struct obd_device *obd = exp->exp_obd; struct lmv_obd *lmv = &obd->u.lmv; @@ -1110,14 +1111,14 @@ static int lmv_setxattr(struct obd_export *exp, const struct lu_fid *fid, if (IS_ERR(tgt_exp)) RETURN(PTR_ERR(tgt_exp)); - rc = md_setxattr(tgt_exp, fid, valid, name, + rc = md_setxattr(tgt_exp, fid, oc, valid, name, input, input_size, output_size, flags, request); RETURN(rc); } static int lmv_getattr(struct obd_export *exp, const struct lu_fid *fid, - obd_valid valid, int ea_size, + struct obd_capa *oc, obd_valid valid, int ea_size, struct ptlrpc_request **request) { struct obd_device *obd = exp->exp_obd; @@ -1135,7 +1136,7 @@ static int lmv_getattr(struct obd_export *exp, const struct lu_fid *fid, if (IS_ERR(tgt_exp)) RETURN(PTR_ERR(tgt_exp)); - rc = md_getattr(tgt_exp, fid, valid, ea_size, request); + rc = md_getattr(tgt_exp, fid, oc, valid, ea_size, request); if (rc) RETURN(rc); @@ -1157,7 +1158,7 @@ static int lmv_getattr(struct obd_export *exp, const struct lu_fid *fid, RETURN(rc); } - body = lustre_msg_buf((*request)->rq_repmsg, REQ_REC_OFF, + body = lustre_msg_buf((*request)->rq_repmsg, REPLY_REC_OFF, sizeof(*body)); LASSERT(body != NULL); @@ 
-1184,10 +1185,8 @@ static int lmv_getattr(struct obd_export *exp, const struct lu_fid *fid, RETURN(rc); } -static int lmv_change_cbdata(struct obd_export *exp, - const struct lu_fid *fid, - ldlm_iterator_t it, - void *data) +static int lmv_change_cbdata(struct obd_export *exp, const struct lu_fid *fid, + ldlm_iterator_t it, void *data) { struct obd_device *obd = exp->exp_obd; struct lmv_obd *lmv = &obd->u.lmv; @@ -1235,7 +1234,8 @@ static int lmv_close(struct obd_export *exp, /* called in the case MDS returns -ERESTART on create on open, what means that * directory is split and its LMV presentation object has to be updated. */ -int lmv_handle_split(struct obd_export *exp, const struct lu_fid *fid) +int lmv_handle_split(struct obd_export *exp, const struct lu_fid *fid, + struct obd_capa *oc) { struct obd_device *obd = exp->exp_obd; struct lmv_obd *lmv = &obd->u.lmv; @@ -1257,7 +1257,7 @@ int lmv_handle_split(struct obd_export *exp, const struct lu_fid *fid) RETURN(PTR_ERR(tgt_exp)); /* time to update mea of parent fid */ - rc = md_getattr(tgt_exp, fid, valid, mealen, &req); + rc = md_getattr(tgt_exp, fid, oc, valid, mealen, &req); if (rc) { CERROR("md_getattr() failed, error %d\n", rc); GOTO(cleanup, rc); @@ -1272,7 +1272,7 @@ int lmv_handle_split(struct obd_export *exp, const struct lu_fid *fid) if (md.mea == NULL) GOTO(cleanup, rc = -ENODATA); - obj = lmv_obj_create(exp, fid, md.mea); + obj = lmv_obj_create(exp, fid, oc, md.mea); if (IS_ERR(obj)) rc = PTR_ERR(obj); else @@ -1312,7 +1312,8 @@ repeat: mds = raw_name2idx(obj->lo_hashtype, obj->lo_objcount, op_data->name, op_data->namelen); - op_data->fid1 = obj->lo_inodes[mds].li_fid; + op_data->fid1 = obj->lo_inodes[mds].li_fid; + op_data->mod_capa1 = obj->lo_inodes[mds].li_capa; lmv_obj_put(obj); } @@ -1328,13 +1329,14 @@ repeat: if (rc == 0) { if (*request == NULL) RETURN(rc); - CDEBUG(D_OTHER, "created. "DFID"\n", PFID(&op_data->fid1)); + CDEBUG(D_OTHER, "created. 
"DFID"\n", + PFID(&op_data->fid1)); } else if (rc == -ERESTART) { /* * Directory got split. time to update local object and repeat * the request with proper MDS. */ - rc = lmv_handle_split(exp, &op_data->fid1); + rc = lmv_handle_split(exp, &op_data->fid1, op_data->mod_capa1); if (rc == 0) { ptlrpc_req_finished(*request); rc = lmv_alloc_fid_for_split(obd, &op_data->fid1, @@ -1530,7 +1532,8 @@ lmv_enqueue(struct obd_export *exp, int lock_type, * name */ mds = raw_name2idx(obj->lo_hashtype, obj->lo_objcount, (char *)op_data->name, op_data->namelen); - op_data->fid1 = obj->lo_inodes[mds].li_fid; + op_data->fid1 = obj->lo_inodes[mds].li_fid; + op_data->mod_capa1 = obj->lo_inodes[mds].li_capa; lmv_obj_put(obj); } } @@ -1555,13 +1558,14 @@ lmv_enqueue(struct obd_export *exp, int lock_type, static int lmv_getattr_name(struct obd_export *exp, const struct lu_fid *fid, - const char *filename, int namelen, obd_valid valid, - int ea_size, struct ptlrpc_request **request) + struct obd_capa *oc, const char *filename, int namelen, + obd_valid valid, int ea_size, struct ptlrpc_request **request) { struct obd_device *obd = exp->exp_obd; struct lmv_obd *lmv = &obd->u.lmv; - struct obd_export *tgt_exp; struct lu_fid rid = *fid; + struct obd_capa *rcapa = oc; + struct obd_export *tgt_exp; struct mdt_body *body; struct lmv_obj *obj; int rc, loop = 0; @@ -1574,12 +1578,13 @@ lmv_getattr_name(struct obd_export *exp, const struct lu_fid *fid, repeat: LASSERT(++loop <= 2); - obj = lmv_obj_grab(obd, fid); + obj = lmv_obj_grab(obd, &rid); if (obj) { /* directory is split. 
look for right mds for this name */ mds = raw_name2idx(obj->lo_hashtype, obj->lo_objcount, filename, namelen - 1); rid = obj->lo_inodes[mds].li_fid; + rcapa = obj->lo_inodes[mds].li_capa; lmv_obj_put(obj); } @@ -1590,7 +1595,7 @@ repeat: if (IS_ERR(tgt_exp)) RETURN(PTR_ERR(tgt_exp)); - rc = md_getattr_name(tgt_exp, &rid, filename, namelen, valid, + rc = md_getattr_name(tgt_exp, &rid, rcapa, filename, namelen, valid, ea_size, request); if (rc == 0) { body = lustre_msg_buf((*request)->rq_repmsg, @@ -1610,15 +1615,15 @@ repeat: RETURN(PTR_ERR(tgt_exp)); } - rc = md_getattr_name(tgt_exp, &rid, NULL, 1, valid, - ea_size, &req); + rc = md_getattr_name(tgt_exp, &rid, rcapa, NULL, 1, + valid, ea_size, &req); ptlrpc_req_finished(*request); *request = req; } } else if (rc == -ERESTART) { /* directory got split. time to update local object and repeat * the request with proper MDS */ - rc = lmv_handle_split(exp, &rid); + rc = lmv_handle_split(exp, &rid, rcapa); if (rc == 0) { ptlrpc_req_finished(*request); goto repeat; @@ -1651,7 +1656,8 @@ static int lmv_link(struct obd_export *exp, struct md_op_data *op_data, if (obj) { rc = raw_name2idx(obj->lo_hashtype, obj->lo_objcount, op_data->name, op_data->namelen); - op_data->fid2 = obj->lo_inodes[rc].li_fid; + op_data->fid2 = obj->lo_inodes[rc].li_fid; + op_data->mod_capa2 = obj->lo_inodes[rc].li_capa; lmv_obj_put(obj); } @@ -1695,8 +1701,8 @@ static int lmv_rename(struct obd_export *exp, struct md_op_data *op_data, ENTRY; CDEBUG(D_OTHER, "rename %*s in "DFID" to %*s in "DFID"\n", - oldlen, old, PFID(&op_data->fid1), newlen, new, - PFID(&op_data->fid2)); + oldlen, old, PFID(&op_data->fid1), + newlen, new, PFID(&op_data->fid2)); rc = lmv_check_connect(obd); if (rc) @@ -1724,7 +1730,8 @@ static int lmv_rename(struct obd_export *exp, struct md_op_data *op_data, if (obj) { mds = raw_name2idx(obj->lo_hashtype, obj->lo_objcount, (char *)new, newlen); - op_data->fid2 = obj->lo_inodes[mds].li_fid; + op_data->fid2 = 
obj->lo_inodes[mds].li_fid; + op_data->mod_capa2 = obj->lo_inodes[mds].li_capa; CDEBUG(D_OTHER, "forward to MDS #"LPU64" ("DFID")\n", mds, PFID(&op_data->fid2)); lmv_obj_put(obj); @@ -1740,7 +1747,8 @@ static int lmv_rename(struct obd_export *exp, struct md_op_data *op_data, */ mds = raw_name2idx(obj->lo_hashtype, obj->lo_objcount, (char *)old, oldlen); - op_data->fid1 = obj->lo_inodes[mds].li_fid; + op_data->fid1 = obj->lo_inodes[mds].li_fid; + op_data->mod_capa1 = obj->lo_inodes[mds].li_capa; CDEBUG(D_OTHER, "forward to MDS #"LPU64" ("DFID")\n", mds, PFID(&op_data->fid1)); lmv_obj_put(obj); @@ -1756,6 +1764,7 @@ static int lmv_rename(struct obd_export *exp, struct md_op_data *op_data, (char *)new, newlen); op_data->fid2 = obj->lo_inodes[mds].li_fid; + op_data->mod_capa2 = obj->lo_inodes[mds].li_capa; CDEBUG(D_OTHER, "forward to MDS #"LPU64" ("DFID")\n", mds, PFID(&op_data->fid2)); lmv_obj_put(obj); @@ -1772,8 +1781,8 @@ request: if (mds != mds2) { CDEBUG(D_OTHER,"cross-node rename "DFID"/%*s to "DFID"/%*s\n", - PFID(&op_data->fid1), oldlen, old, PFID(&op_data->fid2), - newlen, new); + PFID(&op_data->fid1), oldlen, old, + PFID(&op_data->fid2), newlen, new); } op_data->fsuid = current->fsuid; op_data->fsgid = current->fsgid; @@ -1807,7 +1816,8 @@ static int lmv_setattr(struct obd_export *exp, struct md_op_data *op_data, if (obj) { for (i = 0; i < obj->lo_objcount; i++) { - op_data->fid1 = obj->lo_inodes[i].li_fid; + op_data->fid1 = obj->lo_inodes[i].li_fid; + op_data->mod_capa1 = obj->lo_inodes[i].li_capa; tgt_exp = lmv_get_export(lmv, &op_data->fid1); if (IS_ERR(tgt_exp)) { @@ -1844,7 +1854,7 @@ static int lmv_setattr(struct obd_export *exp, struct md_op_data *op_data, } static int lmv_sync(struct obd_export *exp, const struct lu_fid *fid, - struct ptlrpc_request **request) + struct obd_capa *oc, struct ptlrpc_request **request) { struct obd_device *obd = exp->exp_obd; struct lmv_obd *lmv = &obd->u.lmv; @@ -1860,7 +1870,7 @@ static int lmv_sync(struct obd_export 
*exp, const struct lu_fid *fid, if (IS_ERR(tgt_exp)) RETURN(PTR_ERR(tgt_exp)); - rc = md_sync(tgt_exp, fid, request); + rc = md_sync(tgt_exp, fid, oc, request); RETURN(rc); } @@ -1912,7 +1922,8 @@ static int lmv_reset_hash_seg_end (struct lmv_obd *lmv, struct lmv_obj *obj, struct page *page = NULL; struct lu_dirpage *next_dp; struct obd_export *tgt_exp; - struct lu_fid rid = *fid; + struct lu_fid rid; + struct obd_capa *rcapa; __u32 seg_end, max_hash = MAX_HASH_SIZE; int rc = 0; @@ -1926,6 +1937,7 @@ static int lmv_reset_hash_seg_end (struct lmv_obd *lmv, struct lmv_obj *obj, /* Get start offset from next segment */ rid = obj->lo_inodes[index].li_fid; + rcapa = obj->lo_inodes[index].li_capa; tgt_exp = lmv_get_export(lmv, &rid); if (IS_ERR(tgt_exp)) GOTO(cleanup, PTR_ERR(tgt_exp)); @@ -1936,7 +1948,7 @@ static int lmv_reset_hash_seg_end (struct lmv_obd *lmv, struct lmv_obj *obj, if (!page) GOTO(cleanup, rc = -ENOMEM); - rc = md_readpage(tgt_exp, &rid, seg_end, page, &tmp_req); + rc = md_readpage(tgt_exp, &rid, rcapa, seg_end, page, &tmp_req); if (rc) { /* E2BIG means it already reached the end of the dir, * no need reset the hash segment end */ @@ -1963,15 +1975,15 @@ cleanup: RETURN(rc); } -static int lmv_readpage(struct obd_export *exp, - const struct lu_fid *fid, - __u64 offset, struct page *page, +static int lmv_readpage(struct obd_export *exp, const struct lu_fid *fid, + struct obd_capa *oc, __u64 offset, struct page *page, struct ptlrpc_request **request) { struct obd_device *obd = exp->exp_obd; struct lmv_obd *lmv = &obd->u.lmv; struct obd_export *tgt_exp; struct lu_fid rid = *fid; + struct obd_capa *rcapa = oc; struct lmv_obj *obj; int i = 0, rc; ENTRY; @@ -1994,6 +2006,7 @@ static int lmv_readpage(struct obd_export *exp, do_div(index, seg); i = (int)index; rid = obj->lo_inodes[i].li_fid; + rcapa = obj->lo_inodes[i].li_capa; lmv_obj_unlock(obj); @@ -2005,7 +2018,7 @@ static int lmv_readpage(struct obd_export *exp, if (IS_ERR(tgt_exp)) GOTO(cleanup, 
PTR_ERR(tgt_exp)); - rc = md_readpage(tgt_exp, &rid, offset, page, request); + rc = md_readpage(tgt_exp, &rid, rcapa, offset, page, request); if (rc) GOTO(cleanup, rc); @@ -2108,7 +2121,8 @@ static int lmv_unlink(struct obd_export *exp, struct md_op_data *op_data, if (obj) { i = raw_name2idx(obj->lo_hashtype, obj->lo_objcount, op_data->name, op_data->namelen); - op_data->fid1 = obj->lo_inodes[i].li_fid; + op_data->fid1 = obj->lo_inodes[i].li_fid; + op_data->mod_capa1 = obj->lo_inodes[i].li_capa; lmv_obj_put(obj); CDEBUG(D_OTHER, "unlink '%*s' in "DFID" -> %u\n", op_data->namelen, op_data->name, @@ -2479,6 +2493,7 @@ int lmv_clear_open_replay_data(struct obd_export *exp, } static int lmv_get_remote_perm(struct obd_export *exp, const struct lu_fid *fid, + struct obd_capa *oc, struct ptlrpc_request **request) { struct obd_device *obd = exp->exp_obd; @@ -2496,8 +2511,29 @@ static int lmv_get_remote_perm(struct obd_export *exp, const struct lu_fid *fid, if (IS_ERR(tgt_exp)) RETURN(PTR_ERR(tgt_exp)); - rc = md_get_remote_perm(tgt_exp, fid, request); + rc = md_get_remote_perm(tgt_exp, fid, oc, request); + + RETURN(rc); +} + +static int lmv_renew_capa(struct obd_export *exp, struct obd_capa *ocapa, + renew_capa_cb_t cb) +{ + struct obd_device *obd = exp->exp_obd; + struct lmv_obd *lmv = &obd->u.lmv; + struct obd_export *tgt_exp; + int rc; + ENTRY; + + rc = lmv_check_connect(obd); + if (rc) + RETURN(rc); + + tgt_exp = lmv_get_export(lmv, &ocapa->c_capa.lc_fid); + if (IS_ERR(tgt_exp)) + RETURN(PTR_ERR(tgt_exp)); + rc = md_renew_capa(tgt_exp, ocapa, cb); RETURN(rc); } @@ -2550,7 +2586,8 @@ struct md_ops lmv_md_ops = { .m_free_lustre_md = lmv_free_lustre_md, .m_set_open_replay_data = lmv_set_open_replay_data, .m_clear_open_replay_data = lmv_clear_open_replay_data, - .m_get_remote_perm = lmv_get_remote_perm + .m_get_remote_perm = lmv_get_remote_perm, + .m_renew_capa = lmv_renew_capa }; int __init lmv_init(void) diff --git a/lustre/lmv/lmv_object.c b/lustre/lmv/lmv_object.c 
index f37f5cc..00b1c7a 100644 --- a/lustre/lmv/lmv_object.c +++ b/lustre/lmv/lmv_object.c @@ -282,7 +282,7 @@ __lmv_obj_create(struct obd_device *obd, const struct lu_fid *fid, * obtained from correct MDT and used for constructing the object. */ struct lmv_obj * lmv_obj_create(struct obd_export *exp, const struct lu_fid *fid, - struct lmv_stripe_md *mea) + struct obd_capa *oc, struct lmv_stripe_md *mea) { struct obd_device *obd = exp->exp_obd; struct lmv_obd *lmv = &obd->u.lmv; @@ -312,7 +312,7 @@ lmv_obj_create(struct obd_export *exp, const struct lu_fid *fid, if (IS_ERR(tgt_exp)) GOTO(cleanup, obj = (void *)tgt_exp); - rc = md_getattr(tgt_exp, fid, valid, mealen, &req); + rc = md_getattr(tgt_exp, fid, oc, valid, mealen, &req); if (rc) { CERROR("md_getattr() failed, error %d\n", rc); GOTO(cleanup, obj = ERR_PTR(rc)); diff --git a/lustre/lov/lov_obd.c b/lustre/lov/lov_obd.c index 1f2cccf..f81813a 100644 --- a/lustre/lov/lov_obd.c +++ b/lustre/lov/lov_obd.c @@ -1031,7 +1031,7 @@ do { static int lov_destroy(struct obd_export *exp, struct obdo *oa, struct lov_stripe_md *lsm, struct obd_trans_info *oti, - struct obd_export *md_exp) + struct obd_export *md_exp, void *capa) { struct lov_request_set *set; struct obd_info oinfo; @@ -1064,7 +1064,7 @@ static int lov_destroy(struct obd_export *exp, struct obdo *oa, oti->oti_logcookies = set->set_cookies + req->rq_stripe; err = obd_destroy(lov->lov_tgts[req->rq_idx]->ltd_exp, - req->rq_oi.oi_oa, NULL, oti, NULL); + req->rq_oi.oi_oa, NULL, oti, NULL, capa); err = lov_update_common_set(set, req, err); if (err) { CERROR("error: destroying objid "LPX64" subobj " @@ -1400,7 +1400,8 @@ static int lov_punch(struct obd_export *exp, struct obd_info *oinfo, } static int lov_sync(struct obd_export *exp, struct obdo *oa, - struct lov_stripe_md *lsm, obd_off start, obd_off end) + struct lov_stripe_md *lsm, obd_off start, obd_off end, + void *capa) { struct lov_request_set *set; struct obd_info oinfo; @@ -1426,7 +1427,7 @@ static int 
lov_sync(struct obd_export *exp, struct obdo *oa, rc = obd_sync(lov->lov_tgts[req->rq_idx]->ltd_exp, req->rq_oi.oi_oa, NULL, req->rq_oi.oi_policy.l_extent.start, - req->rq_oi.oi_policy.l_extent.end); + req->rq_oi.oi_policy.l_extent.end, capa); err = lov_update_common_set(set, req, rc); if (err) { CERROR("error: fsync objid "LPX64" subobj "LPX64 @@ -1626,12 +1627,20 @@ static int lov_ap_completion(void *data, int cmd, struct obdo *oa, int rc) return rc; } +static struct obd_capa *lov_ap_lookup_capa(void *data, int cmd) +{ + struct lov_async_page *lap = LAP_FROM_COOKIE(data); + + return lap->lap_caller_ops->ap_lookup_capa(lap->lap_caller_data, cmd); +} + static struct obd_async_page_ops lov_async_page_ops = { .ap_make_ready = lov_ap_make_ready, .ap_refresh_count = lov_ap_refresh_count, .ap_fill_obdo = lov_ap_fill_obdo, .ap_update_obdo = lov_ap_update_obdo, .ap_completion = lov_ap_completion, + .ap_lookup_capa = lov_ap_lookup_capa, }; int lov_prep_async_page(struct obd_export *exp, struct lov_stripe_md *lsm, @@ -2391,7 +2400,7 @@ static int lov_set_info_async(struct obd_export *exp, obd_count keylen, GOTO(out, rc); } - if (KEY_IS("evict_by_nid")) { + if (KEY_IS("evict_by_nid") || KEY_IS(KEY_CAPA_KEY)) { for (i = 0; i < lov->desc.ld_tgt_count; i++) { /* OST was disconnected or is inactive */ if (!lov->lov_tgts[i] || !lov->lov_tgts[i]->ltd_active) diff --git a/lustre/lov/lov_request.c b/lustre/lov/lov_request.c index 92d5467..6538c45 100644 --- a/lustre/lov/lov_request.c +++ b/lustre/lov/lov_request.c @@ -615,7 +615,8 @@ cleanup: continue; sub_exp = lov->lov_tgts[req->rq_idx]->ltd_exp; - err = obd_destroy(sub_exp, req->rq_oi.oi_oa, NULL, oti, NULL); + err = obd_destroy(sub_exp, req->rq_oi.oi_oa, NULL, oti, NULL, + NULL); if (err) CERROR("Failed to uncreate objid "LPX64" subobj " LPX64" on OST idx %d: rc = %d\n", diff --git a/lustre/mdc/mdc_internal.h b/lustre/mdc/mdc_internal.h index 311f5df..d444750 100644 --- a/lustre/mdc/mdc_internal.h +++ b/lustre/mdc/mdc_internal.h 
@@ -29,13 +29,16 @@ void mdc_pack_req_body(struct ptlrpc_request *req, int offset, __u64 valid, const struct lu_fid *fid, - int ea_size, int flags); + struct obd_capa *oc, int ea_size, int flags); +void mdc_pack_capa(struct ptlrpc_request *req, int offset, struct obd_capa *oc); void mdc_pack_rep_body(struct ptlrpc_request *); void mdc_is_subdir_pack(struct ptlrpc_request *req, int offset, - const struct lu_fid *pfid, - const struct lu_fid *cfid, int flags); + const struct lu_fid *pfid, const struct lu_fid *cfid, + struct obd_capa *pc, struct obd_capa *cc, + int flags); void mdc_readdir_pack(struct ptlrpc_request *req, int pos, __u64 offset, - __u32 size, const struct lu_fid *fid); + __u32 size, const struct lu_fid *fid, + struct obd_capa *oc); void mdc_getattr_pack(struct ptlrpc_request *req, int offset, __u64 valid, int flags, struct md_op_data *data); void mdc_setattr_pack(struct ptlrpc_request *req, int offset, @@ -139,25 +142,6 @@ int mdc_enqueue(struct obd_export *exp, int mdc_init_ea_size(struct obd_export *exp, int easize, int def_easzie, int cookiesize); -int mdc_getstatus(struct obd_export *exp, struct lu_fid *rootfid); -int mdc_getattr(struct obd_export *exp, const struct lu_fid *fid, - obd_valid valid, int ea_size, - struct ptlrpc_request **request); -int mdc_getattr_name(struct obd_export *exp, const struct lu_fid *fid, - const char *filename, int namelen, obd_valid valid, - int ea_size, struct ptlrpc_request **request); -int mdc_setattr(struct obd_export *exp, struct md_op_data *op_data, - void *ea, int ealen, void *ea2, int ea2len, - struct ptlrpc_request **request); -int mdc_setxattr(struct obd_export *exp, const struct lu_fid *fid, - obd_valid valid, const char *xattr_name, - const char *input, int input_size, - int output_size, int flags, - struct ptlrpc_request **request); -int mdc_getxattr(struct obd_export *exp, const struct lu_fid *fid, - obd_valid valid, const char *xattr_name, - const char *input, int input_size, - int output_size, int flags, 
struct ptlrpc_request **request); int mdc_open(struct obd_export *exp, obd_id ino, int type, int flags, struct lov_mds_md *lmm, int lmm_size, struct lustre_handle *fh, struct ptlrpc_request **); @@ -178,42 +162,23 @@ int mdc_set_open_replay_data(struct obd_export *exp, int mdc_clear_open_replay_data(struct obd_export *exp, struct obd_client_handle *och); -int mdc_close(struct obd_export *, struct md_op_data *, - struct obd_client_handle *och, struct ptlrpc_request **); - -int mdc_readpage(struct obd_export *exp, const struct lu_fid *fid, - __u64 offset, struct page *, struct ptlrpc_request **); - int mdc_create(struct obd_export *exp, struct md_op_data *op_data, - const void *data, int datalen, int mode, __u32 uid, - __u32 gid, __u32 cap_effective, __u64 rdev, - struct ptlrpc_request **request); - -int mdc_unlink(struct obd_export *exp, struct md_op_data *op_data, - struct ptlrpc_request **request); - + const void *data, int datalen, int mode, __u32 uid, __u32 gid, + __u32 cap_effective, __u64 rdev, struct ptlrpc_request **request); int mdc_link(struct obd_export *exp, struct md_op_data *op_data, - struct ptlrpc_request **); - + struct ptlrpc_request **request); int mdc_rename(struct obd_export *exp, struct md_op_data *op_data, const char *old, int oldlen, const char *new, int newlen, struct ptlrpc_request **request); - -int mdc_is_subdir(struct obd_export *exp, const struct lu_fid *pfid, - const struct lu_fid *cfid, struct ptlrpc_request **request); - -int mdc_sync(struct obd_export *exp, const struct lu_fid *fid, - struct ptlrpc_request **); - +int mdc_setattr(struct obd_export *exp, struct md_op_data *op_data, + void *ea, int ealen, void *ea2, int ea2len, + struct ptlrpc_request **request); +int mdc_unlink(struct obd_export *exp, struct md_op_data *op_data, + struct ptlrpc_request **request); +int mdc_cancel_unused(struct obd_export *exp, const struct lu_fid *fid, + int flags, void *opaque); int mdc_lock_match(struct obd_export *exp, int flags, const struct 
lu_fid *fid, ldlm_type_t type, ldlm_policy_data_t *policy, ldlm_mode_t mode, struct lustre_handle *lockh); - -int mdc_cancel_unused(struct obd_export *exp, const struct lu_fid *fid, - int flags, void *opaque); - -int mdc_done_writing(struct obd_export *exp, struct md_op_data *op_data, - struct obd_client_handle *och); - #endif diff --git a/lustre/mdc/mdc_lib.c b/lustre/mdc/mdc_lib.c index f89cbbe..1b43f1a 100644 --- a/lustre/mdc/mdc_lib.c +++ b/lustre/mdc/mdc_lib.c @@ -38,56 +38,79 @@ #endif #endif -void mdc_readdir_pack(struct ptlrpc_request *req, int pos, __u64 offset, - __u32 size, const struct lu_fid *fid) +static void mdc_pack_body(struct mdt_body *b) { - struct mdt_body *b; + LASSERT (b != NULL); - b = lustre_msg_buf(req->rq_reqmsg, pos, sizeof(*b)); b->fsuid = current->fsuid; b->fsgid = current->fsgid; b->capability = current->cap_effective; - b->fid1 = *fid; - b->size = offset; /* !! */ - b->suppgid = -1; - b->nlink = size; /* !! */ +} + +void mdc_pack_capa(struct ptlrpc_request *req, int offset, struct obd_capa *oc) +{ + struct lustre_capa *c; + + if (!oc) { + LASSERT(lustre_msg_buflen(req->rq_reqmsg, offset) == 0); + return; + } + + c = lustre_msg_buf(req->rq_reqmsg, offset, sizeof(*c)); + LASSERT(c); + capa_cpy(c, oc); + DEBUG_CAPA(D_SEC, c, "pack"); } void mdc_is_subdir_pack(struct ptlrpc_request *req, int offset, const struct lu_fid *pfid, - const struct lu_fid *cfid, int flags) + const struct lu_fid *cfid, + struct obd_capa *pc, + struct obd_capa *cc, int flags) { struct mdt_body *b = lustre_msg_buf(req->rq_reqmsg, offset, sizeof(*b)); - if (pfid) + if (pfid) { b->fid1 = *pfid; - if (cfid) + mdc_pack_capa(req, offset + 1, pc); + } + if (cfid) { b->fid2 = *cfid; + mdc_pack_capa(req, offset + 2, cc); + } b->valid = OBD_MD_FLID; b->flags = flags; } -static void mdc_pack_body(struct mdt_body *b) -{ - LASSERT (b != NULL); - - b->fsuid = current->fsuid; - b->fsgid = current->fsgid; - b->capability = current->cap_effective; -} - void mdc_pack_req_body(struct 
ptlrpc_request *req, int offset, __u64 valid, const struct lu_fid *fid, - int ea_size, int flags) + struct obd_capa *oc, int ea_size, int flags) { struct mdt_body *b = lustre_msg_buf(req->rq_reqmsg, offset, sizeof(*b)); - if (fid) - b->fid1 = *fid; b->valid = valid; b->eadatasize = ea_size; b->flags = flags; mdc_pack_body(b); + if (fid) { + b->fid1 = *fid; + mdc_pack_capa(req, offset + 1, oc); + } +} + +void mdc_readdir_pack(struct ptlrpc_request *req, int offset, __u64 pgoff, + __u32 size, const struct lu_fid *fid, + struct obd_capa *oc) +{ + struct mdt_body *b; + + b = lustre_msg_buf(req->rq_reqmsg, offset, sizeof(*b)); + b->fid1 = *fid; + b->size = pgoff; /* !! */ + b->suppgid = -1; + b->nlink = size; /* !! */ + mdc_pack_body(b); + mdc_pack_capa(req, offset + 1, oc); } /* packing of MDS records */ @@ -111,12 +134,14 @@ void mdc_create_pack(struct ptlrpc_request *req, int offset, rec->cr_time = op_data->mod_time; rec->cr_suppgid = op_data->suppgids[0]; rec->cr_flags = op_data->flags; - - tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1, op_data->namelen + 1); + + mdc_pack_capa(req, offset + 1, op_data->mod_capa1); + + tmp = lustre_msg_buf(req->rq_reqmsg, offset + 2, op_data->namelen + 1); LOGL0(op_data->name, op_data->namelen, tmp); if (data) { - tmp = lustre_msg_buf(req->rq_reqmsg, offset + 2, datalen); + tmp = lustre_msg_buf(req->rq_reqmsg, offset + 3, datalen); memcpy (tmp, data, datalen); } } @@ -176,8 +201,12 @@ void mdc_open_pack(struct ptlrpc_request *req, int offset, rec->cr_time = op_data->mod_time; rec->cr_suppgid = op_data->suppgids[0]; + mdc_pack_capa(req, offset + 1, op_data->mod_capa1); + /* the next buffer is child capa, which is used for replay, + * will be packed from the data in reply message. 
*/ + if (op_data->name) { - tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1, + tmp = lustre_msg_buf(req->rq_reqmsg, offset + 3, op_data->namelen + 1); LOGL0(op_data->name, op_data->namelen, tmp); } @@ -188,7 +217,7 @@ void mdc_open_pack(struct ptlrpc_request *req, int offset, /*XXX a hack for liblustre to set EA (LL_IOC_LOV_SETSTRIPE) */ rec->cr_fid2 = op_data->fid2; #endif - tmp = lustre_msg_buf(req->rq_reqmsg, offset + 2, lmmlen); + tmp = lustre_msg_buf(req->rq_reqmsg, offset + 4, lmmlen); memcpy (tmp, lmm, lmmlen); } } @@ -237,8 +266,10 @@ void mdc_setattr_pack(struct ptlrpc_request *req, int offset, rec = lustre_msg_buf(req->rq_reqmsg, offset, sizeof (*rec)); mdc_setattr_pack_rec(rec, op_data); + mdc_pack_capa(req, offset + 1, op_data->mod_capa1); + if (op_data->flags & (MF_SOM_CHANGE | MF_EPOCH_OPEN)) { - epoch = lustre_msg_buf(req->rq_reqmsg, offset + 1, + epoch = lustre_msg_buf(req->rq_reqmsg, offset + 2, sizeof(*epoch)); mdc_epoch_pack(epoch, op_data); } @@ -246,12 +277,12 @@ void mdc_setattr_pack(struct ptlrpc_request *req, int offset, if (ealen == 0) return; - memcpy(lustre_msg_buf(req->rq_reqmsg, offset + 2, ealen), ea, ealen); + memcpy(lustre_msg_buf(req->rq_reqmsg, offset + 3, ealen), ea, ealen); if (ea2len == 0) return; - memcpy(lustre_msg_buf(req->rq_reqmsg, offset + 3, ea2len), ea2, ea2len); + memcpy(lustre_msg_buf(req->rq_reqmsg, offset + 4, ea2len), ea2, ea2len); } void mdc_unlink_pack(struct ptlrpc_request *req, int offset, @@ -273,7 +304,9 @@ void mdc_unlink_pack(struct ptlrpc_request *req, int offset, rec->ul_fid2 = op_data->fid2; rec->ul_time = op_data->mod_time; - tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1, op_data->namelen + 1); + mdc_pack_capa(req, offset + 1, op_data->mod_capa1); + + tmp = lustre_msg_buf(req->rq_reqmsg, offset + 2, op_data->namelen + 1); LASSERT (tmp != NULL); LOGL0(op_data->name, op_data->namelen, tmp); } @@ -296,7 +329,10 @@ void mdc_link_pack(struct ptlrpc_request *req, int offset, rec->lk_fid2 = op_data->fid2; 
rec->lk_time = op_data->mod_time; - tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1, op_data->namelen + 1); + mdc_pack_capa(req, offset + 1, op_data->mod_capa1); + mdc_pack_capa(req, offset + 2, op_data->mod_capa2); + + tmp = lustre_msg_buf(req->rq_reqmsg, offset + 3, op_data->namelen + 1); LOGL0(op_data->name, op_data->namelen, tmp); } @@ -321,11 +357,14 @@ void mdc_rename_pack(struct ptlrpc_request *req, int offset, rec->rn_time = op_data->mod_time; rec->rn_mode = op_data->create_mode; - tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1, oldlen + 1); + mdc_pack_capa(req, offset + 1, op_data->mod_capa1); + mdc_pack_capa(req, offset + 2, op_data->mod_capa2); + + tmp = lustre_msg_buf(req->rq_reqmsg, offset + 3, oldlen + 1); LOGL0(old, oldlen, tmp); if (new) { - tmp = lustre_msg_buf(req->rq_reqmsg, offset + 2, newlen + 1); + tmp = lustre_msg_buf(req->rq_reqmsg, offset + 4, newlen + 1); LOGL0(new, newlen, tmp); } } @@ -345,9 +384,12 @@ void mdc_getattr_pack(struct ptlrpc_request *req, int offset, __u64 valid, b->fid1 = op_data->fid1; b->fid2 = op_data->fid2; + + mdc_pack_capa(req, offset + 1, op_data->mod_capa1); + if (op_data->name) { char *tmp; - tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1, + tmp = lustre_msg_buf(req->rq_reqmsg, offset + 2, op_data->namelen + 1); LOGL0(op_data->name, op_data->namelen, tmp); } @@ -363,6 +405,7 @@ void mdc_close_pack(struct ptlrpc_request *req, int offset, rec = lustre_msg_buf(req->rq_reqmsg, offset + 1, sizeof(*rec)); mdc_setattr_pack_rec(rec, op_data); + mdc_pack_capa(req, offset + 2, op_data->mod_capa1); mdc_epoch_pack(epoch, op_data); } diff --git a/lustre/mdc/mdc_locks.c b/lustre/mdc/mdc_locks.c index 6b688eb..a1c19d6 100644 --- a/lustre/mdc/mdc_locks.c +++ b/lustre/mdc/mdc_locks.c @@ -233,16 +233,16 @@ static int round_up(int val) * but this is incredibly unlikely, and questionable whether the client * could do MDS recovery under OOM anyways... 
*/ static void mdc_realloc_openmsg(struct ptlrpc_request *req, - struct mdt_body *body, int size[6]) + struct mdt_body *body, int size[9]) { int new_size, old_size; struct lustre_msg *new_msg; /* save old size */ - old_size = lustre_msg_size(lustre_request_magic(req), 6, size); + old_size = lustre_msg_size(lustre_request_magic(req), 9, size); - size[DLM_INTENT_REC_OFF + 2] = body->eadatasize; - new_size = lustre_msg_size(lustre_request_magic(req), 6, size); + size[DLM_INTENT_REC_OFF + 4] = body->eadatasize; + new_size = lustre_msg_size(lustre_request_magic(req), 9, size); OBD_ALLOC(new_msg, new_size); if (new_msg != NULL) { struct lustre_msg *old_msg = req->rq_reqmsg; @@ -250,7 +250,7 @@ static void mdc_realloc_openmsg(struct ptlrpc_request *req, DEBUG_REQ(D_INFO, req, "replace reqmsg for larger EA %u\n", body->eadatasize); memcpy(new_msg, old_msg, old_size); - lustre_msg_set_buflen(new_msg, DLM_INTENT_REC_OFF + 2, + lustre_msg_set_buflen(new_msg, DLM_INTENT_REC_OFF + 4, body->eadatasize); spin_lock(&req->rq_lock); @@ -289,16 +289,16 @@ int mdc_enqueue(struct obd_export *exp, struct ldlm_request *lockreq; struct ldlm_intent *lit; struct ldlm_reply *lockrep; - int size[7] = { [MSG_PTLRPC_BODY_OFF] = sizeof(struct ptlrpc_body), + int size[9] = { [MSG_PTLRPC_BODY_OFF] = sizeof(struct ptlrpc_body), [DLM_LOCKREQ_OFF] = sizeof(*lockreq), [DLM_INTENT_IT_OFF] = sizeof(*lit) }; - int repsize[5] = { [MSG_PTLRPC_BODY_OFF] = sizeof(struct ptlrpc_body), + int repsize[7] = { [MSG_PTLRPC_BODY_OFF] = sizeof(struct ptlrpc_body), [DLM_LOCKREPLY_OFF] = sizeof(*lockrep), [DLM_REPLY_REC_OFF] = sizeof(struct mdt_body), [DLM_REPLY_REC_OFF+1] = obddev->u.cli. 
cl_max_mds_easize }; int flags = extra_lock_flags | LDLM_FL_HAS_INTENT; - int repbufcnt = 4, rc; + int repbufcnt = 4, ea_off, rc; void *eadata; ENTRY; @@ -307,24 +307,40 @@ int mdc_enqueue(struct obd_export *exp, // ldlm_it2str(it->it_op), it_name, it_inode->i_ino); if (it->it_op & IT_OPEN) { + int do_join = !!(it->it_flags & O_JOIN_FILE); + it->it_create_mode = (it->it_create_mode & ~S_IFMT) | S_IFREG; size[DLM_INTENT_REC_OFF] = sizeof(struct mdt_rec_create); - size[DLM_INTENT_REC_OFF + 1] = op_data->namelen + 1; + /* parent capability */ + size[DLM_INTENT_REC_OFF + 1] = op_data->mod_capa1 ? + sizeof(struct lustre_capa) : 0; + /* child capability, used for replay only */ + size[DLM_INTENT_REC_OFF + 2] = op_data->mod_capa1 ? + sizeof(struct lustre_capa) : 0; + size[DLM_INTENT_REC_OFF + 3] = op_data->namelen + 1; /* As an optimization, we allocate an RPC request buffer for * at least a default-sized LOV EA even if we aren't sending * one. We grow the whole request to the next power-of-two * size since we get that much from a slab allocation anyways. * This avoids an allocation below in the common case where * we need to save a default-sized LOV EA for open replay. 
*/ - size[DLM_INTENT_REC_OFF + 2] = max(lmmsize, - obddev->u.cli.cl_default_mds_easize); - rc = lustre_msg_size(class_exp2cliimp(exp)->imp_msg_magic, 6, - size); + ea_off = DLM_INTENT_REC_OFF + 4; + size[ea_off] = max(lmmsize, + obddev->u.cli.cl_default_mds_easize); + if (do_join) + size[DLM_INTENT_REC_OFF + 5] = + sizeof(struct mds_rec_join); + rc = lustre_msg_size(class_exp2cliimp(exp)->imp_msg_magic, + 8 + do_join, size); if (rc & (rc - 1)) - size[DLM_INTENT_REC_OFF + 2] = - min(size[DLM_INTENT_REC_OFF+2]+round_up(rc)-rc, - obddev->u.cli.cl_max_mds_easize); + size[ea_off] = min(size[ea_off] + round_up(rc) - rc, + obddev->u.cli.cl_max_mds_easize); + + req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_DLM_VERSION, + LDLM_ENQUEUE, 8 + do_join, size, NULL); + if (!req) + RETURN(-ENOMEM); if (it->it_flags & O_JOIN_FILE) { __u64 head_size = *(__u32*)cb_data; @@ -332,26 +348,14 @@ int mdc_enqueue(struct obd_export *exp, /* join is like an unlink of the tail */ policy.l_inodebits.bits = MDS_INODELOCK_UPDATE; - size[DLM_INTENT_REC_OFF + 3] = - sizeof(struct mdt_rec_join); - req = ptlrpc_prep_req(class_exp2cliimp(exp), - LUSTRE_DLM_VERSION, LDLM_ENQUEUE, - 7, size, NULL); /* when joining file, cb_data and lmm args together * indicate the head file size*/ - mdc_join_pack(req, DLM_INTENT_REC_OFF + 3, op_data, + mdc_join_pack(req, DLM_INTENT_REC_OFF + 5, op_data, (head_size << 32) | tsize); cb_data = NULL; lmm = NULL; - } else { - req = ptlrpc_prep_req(class_exp2cliimp(exp), - LUSTRE_DLM_VERSION, LDLM_ENQUEUE, - 6, size, NULL); } - if (!req) - RETURN(-ENOMEM); - spin_lock(&req->rq_lock); req->rq_replay = 1; spin_unlock(&req->rq_lock); @@ -370,12 +374,16 @@ int mdc_enqueue(struct obd_export *exp, repsize[repbufcnt++] = client_is_remote(exp) ? 
sizeof(struct mdt_remote_perm) : LUSTRE_POSIX_ACL_MAX_SIZE; + repsize[repbufcnt++] = sizeof(struct lustre_capa); + repsize[repbufcnt++] = sizeof(struct lustre_capa); } else if (it->it_op & IT_UNLINK) { size[DLM_INTENT_REC_OFF] = sizeof(struct mdt_rec_unlink); - size[DLM_INTENT_REC_OFF + 1] = op_data->namelen + 1; + size[DLM_INTENT_REC_OFF + 1] = op_data->mod_capa1 ? + sizeof(struct lustre_capa) : 0; + size[DLM_INTENT_REC_OFF + 2] = op_data->namelen + 1; policy.l_inodebits.bits = MDS_INODELOCK_UPDATE; req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_DLM_VERSION, - LDLM_ENQUEUE, 5, size, NULL); + LDLM_ENQUEUE, 6, size, NULL); if (!req) RETURN(-ENOMEM); @@ -394,13 +402,15 @@ int mdc_enqueue(struct obd_export *exp, valid |= client_is_remote(exp) ? OBD_MD_FLRMTPERM : OBD_MD_FLACL; size[DLM_INTENT_REC_OFF] = sizeof(struct mdt_body); - size[DLM_INTENT_REC_OFF + 1] = op_data->namelen + 1; + size[DLM_INTENT_REC_OFF + 1] = op_data->mod_capa1 ? + sizeof(struct lustre_capa) : 0; + size[DLM_INTENT_REC_OFF + 2] = op_data->namelen + 1; if (it->it_op & IT_GETATTR) policy.l_inodebits.bits = MDS_INODELOCK_UPDATE; req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_DLM_VERSION, - LDLM_ENQUEUE, 5, size, NULL); + LDLM_ENQUEUE, 6, size, NULL); if (!req) RETURN(-ENOMEM); @@ -416,6 +426,7 @@ int mdc_enqueue(struct obd_export *exp, repsize[repbufcnt++] = client_is_remote(exp) ? 
sizeof(struct mdt_remote_perm) : LUSTRE_POSIX_ACL_MAX_SIZE; + repsize[repbufcnt++] = sizeof(struct lustre_capa); } else if (it->it_op == IT_READDIR) { policy.l_inodebits.bits = MDS_INODELOCK_UPDATE; req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_DLM_VERSION, @@ -496,12 +507,12 @@ int mdc_enqueue(struct obd_export *exp, it->it_op,it->d.lustre.it_disposition,it->d.lustre.it_status); /* We know what to expect, so we do any byte flipping required here */ - LASSERT(repbufcnt == 5 || repbufcnt == 2); - if (repbufcnt == 5) { + LASSERT(repbufcnt == 7 || repbufcnt == 6 || repbufcnt == 2); + if (repbufcnt >= 6) { struct mdt_body *body; - int offset = DLM_REPLY_REC_OFF; + int reply_off = DLM_REPLY_REC_OFF; - body = lustre_swab_repbuf(req, offset++, sizeof(*body), + body = lustre_swab_repbuf(req, reply_off++, sizeof(*body), lustre_swab_mdt_body); if (body == NULL) { CERROR ("Can't swab mdt_body\n"); @@ -518,7 +529,7 @@ int mdc_enqueue(struct obd_export *exp, if ((body->valid & OBD_MD_FLDIREA) != 0) { if (body->eadatasize) { - eadata = lustre_swab_repbuf(req, offset++, + eadata = lustre_swab_repbuf(req, reply_off++, body->eadatasize, NULL); if (eadata == NULL) { CERROR ("Missing/short eadata\n"); @@ -529,7 +540,7 @@ int mdc_enqueue(struct obd_export *exp, if ((body->valid & OBD_MD_FLEASIZE)) { /* The eadata is opaque; just check that it is there. * Eventually, obd_unpackmd() will check the contents */ - eadata = lustre_swab_repbuf(req, offset++, + eadata = lustre_swab_repbuf(req, reply_off++, body->eadatasize, NULL); if (eadata == NULL) { CERROR ("Missing/short eadata\n"); @@ -557,12 +568,12 @@ int mdc_enqueue(struct obd_export *exp, * reallocate it here to hold the actual LOV EA. 
*/ if (it->it_op & IT_OPEN) { if (lustre_msg_buflen(req->rq_reqmsg, - DLM_INTENT_REC_OFF + 2) < + DLM_INTENT_REC_OFF + 4) < body->eadatasize) mdc_realloc_openmsg(req, body, size); lmm = lustre_msg_buf(req->rq_reqmsg, - DLM_INTENT_REC_OFF + 2, + DLM_INTENT_REC_OFF + 4, body->eadatasize); if (lmm) memcpy(lmm, eadata, body->eadatasize); @@ -572,12 +583,42 @@ int mdc_enqueue(struct obd_export *exp, struct mdt_remote_perm *perm; LASSERT(client_is_remote(exp)); - perm = lustre_swab_repbuf(req, offset++, sizeof(*perm), + perm = lustre_swab_repbuf(req, reply_off++, + sizeof(*perm), lustre_swab_mdt_remote_perm); if (perm == NULL) { CERROR("missing remote permission!\n"); RETURN(-EPROTO); } + } else if ((body->valid & OBD_MD_FLACL) && body->aclsize) { + reply_off++; + } + if (body->valid & OBD_MD_FLMDSCAPA) { + struct lustre_capa *capa, *p; + + capa = lustre_unpack_capa(req->rq_repmsg, reply_off++); + if (capa == NULL) { + CERROR("Missing/short client fid capa\n"); + RETURN(-EPROTO); + } + + if (it->it_op & IT_OPEN) { + /* client fid capa will be checked in replay */ + p = lustre_msg_buf(req->rq_reqmsg, + DLM_INTENT_REC_OFF + 2, + sizeof(*p)); + LASSERT(p); + *p = *capa; + } + } + if (body->valid & OBD_MD_FLOSSCAPA) { + struct lustre_capa *capa; + + capa = lustre_unpack_capa(req->rq_repmsg, reply_off++); + if (capa == NULL) { + CERROR("Missing/short client oss capa\n"); + RETURN(-EPROTO); + } } } @@ -630,7 +671,7 @@ int mdc_intent_lock(struct obd_export *exp, struct md_op_data *op_data, op_data->namelen, op_data->name, PFID(&op_data->fid2), PFID(&op_data->fid1), ldlm_it2str(it->it_op), it->it_flags); - if (fid_is_sane(&op_data->fid2) && + if (fid_is_sane((struct lu_fid *)&op_data->fid2) && (it->it_op & (IT_LOOKUP | IT_GETATTR))) { /* We could just return 1 immediately, but since we should only * be called in revalidate_it if we already have a lock, let's diff --git a/lustre/mdc/mdc_reint.c b/lustre/mdc/mdc_reint.c index 4982a34..69db90e 100644 --- a/lustre/mdc/mdc_reint.c 
+++ b/lustre/mdc/mdc_reint.c @@ -74,15 +74,18 @@ int mdc_setattr(struct obd_export *exp, struct md_op_data *op_data, struct mdt_rec_setattr *rec; struct mdc_rpc_lock *rpc_lock; struct obd_device *obd = exp->exp_obd; - int size[5] = { sizeof(struct ptlrpc_body), - sizeof(*rec), 0, ealen, ea2len }; - int bufcount = 3, rc; + int size[6] = { sizeof(struct ptlrpc_body), + sizeof(*rec), 0, 0, ealen, ea2len }; + int bufcount = 4, rc; ENTRY; LASSERT(op_data != NULL); + if (op_data->mod_capa1) + size[REQ_REC_OFF + 1] = sizeof(struct lustre_capa); + if (op_data->flags & (MF_SOM_CHANGE | MF_EPOCH_OPEN)) - size[2] = sizeof(struct mdt_epoch); + size[REQ_REC_OFF + 2] = sizeof(struct mdt_epoch); if (ealen > 0) { bufcount++; @@ -109,7 +112,8 @@ int mdc_setattr(struct obd_export *exp, struct md_op_data *op_data, mdc_setattr_pack(req, REQ_REC_OFF, op_data, ea, ealen, ea2, ea2len); size[REPLY_REC_OFF] = sizeof(struct mdt_body); - ptlrpc_req_set_repsize(req, 2, size); + size[REPLY_REC_OFF + 1] = sizeof(struct lustre_capa); + ptlrpc_req_set_repsize(req, 3, size); rc = mdc_reint(req, rpc_lock, LUSTRE_IMP_FULL); *request = req; @@ -125,12 +129,15 @@ int mdc_create(struct obd_export *exp, struct md_op_data *op_data, { struct obd_device *obd = exp->exp_obd; struct ptlrpc_request *req; - int level, bufcount = 3, rc; - int size[4] = { sizeof(struct ptlrpc_body), + int size[5] = { sizeof(struct ptlrpc_body), sizeof(struct mdt_rec_create), - op_data->namelen + 1 }; + 0, op_data->namelen + 1 }; + int level, bufcount = 4, rc; ENTRY; + if (op_data->mod_capa1) + size[REQ_REC_OFF + 1] = sizeof(struct lustre_capa); + if (data && datalen) { size[bufcount] = datalen; bufcount++; @@ -169,13 +176,17 @@ int mdc_unlink(struct obd_export *exp, struct md_op_data *op_data, struct ptlrpc_request *req = *request; int size[4] = { sizeof(struct ptlrpc_body), sizeof(struct mdt_rec_unlink), - op_data->namelen + 1 }; + 0, op_data->namelen + 1 }; int rc; ENTRY; LASSERT(req == NULL); + + if (op_data->mod_capa1) + 
size[REQ_REC_OFF + 1] = sizeof(struct lustre_capa); + req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_MDS_VERSION, - MDS_REINT, 3, size, NULL); + MDS_REINT, 4, size, NULL); if (req == NULL) RETURN(-ENOMEM); *request = req; @@ -198,14 +209,19 @@ int mdc_link(struct obd_export *exp, struct md_op_data *op_data, { struct obd_device *obd = exp->exp_obd; struct ptlrpc_request *req; - int size[3] = { sizeof(struct ptlrpc_body), + int size[5] = { sizeof(struct ptlrpc_body), sizeof(struct mdt_rec_link), - op_data->namelen + 1 }; + 0, 0, op_data->namelen + 1 }; int rc; ENTRY; + if (op_data->mod_capa1) + size[REQ_REC_OFF + 1] = sizeof(struct lustre_capa); + if (op_data->mod_capa2) + size[REQ_REC_OFF + 2] = sizeof(struct lustre_capa); + req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_MDS_VERSION, - MDS_REINT, 3, size, NULL); + MDS_REINT, 5, size, NULL); if (req == NULL) RETURN(-ENOMEM); @@ -228,14 +244,19 @@ int mdc_rename(struct obd_export *exp, struct md_op_data *op_data, { struct obd_device *obd = exp->exp_obd; struct ptlrpc_request *req; - int size[4] = { sizeof(struct ptlrpc_body), + int size[6] = { sizeof(struct ptlrpc_body), sizeof(struct mdt_rec_rename), - oldlen + 1, newlen + 1 }; + 0, 0, oldlen + 1, newlen + 1 }; int rc; ENTRY; + if (op_data->mod_capa1) + size[REQ_REC_OFF + 1] = sizeof(struct lustre_capa); + if (op_data->mod_capa2) + size[REQ_REC_OFF + 2] = sizeof(struct lustre_capa); + req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_MDS_VERSION, - MDS_REINT, 4, size, NULL); + MDS_REINT, 6, size, NULL); if (req == NULL) RETURN(-ENOMEM); diff --git a/lustre/mdc/mdc_request.c b/lustre/mdc/mdc_request.c index 2871f15..def5806 100644 --- a/lustre/mdc/mdc_request.c +++ b/lustre/mdc/mdc_request.c @@ -52,14 +52,40 @@ static quota_interface_t *quota_interface; static int mdc_cleanup(struct obd_device *obd); extern int mds_queue_req(struct ptlrpc_request *); + +static inline struct obd_capa *mdc_unpack_capa(struct ptlrpc_request *req, + unsigned int offset) +{ + 
struct lustre_capa *capa; + struct obd_capa *oc; + + /* swabbed already in mdc_enqueue */ + capa = lustre_msg_buf(req->rq_repmsg, offset, sizeof(*capa)); + if (capa == NULL) { + CERROR("missing capa at offset %d failed!\n", offset); + return ERR_PTR(-EFAULT); + } + + oc = alloc_capa(CAPA_SITE_CLIENT); + if (!oc) { + CERROR("alloc capa failed!\n"); + return ERR_PTR(-ENOMEM); + } + oc->c_capa = *capa; + + return oc; +} + /* Helper that implements most of mdc_getstatus and signal_completed_replay. */ /* XXX this should become mdc_get_info("key"), sending MDS_GET_INFO RPC */ static int send_getstatus(struct obd_import *imp, struct lu_fid *rootfid, - int level, int msg_flags) + struct obd_capa **pc, int level, int msg_flags) { struct ptlrpc_request *req; struct mdt_body *body; - int rc, size[2] = { sizeof(struct ptlrpc_body), sizeof(*body) }; + int rc, size[3] = { sizeof(struct ptlrpc_body), + sizeof(*body), + sizeof(struct lustre_capa) }; ENTRY; req = ptlrpc_prep_req(imp, LUSTRE_MDS_VERSION, MDS_GETSTATUS, 2, size, @@ -68,9 +94,9 @@ static int send_getstatus(struct obd_import *imp, struct lu_fid *rootfid, GOTO(out, rc = -ENOMEM); req->rq_send_state = level; - ptlrpc_req_set_repsize(req, 2, size); + ptlrpc_req_set_repsize(req, 3, size); - mdc_pack_req_body(req, REQ_REC_OFF, 0, NULL, 0, 0); + mdc_pack_req_body(req, REQ_REC_OFF, 0, NULL, NULL, 0, 0); lustre_msg_add_flags(req->rq_reqmsg, msg_flags); rc = ptlrpc_queue_wait(req); @@ -84,6 +110,15 @@ static int send_getstatus(struct obd_import *imp, struct lu_fid *rootfid, *rootfid = body->fid1; + if (body->valid & OBD_MD_FLMDSCAPA) { + struct obd_capa *oc; + + oc = mdc_unpack_capa(req, REPLY_REC_OFF + 1); + if (IS_ERR(oc)) + GOTO(out, rc = PTR_ERR(oc)); + *pc = oc; + } + CDEBUG(D_NET, "root fid="DFID", last_committed="LPU64 ", last_xid="LPU64"\n", PFID(rootfid), @@ -98,9 +133,10 @@ static int send_getstatus(struct obd_import *imp, struct lu_fid *rootfid, } /* This should be mdc_get_info("rootfid") */ -int 
mdc_getstatus(struct obd_export *exp, struct lu_fid *rootfid) +int mdc_getstatus(struct obd_export *exp, struct lu_fid *rootfid, + struct obd_capa **pc) { - return send_getstatus(class_exp2cliimp(exp), rootfid, + return send_getstatus(class_exp2cliimp(exp), rootfid, pc, LUSTRE_IMP_FULL, 0); } @@ -120,24 +156,24 @@ int mdc_getattr_common(struct obd_export *exp, unsigned int ea_size, { struct mdt_body *body; void *eadata; - int size[4] = { sizeof(struct ptlrpc_body), sizeof(*body) }; + int size[5] = { sizeof(struct ptlrpc_body), + sizeof(*body) }; int bufcount = 2, rc; ENTRY; /* Request message already built. */ if (ea_size != 0) { - size[bufcount] = ea_size; + size[bufcount++] = ea_size; CDEBUG(D_INODE, "reserved %u bytes for MD/symlink in packet\n", ea_size); } - bufcount++; if (acl_size) { - size[bufcount] = acl_size; + size[bufcount++] = acl_size; CDEBUG(D_INODE, "reserved %u bytes for ACL\n", acl_size); } - bufcount++; + size[bufcount++] = sizeof(struct lustre_capa); ptlrpc_req_set_repsize(req, bufcount, size); rc = ptlrpc_queue_wait(req); @@ -178,23 +214,27 @@ int mdc_getattr_common(struct obd_export *exp, unsigned int ea_size, } int mdc_getattr(struct obd_export *exp, const struct lu_fid *fid, - obd_valid valid, int ea_size, struct ptlrpc_request **request) + struct obd_capa *oc, obd_valid valid, int ea_size, + struct ptlrpc_request **request) { struct ptlrpc_request *req; - int size[2] = { sizeof(struct ptlrpc_body), sizeof(struct mdt_body) }; + int size[3] = { sizeof(struct ptlrpc_body), sizeof(struct mdt_body) }; int acl_size = 0, rc; ENTRY; + if (oc) + size[REQ_REC_OFF + 1] = sizeof(struct lustre_capa); + /* * XXX do we need to make another request here? We just did a getattr * to do the lookup in the first place. 
*/ req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_MDS_VERSION, - MDS_GETATTR, 2, size, NULL); + MDS_GETATTR, 3, size, NULL); if (!req) GOTO(out, rc = -ENOMEM); - mdc_pack_req_body(req, REQ_REC_OFF, valid, fid, ea_size, + mdc_pack_req_body(req, REQ_REC_OFF, valid, fid, oc, ea_size, MDS_BFLAG_EXT_FLAGS/*request "new" flags(bug 9486)*/); /* currently only root inode will call us with FLACL */ @@ -212,24 +252,29 @@ int mdc_getattr(struct obd_export *exp, const struct lu_fid *fid, } int mdc_getattr_name(struct obd_export *exp, const struct lu_fid *fid, - const char *filename, int namelen, obd_valid valid, - int ea_size, struct ptlrpc_request **request) + struct obd_capa *oc, const char *filename, int namelen, + obd_valid valid, int ea_size, + struct ptlrpc_request **request) { struct ptlrpc_request *req; struct mdt_body *body; - int rc, size[3] = { sizeof(struct ptlrpc_body), sizeof(*body), namelen}; + int size[4] = { sizeof(struct ptlrpc_body), sizeof(*body), 0, namelen}; + int rc; ENTRY; + if (oc) + size[REQ_REC_OFF + 1] = sizeof(struct lustre_capa); + req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_MDS_VERSION, - MDS_GETATTR_NAME, 3, size, NULL); + MDS_GETATTR_NAME, 4, size, NULL); if (!req) GOTO(out, rc = -ENOMEM); - mdc_pack_req_body(req, REQ_REC_OFF, valid, fid, ea_size, + mdc_pack_req_body(req, REQ_REC_OFF, valid, fid, oc, ea_size, MDS_BFLAG_EXT_FLAGS/*request "new" flags(bug 9486)*/); LASSERT(strnlen(filename, namelen) == namelen - 1); - memcpy(lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF + 1, namelen), + memcpy(lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF + 2, namelen), filename, namelen); rc = mdc_getattr_common(exp, ea_size, 0, req); @@ -243,20 +288,28 @@ int mdc_getattr_name(struct obd_export *exp, const struct lu_fid *fid, } int mdc_is_subdir(struct obd_export *exp, const struct lu_fid *pfid, - const struct lu_fid *cfid, struct ptlrpc_request **request) + const struct lu_fid *cfid, + struct obd_capa *pc, struct obd_capa *cc, + struct ptlrpc_request 
**request) { - int size[2] = { sizeof(struct ptlrpc_body), sizeof(struct mdt_body) }; + int size[4] = { sizeof(struct ptlrpc_body), + sizeof(struct mdt_body) }; struct ptlrpc_request *req; struct mdt_body *body; int rc; ENTRY; + if (pc) + size[REQ_REC_OFF + 1] = sizeof(struct lustre_capa); + if (cc) + size[REQ_REC_OFF + 2] = sizeof(struct lustre_capa); + req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_MDS_VERSION, - MDS_IS_SUBDIR, 2, size, NULL); + MDS_IS_SUBDIR, 4, size, NULL); if (!req) GOTO(out, rc = -ENOMEM); - mdc_is_subdir_pack(req, REQ_REC_OFF, pfid, cfid, 0); + mdc_is_subdir_pack(req, REQ_REC_OFF, pfid, cfid, pc, cc, 0); ptlrpc_req_set_repsize(req, 2, size); rc = ptlrpc_queue_wait(req); @@ -277,17 +330,20 @@ int mdc_is_subdir(struct obd_export *exp, const struct lu_fid *pfid, static int mdc_xattr_common(struct obd_export *exp, const struct lu_fid *fid, + struct obd_capa *oc, int opcode, obd_valid valid, const char *xattr_name, const char *input, int input_size, int output_size, int flags, struct ptlrpc_request **request) { struct ptlrpc_request *req; - int size[4] = { sizeof(struct ptlrpc_body), sizeof(struct mdt_body) }; - // int size[3] = {sizeof(struct mdt_body)}, bufcnt = 1; - int rc, xattr_namelen = 0, bufcnt = 2, offset, remote_acl = 0; + int size[5] = { sizeof(struct ptlrpc_body), sizeof(struct mdt_body) }; + int bufcnt = 3, offset = REQ_REC_OFF + 2; + int rc, xattr_namelen = 0, remote_acl = 0; void *tmp; ENTRY; + if (oc) + size[REQ_REC_OFF + 1] = sizeof(struct lustre_capa); if (xattr_name) { xattr_namelen = strlen(xattr_name) + 1; size[bufcnt++] = xattr_namelen; @@ -303,9 +359,8 @@ int mdc_xattr_common(struct obd_export *exp, const struct lu_fid *fid, GOTO(out, rc = -ENOMEM); /* request data */ - mdc_pack_req_body(req, REQ_REC_OFF, valid, fid, output_size, flags); + mdc_pack_req_body(req, REQ_REC_OFF, valid, fid, oc, output_size, flags); - offset = REQ_REC_OFF + 1; if (xattr_name) { tmp = lustre_msg_buf(req->rq_reqmsg, offset++, xattr_namelen); 
@@ -363,21 +418,20 @@ err_out: } int mdc_setxattr(struct obd_export *exp, const struct lu_fid *fid, - obd_valid valid, const char *xattr_name, - const char *input, int input_size, - int output_size, int flags, + struct obd_capa *oc, obd_valid valid, const char *xattr_name, + const char *input, int input_size, int output_size, int flags, struct ptlrpc_request **request) { - return mdc_xattr_common(exp, fid, MDS_SETXATTR, valid, xattr_name, + return mdc_xattr_common(exp, fid, oc, MDS_SETXATTR, valid, xattr_name, input, input_size, output_size, flags, request); } int mdc_getxattr(struct obd_export *exp, const struct lu_fid *fid, - obd_valid valid, const char *xattr_name, - const char *input, int input_size, - int output_size, int flags, struct ptlrpc_request **request) + struct obd_capa *oc, obd_valid valid, const char *xattr_name, + const char *input, int input_size, int output_size, int flags, + struct ptlrpc_request **request) { - return mdc_xattr_common(exp, fid, MDS_GETXATTR, valid, xattr_name, + return mdc_xattr_common(exp, fid, oc, MDS_GETXATTR, valid, xattr_name, input, input_size, output_size, flags, request); } @@ -498,7 +552,7 @@ int mdc_get_lustre_md(struct obd_export *exp, struct ptlrpc_request *req, if ((md->body->valid & OBD_MD_FLACL) && md->body->aclsize) { rc = mdc_unpack_acl(dt_exp, req, md, offset); if (rc) - GOTO(err_out, rc); + GOTO(out, rc); offset++; } @@ -509,13 +563,39 @@ int mdc_get_lustre_md(struct obd_export *exp, struct ptlrpc_request *req, LASSERT(md->remote_perm); offset++; } -out: - RETURN(rc); -err_out: - if (md->lsm) - obd_free_memmd(dt_exp, &md->lsm); - goto out; + if (md->body->valid & OBD_MD_FLMDSCAPA) { + struct obd_capa *oc = mdc_unpack_capa(req, offset); + + if (IS_ERR(oc)) + GOTO(out, rc = PTR_ERR(oc)); + md->mds_capa = oc; + offset++; + } + + if (md->body->valid & OBD_MD_FLOSSCAPA) { + struct obd_capa *oc = mdc_unpack_capa(req, offset); + + if (IS_ERR(oc)) + GOTO(out, rc = PTR_ERR(oc)); + md->oss_capa = oc; + offset++; + } + + 
EXIT; +out: + if (rc) { + if (md->oss_capa) + free_capa(md->oss_capa); + if (md->mds_capa) + free_capa(md->mds_capa); +#ifdef CONFIG_FS_POSIX_ACL + posix_acl_release(md->posix_acl); +#endif + if (md->lsm) + obd_free_memmd(dt_exp, &md->lsm); + } + return rc; } int mdc_free_lustre_md(struct obd_export *exp, struct lustre_md *md) @@ -698,19 +778,22 @@ int mdc_close(struct obd_export *exp, struct md_op_data *op_data, struct obd_client_handle *och, struct ptlrpc_request **request) { struct obd_device *obd = class_exp2obd(exp); - int reqsize[3] = { sizeof(struct ptlrpc_body), + int reqsize[4] = { sizeof(struct ptlrpc_body), sizeof(struct mdt_epoch), sizeof(struct mdt_rec_setattr)}; - int rc, repsize[4] = { sizeof(struct ptlrpc_body), - sizeof(struct mdt_body), - obd->u.cli.cl_max_mds_easize, - obd->u.cli.cl_max_mds_cookiesize }; + int repsize[4] = { sizeof(struct ptlrpc_body), + sizeof(struct mdt_body), + obd->u.cli.cl_max_mds_easize, + obd->u.cli.cl_max_mds_cookiesize }; struct ptlrpc_request *req; struct mdc_open_data *mod; + int rc; ENTRY; + if (op_data->mod_capa1) + reqsize[REQ_REC_OFF + 2] = sizeof(struct lustre_capa); req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_MDS_VERSION, - MDS_CLOSE, 3, reqsize, NULL); + MDS_CLOSE, 4, reqsize, NULL); if (req == NULL) GOTO(out, rc = -ENOMEM); @@ -787,15 +870,18 @@ int mdc_done_writing(struct obd_export *exp, struct md_op_data *op_data, struct obd_client_handle *och) { struct ptlrpc_request *req; - int rc, size[3] = { sizeof(struct ptlrpc_body), - sizeof(struct mdt_epoch), - sizeof(struct mdt_rec_setattr)}; + int size[4] = { sizeof(struct ptlrpc_body), + sizeof(struct mdt_epoch), + sizeof(struct mdt_rec_setattr)}; int repsize[2] = { sizeof(struct ptlrpc_body), sizeof(struct mdt_body)}; - + int rc; ENTRY; + + if (op_data->mod_capa1) + size[REQ_REC_OFF + 2] = sizeof(struct lustre_capa); req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_MDS_VERSION, - MDS_DONE_WRITING, 3, size, NULL); + MDS_DONE_WRITING, 4, size, NULL); if 
(req == NULL) RETURN(-ENOMEM); @@ -847,19 +933,21 @@ EXPORT_SYMBOL(mdc_sendpage); #endif int mdc_readpage(struct obd_export *exp, const struct lu_fid *fid, - __u64 offset, struct page *page, + struct obd_capa *oc, __u64 offset, struct page *page, struct ptlrpc_request **request) { struct obd_import *imp = class_exp2cliimp(exp); struct ptlrpc_request *req = NULL; struct ptlrpc_bulk_desc *desc = NULL; struct mdt_body *body; - int rc, size[2] = { sizeof(struct ptlrpc_body), sizeof(*body) }; + int rc, size[3] = { sizeof(struct ptlrpc_body), sizeof(*body) }; ENTRY; CDEBUG(D_INODE, "object: "DFID"\n", PFID(fid)); - req = ptlrpc_prep_req(imp, LUSTRE_MDS_VERSION, MDS_READPAGE, 2, size, + if (oc) + size[REQ_REC_OFF + 1] = sizeof(struct lustre_capa); + req = ptlrpc_prep_req(imp, LUSTRE_MDS_VERSION, MDS_READPAGE, 3, size, NULL); if (req == NULL) GOTO(out, rc = -ENOMEM); @@ -874,7 +962,7 @@ int mdc_readpage(struct obd_export *exp, const struct lu_fid *fid, ptlrpc_prep_bulk_page(desc, page, 0, PAGE_CACHE_SIZE); - mdc_readdir_pack(req, REQ_REC_OFF, offset, PAGE_CACHE_SIZE, fid); + mdc_readdir_pack(req, REQ_REC_OFF, offset, PAGE_CACHE_SIZE, fid, oc); ptlrpc_req_set_repsize(req, 2, size); rc = ptlrpc_queue_wait(req); @@ -902,7 +990,6 @@ int mdc_readpage(struct obd_export *exp, const struct lu_fid *fid, return rc; } - static int mdc_iocontrol(unsigned int cmd, struct obd_export *exp, int len, void *karg, void *uarg) { @@ -1095,21 +1182,25 @@ out: } static int mdc_pin(struct obd_export *exp, const struct lu_fid *fid, + struct obd_capa *oc, struct obd_client_handle *handle, int flag) { struct ptlrpc_request *req; struct mdt_body *body; - int rc, size[2] = { sizeof(struct ptlrpc_body), sizeof(*body) }; + int rc, size[3] = { sizeof(struct ptlrpc_body), sizeof(*body) }; ENTRY; + if (oc) + size[REQ_REC_OFF + 1] = sizeof(struct lustre_capa); req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_MDS_VERSION, - MDS_PIN, 2, size, NULL); + MDS_PIN, 3, size, NULL); if (req == NULL) 
RETURN(-ENOMEM); body = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, sizeof (*body)); body->fid1 = *fid; body->flags = flag; + mdc_pack_capa(req, REQ_REC_OFF + 1, oc); ptlrpc_req_set_repsize(req, 2, size); @@ -1177,19 +1268,22 @@ static int mdc_unpin(struct obd_export *exp, } int mdc_sync(struct obd_export *exp, const struct lu_fid *fid, + struct obd_capa *oc, struct ptlrpc_request **request) { struct ptlrpc_request *req; - int size[2] = { sizeof(struct ptlrpc_body), sizeof(struct mdt_body) }; + int size[3] = { sizeof(struct ptlrpc_body), sizeof(struct mdt_body) }; int rc; ENTRY; + if (oc) + size[REQ_REC_OFF + 1] = sizeof(struct lustre_capa); req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_MDS_VERSION, - MDS_SYNC, 2, size, NULL); + MDS_SYNC, 3, size, NULL); if (!req) RETURN(rc = -ENOMEM); - mdc_pack_req_body(req, REQ_REC_OFF, 0, fid, 0, 0); + mdc_pack_req_body(req, REQ_REC_OFF, 0, fid, oc, 0, 0); ptlrpc_req_set_repsize(req, 2, size); @@ -1462,25 +1556,27 @@ static int mdc_process_config(struct obd_device *obd, obd_count len, void *buf) /* get remote permission for current user on fid */ int mdc_get_remote_perm(struct obd_export *exp, const struct lu_fid *fid, - struct ptlrpc_request **request) + struct obd_capa *oc, struct ptlrpc_request **request) { struct ptlrpc_request *req; struct mdt_body *body; struct mdt_remote_perm *perm; - int size[3] = { sizeof(struct ptlrpc_body), - sizeof(*body), - sizeof(*perm) }; + int size[3] = { sizeof(struct ptlrpc_body), sizeof(*body) }; int rc; ENTRY; + if (oc) + size[REQ_REC_OFF + 1] = sizeof(struct lustre_capa); + *request = NULL; req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_MDS_VERSION, - MDS_GETATTR, 2, size, NULL); + MDS_GETATTR, 3, size, NULL); if (!req) RETURN(-ENOMEM); - mdc_pack_req_body(req, REQ_REC_OFF, OBD_MD_FLRMTPERM, fid, 0, 0); + mdc_pack_req_body(req, REQ_REC_OFF, OBD_MD_FLRMTPERM, fid, oc, 0, 0); + size[REPLY_REC_OFF + 1] = sizeof(*perm); ptlrpc_req_set_repsize(req, 3, size); rc = 
ptlrpc_queue_wait(req); if (rc) { @@ -1501,6 +1597,31 @@ int mdc_get_remote_perm(struct obd_export *exp, const struct lu_fid *fid, RETURN(0); } +static int mdc_renew_capa(struct obd_export *exp, struct obd_capa *oc, + renew_capa_cb_t cb) +{ + struct ptlrpc_request *req; + int size[2] = { sizeof(struct ptlrpc_body), + sizeof(struct lustre_capa) }; + int repsize[3] = { sizeof(struct ptlrpc_body), + sizeof(struct mdt_body), + sizeof(struct lustre_capa) }; + ENTRY; + + req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_MDS_VERSION, + MDS_RENEW_CAPA, 2, size, NULL); + if (!req) + RETURN(-ENOMEM); + + mdc_pack_capa(req, REQ_REC_OFF, oc); + + ptlrpc_req_set_repsize(req, 3, repsize); + req->rq_interpret_reply = cb; + ptlrpcd_add_req(req); + + RETURN(0); +} + struct obd_ops mdc_obd_ops = { .o_owner = THIS_MODULE, .o_setup = mdc_setup, @@ -1552,7 +1673,8 @@ struct md_ops mdc_md_ops = { .m_free_lustre_md = mdc_free_lustre_md, .m_set_open_replay_data = mdc_set_open_replay_data, .m_clear_open_replay_data = mdc_clear_open_replay_data, - .m_get_remote_perm = mdc_get_remote_perm + .m_get_remote_perm = mdc_get_remote_perm, + .m_renew_capa = mdc_renew_capa }; extern quota_interface_t mdc_quota_interface; diff --git a/lustre/mdd/mdd_handler.c b/lustre/mdd/mdd_handler.c index 1204385..df3b80c 100644 --- a/lustre/mdd/mdd_handler.c +++ b/lustre/mdd/mdd_handler.c @@ -40,6 +40,7 @@ #include #include +#include #include "mdd_internal.h" @@ -325,7 +326,7 @@ struct mdd_object *mdd_object_find(const struct lu_context *ctxt, struct mdd_object *m; ENTRY; - o = lu_object_find(ctxt, mdd2lu_dev(d)->ld_site, f); + o = lu_object_find(ctxt, mdd2lu_dev(d)->ld_site, f, BYPASS_CAPA); if (IS_ERR(o)) m = (struct mdd_object *)o; else { @@ -666,6 +667,7 @@ static int mdd_txn_stop_cb(const struct lu_context *ctx, struct mdd_device *mdd = cookie; struct obd_device *obd = mdd2obd_dev(mdd); + LASSERT(obd); return mds_lov_write_objids(obd); } @@ -748,14 +750,14 @@ static int mdd_process_config(const struct 
lu_context *ctxt, GOTO(out, rc); dt->dd_ops->dt_conf_get(ctxt, dt, &m->mdd_dt_conf); - rc = mdd_mount(ctxt, m); - if (rc) - GOTO(out, rc); rc = mdd_init_obd(ctxt, m, cfg); if (rc) { CERROR("lov init error %d \n", rc); GOTO(out, rc); } + rc = mdd_mount(ctxt, m); + if (rc) + GOTO(out, rc); break; case LCFG_CLEANUP: mdd_device_shutdown(ctxt, m); @@ -1893,7 +1895,7 @@ static int mdd_lookup_intent(const struct lu_context *ctxt, { struct mdd_object *mdd_obj = md2mdd_obj(pobj); struct dt_object *dir = mdd_object_child(mdd_obj); - struct dt_rec *rec = (struct dt_rec *)fid; + struct dt_rec *rec = (struct dt_rec *)fid; const struct dt_key *key = (const struct dt_key *)name; int rc; ENTRY; @@ -2590,12 +2592,37 @@ static int mdd_maxsize_get(const struct lu_context *ctx, struct md_device *m, struct mdd_device *mdd = lu2mdd_dev(&m->md_lu_dev); ENTRY; - *md_size = mdd_lov_mdsize(ctx, mdd); + *md_size = mdd_lov_mdsize(ctx, mdd); *cookie_size = mdd_lov_cookiesize(ctx, mdd); RETURN(0); } +static int mdd_init_capa_keys(struct md_device *m, + struct lustre_capa_key *keys) +{ + struct mdd_device *mdd = lu2mdd_dev(&m->md_lu_dev); + struct mds_obd *mds = &mdd2obd_dev(mdd)->u.mds; + ENTRY; + + mds->mds_capa_keys = keys; + RETURN(0); +} + +static int mdd_update_capa_key(const struct lu_context *ctx, + struct md_device *m, + struct lustre_capa_key *key) +{ + struct mdd_device *mdd = lu2mdd_dev(&m->md_lu_dev); + struct obd_export *lov_exp = mdd2obd_dev(mdd)->u.mds.mds_osc_exp; + int rc; + ENTRY; + + rc = obd_set_info_async(lov_exp, strlen(KEY_CAPA_KEY), KEY_CAPA_KEY, + sizeof(*key), key, NULL); + RETURN(rc); +} + static void __mdd_ref_add(const struct lu_context *ctxt, struct mdd_object *obj, struct thandle *handle) { @@ -3060,10 +3087,51 @@ static int mdd_permission(const struct lu_context *ctxt, struct md_object *obj, RETURN(rc); } +static int mdd_capa_get(const struct lu_context *ctxt, struct md_object *obj, + struct lustre_capa *capa) +{ + struct mdd_object *mdd_obj = md2mdd_obj(obj); + 
struct mdd_device *mdd = mdo2mdd(obj); + struct lu_site *ls = mdd->mdd_md_dev.md_lu_dev.ld_site; + struct lustre_capa_key *key = &ls->ls_capa_keys[1]; + struct obd_capa *ocapa; + int rc; + ENTRY; + + LASSERT(lu_object_exists(mdd2lu_obj(mdd_obj))); + + capa->lc_fid = *mdo2fid(mdd_obj); + if (ls->ls_capa_timeout < CAPA_TIMEOUT) + capa->lc_flags |= CAPA_FL_SHORT_EXPIRY; + if (lu_fid_eq(&capa->lc_fid, &mdd->mdd_root_fid)) + capa->lc_flags |= CAPA_FL_ROOT; + capa->lc_flags = ls->ls_capa_alg << 23; + + /* TODO: get right permission here after remote uid landing */ + ocapa = capa_lookup(capa); + if (ocapa) { + LASSERT(!capa_is_expired(ocapa)); + capa_cpy(capa, ocapa); + capa_put(ocapa); + RETURN(0); + } + + capa->lc_keyid = key->lk_keyid; + capa->lc_expiry = CURRENT_SECONDS + ls->ls_capa_timeout; + rc = capa_hmac(capa->lc_hmac, capa, key->lk_key); + if (rc) + RETURN(rc); + + capa_add(capa); + RETURN(0); +} + struct md_device_operations mdd_ops = { .mdo_statfs = mdd_statfs, .mdo_root_get = mdd_root_get, .mdo_maxsize_get = mdd_maxsize_get, + .mdo_init_capa_keys = mdd_init_capa_keys, + .mdo_update_capa_key= mdd_update_capa_key, }; static struct md_dir_operations mdd_dir_ops = { @@ -3093,7 +3161,8 @@ static struct md_object_operations mdd_obj_ops = { .moo_open = mdd_open, .moo_close = mdd_close, .moo_readpage = mdd_readpage, - .moo_readlink = mdd_readlink + .moo_readlink = mdd_readlink, + .moo_capa_get = mdd_capa_get }; static struct obd_ops mdd_obd_device_ops = { diff --git a/lustre/mdd/mdd_lov.c b/lustre/mdd/mdd_lov.c index dbd3c16..ae3d61b 100644 --- a/lustre/mdd/mdd_lov.c +++ b/lustre/mdd/mdd_lov.c @@ -1,7 +1,7 @@ /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- * vim:expandtab:shiftwidth=8:tabstop=8: * - * linux/mds/mds_lov.c + * lustre/mdd/mdd_lov.c * Lustre Metadata Server (mds) handling of striped file data * * Copyright (C) 2001-2006 Cluster File Systems, Inc. 
diff --git a/lustre/mds/handler.c b/lustre/mds/handler.c index b93dbb5..73eb57a 100644 --- a/lustre/mds/handler.c +++ b/lustre/mds/handler.c @@ -1006,7 +1006,8 @@ static int mds_getattr(struct ptlrpc_request *req, int offset) GOTO(out_pop, rc); } - req->rq_status = mds_getattr_internal(obd, de, req, body,REPLY_REC_OFF); + req->rq_status = mds_getattr_internal(obd, de, req, body, + REPLY_REC_OFF); l_dput(de); GOTO(out_pop, rc); @@ -2614,7 +2615,6 @@ static int mdt_setup(struct obd_device *obd, struct lustre_cfg *lcfg) GOTO(err_thread3, rc); ping_evictor_start(); - RETURN(0); err_thread3: diff --git a/lustre/mds/mds_fs.c b/lustre/mds/mds_fs.c index 0571def..b3ce673 100644 --- a/lustre/mds/mds_fs.c +++ b/lustre/mds/mds_fs.c @@ -735,7 +735,7 @@ out_pop: int mds_obd_destroy(struct obd_export *exp, struct obdo *oa, struct lov_stripe_md *ea, struct obd_trans_info *oti, - struct obd_export *md_exp) + struct obd_export *md_exp, void *unused) { struct mds_obd *mds = &exp->exp_obd->u.mds; struct inode *parent_inode = mds->mds_objects_dir->d_inode; diff --git a/lustre/mds/mds_internal.h b/lustre/mds/mds_internal.h index fd63280..172b40f 100644 --- a/lustre/mds/mds_internal.h +++ b/lustre/mds/mds_internal.h @@ -223,7 +223,7 @@ int mds_obd_create(struct obd_export *exp, struct obdo *oa, struct lov_stripe_md **ea, struct obd_trans_info *oti); int mds_obd_destroy(struct obd_export *exp, struct obdo *oa, struct lov_stripe_md *ea, struct obd_trans_info *oti, - struct obd_export *md_exp); + struct obd_export *md_exp, void *unused); void mds_init_ctxt(struct obd_device *obd, struct vfsmount *mnt); /* mds/handler.c */ diff --git a/lustre/mds/mds_lov.c b/lustre/mds/mds_lov.c index 8fc6095..892efa1 100644 --- a/lustre/mds/mds_lov.c +++ b/lustre/mds/mds_lov.c @@ -631,6 +631,28 @@ struct mds_lov_sync_info { __u32 mlsi_index; /* index of target */ }; +static int mds_propagate_capa_keys(struct mds_obd *mds) +{ + struct lustre_capa_key *key; + int i, rc = 0; + ENTRY; + + for (i = 0; i < 2; 
i++) { + key = &mds->mds_capa_keys[i]; + DEBUG_CAPA_KEY(D_SEC, key, "propagate"); + + rc = obd_set_info_async(mds->mds_osc_exp, strlen(KEY_CAPA_KEY), + KEY_CAPA_KEY, sizeof(*key), key, NULL); + if (rc) { + DEBUG_CAPA_KEY(D_ERROR, key, + "propagate failed (rc = %d) for", rc); + RETURN(rc); + } + } + + RETURN(0); +} + /* We only sync one osc at a time, so that we don't have to hold any kind of lock on the whole mds_lov_desc, which may change (grow) as a result of mds_lov_add_ost. This also avoids any @@ -665,6 +687,11 @@ static int __mds_lov_synchronize(void *data) if (rc != 0) GOTO(out, rc); + /* propagate capability keys */ + rc = mds_propagate_capa_keys(mds); + if (rc) + GOTO(out, rc); + rc = llog_connect(llog_get_context(obd, LLOG_MDS_OST_ORIG_CTXT), mds->mds_lov_desc.ld_tgt_count, NULL, NULL, uuid); diff --git a/lustre/mds/mds_unlink_open.c b/lustre/mds/mds_unlink_open.c index bc68bc3..09e5321 100644 --- a/lustre/mds/mds_unlink_open.c +++ b/lustre/mds/mds_unlink_open.c @@ -86,7 +86,8 @@ static int mds_osc_destroy_orphan(struct obd_device *obd, oa->o_valid |= OBD_MD_FLCOOKIE; oti.oti_logcookies = logcookies; } - rc = obd_destroy(mds->mds_osc_exp, oa, lsm, &oti, obd->obd_self_export); + rc = obd_destroy(mds->mds_osc_exp, oa, lsm, &oti, obd->obd_self_export, + NULL); obdo_free(oa); if (rc) CDEBUG(D_INODE, "destroy orphan objid 0x"LPX64" on ost error " diff --git a/lustre/mdt/Makefile.in b/lustre/mdt/Makefile.in index 3dcf8bf..af73004 100644 --- a/lustre/mdt/Makefile.in +++ b/lustre/mdt/Makefile.in @@ -1,5 +1,5 @@ MODULES := mdt -mdt-objs := mdt_handler.o mdt_lib.o mdt_reint.o mdt_xattr.o mdt_recovery.o mdt_open.o -mdt-objs += mdt_idmap.o mdt_identity.o mdt_rmtacl.o +mdt-objs := mdt_handler.o mdt_lib.o mdt_reint.o mdt_xattr.o mdt_recovery.o +mdt-objs += mdt_open.o mdt_idmap.o mdt_identity.o mdt_rmtacl.o mdt_capa.o @INCLUDE_RULES@ diff --git a/lustre/mdt/mdt_capa.c b/lustre/mdt/mdt_capa.c new file mode 100644 index 0000000..b4e754e --- /dev/null +++ 
b/lustre/mdt/mdt_capa.c @@ -0,0 +1,298 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * lustre/mdt/mdt_capa.c + * Lustre Metadata Target (mdt) capability key read/write/update. + * + * Copyright (C) 2005 Cluster File Systems, Inc. + * Author: Lai Siyao + * + * This file is part of the Lustre file system, http://www.lustre.org + * Lustre is a trademark of Cluster File Systems, Inc. + * + * You may have signed or agreed to another license before downloading + * this software. If so, you are bound by the terms and conditions + * of that agreement, and the following does not apply to you. See the + * LICENSE file included with this distribution for more information. + * + * If you did not agree to a different license, then this copy of Lustre + * is open source software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * In either case, Lustre is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * license text for more details. 
+ */ + +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_MDS + +#include "mdt_internal.h" + +static inline void set_capa_key_expiry(struct mdt_device *mdt) +{ + mdt->mdt_ck_expiry = jiffies + mdt->mdt_ck_timeout * HZ; +} + +static void make_capa_key(struct lustre_capa_key *key, + mdsno_t mdsnum, int keyid) +{ + key->lk_mdsid = mdsnum; + key->lk_keyid = keyid + 1; + get_random_bytes(key->lk_key, sizeof(key->lk_key)); +} + +enum { + MDT_TXN_CAPA_KEYS_WRITE_CREDITS = 1 +}; + +static inline void lck_cpu_to_le(struct lustre_capa_key *tgt, + struct lustre_capa_key *src) +{ + tgt->lk_mdsid = cpu_to_le64(src->lk_mdsid); + tgt->lk_keyid = cpu_to_le32(src->lk_keyid); + tgt->lk_padding = cpu_to_le32(src->lk_padding); + memcpy(tgt->lk_key, src->lk_key, sizeof(src->lk_key)); +} + +static inline void lck_le_to_cpu(struct lustre_capa_key *tgt, + struct lustre_capa_key *src) +{ + tgt->lk_mdsid = le64_to_cpu(src->lk_mdsid); + tgt->lk_keyid = le32_to_cpu(src->lk_keyid); + tgt->lk_padding = le32_to_cpu(src->lk_padding); + memcpy(tgt->lk_key, src->lk_key, sizeof(src->lk_key)); +} + +static int write_capa_keys(const struct lu_context *ctx, + struct mdt_device *mdt, + struct lustre_capa_key *keys) +{ + struct mdt_thread_info *mti; + struct lustre_capa_key *tmp; + struct thandle *th; + loff_t off = 0; + int i, rc; + + mti = lu_context_key_get(ctx, &mdt_thread_key); + + th = mdt_trans_start(ctx, mdt, MDT_TXN_CAPA_KEYS_WRITE_CREDITS); + if (IS_ERR(th)) + RETURN(PTR_ERR(th)); + + tmp = &mti->mti_capa_key; + + for (i = 0; i < 2; i++) { + lck_cpu_to_le(tmp, &keys[i]); + + rc = mdt_record_write(ctx, mdt->mdt_ck_obj, tmp, sizeof(*tmp), + &off, th); + if (rc) + break; + } + + mdt_trans_stop(ctx, mdt, th); + + CDEBUG(D_INFO, "write capability keys rc = %d:\n", rc); + return rc; +} + +static int read_capa_keys(const struct lu_context *ctx, + struct mdt_device *mdt, + struct lustre_capa_key *keys) +{ + struct mdt_thread_info *mti; + struct lustre_capa_key *tmp; + 
loff_t off = 0; + int i, rc; + + mti = lu_context_key_get(ctx, &mdt_thread_key); + tmp = &mti->mti_capa_key; + + for (i = 0; i < 2; i++) { + rc = mdt_record_read(ctx, mdt->mdt_ck_obj, tmp, sizeof(*tmp), + &off); + if (rc) + return rc; + + lck_le_to_cpu(&keys[i], tmp); + + DEBUG_CAPA_KEY(D_INFO, &keys[i], "read"); + } + + return 0; +} + +int mdt_capa_keys_init(const struct lu_context *ctx, struct mdt_device *mdt) +{ + struct lustre_capa_key *keys = mdt->mdt_capa_keys; + struct mdt_thread_info *mti; + struct dt_object *obj; + struct lu_attr *la; + mdsno_t mdsnum; + unsigned long size; + int rc; + ENTRY; + + mdsnum = mdt->mdt_md_dev.md_lu_dev.ld_site->ls_node_id; + + mti = lu_context_key_get(ctx, &mdt_thread_key); + LASSERT(mti != NULL); + la = &mti->mti_attr.ma_attr; + + obj = mdt->mdt_ck_obj; + obj->do_ops->do_read_lock(ctx, obj); + rc = obj->do_ops->do_attr_get(ctx, mdt->mdt_ck_obj, la); + obj->do_ops->do_read_unlock(ctx, obj); + if (rc) + RETURN(rc); + + size = (unsigned long)la->la_size; + if (size == 0) { + int i; + + for (i = 0; i < 2; i++) { + make_capa_key(&keys[i], mdsnum, i); + DEBUG_CAPA_KEY(D_SEC, &keys[i], "initializing"); + } + + rc = write_capa_keys(ctx, mdt, keys); + if (rc) { + CERROR("error writing MDS %s: rc %d\n", CAPA_KEYS, rc); + RETURN(rc); + } + } else { + rc = read_capa_keys(ctx, mdt, keys); + if (rc) { + CERROR("error reading MDS %s: rc %d\n", CAPA_KEYS, rc); + RETURN(rc); + } + } + set_capa_key_expiry(mdt); + mod_timer(&mdt->mdt_ck_timer, mdt->mdt_ck_expiry); + CDEBUG(D_SEC, "mds_ck_timer %lu\n", mdt->mdt_ck_expiry); + RETURN(0); +} + +void mdt_ck_timer_callback(unsigned long castmeharder) +{ + struct mdt_device *mdt = (struct mdt_device *)castmeharder; + struct ptlrpc_thread *thread = &mdt->mdt_ck_thread; + + ENTRY; + thread->t_flags |= SVC_EVENT; + wake_up(&thread->t_ctl_waitq); + EXIT; +} + +static int mdt_ck_thread_main(void *args) +{ + struct mdt_device *mdt = args; + struct ptlrpc_thread *thread = &mdt->mdt_ck_thread; + struct 
lustre_capa_key *tmp, *key = red_capa_key(mdt); + struct lu_context ctx; + struct mdt_thread_info *info; + struct md_device *next; + struct l_wait_info lwi = { 0 }; + mdsno_t mdsnum; + int rc; + ENTRY; + + ptlrpc_daemonize("mdt_ck"); + cfs_block_allsigs(); + + thread->t_flags = SVC_RUNNING; + cfs_waitq_signal(&thread->t_ctl_waitq); + + rc = lu_context_init(&ctx, LCT_MD_THREAD); + if (rc) + RETURN(rc); + + thread->t_ctx = &ctx; + ctx.lc_thread = thread; + + lu_context_enter(&ctx); + info = lu_context_key_get(&ctx, &mdt_thread_key); + LASSERT(info != NULL); + + tmp = &info->mti_capa_key; + mdsnum = mdt->mdt_md_dev.md_lu_dev.ld_site->ls_node_id; + while (1) { + l_wait_event(thread->t_ctl_waitq, + thread->t_flags & (SVC_STOPPING | SVC_EVENT), + &lwi); + + if (thread->t_flags & SVC_STOPPING) + break; + thread->t_flags &= ~SVC_EVENT; + + if (time_after(mdt->mdt_ck_expiry, jiffies)) + break; + + *tmp = *key; + make_capa_key(tmp, mdsnum, key->lk_keyid); + + next = mdt->mdt_child; + rc = next->md_ops->mdo_update_capa_key(&ctx, next, tmp); + if (!rc) { + rc = write_capa_keys(&ctx, mdt, mdt->mdt_capa_keys); + if (!rc) { + spin_lock(&capa_lock); + mdt->mdt_capa_keys[0] = *key; + *key = *tmp; + spin_unlock(&capa_lock); + + set_capa_key_expiry(mdt); + + DEBUG_CAPA_KEY(D_SEC, key, "new"); + } + } + if (rc) { + DEBUG_CAPA_KEY(D_ERROR, key, "update failed for"); + /* next retry is in 300 sec */ + mdt->mdt_ck_expiry = jiffies + 300 * HZ; + } + + mod_timer(&mdt->mdt_ck_timer, mdt->mdt_ck_expiry); + CDEBUG(D_SEC, "mdt_ck_timer %lu\n", mdt->mdt_ck_expiry); + } + lu_context_exit(&ctx); + lu_context_fini(&ctx); + + thread->t_flags = SVC_STOPPED; + cfs_waitq_signal(&thread->t_ctl_waitq); + RETURN(0); +} + +int mdt_ck_thread_start(struct mdt_device *mdt) +{ + struct ptlrpc_thread *thread = &mdt->mdt_ck_thread; + int rc; + + cfs_waitq_init(&thread->t_ctl_waitq); + rc = kernel_thread(mdt_ck_thread_main, mdt, + (CLONE_VM | CLONE_FILES)); + if (rc < 0) { + CERROR("cannot start mdt_ck thread, 
rc = %d\n", rc); + return rc; + } + + wait_event(thread->t_ctl_waitq, thread->t_flags & SVC_RUNNING); + return 0; +} + +void mdt_ck_thread_stop(struct mdt_device *mdt) +{ + struct ptlrpc_thread *thread = &mdt->mdt_ck_thread; + + if (!(thread->t_flags & SVC_RUNNING)) + return; + + thread->t_flags = SVC_STOPPING; + cfs_waitq_signal(&thread->t_ctl_waitq); + wait_event(thread->t_ctl_waitq, thread->t_flags & SVC_STOPPED); +} diff --git a/lustre/mdt/mdt_handler.c b/lustre/mdt/mdt_handler.c index c129286..96a89da 100644 --- a/lustre/mdt/mdt_handler.c +++ b/lustre/mdt/mdt_handler.c @@ -256,6 +256,7 @@ static int mdt_getattr_internal(struct mdt_thread_info *info, struct mdt_object *o) { struct md_object *next = mdt_object_child(o); + struct mdt_device *mdt = info->mti_mdt; const struct mdt_body *reqbody = info->mti_body; struct ptlrpc_request *req = mdt_info_req(info); struct md_attr *ma = &info->mti_attr; @@ -359,7 +360,6 @@ static int mdt_getattr_internal(struct mdt_thread_info *info, repbody->aclsize = sizeof(struct mdt_remote_perm); } } - #ifdef CONFIG_FS_POSIX_ACL else if ((req->rq_export->exp_connect_flags & OBD_CONNECT_ACL) && (reqbody->valid & OBD_MD_FLACL)) { @@ -381,6 +381,23 @@ static int mdt_getattr_internal(struct mdt_thread_info *info, } #endif + if (mdt->mdt_opts.mo_mds_capa) { + struct lustre_capa *capa; + + spin_lock(&capa_lock); + info->mti_capa_key = *red_capa_key(mdt); + spin_unlock(&capa_lock); + + capa = req_capsule_server_get(&info->mti_pill, &RMF_CAPA1); + LASSERT(capa); + capa->lc_opc = CAPA_OPC_MDS_DEFAULT; + rc = mo_capa_get(ctxt, next, capa); + if (rc) + RETURN(rc); + else + repbody->valid |= OBD_MD_FLMDSCAPA; + } + RETURN(rc); } @@ -406,7 +423,7 @@ static int mdt_getattr(struct mdt_thread_info *info) } rc = mdt_getattr_internal(info, obj); - mdt_shrink_reply(info, REPLY_REC_OFF + 1); + mdt_shrink_reply(info, REPLY_REC_OFF + 1, 1, 0); if (reqbody->valid & OBD_MD_FLRMTPERM) mdt_exit_ucred(info); RETURN(rc); @@ -544,7 +561,8 @@ static int 
mdt_getattr_name_lock(struct mdt_thread_info *info, *step 3: find the child object by fid & lock it. * regardless if it is local or remote. */ - child = mdt_object_find(info->mti_ctxt, info->mti_mdt, child_fid); + child = mdt_object_find(info->mti_ctxt, info->mti_mdt, child_fid, + BYPASS_CAPA); if (IS_ERR(child)) GOTO(out_parent, rc = PTR_ERR(child)); if (is_resent) { @@ -632,7 +650,7 @@ static int mdt_getattr_name(struct mdt_thread_info *info) ldlm_lock_decref(&lhc->mlh_lh, lhc->mlh_mode); lhc->mlh_lh.cookie = 0; } - mdt_shrink_reply(info, REPLY_REC_OFF + 1); + mdt_shrink_reply(info, REPLY_REC_OFF + 1, 1, 0); mdt_exit_ucred(info); RETURN(rc); } @@ -1130,6 +1148,37 @@ static int mdt_quotactl_handle(struct mdt_thread_info *info) return -EOPNOTSUPP; } +static int mdt_renew_capa(struct mdt_thread_info *info) +{ + struct mdt_device *mdt = info->mti_mdt; + struct mdt_object *obj; + struct mdt_body *body; + struct lustre_capa *capa; + int rc; + + body = req_capsule_server_get(&info->mti_pill, &RMF_MDT_BODY); + LASSERT(body); + + capa = req_capsule_server_get(&info->mti_pill, &RMF_CAPA1); + LASSERT(capa); + + spin_lock(&capa_lock); + info->mti_capa_key = *red_capa_key(mdt); + spin_unlock(&capa_lock); + + obj = mdt_object_find(info->mti_ctxt, info->mti_mdt, &capa->lc_fid, + capa); + if (!IS_ERR(obj)) + rc = PTR_ERR(obj); + + /* TODO: add capa check */ + rc = mo_capa_get(info->mti_ctxt, mdt_object_child(obj), capa); + if (rc) + RETURN(rc); + + RETURN(rc); +} + /* * OBD PING and other handlers. 
*/ @@ -1226,13 +1275,17 @@ static struct mdt_object *mdt_obj(struct lu_object *o) struct mdt_object *mdt_object_find(const struct lu_context *ctxt, struct mdt_device *d, - const struct lu_fid *f) + const struct lu_fid *f, + struct lustre_capa *c) { struct lu_object *o; struct mdt_object *m; ENTRY; - o = lu_object_find(ctxt, d->mdt_md_dev.md_lu_dev.ld_site, f); + if (!d->mdt_opts.mo_mds_capa) + c = BYPASS_CAPA; + + o = lu_object_find(ctxt, d->mdt_md_dev.md_lu_dev.ld_site, f, c); if (IS_ERR(o)) m = (struct mdt_object *)o; else @@ -1300,11 +1353,12 @@ void mdt_object_unlock(struct mdt_thread_info *info, struct mdt_object *o, struct mdt_object *mdt_object_find_lock(struct mdt_thread_info *info, const struct lu_fid *f, struct mdt_lock_handle *lh, - __u64 ibits) + __u64 ibits, + struct lustre_capa *capa) { struct mdt_object *o; - o = mdt_object_find(info->mti_ctxt, info->mti_mdt, f); + o = mdt_object_find(info->mti_ctxt, info->mti_mdt, f, capa); if (!IS_ERR(o)) { int rc; @@ -1383,36 +1437,40 @@ static int mdt_lock_reply_compat(struct mdt_device *m, struct ldlm_reply *rep) */ static int mdt_body_unpack(struct mdt_thread_info *info, __u32 flags) { - const struct mdt_body *body; - struct mdt_object *obj; - const struct lu_context *ctx; - struct req_capsule *pill; - int rc; + const struct mdt_body *body; + struct lustre_capa *capa = NULL; + struct mdt_object *obj; + const struct lu_context *ctx; + struct req_capsule *pill; + int rc; ctx = info->mti_ctxt; pill = &info->mti_pill; body = info->mti_body = req_capsule_client_get(pill, &RMF_MDT_BODY); - if (body != NULL) { - if (fid_is_sane(&body->fid1)) { - obj = mdt_object_find(ctx, info->mti_mdt, &body->fid1); - if (!IS_ERR(obj)) { - if ((flags & HABEO_CORPUS) && - !lu_object_exists(&obj->mot_obj.mo_lu)) { - mdt_object_put(ctx, obj); - rc = -ENOENT; - } else { - info->mti_object = obj; - rc = 0; - } - } else - rc = PTR_ERR(obj); + if (body == NULL) + return -EFAULT; + + if (!fid_is_sane(&body->fid1)) { + CERROR("Invalid fid: 
"DFID"\n", PFID(&body->fid1)); + return -EINVAL; + } + + if (req_capsule_get_size(pill, &RMF_CAPA1, RCL_CLIENT)) + capa = req_capsule_client_get(pill, &RMF_CAPA1); + obj = mdt_object_find(ctx, info->mti_mdt, &body->fid1, capa); + if (!IS_ERR(obj)) { + if ((flags & HABEO_CORPUS) && + !lu_object_exists(&obj->mot_obj.mo_lu)) { + mdt_object_put(ctx, obj); + rc = -ENOENT; } else { - CERROR("Invalid fid: "DFID"\n", PFID(&body->fid1)); - rc = -EINVAL; + info->mti_object = obj; + rc = 0; } } else - rc = -EFAULT; + rc = PTR_ERR(obj); + return rc; } @@ -2105,7 +2163,7 @@ static int mdt_intent_getattr(enum mdt_it_code opcode, ldlm_rep->lock_policy_res2 = mdt_getattr_name_lock(info, lhc, child_bits, ldlm_rep); - mdt_shrink_reply(info, DLM_REPLY_REC_OFF + 1); + mdt_shrink_reply(info, DLM_REPLY_REC_OFF + 1, 1, 0); if (mdt_get_disposition(ldlm_rep, DISP_LOOKUP_NEG)) ldlm_rep->lock_policy_res2 = 0; @@ -3034,6 +3092,10 @@ static void mdt_fini(const struct lu_context *ctx, struct mdt_device *m) m->mdt_rootsquash_info = NULL; } + cleanup_capas(CAPA_SITE_SERVER); + del_timer(&m->mdt_ck_timer); + mdt_ck_thread_stop(m); + mdt_fs_cleanup(ctx, m); /* finish the stack */ @@ -3080,6 +3142,11 @@ static int mdt_init0(const struct lu_context *ctx, struct mdt_device *m, m->mdt_opts.mo_user_xattr = 0; m->mdt_opts.mo_acl = 0; m->mdt_opts.mo_compat_resname = 0; + m->mdt_opts.mo_mds_capa = 0; + m->mdt_opts.mo_oss_capa = 0; + m->mdt_capa_alg = CAPA_HMAC_ALG_SHA1; + m->mdt_capa_timeout = CAPA_TIMEOUT; + m->mdt_ck_timeout = CAPA_KEY_TIMEOUT; obd->obd_replayable = 1; spin_lock_init(&m->mdt_client_bitmap_lock); @@ -3154,9 +3221,20 @@ static int mdt_init0(const struct lu_context *ctx, struct mdt_device *m, GOTO(err_free_ns, rc); } - rc = mdt_start_ptlrpc_service(m); + rc = mdt_ck_thread_start(m); if (rc) GOTO(err_free_ns, rc); + m->mdt_ck_timer.function = mdt_ck_timer_callback; + m->mdt_ck_timer.data = (unsigned long)m; + init_timer(&m->mdt_ck_timer); + + s->ls_capa_keys = m->mdt_capa_keys; + 
s->ls_capa_timeout = m->mdt_capa_timeout; + s->ls_capa_alg = m->mdt_capa_alg; + + rc = mdt_start_ptlrpc_service(m); + if (rc) + GOTO(err_capa, rc); ping_evictor_start(); rc = mdt_fs_setup(ctx, m, obd); @@ -3172,6 +3250,9 @@ static int mdt_init0(const struct lu_context *ctx, struct mdt_device *m, err_stop_service: mdt_stop_ptlrpc_service(m); +err_capa: + del_timer(&m->mdt_ck_timer); + mdt_ck_thread_stop(m); err_free_ns: upcall_cache_cleanup(m->mdt_rmtacl_cache); m->mdt_rmtacl_cache = NULL; @@ -3421,6 +3502,12 @@ static int mdt_connect_internal(struct obd_export *exp, if (!mdt->mdt_opts.mo_user_xattr) data->ocd_connect_flags &= ~OBD_CONNECT_XATTR; + if (!mdt->mdt_opts.mo_mds_capa) + data->ocd_connect_flags &= ~OBD_CONNECT_MDS_CAPA; + + if (!mdt->mdt_opts.mo_oss_capa) + data->ocd_connect_flags &= ~OBD_CONNECT_OSS_CAPA; + exp->exp_connect_flags = data->ocd_connect_flags; data->ocd_version = LUSTRE_VERSION_CODE; exp->exp_mdt_data.med_ibits_known = data->ocd_ibits_known; @@ -3440,6 +3527,21 @@ static int mdt_connect_internal(struct obd_export *exp, return -EBADE; } + if (mdt->mdt_opts.mo_mds_capa && + ((exp->exp_connect_flags & OBD_CONNECT_MDS_CAPA) == 0)) { + CWARN("%s: MDS requires capability support, but client not\n", + mdt->mdt_md_dev.md_lu_dev.ld_obd->obd_name); + return -EBADE; + } + + if (mdt->mdt_opts.mo_oss_capa && + ((exp->exp_connect_flags & OBD_CONNECT_OSS_CAPA) == 0)) { + CWARN("%s: MDS requires OSS capability support, " + "but client not\n", + mdt->mdt_md_dev.md_lu_dev.ld_obd->obd_name); + return -EBADE; + } + return 0; } @@ -3898,6 +4000,7 @@ static int __init mdt_mod_init(void) rc = class_register_type(&mdt_obd_device_ops, NULL, lvars.module_vars, LUSTRE_MDT_NAME, &mdt_device_type); + return rc; } @@ -3959,7 +4062,8 @@ DEF_MDT_HNDL_F(0 |HABEO_REFERO, PIN, mdt_pin), DEF_MDT_HNDL_0(0, SYNC, mdt_sync), DEF_MDT_HNDL_F(HABEO_CORPUS|HABEO_REFERO, IS_SUBDIR, mdt_is_subdir), DEF_MDT_HNDL_0(0, QUOTACHECK, mdt_quotacheck_handle), -DEF_MDT_HNDL_0(0, QUOTACTL, 
mdt_quotactl_handle) +DEF_MDT_HNDL_0(0, QUOTACTL, mdt_quotactl_handle), +DEF_MDT_HNDL_0(0 |HABEO_REFERO, RENEW_CAPA, mdt_renew_capa) }; #define DEF_OBD_HNDL(flags, name, fn) \ diff --git a/lustre/mdt/mdt_internal.h b/lustre/mdt/mdt_internal.h index d37743a..103ee178 100644 --- a/lustre/mdt/mdt_internal.h +++ b/lustre/mdt/mdt_internal.h @@ -133,9 +133,11 @@ struct mdt_device { * Options bit-fields. */ struct { - signed int mo_user_xattr :1; - signed int mo_acl :1; - signed int mo_compat_resname:1; + signed int mo_user_xattr :1, + mo_acl :1, + mo_compat_resname:1, + mo_mds_capa :1, + mo_oss_capa :1; } mdt_opts; /* lock to pretect epoch and write count */ @@ -168,6 +170,16 @@ struct mdt_device { /* root squash */ struct rootsquash_info *mdt_rootsquash_info; int no_gss_support; + + /* capability */ + __u32 mdt_capa_alg; + unsigned long mdt_capa_timeout; + unsigned long mdt_ck_timeout; + struct dt_object *mdt_ck_obj; + unsigned long mdt_ck_expiry; + struct timer_list mdt_ck_timer; + struct ptlrpc_thread mdt_ck_thread; + struct lustre_capa_key mdt_capa_keys[2]; }; /*XXX copied from mds_internal.h */ @@ -209,6 +221,8 @@ struct mdt_reint_record { int rr_logcookielen; const struct llog_cookie *rr_logcookies; __u32 rr_flags; + struct lustre_capa *rr_capa1; + struct lustre_capa *rr_capa2; }; enum mdt_reint_flag { @@ -316,6 +330,7 @@ struct mdt_thread_info { struct mdt_client_data mti_mcd; loff_t mti_off; struct txn_param mti_txn_param; + struct lustre_capa_key mti_capa_key; }; /* * Info allocated per-transaction. 
@@ -380,11 +395,13 @@ void mdt_object_unlock(struct mdt_thread_info *, struct mdt_object *mdt_object_find(const struct lu_context *, struct mdt_device *, - const struct lu_fid *); + const struct lu_fid *, + struct lustre_capa *); struct mdt_object *mdt_object_find_lock(struct mdt_thread_info *, const struct lu_fid *, struct mdt_lock_handle *, - __u64); + __u64 ibits, + struct lustre_capa *); void mdt_object_unlock_put(struct mdt_thread_info *, struct mdt_object *, struct mdt_lock_handle *, @@ -443,10 +460,21 @@ int mdt_close(struct mdt_thread_info *info); int mdt_attr_set(struct mdt_thread_info *info, struct mdt_object *mo, int flags); int mdt_done_writing(struct mdt_thread_info *info); -void mdt_shrink_reply(struct mdt_thread_info *info, int offset); +void mdt_shrink_reply(struct mdt_thread_info *info, int offset, + int mdscapa, int osscapa); int mdt_handle_last_unlink(struct mdt_thread_info *, struct mdt_object *, const struct md_attr *); void mdt_reconstruct_open(struct mdt_thread_info *, struct mdt_lock_handle *); +struct thandle* mdt_trans_start(const struct lu_context *ctx, + struct mdt_device *mdt, int credits); +void mdt_trans_stop(const struct lu_context *ctx, + struct mdt_device *mdt, struct thandle *th); +int mdt_record_write(const struct lu_context *ctx, + struct dt_object *dt, const void *buf, + size_t count, loff_t *pos, struct thandle *th); +int mdt_record_read(const struct lu_context *ctx, + struct dt_object *dt, void *buf, + size_t count, loff_t *pos); void mdt_dump_lmm(int level, const struct lov_mds_md *lmm); @@ -546,5 +574,18 @@ do { \ } \ } while(0) +/* + * fid Capability + */ +int mdt_ck_thread_start(struct mdt_device *mdt); +void mdt_ck_thread_stop(struct mdt_device *mdt); +void mdt_ck_timer_callback(unsigned long castmeharder); +int mdt_capa_keys_init(const struct lu_context *ctx, struct mdt_device *mdt); + +static inline struct lustre_capa_key *red_capa_key(struct mdt_device *mdt) +{ + return &mdt->mdt_capa_keys[1]; +} + #endif /* 
__KERNEL__ */ #endif /* _MDT_H */ diff --git a/lustre/mdt/mdt_lib.c b/lustre/mdt/mdt_lib.c index adfe5ac..74eb3d4 100644 --- a/lustre/mdt/mdt_lib.c +++ b/lustre/mdt/mdt_lib.c @@ -487,12 +487,12 @@ void mdt_dump_lmm(int level, const struct lov_mds_md *lmm) } } -void mdt_shrink_reply(struct mdt_thread_info *info, int offset) +void mdt_shrink_reply(struct mdt_thread_info *info, int offset, + int mdscapa, int osscapa) { struct ptlrpc_request *req = mdt_info_req(info); struct mdt_body *body; - int acl_size; - int md_size; + int acl_size, md_size; body = req_capsule_server_get(&info->mti_pill, &RMF_MDT_BODY); LASSERT(body != NULL); @@ -504,7 +504,15 @@ void mdt_shrink_reply(struct mdt_thread_info *info, int offset) md_size, acl_size); lustre_shrink_reply(req, offset, md_size, 1); - lustre_shrink_reply(req, md_size ? offset + 1 : offset, acl_size, 0); + offset += !!md_size; + lustre_shrink_reply(req, offset, acl_size, 1); + offset += !!acl_size; + if (mdscapa && !(body->valid & OBD_MD_FLMDSCAPA)) + lustre_shrink_reply(req, offset, 0, 0); + offset += mdscapa; + if (osscapa && !(body->valid & OBD_MD_FLOSSCAPA)) + lustre_shrink_reply(req, offset, 0, 0); + offset += osscapa; } @@ -624,6 +632,10 @@ static int mdt_setattr_unpack_rec(struct mdt_thread_info *info) la->la_atime = rec->sa_atime; la->la_mtime = rec->sa_mtime; ma->ma_valid = MA_INODE; + + if (req_capsule_get_size(pill, &RMF_CAPA1, RCL_CLIENT)) + rr->rr_capa1 = req_capsule_client_get(pill, &RMF_CAPA1); + RETURN(0); } @@ -662,6 +674,7 @@ static int mdt_setattr_unpack(struct mdt_thread_info *info) RCL_CLIENT); ma->ma_valid |= MA_LOV; } + if (req_capsule_field_present(pill, &RMF_LOGCOOKIES, RCL_CLIENT)) { ma->ma_cookie = req_capsule_client_get(pill, &RMF_LOGCOOKIES); @@ -693,64 +706,66 @@ static int mdt_create_unpack(struct mdt_thread_info *info) struct lu_attr *attr = &info->mti_attr.ma_attr; struct mdt_reint_record *rr = &info->mti_rr; struct req_capsule *pill = &info->mti_pill; - int result = 0; ENTRY; rec = 
req_capsule_client_get(pill, &RMF_REC_CREATE); - if (rec != NULL) { - uc->mu_fsuid = rec->cr_fsuid; - uc->mu_fsgid = rec->cr_fsgid; - uc->mu_cap = rec->cr_cap; - uc->mu_suppgids[0] = rec->cr_suppgid; - uc->mu_suppgids[1] = -1; - - rr->rr_fid1 = &rec->cr_fid1; - rr->rr_fid2 = &rec->cr_fid2; - attr->la_mode = rec->cr_mode; - attr->la_rdev = rec->cr_rdev; - attr->la_uid = rec->cr_fsuid; - attr->la_gid = rec->cr_fsgid; - attr->la_ctime = rec->cr_time; - attr->la_mtime = rec->cr_time; - attr->la_atime = rec->cr_time; - attr->la_valid = LA_MODE | LA_RDEV | LA_UID | LA_GID | - LA_CTIME | LA_MTIME | LA_ATIME; - info->mti_spec.sp_cr_flags = rec->cr_flags; - - rr->rr_name = req_capsule_client_get(pill, &RMF_NAME); - if (S_ISDIR(attr->la_mode)) { - struct md_create_spec *sp = &info->mti_spec; - /* pass parent fid for cross-ref cases */ - sp->u.sp_pfid = rr->rr_fid1; - if (info->mti_spec.sp_cr_flags & MDS_CREATE_SLAVE_OBJ) { - /* create salve object req, need - * unpack split ea here - */ - req_capsule_extend(pill, - &RQF_MDS_REINT_CREATE_SLAVE); - LASSERT(req_capsule_field_present(pill, - &RMF_EADATA, RCL_CLIENT)); - sp->u.sp_ea.eadata = req_capsule_client_get(pill, - &RMF_EADATA); - sp->u.sp_ea.eadatalen =req_capsule_get_size(pill, - &RMF_EADATA, RCL_CLIENT); - sp->u.sp_ea.fid = rr->rr_fid1; - } - } else if (S_ISLNK(attr->la_mode)) { - const char *tgt = NULL; - req_capsule_extend(pill, &RQF_MDS_REINT_CREATE_SYM); - if (req_capsule_field_present(pill, &RMF_SYMTGT, - RCL_CLIENT)) { - tgt = req_capsule_client_get(pill, - &RMF_SYMTGT); - info->mti_spec.u.sp_symname = tgt; - } - if (tgt == NULL) - result = -EFAULT; + if (rec == NULL) + RETURN(-EFAULT); + + uc->mu_fsuid = rec->cr_fsuid; + uc->mu_fsgid = rec->cr_fsgid; + uc->mu_cap = rec->cr_cap; + uc->mu_suppgids[0] = rec->cr_suppgid; + uc->mu_suppgids[1] = -1; + + rr->rr_fid1 = &rec->cr_fid1; + rr->rr_fid2 = &rec->cr_fid2; + attr->la_mode = rec->cr_mode; + attr->la_rdev = rec->cr_rdev; + attr->la_uid = rec->cr_fsuid; + 
attr->la_gid = rec->cr_fsgid; + attr->la_ctime = rec->cr_time; + attr->la_mtime = rec->cr_time; + attr->la_atime = rec->cr_time; + attr->la_valid = LA_MODE | LA_RDEV | LA_UID | LA_GID | + LA_CTIME | LA_MTIME | LA_ATIME; + info->mti_spec.sp_cr_flags = rec->cr_flags; + + if (req_capsule_get_size(pill, &RMF_CAPA1, RCL_CLIENT)) + rr->rr_capa1 = req_capsule_client_get(pill, &RMF_CAPA1); + + rr->rr_name = req_capsule_client_get(pill, &RMF_NAME); + if (S_ISDIR(attr->la_mode)) { + struct md_create_spec *sp = &info->mti_spec; + + /* pass parent fid for cross-ref cases */ + sp->u.sp_pfid = rr->rr_fid1; + if (info->mti_spec.sp_cr_flags & MDS_CREATE_SLAVE_OBJ) { + /* create salve object req, need + * unpack split ea here + */ + req_capsule_extend(pill, &RQF_MDS_REINT_CREATE_SLAVE); + LASSERT(req_capsule_field_present(pill, &RMF_EADATA, + RCL_CLIENT)); + sp->u.sp_ea.eadata = req_capsule_client_get(pill, + &RMF_EADATA); + sp->u.sp_ea.eadatalen = req_capsule_get_size(pill, + &RMF_EADATA, + RCL_CLIENT); + sp->u.sp_ea.fid = rr->rr_fid1; + } + } else if (S_ISLNK(attr->la_mode)) { + const char *tgt = NULL; + + req_capsule_extend(pill, &RQF_MDS_REINT_CREATE_SYM); + if (req_capsule_field_present(pill, &RMF_SYMTGT, RCL_CLIENT)) { + tgt = req_capsule_client_get(pill, &RMF_SYMTGT); + info->mti_spec.u.sp_symname = tgt; } - } else - result = -EFAULT; - RETURN(result); + if (tgt == NULL) + RETURN(-EFAULT); + } + RETURN(0); } static int mdt_link_unpack(struct mdt_thread_info *info) @@ -760,30 +775,36 @@ static int mdt_link_unpack(struct mdt_thread_info *info) struct lu_attr *attr = &info->mti_attr.ma_attr; struct mdt_reint_record *rr = &info->mti_rr; struct req_capsule *pill = &info->mti_pill; - int result = 0; ENTRY; rec = req_capsule_client_get(pill, &RMF_REC_LINK); - if (rec != NULL) { - uc->mu_fsuid = rec->lk_fsuid; - uc->mu_fsgid = rec->lk_fsgid; - uc->mu_cap = rec->lk_cap; - uc->mu_suppgids[0] = rec->lk_suppgid1; - uc->mu_suppgids[1] = rec->lk_suppgid2; - - attr->la_uid = rec->lk_fsuid; 
- attr->la_gid = rec->lk_fsgid; - rr->rr_fid1 = &rec->lk_fid1; - rr->rr_fid2 = &rec->lk_fid2; - attr->la_ctime = rec->lk_time; - attr->la_mtime = rec->lk_time; - attr->la_valid = LA_UID | LA_GID | LA_CTIME | LA_MTIME; - rr->rr_name = req_capsule_client_get(pill, &RMF_NAME); - if (rr->rr_name == NULL) - result = -EFAULT; - } else - result = -EFAULT; - RETURN(result); + if (rec == NULL) + RETURN(-EFAULT); + + uc->mu_fsuid = rec->lk_fsuid; + uc->mu_fsgid = rec->lk_fsgid; + uc->mu_cap = rec->lk_cap; + uc->mu_suppgids[0] = rec->lk_suppgid1; + uc->mu_suppgids[1] = rec->lk_suppgid2; + + attr->la_uid = rec->lk_fsuid; + attr->la_gid = rec->lk_fsgid; + rr->rr_fid1 = &rec->lk_fid1; + rr->rr_fid2 = &rec->lk_fid2; + attr->la_ctime = rec->lk_time; + attr->la_mtime = rec->lk_time; + attr->la_valid = LA_UID | LA_GID | LA_CTIME | LA_MTIME; + + if (req_capsule_get_size(pill, &RMF_CAPA1, RCL_CLIENT)) + rr->rr_capa1 = req_capsule_client_get(pill, &RMF_CAPA1); + if (req_capsule_get_size(pill, &RMF_CAPA2, RCL_CLIENT)) + rr->rr_capa2 = req_capsule_client_get(pill, &RMF_CAPA2); + + rr->rr_name = req_capsule_client_get(pill, &RMF_NAME); + if (rr->rr_name == NULL) + RETURN(-EFAULT); + + RETURN(0); } static int mdt_unlink_unpack(struct mdt_thread_info *info) @@ -793,33 +814,35 @@ static int mdt_unlink_unpack(struct mdt_thread_info *info) struct lu_attr *attr = &info->mti_attr.ma_attr; struct mdt_reint_record *rr = &info->mti_rr; struct req_capsule *pill = &info->mti_pill; - int result = 0; ENTRY; rec = req_capsule_client_get(pill, &RMF_REC_UNLINK); - if (rec != NULL) { - uc->mu_fsuid = rec->ul_fsuid; - uc->mu_fsgid = rec->ul_fsgid; - uc->mu_cap = rec->ul_cap; - uc->mu_suppgids[0] = rec->ul_suppgid; - uc->mu_suppgids[1] = -1; + if (rec == NULL) + RETURN(-EFAULT); + + uc->mu_fsuid = rec->ul_fsuid; + uc->mu_fsgid = rec->ul_fsgid; + uc->mu_cap = rec->ul_cap; + uc->mu_suppgids[0] = rec->ul_suppgid; + uc->mu_suppgids[1] = -1; - attr->la_uid = rec->ul_fsuid; - attr->la_gid = rec->ul_fsgid; - 
rr->rr_fid1 = &rec->ul_fid1; - rr->rr_fid2 = &rec->ul_fid2; - attr->la_ctime = rec->ul_time; - attr->la_mtime = rec->ul_time; - attr->la_mode = rec->ul_mode; - - attr->la_valid = LA_UID | LA_GID | LA_CTIME | - LA_MTIME | LA_MODE; - rr->rr_name = req_capsule_client_get(pill, &RMF_NAME); - if (rr->rr_name == NULL) - result = -EFAULT; - } else - result = -EFAULT; - RETURN(result); + attr->la_uid = rec->ul_fsuid; + attr->la_gid = rec->ul_fsgid; + rr->rr_fid1 = &rec->ul_fid1; + rr->rr_fid2 = &rec->ul_fid2; + attr->la_ctime = rec->ul_time; + attr->la_mtime = rec->ul_time; + attr->la_mode = rec->ul_mode; + attr->la_valid = LA_UID | LA_GID | LA_CTIME | LA_MTIME | LA_MODE; + + if (req_capsule_get_size(pill, &RMF_CAPA1, RCL_CLIENT)) + rr->rr_capa1 = req_capsule_client_get(pill, &RMF_CAPA1); + + rr->rr_name = req_capsule_client_get(pill, &RMF_NAME); + if (rr->rr_name == NULL) + RETURN(-EFAULT); + + RETURN(0); } static int mdt_rename_unpack(struct mdt_thread_info *info) @@ -829,34 +852,39 @@ static int mdt_rename_unpack(struct mdt_thread_info *info) struct lu_attr *attr = &info->mti_attr.ma_attr; struct mdt_reint_record *rr = &info->mti_rr; struct req_capsule *pill = &info->mti_pill; - int result = 0; ENTRY; rec = req_capsule_client_get(pill, &RMF_REC_RENAME); - if (rec != NULL) { - uc->mu_fsuid = rec->rn_fsuid; - uc->mu_fsgid = rec->rn_fsgid; - uc->mu_cap = rec->rn_cap; - uc->mu_suppgids[0] = rec->rn_suppgid1; - uc->mu_suppgids[1] = rec->rn_suppgid2; + if (rec == NULL) + RETURN(-EFAULT); + + uc->mu_fsuid = rec->rn_fsuid; + uc->mu_fsgid = rec->rn_fsgid; + uc->mu_cap = rec->rn_cap; + uc->mu_suppgids[0] = rec->rn_suppgid1; + uc->mu_suppgids[1] = rec->rn_suppgid2; - attr->la_uid = rec->rn_fsuid; - attr->la_gid = rec->rn_fsgid; - rr->rr_fid1 = &rec->rn_fid1; - rr->rr_fid2 = &rec->rn_fid2; - attr->la_ctime = rec->rn_time; - attr->la_mtime = rec->rn_time; - /* rename_tgt contains the mode already */ - attr->la_mode = rec->rn_mode; - attr->la_valid = LA_UID | LA_GID | LA_CTIME | - 
LA_MTIME | LA_MODE; - rr->rr_name = req_capsule_client_get(pill, &RMF_NAME); - rr->rr_tgt = req_capsule_client_get(pill, &RMF_SYMTGT); - if (rr->rr_name == NULL || rr->rr_tgt == NULL) - result = -EFAULT; - } else - result = -EFAULT; - RETURN(result); + attr->la_uid = rec->rn_fsuid; + attr->la_gid = rec->rn_fsgid; + rr->rr_fid1 = &rec->rn_fid1; + rr->rr_fid2 = &rec->rn_fid2; + attr->la_ctime = rec->rn_time; + attr->la_mtime = rec->rn_time; + /* rename_tgt contains the mode already */ + attr->la_mode = rec->rn_mode; + attr->la_valid = LA_UID | LA_GID | LA_CTIME | LA_MTIME | LA_MODE; + + if (req_capsule_get_size(pill, &RMF_CAPA1, RCL_CLIENT)) + rr->rr_capa1 = req_capsule_client_get(pill, &RMF_CAPA1); + if (req_capsule_get_size(pill, &RMF_CAPA2, RCL_CLIENT)) + rr->rr_capa2 = req_capsule_client_get(pill, &RMF_CAPA2); + + rr->rr_name = req_capsule_client_get(pill, &RMF_NAME); + rr->rr_tgt = req_capsule_client_get(pill, &RMF_SYMTGT); + if (rr->rr_name == NULL || rr->rr_tgt == NULL) + RETURN(-EFAULT); + + RETURN(0); } static int mdt_open_unpack(struct mdt_thread_info *info) @@ -866,36 +894,39 @@ static int mdt_open_unpack(struct mdt_thread_info *info) struct lu_attr *attr = &info->mti_attr.ma_attr; struct req_capsule *pill = &info->mti_pill; struct mdt_reint_record *rr = &info->mti_rr; - int result; ENTRY; rec = req_capsule_client_get(pill, &RMF_REC_CREATE); - if (rec != NULL) { - uc->mu_fsuid = rec->cr_fsuid; - uc->mu_fsgid = rec->cr_fsgid; - uc->mu_cap = rec->cr_cap; - uc->mu_suppgids[0] = rec->cr_suppgid; - uc->mu_suppgids[1] = -1; - - rr->rr_fid1 = &rec->cr_fid1; - rr->rr_fid2 = &rec->cr_fid2; - attr->la_mode = rec->cr_mode; - attr->la_rdev = rec->cr_rdev; - attr->la_uid = rec->cr_fsuid; - attr->la_gid = rec->cr_fsgid; - attr->la_ctime = rec->cr_time; - attr->la_mtime = rec->cr_time; - attr->la_atime = rec->cr_time; - attr->la_valid = LA_MODE | LA_RDEV | LA_UID | LA_GID | - LA_CTIME | LA_MTIME | LA_ATIME; - info->mti_spec.sp_cr_flags = rec->cr_flags; - rr->rr_name = 
req_capsule_client_get(pill, &RMF_NAME); - if (rr->rr_name == NULL) - result = -EFAULT; - else - result = 0; - } else - result = -EFAULT; + if (rec == NULL) + RETURN(-EFAULT); + + uc->mu_fsuid = rec->cr_fsuid; + uc->mu_fsgid = rec->cr_fsgid; + uc->mu_cap = rec->cr_cap; + uc->mu_suppgids[0] = rec->cr_suppgid; + uc->mu_suppgids[1] = -1; + + rr->rr_fid1 = &rec->cr_fid1; + rr->rr_fid2 = &rec->cr_fid2; + attr->la_mode = rec->cr_mode; + attr->la_rdev = rec->cr_rdev; + attr->la_uid = rec->cr_fsuid; + attr->la_gid = rec->cr_fsgid; + attr->la_ctime = rec->cr_time; + attr->la_mtime = rec->cr_time; + attr->la_atime = rec->cr_time; + attr->la_valid = LA_MODE | LA_RDEV | LA_UID | LA_GID | + LA_CTIME | LA_MTIME | LA_ATIME; + info->mti_spec.sp_cr_flags = rec->cr_flags; + + if (req_capsule_get_size(pill, &RMF_CAPA1, RCL_CLIENT)) + rr->rr_capa1 = req_capsule_client_get(pill, &RMF_CAPA1); + if (req_capsule_get_size(pill, &RMF_CAPA2, RCL_CLIENT)) + rr->rr_capa2 = req_capsule_client_get(pill, &RMF_CAPA2); + + rr->rr_name = req_capsule_client_get(pill, &RMF_NAME); + if (rr->rr_name == NULL) + RETURN(-EFAULT); if (req_capsule_field_present(pill, &RMF_EADATA, RCL_CLIENT)) { struct md_create_spec *sp = &info->mti_spec; @@ -909,7 +940,7 @@ static int mdt_open_unpack(struct mdt_thread_info *info) sp->u.sp_ea.no_lov_create = 1; } - RETURN(result); + RETURN(0); } typedef int (*reint_unpacker)(struct mdt_thread_info *info); diff --git a/lustre/mdt/mdt_open.c b/lustre/mdt/mdt_open.c index 6a03a01..f015fa3 100644 --- a/lustre/mdt/mdt_open.c +++ b/lustre/mdt/mdt_open.c @@ -318,6 +318,7 @@ static int mdt_mfd_open(struct mdt_thread_info *info, { struct ptlrpc_request *req = mdt_info_req(info); struct mdt_export_data *med = &req->rq_export->exp_mdt_data; + struct mdt_device *mdt = info->mti_mdt; struct md_attr *ma = &info->mti_attr; struct lu_attr *la = &ma->ma_attr; struct mdt_file_data *mfd; @@ -347,6 +348,33 @@ static int mdt_mfd_open(struct mdt_thread_info *info, } } + spin_lock(&capa_lock); + 
info->mti_capa_key = *red_capa_key(mdt); + spin_unlock(&capa_lock); + + if (mdt->mdt_opts.mo_mds_capa) { + struct lustre_capa *capa; + + capa = req_capsule_server_get(&info->mti_pill, &RMF_CAPA1); + LASSERT(capa); + capa->lc_opc = CAPA_OPC_MDS_DEFAULT; + rc = mo_capa_get(info->mti_ctxt, mdt_object_child(o), capa); + if (rc) + RETURN(rc); + repbody->valid |= OBD_MD_FLMDSCAPA; + } + if (mdt->mdt_opts.mo_oss_capa) { + struct lustre_capa *capa; + + capa = req_capsule_server_get(&info->mti_pill, &RMF_CAPA2); + LASSERT(capa); + capa->lc_opc = CAPA_OPC_OSS_DEFAULT; + rc = mo_capa_get(info->mti_ctxt, mdt_object_child(o), capa); + if (rc) + RETURN(rc); + repbody->valid |= OBD_MD_FLOSSCAPA; + } + /* if we are following a symlink, don't open; and * do not return open handle for special nodes as client required */ @@ -507,7 +535,7 @@ void mdt_reconstruct_open(struct mdt_thread_info *info, if (mdt_get_disposition(ldlm_rep, DISP_OPEN_CREATE) && req->rq_status != 0) { /* We did not create successfully, return error to client. */ - mdt_shrink_reply(info, DLM_REPLY_REC_OFF + 1); + mdt_shrink_reply(info, DLM_REPLY_REC_OFF + 1, 1, 1); GOTO(out, rc = req->rq_status); } @@ -516,10 +544,10 @@ void mdt_reconstruct_open(struct mdt_thread_info *info, * We failed after creation, but we do not know in which step * we failed. So try to check the child object. 
*/ - parent = mdt_object_find(ctxt, mdt, rr->rr_fid1); + parent = mdt_object_find(ctxt, mdt, rr->rr_fid1, rr->rr_capa1); LASSERT(!IS_ERR(parent)); - child = mdt_object_find(ctxt, mdt, rr->rr_fid2); + child = mdt_object_find(ctxt, mdt, rr->rr_fid2, rr->rr_capa2); LASSERT(!IS_ERR(child)); rc = lu_object_exists(&child->mot_obj.mo_lu); @@ -543,7 +571,7 @@ void mdt_reconstruct_open(struct mdt_thread_info *info, } mdt_object_put(ctxt, parent); mdt_object_put(ctxt, child); - mdt_shrink_reply(info, DLM_REPLY_REC_OFF + 1); + mdt_shrink_reply(info, DLM_REPLY_REC_OFF + 1, 1, 1); GOTO(out, rc); } else { regular_open: @@ -567,12 +595,12 @@ static int mdt_open_by_fid(struct mdt_thread_info* info, int rc; ENTRY; - o = mdt_object_find(info->mti_ctxt, info->mti_mdt, rr->rr_fid2); + o = mdt_object_find(info->mti_ctxt, info->mti_mdt, rr->rr_fid2, + rr->rr_capa2); if (IS_ERR(o)) RETURN(rc = PTR_ERR(o)); rc = lu_object_exists(&o->mot_obj.mo_lu); - if (rc > 0) { const struct lu_context *ctxt = info->mti_ctxt; @@ -612,7 +640,7 @@ static int mdt_cross_open(struct mdt_thread_info* info, int rc; ENTRY; - o = mdt_object_find(info->mti_ctxt, info->mti_mdt, fid); + o = mdt_object_find(info->mti_ctxt, info->mti_mdt, fid, BYPASS_CAPA); if (IS_ERR(o)) RETURN(rc = PTR_ERR(o)); @@ -710,7 +738,8 @@ int mdt_reint_open(struct mdt_thread_info *info, struct mdt_lock_handle *lhc) if (rr->rr_name[0] == 0) { /* this is cross-ref open */ mdt_set_disposition(info, ldlm_rep, DISP_LOOKUP_POS); - result = mdt_cross_open(info, rr->rr_fid1, ldlm_rep, create_flags); + result = mdt_cross_open(info, rr->rr_fid1, ldlm_rep, + create_flags); GOTO(out, result); } @@ -720,7 +749,7 @@ int mdt_reint_open(struct mdt_thread_info *info, struct mdt_lock_handle *lhc) else lh->mlh_mode = LCK_EX; parent = mdt_object_find_lock(info, rr->rr_fid1, lh, - MDS_INODELOCK_UPDATE); + MDS_INODELOCK_UPDATE, rr->rr_capa1); if (IS_ERR(parent)) GOTO(out, result = PTR_ERR(parent)); @@ -751,7 +780,7 @@ int mdt_reint_open(struct mdt_thread_info 
*info, struct mdt_lock_handle *lhc) mdt_set_disposition(info, ldlm_rep, DISP_LOOKUP_POS); } - child = mdt_object_find(info->mti_ctxt, mdt, child_fid); + child = mdt_object_find(info->mti_ctxt, mdt, child_fid, BYPASS_CAPA); if (IS_ERR(child)) GOTO(out_parent, result = PTR_ERR(child)); @@ -846,7 +875,7 @@ out_child: out_parent: mdt_object_unlock_put(info, parent, lh, result); out: - mdt_shrink_reply(info, DLM_REPLY_REC_OFF + 1); + mdt_shrink_reply(info, DLM_REPLY_REC_OFF + 1, 1, 1); if (result) lustre_msg_set_transno(req->rq_repmsg, 0); return result; @@ -986,7 +1015,7 @@ int mdt_close(struct mdt_thread_info *info) mdt_object_put(info->mti_ctxt, o); } if (repbody != NULL) - mdt_shrink_reply(info, REPLY_REC_OFF + 1); + mdt_shrink_reply(info, REPLY_REC_OFF + 1, 0, 0); if (MDT_FAIL_CHECK(OBD_FAIL_MDS_CLOSE_PACK)) RETURN(-ENOMEM); diff --git a/lustre/mdt/mdt_recovery.c b/lustre/mdt/mdt_recovery.c index ba05274..07c763b 100644 --- a/lustre/mdt/mdt_recovery.c +++ b/lustre/mdt/mdt_recovery.c @@ -37,9 +37,9 @@ static int mdt_server_data_update(const struct lu_context *ctx, struct mdt_device *mdt); /* TODO: maybe this pair should be defined in dt_object.c */ -static int mdt_record_read(const struct lu_context *ctx, - struct dt_object *dt, void *buf, - size_t count, loff_t *pos) +int mdt_record_read(const struct lu_context *ctx, + struct dt_object *dt, void *buf, + size_t count, loff_t *pos) { int rc; @@ -54,9 +54,9 @@ static int mdt_record_read(const struct lu_context *ctx, return rc; } -static int mdt_record_write(const struct lu_context *ctx, - struct dt_object *dt, const void *buf, - size_t count, loff_t *pos, struct thandle *th) +int mdt_record_write(const struct lu_context *ctx, + struct dt_object *dt, const void *buf, + size_t count, loff_t *pos, struct thandle *th) { int rc; @@ -75,8 +75,8 @@ enum { MDT_TXN_LAST_RCVD_WRITE_CREDITS = 3 }; -static struct thandle* mdt_trans_start(const struct lu_context *ctx, - struct mdt_device *mdt, int credits) +struct thandle* 
mdt_trans_start(const struct lu_context *ctx, + struct mdt_device *mdt, int credits) { struct mdt_thread_info *mti; struct txn_param *p; @@ -87,8 +87,8 @@ static struct thandle* mdt_trans_start(const struct lu_context *ctx, return mdt->mdt_bottom->dd_ops->dt_trans_start(ctx, mdt->mdt_bottom, p); } -static void mdt_trans_stop(const struct lu_context *ctx, - struct mdt_device *mdt, struct thandle *th) +void mdt_trans_stop(const struct lu_context *ctx, + struct mdt_device *mdt, struct thandle *th) { mdt->mdt_bottom->dd_ops->dt_trans_stop(ctx, th); } @@ -880,8 +880,8 @@ static int mdt_txn_commit_cb(const struct lu_context *ctx, int mdt_fs_setup(const struct lu_context *ctx, struct mdt_device *mdt, struct obd_device *obd) { - struct lu_fid last_fid; - struct dt_object *last; + struct lu_fid fid; + struct dt_object *o; int rc = 0; ENTRY; @@ -893,26 +893,47 @@ int mdt_fs_setup(const struct lu_context *ctx, struct mdt_device *mdt, dt_txn_callback_add(mdt->mdt_bottom, &mdt->mdt_txn_cb); - last = dt_store_open(ctx, mdt->mdt_bottom, - LAST_RCVD, &last_fid); - if(!IS_ERR(last)) { - mdt->mdt_last_rcvd = last; + o = dt_store_open(ctx, mdt->mdt_bottom, LAST_RCVD, &fid); + if(!IS_ERR(o)) { + mdt->mdt_last_rcvd = o; rc = mdt_server_data_init(ctx, mdt); if (rc) { - lu_object_put(ctx, &last->do_lu); + lu_object_put(ctx, &o->do_lu); mdt->mdt_last_rcvd = NULL; } } else { - rc = PTR_ERR(last); + rc = PTR_ERR(o); CERROR("cannot open %s: rc = %d\n", LAST_RCVD, rc); } + if (rc) + RETURN(rc); + + o = dt_store_open(ctx, mdt->mdt_bottom, CAPA_KEYS, &fid); + if(!IS_ERR(o)) { + struct md_device *next = mdt->mdt_child; + mdt->mdt_ck_obj = o; + rc = mdt_capa_keys_init(ctx, mdt); + if (rc) { + lu_object_put(ctx, &o->do_lu); + mdt->mdt_ck_obj = NULL; + RETURN(rc); + } + rc = next->md_ops->mdo_init_capa_keys(next, mdt->mdt_capa_keys); + } else { + rc = PTR_ERR(o); + CERROR("cannot open %s: rc = %d\n", CAPA_KEYS, rc); + } + + if (rc) + RETURN(rc); + OBD_SET_CTXT_MAGIC(&obd->obd_lvfs_ctxt); 
obd->obd_lvfs_ctxt.pwdmnt = current->fs->pwdmnt; obd->obd_lvfs_ctxt.pwd = current->fs->pwd; obd->obd_lvfs_ctxt.fs = get_ds(); - RETURN (rc); + RETURN(0); } @@ -927,6 +948,9 @@ void mdt_fs_cleanup(const struct lu_context *ctx, struct mdt_device *mdt) if (mdt->mdt_last_rcvd) lu_object_put(ctx, &mdt->mdt_last_rcvd->do_lu); mdt->mdt_last_rcvd = NULL; + if (mdt->mdt_ck_obj) + lu_object_put(ctx, &mdt->mdt_ck_obj->do_lu); + mdt->mdt_ck_obj = NULL; } /* reconstruction code */ @@ -974,7 +998,8 @@ static void mdt_reconstruct_create(struct mdt_thread_info *mti, return; /* if no error, so child was created with requested fid */ - child = mdt_object_find(mti->mti_ctxt, mdt, mti->mti_rr.rr_fid2); + child = mdt_object_find(mti->mti_ctxt, mdt, mti->mti_rr.rr_fid2, + mti->mti_rr.rr_capa2); LASSERT(!IS_ERR(child)); body = req_capsule_server_get(&mti->mti_pill, &RMF_MDT_BODY); @@ -1004,7 +1029,8 @@ static void mdt_reconstruct_setattr(struct mdt_thread_info *mti, return; body = req_capsule_server_get(&mti->mti_pill, &RMF_MDT_BODY); - obj = mdt_object_find(mti->mti_ctxt, mdt, mti->mti_rr.rr_fid1); + obj = mdt_object_find(mti->mti_ctxt, mdt, mti->mti_rr.rr_fid1, + mti->mti_rr.rr_capa1); LASSERT(!IS_ERR(obj)); mo_attr_get(mti->mti_ctxt, mdt_object_child(obj), &mti->mti_attr, NULL); @@ -1027,7 +1053,7 @@ static void mdt_reconstruct_with_shrink(struct mdt_thread_info *mti, struct mdt_lock_handle *lhc) { mdt_reconstruct_generic(mti, lhc); - mdt_shrink_reply(mti, REPLY_REC_OFF + 1); + mdt_shrink_reply(mti, REPLY_REC_OFF + 1, 0, 0); } typedef void (*mdt_reconstructor)(struct mdt_thread_info *mti, diff --git a/lustre/mdt/mdt_reint.c b/lustre/mdt/mdt_reint.c index 3c3be96..42d8f1c 100644 --- a/lustre/mdt/mdt_reint.c +++ b/lustre/mdt/mdt_reint.c @@ -54,11 +54,12 @@ static int mdt_md_create(struct mdt_thread_info *info) lh->mlh_mode = LCK_EX; parent = mdt_object_find_lock(info, rr->rr_fid1, - lh, MDS_INODELOCK_UPDATE); + lh, MDS_INODELOCK_UPDATE, + rr->rr_capa1); if (IS_ERR(parent)) 
RETURN(PTR_ERR(parent)); - child = mdt_object_find(info->mti_ctxt, mdt, rr->rr_fid2); + child = mdt_object_find(info->mti_ctxt, mdt, rr->rr_fid2, BYPASS_CAPA); if (!IS_ERR(child)) { struct md_object *next = mdt_object_child(parent); @@ -95,7 +96,8 @@ static int mdt_md_mkobj(struct mdt_thread_info *info) repbody = req_capsule_server_get(&info->mti_pill, &RMF_MDT_BODY); - o = mdt_object_find(info->mti_ctxt, mdt, info->mti_rr.rr_fid2); + o = mdt_object_find(info->mti_ctxt, mdt, info->mti_rr.rr_fid2, + BYPASS_CAPA); if (!IS_ERR(o)) { struct md_object *next = mdt_object_child(o); @@ -187,6 +189,7 @@ out: static int mdt_reint_setattr(struct mdt_thread_info *info, struct mdt_lock_handle *lhc) { + struct mdt_device *mdt = info->mti_mdt; struct md_attr *ma = &info->mti_attr; struct mdt_reint_record *rr = &info->mti_rr; struct ptlrpc_request *req = mdt_info_req(info); @@ -203,7 +206,8 @@ static int mdt_reint_setattr(struct mdt_thread_info *info, (unsigned int)ma->ma_attr.la_valid); repbody = req_capsule_server_get(&info->mti_pill, &RMF_MDT_BODY); - mo = mdt_object_find(info->mti_ctxt, info->mti_mdt, rr->rr_fid1); + mo = mdt_object_find(info->mti_ctxt, info->mti_mdt, rr->rr_fid1, + rr->rr_capa1); if (IS_ERR(mo)) RETURN(rc = PTR_ERR(mo)); @@ -268,6 +272,19 @@ static int mdt_reint_setattr(struct mdt_thread_info *info, GOTO(out, rc); mdt_pack_attr2body(repbody, &ma->ma_attr, mdt_object_fid(mo)); + + if (mdt->mdt_opts.mo_oss_capa) { + struct lustre_capa *capa; + + capa = req_capsule_server_get(&info->mti_pill, &RMF_CAPA1); + LASSERT(capa); + capa->lc_opc = CAPA_OPC_OSS_DEFAULT | CAPA_OPC_OSS_TRUNC; + rc = mo_capa_get(info->mti_ctxt, mdt_object_child(mo), capa); + if (rc) + GOTO(out, rc); /* exit via out: as above */ + repbody->valid |= OBD_MD_FLOSSCAPA; + } + mdt_body_reverse_idmap(info, repbody); EXIT; out: @@ -333,7 +350,7 @@ static int mdt_reint_unlink(struct mdt_thread_info *info, parent_lh = &info->mti_lh[MDT_LH_PARENT]; parent_lh->mlh_mode = LCK_EX; mp = mdt_object_find_lock(info, rr->rr_fid1, parent_lh,
- MDS_INODELOCK_UPDATE); + MDS_INODELOCK_UPDATE, rr->rr_capa1); if (IS_ERR(mp)) GOTO(out, rc = PTR_ERR(mp)); @@ -365,7 +382,8 @@ static int mdt_reint_unlink(struct mdt_thread_info *info, GOTO(out_unlock_parent, rc); /* we will lock the child regardless it is local or remote. No harm. */ - mc = mdt_object_find(info->mti_ctxt, info->mti_mdt, child_fid); + mc = mdt_object_find(info->mti_ctxt, info->mti_mdt, child_fid, + BYPASS_CAPA); if (IS_ERR(mc)) GOTO(out_unlock_parent, rc = PTR_ERR(mc)); child_lh = &info->mti_lh[MDT_LH_CHILD]; @@ -397,7 +415,7 @@ out_put_child: out_unlock_parent: mdt_object_unlock_put(info, mp, parent_lh, rc); out: - mdt_shrink_reply(info, REPLY_REC_OFF + 1); + mdt_shrink_reply(info, REPLY_REC_OFF + 1, 0, 0); return rc; } @@ -425,7 +443,7 @@ static int mdt_reint_link(struct mdt_thread_info *info, lhs = &info->mti_lh[MDT_LH_PARENT]; lhs->mlh_mode = LCK_EX; ms = mdt_object_find_lock(info, rr->rr_fid1, lhs, - MDS_INODELOCK_UPDATE); + MDS_INODELOCK_UPDATE, rr->rr_capa1); if (IS_ERR(ms)) RETURN(PTR_ERR(ms)); @@ -439,7 +457,7 @@ static int mdt_reint_link(struct mdt_thread_info *info, lhp = &info->mti_lh[MDT_LH_CHILD]; lhp->mlh_mode = LCK_EX; mp = mdt_object_find_lock(info, rr->rr_fid2, lhp, - MDS_INODELOCK_UPDATE); + MDS_INODELOCK_UPDATE, rr->rr_capa2); if (IS_ERR(mp)) GOTO(out_unlock_source, rc = PTR_ERR(mp)); @@ -483,7 +501,7 @@ static int mdt_reint_rename_tgt(struct mdt_thread_info *info) lh_tgtdir = &info->mti_lh[MDT_LH_PARENT]; lh_tgtdir->mlh_mode = LCK_EX; mtgtdir = mdt_object_find_lock(info, rr->rr_fid1, lh_tgtdir, - MDS_INODELOCK_UPDATE); + MDS_INODELOCK_UPDATE, rr->rr_capa1); if (IS_ERR(mtgtdir)) GOTO(out, rc = PTR_ERR(mtgtdir)); @@ -496,7 +514,7 @@ static int mdt_reint_rename_tgt(struct mdt_thread_info *info) lh_tgt->mlh_mode = LCK_EX; mtgt = mdt_object_find_lock(info, tgt_fid, lh_tgt, - MDS_INODELOCK_LOOKUP); + MDS_INODELOCK_LOOKUP, BYPASS_CAPA); if (IS_ERR(mtgt)) GOTO(out_unlock_tgtdir, rc = PTR_ERR(mtgt)); @@ -521,7 +539,7 @@ static int 
mdt_reint_rename_tgt(struct mdt_thread_info *info) out_unlock_tgtdir: mdt_object_unlock_put(info, mtgtdir, lh_tgtdir, rc); out: - mdt_shrink_reply(info, REPLY_REC_OFF + 1); + mdt_shrink_reply(info, REPLY_REC_OFF + 1, 0, 0); return rc; } @@ -583,7 +601,8 @@ static int mdt_rename_check(struct mdt_thread_info *info, struct lu_fid *fid) ENTRY; do { - dst = mdt_object_find(info->mti_ctxt, info->mti_mdt, &dst_fid); + dst = mdt_object_find(info->mti_ctxt, info->mti_mdt, &dst_fid, + BYPASS_CAPA); if (!IS_ERR(dst)) { rc = mdo_is_subdir(info->mti_ctxt, mdt_object_child(dst), @@ -648,7 +667,7 @@ static int mdt_reint_rename(struct mdt_thread_info *info, lh_srcdirp = &info->mti_lh[MDT_LH_PARENT]; lh_srcdirp->mlh_mode = LCK_EX; msrcdir = mdt_object_find_lock(info, rr->rr_fid1, lh_srcdirp, - MDS_INODELOCK_UPDATE); + MDS_INODELOCK_UPDATE, rr->rr_capa1); if (IS_ERR(msrcdir)) GOTO(out, rc = PTR_ERR(msrcdir)); @@ -659,8 +678,8 @@ static int mdt_reint_rename(struct mdt_thread_info *info, mdt_object_get(info->mti_ctxt, msrcdir); mtgtdir = msrcdir; } else { - mtgtdir = mdt_object_find(info->mti_ctxt, - info->mti_mdt, rr->rr_fid2); + mtgtdir = mdt_object_find(info->mti_ctxt, info->mti_mdt, + rr->rr_fid2, rr->rr_capa2); if (IS_ERR(mtgtdir)) GOTO(out_unlock_source, rc = PTR_ERR(mtgtdir)); @@ -685,7 +704,7 @@ static int mdt_reint_rename(struct mdt_thread_info *info, lh_oldp = &info->mti_lh[MDT_LH_OLD]; lh_oldp->mlh_mode = LCK_EX; mold = mdt_object_find_lock(info, old_fid, lh_oldp, - MDS_INODELOCK_LOOKUP); + MDS_INODELOCK_LOOKUP, BYPASS_CAPA); if (IS_ERR(mold)) GOTO(out_unlock_target, rc = PTR_ERR(mold)); @@ -703,7 +722,8 @@ static int mdt_reint_rename(struct mdt_thread_info *info, GOTO(out_unlock_old, rc = -EINVAL); lh_newp->mlh_mode = LCK_EX; - mnew = mdt_object_find(info->mti_ctxt, info->mti_mdt, new_fid); + mnew = mdt_object_find(info->mti_ctxt, info->mti_mdt, new_fid, + BYPASS_CAPA); if (IS_ERR(mnew)) GOTO(out_unlock_old, rc = PTR_ERR(mnew)); @@ -761,7 +781,7 @@ out_unlock_source: 
mdt_object_unlock_put(info, msrcdir, lh_srcdirp, rc); out: mdt_rename_unlock(&rename_lh); - mdt_shrink_reply(info, REPLY_REC_OFF + 1); + mdt_shrink_reply(info, REPLY_REC_OFF + 1, 0, 0); return rc; } diff --git a/lustre/obdclass/Makefile.in b/lustre/obdclass/Makefile.in index 4f040ef..0dafc6c 100644 --- a/lustre/obdclass/Makefile.in +++ b/lustre/obdclass/Makefile.in @@ -24,7 +24,7 @@ obdclass-all-objs += class_obd.o obdclass-all-objs += debug.o genops.o uuid.o llog_ioctl.o obdclass-all-objs += lprocfs_status.o lustre_handles.o lustre_peer.o obdclass-all-objs += statfs_pack.o obdo.o obd_config.o obd_mount.o prng.o mea.o -obdclass-all-objs += lu_object.o dt_object.o hash.o +obdclass-all-objs += lu_object.o dt_object.o hash.o capa.o obdclass-objs := $(obdclass-linux-objs) $(obdclass-all-objs) diff --git a/lustre/obdclass/autoMakefile.am b/lustre/obdclass/autoMakefile.am index 7b3df36..3b9dbbb 100644 --- a/lustre/obdclass/autoMakefile.am +++ b/lustre/obdclass/autoMakefile.am @@ -10,7 +10,7 @@ noinst_LIBRARIES = liblustreclass.a liblustreclass_a_SOURCES = class_obd.c debug.c genops.c statfs_pack.c mea.c uuid.c liblustreclass_a_SOURCES += lustre_handles.c lustre_peer.c lprocfs_status.c liblustreclass_a_SOURCES += obdo.c obd_config.c llog.c llog_obd.c llog_cat.c -liblustreclass_a_SOURCES += llog_lvfs.c llog_swab.c +liblustreclass_a_SOURCES += llog_lvfs.c llog_swab.c capa.c liblustreclass_a_SOURCES += prng.c #llog_ioctl.c rbtree.c liblustreclass_a_CPPFLAGS = $(LLCPPFLAGS) -DLUSTRE_VERSION=\"32\" -DBUILD_VERSION=\"1\" liblustreclass_a_CFLAGS = $(LLCFLAGS) diff --git a/lustre/obdclass/capa.c b/lustre/obdclass/capa.c new file mode 100644 index 0000000..ebc97b7 --- /dev/null +++ b/lustre/obdclass/capa.c @@ -0,0 +1,298 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * lustre/obdclass/capa.c + * Lustre Capability Hash Management + * + * Copyright (c) 2005 Cluster File Systems, Inc. 
+ * Author: Lai Siyao + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif + +#define DEBUG_SUBSYSTEM S_SEC + +#ifdef __KERNEL__ +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#else +#include +#endif + +#include +#include + +cfs_mem_cache_t *capa_cachep = NULL; + +#ifdef __KERNEL__ +struct list_head capa_list[CAPA_SITE_MAX]; +spinlock_t capa_lock = SPIN_LOCK_UNLOCKED; /* lock for capa_hash/capa_list */ + +static struct hlist_head *capa_hash; +#endif +/* capa count */ +int capa_count[CAPA_SITE_MAX] = { 0, }; + +static struct capa_hmac_alg capa_hmac_algs[] = { + DEF_CAPA_HMAC_ALG("sha1", SHA1, 20, 20), +}; + +static const char *capa_site_name[] = { + [CAPA_SITE_CLIENT] = "client", + [CAPA_SITE_SERVER] = "server", + [CAPA_SITE_MAX] = "error" +}; + +EXPORT_SYMBOL(capa_cachep); +EXPORT_SYMBOL(capa_list); +EXPORT_SYMBOL(capa_lock); +EXPORT_SYMBOL(capa_count); + +int init_capa_hash(void) +{ +#ifdef __KERNEL__ + int nr_hash, i; + + OBD_ALLOC(capa_hash, PAGE_SIZE); + if (!capa_hash) + return -ENOMEM; + + nr_hash = PAGE_SIZE / sizeof(struct hlist_head); + LASSERT(nr_hash > NR_CAPAHASH); + + for (i = 0; i < NR_CAPAHASH; i++) + INIT_HLIST_HEAD(capa_hash + i); + for (i = CAPA_SITE_CLIENT; i < CAPA_SITE_MAX; i++) + INIT_LIST_HEAD(&capa_list[i]); 
+#endif + return 0; +} + +#ifdef __KERNEL__ +void cleanup_capa_hash(void) +{ + int i; + + for (i = 0; i < NR_CAPAHASH; i++) + LASSERTF(hlist_empty(capa_hash + i), + "capa hash %d not empty\n", i); + for (i = CAPA_SITE_CLIENT; i < CAPA_SITE_MAX; i++) /* every site */ + LASSERTF(list_empty(&capa_list[i]), + "capa list %d not empty\n", i); + OBD_FREE(capa_hash, PAGE_SIZE); +} + +static inline int const capa_hashfn(struct lu_fid *fid) +{ + return (fid_oid(fid) ^ fid_ver(fid)) * + (unsigned long)(fid_seq(fid) + 1) % NR_CAPAHASH; +} + +static inline int capa_on_server(struct obd_capa *ocapa) +{ + return ocapa->c_site == CAPA_SITE_SERVER; +} + +static struct obd_capa *find_capa(struct lustre_capa *capa, + struct hlist_head *head) +{ + struct hlist_node *pos; + struct obd_capa *ocapa; + int len = offsetof(struct lustre_capa, lc_hmac); + + /* MDS get capa case */ + if (capa->lc_expiry == 0) + len = offsetof(struct lustre_capa, lc_keyid); + + hlist_for_each_entry(ocapa, pos, head, u.tgt.c_hash) { + if (memcmp(&ocapa->c_capa, capa, len)) + continue; + /* don't return an expired one in this case */ + if (capa->lc_expiry == 0 && capa_is_to_expire(ocapa)) + continue; + + LASSERT(capa_on_server(ocapa)); + + DEBUG_CAPA(D_SEC, &ocapa->c_capa, "found"); + return ocapa; + } + + return NULL; +} + +static inline void capa_delete(struct obd_capa *ocapa) +{ + LASSERT(capa_on_server(ocapa)); + hlist_del(&ocapa->u.tgt.c_hash); + list_del(&ocapa->c_list); + free_capa(ocapa); +} + +static inline void free_capa_lru(struct list_head *head) +{ + struct list_head *node = head->next; + struct obd_capa *ocapa; + int count = 0; + + /* free 12 unused capa from head */ + while (node != head && count < 12) { + ocapa = list_entry(node, struct obd_capa, c_list); + node = node->next; + + LASSERT(capa_on_server(ocapa)); + if (atomic_read(&ocapa->c_refc)) + continue; + + DEBUG_CAPA(D_SEC, &ocapa->c_capa, "free unused"); + capa_delete(ocapa); + count++; + } +} + +/* add or update */ +struct obd_capa *capa_add(struct
lustre_capa *capa) +{ + struct hlist_head *head = capa_hash + capa_hashfn(&capa->lc_fid); + struct obd_capa *ocapa, *old = NULL; + + ocapa = alloc_capa(CAPA_SITE_SERVER); + if (!ocapa) + return NULL; + + spin_lock(&capa_lock); + + old = find_capa(capa, head); + if (!old) { + ocapa->c_capa = *capa; + set_capa_expiry(ocapa); + hlist_add_head(&ocapa->u.tgt.c_hash, head); + list_add_tail(&ocapa->c_list, &capa_list[CAPA_SITE_SERVER]); + + if (capa_count[CAPA_SITE_SERVER] > CAPA_HASH_SIZE) + free_capa_lru(&capa_list[CAPA_SITE_SERVER]); + + DEBUG_CAPA(D_SEC, &ocapa->c_capa, "new"); + + spin_unlock(&capa_lock); + return ocapa; + } + + spin_lock(&old->c_lock); + old->c_capa = *capa; + set_capa_expiry(old); + spin_unlock(&old->c_lock); + + list_move_tail(&old->c_list, &capa_list[CAPA_SITE_SERVER]); + + spin_unlock(&capa_lock); + + DEBUG_CAPA(D_SEC, &old->c_capa, "update"); + + free_capa(ocapa); + return old; +} + +struct obd_capa *capa_lookup(struct lustre_capa *capa) +{ + struct hlist_head *head; + struct obd_capa *ocapa; + + head = capa_hash + capa_hashfn(&capa->lc_fid); + + spin_lock(&capa_lock); + ocapa = find_capa(capa, head); + if (ocapa) + capa_get(ocapa); + spin_unlock(&capa_lock); + + return ocapa; +} + +int capa_hmac(__u8 *hmac, struct lustre_capa *capa, __u8 *key) +{ + struct crypto_tfm *tfm; + struct capa_hmac_alg *alg; + int keylen; + struct scatterlist sl = { + .page = virt_to_page(capa), + .offset = (unsigned long)(capa) % PAGE_SIZE, + .length = offsetof(struct lustre_capa, lc_hmac), + }; + + if (capa_alg(capa) != CAPA_HMAC_ALG_SHA1) + RETURN(-EFAULT); + + alg = &capa_hmac_algs[capa_alg(capa)]; + + tfm = crypto_alloc_tfm(alg->ha_name, 0); + if (!tfm) + return -ENOMEM; + keylen = alg->ha_keylen; + + crypto_hmac(tfm, key, &keylen, &sl, 1, hmac); + crypto_free_tfm(tfm); + + return 0; +} + +void cleanup_capas(int site) +{ + struct obd_capa *ocapa, *tmp; + + spin_lock(&capa_lock); + list_for_each_entry_safe(ocapa, tmp, &capa_list[site], c_list) + if (site == 
ocapa->c_site) + capa_delete(ocapa); + spin_unlock(&capa_lock); + LASSERTF(capa_count[site] == 0, "%s capability count is %d\n", + capa_site_name[site], capa_count[site]); +} +#endif + +void capa_cpy(void *capa, struct obd_capa *ocapa) +{ + spin_lock(&ocapa->c_lock); + *(struct lustre_capa *)capa = ocapa->c_capa; + spin_unlock(&ocapa->c_lock); +} + +void dump_capa_hmac(char *buf, char *key) +{ + int i, n = 0; + + for (i = 0; i < CAPA_HMAC_MAX_LEN; i++) + n += sprintf(buf + n, "%02x", (unsigned char) key[i]); +} + +EXPORT_SYMBOL(capa_add); +EXPORT_SYMBOL(capa_lookup); + +EXPORT_SYMBOL(capa_hmac); +EXPORT_SYMBOL(capa_cpy); + +EXPORT_SYMBOL(cleanup_capas); +EXPORT_SYMBOL(dump_capa_hmac); diff --git a/lustre/obdclass/class_obd.c b/lustre/obdclass/class_obd.c index 47b7d5c..58fab59 100644 --- a/lustre/obdclass/class_obd.c +++ b/lustre/obdclass/class_obd.c @@ -513,6 +513,9 @@ int obd_init_checks(void) #define obd_init_checks() do {} while(0) #endif +extern int init_capa_hash(void); +extern void cleanup_capa_hash(void); + extern spinlock_t obd_types_lock; extern spinlock_t handle_lock; extern int class_procfs_init(void); @@ -537,6 +540,10 @@ int init_obdclass(void) CDEBUG(D_INFO, " Build Version: "BUILD_VERSION"\n"); #endif + err = init_capa_hash(); + if (err) + return err; + spin_lock_init(&obd_types_lock); spin_lock_init(&handle_lock); cfs_waitq_init(&obd_race_waitq); @@ -602,6 +609,7 @@ static void cleanup_obdclass(void) } lu_global_fini(); + cleanup_capa_hash(); obd_cleanup_caches(); obd_sysctl_clean(); diff --git a/lustre/obdclass/dt_object.c b/lustre/obdclass/dt_object.c index a574ab2..47310e2 100644 --- a/lustre/obdclass/dt_object.c +++ b/lustre/obdclass/dt_object.c @@ -162,7 +162,7 @@ static struct dt_object *dt_locate(const struct lu_context *ctx, struct lu_object *obj; struct dt_object *dt; - obj = lu_object_find(ctx, dev->dd_lu_dev.ld_site, fid); + obj = lu_object_find(ctx, dev->dd_lu_dev.ld_site, fid, BYPASS_CAPA); if (!IS_ERR(obj)) { obj = 
lu_object_locate(obj->lo_header, dev->dd_lu_dev.ld_type); LASSERT(obj != NULL); @@ -185,6 +185,7 @@ struct dt_object *dt_store_open(const struct lu_context *ctx, if (result == 0) { root = dt_locate(ctx, dt, fid); if (!IS_ERR(root)) { + lu_object_bypass_capa(&root->do_lu); result = dt_lookup(ctx, root, name, fid); if (result == 0) child = dt_locate(ctx, dt, fid); diff --git a/lustre/obdclass/genops.c b/lustre/obdclass/genops.c index 49a686f..86dbd59 100644 --- a/lustre/obdclass/genops.c +++ b/lustre/obdclass/genops.c @@ -518,6 +518,11 @@ void obd_cleanup_caches(void) LASSERTF(rc == 0, "Cannot destory ll_import_cache\n"); import_cachep = NULL; } + if (capa_cachep) { + rc = cfs_mem_cache_destroy(capa_cachep); + LASSERTF(rc == 0, "Cannot destory capa_cache\n"); + capa_cachep = NULL; + } EXIT; } @@ -544,6 +549,12 @@ int obd_init_caches(void) if (!import_cachep) GOTO(out, -ENOMEM); + LASSERT(capa_cachep == NULL); + capa_cachep = cfs_mem_cache_create("capa_cache", + sizeof(struct obd_capa), 0, 0); + if (!capa_cachep) + GOTO(out, -ENOMEM); + RETURN(0); out: obd_cleanup_caches(); diff --git a/lustre/obdclass/llog_lvfs.c b/lustre/obdclass/llog_lvfs.c index 460cd4b..34f7707 100644 --- a/lustre/obdclass/llog_lvfs.c +++ b/lustre/obdclass/llog_lvfs.c @@ -694,7 +694,8 @@ static int llog_lvfs_destroy(struct llog_handle *handle) if (rc) GOTO(out, rc); - rc = obd_destroy(handle->lgh_ctxt->loc_exp, oa, NULL, NULL, NULL); + rc = obd_destroy(handle->lgh_ctxt->loc_exp, oa, NULL, NULL, NULL, + NULL); out: obdo_free(oa); RETURN(rc); diff --git a/lustre/obdclass/lprocfs_status.c b/lustre/obdclass/lprocfs_status.c index f079d15..c39f494 100644 --- a/lustre/obdclass/lprocfs_status.c +++ b/lustre/obdclass/lprocfs_status.c @@ -388,7 +388,7 @@ static const char *obd_connect_names[] = { "remote_client", "max_byte_per_rpc", "64bit_qdata", - "fid_capability", + "mds_capability", "oss_capability", NULL }; diff --git a/lustre/obdclass/lu_object.c b/lustre/obdclass/lu_object.c index e24d976..e8a5031 
100644 --- a/lustre/obdclass/lu_object.c +++ b/lustre/obdclass/lu_object.c @@ -107,7 +107,8 @@ EXPORT_SYMBOL(lu_object_put); */ static struct lu_object *lu_object_alloc(const struct lu_context *ctxt, struct lu_site *s, - const struct lu_fid *f) + const struct lu_fid *f, + const struct lustre_capa *capa) { struct lu_object *scan; struct lu_object *top; @@ -128,7 +129,11 @@ static struct lu_object *lu_object_alloc(const struct lu_context *ctxt, * This is the only place where object fid is assigned. It's constant * after this point. */ - top->lo_header->loh_fid = *f; + top->lo_header->loh_fid = *f; + if (capa == BYPASS_CAPA) + lu_object_bypass_capa(top); + else + top->lo_header->loh_capa = *capa; layers = &top->lo_header->loh_layers; do { /* @@ -422,11 +427,13 @@ static __u32 fid_hash(const struct lu_fid *f) * any case, additional reference is acquired on the returned object. */ struct lu_object *lu_object_find(const struct lu_context *ctxt, - struct lu_site *s, const struct lu_fid *f) + struct lu_site *s, const struct lu_fid *f, + struct lustre_capa *capa) { struct lu_object *o; struct lu_object *shadow; struct hlist_head *bucket; + int rc; /* * This uses standard index maintenance protocol: @@ -447,13 +454,24 @@ struct lu_object *lu_object_find(const struct lu_context *ctxt, o = htable_lookup(s, bucket, f); spin_unlock(&s->ls_guard); - if (o != NULL) + if (o != NULL) { + if (capa == BYPASS_CAPA) { + o->lo_header->loh_capa_bypass = 1; + } else { + rc = lu_object_auth(ctxt, o, capa, + CAPA_OPC_INDEX_LOOKUP); + if (rc) + return ERR_PTR(rc); + o->lo_header->loh_capa = *capa; + } return o; + } + /* * Allocate new object. This may result in rather complicated * operations, including fld queries, inode loading, etc. 
*/ - o = lu_object_alloc(ctxt, s, f); + o = lu_object_alloc(ctxt, s, f, capa); if (IS_ERR(o)) return o; @@ -476,6 +494,24 @@ struct lu_object *lu_object_find(const struct lu_context *ctxt, } EXPORT_SYMBOL(lu_object_find); +int lu_object_auth(const struct lu_context *ctxt, const struct lu_object *o, + struct lustre_capa *capa, __u64 opc) +{ + struct lu_object_header *top = o->lo_header; + int rc; + + list_for_each_entry(o, &top->loh_layers, lo_linkage) { + if (o->lo_ops->loo_object_auth) { + rc = o->lo_ops->loo_object_auth(ctxt, o, capa, opc); + if (rc) + return rc; + } + } + + return 0; +} +EXPORT_SYMBOL(lu_object_auth); + enum { LU_SITE_HTABLE_BITS = 8, LU_SITE_HTABLE_SIZE = (1 << LU_SITE_HTABLE_BITS), diff --git a/lustre/obdclass/obd_mount.c b/lustre/obdclass/obd_mount.c index b0ce61d..6931118 100644 --- a/lustre/obdclass/obd_mount.c +++ b/lustre/obdclass/obd_mount.c @@ -1907,7 +1907,6 @@ static int lmd_parse(char *options, struct lustre_mount_data *lmd) goto invalid; clear++; } - /* Linux 2.4 doesn't pass the device, so we stuck it at the end of the options. 
*/ else if (strncmp(s1, "device=", 7) == 0) { diff --git a/lustre/obdecho/echo.c b/lustre/obdecho/echo.c index 547ae26..67bdd54 100644 --- a/lustre/obdecho/echo.c +++ b/lustre/obdecho/echo.c @@ -130,7 +130,7 @@ int echo_create(struct obd_export *exp, struct obdo *oa, int echo_destroy(struct obd_export *exp, struct obdo *oa, struct lov_stripe_md *ea, struct obd_trans_info *oti, - struct obd_export *md_exp) + struct obd_export *md_exp, void *unused) { struct obd_device *obd = class_exp2obd(exp); @@ -270,7 +270,7 @@ echo_page_debug_check(cfs_page_t *page, obd_id id, int echo_preprw(int cmd, struct obd_export *export, struct obdo *oa, int objcount, struct obd_ioobj *obj, int niocount, struct niobuf_remote *nb, struct niobuf_local *res, - struct obd_trans_info *oti) + struct obd_trans_info *oti, struct lustre_capa *unused) { struct obd_device *obd; struct niobuf_local *r = res; diff --git a/lustre/obdecho/echo_client.c b/lustre/obdecho/echo_client.c index 1429bdf..016176c 100644 --- a/lustre/obdecho/echo_client.c +++ b/lustre/obdecho/echo_client.c @@ -240,7 +240,7 @@ static int echo_create_object(struct obd_device *obd, int on_target, oa->o_id, on_target ? 
" (undoing create)" : ""); if (on_target) - obd_destroy(ec->ec_exp, oa, lsm, oti, NULL); + obd_destroy(ec->ec_exp, oa, lsm, oti, NULL, NULL); rc = -EEXIST; goto failed; @@ -945,7 +945,8 @@ static int echo_client_prep_commit(struct obd_export *exp, int rw, ioo.ioo_bufcnt = npages; oti->oti_transno = 0; - ret = obd_preprw(rw, exp, oa, 1, &ioo, npages, rnb, lnb, oti); + ret = obd_preprw(rw, exp, oa, 1, &ioo, npages, rnb, lnb, oti, + NULL); if (ret != 0) GOTO(out, ret); @@ -1233,7 +1234,7 @@ echo_client_iocontrol(unsigned int cmd, struct obd_export *exp, oa->o_gr = FILTER_GROUP_ECHO; oa->o_valid |= OBD_MD_FLGROUP; rc = obd_destroy(ec->ec_exp, oa, eco->eco_lsm, - &dummy_oti, NULL); + &dummy_oti, NULL, NULL); if (rc == 0) eco->eco_deleted = 1; echo_put_object(eco); diff --git a/lustre/obdfilter/Makefile.in b/lustre/obdfilter/Makefile.in index 8305eb5..f1b3e78 100644 --- a/lustre/obdfilter/Makefile.in +++ b/lustre/obdfilter/Makefile.in @@ -1,7 +1,7 @@ MODULES := obdfilter obdfilter-objs := filter.o filter_io.o filter_log.o -obdfilter-objs += lproc_obdfilter.o filter_lvb.o +obdfilter-objs += lproc_obdfilter.o filter_lvb.o filter_capa.o ifeq ($(PATCHLEVEL),4) obdfilter-objs += filter_io_24.o diff --git a/lustre/obdfilter/filter.c b/lustre/obdfilter/filter.c index ced7305..7d48029 100644 --- a/lustre/obdfilter/filter.c +++ b/lustre/obdfilter/filter.c @@ -1263,6 +1263,8 @@ static void filter_post(struct obd_device *obd) filter_cleanup_groups(obd); filter_free_server_data(filter); pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); + + filter_free_capa_keys(filter); } static void filter_set_last_id(struct filter_obd *filter, @@ -1927,6 +1929,9 @@ int filter_common_setup(struct obd_device *obd, struct lustre_cfg* lcfg, obd->obd_replayable ? 
"enabled" : "disabled"); } + filter->fo_fl_oss_capa = 0; + INIT_LIST_HEAD(&filter->fo_capa_keys); + RETURN(0); err_post: @@ -2716,6 +2721,11 @@ static int filter_getattr(struct obd_export *exp, struct obd_info *oinfo) int rc = 0; ENTRY; + rc = filter_verify_capa(exp, NULL, oinfo_mdsno(oinfo), + oinfo_capa(oinfo), CAPA_OPC_META_READ); + if (rc) + RETURN(rc); + obd = class_exp2obd(exp); if (obd == NULL) { CDEBUG(D_IOCTL, "invalid client export %p\n", exp); @@ -2919,6 +2929,11 @@ int filter_setattr(struct obd_export *exp, struct obd_info *oinfo, int rc; ENTRY; + rc = filter_verify_capa(exp, NULL, oinfo_mdsno(oinfo), + oinfo_capa(oinfo), CAPA_OPC_META_WRITE); + if (rc) + RETURN(rc); + dentry = __filter_oa2dentry(exp->exp_obd, oinfo->oi_oa, __FUNCTION__, 1); if (IS_ERR(dentry)) @@ -3047,7 +3062,7 @@ static int filter_destroy_precreated(struct obd_export *exp, struct obdo *oa, exp->exp_obd->obd_name, oa->o_id + 1, last); for (id = last; id > oa->o_id; id--) { doa.o_id = id; - rc = filter_destroy(exp, &doa, NULL, NULL, NULL); + rc = filter_destroy(exp, &doa, NULL, NULL, NULL, NULL); if (rc && rc != -ENOENT) /* this is pretty fatal... 
*/ CEMERG("error destroying precreate objid "LPU64": %d\n", id, rc); @@ -3419,7 +3434,7 @@ static int filter_create(struct obd_export *exp, struct obdo *oa, int filter_destroy(struct obd_export *exp, struct obdo *oa, struct lov_stripe_md *md, struct obd_trans_info *oti, - struct obd_export *md_exp) + struct obd_export *md_exp, void *capa) { unsigned int qcids[MAXQUOTAS] = {0, 0}; struct obd_device *obd; @@ -3433,6 +3448,15 @@ int filter_destroy(struct obd_export *exp, struct obdo *oa, ENTRY; LASSERT(oa->o_valid & OBD_MD_FLGROUP); + +#if 0 /* some places don't support capability yet */ + rc = filter_verify_capa(exp, NULL, obdo_mdsno(oa), + (struct lustre_capa *)capa, + CAPA_OPC_INDEX_LOOKUP); + if (rc) + RETURN(rc); +#endif + #if 0 if (!(oa->o_valid & OBD_MD_FLGROUP)) oa->o_gr = 0; @@ -3580,13 +3604,19 @@ static int filter_truncate(struct obd_export *exp, struct obd_info *oinfo, ", o_size = "LPD64"\n", oinfo->oi_oa->o_id, oinfo->oi_oa->o_valid, oinfo->oi_policy.l_extent.start); + rc = filter_verify_capa(exp, NULL, oinfo_mdsno(oinfo), + oinfo_capa(oinfo), CAPA_OPC_OSS_TRUNC); + if (rc) + RETURN(rc); + oinfo->oi_oa->o_size = oinfo->oi_policy.l_extent.start; rc = filter_setattr(exp, oinfo, oti); RETURN(rc); } static int filter_sync(struct obd_export *exp, struct obdo *oa, - struct lov_stripe_md *lsm, obd_off start, obd_off end) + struct lov_stripe_md *lsm, obd_off start, obd_off end, + void *capa) { struct lvfs_run_ctxt saved; struct filter_obd *filter; @@ -3595,6 +3625,11 @@ static int filter_sync(struct obd_export *exp, struct obdo *oa, int rc, rc2; ENTRY; + rc = filter_verify_capa(exp, NULL, obdo_mdsno(oa), + (struct lustre_capa *)capa, CAPA_OPC_OSS_WRITE); + if (rc) + RETURN(rc); + filter = &exp->exp_obd->u.filter; /* an objid of zero is taken to mean "sync whole filesystem" */ @@ -3691,6 +3726,13 @@ static int filter_set_info_async(struct obd_export *exp, __u32 keylen, RETURN(-EINVAL); } + if (KEY_IS(KEY_CAPA_KEY)) { + rc = filter_update_capa_key(obd, (struct 
lustre_capa_key *)val); + if (rc) + CERROR("filter update capability key failed: %d\n", rc); + RETURN(rc); + } + if (keylen < strlen(KEY_MDS_CONN) || memcmp(key, KEY_MDS_CONN, keylen) != 0) RETURN(-EINVAL); diff --git a/lustre/obdfilter/filter_capa.c b/lustre/obdfilter/filter_capa.c new file mode 100644 index 0000000..cbdca5b --- /dev/null +++ b/lustre/obdfilter/filter_capa.c @@ -0,0 +1,184 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2005 Cluster File Systems, Inc. + * + * Author: Lai Siyao + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#define DEBUG_SUBSYSTEM S_FILTER + +#include +#include +#include +#include +#include + +#include +#include + +#include "filter_internal.h" + +static inline __u32 filter_ck_keyid(struct filter_capa_key *key) +{ + return key->k_key.lk_keyid; +} + +int filter_update_capa_key(struct obd_device *obd, struct lustre_capa_key *key) +{ + struct filter_obd *filter = &obd->u.filter; + struct filter_capa_key *k, *rkey = NULL, *bkey = NULL; + + spin_lock(&capa_lock); + list_for_each_entry(k, &filter->fo_capa_keys, k_list) { + if (k->k_key.lk_mdsid != key->lk_mdsid) + continue; + + if (rkey) + bkey = k; + else + rkey = k; + } + spin_unlock(&capa_lock); + + if (rkey && bkey && filter_ck_keyid(rkey) < filter_ck_keyid(bkey)) { + k = rkey; + rkey = bkey; + bkey = k; + } + + if (bkey) { + k = bkey; + } else { + OBD_ALLOC_PTR(k); + if (!k) + RETURN(-ENOMEM); + INIT_LIST_HEAD(&k->k_list); + } + + spin_lock(&capa_lock); + k->k_key = *key; + if (list_empty(&k->k_list)) + list_add(&k->k_list, &filter->fo_capa_keys); + spin_unlock(&capa_lock); + + DEBUG_CAPA_KEY(D_SEC, key, "new"); + RETURN(0); +} + +int filter_verify_capa(struct obd_export *exp, struct lu_fid *fid, __u64 mdsid, + struct lustre_capa *capa, __u64 opc) +{ + struct obd_device *obd = exp->exp_obd; + struct filter_obd *filter = &obd->u.filter; + struct filter_capa_key *k; + struct lustre_capa_key key; + struct obd_capa *c; + __u8 *hmac; + int keys_ready = 0, key_found = 0, rc = 0; + ENTRY; + + /* capability is disabled */ + if (!filter->fo_fl_oss_capa) + RETURN(0); + + if (capa == NULL) { + CERROR("no capa has been passed\n"); + RETURN(-EACCES); + } + +#warning "enable fid check in filter_verify_capa when fid ready" + + if (!capa_opc_supported(capa, opc)) { + DEBUG_CAPA(D_ERROR, capa, "opc "LPX64" not supported by", opc); + RETURN(-EACCES); + } + + c = capa_lookup(capa); + if (c) { + spin_lock(&c->c_lock); + if (memcmp(&c->c_capa, capa, sizeof(*capa))) { + DEBUG_CAPA(D_ERROR, capa, "HMAC mismatch"); + rc = -EACCES; + } 
else if (capa_is_expired(c)) { + DEBUG_CAPA(D_ERROR, capa, "expired"); + rc = -ESTALE; + } + spin_unlock(&c->c_lock); + + capa_put(c); + RETURN(rc); + } + + spin_lock(&capa_lock); + list_for_each_entry(k, &filter->fo_capa_keys, k_list) + if (k->k_key.lk_mdsid == mdsid) { + keys_ready = 1; + if (k->k_key.lk_keyid == capa_keyid(capa)) { + key = k->k_key; + key_found = 1; + break; + } + } + spin_unlock(&capa_lock); + + if (!keys_ready) { + CDEBUG(D_SEC, "MDS hasn't propagated capability keys yet, " + "ignore check!\n"); + RETURN(0); + } + + if (!key_found) { + DEBUG_CAPA(D_ERROR, capa, "no matched capability key for"); + RETURN(-ESTALE); + } + + OBD_ALLOC(hmac, CAPA_HMAC_MAX_LEN); + if (hmac == NULL) + RETURN(-ENOMEM); + + rc = capa_hmac(hmac, capa, key.lk_key); + if (rc) { + DEBUG_CAPA(D_ERROR, capa, "HMAC failed: rc %d", rc); + OBD_FREE(hmac, CAPA_HMAC_MAX_LEN); + RETURN(rc); + } + + rc = memcmp(hmac, capa->lc_hmac, CAPA_HMAC_MAX_LEN); + OBD_FREE(hmac, CAPA_HMAC_MAX_LEN); + if (rc) { + DEBUG_CAPA(D_ERROR, capa, "HMAC mismatch"); + RETURN(-EACCES); + } + + /* store in capa hash */ + capa_add(capa); + RETURN(0); +} + +void filter_free_capa_keys(struct filter_obd *filter) +{ + struct filter_capa_key *key, *n; + + spin_lock(&capa_lock); + list_for_each_entry_safe(key, n, &filter->fo_capa_keys, k_list) { + list_del_init(&key->k_list); + OBD_FREE(key, sizeof(*key)); + } + spin_unlock(&capa_lock); +} diff --git a/lustre/obdfilter/filter_internal.h b/lustre/obdfilter/filter_internal.h index 9933a1b..8c4a0de 100644 --- a/lustre/obdfilter/filter_internal.h +++ b/lustre/obdfilter/filter_internal.h @@ -107,7 +107,7 @@ int filter_common_setup(struct obd_device *, struct lustre_cfg *lcfg, void *option); int filter_destroy(struct obd_export *exp, struct obdo *oa, struct lov_stripe_md *md, struct obd_trans_info *, - struct obd_export *); + struct obd_export *, void *capa); int filter_setattr_internal(struct obd_export *exp, struct dentry *dentry, struct obdo *oa, struct 
obd_trans_info *oti); int filter_setattr(struct obd_export *exp, struct obd_info *oinfo, @@ -125,7 +125,8 @@ extern struct ldlm_valblock_ops filter_lvbo; /* filter_io.c */ int filter_preprw(int cmd, struct obd_export *, struct obdo *, int objcount, struct obd_ioobj *, int niocount, struct niobuf_remote *, - struct niobuf_local *, struct obd_trans_info *); + struct niobuf_local *, struct obd_trans_info *, + struct lustre_capa *); int filter_commitrw(int cmd, struct obd_export *, struct obdo *, int objcount, struct obd_ioobj *, int niocount, struct niobuf_local *, struct obd_trans_info *, int rc); @@ -191,4 +192,14 @@ static inline int lproc_filter_attach_seqstat(struct obd_device *dev) {} /* Quota stuff */ extern quota_interface_t *quota_interface; +/* Capability */ +static inline __u64 obdo_mdsno(struct obdo *oa) +{ + return oa->o_gr - FILTER_GROUP_MDS0; +} + +int filter_update_capa_key(struct obd_device *obd, struct lustre_capa_key *key); +int filter_verify_capa(struct obd_export *exp, struct lu_fid *fid, __u64 mdsid, + struct lustre_capa *capa, __u64 opc); +void filter_free_capa_keys(struct filter_obd *filter); #endif /* _FILTER_INTERNAL_H */ diff --git a/lustre/obdfilter/filter_io.c b/lustre/obdfilter/filter_io.c index 38efbc2..a6645d4 100644 --- a/lustre/obdfilter/filter_io.c +++ b/lustre/obdfilter/filter_io.c @@ -271,7 +271,8 @@ static int filter_preprw_read(int cmd, struct obd_export *exp, struct obdo *oa, int objcount, struct obd_ioobj *obj, int niocount, struct niobuf_remote *nb, struct niobuf_local *res, - struct obd_trans_info *oti) + struct obd_trans_info *oti, + struct lustre_capa *capa) { struct obd_device *obd = exp->exp_obd; struct lvfs_run_ctxt saved; @@ -290,6 +291,11 @@ static int filter_preprw_read(int cmd, struct obd_export *exp, struct obdo *oa, LASSERTF(objcount == 1, "%d\n", objcount); LASSERTF(obj->ioo_bufcnt > 0, "%d\n", obj->ioo_bufcnt); + rc = filter_verify_capa(exp, NULL, obdo_mdsno(oa), capa, + CAPA_OPC_OSS_READ); + if (rc) + 
RETURN(rc); + if (oa && oa->o_valid & OBD_MD_FLGRANT) { spin_lock(&obd->obd_osfs_lock); filter_grant_incoming(exp, oa); @@ -504,7 +510,8 @@ static int filter_preprw_write(int cmd, struct obd_export *exp, struct obdo *oa, int objcount, struct obd_ioobj *obj, int niocount, struct niobuf_remote *nb, struct niobuf_local *res, - struct obd_trans_info *oti) + struct obd_trans_info *oti, + struct lustre_capa *capa) { struct lvfs_run_ctxt saved; struct niobuf_remote *rnb; @@ -520,6 +527,11 @@ static int filter_preprw_write(int cmd, struct obd_export *exp, struct obdo *oa, LASSERT(objcount == 1); LASSERT(obj->ioo_bufcnt > 0); + rc = filter_verify_capa(exp, NULL, obdo_mdsno(oa), capa, + CAPA_OPC_OSS_WRITE); + if (rc) + RETURN(rc); + push_ctxt(&saved, &exp->exp_obd->obd_lvfs_ctxt, NULL); iobuf = filter_iobuf_get(&exp->exp_obd->u.filter, oti); if (IS_ERR(iobuf)) @@ -676,14 +688,14 @@ cleanup: int filter_preprw(int cmd, struct obd_export *exp, struct obdo *oa, int objcount, struct obd_ioobj *obj, int niocount, struct niobuf_remote *nb, struct niobuf_local *res, - struct obd_trans_info *oti) + struct obd_trans_info *oti, struct lustre_capa *capa) { if (cmd == OBD_BRW_WRITE) return filter_preprw_write(cmd, exp, oa, objcount, obj, - niocount, nb, res, oti); + niocount, nb, res, oti, capa); if (cmd == OBD_BRW_READ) return filter_preprw_read(cmd, exp, oa, objcount, obj, - niocount, nb, res, oti); + niocount, nb, res, oti, capa); LBUG(); return -EPROTO; } @@ -851,7 +863,7 @@ int filter_brw(int cmd, struct obd_export *exp, struct obd_info *oinfo, ioo.ioo_bufcnt = oa_bufs; ret = filter_preprw(cmd, exp, oinfo->oi_oa, 1, &ioo, - oa_bufs, rnb, lnb, oti); + oa_bufs, rnb, lnb, oti, oinfo_capa(oinfo)); if (ret != 0) GOTO(out, ret); diff --git a/lustre/obdfilter/filter_log.c b/lustre/obdfilter/filter_log.c index d737983..90f4358 100644 --- a/lustre/obdfilter/filter_log.c +++ b/lustre/obdfilter/filter_log.c @@ -149,7 +149,7 @@ static int filter_recov_log_unlink_cb(struct llog_ctxt *ctxt, 
memcpy(obdo_logcookie(oa), cookie, sizeof(*cookie)); oid = oa->o_id; - rc = filter_destroy(exp, oa, NULL, NULL, NULL); + rc = filter_destroy(exp, oa, NULL, NULL, NULL, NULL); obdo_free(oa); if (rc == -ENOENT) { CDEBUG(D_HA, "object already removed, send cookie\n"); diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c index a6bd67a..7c5cfbe 100644 --- a/lustre/osc/osc_request.c +++ b/lustre/osc/osc_request.c @@ -147,6 +147,31 @@ static int osc_unpackmd(struct obd_export *exp, struct lov_stripe_md **lsmp, RETURN(lsm_size); } +static inline void osc_pack_capa(struct ptlrpc_request *req, int offset, + struct ost_body *body, void *capa) +{ + struct obd_capa *oc = (struct obd_capa *)capa; + struct lustre_capa *c; + + if (!capa) + return; + + c = lustre_msg_buf(req->rq_reqmsg, offset, sizeof(*c)); + capa_cpy(c, oc); + body->oa.o_valid |= OBD_MD_FLOSSCAPA; + DEBUG_CAPA(D_SEC, c, "pack"); +} + +static inline void osc_pack_req_body(struct ptlrpc_request *req, int offset, + struct obd_info *oinfo) +{ + struct ost_body *body; + + body = lustre_msg_buf(req->rq_reqmsg, offset, sizeof(*body)); + body->oa = *oinfo->oi_oa; + osc_pack_capa(req, offset + 1, body, oinfo->oi_capa); +} + static int osc_getattr_interpret(struct ptlrpc_request *req, struct osc_async_args *aa, int rc) { @@ -180,17 +205,17 @@ static int osc_getattr_async(struct obd_export *exp, struct obd_info *oinfo, { struct ptlrpc_request *req; struct ost_body *body; - int size[2] = { sizeof(struct ptlrpc_body), sizeof(*body) }; + int size[3] = { sizeof(struct ptlrpc_body), sizeof(*body) }; struct osc_async_args *aa; ENTRY; + size[REQ_REC_OFF + 1] = oinfo->oi_capa ? 
sizeof(*oinfo->oi_capa) : 0; req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_OST_VERSION, - OST_GETATTR, 2, size,NULL); + OST_GETATTR, 3, size,NULL); if (!req) RETURN(-ENOMEM); - body = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, sizeof(*body)); - memcpy(&body->oa, oinfo->oi_oa, sizeof(*oinfo->oi_oa)); + osc_pack_req_body(req, REQ_REC_OFF, oinfo); ptlrpc_req_set_repsize(req, 2, size); req->rq_interpret_reply = osc_getattr_interpret; @@ -207,16 +232,16 @@ static int osc_getattr(struct obd_export *exp, struct obd_info *oinfo) { struct ptlrpc_request *req; struct ost_body *body; - int rc, size[2] = { sizeof(struct ptlrpc_body), sizeof(*body) }; + int rc, size[3] = { sizeof(struct ptlrpc_body), sizeof(*body) }; ENTRY; + size[REQ_REC_OFF + 1] = oinfo->oi_capa ? sizeof(*oinfo->oi_capa) : 0; req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_OST_VERSION, - OST_GETATTR, 2, size, NULL); + OST_GETATTR, 3, size, NULL); if (!req) RETURN(-ENOMEM); - body = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, sizeof(*body)); - memcpy(&body->oa, oinfo->oi_oa, sizeof(*oinfo->oi_oa)); + osc_pack_req_body(req, REQ_REC_OFF, oinfo); ptlrpc_req_set_repsize(req, 2, size); @@ -251,18 +276,18 @@ static int osc_setattr(struct obd_export *exp, struct obd_info *oinfo, { struct ptlrpc_request *req; struct ost_body *body; - int rc, size[2] = { sizeof(struct ptlrpc_body), sizeof(*body) }; + int rc, size[3] = { sizeof(struct ptlrpc_body), sizeof(*body) }; ENTRY; LASSERT(!(oinfo->oi_oa->o_valid & OBD_MD_FLGROUP) || oinfo->oi_oa->o_gr > 0); + size[REQ_REC_OFF + 1] = oinfo->oi_capa ? 
sizeof(*oinfo->oi_capa) : 0; req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_OST_VERSION, - OST_SETATTR, 2, size, NULL); + OST_SETATTR, 3, size, NULL); if (!req) RETURN(-ENOMEM); - body = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, sizeof(*body)); - memcpy(&body->oa, oinfo->oi_oa, sizeof(*oinfo->oi_oa)); + osc_pack_req_body(req, REQ_REC_OFF, oinfo); ptlrpc_req_set_repsize(req, 2, size); @@ -311,24 +336,25 @@ static int osc_setattr_async(struct obd_export *exp, struct obd_info *oinfo, { struct ptlrpc_request *req; struct ost_body *body; - int size[2] = { sizeof(struct ptlrpc_body), sizeof(*body) }; + int size[3] = { sizeof(struct ptlrpc_body), sizeof(*body) }; struct osc_async_args *aa; ENTRY; + size[REQ_REC_OFF + 1] = oinfo->oi_capa ? sizeof(*oinfo->oi_capa) : 0; req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_OST_VERSION, - OST_SETATTR, 2, size, NULL); + OST_SETATTR, 3, size, NULL); if (!req) RETURN(-ENOMEM); - body = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, sizeof(*body)); - + osc_pack_req_body(req, REQ_REC_OFF, oinfo); if (oinfo->oi_oa->o_valid & OBD_MD_FLCOOKIE) { LASSERT(oti); + body = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, + sizeof(*body)); memcpy(obdo_logcookie(oinfo->oi_oa), oti->oti_logcookies, sizeof(*oti->oti_logcookies)); } - memcpy(&body->oa, oinfo->oi_oa, sizeof(*oinfo->oi_oa)); ptlrpc_req_set_repsize(req, 2, size); /* do mds to ost setattr asynchronouly */ if (!rqset) { @@ -353,7 +379,7 @@ int osc_real_create(struct obd_export *exp, struct obdo *oa, struct ptlrpc_request *req; struct ost_body *body; struct lov_stripe_md *lsm; - int rc, size[2] = { sizeof(struct ptlrpc_body), sizeof(*body) }; + int rc, size[3] = { sizeof(struct ptlrpc_body), sizeof(*body) }; ENTRY; LASSERT(oa); @@ -366,13 +392,14 @@ int osc_real_create(struct obd_export *exp, struct obdo *oa, RETURN(rc); } + /* FIXME: how to find one OSS WRITE capability? 
*/ req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_OST_VERSION, - OST_CREATE, 2, size, NULL); + OST_CREATE, 3, size, NULL); if (!req) GOTO(out, rc = -ENOMEM); body = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, sizeof(*body)); - memcpy(&body->oa, oa, sizeof(body->oa)); + body->oa = *oa; ptlrpc_req_set_repsize(req, 2, size); if (oa->o_valid & OBD_MD_FLINLINE) { @@ -460,7 +487,8 @@ static int osc_punch(struct obd_export *exp, struct obd_info *oinfo, struct ptlrpc_request *req; struct osc_async_args *aa; struct ost_body *body; - int size[2] = { sizeof(struct ptlrpc_body), sizeof(*body) }; + struct lustre_capa *capa = oinfo->oi_capa; + int size[3] = { sizeof(struct ptlrpc_body), sizeof(*body) }; ENTRY; if (!oinfo->oi_oa) { @@ -468,8 +496,9 @@ static int osc_punch(struct obd_export *exp, struct obd_info *oinfo, RETURN(-EINVAL); } + size[REQ_REC_OFF + 1] = capa ? sizeof(*capa) : 0; req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_OST_VERSION, - OST_PUNCH, 2, size, NULL); + OST_PUNCH, 3, size, NULL); if (!req) RETURN(-ENOMEM); @@ -478,14 +507,22 @@ static int osc_punch(struct obd_export *exp, struct obd_info *oinfo, OBD_CONNECT_REQPORTAL) req->rq_request_portal = OST_IO_PORTAL; - body = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, sizeof(*body)); - memcpy(&body->oa, oinfo->oi_oa, sizeof(*oinfo->oi_oa)); - + osc_pack_req_body(req, REQ_REC_OFF, oinfo); /* overload the size and blocks fields in the oa with start/end */ + body = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, sizeof(*body)); body->oa.o_size = oinfo->oi_policy.l_extent.start; body->oa.o_blocks = oinfo->oi_policy.l_extent.end; body->oa.o_valid |= (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS); + if (capa) { + struct lustre_capa *c; + + c = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF + 1, sizeof(*c)); + /* setattr_raw is protected by i_sem, no need to lock here */ + *c = *capa; + body->oa.o_valid |= OBD_MD_FLOSSCAPA; + } + ptlrpc_req_set_repsize(req, 2, size); req->rq_interpret_reply = osc_punch_interpret; @@ -498,11 +535,12 
@@ static int osc_punch(struct obd_export *exp, struct obd_info *oinfo, } static int osc_sync(struct obd_export *exp, struct obdo *oa, - struct lov_stripe_md *md, obd_size start, obd_size end) + struct lov_stripe_md *md, obd_size start, obd_size end, + void *capa) { struct ptlrpc_request *req; struct ost_body *body; - int rc, size[2] = { sizeof(struct ptlrpc_body), sizeof(*body) }; + int rc, size[3] = { sizeof(struct ptlrpc_body), sizeof(*body) }; ENTRY; if (!oa) { @@ -510,19 +548,23 @@ static int osc_sync(struct obd_export *exp, struct obdo *oa, RETURN(-EINVAL); } + if (capa) + size[REQ_REC_OFF + 1] = sizeof(struct lustre_capa); + req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_OST_VERSION, - OST_SYNC, 2, size, NULL); + OST_SYNC, 3, size, NULL); if (!req) RETURN(-ENOMEM); - body = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, sizeof(*body)); - memcpy(&body->oa, oa, sizeof(*oa)); - /* overload the size and blocks fields in the oa with start/end */ + body = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, sizeof(*body)); + body->oa = *oa; body->oa.o_size = start; body->oa.o_blocks = end; body->oa.o_valid |= (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS); + osc_pack_capa(req, REQ_REC_OFF + 1, body, capa); + ptlrpc_req_set_repsize(req, 2, size); rc = ptlrpc_queue_wait(req); @@ -556,11 +598,11 @@ static int osc_sync(struct obd_export *exp, struct obdo *oa, * cookies to the MDS after committing destroy transactions. 
*/ static int osc_destroy(struct obd_export *exp, struct obdo *oa, struct lov_stripe_md *ea, struct obd_trans_info *oti, - struct obd_export *md_export) + struct obd_export *md_export, void *capa) { struct ptlrpc_request *req; struct ost_body *body; - int size[2] = { sizeof(struct ptlrpc_body), sizeof(*body) }; + int size[3] = { sizeof(struct ptlrpc_body), sizeof(*body) }; ENTRY; if (!oa) { @@ -568,8 +610,10 @@ static int osc_destroy(struct obd_export *exp, struct obdo *oa, RETURN(-EINVAL); } + if (capa) + size[REQ_REC_OFF + 1] = sizeof(struct lustre_capa); req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_OST_VERSION, - OST_DESTROY, 2, size, NULL); + OST_DESTROY, 3, size, NULL); if (!req) RETURN(-ENOMEM); @@ -579,13 +623,13 @@ static int osc_destroy(struct obd_export *exp, struct obdo *oa, req->rq_request_portal = OST_IO_PORTAL; body = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, sizeof(*body)); - - if (oti != NULL && oa->o_valid & OBD_MD_FLCOOKIE) { + body->oa = *oa; + if (oti != NULL && oa->o_valid & OBD_MD_FLCOOKIE) memcpy(obdo_logcookie(oa), oti->oti_logcookies, sizeof(*oti->oti_logcookies)); - } - memcpy(&body->oa, oa, sizeof(*oa)); + osc_pack_capa(req, REQ_REC_OFF + 1, body, capa); + ptlrpc_req_set_repsize(req, 2, size); ptlrpcd_add_req(req); @@ -818,7 +862,8 @@ static obd_count osc_checksum_bulk(int nob, obd_count pg_count, static int osc_brw_prep_request(int cmd, struct obd_import *imp,struct obdo *oa, struct lov_stripe_md *lsm, obd_count page_count, struct brw_page **pga, int *requested_nobp, - int *niocountp, struct ptlrpc_request **reqp) + int *niocountp, struct ptlrpc_request **reqp, + struct obd_capa *ocapa) { struct ptlrpc_request *req; struct ptlrpc_bulk_desc *desc; @@ -826,9 +871,10 @@ static int osc_brw_prep_request(int cmd, struct obd_import *imp,struct obdo *oa, struct ost_body *body; struct obd_ioobj *ioobj; struct niobuf_remote *niobuf; - int size[4] = { sizeof(struct ptlrpc_body), sizeof(*body) }; + int size[5] = { sizeof(struct 
ptlrpc_body), sizeof(*body) }; int niocount, i, requested_nob, opc, rc; struct ptlrpc_request_pool *pool; + struct lustre_capa *capa; ENTRY; opc = ((cmd & OBD_BRW_WRITE) != 0) ? OST_WRITE : OST_READ; @@ -841,9 +887,11 @@ static int osc_brw_prep_request(int cmd, struct obd_import *imp,struct obdo *oa, size[REQ_REC_OFF + 1] = sizeof(*ioobj); size[REQ_REC_OFF + 2] = niocount * sizeof(*niobuf); + if (ocapa) + size[REQ_REC_OFF + 3] = sizeof(*capa); OBD_FAIL_RETURN(OBD_FAIL_OSC_BRW_PREP_REQ, -ENOMEM); - req = ptlrpc_prep_req_pool(imp, LUSTRE_OST_VERSION, opc, 4, size, NULL, + req = ptlrpc_prep_req_pool(imp, LUSTRE_OST_VERSION, opc, 5, size, NULL, pool, NULL); if (req == NULL) RETURN (-ENOMEM); @@ -867,10 +915,16 @@ static int osc_brw_prep_request(int cmd, struct obd_import *imp,struct obdo *oa, niobuf = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF + 2, niocount * sizeof(*niobuf)); - memcpy(&body->oa, oa, sizeof(*oa)); + body->oa = *oa; obdo_to_ioobj(oa, ioobj); ioobj->ioo_bufcnt = niocount; + if (ocapa) { + capa = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF + 3, + sizeof(*capa)); + capa_cpy(capa, ocapa); + body->oa.o_valid |= OBD_MD_FLOSSCAPA; + } LASSERT (page_count > 0); for (requested_nob = i = 0; i < page_count; i++, niobuf++) { @@ -1094,7 +1148,8 @@ static int osc_brw_fini_request(struct ptlrpc_request *req, struct obdo *oa, static int osc_brw_internal(int cmd, struct obd_export *exp,struct obdo *oa, struct lov_stripe_md *lsm, - obd_count page_count, struct brw_page **pga) + obd_count page_count, struct brw_page **pga, + struct obd_capa *ocapa) { int requested_nob; int niocount; @@ -1105,7 +1160,7 @@ static int osc_brw_internal(int cmd, struct obd_export *exp,struct obdo *oa, restart_bulk: rc = osc_brw_prep_request(cmd, class_exp2cliimp(exp), oa, lsm, page_count, pga, &requested_nob, &niocount, - &req); + &req, ocapa); if (rc != 0) return (rc); @@ -1142,7 +1197,8 @@ static int brw_interpret(struct ptlrpc_request *req, static int async_internal(int cmd, struct 
obd_export *exp, struct obdo *oa, struct lov_stripe_md *lsm, obd_count page_count, - struct brw_page **pga, struct ptlrpc_request_set *set) + struct brw_page **pga, struct ptlrpc_request_set *set, + struct obd_capa *ocapa) { struct ptlrpc_request *req; int requested_nob; @@ -1164,7 +1220,7 @@ static int async_internal(int cmd, struct obd_export *exp, struct obdo *oa, rc = osc_brw_prep_request(cmd, class_exp2cliimp(exp), oa, lsm, page_count, pga, &requested_nob, &nio_count, - &req); + &req, ocapa); if (rc == 0) { LASSERT(sizeof(*aa) <= sizeof(req->rq_async_args)); @@ -1308,7 +1364,7 @@ static int osc_brw(int cmd, struct obd_export *exp, struct obd_info *oinfo, } rc = osc_brw_internal(cmd, exp, oinfo->oi_oa, oinfo->oi_md, - pages_per_brw, ppga); + pages_per_brw, ppga, oinfo->oi_capa); if (rc != 0) break; @@ -1374,7 +1430,7 @@ static int osc_brw_async(int cmd, struct obd_export *exp, copy = ppga; rc = async_internal(cmd, exp, oinfo->oi_oa, oinfo->oi_md, - pages_per_brw, copy, set); + pages_per_brw, copy, set, oinfo->oi_capa); if (rc != 0) break; @@ -1675,6 +1731,7 @@ static struct ptlrpc_request *osc_build_req(struct client_obd *cli, struct obd_async_page_ops *ops = NULL; void *caller_data = NULL; struct list_head *pos; + struct obd_capa *ocapa; int i, rc; ENTRY; @@ -1710,10 +1767,12 @@ static struct ptlrpc_request *osc_build_req(struct client_obd *cli, /* always get the data for the obdo for the rpc */ LASSERT(ops != NULL); ops->ap_fill_obdo(caller_data, cmd, oa); + ocapa = ops->ap_lookup_capa(caller_data, cmd); sort_brw_pages(pga, page_count); rc = osc_brw_prep_request(cmd, cli->cl_import, oa, NULL, page_count, - pga, &requested_nob, &nio_count, &req); + pga, &requested_nob, &nio_count, &req, ocapa); + capa_put(ocapa); if (rc != 0) { CERROR("prep_req failed: %d\n", rc); GOTO(out, req = ERR_PTR(rc)); @@ -2682,12 +2741,12 @@ static int osc_enqueue(struct obd_export *exp, struct obd_info *oinfo, no_match: if (intent) { - int size[3] = { + int size[2] = { 
[MSG_PTLRPC_BODY_OFF] = sizeof(struct ptlrpc_body), [DLM_LOCKREQ_OFF] = sizeof(struct ldlm_request) }; req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_DLM_VERSION, - LDLM_ENQUEUE, 2, size, NULL); + LDLM_ENQUEUE, 3, size, NULL); if (req == NULL) RETURN(-ENOMEM); diff --git a/lustre/osd/osd_handler.c b/lustre/osd/osd_handler.c index 2e6b7b1..d7125c8 100644 --- a/lustre/osd/osd_handler.c +++ b/lustre/osd/osd_handler.c @@ -638,7 +638,6 @@ static void osd_ro(const struct lu_context *ctx, struct dt_device *d) EXIT; } - static struct dt_device_operations osd_dt_ops = { .dt_root_get = osd_root_get, .dt_statfs = osd_statfs, @@ -704,14 +703,26 @@ static void osd_object_write_unlock(const struct lu_context *ctx, up_write(&obj->oo_sem); } -static int osd_attr_get(const struct lu_context *ctxt, struct dt_object *dt, +static inline int osd_object_auth(const struct lu_context *ctx, + const struct lu_object *o, + __u64 opc) +{ + return o->lo_ops->loo_object_auth(ctx, o, lu_object_capa(o), opc); +} + +static int osd_attr_get(const struct lu_context *ctxt, + struct dt_object *dt, struct lu_attr *attr) { struct osd_object *obj = osd_dt_obj(dt); + LASSERT(dt_object_exists(dt)); LASSERT(osd_invariant(obj)); LASSERT(osd_read_locked(ctxt, obj) || osd_write_locked(ctxt, obj)); + if (osd_object_auth(ctxt, &dt->do_lu, CAPA_OPC_META_READ)) + return -EACCES; + return osd_inode_getattr(ctxt, obj->oo_inode, attr); } @@ -726,6 +737,9 @@ static int osd_attr_set(const struct lu_context *ctxt, LASSERT(osd_invariant(obj)); LASSERT(osd_write_locked(ctxt, obj)); + if (osd_object_auth(ctxt, &dt->do_lu, CAPA_OPC_META_WRITE)) + return -EACCES; + return osd_inode_setattr(ctxt, obj->oo_inode, attr); } @@ -985,6 +999,8 @@ static int osd_object_create(const struct lu_context *ctx, struct dt_object *dt, /* * XXX missing: permission checks. */ + if (osd_object_auth(ctx, &dt->do_lu, CAPA_OPC_INDEX_INSERT)) + RETURN(-EACCES); /* * XXX missing: sanity checks (valid ->la_mode, etc.) 
@@ -1030,6 +1046,12 @@ static void osd_object_ref_add(const struct lu_context *ctxt, LASSERT(osd_write_locked(ctxt, obj)); LASSERT(th != NULL); + if (osd_object_auth(ctxt, &dt->do_lu, CAPA_OPC_META_WRITE)) { + LU_OBJECT_DEBUG(D_ERROR, ctxt, &dt->do_lu, + "no capability to link!\n"); + return; + } + if (inode->i_nlink < LDISKFS_LINK_MAX) { inode->i_nlink ++; mark_inode_dirty(inode); @@ -1050,6 +1072,12 @@ static void osd_object_ref_del(const struct lu_context *ctxt, LASSERT(osd_write_locked(ctxt, obj)); LASSERT(th != NULL); + if (osd_object_auth(ctxt, &dt->do_lu, CAPA_OPC_META_WRITE)) { + LU_OBJECT_DEBUG(D_ERROR, ctxt, &dt->do_lu, + "no capability to unlink!\n"); + return; + } + if (inode->i_nlink > 0) { inode->i_nlink --; mark_inode_dirty(inode); @@ -1071,6 +1099,9 @@ static int osd_xattr_get(const struct lu_context *ctxt, struct dt_object *dt, LASSERT(inode->i_op != NULL && inode->i_op->getxattr != NULL); LASSERT(osd_read_locked(ctxt, obj) || osd_write_locked(ctxt, obj)); + if (osd_object_auth(ctxt, &dt->do_lu, CAPA_OPC_META_READ)) + return -EACCES; + dentry->d_inode = inode; return inode->i_op->getxattr(dentry, name, buf, size); } @@ -1091,6 +1122,9 @@ static int osd_xattr_set(const struct lu_context *ctxt, struct dt_object *dt, LASSERT(osd_write_locked(ctxt, obj)); LASSERT(handle != NULL); + if (osd_object_auth(ctxt, &dt->do_lu, CAPA_OPC_META_WRITE)) + return -EACCES; + dentry->d_inode = inode; fs_flags = 0; @@ -1115,6 +1149,9 @@ static int osd_xattr_list(const struct lu_context *ctxt, struct dt_object *dt, LASSERT(inode->i_op != NULL && inode->i_op->listxattr != NULL); LASSERT(osd_read_locked(ctxt, obj) || osd_write_locked(ctxt, obj)); + if (osd_object_auth(ctxt, &dt->do_lu, CAPA_OPC_META_READ)) + return -EACCES; + dentry->d_inode = inode; return inode->i_op->listxattr(dentry, buf, size); } @@ -1132,6 +1169,9 @@ static int osd_xattr_del(const struct lu_context *ctxt, struct dt_object *dt, LASSERT(osd_write_locked(ctxt, obj)); LASSERT(handle != NULL); + if 
(osd_object_auth(ctxt, &dt->do_lu, CAPA_OPC_META_WRITE)) + return -EACCES; + dentry->d_inode = inode; return inode->i_op->removexattr(dentry, name); } @@ -1213,6 +1253,9 @@ static int osd_readpage(const struct lu_context *ctxt, LASSERT(rdpg->rp_pages != NULL); + if (osd_object_auth(ctxt, &dt->do_lu, CAPA_OPC_BODY_READ)) + return -EACCES; + if (rdpg->rp_count <= 0) return -EFAULT; @@ -1395,6 +1438,9 @@ static int osd_index_try(const struct lu_context *ctx, struct dt_object *dt, LASSERT(osd_invariant(obj)); LASSERT(dt_object_exists(dt)); + if (osd_object_auth(ctx, &dt->do_lu, CAPA_OPC_INDEX_LOOKUP)) + RETURN(-EACCES); + if (osd_sb(osd_obj2dev(obj))->s_root->d_inode == obj->oo_inode) { dt->do_index_ops = &osd_index_compat_ops; result = 0; @@ -1446,6 +1492,9 @@ static int osd_index_delete(const struct lu_context *ctxt, struct dt_object *dt, LASSERT(obj->oo_ipd != NULL); LASSERT(handle != NULL); + if (osd_object_auth(ctxt, &dt->do_lu, CAPA_OPC_INDEX_DELETE)) + RETURN(-EACCES); + oh = container_of0(handle, struct osd_thandle, ot_super); LASSERT(oh->ot_handle != NULL); @@ -1469,6 +1518,9 @@ static int osd_index_lookup(const struct lu_context *ctxt, struct dt_object *dt, LASSERT(obj->oo_container.ic_object == obj->oo_inode); LASSERT(obj->oo_ipd != NULL); + if (osd_object_auth(ctxt, &dt->do_lu, CAPA_OPC_INDEX_LOOKUP)) + return -EACCES; + rc = iam_lookup(&obj->oo_container, (const struct iam_key *)key, (struct iam_rec *)rec, obj->oo_ipd); @@ -1477,7 +1529,6 @@ static int osd_index_lookup(const struct lu_context *ctxt, struct dt_object *dt, RETURN(rc); } - static int osd_index_insert(const struct lu_context *ctx, struct dt_object *dt, const struct dt_rec *rec, const struct dt_key *key, struct thandle *th) @@ -1495,6 +1546,9 @@ static int osd_index_insert(const struct lu_context *ctx, struct dt_object *dt, LASSERT(obj->oo_ipd != NULL); LASSERT(th != NULL); + if (osd_object_auth(ctx, &dt->do_lu, CAPA_OPC_INDEX_INSERT)) + return -EACCES; + oh = container_of0(th, struct 
osd_thandle, ot_super); LASSERT(oh->ot_handle != NULL); rc = iam_insert(oh->ot_handle, &obj->oo_container, @@ -1554,12 +1608,14 @@ static int osd_it_get(const struct lu_context *ctx, static void osd_it_put(const struct lu_context *ctx, struct dt_it *di) { struct osd_it *it = (struct osd_it *)di; + iam_it_put(&it->oi_it); } static int osd_it_next(const struct lu_context *ctx, struct dt_it *di) { struct osd_it *it = (struct osd_it *)di; + return iam_it_next(&it->oi_it); } @@ -1570,6 +1626,7 @@ static int osd_it_del(const struct lu_context *ctx, struct dt_it *di, struct osd_thandle *oh; LASSERT(th != NULL); + oh = container_of0(th, struct osd_thandle, ot_super); LASSERT(oh->ot_handle != NULL); @@ -1580,12 +1637,14 @@ static struct dt_key *osd_it_key(const struct lu_context *ctx, const struct dt_it *di) { struct osd_it *it = (struct osd_it *)di; + return (struct dt_key *)iam_it_key_get(&it->oi_it); } static int osd_it_key_size(const struct lu_context *ctx, const struct dt_it *di) { struct osd_it *it = (struct osd_it *)di; + return iam_it_key_size(&it->oi_it); } @@ -1593,12 +1652,14 @@ static struct dt_rec *osd_it_rec(const struct lu_context *ctx, const struct dt_it *di) { struct osd_it *it = (struct osd_it *)di; + return (struct dt_rec *)iam_it_rec_get(&it->oi_it); } static __u32 osd_it_store(const struct lu_context *ctxt, const struct dt_it *di) { struct osd_it *it = (struct osd_it *)di; + return iam_it_store(&it->oi_it); } @@ -1606,6 +1667,7 @@ static int osd_it_load(const struct lu_context *ctxt, const struct dt_it *di, __u32 hash) { struct osd_it *it = (struct osd_it *)di; + return iam_it_load(&it->oi_it, hash); } @@ -1638,6 +1700,7 @@ static int osd_index_compat_delete(const struct lu_context *ctxt, LASSERT(handle != NULL); LASSERT(S_ISDIR(obj->oo_inode->i_mode)); ENTRY; + RETURN(-EOPNOTSUPP); } @@ -1786,7 +1849,7 @@ static int osd_index_compat_insert(const struct lu_context *ctx, LASSERT(osd_invariant(obj)); LASSERT(th != NULL); - luch = lu_object_find(ctx, 
ludev->ld_site, fid); + luch = lu_object_find(ctx, ludev->ld_site, fid, BYPASS_CAPA); if (!IS_ERR(luch)) { if (lu_object_exists(luch)) { struct osd_object *child; @@ -2241,13 +2304,102 @@ static int osd_object_invariant(const struct lu_object *l) return osd_invariant(osd_obj(l)); } +static int capa_is_sane(const struct lu_context *ctx, + struct lustre_capa *capa, + struct lustre_capa_key *keys) +{ + struct obd_capa *c; + struct osd_thread_info *oti = lu_context_key_get(ctx, &osd_key); + int i, rc; + ENTRY; + + c = capa_lookup(capa); + if (c) { + spin_lock(&c->c_lock); + if (memcmp(&c->c_capa, capa, sizeof(*capa))) { + DEBUG_CAPA(D_ERROR, capa, "HMAC mismatch"); + rc = -EACCES; + } else if (capa_is_expired(c)) { + DEBUG_CAPA(D_ERROR, capa, "expired"); + rc = -ESTALE; + } + spin_unlock(&c->c_lock); + + capa_put(c); + RETURN(rc); + } + + spin_lock(&capa_lock); + for (i = 0; i < 2; i++) { + if (keys[i].lk_keyid == capa->lc_keyid) { + oti->oti_capa_key = keys[i]; + break; + } + } + spin_unlock(&capa_lock); + + if (i == 2) { + DEBUG_CAPA(D_ERROR, capa, "no matched capa key"); + RETURN(-ESTALE); + } + + rc = capa_hmac(oti->oti_capa_hmac, capa, oti->oti_capa_key.lk_key); + if (rc) + RETURN(rc); + if (memcmp(oti->oti_capa_hmac, capa->lc_hmac, sizeof(capa->lc_hmac))) { + DEBUG_CAPA(D_ERROR, capa, "HMAC mismatch"); + RETURN(-EACCES); + } + + capa_add(capa); + + RETURN(0); +} + +static int osd_object_capa_auth(const struct lu_context *ctx, + const struct lu_object *obj, + struct lustre_capa *capa, + __u64 opc) +{ + const struct lu_fid *fid = lu_object_fid(obj); + + return 0; + + if (lu_object_capa_bypass(obj)) + return 0; + + if (!capa) { + CERROR("no capability is provided for fid "DFID"\n", PFID(fid)); + return -EACCES; + } + + if (!lu_fid_eq(fid, &capa->lc_fid)) { + DEBUG_CAPA(D_ERROR, capa, "fid "DFID" mismatch with", + PFID(fid)); + return -EACCES; + } + + if (!capa_opc_supported(capa, opc)) { + DEBUG_CAPA(D_ERROR, capa, "opc "LPX64" not supported by", opc); + return 
-EACCES; + } + + if (!capa_is_sane(ctx, capa, obj->lo_dev->ld_site->ls_capa_keys)) { + DEBUG_CAPA(D_ERROR, capa, "insane"); + return -EACCES; + } + + return 0; +} + static struct lu_object_operations osd_lu_obj_ops = { .loo_object_init = osd_object_init, .loo_object_delete = osd_object_delete, .loo_object_release = osd_object_release, .loo_object_free = osd_object_free, .loo_object_print = osd_object_print, - .loo_object_invariant = osd_object_invariant + .loo_object_invariant = osd_object_invariant, + .loo_object_auth = osd_object_capa_auth }; static struct lu_device_operations osd_lu_ops = { diff --git a/lustre/osd/osd_internal.h b/lustre/osd/osd_internal.h index d2b4625..fab177b 100644 --- a/lustre/osd/osd_internal.h +++ b/lustre/osd/osd_internal.h @@ -77,6 +77,11 @@ struct osd_thread_info { int oti_r_locks; int oti_w_locks; int oti_txns; + /* + *XXX temporary: for capa operations. + */ + char oti_capa_hmac[CAPA_HMAC_KEY_MAX_LEN]; + struct lustre_capa_key oti_capa_key; }; #endif /* __KERNEL__ */ diff --git a/lustre/ost/ost_handler.c b/lustre/ost/ost_handler.c index 6a9a391..00d3cbf 100644 --- a/lustre/ost/ost_handler.c +++ b/lustre/ost/ost_handler.c @@ -80,6 +80,7 @@ static int ost_destroy(struct obd_export *exp, struct ptlrpc_request *req, struct obd_trans_info *oti) { struct ost_body *body, *repbody; + struct lustre_capa *capa; int rc, size[2] = { sizeof(struct ptlrpc_body), sizeof(*body) }; ENTRY; @@ -97,7 +98,9 @@ static int ost_destroy(struct obd_export *exp, struct ptlrpc_request *req, repbody = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, sizeof(*repbody)); memcpy(&repbody->oa, &body->oa, sizeof(body->oa)); - req->rq_status = obd_destroy(exp, &body->oa, NULL, oti, NULL); + if (body->oa.o_valid & OBD_MD_FLOSSCAPA) + capa = lustre_unpack_capa(req->rq_repmsg, REQ_REC_OFF + 1); + req->rq_status = obd_destroy(exp, &body->oa, NULL, oti, NULL, capa); RETURN(0); } @@ -119,9 +122,12 @@ static int ost_getattr(struct obd_export *exp, struct ptlrpc_request *req) 
repbody = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, sizeof(*repbody)); - memcpy(&repbody->oa, &body->oa, sizeof(body->oa)); + repbody->oa = body->oa; oinfo.oi_oa = &repbody->oa; + if (oinfo.oi_oa->o_valid & OBD_MD_FLOSSCAPA) + oinfo.oi_capa = lustre_unpack_capa(req->rq_repmsg, + REQ_REC_OFF + 1); req->rq_status = obd_getattr(exp, &oinfo); RETURN(0); } @@ -278,6 +284,9 @@ static int ost_punch(struct obd_export *exp, struct ptlrpc_request *req, */ oinfo.oi_oa->o_valid &= ~OBD_MD_FLFLAGS; + if (oinfo.oi_oa->o_valid & OBD_MD_FLOSSCAPA) + oinfo.oi_capa = lustre_unpack_capa(req->rq_repmsg, + REQ_REC_OFF + 1); req->rq_status = obd_punch(exp, &oinfo, oti, NULL); ost_punch_lock_put(exp, oinfo.oi_oa, &lh); } @@ -287,6 +296,7 @@ static int ost_punch(struct obd_export *exp, struct ptlrpc_request *req, static int ost_sync(struct obd_export *exp, struct ptlrpc_request *req) { struct ost_body *body, *repbody; + struct lustre_capa *capa = NULL; int rc, size[2] = { sizeof(struct ptlrpc_body), sizeof(*repbody) }; ENTRY; @@ -295,6 +305,9 @@ static int ost_sync(struct obd_export *exp, struct ptlrpc_request *req) if (body == NULL) RETURN(-EFAULT); + if (body->oa.o_valid & OBD_MD_FLOSSCAPA) + capa = lustre_unpack_capa(req->rq_reqmsg, REQ_REC_OFF + 1); + rc = lustre_pack_reply(req, 2, size, NULL); if (rc) RETURN(rc); @@ -303,7 +316,7 @@ static int ost_sync(struct obd_export *exp, struct ptlrpc_request *req) sizeof(*repbody)); memcpy(&repbody->oa, &body->oa, sizeof(body->oa)); req->rq_status = obd_sync(exp, &repbody->oa, NULL, repbody->oa.o_size, - repbody->oa.o_blocks); + repbody->oa.o_blocks, capa); RETURN(0); } @@ -326,9 +339,12 @@ static int ost_setattr(struct obd_export *exp, struct ptlrpc_request *req, repbody = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, sizeof(*repbody)); - memcpy(&repbody->oa, &body->oa, sizeof(body->oa)); + repbody->oa = body->oa; oinfo.oi_oa = &repbody->oa; + if (oinfo.oi_oa->o_valid & OBD_MD_FLOSSCAPA) + oinfo.oi_capa = lustre_unpack_capa(req->rq_repmsg, 
+ REQ_REC_OFF + 1); req->rq_status = obd_setattr(exp, &oinfo, oti); RETURN(0); } @@ -622,6 +638,7 @@ static int ost_brw_read(struct ptlrpc_request *req, struct obd_trans_info *oti) struct niobuf_local *local_nb; struct obd_ioobj *ioo; struct ost_body *body, *repbody; + struct lustre_capa *capa = NULL; struct l_wait_info lwi; struct lustre_handle lockh = { 0 }; int size[2] = { sizeof(struct ptlrpc_body), sizeof(*body) }; @@ -669,6 +686,9 @@ static int ost_brw_read(struct ptlrpc_request *req, struct obd_trans_info *oti) lustre_swab_niobuf_remote (&remote_nb[i]); } + if (body->oa.o_valid & OBD_MD_FLOSSCAPA) + capa = lustre_unpack_capa(req->rq_reqmsg, REQ_REC_OFF + 3); + rc = lustre_pack_reply(req, 2, size, NULL); if (rc) GOTO(out, rc); @@ -700,7 +720,7 @@ static int ost_brw_read(struct ptlrpc_request *req, struct obd_trans_info *oti) GOTO(out_bulk, rc); rc = obd_preprw(OBD_BRW_READ, req->rq_export, &body->oa, 1, - ioo, npages, pp_rnb, local_nb, oti); + ioo, npages, pp_rnb, local_nb, oti, capa); if (rc != 0) GOTO(out_lock, rc); @@ -839,6 +859,7 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti) struct ost_body *body, *repbody; struct l_wait_info lwi; struct lustre_handle lockh = {0}; + struct lustre_capa *capa = NULL; __u32 *rcs; int size[3] = { sizeof(struct ptlrpc_body), sizeof(*body) }; int objcount, niocount, npages, comms_error = 0; @@ -905,6 +926,9 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti) lustre_swab_niobuf_remote (&remote_nb[i]); } + if (body->oa.o_valid & OBD_MD_FLOSSCAPA) + capa = lustre_unpack_capa(req->rq_reqmsg, REQ_REC_OFF + 3); + size[REPLY_REC_OFF + 1] = niocount * sizeof(*rcs); rc = lustre_pack_reply(req, 3, size, NULL); if (rc != 0) @@ -944,7 +968,7 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti) do_checksum = (body->oa.o_valid & OBD_MD_FLCKSUM); rc = obd_preprw(OBD_BRW_WRITE, req->rq_export, &body->oa, objcount, - ioo, npages, pp_rnb, 
local_nb, oti); + ioo, npages, pp_rnb, local_nb, oti, capa); if (rc != 0) GOTO(out_lock, rc); diff --git a/lustre/ptlrpc/layout.c b/lustre/ptlrpc/layout.c index 2067e61..b8d7e53 100644 --- a/lustre/ptlrpc/layout.c +++ b/lustre/ptlrpc/layout.c @@ -73,10 +73,22 @@ static const struct req_msg_field *mdt_body_only[] = { &RMF_MDT_BODY }; +static const struct req_msg_field *mdt_renew_capa_client[] = { + &RMF_PTLRPC_BODY, + &RMF_CAPA1 +}; + +static const struct req_msg_field *mdt_body_capa[] = { + &RMF_PTLRPC_BODY, + &RMF_MDT_BODY, + &RMF_CAPA1 +}; + static const struct req_msg_field *mdt_close_client[] = { &RMF_PTLRPC_BODY, &RMF_MDT_EPOCH, - &RMF_REC_SETATTR + &RMF_REC_SETATTR, + &RMF_CAPA1 }; static const struct req_msg_field *mds_statfs_server[] = { @@ -109,6 +121,7 @@ static const struct req_msg_field *fld_query_server[] = { static const struct req_msg_field *mds_getattr_name_client[] = { &RMF_PTLRPC_BODY, &RMF_MDT_BODY, + &RMF_CAPA1, &RMF_NAME }; @@ -120,12 +133,14 @@ static const struct req_msg_field *mds_reint_client[] = { static const struct req_msg_field *mds_reint_create_client[] = { &RMF_PTLRPC_BODY, &RMF_REC_CREATE, + &RMF_CAPA1, &RMF_NAME, }; static const struct req_msg_field *mds_reint_create_sym_client[] = { &RMF_PTLRPC_BODY, &RMF_REC_CREATE, + &RMF_CAPA1, &RMF_NAME, &RMF_SYMTGT }; @@ -133,6 +148,7 @@ static const struct req_msg_field *mds_reint_create_sym_client[] = { static const struct req_msg_field *mds_reint_create_slave_client[] = { &RMF_PTLRPC_BODY, &RMF_REC_CREATE, + &RMF_CAPA1, &RMF_NAME, &RMF_EADATA }; @@ -140,6 +156,8 @@ static const struct req_msg_field *mds_reint_create_slave_client[] = { static const struct req_msg_field *mds_reint_open_client[] = { &RMF_PTLRPC_BODY, &RMF_REC_CREATE, + &RMF_CAPA1, + &RMF_CAPA2, &RMF_NAME, &RMF_EADATA }; @@ -148,24 +166,31 @@ static const struct req_msg_field *mds_reint_open_server[] = { &RMF_PTLRPC_BODY, &RMF_MDT_BODY, &RMF_MDT_MD, - &RMF_ACL + &RMF_ACL, + &RMF_CAPA1, + &RMF_CAPA2 }; static const struct 
req_msg_field *mds_reint_unlink_client[] = { &RMF_PTLRPC_BODY, &RMF_REC_UNLINK, + &RMF_CAPA1, &RMF_NAME }; static const struct req_msg_field *mds_reint_link_client[] = { &RMF_PTLRPC_BODY, &RMF_REC_LINK, + &RMF_CAPA1, + &RMF_CAPA2, &RMF_NAME }; static const struct req_msg_field *mds_reint_rename_client[] = { &RMF_PTLRPC_BODY, &RMF_REC_RENAME, + &RMF_CAPA1, + &RMF_CAPA2, &RMF_NAME, &RMF_SYMTGT }; @@ -180,6 +205,7 @@ static const struct req_msg_field *mds_last_unlink_server[] = { static const struct req_msg_field *mds_reint_setattr_client[] = { &RMF_PTLRPC_BODY, &RMF_REC_SETATTR, + &RMF_CAPA1, &RMF_MDT_EPOCH, &RMF_EADATA, &RMF_LOGCOOKIES @@ -220,7 +246,9 @@ static const struct req_msg_field *ldlm_intent_server[] = { &RMF_DLM_REP, &RMF_MDT_BODY, &RMF_MDT_MD, - &RMF_ACL + &RMF_ACL, + &RMF_CAPA1, + &RMF_CAPA2 }; static const struct req_msg_field *ldlm_intent_getattr_client[] = { @@ -228,6 +256,7 @@ static const struct req_msg_field *ldlm_intent_getattr_client[] = { &RMF_DLM_REQ, &RMF_LDLM_INTENT, &RMF_MDT_BODY, /* coincides with mds_getattr_name_client[] */ + &RMF_CAPA1, &RMF_NAME }; @@ -236,6 +265,7 @@ static const struct req_msg_field *ldlm_intent_create_client[] = { &RMF_DLM_REQ, &RMF_LDLM_INTENT, &RMF_REC_CREATE, /* coincides with mds_reint_create_client[] */ + &RMF_CAPA1, &RMF_NAME, &RMF_EADATA }; @@ -245,6 +275,8 @@ static const struct req_msg_field *ldlm_intent_open_client[] = { &RMF_DLM_REQ, &RMF_LDLM_INTENT, &RMF_REC_CREATE, /* coincides with mds_reint_open_client[] */ + &RMF_CAPA1, + &RMF_CAPA2, &RMF_NAME, &RMF_EADATA }; @@ -254,12 +286,14 @@ static const struct req_msg_field *ldlm_intent_unlink_client[] = { &RMF_DLM_REQ, &RMF_LDLM_INTENT, &RMF_REC_UNLINK, /* coincides with mds_reint_unlink_client[] */ + &RMF_CAPA1, &RMF_NAME }; static const struct req_msg_field *mds_getxattr_client[] = { &RMF_PTLRPC_BODY, &RMF_MDT_BODY, + &RMF_CAPA1, &RMF_NAME, &RMF_EADATA }; @@ -273,6 +307,7 @@ static const struct req_msg_field *mds_getxattr_server[] = { static const struct 
req_msg_field *mds_setxattr_client[] = { &RMF_PTLRPC_BODY, &RMF_MDT_BODY, + &RMF_CAPA1, &RMF_NAME, &RMF_EADATA }; @@ -286,7 +321,8 @@ static const struct req_msg_field *mds_getattr_server[] = { &RMF_PTLRPC_BODY, &RMF_MDT_BODY, &RMF_MDT_MD, - &RMF_ACL + &RMF_ACL, + &RMF_CAPA1 }; static const struct req_format *req_formats[] = { @@ -486,6 +522,16 @@ const struct req_msg_field RMF_REINT_OPC = DEFINE_MSGF("reint_opc", 0, sizeof(__u32), lustre_swab_generic_32s); EXPORT_SYMBOL(RMF_REINT_OPC); +const struct req_msg_field RMF_CAPA1 = + DEFINE_MSGF("capa", 0, sizeof(struct lustre_capa), + lustre_swab_lustre_capa); +EXPORT_SYMBOL(RMF_CAPA1); + +const struct req_msg_field RMF_CAPA2 = + DEFINE_MSGF("capa", 0, sizeof(struct lustre_capa), + lustre_swab_lustre_capa); +EXPORT_SYMBOL(RMF_CAPA2); + /* * Request formats. */ @@ -525,7 +571,7 @@ const struct req_format RQF_FLD_QUERY = EXPORT_SYMBOL(RQF_FLD_QUERY); const struct req_format RQF_MDS_GETSTATUS = - DEFINE_REQ_FMT0("MDS_GETSTATUS", empty, mdt_body_only); + DEFINE_REQ_FMT0("MDS_GETSTATUS", empty, mdt_body_capa); EXPORT_SYMBOL(RQF_MDS_GETSTATUS); const struct req_format RQF_MDS_STATFS = @@ -533,11 +579,11 @@ const struct req_format RQF_MDS_STATFS = EXPORT_SYMBOL(RQF_MDS_STATFS); const struct req_format RQF_MDS_SYNC = - DEFINE_REQ_FMT0("MDS_SYNC", mdt_body_only, mdt_body_only); + DEFINE_REQ_FMT0("MDS_SYNC", mdt_body_capa, mdt_body_only); EXPORT_SYMBOL(RQF_MDS_SYNC); const struct req_format RQF_MDS_GETATTR = - DEFINE_REQ_FMT0("MDS_GETATTR", mdt_body_only, mds_getattr_server); + DEFINE_REQ_FMT0("MDS_GETATTR", mdt_body_capa, mds_getattr_server); EXPORT_SYMBOL(RQF_MDS_GETATTR); const struct req_format RQF_MDS_GETXATTR = @@ -596,7 +642,7 @@ EXPORT_SYMBOL(RQF_MDS_REINT_RENAME); const struct req_format RQF_MDS_REINT_SETATTR = DEFINE_REQ_FMT0("MDS_REINT_SETATTR", - mds_reint_setattr_client, mdt_body_only); + mds_reint_setattr_client, mdt_body_capa); EXPORT_SYMBOL(RQF_MDS_REINT_SETATTR); const struct req_format RQF_MDS_CONNECT = @@ 
-645,7 +691,7 @@ EXPORT_SYMBOL(RQF_MDS_CLOSE); const struct req_format RQF_MDS_PIN = DEFINE_REQ_FMT0("MDS_PIN", - mdt_body_only, mdt_body_only); + mdt_body_capa, mdt_body_only); EXPORT_SYMBOL(RQF_MDS_PIN); const struct req_format RQF_MDS_DONE_WRITING = @@ -655,7 +701,7 @@ EXPORT_SYMBOL(RQF_MDS_DONE_WRITING); const struct req_format RQF_MDS_READPAGE = DEFINE_REQ_FMT0("MDS_READPAGE", - mdt_body_only, mdt_body_only); + mdt_body_capa, mdt_body_only); EXPORT_SYMBOL(RQF_MDS_READPAGE); const struct req_format RQF_MDS_WRITEPAGE = @@ -668,6 +714,11 @@ const struct req_format RQF_MDS_IS_SUBDIR = mdt_body_only, mdt_body_only); EXPORT_SYMBOL(RQF_MDS_IS_SUBDIR); +const struct req_format RQF_MDS_RENEW_CAPA = + DEFINE_REQ_FMT0("MDS_RENEW_CAPA", + mdt_renew_capa_client, mdt_body_capa); +EXPORT_SYMBOL(RQF_MDS_RENEW_CAPA); + #if !defined(__REQ_LAYOUT_USER__) int req_layout_init(void) diff --git a/lustre/ptlrpc/lproc_ptlrpc.c b/lustre/ptlrpc/lproc_ptlrpc.c index a430307..5b44000 100644 --- a/lustre/ptlrpc/lproc_ptlrpc.c +++ b/lustre/ptlrpc/lproc_ptlrpc.c @@ -77,6 +77,7 @@ struct ll_rpc_opcode { { MDS_SETXATTR, "mds_setxattr" }, { MDS_WRITEPAGE, "mds_writepage" }, { MDS_IS_SUBDIR, "mds_is_subdir" }, + { MDS_RENEW_CAPA, "mds_renew_capa" }, { LDLM_ENQUEUE, "ldlm_enqueue" }, { LDLM_CONVERT, "ldlm_convert" }, { LDLM_CANCEL, "ldlm_cancel" }, diff --git a/lustre/ptlrpc/pack_generic.c b/lustre/ptlrpc/pack_generic.c index e17610a..33829dd 100644 --- a/lustre/ptlrpc/pack_generic.c +++ b/lustre/ptlrpc/pack_generic.c @@ -2229,3 +2229,19 @@ void debug_req(cfs_debug_limit_state_t *cdls, req->rq_repmsg ? 
lustre_msg_get_status(req->rq_repmsg) : 0); } EXPORT_SYMBOL(debug_req); + +void lustre_swab_lustre_capa(struct lustre_capa *c) +{ + lustre_swab_lu_fid(&c->lc_fid); + __swab64s (&c->lc_opc); + __swab32s (&c->lc_flags); + __swab32s (&c->lc_keyid); + __swab64s (&c->lc_expiry); +} + +void lustre_swab_lustre_capa_key (struct lustre_capa_key *k) +{ + __swab64s (&k->lk_mdsid); + __swab32s (&k->lk_keyid); + __swab32s (&k->lk_padding); +} diff --git a/lustre/ptlrpc/ptlrpc_module.c b/lustre/ptlrpc/ptlrpc_module.c index 018bdfb..b625000 100644 --- a/lustre/ptlrpc/ptlrpc_module.c +++ b/lustre/ptlrpc/ptlrpc_module.c @@ -276,6 +276,8 @@ EXPORT_SYMBOL(lustre_msg_set_conn_cnt); EXPORT_SYMBOL(lustre_swab_mgs_target_info); EXPORT_SYMBOL(lustre_swab_md_fld); EXPORT_SYMBOL(lustre_swab_generic_32s); +EXPORT_SYMBOL(lustre_swab_lustre_capa); +EXPORT_SYMBOL(lustre_swab_lustre_capa_key); /* recover.c */ EXPORT_SYMBOL(ptlrpc_disconnect_import); diff --git a/lustre/utils/mkfs_lustre.c b/lustre/utils/mkfs_lustre.c index 034aec0..4ffbd6c 100644 --- a/lustre/utils/mkfs_lustre.c +++ b/lustre/utils/mkfs_lustre.c @@ -1306,6 +1306,12 @@ static int mkfs_mdt(struct mkfs_opts *mop) goto out_umount; } + snprintf(filepnm, sizeof(filepnm) - 1, "%s/%s", mntpt, CAPA_KEYS); + ret = iam_creat(filepnm, FMT_LFIX, L_BLOCK_SIZE, 1, 1, 4); + if (ret) { + goto out_umount; + } + umount(mntpt); ret = mount(source, mntpt, fstype, 0, NULL); if (ret) { diff --git a/lustre/utils/req-layout.c b/lustre/utils/req-layout.c index fed8092..389d158 100644 --- a/lustre/utils/req-layout.c +++ b/lustre/utils/req-layout.c @@ -52,6 +52,8 @@ #define lustre_swab_mdt_rec_rename NULL #define lustre_swab_mdt_rec_create NULL #define lustre_swab_mdt_rec_setattr NULL +#define lustre_swab_lustre_capa NULL +#define lustre_swab_lustre_capa_key NULL /* * Yes, include .c file. -- 1.8.3.1