Whamcloud - gitweb
land capability.
authorlsy <lsy>
Wed, 27 Sep 2006 06:34:44 +0000 (06:34 +0000)
committerlsy <lsy>
Wed, 27 Sep 2006 06:34:44 +0000 (06:34 +0000)
86 files changed:
lustre/cmm/cmm_device.c
lustre/cmm/cmm_object.c
lustre/cmm/mdc_object.c
lustre/include/Makefile.am
lustre/include/lu_object.h
lustre/include/lustre/lustre_idl.h
lustre/include/lustre_capa.h [new file with mode: 0644]
lustre/include/lustre_disk.h
lustre/include/lustre_mdt.h
lustre/include/lustre_req_layout.h
lustre/include/md_object.h
lustre/include/obd.h
lustre/include/obd_class.h
lustre/include/obd_support.h
lustre/liblustre/dir.c
lustre/liblustre/file.c
lustre/liblustre/super.c
lustre/llite/Makefile.in
lustre/llite/dcache.c
lustre/llite/dir.c
lustre/llite/file.c
lustre/llite/llite_capa.c [new file with mode: 0644]
lustre/llite/llite_internal.h
lustre/llite/llite_lib.c
lustre/llite/llite_nfs.c
lustre/llite/namei.c
lustre/llite/remote_perm.c
lustre/llite/rw.c
lustre/llite/rw26.c
lustre/llite/super25.c
lustre/llite/symlink.c
lustre/llite/xattr.c
lustre/lmv/lmv_intent.c
lustre/lmv/lmv_internal.h
lustre/lmv/lmv_obd.c
lustre/lmv/lmv_object.c
lustre/lov/lov_obd.c
lustre/lov/lov_request.c
lustre/mdc/mdc_internal.h
lustre/mdc/mdc_lib.c
lustre/mdc/mdc_locks.c
lustre/mdc/mdc_reint.c
lustre/mdc/mdc_request.c
lustre/mdd/mdd_handler.c
lustre/mdd/mdd_lov.c
lustre/mds/handler.c
lustre/mds/mds_fs.c
lustre/mds/mds_internal.h
lustre/mds/mds_lov.c
lustre/mds/mds_unlink_open.c
lustre/mdt/Makefile.in
lustre/mdt/mdt_capa.c [new file with mode: 0644]
lustre/mdt/mdt_handler.c
lustre/mdt/mdt_internal.h
lustre/mdt/mdt_lib.c
lustre/mdt/mdt_open.c
lustre/mdt/mdt_recovery.c
lustre/mdt/mdt_reint.c
lustre/obdclass/Makefile.in
lustre/obdclass/autoMakefile.am
lustre/obdclass/capa.c [new file with mode: 0644]
lustre/obdclass/class_obd.c
lustre/obdclass/dt_object.c
lustre/obdclass/genops.c
lustre/obdclass/llog_lvfs.c
lustre/obdclass/lprocfs_status.c
lustre/obdclass/lu_object.c
lustre/obdclass/obd_mount.c
lustre/obdecho/echo.c
lustre/obdecho/echo_client.c
lustre/obdfilter/Makefile.in
lustre/obdfilter/filter.c
lustre/obdfilter/filter_capa.c [new file with mode: 0644]
lustre/obdfilter/filter_internal.h
lustre/obdfilter/filter_io.c
lustre/obdfilter/filter_log.c
lustre/osc/osc_request.c
lustre/osd/osd_handler.c
lustre/osd/osd_internal.h
lustre/ost/ost_handler.c
lustre/ptlrpc/layout.c
lustre/ptlrpc/lproc_ptlrpc.c
lustre/ptlrpc/pack_generic.c
lustre/ptlrpc/ptlrpc_module.c
lustre/utils/mkfs_lustre.c
lustre/utils/req-layout.c

index 8a996aa..d6be418 100644 (file)
@@ -85,10 +85,37 @@ static int cmm_maxsize_get(const struct lu_context *ctxt, struct md_device *md,
         RETURN(rc);
 }
 
+/*
+ * mdo_init_capa_keys method of cmm_md_ops: asserts that the child device
+ * implements mdo_init_capa_keys, forwards @keys to it and returns its result.
+ */
+static int cmm_init_capa_keys(struct md_device *md,
+                              struct lustre_capa_key *keys)
+{
+        struct cmm_device *cmm = md2cmm_dev(md);
+        ENTRY;
+
+        LASSERT(cmm_child_ops(cmm)->mdo_init_capa_keys);
+        RETURN(cmm_child_ops(cmm)->mdo_init_capa_keys(cmm->cmm_child, keys));
+}
+
+/*
+ * mdo_update_capa_key method of cmm_md_ops: forwards @key to the child
+ * device's mdo_update_capa_key and returns its result.
+ *
+ * The child method is asserted to exist before it is called, matching
+ * cmm_init_capa_keys(); without the check a child lacking the method
+ * would be called through a NULL pointer.
+ */
+static int cmm_update_capa_key(const struct lu_context *ctxt,
+                               struct md_device *md,
+                               struct lustre_capa_key *key)
+{
+        struct cmm_device *cmm_dev = md2cmm_dev(md);
+        int rc;
+        ENTRY;
+        LASSERT(cmm_child_ops(cmm_dev)->mdo_update_capa_key);
+        rc = cmm_child_ops(cmm_dev)->mdo_update_capa_key(ctxt,
+                                                         cmm_dev->cmm_child,
+                                                         key);
+        RETURN(rc);
+}
+
+/* md device method table of cmm; this patch adds the two capability-key
+ * entries, both of which forward to the child device. */
 static struct md_device_operations cmm_md_ops = {
         .mdo_statfs         = cmm_statfs,
         .mdo_root_get       = cmm_root_get,
         .mdo_maxsize_get    = cmm_maxsize_get,
+        .mdo_init_capa_keys = cmm_init_capa_keys,
+        .mdo_update_capa_key= cmm_update_capa_key,
 };
 
 extern struct lu_device_type mdc_device_type;
@@ -295,7 +322,6 @@ static void cmm_device_free(const struct lu_context *ctx, struct lu_device *d)
 {
         struct cmm_device *m = lu2cmm_dev(d);
 
-       LASSERT(atomic_read(&d->ld_ref) == 0);
         LASSERT(m->cmm_tgt_count == 0);
         LASSERT(list_empty(&m->cmm_targets));
        md_device_fini(&m->cmm_md_dev);
index 77f8115..26981bf 100644 (file)
@@ -337,6 +337,15 @@ static int cml_readpage(const struct lu_context *ctxt, struct md_object *mo,
         RETURN(rc);
 }
 
+/*
+ * moo_capa_get method of cml_mo_ops: passes the request to the next
+ * md_object in the stack via mo_capa_get() and returns its result.
+ */
+static int cml_capa_get(const struct lu_context *ctxt, struct md_object *mo,
+                        struct lustre_capa *capa)
+{
+        ENTRY;
+
+        RETURN(mo_capa_get(ctxt, md_object_next(mo), capa));
+}
+
 static struct md_object_operations cml_mo_ops = {
         .moo_permission    = cml_permission,
         .moo_attr_get      = cml_attr_get,
@@ -351,7 +360,8 @@ static struct md_object_operations cml_mo_ops = {
         .moo_open          = cml_open,
         .moo_close         = cml_close,
         .moo_readpage      = cml_readpage,
-        .moo_readlink      = cml_readlink
+        .moo_readlink      = cml_readlink,
+        .moo_capa_get      = cml_capa_get
 };
 
 /* md_dir operations */
@@ -422,14 +432,14 @@ static int cml_unlink(const struct lu_context *ctx, struct md_object *mo_p,
 
 /* rename is split to local/remote by location of new parent dir */
 struct md_object *md_object_find(const struct lu_context *ctx,
-                                  struct md_device *md,
-                                  const struct lu_fid *f)
+                                 struct md_device *md,
+                                 const struct lu_fid *f)
 {
         struct lu_object *o;
         struct md_object *m;
         ENTRY;
 
-        o = lu_object_find(ctx, md2lu_dev(md)->ld_site, f);
+        o = lu_object_find(ctx, md2lu_dev(md)->ld_site, f, BYPASS_CAPA);
         if (IS_ERR(o))
                 m = (struct md_object *)o;
         else {
@@ -724,6 +734,12 @@ static int cmr_readpage(const struct lu_context *ctxt, struct md_object *mo,
         RETURN(-EREMOTE);
 }
 
+/*
+ * moo_capa_get method of cmr_mo_ops: always fails with -EFAULT; none of
+ * the arguments are used.
+ * NOTE(review): RETURN() is used without a preceding ENTRY, unlike
+ * cml_capa_get() - confirm this is intended.
+ */
+static int cmr_capa_get(const struct lu_context *ctxt, struct md_object *mo,
+                        struct lustre_capa *capa)
+{
+        RETURN(-EFAULT);
+}
+
 static struct md_object_operations cmr_mo_ops = {
         .moo_permission    = cmr_permission,
         .moo_attr_get      = cmr_attr_get,
@@ -738,7 +754,8 @@ static struct md_object_operations cmr_mo_ops = {
         .moo_open          = cmr_open,
         .moo_close         = cmr_close,
         .moo_readpage      = cmr_readpage,
-        .moo_readlink      = cmr_readlink
+        .moo_readlink      = cmr_readlink,
+        .moo_capa_get      = cmr_capa_get
 };
 
 /* remote part of md_dir operations */
index a26370a..58fea39 100644 (file)
@@ -215,7 +215,8 @@ static int mdc_attr_get(const struct lu_context *ctx, struct md_object *mo,
 
         memset(&mci->mci_opdata, 0, sizeof(mci->mci_opdata));
 
-        rc = md_getattr(mc->mc_desc.cl_exp, lu_object_fid(&mo->mo_lu),
+        /* FIXME: split capability */
+        rc = md_getattr(mc->mc_desc.cl_exp, lu_object_fid(&mo->mo_lu), NULL,
                         OBD_MD_FLMODE | OBD_MD_FLUID | OBD_MD_FLGID |
                         OBD_MD_FLFLAGS,
                         0, &mci->mci_req);
@@ -463,9 +464,9 @@ static int mdc_is_subdir(const struct lu_context *ctx, struct md_object *mo,
 
         mci = mdc_info_init(ctx);
         
+        /* FIXME: capability for split! */
         rc = md_is_subdir(mc->mc_desc.cl_exp, lu_object_fid(&mo->mo_lu),
-                          fid, &mci->mci_req);
-
+                          fid, NULL, NULL, &mci->mci_req);
         if (rc)
                 GOTO(out, rc);
 
index 3bbaac1..c10de60 100644 (file)
@@ -15,5 +15,5 @@ EXTRA_DIST = ioctl.h liblustre.h lprocfs_status.h lustre_cfg.h        \
             obd_cache.h obd_class.h obd_echo.h obd.h obd_lov.h \
             obd_ost.h obd_support.h lustre_ver.h lu_object.h   \
              md_object.h dt_object.h lustre_param.h lustre_mdt.h \
-             lustre_fid.h lustre_fld.h lustre_req_layout.h
+             lustre_fid.h lustre_fld.h lustre_req_layout.h lustre_capa.h
 
index 8f3018a..8e77057 100644 (file)
@@ -207,6 +207,13 @@ struct lu_object_operations {
          * consistent.
          */
         int (*loo_object_invariant)(const struct lu_object *o);
+        /*
+         * Called to authorize action by capability.
+         */
+        int (*loo_object_auth)(const struct lu_context *ctx,
+                               const struct lu_object *o,
+                               struct lustre_capa *capa,
+                               __u64 opc);
 };
 
 /*
@@ -448,6 +455,11 @@ struct lu_object_header {
          */
         struct lu_fid     loh_fid;
         /*
+         * Fid capability.
+         */
+        unsigned int       loh_capa_bypass:1; /* bypass capability check */
+        struct lustre_capa loh_capa;          /* capability sent by client */
+        /*
          * Common object attributes, cached for efficiency. From enum
          * lu_object_header_attr.
          */
@@ -568,6 +580,11 @@ struct lu_site {
                 __u32 s_cache_race;
                 __u32 s_lru_purged;
         } ls_stats;
+
+        /* Capability */
+        struct lustre_capa_key *ls_capa_keys;
+        unsigned long           ls_capa_timeout;
+        __u32                   ls_capa_alg;
 };
 
 /*
@@ -681,7 +698,14 @@ void lu_site_purge(const struct lu_context *ctx,
  * any case, additional reference is acquired on the returned object.
  */
 struct lu_object *lu_object_find(const struct lu_context *ctxt,
-                                 struct lu_site *s, const struct lu_fid *f);
+                                 struct lu_site *s, const struct lu_fid *f,
+                                 struct lustre_capa *c);
+
+/*
+ * Auth lu_object capability.
+ */
+int lu_object_auth(const struct lu_context *ctxt, const struct lu_object *o,
+                   struct lustre_capa *capa, __u64 opc);
 
 /*
  * Helpers.
@@ -713,6 +737,20 @@ static inline const struct lu_fid *lu_object_fid(const struct lu_object *o)
 }
 
 /*
+ * Pointer to the fid capability of this object.
+ */
+static inline struct lustre_capa *
+lu_object_capa(const struct lu_object *o)
+{
+        /* the capability is embedded in the shared object header */
+        struct lustre_capa *capa = &o->lo_header->loh_capa;
+
+        return capa;
+}
+
+/* Return the loh_capa_bypass bit of the object's header (0 or 1). */
+static inline int lu_object_capa_bypass(const struct lu_object *o)
+{
+        int bypass = o->lo_header->loh_capa_bypass;
+
+        return bypass;
+}
+
+/*
  * return device operations vector for this object
  */
 static inline struct lu_device_operations *
@@ -805,6 +843,11 @@ static inline const __u32 lu_object_attr(const struct lu_object *o)
         return o->lo_header->loh_attr;
 }
 
+/* Set the loh_capa_bypass bit in the header of @o. */
+static inline void lu_object_bypass_capa(struct lu_object *o)
+{
+        o->lo_header->loh_capa_bypass = 1;
+}
+
 struct lu_rdpg {
         /* input params, should be filled out by mdt */
         __u32                   rp_hash;        /* hash */
index 22986a2..98883d0 100644 (file)
@@ -244,10 +244,10 @@ static inline int fid_is_sane(const struct lu_fid *fid)
 
 #define DFID "[%16.16"LPF64"x/%8.8x:%8.8x]"
 
-#define PFID(fid)       \
-        fid_seq((fid)), \
-        fid_oid((fid)), \
-        fid_ver((fid))
+#define PFID(fid)     \
+        fid_seq(fid), \
+        fid_oid(fid), \
+        fid_ver(fid)
 
 extern void lustre_swab_lu_fid(struct lu_fid *fid);
 extern void lustre_swab_lu_range(struct lu_range *range);
@@ -301,6 +301,7 @@ static inline struct lu_dirent *lu_dirent_next(struct lu_dirent *ent)
 #define MEA_MAGIC_HASH_SEGMENT   0xb222a11b
 #define MAX_HASH_SIZE            0x7fffffff
 
+/* TODO: lmv_stripe_md should contain mds capabilities for all slave fids */
 struct lmv_stripe_md {
         __u32         mea_magic;
         __u32         mea_count;
@@ -359,7 +360,7 @@ struct lustre_msg_v2 {
         __u32 lm_buflens[0];
 };
 
-/* without security, ptlrpc_body is put in the first buffer. */
+/* without gss, ptlrpc_body is put at the first buffer. */
 struct ptlrpc_body {
         struct lustre_handle pb_handle;
         __u32 pb_type;
@@ -441,7 +442,7 @@ extern void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb);
 #define OBD_CONNECT_RMT_CLIENT  0x40000ULL /* Remote 1.8 client */
 #define OBD_CONNECT_BRW_SIZE    0x80000ULL /* Max bytes per rpc */
 #define OBD_CONNECT_QUOTA64     0x100000ULL /* 64bit qunit_data.qd_count b=10707*/
-#define OBD_CONNECT_FID_CAPA    0x200000ULL /* fid capability */
+#define OBD_CONNECT_MDS_CAPA    0x200000ULL /* MDS capability */
 #define OBD_CONNECT_OSS_CAPA    0x400000ULL /* OSS capability */
 /* also update obd_connect_names[] for lprocfs_rd_connect_flags()
  * and lustre/utils/wirecheck.c */
@@ -620,6 +621,9 @@ struct md_op_data {
         /* Size-on-MDS epoch and flags. */
         __u64                 ioepoch;
         __u32                 flags;
+
+        struct obd_capa      *mod_capa1;
+        struct obd_capa      *mod_capa2;
 };
 
 #define MDS_MODE_DONT_LOCK (1 << 30)
@@ -704,6 +708,8 @@ struct lov_mds_md_v1 {            /* LOV EA mds/wire data (little-endian) */
 #define OBD_MD_FLXATTRRM   (0x0000004000000000ULL) /* xattr remove */
 #define OBD_MD_FLACL       (0x0000008000000000ULL) /* ACL */
 #define OBD_MD_FLRMTPERM   (0x0000010000000000ULL) /* remote permission */
+#define OBD_MD_FLMDSCAPA   (0x0000020000000000ULL) /* MDS capability */
+#define OBD_MD_FLOSSCAPA   (0x0000040000000000ULL) /* OSS capability */
 
 #define OBD_MD_FLGETATTR (OBD_MD_FLID    | OBD_MD_FLATIME | OBD_MD_FLMTIME | \
                           OBD_MD_FLCTIME | OBD_MD_FLSIZE  | OBD_MD_FLBLKSZ | \
@@ -833,6 +839,7 @@ typedef enum {
         MDS_SETXATTR     = 50,
         MDS_WRITEPAGE    = 51,
         MDS_IS_SUBDIR    = 52,
+        MDS_RENEW_CAPA   = 53,
         MDS_LAST_OPC
 } mds_cmd_t;
 
@@ -1028,7 +1035,9 @@ struct lustre_md {
 #ifdef CONFIG_FS_POSIX_ACL
         struct posix_acl        *posix_acl;
 #endif
-        struct mdt_remote_perm   *remote_perm;
+        struct mdt_remote_perm  *remote_perm;
+        struct obd_capa         *mds_capa;
+        struct obd_capa         *oss_capa;
 };
 
 #define Q_QUOTACHECK    0x800100
@@ -1118,7 +1127,7 @@ struct mdt_rec_setattr {
         __u32           sa_uid;
         __u32           sa_gid;
         __u32           sa_attr_flags;
-        __u32           sa_padding; /* also fix lustre_swab_mdt_rec_setattr */
+        __u32           sa_padding; /* also fix lustre_swab_mds_rec_setattr */
 };
 
 extern void lustre_swab_mdt_rec_setattr (struct mdt_rec_setattr *sa);
@@ -1878,4 +1887,85 @@ typedef enum {
         SEC_LAST_OPC
 } sec_cmd_t;
 
+/*
+ * capa related definitions
+ */
+#define CAPA_HMAC_MAX_LEN       64
+#define CAPA_HMAC_KEY_MAX_LEN   56
+
+/* NB: take care when changing the order of the elements of this struct,
+ * because the offset info is used in find_capa().
+ * The struct is packed. */
+struct lustre_capa {
+        struct lu_fid   lc_fid;       /* fid */
+        __u64           lc_opc;       /* operations allowed (CAPA_OPC_* bits) */
+        __u32           lc_flags;     /* HMAC algorithm (bits covered by
+                                       * CAPA_HMAC_ALG_MASK) & flags (bits
+                                       * covered by CAPA_FL_MASK) */
+        __u32           lc_keyid;     /* key used for the capability */
+        __u64           lc_expiry;    /* expiry time (sec) */
+        __u8            lc_hmac[CAPA_HMAC_MAX_LEN];   /* HMAC */
+} __attribute__((packed));
+
+extern void lustre_swab_lustre_capa(struct lustre_capa *c);
+
+/* lustre_capa.lc_opc: one bit per permitted operation; the values are
+ * distinct powers of two so they can be OR-ed together. */
+enum {
+        /* MDS only fid capability */
+        CAPA_OPC_BODY_WRITE   = 1,     /* write fid data */
+        CAPA_OPC_BODY_READ    = 1<<1,  /* read fid data */
+        CAPA_OPC_INDEX_LOOKUP = 1<<2,  /* lookup fid */
+        CAPA_OPC_INDEX_INSERT = 1<<3,  /* insert fid */
+        CAPA_OPC_INDEX_DELETE = 1<<4,  /* delete fid */
+        /* OSS only fid capability */
+        CAPA_OPC_OSS_WRITE    = 1<<5,  /* write oss object data */
+        CAPA_OPC_OSS_READ     = 1<<6,  /* read oss object data */
+        CAPA_OPC_OSS_TRUNC    = 1<<7,  /* truncate oss object */
+        /* MDS & OSS both might have */
+        CAPA_OPC_META_WRITE   = 1<<8,  /* write fid meta data */
+        CAPA_OPC_META_READ    = 1<<9,  /* read fid meta data */
+
+};
+
+/* Union of the operation bits listed as "MDS only" above. */
+#define CAPA_OPC_MDS_ONLY                                                      \
+        (CAPA_OPC_BODY_WRITE | CAPA_OPC_BODY_READ |                            \
+         CAPA_OPC_INDEX_LOOKUP | CAPA_OPC_INDEX_INSERT | CAPA_OPC_INDEX_DELETE)
+/* Union of the operation bits listed as "OSS only" above. */
+#define CAPA_OPC_OSS_ONLY                                                      \
+        (CAPA_OPC_OSS_WRITE | CAPA_OPC_OSS_READ | CAPA_OPC_OSS_TRUNC)
+/* Every bit except the OSS-only ones.
+ * NOTE(review): the expansion is not wrapped in parentheses. */
+#define CAPA_OPC_MDS_DEFAULT ~CAPA_OPC_OSS_ONLY
+/* Every bit except the MDS-only and OSS-only ones. */
+#define CAPA_OPC_OSS_DEFAULT ~(CAPA_OPC_MDS_ONLY | CAPA_OPC_OSS_ONLY)
+
+/* 1 if @c permits at least one MDS-only operation, 0 otherwise. */
+static inline int capa_for_mds(struct lustre_capa *c)
+{
+        return !!(c->lc_opc & CAPA_OPC_MDS_ONLY);
+}
+
+/* 1 if @c permits at least one OSS-only operation, 0 otherwise. */
+static inline int capa_for_oss(struct lustre_capa *c)
+{
+        return !!(c->lc_opc & CAPA_OPC_OSS_ONLY);
+}
+
+/* lustre_capa.lc_flags: flag values (the part of lc_flags selected by
+ * CAPA_FL_MASK) */
+enum {
+        CAPA_FL_SHORT_EXPIRY = 1, /* short capa expiry */
+        CAPA_FL_ROOT         = 2, /* root fid capa, will always renew */
+};
+
+/* HMAC algorithm ids. struct lustre_capa has no lc_hmac_alg field: the id
+ * is carried in the top 8 bits of lc_flags (see CAPA_HMAC_ALG_MASK). */
+enum {
+        CAPA_HMAC_ALG_SHA1 = 1, /* sha1 algorithm */
+        CAPA_HMAC_ALG_MAX,
+};
+
+/* split of lustre_capa.lc_flags: low 24 bits flags, high 8 bits algorithm */
+#define CAPA_FL_MASK            0x00ffffff
+#define CAPA_HMAC_ALG_MASK      0xff000000
+
+/* Capability key record: identifies the MDS and key number and carries up
+ * to CAPA_HMAC_KEY_MAX_LEN bytes of key material. The struct is packed. */
+struct lustre_capa_key {
+        __u64   lk_mdsid;     /* mds# */
+        __u32   lk_keyid;     /* key# */
+        __u32   lk_padding;
+        __u8    lk_key[CAPA_HMAC_KEY_MAX_LEN];    /* key */
+} __attribute__((packed));
+
+extern void lustre_swab_lustre_capa_key(struct lustre_capa_key *k);
+
+typedef int (* renew_capa_cb_t)(struct obd_capa *, struct lustre_capa *);
+
 #endif
diff --git a/lustre/include/lustre_capa.h b/lustre/include/lustre_capa.h
new file mode 100644 (file)
index 0000000..555ee5f
--- /dev/null
@@ -0,0 +1,357 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2005 Cluster File Systems, Inc.
+ *   Author: Lai Siyao <lsy@clusterfs.com>
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ *   Lustre capability support.
+ */
+
+#ifndef __LINUX_CAPA_H_
+#define __LINUX_CAPA_H_
+
+/*
+ * capability
+ */
+#ifdef __KERNEL__
+#include <linux/crypto.h>
+#endif
+#include <lustre/lustre_idl.h>
+
+#define NR_CAPAHASH 32
+#define CAPA_HASH_SIZE 3000              /* for MDS & OSS */
+
+#define CAPA_TIMEOUT 1800                /* sec, == 30 min */
+#define CAPA_KEY_TIMEOUT (24 * 60 * 60)  /* sec, == 1 day */
+
+/* Descriptor of one HMAC algorithm; instances are built with
+ * DEF_CAPA_HMAC_ALG(). */
+struct capa_hmac_alg {
+        const char     *ha_name;        /* algorithm name */
+        int             ha_len;         /* presumably HMAC length - confirm */
+        int             ha_keylen;      /* presumably key length - confirm */
+};
+
+/* Expands to a designated array initializer for slot CAPA_HMAC_ALG_<type>
+ * of an array of struct capa_hmac_alg, filled with name, len and keylen. */
+#define DEF_CAPA_HMAC_ALG(name, type, len, keylen)      \
+[CAPA_HMAC_ALG_ ## type] = {                            \
+        .ha_name         = name,                        \
+        .ha_len          = len,                         \
+        .ha_keylen       = keylen,                      \
+}
+
+/* Client-side part of struct obd_capa (the `cli' arm of its union). */
+struct client_capa {
+        struct inode             *inode;      
+        struct list_head          lli_list;     /* link to lli_oss_capas */
+        atomic_t                  open_count;   /* open count */
+};
+
+/* Target-side part of struct obd_capa (the `tgt' arm of its union). */
+struct target_capa {
+        struct hlist_node         c_hash;         /* link to capa hash */
+};
+
+/* In-memory capability: the wire capability plus bookkeeping (refcount,
+ * expiry in jiffies, lock, site, flags) and a client/target specific part. */
+struct obd_capa {
+        struct list_head          c_list;       /* link to capa_list */
+
+        struct lustre_capa        c_capa;       /* capa */
+        atomic_t                  c_refc;       /* ref count */
+        cfs_time_t                c_expiry;     /* jiffies */
+        spinlock_t                c_lock;       /* protect capa content */
+        int                       c_site;       /* index into capa_count[];
+                                                 * presumably CAPA_SITE_* */
+        int                       c_flags;      /* OBD_CAPA_FL_* bits */
+
+        union {
+                struct client_capa      cli;
+                struct target_capa      tgt;
+        } u;
+};
+
+/* Capability sites; CAPA_SITE_MAX is the number of sites. */
+enum {
+        CAPA_SITE_CLIENT = 0,
+        CAPA_SITE_SERVER,
+        CAPA_SITE_MAX
+};
+
+/* Bits of obd_capa.c_flags, tested and modified by the obd_capa_is_*(),
+ * obd_capa_set_*() and obd_capa_clear_*() helpers. */
+enum {
+        OBD_CAPA_FL_NEW     = 1,
+        OBD_CAPA_FL_EXPIRED = 1<<1,
+        OBD_CAPA_FL_ROOT    = 1<<2,
+        OBD_CAPA_FL_SPLIT   = 1<<3
+};
+
+/* Return the permitted-operations bitmask (lc_opc) of @capa. */
+static inline __u64 capa_opc(struct lustre_capa *capa)
+{
+        __u64 opc = capa->lc_opc;
+
+        return opc;
+}
+
+/* Return a pointer to the fid embedded in @capa. */
+static inline struct lu_fid *capa_fid(struct lustre_capa *capa)
+{
+        struct lu_fid *fid = &capa->lc_fid;
+
+        return fid;
+}
+
+/* Return the id of the key used for @capa (lc_keyid). */
+static inline __u32 capa_keyid(struct lustre_capa *capa)
+{
+        __u32 keyid = capa->lc_keyid;
+
+        return keyid;
+}
+
+/* Return the expiry time of @capa in seconds (lc_expiry). */
+static inline __u64 capa_expiry(struct lustre_capa *capa)
+{
+        __u64 expiry = capa->lc_expiry;
+
+        return expiry;
+}
+
+/* Return the flag part of lc_flags, i.e. its low 24 bits. */
+static inline __u32 capa_flags(struct lustre_capa *capa)
+{
+        __u32 raw = capa->lc_flags;
+
+        return raw & 0x00ffffff;
+}
+
+/* Return the HMAC algorithm id stored in the top 8 bits of lc_flags. */
+static inline __u32 capa_alg(struct lustre_capa *capa)
+{
+        return capa->lc_flags >> 24;
+}
+
+/* Return the MDS number recorded in @key (lk_mdsid). */
+static inline __u64 capa_key_mdsid(struct lustre_capa_key *key)
+{
+        __u64 mdsid = key->lk_mdsid;
+
+        return mdsid;
+}
+
+/* Return the key number recorded in @key (lk_keyid). */
+static inline __u32 capa_key_keyid(struct lustre_capa_key *key)
+{
+        __u32 keyid = key->lk_keyid;
+
+        return keyid;
+}
+
+/* Log capability @c through CDEBUG at @level: the caller's @fmt/@args come
+ * first, followed by the pointer, opc, fid, keyid, expiry, flags and alg.
+ * @c is expanded several times, so pass an expression without side
+ * effects. */
+#define DEBUG_CAPA(level, c, fmt, args...)                                     \
+do {                                                                           \
+CDEBUG(level, fmt " capability@%p opc "LPX64" fid "DFID" keyid %u expiry "LPU64\
+       " flags %u alg %d\n",                                                   \
+       ##args, c, capa_opc(c), PFID(capa_fid(c)), capa_keyid(c),               \
+       capa_expiry(c), capa_flags(c), capa_alg(c));                            \
+} while (0)
+
+/* Log capability key @k through CDEBUG at @level: the caller's @fmt/@args
+ * come first, followed by the pointer, mdsid and keyid. @k is expanded
+ * several times, so pass an expression without side effects. */
+#define DEBUG_CAPA_KEY(level, k, fmt, args...)                                 \
+do {                                                                           \
+CDEBUG(level, fmt " capability key@%p mdsid "LPU64" keyid %u\n",               \
+       ##args, k, capa_key_mdsid(k), capa_key_keyid(k));                       \
+} while (0)
+
+/* obdclass/capa.c */
+extern struct list_head capa_list[];
+extern spinlock_t capa_lock;
+extern int capa_count[];
+extern cfs_mem_cache_t *capa_cachep;
+
+struct obd_capa *capa_add(struct lustre_capa *capa);
+struct obd_capa *capa_lookup(struct lustre_capa *capa);
+
+int capa_hmac(__u8 *hmac, struct lustre_capa *capa, __u8 *key);
+void capa_cpy(void *dst, struct obd_capa *ocapa);
+
+void cleanup_capas(int site);
+void dump_capa_hmac(char *buf, char *key);
+
+/* 1 if OBD_CAPA_FL_NEW is set in oc->c_flags, 0 otherwise. */
+static inline int obd_capa_is_new(struct obd_capa *oc)
+{
+        return (oc->c_flags & OBD_CAPA_FL_NEW) != 0;
+}
+
+/* 1 if OBD_CAPA_FL_EXPIRED is set in oc->c_flags, 0 otherwise. */
+static inline int obd_capa_is_expired(struct obd_capa *oc)
+{
+        return (oc->c_flags & OBD_CAPA_FL_EXPIRED) != 0;
+}
+
+/*
+ * 1 if the capability is valid, i.e. neither OBD_CAPA_FL_NEW nor
+ * OBD_CAPA_FL_EXPIRED is set in oc->c_flags, 0 otherwise.
+ *
+ * This is the state obd_capa_set_valid() establishes by clearing both
+ * bits; the previous form returned 1 when either bit was set, which is
+ * the opposite of what obd_capa_set_valid() produces.
+ */
+static inline int obd_capa_is_valid(struct obd_capa *oc)
+{
+        return !((oc)->c_flags & (OBD_CAPA_FL_NEW | OBD_CAPA_FL_EXPIRED));
+}
+
+/* Set OBD_CAPA_FL_NEW in oc->c_flags. */
+static inline void obd_capa_set_new(struct obd_capa *oc)
+{
+        oc->c_flags = oc->c_flags | OBD_CAPA_FL_NEW;
+}
+
+/* Set OBD_CAPA_FL_EXPIRED in oc->c_flags. */
+static inline void obd_capa_set_expired(struct obd_capa *oc)
+{
+        oc->c_flags = oc->c_flags | OBD_CAPA_FL_EXPIRED;
+}
+
+/* Clear both OBD_CAPA_FL_NEW and OBD_CAPA_FL_EXPIRED in oc->c_flags. */
+static inline void obd_capa_set_valid(struct obd_capa *oc)
+{
+        int flags = oc->c_flags;
+
+        flags &= ~OBD_CAPA_FL_NEW;
+        flags &= ~OBD_CAPA_FL_EXPIRED;
+        oc->c_flags = flags;
+}
+
+/* Clear OBD_CAPA_FL_NEW in oc->c_flags. */
+static inline void obd_capa_clear_new(struct obd_capa *oc)
+{
+        oc->c_flags = oc->c_flags & ~OBD_CAPA_FL_NEW;
+}
+
+/* Clear OBD_CAPA_FL_EXPIRED in oc->c_flags. */
+static inline void obd_capa_clear_expired(struct obd_capa *oc)
+{
+        oc->c_flags = oc->c_flags & ~OBD_CAPA_FL_EXPIRED;
+}
+
+/* 1 if OBD_CAPA_FL_ROOT is set in oc->c_flags, 0 otherwise. */
+static inline int obd_capa_is_root(struct obd_capa *oc)
+{
+        return (oc->c_flags & OBD_CAPA_FL_ROOT) != 0;
+}
+
+/* Set OBD_CAPA_FL_ROOT in oc->c_flags. */
+static inline void obd_capa_set_root(struct obd_capa *oc)
+{
+        oc->c_flags = oc->c_flags | OBD_CAPA_FL_ROOT;
+}
+
+/* 1 if OBD_CAPA_FL_SPLIT is set in oc->c_flags, 0 otherwise. */
+static inline int obd_capa_is_split(struct obd_capa *oc)
+{
+        return (oc->c_flags & OBD_CAPA_FL_SPLIT) != 0;
+}
+
+/* Set OBD_CAPA_FL_SPLIT in oc->c_flags. */
+static inline void obd_capa_set_split(struct obd_capa *oc)
+{
+        oc->c_flags = oc->c_flags | OBD_CAPA_FL_SPLIT;
+}
+
+/*
+ * Allocate an obd_capa from capa_cachep for @site. On success the refcount
+ * is 0, the lock and list head are initialised, c_site is @site, the NEW
+ * flag is set and capa_count[site] is incremented. Returns NULL when the
+ * allocation fails, and always NULL outside the kernel build.
+ * NOTE(review): @site is used as an index into capa_count[] without a range
+ * check, and the counter is updated without taking a lock here - confirm
+ * callers guarantee both.
+ */
+static inline struct obd_capa *alloc_capa(int site)
+{
+#ifdef __KERNEL__
+        struct obd_capa *oc;
+
+        OBD_SLAB_ALLOC(oc, capa_cachep, SLAB_KERNEL, sizeof(*oc));
+        if (oc == NULL)
+                return NULL;
+
+        atomic_set(&oc->c_refc, 0);
+        spin_lock_init(&oc->c_lock);
+        INIT_LIST_HEAD(&oc->c_list);
+        oc->c_site = site;
+        obd_capa_set_new(oc);
+        capa_count[site]++;
+        return oc;
+#else
+        return NULL;
+#endif
+}
+
+/*
+ * Return @ocapa to capa_cachep and decrement the per-site counter.
+ * LBUGs if the capability is still referenced or if the counter would
+ * become negative. Does nothing outside the kernel build.
+ */
+static inline void free_capa(struct obd_capa *ocapa)
+{
+#ifdef __KERNEL__
+        struct lustre_capa *capa = &ocapa->c_capa;
+
+        if (atomic_read(&ocapa->c_refc) != 0) {
+                DEBUG_CAPA(D_ERROR, capa, "refc %d for",
+                           atomic_read(&ocapa->c_refc));
+                LBUG();
+        }
+
+        if (--capa_count[ocapa->c_site] < 0) {
+                DEBUG_CAPA(D_ERROR, capa, "total count %d",
+                           capa_count[ocapa->c_site]);
+                LBUG();
+        }
+        OBD_SLAB_FREE(ocapa, capa_cachep, sizeof(*ocapa));
+#else
+#endif
+}
+
+/* Take a reference on @ocapa and return it; a NULL argument is returned
+ * unchanged. */
+static inline struct obd_capa *capa_get(struct obd_capa *ocapa)
+{
+        if (ocapa != NULL)
+                atomic_inc(&ocapa->c_refc);
+
+        return ocapa;
+}
+
+/* Drop a reference on @ocapa; a NULL argument is ignored. The capability
+ * is not freed here, even when the count reaches zero. */
+static inline void capa_put(struct obd_capa *ocapa)
+{
+        if (ocapa != NULL)
+                atomic_dec(&ocapa->c_refc);
+}
+
+/*
+ * Derive an access-mode value from open(2) flags: the value is bumped by
+ * one unless that would clear the O_ACCMODE bits, and bit value 2 is
+ * forced on when O_TRUNC is present. All other flag bits are passed
+ * through.
+ */
+static inline int open_flags_to_accmode(int flags)
+{
+        int accmode = flags;
+
+        if (((accmode + 1) & O_ACCMODE) != 0)
+                accmode += 1;
+        if ((accmode & O_TRUNC) != 0)
+                accmode |= 2;
+
+        return accmode;
+}
+
+/* Map an open mode to the OSS operation it needs: CAPA_OPC_OSS_WRITE when
+ * FMODE_WRITE is set, CAPA_OPC_OSS_READ otherwise. */
+static inline __u64 capa_open_opc(int mode)
+{
+        if (mode & FMODE_WRITE)
+                return CAPA_OPC_OSS_WRITE;
+
+        return CAPA_OPC_OSS_READ;
+}
+
+/*
+ * Compute ocapa->c_expiry (jiffies) from c_capa.lc_expiry (seconds):
+ * the time remaining relative to CURRENT_SECONDS is scaled by HZ and added
+ * to the current jiffies, then the sum is rounded down to a multiple of HZ.
+ * NOTE(review): the arithmetic mixes time_t with jiffies; behaviour when
+ * lc_expiry is already in the past is not obvious - confirm.
+ */
+static inline void set_capa_expiry(struct obd_capa *ocapa)
+{
+        time_t expiry = (time_t)ocapa->c_capa.lc_expiry;
+
+        /* divide by HZ here and multiply back below: drops sub-second part */
+        expiry = (jiffies + (expiry - CURRENT_SECONDS) * HZ) / HZ;
+        ocapa->c_expiry = expiry * HZ;
+}
+
+/*
+ * Return the time (in jiffies) at which @ocapa should be renewed: 40
+ * seconds before c_expiry for CAPA_FL_SHORT_EXPIRY capabilities, 1200
+ * seconds before it otherwise.
+ */
+static inline unsigned long capa_renewal_time(struct obd_capa *ocapa)
+{
+        /* NB: dirty_expire_centisecs defaults to 30*100, i.e. 30 sec; these
+         * lead times guarantee that the client cache is flushed to the OSS
+         * before the capability expires.
+         */
+        int lead_sec;
+
+        if (ocapa->c_capa.lc_flags & CAPA_FL_SHORT_EXPIRY)
+                lead_sec = 40;
+        else
+                lead_sec = 1200;
+
+        return ocapa->c_expiry - lead_sec * HZ;
+}
+
+#ifdef __KERNEL__
+/* True once the renewal time of @ocapa has been reached. */
+static inline int capa_is_to_expire(struct obd_capa *ocapa)
+{
+        unsigned long renew_at = capa_renewal_time(ocapa);
+
+        return time_before_eq(renew_at, jiffies);
+}
+
+/* True once c_expiry of @ocapa has been reached. */
+static inline int capa_is_expired(struct obd_capa *ocapa)
+{
+        cfs_time_t deadline = ocapa->c_expiry;
+
+        return time_before_eq(deadline, jiffies);
+}
+#endif
+
+/* 1 if every bit of @opc is present in capa->lc_opc, 0 otherwise. */
+static inline int capa_opc_supported(struct lustre_capa *capa, __u64 opc)
+{
+        __u64 missing = opc & ~capa->lc_opc;
+
+        return missing == 0;
+}
+
+/*
+ * Fetch the capability held in buffer @offset of @msg through
+ * lustre_swab_buf(), using lustre_swab_lustre_capa as the swab callback.
+ * Returns what lustre_swab_buf() returns; on NULL the buffer count and
+ * the length of buffer @offset are logged with CERROR.
+ */
+static inline struct lustre_capa *
+lustre_unpack_capa(struct lustre_msg *msg, unsigned int offset)
+{
+        struct lustre_capa *capa;
+
+        capa = lustre_swab_buf(msg, offset, sizeof(*capa),
+                               lustre_swab_lustre_capa);
+        if (capa == NULL)
+                /* -1 is reported as the size when @offset is out of range;
+                 * NOTE(review): it is printed with %u - confirm intended */
+                CERROR("bufcount %u, bufsize %u\n",
+                       lustre_msg_bufcount(msg),
+                       (lustre_msg_bufcount(msg) <= offset) ?
+                                -1 : lustre_msg_buflen(msg, offset));
+
+        return capa;
+}
+
+/* A capability key together with a list link.
+ * presumably chained on filter_obd.fo_capa_keys - confirm against users */
+struct filter_capa_key {
+        struct list_head        k_list;
+        struct lustre_capa_key  k_key;
+};
+
+/* Sentinel capability pointer (an ERR_PTR, never dereferenceable); passed
+ * in place of a capability, e.g. to lu_object_find() by md_object_find().
+ * The expansion is parenthesized so the cast cannot bind to operators at
+ * the use site. */
+#define BYPASS_CAPA ((struct lustre_capa *)ERR_PTR(-ENOENT))
+
+#endif /* __LINUX_CAPA_H_ */
index e445bf4..9e71c09 100644 (file)
@@ -37,7 +37,7 @@
 #define LAST_RCVD         "last_received"
 #define LOV_OBJID         "lov_objid"
 #define HEALTH_CHECK      "health_check"
-
+#define CAPA_KEYS         "capa_keys"
 
 /****************** persistent mount data *********************/
 
index 911c8b4..8dc2d79 100644 (file)
@@ -58,8 +58,4 @@ struct mdt_idmap_table {
                                    [MDT_IDMAP_HASHSIZE];
 };
 
-/* remote perm */
-extern int mdc_get_remote_perm(struct obd_export *exp, const struct lu_fid *fid,
-                               struct ptlrpc_request **request);
-
 #endif
index fe92be7..dd548ac 100644 (file)
@@ -112,6 +112,7 @@ extern const struct req_format RQF_MDS_READPAGE;
 extern const struct req_format RQF_MDS_WRITEPAGE;
 extern const struct req_format RQF_MDS_IS_SUBDIR;
 extern const struct req_format RQF_MDS_DONE_WRITING;
+extern const struct req_format RQF_MDS_RENEW_CAPA;
 
 /*
  * This is format of direct (non-intent) MDS_GETATTR_NAME request.
@@ -159,6 +160,8 @@ extern const struct req_msg_field RMF_EADATA;
 extern const struct req_msg_field RMF_ACL;
 extern const struct req_msg_field RMF_LOGCOOKIES;
 extern const struct req_msg_field RMF_REINT_OPC;
+extern const struct req_msg_field RMF_CAPA1;
+extern const struct req_msg_field RMF_CAPA2;
 
 /* seq-mgr fields */
 extern const struct req_msg_field RMF_SEQ_OPC;
index 403f2f2..6b76a32 100644 (file)
@@ -194,6 +194,8 @@ struct md_object_operations {
                          struct md_object *obj,
                          struct md_attr *ma,
                          struct md_ucred *uc);
+        int (*moo_capa_get)(const struct lu_context *, struct md_object *,
+                            struct lustre_capa *);
 };
 
 /*
@@ -290,6 +292,13 @@ struct md_device_operations {
                           struct md_device *m,
                           struct kstatfs *sfs,
                           struct md_ucred *uc);
+
+        int (*mdo_init_capa_keys)(struct md_device *m,
+                                  struct lustre_capa_key *keys);
+
+        int (*mdo_update_capa_key)(const struct lu_context *ctx,
+                                   struct md_device *m,
+                                   struct lustre_capa_key *key);
 };
 
 enum md_upcall_event {
@@ -493,6 +502,14 @@ static inline int mo_ref_del(const struct lu_context *cx,
         return m->mo_ops->moo_ref_del(cx, m, ma, uc);
 }
 
+/* Invoke the moo_capa_get method of @m, asserting first that the object
+ * implements it; returns the method's result. */
+static inline int mo_capa_get(const struct lu_context *cx,
+                              struct md_object *m,
+                              struct lustre_capa *c)
+{
+        int rc;
+
+        LASSERT(m->mo_ops->moo_capa_get);
+        rc = m->mo_ops->moo_capa_get(cx, m, c);
+
+        return rc;
+}
+
 static inline int mdo_lookup(const struct lu_context *cx,
                              struct md_object *p,
                              const char *name,
index 65cd41a..12ddad3 100644 (file)
@@ -33,6 +33,7 @@
 #include <lustre_export.h>
 #include <lustre_quota.h>
 #include <lustre_fld.h>
+#include <lustre_capa.h>
 
 #define MAX_OBD_DEVICES 8192
 
@@ -168,7 +169,10 @@ struct obd_info {
          * level. E.g. it is used for update lsm->lsm_oinfo at every recieved
          * request in osc level for enqueue requests. It is also possible to
          * update some caller data from LOV layer if needed. */
-        obd_enqueue_update_f     oi_cb_up;
+        obd_enqueue_update_f    oi_cb_up;
+        /* oss capability, its type is obd_capa in client to avoid copy.
+         * in contrary its type is lustre_capa in OSS. */
+        void                   *oi_capa;
 };
 
 /* compare all relevant fields. */
@@ -223,6 +227,7 @@ struct obd_async_page_ops {
         void (*ap_update_obdo)(void *data, int cmd, struct obdo *oa,
                                obd_valid valid);
         int  (*ap_completion)(void *data, int cmd, struct obdo *oa, int rc);
+        struct obd_capa *(*ap_lookup_capa)(void *data, int cmd);
 };
 
 /* the `oig' is passed down from a caller of obd rw methods.  the callee
@@ -397,6 +402,10 @@ struct filter_obd {
 
         int                      fo_fmd_max_num; /* per exp filter_mod_data */
         int                      fo_fmd_max_age; /* jiffies to fmd expiry */
+
+        /* capability related */
+        unsigned int             fo_fl_oss_capa;
+        struct list_head         fo_capa_keys;
 };
 
 #define OSC_MAX_RIF_DEFAULT       8
@@ -563,6 +572,7 @@ struct mds_obd {
                                          mds_fl_user_xattr:1,
                                          mds_fl_acl:1;
 
+
         /* For CMD add mds_num */
         int                              mds_num;
 
@@ -571,6 +581,9 @@ struct mds_obd {
 
         /* root squash */
         struct rootsquash_info          *mds_rootsquash_info;
+
+        /* for capability keys update */
+        struct lustre_capa_key          *mds_capa_keys;
 };
 
 struct echo_obd {
@@ -953,6 +966,7 @@ enum obd_cleanup_stage {
 #define KEY_INIT_RECOV          "initial_recov"
 #define KEY_INIT_RECOV_BACKUP   "init_recov_bk"
 #define KEY_FLUSH_CTX           "flush_ctx"
+#define KEY_CAPA_KEY            "capa_key"
 
 struct lu_context;
 
@@ -1014,7 +1028,7 @@ struct obd_ops {
                         struct lov_stripe_md **ea, struct obd_trans_info *oti);
         int (*o_destroy)(struct obd_export *exp, struct obdo *oa,
                          struct lov_stripe_md *ea, struct obd_trans_info *oti,
-                         struct obd_export *md_exp);
+                         struct obd_export *md_exp, void *capa);
         int (*o_setattr)(struct obd_export *exp, struct obd_info *oinfo,
                          struct obd_trans_info *oti);
         int (*o_setattr_async)(struct obd_export *exp, struct obd_info *oinfo,
@@ -1066,7 +1080,8 @@ struct obd_ops {
                        struct obd_trans_info *oti,
                        struct ptlrpc_request_set *rqset);
         int (*o_sync)(struct obd_export *exp, struct obdo *oa,
-                      struct lov_stripe_md *ea, obd_size start, obd_size end);
+                      struct lov_stripe_md *ea, obd_size start, obd_size end,
+                      void *capa);
         int (*o_migrate)(struct lustre_handle *conn, struct lov_stripe_md *dst,
                          struct lov_stripe_md *src, obd_size start,
                          obd_size end, struct obd_trans_info *oti);
@@ -1079,7 +1094,8 @@ struct obd_ops {
         int (*o_preprw)(int cmd, struct obd_export *exp, struct obdo *oa,
                         int objcount, struct obd_ioobj *obj,
                         int niocount, struct niobuf_remote *remote,
-                        struct niobuf_local *local, struct obd_trans_info *oti);
+                        struct niobuf_local *local, struct obd_trans_info *oti,
+                        struct lustre_capa *capa);
         int (*o_commitrw)(int cmd, struct obd_export *exp, struct obdo *oa,
                           int objcount, struct obd_ioobj *obj,
                           int niocount, struct niobuf_local *local,
@@ -1111,7 +1127,7 @@ struct obd_ops {
         
         /* metadata-only methods */
         int (*o_pin)(struct obd_export *, const struct lu_fid *fid,
-                     struct obd_client_handle *, int flag);
+                     struct obd_capa *, struct obd_client_handle *, int flag);
         int (*o_unpin)(struct obd_export *, struct obd_client_handle *, int);
 
         int (*o_import_event)(struct obd_device *, struct obd_import *,
@@ -1134,7 +1150,8 @@ struct obd_ops {
 };
 
 struct md_ops {
-        int (*m_getstatus)(struct obd_export *, struct lu_fid *);
+        int (*m_getstatus)(struct obd_export *, struct lu_fid *,
+                           struct obd_capa **);
         int (*m_change_cbdata)(struct obd_export *, const struct lu_fid *,
                                ldlm_iterator_t, void *);
         int (*m_close)(struct obd_export *, struct md_op_data *,
@@ -1149,9 +1166,10 @@ struct md_ops {
                          void *, int, ldlm_completion_callback,
                          ldlm_blocking_callback, void *, int);
         int (*m_getattr)(struct obd_export *, const struct lu_fid *,
-                         obd_valid, int, struct ptlrpc_request **);
+                         struct obd_capa *, obd_valid, int,
+                         struct ptlrpc_request **);
         int (*m_getattr_name)(struct obd_export *, const struct lu_fid *,
-                              const char *, int, obd_valid,
+                              struct obd_capa *, const char *, int, obd_valid,
                               int, struct ptlrpc_request **);
         int (*m_intent_lock)(struct obd_export *, struct md_op_data *,
                              void *, int, struct lookup_intent *, int,
@@ -1163,24 +1181,29 @@ struct md_ops {
                         const char *, int, const char *, int,
                         struct ptlrpc_request **);
         int (*m_is_subdir)(struct obd_export *, const struct lu_fid *,
-                           const struct lu_fid *, struct ptlrpc_request **);
+                           const struct lu_fid *,
+                           struct obd_capa *, struct obd_capa *,
+                           struct ptlrpc_request **);
         int (*m_setattr)(struct obd_export *, struct md_op_data *, void *,
                          int , void *, int, struct ptlrpc_request **);
         int (*m_sync)(struct obd_export *, const struct lu_fid *,
-                      struct ptlrpc_request **);
+                      struct obd_capa *, struct ptlrpc_request **);
         int (*m_readpage)(struct obd_export *, const struct lu_fid *,
-                          __u64, struct page *, struct ptlrpc_request **);
+                          struct obd_capa *, __u64, struct page *,
+                          struct ptlrpc_request **);
 
         int (*m_unlink)(struct obd_export *, struct md_op_data *,
                         struct ptlrpc_request **);
 
         int (*m_setxattr)(struct obd_export *, const struct lu_fid *,
-                          obd_valid, const char *, const char *,
-                          int, int, int, struct ptlrpc_request **);
+                          struct obd_capa *, obd_valid, const char *,
+                          const char *, int, int, int,
+                          struct ptlrpc_request **);
 
         int (*m_getxattr)(struct obd_export *, const struct lu_fid *,
-                          obd_valid, const char *, const char *,
-                          int, int, int, struct ptlrpc_request **);
+                          struct obd_capa *, obd_valid, const char *,
+                          const char *, int, int, int,
+                          struct ptlrpc_request **);
 
         int (*m_init_ea_size)(struct obd_export *, int, int, int);
 
@@ -1203,9 +1226,11 @@ struct md_ops {
 
         int (*m_cancel_unused)(struct obd_export *, const struct lu_fid *,
                                int flags, void *opaque);
+        int (*m_renew_capa)(struct obd_export *, struct obd_capa *oc,
+                            renew_capa_cb_t cb);
 
         int (*m_get_remote_perm)(struct obd_export *, const struct lu_fid *,
-                                 struct ptlrpc_request **);
+                                 struct obd_capa *, struct ptlrpc_request **);
 
         /*
          * NOTE: If adding ops, add another LPROCFS_MD_OP_INIT() line to
@@ -1280,4 +1305,14 @@ static inline void init_obd_quota_ops(quota_interface_t *interface,
         obd_ops->o_quotactl = QUOTA_OP(interface, ctl);
 }
 
+static inline __u64 oinfo_mdsno(struct obd_info *oinfo)
+{       /* oi_oa->o_gr expressed as an offset from FILTER_GROUP_MDS0 */
+        return oinfo->oi_oa->o_gr - FILTER_GROUP_MDS0;
+}
+
+static inline struct lustre_capa *oinfo_capa(struct obd_info *oinfo)
+{       /* oi_capa is void *; it holds a lustre_capa only on the OSS side */
+        return oinfo->oi_capa;
+}
+
 #endif /* __OBD_H */
index 94b3743..d5afcd1 100644 (file)
@@ -609,7 +609,7 @@ static inline int obd_create(struct obd_export *exp, struct obdo *obdo,
 static inline int obd_destroy(struct obd_export *exp, struct obdo *obdo,
                               struct lov_stripe_md *ea,
                               struct obd_trans_info *oti,
-                              struct obd_export *md_exp)
+                              struct obd_export *md_exp, void *capa)
 {
         int rc;
         ENTRY;
@@ -617,7 +617,7 @@ static inline int obd_destroy(struct obd_export *exp, struct obdo *obdo,
         EXP_CHECK_DT_OP(exp, destroy);
         OBD_COUNTER_INCREMENT(exp->exp_obd, destroy);
 
-        rc = OBP(exp->exp_obd, destroy)(exp, obdo, ea, oti, md_exp);
+        rc = OBP(exp->exp_obd, destroy)(exp, obdo, ea, oti, md_exp, capa);
         RETURN(rc);
 }
 
@@ -995,7 +995,7 @@ static inline int obd_statfs(struct obd_device *obd, struct obd_statfs *osfs,
 
 static inline int obd_sync(struct obd_export *exp, struct obdo *oa,
                            struct lov_stripe_md *ea, obd_size start,
-                           obd_size end)
+                           obd_size end, void *capa)
 {
         int rc;
         ENTRY;
@@ -1003,7 +1003,7 @@ static inline int obd_sync(struct obd_export *exp, struct obdo *oa,
         OBD_CHECK_DT_OP(exp->exp_obd, sync, -EOPNOTSUPP);
         OBD_COUNTER_INCREMENT(exp->exp_obd, sync);
 
-        rc = OBP(exp->exp_obd, sync)(exp, oa, ea, start, end);
+        rc = OBP(exp->exp_obd, sync)(exp, oa, ea, start, end, capa);
         RETURN(rc);
 }
 
@@ -1086,7 +1086,8 @@ static inline int obd_brw_async(int cmd, struct obd_export *exp,
 static inline int obd_brw_rqset(int cmd, struct obd_export *exp,
                                 struct obdo *oa, struct lov_stripe_md *lsm,
                                 obd_count oa_bufs, struct brw_page *pg,
-                                struct obd_trans_info *oti)
+                                struct obd_trans_info *oti,
+                                struct obd_capa *ocapa)
 {
         struct ptlrpc_request_set *set = NULL;
         struct obd_info oinfo = { { { 0 } } };
@@ -1099,6 +1100,7 @@ static inline int obd_brw_rqset(int cmd, struct obd_export *exp,
 
         oinfo.oi_oa = oa;
         oinfo.oi_md = lsm;
+        oinfo.oi_capa = ocapa;
         rc = obd_brw_async(cmd, exp, &oinfo, oa_bufs, pg, oti, set);
         if (rc == 0) {
                 rc = ptlrpc_set_wait(set);
@@ -1217,7 +1219,8 @@ static inline int obd_preprw(int cmd, struct obd_export *exp, struct obdo *oa,
                              int objcount, struct obd_ioobj *obj,
                              int niocount, struct niobuf_remote *remote,
                              struct niobuf_local *local,
-                             struct obd_trans_info *oti)
+                             struct obd_trans_info *oti,
+                             struct lustre_capa *capa)
 {
         int rc;
         ENTRY;
@@ -1226,7 +1229,7 @@ static inline int obd_preprw(int cmd, struct obd_export *exp, struct obdo *oa,
         OBD_COUNTER_INCREMENT(exp->exp_obd, preprw);
 
         rc = OBP(exp->exp_obd, preprw)(cmd, exp, oa, objcount, obj, niocount,
-                                       remote, local, oti);
+                                       remote, local, oti, capa);
         RETURN(rc);
 }
 
@@ -1394,14 +1397,15 @@ static inline int obd_join_lru(struct obd_export *exp,
 }
 
 static inline int obd_pin(struct obd_export *exp, const struct lu_fid *fid,
-                          struct obd_client_handle *handle, int flag)
+                          struct obd_capa *oc, struct obd_client_handle *handle,
+                          int flag)
 {
         int rc;
 
         EXP_CHECK_DT_OP(exp, pin);
         OBD_COUNTER_INCREMENT(exp->exp_obd, pin);
 
-        rc = OBP(exp->exp_obd, pin)(exp, fid, handle, flag);
+        rc = OBP(exp->exp_obd, pin)(exp, fid, oc, handle, flag);
         return(rc);
 }
 
@@ -1561,27 +1565,26 @@ static inline int obd_register_observer(struct obd_device *obd,
 
 /* metadata helpers */
 static inline int md_getstatus(struct obd_export *exp,
-                               struct lu_fid *fid)
+                               struct lu_fid *fid, struct obd_capa **pc)
 {
         int rc;
         ENTRY;
 
         EXP_CHECK_MD_OP(exp, getstatus);
         MD_COUNTER_INCREMENT(exp->exp_obd, getstatus);
-        rc = MDP(exp->exp_obd, getstatus)(exp, fid);
+        rc = MDP(exp->exp_obd, getstatus)(exp, fid, pc);
         RETURN(rc);
 }
 
-static inline int md_getattr(struct obd_export *exp,
-                             const struct lu_fid *fid,
-                             obd_valid valid, int ea_size,
+static inline int md_getattr(struct obd_export *exp, const struct lu_fid *fid,
+                             struct obd_capa *oc, obd_valid valid, int ea_size,
                              struct ptlrpc_request **request)
 {
         int rc;
         ENTRY;
         EXP_CHECK_MD_OP(exp, getattr);
         MD_COUNTER_INCREMENT(exp->exp_obd, getattr);
-        rc = MDP(exp->exp_obd, getattr)(exp, fid, valid,
+        rc = MDP(exp->exp_obd, getattr)(exp, fid, oc, valid,
                                         ea_size, request);
         RETURN(rc);
 }
@@ -1598,8 +1601,7 @@ static inline int md_change_cbdata(struct obd_export *exp,
         RETURN(rc);
 }
 
-static inline int md_close(struct obd_export *exp,
-                           struct md_op_data *op_data,
+static inline int md_close(struct obd_export *exp, struct md_op_data *op_data,
                            struct obd_client_handle *och,
                            struct ptlrpc_request **request)
 {
@@ -1612,8 +1614,8 @@ static inline int md_close(struct obd_export *exp,
 }
 
 static inline int md_create(struct obd_export *exp, struct md_op_data *op_data,
-                            const void *data, int datalen, int mode,
-                            __u32 uid, __u32 gid, __u32 cap_effective, __u64 rdev,
+                            const void *data, int datalen, int mode, __u32 uid,
+                            __u32 gid, __u32 cap_effective, __u64 rdev,
                             struct ptlrpc_request **request)
 {
         int rc;
@@ -1658,7 +1660,7 @@ static inline int md_enqueue(struct obd_export *exp, int lock_type,
 }
 
 static inline int md_getattr_name(struct obd_export *exp,
-                                  const struct lu_fid *fid,
+                                  const struct lu_fid *fid, struct obd_capa *oc,
                                   const char *name, int namelen,
                                   obd_valid valid, int ea_size,
                                   struct ptlrpc_request **request)
@@ -1667,15 +1669,14 @@ static inline int md_getattr_name(struct obd_export *exp,
         ENTRY;
         EXP_CHECK_MD_OP(exp, getattr_name);
         MD_COUNTER_INCREMENT(exp->exp_obd, getattr_name);
-        rc = MDP(exp->exp_obd, getattr_name)(exp, fid, name, namelen,
+        rc = MDP(exp->exp_obd, getattr_name)(exp, fid, oc, name, namelen,
                                              valid, ea_size, request);
         RETURN(rc);
 }
 
 static inline int md_intent_lock(struct obd_export *exp,
-                                 struct md_op_data *op_data,
-                                 void *lmm, int lmmsize,
-                                 struct lookup_intent *it,
+                                 struct md_op_data *op_data, void *lmm,
+                                 int lmmsize, struct lookup_intent *it,
                                  int flags, struct ptlrpc_request **reqp,
                                  ldlm_blocking_callback cb_blocking,
                                  int extra_lock_flags)
@@ -1690,8 +1691,7 @@ static inline int md_intent_lock(struct obd_export *exp,
         RETURN(rc);
 }
 
-static inline int md_link(struct obd_export *exp,
-                          struct md_op_data *op_data,
+static inline int md_link(struct obd_export *exp, struct md_op_data *op_data,
                           struct ptlrpc_request **request)
 {
         int rc;
@@ -1702,11 +1702,9 @@ static inline int md_link(struct obd_export *exp,
         RETURN(rc);
 }
 
-static inline int md_rename(struct obd_export *exp,
-                            struct md_op_data *op_data,
-                            const char *old, int oldlen,
-                            const char *new, int newlen,
-                            struct ptlrpc_request **request)
+static inline int md_rename(struct obd_export *exp, struct md_op_data *op_data,
+                            const char *old, int oldlen, const char *new,
+                            int newlen, struct ptlrpc_request **request)
 {
         int rc;
         ENTRY;
@@ -1720,13 +1718,14 @@ static inline int md_rename(struct obd_export *exp,
 static inline int md_is_subdir(struct obd_export *exp,
                                const struct lu_fid *pfid,
                                const struct lu_fid *cfid,
+                               struct obd_capa *pc, struct obd_capa *cc,
                                struct ptlrpc_request **request)
 {
         int rc;
         ENTRY;
         EXP_CHECK_MD_OP(exp, is_subdir);
         MD_COUNTER_INCREMENT(exp->exp_obd, is_subdir);
-        rc = MDP(exp->exp_obd, is_subdir)(exp, pfid, cfid, request);
+        rc = MDP(exp->exp_obd, is_subdir)(exp, pfid, cfid, pc, cc, request);
         RETURN(rc);
 }
 
@@ -1743,28 +1742,27 @@ static inline int md_setattr(struct obd_export *exp, struct md_op_data *op_data,
         RETURN(rc);
 }
 
-static inline int md_sync(struct obd_export *exp,
-                          const struct lu_fid *fid,
-                          struct ptlrpc_request **request)
+static inline int md_sync(struct obd_export *exp, const struct lu_fid *fid,
+                          struct obd_capa *oc, struct ptlrpc_request **request)
 {
         int rc;
         ENTRY;
         EXP_CHECK_MD_OP(exp, sync);
         MD_COUNTER_INCREMENT(exp->exp_obd, sync);
-        rc = MDP(exp->exp_obd, sync)(exp, fid, request);
+        rc = MDP(exp->exp_obd, sync)(exp, fid, oc, request);
         RETURN(rc);
 }
 
-static inline int md_readpage(struct obd_export *exp,
-                              const struct lu_fid *fid,
-                              __u64 offset, struct page *page,
+static inline int md_readpage(struct obd_export *exp, const struct lu_fid *fid,
+                              struct obd_capa *oc, __u64 offset,
+                              struct page *page,
                               struct ptlrpc_request **request)
 {
         int rc;
         ENTRY;
         EXP_CHECK_MD_OP(exp, readpage);
         MD_COUNTER_INCREMENT(exp->exp_obd, readpage);
-        rc = MDP(exp->exp_obd, readpage)(exp, fid, offset, page, request);
+        rc = MDP(exp->exp_obd, readpage)(exp, fid, oc, offset, page, request);
         RETURN(rc);
 }
 
@@ -1802,7 +1800,7 @@ static inline int md_free_lustre_md(struct obd_export *exp,
 }
 
 static inline int md_setxattr(struct obd_export *exp,
-                              const struct lu_fid *fid,
+                              const struct lu_fid *fid, struct obd_capa *oc,
                               obd_valid valid, const char *name,
                               const char *input, int input_size,
                               int output_size, int flags,
@@ -1811,13 +1809,13 @@ static inline int md_setxattr(struct obd_export *exp,
         ENTRY;
         EXP_CHECK_MD_OP(exp, setxattr);
         MD_COUNTER_INCREMENT(exp->exp_obd, setxattr);
-        RETURN(MDP(exp->exp_obd, setxattr)(exp, fid, valid, name, input,
+        RETURN(MDP(exp->exp_obd, setxattr)(exp, fid, oc, valid, name, input,
                                            input_size, output_size, flags,
                                            request));
 }
 
 static inline int md_getxattr(struct obd_export *exp,
-                              const struct lu_fid *fid,
+                              const struct lu_fid *fid, struct obd_capa *oc,
                               obd_valid valid, const char *name,
                               const char *input, int input_size,
                               int output_size, int flags,
@@ -1826,7 +1824,7 @@ static inline int md_getxattr(struct obd_export *exp,
         ENTRY;
         EXP_CHECK_MD_OP(exp, getxattr);
         MD_COUNTER_INCREMENT(exp->exp_obd, getxattr);
-        RETURN(MDP(exp->exp_obd, getxattr)(exp, fid, valid, name, input,
+        RETURN(MDP(exp->exp_obd, getxattr)(exp, fid, oc, valid, name, input,
                                            input_size, output_size, flags,
                                            request));
 }
@@ -1885,26 +1883,36 @@ static inline int md_lock_match(struct obd_export *exp, int flags,
                                              policy, mode, lockh));
 }
 
-static inline int md_init_ea_size(struct obd_export *exp,
-                                  int easize, int def_asize,
-                                  int cookiesize)
+static inline int md_init_ea_size(struct obd_export *exp, int easize,
+                                  int def_asize, int cookiesize)
 {
         ENTRY;
         EXP_CHECK_MD_OP(exp, init_ea_size);
         MD_COUNTER_INCREMENT(exp->exp_obd, init_ea_size);
-        RETURN(MDP(exp->exp_obd, init_ea_size)(exp, easize,
-                                               def_asize,
+        RETURN(MDP(exp->exp_obd, init_ea_size)(exp, easize, def_asize,
                                                cookiesize));
 }
 
 static inline int md_get_remote_perm(struct obd_export *exp,
                                      const struct lu_fid *fid,
+                                     struct obd_capa *oc,
                                      struct ptlrpc_request **request)
 {
         ENTRY;
         EXP_CHECK_MD_OP(exp, get_remote_perm);
         MD_COUNTER_INCREMENT(exp->exp_obd, get_remote_perm);
-        RETURN(MDP(exp->exp_obd, get_remote_perm)(exp, fid, request));
+        RETURN(MDP(exp->exp_obd, get_remote_perm)(exp, fid, oc, request));
+}
+
+static inline int md_renew_capa(struct obd_export *exp, struct obd_capa *ocapa,
+                                renew_capa_cb_t cb)
+{       /* thin wrapper: invoke the renew_capa MD op of exp->exp_obd */
+        int rc;
+        ENTRY;
+        EXP_CHECK_MD_OP(exp, renew_capa); /* presumably validates exp/op */
+        MD_COUNTER_INCREMENT(exp->exp_obd, renew_capa);
+        rc = MDP(exp->exp_obd, renew_capa)(exp, ocapa, cb);
+        RETURN(rc);
 }
 
 /* OBD Metadata Support */
@@ -1925,14 +1933,12 @@ static inline void obdo_free(struct obdo *oa)
         OBD_SLAB_FREE(oa, obdo_cachep, sizeof(*oa));
 }
 
-static inline void obdo2fid(struct obdo *oa,
-                            struct lu_fid *fid)
+static inline void obdo2fid(struct obdo *oa, struct lu_fid *fid)
 {
         /* something here */
 }
 
-static inline void fid2obdo(struct lu_fid *fid,
-                            struct obdo *oa)
+static inline void fid2obdo(struct lu_fid *fid, struct obdo *oa)
 {
         /* something here */
 }
index 2ec7f9b..6a93eda 100644 (file)
@@ -97,6 +97,8 @@ extern int obd_race_state;
 #define OBD_FAIL_MDS_WRITEPAGE_PACK      0x136
 #define OBD_FAIL_MDS_IS_SUBDIR_NET       0x137
 #define OBD_FAIL_MDS_IS_SUBDIR_PACK      0x138
+#define OBD_FAIL_MDS_RENEW_CAPA_NET      0x139
+#define OBD_FAIL_MDS_RENEW_CAPA_PACK     0x13a
 
 #define OBD_FAIL_OST                     0x200
 #define OBD_FAIL_OST_CONNECT_NET         0x201
index ca1f72e..6e206c3 100644 (file)
@@ -102,7 +102,7 @@ static int llu_dir_do_readpage(struct inode *inode, struct page *page)
         ldlm_lock_dump_handle(D_OTHER, &lockh);
 
         offset = page->index << PAGE_SHIFT;
-        rc = md_readpage(sbi->ll_md_exp, &lli->lli_fid,
+        rc = md_readpage(sbi->ll_md_exp, &lli->lli_fid, NULL,
                          offset, page, &request);
         if (!rc) {
                 body = lustre_msg_buf(request->rq_repmsg, REPLY_REC_OFF,
index 6779f1b..8a74c18 100644 (file)
@@ -300,7 +300,7 @@ int llu_objects_destroy(struct ptlrpc_request *request, struct inode *dir)
                 }
         }
 
-        rc = obd_destroy(llu_i2obdexp(dir), oa, lsm, &oti, NULL);
+        rc = obd_destroy(llu_i2obdexp(dir), oa, lsm, &oti, NULL, NULL);
         obdo_free(oa);
         if (rc)
                 CERROR("obd destroy objid 0x"LPX64" error %d\n",
index 89a14b5..1950081 100644 (file)
@@ -451,7 +451,7 @@ static int llu_inode_revalidate(struct inode *inode)
                         valid |= OBD_MD_FLEASIZE;
                 }
                 rc = md_getattr(sbi->ll_md_exp, ll_inode2fid(inode),
-                                valid, ealen, &req);
+                                NULL, valid, ealen, &req);
                 if (rc) {
                         CERROR("failure %d inode %llu\n", rc,
                                (long long)llu_i2stat(inode)->st_ino);
@@ -982,7 +982,7 @@ static int llu_readlink_internal(struct inode *inode,
                 RETURN(0);
         }
 
-        rc = md_getattr(sbi->ll_md_exp, ll_inode2fid(inode),
+        rc = md_getattr(sbi->ll_md_exp, ll_inode2fid(inode), NULL,
                         OBD_MD_LINKNAME, symlen, request);
         if (rc) {
                 CERROR("inode %llu: rc = %d\n", (long long)st->st_ino, rc);
@@ -2124,7 +2124,7 @@ llu_fsswop_mount(const char *source,
 
         llu_init_ea_size(sbi->ll_md_exp, sbi->ll_dt_exp);
 
-        err = md_getstatus(sbi->ll_md_exp, &rootfid);
+        err = md_getstatus(sbi->ll_md_exp, &rootfid, NULL);
         if (err) {
                 CERROR("cannot mds_connect: rc = %d\n", err);
                 GOTO(out_dt_fid, err);
@@ -2133,7 +2133,7 @@ llu_fsswop_mount(const char *source,
         sbi->ll_root_fid = rootfid;
 
         /* fetch attr of root inode */
-        err = md_getattr(sbi->ll_md_exp, &rootfid,
+        err = md_getattr(sbi->ll_md_exp, &rootfid, NULL,
                          OBD_MD_FLGETATTR | OBD_MD_FLBLOCKS, 0, &request);
         if (err) {
                 CERROR("md_getattr failed for root: rc = %d\n", err);
index 2a671f3..957e5f0 100644 (file)
@@ -1,5 +1,7 @@
 MODULES := lustre
-lustre-objs := dcache.o dir.o file.o llite_close.o llite_lib.o llite_nfs.o llite_fid.o rw.o lproc_llite.o namei.o symlink.o llite_mmap.o xattr.o remote_perm.o
+lustre-objs := dcache.o dir.o file.o llite_close.o llite_lib.o llite_nfs.o
+lustre-objs += llite_fid.o rw.o lproc_llite.o namei.o symlink.o llite_mmap.o
+lustre-objs += xattr.o remote_perm.o llite_capa.o
 
 ifeq ($(PATCHLEVEL),4)
 lustre-objs += rw24.o super.o
index 590010d..bbdefd6 100644 (file)
@@ -616,6 +616,7 @@ do_lookup:
         struct ll_sb_info *sbi = ll_i2sbi(inode);
         struct ll_dentry_data *ldd = ll_d2d(de);
         struct obd_client_handle *handle;
+        struct obd_capa *oc;
         int rc = 0;
         ENTRY;
         LASSERT(ldd);
@@ -639,9 +640,9 @@ do_lookup:
         unlock_kernel();
 
         handle = (flag) ? &ldd->lld_mnt_och : &ldd->lld_cwd_och;
-        rc = obd_pin(sbi->ll_md_exp, &ll_i2info(inode)->lli_fid,
-                     handle, flag);
-
+        oc = ll_i2mdscapa(inode);
+        rc = obd_pin(sbi->ll_md_exp, ll_inode2fid(inode), oc, handle, flag);
+        capa_put(oc);
         if (rc) {
                 lock_kernel();
                 memset(handle, 0, sizeof(*handle));
index 81f0dc7..3f97fcf 100644 (file)
@@ -142,6 +142,7 @@ static int ll_dir_readpage(struct file *file, struct page *page)
         struct inode *inode = page->mapping->host;
         struct ptlrpc_request *request;
         struct mdt_body *body;
+        struct obd_capa *oc;
         __u64 hash;
         int rc;
         ENTRY;
@@ -150,8 +151,10 @@ static int ll_dir_readpage(struct file *file, struct page *page)
         CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p) off %lu\n",
                inode->i_ino, inode->i_generation, inode, (unsigned long)hash);
 
+        oc = ll_i2mdscapa(inode);
         rc = md_readpage(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode),
-                         hash, page, &request);
+                         oc, hash, page, &request);
+        capa_put(oc);
         if (!rc) {
                 body = lustre_msg_buf(request->rq_repmsg, REPLY_REC_OFF,
                                       sizeof(*body));
@@ -579,6 +582,7 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file,
                 int namelen, rc, len = 0;
                 char *buf = NULL;
                 char *filename;
+                struct obd_capa *oc;
 
                 rc = obd_ioctl_getdata(&buf, &len, (void *)arg);
                 if (rc)
@@ -593,9 +597,11 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file,
                         GOTO(out, rc = -EINVAL);
                 }
 
-                rc = md_getattr_name(sbi->ll_md_exp, ll_inode2fid(inode),
+                oc = ll_i2mdscapa(inode);
+                rc = md_getattr_name(sbi->ll_md_exp, ll_inode2fid(inode), oc,
                                      filename, namelen, OBD_MD_FLID, 0,
                                      &request);
+                capa_put(oc);
                 if (rc < 0) {
                         CDEBUG(D_INFO, "md_getattr_name: %d\n", rc);
                         GOTO(out, rc);
@@ -618,9 +624,6 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file,
                 if (op_data == NULL)
                         RETURN(-ENOMEM);
 
-                ll_prepare_md_op_data(op_data, inode,
-                                      NULL, NULL, 0, 0);
-
                 LASSERT(sizeof(lum) == sizeof(*lump));
                 LASSERT(sizeof(lum.lmm_objects[0]) ==
                         sizeof(lump->lmm_objects[0]));
@@ -640,8 +643,10 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file,
                         lustre_swab_lov_user_md(&lum);
 
                 /* swabbing is done in lov_setstripe() on server side */
+                ll_prepare_md_op_data(op_data, inode, NULL, NULL, 0, 0);
                 rc = md_setattr(sbi->ll_md_exp, op_data, &lum,
                                 sizeof(lum), NULL, 0, &request);
+                ll_finish_md_op_data(op_data);
                 if (rc) {
                         if (rc != -EPERM && rc != -EACCES)
                                 CERROR("md_setattr fails: rc = %d\n", rc);
@@ -661,6 +666,7 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file,
                 struct lov_mds_md *lmm = NULL;
                 struct mdt_body *body;
                 char *filename = NULL;
+                struct obd_capa *oc;
                 int rc, lmmsize;
 
                 rc = ll_get_max_mdsize(sbi, &lmmsize);
@@ -673,19 +679,24 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file,
                         if (IS_ERR(filename))
                                 RETURN(PTR_ERR(filename));
 
-                        rc = md_getattr_name(sbi->ll_md_exp, ll_inode2fid(inode),
+                        oc = ll_i2mdscapa(inode);
+                        rc = md_getattr_name(sbi->ll_md_exp,
+                                             ll_inode2fid(inode), oc,
                                              filename, strlen(filename) + 1,
                                              OBD_MD_FLEASIZE | OBD_MD_FLDIREA,
                                              lmmsize, &request);
+                        capa_put(oc);
                         if (rc < 0) {
                                 CDEBUG(D_INFO, "md_getattr_name failed "
                                        "on %s: rc %d\n", filename, rc);
                                 GOTO(out_name, rc);
                         }
                 } else {
-                        rc = md_getattr(sbi->ll_md_exp, ll_inode2fid(inode),
+                        oc = ll_i2mdscapa(inode);
+                        rc = md_getattr(sbi->ll_md_exp, ll_inode2fid(inode), oc,
                                         OBD_MD_FLEASIZE | OBD_MD_FLDIREA,
                                         lmmsize, &request);
+                        capa_put(oc);
                         if (rc < 0) {
                                 CDEBUG(D_INFO, "md_getattr failed on inode "
                                        "%lu/%u: rc %d\n", inode->i_ino,
index 0f55396..bbc56b5 100644 (file)
@@ -60,6 +60,7 @@ void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
         ((struct ll_iattr *)&op_data->attr)->ia_attr_flags = inode->i_flags;
         op_data->ioepoch = ll_i2info(inode)->lli_ioepoch;
         memcpy(&op_data->handle, fh, sizeof(op_data->handle));
+        op_data->mod_capa1 = ll_i2mdscapa(inode);
 }
 
 static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data,
@@ -138,6 +139,9 @@ static int ll_close_inode_openhandle(struct obd_export *md_exp,
         epoch_close = (op_data->flags & MF_EPOCH_CLOSE) || 
                       !S_ISREG(inode->i_mode);
         rc = md_close(md_exp, op_data, och, &req);
+
+        ll_finish_md_op_data(op_data);
+        OBD_FREE_PTR(op_data);
         if (rc == -EAGAIN) {
                 /* This close must have closed the epoch. */
                 LASSERT(epoch_close);
@@ -157,8 +161,6 @@ static int ll_close_inode_openhandle(struct obd_export *md_exp,
         if (!epoch_close)
                 ll_init_done_writing(inode);
 
-        OBD_FREE_PTR(op_data);
-
         if (rc == 0) {
                 rc = ll_objects_destroy(req, inode);
                 if (rc)
@@ -282,6 +284,7 @@ int ll_md_close(struct obd_export *md_exp, struct inode *inode,
         
         LUSTRE_FPRIVATE(file) = NULL;
         ll_file_data_put(fd);
+        ll_oss_capa_close(inode, file);
 
         RETURN(rc);
 }
@@ -360,6 +363,8 @@ static int ll_intent_file_open(struct file *file, void *lmm,
         rc = md_enqueue(sbi->ll_md_exp, LDLM_IBITS, itp, LCK_PW, op_data,
                         &lockh, lmm, lmmsize, ldlm_completion_ast,
                         ll_md_blocking_ast, NULL, 0);
+
+        ll_finish_md_op_data(op_data);
         OBD_FREE_PTR(op_data);
         if (rc < 0) {
                 CERROR("lock enqueue: err: %d\n", rc);
@@ -590,6 +595,8 @@ int ll_file_open(struct inode *inode, struct file *file)
         if (!S_ISREG(inode->i_mode))
                 GOTO(out, rc);
 
+        ll_oss_capa_open(inode, file);
+
         lsm = lli->lli_smd;
         if (lsm == NULL) {
                 if (file->f_flags & O_LOV_DELAY_CREATE ||
@@ -639,6 +646,7 @@ int ll_inode_getattr(struct inode *inode, struct obdo *obdo)
                                OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
                                OBD_MD_FLBLKSZ | OBD_MD_FLMTIME |
                                OBD_MD_FLCTIME | OBD_MD_FLGROUP;
+        oinfo.oi_capa = ll_i2mdscapa(inode);
 
         set = ptlrpc_prep_set();
         if (set == NULL) {
@@ -650,6 +658,7 @@ int ll_inode_getattr(struct inode *inode, struct obdo *obdo)
                         rc = ptlrpc_set_wait(set);
                 ptlrpc_set_destroy(set);
         }
+        capa_put(oinfo.oi_capa);
         if (rc)
                 RETURN(rc);
 
@@ -2215,6 +2224,7 @@ int ll_fsync(struct file *file, struct dentry *dentry, int data)
         struct ll_inode_info *lli = ll_i2info(inode);
         struct lov_stripe_md *lsm = lli->lli_smd;
         struct ptlrpc_request *req;
+        struct obd_capa *oc;
         int rc, err;
         ENTRY;
         CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
@@ -2238,8 +2248,10 @@ int ll_fsync(struct file *file, struct dentry *dentry, int data)
                         rc = err;
         }
 
-        err = md_sync(ll_i2sbi(inode)->ll_md_exp,
-                      ll_inode2fid(inode), &req);
+        oc = ll_i2mdscapa(inode);
+        err = md_sync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), oc,
+                      &req);
+        capa_put(oc);
         if (!rc)
                 rc = err;
         if (!err)
@@ -2247,6 +2259,7 @@ int ll_fsync(struct file *file, struct dentry *dentry, int data)
 
         if (data && lsm) {
                 struct obdo *oa = obdo_alloc();
+                struct obd_capa *ocapa;
 
                 if (!oa)
                         RETURN(rc ? rc : -ENOMEM);
@@ -2257,8 +2270,10 @@ int ll_fsync(struct file *file, struct dentry *dentry, int data)
                                            OBD_MD_FLMTIME | OBD_MD_FLCTIME |
                                            OBD_MD_FLGROUP);
 
+                ocapa = ll_lookup_oss_capa(inode, CAPA_OPC_OSS_WRITE);
                 err = obd_sync(ll_i2sbi(inode)->ll_dt_exp, oa, lsm,
-                               0, OBD_OBJECT_EOF);
+                               0, OBD_OBJECT_EOF, ocapa);
+                capa_put(ocapa);
                 if (!rc)
                         rc = err;
                 obdo_free(oa);
@@ -2464,6 +2479,7 @@ int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it)
                 struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
                 obd_valid valid = OBD_MD_FLGETATTR;
                 int ealen = 0;
+                struct obd_capa *oc;
 
                 if (S_ISREG(inode->i_mode)) {
                         rc = ll_get_max_mdsize(sbi, &ealen);
@@ -2471,7 +2487,10 @@ int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it)
                                 RETURN(rc); 
                         valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE;
                 }
-                rc = md_getattr(sbi->ll_md_exp, ll_inode2fid(inode), valid, ealen, &req);
+                oc = ll_i2mdscapa(inode);
+                rc = md_getattr(sbi->ll_md_exp, ll_inode2fid(inode), oc, valid,
+                                ealen, &req);
+                capa_put(oc);
                 if (rc) {
                         rc = ll_inode_revalidate_fini(inode, rc);
                         RETURN(rc);
diff --git a/lustre/llite/llite_capa.c b/lustre/llite/llite_capa.c
new file mode 100644 (file)
index 0000000..5bbae3f
--- /dev/null
@@ -0,0 +1,651 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2005 Cluster File Systems, Inc.
+ *
+ * Author: Lai Siyao <lsy@clusterfs.com>
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#define DEBUG_SUBSYSTEM S_LLITE
+
+#include <linux/fs.h>
+#include <linux/version.h>
+#include <asm/uaccess.h>
+#include <linux/file.h>
+#include <linux/kmod.h>
+
+#include <lustre_lite.h>
+#include "llite_internal.h"
+
+/* for obd_capa.c_list, client capa might stay in three places:
+ * 1. ll_capa_list.
+ * 2. ll_idle_capas.
+ * 3. stand alone: just allocated.
+ */
+
+/* capas for oss writeback and those failed to renew */
+static LIST_HEAD(ll_idle_capas);
+static struct ptlrpc_thread ll_capa_thread;
+static struct list_head *ll_capa_list = &capa_list[CAPA_SITE_CLIENT];
+
+/* llite capa renewal timer */
+cfs_timer_t ll_capa_timer;
+/* for debug: indicate whether capa on llite is enabled or not */
+static atomic_t ll_capa_debug = ATOMIC_INIT(0);
+
+/* Re-arm ll_capa_timer to fire at @expiry when the timer is not armed, or
+ * when @expiry is earlier than the deadline it currently holds.  @ocapa is
+ * only used to tag the debug message. */
+static inline void update_capa_timer(struct obd_capa *ocapa, cfs_time_t expiry)
+{
+        /* armed and its deadline is not later than @expiry: leave it alone */
+        if (!cfs_time_before(expiry, cfs_timer_deadline(&ll_capa_timer)) &&
+            cfs_timer_is_armed(&ll_capa_timer))
+                return;
+
+        cfs_timer_arm(&ll_capa_timer, expiry);
+        DEBUG_CAPA(D_SEC, &ocapa->c_capa, "ll_capa_timer update: %lu/%lu by",
+                   expiry, cfs_time_current());
+}
+
+/* Wake-up condition for the capa thread.
+ *
+ * Looks at the first entry of ll_capa_list and reports whether it is due for
+ * renewal; ll_idle_capas is consulted only when ll_capa_list is empty, and
+ * then reports whether its first entry has expired.  When the inspected
+ * entry is not due yet, the timer is pushed towards its renewal/expiry time
+ * instead.  Returns non-zero when there is work for the thread. */
+static inline int have_expired_capa(void)
+{
+        struct obd_capa *first = NULL;
+        int due = 0;
+
+        spin_lock(&capa_lock);
+        if (!list_empty(ll_capa_list)) {
+                first = list_entry(ll_capa_list->next, struct obd_capa,
+                                   c_list);
+                due = capa_is_to_expire(first);
+                if (!due)
+                        update_capa_timer(first, capa_renewal_time(first));
+        } else if (!list_empty(&ll_idle_capas)) {
+                first = list_entry(ll_idle_capas.next, struct obd_capa,
+                                   c_list);
+                due = capa_is_expired(first);
+                if (!due)
+                        update_capa_timer(first, first->c_expiry);
+        }
+        spin_unlock(&capa_lock);
+
+        if (due)
+                DEBUG_CAPA(D_SEC, &first->c_capa, "expired");
+        return due;
+}
+
+/* Return 1 when the capa thread has been flagged SVC_STOPPING, 0 otherwise. */
+static inline int ll_capa_check_stop(void)
+{
+        return !!(ll_capa_thread.t_flags & SVC_STOPPING);
+}
+
+/* Insert @ocapa into @head, which is kept ordered by c_expiry.
+ *
+ * The list is walked from the tail; @ocapa is linked right after the first
+ * entry met whose expiry is earlier than its own, or at the front of the
+ * list when no such entry exists. */
+static void sort_add_capa(struct obd_capa *ocapa, struct list_head *head)
+{
+        struct list_head *anchor = NULL;
+        struct obd_capa *cur;
+
+        /* TODO: client capa is sorted by expiry, this could be optimized */
+        list_for_each_entry_reverse(cur, head, c_list) {
+                if (!cfs_time_after(ocapa->c_expiry, cur->c_expiry))
+                        continue;
+                anchor = &cur->c_list;
+                break;
+        }
+
+        LASSERT(anchor != &ocapa->c_list);
+        if (anchor == NULL)
+                anchor = head;
+        list_add(&ocapa->c_list, anchor);
+}
+
+/* Ask ldlm_lock_match() whether an IBITS lock covering @inodebits exists on
+ * the resource named by (@inode->i_ino, @inode->i_generation) in the MD
+ * export's namespace.  The match is requested for modes CR, CW and PR with
+ * LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK.  Returns
+ * the result of ldlm_lock_match() unchanged. */
+static int inode_have_md_lock(struct inode *inode, __u64 inodebits)
+{
+        struct obd_export *exp = ll_i2mdexp(inode);
+        ldlm_policy_data_t policy = { .l_inodebits = {inodebits}};
+        struct ldlm_res_id res_id = { .name = {0} };
+        struct lustre_handle lockh;
+        int flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING |
+                    LDLM_FL_TEST_LOCK;
+        int matched;
+        ENTRY;
+
+        res_id.name[0] = inode->i_ino;
+        res_id.name[1] = inode->i_generation;
+
+        CDEBUG(D_SEC, "trying to match res "LPU64"\n", res_id.name[0]);
+
+        matched = ldlm_lock_match(exp->exp_obd->obd_namespace, flags, &res_id,
+                                  LDLM_IBITS, &policy,
+                                  LCK_CR|LCK_CW|LCK_PR, &lockh);
+        RETURN(matched);
+}
+
+/* Detach @ocapa from its inode and from whichever c_list it is on, then free
+ * it.  An MDS capa gives up one reference via capa_put() and clears
+ * lli_mds_capa; an OSS capa is unlinked from the inode's lli_oss_capas.
+ * Every caller in this file holds capa_lock around the call. */
+static void ll_delete_capa(struct obd_capa *ocapa)
+{
+        struct lustre_capa *capa = &ocapa->c_capa;
+        struct ll_inode_info *lli = ll_i2info(ocapa->u.cli.inode);
+
+        if (capa_for_mds(capa)) {
+                capa_put(ocapa);
+                LASSERT(lli->lli_mds_capa == ocapa);
+                lli->lli_mds_capa = NULL;
+        } else if (capa_for_oss(capa)) {
+                list_del_init(&ocapa->u.cli.lli_list);
+        }
+
+        DEBUG_CAPA(D_SEC, capa, "free client");
+        list_del(&ocapa->c_list);
+        free_capa(ocapa);
+}
+
+/* Body of the "ll_capa" kernel thread.
+ *
+ * The thread sleeps on ll_capa_thread.t_ctl_waitq until it is asked to stop
+ * or have_expired_capa() reports work.  Each pass then, under capa_lock:
+ *   1. walks ll_capa_list and, for every capa that is about to expire,
+ *      either parks it on ll_idle_capas (no renewal wanted), deletes it
+ *      (inode can no longer be grabbed) or sends a renewal through
+ *      md_renew_capa() with ll_update_capa() as the callback;
+ *   2. walks ll_idle_capas and releases the capas that have expired.
+ *
+ * three places where client capa is deleted:
+ * 1. capa_thread_main(), main place to delete expired capa.
+ * 2. ll_clear_inode_capas() in ll_clear_inode().
+ * 3. ll_truncate_free_capa() delete truncate capa explicitly in ll_truncate().
+ */
+static int capa_thread_main(void *unused)
+{
+        struct obd_capa *ocapa, *tmp, *next;
+        struct inode *inode = NULL;
+        struct l_wait_info lwi = { 0 };
+        int rc;
+        ENTRY;
+
+        cfs_daemonize("ll_capa");
+
+        /* tell ll_capa_thread_start() that the thread is up */
+        ll_capa_thread.t_flags = SVC_RUNNING;
+        wake_up(&ll_capa_thread.t_ctl_waitq);
+
+        while (1) {
+                l_wait_event(ll_capa_thread.t_ctl_waitq,
+                             (ll_capa_check_stop() || have_expired_capa()),
+                             &lwi);
+
+                if (ll_capa_check_stop())
+                        break;
+
+                spin_lock(&capa_lock);
+                /* first capa on ll_capa_list that is not yet due, if any */
+                next = NULL;
+                list_for_each_entry_safe(ocapa, tmp, ll_capa_list, c_list) {
+                        LASSERT(ocapa->c_capa.lc_opc != CAPA_OPC_OSS_TRUNC);
+
+                        /* list is sorted by expiry: stop at the first capa
+                         * that is not due for renewal */
+                        if (!capa_is_to_expire(ocapa)) {
+                                next = ocapa;
+                                break;
+                        }
+
+                        if (capa_for_mds(&ocapa->c_capa) &&
+                            !ll_have_md_lock(ocapa->u.cli.inode,
+                                             MDS_INODELOCK_LOOKUP) &&
+                            !obd_capa_is_root(ocapa)) {
+                                /* fid capa without LOOKUP lock won't renew,
+                                 * move to idle list (except root fid) */
+                                DEBUG_CAPA(D_SEC, &ocapa->c_capa,
+                                           "skip renewal for");
+                                list_del_init(&ocapa->c_list);
+                                sort_add_capa(ocapa, &ll_idle_capas);
+                                continue;
+                        }
+
+                        if (capa_for_oss(&ocapa->c_capa) &&
+                            atomic_read(&ocapa->u.cli.open_count) == 0) {
+                                /* oss capa with open_count == 0 won't renew,
+                                 * move to idle list */
+                                list_del_init(&ocapa->c_list);
+                                sort_add_capa(ocapa, &ll_idle_capas);
+                                continue;
+                        }
+
+                        /* NB iput() is in ll_update_capa() */
+                        inode = igrab(ocapa->u.cli.inode);
+                        if (inode == NULL) {
+                                DEBUG_CAPA(D_SEC, &ocapa->c_capa,
+                                           "igrab failed for");
+                                ll_delete_capa(ocapa);
+                                continue;
+                        }
+
+                        /* take the capa off the list and pin it; the
+                         * matching capa_put() is in ll_update_capa() */
+                        list_del_init(&ocapa->c_list);
+                        capa_get(ocapa);
+                        /* NOTE(review): capa_lock is dropped here while the
+                         * walk still relies on 'tmp', which was sampled
+                         * before the unlock - confirm nothing can unlink
+                         * 'tmp' from ll_capa_list in this window */
+                        spin_unlock(&capa_lock);
+
+                        rc = md_renew_capa(ll_i2mdexp(inode), ocapa,
+                                           ll_update_capa);
+                        spin_lock(&capa_lock);
+                        /* NOTE(review): presumably a non-zero rc means the
+                         * callback was never invoked, so the capa is parked
+                         * on the idle list here - verify against
+                         * md_renew_capa() */
+                        if (rc)
+                                sort_add_capa(ocapa, &ll_idle_capas);
+                }
+
+                if (next)
+                        update_capa_timer(next, capa_renewal_time(next));
+
+                list_for_each_entry_safe(ocapa, tmp, &ll_idle_capas, c_list) {
+                        LASSERT(atomic_read(&ocapa->u.cli.open_count) == 0);
+
+                        /* idle list is sorted too: the first unexpired capa
+                         * ends the walk; it only drives the timer when
+                         * ll_capa_list did not already provide a deadline */
+                        if (!capa_is_expired(ocapa)) {
+                                if (!next)
+                                        update_capa_timer(ocapa, ocapa->c_expiry);
+                                break;
+                        }
+
+                        /* still referenced: mark it expired and drop it
+                         * from the idle list without freeing it */
+                        if (atomic_read(&ocapa->c_refc)) {
+                                DEBUG_CAPA(D_SEC, &ocapa->c_capa,
+                                           "expired(c_refc %d), don't release",
+                                           atomic_read(&ocapa->c_refc));
+                                obd_capa_set_expired(ocapa);
+                                /* don't try to renew any more */
+                                list_del_init(&ocapa->c_list);
+                                continue;
+                        }
+
+                        /* expired capa is released. */
+                        DEBUG_CAPA(D_SEC, &ocapa->c_capa, "release expired");
+                        ll_delete_capa(ocapa);
+                }
+
+                spin_unlock(&capa_lock);
+        }
+
+        /* tell ll_capa_thread_stop() that the thread is gone */
+        ll_capa_thread.t_flags = SVC_STOPPED;
+        wake_up(&ll_capa_thread.t_ctl_waitq);
+        RETURN(0);
+}
+
+/* Timer callback: wake the capa thread so that it re-evaluates its wait
+ * condition.  @unused is ignored. */
+void ll_capa_timer_callback(unsigned long unused)
+{
+        wake_up(&ll_capa_thread.t_ctl_waitq);
+}
+
+/* Spawn the capa thread and block until it has flagged itself SVC_RUNNING.
+ * Returns 0 on success, or the negative value returned by kernel_thread()
+ * when the thread could not be created. */
+int ll_capa_thread_start(void)
+{
+        int pid;
+        ENTRY;
+
+        init_waitqueue_head(&ll_capa_thread.t_ctl_waitq);
+
+        pid = kernel_thread(capa_thread_main, NULL, 0);
+        if (pid >= 0) {
+                wait_event(ll_capa_thread.t_ctl_waitq,
+                           ll_capa_thread.t_flags & SVC_RUNNING);
+                RETURN(0);
+        }
+
+        CERROR("cannot start expired capa thread: rc %d\n", pid);
+        RETURN(pid);
+}
+
+/* Ask the capa thread to exit and wait until it has flagged itself
+ * SVC_STOPPED. */
+void ll_capa_thread_stop(void)
+{
+        /* the flag must be set before the wake-up: the thread re-checks
+         * ll_capa_check_stop() as part of its wait condition */
+        ll_capa_thread.t_flags = SVC_STOPPING;
+        wake_up(&ll_capa_thread.t_ctl_waitq);
+        wait_event(ll_capa_thread.t_ctl_waitq,
+                   ll_capa_thread.t_flags & SVC_STOPPED);
+}
+
+/* Find the first valid OSS capability of @inode whose opcode mask covers
+ * every bit of @opc.
+ *
+ * Must be called with capa_lock held.  No reference is taken on the
+ * returned capa.  Returns NULL when the inode has no matching capa. */
+static struct obd_capa *do_lookup_oss_capa(struct inode *inode, int opc)
+{
+        struct ll_inode_info *lli = ll_i2info(inode);
+        struct obd_capa *ocapa;
+
+        /* inside capa_lock */
+        list_for_each_entry(ocapa, &lli->lli_oss_capas, u.cli.lli_list) {
+                if (!obd_capa_is_valid(ocapa))
+                        continue;
+                /* skip capas that do NOT grant all requested opcodes; the
+                 * test used to be inverted and skipped the matching ones */
+                if ((capa_opc(&ocapa->c_capa) & opc) != opc)
+                        continue;
+
+                LASSERT(lu_fid_eq(capa_fid(&ocapa->c_capa),
+                                  ll_inode2fid(inode)));
+                LASSERT(ocapa->c_site == CAPA_SITE_CLIENT);
+
+                DEBUG_CAPA(D_SEC, &ocapa->c_capa, "found client");
+                return ocapa;
+        }
+
+        return NULL;
+}
+
+/* Look up a valid OSS capability of @inode that supports @opc and return it
+ * with an extra reference (release with capa_put()).
+ *
+ * @opc must be CAPA_OPC_OSS_WRITE, CAPA_OPC_OSS_WRITE | CAPA_OPC_OSS_READ,
+ * or CAPA_OPC_OSS_TRUNC.
+ *
+ * Returns NULL when OSS capabilities are not enabled on this mount or when
+ * no suitable capa exists.  A miss is reported once through CERROR while
+ * ll_capa_debug is set, after which the flag is cleared. */
+struct obd_capa *ll_lookup_oss_capa(struct inode *inode, __u64 opc)
+{
+        struct ll_inode_info *lli = ll_i2info(inode);
+        struct obd_capa *ocapa;
+        int found = 0;
+
+        if ((ll_i2sbi(inode)->ll_flags & LL_SBI_OSS_CAPA) == 0)
+                return NULL;
+        ENTRY;
+        LASSERT(opc == CAPA_OPC_OSS_WRITE ||
+                opc == (CAPA_OPC_OSS_WRITE | CAPA_OPC_OSS_READ) ||
+                opc == CAPA_OPC_OSS_TRUNC);
+
+        spin_lock(&capa_lock);
+        list_for_each_entry(ocapa, &lli->lli_oss_capas, u.cli.lli_list) {
+                if (!obd_capa_is_valid(ocapa))
+                        continue;
+                /* the write/read/trunc cases all ran the same support
+                 * check, so they are folded into a single test */
+                if ((opc & (CAPA_OPC_OSS_WRITE | CAPA_OPC_OSS_READ |
+                            CAPA_OPC_OSS_TRUNC)) &&
+                    capa_opc_supported(&ocapa->c_capa, opc)) {
+                        found = 1;
+                        break;
+                }
+        }
+
+        if (found) {
+                LASSERT(lu_fid_eq(capa_fid(&ocapa->c_capa),
+                                  ll_inode2fid(inode)));
+                LASSERT(ocapa->c_site == CAPA_SITE_CLIENT);
+
+                capa_get(ocapa);
+
+                DEBUG_CAPA(D_SEC, &ocapa->c_capa, "found client");
+        } else {
+                /* after an unsuccessful walk the cursor points at the
+                 * container of the list head, not at a capa: it must be
+                 * reset on every miss, not only when debugging is on */
+                ocapa = NULL;
+
+                if (atomic_read(&ll_capa_debug)) {
+                        CERROR("no capability for "DFID" opc "LPX64"\n",
+                               PFID(&lli->lli_fid), opc);
+                        atomic_set(&ll_capa_debug, 0);
+                }
+        }
+        spin_unlock(&capa_lock);
+        RETURN(ocapa);
+}
+
+/* Return the MDS (fid) capability attached to @inode with an extra
+ * reference, or NULL when MDS capabilities are disabled on this mount, the
+ * inode has none, or the one it has is no longer valid.
+ *
+ * While ll_capa_debug is set, a miss is logged once; if an MDS LOOKUP lock
+ * is nevertheless matched for the inode this hits LBUG(). */
+struct obd_capa *ll_i2mdscapa(struct inode *inode)
+{
+        struct obd_capa *oc;
+
+        LASSERT(inode);
+        if (!(ll_i2sbi(inode)->ll_flags & LL_SBI_MDS_CAPA))
+                return NULL;
+
+        spin_lock(&capa_lock);
+        oc = capa_get(ll_i2info(inode)->lli_mds_capa);
+        spin_unlock(&capa_lock);
+
+        if (oc != NULL && !obd_capa_is_valid(oc)) {
+                DEBUG_CAPA(D_ERROR, &oc->c_capa, "invalid");
+                capa_put(oc);
+                oc = NULL;
+        }
+
+        /* got one, or nobody asked for diagnostics */
+        if (oc != NULL || !atomic_read(&ll_capa_debug))
+                return oc;
+
+        CDEBUG(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ?
+               D_ERROR : D_SEC, "no MDS capa for (ino %lu)\n",
+               inode->i_ino);
+        if (inode_have_md_lock(inode, MDS_INODELOCK_LOOKUP))
+                LBUG();
+        atomic_set(&ll_capa_debug, 0);
+
+        return oc;
+}
+
+/* Attach the MDS capability *@pcapa to @inode.
+ *
+ * If the inode has no MDS capa yet, *@pcapa becomes lli_mds_capa (with an
+ * extra reference) and is marked valid.  Otherwise the existing capa is
+ * refreshed from the new one unless both carry the same lc_expiry, the new
+ * one is freed, and *@pcapa is redirected to the existing capa.
+ *
+ * Returns 0, or -EEXIST when the existing capa already had that expiry.
+ * Called from ll_add_capa() with capa_lock held. */
+static inline int do_add_mds_capa(struct inode *inode, struct obd_capa **pcapa)
+{
+        struct ll_inode_info *lli = ll_i2info(inode);
+        struct obd_capa *cur = lli->lli_mds_capa;
+        struct obd_capa *fresh = *pcapa;
+        int rc = 0;
+
+        if (cur == NULL) {
+                fresh->u.cli.inode = inode;
+                lli->lli_mds_capa = capa_get(fresh);
+                obd_capa_clear_new(fresh);
+                obd_capa_set_valid(fresh);
+
+                DEBUG_CAPA(D_SEC, &fresh->c_capa, "add fid");
+                return 0;
+        }
+
+        if (fresh->c_capa.lc_expiry != cur->c_capa.lc_expiry) {
+                spin_lock(&cur->c_lock);
+                cur->c_capa = fresh->c_capa;
+                obd_capa_set_valid(cur);
+                spin_unlock(&cur->c_lock);
+
+                DEBUG_CAPA(D_SEC, &cur->c_capa, "update fid");
+        } else {
+                rc = -EEXIST;
+        }
+
+        /* the inode keeps its original object; the new one is dropped */
+        free_capa(fresh);
+        *pcapa = cur;
+
+        return rc;
+}
+
+/* (Re)link @ocapa into @inode's lli_oss_capas.
+ *
+ * The list is kept sorted so that a lookup meets the latest-expiring capa
+ * first: @ocapa is moved in front of the first entry whose expiry is
+ * earlier than its own, or to the tail when there is no such entry. */
+static inline void inode_add_oss_capa(struct inode *inode,
+                                      struct obd_capa *ocapa)
+{
+        struct ll_inode_info *lli = ll_i2info(inode);
+        struct list_head *pos = &lli->lli_oss_capas;
+        struct obd_capa *cur;
+
+        list_for_each_entry(cur, &lli->lli_oss_capas, u.cli.lli_list) {
+                if (!cfs_time_after(ocapa->c_expiry, cur->c_expiry))
+                        continue;
+                pos = &cur->u.cli.lli_list;
+                break;
+        }
+
+        list_move_tail(&ocapa->u.cli.lli_list, pos);
+}
+
+/* Attach the OSS capability *@pcapa to the regular file @inode.
+ *
+ * When do_lookup_oss_capa() finds no existing capa for the same OSS opcode
+ * bits, *@pcapa is initialised as a client capa of the inode and marked
+ * valid.  Otherwise the existing capa is refreshed from the new one unless
+ * both carry the same lc_expiry, the new one is freed, and *@pcapa is
+ * redirected to the existing capa.  On success the resulting capa is
+ * (re)sorted into the inode's OSS capa list.
+ *
+ * Returns 0, or -EEXIST when the existing capa already had that expiry.
+ * Called from ll_add_capa() with capa_lock held. */
+static inline int do_add_oss_capa(struct inode *inode, struct obd_capa **pcapa)
+{
+        struct obd_capa *fresh = *pcapa;
+        struct lustre_capa *capa = &fresh->c_capa;
+        struct obd_capa *cur;
+        int rc = 0;
+
+        LASSERTF(S_ISREG(inode->i_mode),
+                 "inode has oss capa, but not regular file, mode: %d\n",
+                 inode->i_mode);
+
+        /* FIXME: can't replace it so easily with fine-grained opc */
+        cur = do_lookup_oss_capa(inode, capa->lc_opc & CAPA_OPC_OSS_ONLY);
+        if (cur == NULL) {
+                fresh->u.cli.inode = inode;
+                atomic_set(&fresh->u.cli.open_count, 0);
+                INIT_LIST_HEAD(&fresh->u.cli.lli_list);
+                obd_capa_set_valid(fresh);
+
+                DEBUG_CAPA(D_SEC, capa, "add oss");
+        } else {
+                if (cur->c_capa.lc_expiry != capa->lc_expiry) {
+                        spin_lock(&cur->c_lock);
+                        cur->c_capa = *capa;
+                        obd_capa_set_valid(cur);
+                        spin_unlock(&cur->c_lock);
+
+                        DEBUG_CAPA(D_SEC, capa, "update oss");
+                } else {
+                        rc = -EEXIST;
+                }
+
+                /* 'capa' points into 'fresh' and is dead from here on */
+                free_capa(fresh);
+                *pcapa = cur;
+        }
+
+        if (rc == 0)
+                inode_add_oss_capa(inode, *pcapa);
+
+        return rc;
+}
+
+/* Register a capability received for @inode.
+ *
+ * Dispatches to do_add_mds_capa() or do_add_oss_capa() depending on the kind
+ * of capa.  Unless the helper reported an error or the capa is a truncate
+ * capa, the resulting capa is re-sorted into ll_capa_list, its expiry is
+ * recomputed and the renewal timer is updated.  ll_capa_debug is switched on
+ * as a side effect.
+ *
+ * Returns the capa now attached to the inode, which is the pre-existing one
+ * when @ocapa was merged into it and freed. */
+struct obd_capa *ll_add_capa(struct inode *inode, struct obd_capa *ocapa)
+{
+        int rc;
+
+        spin_lock(&capa_lock);
+        /* the helpers may replace 'ocapa' with the capa already on the inode */
+        if (capa_for_mds(&ocapa->c_capa))
+                rc = do_add_mds_capa(inode, &ocapa);
+        else
+                rc = do_add_oss_capa(inode, &ocapa);
+
+        /* a truncate capa is never renewed, and on error nothing changed:
+         * in both cases the list and the timer stay as they are */
+        if (rc == 0 && ocapa->c_capa.lc_opc != CAPA_OPC_OSS_TRUNC) {
+                list_del_init(&ocapa->c_list);
+                sort_add_capa(ocapa, ll_capa_list);
+
+                spin_lock(&ocapa->c_lock);
+                set_capa_expiry(ocapa);
+                spin_unlock(&ocapa->c_lock);
+                update_capa_timer(ocapa, capa_renewal_time(ocapa));
+        }
+
+        atomic_set(&ll_capa_debug, 1);
+        spin_unlock(&capa_lock);
+
+        return ocapa;
+}
+
+
+/* Renewal completion callback, handed to md_renew_capa() by the capa thread.
+ *
+ * @ocapa is the client capa that was sent for renewal; the thread removed it
+ * from its list, took a capa reference and grabbed the inode beforehand.
+ * @capa is either the renewed capability or an ERR_PTR() error code.
+ *
+ * On -EIO the capa is put back on ll_capa_list and the timer is set for a
+ * retry in 60 seconds; on any other error it is moved to ll_idle_capas.  On
+ * success the capa contents and expiry are replaced and the capa is
+ * re-sorted into ll_capa_list (and into the inode's OSS list for read/write
+ * capas).
+ *
+ * In every case the capa reference and the inode reference are released
+ * before returning.  Returns 0 on success or the error carried by @capa. */
+int ll_update_capa(struct obd_capa *ocapa, struct lustre_capa *capa)
+{
+        struct inode *inode;
+        cfs_time_t expiry;
+        int rc = 0;
+
+        /* assert before the first dereference, otherwise the check is moot */
+        LASSERT(ocapa);
+        inode = ocapa->u.cli.inode;
+
+        if (IS_ERR(capa)) {
+                /* set error code */
+                rc = PTR_ERR(capa);
+                /* failed capa won't be renewed any longer, but if -EIO, client
+                 * might be doing recovery, retry in 1 min. */
+                spin_lock(&capa_lock);
+                if (rc == -EIO) {
+                        expiry = cfs_time_current() + cfs_time_seconds(60);
+                        DEBUG_CAPA(D_SEC, &ocapa->c_capa,
+                                   "renewal failed: -EIO, retry in 1 min");
+                        /* jumps with capa_lock held; 'retry' releases it */
+                        goto retry;
+                } else {
+                        sort_add_capa(ocapa, &ll_idle_capas);
+                }
+                spin_unlock(&capa_lock);
+
+                DEBUG_CAPA(rc == -ENOENT ? D_SEC : D_ERROR, &ocapa->c_capa,
+                           "renewal failed(rc: %d) for", rc);
+                goto out;
+        }
+
+        /* everything in front of lc_flags must be unchanged by a renewal */
+        LASSERT(!memcmp(&ocapa->c_capa, capa,
+                        offsetof(struct lustre_capa, lc_flags)));
+
+        spin_lock(&ocapa->c_lock);
+        ocapa->c_capa = *capa;
+        set_capa_expiry(ocapa);
+        spin_unlock(&ocapa->c_lock);
+
+        spin_lock(&capa_lock);
+        if (capa->lc_opc & (CAPA_OPC_OSS_READ | CAPA_OPC_OSS_WRITE))
+                inode_add_oss_capa(inode, ocapa);
+        DEBUG_CAPA(D_SEC, capa, "renew");
+
+        expiry = capa_renewal_time(ocapa);
+retry:
+        /* reached with capa_lock held from both paths */
+        sort_add_capa(ocapa, ll_capa_list);
+        update_capa_timer(ocapa, expiry);
+        spin_unlock(&capa_lock);
+
+out:
+        /* drop the references taken in capa_thread_main() */
+        capa_put(ocapa);
+        iput(inode);
+        return rc;
+}
+
+/* Account an open of @file on the OSS capability of @inode that matches the
+ * file's access mode, by bumping that capa's open_count.
+ *
+ * Does nothing when OSS capabilities are disabled on this mount or @inode is
+ * not a regular file.  A missing capa is logged once while ll_capa_debug is
+ * set, after which the flag is cleared. */
+void ll_oss_capa_open(struct inode *inode, struct file *file)
+{
+        int opc = capa_open_opc(open_flags_to_accmode(file->f_flags));
+        struct obd_capa *oc;
+
+        if ((ll_i2sbi(inode)->ll_flags & LL_SBI_OSS_CAPA) == 0 ||
+            !S_ISREG(inode->i_mode))
+                return;
+
+        spin_lock(&capa_lock);
+        oc = do_lookup_oss_capa(inode, opc);
+        if (oc != NULL) {
+                atomic_inc(&oc->u.cli.open_count);
+                spin_unlock(&capa_lock);
+
+                DEBUG_CAPA(D_SEC, &oc->c_capa, "open (count: %d)",
+                           atomic_read(&oc->u.cli.open_count));
+                return;
+        }
+
+        if (atomic_read(&ll_capa_debug)) {
+                CDEBUG(D_ERROR, "no capa for (uid %u op %d ino %lu)\n",
+                       (unsigned)current->uid, opc, inode->i_ino);
+                atomic_set(&ll_capa_debug, 0);
+        }
+        spin_unlock(&capa_lock);
+}
+
+/* Counterpart of ll_oss_capa_open(): drop one open from the OSS capability
+ * of @inode that matches the access mode of @file.
+ *
+ * Does nothing when OSS capabilities are disabled on this mount, @inode is
+ * not a regular file, or no matching capa is found. */
+void ll_oss_capa_close(struct inode *inode, struct file *file)
+{
+        int opc = capa_open_opc(open_flags_to_accmode(file->f_flags));
+        struct obd_capa *oc;
+
+        if ((ll_i2sbi(inode)->ll_flags & LL_SBI_OSS_CAPA) == 0 ||
+            !S_ISREG(inode->i_mode))
+                return;
+
+        spin_lock(&capa_lock);
+        oc = do_lookup_oss_capa(inode, opc);
+        if (oc != NULL)
+                atomic_dec(&oc->u.cli.open_count);
+        spin_unlock(&capa_lock);
+
+        if (oc == NULL)
+                return;
+
+        DEBUG_CAPA(D_SEC, &oc->c_capa, "close (count: %d)",
+                   atomic_read(&oc->u.cli.open_count));
+}
+
+/* Release a truncate capability once the truncate is done.
+ *
+ * Only capas carrying CAPA_OPC_OSS_TRUNC may be passed in; a NULL @ocapa is
+ * accepted and ignored.  One reference is dropped with capa_put() and the
+ * capa is then unlinked and freed by ll_delete_capa(), all under
+ * capa_lock. */
+void ll_truncate_free_capa(struct obd_capa *ocapa)
+{
+        if (!ocapa)
+                return;
+
+        LASSERT(ocapa->c_capa.lc_opc & CAPA_OPC_OSS_TRUNC);
+        DEBUG_CAPA(D_SEC, &ocapa->c_capa, "release truncate");
+
+        spin_lock(&capa_lock);
+        capa_put(ocapa);
+        ll_delete_capa(ocapa);
+        spin_unlock(&capa_lock);
+}
+
+/* Delete every capability attached to @inode: the MDS capa, if any, and all
+ * entries of the OSS capa list.  Runs under capa_lock. */
+void ll_clear_inode_capas(struct inode *inode)
+{
+        struct ll_inode_info *lli = ll_i2info(inode);
+        struct obd_capa *cur, *nxt;
+
+        spin_lock(&capa_lock);
+        if (lli->lli_mds_capa != NULL)
+                ll_delete_capa(lli->lli_mds_capa);
+
+        /* _safe walk: ll_delete_capa() unlinks and frees the current entry */
+        list_for_each_entry_safe(cur, nxt, &lli->lli_oss_capas,
+                                 u.cli.lli_list)
+                ll_delete_capa(cur);
+        spin_unlock(&capa_lock);
+}
index 8eff9fa..13442da 100644 (file)
@@ -135,6 +135,11 @@ struct ll_inode_info {
         /* identifying fields for both metadata and data stacks. */
         struct lu_fid           lli_fid;
         struct lov_stripe_md   *lli_smd;
+
+        /* fid capability */
+        struct obd_capa        *lli_mds_capa;
+        /* oss capability list */
+        struct list_head        lli_oss_capas;
 };
 
 /*
@@ -218,13 +223,15 @@ struct ll_rw_process_info {
 };
 
 /* flags for sbi->ll_flags */
-#define LL_SBI_NOLCK            0x01 /* DLM locking disabled (directio-only) */
-#define LL_SBI_CHECKSUM         0x02 /* checksum each page as it's written */
-#define LL_SBI_FLOCK            0x04
-#define LL_SBI_USER_XATTR       0x08 /* support user xattr */
-#define LL_SBI_ACL              0x10 /* support ACL */
-#define LL_SBI_JOIN             0x20 /* support JOIN */
-#define LL_SBI_RMT_CLIENT       0x40 /* remote client */
+#define LL_SBI_NOLCK             0x01 /* DLM locking disabled (directio-only) */
+#define LL_SBI_CHECKSUM          0x02 /* checksum each page as it's written */
+#define LL_SBI_FLOCK             0x04
+#define LL_SBI_USER_XATTR        0x08 /* support user xattr */
+#define LL_SBI_ACL               0x10 /* support ACL */
+#define LL_SBI_JOIN              0x20 /* support JOIN */
+#define LL_SBI_RMT_CLIENT        0x40 /* remote client */
+#define LL_SBI_MDS_CAPA          0x80 /* support mds capa */
+#define LL_SBI_OSS_CAPA         0x100 /* support oss capa */
 
 struct ll_sb_info {
         struct list_head          ll_list;
@@ -451,9 +458,6 @@ struct inode *ll_iget(struct super_block *sb, ino_t hash,
 struct dentry *ll_find_alias(struct inode *, struct dentry *);
 int ll_md_blocking_ast(struct ldlm_lock *, struct ldlm_lock_desc *,
                        void *data, int flag);
-void ll_prepare_md_op_data(struct md_op_data *op_data, struct inode *i1,
-                           struct inode *i2, const char *name, int namelen,
-                           int mode);
 int ll_md_cancel_unused(struct lustre_handle *, struct inode *, int flags,
                         void *opaque);
 #ifndef LUSTRE_KERNEL_VERSION
@@ -569,6 +573,10 @@ int ll_get_max_mdsize(struct ll_sb_info *sbi, int *max_mdsize);
 int ll_process_config(struct lustre_cfg *lcfg);
 int ll_ioctl_getfacl(struct inode *inode, struct rmtacl_ioctl_data *ioc);
 int ll_ioctl_setfacl(struct inode *inode, struct rmtacl_ioctl_data *ioc);
+void ll_prepare_md_op_data(struct md_op_data *op_data, struct inode *i1,
+                           struct inode *i2, const char *name, int namelen,
+                           int mode);
+void ll_finish_md_op_data(struct md_op_data *op_data);
 
 /* llite/llite_nfs.c */
 extern struct export_operations lustre_export_operations;
@@ -731,4 +739,19 @@ int ll_fid_dt_alloc(struct ll_sb_info *sbi, struct lu_fid *fid,
 
 ino_t ll_fid_build_ino(struct ll_sb_info *sbi, struct lu_fid *fid);
 
+/* llite/llite_capa.c */
+extern cfs_timer_t ll_capa_timer;
+
+int ll_capa_thread_start(void);
+void ll_capa_thread_stop(void);
+void ll_capa_timer_callback(unsigned long unused);
+struct obd_capa *ll_lookup_oss_capa(struct inode *inode, __u64 opc);
+struct obd_capa *ll_add_capa(struct inode *inode, struct obd_capa *ocapa);
+void ll_oss_capa_open(struct inode *inode, struct file *file);
+void ll_oss_capa_close(struct inode *inode, struct file *file);
+int ll_update_capa(struct obd_capa *ocapa, struct lustre_capa *capa);
+void ll_truncate_free_capa(struct obd_capa *ocapa);
+void ll_clear_inode_capas(struct inode *inode);
+struct obd_capa *ll_i2mdscapa(struct inode *inode);
+
 #endif /* LLITE_INTERNAL_H */
index aebc9cb..2ece804 100644 (file)
@@ -156,6 +156,7 @@ static int client_common_fill_super(struct super_block *sb,
         struct ll_sb_info *sbi = ll_s2sbi(sb);
         struct obd_device *obd;
         struct lu_fid rootfid;
+        struct obd_capa *pc = NULL;
         struct obd_statfs osfs;
         struct ptlrpc_request *request = NULL;
         struct lustre_handle dt_conn = {0, };
@@ -185,7 +186,8 @@ static int client_common_fill_super(struct super_block *sb,
         /* indicate the features supported by this client */
         data->ocd_connect_flags = OBD_CONNECT_IBITS | OBD_CONNECT_NODEVOH |
                                   OBD_CONNECT_ACL | OBD_CONNECT_JOIN |
-                                  OBD_CONNECT_ATTRFID | OBD_CONNECT_VERSION;
+                                  OBD_CONNECT_ATTRFID | OBD_CONNECT_VERSION;/* |
+                                  OBD_CONNECT_MDS_CAPA | OBD_CONNECT_OSS_CAPA;*/
         data->ocd_ibits_known = MDS_INODELOCK_FULL;
         data->ocd_version = LUSTRE_VERSION_CODE;
 
@@ -262,6 +264,16 @@ static int client_common_fill_super(struct super_block *sb,
                 sbi->ll_flags &= ~LL_SBI_RMT_CLIENT;
         }
 
+        if (data->ocd_connect_flags & OBD_CONNECT_MDS_CAPA) {
+                CDEBUG(D_SEC, "client enabled fid capa!\n");
+                sbi->ll_flags |= LL_SBI_MDS_CAPA;
+        }
+
+        if (data->ocd_connect_flags & OBD_CONNECT_OSS_CAPA) {
+                CDEBUG(D_SEC, "client enabled oss capa!\n");
+                sbi->ll_flags |= LL_SBI_OSS_CAPA;
+        }
+
 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0))
         /* We set sb->s_dev equal on all lustre clients in order to support
          * NFS export clustering.  NFSD requires that the FSID be the same
@@ -289,6 +301,8 @@ static int client_common_fill_super(struct super_block *sb,
 
         data->ocd_connect_flags = OBD_CONNECT_GRANT | OBD_CONNECT_VERSION |
                                   OBD_CONNECT_REQPORTAL | OBD_CONNECT_BRW_SIZE;
+        if (sbi->ll_flags & LL_SBI_OSS_CAPA)
+                data->ocd_connect_flags |= OBD_CONNECT_OSS_CAPA;
 
         CDEBUG(D_RPCTRACE, "ocd_connect_flags: "LPX64" ocd_version: %d "
                "ocd_grant: %d\n", data->ocd_connect_flags,
@@ -343,7 +357,7 @@ static int client_common_fill_super(struct super_block *sb,
                 GOTO(out_dt, err);
         }
 
-        err = md_getstatus(sbi->ll_md_exp, &rootfid);
+        err = md_getstatus(sbi->ll_md_exp, &rootfid, &pc);
         if (err) {
                 CERROR("cannot mds_connect: rc = %d\n", err);
                 GOTO(out_dt_fid, err);
@@ -358,12 +372,14 @@ static int client_common_fill_super(struct super_block *sb,
 
         /* make root inode
          * XXX: move this to after cbd setup? */
-        err = md_getattr(sbi->ll_md_exp, &rootfid,
+        err = md_getattr(sbi->ll_md_exp, &rootfid, pc,
                          OBD_MD_FLGETATTR | OBD_MD_FLBLOCKS |
                          (sbi->ll_flags & LL_SBI_ACL ? OBD_MD_FLACL : 0),
                          0, &request);
         if (err) {
                 CERROR("md_getattr failed for root: rc = %d\n", err);
+                if (pc)
+                        free_capa(pc);
                 GOTO(out_dt, err);
         }
 
@@ -372,9 +388,16 @@ static int client_common_fill_super(struct super_block *sb,
                                &lmd);
         if (err) {
                 CERROR("failed to understand root inode md: rc = %d\n", err);
+                if (pc)
+                        free_capa(pc);
                 ptlrpc_req_finished (request);
                 GOTO(out_dt, err);
         }
+        if (pc) {
+                obd_capa_set_root(pc);
+                lmd.mds_capa = pc;
+                lmd.body->valid |= OBD_MD_FLMDSCAPA;
+        }
 
         LASSERT(fid_is_sane(&sbi->ll_root_fid));
         root = ll_iget(sb, ll_fid_build_ino(sbi, &sbi->ll_root_fid), &lmd);
@@ -1862,9 +1885,12 @@ int ll_iocontrol(struct inode *inode, struct file *file,
         switch(cmd) {
         case EXT3_IOC_GETFLAGS: {
                 struct mdt_body *body;
+                struct obd_capa *oc;
 
-                rc = md_getattr(sbi->ll_md_exp, ll_inode2fid(inode),
+                oc = ll_i2mdscapa(inode);
+                rc = md_getattr(sbi->ll_md_exp, ll_inode2fid(inode), oc,
                                 OBD_MD_FLFLAGS, 0, &req);
+                capa_put(oc);
                 if (rc) {
                         CERROR("failure %d inode %lu\n", rc, inode->i_ino);
                         RETURN(-abs(rc));
@@ -2184,11 +2210,46 @@ int ll_process_config(struct lustre_cfg *lcfg)
         return(rc);
 }
 
+/*
+ * Fill in the md_op_data hint that is passed down to the MD stack.
+ *
+ * @op_data: hint to fill in; must not be NULL.
+ * @i1:      first inode; must not be NULL.  Supplies ->fid1 and ->mod_capa1.
+ * @i2:      optional second inode.  When NULL, ->fid2 is left untouched (the
+ *           caller has to initialize it if needed) and ->mod_capa2 is set to
+ *           NULL.
+ * @name, @namelen, @mode: stored in ->name, ->namelen and ->create_mode.
+ *
+ * The values returned by ll_i2mdscapa() are stored in ->mod_capa1/2 and are
+ * handed to capa_put() by ll_finish_md_op_data(), so a call to this function
+ * is expected to be paired with a call to ll_finish_md_op_data().
+ */
+void ll_prepare_md_op_data(struct md_op_data *op_data, struct inode *i1,
+                            struct inode *i2, const char *name, int namelen,
+                            int mode)
+{
+        LASSERT(i1 != NULL);
+        LASSERT(op_data != NULL);
+
+        ll_i2gids(op_data->suppgids, i1, i2);
+        op_data->fid1 = ll_i2info(i1)->lli_fid;
+        op_data->mod_capa1 = ll_i2mdscapa(i1);
+
+        /* @i2 may be NULL. In this case caller itself has to initialize ->fid2
+         * if needed. */
+        if (i2) {
+                op_data->fid2 = *ll_inode2fid(i2);
+                op_data->mod_capa2 = ll_i2mdscapa(i2);
+        } else {
+                /* ll_finish_md_op_data() passes ->mod_capa2 to capa_put()
+                 * unconditionally, so never leave it uninitialized. */
+                op_data->mod_capa2 = NULL;
+        }
+
+        op_data->name = name;
+        op_data->namelen = namelen;
+        op_data->create_mode = mode;
+        op_data->mod_time = CURRENT_SECONDS;
+        op_data->fsuid = current->fsuid;
+        op_data->fsgid = current->fsgid;
+        op_data->cap = current->cap_effective;
+}
+
+/*
+ * Counterpart of ll_prepare_md_op_data(): hand both capabilities stored in
+ * @op_data (->mod_capa1 and ->mod_capa2) to capa_put().
+ */
+void ll_finish_md_op_data(struct md_op_data *op_data)
+{
+        struct obd_capa *capa1 = op_data->mod_capa1;
+        struct obd_capa *capa2 = op_data->mod_capa2;
+
+        capa_put(capa1);
+        capa_put(capa2);
+}
+
 int ll_ioctl_getfacl(struct inode *inode, struct rmtacl_ioctl_data *ioc)
 {
         struct ptlrpc_request *req = NULL;
         struct mds_body *body;
         char *cmd, *buf;
+        struct obd_capa *oc;
         int rc, buflen;
         ENTRY;
 
@@ -2200,9 +2261,11 @@ int ll_ioctl_getfacl(struct inode *inode, struct rmtacl_ioctl_data *ioc)
         if (copy_from_user(cmd, ioc->cmd, ioc->cmd_len))
                 GOTO(out, rc = -EFAULT);
 
-        rc = md_getxattr(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode),
+        oc = ll_i2mdscapa(inode);
+        rc = md_getxattr(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), oc,
                           OBD_MD_FLXATTR, XATTR_NAME_LUSTRE_ACL, cmd,
                           ioc->cmd_len, ioc->res_len, 0, &req);
+        capa_put(oc);
         if (rc < 0) {
                 CERROR("mdc_getxattr %s [%s] failed: %d\n",
                        XATTR_NAME_LUSTRE_ACL, cmd, rc);
@@ -2230,6 +2293,7 @@ int ll_ioctl_setfacl(struct inode *inode, struct rmtacl_ioctl_data *ioc)
 {
         struct ptlrpc_request *req = NULL;
         char *cmd, *buf;
+        struct obd_capa *oc;
         int buflen, rc;
         ENTRY;
 
@@ -2241,9 +2305,11 @@ int ll_ioctl_setfacl(struct inode *inode, struct rmtacl_ioctl_data *ioc)
         if (copy_from_user(cmd, ioc->cmd, ioc->cmd_len))
                 GOTO(out, rc = -EFAULT);
 
-        rc = md_setxattr(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode),
+        oc = ll_i2mdscapa(inode);
+        rc = md_setxattr(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), oc,
                           OBD_MD_FLXATTR, XATTR_NAME_LUSTRE_ACL, cmd,
                           ioc->cmd_len, ioc->res_len, 0, &req);
+        capa_put(oc);
         if (rc) {
                 CERROR("mdc_setxattr %s [%s] failed: %d\n",
                        XATTR_NAME_LUSTRE_ACL, cmd, rc);
index 0cc0e6d..62845d9 100644 (file)
@@ -59,9 +59,11 @@ static int ll_nfs_test_inode(struct inode *inode, void *opaque)
 
 static struct inode *search_inode_for_lustre(struct super_block *sb,
                                              struct lu_fid *fid,
+                                             struct lustre_capa *capa,
                                              int mode)
 {
         struct ll_sb_info *sbi = ll_s2sbi(sb);
+        struct obd_capa *ocapa = NULL;
         struct ptlrpc_request *req = NULL;
         struct inode *inode = NULL;
         unsigned long valid = 0;
@@ -79,7 +81,16 @@ static struct inode *search_inode_for_lustre(struct super_block *sb,
                 valid |= OBD_MD_FLEASIZE;
         }
 
-        rc = md_getattr(sbi->ll_md_exp, fid, valid, eadatalen, &req);
+        if (capa) {
+                ocapa = alloc_capa(CAPA_SITE_CLIENT);
+                if (!ocapa)
+                        return ERR_PTR(-ENOMEM);
+                ocapa->c_capa = *capa;
+        }
+
+        rc = md_getattr(sbi->ll_md_exp, fid, (struct obd_capa *)ocapa,
+                        valid, eadatalen, &req);
+        free_capa(ocapa);
         if (rc) {
                 CERROR("can't get object attrs, fid "DFID", rc %d\n",
                        PFID(fid), rc);
@@ -99,7 +110,9 @@ static struct inode *search_inode_for_lustre(struct super_block *sb,
 extern struct dentry_operations ll_d_ops;
 
 static struct dentry *ll_iget_for_nfs(struct super_block *sb,
-                                      struct lu_fid *fid, umode_t mode)
+                                      struct lu_fid *fid,
+                                      struct lustre_capa *capa,
+                                      umode_t mode)
 {
         struct inode *inode;
         struct dentry *result;
@@ -110,7 +123,7 @@ static struct dentry *ll_iget_for_nfs(struct super_block *sb,
         if (!fid_is_sane(fid))
                 return ERR_PTR(-ESTALE);
 
-        inode = search_inode_for_lustre(sb, fid, mode);
+        inode = search_inode_for_lustre(sb, fid, capa, mode);
         if (IS_ERR(inode))
                 return ERR_PTR(PTR_ERR(inode));
 
@@ -160,6 +173,7 @@ static struct dentry *ll_iget_for_nfs(struct super_block *sb,
         return result;
 }
 
+#if 0
 static void ll_fh_to_fid(struct lu_fid *fid, __u32 *mode, __u32 *datap)
 {
         /* unpacking ->f_seq */
@@ -246,24 +260,63 @@ int ll_dentry_to_fh(struct dentry *dentry, __u32 *datap, int *lenp,
         *lenp = 5;
         return 1;
 }
+#endif
 
 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
+/*
+ * Turn a file handle (@fh, @fh_len 32-bit words, type @fh_type) into a
+ * dentry.
+ *
+ * Only handles of type 1 that are at least as long as a struct lu_fid
+ * followed by a struct lustre_capa (rounded up to whole 32-bit words) are
+ * accepted; anything else yields ERR_PTR(-ESTALE).  The lookup itself is
+ * delegated to the superblock's find_exported_dentry() method, with @fh as
+ * the object and no parent.
+ */
+struct dentry *ll_decode_fh(struct super_block *sb, __u32 *fh, int fh_len,
+                            int fh_type,
+                            int (*acceptable)(void *, struct dentry *),
+                            void *context)
+{
+        const int min_words = (sizeof(struct lu_fid) +
+                               sizeof(struct lustre_capa) + 3) / 4;
+
+        if (fh_type != 1 || fh_len < min_words)
+                return ERR_PTR(-ESTALE);
+
+        return sb->s_export_op->find_exported_dentry(sb, fh, NULL, acceptable,
+                                                     context);
+}
+
+/*
+ * Encode the inode behind @de into the file handle @fh.
+ *
+ * Handle layout: the inode's struct lu_fid immediately followed by a
+ * struct lustre_capa, rounded up to whole 32-bit words.  When the inode has
+ * no MDS capability the capa area is zero-filled.
+ *
+ * @plen: in: room available in @fh, in 32-bit words;
+ *        out: number of words used (only written on success).
+ * @connectable: not used.
+ *
+ * Returns 1 (the handle type checked by ll_decode_fh()) on success, or 255
+ * when @fh is too small.
+ */
+int ll_encode_fh(struct dentry *de, __u32 *fh, int *plen, int connectable)
+{
+        struct inode *inode = de->d_inode;
+        struct lu_fid *fid = ll_inode2fid(inode);
+        struct obd_capa *ocapa;
+        int len = (sizeof(*fid) + sizeof(struct lustre_capa) + 3)/4;
+        char *p = (char *)fh;
+
+        /* Check the size before asking for the capability, so that the
+         * early return cannot leave an ll_i2mdscapa() result without its
+         * matching capa_put(). */
+        if (*plen < len)
+                return 255;
+
+        memcpy(p, fid, sizeof(*fid));
+        p += sizeof(*fid);
+
+        /* Never hand out uninitialized bytes: ll_get_dentry() reads lc_opc
+         * from this area even when there is no capability to copy. */
+        memset(p, 0, len * sizeof(*fh) - sizeof(*fid));
+
+        ocapa = ll_i2mdscapa(inode);
+        if (ocapa) {
+                capa_cpy(p, ocapa);
+                capa_put(ocapa);
+        }
+        *plen = len;
+        return 1;
+}
+
+/*
+ * Build a dentry from raw handle data: @data is read as a struct lu_fid
+ * immediately followed by a struct lustre_capa (the layout written by
+ * ll_encode_fh()).  The lookup is done by ll_iget_for_nfs() with mode
+ * S_IFREG.
+ *
+ * NOTE(review): the capability is forwarded only when lc_opc == 0 and is
+ * replaced by NULL otherwise; confirm that this test is not inverted.
+ */
 struct dentry *ll_get_dentry(struct super_block *sb, void *data)
 {
-        __u32 *inump = (__u32*)data;
-        struct lu_fid fid;
-        
-        /* FIXME: seems this is not enough */
-        fid.f_seq = inump[0];
-        fid.f_oid = inump[1];
+        char *p = (char *)data;
+        struct lu_fid *fid;
+        struct lustre_capa *capa;
+
+        fid = (struct lu_fid *)p;
+        capa = (struct lustre_capa *)(p + sizeof(*fid));
         
-        return ll_iget_for_nfs(sb, &fid, S_IFREG);
+        return ll_iget_for_nfs(sb, fid, (capa->lc_opc == 0) ? capa : NULL,
+                               S_IFREG);
 }
 
 struct dentry *ll_get_parent(struct dentry *dchild)
 {
         struct ptlrpc_request *req = NULL;
         struct inode *dir = dchild->d_inode;
+        struct obd_capa *oc;
         struct ll_sb_info *sbi;
         struct dentry *result = NULL;
         struct mdt_body *body;
@@ -275,18 +328,21 @@ struct dentry *ll_get_parent(struct dentry *dchild)
         
         sbi = ll_s2sbi(dir->i_sb);
  
-        rc = md_getattr_name(sbi->ll_md_exp, ll_inode2fid(dir),
-                             dotdot, strlen(dotdot) + 1,
-                             0, 0, &req);
+        oc = ll_i2mdscapa(dir);
+        rc = md_getattr_name(sbi->ll_md_exp, ll_inode2fid(dir), oc,
+                             dotdot, strlen(dotdot) + 1, 0, 0, &req);
         if (rc) {
+                capa_put(oc);
                 CERROR("failure %d inode %lu get parent\n", rc, dir->i_ino);
-                return ERR_PTR(rc);
+                RETURN(ERR_PTR(rc));
         }
-        body = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, sizeof (*body)); 
+        body = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, sizeof(*body)); 
        
         LASSERT((body->valid & OBD_MD_FLGENER) && (body->valid & OBD_MD_FLID));
         
-        result = ll_iget_for_nfs(dir->i_sb, ll_inode2fid(dir), S_IFDIR);
+        result = ll_iget_for_nfs(dir->i_sb, ll_inode2fid(dir),
+                                 oc ? &oc->c_capa : NULL, S_IFDIR);
+        capa_put(oc);
 
         if (IS_ERR(result))
                 rc = PTR_ERR(result);
index d9f4c15..a2cc955 100644 (file)
@@ -295,31 +295,6 @@ void ll_i2gids(__u32 *suppgids, struct inode *i1, struct inode *i2)
         }
 }
 
-/* this function prepares md_op_data hint for passing ot down to MD stack. */
-void ll_prepare_md_op_data(struct md_op_data *op_data, struct inode *i1,
-                            struct inode *i2, const char *name, int namelen,
-                            int mode)
-{
-        LASSERT(i1 != NULL);
-        LASSERT(op_data != NULL);
-
-        ll_i2gids(op_data->suppgids, i1, i2);
-        op_data->fid1 = ll_i2info(i1)->lli_fid;
-
-        /* @i2 may be NULL. In this case caller itself has to initialize ->fid2
-         * if needed. */
-        if (i2)
-                op_data->fid2 = ll_i2info(i2)->lli_fid;
-
-        op_data->name = name;
-        op_data->namelen = namelen;
-        op_data->create_mode = mode;
-        op_data->mod_time = CURRENT_SECONDS;
-        op_data->fsuid = current->fsuid;
-        op_data->fsgid = current->fsgid;
-        op_data->cap = current->cap_effective;
-}
-
 static void ll_d_add(struct dentry *de, struct inode *inode)
 {
         CDEBUG(D_DENTRY, "adding inode %p to dentry %p\n", inode, de);
@@ -1071,6 +1046,7 @@ int ll_objects_destroy(struct ptlrpc_request *request, struct inode *dir)
         struct lov_stripe_md *lsm = NULL;
         struct obd_trans_info oti = { 0 };
         struct obdo *oa;
+        struct obd_capa *oc;
         int rc;
         ENTRY;
 
@@ -1129,7 +1105,10 @@ int ll_objects_destroy(struct ptlrpc_request *request, struct inode *dir)
                 }
         }
 
-        rc = obd_destroy(ll_i2dtexp(dir), oa, lsm, &oti, ll_i2mdexp(dir));
+        /* FIXME: the parent's MDS capability is the only one we can find! */
+        oc = ll_i2mdscapa(dir);
+        rc = obd_destroy(ll_i2dtexp(dir), oa, lsm, &oti, ll_i2mdexp(dir), oc);
+        capa_put(oc);
         obdo_free(oa);
         if (rc)
                 CERROR("obd destroy objid "LPX64" error %d\n",
index 6eaf01e..4183fa6 100644 (file)
@@ -216,6 +216,7 @@ int lustre_check_remote_perm(struct inode *inode, int mask)
         struct ll_sb_info *sbi = ll_i2sbi(inode);
         struct ptlrpc_request *req = NULL;
         struct mdt_remote_perm *perm;
+        struct obd_capa *oc;
         int i = 0, rc;
         ENTRY;
 
@@ -239,7 +240,9 @@ check:
                 LBUG();
         }
 
-        rc = md_get_remote_perm(sbi->ll_md_exp, ll_inode2fid(inode), &req);
+        oc = ll_i2mdscapa(inode);
+        rc = md_get_remote_perm(sbi->ll_md_exp, ll_inode2fid(inode), oc, &req);
+        capa_put(oc);
         if (rc) {
                 up(&lli->lli_rmtperm_sem);
                 RETURN(rc);
index 38dfca5..250e6a9 100644 (file)
@@ -65,7 +65,7 @@ static int ll_brw(int cmd, struct inode *inode, struct obdo *oa,
         struct lov_stripe_md *lsm = lli->lli_smd;
         struct obd_info oinfo = { { { 0 } } };
         struct brw_page pg;
-        int rc;
+        int opc, rc;
         ENTRY;
 
         pg.pg = page;
@@ -96,7 +96,12 @@ static int ll_brw(int cmd, struct inode *inode, struct obdo *oa,
                                     LPROC_LL_BRW_READ, pg.count);
         oinfo.oi_oa = oa;
         oinfo.oi_md = lsm;
+        /* NB partial write, so we might not have CAPA_OPC_OSS_READ capa */
+        opc = cmd & OBD_BRW_WRITE ? CAPA_OPC_OSS_WRITE :
+                                    CAPA_OPC_OSS_WRITE | CAPA_OPC_OSS_READ;
+        oinfo.oi_capa = ll_lookup_oss_capa(inode, opc);
         rc = obd_brw(cmd, ll_i2dtexp(inode), &oinfo, 1, &pg, NULL);
+        capa_put(oinfo.oi_capa);
         if (rc == 0)
                 obdo_to_inode(inode, oa, OBD_MD_FLBLOCKS);
         else if (rc != -EIO)
@@ -182,7 +187,9 @@ void ll_truncate(struct inode *inode)
 
         ll_inode_size_unlock(inode, 0);
 
+        oinfo.oi_capa = ll_lookup_oss_capa(inode, CAPA_OPC_OSS_TRUNC);
         rc = obd_punch_rqset(ll_i2dtexp(inode), &oinfo, NULL);
+        ll_truncate_free_capa(oinfo.oi_capa);
         if (rc)
                 CERROR("obd_truncate fails (%d) ino %lu\n", rc, inode->i_ino);
         else
@@ -403,12 +410,22 @@ static void ll_ap_update_obdo(void *data, int cmd, struct obdo *oa,
         EXIT;
 }
 
+/*
+ * ->ap_lookup_capa() method of ll_async_page_ops: look up the OSS
+ * capability for the inode that hosts the page behind cookie @data.
+ * When OBD_BRW_WRITE is set in @cmd only CAPA_OPC_OSS_WRITE is asked for;
+ * otherwise CAPA_OPC_OSS_WRITE | CAPA_OPC_OSS_READ is.
+ */
+static struct obd_capa *ll_ap_lookup_capa(void *data, int cmd)
+{
+        struct ll_async_page *llap = LLAP_FROM_COOKIE(data);
+        struct inode *inode = llap->llap_page->mapping->host;
+        int opc = CAPA_OPC_OSS_WRITE;
+
+        if (!(cmd & OBD_BRW_WRITE))
+                opc |= CAPA_OPC_OSS_READ;
+
+        return ll_lookup_oss_capa(inode, opc);
+}
+
 static struct obd_async_page_ops ll_async_page_ops = {
         .ap_make_ready =        ll_ap_make_ready,
         .ap_refresh_count =     ll_ap_refresh_count,
         .ap_fill_obdo =         ll_ap_fill_obdo,
         .ap_update_obdo =       ll_ap_update_obdo,
         .ap_completion =        ll_ap_completion,
+        .ap_lookup_capa =       ll_ap_lookup_capa,
 };
 
 struct ll_async_page *llap_cast_private(struct page *page)
index bb7287e..1c15af3 100644 (file)
@@ -141,8 +141,9 @@ static ssize_t ll_direct_IO_26_seg(int rw, struct file *file,
 {
         struct brw_page *pga;
         struct obdo oa;
-        int i, rc = 0;
+        int opc, i, rc = 0;
         size_t length;
+        struct obd_capa *ocapa;
         ENTRY;
 
         OBD_ALLOC(pga, sizeof(*pga) * page_count);
@@ -166,13 +167,18 @@ static ssize_t ll_direct_IO_26_seg(int rw, struct file *file,
         if (rw == WRITE) {
                 lprocfs_counter_add(ll_i2sbi(inode)->ll_stats,
                                     LPROC_LL_DIRECT_WRITE, size);
+                opc = CAPA_OPC_OSS_WRITE;
                 llap_write_pending(inode, NULL);
         } else {
                 lprocfs_counter_add(ll_i2sbi(inode)->ll_stats,
                                     LPROC_LL_DIRECT_READ, size);
+                opc = CAPA_OPC_OSS_READ | CAPA_OPC_OSS_WRITE;
         }
+        ocapa = ll_lookup_oss_capa(inode, opc);
         rc = obd_brw_rqset(rw == WRITE ? OBD_BRW_WRITE : OBD_BRW_READ,
-                           ll_i2dtexp(inode), &oa, lsm, page_count, pga, NULL);
+                           ll_i2dtexp(inode), &oa, lsm, page_count, pga, NULL,
+                           ocapa);
+        capa_put(ocapa);
         if (rc == 0) {
                 rc = size;
                 if (rw == WRITE) {
index 1e6052c..ddf5b8f 100644 (file)
@@ -103,6 +103,7 @@ void lustre_register_client_process_config(int (*cpc)(struct lustre_cfg *lcfg));
 static int __init init_lustre_lite(void)
 {
         int rc, seed[2];
+
         printk(KERN_INFO "Lustre: Lustre Client File System; "
                "info@clusterfs.com\n");
         rc = ll_init_inodecache();
@@ -150,6 +151,9 @@ static int __init init_lustre_lite(void)
         get_random_bytes(seed, sizeof(seed));
         ll_srand(seed[0], seed[1]);
         
+        init_timer(&ll_capa_timer);
+        ll_capa_timer.function = ll_capa_timer_callback;
+        rc = ll_capa_thread_start();
         return rc;
 }
 
@@ -157,6 +161,12 @@ static void __exit exit_lustre_lite(void)
 {
         int rc;
 
+        del_timer(&ll_capa_timer);
+        ll_capa_thread_stop();
+        LASSERTF(capa_count[CAPA_SITE_CLIENT] == 0,
+                 "client remaining capa count %d\n",
+                 capa_count[CAPA_SITE_CLIENT]);
+
         lustre_register_client_fill_super(NULL);
         lustre_register_client_process_config(NULL);
 
index e44dcbf..b23d6b8 100644 (file)
@@ -36,6 +36,7 @@ static int ll_readlink_internal(struct inode *inode,
         struct ll_sb_info *sbi = ll_i2sbi(inode);
         int rc, symlen = inode->i_size + 1;
         struct mdt_body *body;
+        struct obd_capa *oc;
         ENTRY;
 
         *request = NULL;
@@ -46,8 +47,10 @@ static int ll_readlink_internal(struct inode *inode,
                 RETURN(0);
         }
 
-        rc = md_getattr(sbi->ll_md_exp, ll_inode2fid(inode),
+        oc = ll_i2mdscapa(inode);
+        rc = md_getattr(sbi->ll_md_exp, ll_inode2fid(inode), oc,
                         OBD_MD_LINKNAME, symlen, request);
+        capa_put(oc);
         if (rc) {
                 if (rc != -ENOENT)
                         CERROR("inode %lu: rc = %d\n", inode->i_ino, rc);
index 1f4b2dc..6038ef8 100644 (file)
@@ -111,6 +111,7 @@ int ll_setxattr_common(struct inode *inode, const char *name,
         struct ll_sb_info *sbi = ll_i2sbi(inode);
         struct ptlrpc_request *req;
         int xattr_type, rc;
+        struct obd_capa *oc;
         ENTRY;
 
         lprocfs_counter_incr(sbi->ll_stats, LPROC_LL_SETXATTR);
@@ -124,8 +125,10 @@ int ll_setxattr_common(struct inode *inode, const char *name,
         if (xattr_type == XATTR_TRUSTED_T && strcmp(name, "trusted.lov") == 0)
                 RETURN(0);
 
-        rc = md_setxattr(sbi->ll_md_exp, ll_inode2fid(inode), valid,
-                         name, value, size, 0, flags, &req);
+        oc = ll_i2mdscapa(inode);
+        rc = md_setxattr(sbi->ll_md_exp, ll_inode2fid(inode), oc, valid, name,
+                         value, size, 0, flags, &req);
+        capa_put(oc);
         if (rc) {
                 if (rc == -EOPNOTSUPP && xattr_type == XATTR_USER_T) {
                         LCONSOLE_INFO("Disabling user_xattr feature because "
@@ -177,6 +180,7 @@ int ll_getxattr_common(struct inode *inode, const char *name,
         struct mdt_body *body;
         int xattr_type, rc;
         void *xdata;
+        struct obd_capa *oc;
         ENTRY;
 
         CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n",
@@ -221,8 +225,10 @@ int ll_getxattr_common(struct inode *inode, const char *name,
 #endif
 
 do_getxattr:
-        rc = md_getxattr(sbi->ll_md_exp, ll_inode2fid(inode), valid,
-                         name, NULL, 0, size, 0, &req);
+        oc = ll_i2mdscapa(inode);
+        rc = md_getxattr(sbi->ll_md_exp, ll_inode2fid(inode), oc, valid, name,
+                         NULL, 0, size, 0, &req);
+        capa_put(oc);
         if (rc) {
                 if (rc == -EOPNOTSUPP && xattr_type == XATTR_USER_T) {
                         LCONSOLE_INFO("Disabling user_xattr feature because "
index e8f5219..edd0b30 100644 (file)
@@ -185,6 +185,7 @@ int lmv_intent_open(struct obd_export *exp, struct md_op_data *op_data,
 {
         struct obd_device *obd = exp->exp_obd;
         struct lu_fid rpid = op_data->fid1;
+        struct obd_capa *oc = op_data->mod_capa1;
         struct lmv_obd *lmv = &obd->u.lmv;
         struct mdt_body *body = NULL;
         struct md_op_data *sop_data;
@@ -217,6 +218,7 @@ repeat:
                                    (char *)op_data->name, op_data->namelen);
 
                 rpid = obj->lo_inodes[mds].li_fid;
+                oc   = obj->lo_inodes[mds].li_capa;
                 rc = lmv_fld_lookup(lmv, &rpid, &mds);
                 lmv_obj_put(obj);
                 if (rc)
@@ -227,6 +229,7 @@ repeat:
         }
 
         sop_data->fid1 = rpid;
+        sop_data->mod_capa1 = oc;
 
         rc = md_intent_lock(lmv->tgts[mds].ltd_exp, sop_data,
                             lmm, lmmsize, it, flags, reqp,
@@ -238,7 +241,7 @@ repeat:
                  * the request with proper MDS.
                  */
                 LASSERT(lu_fid_eq(&op_data->fid1, &rpid));
-                rc = lmv_handle_split(exp, &rpid);
+                rc = lmv_handle_split(exp, &rpid, oc);
                 if (rc == 0) {
                         ptlrpc_req_finished(*reqp);
                        /* We shoudld reallocate the FID for the object */
@@ -301,8 +304,10 @@ repeat:
 
         obj = lmv_obj_grab(obd, &body->fid1);
         if (!obj && (mea = lmv_get_mea(*reqp, DLM_REPLY_REC_OFF))) {
+
+                /* FIXME: capability for remote! */
                 /* wow! this is split dir, we'd like to handle it */
-                obj = lmv_obj_create(exp, &body->fid1, mea);
+                obj = lmv_obj_create(exp, &body->fid1, NULL, mea);
                 if (IS_ERR(obj))
                         GOTO(out_free_sop_data, rc = (int)PTR_ERR(obj));
         }
@@ -337,6 +342,7 @@ int lmv_intent_getattr(struct obd_export *exp, struct md_op_data *op_data,
         struct lmv_obj *obj = NULL, *obj2 = NULL;
         struct obd_device *obd = exp->exp_obd;
         struct lu_fid rpid = op_data->fid1;
+        struct obd_capa *oc = op_data->mod_capa1;
         struct lmv_obd *lmv = &obd->u.lmv;
         struct mdt_body *body = NULL;
         struct md_op_data *sop_data;
@@ -397,6 +403,7 @@ int lmv_intent_getattr(struct obd_export *exp, struct md_op_data *op_data,
                                            op_data->namelen);
 
                         rpid = obj->lo_inodes[mds].li_fid;
+                        oc   = obj->lo_inodes[mds].li_capa;
                         rc = lmv_fld_lookup(lmv, &rpid, &mds);
                         if (rc) {
                                 lmv_obj_put(obj);
@@ -410,6 +417,7 @@ int lmv_intent_getattr(struct obd_export *exp, struct md_op_data *op_data,
         }
 
         sop_data->fid1 = rpid;
+        sop_data->mod_capa1 = oc;
 
         rc = md_intent_lock(lmv->tgts[mds].ltd_exp, sop_data, lmm,
                             lmmsize, it, flags, reqp, cb_blocking,
@@ -468,8 +476,10 @@ int lmv_intent_getattr(struct obd_export *exp, struct md_op_data *op_data,
         obj2 = lmv_obj_grab(obd, &body->fid1);
 
         if (!obj2 && (mea = lmv_get_mea(*reqp, DLM_REPLY_REC_OFF))) {
+
+                /* FIXME remote capability! */
                 /* wow! this is split dir, we'd like to handle it. */
-                obj2 = lmv_obj_create(exp, &body->fid1, mea);
+                obj2 = lmv_obj_create(exp, &body->fid1, NULL, mea);
                 if (IS_ERR(obj2))
                         GOTO(out_free_sop_data, rc = (int)PTR_ERR(obj2));
         }
@@ -542,6 +552,7 @@ int lmv_lookup_slaves(struct obd_export *exp, struct ptlrpc_request **reqp)
 
         for (i = 0; i < obj->lo_objcount; i++) {
                 struct lu_fid fid = obj->lo_inodes[i].li_fid;
+                struct obd_capa *oc= obj->lo_inodes[i].li_capa;
                 struct ptlrpc_request *req = NULL;
                 struct obd_export *tgt_exp;
                 struct lookup_intent it;
@@ -559,6 +570,8 @@ int lmv_lookup_slaves(struct obd_export *exp, struct ptlrpc_request **reqp)
                 memset(op_data, 0, sizeof(*op_data));
                 op_data->fid1 = fid;
                 op_data->fid2 = fid;
+                op_data->mod_capa1 = oc;
+                op_data->mod_capa2 = oc;
 
                 tgt_exp = lmv_get_export(lmv, &fid);
                 if (IS_ERR(tgt_exp))
@@ -620,6 +633,7 @@ int lmv_intent_lookup(struct obd_export *exp, struct md_op_data *op_data,
 {
         struct obd_device *obd = exp->exp_obd;
         struct lu_fid rpid = op_data->fid1;
+        struct obd_capa *oc = op_data->mod_capa1;
         struct lmv_obd *lmv = &obd->u.lmv;
         struct mdt_body *body = NULL;
         struct md_op_data *sop_data;
@@ -655,6 +669,7 @@ int lmv_intent_lookup(struct obd_export *exp, struct md_op_data *op_data,
                                            (char *)op_data->name,
                                            op_data->namelen);
                         rpid = obj->lo_inodes[mds].li_fid;
+                        oc = obj->lo_inodes[mds].li_capa;
                         lmv_obj_put(obj);
                 }
                 rc = lmv_fld_lookup(lmv, &rpid, &mds);
@@ -684,6 +699,7 @@ repeat:
                                                    (char *)op_data->name,
                                                    op_data->namelen);
                                 rpid = obj->lo_inodes[mds].li_fid;
+                                oc = obj->lo_inodes[mds].li_capa;
                                 rc = lmv_fld_lookup(lmv, &rpid, &mds);
                                 if (rc) {
                                         lmv_obj_put(obj);
@@ -696,6 +712,7 @@ repeat:
         }
 
         sop_data->fid1 = rpid;
+        sop_data->mod_capa1 = oc;
 
         rc = md_intent_lock(lmv->tgts[mds].ltd_exp, sop_data, lmm, lmmsize,
                             it, flags, reqp, cb_blocking, extra_lock_flags);
@@ -730,7 +747,7 @@ repeat:
                 CWARN("we haven't knew about directory splitting!\n");
                 LASSERT(obj == NULL);
 
-                obj = lmv_obj_create(exp, &rpid, NULL);
+                obj = lmv_obj_create(exp, &rpid, oc, NULL);
                 if (IS_ERR(obj))
                         RETURN((int)PTR_ERR(obj));
                 lmv_obj_put(obj);
@@ -754,9 +771,15 @@ repeat:
                 LASSERT(body != NULL);
                 LASSERT((body->valid & OBD_MD_FLID) != 0);
 
+                body = lustre_msg_buf((*reqp)->rq_repmsg,
+                                      DLM_REPLY_REC_OFF, sizeof(*body));
+                LASSERT(body != NULL);
+                LASSERT((body->valid & OBD_MD_FLID) != 0);
+
                 obj = lmv_obj_grab(obd, &body->fid1);
                 if (!obj) {
-                        obj = lmv_obj_create(exp, &body->fid1, mea);
+                        /* FIXME: remote capability */
+                        obj = lmv_obj_create(exp, &body->fid1, NULL, mea);
                         if (IS_ERR(obj))
                                 GOTO(out_free_sop_data, rc = (int)PTR_ERR(obj));
                 }
@@ -844,6 +867,7 @@ int lmv_revalidate_slaves(struct obd_export *exp, struct ptlrpc_request **reqp,
 
         for (i = 0; i < obj->lo_objcount; i++) {
                 struct lu_fid fid = obj->lo_inodes[i].li_fid;
+                struct obd_capa *oc = obj->lo_inodes[i].li_capa;
                 struct lustre_handle *lockh = NULL;
                 struct ptlrpc_request *req = NULL;
                 ldlm_blocking_callback cb;
@@ -883,6 +907,8 @@ int lmv_revalidate_slaves(struct obd_export *exp, struct ptlrpc_request **reqp,
 
                 op_data->fid1 = fid;
                 op_data->fid2 = fid;
+                op_data->mod_capa1 = oc;
+                op_data->mod_capa2 = oc;
 
                 /* is obj valid? */
                 tgt_exp = lmv_get_export(lmv, &fid);
index af1454e..ad45117 100644 (file)
@@ -44,6 +44,7 @@ struct qstr {
 
 struct lmv_inode {
         struct lu_fid      li_fid;        /* id of dirobj */
+        struct obd_capa   *li_capa;       /* fid capability */
         unsigned long      li_size;       /* slave size value */
         int                li_flags;
 };
@@ -97,6 +98,7 @@ struct lmv_obj *lmv_obj_alloc(struct obd_device *obd,
 
 struct lmv_obj *lmv_obj_create(struct obd_export *exp,
                               const struct lu_fid *fid,
+                               struct obd_capa *oc,
                               struct lmv_stripe_md *mea);
 
 int lmv_obj_delete(struct obd_export *exp,
@@ -133,7 +135,8 @@ int lmv_revalidate_slaves(struct obd_export *, struct ptlrpc_request **,
                          ldlm_blocking_callback cb_blocking,
                           int extra_lock_flags);
 
-int lmv_handle_split(struct obd_export *, const struct lu_fid *);
+int lmv_handle_split(struct obd_export *, const struct lu_fid *,
+                     struct obd_capa *oc);
 int lmv_blocking_ast(struct ldlm_lock *, struct ldlm_lock_desc *,
                     void *, int);
 int lmv_fld_lookup(struct lmv_obd *lmv, const struct lu_fid *fid,
index 83ed667..3148f81 100644 (file)
@@ -1050,7 +1050,8 @@ out_free_temp:
 }
 
 static int lmv_getstatus(struct obd_export *exp,
-                         struct lu_fid *fid)
+                         struct lu_fid *fid,
+                         struct obd_capa **pc)
 {
         struct obd_device *obd = exp->exp_obd;
         struct lmv_obd *lmv = &obd->u.lmv;
@@ -1061,15 +1062,15 @@ static int lmv_getstatus(struct obd_export *exp,
         if (rc)
                 RETURN(rc);
 
-        rc = md_getstatus(lmv->tgts[0].ltd_exp, fid);
+        rc = md_getstatus(lmv->tgts[0].ltd_exp, fid, pc);
 
         RETURN(rc);
 }
 
 static int lmv_getxattr(struct obd_export *exp, const struct lu_fid *fid,
-                        obd_valid valid, const char *name, const char *input,
-                        int input_size, int output_size, int flags,
-                        struct ptlrpc_request **request)
+                        struct obd_capa *oc, obd_valid valid, const char *name,
+                        const char *input, int input_size, int output_size,
+                        int flags, struct ptlrpc_request **request)
 {
         struct obd_device *obd = exp->exp_obd;
         struct lmv_obd *lmv = &obd->u.lmv;
@@ -1085,16 +1086,16 @@ static int lmv_getxattr(struct obd_export *exp, const struct lu_fid *fid,
         if (IS_ERR(tgt_exp))
                 RETURN(PTR_ERR(tgt_exp));
 
-        rc = md_getxattr(tgt_exp, fid, valid, name, input, input_size,
+        rc = md_getxattr(tgt_exp, fid, oc, valid, name, input, input_size,
                          output_size, flags, request);
 
         RETURN(rc);
 }
 
 static int lmv_setxattr(struct obd_export *exp, const struct lu_fid *fid,
-                        obd_valid valid, const char *name, const char *input,
-                        int input_size, int output_size, int flags,
-                        struct ptlrpc_request **request)
+                        struct obd_capa *oc, obd_valid valid, const char *name,
+                        const char *input, int input_size, int output_size,
+                        int flags, struct ptlrpc_request **request)
 {
         struct obd_device *obd = exp->exp_obd;
         struct lmv_obd *lmv = &obd->u.lmv;
@@ -1110,14 +1111,14 @@ static int lmv_setxattr(struct obd_export *exp, const struct lu_fid *fid,
         if (IS_ERR(tgt_exp))
                 RETURN(PTR_ERR(tgt_exp));
 
-        rc = md_setxattr(tgt_exp, fid, valid, name,
+        rc = md_setxattr(tgt_exp, fid, oc, valid, name,
                          input, input_size, output_size, flags, request);
         
         RETURN(rc);
 }
 
 static int lmv_getattr(struct obd_export *exp, const struct lu_fid *fid,
-                       obd_valid valid, int ea_size,
+                       struct obd_capa *oc, obd_valid valid, int ea_size,
                        struct ptlrpc_request **request)
 {
         struct obd_device *obd = exp->exp_obd;
@@ -1135,7 +1136,7 @@ static int lmv_getattr(struct obd_export *exp, const struct lu_fid *fid,
         if (IS_ERR(tgt_exp))
                 RETURN(PTR_ERR(tgt_exp));
 
-        rc = md_getattr(tgt_exp, fid, valid, ea_size, request);
+        rc = md_getattr(tgt_exp, fid, oc, valid, ea_size, request);
         if (rc)
                 RETURN(rc);
 
@@ -1157,7 +1158,7 @@ static int lmv_getattr(struct obd_export *exp, const struct lu_fid *fid,
                         RETURN(rc);
                 }
 
-                body = lustre_msg_buf((*request)->rq_repmsg, REQ_REC_OFF,
+                body = lustre_msg_buf((*request)->rq_repmsg, REPLY_REC_OFF,
                                       sizeof(*body));
                 LASSERT(body != NULL);
 
@@ -1184,10 +1185,8 @@ static int lmv_getattr(struct obd_export *exp, const struct lu_fid *fid,
         RETURN(rc);
 }
 
-static int lmv_change_cbdata(struct obd_export *exp,
-                             const struct lu_fid *fid,
-                             ldlm_iterator_t it,
-                             void *data)
+static int lmv_change_cbdata(struct obd_export *exp, const struct lu_fid *fid,
+                             ldlm_iterator_t it, void *data)
 {
         struct obd_device *obd = exp->exp_obd;
         struct lmv_obd *lmv = &obd->u.lmv;
@@ -1235,7 +1234,8 @@ static int lmv_close(struct obd_export *exp,
 
 /* called in the case MDS returns -ERESTART on create on open, what means that
  * directory is split and its LMV presentation object has to be updated. */
-int lmv_handle_split(struct obd_export *exp, const struct lu_fid *fid)
+int lmv_handle_split(struct obd_export *exp, const struct lu_fid *fid,
+                     struct obd_capa *oc)
 {
         struct obd_device *obd = exp->exp_obd;
         struct lmv_obd *lmv = &obd->u.lmv;
@@ -1257,7 +1257,7 @@ int lmv_handle_split(struct obd_export *exp, const struct lu_fid *fid)
                 RETURN(PTR_ERR(tgt_exp));
 
         /* time to update mea of parent fid */
-        rc = md_getattr(tgt_exp, fid, valid, mealen, &req);
+        rc = md_getattr(tgt_exp, fid, oc, valid, mealen, &req);
         if (rc) {
                 CERROR("md_getattr() failed, error %d\n", rc);
                 GOTO(cleanup, rc);
@@ -1272,7 +1272,7 @@ int lmv_handle_split(struct obd_export *exp, const struct lu_fid *fid)
         if (md.mea == NULL)
                 GOTO(cleanup, rc = -ENODATA);
 
-        obj = lmv_obj_create(exp, fid, md.mea);
+        obj = lmv_obj_create(exp, fid, oc, md.mea);
         if (IS_ERR(obj))
                 rc = PTR_ERR(obj);
         else
@@ -1312,7 +1312,8 @@ repeat:
                 
                 mds = raw_name2idx(obj->lo_hashtype, obj->lo_objcount,
                                    op_data->name, op_data->namelen);
-                op_data->fid1 = obj->lo_inodes[mds].li_fid;
+                op_data->fid1      = obj->lo_inodes[mds].li_fid;
+                op_data->mod_capa1 = obj->lo_inodes[mds].li_capa;
                 lmv_obj_put(obj);
         }
 
@@ -1328,13 +1329,14 @@ repeat:
         if (rc == 0) {
                 if (*request == NULL)
                         RETURN(rc);
-                CDEBUG(D_OTHER, "created. "DFID"\n", PFID(&op_data->fid1));
+                CDEBUG(D_OTHER, "created. "DFID"\n",
+                       PFID(&op_data->fid1));
         } else if (rc == -ERESTART) {
                 /*
                  * Directory got split. time to update local object and repeat
                  * the request with proper MDS.
                  */
-                rc = lmv_handle_split(exp, &op_data->fid1);
+                rc = lmv_handle_split(exp, &op_data->fid1, op_data->mod_capa1);
                 if (rc == 0) {
                         ptlrpc_req_finished(*request);
                         rc = lmv_alloc_fid_for_split(obd, &op_data->fid1,
@@ -1530,7 +1532,8 @@ lmv_enqueue(struct obd_export *exp, int lock_type,
                          * name */
                         mds = raw_name2idx(obj->lo_hashtype, obj->lo_objcount,
                                            (char *)op_data->name, op_data->namelen);
-                        op_data->fid1 = obj->lo_inodes[mds].li_fid;
+                        op_data->fid1      = obj->lo_inodes[mds].li_fid;
+                        op_data->mod_capa1 = obj->lo_inodes[mds].li_capa;
                         lmv_obj_put(obj);
                 }
         }
@@ -1555,13 +1558,14 @@ lmv_enqueue(struct obd_export *exp, int lock_type,
 
 static int
 lmv_getattr_name(struct obd_export *exp, const struct lu_fid *fid,
-                 const char *filename, int namelen, obd_valid valid,
-                 int ea_size, struct ptlrpc_request **request)
+                 struct obd_capa *oc, const char *filename, int namelen,
+                 obd_valid valid, int ea_size, struct ptlrpc_request **request)
 {
         struct obd_device *obd = exp->exp_obd;
         struct lmv_obd *lmv = &obd->u.lmv;
-        struct obd_export *tgt_exp;
         struct lu_fid rid = *fid;
+        struct obd_capa *rcapa = oc;
+        struct obd_export *tgt_exp;
         struct mdt_body *body;
         struct lmv_obj *obj;
         int rc, loop = 0;
@@ -1574,12 +1578,13 @@ lmv_getattr_name(struct obd_export *exp, const struct lu_fid *fid,
 
 repeat:
         LASSERT(++loop <= 2);
-        obj = lmv_obj_grab(obd, fid);
+        obj = lmv_obj_grab(obd, &rid);
         if (obj) {
                 /* directory is split. look for right mds for this name */
                 mds = raw_name2idx(obj->lo_hashtype, obj->lo_objcount,
                                    filename, namelen - 1);
                 rid = obj->lo_inodes[mds].li_fid;
+                rcapa = obj->lo_inodes[mds].li_capa;
                 lmv_obj_put(obj);
         }
 
@@ -1590,7 +1595,7 @@ repeat:
         if (IS_ERR(tgt_exp))
                 RETURN(PTR_ERR(tgt_exp));
 
-        rc = md_getattr_name(tgt_exp, &rid, filename, namelen, valid,
+        rc = md_getattr_name(tgt_exp, &rid, rcapa, filename, namelen, valid,
                              ea_size, request);
         if (rc == 0) {
                 body = lustre_msg_buf((*request)->rq_repmsg,
@@ -1610,15 +1615,15 @@ repeat:
                                 RETURN(PTR_ERR(tgt_exp));
                         }
                         
-                        rc = md_getattr_name(tgt_exp, &rid, NULL, 1, valid,
-                                             ea_size, &req);
+                        rc = md_getattr_name(tgt_exp, &rid, rcapa, NULL, 1,
+                                             valid, ea_size, &req);
                         ptlrpc_req_finished(*request);
                         *request = req;
                 }
         } else if (rc == -ERESTART) {
                 /* directory got split. time to update local object and repeat
                  * the request with proper MDS */
-                rc = lmv_handle_split(exp, &rid);
+                rc = lmv_handle_split(exp, &rid, rcapa);
                 if (rc == 0) {
                         ptlrpc_req_finished(*request);
                         goto repeat;
@@ -1651,7 +1656,8 @@ static int lmv_link(struct obd_export *exp, struct md_op_data *op_data,
                 if (obj) {
                         rc = raw_name2idx(obj->lo_hashtype, obj->lo_objcount,
                                           op_data->name, op_data->namelen);
-                        op_data->fid2 = obj->lo_inodes[rc].li_fid;
+                        op_data->fid2      = obj->lo_inodes[rc].li_fid;
+                        op_data->mod_capa2 = obj->lo_inodes[rc].li_capa;
                         lmv_obj_put(obj);
                 }
 
@@ -1695,8 +1701,8 @@ static int lmv_rename(struct obd_export *exp, struct md_op_data *op_data,
         ENTRY;
 
         CDEBUG(D_OTHER, "rename %*s in "DFID" to %*s in "DFID"\n",
-               oldlen, old, PFID(&op_data->fid1), newlen, new,
-               PFID(&op_data->fid2));
+               oldlen, old, PFID(&op_data->fid1),
+               newlen, new, PFID(&op_data->fid2));
 
         rc = lmv_check_connect(obd);
        if (rc)
@@ -1724,7 +1730,8 @@ static int lmv_rename(struct obd_export *exp, struct md_op_data *op_data,
                 if (obj) {
                         mds = raw_name2idx(obj->lo_hashtype, obj->lo_objcount,
                                            (char *)new, newlen);
-                        op_data->fid2 = obj->lo_inodes[mds].li_fid;
+                        op_data->fid2      = obj->lo_inodes[mds].li_fid;
+                        op_data->mod_capa2 = obj->lo_inodes[mds].li_capa;
                         CDEBUG(D_OTHER, "forward to MDS #"LPU64" ("DFID")\n", mds,
                                PFID(&op_data->fid2));
                         lmv_obj_put(obj);
@@ -1740,7 +1747,8 @@ static int lmv_rename(struct obd_export *exp, struct md_op_data *op_data,
                  */
                 mds = raw_name2idx(obj->lo_hashtype, obj->lo_objcount,
                                    (char *)old, oldlen);
-                op_data->fid1 = obj->lo_inodes[mds].li_fid;
+                op_data->fid1      = obj->lo_inodes[mds].li_fid;
+                op_data->mod_capa1 = obj->lo_inodes[mds].li_capa;
                 CDEBUG(D_OTHER, "forward to MDS #"LPU64" ("DFID")\n", mds,
                        PFID(&op_data->fid1));
                 lmv_obj_put(obj);
@@ -1756,6 +1764,7 @@ static int lmv_rename(struct obd_export *exp, struct md_op_data *op_data,
                                    (char *)new, newlen);
 
                 op_data->fid2 = obj->lo_inodes[mds].li_fid;
+                op_data->mod_capa2 = obj->lo_inodes[mds].li_capa;
                 CDEBUG(D_OTHER, "forward to MDS #"LPU64" ("DFID")\n", mds,
                        PFID(&op_data->fid2));
                 lmv_obj_put(obj);
@@ -1772,8 +1781,8 @@ request:
 
         if (mds != mds2) {
                 CDEBUG(D_OTHER,"cross-node rename "DFID"/%*s to "DFID"/%*s\n",
-                       PFID(&op_data->fid1), oldlen, old, PFID(&op_data->fid2),
-                       newlen, new);
+                       PFID(&op_data->fid1), oldlen, old,
+                       PFID(&op_data->fid2), newlen, new);
         }
         op_data->fsuid = current->fsuid;
         op_data->fsgid = current->fsgid;
@@ -1807,7 +1816,8 @@ static int lmv_setattr(struct obd_export *exp, struct md_op_data *op_data,
 
         if (obj) {
                 for (i = 0; i < obj->lo_objcount; i++) {
-                        op_data->fid1 = obj->lo_inodes[i].li_fid;
+                        op_data->fid1      = obj->lo_inodes[i].li_fid;
+                        op_data->mod_capa1 = obj->lo_inodes[i].li_capa;
 
                         tgt_exp = lmv_get_export(lmv, &op_data->fid1);
                         if (IS_ERR(tgt_exp)) {
@@ -1844,7 +1854,7 @@ static int lmv_setattr(struct obd_export *exp, struct md_op_data *op_data,
 }
 
 static int lmv_sync(struct obd_export *exp, const struct lu_fid *fid,
-                    struct ptlrpc_request **request)
+                    struct obd_capa *oc, struct ptlrpc_request **request)
 {
         struct obd_device *obd = exp->exp_obd;
         struct lmv_obd *lmv = &obd->u.lmv;
@@ -1860,7 +1870,7 @@ static int lmv_sync(struct obd_export *exp, const struct lu_fid *fid,
         if (IS_ERR(tgt_exp))
                 RETURN(PTR_ERR(tgt_exp));
 
-        rc = md_sync(tgt_exp, fid, request);
+        rc = md_sync(tgt_exp, fid, oc, request);
         RETURN(rc);
 }
 
@@ -1912,7 +1922,8 @@ static int lmv_reset_hash_seg_end (struct lmv_obd *lmv, struct lmv_obj *obj,
         struct page *page = NULL;
         struct lu_dirpage *next_dp;
         struct obd_export *tgt_exp;
-        struct lu_fid rid = *fid;
+        struct lu_fid rid;
+        struct obd_capa *rcapa;
         __u32 seg_end, max_hash = MAX_HASH_SIZE;
         int rc = 0;
         
@@ -1926,6 +1937,7 @@ static int lmv_reset_hash_seg_end (struct lmv_obd *lmv, struct lmv_obj *obj,
         
         /* Get start offset from next segment */
         rid = obj->lo_inodes[index].li_fid;
+        rcapa = obj->lo_inodes[index].li_capa;
         tgt_exp = lmv_get_export(lmv, &rid);
         if (IS_ERR(tgt_exp))
                 GOTO(cleanup, PTR_ERR(tgt_exp));
@@ -1936,7 +1948,7 @@ static int lmv_reset_hash_seg_end (struct lmv_obd *lmv, struct lmv_obj *obj,
         if (!page)
                 GOTO(cleanup, rc = -ENOMEM);
     
-        rc = md_readpage(tgt_exp, &rid, seg_end, page, &tmp_req);
+        rc = md_readpage(tgt_exp, &rid, rcapa, seg_end, page, &tmp_req);
         if (rc) {
                 /* E2BIG means it already reached the end of the dir, 
                  * no need reset the hash segment end */
@@ -1963,15 +1975,15 @@ cleanup:
         RETURN(rc);
 }
 
-static int lmv_readpage(struct obd_export *exp,
-                        const struct lu_fid *fid,
-                        __u64 offset, struct page *page,
+static int lmv_readpage(struct obd_export *exp, const struct lu_fid *fid,
+                        struct obd_capa *oc, __u64 offset, struct page *page,
                         struct ptlrpc_request **request)
 {
         struct obd_device *obd = exp->exp_obd;
         struct lmv_obd *lmv = &obd->u.lmv;
         struct obd_export *tgt_exp;
         struct lu_fid rid = *fid;
+        struct obd_capa *rcapa = oc;
         struct lmv_obj *obj;
         int i = 0, rc;
         ENTRY;
@@ -1994,6 +2006,7 @@ static int lmv_readpage(struct obd_export *exp,
                 do_div(index, seg);
                 i = (int)index;
                 rid = obj->lo_inodes[i].li_fid;
+                rcapa = obj->lo_inodes[i].li_capa;
 
                 lmv_obj_unlock(obj);
 
@@ -2005,7 +2018,7 @@ static int lmv_readpage(struct obd_export *exp,
         if (IS_ERR(tgt_exp))
                 GOTO(cleanup, PTR_ERR(tgt_exp));
 
-        rc = md_readpage(tgt_exp, &rid, offset, page, request);
+        rc = md_readpage(tgt_exp, &rid, rcapa, offset, page, request);
         if (rc) 
                 GOTO(cleanup, rc);
 
@@ -2108,7 +2121,8 @@ static int lmv_unlink(struct obd_export *exp, struct md_op_data *op_data,
                 if (obj) {
                         i = raw_name2idx(obj->lo_hashtype, obj->lo_objcount,
                                          op_data->name, op_data->namelen);
-                        op_data->fid1 = obj->lo_inodes[i].li_fid;
+                        op_data->fid1      = obj->lo_inodes[i].li_fid;
+                        op_data->mod_capa1 = obj->lo_inodes[i].li_capa;
                         lmv_obj_put(obj);
                         CDEBUG(D_OTHER, "unlink '%*s' in "DFID" -> %u\n",
                                op_data->namelen, op_data->name,
@@ -2479,6 +2493,7 @@ int lmv_clear_open_replay_data(struct obd_export *exp,
 }
 
 static int lmv_get_remote_perm(struct obd_export *exp, const struct lu_fid *fid,
+                               struct obd_capa *oc,
                                struct ptlrpc_request **request)
 {
         struct obd_device *obd = exp->exp_obd;
@@ -2496,8 +2511,29 @@ static int lmv_get_remote_perm(struct obd_export *exp, const struct lu_fid *fid,
         if (IS_ERR(tgt_exp))
                 RETURN(PTR_ERR(tgt_exp));
 
-        rc = md_get_remote_perm(tgt_exp, fid, request);
+        rc = md_get_remote_perm(tgt_exp, fid, oc, request);
+
+        RETURN(rc);
+}
+
+static int lmv_renew_capa(struct obd_export *exp, struct obd_capa *ocapa,
+                          renew_capa_cb_t cb)
+{       /* renew capability ocapa via the target export chosen by the capability's fid */
+        struct obd_device *obd = exp->exp_obd;
+        struct lmv_obd *lmv = &obd->u.lmv;
+        struct obd_export *tgt_exp;
+        int rc;
+        ENTRY;
+
+        rc = lmv_check_connect(obd); /* non-zero result is returned to the caller as-is */
+        if (rc)
+                RETURN(rc);
+
+        tgt_exp = lmv_get_export(lmv, &ocapa->c_capa.lc_fid); /* target is selected by the fid stored in the capability */
+        if (IS_ERR(tgt_exp))
+                RETURN(PTR_ERR(tgt_exp));
 
+        rc = md_renew_capa(tgt_exp, ocapa, cb); /* hand the capability and callback cb to that target */
         RETURN(rc);
 }
 
@@ -2550,7 +2586,8 @@ struct md_ops lmv_md_ops = {
         .m_free_lustre_md       = lmv_free_lustre_md,
         .m_set_open_replay_data = lmv_set_open_replay_data,
         .m_clear_open_replay_data = lmv_clear_open_replay_data,
-        .m_get_remote_perm      = lmv_get_remote_perm
+        .m_get_remote_perm      = lmv_get_remote_perm,
+        .m_renew_capa           = lmv_renew_capa
 };
 
 int __init lmv_init(void)
index f37f5cc..00b1c7a 100644 (file)
@@ -282,7 +282,7 @@ __lmv_obj_create(struct obd_device *obd, const struct lu_fid *fid,
  * obtained from correct MDT and used for constructing the object. */
 struct lmv_obj *
 lmv_obj_create(struct obd_export *exp, const struct lu_fid *fid,
-               struct lmv_stripe_md *mea)
+               struct obd_capa *oc, struct lmv_stripe_md *mea)
 {
         struct obd_device *obd = exp->exp_obd;
         struct lmv_obd *lmv = &obd->u.lmv;
@@ -312,7 +312,7 @@ lmv_obj_create(struct obd_export *exp, const struct lu_fid *fid,
                 if (IS_ERR(tgt_exp))
                         GOTO(cleanup, obj = (void *)tgt_exp);
 
-                rc = md_getattr(tgt_exp, fid, valid, mealen, &req);
+                rc = md_getattr(tgt_exp, fid, oc, valid, mealen, &req);
                 if (rc) {
                         CERROR("md_getattr() failed, error %d\n", rc);
                         GOTO(cleanup, obj = ERR_PTR(rc));
index 1f2cccf..f81813a 100644 (file)
@@ -1031,7 +1031,7 @@ do {
 
 static int lov_destroy(struct obd_export *exp, struct obdo *oa,
                        struct lov_stripe_md *lsm, struct obd_trans_info *oti,
-                       struct obd_export *md_exp)
+                       struct obd_export *md_exp, void *capa)
 {
         struct lov_request_set *set;
         struct obd_info oinfo;
@@ -1064,7 +1064,7 @@ static int lov_destroy(struct obd_export *exp, struct obdo *oa,
                         oti->oti_logcookies = set->set_cookies + req->rq_stripe;
 
                 err = obd_destroy(lov->lov_tgts[req->rq_idx]->ltd_exp,
-                                  req->rq_oi.oi_oa, NULL, oti, NULL);
+                                  req->rq_oi.oi_oa, NULL, oti, NULL, capa);
                 err = lov_update_common_set(set, req, err);
                 if (err) {
                         CERROR("error: destroying objid "LPX64" subobj "
@@ -1400,7 +1400,8 @@ static int lov_punch(struct obd_export *exp, struct obd_info *oinfo,
 }
 
 static int lov_sync(struct obd_export *exp, struct obdo *oa,
-                    struct lov_stripe_md *lsm, obd_off start, obd_off end)
+                    struct lov_stripe_md *lsm, obd_off start, obd_off end,
+                    void *capa)
 {
         struct lov_request_set *set;
         struct obd_info oinfo;
@@ -1426,7 +1427,7 @@ static int lov_sync(struct obd_export *exp, struct obdo *oa,
                 rc = obd_sync(lov->lov_tgts[req->rq_idx]->ltd_exp, 
                               req->rq_oi.oi_oa, NULL, 
                               req->rq_oi.oi_policy.l_extent.start,
-                              req->rq_oi.oi_policy.l_extent.end);
+                              req->rq_oi.oi_policy.l_extent.end, capa);
                 err = lov_update_common_set(set, req, rc);
                 if (err) {
                         CERROR("error: fsync objid "LPX64" subobj "LPX64
@@ -1626,12 +1627,20 @@ static int lov_ap_completion(void *data, int cmd, struct obdo *oa, int rc)
         return rc;
 }
 
+static struct obd_capa *lov_ap_lookup_capa(void *data, int cmd)
+{       /* LOV's ap_lookup_capa hook: forwards the lookup to the caller's own ops */
+        struct lov_async_page *lap = LAP_FROM_COOKIE(data); /* data is the cookie identifying the LOV async page */
+
+        return lap->lap_caller_ops->ap_lookup_capa(lap->lap_caller_data, cmd); /* NOTE(review): caller's hook is invoked without a NULL check -- confirm all ops tables set it */
+}
+
 static struct obd_async_page_ops lov_async_page_ops = {
         .ap_make_ready =        lov_ap_make_ready,
         .ap_refresh_count =     lov_ap_refresh_count,
         .ap_fill_obdo =         lov_ap_fill_obdo,
         .ap_update_obdo =       lov_ap_update_obdo,
         .ap_completion =        lov_ap_completion,
+        .ap_lookup_capa =       lov_ap_lookup_capa,
 };
 
 int lov_prep_async_page(struct obd_export *exp, struct lov_stripe_md *lsm,
@@ -2391,7 +2400,7 @@ static int lov_set_info_async(struct obd_export *exp, obd_count keylen,
                 GOTO(out, rc);
         }
 
-        if (KEY_IS("evict_by_nid")) {
+        if (KEY_IS("evict_by_nid") || KEY_IS(KEY_CAPA_KEY)) {
                 for (i = 0; i < lov->desc.ld_tgt_count; i++) {
                         /* OST was disconnected or is inactive */
                         if (!lov->lov_tgts[i] || !lov->lov_tgts[i]->ltd_active)
index 92d5467..6538c45 100644 (file)
@@ -615,7 +615,8 @@ cleanup:
                         continue;
 
                 sub_exp = lov->lov_tgts[req->rq_idx]->ltd_exp;
-                err = obd_destroy(sub_exp, req->rq_oi.oi_oa, NULL, oti, NULL);
+                err = obd_destroy(sub_exp, req->rq_oi.oi_oa, NULL, oti, NULL,
+                                  NULL);
                 if (err)
                         CERROR("Failed to uncreate objid "LPX64" subobj "
                                LPX64" on OST idx %d: rc = %d\n",
index 311f5df..d444750 100644 (file)
 
 void mdc_pack_req_body(struct ptlrpc_request *req, int offset,
                        __u64 valid, const struct lu_fid *fid,
-                       int ea_size, int flags);
+                       struct obd_capa *oc, int ea_size, int flags);
+void mdc_pack_capa(struct ptlrpc_request *req, int offset, struct obd_capa *oc);
 void mdc_pack_rep_body(struct ptlrpc_request *);
 void mdc_is_subdir_pack(struct ptlrpc_request *req, int offset,
-                        const struct lu_fid *pfid,
-                        const struct lu_fid *cfid, int flags);
+                        const struct lu_fid *pfid, const struct lu_fid *cfid,
+                        struct obd_capa *pc, struct obd_capa *cc,
+                        int flags);
 void mdc_readdir_pack(struct ptlrpc_request *req, int pos, __u64 offset,
-                     __u32 size, const struct lu_fid *fid);
+                     __u32 size, const struct lu_fid *fid,
+                      struct obd_capa *oc);
 void mdc_getattr_pack(struct ptlrpc_request *req, int offset, __u64 valid,
                       int flags, struct md_op_data *data);
 void mdc_setattr_pack(struct ptlrpc_request *req, int offset,
@@ -139,25 +142,6 @@ int mdc_enqueue(struct obd_export *exp,
 int mdc_init_ea_size(struct obd_export *exp, int easize, int def_easzie,
                      int cookiesize);
 
-int mdc_getstatus(struct obd_export *exp, struct lu_fid *rootfid);
-int mdc_getattr(struct obd_export *exp, const struct lu_fid *fid,
-                obd_valid valid, int ea_size,
-                struct ptlrpc_request **request);
-int mdc_getattr_name(struct obd_export *exp, const struct lu_fid *fid,
-                     const char *filename, int namelen, obd_valid valid,
-                     int ea_size, struct ptlrpc_request **request);
-int mdc_setattr(struct obd_export *exp, struct md_op_data *op_data,
-                void *ea, int ealen, void *ea2, int ea2len,
-                struct ptlrpc_request **request);
-int mdc_setxattr(struct obd_export *exp, const struct lu_fid *fid,
-                 obd_valid valid, const char *xattr_name,
-                 const char *input, int input_size,
-                 int output_size, int flags,
-                 struct ptlrpc_request **request);
-int mdc_getxattr(struct obd_export *exp, const struct lu_fid *fid,
-                 obd_valid valid, const char *xattr_name,
-                 const char *input, int input_size,
-                 int output_size, int flags, struct ptlrpc_request **request);
 int mdc_open(struct obd_export *exp, obd_id ino, int type, int flags,
              struct lov_mds_md *lmm, int lmm_size, struct lustre_handle *fh,
              struct ptlrpc_request **);
@@ -178,42 +162,23 @@ int mdc_set_open_replay_data(struct obd_export *exp,
 int mdc_clear_open_replay_data(struct obd_export *exp,
                                struct obd_client_handle *och);
 
-int mdc_close(struct obd_export *, struct md_op_data *,
-              struct obd_client_handle *och, struct ptlrpc_request **);
-
-int mdc_readpage(struct obd_export *exp, const struct lu_fid *fid,
-                 __u64 offset,  struct page *, struct ptlrpc_request **);
-
 int mdc_create(struct obd_export *exp, struct md_op_data *op_data,
-               const void *data, int datalen, int mode, __u32 uid,
-               __u32 gid, __u32 cap_effective, __u64 rdev,
-               struct ptlrpc_request **request);
-
-int mdc_unlink(struct obd_export *exp, struct md_op_data *op_data,
-               struct ptlrpc_request **request);
-
+               const void *data, int datalen, int mode, __u32 uid, __u32 gid,
+               __u32 cap_effective, __u64 rdev, struct ptlrpc_request **request);
 int mdc_link(struct obd_export *exp, struct md_op_data *op_data,
-             struct ptlrpc_request **);
-
+             struct ptlrpc_request **request);
 int mdc_rename(struct obd_export *exp, struct md_op_data *op_data,
                const char *old, int oldlen, const char *new, int newlen,
                struct ptlrpc_request **request);
-
-int mdc_is_subdir(struct obd_export *exp, const struct lu_fid *pfid,
-                  const struct lu_fid *cfid, struct ptlrpc_request **request);
-
-int mdc_sync(struct obd_export *exp, const struct lu_fid *fid,
-             struct ptlrpc_request **);
-
+int mdc_setattr(struct obd_export *exp, struct md_op_data *op_data,
+                void *ea, int ealen, void *ea2, int ea2len,
+                struct ptlrpc_request **request);
+int mdc_unlink(struct obd_export *exp, struct md_op_data *op_data,
+               struct ptlrpc_request **request);
+int mdc_cancel_unused(struct obd_export *exp, const struct lu_fid *fid,
+                      int flags, void *opaque);
 int mdc_lock_match(struct obd_export *exp, int flags,
                    const struct lu_fid *fid, ldlm_type_t type,
                    ldlm_policy_data_t *policy, ldlm_mode_t mode,
                    struct lustre_handle *lockh);
-
-int mdc_cancel_unused(struct obd_export *exp, const struct lu_fid *fid,
-                      int flags, void *opaque);
-
-int mdc_done_writing(struct obd_export *exp, struct md_op_data *op_data,
-                     struct obd_client_handle *och);
-
 #endif
index f89cbbe..1b43f1a 100644 (file)
 #endif
 #endif
 
-void mdc_readdir_pack(struct ptlrpc_request *req, int pos, __u64 offset,
-                      __u32 size, const struct lu_fid *fid)
+static void mdc_pack_body(struct mdt_body *b)
 {
-        struct mdt_body *b;
+        LASSERT (b != NULL);
 
-        b = lustre_msg_buf(req->rq_reqmsg, pos, sizeof(*b));
         b->fsuid = current->fsuid;
         b->fsgid = current->fsgid;
         b->capability = current->cap_effective;
-        b->fid1 = *fid;
-        b->size = offset;                       /* !! */
-        b->suppgid = -1;
-        b->nlink = size;                        /* !! */
+}
+
+void mdc_pack_capa(struct ptlrpc_request *req, int offset, struct obd_capa *oc)
+{       /* pack capability oc into request buffer number 'offset'; nothing is written when oc is NULL */
+        struct lustre_capa *c;
+
+        if (!oc) { /* no capability supplied by the caller */
+                LASSERT(lustre_msg_buflen(req->rq_reqmsg, offset) == 0); /* the buffer at this offset must then have zero length */
+                return;
+        }
+
+        c = lustre_msg_buf(req->rq_reqmsg, offset, sizeof(*c)); /* buffer must hold at least one struct lustre_capa */
+        LASSERT(c);
+        capa_cpy(c, oc); /* presumably copies oc's capability into the request buffer -- verify against capa_cpy() */
+        DEBUG_CAPA(D_SEC, c, "pack");
+}
 
 void mdc_is_subdir_pack(struct ptlrpc_request *req, int offset,
                         const struct lu_fid *pfid,
-                        const struct lu_fid *cfid, int flags)
+                        const struct lu_fid *cfid,
+                        struct obd_capa *pc,
+                        struct obd_capa *cc, int flags)
 {
         struct mdt_body *b = lustre_msg_buf(req->rq_reqmsg, offset, sizeof(*b));
 
-        if (pfid)
+        if (pfid) { /* parent fid and its capability are packed together, or not at all */
                 b->fid1 = *pfid;
-        if (cfid)
+                mdc_pack_capa(req, offset + 1, pc); /* parent capability goes in the buffer right after the body */
+        }
+        if (cfid) { /* same pairing for the child fid and capability */
                 b->fid2 = *cfid;
+                mdc_pack_capa(req, offset + 2, cc); /* child capability goes two buffers after the body */
+        }
         b->valid = OBD_MD_FLID;
         b->flags = flags;
 }
 
-static void mdc_pack_body(struct mdt_body *b)
-{
-        LASSERT (b != NULL);
-
-        b->fsuid = current->fsuid;
-        b->fsgid = current->fsgid;
-        b->capability = current->cap_effective;
-}
-
 void mdc_pack_req_body(struct ptlrpc_request *req, int offset,
                        __u64 valid, const struct lu_fid *fid,
-                       int ea_size, int flags)
+                       struct obd_capa *oc, int ea_size, int flags)
 {
         struct mdt_body *b = lustre_msg_buf(req->rq_reqmsg, offset, sizeof(*b));
 
-        if (fid)
-                b->fid1 = *fid;
         b->valid = valid;
         b->eadatasize = ea_size;
         b->flags = flags;
         mdc_pack_body(b);
+        if (fid) { /* capability oc is packed only when a fid is given */
+                b->fid1 = *fid;
+                mdc_pack_capa(req, offset + 1, oc); /* capability goes in the buffer right after the body */
+        }
+}
+
+void mdc_readdir_pack(struct ptlrpc_request *req, int offset, __u64 pgoff,
+                      __u32 size, const struct lu_fid *fid,
+                      struct obd_capa *oc)
+{       /* fill the mdt_body at buffer 'offset' for a readdir request, then pack oc into the next buffer */
+        struct mdt_body *b;
+
+        b = lustre_msg_buf(req->rq_reqmsg, offset, sizeof(*b)); /* NOTE(review): b is dereferenced below before mdc_pack_body() asserts it is non-NULL */
+        b->fid1 = *fid;
+        b->size = pgoff;                       /* !! the size field carries the page offset */
+        b->suppgid = -1;
+        b->nlink = size;                        /* !! the nlink field carries the requested size */
+        mdc_pack_body(b); /* fills fsuid, fsgid and capability from the current task */
+        mdc_pack_capa(req, offset + 1, oc); /* capability goes in the buffer right after the body */
 }
 
 /* packing of MDS records */
@@ -111,12 +134,14 @@ void mdc_create_pack(struct ptlrpc_request *req, int offset,
         rec->cr_time = op_data->mod_time;
         rec->cr_suppgid = op_data->suppgids[0];
         rec->cr_flags = op_data->flags;
-        
-        tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1, op_data->namelen + 1);
+
+        mdc_pack_capa(req, offset + 1, op_data->mod_capa1);
+
+        tmp = lustre_msg_buf(req->rq_reqmsg, offset + 2, op_data->namelen + 1);
         LOGL0(op_data->name, op_data->namelen, tmp);
 
         if (data) {
-                tmp = lustre_msg_buf(req->rq_reqmsg, offset + 2, datalen);
+                tmp = lustre_msg_buf(req->rq_reqmsg, offset + 3, datalen);
                 memcpy (tmp, data, datalen);
         }
 }
@@ -176,8 +201,12 @@ void mdc_open_pack(struct ptlrpc_request *req, int offset,
         rec->cr_time = op_data->mod_time;
         rec->cr_suppgid = op_data->suppgids[0];
 
+        mdc_pack_capa(req, offset + 1, op_data->mod_capa1);
+        /* the next buffer holds the child capa, which is used for replay;
+         * it will be packed from the data in the reply message. */
+
         if (op_data->name) {
-                tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1,
+                tmp = lustre_msg_buf(req->rq_reqmsg, offset + 3,
                                      op_data->namelen + 1);
                 LOGL0(op_data->name, op_data->namelen, tmp);
         }
@@ -188,7 +217,7 @@ void mdc_open_pack(struct ptlrpc_request *req, int offset,
                 /*XXX a hack for liblustre to set EA (LL_IOC_LOV_SETSTRIPE) */
                 rec->cr_fid2 = op_data->fid2;
 #endif
-                tmp = lustre_msg_buf(req->rq_reqmsg, offset + 2, lmmlen);
+                tmp = lustre_msg_buf(req->rq_reqmsg, offset + 4, lmmlen);
                 memcpy (tmp, lmm, lmmlen);
         }
 }
@@ -237,8 +266,10 @@ void mdc_setattr_pack(struct ptlrpc_request *req, int offset,
         rec = lustre_msg_buf(req->rq_reqmsg, offset, sizeof (*rec));        
         mdc_setattr_pack_rec(rec, op_data);
 
+        mdc_pack_capa(req, offset + 1, op_data->mod_capa1);
+
         if (op_data->flags & (MF_SOM_CHANGE | MF_EPOCH_OPEN)) {
-                epoch = lustre_msg_buf(req->rq_reqmsg, offset + 1,
+                epoch = lustre_msg_buf(req->rq_reqmsg, offset + 2,
                                         sizeof(*epoch));
                 mdc_epoch_pack(epoch, op_data);
         }
@@ -246,12 +277,12 @@ void mdc_setattr_pack(struct ptlrpc_request *req, int offset,
         if (ealen == 0)
                 return;
 
-        memcpy(lustre_msg_buf(req->rq_reqmsg, offset + 2, ealen), ea, ealen);
+        memcpy(lustre_msg_buf(req->rq_reqmsg, offset + 3, ealen), ea, ealen);
 
         if (ea2len == 0)
                 return;
 
-        memcpy(lustre_msg_buf(req->rq_reqmsg, offset + 3, ea2len), ea2, ea2len);
+        memcpy(lustre_msg_buf(req->rq_reqmsg, offset + 4, ea2len), ea2, ea2len);
 }
 
 void mdc_unlink_pack(struct ptlrpc_request *req, int offset,
@@ -273,7 +304,9 @@ void mdc_unlink_pack(struct ptlrpc_request *req, int offset,
         rec->ul_fid2 = op_data->fid2;
         rec->ul_time = op_data->mod_time;
 
-        tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1, op_data->namelen + 1);
+        mdc_pack_capa(req, offset + 1, op_data->mod_capa1);
+
+        tmp = lustre_msg_buf(req->rq_reqmsg, offset + 2, op_data->namelen + 1);
         LASSERT (tmp != NULL);
         LOGL0(op_data->name, op_data->namelen, tmp);
 }
@@ -296,7 +329,10 @@ void mdc_link_pack(struct ptlrpc_request *req, int offset,
         rec->lk_fid2 = op_data->fid2;
         rec->lk_time = op_data->mod_time;
 
-        tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1, op_data->namelen + 1);
+        mdc_pack_capa(req, offset + 1, op_data->mod_capa1);
+        mdc_pack_capa(req, offset + 2, op_data->mod_capa2);
+
+        tmp = lustre_msg_buf(req->rq_reqmsg, offset + 3, op_data->namelen + 1);
         LOGL0(op_data->name, op_data->namelen, tmp);
 }
 
@@ -321,11 +357,14 @@ void mdc_rename_pack(struct ptlrpc_request *req, int offset,
         rec->rn_time = op_data->mod_time;
         rec->rn_mode = op_data->create_mode;
 
-        tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1, oldlen + 1);
+        mdc_pack_capa(req, offset + 1, op_data->mod_capa1);
+        mdc_pack_capa(req, offset + 2, op_data->mod_capa2);
+
+        tmp = lustre_msg_buf(req->rq_reqmsg, offset + 3, oldlen + 1);
         LOGL0(old, oldlen, tmp);
 
         if (new) {
-                tmp = lustre_msg_buf(req->rq_reqmsg, offset + 2, newlen + 1);
+                tmp = lustre_msg_buf(req->rq_reqmsg, offset + 4, newlen + 1);
                 LOGL0(new, newlen, tmp);
         }
 }
@@ -345,9 +384,12 @@ void mdc_getattr_pack(struct ptlrpc_request *req, int offset, __u64 valid,
 
         b->fid1 = op_data->fid1;
         b->fid2 = op_data->fid2;
+
+        mdc_pack_capa(req, offset + 1, op_data->mod_capa1);
+
         if (op_data->name) {
                 char *tmp;
-                tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1,
+                tmp = lustre_msg_buf(req->rq_reqmsg, offset + 2,
                                      op_data->namelen + 1);
                 LOGL0(op_data->name, op_data->namelen, tmp);
         }
@@ -363,6 +405,7 @@ void mdc_close_pack(struct ptlrpc_request *req, int offset,
         rec = lustre_msg_buf(req->rq_reqmsg, offset + 1, sizeof(*rec));
 
         mdc_setattr_pack_rec(rec, op_data);
+        mdc_pack_capa(req, offset + 2, op_data->mod_capa1);
         mdc_epoch_pack(epoch, op_data);
 }
 
index 6b688eb..a1c19d6 100644 (file)
@@ -233,16 +233,16 @@ static int round_up(int val)
  * but this is incredibly unlikely, and questionable whether the client
  * could do MDS recovery under OOM anyways... */
 static void mdc_realloc_openmsg(struct ptlrpc_request *req,
-                                struct mdt_body *body, int size[6])
+                                struct mdt_body *body, int size[9])
 {
         int new_size, old_size;
         struct lustre_msg *new_msg;
 
         /* save old size */
-        old_size = lustre_msg_size(lustre_request_magic(req), 6, size);
+        old_size = lustre_msg_size(lustre_request_magic(req), 9, size);
 
-        size[DLM_INTENT_REC_OFF + 2] = body->eadatasize;
-        new_size = lustre_msg_size(lustre_request_magic(req), 6, size);
+        size[DLM_INTENT_REC_OFF + 4] = body->eadatasize;
+        new_size = lustre_msg_size(lustre_request_magic(req), 9, size);
         OBD_ALLOC(new_msg, new_size);
         if (new_msg != NULL) {
                 struct lustre_msg *old_msg = req->rq_reqmsg;
@@ -250,7 +250,7 @@ static void mdc_realloc_openmsg(struct ptlrpc_request *req,
                 DEBUG_REQ(D_INFO, req, "replace reqmsg for larger EA %u\n",
                           body->eadatasize);
                 memcpy(new_msg, old_msg, old_size);
-                lustre_msg_set_buflen(new_msg, DLM_INTENT_REC_OFF + 2,
+                lustre_msg_set_buflen(new_msg, DLM_INTENT_REC_OFF + 4,
                                       body->eadatasize);
 
                 spin_lock(&req->rq_lock);
@@ -289,16 +289,16 @@ int mdc_enqueue(struct obd_export *exp,
         struct ldlm_request *lockreq;
         struct ldlm_intent *lit;
         struct ldlm_reply *lockrep;
-        int size[7] = { [MSG_PTLRPC_BODY_OFF] = sizeof(struct ptlrpc_body),
+        int size[9] = { [MSG_PTLRPC_BODY_OFF] = sizeof(struct ptlrpc_body),
                         [DLM_LOCKREQ_OFF]     = sizeof(*lockreq),
                         [DLM_INTENT_IT_OFF]   = sizeof(*lit) };
-        int repsize[5] = { [MSG_PTLRPC_BODY_OFF] = sizeof(struct ptlrpc_body),
+        int repsize[7] = { [MSG_PTLRPC_BODY_OFF] = sizeof(struct ptlrpc_body),
                            [DLM_LOCKREPLY_OFF]   = sizeof(*lockrep),
                            [DLM_REPLY_REC_OFF]   = sizeof(struct mdt_body),
                            [DLM_REPLY_REC_OFF+1] = obddev->u.cli.
                                                    cl_max_mds_easize };
         int flags = extra_lock_flags | LDLM_FL_HAS_INTENT;
-        int repbufcnt = 4, rc;
+        int repbufcnt = 4, ea_off, rc;
         void *eadata;
         ENTRY;
 
@@ -307,24 +307,40 @@ int mdc_enqueue(struct obd_export *exp,
 //                          ldlm_it2str(it->it_op), it_name, it_inode->i_ino);
 
         if (it->it_op & IT_OPEN) {
+                int do_join = !!(it->it_flags & O_JOIN_FILE);
+
                 it->it_create_mode = (it->it_create_mode & ~S_IFMT) | S_IFREG;
 
                 size[DLM_INTENT_REC_OFF] = sizeof(struct mdt_rec_create);
-                size[DLM_INTENT_REC_OFF + 1] = op_data->namelen + 1;
+                /* parent capability */
+                size[DLM_INTENT_REC_OFF + 1] = op_data->mod_capa1 ?
+                                               sizeof(struct lustre_capa) : 0;
+                /* child capability, used for replay only */
+                size[DLM_INTENT_REC_OFF + 2] = op_data->mod_capa1 ?
+                                               sizeof(struct lustre_capa) : 0;
+                size[DLM_INTENT_REC_OFF + 3] = op_data->namelen + 1;
                 /* As an optimization, we allocate an RPC request buffer for
                  * at least a default-sized LOV EA even if we aren't sending
                  * one.  We grow the whole request to the next power-of-two
                  * size since we get that much from a slab allocation anyways.
                  * This avoids an allocation below in the common case where
                  * we need to save a default-sized LOV EA for open replay. */
-                size[DLM_INTENT_REC_OFF + 2] = max(lmmsize,
-                                          obddev->u.cli.cl_default_mds_easize);
-                rc = lustre_msg_size(class_exp2cliimp(exp)->imp_msg_magic, 6,
-                                     size);
+                ea_off = DLM_INTENT_REC_OFF + 4;
+                size[ea_off] = max(lmmsize,
+                                   obddev->u.cli.cl_default_mds_easize);
+                if (do_join)
+                        size[DLM_INTENT_REC_OFF + 5] =
+                                                sizeof(struct mds_rec_join);
+                rc = lustre_msg_size(class_exp2cliimp(exp)->imp_msg_magic,
+                                     8 + do_join, size);
                 if (rc & (rc - 1))
-                        size[DLM_INTENT_REC_OFF + 2] =
-                                 min(size[DLM_INTENT_REC_OFF+2]+round_up(rc)-rc,
-                                     obddev->u.cli.cl_max_mds_easize);
+                        size[ea_off] = min(size[ea_off] + round_up(rc) - rc,
+                                           obddev->u.cli.cl_max_mds_easize);
+
+                req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_DLM_VERSION,
+                                      LDLM_ENQUEUE, 8 + do_join, size, NULL);
+                if (!req)
+                        RETURN(-ENOMEM);
 
                 if (it->it_flags & O_JOIN_FILE) {
                         __u64 head_size = *(__u32*)cb_data;
@@ -332,26 +348,14 @@ int mdc_enqueue(struct obd_export *exp,
 
                         /* join is like an unlink of the tail */
                         policy.l_inodebits.bits = MDS_INODELOCK_UPDATE;
-                        size[DLM_INTENT_REC_OFF + 3] =
-                                                 sizeof(struct mdt_rec_join);
-                        req = ptlrpc_prep_req(class_exp2cliimp(exp),
-                                              LUSTRE_DLM_VERSION, LDLM_ENQUEUE,
-                                              7, size, NULL);
                         /* when joining file, cb_data and lmm args together
                          * indicate the head file size*/
-                        mdc_join_pack(req, DLM_INTENT_REC_OFF + 3, op_data,
+                        mdc_join_pack(req, DLM_INTENT_REC_OFF + 5, op_data,
                                       (head_size << 32) | tsize);
                         cb_data = NULL;
                         lmm = NULL;
-                } else {
-                        req = ptlrpc_prep_req(class_exp2cliimp(exp),
-                                              LUSTRE_DLM_VERSION, LDLM_ENQUEUE,
-                                              6, size, NULL);
                 }
 
-                if (!req)
-                        RETURN(-ENOMEM);
-
                 spin_lock(&req->rq_lock);
                 req->rq_replay = 1;
                 spin_unlock(&req->rq_lock);
@@ -370,12 +374,16 @@ int mdc_enqueue(struct obd_export *exp,
                 repsize[repbufcnt++] = client_is_remote(exp) ?
                                                 sizeof(struct mdt_remote_perm) :
                                                 LUSTRE_POSIX_ACL_MAX_SIZE;
+                repsize[repbufcnt++] = sizeof(struct lustre_capa);
+                repsize[repbufcnt++] = sizeof(struct lustre_capa);
         } else if (it->it_op & IT_UNLINK) {
                 size[DLM_INTENT_REC_OFF] = sizeof(struct mdt_rec_unlink);
-                size[DLM_INTENT_REC_OFF + 1] = op_data->namelen + 1;
+                size[DLM_INTENT_REC_OFF + 1] = op_data->mod_capa1 ?
+                                               sizeof(struct lustre_capa) : 0;
+                size[DLM_INTENT_REC_OFF + 2] = op_data->namelen + 1;
                 policy.l_inodebits.bits = MDS_INODELOCK_UPDATE;
                 req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_DLM_VERSION,
-                                      LDLM_ENQUEUE, 5, size, NULL);
+                                      LDLM_ENQUEUE, 6, size, NULL);
                 if (!req)
                         RETURN(-ENOMEM);
 
@@ -394,13 +402,15 @@ int mdc_enqueue(struct obd_export *exp,
                 valid |= client_is_remote(exp) ? OBD_MD_FLRMTPERM :
                                                  OBD_MD_FLACL;
                 size[DLM_INTENT_REC_OFF] = sizeof(struct mdt_body);
-                size[DLM_INTENT_REC_OFF + 1] = op_data->namelen + 1;
+                size[DLM_INTENT_REC_OFF + 1] = op_data->mod_capa1 ?
+                                               sizeof(struct lustre_capa) : 0;
+                size[DLM_INTENT_REC_OFF + 2] = op_data->namelen + 1;
 
                 if (it->it_op & IT_GETATTR)
                         policy.l_inodebits.bits = MDS_INODELOCK_UPDATE;
 
                 req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_DLM_VERSION,
-                                      LDLM_ENQUEUE, 5, size, NULL);
+                                      LDLM_ENQUEUE, 6, size, NULL);
                 if (!req)
                         RETURN(-ENOMEM);
 
@@ -416,6 +426,7 @@ int mdc_enqueue(struct obd_export *exp,
                 repsize[repbufcnt++] = client_is_remote(exp) ?
                                                 sizeof(struct mdt_remote_perm) :
                                                 LUSTRE_POSIX_ACL_MAX_SIZE;
+                repsize[repbufcnt++] = sizeof(struct lustre_capa);
         } else if (it->it_op == IT_READDIR) {
                 policy.l_inodebits.bits = MDS_INODELOCK_UPDATE;
                 req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_DLM_VERSION,
@@ -496,12 +507,12 @@ int mdc_enqueue(struct obd_export *exp,
                   it->it_op,it->d.lustre.it_disposition,it->d.lustre.it_status);
 
         /* We know what to expect, so we do any byte flipping required here */
-        LASSERT(repbufcnt == 5 || repbufcnt == 2);
-        if (repbufcnt == 5) {
+        LASSERT(repbufcnt == 7 || repbufcnt == 6 || repbufcnt == 2);
+        if (repbufcnt >= 6) {
                 struct mdt_body *body;
-                int offset = DLM_REPLY_REC_OFF;
+                int reply_off = DLM_REPLY_REC_OFF;
 
-                body = lustre_swab_repbuf(req, offset++, sizeof(*body),
+                body = lustre_swab_repbuf(req, reply_off++, sizeof(*body),
                                          lustre_swab_mdt_body);
                 if (body == NULL) {
                         CERROR ("Can't swab mdt_body\n");
@@ -518,7 +529,7 @@ int mdc_enqueue(struct obd_export *exp,
 
                 if ((body->valid & OBD_MD_FLDIREA) != 0) {
                         if (body->eadatasize) {
-                                eadata = lustre_swab_repbuf(req, offset++,
+                                eadata = lustre_swab_repbuf(req, reply_off++,
                                                 body->eadatasize, NULL);
                                 if (eadata == NULL) {
                                         CERROR ("Missing/short eadata\n");
@@ -529,7 +540,7 @@ int mdc_enqueue(struct obd_export *exp,
                 if ((body->valid & OBD_MD_FLEASIZE)) {
                         /* The eadata is opaque; just check that it is there.
                          * Eventually, obd_unpackmd() will check the contents */
-                        eadata = lustre_swab_repbuf(req, offset++,
+                        eadata = lustre_swab_repbuf(req, reply_off++,
                                                     body->eadatasize, NULL);
                         if (eadata == NULL) {
                                 CERROR ("Missing/short eadata\n");
@@ -557,12 +568,12 @@ int mdc_enqueue(struct obd_export *exp,
                          * reallocate it here to hold the actual LOV EA. */
                         if (it->it_op & IT_OPEN) {
                                 if (lustre_msg_buflen(req->rq_reqmsg,
-                                                      DLM_INTENT_REC_OFF + 2) <
+                                                      DLM_INTENT_REC_OFF + 4) <
                                     body->eadatasize)
                                         mdc_realloc_openmsg(req, body, size);
 
                                 lmm = lustre_msg_buf(req->rq_reqmsg,
-                                                     DLM_INTENT_REC_OFF + 2,
+                                                     DLM_INTENT_REC_OFF + 4,
                                                      body->eadatasize);
                                 if (lmm)
                                         memcpy(lmm, eadata, body->eadatasize);
@@ -572,12 +583,42 @@ int mdc_enqueue(struct obd_export *exp,
                         struct mdt_remote_perm *perm;
 
                         LASSERT(client_is_remote(exp));
-                        perm = lustre_swab_repbuf(req, offset++, sizeof(*perm),
+                        perm = lustre_swab_repbuf(req, reply_off++,
+                                                  sizeof(*perm),
                                                   lustre_swab_mdt_remote_perm);
                         if (perm == NULL) {
                                 CERROR("missing remote permission!\n");
                                 RETURN(-EPROTO);
                         }
+                } else if ((body->valid & OBD_MD_FLACL) && body->aclsize) {
+                        reply_off++;
+                }
+                if (body->valid & OBD_MD_FLMDSCAPA) {
+                        struct lustre_capa *capa, *p;
+
+                        capa = lustre_unpack_capa(req->rq_repmsg, reply_off++);
+                        if (capa == NULL) {
+                                CERROR("Missing/short client fid capa\n");
+                                RETURN(-EPROTO);
+                        }
+
+                        if (it->it_op & IT_OPEN) {
+                                /* client fid capa will be checked in replay */
+                                p = lustre_msg_buf(req->rq_reqmsg,
+                                                   DLM_INTENT_REC_OFF + 2,
+                                                   sizeof(*p));
+                                LASSERT(p);
+                                *p = *capa;
+                        }
+                }
+                if (body->valid & OBD_MD_FLOSSCAPA) {
+                        struct lustre_capa *capa;
+
+                        capa = lustre_unpack_capa(req->rq_repmsg, reply_off++);
+                        if (capa == NULL) {
+                                CERROR("Missing/short client oss capa\n");
+                                RETURN(-EPROTO);
+                        }
                 }
         }
 
@@ -630,7 +671,7 @@ int mdc_intent_lock(struct obd_export *exp, struct md_op_data *op_data,
                op_data->namelen, op_data->name, PFID(&op_data->fid2), 
                PFID(&op_data->fid1), ldlm_it2str(it->it_op), it->it_flags);
 
-        if (fid_is_sane(&op_data->fid2) &&
+        if (fid_is_sane((struct lu_fid *)&op_data->fid2) &&
             (it->it_op & (IT_LOOKUP | IT_GETATTR))) {
                 /* We could just return 1 immediately, but since we should only
                  * be called in revalidate_it if we already have a lock, let's
index 4982a34..69db90e 100644 (file)
@@ -74,15 +74,18 @@ int mdc_setattr(struct obd_export *exp, struct md_op_data *op_data,
         struct mdt_rec_setattr *rec;
         struct mdc_rpc_lock *rpc_lock;
         struct obd_device *obd = exp->exp_obd;
-        int size[5] = { sizeof(struct ptlrpc_body),
-                        sizeof(*rec), 0, ealen, ea2len };
-        int bufcount = 3, rc;
+        int size[6] = { sizeof(struct ptlrpc_body),
+                        sizeof(*rec), 0, 0, ealen, ea2len };
+        int bufcount = 4, rc;
         ENTRY;
 
         LASSERT(op_data != NULL);
 
+        if (op_data->mod_capa1)
+                size[REQ_REC_OFF + 1] = sizeof(struct lustre_capa);
+
         if (op_data->flags & (MF_SOM_CHANGE | MF_EPOCH_OPEN))
-                size[2] = sizeof(struct mdt_epoch);
+                size[REQ_REC_OFF + 2] = sizeof(struct mdt_epoch);
 
         if (ealen > 0) {
                 bufcount++;
@@ -109,7 +112,8 @@ int mdc_setattr(struct obd_export *exp, struct md_op_data *op_data,
         mdc_setattr_pack(req, REQ_REC_OFF, op_data, ea, ealen, ea2, ea2len);
 
         size[REPLY_REC_OFF] = sizeof(struct mdt_body);
-        ptlrpc_req_set_repsize(req, 2, size);
+        size[REPLY_REC_OFF + 1] = sizeof(struct lustre_capa);
+        ptlrpc_req_set_repsize(req, 3, size);
 
         rc = mdc_reint(req, rpc_lock, LUSTRE_IMP_FULL);
         *request = req;
@@ -125,12 +129,15 @@ int mdc_create(struct obd_export *exp, struct md_op_data *op_data,
 {
         struct obd_device *obd = exp->exp_obd;
         struct ptlrpc_request *req;
-        int level, bufcount = 3, rc;
-        int size[4] = { sizeof(struct ptlrpc_body),
+        int size[5] = { sizeof(struct ptlrpc_body),
                         sizeof(struct mdt_rec_create),
-                        op_data->namelen + 1 };
+                        0, op_data->namelen + 1 };
+        int level, bufcount = 4, rc;
         ENTRY;
 
+        if (op_data->mod_capa1)
+                size[REQ_REC_OFF + 1] = sizeof(struct lustre_capa);
+
         if (data && datalen) {
                 size[bufcount] = datalen;
                 bufcount++;
@@ -169,13 +176,17 @@ int mdc_unlink(struct obd_export *exp, struct md_op_data *op_data,
         struct ptlrpc_request *req = *request;
         int size[4] = { sizeof(struct ptlrpc_body),
                         sizeof(struct mdt_rec_unlink),
-                        op_data->namelen + 1 };
+                        0, op_data->namelen + 1 };
         int rc;
         ENTRY;
 
         LASSERT(req == NULL);
+
+        if (op_data->mod_capa1)
+                size[REQ_REC_OFF + 1] = sizeof(struct lustre_capa);
+
         req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_MDS_VERSION,
-                              MDS_REINT, 3, size, NULL);
+                              MDS_REINT, 4, size, NULL);
         if (req == NULL)
                 RETURN(-ENOMEM);
         *request = req;
@@ -198,14 +209,19 @@ int mdc_link(struct obd_export *exp, struct md_op_data *op_data,
 {
         struct obd_device *obd = exp->exp_obd;
         struct ptlrpc_request *req;
-        int size[3] = { sizeof(struct ptlrpc_body),
+        int size[5] = { sizeof(struct ptlrpc_body),
                         sizeof(struct mdt_rec_link),
-                        op_data->namelen + 1 };
+                        0, 0, op_data->namelen + 1 };
         int rc;
         ENTRY;
 
+        if (op_data->mod_capa1)
+                size[REQ_REC_OFF + 1] = sizeof(struct lustre_capa);
+        if (op_data->mod_capa2)
+                size[REQ_REC_OFF + 2] = sizeof(struct lustre_capa);
+
         req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_MDS_VERSION,
-                              MDS_REINT, 3, size, NULL);
+                              MDS_REINT, 5, size, NULL);
         if (req == NULL)
                 RETURN(-ENOMEM);
 
@@ -228,14 +244,19 @@ int mdc_rename(struct obd_export *exp, struct md_op_data *op_data,
 {
         struct obd_device *obd = exp->exp_obd;
         struct ptlrpc_request *req;
-        int size[4] = { sizeof(struct ptlrpc_body),
+        int size[6] = { sizeof(struct ptlrpc_body),
                         sizeof(struct mdt_rec_rename),
-                        oldlen + 1, newlen + 1 };
+                        0, 0, oldlen + 1, newlen + 1 };
         int rc;
         ENTRY;
 
+        if (op_data->mod_capa1)
+                size[REQ_REC_OFF + 1] = sizeof(struct lustre_capa);
+        if (op_data->mod_capa2)
+                size[REQ_REC_OFF + 2] = sizeof(struct lustre_capa);
+
         req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_MDS_VERSION,
-                              MDS_REINT, 4, size, NULL);
+                              MDS_REINT, 6, size, NULL);
         if (req == NULL)
                 RETURN(-ENOMEM);
 
index 2871f15..def5806 100644 (file)
@@ -52,14 +52,40 @@ static quota_interface_t *quota_interface;
 static int mdc_cleanup(struct obd_device *obd);
 
 extern int mds_queue_req(struct ptlrpc_request *);
+
+static inline struct obd_capa *mdc_unpack_capa(struct ptlrpc_request *req,
+                                               unsigned int offset)
+{
+        struct lustre_capa *capa;
+        struct obd_capa *oc;
+
+        /* swabbed already in mdc_enqueue */
+        capa = lustre_msg_buf(req->rq_repmsg, offset, sizeof(*capa));
+        if (capa == NULL) {
+                CERROR("missing capa at offset %d failed!\n", offset);
+                return ERR_PTR(-EFAULT);
+        }
+
+        oc = alloc_capa(CAPA_SITE_CLIENT);
+        if (!oc) {
+                CERROR("alloc capa failed!\n");
+                return ERR_PTR(-ENOMEM);
+        }
+        oc->c_capa = *capa;
+
+        return oc;
+}
+
 /* Helper that implements most of mdc_getstatus and signal_completed_replay. */
 /* XXX this should become mdc_get_info("key"), sending MDS_GET_INFO RPC */
 static int send_getstatus(struct obd_import *imp, struct lu_fid *rootfid,
-                          int level, int msg_flags)
+                          struct obd_capa **pc, int level, int msg_flags)
 {
         struct ptlrpc_request *req;
         struct mdt_body *body;
-        int rc, size[2] = { sizeof(struct ptlrpc_body), sizeof(*body) };
+        int rc, size[3] = { sizeof(struct ptlrpc_body),
+                            sizeof(*body),
+                            sizeof(struct lustre_capa) };
         ENTRY;
 
         req = ptlrpc_prep_req(imp, LUSTRE_MDS_VERSION, MDS_GETSTATUS, 2, size,
@@ -68,9 +94,9 @@ static int send_getstatus(struct obd_import *imp, struct lu_fid *rootfid,
                 GOTO(out, rc = -ENOMEM);
 
         req->rq_send_state = level;
-        ptlrpc_req_set_repsize(req, 2, size);
+        ptlrpc_req_set_repsize(req, 3, size);
 
-        mdc_pack_req_body(req, REQ_REC_OFF, 0, NULL, 0, 0);
+        mdc_pack_req_body(req, REQ_REC_OFF, 0, NULL, NULL, 0, 0);
         lustre_msg_add_flags(req->rq_reqmsg, msg_flags);
         rc = ptlrpc_queue_wait(req);
 
@@ -84,6 +110,15 @@ static int send_getstatus(struct obd_import *imp, struct lu_fid *rootfid,
 
                 *rootfid = body->fid1;
 
+                if (body->valid & OBD_MD_FLMDSCAPA) {
+                        struct obd_capa *oc;
+
+                        oc = mdc_unpack_capa(req, REPLY_REC_OFF + 1);
+                        if (IS_ERR(oc))
+                                GOTO(out, rc = PTR_ERR(oc));
+                        *pc = oc;
+                }
+
                 CDEBUG(D_NET, "root fid="DFID", last_committed="LPU64
                        ", last_xid="LPU64"\n",
                        PFID(rootfid),
@@ -98,9 +133,10 @@ static int send_getstatus(struct obd_import *imp, struct lu_fid *rootfid,
 }
 
 /* This should be mdc_get_info("rootfid") */
-int mdc_getstatus(struct obd_export *exp, struct lu_fid *rootfid)
+int mdc_getstatus(struct obd_export *exp, struct lu_fid *rootfid,
+                  struct obd_capa **pc)
 {
-        return send_getstatus(class_exp2cliimp(exp), rootfid,
+        return send_getstatus(class_exp2cliimp(exp), rootfid, pc,
                               LUSTRE_IMP_FULL, 0);
 }
 
@@ -120,24 +156,24 @@ int mdc_getattr_common(struct obd_export *exp, unsigned int ea_size,
 {
         struct mdt_body *body;
         void *eadata;
-        int size[4] = { sizeof(struct ptlrpc_body), sizeof(*body) };
+        int size[5] = { sizeof(struct ptlrpc_body),
+                        sizeof(*body) };
         int bufcount = 2, rc;
         ENTRY;
         
         /* Request message already built. */
         if (ea_size != 0) {
-                size[bufcount] = ea_size;
+                size[bufcount++] = ea_size;
                 CDEBUG(D_INODE, "reserved %u bytes for MD/symlink in packet\n",
                        ea_size);
         }
-        bufcount++;
         
         if (acl_size) {
-                size[bufcount] = acl_size;
+                size[bufcount++] = acl_size;
                 CDEBUG(D_INODE, "reserved %u bytes for ACL\n", acl_size);
         }
-        bufcount++;
 
+        size[bufcount++] = sizeof(struct lustre_capa);
         ptlrpc_req_set_repsize(req, bufcount, size);
 
         rc = ptlrpc_queue_wait(req);
@@ -178,23 +214,27 @@ int mdc_getattr_common(struct obd_export *exp, unsigned int ea_size,
 }
 
 int mdc_getattr(struct obd_export *exp, const struct lu_fid *fid,
-                obd_valid valid, int ea_size, struct ptlrpc_request **request)
+                struct obd_capa *oc, obd_valid valid, int ea_size,
+                struct ptlrpc_request **request)
 {
         struct ptlrpc_request *req;
-        int size[2] = { sizeof(struct ptlrpc_body), sizeof(struct mdt_body) };
+        int size[3] = { sizeof(struct ptlrpc_body), sizeof(struct mdt_body) };
         int acl_size = 0, rc;
         ENTRY;
 
+        if (oc)
+                size[REQ_REC_OFF + 1] = sizeof(struct lustre_capa);
+
         /*
          * XXX do we need to make another request here?  We just did a getattr
          * to do the lookup in the first place.
          */
         req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_MDS_VERSION,
-                              MDS_GETATTR, 2, size, NULL);
+                              MDS_GETATTR, 3, size, NULL);
         if (!req)
                 GOTO(out, rc = -ENOMEM);
 
-        mdc_pack_req_body(req, REQ_REC_OFF, valid, fid, ea_size,
+        mdc_pack_req_body(req, REQ_REC_OFF, valid, fid, oc, ea_size,
                           MDS_BFLAG_EXT_FLAGS/*request "new" flags(bug 9486)*/);
 
         /* currently only root inode will call us with FLACL */
@@ -212,24 +252,29 @@ int mdc_getattr(struct obd_export *exp, const struct lu_fid *fid,
 }
 
 int mdc_getattr_name(struct obd_export *exp, const struct lu_fid *fid,
-                     const char *filename, int namelen, obd_valid valid,
-                     int ea_size, struct ptlrpc_request **request)
+                     struct obd_capa *oc, const char *filename, int namelen,
+                     obd_valid valid, int ea_size,
+                     struct ptlrpc_request **request)
 {
         struct ptlrpc_request *req;
         struct mdt_body *body;
-        int rc, size[3] = { sizeof(struct ptlrpc_body), sizeof(*body), namelen};
+        int size[4] = { sizeof(struct ptlrpc_body), sizeof(*body), 0, namelen};
+        int rc;
         ENTRY;
 
+        if (oc)
+                size[REQ_REC_OFF + 1] = sizeof(struct lustre_capa);
+
         req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_MDS_VERSION,
-                              MDS_GETATTR_NAME, 3, size, NULL);
+                              MDS_GETATTR_NAME, 4, size, NULL);
         if (!req)
                 GOTO(out, rc = -ENOMEM);
 
-        mdc_pack_req_body(req, REQ_REC_OFF, valid, fid, ea_size,
+        mdc_pack_req_body(req, REQ_REC_OFF, valid, fid, oc, ea_size,
                           MDS_BFLAG_EXT_FLAGS/*request "new" flags(bug 9486)*/);
  
         LASSERT(strnlen(filename, namelen) == namelen - 1);
-        memcpy(lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF + 1, namelen),
+        memcpy(lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF + 2, namelen),
                filename, namelen);
 
         rc = mdc_getattr_common(exp, ea_size, 0, req);
@@ -243,20 +288,28 @@ int mdc_getattr_name(struct obd_export *exp, const struct lu_fid *fid,
 }
 
 int mdc_is_subdir(struct obd_export *exp, const struct lu_fid *pfid,
-                  const struct lu_fid *cfid, struct ptlrpc_request **request)
+                  const struct lu_fid *cfid,
+                  struct obd_capa *pc, struct obd_capa *cc,
+                  struct ptlrpc_request **request)
 {
-        int size[2] = { sizeof(struct ptlrpc_body), sizeof(struct mdt_body) };
+        int size[4] = { sizeof(struct ptlrpc_body),
+                        sizeof(struct mdt_body) };
         struct ptlrpc_request *req;
         struct mdt_body *body;
         int rc;
         ENTRY;
 
+        if (pc)
+                size[REQ_REC_OFF + 1] = sizeof(struct lustre_capa);
+        if (cc)
+                size[REQ_REC_OFF + 2] = sizeof(struct lustre_capa);
+
         req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_MDS_VERSION,
-                              MDS_IS_SUBDIR, 2, size, NULL);
+                              MDS_IS_SUBDIR, 4, size, NULL);
         if (!req)
                 GOTO(out, rc = -ENOMEM);
 
-        mdc_is_subdir_pack(req, REQ_REC_OFF, pfid, cfid, 0);
+        mdc_is_subdir_pack(req, REQ_REC_OFF, pfid, cfid, pc, cc, 0);
 
         ptlrpc_req_set_repsize(req, 2, size);
         rc = ptlrpc_queue_wait(req);
@@ -277,17 +330,20 @@ int mdc_is_subdir(struct obd_export *exp, const struct lu_fid *pfid,
 
 static
 int mdc_xattr_common(struct obd_export *exp, const struct lu_fid *fid,
+                     struct obd_capa *oc,
                      int opcode, obd_valid valid, const char *xattr_name,
                      const char *input, int input_size, int output_size,
                      int flags, struct ptlrpc_request **request)
 {
         struct ptlrpc_request *req;
-        int size[4] = { sizeof(struct ptlrpc_body), sizeof(struct mdt_body) };
-        // int size[3] = {sizeof(struct mdt_body)}, bufcnt = 1;
-        int rc, xattr_namelen = 0, bufcnt = 2, offset, remote_acl = 0;
+        int size[5] = { sizeof(struct ptlrpc_body), sizeof(struct mdt_body) };
+        int bufcnt = 3, offset = REQ_REC_OFF + 2;
+        int rc, xattr_namelen = 0, remote_acl = 0;
         void *tmp;
         ENTRY;
 
+        if (oc)
+                size[REQ_REC_OFF + 1] = sizeof(struct lustre_capa);
         if (xattr_name) {
                 xattr_namelen = strlen(xattr_name) + 1;
                 size[bufcnt++] = xattr_namelen;
@@ -303,9 +359,8 @@ int mdc_xattr_common(struct obd_export *exp, const struct lu_fid *fid,
                 GOTO(out, rc = -ENOMEM);
 
         /* request data */
-        mdc_pack_req_body(req, REQ_REC_OFF, valid, fid, output_size, flags);
+        mdc_pack_req_body(req, REQ_REC_OFF, valid, fid, oc, output_size, flags);
 
-        offset = REQ_REC_OFF + 1;
 
         if (xattr_name) {
                 tmp = lustre_msg_buf(req->rq_reqmsg, offset++, xattr_namelen);
@@ -363,21 +418,20 @@ err_out:
 }
 
 int mdc_setxattr(struct obd_export *exp, const struct lu_fid *fid,
-                 obd_valid valid, const char *xattr_name,
-                 const char *input, int input_size,
-                 int output_size, int flags,
+                 struct obd_capa *oc, obd_valid valid, const char *xattr_name,
+                 const char *input, int input_size, int output_size, int flags,
                  struct ptlrpc_request **request)
 {
-        return mdc_xattr_common(exp, fid, MDS_SETXATTR, valid, xattr_name,
+        return mdc_xattr_common(exp, fid, oc, MDS_SETXATTR, valid, xattr_name,
                                 input, input_size, output_size, flags, request);
 }
 
 int mdc_getxattr(struct obd_export *exp, const struct lu_fid *fid,
-                 obd_valid valid, const char *xattr_name,
-                 const char *input, int input_size,
-                 int output_size, int flags, struct ptlrpc_request **request)
+                 struct obd_capa *oc, obd_valid valid, const char *xattr_name,
+                 const char *input, int input_size, int output_size, int flags,
+                 struct ptlrpc_request **request)
 {
-        return mdc_xattr_common(exp, fid, MDS_GETXATTR, valid, xattr_name,
+        return mdc_xattr_common(exp, fid, oc, MDS_GETXATTR, valid, xattr_name,
                                 input, input_size, output_size, flags, request);
 }
 
@@ -498,7 +552,7 @@ int mdc_get_lustre_md(struct obd_export *exp, struct ptlrpc_request *req,
         if ((md->body->valid & OBD_MD_FLACL) && md->body->aclsize) {
                 rc = mdc_unpack_acl(dt_exp, req, md, offset);
                 if (rc)
-                        GOTO(err_out, rc);
+                        GOTO(out, rc);
                 offset++;
         }
 
@@ -509,13 +563,39 @@ int mdc_get_lustre_md(struct obd_export *exp, struct ptlrpc_request *req,
                 LASSERT(md->remote_perm);
                 offset++;
         }
-out:
-        RETURN(rc);
 
-err_out:
-        if (md->lsm)
-                obd_free_memmd(dt_exp, &md->lsm);
-        goto out;
+        if (md->body->valid & OBD_MD_FLMDSCAPA) {
+                struct obd_capa *oc = mdc_unpack_capa(req, offset);
+
+                if (IS_ERR(oc))
+                        GOTO(out, rc = PTR_ERR(oc));
+                md->mds_capa = oc;
+                offset++;
+        }
+
+        if (md->body->valid & OBD_MD_FLOSSCAPA) {
+                struct obd_capa *oc = mdc_unpack_capa(req, offset);
+
+                if (IS_ERR(oc))
+                        GOTO(out, rc = PTR_ERR(oc));
+                md->oss_capa = oc;
+                offset++;
+        }
+
+        EXIT;
+out:
+        if (rc) {
+                if (md->oss_capa)
+                        free_capa(md->oss_capa);
+                if (md->mds_capa)
+                        free_capa(md->mds_capa);
+#ifdef CONFIG_FS_POSIX_ACL
+                posix_acl_release(md->posix_acl);
+#endif
+                if (md->lsm)
+                        obd_free_memmd(dt_exp, &md->lsm);
+        }
+        return rc;
 }
 
 int mdc_free_lustre_md(struct obd_export *exp, struct lustre_md *md)
@@ -698,19 +778,22 @@ int mdc_close(struct obd_export *exp, struct md_op_data *op_data,
               struct obd_client_handle *och, struct ptlrpc_request **request)
 {
         struct obd_device *obd = class_exp2obd(exp);
-        int reqsize[3] = { sizeof(struct ptlrpc_body),
+        int reqsize[4] = { sizeof(struct ptlrpc_body),
                            sizeof(struct mdt_epoch),
                            sizeof(struct mdt_rec_setattr)};
-        int rc, repsize[4] = { sizeof(struct ptlrpc_body),
-                               sizeof(struct mdt_body),
-                               obd->u.cli.cl_max_mds_easize,
-                               obd->u.cli.cl_max_mds_cookiesize };
+        int repsize[4] = { sizeof(struct ptlrpc_body),
+                           sizeof(struct mdt_body),
+                           obd->u.cli.cl_max_mds_easize,
+                           obd->u.cli.cl_max_mds_cookiesize };
         struct ptlrpc_request *req;
         struct mdc_open_data *mod;
+        int rc;
         ENTRY;
 
+        if (op_data->mod_capa1)
+                reqsize[REQ_REC_OFF + 2] = sizeof(struct lustre_capa);
         req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_MDS_VERSION,
-                              MDS_CLOSE, 3, reqsize, NULL);
+                              MDS_CLOSE, 4, reqsize, NULL);
         if (req == NULL)
                 GOTO(out, rc = -ENOMEM);
 
@@ -787,15 +870,18 @@ int mdc_done_writing(struct obd_export *exp, struct md_op_data *op_data,
                      struct obd_client_handle *och)
 {
         struct ptlrpc_request *req;
-        int rc, size[3] = { sizeof(struct ptlrpc_body),
-                            sizeof(struct mdt_epoch),
-                            sizeof(struct mdt_rec_setattr)};
+        int size[4] = { sizeof(struct ptlrpc_body),
+                        sizeof(struct mdt_epoch),
+                        sizeof(struct mdt_rec_setattr)};
         int repsize[2] = { sizeof(struct ptlrpc_body),
                            sizeof(struct mdt_body)};
-     
+        int rc;
         ENTRY;
+
+        if (op_data->mod_capa1)
+                size[REQ_REC_OFF + 2] = sizeof(struct lustre_capa);
         req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_MDS_VERSION,
-                              MDS_DONE_WRITING, 3, size, NULL);
+                              MDS_DONE_WRITING, 4, size, NULL);
         if (req == NULL)
                 RETURN(-ENOMEM);
 
@@ -847,19 +933,21 @@ EXPORT_SYMBOL(mdc_sendpage);
 #endif
 
 int mdc_readpage(struct obd_export *exp, const struct lu_fid *fid,
-                 __u64 offset, struct page *page,
+                 struct obd_capa *oc, __u64 offset, struct page *page,
                  struct ptlrpc_request **request)
 {
         struct obd_import *imp = class_exp2cliimp(exp);
         struct ptlrpc_request *req = NULL;
         struct ptlrpc_bulk_desc *desc = NULL;
         struct mdt_body *body;
-        int rc, size[2] = { sizeof(struct ptlrpc_body), sizeof(*body) };
+        int rc, size[3] = { sizeof(struct ptlrpc_body), sizeof(*body) };
         ENTRY;
 
         CDEBUG(D_INODE, "object: "DFID"\n", PFID(fid));
 
-        req = ptlrpc_prep_req(imp, LUSTRE_MDS_VERSION, MDS_READPAGE, 2, size,
+        if (oc)
+                size[REQ_REC_OFF + 1] = sizeof(struct lustre_capa);
+        req = ptlrpc_prep_req(imp, LUSTRE_MDS_VERSION, MDS_READPAGE, 3, size,
                               NULL);
         if (req == NULL)
                 GOTO(out, rc = -ENOMEM);
@@ -874,7 +962,7 @@ int mdc_readpage(struct obd_export *exp, const struct lu_fid *fid,
 
         ptlrpc_prep_bulk_page(desc, page, 0, PAGE_CACHE_SIZE);
 
-        mdc_readdir_pack(req, REQ_REC_OFF, offset, PAGE_CACHE_SIZE, fid);
+        mdc_readdir_pack(req, REQ_REC_OFF, offset, PAGE_CACHE_SIZE, fid, oc);
 
         ptlrpc_req_set_repsize(req, 2, size);
         rc = ptlrpc_queue_wait(req);
@@ -902,7 +990,6 @@ int mdc_readpage(struct obd_export *exp, const struct lu_fid *fid,
         return rc;
 }
 
-
 static int mdc_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
                          void *karg, void *uarg)
 {
@@ -1095,21 +1182,25 @@ out:
 }
 
 static int mdc_pin(struct obd_export *exp, const struct lu_fid *fid,
+                   struct obd_capa *oc,
                    struct obd_client_handle *handle, int flag)
 {
         struct ptlrpc_request *req;
         struct mdt_body *body;
-        int rc, size[2] = { sizeof(struct ptlrpc_body), sizeof(*body) };
+        int rc, size[3] = { sizeof(struct ptlrpc_body), sizeof(*body) };
         ENTRY;
 
+        if (oc)
+                size[REQ_REC_OFF + 1] = sizeof(struct lustre_capa);
         req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_MDS_VERSION,
-                              MDS_PIN, 2, size, NULL);
+                              MDS_PIN, 3, size, NULL);
         if (req == NULL)
                 RETURN(-ENOMEM);
 
         body = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, sizeof (*body));
         body->fid1 = *fid;
         body->flags = flag;
+        mdc_pack_capa(req, REQ_REC_OFF + 1, oc);
 
         ptlrpc_req_set_repsize(req, 2, size);
 
@@ -1177,19 +1268,22 @@ static int mdc_unpin(struct obd_export *exp,
 }
 
 int mdc_sync(struct obd_export *exp, const struct lu_fid *fid,
+             struct obd_capa *oc,
              struct ptlrpc_request **request)
 {
         struct ptlrpc_request *req;
-        int size[2] = { sizeof(struct ptlrpc_body), sizeof(struct mdt_body) };
+        int size[3] = { sizeof(struct ptlrpc_body), sizeof(struct mdt_body) };
         int rc;
         ENTRY;
 
+        if (oc)
+                size[REQ_REC_OFF + 1] = sizeof(struct lustre_capa);
         req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_MDS_VERSION,
-                              MDS_SYNC, 2, size, NULL);
+                              MDS_SYNC, 3, size, NULL);
         if (!req)
                 RETURN(rc = -ENOMEM);
 
-        mdc_pack_req_body(req, REQ_REC_OFF, 0, fid, 0, 0);
+        mdc_pack_req_body(req, REQ_REC_OFF, 0, fid, oc, 0, 0);
 
         ptlrpc_req_set_repsize(req, 2, size);
 
@@ -1462,25 +1556,27 @@ static int mdc_process_config(struct obd_device *obd, obd_count len, void *buf)
 
 /* get remote permission for current user on fid */
 int mdc_get_remote_perm(struct obd_export *exp, const struct lu_fid *fid,
-                        struct ptlrpc_request **request)
+                        struct obd_capa *oc, struct ptlrpc_request **request)
 {
         struct ptlrpc_request *req;
         struct mdt_body *body;
         struct mdt_remote_perm *perm;
-        int size[3] = { sizeof(struct ptlrpc_body),
-                        sizeof(*body),
-                        sizeof(*perm) };
+        int size[3] = { sizeof(struct ptlrpc_body), sizeof(*body) };
         int rc;
         ENTRY;
 
+        if (oc)
+                size[REQ_REC_OFF + 1] = sizeof(struct lustre_capa);
+
         *request = NULL;
         req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_MDS_VERSION,
-                              MDS_GETATTR, 2, size, NULL);
+                              MDS_GETATTR, 3, size, NULL);
         if (!req)
                 RETURN(-ENOMEM);
 
-        mdc_pack_req_body(req, REQ_REC_OFF, OBD_MD_FLRMTPERM, fid, 0, 0);
+        mdc_pack_req_body(req, REQ_REC_OFF, OBD_MD_FLRMTPERM, fid, oc, 0, 0);
 
+        size[REPLY_REC_OFF + 1] = sizeof(*perm);
         ptlrpc_req_set_repsize(req, 3, size);
         rc = ptlrpc_queue_wait(req);
         if (rc) {
@@ -1501,6 +1597,31 @@ int mdc_get_remote_perm(struct obd_export *exp, const struct lu_fid *fid,
         RETURN(0);
 }
 
+/*
+ * Build an MDS_RENEW_CAPA request carrying @oc and pass it to
+ * ptlrpcd_add_req(); this function does not wait for the reply.
+ *
+ * @cb is installed as the request's rq_interpret_reply handler.
+ * The reply is sized for a ptlrpc_body, an mdt_body and a lustre_capa.
+ *
+ * Returns 0 once the request has been handed over, -ENOMEM if the request
+ * could not be prepared.
+ */
+static int mdc_renew_capa(struct obd_export *exp, struct obd_capa *oc,
+                          renew_capa_cb_t cb)
+{
+        /* request: header + the capability to renew */
+        int reqsize[2] = { sizeof(struct ptlrpc_body),
+                           sizeof(struct lustre_capa) };
+        /* reply: header + body + the renewed capability */
+        int repsize[3] = { sizeof(struct ptlrpc_body),
+                           sizeof(struct mdt_body),
+                           sizeof(struct lustre_capa) };
+        struct ptlrpc_request *req;
+        ENTRY;
+
+        req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_MDS_VERSION,
+                              MDS_RENEW_CAPA, 2, reqsize, NULL);
+        if (req == NULL)
+                RETURN(-ENOMEM);
+
+        mdc_pack_capa(req, REQ_REC_OFF, oc);
+        ptlrpc_req_set_repsize(req, 3, repsize);
+
+        req->rq_interpret_reply = cb;
+        ptlrpcd_add_req(req);
+
+        RETURN(0);
+}
+
 struct obd_ops mdc_obd_ops = {
         .o_owner            = THIS_MODULE,
         .o_setup            = mdc_setup,
@@ -1552,7 +1673,8 @@ struct md_ops mdc_md_ops = {
         .m_free_lustre_md   = mdc_free_lustre_md,
         .m_set_open_replay_data = mdc_set_open_replay_data,
         .m_clear_open_replay_data = mdc_clear_open_replay_data,
-        .m_get_remote_perm  = mdc_get_remote_perm
+        .m_get_remote_perm  = mdc_get_remote_perm,
+        .m_renew_capa       = mdc_renew_capa
 };
 
 extern quota_interface_t mdc_quota_interface;
index 1204385..df3b80c 100644 (file)
@@ -40,6 +40,7 @@
 
 #include <linux/ldiskfs_fs.h>
 #include <lustre_mds.h>
+#include <lustre/lustre_idl.h>
 
 #include "mdd_internal.h"
 
@@ -325,7 +326,7 @@ struct mdd_object *mdd_object_find(const struct lu_context *ctxt,
         struct mdd_object *m;
         ENTRY;
 
-        o = lu_object_find(ctxt, mdd2lu_dev(d)->ld_site, f);
+        o = lu_object_find(ctxt, mdd2lu_dev(d)->ld_site, f, BYPASS_CAPA);
         if (IS_ERR(o))
                 m = (struct mdd_object *)o;
         else {
@@ -666,6 +667,7 @@ static int mdd_txn_stop_cb(const struct lu_context *ctx,
         struct mdd_device *mdd = cookie;
         struct obd_device *obd = mdd2obd_dev(mdd);
 
+        LASSERT(obd);
         return mds_lov_write_objids(obd);
 }
 
@@ -748,14 +750,14 @@ static int mdd_process_config(const struct lu_context *ctxt,
                         GOTO(out, rc);
                 dt->dd_ops->dt_conf_get(ctxt, dt, &m->mdd_dt_conf);
 
-                rc = mdd_mount(ctxt, m);
-                if (rc)
-                        GOTO(out, rc);
                 rc = mdd_init_obd(ctxt, m, cfg);
                 if (rc) {
                         CERROR("lov init error %d \n", rc);
                         GOTO(out, rc);
                 }
+                rc = mdd_mount(ctxt, m);
+                if (rc)
+                        GOTO(out, rc);
                 break;
         case LCFG_CLEANUP:
                 mdd_device_shutdown(ctxt, m);
@@ -1893,7 +1895,7 @@ static int mdd_lookup_intent(const struct lu_context *ctxt,
 {
         struct mdd_object   *mdd_obj = md2mdd_obj(pobj);
         struct dt_object    *dir = mdd_object_child(mdd_obj);
-        struct dt_rec       *rec    = (struct dt_rec *)fid;
+        struct dt_rec       *rec = (struct dt_rec *)fid;
         const struct dt_key *key = (const struct dt_key *)name;
         int rc;
         ENTRY;
@@ -2590,12 +2592,37 @@ static int mdd_maxsize_get(const struct lu_context *ctx, struct md_device *m,
        struct mdd_device *mdd = lu2mdd_dev(&m->md_lu_dev);
         ENTRY;
 
-        *md_size =  mdd_lov_mdsize(ctx, mdd);
+        *md_size = mdd_lov_mdsize(ctx, mdd);
         *cookie_size = mdd_lov_cookiesize(ctx, mdd);
 
         RETURN(0);
 }
 
+/*
+ * Store the caller's capability key array pointer in the mds obd that
+ * backs @m.  Only the pointer is recorded; nothing is copied.
+ * Always returns 0.
+ */
+static int mdd_init_capa_keys(struct md_device *m,
+                              struct lustre_capa_key *keys)
+{
+        struct obd_device *obd;
+        ENTRY;
+
+        obd = mdd2obd_dev(lu2mdd_dev(&m->md_lu_dev));
+        obd->u.mds.mds_capa_keys = keys;
+
+        RETURN(0);
+}
+
+/*
+ * Send one capability key through the mds_osc_exp export of the mds obd
+ * behind @m, using obd_set_info_async() with the KEY_CAPA_KEY name.
+ * Returns whatever obd_set_info_async() returns.  @ctx is unused here.
+ */
+static int mdd_update_capa_key(const struct lu_context *ctx,
+                               struct md_device *m,
+                               struct lustre_capa_key *key)
+{
+        struct obd_device *obd;
+        int                rc;
+        ENTRY;
+
+        obd = mdd2obd_dev(lu2mdd_dev(&m->md_lu_dev));
+        rc = obd_set_info_async(obd->u.mds.mds_osc_exp, strlen(KEY_CAPA_KEY),
+                                KEY_CAPA_KEY, sizeof(*key), key, NULL);
+
+        RETURN(rc);
+}
+
 static void __mdd_ref_add(const struct lu_context *ctxt, struct mdd_object *obj,
                          struct thandle *handle)
 {
@@ -3060,10 +3087,51 @@ static int mdd_permission(const struct lu_context *ctxt, struct md_object *obj,
         RETURN(rc);
 }
 
+/*
+ * Fill in @capa with a capability for @obj.
+ *
+ * The fid and flags are set first so that capa_lookup() can search for a
+ * matching capability; on a hit it is copied into @capa and returned.
+ * On a miss @capa is stamped with the key id of ls_capa_keys[1] and an
+ * expiry of CURRENT_SECONDS + ls_capa_timeout, signed with capa_hmac()
+ * and passed to capa_add().
+ *
+ * Returns 0 on success, or the non-zero result of capa_hmac().
+ */
+static int mdd_capa_get(const struct lu_context *ctxt, struct md_object *obj,
+                        struct lustre_capa *capa)
+{
+        struct mdd_object *mdd_obj = md2mdd_obj(obj);
+        struct mdd_device *mdd = mdo2mdd(obj);
+        struct lu_site *ls = mdd->mdd_md_dev.md_lu_dev.ld_site;
+        struct lustre_capa_key *key = &ls->ls_capa_keys[1];
+        struct obd_capa *ocapa;
+        int rc;
+        ENTRY;
+
+        LASSERT(lu_object_exists(mdd2lu_obj(mdd_obj)));
+
+        capa->lc_fid = *mdo2fid(mdd_obj);
+
+        /*
+         * Install the algorithm bits with a plain assignment first.  When
+         * this assignment came after the "|=" statements below it wiped
+         * out CAPA_FL_SHORT_EXPIRY and CAPA_FL_ROOT again.
+         */
+        capa->lc_flags = ls->ls_capa_alg << 23;
+        if (ls->ls_capa_timeout < CAPA_TIMEOUT)
+                capa->lc_flags |= CAPA_FL_SHORT_EXPIRY;
+        if (lu_fid_eq(&capa->lc_fid, &mdd->mdd_root_fid))
+                capa->lc_flags |= CAPA_FL_ROOT;
+
+        /* TODO: get right permission here after remote uid landing */
+        ocapa = capa_lookup(capa);
+        if (ocapa) {
+                /* reuse the capability found by the lookup */
+                LASSERT(!capa_is_expired(ocapa));
+                capa_cpy(capa, ocapa);
+                capa_put(ocapa);
+                RETURN(0);
+        }
+
+        /* nothing found: issue a new capability signed with the key */
+        capa->lc_keyid = key->lk_keyid;
+        capa->lc_expiry = CURRENT_SECONDS + ls->ls_capa_timeout;
+        rc = capa_hmac(capa->lc_hmac, capa, key->lk_key);
+        if (rc)
+                RETURN(rc);
+
+        capa_add(capa);
+        RETURN(0);
+}
+
 struct md_device_operations mdd_ops = {
         .mdo_statfs         = mdd_statfs,
         .mdo_root_get       = mdd_root_get,
         .mdo_maxsize_get    = mdd_maxsize_get,
+        .mdo_init_capa_keys = mdd_init_capa_keys,
+        .mdo_update_capa_key= mdd_update_capa_key,
 };
 
 static struct md_dir_operations mdd_dir_ops = {
@@ -3093,7 +3161,8 @@ static struct md_object_operations mdd_obj_ops = {
         .moo_open          = mdd_open,
         .moo_close         = mdd_close,
         .moo_readpage      = mdd_readpage,
-        .moo_readlink      = mdd_readlink
+        .moo_readlink      = mdd_readlink,
+        .moo_capa_get      = mdd_capa_get
 };
 
 static struct obd_ops mdd_obd_device_ops = {
index dbd3c16..ae3d61b 100644 (file)
@@ -1,7 +1,7 @@
 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
  * vim:expandtab:shiftwidth=8:tabstop=8:
  *
- *  linux/mds/mds_lov.c
+ *  lustre/mdd/mdd_lov.c
  *  Lustre Metadata Server (mds) handling of striped file data
  *
  *  Copyright (C) 2001-2006 Cluster File Systems, Inc.
index b93dbb5..73eb57a 100644 (file)
@@ -1006,7 +1006,8 @@ static int mds_getattr(struct ptlrpc_request *req, int offset)
                 GOTO(out_pop, rc);
         }
 
-        req->rq_status = mds_getattr_internal(obd, de, req, body,REPLY_REC_OFF);
+        req->rq_status = mds_getattr_internal(obd, de, req, body,
+                                              REPLY_REC_OFF);
 
         l_dput(de);
         GOTO(out_pop, rc);
@@ -2614,7 +2615,6 @@ static int mdt_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
                 GOTO(err_thread3, rc);
 
         ping_evictor_start();
-
         RETURN(0);
 
 err_thread3:
index 0571def..b3ce673 100644 (file)
@@ -735,7 +735,7 @@ out_pop:
 
 int mds_obd_destroy(struct obd_export *exp, struct obdo *oa,
                     struct lov_stripe_md *ea, struct obd_trans_info *oti,
-                    struct obd_export *md_exp)
+                    struct obd_export *md_exp, void *unused)
 {
         struct mds_obd *mds = &exp->exp_obd->u.mds;
         struct inode *parent_inode = mds->mds_objects_dir->d_inode;
index fd63280..172b40f 100644 (file)
@@ -223,7 +223,7 @@ int mds_obd_create(struct obd_export *exp, struct obdo *oa,
                    struct lov_stripe_md **ea, struct obd_trans_info *oti);
 int mds_obd_destroy(struct obd_export *exp, struct obdo *oa,
                     struct lov_stripe_md *ea, struct obd_trans_info *oti,
-                    struct obd_export *md_exp);
+                    struct obd_export *md_exp, void *unused);
 void mds_init_ctxt(struct obd_device *obd, struct vfsmount *mnt);
 
 /* mds/handler.c */
index 8fc6095..892efa1 100644 (file)
@@ -631,6 +631,28 @@ struct mds_lov_sync_info {
         __u32              mlsi_index;   /* index of target */
 };
 
+/*
+ * Send both entries of mds->mds_capa_keys through mds->mds_osc_exp with
+ * obd_set_info_async(KEY_CAPA_KEY), one call per key.
+ *
+ * Stops at the first failure and returns its error code; returns 0 when
+ * both keys were sent.
+ */
+static int mds_propagate_capa_keys(struct mds_obd *mds)
+{
+        struct lustre_capa_key *key;
+        int idx = 0, rc = 0;
+        ENTRY;
+
+        while (idx < 2 && rc == 0) {
+                key = &mds->mds_capa_keys[idx++];
+                DEBUG_CAPA_KEY(D_SEC, key, "propagate");
+
+                rc = obd_set_info_async(mds->mds_osc_exp, strlen(KEY_CAPA_KEY),
+                                        KEY_CAPA_KEY, sizeof(*key), key, NULL);
+                if (rc != 0) {
+                        DEBUG_CAPA_KEY(D_ERROR, key,
+                                       "propagate failed (rc = %d) for", rc);
+                }
+        }
+
+        RETURN(rc);
+}
+
 /* We only sync one osc at a time, so that we don't have to hold
    any kind of lock on the whole mds_lov_desc, which may change
    (grow) as a result of mds_lov_add_ost.  This also avoids any
@@ -665,6 +687,11 @@ static int __mds_lov_synchronize(void *data)
         if (rc != 0)
                 GOTO(out, rc);
 
+        /* propagate capability keys */
+        rc = mds_propagate_capa_keys(mds);
+        if (rc)
+                GOTO(out, rc);
+
         rc = llog_connect(llog_get_context(obd, LLOG_MDS_OST_ORIG_CTXT),
                           mds->mds_lov_desc.ld_tgt_count,
                           NULL, NULL, uuid);
index bc68bc3..09e5321 100644 (file)
@@ -86,7 +86,8 @@ static int mds_osc_destroy_orphan(struct obd_device *obd,
                 oa->o_valid |= OBD_MD_FLCOOKIE;
                 oti.oti_logcookies = logcookies;
         }
-        rc = obd_destroy(mds->mds_osc_exp, oa, lsm, &oti, obd->obd_self_export);
+        rc = obd_destroy(mds->mds_osc_exp, oa, lsm, &oti, obd->obd_self_export,
+                         NULL);
         obdo_free(oa);
         if (rc)
                 CDEBUG(D_INODE, "destroy orphan objid 0x"LPX64" on ost error "
index 3dcf8bf..af73004 100644 (file)
@@ -1,5 +1,5 @@
 MODULES := mdt
-mdt-objs := mdt_handler.o mdt_lib.o mdt_reint.o mdt_xattr.o mdt_recovery.o mdt_open.o
-mdt-objs += mdt_idmap.o mdt_identity.o mdt_rmtacl.o
+mdt-objs := mdt_handler.o mdt_lib.o mdt_reint.o mdt_xattr.o mdt_recovery.o
+mdt-objs += mdt_open.o mdt_idmap.o mdt_identity.o mdt_rmtacl.o mdt_capa.o
 
 @INCLUDE_RULES@
diff --git a/lustre/mdt/mdt_capa.c b/lustre/mdt/mdt_capa.c
new file mode 100644 (file)
index 0000000..b4e754e
--- /dev/null
@@ -0,0 +1,298 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  lustre/mdt/mdt_capa.c
+ *  Lustre Metadata Target (mdt) capability key read/write/update.
+ * 
+ *  Copyright (C) 2005 Cluster File Systems, Inc.
+ *   Author: Lai Siyao <lsy@clusterfs.com>
+ *
+ *   This file is part of the Lustre file system, http://www.lustre.org
+ *   Lustre is a trademark of Cluster File Systems, Inc.
+ *
+ *   You may have signed or agreed to another license before downloading
+ *   this software.  If so, you are bound by the terms and conditions
+ *   of that agreement, and the following does not apply to you.  See the
+ *   LICENSE file included with this distribution for more information.
+ *
+ *   If you did not agree to a different license, then this copy of Lustre
+ *   is open source software; you can redistribute it and/or modify it
+ *   under the terms of version 2 of the GNU General Public License as
+ *   published by the Free Software Foundation.
+ *
+ *   In either case, Lustre is distributed in the hope that it will be
+ *   useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+ *   of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   license text for more details.
+ */
+
+#ifndef EXPORT_SYMTAB
+# define EXPORT_SYMTAB
+#endif
+#define DEBUG_SUBSYSTEM S_MDS
+
+#include "mdt_internal.h"
+
+/*
+ * Set mdt_ck_expiry to mdt_ck_timeout (multiplied by HZ to get ticks)
+ * past the current jiffies value.
+ */
+static inline void set_capa_key_expiry(struct mdt_device *mdt)
+{
+        mdt->mdt_ck_expiry = mdt->mdt_ck_timeout * HZ + jiffies;
+}
+
+/*
+ * Generate a new capability key in @key: random key material, the owning
+ * MDS number @mdsnum, and key id @keyid + 1.
+ *
+ * NOTE(review): lk_padding is not written here although lck_cpu_to_le()
+ * copies it out; presumably the caller's storage is zeroed -- confirm.
+ */
+static void make_capa_key(struct lustre_capa_key *key,
+                          mdsno_t mdsnum, int keyid)
+{
+        get_random_bytes(key->lk_key, sizeof(key->lk_key));
+        key->lk_keyid = keyid + 1;
+        key->lk_mdsid = mdsnum;
+}
+
+enum {
+        MDT_TXN_CAPA_KEYS_WRITE_CREDITS = 1 /* given to mdt_trans_start() */
+};
+
+/*
+ * Convert a capability key from CPU to little-endian byte order.
+ * The integer fields (mdsid, keyid, padding) are byte-swapped as needed;
+ * the key material itself is copied unchanged.
+ */
+static inline void lck_cpu_to_le(struct lustre_capa_key *tgt,
+                                 struct lustre_capa_key *src)
+{
+        /* raw key bytes have no byte order */
+        memcpy(tgt->lk_key, src->lk_key, sizeof(src->lk_key));
+
+        tgt->lk_padding = cpu_to_le32(src->lk_padding);
+        tgt->lk_keyid   = cpu_to_le32(src->lk_keyid);
+        tgt->lk_mdsid   = cpu_to_le64(src->lk_mdsid);
+}
+
+/*
+ * Convert a capability key from little-endian to CPU byte order.
+ * The integer fields (mdsid, keyid, padding) are byte-swapped as needed;
+ * the key material itself is copied unchanged.
+ */
+static inline void lck_le_to_cpu(struct lustre_capa_key *tgt,
+                                 struct lustre_capa_key *src)
+{
+        /* raw key bytes have no byte order */
+        memcpy(tgt->lk_key, src->lk_key, sizeof(src->lk_key));
+
+        tgt->lk_padding = le32_to_cpu(src->lk_padding);
+        tgt->lk_keyid   = le32_to_cpu(src->lk_keyid);
+        tgt->lk_mdsid   = le64_to_cpu(src->lk_mdsid);
+}
+
+/*
+ * Write the two capability keys in @keys to mdt->mdt_ck_obj inside one
+ * transaction.
+ *
+ * Each key is converted to little-endian in the per-thread scratch key
+ * (mti_capa_key) and written with mdt_record_write(), which is given the
+ * running offset @off, starting at 0.  Once started, the transaction is
+ * stopped whether or not the writes succeeded.
+ *
+ * Returns 0 on success, PTR_ERR() of a failed mdt_trans_start(), or the
+ * first non-zero result of mdt_record_write().
+ */
+static int write_capa_keys(const struct lu_context *ctx,
+                           struct mdt_device *mdt,
+                           struct lustre_capa_key *keys)
+{
+        struct mdt_thread_info *mti;
+        struct lustre_capa_key *tmp;
+        struct thandle *th;
+        loff_t off = 0;
+        int i, rc;
+
+        mti = lu_context_key_get(ctx, &mdt_thread_key);
+        /* same check as the other users of mdt_thread_key in this file */
+        LASSERT(mti != NULL);
+
+        th = mdt_trans_start(ctx, mdt, MDT_TXN_CAPA_KEYS_WRITE_CREDITS);
+        if (IS_ERR(th))
+                /* no ENTRY in this function, so plain return, not RETURN() */
+                return PTR_ERR(th);
+
+        tmp = &mti->mti_capa_key;
+
+        for (i = 0; i < 2; i++) {
+                lck_cpu_to_le(tmp, &keys[i]);
+
+                rc = mdt_record_write(ctx, mdt->mdt_ck_obj, tmp, sizeof(*tmp),
+                                      &off, th);
+                if (rc)
+                        break;
+        }
+
+        mdt_trans_stop(ctx, mdt, th);
+
+        CDEBUG(D_INFO, "write capability keys rc = %d:\n", rc);
+        return rc;
+}
+
+/*
+ * Read the two capability keys from mdt->mdt_ck_obj into @keys.
+ *
+ * Each record is read into the per-thread scratch key (mti_capa_key) with
+ * mdt_record_read(), which is given the running offset starting at 0, and
+ * is then converted from little-endian into the caller's array.
+ *
+ * Returns 0 on success or the first non-zero mdt_record_read() result.
+ */
+static int read_capa_keys(const struct lu_context *ctx,
+                          struct mdt_device *mdt,
+                          struct lustre_capa_key *keys)
+{
+        struct mdt_thread_info *info;
+        struct lustre_capa_key *raw;
+        struct lustre_capa_key *cur;
+        loff_t pos = 0;
+        int rc = 0;
+
+        info = lu_context_key_get(ctx, &mdt_thread_key);
+        raw = &info->mti_capa_key;
+
+        for (cur = keys; cur < keys + 2; cur++) {
+                rc = mdt_record_read(ctx, mdt->mdt_ck_obj, raw, sizeof(*raw),
+                                     &pos);
+                if (rc != 0)
+                        break;
+
+                lck_le_to_cpu(cur, raw);
+
+                DEBUG_CAPA_KEY(D_INFO, cur, "read");
+        }
+
+        return rc;
+}
+
+/*
+ * Load the two capability keys of @mdt into mdt->mdt_capa_keys and arm the
+ * key timer.
+ *
+ * The size of mdt->mdt_ck_obj is fetched under its read lock.  If the size
+ * is zero, two new keys are generated with make_capa_key() and written
+ * out; otherwise the existing keys are read back.  On success the expiry
+ * is recomputed and mdt_ck_timer is set to fire at that time.
+ *
+ * Returns 0 on success, or the error from do_attr_get(), write_capa_keys()
+ * or read_capa_keys().
+ */
+int mdt_capa_keys_init(const struct lu_context *ctx, struct mdt_device *mdt)
+{
+        struct lustre_capa_key  *keys = mdt->mdt_capa_keys;
+        struct mdt_thread_info  *info;
+        struct dt_object        *ck_obj;
+        struct lu_attr          *attr;
+        mdsno_t                  node_id;
+        int                      idx;
+        int                      rc;
+        ENTRY;
+
+        node_id = mdt->mdt_md_dev.md_lu_dev.ld_site->ls_node_id;
+
+        info = lu_context_key_get(ctx, &mdt_thread_key);
+        LASSERT(info != NULL);
+        attr = &info->mti_attr.ma_attr;
+
+        /* fetch the key object's attributes under its read lock */
+        ck_obj = mdt->mdt_ck_obj;
+        ck_obj->do_ops->do_read_lock(ctx, ck_obj);
+        rc = ck_obj->do_ops->do_attr_get(ctx, ck_obj, attr);
+        ck_obj->do_ops->do_read_unlock(ctx, ck_obj);
+        if (rc != 0)
+                RETURN(rc);
+
+        if ((unsigned long)attr->la_size != 0) {
+                /* key object already has content: load it */
+                rc = read_capa_keys(ctx, mdt, keys);
+                if (rc != 0) {
+                        CERROR("error reading MDS %s: rc %d\n", CAPA_KEYS, rc);
+                        RETURN(rc);
+                }
+        } else {
+                /* empty key object: generate both keys and store them */
+                for (idx = 0; idx < 2; idx++) {
+                        make_capa_key(&keys[idx], node_id, idx);
+                        DEBUG_CAPA_KEY(D_SEC, &keys[idx], "initializing");
+                }
+
+                rc = write_capa_keys(ctx, mdt, keys);
+                if (rc != 0) {
+                        CERROR("error writing MDS %s: rc %d\n", CAPA_KEYS, rc);
+                        RETURN(rc);
+                }
+        }
+
+        set_capa_key_expiry(mdt);
+        mod_timer(&mdt->mdt_ck_timer, mdt->mdt_ck_expiry);
+        CDEBUG(D_SEC, "mds_ck_timer %lu\n", mdt->mdt_ck_expiry);
+
+        RETURN(0);
+}
+
+/*
+ * Timer handler for mdt_ck_timer.  The timer cookie is the mdt_device
+ * pointer; the handler only flags SVC_EVENT on the capability-key thread and
+ * wakes it, leaving the actual work to mdt_ck_thread_main().
+ */
+void mdt_ck_timer_callback(unsigned long castmeharder)
+{
+        struct ptlrpc_thread *ck_thread;
+
+        ENTRY;
+        ck_thread = &((struct mdt_device *)castmeharder)->mdt_ck_thread;
+        ck_thread->t_flags |= SVC_EVENT;
+        wake_up(&ck_thread->t_ctl_waitq);
+        EXIT;
+}
+
+/*
+ * Body of the "mdt_ck" kernel thread, which replaces the current ("red")
+ * capability key each time mdt_ck_timer fires.
+ *
+ * Per wake-up with SVC_EVENT set and the expiry time reached:
+ *   1. build a candidate key in the per-thread scratch slot with
+ *      make_capa_key(), seeded from the current red key;
+ *   2. hand it to the child md device via mdo_update_capa_key();
+ *   3. rotate the in-memory pair under capa_lock (old red key moves to
+ *      slot 0, candidate becomes red) and only then persist the pair with
+ *      write_capa_keys(), so that the file holds the keys actually in use;
+ *      if the write fails the in-memory pair is restored;
+ *   4. re-arm the timer (300 seconds ahead if anything failed).
+ *
+ * The thread announces SVC_RUNNING on start and SVC_STOPPED on exit through
+ * t_ctl_waitq, and leaves the loop when SVC_STOPPING is set.
+ */
+static int mdt_ck_thread_main(void *args)
+{
+        struct mdt_device      *mdt = args;
+        struct ptlrpc_thread   *thread = &mdt->mdt_ck_thread;
+        struct lustre_capa_key *tmp, *key = red_capa_key(mdt);
+        struct lustre_capa_key *bkey = &mdt->mdt_capa_keys[0];
+        struct lustre_capa_key  saved;
+        struct lu_context       ctx;
+        struct mdt_thread_info *info;
+        struct md_device       *next;
+        struct l_wait_info      lwi = { 0 };
+        mdsno_t                 mdsnum;
+        int                     rc;
+        ENTRY;
+
+        ptlrpc_daemonize("mdt_ck");
+        cfs_block_allsigs();
+
+        thread->t_flags = SVC_RUNNING;
+        cfs_waitq_signal(&thread->t_ctl_waitq);
+
+        rc = lu_context_init(&ctx, LCT_MD_THREAD);
+        if (rc) {
+                /*
+                 * SVC_RUNNING has already been published, so
+                 * mdt_ck_thread_stop() will set SVC_STOPPING and then wait
+                 * for SVC_STOPPED.  Exiting here would leave it waiting
+                 * forever; stay around until asked to stop and acknowledge.
+                 */
+                CERROR("mdt_ck: cannot init context, key update disabled: "
+                       "rc = %d\n", rc);
+                l_wait_event(thread->t_ctl_waitq,
+                             thread->t_flags & SVC_STOPPING, &lwi);
+                thread->t_flags = SVC_STOPPED;
+                cfs_waitq_signal(&thread->t_ctl_waitq);
+                RETURN(rc);
+        }
+
+        thread->t_ctx = &ctx;
+        ctx.lc_thread = thread;
+
+        lu_context_enter(&ctx);
+        info = lu_context_key_get(&ctx, &mdt_thread_key);
+        LASSERT(info != NULL);
+
+        tmp = &info->mti_capa_key;
+        mdsnum = mdt->mdt_md_dev.md_lu_dev.ld_site->ls_node_id;
+        while (1) {
+                l_wait_event(thread->t_ctl_waitq,
+                             thread->t_flags & (SVC_STOPPING | SVC_EVENT),
+                             &lwi);
+
+                if (thread->t_flags & SVC_STOPPING)
+                        break;
+                thread->t_flags &= ~SVC_EVENT;
+
+                /*
+                 * NOTE(review): a wake-up before the expiry time ends the
+                 * thread rather than going back to sleep - confirm that
+                 * this is intended.
+                 */
+                if (time_after(mdt->mdt_ck_expiry, jiffies))
+                        break;
+
+                *tmp = *key;
+                make_capa_key(tmp, mdsnum, key->lk_keyid);
+
+                next = mdt->mdt_child;
+                rc = next->md_ops->mdo_update_capa_key(&ctx, next, tmp);
+                if (!rc) {
+                        /*
+                         * Rotate first, persist second: writing before the
+                         * rotation would store the previous key pair.
+                         * Slot 0 is saved on the stack so the rotation can
+                         * be undone; tmp is not relied upon after this
+                         * point.
+                         */
+                        spin_lock(&capa_lock);
+                        saved = *bkey;
+                        *bkey = *key;
+                        *key = *tmp;
+                        spin_unlock(&capa_lock);
+
+                        rc = write_capa_keys(&ctx, mdt, mdt->mdt_capa_keys);
+                        if (rc) {
+                                /* nothing was stored: restore the old pair */
+                                spin_lock(&capa_lock);
+                                *key = *bkey;
+                                *bkey = saved;
+                                spin_unlock(&capa_lock);
+                        } else {
+                                set_capa_key_expiry(mdt);
+
+                                DEBUG_CAPA_KEY(D_SEC, key, "new");
+                        }
+                }
+                if (rc) {
+                        DEBUG_CAPA_KEY(D_ERROR, key, "update failed for");
+                        /* next retry is in 300 sec */
+                        mdt->mdt_ck_expiry = jiffies + 300 * HZ;
+                }
+
+                mod_timer(&mdt->mdt_ck_timer, mdt->mdt_ck_expiry);
+                CDEBUG(D_SEC, "mdt_ck_timer %lu\n", mdt->mdt_ck_expiry);
+        }
+        lu_context_exit(&ctx);
+        lu_context_fini(&ctx);
+
+        thread->t_flags = SVC_STOPPED;
+        cfs_waitq_signal(&thread->t_ctl_waitq);
+        RETURN(0);
+}
+
+/*
+ * Spawn the capability-key thread for @mdt and wait until it has flagged
+ * SVC_RUNNING on its control wait queue.
+ *
+ * Returns 0 once the thread is running, or the negative kernel_thread()
+ * result if the thread could not be created.
+ */
+int mdt_ck_thread_start(struct mdt_device *mdt)
+{
+        struct ptlrpc_thread *ck = &mdt->mdt_ck_thread;
+        int pid;
+
+        cfs_waitq_init(&ck->t_ctl_waitq);
+
+        pid = kernel_thread(mdt_ck_thread_main, mdt,
+                            (CLONE_VM | CLONE_FILES));
+        if (pid >= 0) {
+                /* block until the new thread reports in */
+                wait_event(ck->t_ctl_waitq, ck->t_flags & SVC_RUNNING);
+                return 0;
+        }
+
+        CERROR("cannot start mdt_ck thread, rc = %d\n", pid);
+        return pid;
+}
+
+/*
+ * Ask the capability-key thread of @mdt to exit and wait for it to flag
+ * SVC_STOPPED.  Does nothing when the thread is not marked SVC_RUNNING.
+ */
+void mdt_ck_thread_stop(struct mdt_device *mdt)
+{
+        struct ptlrpc_thread *ck = &mdt->mdt_ck_thread;
+
+        if (ck->t_flags & SVC_RUNNING) {
+                ck->t_flags = SVC_STOPPING;
+                cfs_waitq_signal(&ck->t_ctl_waitq);
+                wait_event(ck->t_ctl_waitq, ck->t_flags & SVC_STOPPED);
+        }
+}
index c129286..96a89da 100644 (file)
@@ -256,6 +256,7 @@ static int mdt_getattr_internal(struct mdt_thread_info *info,
                                 struct mdt_object *o)
 {
         struct md_object        *next = mdt_object_child(o);
+        struct mdt_device       *mdt = info->mti_mdt;
         const struct mdt_body   *reqbody = info->mti_body;
         struct ptlrpc_request   *req = mdt_info_req(info);
         struct md_attr          *ma = &info->mti_attr;
@@ -359,7 +360,6 @@ static int mdt_getattr_internal(struct mdt_thread_info *info,
                         repbody->aclsize = sizeof(struct mdt_remote_perm);
                 }
         }
-
 #ifdef CONFIG_FS_POSIX_ACL
         else if ((req->rq_export->exp_connect_flags & OBD_CONNECT_ACL) &&
                  (reqbody->valid & OBD_MD_FLACL)) {
@@ -381,6 +381,23 @@ static int mdt_getattr_internal(struct mdt_thread_info *info,
         }
 #endif
 
+        if (mdt->mdt_opts.mo_mds_capa) {
+                struct lustre_capa *capa;
+
+                spin_lock(&capa_lock);
+                info->mti_capa_key = *red_capa_key(mdt);
+                spin_unlock(&capa_lock);
+
+                capa = req_capsule_server_get(&info->mti_pill, &RMF_CAPA1);
+                LASSERT(capa);
+                capa->lc_opc = CAPA_OPC_MDS_DEFAULT;
+                rc = mo_capa_get(ctxt, next, capa);
+                if (rc)
+                        RETURN(rc);
+                else
+                        repbody->valid |= OBD_MD_FLMDSCAPA;
+        }
         RETURN(rc);
 }
 
@@ -406,7 +423,7 @@ static int mdt_getattr(struct mdt_thread_info *info)
         }
 
         rc = mdt_getattr_internal(info, obj);
-        mdt_shrink_reply(info, REPLY_REC_OFF + 1);
+        mdt_shrink_reply(info, REPLY_REC_OFF + 1, 1, 0);
         if (reqbody->valid & OBD_MD_FLRMTPERM) 
                 mdt_exit_ucred(info);
         RETURN(rc);
@@ -544,7 +561,8 @@ static int mdt_getattr_name_lock(struct mdt_thread_info *info,
          *step 3: find the child object by fid & lock it.
          *        regardless if it is local or remote.
          */
-        child = mdt_object_find(info->mti_ctxt, info->mti_mdt, child_fid);
+        child = mdt_object_find(info->mti_ctxt, info->mti_mdt, child_fid,
+                                BYPASS_CAPA);
         if (IS_ERR(child))
                 GOTO(out_parent, rc = PTR_ERR(child));
         if (is_resent) {
@@ -632,7 +650,7 @@ static int mdt_getattr_name(struct mdt_thread_info *info)
                 ldlm_lock_decref(&lhc->mlh_lh, lhc->mlh_mode);
                 lhc->mlh_lh.cookie = 0;
         }
-        mdt_shrink_reply(info, REPLY_REC_OFF + 1);
+        mdt_shrink_reply(info, REPLY_REC_OFF + 1, 1, 0);
         mdt_exit_ucred(info);
         RETURN(rc);
 }
@@ -1130,6 +1148,37 @@ static int mdt_quotactl_handle(struct mdt_thread_info *info)
         return -EOPNOTSUPP;
 }
 
+/*
+ * RENEW_CAPA handler: refresh the capability carried in the reply buffer.
+ *
+ * The current red key is snapshotted into info->mti_capa_key under
+ * capa_lock, the object named by capa->lc_fid is looked up, and the lower
+ * layer is asked through mo_capa_get() to fill in the capability.
+ *
+ * Returns 0 on success, the lookup error if the object cannot be found, or
+ * the mo_capa_get() error.  The object reference taken by the lookup is
+ * always dropped before returning.
+ *
+ * NOTE(review): capa is taken from the server (reply) side of the capsule,
+ * so lc_fid is whatever that buffer holds at this point - confirm that it
+ * has been filled from the request before this handler runs.
+ */
+static int mdt_renew_capa(struct mdt_thread_info *info)
+{
+        struct mdt_device *mdt = info->mti_mdt;
+        struct mdt_object *obj;
+        struct mdt_body *body;
+        struct lustre_capa *capa;
+        int rc;
+        ENTRY;
+
+        body = req_capsule_server_get(&info->mti_pill, &RMF_MDT_BODY);
+        LASSERT(body);
+
+        capa = req_capsule_server_get(&info->mti_pill, &RMF_CAPA1);
+        LASSERT(capa);
+
+        spin_lock(&capa_lock);
+        info->mti_capa_key = *red_capa_key(mdt);
+        spin_unlock(&capa_lock);
+
+        obj = mdt_object_find(info->mti_ctxt, info->mti_mdt, &capa->lc_fid,
+                              capa);
+        /* an error pointer must not reach mdt_object_child() below */
+        if (IS_ERR(obj))
+                RETURN(PTR_ERR(obj));
+
+        /* TODO: add capa check */
+        rc = mo_capa_get(info->mti_ctxt, mdt_object_child(obj), capa);
+
+        /* release the reference obtained by mdt_object_find() */
+        mdt_object_put(info->mti_ctxt, obj);
+
+        RETURN(rc);
+}
+
 /*
  * OBD PING and other handlers.
  */
@@ -1226,13 +1275,17 @@ static struct mdt_object *mdt_obj(struct lu_object *o)
 
 struct mdt_object *mdt_object_find(const struct lu_context *ctxt,
                                    struct mdt_device *d,
-                                   const struct lu_fid *f)
+                                   const struct lu_fid *f,
+                                   struct lustre_capa *c)
 {
         struct lu_object *o;
         struct mdt_object *m;
         ENTRY;
 
-        o = lu_object_find(ctxt, d->mdt_md_dev.md_lu_dev.ld_site, f);
+        if (!d->mdt_opts.mo_mds_capa)
+                c = BYPASS_CAPA;
+
+        o = lu_object_find(ctxt, d->mdt_md_dev.md_lu_dev.ld_site, f, c);
         if (IS_ERR(o))
                 m = (struct mdt_object *)o;
         else
@@ -1300,11 +1353,12 @@ void mdt_object_unlock(struct mdt_thread_info *info, struct mdt_object *o,
 struct mdt_object *mdt_object_find_lock(struct mdt_thread_info *info,
                                         const struct lu_fid *f,
                                         struct mdt_lock_handle *lh,
-                                        __u64 ibits)
+                                        __u64 ibits,
+                                        struct lustre_capa *capa)
 {
         struct mdt_object *o;
 
-        o = mdt_object_find(info->mti_ctxt, info->mti_mdt, f);
+        o = mdt_object_find(info->mti_ctxt, info->mti_mdt, f, capa);
         if (!IS_ERR(o)) {
                 int rc;
 
@@ -1383,36 +1437,40 @@ static int mdt_lock_reply_compat(struct mdt_device *m, struct ldlm_reply *rep)
  */
 static int mdt_body_unpack(struct mdt_thread_info *info, __u32 flags)
 {
-        const struct mdt_body   *body;
-        struct mdt_object       *obj;
-        const struct lu_context *ctx;
-        struct req_capsule      *pill;
-        int                     rc;
+        const struct mdt_body    *body;
+        struct lustre_capa *capa = NULL;
+        struct mdt_object        *obj;
+        const struct lu_context  *ctx;
+        struct req_capsule       *pill;
+        int                       rc;
 
         ctx = info->mti_ctxt;
         pill = &info->mti_pill;
 
         body = info->mti_body = req_capsule_client_get(pill, &RMF_MDT_BODY);
-        if (body != NULL) {
-                if (fid_is_sane(&body->fid1)) {
-                        obj = mdt_object_find(ctx, info->mti_mdt, &body->fid1);
-                        if (!IS_ERR(obj)) {
-                                if ((flags & HABEO_CORPUS) &&
-                                    !lu_object_exists(&obj->mot_obj.mo_lu)) {
-                                        mdt_object_put(ctx, obj);
-                                        rc = -ENOENT;
-                                } else {
-                                        info->mti_object = obj;
-                                        rc = 0;
-                                }
-                        } else
-                                rc = PTR_ERR(obj);
+        if (body == NULL)
+                return -EFAULT;
+
+        if (!fid_is_sane(&body->fid1)) {
+                CERROR("Invalid fid: "DFID"\n", PFID(&body->fid1));
+                return -EINVAL;
+        }
+
+        if (req_capsule_get_size(pill, &RMF_CAPA1, RCL_CLIENT))
+                capa = req_capsule_client_get(pill, &RMF_CAPA1);
+        obj = mdt_object_find(ctx, info->mti_mdt, &body->fid1, capa);
+        if (!IS_ERR(obj)) {
+                if ((flags & HABEO_CORPUS) &&
+                    !lu_object_exists(&obj->mot_obj.mo_lu)) {
+                        mdt_object_put(ctx, obj);
+                        rc = -ENOENT;
                 } else {
-                        CERROR("Invalid fid: "DFID"\n", PFID(&body->fid1));
-                        rc = -EINVAL;
+                        info->mti_object = obj;
+                        rc = 0;
                 }
         } else
-                rc = -EFAULT;
+                rc = PTR_ERR(obj);
+
         return rc;
 }
 
@@ -2105,7 +2163,7 @@ static int mdt_intent_getattr(enum mdt_it_code opcode,
         
         ldlm_rep->lock_policy_res2 =
                 mdt_getattr_name_lock(info, lhc, child_bits, ldlm_rep);
-        mdt_shrink_reply(info, DLM_REPLY_REC_OFF + 1);
+        mdt_shrink_reply(info, DLM_REPLY_REC_OFF + 1, 1, 0);
 
         if (mdt_get_disposition(ldlm_rep, DISP_LOOKUP_NEG))
                 ldlm_rep->lock_policy_res2 = 0;
@@ -3034,6 +3092,10 @@ static void mdt_fini(const struct lu_context *ctx, struct mdt_device *m)
                 m->mdt_rootsquash_info = NULL;
         }
 
+        cleanup_capas(CAPA_SITE_SERVER);
+        del_timer(&m->mdt_ck_timer);
+        mdt_ck_thread_stop(m);
+
         mdt_fs_cleanup(ctx, m);
 
         /* finish the stack */
@@ -3080,6 +3142,11 @@ static int mdt_init0(const struct lu_context *ctx, struct mdt_device *m,
         m->mdt_opts.mo_user_xattr = 0;
         m->mdt_opts.mo_acl = 0;
         m->mdt_opts.mo_compat_resname = 0;
+        m->mdt_opts.mo_mds_capa = 0;
+        m->mdt_opts.mo_oss_capa = 0;
+        m->mdt_capa_alg = CAPA_HMAC_ALG_SHA1;
+        m->mdt_capa_timeout = CAPA_TIMEOUT;
+        m->mdt_ck_timeout = CAPA_KEY_TIMEOUT;
         obd->obd_replayable = 1;
         spin_lock_init(&m->mdt_client_bitmap_lock);
 
@@ -3154,9 +3221,20 @@ static int mdt_init0(const struct lu_context *ctx, struct mdt_device *m,
                 GOTO(err_free_ns, rc);
         }
 
-        rc = mdt_start_ptlrpc_service(m);
+        rc = mdt_ck_thread_start(m);
         if (rc)
                 GOTO(err_free_ns, rc);
+        m->mdt_ck_timer.function = mdt_ck_timer_callback;
+        m->mdt_ck_timer.data = (unsigned long)m;
+        init_timer(&m->mdt_ck_timer);
+
+        s->ls_capa_keys = m->mdt_capa_keys;
+        s->ls_capa_timeout = m->mdt_capa_timeout;
+        s->ls_capa_alg = m->mdt_capa_alg;
+
+        rc = mdt_start_ptlrpc_service(m);
+        if (rc)
+                GOTO(err_capa, rc);
 
         ping_evictor_start();
         rc = mdt_fs_setup(ctx, m, obd);
@@ -3172,6 +3250,9 @@ static int mdt_init0(const struct lu_context *ctx, struct mdt_device *m,
 
 err_stop_service:
         mdt_stop_ptlrpc_service(m);
+err_capa:
+        del_timer(&m->mdt_ck_timer);
+        mdt_ck_thread_stop(m);
 err_free_ns:
         upcall_cache_cleanup(m->mdt_rmtacl_cache);
         m->mdt_rmtacl_cache = NULL;
@@ -3421,6 +3502,12 @@ static int mdt_connect_internal(struct obd_export *exp,
                 if (!mdt->mdt_opts.mo_user_xattr)
                         data->ocd_connect_flags &= ~OBD_CONNECT_XATTR;
 
+                if (!mdt->mdt_opts.mo_mds_capa)
+                        data->ocd_connect_flags &= ~OBD_CONNECT_MDS_CAPA;
+
+                if (!mdt->mdt_opts.mo_oss_capa)
+                        data->ocd_connect_flags &= ~OBD_CONNECT_OSS_CAPA;
+
                 exp->exp_connect_flags = data->ocd_connect_flags;
                 data->ocd_version = LUSTRE_VERSION_CODE;
                 exp->exp_mdt_data.med_ibits_known = data->ocd_ibits_known;
@@ -3440,6 +3527,21 @@ static int mdt_connect_internal(struct obd_export *exp,
                 return -EBADE;
         }
 
+        if (mdt->mdt_opts.mo_mds_capa &&
+            ((exp->exp_connect_flags & OBD_CONNECT_MDS_CAPA) == 0)) {
+                CWARN("%s: MDS requires capability support, but client not\n",
+                      mdt->mdt_md_dev.md_lu_dev.ld_obd->obd_name);
+                return -EBADE;
+        }
+
+        if (mdt->mdt_opts.mo_oss_capa &&
+            ((exp->exp_connect_flags & OBD_CONNECT_OSS_CAPA) == 0)) {
+                CWARN("%s: MDS requires OSS capability support, "
+                      "but client not\n",
+                      mdt->mdt_md_dev.md_lu_dev.ld_obd->obd_name);
+                return -EBADE;
+        }
+
         return 0;
 }
 
@@ -3898,6 +4000,7 @@ static int __init mdt_mod_init(void)
         rc = class_register_type(&mdt_obd_device_ops, NULL,
                                  lvars.module_vars, LUSTRE_MDT_NAME,
                                  &mdt_device_type);
+
         return rc;
 }
 
@@ -3959,7 +4062,8 @@ DEF_MDT_HNDL_F(0           |HABEO_REFERO, PIN,          mdt_pin),
 DEF_MDT_HNDL_0(0,                         SYNC,         mdt_sync),
 DEF_MDT_HNDL_F(HABEO_CORPUS|HABEO_REFERO, IS_SUBDIR,    mdt_is_subdir),
 DEF_MDT_HNDL_0(0,                         QUOTACHECK,   mdt_quotacheck_handle),
-DEF_MDT_HNDL_0(0,                         QUOTACTL,     mdt_quotactl_handle)
+DEF_MDT_HNDL_0(0,                         QUOTACTL,     mdt_quotactl_handle),
+DEF_MDT_HNDL_0(0           |HABEO_REFERO, RENEW_CAPA,   mdt_renew_capa)
 };
 
 #define DEF_OBD_HNDL(flags, name, fn)                   \
index d37743a..103ee17 100644 (file)
@@ -133,9 +133,11 @@ struct mdt_device {
          * Options bit-fields.
          */
         struct {
-                signed int         mo_user_xattr :1;
-                signed int         mo_acl        :1;
-                signed int         mo_compat_resname:1;
+                signed int         mo_user_xattr :1,
+                                   mo_acl        :1,
+                                   mo_compat_resname:1,
+                                   mo_mds_capa   :1,
+                                   mo_oss_capa   :1;
         } mdt_opts;
 
         /* lock to pretect epoch and write count */
@@ -168,6 +170,16 @@ struct mdt_device {
         /* root squash */
         struct rootsquash_info     *mdt_rootsquash_info;
         int                        no_gss_support;
+
+        /* capability */
+        __u32                      mdt_capa_alg;
+        unsigned long              mdt_capa_timeout;
+        unsigned long              mdt_ck_timeout;
+        struct dt_object          *mdt_ck_obj;
+        unsigned long              mdt_ck_expiry;
+        struct timer_list          mdt_ck_timer;
+        struct ptlrpc_thread       mdt_ck_thread;
+        struct lustre_capa_key     mdt_capa_keys[2];
 };
 
 /*XXX copied from mds_internal.h */
@@ -209,6 +221,8 @@ struct mdt_reint_record {
         int                  rr_logcookielen;
         const struct llog_cookie  *rr_logcookies;
         __u32                rr_flags;
+        struct lustre_capa  *rr_capa1;
+        struct lustre_capa  *rr_capa2;
 };
 
 enum mdt_reint_flag {
@@ -316,6 +330,7 @@ struct mdt_thread_info {
         struct mdt_client_data     mti_mcd;
         loff_t                     mti_off;
         struct txn_param           mti_txn_param;
+        struct lustre_capa_key     mti_capa_key;
 };
 /*
  * Info allocated per-transaction.
@@ -380,11 +395,13 @@ void mdt_object_unlock(struct mdt_thread_info *,
 
 struct mdt_object *mdt_object_find(const struct lu_context *,
                                    struct mdt_device *,
-                                   const struct lu_fid *);
+                                   const struct lu_fid *,
+                                   struct lustre_capa *);
 struct mdt_object *mdt_object_find_lock(struct mdt_thread_info *,
                                         const struct lu_fid *,
                                         struct mdt_lock_handle *,
-                                        __u64);
+                                        __u64 ibits,
+                                        struct lustre_capa *);
 void mdt_object_unlock_put(struct mdt_thread_info *,
                            struct mdt_object *,
                            struct mdt_lock_handle *,
@@ -443,10 +460,21 @@ int mdt_close(struct mdt_thread_info *info);
 int mdt_attr_set(struct mdt_thread_info *info, struct mdt_object *mo, 
                  int flags);
 int mdt_done_writing(struct mdt_thread_info *info);
-void mdt_shrink_reply(struct mdt_thread_info *info, int offset);
+void mdt_shrink_reply(struct mdt_thread_info *info, int offset,
+                      int mdscapa, int osscapa);
 int mdt_handle_last_unlink(struct mdt_thread_info *, struct mdt_object *,
                            const struct md_attr *);
 void mdt_reconstruct_open(struct mdt_thread_info *, struct mdt_lock_handle *);
+struct thandle* mdt_trans_start(const struct lu_context *ctx,
+                                struct mdt_device *mdt, int credits);
+void mdt_trans_stop(const struct lu_context *ctx,
+                    struct mdt_device *mdt, struct thandle *th);
+int mdt_record_write(const struct lu_context *ctx,
+                     struct dt_object *dt, const void *buf,
+                     size_t count, loff_t *pos, struct thandle *th);
+int mdt_record_read(const struct lu_context *ctx,
+                    struct dt_object *dt, void *buf,
+                    size_t count, loff_t *pos);
 
 void mdt_dump_lmm(int level, const struct lov_mds_md *lmm);
 
@@ -546,5 +574,18 @@ do {                                                                         \
         }                                                                    \
 } while(0)
 
+/*
+ * fid Capability
+ */
+int mdt_ck_thread_start(struct mdt_device *mdt);
+void mdt_ck_thread_stop(struct mdt_device *mdt);
+void mdt_ck_timer_callback(unsigned long castmeharder);
+int mdt_capa_keys_init(const struct lu_context *ctx, struct mdt_device *mdt);
+
+/* The "red" key is the second slot (index 1) of mdt->mdt_capa_keys. */
+static inline struct lustre_capa_key *red_capa_key(struct mdt_device *mdt)
+{
+        struct lustre_capa_key *slots = mdt->mdt_capa_keys;
+
+        return slots + 1;
+}
+
 #endif /* __KERNEL__ */
 #endif /* _MDT_H */
index adfe5ac..74eb3d4 100644 (file)
@@ -487,12 +487,12 @@ void mdt_dump_lmm(int level, const struct lov_mds_md *lmm)
         }
 }
 
-void mdt_shrink_reply(struct mdt_thread_info *info, int offset)
+void mdt_shrink_reply(struct mdt_thread_info *info, int offset,
+                      int mdscapa, int osscapa)
 {
         struct ptlrpc_request *req = mdt_info_req(info);
         struct mdt_body *body;
-        int acl_size;
-        int md_size;
+        int acl_size, md_size;
 
         body = req_capsule_server_get(&info->mti_pill, &RMF_MDT_BODY);
         LASSERT(body != NULL);
@@ -504,7 +504,15 @@ void mdt_shrink_reply(struct mdt_thread_info *info, int offset)
                md_size, acl_size);
 
         lustre_shrink_reply(req, offset, md_size, 1);
-        lustre_shrink_reply(req, md_size ? offset + 1 : offset, acl_size, 0);
+        offset += !!md_size;
+        lustre_shrink_reply(req, offset, acl_size, 1);
+        offset += !!acl_size;
+        if (mdscapa && !(body->valid & OBD_MD_FLMDSCAPA))
+                lustre_shrink_reply(req, offset, 0, 0);
+        offset += mdscapa;
+        if (osscapa && !(body->valid & OBD_MD_FLOSSCAPA))
+                lustre_shrink_reply(req, offset, 0, 0);
+        offset += osscapa;
 }
 
 
@@ -624,6 +632,10 @@ static int mdt_setattr_unpack_rec(struct mdt_thread_info *info)
         la->la_atime = rec->sa_atime;
         la->la_mtime = rec->sa_mtime;
         ma->ma_valid = MA_INODE;
+
+        if (req_capsule_get_size(pill, &RMF_CAPA1, RCL_CLIENT))
+                rr->rr_capa1 = req_capsule_client_get(pill, &RMF_CAPA1);
+
         RETURN(0);
 }
 
@@ -662,6 +674,7 @@ static int mdt_setattr_unpack(struct mdt_thread_info *info)
                                                        RCL_CLIENT);
                 ma->ma_valid |= MA_LOV;
         }
+
         if (req_capsule_field_present(pill, &RMF_LOGCOOKIES, RCL_CLIENT)) {
                 ma->ma_cookie = req_capsule_client_get(pill,
                                                        &RMF_LOGCOOKIES);
@@ -693,64 +706,66 @@ static int mdt_create_unpack(struct mdt_thread_info *info)
         struct lu_attr          *attr = &info->mti_attr.ma_attr;
         struct mdt_reint_record *rr = &info->mti_rr;
         struct req_capsule      *pill = &info->mti_pill;
-        int                     result = 0;
         ENTRY;
 
         rec = req_capsule_client_get(pill, &RMF_REC_CREATE);
-        if (rec != NULL) {
-                uc->mu_fsuid = rec->cr_fsuid;
-                uc->mu_fsgid = rec->cr_fsgid;
-                uc->mu_cap   = rec->cr_cap;
-                uc->mu_suppgids[0] = rec->cr_suppgid;
-                uc->mu_suppgids[1] = -1;
-                rr->rr_fid1 = &rec->cr_fid1;
-                rr->rr_fid2 = &rec->cr_fid2;
-                attr->la_mode = rec->cr_mode;
-                attr->la_rdev  = rec->cr_rdev;
-                attr->la_uid   = rec->cr_fsuid;
-                attr->la_gid   = rec->cr_fsgid;
-                attr->la_ctime = rec->cr_time;
-                attr->la_mtime = rec->cr_time;
-                attr->la_atime = rec->cr_time;
-                attr->la_valid = LA_MODE | LA_RDEV | LA_UID | LA_GID |
-                                 LA_CTIME | LA_MTIME | LA_ATIME;
-                info->mti_spec.sp_cr_flags = rec->cr_flags;
-
-                rr->rr_name = req_capsule_client_get(pill, &RMF_NAME);
-                if (S_ISDIR(attr->la_mode)) {
-                        struct md_create_spec *sp = &info->mti_spec;
-                        /* pass parent fid for cross-ref cases */
-                        sp->u.sp_pfid = rr->rr_fid1;
-                        if (info->mti_spec.sp_cr_flags & MDS_CREATE_SLAVE_OBJ) {
-                                /* create salve object req, need
-                                 * unpack split ea here
-                                 */
-                               req_capsule_extend(pill,
-                                                  &RQF_MDS_REINT_CREATE_SLAVE);
-                               LASSERT(req_capsule_field_present(pill,
-                                                      &RMF_EADATA, RCL_CLIENT));
-                               sp->u.sp_ea.eadata = req_capsule_client_get(pill,
-                                                            &RMF_EADATA);
-                               sp->u.sp_ea.eadatalen =req_capsule_get_size(pill,
-                                                       &RMF_EADATA, RCL_CLIENT);
-                               sp->u.sp_ea.fid = rr->rr_fid1;
-                        }
-                } else if (S_ISLNK(attr->la_mode)) {
-                        const char *tgt = NULL;
-                        req_capsule_extend(pill, &RQF_MDS_REINT_CREATE_SYM);
-                        if (req_capsule_field_present(pill, &RMF_SYMTGT,
-                                                      RCL_CLIENT)) {
-                                tgt = req_capsule_client_get(pill,
-                                                             &RMF_SYMTGT);
-                                info->mti_spec.u.sp_symname = tgt;
-                        }
-                        if (tgt == NULL)
-                                result = -EFAULT;
+        if (rec == NULL)
+                RETURN(-EFAULT);
+
+        uc->mu_fsuid = rec->cr_fsuid;
+        uc->mu_fsgid = rec->cr_fsgid;
+        uc->mu_cap   = rec->cr_cap;
+        uc->mu_suppgids[0] = rec->cr_suppgid;
+        uc->mu_suppgids[1] = -1;
+
+        rr->rr_fid1 = &rec->cr_fid1;
+        rr->rr_fid2 = &rec->cr_fid2;
+        attr->la_mode = rec->cr_mode;
+        attr->la_rdev  = rec->cr_rdev;
+        attr->la_uid   = rec->cr_fsuid;
+        attr->la_gid   = rec->cr_fsgid;
+        attr->la_ctime = rec->cr_time;
+        attr->la_mtime = rec->cr_time;
+        attr->la_atime = rec->cr_time;
+        attr->la_valid = LA_MODE | LA_RDEV | LA_UID | LA_GID |
+                         LA_CTIME | LA_MTIME | LA_ATIME;
+        info->mti_spec.sp_cr_flags = rec->cr_flags;
+
+        if (req_capsule_get_size(pill, &RMF_CAPA1, RCL_CLIENT))
+                rr->rr_capa1 = req_capsule_client_get(pill, &RMF_CAPA1);
+
+        rr->rr_name = req_capsule_client_get(pill, &RMF_NAME);
+        if (S_ISDIR(attr->la_mode)) {
+                struct md_create_spec *sp = &info->mti_spec;
+
+                /* pass parent fid for cross-ref cases */
+                sp->u.sp_pfid = rr->rr_fid1;
+                if (info->mti_spec.sp_cr_flags & MDS_CREATE_SLAVE_OBJ) {
+                        /* create salve object req, need
+                         * unpack split ea here
+                         */
+                       req_capsule_extend(pill, &RQF_MDS_REINT_CREATE_SLAVE);
+                       LASSERT(req_capsule_field_present(pill, &RMF_EADATA,
+                                                         RCL_CLIENT));
+                       sp->u.sp_ea.eadata = req_capsule_client_get(pill,
+                                                                   &RMF_EADATA);
+                       sp->u.sp_ea.eadatalen = req_capsule_get_size(pill,
+                                                                    &RMF_EADATA,
+                                                                    RCL_CLIENT);
+                       sp->u.sp_ea.fid = rr->rr_fid1;
+                }
+        } else if (S_ISLNK(attr->la_mode)) {
+                const char *tgt = NULL;
+
+                req_capsule_extend(pill, &RQF_MDS_REINT_CREATE_SYM);
+                if (req_capsule_field_present(pill, &RMF_SYMTGT, RCL_CLIENT)) {
+                        tgt = req_capsule_client_get(pill, &RMF_SYMTGT);
+                        info->mti_spec.u.sp_symname = tgt;
                 }
-        } else
-                result = -EFAULT;
-        RETURN(result);
+                if (tgt == NULL)
+                        RETURN(-EFAULT);
+        }
+        RETURN(0);
 }
 
 static int mdt_link_unpack(struct mdt_thread_info *info)
@@ -760,30 +775,36 @@ static int mdt_link_unpack(struct mdt_thread_info *info)
         struct lu_attr          *attr = &info->mti_attr.ma_attr;
         struct mdt_reint_record *rr = &info->mti_rr;
         struct req_capsule      *pill = &info->mti_pill;
-        int                      result = 0;
         ENTRY;
 
         rec = req_capsule_client_get(pill, &RMF_REC_LINK);
-        if (rec != NULL) {
-                uc->mu_fsuid = rec->lk_fsuid;
-                uc->mu_fsgid = rec->lk_fsgid;
-                uc->mu_cap   = rec->lk_cap;
-                uc->mu_suppgids[0] = rec->lk_suppgid1;
-                uc->mu_suppgids[1] = rec->lk_suppgid2;
-                attr->la_uid = rec->lk_fsuid;
-                attr->la_gid = rec->lk_fsgid;
-                rr->rr_fid1 = &rec->lk_fid1;
-                rr->rr_fid2 = &rec->lk_fid2;
-                attr->la_ctime = rec->lk_time;
-                attr->la_mtime = rec->lk_time;
-                attr->la_valid = LA_UID | LA_GID | LA_CTIME | LA_MTIME;
-                rr->rr_name = req_capsule_client_get(pill, &RMF_NAME);
-                if (rr->rr_name == NULL)
-                        result = -EFAULT;
-        } else
-                result = -EFAULT;
-        RETURN(result);
+        if (rec == NULL)
+                RETURN(-EFAULT);
+
+        uc->mu_fsuid = rec->lk_fsuid;
+        uc->mu_fsgid = rec->lk_fsgid;
+        uc->mu_cap   = rec->lk_cap;
+        uc->mu_suppgids[0] = rec->lk_suppgid1;
+        uc->mu_suppgids[1] = rec->lk_suppgid2;
+
+        attr->la_uid = rec->lk_fsuid;
+        attr->la_gid = rec->lk_fsgid;
+        rr->rr_fid1 = &rec->lk_fid1;
+        rr->rr_fid2 = &rec->lk_fid2;
+        attr->la_ctime = rec->lk_time;
+        attr->la_mtime = rec->lk_time;
+        attr->la_valid = LA_UID | LA_GID | LA_CTIME | LA_MTIME;
+
+        if (req_capsule_get_size(pill, &RMF_CAPA1, RCL_CLIENT))
+                rr->rr_capa1 = req_capsule_client_get(pill, &RMF_CAPA1);
+        if (req_capsule_get_size(pill, &RMF_CAPA2, RCL_CLIENT))
+                rr->rr_capa2 = req_capsule_client_get(pill, &RMF_CAPA2);
+
+        rr->rr_name = req_capsule_client_get(pill, &RMF_NAME);
+        if (rr->rr_name == NULL)
+                RETURN(-EFAULT);
+
+        RETURN(0);
 }
 
 static int mdt_unlink_unpack(struct mdt_thread_info *info)
@@ -793,33 +814,35 @@ static int mdt_unlink_unpack(struct mdt_thread_info *info)
         struct lu_attr          *attr = &info->mti_attr.ma_attr;
         struct mdt_reint_record *rr = &info->mti_rr;
         struct req_capsule      *pill = &info->mti_pill;
-        int                      result = 0;
         ENTRY;
 
         rec = req_capsule_client_get(pill, &RMF_REC_UNLINK);
-        if (rec != NULL) {
-                uc->mu_fsuid = rec->ul_fsuid;
-                uc->mu_fsgid = rec->ul_fsgid;
-                uc->mu_cap   = rec->ul_cap;
-                uc->mu_suppgids[0] = rec->ul_suppgid;
-                uc->mu_suppgids[1] = -1;
+        if (rec == NULL)
+                RETURN(-EFAULT);
+
+        uc->mu_fsuid = rec->ul_fsuid;
+        uc->mu_fsgid = rec->ul_fsgid;
+        uc->mu_cap   = rec->ul_cap;
+        uc->mu_suppgids[0] = rec->ul_suppgid;
+        uc->mu_suppgids[1] = -1;
  
-                attr->la_uid = rec->ul_fsuid;
-                attr->la_gid = rec->ul_fsgid;
-                rr->rr_fid1 = &rec->ul_fid1;
-                rr->rr_fid2 = &rec->ul_fid2;
-                attr->la_ctime = rec->ul_time;
-                attr->la_mtime = rec->ul_time;
-                attr->la_mode  = rec->ul_mode;
-
-                attr->la_valid = LA_UID   | LA_GID  | LA_CTIME |
-                                 LA_MTIME | LA_MODE;
-                rr->rr_name = req_capsule_client_get(pill, &RMF_NAME);
-                if (rr->rr_name == NULL)
-                        result = -EFAULT;
-        } else
-                result = -EFAULT;
-        RETURN(result);
+        attr->la_uid = rec->ul_fsuid;
+        attr->la_gid = rec->ul_fsgid;
+        rr->rr_fid1 = &rec->ul_fid1;
+        rr->rr_fid2 = &rec->ul_fid2;
+        attr->la_ctime = rec->ul_time;
+        attr->la_mtime = rec->ul_time;
+        attr->la_mode  = rec->ul_mode;
+        attr->la_valid = LA_UID | LA_GID | LA_CTIME | LA_MTIME | LA_MODE;
+
+        if (req_capsule_get_size(pill, &RMF_CAPA1, RCL_CLIENT))
+                rr->rr_capa1 = req_capsule_client_get(pill, &RMF_CAPA1);
+
+        rr->rr_name = req_capsule_client_get(pill, &RMF_NAME);
+        if (rr->rr_name == NULL)
+                RETURN(-EFAULT);
+
+        RETURN(0);
 }
 
 static int mdt_rename_unpack(struct mdt_thread_info *info)
@@ -829,34 +852,39 @@ static int mdt_rename_unpack(struct mdt_thread_info *info)
         struct lu_attr          *attr = &info->mti_attr.ma_attr;
         struct mdt_reint_record *rr = &info->mti_rr;
         struct req_capsule      *pill = &info->mti_pill;
-        int                      result = 0;
         ENTRY;
 
         rec = req_capsule_client_get(pill, &RMF_REC_RENAME);
-        if (rec != NULL) {
-                uc->mu_fsuid = rec->rn_fsuid;
-                uc->mu_fsgid = rec->rn_fsgid;
-                uc->mu_cap   = rec->rn_cap;
-                uc->mu_suppgids[0] = rec->rn_suppgid1;
-                uc->mu_suppgids[1] = rec->rn_suppgid2;
+        if (rec == NULL)
+                RETURN(-EFAULT);
+
+        uc->mu_fsuid = rec->rn_fsuid;
+        uc->mu_fsgid = rec->rn_fsgid;
+        uc->mu_cap   = rec->rn_cap;
+        uc->mu_suppgids[0] = rec->rn_suppgid1;
+        uc->mu_suppgids[1] = rec->rn_suppgid2;
  
-                attr->la_uid = rec->rn_fsuid;
-                attr->la_gid = rec->rn_fsgid;
-                rr->rr_fid1 = &rec->rn_fid1;
-                rr->rr_fid2 = &rec->rn_fid2;
-                attr->la_ctime = rec->rn_time;
-                attr->la_mtime = rec->rn_time;
-                /* rename_tgt contains the mode already */
-                attr->la_mode = rec->rn_mode;
-                attr->la_valid = LA_UID | LA_GID | LA_CTIME |
-                                 LA_MTIME | LA_MODE;
-                rr->rr_name = req_capsule_client_get(pill, &RMF_NAME);
-                rr->rr_tgt = req_capsule_client_get(pill, &RMF_SYMTGT);
-                if (rr->rr_name == NULL || rr->rr_tgt == NULL)
-                        result = -EFAULT;
-        } else
-                result = -EFAULT;
-        RETURN(result);
+        attr->la_uid = rec->rn_fsuid;
+        attr->la_gid = rec->rn_fsgid;
+        rr->rr_fid1 = &rec->rn_fid1;
+        rr->rr_fid2 = &rec->rn_fid2;
+        attr->la_ctime = rec->rn_time;
+        attr->la_mtime = rec->rn_time;
+        /* rename_tgt contains the mode already */
+        attr->la_mode = rec->rn_mode;
+        attr->la_valid = LA_UID | LA_GID | LA_CTIME | LA_MTIME | LA_MODE;
+
+        if (req_capsule_get_size(pill, &RMF_CAPA1, RCL_CLIENT))
+                rr->rr_capa1 = req_capsule_client_get(pill, &RMF_CAPA1);
+        if (req_capsule_get_size(pill, &RMF_CAPA2, RCL_CLIENT))
+                rr->rr_capa2 = req_capsule_client_get(pill, &RMF_CAPA2);
+
+        rr->rr_name = req_capsule_client_get(pill, &RMF_NAME);
+        rr->rr_tgt = req_capsule_client_get(pill, &RMF_SYMTGT);
+        if (rr->rr_name == NULL || rr->rr_tgt == NULL)
+                RETURN(-EFAULT);
+
+        RETURN(0);
 }
 
 static int mdt_open_unpack(struct mdt_thread_info *info)
@@ -866,36 +894,39 @@ static int mdt_open_unpack(struct mdt_thread_info *info)
         struct lu_attr          *attr = &info->mti_attr.ma_attr;
         struct req_capsule      *pill = &info->mti_pill;
         struct mdt_reint_record *rr   = &info->mti_rr;
-        int                     result;
         ENTRY;
 
         rec = req_capsule_client_get(pill, &RMF_REC_CREATE);
-        if (rec != NULL) {
-                uc->mu_fsuid = rec->cr_fsuid;
-                uc->mu_fsgid = rec->cr_fsgid;
-                uc->mu_cap   = rec->cr_cap;
-                uc->mu_suppgids[0] = rec->cr_suppgid;
-                uc->mu_suppgids[1] = -1;
-                rr->rr_fid1   = &rec->cr_fid1;
-                rr->rr_fid2   = &rec->cr_fid2;
-                attr->la_mode = rec->cr_mode;
-                attr->la_rdev  = rec->cr_rdev;
-                attr->la_uid   = rec->cr_fsuid;
-                attr->la_gid   = rec->cr_fsgid;
-                attr->la_ctime = rec->cr_time;
-                attr->la_mtime = rec->cr_time;
-                attr->la_atime = rec->cr_time;
-                attr->la_valid = LA_MODE  | LA_RDEV  | LA_UID   | LA_GID |
-                                 LA_CTIME | LA_MTIME | LA_ATIME;
-                info->mti_spec.sp_cr_flags = rec->cr_flags;
-                rr->rr_name = req_capsule_client_get(pill, &RMF_NAME);
-                if (rr->rr_name == NULL)
-                        result = -EFAULT;
-                else
-                        result = 0;
-        } else
-                result = -EFAULT;
+        if (rec == NULL)
+                RETURN(-EFAULT);
+
+        uc->mu_fsuid = rec->cr_fsuid;
+        uc->mu_fsgid = rec->cr_fsgid;
+        uc->mu_cap   = rec->cr_cap;
+        uc->mu_suppgids[0] = rec->cr_suppgid;
+        uc->mu_suppgids[1] = -1;
+
+        rr->rr_fid1   = &rec->cr_fid1;
+        rr->rr_fid2   = &rec->cr_fid2;
+        attr->la_mode = rec->cr_mode;
+        attr->la_rdev  = rec->cr_rdev;
+        attr->la_uid   = rec->cr_fsuid;
+        attr->la_gid   = rec->cr_fsgid;
+        attr->la_ctime = rec->cr_time;
+        attr->la_mtime = rec->cr_time;
+        attr->la_atime = rec->cr_time;
+        attr->la_valid = LA_MODE  | LA_RDEV  | LA_UID   | LA_GID |
+                         LA_CTIME | LA_MTIME | LA_ATIME;
+        info->mti_spec.sp_cr_flags = rec->cr_flags;
+
+        if (req_capsule_get_size(pill, &RMF_CAPA1, RCL_CLIENT))
+                rr->rr_capa1 = req_capsule_client_get(pill, &RMF_CAPA1);
+        if (req_capsule_get_size(pill, &RMF_CAPA2, RCL_CLIENT))
+                rr->rr_capa2 = req_capsule_client_get(pill, &RMF_CAPA2);
+
+        rr->rr_name = req_capsule_client_get(pill, &RMF_NAME);
+        if (rr->rr_name == NULL)
+                RETURN(-EFAULT);
 
         if (req_capsule_field_present(pill, &RMF_EADATA, RCL_CLIENT)) {
                 struct md_create_spec *sp = &info->mti_spec;
@@ -909,7 +940,7 @@ static int mdt_open_unpack(struct mdt_thread_info *info)
                         sp->u.sp_ea.no_lov_create = 1;
         }
 
-        RETURN(result);
+        RETURN(0);
 }
 
 typedef int (*reint_unpacker)(struct mdt_thread_info *info);
index 6a03a01..f015fa3 100644 (file)
@@ -318,6 +318,7 @@ static int mdt_mfd_open(struct mdt_thread_info *info,
 {
         struct ptlrpc_request  *req = mdt_info_req(info);
         struct mdt_export_data *med = &req->rq_export->exp_mdt_data;
+        struct mdt_device      *mdt = info->mti_mdt;
         struct md_attr         *ma  = &info->mti_attr;
         struct lu_attr         *la  = &ma->ma_attr;
         struct mdt_file_data   *mfd;
@@ -347,6 +348,33 @@ static int mdt_mfd_open(struct mdt_thread_info *info,
                 }
         }
 
+        spin_lock(&capa_lock);
+        info->mti_capa_key = *red_capa_key(mdt);
+        spin_unlock(&capa_lock);
+
+        if (mdt->mdt_opts.mo_mds_capa) {
+                struct lustre_capa *capa;
+
+                capa = req_capsule_server_get(&info->mti_pill, &RMF_CAPA1);
+                LASSERT(capa);
+                capa->lc_opc = CAPA_OPC_MDS_DEFAULT;
+                rc = mo_capa_get(info->mti_ctxt, mdt_object_child(o), capa);
+                if (rc)
+                        RETURN(rc);
+                repbody->valid |= OBD_MD_FLMDSCAPA;
+        }
+        if (mdt->mdt_opts.mo_oss_capa) {
+                struct lustre_capa *capa;
+
+                capa = req_capsule_server_get(&info->mti_pill, &RMF_CAPA2);
+                LASSERT(capa);
+                capa->lc_opc = CAPA_OPC_OSS_DEFAULT;
+                rc = mo_capa_get(info->mti_ctxt, mdt_object_child(o), capa);
+                if (rc)
+                        RETURN(rc);
+                repbody->valid |= OBD_MD_FLOSSCAPA;
+        }
+
         /* if we are following a symlink, don't open; and
          * do not return open handle for special nodes as client required
          */
@@ -507,7 +535,7 @@ void mdt_reconstruct_open(struct mdt_thread_info *info,
         if (mdt_get_disposition(ldlm_rep, DISP_OPEN_CREATE) && 
             req->rq_status != 0) {
                 /* We did not create successfully, return error to client. */
-                mdt_shrink_reply(info, DLM_REPLY_REC_OFF + 1);
+                mdt_shrink_reply(info, DLM_REPLY_REC_OFF + 1, 1, 1);
                 GOTO(out, rc = req->rq_status);
         }
 
@@ -516,10 +544,10 @@ void mdt_reconstruct_open(struct mdt_thread_info *info,
                  * We failed after creation, but we do not know in which step 
                  * we failed. So try to check the child object.
                  */
-                parent = mdt_object_find(ctxt, mdt, rr->rr_fid1);
+                parent = mdt_object_find(ctxt, mdt, rr->rr_fid1, rr->rr_capa1);
                 LASSERT(!IS_ERR(parent));
 
-                child = mdt_object_find(ctxt, mdt, rr->rr_fid2);
+                child = mdt_object_find(ctxt, mdt, rr->rr_fid2, rr->rr_capa2);
                 LASSERT(!IS_ERR(child));
 
                 rc = lu_object_exists(&child->mot_obj.mo_lu);
@@ -543,7 +571,7 @@ void mdt_reconstruct_open(struct mdt_thread_info *info,
                 }
                 mdt_object_put(ctxt, parent);
                 mdt_object_put(ctxt, child);
-                mdt_shrink_reply(info, DLM_REPLY_REC_OFF + 1);
+                mdt_shrink_reply(info, DLM_REPLY_REC_OFF + 1, 1, 1);
                 GOTO(out, rc);
         } else {
 regular_open:
@@ -567,12 +595,12 @@ static int mdt_open_by_fid(struct mdt_thread_info* info,
         int                     rc;
         ENTRY;
 
-        o = mdt_object_find(info->mti_ctxt, info->mti_mdt, rr->rr_fid2);
+        o = mdt_object_find(info->mti_ctxt, info->mti_mdt, rr->rr_fid2,
+                            rr->rr_capa2);
         if (IS_ERR(o)) 
                 RETURN(rc = PTR_ERR(o));
 
         rc = lu_object_exists(&o->mot_obj.mo_lu);
-
         if (rc > 0) {
                 const struct lu_context *ctxt = info->mti_ctxt;
 
@@ -612,7 +640,7 @@ static int mdt_cross_open(struct mdt_thread_info* info,
         int                rc;
         ENTRY;
 
-        o = mdt_object_find(info->mti_ctxt, info->mti_mdt, fid);
+        o = mdt_object_find(info->mti_ctxt, info->mti_mdt, fid, BYPASS_CAPA);
         if (IS_ERR(o)) 
                 RETURN(rc = PTR_ERR(o));
 
@@ -710,7 +738,8 @@ int mdt_reint_open(struct mdt_thread_info *info, struct mdt_lock_handle *lhc)
         if (rr->rr_name[0] == 0) {
                 /* this is cross-ref open */
                 mdt_set_disposition(info, ldlm_rep, DISP_LOOKUP_POS);
-                result = mdt_cross_open(info, rr->rr_fid1, ldlm_rep, create_flags);
+                result = mdt_cross_open(info, rr->rr_fid1, ldlm_rep,
+                                        create_flags);
                 GOTO(out, result);
         }
 
@@ -720,7 +749,7 @@ int mdt_reint_open(struct mdt_thread_info *info, struct mdt_lock_handle *lhc)
         else
                 lh->mlh_mode = LCK_EX;
         parent = mdt_object_find_lock(info, rr->rr_fid1, lh,
-                                      MDS_INODELOCK_UPDATE);
+                                      MDS_INODELOCK_UPDATE, rr->rr_capa1);
         if (IS_ERR(parent))
                 GOTO(out, result = PTR_ERR(parent));
 
@@ -751,7 +780,7 @@ int mdt_reint_open(struct mdt_thread_info *info, struct mdt_lock_handle *lhc)
                 mdt_set_disposition(info, ldlm_rep, DISP_LOOKUP_POS);
         }
 
-        child = mdt_object_find(info->mti_ctxt, mdt, child_fid);
+        child = mdt_object_find(info->mti_ctxt, mdt, child_fid, BYPASS_CAPA);
         if (IS_ERR(child))
                 GOTO(out_parent, result = PTR_ERR(child));
 
@@ -846,7 +875,7 @@ out_child:
 out_parent:
         mdt_object_unlock_put(info, parent, lh, result);
 out:
-        mdt_shrink_reply(info, DLM_REPLY_REC_OFF + 1);
+        mdt_shrink_reply(info, DLM_REPLY_REC_OFF + 1, 1, 1);
         if (result)
                 lustre_msg_set_transno(req->rq_repmsg, 0);
         return result;
@@ -986,7 +1015,7 @@ int mdt_close(struct mdt_thread_info *info)
                 mdt_object_put(info->mti_ctxt, o);
         }
         if (repbody != NULL)
-                mdt_shrink_reply(info, REPLY_REC_OFF + 1);
+                mdt_shrink_reply(info, REPLY_REC_OFF + 1, 0, 0);
 
         if (MDT_FAIL_CHECK(OBD_FAIL_MDS_CLOSE_PACK))
                 RETURN(-ENOMEM);
index ba05274..07c763b 100644 (file)
@@ -37,9 +37,9 @@ static int mdt_server_data_update(const struct lu_context *ctx,
                                   struct mdt_device *mdt);
 
 /* TODO: maybe this pair should be defined in dt_object.c */
-static int mdt_record_read(const struct lu_context *ctx,
-                           struct dt_object *dt, void *buf,
-                           size_t count, loff_t *pos)
+int mdt_record_read(const struct lu_context *ctx,
+                    struct dt_object *dt, void *buf,
+                    size_t count, loff_t *pos)
 {
         int rc;
 
@@ -54,9 +54,9 @@ static int mdt_record_read(const struct lu_context *ctx,
         return rc;
 }
 
-static int mdt_record_write(const struct lu_context *ctx,
-                            struct dt_object *dt, const void *buf,
-                            size_t count, loff_t *pos, struct thandle *th)
+int mdt_record_write(const struct lu_context *ctx,
+                     struct dt_object *dt, const void *buf,
+                     size_t count, loff_t *pos, struct thandle *th)
 {
         int rc;
 
@@ -75,8 +75,8 @@ enum {
         MDT_TXN_LAST_RCVD_WRITE_CREDITS = 3
 };
 
-static struct thandle* mdt_trans_start(const struct lu_context *ctx,
-                                       struct mdt_device *mdt, int credits)
+struct thandle* mdt_trans_start(const struct lu_context *ctx,
+                                struct mdt_device *mdt, int credits)
 {
         struct mdt_thread_info *mti;
         struct txn_param *p;
@@ -87,8 +87,8 @@ static struct thandle* mdt_trans_start(const struct lu_context *ctx,
         return mdt->mdt_bottom->dd_ops->dt_trans_start(ctx, mdt->mdt_bottom, p);
 }
 
-static void mdt_trans_stop(const struct lu_context *ctx,
-                           struct mdt_device *mdt, struct thandle *th)
+void mdt_trans_stop(const struct lu_context *ctx,
+                    struct mdt_device *mdt, struct thandle *th)
 {
         mdt->mdt_bottom->dd_ops->dt_trans_stop(ctx, th);
 }
@@ -880,8 +880,8 @@ static int mdt_txn_commit_cb(const struct lu_context *ctx,
 int mdt_fs_setup(const struct lu_context *ctx, struct mdt_device *mdt,
                  struct obd_device *obd)
 {
-        struct lu_fid last_fid;
-        struct dt_object *last;
+        struct lu_fid fid;
+        struct dt_object *o;
         int rc = 0;
         ENTRY;
 
@@ -893,26 +893,47 @@ int mdt_fs_setup(const struct lu_context *ctx, struct mdt_device *mdt,
 
         dt_txn_callback_add(mdt->mdt_bottom, &mdt->mdt_txn_cb);
 
-        last = dt_store_open(ctx, mdt->mdt_bottom,
-                             LAST_RCVD, &last_fid);
-        if(!IS_ERR(last)) {
-                mdt->mdt_last_rcvd = last;
+        o = dt_store_open(ctx, mdt->mdt_bottom, LAST_RCVD, &fid);
+        if(!IS_ERR(o)) {
+                mdt->mdt_last_rcvd = o;
                 rc = mdt_server_data_init(ctx, mdt);
                 if (rc) {
-                        lu_object_put(ctx, &last->do_lu);
+                        lu_object_put(ctx, &o->do_lu);
                         mdt->mdt_last_rcvd = NULL;
                 }
         } else {
-                rc = PTR_ERR(last);
+                rc = PTR_ERR(o);
                 CERROR("cannot open %s: rc = %d\n", LAST_RCVD, rc);
         }
 
+        if (rc)
+                RETURN(rc);
+
+        o = dt_store_open(ctx, mdt->mdt_bottom, CAPA_KEYS, &fid);
+        if(!IS_ERR(o)) {
+                struct md_device *next = mdt->mdt_child;
+                mdt->mdt_ck_obj = o;
+                rc = mdt_capa_keys_init(ctx, mdt);
+                if (rc) {
+                        lu_object_put(ctx, &o->do_lu);
+                        mdt->mdt_ck_obj = NULL;
+                        RETURN(rc);
+                }
+                rc = next->md_ops->mdo_init_capa_keys(next, mdt->mdt_capa_keys);
+        } else {
+                rc = PTR_ERR(o);
+                CERROR("cannot open %s: rc = %d\n", CAPA_KEYS, rc);
+        }
+
+        if (rc)
+                RETURN(rc);
+
         OBD_SET_CTXT_MAGIC(&obd->obd_lvfs_ctxt);
         obd->obd_lvfs_ctxt.pwdmnt = current->fs->pwdmnt;
         obd->obd_lvfs_ctxt.pwd = current->fs->pwd;
         obd->obd_lvfs_ctxt.fs = get_ds();
 
-        RETURN (rc);
+        RETURN(0);
 }
 
 
@@ -927,6 +948,9 @@ void mdt_fs_cleanup(const struct lu_context *ctx, struct mdt_device *mdt)
         if (mdt->mdt_last_rcvd)
                 lu_object_put(ctx, &mdt->mdt_last_rcvd->do_lu);
         mdt->mdt_last_rcvd = NULL;
+        if (mdt->mdt_ck_obj)
+                lu_object_put(ctx, &mdt->mdt_ck_obj->do_lu);
+        mdt->mdt_ck_obj = NULL;
 }
 
 /* reconstruction code */
@@ -974,7 +998,8 @@ static void mdt_reconstruct_create(struct mdt_thread_info *mti,
                 return;
 
         /* if no error, so child was created with requested fid */
-        child = mdt_object_find(mti->mti_ctxt, mdt, mti->mti_rr.rr_fid2);
+        child = mdt_object_find(mti->mti_ctxt, mdt, mti->mti_rr.rr_fid2,
+                                mti->mti_rr.rr_capa2);
         LASSERT(!IS_ERR(child));
 
         body = req_capsule_server_get(&mti->mti_pill, &RMF_MDT_BODY);
@@ -1004,7 +1029,8 @@ static void mdt_reconstruct_setattr(struct mdt_thread_info *mti,
                 return;
 
         body = req_capsule_server_get(&mti->mti_pill, &RMF_MDT_BODY);
-        obj = mdt_object_find(mti->mti_ctxt, mdt, mti->mti_rr.rr_fid1);
+        obj = mdt_object_find(mti->mti_ctxt, mdt, mti->mti_rr.rr_fid1,
+                              mti->mti_rr.rr_capa1);
         LASSERT(!IS_ERR(obj));
         mo_attr_get(mti->mti_ctxt, mdt_object_child(obj),
                     &mti->mti_attr, NULL);
@@ -1027,7 +1053,7 @@ static void mdt_reconstruct_with_shrink(struct mdt_thread_info *mti,
                                         struct mdt_lock_handle *lhc)
 {
         mdt_reconstruct_generic(mti, lhc);
-        mdt_shrink_reply(mti, REPLY_REC_OFF + 1);
+        mdt_shrink_reply(mti, REPLY_REC_OFF + 1, 0, 0);
 }
 
 typedef void (*mdt_reconstructor)(struct mdt_thread_info *mti,
index 3c3be96..42d8f1c 100644 (file)
@@ -54,11 +54,12 @@ static int mdt_md_create(struct mdt_thread_info *info)
         lh->mlh_mode = LCK_EX;
 
         parent = mdt_object_find_lock(info, rr->rr_fid1,
-                                      lh, MDS_INODELOCK_UPDATE);
+                                      lh, MDS_INODELOCK_UPDATE,
+                                      rr->rr_capa1);
         if (IS_ERR(parent))
                 RETURN(PTR_ERR(parent));
 
-        child = mdt_object_find(info->mti_ctxt, mdt, rr->rr_fid2);
+        child = mdt_object_find(info->mti_ctxt, mdt, rr->rr_fid2, BYPASS_CAPA);
         if (!IS_ERR(child)) {
                 struct md_object *next = mdt_object_child(parent);
 
@@ -95,7 +96,8 @@ static int mdt_md_mkobj(struct mdt_thread_info *info)
 
         repbody = req_capsule_server_get(&info->mti_pill, &RMF_MDT_BODY);
 
-        o = mdt_object_find(info->mti_ctxt, mdt, info->mti_rr.rr_fid2);
+        o = mdt_object_find(info->mti_ctxt, mdt, info->mti_rr.rr_fid2,
+                            BYPASS_CAPA);
         if (!IS_ERR(o)) {
                 struct md_object *next = mdt_object_child(o);
 
@@ -187,6 +189,7 @@ out:
 static int mdt_reint_setattr(struct mdt_thread_info *info,
                              struct mdt_lock_handle *lhc)
 {
+        struct mdt_device       *mdt = info->mti_mdt;
         struct md_attr          *ma = &info->mti_attr;
         struct mdt_reint_record *rr = &info->mti_rr;
         struct ptlrpc_request   *req = mdt_info_req(info);
@@ -203,7 +206,8 @@ static int mdt_reint_setattr(struct mdt_thread_info *info,
                   (unsigned int)ma->ma_attr.la_valid);
 
         repbody = req_capsule_server_get(&info->mti_pill, &RMF_MDT_BODY);
-        mo = mdt_object_find(info->mti_ctxt, info->mti_mdt, rr->rr_fid1);
+        mo = mdt_object_find(info->mti_ctxt, info->mti_mdt, rr->rr_fid1,
+                             rr->rr_capa1);
         if (IS_ERR(mo))
                 RETURN(rc = PTR_ERR(mo));
 
@@ -268,6 +272,19 @@ static int mdt_reint_setattr(struct mdt_thread_info *info,
                 GOTO(out, rc);
 
         mdt_pack_attr2body(repbody, &ma->ma_attr, mdt_object_fid(mo));
+
+        if (mdt->mdt_opts.mo_oss_capa) {
+                struct lustre_capa *capa;
+
+                capa = req_capsule_server_get(&info->mti_pill, &RMF_CAPA1);
+                LASSERT(capa);
+                capa->lc_opc = CAPA_OPC_OSS_DEFAULT | CAPA_OPC_OSS_TRUNC;
+                rc = mo_capa_get(info->mti_ctxt, mdt_object_child(mo), capa);
+                if (rc)
+                        GOTO(out, rc); /* use the common out: exit path */
+                repbody->valid |= OBD_MD_FLOSSCAPA;
+        }
+
         mdt_body_reverse_idmap(info, repbody);
         EXIT;
 out:
@@ -333,7 +350,7 @@ static int mdt_reint_unlink(struct mdt_thread_info *info,
         parent_lh = &info->mti_lh[MDT_LH_PARENT];
         parent_lh->mlh_mode = LCK_EX;
         mp = mdt_object_find_lock(info, rr->rr_fid1, parent_lh,
-                                  MDS_INODELOCK_UPDATE);
+                                  MDS_INODELOCK_UPDATE, rr->rr_capa1);
         if (IS_ERR(mp))
                 GOTO(out, rc = PTR_ERR(mp));
 
@@ -365,7 +382,8 @@ static int mdt_reint_unlink(struct mdt_thread_info *info,
                  GOTO(out_unlock_parent, rc);
 
         /* we will lock the child regardless it is local or remote. No harm. */
-        mc = mdt_object_find(info->mti_ctxt, info->mti_mdt, child_fid);
+        mc = mdt_object_find(info->mti_ctxt, info->mti_mdt, child_fid,
+                                  BYPASS_CAPA);
         if (IS_ERR(mc))
                 GOTO(out_unlock_parent, rc = PTR_ERR(mc));
         child_lh = &info->mti_lh[MDT_LH_CHILD];
@@ -397,7 +415,7 @@ out_put_child:
 out_unlock_parent:
         mdt_object_unlock_put(info, mp, parent_lh, rc);
 out:
-        mdt_shrink_reply(info, REPLY_REC_OFF + 1);
+        mdt_shrink_reply(info, REPLY_REC_OFF + 1, 0, 0);
         return rc;
 }
 
@@ -425,7 +443,7 @@ static int mdt_reint_link(struct mdt_thread_info *info,
         lhs = &info->mti_lh[MDT_LH_PARENT];
         lhs->mlh_mode = LCK_EX;
         ms = mdt_object_find_lock(info, rr->rr_fid1, lhs, 
-                                  MDS_INODELOCK_UPDATE);
+                                  MDS_INODELOCK_UPDATE, rr->rr_capa1);
         if (IS_ERR(ms))
                 RETURN(PTR_ERR(ms));
 
@@ -439,7 +457,7 @@ static int mdt_reint_link(struct mdt_thread_info *info,
         lhp = &info->mti_lh[MDT_LH_CHILD];
         lhp->mlh_mode = LCK_EX;
         mp = mdt_object_find_lock(info, rr->rr_fid2, lhp, 
-                                  MDS_INODELOCK_UPDATE);
+                                  MDS_INODELOCK_UPDATE, rr->rr_capa2);
         if (IS_ERR(mp))
                 GOTO(out_unlock_source, rc = PTR_ERR(mp));
 
@@ -483,7 +501,7 @@ static int mdt_reint_rename_tgt(struct mdt_thread_info *info)
         lh_tgtdir = &info->mti_lh[MDT_LH_PARENT];
         lh_tgtdir->mlh_mode = LCK_EX;
         mtgtdir = mdt_object_find_lock(info, rr->rr_fid1, lh_tgtdir,
-                                       MDS_INODELOCK_UPDATE);
+                                       MDS_INODELOCK_UPDATE, rr->rr_capa1);
         if (IS_ERR(mtgtdir))
                 GOTO(out, rc = PTR_ERR(mtgtdir));
 
@@ -496,7 +514,7 @@ static int mdt_reint_rename_tgt(struct mdt_thread_info *info)
                 lh_tgt->mlh_mode = LCK_EX;
 
                 mtgt = mdt_object_find_lock(info, tgt_fid, lh_tgt,
-                                            MDS_INODELOCK_LOOKUP);
+                                            MDS_INODELOCK_LOOKUP, BYPASS_CAPA);
                 if (IS_ERR(mtgt))
                         GOTO(out_unlock_tgtdir, rc = PTR_ERR(mtgt));
 
@@ -521,7 +539,7 @@ static int mdt_reint_rename_tgt(struct mdt_thread_info *info)
 out_unlock_tgtdir:
         mdt_object_unlock_put(info, mtgtdir, lh_tgtdir, rc);
 out:
-        mdt_shrink_reply(info, REPLY_REC_OFF + 1);
+        mdt_shrink_reply(info, REPLY_REC_OFF + 1, 0, 0);
         return rc;
 }
 
@@ -583,7 +601,8 @@ static int mdt_rename_check(struct mdt_thread_info *info, struct lu_fid *fid)
         ENTRY;
 
         do {
-                dst = mdt_object_find(info->mti_ctxt, info->mti_mdt, &dst_fid);
+                dst = mdt_object_find(info->mti_ctxt, info->mti_mdt, &dst_fid,
+                                      BYPASS_CAPA);
                 if (!IS_ERR(dst)) {
                         rc = mdo_is_subdir(info->mti_ctxt,
                                            mdt_object_child(dst),
@@ -648,7 +667,7 @@ static int mdt_reint_rename(struct mdt_thread_info *info,
         lh_srcdirp = &info->mti_lh[MDT_LH_PARENT];
         lh_srcdirp->mlh_mode = LCK_EX;
         msrcdir = mdt_object_find_lock(info, rr->rr_fid1, lh_srcdirp,
-                                       MDS_INODELOCK_UPDATE);
+                                       MDS_INODELOCK_UPDATE, rr->rr_capa1);
         if (IS_ERR(msrcdir))
                 GOTO(out, rc = PTR_ERR(msrcdir));
 
@@ -659,8 +678,8 @@ static int mdt_reint_rename(struct mdt_thread_info *info,
                 mdt_object_get(info->mti_ctxt, msrcdir);
                 mtgtdir = msrcdir;
         } else {
-                mtgtdir = mdt_object_find(info->mti_ctxt,
-                                          info->mti_mdt, rr->rr_fid2);
+                mtgtdir = mdt_object_find(info->mti_ctxt, info->mti_mdt,
+                                          rr->rr_fid2, rr->rr_capa2);
                 if (IS_ERR(mtgtdir))
                         GOTO(out_unlock_source, rc = PTR_ERR(mtgtdir));
                 
@@ -685,7 +704,7 @@ static int mdt_reint_rename(struct mdt_thread_info *info,
         lh_oldp = &info->mti_lh[MDT_LH_OLD];
         lh_oldp->mlh_mode = LCK_EX;
         mold = mdt_object_find_lock(info, old_fid, lh_oldp,
-                                    MDS_INODELOCK_LOOKUP);
+                                    MDS_INODELOCK_LOOKUP, BYPASS_CAPA);
         if (IS_ERR(mold))
                 GOTO(out_unlock_target, rc = PTR_ERR(mold));
 
@@ -703,7 +722,8 @@ static int mdt_reint_rename(struct mdt_thread_info *info,
                         GOTO(out_unlock_old, rc = -EINVAL);
 
                 lh_newp->mlh_mode = LCK_EX;
-                mnew = mdt_object_find(info->mti_ctxt, info->mti_mdt, new_fid);
+                mnew = mdt_object_find(info->mti_ctxt, info->mti_mdt, new_fid,
+                                       BYPASS_CAPA);
                 if (IS_ERR(mnew))
                         GOTO(out_unlock_old, rc = PTR_ERR(mnew));
 
@@ -761,7 +781,7 @@ out_unlock_source:
         mdt_object_unlock_put(info, msrcdir, lh_srcdirp, rc);
 out:
         mdt_rename_unlock(&rename_lh);
-        mdt_shrink_reply(info, REPLY_REC_OFF + 1);
+        mdt_shrink_reply(info, REPLY_REC_OFF + 1, 0, 0);
         return rc;
 }
 
index 4f040ef..0dafc6c 100644 (file)
@@ -24,7 +24,7 @@ obdclass-all-objs += class_obd.o
 obdclass-all-objs += debug.o genops.o uuid.o llog_ioctl.o
 obdclass-all-objs += lprocfs_status.o lustre_handles.o lustre_peer.o
 obdclass-all-objs += statfs_pack.o obdo.o obd_config.o obd_mount.o prng.o mea.o
-obdclass-all-objs += lu_object.o dt_object.o hash.o
+obdclass-all-objs += lu_object.o dt_object.o hash.o capa.o
 
 obdclass-objs := $(obdclass-linux-objs) $(obdclass-all-objs)
 
index 7b3df36..3b9dbbb 100644 (file)
@@ -10,7 +10,7 @@ noinst_LIBRARIES = liblustreclass.a
 liblustreclass_a_SOURCES = class_obd.c debug.c genops.c statfs_pack.c mea.c uuid.c 
 liblustreclass_a_SOURCES += lustre_handles.c lustre_peer.c lprocfs_status.c
 liblustreclass_a_SOURCES += obdo.c obd_config.c llog.c llog_obd.c llog_cat.c 
-liblustreclass_a_SOURCES += llog_lvfs.c llog_swab.c
+liblustreclass_a_SOURCES += llog_lvfs.c llog_swab.c capa.c
 liblustreclass_a_SOURCES += prng.c #llog_ioctl.c rbtree.c
 liblustreclass_a_CPPFLAGS = $(LLCPPFLAGS) -DLUSTRE_VERSION=\"32\" -DBUILD_VERSION=\"1\"
 liblustreclass_a_CFLAGS = $(LLCFLAGS)
diff --git a/lustre/obdclass/capa.c b/lustre/obdclass/capa.c
new file mode 100644 (file)
index 0000000..ebc97b7
--- /dev/null
@@ -0,0 +1,298 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  lustre/obdclass/capa.c
+ *  Lustre Capability Hash Management
+ *
+ *  Copyright (c) 2005 Cluster File Systems, Inc.
+ *   Author: Lai Siyao<lsy@clusterfs.com>
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#ifndef EXPORT_SYMTAB
+# define EXPORT_SYMTAB
+#endif
+
+#define DEBUG_SUBSYSTEM S_SEC
+
+#ifdef __KERNEL__
+#include <linux/version.h>
+#include <linux/fs.h>
+#include <asm/unistd.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/init.h>
+
+#include <obd_class.h>
+#include <lustre_debug.h>
+#include <lustre/lustre_idl.h>
+#else
+#include <liblustre.h>
+#endif
+
+#include <libcfs/list.h>
+#include <lustre_capa.h>
+
+/* allocation cache for struct obd_capa; NULL until a cache is created */
+cfs_mem_cache_t *capa_cachep = NULL;
+
+#ifdef __KERNEL__
+/* one capability list per site, indexed by CAPA_SITE_* */
+struct list_head capa_list[CAPA_SITE_MAX];
+spinlock_t capa_lock = SPIN_LOCK_UNLOCKED; /* lock for capa_hash/capa_list */
+
+/* hash table of capabilities, allocated by init_capa_hash() */
+static struct hlist_head *capa_hash;
+#endif
+/*
+ * capa count, one counter per site
+ * NOTE(review): nothing in this file updates it directly; presumably
+ * alloc_capa()/free_capa() maintain it -- confirm in lustre_capa.h.
+ */
+int capa_count[CAPA_SITE_MAX] = { 0, };
+
+/*
+ * HMAC algorithms known to capa_hmac().
+ * NOTE(review): the meaning of the two numeric DEF_CAPA_HMAC_ALG arguments
+ * is defined by the macro, which is not visible here -- confirm.
+ */
+static struct capa_hmac_alg capa_hmac_algs[] = {
+        DEF_CAPA_HMAC_ALG("sha1", SHA1, 20, 20),
+};
+
+/* printable site names, used in the cleanup_capas() assertion message */
+static const char *capa_site_name[] = {
+        [CAPA_SITE_CLIENT] = "client",
+        [CAPA_SITE_SERVER] = "server",
+        [CAPA_SITE_MAX]    = "error"
+};
+
+/* globals shared with other modules */
+EXPORT_SYMBOL(capa_cachep);
+EXPORT_SYMBOL(capa_list);
+EXPORT_SYMBOL(capa_lock);
+EXPORT_SYMBOL(capa_count);
+
+/*
+ * Set up the global capability tables.
+ *
+ * In kernel builds this allocates one page for the hash table, initializes
+ * its first NR_CAPAHASH buckets and every per-site capability list.  In
+ * non-kernel builds there is nothing to set up.
+ *
+ * Returns 0 on success, -ENOMEM if the hash page cannot be allocated.
+ */
+int init_capa_hash(void)
+{
+#ifdef __KERNEL__
+        int buckets_per_page = PAGE_SIZE / sizeof(struct hlist_head);
+        int slot, site;
+
+        OBD_ALLOC(capa_hash, PAGE_SIZE);
+        if (capa_hash == NULL)
+                return -ENOMEM;
+
+        /* the single page has to be large enough for all buckets */
+        LASSERT(buckets_per_page > NR_CAPAHASH);
+
+        for (slot = 0; slot < NR_CAPAHASH; slot++)
+                INIT_HLIST_HEAD(&capa_hash[slot]);
+        for (site = CAPA_SITE_CLIENT; site < CAPA_SITE_MAX; site++)
+                INIT_LIST_HEAD(&capa_list[site]);
+#endif
+        return 0;
+}
+
+#ifdef __KERNEL__
+/*
+ * Release the capability hash table allocated by init_capa_hash().
+ *
+ * Asserts that every hash bucket and every per-site capability list is
+ * already empty, then frees the hash page.
+ */
+void cleanup_capa_hash(void)
+{
+        int i;
+
+        for (i = 0; i < NR_CAPAHASH; i++)
+                LASSERTF(hlist_empty(capa_hash + i),
+                         "capa hash %d not empty\n", i);
+        /*
+         * Walk the same site range init_capa_hash() initialized; starting
+         * at CAPA_SITE_MAX would make this check a no-op.
+         */
+        for (i = CAPA_SITE_CLIENT; i < CAPA_SITE_MAX; i++)
+                LASSERTF(list_empty(&capa_list[i]),
+                         "capa list %d not empty\n", i);
+        OBD_FREE(capa_hash, PAGE_SIZE);
+}
+
+/*
+ * Hash a fid to a bucket index: the object id XORed with the version is
+ * multiplied by (sequence + 1) and reduced modulo NR_CAPAHASH.
+ *
+ * The return type is plain int; a const qualifier on a by-value return
+ * type has no effect and only triggers compiler warnings.
+ */
+static inline int capa_hashfn(struct lu_fid *fid)
+{
+        return (fid_oid(fid) ^ fid_ver(fid)) *
+               (unsigned long)(fid_seq(fid) + 1) % NR_CAPAHASH;
+}
+
+/* Non-zero when @ocapa belongs to the server site. */
+static inline int capa_on_server(struct obd_capa *ocapa)
+{
+        return CAPA_SITE_SERVER == ocapa->c_site;
+}
+
+/*
+ * Search hash bucket @head for an entry matching @capa.
+ *
+ * Entries are compared byte-wise up to lc_hmac.  When @capa carries a zero
+ * lc_expiry (the "MDS get capa" case) only the bytes up to lc_keyid are
+ * compared, and entries for which capa_is_to_expire() is true are skipped.
+ *
+ * Returns the matching entry, or NULL when the bucket holds none.  No
+ * reference is taken here.
+ */
+static struct obd_capa *find_capa(struct lustre_capa *capa,
+                                  struct hlist_head *head)
+{
+        struct obd_capa *entry;
+        struct hlist_node *node;
+        /* MDS get capa case */
+        int mds_get = (capa->lc_expiry == 0);
+        int cmplen;
+
+        if (mds_get)
+                cmplen = offsetof(struct lustre_capa, lc_keyid);
+        else
+                cmplen = offsetof(struct lustre_capa, lc_hmac);
+
+        hlist_for_each_entry(entry, node, head, u.tgt.c_hash) {
+                if (memcmp(&entry->c_capa, capa, cmplen) != 0)
+                        continue;
+                /* don't return an expired one in this case */
+                if (mds_get && capa_is_to_expire(entry))
+                        continue;
+
+                LASSERT(capa_on_server(entry));
+
+                DEBUG_CAPA(D_SEC, &entry->c_capa, "found");
+                return entry;
+        }
+
+        return NULL;
+}
+
+/*
+ * Unlink a server-site capability from its site list and hash bucket,
+ * then hand it to free_capa().
+ */
+static inline void capa_delete(struct obd_capa *ocapa)
+{
+        LASSERT(capa_on_server(ocapa));
+
+        list_del(&ocapa->c_list);
+        hlist_del(&ocapa->u.tgt.c_hash);
+
+        free_capa(ocapa);
+}
+
+/*
+ * Trim the list @head: starting from the front, delete entries whose
+ * reference count is zero until 12 have been freed or the list ends.
+ * Entries that are still referenced are left in place.
+ */
+static inline void free_capa_lru(struct list_head *head)
+{
+        struct obd_capa *victim, *next;
+        int freed = 0;
+
+        /* free 12 unused capa from head */
+        list_for_each_entry_safe(victim, next, head, c_list) {
+                if (freed >= 12)
+                        break;
+
+                LASSERT(capa_on_server(victim));
+                if (atomic_read(&victim->c_refc) != 0)
+                        continue;
+
+                DEBUG_CAPA(D_SEC, &victim->c_capa, "free unused");
+                capa_delete(victim);
+                freed++;
+        }
+}
+
+/*
+ * Add @capa to the server-site hash, or refresh the matching entry.
+ *
+ * A spare entry is allocated before capa_lock is taken.  If find_capa()
+ * locates an existing entry, that entry is overwritten with @capa under its
+ * own c_lock, moved to the tail of the server list, and the spare is freed.
+ * Otherwise the spare is filled in, hashed and appended to the server list;
+ * when the server count exceeds CAPA_HASH_SIZE, unused entries are trimmed
+ * with free_capa_lru().
+ *
+ * Returns the entry now holding @capa, or NULL if allocation failed.
+ */
+struct obd_capa *capa_add(struct lustre_capa *capa)
+{
+        struct hlist_head *bucket = capa_hash + capa_hashfn(&capa->lc_fid);
+        struct list_head *server_list = &capa_list[CAPA_SITE_SERVER];
+        struct obd_capa *fresh, *cached;
+
+        fresh = alloc_capa(CAPA_SITE_SERVER);
+        if (fresh == NULL)
+                return NULL;
+
+        spin_lock(&capa_lock);
+
+        cached = find_capa(capa, bucket);
+        if (cached != NULL) {
+                /* update: overwrite the cached copy in place */
+                spin_lock(&cached->c_lock);
+                cached->c_capa = *capa;
+                set_capa_expiry(cached);
+                spin_unlock(&cached->c_lock);
+
+                list_move_tail(&cached->c_list, server_list);
+
+                spin_unlock(&capa_lock);
+
+                DEBUG_CAPA(D_SEC, &cached->c_capa, "update");
+
+                free_capa(fresh);
+                return cached;
+        }
+
+        /* add: publish the freshly allocated entry */
+        fresh->c_capa = *capa;
+        set_capa_expiry(fresh);
+        hlist_add_head(&fresh->u.tgt.c_hash, bucket);
+        list_add_tail(&fresh->c_list, server_list);
+
+        if (capa_count[CAPA_SITE_SERVER] > CAPA_HASH_SIZE)
+                free_capa_lru(server_list);
+
+        DEBUG_CAPA(D_SEC, &fresh->c_capa, "new");
+
+        spin_unlock(&capa_lock);
+        return fresh;
+}
+
+/*
+ * Look @capa up in the hash under capa_lock.
+ *
+ * Returns the matching entry after calling capa_get() on it, or NULL when
+ * there is no match.
+ */
+struct obd_capa *capa_lookup(struct lustre_capa *capa)
+{
+        struct hlist_head *bucket = capa_hash + capa_hashfn(&capa->lc_fid);
+        struct obd_capa *hit;
+
+        spin_lock(&capa_lock);
+        hit = find_capa(capa, bucket);
+        if (hit != NULL)
+                capa_get(hit);
+        spin_unlock(&capa_lock);
+
+        return hit;
+}
+
+/*
+ * Compute the HMAC of @capa with @key and store it in @hmac.
+ *
+ * The digest covers the bytes of @capa that precede lc_hmac.  Only
+ * CAPA_HMAC_ALG_SHA1 is accepted.
+ *
+ * Returns 0 on success, -EFAULT for any other algorithm, -ENOMEM when the
+ * crypto transform cannot be allocated.
+ *
+ * This function has no ENTRY, so every exit uses plain return rather than
+ * the RETURN() trace macro.
+ */
+int capa_hmac(__u8 *hmac, struct lustre_capa *capa, __u8 *key)
+{
+        struct crypto_tfm *tfm;
+        struct capa_hmac_alg *alg;
+        int keylen;
+        /* single-segment scatterlist covering capa up to lc_hmac */
+        struct scatterlist sl = {
+                .page   = virt_to_page(capa),
+                .offset = (unsigned long)(capa) % PAGE_SIZE,
+                .length = offsetof(struct lustre_capa, lc_hmac),
+        };
+
+        if (capa_alg(capa) != CAPA_HMAC_ALG_SHA1)
+                return -EFAULT;
+
+        alg = &capa_hmac_algs[capa_alg(capa)];
+
+        tfm = crypto_alloc_tfm(alg->ha_name, 0);
+        if (!tfm)
+                return -ENOMEM;
+        keylen = alg->ha_keylen;
+
+        crypto_hmac(tfm, key, &keylen, &sl, 1, hmac);
+        crypto_free_tfm(tfm);
+
+        return 0;
+}
+
+/*
+ * Delete every capability on the list of @site whose c_site equals @site,
+ * then assert that the counter for that site has dropped to zero.
+ */
+void cleanup_capas(int site)
+{
+        struct obd_capa *cur, *next;
+
+        spin_lock(&capa_lock);
+        list_for_each_entry_safe(cur, next, &capa_list[site], c_list) {
+                if (cur->c_site != site)
+                        continue;
+                capa_delete(cur);
+        }
+        spin_unlock(&capa_lock);
+
+        LASSERTF(capa_count[site] == 0, "%s capability count is %d\n",
+                 capa_site_name[site], capa_count[site]);
+}
+#endif
+
+/*
+ * Copy the capability stored in @ocapa into the buffer @capa, which is
+ * written as a struct lustre_capa.  The copy is made under @ocapa's c_lock.
+ */
+void capa_cpy(void *capa, struct obd_capa *ocapa)
+{
+        struct lustre_capa *dst = capa;
+
+        spin_lock(&ocapa->c_lock);
+        *dst = ocapa->c_capa;
+        spin_unlock(&ocapa->c_lock);
+}
+
+/*
+ * Render the first CAPA_HMAC_MAX_LEN bytes of @key into @buf as lowercase
+ * two-digit hex.  @buf must have room for 2 * CAPA_HMAC_MAX_LEN characters
+ * plus the terminating NUL; no bound is checked here.
+ */
+void dump_capa_hmac(char *buf, char *key)
+{
+        char *out = buf;
+        int i;
+
+        for (i = 0; i < CAPA_HMAC_MAX_LEN; i++)
+                out += sprintf(out, "%02x", (unsigned char) key[i]);
+}
+
+/* hash maintenance entry points */
+EXPORT_SYMBOL(capa_add);
+EXPORT_SYMBOL(capa_lookup);
+
+/* HMAC computation and locked capability copy */
+EXPORT_SYMBOL(capa_hmac);
+EXPORT_SYMBOL(capa_cpy);
+
+/* per-site teardown and debugging helper */
+EXPORT_SYMBOL(cleanup_capas);
+EXPORT_SYMBOL(dump_capa_hmac);
index 47b7d5c..58fab59 100644 (file)
@@ -513,6 +513,9 @@ int obd_init_checks(void)
 #define obd_init_checks() do {} while(0)
 #endif
 
+extern int init_capa_hash(void);
+extern void cleanup_capa_hash(void);
+
 extern spinlock_t obd_types_lock;
 extern spinlock_t handle_lock;
 extern int class_procfs_init(void);
@@ -537,6 +540,10 @@ int init_obdclass(void)
         CDEBUG(D_INFO, "        Build Version: "BUILD_VERSION"\n");
 #endif
 
+        err = init_capa_hash();
+        if (err)
+                return err;
+
         spin_lock_init(&obd_types_lock);
         spin_lock_init(&handle_lock);
         cfs_waitq_init(&obd_race_waitq);
@@ -602,6 +609,7 @@ static void cleanup_obdclass(void)
         }
         lu_global_fini();
 
+        cleanup_capa_hash();
         obd_cleanup_caches();
         obd_sysctl_clean();
 
index a574ab2..47310e2 100644 (file)
@@ -162,7 +162,7 @@ static struct dt_object *dt_locate(const struct lu_context *ctx,
         struct lu_object *obj;
         struct dt_object *dt;
 
-        obj = lu_object_find(ctx, dev->dd_lu_dev.ld_site, fid);
+        obj = lu_object_find(ctx, dev->dd_lu_dev.ld_site, fid, BYPASS_CAPA);
         if (!IS_ERR(obj)) {
                 obj = lu_object_locate(obj->lo_header, dev->dd_lu_dev.ld_type);
                 LASSERT(obj != NULL);
@@ -185,6 +185,7 @@ struct dt_object *dt_store_open(const struct lu_context *ctx,
         if (result == 0) {
                 root = dt_locate(ctx, dt, fid);
                 if (!IS_ERR(root)) {
+                        lu_object_bypass_capa(&root->do_lu);
                         result = dt_lookup(ctx, root, name, fid);
                         if (result == 0)
                                 child = dt_locate(ctx, dt, fid);
index 49a686f..86dbd59 100644 (file)
@@ -518,6 +518,11 @@ void obd_cleanup_caches(void)
                 LASSERTF(rc == 0, "Cannot destory ll_import_cache\n");
                 import_cachep = NULL;
         }
+        if (capa_cachep) {
+                rc = cfs_mem_cache_destroy(capa_cachep);
+                LASSERTF(rc == 0, "Cannot destory capa_cache\n");
+                capa_cachep = NULL;
+        }
         EXIT;
 }
 
@@ -544,6 +549,12 @@ int obd_init_caches(void)
         if (!import_cachep)
                 GOTO(out, -ENOMEM);
 
+        LASSERT(capa_cachep == NULL);
+        capa_cachep = cfs_mem_cache_create("capa_cache",
+                                           sizeof(struct obd_capa), 0, 0);
+        if (!capa_cachep)
+                GOTO(out, -ENOMEM);
+
         RETURN(0);
  out:
         obd_cleanup_caches();
index 460cd4b..34f7707 100644 (file)
@@ -694,7 +694,8 @@ static int llog_lvfs_destroy(struct llog_handle *handle)
         if (rc)
                 GOTO(out, rc);
 
-        rc = obd_destroy(handle->lgh_ctxt->loc_exp, oa, NULL, NULL, NULL);
+        rc = obd_destroy(handle->lgh_ctxt->loc_exp, oa, NULL, NULL, NULL,
+                         NULL);
  out:
         obdo_free(oa);
         RETURN(rc);
index f079d15..c39f494 100644 (file)
@@ -388,7 +388,7 @@ static const char *obd_connect_names[] = {
         "remote_client",
         "max_byte_per_rpc",
         "64bit_qdata",
-        "fid_capability",
+        "mds_capability",
         "oss_capability",
         NULL
 };
index e24d976..e8a5031 100644 (file)
@@ -107,7 +107,8 @@ EXPORT_SYMBOL(lu_object_put);
  */
 static struct lu_object *lu_object_alloc(const struct lu_context *ctxt,
                                          struct lu_site *s,
-                                         const struct lu_fid *f)
+                                         const struct lu_fid *f,
+                                         const struct lustre_capa *capa)
 {
         struct lu_object *scan;
         struct lu_object *top;
@@ -128,7 +129,11 @@ static struct lu_object *lu_object_alloc(const struct lu_context *ctxt,
          * This is the only place where object fid is assigned. It's constant
          * after this point.
          */
-        top->lo_header->loh_fid = *f;
+        top->lo_header->loh_fid  = *f;
+        if (capa == BYPASS_CAPA)
+                lu_object_bypass_capa(top);
+        else
+                top->lo_header->loh_capa = *capa;
         layers = &top->lo_header->loh_layers;
         do {
                 /*
@@ -422,11 +427,13 @@ static __u32 fid_hash(const struct lu_fid *f)
  * any case, additional reference is acquired on the returned object.
  */
 struct lu_object *lu_object_find(const struct lu_context *ctxt,
-                                 struct lu_site *s, const struct lu_fid *f)
+                                 struct lu_site *s, const struct lu_fid *f,
+                                 struct lustre_capa *capa)
 {
         struct lu_object  *o;
         struct lu_object  *shadow;
         struct hlist_head *bucket;
+        int                rc;
 
         /*
          * This uses standard index maintenance protocol:
@@ -447,13 +454,24 @@ struct lu_object *lu_object_find(const struct lu_context *ctxt,
         o = htable_lookup(s, bucket, f);
 
         spin_unlock(&s->ls_guard);
-        if (o != NULL)
+        if (o != NULL) {
+                if (capa == BYPASS_CAPA) {
+                        o->lo_header->loh_capa_bypass = 1;
+                } else {
+                        rc = lu_object_auth(ctxt, o, capa,
+                                            CAPA_OPC_INDEX_LOOKUP);
+                        if (rc)
+                                return ERR_PTR(rc);
+                        o->lo_header->loh_capa = *capa;
+                }
                 return o;
+        }
+
         /*
          * Allocate new object. This may result in rather complicated
          * operations, including fld queries, inode loading, etc.
          */
-        o = lu_object_alloc(ctxt, s, f);
+        o = lu_object_alloc(ctxt, s, f, capa);
         if (IS_ERR(o))
                 return o;
 
@@ -476,6 +494,24 @@ struct lu_object *lu_object_find(const struct lu_context *ctxt,
 }
 EXPORT_SYMBOL(lu_object_find);
 
+/*
+ * Ask every layer of the compound object that @o belongs to whether @capa
+ * authorizes operation @opc.
+ *
+ * Layers without a loo_object_auth method are skipped.  The first non-zero
+ * result stops the walk and is returned; 0 means no layer objected.
+ */
+int lu_object_auth(const struct lu_context *ctxt, const struct lu_object *o,
+                   struct lustre_capa *capa, __u64 opc)
+{
+        struct lu_object_header *hdr = o->lo_header;
+        const struct lu_object *layer;
+        int rc = 0;
+
+        list_for_each_entry(layer, &hdr->loh_layers, lo_linkage) {
+                if (layer->lo_ops->loo_object_auth == NULL)
+                        continue;
+
+                rc = layer->lo_ops->loo_object_auth(ctxt, layer, capa, opc);
+                if (rc != 0)
+                        break;
+        }
+
+        return rc;
+}
+EXPORT_SYMBOL(lu_object_auth);
+
 enum {
         LU_SITE_HTABLE_BITS = 8,
         LU_SITE_HTABLE_SIZE = (1 << LU_SITE_HTABLE_BITS),
index b0ce61d..6931118 100644 (file)
@@ -1907,7 +1907,6 @@ static int lmd_parse(char *options, struct lustre_mount_data *lmd)
                                 goto invalid;
                         clear++;
                 }
-
                 /* Linux 2.4 doesn't pass the device, so we stuck it at the
                    end of the options. */
                 else if (strncmp(s1, "device=", 7) == 0) {
index 547ae26..67bdd54 100644 (file)
@@ -130,7 +130,7 @@ int echo_create(struct obd_export *exp, struct obdo *oa,
 
 int echo_destroy(struct obd_export *exp, struct obdo *oa,
                  struct lov_stripe_md *ea, struct obd_trans_info *oti,
-                 struct obd_export *md_exp)
+                 struct obd_export *md_exp, void *unused)
 {
         struct obd_device *obd = class_exp2obd(exp);
 
@@ -270,7 +270,7 @@ echo_page_debug_check(cfs_page_t *page, obd_id id,
 int echo_preprw(int cmd, struct obd_export *export, struct obdo *oa,
                 int objcount, struct obd_ioobj *obj, int niocount,
                 struct niobuf_remote *nb, struct niobuf_local *res,
-                struct obd_trans_info *oti)
+                struct obd_trans_info *oti, struct lustre_capa *unused)
 {
         struct obd_device *obd;
         struct niobuf_local *r = res;
index 1429bdf..016176c 100644 (file)
@@ -240,7 +240,7 @@ static int echo_create_object(struct obd_device *obd, int on_target,
                         oa->o_id, on_target ? " (undoing create)" : "");
 
                 if (on_target)
-                        obd_destroy(ec->ec_exp, oa, lsm, oti, NULL);
+                        obd_destroy(ec->ec_exp, oa, lsm, oti, NULL, NULL);
 
                 rc = -EEXIST;
                 goto failed;
@@ -945,7 +945,8 @@ static int echo_client_prep_commit(struct obd_export *exp, int rw,
                 ioo.ioo_bufcnt = npages;
                 oti->oti_transno = 0;
 
-                ret = obd_preprw(rw, exp, oa, 1, &ioo, npages, rnb, lnb, oti);
+                ret = obd_preprw(rw, exp, oa, 1, &ioo, npages, rnb, lnb, oti,
+                                 NULL);
                 if (ret != 0)
                         GOTO(out, ret);
 
@@ -1233,7 +1234,7 @@ echo_client_iocontrol(unsigned int cmd, struct obd_export *exp,
                         oa->o_gr = FILTER_GROUP_ECHO;
                         oa->o_valid |= OBD_MD_FLGROUP;
                         rc = obd_destroy(ec->ec_exp, oa, eco->eco_lsm,
-                                         &dummy_oti, NULL);
+                                         &dummy_oti, NULL, NULL);
                         if (rc == 0)
                                 eco->eco_deleted = 1;
                         echo_put_object(eco);
index 8305eb5..f1b3e78 100644 (file)
@@ -1,7 +1,7 @@
 MODULES := obdfilter
 
 obdfilter-objs := filter.o filter_io.o filter_log.o
-obdfilter-objs += lproc_obdfilter.o filter_lvb.o
+obdfilter-objs += lproc_obdfilter.o filter_lvb.o filter_capa.o
 
 ifeq ($(PATCHLEVEL),4)
 obdfilter-objs += filter_io_24.o
index ced7305..7d48029 100644 (file)
@@ -1263,6 +1263,8 @@ static void filter_post(struct obd_device *obd)
         filter_cleanup_groups(obd);
         filter_free_server_data(filter);
         pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+
+        filter_free_capa_keys(filter);
 }
 
 static void filter_set_last_id(struct filter_obd *filter,
@@ -1927,6 +1929,9 @@ int filter_common_setup(struct obd_device *obd, struct lustre_cfg* lcfg,
                               obd->obd_replayable ? "enabled" : "disabled");
         }
 
+        filter->fo_fl_oss_capa = 0;
+        INIT_LIST_HEAD(&filter->fo_capa_keys);
+
         RETURN(0);
 
 err_post:
@@ -2716,6 +2721,11 @@ static int filter_getattr(struct obd_export *exp, struct obd_info *oinfo)
         int rc = 0;
         ENTRY;
 
+        rc = filter_verify_capa(exp, NULL, oinfo_mdsno(oinfo),
+                                oinfo_capa(oinfo), CAPA_OPC_META_READ);
+        if (rc)
+                RETURN(rc);
+
         obd = class_exp2obd(exp);
         if (obd == NULL) {
                 CDEBUG(D_IOCTL, "invalid client export %p\n", exp);
@@ -2919,6 +2929,11 @@ int filter_setattr(struct obd_export *exp, struct obd_info *oinfo,
         int rc;
         ENTRY;
 
+        rc = filter_verify_capa(exp, NULL, oinfo_mdsno(oinfo),
+                                oinfo_capa(oinfo), CAPA_OPC_META_WRITE);
+        if (rc)
+                RETURN(rc);
+
         dentry = __filter_oa2dentry(exp->exp_obd, oinfo->oi_oa,
                                     __FUNCTION__, 1);
         if (IS_ERR(dentry))
@@ -3047,7 +3062,7 @@ static int filter_destroy_precreated(struct obd_export *exp, struct obdo *oa,
                exp->exp_obd->obd_name, oa->o_id + 1, last);
         for (id = last; id > oa->o_id; id--) {
                 doa.o_id = id;
-                rc = filter_destroy(exp, &doa, NULL, NULL, NULL);
+                rc = filter_destroy(exp, &doa, NULL, NULL, NULL, NULL);
                 if (rc && rc != -ENOENT) /* this is pretty fatal... */
                         CEMERG("error destroying precreate objid "LPU64": %d\n",
                                id, rc);
@@ -3419,7 +3434,7 @@ static int filter_create(struct obd_export *exp, struct obdo *oa,
 
 int filter_destroy(struct obd_export *exp, struct obdo *oa,
                    struct lov_stripe_md *md, struct obd_trans_info *oti,
-                   struct obd_export *md_exp)
+                   struct obd_export *md_exp, void *capa)
 {
         unsigned int qcids[MAXQUOTAS] = {0, 0};
         struct obd_device *obd;
@@ -3433,6 +3448,15 @@ int filter_destroy(struct obd_export *exp, struct obdo *oa,
         ENTRY;
 
         LASSERT(oa->o_valid & OBD_MD_FLGROUP);
+
+#if 0   /* some places don't support capability yet */
+        rc = filter_verify_capa(exp, NULL, obdo_mdsno(oa),
+                                (struct lustre_capa *)capa,
+                                CAPA_OPC_INDEX_LOOKUP);
+        if (rc)
+                RETURN(rc);
+#endif
+
 #if 0
         if (!(oa->o_valid & OBD_MD_FLGROUP))
                 oa->o_gr = 0;
@@ -3580,13 +3604,19 @@ static int filter_truncate(struct obd_export *exp, struct obd_info *oinfo,
                ", o_size = "LPD64"\n", oinfo->oi_oa->o_id,
                oinfo->oi_oa->o_valid, oinfo->oi_policy.l_extent.start);
 
+        rc = filter_verify_capa(exp, NULL, oinfo_mdsno(oinfo),
+                                oinfo_capa(oinfo), CAPA_OPC_OSS_TRUNC);
+        if (rc)
+                RETURN(rc);
+
         oinfo->oi_oa->o_size = oinfo->oi_policy.l_extent.start;
         rc = filter_setattr(exp, oinfo, oti);
         RETURN(rc);
 }
 
 static int filter_sync(struct obd_export *exp, struct obdo *oa,
-                       struct lov_stripe_md *lsm, obd_off start, obd_off end)
+                       struct lov_stripe_md *lsm, obd_off start, obd_off end,
+                       void *capa)
 {
         struct lvfs_run_ctxt saved;
         struct filter_obd *filter;
@@ -3595,6 +3625,11 @@ static int filter_sync(struct obd_export *exp, struct obdo *oa,
         int rc, rc2;
         ENTRY;
 
+        rc = filter_verify_capa(exp, NULL, obdo_mdsno(oa),
+                                (struct lustre_capa *)capa, CAPA_OPC_OSS_WRITE);
+        if (rc)
+                RETURN(rc);
+
         filter = &exp->exp_obd->u.filter;
 
         /* an objid of zero is taken to mean "sync whole filesystem" */
@@ -3691,6 +3726,13 @@ static int filter_set_info_async(struct obd_export *exp, __u32 keylen,
                 RETURN(-EINVAL);
         }
 
+        if (KEY_IS(KEY_CAPA_KEY)) {
+                rc = filter_update_capa_key(obd, (struct lustre_capa_key *)val);
+                if (rc)
+                        CERROR("filter update capability key failed: %d\n", rc);
+                RETURN(rc);
+        }
+
         if (keylen < strlen(KEY_MDS_CONN) ||
             memcmp(key, KEY_MDS_CONN, keylen) != 0)
                 RETURN(-EINVAL);
diff --git a/lustre/obdfilter/filter_capa.c b/lustre/obdfilter/filter_capa.c
new file mode 100644 (file)
index 0000000..cbdca5b
--- /dev/null
@@ -0,0 +1,184 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2005 Cluster File Systems, Inc.
+ *
+ * Author: Lai Siyao <lsy@clusterfs.com>
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#define DEBUG_SUBSYSTEM S_FILTER
+
+#include <linux/fs.h>
+#include <linux/version.h>
+#include <asm/uaccess.h>
+#include <linux/file.h>
+#include <linux/kmod.h>
+
+#include <lustre_fsfilt.h>
+#include <lustre_capa.h>
+
+#include "filter_internal.h"
+
+/* Key id of the capability key wrapped by @key. */
+static inline __u32 filter_ck_keyid(struct filter_capa_key *key)
+{
+        struct lustre_capa_key *raw = &key->k_key;
+
+        return raw->lk_keyid;
+}
+
+/*
+ * Install capability key @key on the filter device @obd.
+ *
+ * The key list is scanned for entries with the same lk_mdsid: the first
+ * match becomes rkey and the last later match becomes bkey.  When both
+ * exist they are ordered so that bkey has the lower key id, and bkey is
+ * overwritten with @key.  When fewer than two entries exist for that MDS, a
+ * new entry is allocated and added to the list.
+ *
+ * Returns 0 on success, -ENOMEM if a new entry cannot be allocated.
+ *
+ * NOTE(review): capa_lock is dropped between the scan and the update, so a
+ * concurrent update or filter_free_capa_keys() could change the list in
+ * between -- confirm callers serialize this.
+ */
+int filter_update_capa_key(struct obd_device *obd, struct lustre_capa_key *key)
+{
+        struct filter_obd *filter = &obd->u.filter;
+        struct filter_capa_key *k, *rkey = NULL, *bkey = NULL;
+        /* pairs with the RETURN() exits below, as in filter_verify_capa() */
+        ENTRY;
+
+        spin_lock(&capa_lock);
+        list_for_each_entry(k, &filter->fo_capa_keys, k_list) {
+                if (k->k_key.lk_mdsid != key->lk_mdsid)
+                        continue;
+
+                if (rkey)
+                        bkey = k;
+                else
+                        rkey = k;
+        }
+        spin_unlock(&capa_lock);
+
+        /* make bkey the entry with the lower key id */
+        if (rkey && bkey && filter_ck_keyid(rkey) < filter_ck_keyid(bkey)) {
+                k = rkey;
+                rkey = bkey;
+                bkey = k;
+        }
+
+        if (bkey) {
+                /* reuse the lower-id slot */
+                k = bkey;
+        } else {
+                OBD_ALLOC_PTR(k);
+                if (!k)
+                        RETURN(-ENOMEM);
+                INIT_LIST_HEAD(&k->k_list);
+        }
+
+        spin_lock(&capa_lock);
+        k->k_key = *key;
+        /* only a freshly allocated entry is still unlinked */
+        if (list_empty(&k->k_list))
+                list_add(&k->k_list, &filter->fo_capa_keys);
+        spin_unlock(&capa_lock);
+
+        DEBUG_CAPA_KEY(D_SEC, key, "new");
+        RETURN(0);
+}
+
+/*
+ * Check that @capa authorizes operation @opc on this filter device.
+ *
+ * @exp    export whose obd device holds the capability keys
+ * @fid    not used yet (see the #warning below)
+ * @mdsid  id of the MDS whose keys are used for verification
+ * @capa   capability presented by the caller, may be NULL
+ * @opc    operation being attempted
+ *
+ * Returns 0 when access is allowed.  That includes two non-verifying cases:
+ * capability checking is disabled (fo_fl_oss_capa is zero), or no key for
+ * @mdsid is known yet.  Returns -EACCES when @capa is missing, does not
+ * cover @opc, or fails comparison; -ESTALE when it is expired or no key
+ * with its key id is known; -ENOMEM on allocation failure; or the error
+ * from capa_hmac().
+ */
+int filter_verify_capa(struct obd_export *exp, struct lu_fid *fid, __u64 mdsid,
+                       struct lustre_capa *capa, __u64 opc)
+{
+        struct obd_device *obd = exp->exp_obd;
+        struct filter_obd *filter = &obd->u.filter;
+        struct filter_capa_key *k;
+        struct lustre_capa_key key;
+        struct obd_capa *c;
+        __u8 *hmac;
+        int keys_ready = 0, key_found = 0, rc = 0;
+        ENTRY;
+
+        /* capability is disabled */
+        if (!filter->fo_fl_oss_capa)
+                RETURN(0);
+
+        if (capa == NULL) {
+                CERROR("no capa has been passed\n");
+                RETURN(-EACCES);
+        }
+
+#warning "enable fid check in filter_verify_capa when fid ready"
+
+        if (!capa_opc_supported(capa, opc)) {
+                DEBUG_CAPA(D_ERROR, capa, "opc "LPX64" not supported by", opc);
+                RETURN(-EACCES);
+        }
+
+        /*
+         * Fast path: the capability is already in the hash.  Compare the
+         * whole structure against the cached copy instead of recomputing
+         * the HMAC.
+         */
+        c = capa_lookup(capa);
+        if (c) {
+                spin_lock(&c->c_lock);
+                if (memcmp(&c->c_capa, capa, sizeof(*capa))) {
+                        DEBUG_CAPA(D_ERROR, capa, "HMAC mismatch");
+                        rc = -EACCES;
+                } else if (capa_is_expired(c)) {
+                        DEBUG_CAPA(D_ERROR, capa, "expired");
+                        rc = -ESTALE;
+                }
+                spin_unlock(&c->c_lock);
+
+                /* drop the reference taken by capa_lookup() */
+                capa_put(c);
+                RETURN(rc);
+        }
+
+        /*
+         * Slow path: find the key of this MDS whose id matches the
+         * capability and copy it out while capa_lock is held.
+         */
+        spin_lock(&capa_lock);
+        list_for_each_entry(k, &filter->fo_capa_keys, k_list)
+                if (k->k_key.lk_mdsid == mdsid) {
+                        /* at least one key from this MDS is known */
+                        keys_ready = 1;
+                        if (k->k_key.lk_keyid == capa_keyid(capa)) {
+                                key = k->k_key;
+                                key_found = 1;
+                                break;
+                        }
+                }
+        spin_unlock(&capa_lock);
+
+        if (!keys_ready) {
+                CDEBUG(D_SEC, "MDS hasn't propagated capability keys yet, "
+                       "ignore check!\n");
+                RETURN(0);
+        }
+
+       if (!key_found) {
+                DEBUG_CAPA(D_ERROR, capa, "no matched capability key for");
+                RETURN(-ESTALE);
+        }
+
+        OBD_ALLOC(hmac, CAPA_HMAC_MAX_LEN);
+        if (hmac == NULL)
+                RETURN(-ENOMEM);
+
+        /* recompute the HMAC with the copied key and compare */
+        rc = capa_hmac(hmac, capa, key.lk_key);
+        if (rc) {
+                DEBUG_CAPA(D_ERROR, capa, "HMAC failed: rc %d", rc);
+                OBD_FREE(hmac, CAPA_HMAC_MAX_LEN);
+                RETURN(rc);
+        }
+
+        rc = memcmp(hmac, capa->lc_hmac, CAPA_HMAC_MAX_LEN);
+        OBD_FREE(hmac, CAPA_HMAC_MAX_LEN);
+        if (rc) {
+                DEBUG_CAPA(D_ERROR, capa, "HMAC mismatch");
+                RETURN(-EACCES);
+        }
+
+        /*
+         * store in capa hash
+         * NOTE(review): the entry returned by capa_add() (NULL on
+         * allocation failure) is ignored here -- confirm that is intended.
+         */
+        capa_add(capa);
+        RETURN(0);
+}
+
+/*
+ * Unlink and free every capability key on @filter's fo_capa_keys list
+ * while holding capa_lock.
+ */
+void filter_free_capa_keys(struct filter_obd *filter)
+{
+        struct list_head *keys = &filter->fo_capa_keys;
+        struct filter_capa_key *key;
+
+        spin_lock(&capa_lock);
+        while (!list_empty(keys)) {
+                key = list_entry(keys->next, struct filter_capa_key, k_list);
+                list_del_init(&key->k_list);
+                OBD_FREE(key, sizeof(*key));
+        }
+        spin_unlock(&capa_lock);
+}
index 9933a1b..8c4a0de 100644 (file)
@@ -107,7 +107,7 @@ int filter_common_setup(struct obd_device *, struct lustre_cfg *lcfg,
                         void *option);
 int filter_destroy(struct obd_export *exp, struct obdo *oa,
                    struct lov_stripe_md *md, struct obd_trans_info *,
-                   struct obd_export *);
+                   struct obd_export *, void *capa);
 int filter_setattr_internal(struct obd_export *exp, struct dentry *dentry,
                             struct obdo *oa, struct obd_trans_info *oti);
 int filter_setattr(struct obd_export *exp, struct obd_info *oinfo,
@@ -125,7 +125,8 @@ extern struct ldlm_valblock_ops filter_lvbo;
 /* filter_io.c */
 int filter_preprw(int cmd, struct obd_export *, struct obdo *, int objcount,
                   struct obd_ioobj *, int niocount, struct niobuf_remote *,
-                  struct niobuf_local *, struct obd_trans_info *);
+                  struct niobuf_local *, struct obd_trans_info *,
+                  struct lustre_capa *);
 int filter_commitrw(int cmd, struct obd_export *, struct obdo *, int objcount,
                     struct obd_ioobj *, int niocount, struct niobuf_local *,
                     struct obd_trans_info *, int rc);
@@ -191,4 +192,14 @@ static inline int lproc_filter_attach_seqstat(struct obd_device *dev) {}
 /* Quota stuff */
 extern quota_interface_t *quota_interface;
 
+/* Capability */
+/*
+ * MDS number for @oa, computed as the offset of its object group (o_gr)
+ * from FILTER_GROUP_MDS0.
+ * NOTE(review): no check that o_gr >= FILTER_GROUP_MDS0; a smaller group
+ * would wrap -- confirm callers only pass MDS groups.
+ */
+static inline __u64 obdo_mdsno(struct obdo *oa)
+{
+        return oa->o_gr - FILTER_GROUP_MDS0;
+}
+
+int filter_update_capa_key(struct obd_device *obd, struct lustre_capa_key *key);
+int filter_verify_capa(struct obd_export *exp, struct lu_fid *fid, __u64 mdsid,
+                       struct lustre_capa *capa, __u64 opc);
+void filter_free_capa_keys(struct filter_obd *filter);
 #endif /* _FILTER_INTERNAL_H */
index 38efbc2..a6645d4 100644 (file)
@@ -271,7 +271,8 @@ static int filter_preprw_read(int cmd, struct obd_export *exp, struct obdo *oa,
                               int objcount, struct obd_ioobj *obj,
                               int niocount, struct niobuf_remote *nb,
                               struct niobuf_local *res,
-                              struct obd_trans_info *oti)
+                              struct obd_trans_info *oti,
+                              struct lustre_capa *capa)
 {
         struct obd_device *obd = exp->exp_obd;
         struct lvfs_run_ctxt saved;
@@ -290,6 +291,11 @@ static int filter_preprw_read(int cmd, struct obd_export *exp, struct obdo *oa,
         LASSERTF(objcount == 1, "%d\n", objcount);
         LASSERTF(obj->ioo_bufcnt > 0, "%d\n", obj->ioo_bufcnt);
 
+        rc = filter_verify_capa(exp, NULL, obdo_mdsno(oa), capa,
+                                CAPA_OPC_OSS_READ);
+        if (rc)
+                RETURN(rc);
+
         if (oa && oa->o_valid & OBD_MD_FLGRANT) {
                 spin_lock(&obd->obd_osfs_lock);
                 filter_grant_incoming(exp, oa);
@@ -504,7 +510,8 @@ static int filter_preprw_write(int cmd, struct obd_export *exp, struct obdo *oa,
                                int objcount, struct obd_ioobj *obj,
                                int niocount, struct niobuf_remote *nb,
                                struct niobuf_local *res,
-                               struct obd_trans_info *oti)
+                               struct obd_trans_info *oti,
+                               struct lustre_capa *capa)
 {
         struct lvfs_run_ctxt saved;
         struct niobuf_remote *rnb;
@@ -520,6 +527,11 @@ static int filter_preprw_write(int cmd, struct obd_export *exp, struct obdo *oa,
         LASSERT(objcount == 1);
         LASSERT(obj->ioo_bufcnt > 0);
 
+        rc = filter_verify_capa(exp, NULL, obdo_mdsno(oa), capa,
+                                CAPA_OPC_OSS_WRITE);
+        if (rc)
+                RETURN(rc);
+
         push_ctxt(&saved, &exp->exp_obd->obd_lvfs_ctxt, NULL);
         iobuf = filter_iobuf_get(&exp->exp_obd->u.filter, oti);
         if (IS_ERR(iobuf))
@@ -676,14 +688,14 @@ cleanup:
 int filter_preprw(int cmd, struct obd_export *exp, struct obdo *oa,
                   int objcount, struct obd_ioobj *obj, int niocount,
                   struct niobuf_remote *nb, struct niobuf_local *res,
-                  struct obd_trans_info *oti)
+                  struct obd_trans_info *oti, struct lustre_capa *capa)
 {
         if (cmd == OBD_BRW_WRITE)
                 return filter_preprw_write(cmd, exp, oa, objcount, obj,
-                                           niocount, nb, res, oti);
+                                           niocount, nb, res, oti, capa);
         if (cmd == OBD_BRW_READ)
                 return filter_preprw_read(cmd, exp, oa, objcount, obj,
-                                          niocount, nb, res, oti);
+                                          niocount, nb, res, oti, capa);
         LBUG();
         return -EPROTO;
 }
@@ -851,7 +863,7 @@ int filter_brw(int cmd, struct obd_export *exp, struct obd_info *oinfo,
         ioo.ioo_bufcnt = oa_bufs;
 
         ret = filter_preprw(cmd, exp, oinfo->oi_oa, 1, &ioo,
-                            oa_bufs, rnb, lnb, oti);
+                            oa_bufs, rnb, lnb, oti, oinfo_capa(oinfo));
         if (ret != 0)
                 GOTO(out, ret);
 
index d737983..90f4358 100644 (file)
@@ -149,7 +149,7 @@ static int filter_recov_log_unlink_cb(struct llog_ctxt *ctxt,
         memcpy(obdo_logcookie(oa), cookie, sizeof(*cookie));
         oid = oa->o_id;
 
-        rc = filter_destroy(exp, oa, NULL, NULL, NULL);
+        rc = filter_destroy(exp, oa, NULL, NULL, NULL, NULL);
         obdo_free(oa);
         if (rc == -ENOENT) {
                 CDEBUG(D_HA, "object already removed, send cookie\n");
index a6bd67a..7c5cfbe 100644 (file)
@@ -147,6 +147,31 @@ static int osc_unpackmd(struct obd_export *exp, struct lov_stripe_md **lsmp,
         RETURN(lsm_size);
 }
 
+/*
+ * Copy a client capability into request buffer \a offset and flag its
+ * presence in the OST body by setting OBD_MD_FLOSSCAPA in body->oa.o_valid.
+ *
+ * \a capa is an opaque pointer to a struct obd_capa; when it is NULL there is
+ * nothing to send and both the request and \a body are left untouched.
+ * The caller must have reserved room for a struct lustre_capa in buffer
+ * \a offset when the request was prepared.
+ */
+static inline void osc_pack_capa(struct ptlrpc_request *req, int offset,
+                                 struct ost_body *body, void *capa)
+{
+        struct obd_capa *oc = capa;
+        struct lustre_capa *c;
+
+        if (oc == NULL)
+                return;
+
+        c = lustre_msg_buf(req->rq_reqmsg, offset, sizeof(*c));
+        /* NOTE(review): lustre_msg_buf() presumably returns NULL when the
+         * buffer is absent or too small; assert here rather than let
+         * capa_cpy() write through a NULL pointer. */
+        LASSERT(c != NULL);
+        capa_cpy(c, oc);
+        body->oa.o_valid |= OBD_MD_FLOSSCAPA;
+        DEBUG_CAPA(D_SEC, c, "pack");
+}
+
+/*
+ * Fill the OST body in request buffer \a offset with a copy of oinfo->oi_oa,
+ * then pack oinfo->oi_capa (if any) into the buffer that follows it.
+ */
+static inline void osc_pack_req_body(struct ptlrpc_request *req, int offset,
+                                     struct obd_info *oinfo)
+{
+        struct ost_body *body = lustre_msg_buf(req->rq_reqmsg, offset,
+                                               sizeof(struct ost_body));
+        const int capa_offset = offset + 1;
+
+        body->oa = *oinfo->oi_oa;
+        osc_pack_capa(req, capa_offset, body, oinfo->oi_capa);
+}
+
 static int osc_getattr_interpret(struct ptlrpc_request *req,
                                  struct osc_async_args *aa, int rc)
 {
@@ -180,17 +205,17 @@ static int osc_getattr_async(struct obd_export *exp, struct obd_info *oinfo,
 {
         struct ptlrpc_request *req;
         struct ost_body *body;
-        int size[2] = { sizeof(struct ptlrpc_body), sizeof(*body) };
+        int size[3] = { sizeof(struct ptlrpc_body), sizeof(*body) };
         struct osc_async_args *aa;
         ENTRY;
 
+        size[REQ_REC_OFF + 1] = oinfo->oi_capa ? sizeof(*oinfo->oi_capa) : 0;
         req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_OST_VERSION,
-                              OST_GETATTR, 2, size,NULL);
+                              OST_GETATTR, 3, size,NULL);
         if (!req)
                 RETURN(-ENOMEM);
 
-        body = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, sizeof(*body));
-        memcpy(&body->oa, oinfo->oi_oa, sizeof(*oinfo->oi_oa));
+        osc_pack_req_body(req, REQ_REC_OFF, oinfo);
 
         ptlrpc_req_set_repsize(req, 2, size);
         req->rq_interpret_reply = osc_getattr_interpret;
@@ -207,16 +232,16 @@ static int osc_getattr(struct obd_export *exp, struct obd_info *oinfo)
 {
         struct ptlrpc_request *req;
         struct ost_body *body;
-        int rc, size[2] = { sizeof(struct ptlrpc_body), sizeof(*body) };
+        int rc, size[3] = { sizeof(struct ptlrpc_body), sizeof(*body) };
         ENTRY;
 
+        size[REQ_REC_OFF + 1] = oinfo->oi_capa ? sizeof(*oinfo->oi_capa) : 0;
         req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_OST_VERSION,
-                              OST_GETATTR, 2, size, NULL);
+                              OST_GETATTR, 3, size, NULL);
         if (!req)
                 RETURN(-ENOMEM);
 
-        body = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, sizeof(*body));
-        memcpy(&body->oa, oinfo->oi_oa, sizeof(*oinfo->oi_oa));
+        osc_pack_req_body(req, REQ_REC_OFF, oinfo);
 
         ptlrpc_req_set_repsize(req, 2, size);
 
@@ -251,18 +276,18 @@ static int osc_setattr(struct obd_export *exp, struct obd_info *oinfo,
 {
         struct ptlrpc_request *req;
         struct ost_body *body;
-        int rc, size[2] = { sizeof(struct ptlrpc_body), sizeof(*body) };
+        int rc, size[3] = { sizeof(struct ptlrpc_body), sizeof(*body) };
         ENTRY;
 
         LASSERT(!(oinfo->oi_oa->o_valid & OBD_MD_FLGROUP) || 
                                         oinfo->oi_oa->o_gr > 0);
+        size[REQ_REC_OFF + 1] = oinfo->oi_capa ? sizeof(*oinfo->oi_capa) : 0;
         req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_OST_VERSION,
-                              OST_SETATTR, 2, size, NULL);
+                              OST_SETATTR, 3, size, NULL);
         if (!req)
                 RETURN(-ENOMEM);
 
-        body = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, sizeof(*body));
-        memcpy(&body->oa, oinfo->oi_oa, sizeof(*oinfo->oi_oa));
+        osc_pack_req_body(req, REQ_REC_OFF, oinfo);
 
         ptlrpc_req_set_repsize(req, 2, size);
 
@@ -311,24 +336,25 @@ static int osc_setattr_async(struct obd_export *exp, struct obd_info *oinfo,
 {
         struct ptlrpc_request *req;
         struct ost_body *body;
-        int size[2] = { sizeof(struct ptlrpc_body), sizeof(*body) };
+        int size[3] = { sizeof(struct ptlrpc_body), sizeof(*body) };
         struct osc_async_args *aa;
         ENTRY;
 
+        size[REQ_REC_OFF + 1] = oinfo->oi_capa ? sizeof(*oinfo->oi_capa) : 0;
         req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_OST_VERSION,
-                              OST_SETATTR, 2, size, NULL);
+                              OST_SETATTR, 3, size, NULL);
         if (!req)
                 RETURN(-ENOMEM);
 
-        body = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, sizeof(*body));
-
+        osc_pack_req_body(req, REQ_REC_OFF, oinfo);
         if (oinfo->oi_oa->o_valid & OBD_MD_FLCOOKIE) {
                 LASSERT(oti);
+                body = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF,
+                                      sizeof(*body));
                 memcpy(obdo_logcookie(oinfo->oi_oa), oti->oti_logcookies,
                        sizeof(*oti->oti_logcookies));
         }
 
-        memcpy(&body->oa, oinfo->oi_oa, sizeof(*oinfo->oi_oa));
         ptlrpc_req_set_repsize(req, 2, size);
         /* do mds to ost setattr asynchronouly */
         if (!rqset) {
@@ -353,7 +379,7 @@ int osc_real_create(struct obd_export *exp, struct obdo *oa,
         struct ptlrpc_request *req;
         struct ost_body *body;
         struct lov_stripe_md *lsm;
-        int rc, size[2] = { sizeof(struct ptlrpc_body), sizeof(*body) };
+        int rc, size[3] = { sizeof(struct ptlrpc_body), sizeof(*body) };
         ENTRY;
 
         LASSERT(oa);
@@ -366,13 +392,14 @@ int osc_real_create(struct obd_export *exp, struct obdo *oa,
                         RETURN(rc);
         }
 
+        /* FIXME: how to find one OSS WRITE capability? */
         req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_OST_VERSION,
-                              OST_CREATE, 2, size, NULL);
+                              OST_CREATE, 3, size, NULL);
         if (!req)
                 GOTO(out, rc = -ENOMEM);
 
         body = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, sizeof(*body));
-        memcpy(&body->oa, oa, sizeof(body->oa));
+        body->oa = *oa;
 
         ptlrpc_req_set_repsize(req, 2, size);
         if (oa->o_valid & OBD_MD_FLINLINE) {
@@ -460,7 +487,8 @@ static int osc_punch(struct obd_export *exp, struct obd_info *oinfo,
         struct ptlrpc_request *req;
         struct osc_async_args *aa;
         struct ost_body *body;
-        int size[2] = { sizeof(struct ptlrpc_body), sizeof(*body) };
+        struct lustre_capa *capa = oinfo->oi_capa;
+        int size[3] = { sizeof(struct ptlrpc_body), sizeof(*body) };
         ENTRY;
 
         if (!oinfo->oi_oa) {
@@ -468,8 +496,9 @@ static int osc_punch(struct obd_export *exp, struct obd_info *oinfo,
                 RETURN(-EINVAL);
         }
 
+        size[REQ_REC_OFF + 1] = capa ? sizeof(*capa) : 0;
         req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_OST_VERSION,
-                              OST_PUNCH, 2, size, NULL);
+                              OST_PUNCH, 3, size, NULL);
         if (!req)
                 RETURN(-ENOMEM);
 
@@ -478,14 +507,22 @@ static int osc_punch(struct obd_export *exp, struct obd_info *oinfo,
             OBD_CONNECT_REQPORTAL)
                 req->rq_request_portal = OST_IO_PORTAL;
 
-        body = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, sizeof(*body));
-        memcpy(&body->oa, oinfo->oi_oa, sizeof(*oinfo->oi_oa));
-
+        osc_pack_req_body(req, REQ_REC_OFF, oinfo);
         /* overload the size and blocks fields in the oa with start/end */
+        body = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, sizeof(*body));
         body->oa.o_size = oinfo->oi_policy.l_extent.start;
         body->oa.o_blocks = oinfo->oi_policy.l_extent.end;
         body->oa.o_valid |= (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS);
 
+        if (capa) {
+                struct lustre_capa *c;
+
+                c = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF + 1, sizeof(*c));
+                /* setattr_raw is protected by i_sem, no need to lock here */
+                *c = *capa;
+                body->oa.o_valid |= OBD_MD_FLOSSCAPA;
+        }
+
         ptlrpc_req_set_repsize(req, 2, size);
 
         req->rq_interpret_reply = osc_punch_interpret;
@@ -498,11 +535,12 @@ static int osc_punch(struct obd_export *exp, struct obd_info *oinfo,
 }
 
 static int osc_sync(struct obd_export *exp, struct obdo *oa,
-                    struct lov_stripe_md *md, obd_size start, obd_size end)
+                    struct lov_stripe_md *md, obd_size start, obd_size end,
+                    void *capa)
 {
         struct ptlrpc_request *req;
         struct ost_body *body;
-        int rc, size[2] = { sizeof(struct ptlrpc_body), sizeof(*body) };
+        int rc, size[3] = { sizeof(struct ptlrpc_body), sizeof(*body) };
         ENTRY;
 
         if (!oa) {
@@ -510,19 +548,23 @@ static int osc_sync(struct obd_export *exp, struct obdo *oa,
                 RETURN(-EINVAL);
         }
 
+        if (capa)
+                size[REQ_REC_OFF + 1] = sizeof(struct lustre_capa);
+
         req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_OST_VERSION,
-                              OST_SYNC, 2, size, NULL);
+                              OST_SYNC, 3, size, NULL);
         if (!req)
                 RETURN(-ENOMEM);
 
-        body = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, sizeof(*body));
-        memcpy(&body->oa, oa, sizeof(*oa));
-
         /* overload the size and blocks fields in the oa with start/end */
+        body = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, sizeof(*body));
+        body->oa = *oa;
         body->oa.o_size = start;
         body->oa.o_blocks = end;
         body->oa.o_valid |= (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS);
 
+        osc_pack_capa(req, REQ_REC_OFF + 1, body, capa);
+
         ptlrpc_req_set_repsize(req, 2, size);
 
         rc = ptlrpc_queue_wait(req);
@@ -556,11 +598,11 @@ static int osc_sync(struct obd_export *exp, struct obdo *oa,
  * cookies to the MDS after committing destroy transactions. */
 static int osc_destroy(struct obd_export *exp, struct obdo *oa,
                        struct lov_stripe_md *ea, struct obd_trans_info *oti,
-                       struct obd_export *md_export)
+                       struct obd_export *md_export, void *capa)
 {
         struct ptlrpc_request *req;
         struct ost_body *body;
-        int size[2] = { sizeof(struct ptlrpc_body), sizeof(*body) };
+        int size[3] = { sizeof(struct ptlrpc_body), sizeof(*body) };
         ENTRY;
 
         if (!oa) {
@@ -568,8 +610,10 @@ static int osc_destroy(struct obd_export *exp, struct obdo *oa,
                 RETURN(-EINVAL);
         }
 
+        if (capa)
+                size[REQ_REC_OFF + 1] = sizeof(struct lustre_capa);
         req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_OST_VERSION,
-                              OST_DESTROY, 2, size, NULL);
+                              OST_DESTROY, 3, size, NULL);
         if (!req)
                 RETURN(-ENOMEM);
 
@@ -579,13 +623,13 @@ static int osc_destroy(struct obd_export *exp, struct obdo *oa,
                 req->rq_request_portal = OST_IO_PORTAL;
 
         body = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, sizeof(*body));
-
-        if (oti != NULL && oa->o_valid & OBD_MD_FLCOOKIE) {
+        body->oa = *oa;
+        if (oti != NULL && oa->o_valid & OBD_MD_FLCOOKIE)
                 memcpy(obdo_logcookie(oa), oti->oti_logcookies,
                        sizeof(*oti->oti_logcookies));
-        }
 
-        memcpy(&body->oa, oa, sizeof(*oa));
+        osc_pack_capa(req, REQ_REC_OFF + 1, body, capa);
+
         ptlrpc_req_set_repsize(req, 2, size);
 
         ptlrpcd_add_req(req);
@@ -818,7 +862,8 @@ static obd_count osc_checksum_bulk(int nob, obd_count pg_count,
 static int osc_brw_prep_request(int cmd, struct obd_import *imp,struct obdo *oa,
                                 struct lov_stripe_md *lsm, obd_count page_count,
                                 struct brw_page **pga, int *requested_nobp,
-                                int *niocountp, struct ptlrpc_request **reqp)
+                                int *niocountp, struct ptlrpc_request **reqp,
+                                struct obd_capa *ocapa)
 {
         struct ptlrpc_request   *req;
         struct ptlrpc_bulk_desc *desc;
@@ -826,9 +871,10 @@ static int osc_brw_prep_request(int cmd, struct obd_import *imp,struct obdo *oa,
         struct ost_body         *body;
         struct obd_ioobj        *ioobj;
         struct niobuf_remote    *niobuf;
-        int size[4] = { sizeof(struct ptlrpc_body), sizeof(*body) };
+        int size[5] = { sizeof(struct ptlrpc_body), sizeof(*body) };
         int niocount, i, requested_nob, opc, rc;
         struct ptlrpc_request_pool *pool;
+        struct lustre_capa      *capa;
 
         ENTRY;
         opc = ((cmd & OBD_BRW_WRITE) != 0) ? OST_WRITE : OST_READ;
@@ -841,9 +887,11 @@ static int osc_brw_prep_request(int cmd, struct obd_import *imp,struct obdo *oa,
 
         size[REQ_REC_OFF + 1] = sizeof(*ioobj);
         size[REQ_REC_OFF + 2] = niocount * sizeof(*niobuf);
+        if (ocapa)
+                size[REQ_REC_OFF + 3] = sizeof(*capa);
 
         OBD_FAIL_RETURN(OBD_FAIL_OSC_BRW_PREP_REQ, -ENOMEM);
-        req = ptlrpc_prep_req_pool(imp, LUSTRE_OST_VERSION, opc, 4, size, NULL,
+        req = ptlrpc_prep_req_pool(imp, LUSTRE_OST_VERSION, opc, 5, size, NULL,
                                    pool, NULL);
         if (req == NULL)
                 RETURN (-ENOMEM);
@@ -867,10 +915,16 @@ static int osc_brw_prep_request(int cmd, struct obd_import *imp,struct obdo *oa,
         niobuf = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF + 2,
                                 niocount * sizeof(*niobuf));
 
-        memcpy(&body->oa, oa, sizeof(*oa));
+        body->oa = *oa;
 
         obdo_to_ioobj(oa, ioobj);
         ioobj->ioo_bufcnt = niocount;
+        if (ocapa) {
+                capa = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF + 3,
+                                      sizeof(*capa));
+                capa_cpy(capa, ocapa);
+                body->oa.o_valid |= OBD_MD_FLOSSCAPA;
+        }
 
         LASSERT (page_count > 0);
         for (requested_nob = i = 0; i < page_count; i++, niobuf++) {
@@ -1094,7 +1148,8 @@ static int osc_brw_fini_request(struct ptlrpc_request *req, struct obdo *oa,
 
 static int osc_brw_internal(int cmd, struct obd_export *exp,struct obdo *oa,
                             struct lov_stripe_md *lsm,
-                            obd_count page_count, struct brw_page **pga)
+                            obd_count page_count, struct brw_page **pga,
+                            struct obd_capa *ocapa)
 {
         int                    requested_nob;
         int                    niocount;
@@ -1105,7 +1160,7 @@ static int osc_brw_internal(int cmd, struct obd_export *exp,struct obdo *oa,
 restart_bulk:
         rc = osc_brw_prep_request(cmd, class_exp2cliimp(exp), oa, lsm,
                                   page_count, pga, &requested_nob, &niocount,
-                                  &req);
+                                  &req, ocapa);
         if (rc != 0)
                 return (rc);
 
@@ -1142,7 +1197,8 @@ static int brw_interpret(struct ptlrpc_request *req,
 
 static int async_internal(int cmd, struct obd_export *exp, struct obdo *oa,
                           struct lov_stripe_md *lsm, obd_count page_count,
-                          struct brw_page **pga, struct ptlrpc_request_set *set)
+                          struct brw_page **pga, struct ptlrpc_request_set *set,
+                          struct obd_capa *ocapa)
 {
         struct ptlrpc_request     *req;
         int                        requested_nob;
@@ -1164,7 +1220,7 @@ static int async_internal(int cmd, struct obd_export *exp, struct obdo *oa,
 
         rc = osc_brw_prep_request(cmd, class_exp2cliimp(exp), oa, lsm,
                                   page_count, pga, &requested_nob, &nio_count,
-                                  &req);
+                                  &req, ocapa);
 
         if (rc == 0) {
                 LASSERT(sizeof(*aa) <= sizeof(req->rq_async_args));
@@ -1308,7 +1364,7 @@ static int osc_brw(int cmd, struct obd_export *exp, struct obd_info *oinfo,
                 }
 
                 rc = osc_brw_internal(cmd, exp, oinfo->oi_oa, oinfo->oi_md,
-                                      pages_per_brw, ppga);
+                                      pages_per_brw, ppga, oinfo->oi_capa);
 
                 if (rc != 0)
                         break;
@@ -1374,7 +1430,7 @@ static int osc_brw_async(int cmd, struct obd_export *exp,
                         copy = ppga;
 
                 rc = async_internal(cmd, exp, oinfo->oi_oa, oinfo->oi_md,
-                                    pages_per_brw, copy, set);
+                                    pages_per_brw, copy, set, oinfo->oi_capa);
 
                 if (rc != 0)
                         break;
@@ -1675,6 +1731,7 @@ static struct ptlrpc_request *osc_build_req(struct client_obd *cli,
         struct obd_async_page_ops *ops = NULL;
         void *caller_data = NULL;
         struct list_head *pos;
+        struct obd_capa *ocapa;
         int i, rc;
 
         ENTRY;
@@ -1710,10 +1767,12 @@ static struct ptlrpc_request *osc_build_req(struct client_obd *cli,
         /* always get the data for the obdo for the rpc */
         LASSERT(ops != NULL);
         ops->ap_fill_obdo(caller_data, cmd, oa);
+        ocapa = ops->ap_lookup_capa(caller_data, cmd);
 
         sort_brw_pages(pga, page_count);
         rc = osc_brw_prep_request(cmd, cli->cl_import, oa, NULL, page_count,
-                                  pga, &requested_nob, &nio_count, &req);
+                                  pga, &requested_nob, &nio_count, &req, ocapa);
+        capa_put(ocapa);
         if (rc != 0) {
                 CERROR("prep_req failed: %d\n", rc);
                 GOTO(out, req = ERR_PTR(rc));
@@ -2682,12 +2741,12 @@ static int osc_enqueue(struct obd_export *exp, struct obd_info *oinfo,
 
  no_match:
         if (intent) {
-                int size[3] = {
+                int size[2] = {
                         [MSG_PTLRPC_BODY_OFF] = sizeof(struct ptlrpc_body),
                         [DLM_LOCKREQ_OFF]     = sizeof(struct ldlm_request) };
 
                 req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_DLM_VERSION,
-                                      LDLM_ENQUEUE, 2, size, NULL);
+                                      LDLM_ENQUEUE, 3, size, NULL);
                 if (req == NULL)
                         RETURN(-ENOMEM);
 
index 2e6b7b1..d7125c8 100644 (file)
@@ -638,7 +638,6 @@ static void osd_ro(const struct lu_context *ctx, struct dt_device *d)
         EXIT;
 }
 
-
 static struct dt_device_operations osd_dt_ops = {
         .dt_root_get    = osd_root_get,
         .dt_statfs      = osd_statfs,
@@ -704,14 +703,26 @@ static void osd_object_write_unlock(const struct lu_context *ctx,
         up_write(&obj->oo_sem);
 }
 
-static int osd_attr_get(const struct lu_context *ctxt, struct dt_object *dt,
+/*
+ * Run the object's own ->loo_object_auth() method for operation \a opc,
+ * handing it the capability obtained from lu_object_capa(o), and return the
+ * method's result unchanged.
+ */
+static inline int osd_object_auth(const struct lu_context *ctx,
+                                  const struct lu_object *o,
+                                  __u64 opc)
+{
+        int rc;
+
+        rc = o->lo_ops->loo_object_auth(ctx, o, lu_object_capa(o), opc);
+        return rc;
+}
+
+static int osd_attr_get(const struct lu_context *ctxt,
+                        struct dt_object *dt,
                         struct lu_attr *attr)
 {
         struct osd_object *obj = osd_dt_obj(dt);
+
         LASSERT(dt_object_exists(dt));
         LASSERT(osd_invariant(obj));
         LASSERT(osd_read_locked(ctxt, obj) || osd_write_locked(ctxt, obj));
 
+        if (osd_object_auth(ctxt, &dt->do_lu, CAPA_OPC_META_READ))
+                return -EACCES;
+
         return osd_inode_getattr(ctxt, obj->oo_inode, attr);
 }
 
@@ -726,6 +737,9 @@ static int osd_attr_set(const struct lu_context *ctxt,
         LASSERT(osd_invariant(obj));
         LASSERT(osd_write_locked(ctxt, obj));
 
+        if (osd_object_auth(ctxt, &dt->do_lu, CAPA_OPC_META_WRITE))
+                return -EACCES;
+
         return osd_inode_setattr(ctxt, obj->oo_inode, attr);
 }
 
@@ -985,6 +999,8 @@ static int osd_object_create(const struct lu_context *ctx, struct dt_object *dt,
         /*
          * XXX missing: permission checks.
          */
+        if (osd_object_auth(ctx, &dt->do_lu, CAPA_OPC_INDEX_INSERT))
+                RETURN(-EACCES);
 
         /*
          * XXX missing: sanity checks (valid ->la_mode, etc.)
@@ -1030,6 +1046,12 @@ static void osd_object_ref_add(const struct lu_context *ctxt,
         LASSERT(osd_write_locked(ctxt, obj));
         LASSERT(th != NULL);
 
+        if (osd_object_auth(ctxt, &dt->do_lu, CAPA_OPC_META_WRITE)) {
+                LU_OBJECT_DEBUG(D_ERROR, ctxt, &dt->do_lu,
+                                "no capability to link!\n");
+                return;
+        }
+
         if (inode->i_nlink < LDISKFS_LINK_MAX) {
                 inode->i_nlink ++;
                 mark_inode_dirty(inode);
@@ -1050,6 +1072,12 @@ static void osd_object_ref_del(const struct lu_context *ctxt,
         LASSERT(osd_write_locked(ctxt, obj));
         LASSERT(th != NULL);
 
+        if (osd_object_auth(ctxt, &dt->do_lu, CAPA_OPC_META_WRITE)) {
+                LU_OBJECT_DEBUG(D_ERROR, ctxt, &dt->do_lu,
+                                "no capability to unlink!\n");
+                return;
+        }
+
         if (inode->i_nlink > 0) {
                 inode->i_nlink --;
                 mark_inode_dirty(inode);
@@ -1071,6 +1099,9 @@ static int osd_xattr_get(const struct lu_context *ctxt, struct dt_object *dt,
         LASSERT(inode->i_op != NULL && inode->i_op->getxattr != NULL);
         LASSERT(osd_read_locked(ctxt, obj) || osd_write_locked(ctxt, obj));
 
+        if (osd_object_auth(ctxt, &dt->do_lu, CAPA_OPC_META_READ))
+                return -EACCES;
+
         dentry->d_inode = inode;
         return inode->i_op->getxattr(dentry, name, buf, size);
 }
@@ -1091,6 +1122,9 @@ static int osd_xattr_set(const struct lu_context *ctxt, struct dt_object *dt,
         LASSERT(osd_write_locked(ctxt, obj));
         LASSERT(handle != NULL);
 
+        if (osd_object_auth(ctxt, &dt->do_lu, CAPA_OPC_META_WRITE))
+                return -EACCES;
+
         dentry->d_inode = inode;
 
         fs_flags = 0;
@@ -1115,6 +1149,9 @@ static int osd_xattr_list(const struct lu_context *ctxt, struct dt_object *dt,
         LASSERT(inode->i_op != NULL && inode->i_op->listxattr != NULL);
         LASSERT(osd_read_locked(ctxt, obj) || osd_write_locked(ctxt, obj));
 
+        if (osd_object_auth(ctxt, &dt->do_lu, CAPA_OPC_META_READ))
+                return -EACCES;
+
         dentry->d_inode = inode;
         return inode->i_op->listxattr(dentry, buf, size);
 }
@@ -1132,6 +1169,9 @@ static int osd_xattr_del(const struct lu_context *ctxt, struct dt_object *dt,
         LASSERT(osd_write_locked(ctxt, obj));
         LASSERT(handle != NULL);
 
+        if (osd_object_auth(ctxt, &dt->do_lu, CAPA_OPC_META_WRITE))
+                return -EACCES;
+
         dentry->d_inode = inode;
         return inode->i_op->removexattr(dentry, name);
 }
@@ -1213,6 +1253,9 @@ static int osd_readpage(const struct lu_context *ctxt,
 
         LASSERT(rdpg->rp_pages != NULL);
 
+        if (osd_object_auth(ctxt, &dt->do_lu, CAPA_OPC_BODY_READ))
+                return -EACCES;
+
         if (rdpg->rp_count <= 0)
                 return -EFAULT;
 
@@ -1395,6 +1438,9 @@ static int osd_index_try(const struct lu_context *ctx, struct dt_object *dt,
         LASSERT(osd_invariant(obj));
         LASSERT(dt_object_exists(dt));
 
+        if (osd_object_auth(ctx, &dt->do_lu, CAPA_OPC_INDEX_LOOKUP))
+                RETURN(-EACCES);
+
         if (osd_sb(osd_obj2dev(obj))->s_root->d_inode == obj->oo_inode) {
                 dt->do_index_ops = &osd_index_compat_ops;
                 result = 0;
@@ -1446,6 +1492,9 @@ static int osd_index_delete(const struct lu_context *ctxt, struct dt_object *dt,
         LASSERT(obj->oo_ipd != NULL);
         LASSERT(handle != NULL);
 
+        if (osd_object_auth(ctxt, &dt->do_lu, CAPA_OPC_INDEX_DELETE))
+                RETURN(-EACCES);
+
         oh = container_of0(handle, struct osd_thandle, ot_super);
         LASSERT(oh->ot_handle != NULL);
 
@@ -1469,6 +1518,9 @@ static int osd_index_lookup(const struct lu_context *ctxt, struct dt_object *dt,
         LASSERT(obj->oo_container.ic_object == obj->oo_inode);
         LASSERT(obj->oo_ipd != NULL);
 
+        if (osd_object_auth(ctxt, &dt->do_lu, CAPA_OPC_INDEX_LOOKUP))
+                return -EACCES;
+
         rc = iam_lookup(&obj->oo_container, (const struct iam_key *)key,
                         (struct iam_rec *)rec, obj->oo_ipd);
 
@@ -1477,7 +1529,6 @@ static int osd_index_lookup(const struct lu_context *ctxt, struct dt_object *dt,
         RETURN(rc);
 }
 
-
 static int osd_index_insert(const struct lu_context *ctx, struct dt_object *dt,
                             const struct dt_rec *rec, const struct dt_key *key,
                             struct thandle *th)
@@ -1495,6 +1546,9 @@ static int osd_index_insert(const struct lu_context *ctx, struct dt_object *dt,
         LASSERT(obj->oo_ipd != NULL);
         LASSERT(th != NULL);
 
+        if (osd_object_auth(ctx, &dt->do_lu, CAPA_OPC_INDEX_INSERT))
+                return -EACCES;
+
         oh = container_of0(th, struct osd_thandle, ot_super);
         LASSERT(oh->ot_handle != NULL);
         rc = iam_insert(oh->ot_handle, &obj->oo_container,
@@ -1554,12 +1608,14 @@ static int osd_it_get(const struct lu_context *ctx,
 static void osd_it_put(const struct lu_context *ctx, struct dt_it *di)
 {
         struct osd_it *it = (struct osd_it *)di;
+
         iam_it_put(&it->oi_it);
 }
 
 static int osd_it_next(const struct lu_context *ctx, struct dt_it *di)
 {
         struct osd_it *it = (struct osd_it *)di;
+
         return iam_it_next(&it->oi_it);
 }
 
@@ -1570,6 +1626,7 @@ static int osd_it_del(const struct lu_context *ctx, struct dt_it *di,
         struct osd_thandle *oh;
 
         LASSERT(th != NULL);
+
         oh = container_of0(th, struct osd_thandle, ot_super);
         LASSERT(oh->ot_handle != NULL);
 
@@ -1580,12 +1637,14 @@ static struct dt_key *osd_it_key(const struct lu_context *ctx,
                                  const struct dt_it *di)
 {
         struct osd_it *it = (struct osd_it *)di;
+
         return (struct dt_key *)iam_it_key_get(&it->oi_it);
 }
 
 static int osd_it_key_size(const struct lu_context *ctx, const struct dt_it *di)
 {
         struct osd_it *it = (struct osd_it *)di;
+
         return iam_it_key_size(&it->oi_it);
 }
 
@@ -1593,12 +1652,14 @@ static struct dt_rec *osd_it_rec(const struct lu_context *ctx,
                                  const struct dt_it *di)
 {
         struct osd_it *it = (struct osd_it *)di;
+
         return (struct dt_rec *)iam_it_rec_get(&it->oi_it);
 }
 
 static __u32 osd_it_store(const struct lu_context *ctxt, const struct dt_it *di)
 {
         struct osd_it *it = (struct osd_it *)di;
+
         return iam_it_store(&it->oi_it);
 }
 
@@ -1606,6 +1667,7 @@ static int osd_it_load(const struct lu_context *ctxt,
                        const struct dt_it *di, __u32 hash)
 {
         struct osd_it *it = (struct osd_it *)di;
+
         return iam_it_load(&it->oi_it, hash);
 }
 
@@ -1638,6 +1700,7 @@ static int osd_index_compat_delete(const struct lu_context *ctxt,
         LASSERT(handle != NULL);
         LASSERT(S_ISDIR(obj->oo_inode->i_mode));
         ENTRY;
+
         RETURN(-EOPNOTSUPP);
 }
 
@@ -1786,7 +1849,7 @@ static int osd_index_compat_insert(const struct lu_context *ctx,
         LASSERT(osd_invariant(obj));
         LASSERT(th != NULL);
 
-        luch = lu_object_find(ctx, ludev->ld_site, fid);
+        luch = lu_object_find(ctx, ludev->ld_site, fid, BYPASS_CAPA);
         if (!IS_ERR(luch)) {
                 if (lu_object_exists(luch)) {
                         struct osd_object *child;
@@ -2241,13 +2304,102 @@ static int osd_object_invariant(const struct lu_object *l)
         return osd_invariant(osd_obj(l));
 }
 
+/*
+ * Check that a capability presented by a client can be trusted.
+ *
+ * If capa_lookup() finds an entry for @capa, the presented capability is
+ * compared byte-for-byte with the stored copy and then checked for expiry.
+ * Otherwise the entry of @keys (two entries are scanned) whose lk_keyid
+ * equals capa->lc_keyid is copied into the per-thread osd_thread_info, the
+ * HMAC is recomputed with that key via capa_hmac() and compared against
+ * capa->lc_hmac; on a match the capability is passed to capa_add().
+ *
+ * Returns 0 when the capability is accepted, -EACCES when it differs from
+ * the stored copy or the HMAC does not match, -ESTALE when it is expired or
+ * no key with a matching key id exists, or the error from capa_hmac().
+ */
+static int capa_is_sane(const struct lu_context *ctx,
+                        struct lustre_capa *capa,
+                        struct lustre_capa_key *keys)
+{
+        struct obd_capa *c;
+        struct osd_thread_info *oti = lu_context_key_get(ctx, &osd_key);
+        /*
+         * rc must start at 0: on the lookup-hit path neither branch below
+         * assigns it when the capability matches and is not expired.
+         */
+        int i, rc = 0;
+        ENTRY;
+
+        c = capa_lookup(capa);
+        if (c) {
+                spin_lock(&c->c_lock);
+                if (memcmp(&c->c_capa, capa, sizeof(*capa))) {
+                        DEBUG_CAPA(D_ERROR, capa, "HMAC mismatch");
+                        rc = -EACCES;
+                } else if (capa_is_expired(c)) {
+                        DEBUG_CAPA(D_ERROR, capa, "expired");
+                        rc = -ESTALE;
+                }
+                spin_unlock(&c->c_lock);
+
+                capa_put(c);
+                RETURN(rc);
+        }
+
+        /* copy the matching key out while holding capa_lock */
+        spin_lock(&capa_lock);
+        for (i = 0; i < 2; i++) {
+                if (keys[i].lk_keyid == capa->lc_keyid) {
+                        oti->oti_capa_key = keys[i];
+                        break;
+                }
+        }
+        spin_unlock(&capa_lock);
+
+        /* loop ran to completion: no key carries the capability's key id */
+        if (i == 2) {
+                DEBUG_CAPA(D_ERROR, capa, "no matched capa key");
+                RETURN(-ESTALE);
+        }
+
+        rc = capa_hmac(oti->oti_capa_hmac, capa, oti->oti_capa_key.lk_key);
+        if (rc)
+                RETURN(rc);
+        if (memcmp(oti->oti_capa_hmac, capa->lc_hmac, sizeof(capa->lc_hmac))) {
+                DEBUG_CAPA(D_ERROR, capa, "HMAC mismatch");
+                RETURN(-EACCES);
+        }
+
+        capa_add(capa);
+
+        RETURN(0);
+}
+
+/*
+ * Authorize operation @opc on @obj using capability @capa; installed as the
+ * loo_object_auth method of osd_lu_obj_ops.
+ *
+ * Returns 0 when access is granted and -EACCES otherwise.
+ */
+static int osd_object_capa_auth(const struct lu_context *ctx,
+                                const struct lu_object *obj,
+                                struct lustre_capa *capa,
+                                __u64 opc)
+{
+        const struct lu_fid *fid = lu_object_fid(obj);
+
+        /*
+         * NOTE(review): this unconditional return grants every request and
+         * makes all checks below unreachable. It looks like a deliberate
+         * temporary switch-off of capability enforcement -- confirm before
+         * removing it.
+         */
+        return 0;
+
+        if (lu_object_capa_bypass(obj))
+                return 0;
+
+        if (!capa) {
+                CERROR("no capability is provided for fid "DFID"\n", PFID(fid));
+                return -EACCES;
+        }
+
+        if (!lu_fid_eq(fid, &capa->lc_fid)) {
+                DEBUG_CAPA(D_ERROR, capa, "fid "DFID" mismatch with",
+                           PFID(fid));
+                return -EACCES;
+        }
+
+        if (!capa_opc_supported(capa, opc)) {
+                DEBUG_CAPA(D_ERROR, capa, "opc "LPX64" not supported by", opc);
+                return -EACCES;
+        }
+
+        /*
+         * capa_is_sane() returns 0 on success and a negative errno on
+         * failure, so a non-zero result means the capability is rejected.
+         */
+        if (capa_is_sane(ctx, capa, obj->lo_dev->ld_site->ls_capa_keys) != 0) {
+                DEBUG_CAPA(D_ERROR, capa, "insane");
+                return -EACCES;
+        }
+
+        return 0;
+}
+
 static struct lu_object_operations osd_lu_obj_ops = {
         .loo_object_init      = osd_object_init,
         .loo_object_delete    = osd_object_delete,
         .loo_object_release   = osd_object_release,
         .loo_object_free      = osd_object_free,
         .loo_object_print     = osd_object_print,
-        .loo_object_invariant = osd_object_invariant
+        .loo_object_invariant = osd_object_invariant,
+        .loo_object_auth      = osd_object_capa_auth
 };
 
 static struct lu_device_operations osd_lu_ops = {
index d2b4625..fab177b 100644 (file)
@@ -77,6 +77,11 @@ struct osd_thread_info {
         int                 oti_r_locks;
         int                 oti_w_locks;
         int                 oti_txns;
+        /*
+         *XXX temporary: for capa operations.
+         */
+        char                   oti_capa_hmac[CAPA_HMAC_KEY_MAX_LEN];
+        struct lustre_capa_key oti_capa_key;
 };
 
 #endif /* __KERNEL__ */
index 6a9a391..00d3cbf 100644 (file)
@@ -80,6 +80,7 @@ static int ost_destroy(struct obd_export *exp, struct ptlrpc_request *req,
                        struct obd_trans_info *oti)
 {
         struct ost_body *body, *repbody;
+        struct lustre_capa *capa;
         int rc, size[2] = { sizeof(struct ptlrpc_body), sizeof(*body) };
         ENTRY;
 
@@ -97,7 +98,9 @@ static int ost_destroy(struct obd_export *exp, struct ptlrpc_request *req,
         repbody = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF,
                                  sizeof(*repbody));
         memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
-        req->rq_status = obd_destroy(exp, &body->oa, NULL, oti, NULL);
+        if (body->oa.o_valid & OBD_MD_FLOSSCAPA)
+                capa = lustre_unpack_capa(req->rq_repmsg, REQ_REC_OFF + 1);
+        req->rq_status = obd_destroy(exp, &body->oa, NULL, oti, NULL, capa);
         RETURN(0);
 }
 
@@ -119,9 +122,12 @@ static int ost_getattr(struct obd_export *exp, struct ptlrpc_request *req)
 
         repbody = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF,
                                  sizeof(*repbody));
-        memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
+        repbody->oa = body->oa;
 
         oinfo.oi_oa = &repbody->oa;
+        if (oinfo.oi_oa->o_valid & OBD_MD_FLOSSCAPA)
+                oinfo.oi_capa = lustre_unpack_capa(req->rq_repmsg,
+                                                   REQ_REC_OFF + 1);
         req->rq_status = obd_getattr(exp, &oinfo);
         RETURN(0);
 }
@@ -278,6 +284,9 @@ static int ost_punch(struct obd_export *exp, struct ptlrpc_request *req,
                          */
                         oinfo.oi_oa->o_valid &= ~OBD_MD_FLFLAGS;
 
+                if (oinfo.oi_oa->o_valid & OBD_MD_FLOSSCAPA)
+                        oinfo.oi_capa = lustre_unpack_capa(req->rq_repmsg,
+                                                           REQ_REC_OFF + 1);
                 req->rq_status = obd_punch(exp, &oinfo, oti, NULL);
                 ost_punch_lock_put(exp, oinfo.oi_oa, &lh);
         }
@@ -287,6 +296,7 @@ static int ost_punch(struct obd_export *exp, struct ptlrpc_request *req,
 static int ost_sync(struct obd_export *exp, struct ptlrpc_request *req)
 {
         struct ost_body *body, *repbody;
+        struct lustre_capa *capa = NULL;
         int rc, size[2] = { sizeof(struct ptlrpc_body), sizeof(*repbody) };
         ENTRY;
 
@@ -295,6 +305,9 @@ static int ost_sync(struct obd_export *exp, struct ptlrpc_request *req)
         if (body == NULL)
                 RETURN(-EFAULT);
 
+        if (body->oa.o_valid & OBD_MD_FLOSSCAPA)
+                capa = lustre_unpack_capa(req->rq_reqmsg, REQ_REC_OFF + 1);
+
         rc = lustre_pack_reply(req, 2, size, NULL);
         if (rc)
                 RETURN(rc);
@@ -303,7 +316,7 @@ static int ost_sync(struct obd_export *exp, struct ptlrpc_request *req)
                                  sizeof(*repbody));
         memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
         req->rq_status = obd_sync(exp, &repbody->oa, NULL, repbody->oa.o_size,
-                                  repbody->oa.o_blocks);
+                                  repbody->oa.o_blocks, capa);
         RETURN(0);
 }
 
@@ -326,9 +339,12 @@ static int ost_setattr(struct obd_export *exp, struct ptlrpc_request *req,
 
         repbody = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF,
                                  sizeof(*repbody));
-        memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
+        repbody->oa = body->oa;
 
         oinfo.oi_oa = &repbody->oa;
+        if (oinfo.oi_oa->o_valid & OBD_MD_FLOSSCAPA)
+                oinfo.oi_capa = lustre_unpack_capa(req->rq_repmsg,
+                                                   REQ_REC_OFF + 1);
         req->rq_status = obd_setattr(exp, &oinfo, oti);
         RETURN(0);
 }
@@ -622,6 +638,7 @@ static int ost_brw_read(struct ptlrpc_request *req, struct obd_trans_info *oti)
         struct niobuf_local *local_nb;
         struct obd_ioobj *ioo;
         struct ost_body *body, *repbody;
+        struct lustre_capa *capa = NULL;
         struct l_wait_info lwi;
         struct lustre_handle lockh = { 0 };
         int size[2] = { sizeof(struct ptlrpc_body), sizeof(*body) };
@@ -669,6 +686,9 @@ static int ost_brw_read(struct ptlrpc_request *req, struct obd_trans_info *oti)
                         lustre_swab_niobuf_remote (&remote_nb[i]);
         }
 
+        if (body->oa.o_valid & OBD_MD_FLOSSCAPA)
+                capa = lustre_unpack_capa(req->rq_reqmsg, REQ_REC_OFF + 3);
+
         rc = lustre_pack_reply(req, 2, size, NULL);
         if (rc)
                 GOTO(out, rc);
@@ -700,7 +720,7 @@ static int ost_brw_read(struct ptlrpc_request *req, struct obd_trans_info *oti)
                 GOTO(out_bulk, rc);
 
         rc = obd_preprw(OBD_BRW_READ, req->rq_export, &body->oa, 1,
-                        ioo, npages, pp_rnb, local_nb, oti);
+                        ioo, npages, pp_rnb, local_nb, oti, capa);
         if (rc != 0)
                 GOTO(out_lock, rc);
 
@@ -839,6 +859,7 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti)
         struct ost_body         *body, *repbody;
         struct l_wait_info       lwi;
         struct lustre_handle     lockh = {0};
+        struct lustre_capa      *capa = NULL;
         __u32                   *rcs;
         int size[3] = { sizeof(struct ptlrpc_body), sizeof(*body) };
         int objcount, niocount, npages, comms_error = 0;
@@ -905,6 +926,9 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti)
                         lustre_swab_niobuf_remote (&remote_nb[i]);
         }
 
+        if (body->oa.o_valid & OBD_MD_FLOSSCAPA)
+                capa = lustre_unpack_capa(req->rq_reqmsg, REQ_REC_OFF + 3);
+
         size[REPLY_REC_OFF + 1] = niocount * sizeof(*rcs);
         rc = lustre_pack_reply(req, 3, size, NULL);
         if (rc != 0)
@@ -944,7 +968,7 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti)
         do_checksum = (body->oa.o_valid & OBD_MD_FLCKSUM);
 
         rc = obd_preprw(OBD_BRW_WRITE, req->rq_export, &body->oa, objcount,
-                        ioo, npages, pp_rnb, local_nb, oti);
+                        ioo, npages, pp_rnb, local_nb, oti, capa);
         if (rc != 0)
                 GOTO(out_lock, rc);
 
index 2067e61..b8d7e53 100644 (file)
@@ -73,10 +73,22 @@ static const struct req_msg_field *mdt_body_only[] = {
         &RMF_MDT_BODY
 };
 
+/* Field list used as the request side of RQF_MDS_RENEW_CAPA: body + capa. */
+static const struct req_msg_field *mdt_renew_capa_client[] = {
+        &RMF_PTLRPC_BODY,
+        &RMF_CAPA1
+};
+
+/* Field list: ptlrpc body, mdt body, then a single capability. */
+static const struct req_msg_field *mdt_body_capa[] = {
+        &RMF_PTLRPC_BODY,
+        &RMF_MDT_BODY,
+        &RMF_CAPA1
+};
+
 static const struct req_msg_field *mdt_close_client[] = {
         &RMF_PTLRPC_BODY,
         &RMF_MDT_EPOCH,
-        &RMF_REC_SETATTR
+        &RMF_REC_SETATTR,
+        &RMF_CAPA1
 };
 
 static const struct req_msg_field *mds_statfs_server[] = {
@@ -109,6 +121,7 @@ static const struct req_msg_field *fld_query_server[] = {
 static const struct req_msg_field *mds_getattr_name_client[] = {
         &RMF_PTLRPC_BODY,
         &RMF_MDT_BODY,
+        &RMF_CAPA1,
         &RMF_NAME
 };
 
@@ -120,12 +133,14 @@ static const struct req_msg_field *mds_reint_client[] = {
 static const struct req_msg_field *mds_reint_create_client[] = {
         &RMF_PTLRPC_BODY,
         &RMF_REC_CREATE,
+        &RMF_CAPA1,
         &RMF_NAME,
 };
 
 static const struct req_msg_field *mds_reint_create_sym_client[] = {
         &RMF_PTLRPC_BODY,
         &RMF_REC_CREATE,
+        &RMF_CAPA1,
         &RMF_NAME,
         &RMF_SYMTGT
 };
@@ -133,6 +148,7 @@ static const struct req_msg_field *mds_reint_create_sym_client[] = {
 static const struct req_msg_field *mds_reint_create_slave_client[] = {
         &RMF_PTLRPC_BODY,
         &RMF_REC_CREATE,
+        &RMF_CAPA1,
         &RMF_NAME,
         &RMF_EADATA
 };
@@ -140,6 +156,8 @@ static const struct req_msg_field *mds_reint_create_slave_client[] = {
 static const struct req_msg_field *mds_reint_open_client[] = {
         &RMF_PTLRPC_BODY,
         &RMF_REC_CREATE,
+        &RMF_CAPA1,
+        &RMF_CAPA2,
         &RMF_NAME,
         &RMF_EADATA
 };
@@ -148,24 +166,31 @@ static const struct req_msg_field *mds_reint_open_server[] = {
         &RMF_PTLRPC_BODY,
         &RMF_MDT_BODY,
         &RMF_MDT_MD,
-        &RMF_ACL
+        &RMF_ACL,
+        &RMF_CAPA1,
+        &RMF_CAPA2
 };
 
 static const struct req_msg_field *mds_reint_unlink_client[] = {
         &RMF_PTLRPC_BODY,
         &RMF_REC_UNLINK,
+        &RMF_CAPA1,
         &RMF_NAME
 };
 
 static const struct req_msg_field *mds_reint_link_client[] = {
         &RMF_PTLRPC_BODY,
         &RMF_REC_LINK,
+        &RMF_CAPA1,
+        &RMF_CAPA2,
         &RMF_NAME
 };
 
 static const struct req_msg_field *mds_reint_rename_client[] = {
         &RMF_PTLRPC_BODY,
         &RMF_REC_RENAME,
+        &RMF_CAPA1,
+        &RMF_CAPA2,
         &RMF_NAME,
         &RMF_SYMTGT
 };
@@ -180,6 +205,7 @@ static const struct req_msg_field *mds_last_unlink_server[] = {
 static const struct req_msg_field *mds_reint_setattr_client[] = {
         &RMF_PTLRPC_BODY,
         &RMF_REC_SETATTR,
+        &RMF_CAPA1,
         &RMF_MDT_EPOCH,
         &RMF_EADATA,
         &RMF_LOGCOOKIES
@@ -220,7 +246,9 @@ static const struct req_msg_field *ldlm_intent_server[] = {
         &RMF_DLM_REP,
         &RMF_MDT_BODY,
         &RMF_MDT_MD,
-        &RMF_ACL
+        &RMF_ACL,
+        &RMF_CAPA1,
+        &RMF_CAPA2
 };
 
 static const struct req_msg_field *ldlm_intent_getattr_client[] = {
@@ -228,6 +256,7 @@ static const struct req_msg_field *ldlm_intent_getattr_client[] = {
         &RMF_DLM_REQ,
         &RMF_LDLM_INTENT,
         &RMF_MDT_BODY,     /* coincides with mds_getattr_name_client[] */
+        &RMF_CAPA1,
         &RMF_NAME
 };
 
@@ -236,6 +265,7 @@ static const struct req_msg_field *ldlm_intent_create_client[] = {
         &RMF_DLM_REQ,
         &RMF_LDLM_INTENT,
         &RMF_REC_CREATE,    /* coincides with mds_reint_create_client[] */
+        &RMF_CAPA1,
         &RMF_NAME,
         &RMF_EADATA
 };
@@ -245,6 +275,8 @@ static const struct req_msg_field *ldlm_intent_open_client[] = {
         &RMF_DLM_REQ,
         &RMF_LDLM_INTENT,
         &RMF_REC_CREATE,    /* coincides with mds_reint_open_client[] */
+        &RMF_CAPA1,
+        &RMF_CAPA2,
         &RMF_NAME,
         &RMF_EADATA
 };
@@ -254,12 +286,14 @@ static const struct req_msg_field *ldlm_intent_unlink_client[] = {
         &RMF_DLM_REQ,
         &RMF_LDLM_INTENT,
         &RMF_REC_UNLINK,    /* coincides with mds_reint_unlink_client[] */
+        &RMF_CAPA1,
         &RMF_NAME
 };
 
 static const struct req_msg_field *mds_getxattr_client[] = {
         &RMF_PTLRPC_BODY,
         &RMF_MDT_BODY,
+        &RMF_CAPA1,
         &RMF_NAME,
         &RMF_EADATA
 };
@@ -273,6 +307,7 @@ static const struct req_msg_field *mds_getxattr_server[] = {
 static const struct req_msg_field *mds_setxattr_client[] = {
         &RMF_PTLRPC_BODY,
         &RMF_MDT_BODY,
+        &RMF_CAPA1,
         &RMF_NAME,
         &RMF_EADATA
 };
@@ -286,7 +321,8 @@ static const struct req_msg_field *mds_getattr_server[] = {
         &RMF_PTLRPC_BODY,
         &RMF_MDT_BODY,
         &RMF_MDT_MD,
-        &RMF_ACL
+        &RMF_ACL,
+        &RMF_CAPA1
 };
 
 static const struct req_format *req_formats[] = {
@@ -486,6 +522,16 @@ const struct req_msg_field RMF_REINT_OPC =
         DEFINE_MSGF("reint_opc", 0, sizeof(__u32), lustre_swab_generic_32s);
 EXPORT_SYMBOL(RMF_REINT_OPC);
 
+/*
+ * Message field sized as one struct lustre_capa, with
+ * lustre_swab_lustre_capa as its byte-swap routine.
+ */
+const struct req_msg_field RMF_CAPA1 =
+        DEFINE_MSGF("capa", 0, sizeof(struct lustre_capa),
+                    lustre_swab_lustre_capa);
+EXPORT_SYMBOL(RMF_CAPA1);
+
+/*
+ * Second capability field, used by layouts that list two capabilities
+ * (e.g. open, link, rename).
+ * NOTE(review): defined with the same name string "capa" as RMF_CAPA1 --
+ * confirm the shared name is intended.
+ */
+const struct req_msg_field RMF_CAPA2 =
+        DEFINE_MSGF("capa", 0, sizeof(struct lustre_capa),
+                    lustre_swab_lustre_capa);
+EXPORT_SYMBOL(RMF_CAPA2);
+
 /*
  * Request formats.
  */
@@ -525,7 +571,7 @@ const struct req_format RQF_FLD_QUERY =
 EXPORT_SYMBOL(RQF_FLD_QUERY);
 
 const struct req_format RQF_MDS_GETSTATUS =
-        DEFINE_REQ_FMT0("MDS_GETSTATUS", empty, mdt_body_only);
+        DEFINE_REQ_FMT0("MDS_GETSTATUS", empty, mdt_body_capa);
 EXPORT_SYMBOL(RQF_MDS_GETSTATUS);
 
 const struct req_format RQF_MDS_STATFS =
@@ -533,11 +579,11 @@ const struct req_format RQF_MDS_STATFS =
 EXPORT_SYMBOL(RQF_MDS_STATFS);
 
 const struct req_format RQF_MDS_SYNC =
-        DEFINE_REQ_FMT0("MDS_SYNC", mdt_body_only, mdt_body_only);
+        DEFINE_REQ_FMT0("MDS_SYNC", mdt_body_capa, mdt_body_only);
 EXPORT_SYMBOL(RQF_MDS_SYNC);
 
 const struct req_format RQF_MDS_GETATTR =
-        DEFINE_REQ_FMT0("MDS_GETATTR", mdt_body_only, mds_getattr_server);
+        DEFINE_REQ_FMT0("MDS_GETATTR", mdt_body_capa, mds_getattr_server);
 EXPORT_SYMBOL(RQF_MDS_GETATTR);
 
 const struct req_format RQF_MDS_GETXATTR =
@@ -596,7 +642,7 @@ EXPORT_SYMBOL(RQF_MDS_REINT_RENAME);
 
 const struct req_format RQF_MDS_REINT_SETATTR =
         DEFINE_REQ_FMT0("MDS_REINT_SETATTR",
-                        mds_reint_setattr_client, mdt_body_only);
+                        mds_reint_setattr_client, mdt_body_capa);
 EXPORT_SYMBOL(RQF_MDS_REINT_SETATTR);
 
 const struct req_format RQF_MDS_CONNECT =
@@ -645,7 +691,7 @@ EXPORT_SYMBOL(RQF_MDS_CLOSE);
 
 const struct req_format RQF_MDS_PIN =
         DEFINE_REQ_FMT0("MDS_PIN",
-                        mdt_body_only, mdt_body_only);
+                        mdt_body_capa, mdt_body_only);
 EXPORT_SYMBOL(RQF_MDS_PIN);
 
 const struct req_format RQF_MDS_DONE_WRITING =
@@ -655,7 +701,7 @@ EXPORT_SYMBOL(RQF_MDS_DONE_WRITING);
 
 const struct req_format RQF_MDS_READPAGE =
         DEFINE_REQ_FMT0("MDS_READPAGE",
-                        mdt_body_only, mdt_body_only);
+                        mdt_body_capa, mdt_body_only);
 EXPORT_SYMBOL(RQF_MDS_READPAGE);
 
 const struct req_format RQF_MDS_WRITEPAGE =
@@ -668,6 +714,11 @@ const struct req_format RQF_MDS_IS_SUBDIR =
                         mdt_body_only, mdt_body_only);
 EXPORT_SYMBOL(RQF_MDS_IS_SUBDIR);
 
+/*
+ * Format for MDS_RENEW_CAPA, built from mdt_renew_capa_client and
+ * mdt_body_capa (presumably request side and reply side respectively,
+ * following the other DEFINE_REQ_FMT0 uses -- TODO confirm).
+ */
+const struct req_format RQF_MDS_RENEW_CAPA =
+        DEFINE_REQ_FMT0("MDS_RENEW_CAPA",
+                        mdt_renew_capa_client, mdt_body_capa);
+EXPORT_SYMBOL(RQF_MDS_RENEW_CAPA);
+
 #if !defined(__REQ_LAYOUT_USER__)
 
 int req_layout_init(void)
index a430307..5b44000 100644 (file)
@@ -77,6 +77,7 @@ struct ll_rpc_opcode {
         { MDS_SETXATTR,     "mds_setxattr" },
         { MDS_WRITEPAGE,    "mds_writepage" },
         { MDS_IS_SUBDIR,    "mds_is_subdir" },
+        { MDS_RENEW_CAPA,   "mds_renew_capa" },
         { LDLM_ENQUEUE,     "ldlm_enqueue" },
         { LDLM_CONVERT,     "ldlm_convert" },
         { LDLM_CANCEL,      "ldlm_cancel" },
index e17610a..33829dd 100644 (file)
@@ -2229,3 +2229,19 @@ void debug_req(cfs_debug_limit_state_t *cdls,
                req->rq_repmsg ? lustre_msg_get_status(req->rq_repmsg) : 0);
 }
 EXPORT_SYMBOL(debug_req);
+
+/*
+ * Byte-swap, in place, the fid, opcode mask, flags, key id and expiry of
+ * the capability @c.
+ */
+void lustre_swab_lustre_capa(struct lustre_capa *c)
+{
+        lustre_swab_lu_fid(&c->lc_fid);
+        __swab64s(&c->lc_opc);
+        __swab32s(&c->lc_flags);
+        __swab32s(&c->lc_keyid);
+        __swab64s(&c->lc_expiry);
+}
+
+/*
+ * Byte-swap, in place, the mds id, key id and padding word of the
+ * capability key @k.
+ */
+void lustre_swab_lustre_capa_key(struct lustre_capa_key *k)
+{
+        __swab64s(&k->lk_mdsid);
+        __swab32s(&k->lk_keyid);
+        __swab32s(&k->lk_padding);
+}
index 018bdfb..b625000 100644 (file)
@@ -276,6 +276,8 @@ EXPORT_SYMBOL(lustre_msg_set_conn_cnt);
 EXPORT_SYMBOL(lustre_swab_mgs_target_info);
 EXPORT_SYMBOL(lustre_swab_md_fld);
 EXPORT_SYMBOL(lustre_swab_generic_32s);
+EXPORT_SYMBOL(lustre_swab_lustre_capa);
+EXPORT_SYMBOL(lustre_swab_lustre_capa_key);
 
 /* recover.c */
 EXPORT_SYMBOL(ptlrpc_disconnect_import);
index 034aec0..4ffbd6c 100644 (file)
@@ -1306,6 +1306,12 @@ static int mkfs_mdt(struct mkfs_opts *mop)
                 goto out_umount;
         }
 
+        snprintf(filepnm, sizeof(filepnm) - 1, "%s/%s", mntpt, CAPA_KEYS);
+        ret = iam_creat(filepnm, FMT_LFIX, L_BLOCK_SIZE, 1, 1, 4);
+        if (ret) {
+                goto out_umount;
+        }
+
         umount(mntpt);
         ret = mount(source, mntpt, fstype, 0, NULL);
         if (ret) {
index fed8092..389d158 100644 (file)
@@ -52,6 +52,8 @@
 #define lustre_swab_mdt_rec_rename NULL
 #define lustre_swab_mdt_rec_create NULL
 #define lustre_swab_mdt_rec_setattr NULL
+#define lustre_swab_lustre_capa NULL
+#define lustre_swab_lustre_capa_key NULL
 
 /*
  * Yes, include .c file.