* File join has been disabled in this release, refer to Bugzilla 16929.
Severity : normal
+Frequency : starting the MDS on an uncleanly shut down MDS device
+Bugzilla : 16839
+Description: ll_sync thread stays waiting for mds<>ost recovery to finish
+Details : waiting for mds<>ost recovery to finish produced random bugs
+             due to a race between two ll_sync threads for one lov target.
+             Send the ACTIVATE event only if the connect has really finished
+             and the import is in the FULL state.
+
+Severity : normal
+Frequency : rare, connect and disconnect target at same time
+Bugzilla : 17310
+Description: ASSERTION(atomic_read(&imp->imp_inflight) == 0
+Details : don't call obd_disconnect under lov_lock; it is a long-running
+             operation and can block ptlrpcd, which answers the connect request.
+
+Severity : normal
+Frequency : rare
+Bugzilla : 18154
+Description: don't lose a wakeup for imp_recovery_waitq
+Details : recover_import_no_retry or invalidate_import and import_close can
+             both sleep on imp_recovery_waitq, but we were sending only one
+             wakeup to the sleep queue.
+
+Severity : normal
Frequency : always with long access acl
Bugzilla : 17636
Description: mds can't pack a reply with a long acl.
from a different NID, so we do not need to wait for the export to be
evicted
+Severity : major
+Frequency : rare, only if using MMP with Linux RAID
+Bugzilla : 17895
+Description: MMP doesn't work with Linux RAID
+Details : While using HA for Lustre servers with Linux RAID, it is possible
+ that MMP will not detect multiple mounts. To make this work we
+ need to unplug the device queue in RAID when the MMP block is being
+             written. Also, while reading the MMP block, we should read it
+             from disk rather than from the cache.
+
+Severity : enhancement
+Bugzilla : 17187
+Description: open file using fid
+Details : A file can be opened using just its fid, like
+ <mntpt>/.lustre/fid/SEQ:OID:VER - this is needed for HSM and replication
+
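As an illustration of the open-by-fid feature above, here is a minimal user-space sketch; the helper name and mount point are hypothetical, and only the <mntpt>/.lustre/fid/SEQ:OID:VER path layout comes from the entry itself:

#include <stdio.h>
#include <fcntl.h>

/* Hypothetical helper: open a file by fid through the special
 * .lustre/fid directory. Components are printed in hex, matching the
 * SEQ:OID:VER form described above. */
int open_by_fid(const char *mntpt, unsigned long long seq,
                unsigned int oid, unsigned int ver)
{
        char path[256];

        snprintf(path, sizeof(path), "%s/.lustre/fid/0x%llx:0x%x:0x%x",
                 mntpt, seq, oid, ver);
        return open(path, O_RDONLY);
}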
--------------------------------------------------------------------------------
2007-08-10 Cluster File Systems, Inc. <info@clusterfs.com>
dnl FIXME
AC_DEFUN([LC_CONFIG_RMTCLIENT],
[LB_LINUX_CONFIG_IM([CRYPTO_AES],[],[
- AC_MSG_ERROR([Lustre remote client require that CONFIG_CRYPTO_AES is enabled in your kernel.])
+	AC_MSG_WARN([Lustre remote client requires that CONFIG_CRYPTO_AES be enabled in your kernel.])
])
])
AC_DEFUN([LC_CONFIG_GSS_KEYRING],
[AC_MSG_CHECKING([whether to enable gss keyring backend])
AC_ARG_ENABLE([gss_keyring],
- [AC_HELP_STRING([--disable-gss-keyring],
+ [AC_HELP_STRING([--disable-gss-keyring],
[disable gss keyring backend])],
- [],[enable_gss_keyring='yes'])
+ [],[enable_gss_keyring='yes'])
AC_MSG_RESULT([$enable_gss_keyring])
if test x$enable_gss_keyring != xno; then
- LB_LINUX_CONFIG_IM([KEYS],[],
+ LB_LINUX_CONFIG_IM([KEYS],[],
	[AC_MSG_ERROR([GSS keyring backend requires that CONFIG_KEYS be enabled in your kernel.])])
- AC_CHECK_LIB([keyutils], [keyctl_search], [],
+ AC_CHECK_LIB([keyutils], [keyctl_search], [],
[AC_MSG_ERROR([libkeyutils is not found, which is required by gss keyring backend])],)
- AC_DEFINE([HAVE_GSS_KEYRING], [1],
+ AC_DEFINE([HAVE_GSS_KEYRING], [1],
[Define this if you enable gss keyring backend])
fi
])
AC_MSG_RESULT([$enable_gss])
if test x$enable_gss == xyes; then
- LC_CONFIG_GSS_KEYRING
+ LC_CONFIG_GSS_KEYRING
LC_CONFIG_SUNRPC
+ AC_DEFINE([HAVE_GSS], [1], [Define this if you enable gss])
+
LB_LINUX_CONFIG_IM([CRYPTO_MD5],[],
	[AC_MSG_WARN([kernel MD5 support is recommended when using GSS.])])
- LB_LINUX_CONFIG_IM([CRYPTO_SHA1],[],
+ LB_LINUX_CONFIG_IM([CRYPTO_SHA1],[],
	[AC_MSG_WARN([kernel SHA1 support is recommended when using GSS.])])
- LB_LINUX_CONFIG_IM([CRYPTO_SHA256],[],
+ LB_LINUX_CONFIG_IM([CRYPTO_SHA256],[],
	[AC_MSG_WARN([kernel SHA256 support is recommended when using GSS.])])
- LB_LINUX_CONFIG_IM([CRYPTO_SHA512],[],
+ LB_LINUX_CONFIG_IM([CRYPTO_SHA512],[],
	[AC_MSG_WARN([kernel SHA512 support is recommended when using GSS.])])
- LB_LINUX_CONFIG_IM([CRYPTO_WP512],[],
- [AC_MSG_WARN([kernel WP512 support is recommended by using GSS.])])
- LB_LINUX_CONFIG_IM([CRYPTO_ARC4],[],
- [AC_MSG_WARN([kernel ARC4 support is recommended by using GSS.])])
- LB_LINUX_CONFIG_IM([CRYPTO_DES],[],
- [AC_MSG_WARN([kernel DES support is recommended by using GSS.])])
- LB_LINUX_CONFIG_IM([CRYPTO_TWOFISH],[],
- [AC_MSG_WARN([kernel TWOFISH support is recommended by using GSS.])])
- LB_LINUX_CONFIG_IM([CRYPTO_CAST6],[],
- [AC_MSG_WARN([kernel CAST6 support is recommended by using GSS.])])
-
- AC_CHECK_LIB([gssapi], [gss_init_sec_context],
+
+ AC_CHECK_LIB([gssapi], [gss_init_sec_context],
[GSSAPI_LIBS="$GSSAPI_LDFLAGS -lgssapi"],
[AC_CHECK_LIB([gssglue], [gss_init_sec_context],
[GSSAPI_LIBS="$GSSAPI_LDFLAGS -lgssglue"],
[AC_MSG_ERROR([libgssapi or libgssglue is not found, which is required by GSS.])])],)
- AC_SUBST(GSSAPI_LIBS)
+ AC_SUBST(GSSAPI_LIBS)
- AC_KERBEROS_V5
+ AC_KERBEROS_V5
fi
])
m4_define([LUSTRE_MAJOR],[1])
m4_define([LUSTRE_MINOR],[9])
-m4_define([LUSTRE_PATCH],[130])
+m4_define([LUSTRE_PATCH],[150])
m4_define([LUSTRE_FIX],[0])
dnl # don't forget to update the service tags info
static int __init cmm_mod_init(void)
{
struct lprocfs_static_vars lvars;
+ int rc;
+
+ /*
+	 * Kludge code: this should be moved to mdc_device.c if mdc_(mds)_device
+	 * is really stacked.
+ */
+ rc = lu_device_type_init(&mdc_device_type);
+ if (rc)
+ return rc;
lprocfs_cmm_init_vars(&lvars);
- return class_register_type(&cmm_obd_device_ops, NULL, lvars.module_vars,
- LUSTRE_CMM_NAME, &cmm_device_type);
+ rc = class_register_type(&cmm_obd_device_ops, NULL, lvars.module_vars,
+ LUSTRE_CMM_NAME, &cmm_device_type);
+ if (rc)
+ lu_device_type_fini(&mdc_device_type);
+
+ return rc;
}
static void __exit cmm_mod_exit(void)
{
class_unregister_type(LUSTRE_CMM_NAME);
+ lu_device_type_fini(&mdc_device_type);
}
MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
CERROR("target %s not set up\n", mdc->obd_name);
rc = -EINVAL;
} else {
- struct lustre_handle *conn = &desc->cl_conn;
struct obd_connect_data *ocd;
CDEBUG(D_CONFIG, "connect to %s(%s)\n",
OBD_CONNECT_MDS_MDS |
OBD_CONNECT_FID |
OBD_CONNECT_AT;
- rc = obd_connect(env, conn, mdc, &mdc->obd_uuid, ocd, NULL);
+ rc = obd_connect(env, &desc->cl_exp, mdc, &mdc->obd_uuid, ocd, NULL);
OBD_FREE_PTR(ocd);
if (rc) {
CERROR("target %s connect error %d\n",
mdc->obd_name, rc);
} else {
- desc->cl_exp = class_conn2export(conn);
/* set seq controller export for MDC0 if exists */
if (mc->mc_num == 0)
ms->ms_control_exp =
#include <md_object.h>
struct mdc_cli_desc {
- struct lustre_handle cl_conn;
/* uuid of remote MDT to connect */
struct obd_uuid cl_srv_uuid;
/* mdc uuid */
*
* The first 0x400 sequences of normal FID are reserved for special purpose.
* FID_SEQ_START + 1 is for local file id generation.
 * FID_SEQ_START + 2 is for the .lustre directory and its objects.
*/
const struct lu_seq_range LUSTRE_SEQ_SPACE_RANGE = {
FID_SEQ_START + 0x400ULL,
.f_oid = 0x0000000000000001,
.f_ver = 0x0000000000000000 };
EXPORT_SYMBOL(LUSTRE_BFL_FID);
+
+/** Special fid for ".lustre" directory */
+const struct lu_fid LU_DOT_LUSTRE_FID = { .f_seq = LU_DOT_LUSTRE_SEQ,
+ .f_oid = 0x0000000000000001,
+ .f_ver = 0x0000000000000000 };
+EXPORT_SYMBOL(LU_DOT_LUSTRE_FID);
+
+/** Special fid for the "fid" object in .lustre */
+const struct lu_fid LU_OBF_FID = { .f_seq = LU_DOT_LUSTRE_SEQ,
+ .f_oid = 0x0000000000000002,
+ .f_ver = 0x0000000000000000 };
+EXPORT_SYMBOL(LU_OBF_FID);
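Since several subsystems key off these constants, here is a small hedged sketch of how the special fids would typically be recognized (lu_fid_eq() is assumed to be the standard fid comparator; the helper itself is illustrative, not taken from the source):

/* Illustrative helper: true iff @fid names one of the special
 * .lustre objects defined above. */
static inline int fid_is_dot_lustre_obj(const struct lu_fid *fid)
{
        return lu_fid_eq(fid, &LU_DOT_LUSTRE_FID) ||
               lu_fid_eq(fid, &LU_OBF_FID);
}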
#include <lustre_fid.h>
#include <lustre_req_layout.h>
#include "fld_internal.h"
+#include <lustre_fid.h>
#ifdef __KERNEL__
int mds_node_id)
{
int cache_size, cache_threshold;
+ struct lu_seq_range range;
int rc;
ENTRY;
GOTO(out, rc);
fld->lsf_control_exp = NULL;
+
+ /* Insert reserved sequence number of ".lustre" into fld cache. */
+ range.lsr_start = LU_DOT_LUSTRE_SEQ;
+ range.lsr_end = LU_DOT_LUSTRE_SEQ + 1;
+ range.lsr_mdt = 0;
+ fld_cache_insert(fld->lsf_cache, &range);
+
EXIT;
out:
if (rc)
*/
CEF_DISCARD_DATA = 0x00000004,
/**
- * tell the sub layers that it must be a `real' lock.
+ * tell the sub layers that it must be a `real' lock. This is used for
+     * mmapped-buffer locks and glimpse locks that must never be converted
+ * into lockless mode.
+ *
+ * \see vvp_mmap_locks(), cl_glimpse_lock().
*/
CEF_MUST = 0x00000008,
/**
- * tell the sub layers that never request a `real' lock.
- * currently, the CEF_MUST & CEF_NEVER are only used for mmap locks.
- * cl_io::ci_lockreq and these two flags: ci_lockreq just describes
- * generic information of lock requirement for this IO, especially for
- * locks which belong to the object doing IO; however, lock itself may
- * have precise requirements, this is described by the latter.
+     * tell the sub layers to never request a `real' lock. This flag is
+     * not currently used.
+ *
+     * cl_io::ci_lockreq and the CEF_{MUST,NEVER} flags specify the lockless
+     * conversion policy: ci_lockreq describes the generic lock requirement
+     * for this IO, especially for locks which belong to the object doing
+     * IO; the lock itself, however, may have precise requirements that are
+     * described by the enqueue flags.
*/
CEF_NEVER = 0x00000010,
/**
{
LASSERT(lh);
LASSERT(LHO(lh));
+ LASSERT(LHP(lh, hash));
- if (LHP(lh, hash))
- return LHP(lh, hash)(lh, key, mask);
-
- return -EOPNOTSUPP;
+ return LHP(lh, hash)(lh, key, mask);
}
static inline void *
int lustre_fwrite(struct file *file, const void *buf, int len, loff_t *off);
int lustre_fsync(struct file *file);
long l_readdir(struct file * file, struct list_head *dentry_list);
+int l_notify_change(struct vfsmount *mnt, struct dentry *dchild,
+ struct iattr *newattrs);
+int simple_truncate(struct dentry *dir, struct vfsmount *mnt,
+ char *name, loff_t length);
static inline void l_dput(struct dentry *de)
{
(LDLM_LAST_OPC - LDLM_FIRST_OPC) +
(MDS_LAST_OPC - MDS_FIRST_OPC) +
(OST_LAST_OPC - OST_FIRST_OPC));
- } else if (opc < FLD_LAST_OPC) {
- /* FLD opcode */
- return (opc - FLD_FIRST_OPC +
+ } else if (opc < QUOTA_LAST_OPC) {
+ /* LQUOTA Opcode */
+ return (opc - QUOTA_FIRST_OPC +
(LLOG_LAST_OPC - LLOG_FIRST_OPC) +
(OBD_LAST_OPC - OBD_FIRST_OPC) +
(MGS_LAST_OPC - MGS_FIRST_OPC) +
} else if (opc < SEQ_LAST_OPC) {
/* SEQ opcode */
return (opc - SEQ_FIRST_OPC +
- (FLD_LAST_OPC - FLD_FIRST_OPC) +
+                       (QUOTA_LAST_OPC - QUOTA_FIRST_OPC) +
(LLOG_LAST_OPC - LLOG_FIRST_OPC) +
(OBD_LAST_OPC - OBD_FIRST_OPC) +
(MGS_LAST_OPC - MGS_FIRST_OPC) +
/* SEC opcode */
return (opc - SEC_FIRST_OPC +
(SEQ_LAST_OPC - SEQ_FIRST_OPC) +
- (FLD_LAST_OPC - FLD_FIRST_OPC) +
+                       (QUOTA_LAST_OPC - QUOTA_FIRST_OPC) +
(LLOG_LAST_OPC - LLOG_FIRST_OPC) +
(OBD_LAST_OPC - OBD_FIRST_OPC) +
(MGS_LAST_OPC - MGS_FIRST_OPC) +
(LDLM_LAST_OPC - LDLM_FIRST_OPC) +
(MDS_LAST_OPC - MDS_FIRST_OPC) +
(OST_LAST_OPC - OST_FIRST_OPC));
- } else if (opc < QUOTA_LAST_OPC) {
- /* LQUOTA Opcode */
- return (opc - QUOTA_FIRST_OPC +
+ } else if (opc < FLD_LAST_OPC) {
+ /* FLD opcode */
+ return (opc - FLD_FIRST_OPC +
(SEC_LAST_OPC - SEC_FIRST_OPC) +
(SEQ_LAST_OPC - SEQ_FIRST_OPC) +
- (FLD_LAST_OPC - FLD_FIRST_OPC) +
+                       (QUOTA_LAST_OPC - QUOTA_FIRST_OPC) +
(LLOG_LAST_OPC - LLOG_FIRST_OPC) +
(OBD_LAST_OPC - OBD_FIRST_OPC) +
(MGS_LAST_OPC - MGS_FIRST_OPC) +
}
}
-#define LUSTRE_MAX_OPCODES ((LDLM_LAST_OPC - LDLM_FIRST_OPC) + \
+
+#define LUSTRE_MAX_OPCODES ((OST_LAST_OPC - OST_FIRST_OPC) + \
(MDS_LAST_OPC - MDS_FIRST_OPC) + \
- (OST_LAST_OPC - OST_FIRST_OPC) + \
- (OBD_LAST_OPC - OBD_FIRST_OPC) + \
- (FLD_LAST_OPC - FLD_FIRST_OPC) + \
- (SEQ_LAST_OPC - SEQ_FIRST_OPC) + \
+ (LDLM_LAST_OPC - LDLM_FIRST_OPC) + \
(MGS_LAST_OPC - MGS_FIRST_OPC) + \
+ (OBD_LAST_OPC - OBD_FIRST_OPC) + \
(LLOG_LAST_OPC - LLOG_FIRST_OPC) + \
+ (QUOTA_LAST_OPC - QUOTA_FIRST_OPC) + \
+ (SEQ_LAST_OPC - SEQ_FIRST_OPC) + \
(SEC_LAST_OPC - SEC_FIRST_OPC) + \
- (QUOTA_LAST_OPC - QUOTA_FIRST_OPC))
+ (FLD_LAST_OPC - FLD_FIRST_OPC))
#define EXTRA_MAX_OPCODES ((PTLRPC_LAST_CNTR - PTLRPC_FIRST_CNTR) + \
(EXTRA_LAST_OPC - EXTRA_FIRST_OPC))
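To make the reordered cumulative-offset scheme above concrete, here is a hedged worked example (it assumes the enclosing function is the usual opcode_offset() helper shown in fragments above):

/* Illustration only: after the reordering, a FLD opcode is counted
 * after the SEC, SEQ, QUOTA, ... ranges, e.g.
 *
 *   opcode_offset(FLD_QUERY)
 *     == (FLD_QUERY - FLD_FIRST_OPC) +
 *        (SEC_LAST_OPC - SEC_FIRST_OPC) +
 *        (SEQ_LAST_OPC - SEQ_FIRST_OPC) +
 *        (QUOTA_LAST_OPC - QUOTA_FIRST_OPC) + ...
 *
 * LUSTRE_MAX_OPCODES remains the sum of all the range sizes, so each
 * opcode still maps to a unique counter slot in [0, LUSTRE_MAX_OPCODES).
 */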
#ifndef HAVE_LINUX_FIEMAP_H
struct ll_fiemap_extent {
- __u64 fe_logical; /* logical offset in bytes for the start of
- * the extent from the beginning of the file */
- __u64 fe_physical; /* physical offset in bytes for the start
- * of the extent from the beginning of the disk */
- __u64 fe_length; /* length in bytes for the extent */
- __u32 fe_flags; /* FIEMAP_EXTENT_* flags for the extent */
- __u32 fe_device; /* device number for this extent */
+ __u64 fe_logical; /* logical offset in bytes for the start of
+ * the extent from the beginning of the file */
+ __u64 fe_physical; /* physical offset in bytes for the start
+ * of the extent from the beginning of the disk */
+ __u64 fe_length; /* length in bytes for this extent */
+ __u64 fe_reserved64[2];
+ __u32 fe_flags; /* FIEMAP_EXTENT_* flags for this extent */
+ __u32 fe_device; /* device number for this extent */
+ __u32 fe_reserved[2];
};
struct ll_user_fiemap {
- __u64 fm_start; /* logical offset (inclusive) at
- * which to start mapping (in) */
- __u64 fm_length; /* logical length of mapping which
- * userspace wants (in) */
- __u32 fm_flags; /* FIEMAP_FLAG_* flags for request (in/out) */
- __u32 fm_mapped_extents;/* number of extents that were mapped (out) */
- __u32 fm_extent_count; /* size of fm_extents array (in) */
- __u32 fm_reserved;
- struct ll_fiemap_extent fm_extents[0]; /* array of mapped extents (out).
- * Lustre uses first extent to
- * send end_offset */
+ __u64 fm_start; /* logical offset (inclusive) at
+ * which to start mapping (in) */
+ __u64 fm_length; /* logical length of mapping which
+ * userspace wants (in) */
+ __u32 fm_flags; /* FIEMAP_FLAG_* flags for request (in/out) */
+ __u32 fm_mapped_extents;/* number of extents that were mapped (out) */
+ __u32 fm_extent_count; /* size of fm_extents array (in) */
+ __u32 fm_reserved;
+ struct ll_fiemap_extent fm_extents[0]; /* array of mapped extents (out) */
};
#define FIEMAP_MAX_OFFSET (~0ULL)
#define FIEMAP_FLAGS_COMPAT (FIEMAP_FLAG_SYNC | FIEMAP_FLAG_XATTR | \
FIEMAP_FLAG_DEVICE_ORDER)
-#define FIEMAP_EXTENT_LAST 0x00000001 /* Last extent in file. */
-#define FIEMAP_EXTENT_UNKNOWN 0x00000002 /* Data location unknown. */
-#define FIEMAP_EXTENT_DELALLOC 0x00000004 /* Location still pending.
- * Sets EXTENT_UNKNOWN. */
-#define FIEMAP_EXTENT_NO_DIRECT 0x00000008 /* Data mapping undefined */
-#define FIEMAP_EXTENT_SECONDARY 0x00000010 /* Data copied offline. May
- * set EXTENT_NO_DIRECT. */
-#define FIEMAP_EXTENT_NET 0x00000020 /* Data stored remotely.
- * Sets EXTENT_NO_DIRECT. */
-#define FIEMAP_EXTENT_DATA_COMPRESSED 0x00000040 /* Data is compressed by fs.
- * Sets EXTENT_NO_DIRECT. */
-#define FIEMAP_EXTENT_DATA_ENCRYPTED 0x00000080 /* Data is encrypted by fs.
- * Sets EXTENT_NO_DIRECT. */
-#define FIEMAP_EXTENT_NOT_ALIGNED 0x00000100 /* Extent offsets may not be
- * block aligned. */
-#define FIEMAP_EXTENT_DATA_INLINE 0x00000200 /* Data mixed with metadata.
- * Sets EXTENT_NOT_ALIGNED.*/
-#define FIEMAP_EXTENT_DATA_TAIL 0x00000400 /* Multiple files in block.
- * Sets EXTENT_NOT_ALIGNED.*/
-#define FIEMAP_EXTENT_UNWRITTEN 0x00000800 /* Space allocated, but
- * no data (i.e. zero). */
-#define FIEMAP_EXTENT_MERGED 0x00001000 /* File does not natively
- * support extents. Result
- * merged for efficiency. */
+
+#define FIEMAP_EXTENT_LAST 0x00000001 /* Last extent in file. */
+#define FIEMAP_EXTENT_UNKNOWN 0x00000002 /* Data location unknown. */
+#define FIEMAP_EXTENT_DELALLOC 0x00000004 /* Location still pending.
+ * Sets EXTENT_UNKNOWN. */
+#define FIEMAP_EXTENT_ENCODED 0x00000008 /* Data can not be read
+ * while fs is unmounted */
+#define FIEMAP_EXTENT_DATA_ENCRYPTED 0x00000080 /* Data is encrypted by fs.
+ * Sets EXTENT_NO_DIRECT. */
+#define FIEMAP_EXTENT_NOT_ALIGNED 0x00000100 /* Extent offsets may not be
+ * block aligned. */
+#define FIEMAP_EXTENT_DATA_INLINE 0x00000200 /* Data mixed with metadata.
+ * Sets EXTENT_NOT_ALIGNED.*/
+#define FIEMAP_EXTENT_DATA_TAIL 0x00000400 /* Multiple files in block.
+ * Sets EXTENT_NOT_ALIGNED.*/
+#define FIEMAP_EXTENT_UNWRITTEN 0x00000800 /* Space allocated, but
+ * no data (i.e. zero). */
+#define FIEMAP_EXTENT_MERGED 0x00001000 /* File does not natively
+ * support extents. Result
+ * merged for efficiency. */
+
+/* Lustre specific flags - use a high bit, don't conflict with upstream flag */
+#define FIEMAP_EXTENT_NO_DIRECT 0x40000000 /* Data mapping undefined */
+#define FIEMAP_EXTENT_NET 0x80000000 /* Data stored remotely.
+ * Sets NO_DIRECT flag */
#else
*
* Lustre wire protocol definitions.
*
- * We assume all nodes are either little-endian or big-endian, and we
- * always send messages in the sender's native format. The receiver
- * detects the message format by checking the 'magic' field of the message
- * (see lustre_msg_swabbed() below).
* ALL structs passing over the wire should be declared here. Structs
* that are used in interfaces with userspace should go in lustre_user.h.
*
* in the code to ensure that new/old clients that see this larger struct
* do not fail, otherwise you need to implement protocol compatibility).
*
+ * We assume all nodes are either little-endian or big-endian, and we
+ * always send messages in the sender's native format. The receiver
+ * detects the message format by checking the 'magic' field of the message
+ * (see lustre_msg_swabbed() below).
+ *
* Each wire type has corresponding 'lustre_swab_xxxtypexxx()' routines,
* implemented either here, inline (trivial implementations) or in
* ptlrpc/pack_generic.c. These 'swabbers' convert the type from "other"
}
#define DFID "["LPX64":0x%x:0x%x]"
+#define SFID "0x%llx:0x%x:0x%x"
#define PFID(fid) \
fid_seq(fid), \
* Flags for all connect opcodes (MDS_CONNECT, OST_CONNECT)
*/
-#define MSG_CONNECT_RECOVERING 0x1
-#define MSG_CONNECT_RECONNECT 0x2
-#define MSG_CONNECT_REPLAYABLE 0x4
+#define MSG_CONNECT_RECOVERING 0x00000001
+#define MSG_CONNECT_RECONNECT 0x00000002
+#define MSG_CONNECT_REPLAYABLE 0x00000004
//#define MSG_CONNECT_PEER 0x8
-#define MSG_CONNECT_LIBCLIENT 0x10
-#define MSG_CONNECT_INITIAL 0x20
-#define MSG_CONNECT_ASYNC 0x40
-#define MSG_CONNECT_NEXT_VER 0x80 /* use next version of lustre_msg */
-#define MSG_CONNECT_TRANSNO 0x100 /* report transno */
+#define MSG_CONNECT_LIBCLIENT 0x00000010
+#define MSG_CONNECT_INITIAL 0x00000020
+#define MSG_CONNECT_ASYNC 0x00000040
+#define MSG_CONNECT_NEXT_VER 0x00000080 /* use next version of lustre_msg */
+#define MSG_CONNECT_TRANSNO 0x00000100 /* report transno */
/* Connect flags */
-#define OBD_CONNECT_RDONLY 0x00000001ULL /* client allowed read-only access */
-#define OBD_CONNECT_INDEX 0x00000002ULL /* connect to specific LOV idx */
-#define OBD_CONNECT_MDS 0x00000004ULL /* connect from MDT to OST */
-#define OBD_CONNECT_GRANT 0x00000008ULL /* OSC acquires grant at connect */
-#define OBD_CONNECT_SRVLOCK 0x00000010ULL /* server takes locks for client */
-#define OBD_CONNECT_VERSION 0x00000020ULL /* Server supports versions in ocd */
-#define OBD_CONNECT_REQPORTAL 0x00000040ULL /* Separate portal for non-IO reqs */
-#define OBD_CONNECT_ACL 0x00000080ULL /* client uses access control lists */
-#define OBD_CONNECT_XATTR 0x00000100ULL /* client using extended attributes*/
-#define OBD_CONNECT_TRUNCLOCK 0x00000400ULL /* locks on server for punch b=9528 */
-#define OBD_CONNECT_IBITS 0x00001000ULL /* support for inodebits locks */
-#define OBD_CONNECT_JOIN 0x00002000ULL /* files can be concatenated */
-#define OBD_CONNECT_ATTRFID 0x00004000ULL /* Server supports GetAttr By Fid */
-#define OBD_CONNECT_NODEVOH 0x00008000ULL /* No open handle for special nodes */
-#define OBD_CONNECT_RMT_CLIENT 0x00010000ULL /* Remote client */
-#define OBD_CONNECT_RMT_CLIENT_FORCE 0x00020000ULL /* Remote client by force */
-#define OBD_CONNECT_BRW_SIZE 0x00040000ULL /* Max bytes per rpc */
-#define OBD_CONNECT_QUOTA64 0x00080000ULL /* 64bit qunit_data.qd_count b=10707*/
-#define OBD_CONNECT_MDS_CAPA 0x00100000ULL /* MDS capability */
-#define OBD_CONNECT_OSS_CAPA 0x00200000ULL /* OSS capability */
-#define OBD_CONNECT_CANCELSET 0x00400000ULL /* Early batched cancels. */
-#define OBD_CONNECT_SOM 0x00800000ULL /* SOM feature */
-#define OBD_CONNECT_AT 0x01000000ULL /* client uses adaptive timeouts */
-#define OBD_CONNECT_LRU_RESIZE 0x02000000ULL /* Lru resize feature. */
-#define OBD_CONNECT_MDS_MDS 0x04000000ULL /* MDS-MDS connection*/
-#define OBD_CONNECT_REAL 0x08000000ULL /* real connection */
-#define OBD_CONNECT_CHANGE_QS 0x10000000ULL /* shrink/enlarge qunit b=10600 */
-#define OBD_CONNECT_CKSUM 0x20000000ULL /* support several cksum algos */
-#define OBD_CONNECT_FID 0x40000000ULL /* FID is supported by server */
-#define OBD_CONNECT_LOV_V3 0x100000000ULL /* client supports lov v3 ea */
-
+#define OBD_CONNECT_RDONLY 0x1ULL /*client allowed read-only access*/
+#define OBD_CONNECT_INDEX 0x2ULL /*connect to specific LOV idx */
+#define OBD_CONNECT_MDS 0x4ULL /*connect from MDT to OST */
+#define OBD_CONNECT_GRANT 0x8ULL /*OSC acquires grant at connect */
+#define OBD_CONNECT_SRVLOCK 0x10ULL /*server takes locks for client */
+#define OBD_CONNECT_VERSION 0x20ULL /*Lustre versions in ocd */
+#define OBD_CONNECT_REQPORTAL 0x40ULL /*Separate non-IO request portal */
+#define OBD_CONNECT_ACL 0x80ULL /*access control lists */
+#define OBD_CONNECT_XATTR 0x100ULL /*client use extended attributes */
+#define OBD_CONNECT_CROW 0x200ULL /*MDS+OST create objects on write*/
+#define OBD_CONNECT_TRUNCLOCK 0x400ULL /*locks on server for punch */
+#define OBD_CONNECT_TRANSNO 0x800ULL /*replay sends initial transno */
+#define OBD_CONNECT_IBITS 0x1000ULL /*support for inodebits locks */
+#define OBD_CONNECT_JOIN 0x2000ULL /*files can be concatenated */
+#define OBD_CONNECT_ATTRFID 0x4000ULL /*Server supports GetAttr By Fid */
+#define OBD_CONNECT_NODEVOH 0x8000ULL /*No open handle on special nodes*/
+#define OBD_CONNECT_RMT_CLIENT 0x00010000ULL /*Remote client */
+#define OBD_CONNECT_RMT_CLIENT_FORCE 0x00020000ULL /*Remote client by force */
+#define OBD_CONNECT_BRW_SIZE 0x40000ULL /*Max bytes per rpc */
+#define OBD_CONNECT_QUOTA64 0x80000ULL /*64bit qunit_data.qd_count */
+#define OBD_CONNECT_MDS_CAPA 0x100000ULL /*MDS capability */
+#define OBD_CONNECT_OSS_CAPA 0x200000ULL /*OSS capability */
+#define OBD_CONNECT_CANCELSET 0x400000ULL /*Early batched cancels. */
+#define OBD_CONNECT_SOM 0x00800000ULL /*Size on MDS */
+#define OBD_CONNECT_AT 0x01000000ULL /*client uses adaptive timeouts */
+#define OBD_CONNECT_LRU_RESIZE 0x02000000ULL /*LRU resize feature. */
+#define OBD_CONNECT_MDS_MDS 0x04000000ULL /*MDS-MDS connection */
+#define OBD_CONNECT_REAL 0x08000000ULL /*real connection */
+#define OBD_CONNECT_CHANGE_QS 0x10000000ULL /*shrink/enlarge qunit b=10600 */
+#define OBD_CONNECT_CKSUM 0x20000000ULL /*support several cksum algos */
+#define OBD_CONNECT_FID 0x40000000ULL /*FID is supported by server */
+#define OBD_CONNECT_VBR 0x80000000ULL /*version based recovery */
+#define OBD_CONNECT_LOV_V3 0x100000000ULL /*client supports LOV v3 EA */
/* also update obd_connect_names[] for lprocfs_rd_connect_flags()
* and lustre/utils/wirecheck.c */
OBD_CONNECT_ACL | OBD_CONNECT_XATTR | \
OBD_CONNECT_IBITS | OBD_CONNECT_JOIN | \
OBD_CONNECT_NODEVOH |/* OBD_CONNECT_ATTRFID |*/\
+ OBD_CONNECT_CANCELSET | OBD_CONNECT_AT | \
OBD_CONNECT_RMT_CLIENT | \
OBD_CONNECT_RMT_CLIENT_FORCE | \
OBD_CONNECT_MDS_CAPA | OBD_CONNECT_OSS_CAPA | \
- OBD_CONNECT_MDS_MDS | OBD_CONNECT_CANCELSET | \
- OBD_CONNECT_FID | \
- LRU_RESIZE_CONNECT_FLAG | OBD_CONNECT_AT | \
+ OBD_CONNECT_MDS_MDS | OBD_CONNECT_FID | \
+ LRU_RESIZE_CONNECT_FLAG | \
OBD_CONNECT_LOV_V3)
#define OST_CONNECT_SUPPORTED (OBD_CONNECT_SRVLOCK | OBD_CONNECT_GRANT | \
OBD_CONNECT_REQPORTAL | OBD_CONNECT_VERSION | \
OBD_CONNECT_TRUNCLOCK | OBD_CONNECT_INDEX | \
OBD_CONNECT_BRW_SIZE | OBD_CONNECT_QUOTA64 | \
- OBD_CONNECT_OSS_CAPA | OBD_CONNECT_CANCELSET | \
- OBD_CONNECT_CKSUM | LRU_RESIZE_CONNECT_FLAG | \
- OBD_CONNECT_AT | OBD_CONNECT_CHANGE_QS | \
- OBD_CONNECT_RMT_CLIENT | \
- OBD_CONNECT_RMT_CLIENT_FORCE | OBD_CONNECT_MDS)
+ OBD_CONNECT_CANCELSET | OBD_CONNECT_AT | \
+ LRU_RESIZE_CONNECT_FLAG | OBD_CONNECT_CKSUM | \
+ OBD_CONNECT_CHANGE_QS | \
+ OBD_CONNECT_OSS_CAPA | OBD_CONNECT_RMT_CLIENT | \
+ OBD_CONNECT_RMT_CLIENT_FORCE | \
+ OBD_CONNECT_MDS)
#define ECHO_CONNECT_SUPPORTED (0)
#define MGS_CONNECT_SUPPORTED (OBD_CONNECT_VERSION | OBD_CONNECT_AT)
-#define MAX_QUOTA_COUNT32 (0xffffffffULL)
-
#define OBD_OCD_VERSION(major,minor,patch,fix) (((major)<<24) + ((minor)<<16) +\
((patch)<<8) + (fix))
#define OBD_OCD_VERSION_MAJOR(version) ((int)((version)>>24)&255)
typedef __u64 obd_size;
typedef __u64 obd_off;
typedef __u64 obd_blocks;
+typedef __u64 obd_valid;
typedef __u32 obd_blksize;
typedef __u32 obd_mode;
typedef __u32 obd_uid;
typedef __u32 obd_gid;
typedef __u32 obd_flag;
-typedef __u64 obd_valid;
typedef __u32 obd_count;
#define OBD_FL_INLINEDATA (0x00000001)
#define OBD_FL_DEBUG_CHECK (0x00000040) /* echo client/server debug check */
#define OBD_FL_NO_USRQUOTA (0x00000100) /* the object's owner is over quota */
#define OBD_FL_NO_GRPQUOTA (0x00000200) /* the object's group is over quota */
+#define OBD_FL_CREATE_CROW (0x00000400) /* object should be created on write */
/**
* Set this to delegate DLM locking during obd_punch() to the OSTs. Only OSTs
#define OBD_MD_FLHANDLE (0x00080000ULL) /* file/lock handle */
#define OBD_MD_FLCKSUM (0x00100000ULL) /* bulk data checksum */
#define OBD_MD_FLQOS (0x00200000ULL) /* quality of service stats */
-#define OBD_MD_FLOSCOPQ (0x00400000ULL) /* osc opaque data */
+/*#define OBD_MD_FLOSCOPQ (0x00400000ULL) osc opaque data, never used */
#define OBD_MD_FLCOOKIE (0x00800000ULL) /* log cancellation cookie */
#define OBD_MD_FLGROUP (0x01000000ULL) /* group */
#define OBD_MD_FLFID (0x02000000ULL) /* ->ost write inline fid */
}
#endif
-struct mdt_body {
- struct lu_fid fid1;
- struct lu_fid fid2;
+/*
+ * While mds_body is used to interact with 1.6, mdt_body is used to
+ * interact with 2.0. Both should have the same field layout, because on
+ * the client side one can be dynamically cast to the other.
+ *
+ * mdt_body has a larger size than mds_body, with unused padding (48 bytes)
+ * at the end. The client always uses the size of mdt_body to prepare
+ * request/reply buffers, and the actual data can be interpreted as either
+ * mdt_body or mds_body accordingly.
+ */
+struct mds_body {
+ struct ll_fid fid1;
+ struct ll_fid fid2;
struct lustre_handle handle;
__u64 valid;
__u64 size; /* Offset, in the case of MDS_READPAGE */
__u64 atime;
__u64 ctime;
__u64 blocks; /* XID, in the case of MDS_READPAGE */
- __u64 ioepoch;
- __u64 ino; /* for 1.6 compatibility */
+ __u64 io_epoch;
+ __u64 ino;
__u32 fsuid;
__u32 fsgid;
__u32 capability;
__u32 flags; /* from vfs for pin/unpin, MDS_BFLAG for close */
__u32 rdev;
__u32 nlink; /* #bytes to read in the case of MDS_READPAGE */
- __u32 generation; /* for 1.6 compatibility */
+ __u32 generation;
__u32 suppgid;
__u32 eadatasize;
__u32 aclsize;
__u32 max_mdsize;
__u32 max_cookiesize;
- __u32 padding_4; /* also fix lustre_swab_mdt_body */
- __u64 padding_5;
- __u64 padding_6;
- __u64 padding_7;
- __u64 padding_8;
- __u64 padding_9;
- __u64 padding_10;
+ __u32 padding_4; /* also fix lustre_swab_mds_body */
};
-struct mds_body {
- struct ll_fid fid1;
- struct ll_fid fid2;
+extern void lustre_swab_mds_body (struct mds_body *b);
+
+struct mdt_body {
+ struct lu_fid fid1;
+ struct lu_fid fid2;
struct lustre_handle handle;
__u64 valid;
__u64 size; /* Offset, in the case of MDS_READPAGE */
__u64 atime;
__u64 ctime;
__u64 blocks; /* XID, in the case of MDS_READPAGE */
- __u64 io_epoch;
- __u64 ino;
+ __u64 ioepoch;
+ __u64 ino; /* for 1.6 compatibility */
__u32 fsuid;
__u32 fsgid;
__u32 capability;
__u32 flags; /* from vfs for pin/unpin, MDS_BFLAG for close */
__u32 rdev;
__u32 nlink; /* #bytes to read in the case of MDS_READPAGE */
- __u32 generation;
+ __u32 generation; /* for 1.6 compatibility */
__u32 suppgid;
__u32 eadatasize;
__u32 aclsize;
__u32 max_mdsize;
__u32 max_cookiesize;
- __u32 padding_4; /* also fix lustre_swab_mds_body */
-};
+ __u32 padding_4; /* also fix lustre_swab_mdt_body */
+ __u64 padding_5;
+ __u64 padding_6;
+ __u64 padding_7;
+ __u64 padding_8;
+ __u64 padding_9;
+ __u64 padding_10;
+}; /* 216 bytes */
-extern void lustre_swab_mds_body (struct mds_body *b);
extern void lustre_swab_mdt_body (struct mdt_body *b);
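Because the comment above depends on mds_body and mdt_body staying layout-compatible, here is a hedged compile-time sketch of the kind of check that would enforce it (CLASSERT is libcfs's compile-time assertion; the particular fields checked are illustrative, not taken from the source):

#include <stddef.h>

/* Illustrative only: the shared prefix must line up so the client can
 * cast one body to the other, and mdt_body must be the larger struct
 * (it carries the trailing padding). */
static inline void mds_mdt_body_layout_check(void)
{
        CLASSERT(offsetof(struct mds_body, valid) ==
                 offsetof(struct mdt_body, valid));
        CLASSERT(offsetof(struct mds_body, padding_4) ==
                 offsetof(struct mdt_body, padding_4));
        CLASSERT(sizeof(struct mdt_body) > sizeof(struct mds_body));
}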
struct mdt_epoch {
MDS_QUOTA_IGNORE = 1 << 5
};
-struct mds_rec_join {
- struct ll_fid jr_fid;
- __u64 jr_headsize;
-};
-
-extern void lustre_swab_mds_rec_join (struct mds_rec_join *jr);
-
-struct mdt_rec_join {
- struct lu_fid jr_fid;
- __u64 jr_headsize;
-};
-
-extern void lustre_swab_mdt_rec_join (struct mdt_rec_join *jr);
-
struct mds_rec_create {
__u32 cr_opcode;
__u32 cr_fsuid;
__u32 cr_suppgid2_h;
struct lu_fid cr_fid1;
struct lu_fid cr_fid2;
- struct lustre_handle cr_old_handle; /* u64 handle in case of open replay */
+ struct lustre_handle cr_old_handle; /* handle in case of open replay */
__u64 cr_time;
__u64 cr_rdev;
__u64 cr_ioepoch;
extern void lustre_swab_mdt_rec_create (struct mdt_rec_create *cr);
+struct mds_rec_join {
+ struct ll_fid jr_fid;
+ __u64 jr_headsize;
+};
+
+extern void lustre_swab_mds_rec_join (struct mds_rec_join *jr);
+
+struct mdt_rec_join {
+ struct lu_fid jr_fid;
+ __u64 jr_headsize;
+};
+
+extern void lustre_swab_mdt_rec_join (struct mdt_rec_join *jr);
+
struct mds_rec_link {
__u32 lk_opcode;
__u32 lk_fsuid;
struct lmv_desc {
__u32 ld_tgt_count; /* how many MDS's */
__u32 ld_active_tgt_count; /* how many active */
+ __u32 ld_default_stripe_count; /* how many objects are used */
+ __u32 ld_pattern; /* default MEA_MAGIC_* */
+ __u64 ld_default_hash_size;
+ __u64 ld_padding_1; /* also fix lustre_swab_lmv_desc */
+ __u32 ld_padding_2; /* also fix lustre_swab_lmv_desc */
+        __u32 ld_qos_maxage;               /* in seconds */
+ __u32 ld_padding_3; /* also fix lustre_swab_lmv_desc */
+ __u32 ld_padding_4; /* also fix lustre_swab_lmv_desc */
struct obd_uuid ld_uuid;
};
extern void lustre_swab_lmv_desc (struct lmv_desc *ld);
+/* TODO: lmv_stripe_md should contain mds capabilities for all slave fids */
+struct lmv_stripe_md {
+ __u32 mea_magic;
+ __u32 mea_count;
+ __u32 mea_master;
+ __u32 mea_padding;
+ char mea_pool_name[LOV_MAXPOOLNAME];
+ struct lu_fid mea_ids[0];
+};
+
+extern void lustre_swab_lmv_stripe_md(struct lmv_stripe_md *mea);
+
+/* lmv structures */
+#define MEA_MAGIC_LAST_CHAR 0xb2221ca1
+#define MEA_MAGIC_ALL_CHARS 0xb222a11c
+#define MEA_MAGIC_HASH_SEGMENT 0xb222a11b
+
+#define MAX_HASH_SIZE_32 0x7fffffffUL
+#define MAX_HASH_SIZE 0x7fffffffffffffffULL
+#define MAX_HASH_HIGHEST_BIT 0x1000000000000000ULL
+
+struct md_fld {
+ seqno_t mf_seq;
+ mdsno_t mf_mds;
+};
+
+extern void lustre_swab_md_fld (struct md_fld *mf);
+
enum fld_rpc_opc {
- FLD_QUERY = 600,
+ FLD_QUERY = 900,
FLD_LAST_OPC,
FLD_FIRST_OPC = FLD_QUERY
};
* LOV data structures
*/
-#define LOV_MIN_STRIPE_SIZE 65536 /* maximum PAGE_SIZE (ia64), power of 2 */
+#define LOV_MIN_STRIPE_BITS 16 /* maximum PAGE_SIZE (ia64), power of 2 */
+#define LOV_MIN_STRIPE_SIZE (1<<LOV_MIN_STRIPE_BITS)
#define LOV_MAX_STRIPE_COUNT 160 /* until bug 4424 is fixed */
#define LOV_V1_INSANE_STRIPE_COUNT 65532 /* maximum stripe count bz13933 */
extern void lustre_swab_cfg_marker(struct cfg_marker *marker,
int swab, int size);
-
/*
* Opcodes for multiple servers.
*/
extern void lustre_swab_lov_user_md_objects(struct lov_user_ost_data *lod,
int stripe_count);
extern void lustre_swab_lov_user_md_join(struct lov_user_md_join *lumj);
+extern void lustre_swab_lov_mds_md(struct lov_mds_md *lmm);
/* llog_swab.c */
extern void lustre_swab_llogd_body (struct llogd_body *d);
int is_req, int is_exp);
typedef enum {
- QUOTA_DQACQ = 901,
- QUOTA_DQREL = 902,
+ QUOTA_DQACQ = 601,
+ QUOTA_DQREL = 602,
QUOTA_LAST_OPC
} quota_cmd_t;
#define QUOTA_FIRST_OPC QUOTA_DQACQ
#define QUOTA_RET_NOLIMIT 2 /**< quota limit isn't set */
#define QUOTA_RET_ACQUOTA 4 /**< need to acquire extra quota */
+
/* security opcodes */
typedef enum {
SEC_CTX_INIT = 801,
#define EXT3_IOC_SETVERSION _IOW('f', 4, long)
#define EXT3_IOC_GETVERSION_OLD _IOR('v', 1, long)
#define EXT3_IOC_SETVERSION_OLD _IOW('v', 2, long)
-#define EXT3_IOC_FIEMAP _IOWR('f', 10, struct ll_user_fiemap)
+#define EXT3_IOC_FIEMAP _IOWR('f', 11, struct ll_user_fiemap)
#endif
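A hedged user-space sketch of driving the ioctl above (error handling kept minimal; the file descriptor is assumed to reference an open Lustre file):

#include <stdlib.h>
#include <sys/ioctl.h>

/* Map up to 32 extents of the whole file via EXT3_IOC_FIEMAP. */
static struct ll_user_fiemap *map_extents(int fd)
{
        int count = 32;
        struct ll_user_fiemap *fm;

        fm = calloc(1, sizeof(*fm) +
                       count * sizeof(struct ll_fiemap_extent));
        if (fm == NULL)
                return NULL;

        fm->fm_start        = 0;
        fm->fm_length       = FIEMAP_MAX_OFFSET;   /* whole file */
        fm->fm_flags        = FIEMAP_FLAG_SYNC;
        fm->fm_extent_count = count;

        if (ioctl(fd, EXT3_IOC_FIEMAP, fm) < 0) {
                free(fm);
                return NULL;
        }
        /* fm->fm_mapped_extents entries of fm_extents[] are now valid */
        return fm;
}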
/* FIEMAP flags supported by Lustre */
struct lov_user_ost_data_v1 lmm_objects[0]; /* per-stripe data */
} __attribute__((packed));
+#define copy_lov_mds2user(user_md, mds_md) do { \
+ memcpy(user_md, mds_md, sizeof(*(user_md))); \
+ (user_md)->lmm_stripe_offset = 0; \
+ (user_md)->lmm_stripe_count = (mds_md)->lmm_stripe_count; } while(0)
+
/* Compile with -D_LARGEFILE64_SOURCE or -D_GNU_SOURCE (or #define) to
* use this. It is unsafe to #define those values in this header as it
* is possible the application has already #included <sys/stat.h>. */
/* Flags sent in AST lock_flags to be mapped into the receiving lock. */
#define LDLM_AST_FLAGS (LDLM_FL_DISCARD_DATA)
-/* Used for marking lock as an target for -EINTR while cp_ast sleep situation
+/*
+ * --------------------------------------------------------------------------
+ * NOTE! Starting from this point, that is, LDLM_FL_* flags with values above
+ * 0x80000000 will not be sent over the wire.
+ * --------------------------------------------------------------------------
+ */
+
+/* Used for marking a lock as a target for -EINTR while cp_ast sleep
* emulation + race with upcoming bl_ast. */
#define LDLM_FL_FAIL_LOC 0x100000000ULL
} ldlm_appetite_t;
/*
- * Default value for ->ns_shrink_thumb. If lock is not extent one its cost
- * is one page. Here we have 256 pages which is 1M on i386. Thus by default
- * all extent locks which have more than 1M long extent will be kept in lru,
- * others (including ibits locks) will be canceled on memory pressure event.
- */
-#define LDLM_LOCK_SHRINK_THUMB 256
-
-/*
* Default values for the "max_nolock_size", "contention_time" and
* "contended_locks" namespace tunables.
*/
unsigned int ns_ctime_age_limit;
/**
- * Lower limit to number of pages in lock to keep it in cache.
- */
- unsigned long ns_shrink_thumb;
-
- /**
* Next debug dump, jiffies.
*/
cfs_time_t ns_next_dump;
*/
cfs_waitq_t l_waitq;
- struct timeval l_enqueued_time;
+ /**
+     * Seconds. Updated whenever there is any activity related to the
+     * lock, e.g. enqueuing the lock or sending a blocking AST.
+ */
+ cfs_time_t l_last_activity;
/**
* Jiffies. Should be converted to time if needed.
...)
__attribute__ ((format (printf, 4, 5)));
-#define LDLM_ERROR(lock, fmt, a...) do { \
+#define LDLM_DEBUG_LIMIT(mask, lock, fmt, a...) do { \
static cfs_debug_limit_state_t _ldlm_cdls; \
- ldlm_lock_debug(&_ldlm_cdls, D_ERROR, lock, \
+ ldlm_lock_debug(&_ldlm_cdls, mask, lock, \
__FILE__, __FUNCTION__, __LINE__, \
"### " fmt , ##a); \
} while (0)
+#define LDLM_ERROR(lock, fmt, a...) LDLM_DEBUG_LIMIT(D_ERROR, lock, fmt, ## a)
+#define LDLM_WARN(lock, fmt, a...) LDLM_DEBUG_LIMIT(D_WARNING, lock, fmt, ## a)
+
#define LDLM_DEBUG(lock, fmt, a...) do { \
ldlm_lock_debug(NULL, D_DLMTRACE, lock, \
__FILE__, __FUNCTION__, __LINE__, \
void ldlm_lock_decref(struct lustre_handle *lockh, __u32 mode);
void ldlm_lock_decref_and_cancel(struct lustre_handle *lockh, __u32 mode);
void ldlm_lock_allow_match(struct ldlm_lock *lock);
+void ldlm_lock_allow_match_locked(struct ldlm_lock *lock);
ldlm_mode_t ldlm_lock_match(struct ldlm_namespace *ns, int flags,
const struct ldlm_res_id *, ldlm_type_t type,
ldlm_policy_data_t *, ldlm_mode_t mode,
int fed_mod_count;/* items in fed_writing list */
long fed_pending; /* bytes just being written */
__u32 fed_group;
- struct brw_stats fed_brw_stats;
};
typedef struct nid_stat_uuid {
int nid_exp_ref_count;
}nid_stat_t;
+enum obd_option {
+ OBD_OPT_FORCE = 0x0001,
+ OBD_OPT_FAILOVER = 0x0002,
+ OBD_OPT_ABORT_RECOV = 0x0004,
+};
+
struct obd_export {
struct portals_handle exp_handle;
atomic_t exp_refcount;
spinlock_t exp_lock; /* protects flags int below */
/* ^ protects exp_outstanding_replies too */
__u64 exp_connect_flags;
- int exp_flags;
+ enum obd_option exp_flags;
unsigned long exp_failed:1,
exp_in_recovery:1,
exp_disconnected:1,
extern const struct lu_seq_range LUSTRE_SEQ_SPACE_RANGE;
extern const struct lu_seq_range LUSTRE_SEQ_ZERO_RANGE;
extern const struct lu_fid LUSTRE_BFL_FID;
+extern const struct lu_fid LU_OBF_FID;
+extern const struct lu_fid LU_DOT_LUSTRE_FID;
enum {
/*
/** special fid seq: used for local object create. */
#define FID_SEQ_LOCAL_FILE (FID_SEQ_START + 1)
+/** special fid seq: used for .lustre objects. */
+#define LU_DOT_LUSTRE_SEQ (FID_SEQ_START + 0x02ULL)
+
/** special OID for local objects */
enum {
/** \see osd_oi_index_create */
int target_handle_connect(struct ptlrpc_request *req);
int target_handle_disconnect(struct ptlrpc_request *req);
void target_destroy_export(struct obd_export *exp);
-int target_handle_reconnect(struct lustre_handle *conn, struct obd_export *exp,
- struct obd_uuid *cluuid, int);
int target_pack_pool_reply(struct ptlrpc_request *req);
int target_handle_ping(struct ptlrpc_request *req);
void target_committed_to_req(struct ptlrpc_request *req);
lnet_handle_md_t bd_md_h; /* associated MD */
lnet_nid_t bd_sender; /* stash event::sender */
- cfs_page_t **bd_enc_pages;
#if defined(__KERNEL__)
+ /*
+         * encryption iov; its size is either 0 or bd_iov_count.
+ */
+ lnet_kiov_t *bd_enc_iov;
+
lnet_kiov_t bd_iov[0];
#else
lnet_md_iovec_t bd_iov[0];
int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg);
int client_obd_cleanup(struct obd_device *obddev);
int client_connect_import(const struct lu_env *env,
- struct lustre_handle *conn, struct obd_device *obd,
+ struct obd_export **exp, struct obd_device *obd,
struct obd_uuid *cluuid, struct obd_connect_data *,
void *localdata);
int client_disconnect_export(struct obd_export *exp);
SPTLRPC_SVC_MAX,
};
+enum sptlrpc_bulk_type {
+ SPTLRPC_BULK_DEFAULT = 0, /* follow rpc flavor */
+ SPTLRPC_BULK_HASH = 1, /* hash integrity */
+ SPTLRPC_BULK_MAX,
+};
+
+enum sptlrpc_bulk_service {
+ SPTLRPC_BULK_SVC_NULL = 0,
+ SPTLRPC_BULK_SVC_AUTH = 1,
+ SPTLRPC_BULK_SVC_INTG = 2,
+ SPTLRPC_BULK_SVC_PRIV = 3,
+ SPTLRPC_BULK_SVC_MAX,
+};
+
/*
- * rpc flavor compose/extract, represented as 16 bits
+ * rpc flavor compose/extract, represented as 32 bits. Currently the
+ * high 12 bits are unused and must be set to 0.
*
- * 4b (reserved) | 4b (svc) | 4b (mech) | 4b (policy)
+ * 4b (bulk svc) | 4b (bulk type) | 4b (svc) | 4b (mech) | 4b (policy)
*/
-#define RPC_FLVR_POLICY_OFFSET (0)
-#define RPC_FLVR_MECH_OFFSET (4)
-#define RPC_FLVR_SVC_OFFSET (8)
-
-#define MAKE_RPC_FLVR(policy, mech, svc) \
- (((__u16)(policy) << RPC_FLVR_POLICY_OFFSET) | \
- ((__u16)(mech) << RPC_FLVR_MECH_OFFSET) | \
- ((__u16)(svc) << RPC_FLVR_SVC_OFFSET))
+#define FLVR_POLICY_OFFSET (0)
+#define FLVR_MECH_OFFSET (4)
+#define FLVR_SVC_OFFSET (8)
+#define FLVR_BULK_TYPE_OFFSET (12)
+#define FLVR_BULK_SVC_OFFSET (16)
+
+#define MAKE_FLVR(policy, mech, svc, btype, bsvc) \
+ (((__u32)(policy) << FLVR_POLICY_OFFSET) | \
+ ((__u32)(mech) << FLVR_MECH_OFFSET) | \
+ ((__u32)(svc) << FLVR_SVC_OFFSET) | \
+ ((__u32)(btype) << FLVR_BULK_TYPE_OFFSET) | \
+ ((__u32)(bsvc) << FLVR_BULK_SVC_OFFSET))
-#define MAKE_RPC_SUBFLVR(mech, svc) \
- ((__u16)(mech) | \
- ((__u16)(svc) << (RPC_FLVR_SVC_OFFSET - RPC_FLVR_MECH_OFFSET)))
-
-#define RPC_FLVR_SUB(flavor) \
- ((((__u16)(flavor)) >> RPC_FLVR_MECH_OFFSET) & 0xFF)
-
-#define RPC_FLVR_POLICY(flavor) \
- ((((__u16)(flavor)) >> RPC_FLVR_POLICY_OFFSET) & 0xF)
-#define RPC_FLVR_MECH(flavor) \
- ((((__u16)(flavor)) >> RPC_FLVR_MECH_OFFSET) & 0xF)
-#define RPC_FLVR_SVC(flavor) \
- ((((__u16)(flavor)) >> RPC_FLVR_SVC_OFFSET) & 0xF)
+/*
+ * extraction
+ */
+#define SPTLRPC_FLVR_POLICY(flavor) \
+ ((((__u32)(flavor)) >> FLVR_POLICY_OFFSET) & 0xF)
+#define SPTLRPC_FLVR_MECH(flavor) \
+ ((((__u32)(flavor)) >> FLVR_MECH_OFFSET) & 0xF)
+#define SPTLRPC_FLVR_SVC(flavor) \
+ ((((__u32)(flavor)) >> FLVR_SVC_OFFSET) & 0xF)
+#define SPTLRPC_FLVR_BULK_TYPE(flavor) \
+ ((((__u32)(flavor)) >> FLVR_BULK_TYPE_OFFSET) & 0xF)
+#define SPTLRPC_FLVR_BULK_SVC(flavor) \
+ ((((__u32)(flavor)) >> FLVR_BULK_SVC_OFFSET) & 0xF)
+
+#define SPTLRPC_FLVR_BASE(flavor) \
+ ((((__u32)(flavor)) >> FLVR_POLICY_OFFSET) & 0xFFF)
+#define SPTLRPC_FLVR_BASE_SUB(flavor) \
+ ((((__u32)(flavor)) >> FLVR_MECH_OFFSET) & 0xFF)
/*
* gss subflavors
*/
+#define MAKE_BASE_SUBFLVR(mech, svc) \
+ ((__u32)(mech) | \
+ ((__u32)(svc) << (FLVR_SVC_OFFSET - FLVR_MECH_OFFSET)))
+
#define SPTLRPC_SUBFLVR_KRB5N \
- MAKE_RPC_SUBFLVR(SPTLRPC_MECH_GSS_KRB5, SPTLRPC_SVC_NULL)
+ MAKE_BASE_SUBFLVR(SPTLRPC_MECH_GSS_KRB5, SPTLRPC_SVC_NULL)
#define SPTLRPC_SUBFLVR_KRB5A \
- MAKE_RPC_SUBFLVR(SPTLRPC_MECH_GSS_KRB5, SPTLRPC_SVC_AUTH)
+ MAKE_BASE_SUBFLVR(SPTLRPC_MECH_GSS_KRB5, SPTLRPC_SVC_AUTH)
#define SPTLRPC_SUBFLVR_KRB5I \
- MAKE_RPC_SUBFLVR(SPTLRPC_MECH_GSS_KRB5, SPTLRPC_SVC_INTG)
+ MAKE_BASE_SUBFLVR(SPTLRPC_MECH_GSS_KRB5, SPTLRPC_SVC_INTG)
#define SPTLRPC_SUBFLVR_KRB5P \
- MAKE_RPC_SUBFLVR(SPTLRPC_MECH_GSS_KRB5, SPTLRPC_SVC_PRIV)
+ MAKE_BASE_SUBFLVR(SPTLRPC_MECH_GSS_KRB5, SPTLRPC_SVC_PRIV)
/*
* "end user" flavors
*/
#define SPTLRPC_FLVR_NULL \
- MAKE_RPC_FLVR(SPTLRPC_POLICY_NULL, \
- SPTLRPC_MECH_NULL, \
- SPTLRPC_SVC_NULL)
+ MAKE_FLVR(SPTLRPC_POLICY_NULL, \
+ SPTLRPC_MECH_NULL, \
+ SPTLRPC_SVC_NULL, \
+ SPTLRPC_BULK_DEFAULT, \
+ SPTLRPC_BULK_SVC_NULL)
#define SPTLRPC_FLVR_PLAIN \
- MAKE_RPC_FLVR(SPTLRPC_POLICY_PLAIN, \
- SPTLRPC_MECH_PLAIN, \
- SPTLRPC_SVC_NULL)
+ MAKE_FLVR(SPTLRPC_POLICY_PLAIN, \
+ SPTLRPC_MECH_PLAIN, \
+ SPTLRPC_SVC_NULL, \
+ SPTLRPC_BULK_HASH, \
+ SPTLRPC_BULK_SVC_INTG)
#define SPTLRPC_FLVR_KRB5N \
- MAKE_RPC_FLVR(SPTLRPC_POLICY_GSS, \
- SPTLRPC_MECH_GSS_KRB5, \
- SPTLRPC_SVC_NULL)
+ MAKE_FLVR(SPTLRPC_POLICY_GSS, \
+ SPTLRPC_MECH_GSS_KRB5, \
+ SPTLRPC_SVC_NULL, \
+ SPTLRPC_BULK_DEFAULT, \
+ SPTLRPC_BULK_SVC_NULL)
#define SPTLRPC_FLVR_KRB5A \
- MAKE_RPC_FLVR(SPTLRPC_POLICY_GSS, \
- SPTLRPC_MECH_GSS_KRB5, \
- SPTLRPC_SVC_AUTH)
+ MAKE_FLVR(SPTLRPC_POLICY_GSS, \
+ SPTLRPC_MECH_GSS_KRB5, \
+ SPTLRPC_SVC_AUTH, \
+ SPTLRPC_BULK_DEFAULT, \
+ SPTLRPC_BULK_SVC_NULL)
#define SPTLRPC_FLVR_KRB5I \
- MAKE_RPC_FLVR(SPTLRPC_POLICY_GSS, \
- SPTLRPC_MECH_GSS_KRB5, \
- SPTLRPC_SVC_INTG)
+ MAKE_FLVR(SPTLRPC_POLICY_GSS, \
+ SPTLRPC_MECH_GSS_KRB5, \
+ SPTLRPC_SVC_INTG, \
+ SPTLRPC_BULK_DEFAULT, \
+ SPTLRPC_BULK_SVC_INTG)
#define SPTLRPC_FLVR_KRB5P \
- MAKE_RPC_FLVR(SPTLRPC_POLICY_GSS, \
- SPTLRPC_MECH_GSS_KRB5, \
- SPTLRPC_SVC_PRIV)
-
-#define SPTLRPC_FLVR_ANY ((__u16) 0xf000)
-#define SPTLRPC_FLVR_INVALID ((__u16) 0xffff)
+ MAKE_FLVR(SPTLRPC_POLICY_GSS, \
+ SPTLRPC_MECH_GSS_KRB5, \
+ SPTLRPC_SVC_PRIV, \
+ SPTLRPC_BULK_DEFAULT, \
+ SPTLRPC_BULK_SVC_PRIV)
#define SPTLRPC_FLVR_DEFAULT SPTLRPC_FLVR_NULL
+#define SPTLRPC_FLVR_INVALID ((__u32) 0xFFFFFFFF)
+#define SPTLRPC_FLVR_ANY ((__u32) 0xFFF00000)
+
/*
- * 32 bits wire flavor (msg->lm_secflvr), lower 12 bits is the rpc flavor,
- * higher 20 bits is not defined right now.
+ * extract the useful part from wire flavor
*/
-#define WIRE_FLVR_RPC(wflvr) (((__u16) (wflvr)) & 0x0FFF)
+#define WIRE_FLVR(wflvr) (((__u32) (wflvr)) & 0x000FFFFF)
-static inline void rpc_flvr_set_svc(__u16 *flvr, __u16 svc)
+static inline void flvr_set_svc(__u32 *flvr, __u32 svc)
{
LASSERT(svc < SPTLRPC_SVC_MAX);
- *flvr = MAKE_RPC_FLVR(RPC_FLVR_POLICY(*flvr),
- RPC_FLVR_MECH(*flvr),
- svc);
+ *flvr = MAKE_FLVR(SPTLRPC_FLVR_POLICY(*flvr),
+ SPTLRPC_FLVR_MECH(*flvr),
+ svc,
+ SPTLRPC_FLVR_BULK_TYPE(*flvr),
+ SPTLRPC_FLVR_BULK_SVC(*flvr));
}
+static inline void flvr_set_bulk_svc(__u32 *flvr, __u32 svc)
+{
+ LASSERT(svc < SPTLRPC_BULK_SVC_MAX);
+ *flvr = MAKE_FLVR(SPTLRPC_FLVR_POLICY(*flvr),
+ SPTLRPC_FLVR_MECH(*flvr),
+ SPTLRPC_FLVR_SVC(*flvr),
+ SPTLRPC_FLVR_BULK_TYPE(*flvr),
+ svc);
+}
+
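A brief sketch of the 4-bit field packing described above, using only macros defined in this header (the values and the demo function are illustrative):

/* Compose a krb5p-style flavor, then read the nibbles back out. */
static void flavor_packing_demo(void)
{
        __u32 flvr = MAKE_FLVR(SPTLRPC_POLICY_GSS,
                               SPTLRPC_MECH_GSS_KRB5,
                               SPTLRPC_SVC_PRIV,
                               SPTLRPC_BULK_DEFAULT,
                               SPTLRPC_BULK_SVC_PRIV);

        LASSERT(SPTLRPC_FLVR_POLICY(flvr)   == SPTLRPC_POLICY_GSS);
        LASSERT(SPTLRPC_FLVR_SVC(flvr)      == SPTLRPC_SVC_PRIV);
        LASSERT(SPTLRPC_FLVR_BULK_SVC(flvr) == SPTLRPC_BULK_SVC_PRIV);

        /* flvr_set_bulk_svc() rewrites only the bulk-service nibble */
        flvr_set_bulk_svc(&flvr, SPTLRPC_BULK_SVC_INTG);
        LASSERT(SPTLRPC_FLVR_BULK_SVC(flvr) == SPTLRPC_BULK_SVC_INTG);
}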
+struct bulk_spec_hash {
+ __u8 hash_alg;
+};
struct sptlrpc_flavor {
- __u16 sf_rpc; /* rpc flavor */
- __u8 sf_bulk_ciph; /* bulk cipher alg */
- __u8 sf_bulk_hash; /* bulk hash alg */
+ __u32 sf_rpc; /* wire flavor - should be renamed to sf_wire */
__u32 sf_flags; /* general flags */
+ /*
+ * rpc flavor specification
+ */
+ union {
+ /* nothing for now */
+ } u_rpc;
+ /*
+ * bulk flavor specification
+ */
+ union {
+ struct bulk_spec_hash hash;
+ } u_bulk;
};
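For the struct above, a short hedged example of filling in a flavor whose bulk specification uses the hash union member (the helper and the chosen algorithm are illustrative):

#include <string.h>

/* Illustrative: a plain wire flavor with a SHA-256 bulk hash spec. */
static void set_plain_flavor(struct sptlrpc_flavor *sf)
{
        memset(sf, 0, sizeof(*sf));
        sf->sf_rpc = SPTLRPC_FLVR_PLAIN;                 /* wire flavor */
        sf->u_bulk.hash.hash_alg = BULK_HASH_ALG_SHA256; /* bulk hash */
}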
enum lustre_sec_part {
};
int sptlrpc_parse_flavor(const char *str, struct sptlrpc_flavor *flvr);
+int sptlrpc_flavor_has_bulk(struct sptlrpc_flavor *flvr);
static inline void sptlrpc_rule_set_init(struct sptlrpc_rule_set *set)
{
}
void sptlrpc_rule_set_free(struct sptlrpc_rule_set *set);
-int sptlrpc_rule_set_expand(struct sptlrpc_rule_set *set, int expand);
+int sptlrpc_rule_set_expand(struct sptlrpc_rule_set *set);
int sptlrpc_rule_set_merge(struct sptlrpc_rule_set *set,
- struct sptlrpc_rule *rule,
- int expand);
+ struct sptlrpc_rule *rule);
int sptlrpc_rule_set_choose(struct sptlrpc_rule_set *rset,
enum lustre_sec_part from,
enum lustre_sec_part to,
int msgsize);
void (*free_rs) (struct ptlrpc_reply_state *rs);
void (*free_ctx) (struct ptlrpc_svc_ctx *ctx);
- /* reverse credential */
+ /* reverse context */
int (*install_rctx)(struct obd_import *imp,
struct ptlrpc_svc_ctx *ctx);
/* bulk transform */
+ int (*prep_bulk) (struct ptlrpc_request *req,
+ struct ptlrpc_bulk_desc *desc);
int (*unwrap_bulk) (struct ptlrpc_request *req,
struct ptlrpc_bulk_desc *desc);
int (*wrap_bulk) (struct ptlrpc_request *req,
BULK_HASH_ALG_SHA256,
BULK_HASH_ALG_SHA384,
BULK_HASH_ALG_SHA512,
- BULK_HASH_ALG_WP256,
- BULK_HASH_ALG_WP384,
- BULK_HASH_ALG_WP512,
BULK_HASH_ALG_MAX
};
-enum sptlrpc_bulk_cipher_alg {
- BULK_CIPH_ALG_NULL = 0,
- BULK_CIPH_ALG_ARC4,
- BULK_CIPH_ALG_AES128,
- BULK_CIPH_ALG_AES192,
- BULK_CIPH_ALG_AES256,
- BULK_CIPH_ALG_CAST128,
- BULK_CIPH_ALG_CAST256,
- BULK_CIPH_ALG_TWOFISH128,
- BULK_CIPH_ALG_TWOFISH256,
- BULK_CIPH_ALG_MAX
-};
-
struct sptlrpc_hash_type {
char *sht_name;
char *sht_tfm_name;
unsigned int sht_size;
};
-struct sptlrpc_ciph_type {
- char *sct_name;
- char *sct_tfm_name;
- __u32 sct_tfm_flags;
- unsigned int sct_ivsize;
- unsigned int sct_keysize;
-};
-
const struct sptlrpc_hash_type *sptlrpc_get_hash_type(__u8 hash_alg);
const char * sptlrpc_get_hash_name(__u8 hash_alg);
-const struct sptlrpc_ciph_type *sptlrpc_get_ciph_type(__u8 ciph_alg);
-const char *sptlrpc_get_ciph_name(__u8 ciph_alg);
+__u8 sptlrpc_get_hash_alg(const char *algname);
-#define CIPHER_MAX_BLKSIZE (16)
-#define CIPHER_MAX_KEYSIZE (64)
+enum {
+ BSD_FL_ERR = 1,
+};
struct ptlrpc_bulk_sec_desc {
- __u8 bsd_version;
- __u8 bsd_flags;
- __u8 bsd_pad[4];
- __u8 bsd_hash_alg; /* hash algorithm */
- __u8 bsd_ciph_alg; /* cipher algorithm */
- __u8 bsd_key[CIPHER_MAX_KEYSIZE]; /* encrypt key seed */
- __u8 bsd_csum[0];
+ __u8 bsd_version; /* 0 */
+ __u8 bsd_type; /* SPTLRPC_BULK_XXX */
+ __u8 bsd_svc; /* SPTLRPC_BULK_SVC_XXXX */
+ __u8 bsd_flags; /* flags */
+ __u32 bsd_nob; /* nob of bulk data */
+ __u8 bsd_data[0]; /* policy-specific token */
};
int sptlrpc_register_policy(struct ptlrpc_sec_policy *policy);
int sptlrpc_unregister_policy(struct ptlrpc_sec_policy *policy);
-__u16 sptlrpc_name2rpcflavor(const char *name);
-const char *sptlrpc_rpcflavor2name(__u16 flavor);
-int sptlrpc_flavor2name(struct sptlrpc_flavor *sf, char *buf, int bufsize);
+__u32 sptlrpc_name2flavor_base(const char *name);
+const char *sptlrpc_flavor2name_base(__u32 flvr);
+char *sptlrpc_flavor2name_bulk(struct sptlrpc_flavor *sf,
+ char *buf, int bufsize);
+char *sptlrpc_flavor2name(struct sptlrpc_flavor *sf, char *buf, int bufsize);
+char *sptlrpc_secflags2str(__u32 flags, char *buf, int bufsize);
static inline
struct ptlrpc_sec_policy *sptlrpc_policy_get(struct ptlrpc_sec_policy *policy)
*/
int sptlrpc_import_sec_adapt(struct obd_import *imp,
struct ptlrpc_svc_ctx *ctx,
- __u16 rpc_flavor);
+ struct sptlrpc_flavor *flvr);
struct ptlrpc_sec *sptlrpc_import_sec_ref(struct obd_import *imp);
void sptlrpc_import_sec_put(struct obd_import *imp);
int sptlrpc_cli_wrap_bulk(struct ptlrpc_request *req,
struct ptlrpc_bulk_desc *desc);
int sptlrpc_cli_unwrap_bulk_read(struct ptlrpc_request *req,
- int nob, obd_count pg_count,
- struct brw_page **pga);
+ struct ptlrpc_bulk_desc *desc,
+ int nob);
int sptlrpc_cli_unwrap_bulk_write(struct ptlrpc_request *req,
struct ptlrpc_bulk_desc *desc);
+int sptlrpc_svc_prep_bulk(struct ptlrpc_request *req,
+ struct ptlrpc_bulk_desc *desc);
int sptlrpc_svc_wrap_bulk(struct ptlrpc_request *req,
struct ptlrpc_bulk_desc *desc);
int sptlrpc_svc_unwrap_bulk(struct ptlrpc_request *req,
struct ptlrpc_bulk_desc *desc);
+/* bulk helpers (internal use only by policies) */
+int sptlrpc_get_bulk_checksum(struct ptlrpc_bulk_desc *desc, __u8 alg,
+ void *buf, int buflen);
+
+int bulk_sec_desc_unpack(struct lustre_msg *msg, int offset);
+
/* user descriptor helpers */
static inline int sptlrpc_user_desc_size(int ngroups)
{
int sptlrpc_pack_user_desc(struct lustre_msg *msg, int offset);
int sptlrpc_unpack_user_desc(struct lustre_msg *msg, int offset);
-/* bulk helpers (internal use only by policies) */
-int bulk_sec_desc_size(__u8 hash_alg, int request, int read);
-int bulk_sec_desc_unpack(struct lustre_msg *msg, int offset);
-
-int bulk_csum_cli_request(struct ptlrpc_bulk_desc *desc, int read,
- __u32 alg, struct lustre_msg *rmsg, int roff);
-int bulk_csum_cli_reply(struct ptlrpc_bulk_desc *desc, int read,
- struct lustre_msg *rmsg, int roff,
- struct lustre_msg *vmsg, int voff);
-int bulk_csum_svc(struct ptlrpc_bulk_desc *desc, int read,
- struct ptlrpc_bulk_sec_desc *bsdv, int vsize,
- struct ptlrpc_bulk_sec_desc *bsdr, int rsize);
#define CFS_CAP_CHOWN_MASK (1 << CFS_CAP_CHOWN)
#define CFS_CAP_SYS_RESOURCE_MASK (1 << CFS_CAP_SYS_RESOURCE)
};
struct lov_tgt_desc {
+ struct list_head ltd_kill;
struct obd_uuid ltd_uuid;
struct obd_export *ltd_exp;
struct ltd_qos ltd_qos; /* qos info per target */
* Events signalled through obd_notify() upcall-chain.
*/
enum obd_notify_event {
+ /* Device connect start */
+ OBD_NOTIFY_CONNECT,
/* Device activated */
OBD_NOTIFY_ACTIVE,
/* Device deactivated */
struct lu_ref obd_reference;
};
-#define OBD_OPT_FORCE 0x0001
-#define OBD_OPT_FAILOVER 0x0002
-
#define OBD_LLOG_FL_SENDNOW 0x0001
enum obd_cleanup_stage {
#define KEY_CLEAR_FS "clear_fs"
#define KEY_BLOCKSIZE "blocksize"
#define KEY_BLOCKSIZE_BITS "blocksize_bits"
-#define KEY_FIEMAP "FIEMAP"
+#define KEY_FIEMAP "fiemap"
#define KEY_SPTLRPC_CONF "sptlrpc_conf"
#define KEY_MGSSEC "mgssec"
/* XXX unused ?*/
* granted by the target, which are guaranteed to be a subset of flags
* asked for. If @ocd == NULL, use default parameters. */
int (*o_connect)(const struct lu_env *env,
- struct lustre_handle *conn, struct obd_device *src,
+ struct obd_export **exp, struct obd_device *src,
struct obd_uuid *cluuid, struct obd_connect_data *ocd,
void *localdata);
int (*o_reconnect)(const struct lu_env *env,
* Also, add a wrapper function in include/linux/obd_class.h. */
};
-/* TODO: lmv_stripe_md should contain mds capabilities for all slave fids */
-struct lmv_stripe_md {
- __u32 mea_magic;
- __u32 mea_count;
- __u32 mea_master;
- __u32 mea_padding;
- struct lu_fid mea_ids[0];
-};
-
enum {
LUSTRE_OPC_MKDIR = (1 << 0),
LUSTRE_OPC_SYMLINK = (1 << 1),
int class_disconnect(struct obd_export *exp);
void class_fail_export(struct obd_export *exp);
void class_disconnect_exports(struct obd_device *obddev);
-int class_disconnect_stale_exports(struct obd_device *,
- int (*test_export)(struct obd_export *));
int class_manual_cleanup(struct obd_device *obd);
+int class_disconnect_stale_exports(struct obd_device *,
+ int (*test_export)(struct obd_export *),
+ enum obd_option flags);
+
+static inline enum obd_option exp_flags_from_obd(struct obd_device *obd)
+{
+ return ((obd->obd_fail ? OBD_OPT_FAILOVER : 0) |
+ (obd->obd_force ? OBD_OPT_FORCE : 0) |
+ (obd->obd_abort_recovery ? OBD_OPT_ABORT_RECOV : 0) |
+ 0);
+}
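A one-line sketch of the intended call pattern for the helper above (the test_export callback name is hypothetical):

/* Disconnect stale exports, forwarding the device's force/failover/
 * abort-recovery state as enum obd_option flags: */
class_disconnect_stale_exports(obd, is_stale_export /* hypothetical */,
                               exp_flags_from_obd(obd));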
void obdo_cpy_md(struct obdo *dst, struct obdo *src, obd_flag valid);
void obdo_to_ioobj(struct obdo *oa, struct obd_ioobj *ioobj);
}
static inline int obd_connect(const struct lu_env *env,
- struct lustre_handle *conn,struct obd_device *obd,
+ struct obd_export **exp,struct obd_device *obd,
struct obd_uuid *cluuid,
struct obd_connect_data *d,
void *localdata)
OBD_CHECK_DT_OP(obd, connect, -EOPNOTSUPP);
OBD_COUNTER_INCREMENT(obd, connect);
- rc = OBP(obd, connect)(env, conn, obd, cluuid, d, localdata);
+ rc = OBP(obd, connect)(env, exp, obd, cluuid, d, localdata);
/* check that only subset is granted */
LASSERT(ergo(d != NULL,
(d->ocd_connect_flags & ocf) == d->ocd_connect_flags));
--- /dev/null
+Index: linux-2.6.16.60-0.33/drivers/md/raid5.c
+===================================================================
+--- linux-2.6.16.60-0.33.orig/drivers/md/raid5.c
++++ linux-2.6.16.60-0.33/drivers/md/raid5.c
+@@ -900,6 +900,8 @@ static int add_stripe_bio(struct stripe_
+ bi->bi_next = *bip;
+ *bip = bi;
+ bi->bi_phys_segments ++;
++ if (bio_sync(bi) && !forwrite)
++ clear_bit(R5_UPTODATE, &sh->dev[dd_idx].flags); /* force to read from disk. */
+ spin_unlock_irq(&conf->device_lock);
+ spin_unlock(&sh->lock);
+
+@@ -1617,6 +1619,8 @@ static int make_request (request_queue_t
+ bi->bi_end_io(bi, bytes, 0);
+ }
+ spin_unlock_irq(&conf->device_lock);
++ if (bio_sync(bi))
++ raid5_unplug_device(q);
+ return 0;
+ }
+
--- /dev/null
+Index: linux-2.6.22.14/drivers/md/raid5.c
+===================================================================
+--- linux-2.6.22.14.orig/drivers/md/raid5.c
++++ linux-2.6.22.14/drivers/md/raid5.c
+@@ -1268,6 +1268,8 @@ static int add_stripe_bio(struct stripe_
+ bi->bi_next = *bip;
+ *bip = bi;
+ bi->bi_phys_segments ++;
++ if (bio_sync(bi) && !forwrite)
++ clear_bit(R5_UPTODATE, &sh->dev[dd_idx].flags); /* force to read from disk. */
+ spin_unlock_irq(&conf->device_lock);
+ spin_unlock(&sh->lock);
+
+@@ -2972,6 +2974,8 @@ static int make_request(request_queue_t
+ test_bit(BIO_UPTODATE, &bi->bi_flags)
+ ? 0 : -EIO);
+ }
++ if (bio_sync(bi))
++ raid5_unplug_device(q);
+ return 0;
+ }
+
md-soft-lockups.patch
jbd-journal-chksum-2.6.18-vanilla.patch
quota-large-limits-rhel5.patch
+md-mmp-unplug-dev.patch
export-nr_free_buffer_pages.patch
fmode-exec-2.6-sles10.patch
quota-large-limits-sles10.patch
+md-mmp-unplug-dev-sles10.patch
export-show_task-2.6.18-vanilla.patch
sd_iostats-2.6.22-vanilla.patch
quota-large-limits-rhel5.patch
+md-mmp-unplug-dev.patch
*descr = whole_file;
descr->cld_obj = clob;
descr->cld_mode = CLM_PHANTOM;
- /* The lockreq for glimpse should be mandatory,
- * otherwise, osc may decide to use lockless */
- io->ci_lockreq = CILR_MANDATORY;
cio->cui_glimpse = 1;
- lock = cl_lock_request(env, io, descr, CEF_ASYNC,
+ /*
+ * CEF_ASYNC is used because glimpse sub-locks cannot
+ * deadlock (because they never conflict with other
+ * locks) and, hence, can be enqueued out-of-order.
+ *
+ * CEF_MUST protects glimpse lock from conversion into
+ * a lockless mode.
+ */
+ lock = cl_lock_request(env, io, descr,
+ CEF_ASYNC|CEF_MUST,
"glimpse", cfs_current());
cio->cui_glimpse = 0;
if (!IS_ERR(lock)) {
}
/**
- * Implements cl_lock_operations::clo_state() method for vvp layer, invoked
+ * Implements cl_lock_operations::clo_state() method for ccc layer, invoked
* whenever lock state changes. Transfers object attributes, that might be
* updated as a result of lock acquiring into inode.
*/
#
MOSTLYCLEANFILES := @MOSTLYCLEANFILES@
-DIST_SOURCES = ldlm_extent.c ldlm_flock.c ldlm_internal.h ldlm_lib.c \
+EXTRA_DIST = ldlm_extent.c ldlm_flock.c ldlm_internal.h ldlm_lib.c \
ldlm_lock.c ldlm_lockd.c ldlm_plain.c ldlm_request.c \
ldlm_resource.c l_lock.c ldlm_inodebits.c ldlm_pool.c \
interval_tree.c
int flags);
int ldlm_cancel_lru_local(struct ldlm_namespace *ns, struct list_head *cancels,
int count, int max, int cancel_flags, int flags);
-int ldlm_cancel_lru_estimate(struct ldlm_namespace *ns, int count, int max,
- int flags);
extern int ldlm_enqueue_min;
int ldlm_get_enq_timeout(struct ldlm_lock *lock);
/* ->o_connect() method for client side (OSC and MDC and MGC) */
int client_connect_import(const struct lu_env *env,
- struct lustre_handle *dlm_handle,
+ struct obd_export **exp,
struct obd_device *obd, struct obd_uuid *cluuid,
struct obd_connect_data *data, void *localdata)
{
struct client_obd *cli = &obd->u.cli;
struct obd_import *imp = cli->cl_import;
- struct obd_export *exp;
struct obd_connect_data *ocd;
struct ldlm_namespace *to_be_freed = NULL;
+ struct lustre_handle conn = { 0 };
int rc;
ENTRY;
+ *exp = NULL;
down_write(&cli->cl_sem);
- rc = class_connect(dlm_handle, obd, cluuid);
+ rc = class_connect(&conn, obd, cluuid);
if (rc)
GOTO(out_sem, rc);
+ *exp = class_conn2export(&conn);
+
cli->cl_conn_count++;
if (cli->cl_conn_count > 1)
GOTO(out_sem, rc);
- exp = class_conn2export(dlm_handle);
if (obd->obd_namespace != NULL)
CERROR("already have namespace!\n");
if (obd->obd_namespace == NULL)
GOTO(out_disco, rc = -ENOMEM);
- imp->imp_dlm_handle = *dlm_handle;
+ imp->imp_dlm_handle = conn;
rc = ptlrpc_init_import(imp);
if (rc != 0)
GOTO(out_ldlm, rc);
LASSERT (imp->imp_state == LUSTRE_IMP_DISCON);
GOTO(out_ldlm, rc);
}
- LASSERT(exp->exp_connection);
+ LASSERT((*exp)->exp_connection);
if (data) {
LASSERTF((ocd->ocd_connect_flags & data->ocd_connect_flags) ==
obd->obd_namespace = NULL;
out_disco:
cli->cl_conn_count--;
- class_disconnect(exp);
- } else {
- class_export_put(exp);
+ class_disconnect(*exp);
+ *exp = NULL;
}
out_sem:
up_write(&cli->cl_sem);
to_be_freed = obd->obd_namespace;
}
+ /*
+ * there is no need to hold the sem while disconnecting an import,
+ * and in fact holding it may cause a deadlock in gss.
+ */
+ up_write(&cli->cl_sem);
rc = ptlrpc_disconnect_import(imp, 0);
+ down_write(&cli->cl_sem);
ptlrpc_invalidate_import(imp);
/* set obd_namespace to NULL only after invalidate, because we can have
* from old lib/target.c
* -------------------------------------------------------------------------- */
-int target_handle_reconnect(struct lustre_handle *conn, struct obd_export *exp,
- struct obd_uuid *cluuid, int initial_conn)
+static int target_handle_reconnect(struct lustre_handle *conn,
+ struct obd_export *exp,
+ struct obd_uuid *cluuid)
{
ENTRY;
- if (exp->exp_connection && exp->exp_imp_reverse && !initial_conn) {
+ if (exp->exp_connection && exp->exp_imp_reverse) {
struct lustre_handle *hdl;
hdl = &exp->exp_imp_reverse->imp_remote_handle;
/* Might be a re-connect after a partition. */
struct obd_uuid remote_uuid;
char *str;
int rc = 0;
- int initial_conn = 0;
+ int mds_conn = 0;
struct obd_connect_data *data, *tmpdata;
lnet_nid_t *client_nid = NULL;
ENTRY;
}
}
- if (lustre_msg_get_op_flags(req->rq_reqmsg) & MSG_CONNECT_INITIAL)
- initial_conn = 1;
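+ /* an initial connect that also carries OBD_CONNECT_MDS is an
+ * MDS (re)connecting after failover */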
+ if ((lustre_msg_get_op_flags(req->rq_reqmsg) & MSG_CONNECT_INITIAL) &&
+ (data->ocd_connect_flags & OBD_CONNECT_MDS))
+ mds_conn = 1;
/* lctl gets a backstage, all-access pass. */
if (obd_uuid_equals(&cluuid, &target->obd_uuid))
goto dont_check_exports;
- spin_lock(&target->obd_dev_lock);
export = lustre_hash_lookup(target->obd_uuid_hash, &cluuid);
- if (export != NULL && export->exp_connecting) { /* bug 9635, et. al. */
+ if (export != NULL && mds_conn) {
+ /* mds reconnected after failover */
+ class_fail_export(export);
+ CWARN("%s: received MDS connection from NID %s,"
+ " removing former export from NID %s\n",
+ target->obd_name, libcfs_nid2str(req->rq_peer.nid),
+ libcfs_nid2str(export->exp_connection->c_peer.nid));
+ class_export_put(export);
+ export = NULL;
+ rc = 0;
+ } else if (export != NULL && export->exp_connecting) { /* bug 9635, et al. */
CWARN("%s: exp %p already connecting\n",
export->exp_obd->obd_name, export);
class_export_put(export);
rc = -EALREADY;
} else if (export != NULL && export->exp_connection != NULL &&
req->rq_peer.nid != export->exp_connection->c_peer.nid) {
- /* make darn sure this is coming from the same peer
- * if the UUIDs matched */
- if (data && data->ocd_connect_flags & OBD_CONNECT_MDS) {
- /* the MDS UUID can be reused, don't need to wait
- * for the export to be evicted */
- CWARN("%s: received MDS connection from a new NID %s,"
- " removing former export from NID %s\n",
- target->obd_name,
- libcfs_nid2str(req->rq_peer.nid),
- libcfs_nid2str(export->exp_connection->c_peer.nid));
- class_fail_export(export);
- } else {
- CWARN("%s: cookie %s seen on new NID %s when "
- "existing NID %s is already connected\n",
- target->obd_name, cluuid.uuid,
- libcfs_nid2str(req->rq_peer.nid),
- libcfs_nid2str(export->exp_connection->c_peer.nid));
- rc = -EALREADY;
- }
+ /* in mds failover the uuid is static but the nid can
+ * change */
+ CWARN("%s: cookie %s seen on new NID %s when "
+ "existing NID %s is already connected\n",
+ target->obd_name, cluuid.uuid,
+ libcfs_nid2str(req->rq_peer.nid),
+ libcfs_nid2str(export->exp_connection->c_peer.nid));
+ rc = -EALREADY;
class_export_put(export);
export = NULL;
} else if (export != NULL) {
export->exp_connecting = 1;
spin_unlock(&export->exp_lock);
class_export_put(export);
- spin_unlock(&target->obd_dev_lock);
LASSERT(export->exp_obd == target);
- rc = target_handle_reconnect(&conn, export, &cluuid, initial_conn);
+ rc = target_handle_reconnect(&conn, export, &cluuid);
}
/* If we found an export, we already unlocked. */
if (!export) {
- spin_unlock(&target->obd_dev_lock);
OBD_FAIL_TIMEOUT(OBD_FAIL_TGT_DELAY_CONNECT, 2 * obd_timeout);
} else if (req->rq_export == NULL &&
atomic_read(&export->exp_rpc_count) > 0) {
libcfs_nid2str(req->rq_peer.nid),
export, atomic_read(&export->exp_rpc_count));
GOTO(out, rc = -EBUSY);
- } else if (lustre_msg_get_conn_cnt(req->rq_reqmsg) == 1 &&
- !initial_conn) {
+ } else if (lustre_msg_get_conn_cnt(req->rq_reqmsg) == 1) {
CERROR("%s: NID %s (%s) reconnected with 1 conn_cnt; "
"cookies not random?\n", target->obd_name,
libcfs_nid2str(req->rq_peer.nid), cluuid.uuid);
GOTO(out, rc = -EALREADY);
} else {
OBD_FAIL_TIMEOUT(OBD_FAIL_TGT_DELAY_RECONNECT, 2 * obd_timeout);
- if (req->rq_export == NULL && initial_conn)
- export->exp_last_request_time =
- max(export->exp_last_request_time,
- (time_t)cfs_time_current_sec());
}
if (rc < 0) {
} else {
dont_check_exports:
rc = obd_connect(req->rq_svc_thread->t_env,
- &conn, target, &cluuid, data,
+ &export, target, &cluuid, data,
client_nid);
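+ /* obd_connect() now hands back the export itself; publish
+ * its handle cookie in the reply */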
+ if (rc == 0)
+ conn.cookie = export->exp_handle.h_cookie;
}
} else {
rc = obd_reconnect(req->rq_svc_thread->t_env,
export, target, &cluuid, data, client_nid);
+ if (rc == 0)
+ /* previously this reference was taken via class_conn2export */
+ class_export_get(export);
}
if (rc)
GOTO(out, rc);
lustre_msg_set_handle(req->rq_repmsg, &conn);
- /* ownership of this export ref transfers to the request AFTER we
- * drop any previous reference the request had, but we don't want
- * that to go to zero before we get our new export reference. */
- export = class_conn2export(&conn);
- if (!export) {
- DEBUG_REQ(D_ERROR, req, "Missing export!");
- GOTO(out, rc = -ENODEV);
- }
-
/* If the client and the server are the same node, we will already
* have an export that really points to the client's DLM export,
* because we have a shared handles table.
req->rq_export = export;
spin_lock(&export->exp_lock);
- if (initial_conn) {
- lustre_msg_set_conn_cnt(req->rq_repmsg, export->exp_conn_cnt + 1);
- } else if (export->exp_conn_cnt >= lustre_msg_get_conn_cnt(req->rq_reqmsg)) {
+ if (export->exp_conn_cnt >= lustre_msg_get_conn_cnt(req->rq_reqmsg)) {
spin_unlock(&export->exp_lock);
CERROR("%s: %s already connected at higher conn_cnt: %d > %d\n",
cluuid.uuid, libcfs_nid2str(req->rq_peer.nid),
else
revimp->imp_msghdr_flags &= ~MSGHDR_AT_SUPPORT;
- rc = sptlrpc_import_sec_adapt(revimp, req->rq_svc_ctx,
- req->rq_flvr.sf_rpc);
+ rc = sptlrpc_import_sec_adapt(revimp, req->rq_svc_ctx, &req->rq_flvr);
if (rc) {
CERROR("Failed to get sec for reverse import: %d\n", rc);
export->exp_imp_reverse = NULL;
"evict them\n", obd->obd_connected_clients,
obd->obd_max_recoverable_clients);
obd->obd_abort_recovery = obd->obd_stopping;
- class_disconnect_stale_exports(obd, connect_done);
+ class_disconnect_stale_exports(obd, connect_done,
+ exp_flags_from_obd(obd) |
+ OBD_OPT_ABORT_RECOV);
}
/* next stage: replay requests */
delta = jiffies;
if (obd->obd_abort_recovery) {
CDEBUG(D_ERROR, "req replay timed out, aborting ...\n");
obd->obd_abort_recovery = obd->obd_stopping;
- class_disconnect_stale_exports(obd, req_replay_done);
+ class_disconnect_stale_exports(obd, req_replay_done,
+ exp_flags_from_obd(obd) |
+ OBD_OPT_ABORT_RECOV);
abort_req_replay_queue(obd);
}
int stale;
CERROR("lock replay timed out, aborting ...\n");
obd->obd_abort_recovery = obd->obd_stopping;
- stale = class_disconnect_stale_exports(obd, lock_replay_done);
+ stale = class_disconnect_stale_exports(obd, lock_replay_done,
+ exp_flags_from_obd(obd) |
+ OBD_OPT_ABORT_RECOV);
abort_lock_replay_queue(obd);
}
struct ldlm_namespace *ns = lock->l_resource->lr_namespace;
LASSERT(lock->l_resource->lr_type != LDLM_FLOCK);
list_del_init(&lock->l_lru);
+ LASSERT(ns->ns_nr_unused > 0);
ns->ns_nr_unused--;
- LASSERT(ns->ns_nr_unused >= 0);
rc = 1;
}
return rc;
return NULL;
}
-void ldlm_lock_allow_match(struct ldlm_lock *lock)
+void ldlm_lock_allow_match_locked(struct ldlm_lock *lock)
{
- lock_res_and_lock(lock);
lock->l_flags |= LDLM_FL_LVB_READY;
cfs_waitq_signal(&lock->l_waitq);
+}
+
+void ldlm_lock_allow_match(struct ldlm_lock *lock)
+{
+ lock_res_and_lock(lock);
+ ldlm_lock_allow_match_locked(lock);
unlock_res_and_lock(lock);
}
struct ldlm_interval *node = NULL;
ENTRY;
- do_gettimeofday(&lock->l_enqueued_time);
+ lock->l_last_activity = cfs_time_current_sec();
/* policies are not executed on the client or during replay */
if ((*flags & (LDLM_FL_HAS_INTENT|LDLM_FL_REPLAY)) == LDLM_FL_HAS_INTENT
&& !local && ns->ns_policy) {
lock->l_resource->lr_namespace->ns_timeouts++;
LDLM_ERROR(lock, "lock callback timer expired after %lds: "
"evicting client at %s ",
- cfs_time_current_sec()- lock->l_enqueued_time.tv_sec,
+ cfs_time_current_sec() - lock->l_last_activity,
libcfs_nid2str(
lock->l_export->exp_connection->c_peer.nid));
if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_HPREQ_NOTIMEOUT) ||
OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_HPREQ_TIMEOUT))
- seconds = 2;
+ seconds = 1;
timeout = cfs_time_shift(seconds);
if (likely(cfs_time_after(timeout, lock->l_callback_timeout)))
struct ldlm_cb_set_arg *arg = data;
struct ldlm_request *body;
struct ptlrpc_request *req;
- struct timeval granted_time;
long total_enqueue_wait;
int instant_cancel = 0;
int rc = 0;
LASSERT(lock != NULL);
LASSERT(data != NULL);
- do_gettimeofday(&granted_time);
- total_enqueue_wait = cfs_timeval_sub(&granted_time,
- &lock->l_enqueued_time, NULL);
+ total_enqueue_wait = cfs_time_sub(cfs_time_current_sec(),
+ lock->l_last_activity);
- if (total_enqueue_wait / ONE_MILLION > obd_timeout)
+ if (total_enqueue_wait > obd_timeout)
/* non-fatal with AT - change to LDLM_DEBUG? */
- LDLM_ERROR(lock, "enqueue wait took %luus from "CFS_TIME_T,
- total_enqueue_wait, lock->l_enqueued_time.tv_sec);
+ LDLM_WARN(lock, "enqueue wait took %lus from "CFS_TIME_T,
+ total_enqueue_wait, lock->l_last_activity);
req = ptlrpc_request_alloc(lock->l_export->exp_imp_reverse,
&RQF_LDLM_CP_CALLBACK);
unlock_res_and_lock(lock);
}
- LDLM_DEBUG(lock, "server preparing completion AST (after %ldus wait)",
+ LDLM_DEBUG(lock, "server preparing completion AST (after %lds wait)",
total_enqueue_wait);
/* Server-side enqueue wait time estimate, used in
__ldlm_add_waiting_lock to set future enqueue timers */
at_add(&lock->l_resource->lr_namespace->ns_at_estimate,
- total_enqueue_wait / ONE_MILLION);
+ total_enqueue_wait);
ptlrpc_request_set_replen(req);
lock_res_and_lock(lock);
if (lock->l_flags & LDLM_FL_AST_SENT) {
body->lock_flags |= LDLM_FL_AST_SENT;
+ /* copy ast flags like LDLM_FL_DISCARD_DATA */
+ body->lock_flags |= (lock->l_flags & LDLM_AST_FLAGS);
/* We might get here prior to ldlm_handle_enqueue setting
* LDLM_FL_CANCEL_ON_BLOCK flag. Then we will put this lock
if (!lock)
GOTO(out, rc = -ENOMEM);
- do_gettimeofday(&lock->l_enqueued_time);
+ lock->l_last_activity = cfs_time_current_sec();
lock->l_remote_handle = dlm_req->lock_handle[0];
LDLM_DEBUG(lock, "server-side enqueue handler, new lock created");
LDLM_DEBUG(lock, "server-side convert handler START");
- do_gettimeofday(&lock->l_enqueued_time);
+ lock->l_last_activity = cfs_time_current_sec();
res = ldlm_lock_convert(lock, dlm_req->lock_desc.l_req_mode,
&dlm_rep->lock_flags);
if (res) {
RETURN(0);
}
- if ((lock->l_flags & LDLM_FL_FAIL_LOC) &&
+ if ((lock->l_flags & LDLM_FL_FAIL_LOC) &&
lustre_msg_get_opc(req->rq_reqmsg) == LDLM_BL_CALLBACK)
OBD_RACE(OBD_FAIL_LDLM_CP_BL_RACE);
return -ENOMEM;
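+ /* with SLAB_DESTROY_BY_RCU the slab pages are returned to the
+ * VM only after an RCU grace period, so lock memory stays
+ * type-stable for lockless readers */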
ldlm_lock_slab = cfs_mem_cache_create("ldlm_locks",
- sizeof(struct ldlm_lock), 0,
- SLAB_HWCACHE_ALIGN);
+ sizeof(struct ldlm_lock), 0,
+ SLAB_HWCACHE_ALIGN | SLAB_DESTROY_BY_RCU);
if (ldlm_lock_slab == NULL) {
cfs_mem_cache_destroy(ldlm_resource_slab);
return -ENOMEM;
EXPORT_SYMBOL(ldlm_lock_dump_handle);
EXPORT_SYMBOL(ldlm_cancel_locks_for_export);
EXPORT_SYMBOL(ldlm_reprocess_all_ns);
+EXPORT_SYMBOL(ldlm_lock_allow_match_locked);
EXPORT_SYMBOL(ldlm_lock_allow_match);
EXPORT_SYMBOL(ldlm_lock_downgrade);
EXPORT_SYMBOL(ldlm_lock_convert);
int nr, unsigned int gfp_mask)
{
__u32 limit;
- ENTRY;
/*
* VM is asking how many entries may be potentially freed.
*/
if (nr == 0)
- RETURN(atomic_read(&pl->pl_granted));
+ return atomic_read(&pl->pl_granted);
/*
* Client already canceled locks but server is already in shrinker
* We did not really free any memory here so far, it only will be
* freed later may be, so that we return 0 to not confuse VM.
*/
- RETURN(0);
+ return 0;
}
/**
* It may be called when SLV has changed much, this is why we do not
* take into account pl->pl_recalc_time here.
*/
- RETURN(ldlm_cancel_lru(ldlm_pl2ns(pl), 0, LDLM_ASYNC,
+ RETURN(ldlm_cancel_lru(ldlm_pl2ns(pl), 0, LDLM_SYNC,
LDLM_CANCEL_LRUR));
}
static int ldlm_cli_pool_shrink(struct ldlm_pool *pl,
int nr, unsigned int gfp_mask)
{
- ENTRY;
+ struct ldlm_namespace *ns;
+ int canceled = 0, unused;
+
+ ns = ldlm_pl2ns(pl);
/*
* Do not cancel locks in case lru resize is disabled for this ns.
*/
- if (!ns_connect_lru_resize(ldlm_pl2ns(pl)))
+ if (!ns_connect_lru_resize(ns))
RETURN(0);
/*
*/
ldlm_cli_pool_pop_slv(pl);
+ spin_lock(&ns->ns_unused_lock);
+ unused = ns->ns_nr_unused;
+ spin_unlock(&ns->ns_unused_lock);
+
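+ /* nr == 0 means the VM only wants an estimate of how many
+ * locks could be reclaimed; cancel locks only when nr > 0 */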
+ if (nr) {
+ canceled = ldlm_cancel_lru(ns, nr, LDLM_SYNC,
+ LDLM_CANCEL_SHRINK);
+ }
+#ifdef __KERNEL__
/*
- * Find out how many locks may be released according to shrink
- * policy.
- */
- if (nr == 0)
- RETURN(ldlm_cancel_lru_estimate(ldlm_pl2ns(pl), 0, 0,
- LDLM_CANCEL_SHRINK));
-
- /*
- * Cancel @nr locks accoding to shrink policy.
+ * Return the number of potentially reclaimable locks.
*/
- RETURN(ldlm_cancel_lru(ldlm_pl2ns(pl), nr, LDLM_SYNC,
- LDLM_CANCEL_SHRINK));
+ return ((unused - canceled) / 100) * sysctl_vfs_cache_pressure;
+#else
+ return unused - canceled;
+#endif
}
struct ldlm_pool_ops ldlm_srv_pool_ops = {
LDLM_ERROR(lock, "lock timed out (enqueued at "CFS_TIME_T", "
CFS_DURATION_T"s ago); not entering recovery in "
"server code, just going back to sleep",
- lock->l_enqueued_time.tv_sec,
+ lock->l_last_activity,
cfs_time_sub(cfs_time_current_sec(),
- lock->l_enqueued_time.tv_sec));
+ lock->l_last_activity));
if (cfs_time_after(cfs_time_current(), next_dump)) {
last_dump = next_dump;
next_dump = cfs_time_shift(300);
ptlrpc_fail_import(imp, lwd->lwd_conn_cnt);
LDLM_ERROR(lock, "lock timed out (enqueued at "CFS_TIME_T", "
CFS_DURATION_T"s ago), entering recovery for %s@%s",
- lock->l_enqueued_time.tv_sec,
- cfs_time_sub(cfs_time_current_sec(),
- lock->l_enqueued_time.tv_sec),
+ lock->l_last_activity,
+ cfs_time_sub(cfs_time_current_sec(), lock->l_last_activity),
obd2cli_tgt(obd), imp->imp_connection->c_remote_uuid.uuid);
RETURN(0);
result = -EIO;
} else {
delay = cfs_time_sub(cfs_time_current_sec(),
- lock->l_enqueued_time.tv_sec);
+ lock->l_last_activity);
LDLM_DEBUG(lock, "client-side enqueue: granted after "
CFS_DURATION_T"s", delay);
}
/**
- * Callback function for shrink policy. Makes decision whether to keep
- * \a lock in LRU for current \a LRU size \a unused, added in current scan
- * \a added and number of locks to be preferably canceled \a count.
- *
- * \retval LDLM_POLICY_KEEP_LOCK keep lock in LRU in stop scanning
- *
- * \retval LDLM_POLICY_CANCEL_LOCK cancel lock from LRU
- */
-static ldlm_policy_res_t ldlm_cancel_shrink_policy(struct ldlm_namespace *ns,
- struct ldlm_lock *lock,
- int unused, int added,
- int count)
-{
- int lock_cost;
- __u64 page_nr;
-
- /*
- * Stop lru processing when we reached passed @count or checked all
- * locks in lru.
- */
- if (count && added >= count)
- return LDLM_POLICY_KEEP_LOCK;
-
- if (lock->l_resource->lr_type == LDLM_EXTENT) {
- if (lock->l_weigh_ast) {
- /*
- * For liblustre, l_weigh_ast should return 0 since it
- * don't cache pages
- */
- page_nr = lock->l_weigh_ast(lock);
- } else {
- struct ldlm_extent *l_extent;
-
- /*
- * For all extent locks cost is 1 + number of pages in
- * their extent.
- */
- l_extent = &lock->l_policy_data.l_extent;
- page_nr = l_extent->end - l_extent->start;
- do_div(page_nr, CFS_PAGE_SIZE);
- }
- lock_cost = 1 + page_nr;
- } else {
- /*
- * For all locks which are not extent ones cost is 1
- */
- lock_cost = 1;
- }
-
- /*
- * Keep all expensive locks in lru for the memory pressure time
- * cancel policy. They anyways may be canceled by lru resize
- * pplicy if they have not small enough CLV.
- */
- return lock_cost > ns->ns_shrink_thumb ?
- LDLM_POLICY_KEEP_LOCK : LDLM_POLICY_CANCEL_LOCK;
-}
-
-/**
* Callback function for lru-resize policy. Makes decision whether to keep
* \a lock in LRU for current \a LRU size \a unused, added in current scan
* \a added and number of locks to be preferably canceled \a count.
{
if (ns_connect_lru_resize(ns)) {
if (flags & LDLM_CANCEL_SHRINK)
- return ldlm_cancel_shrink_policy;
+ /* We cancel the passed number of old locks. */
+ return ldlm_cancel_passed_policy;
else if (flags & LDLM_CANCEL_LRUR)
return ldlm_cancel_lrur_policy;
else if (flags & LDLM_CANCEL_PASSED)
RETURN(ldlm_cancel_list(cancels, added, cancel_flags));
}
-/* Returns number of locks which could be canceled next time when
- * ldlm_cancel_lru() is called. Used from locks pool shrinker. */
-int ldlm_cancel_lru_estimate(struct ldlm_namespace *ns,
- int count, int max, int flags)
-{
- struct list_head disp = CFS_LIST_HEAD_INIT(disp);
- ldlm_cancel_lru_policy_t pf;
- struct ldlm_lock *lock;
- int added = 0, unused;
- int loop_stop = 0;
- ENTRY;
-
- pf = ldlm_cancel_lru_policy(ns, flags);
- LASSERT(pf != NULL);
- spin_lock(&ns->ns_unused_lock);
- unused = ns->ns_nr_unused;
- list_splice_init(&ns->ns_unused_list, &disp);
- while (!list_empty(&disp)) {
- lock = list_entry(disp.next, struct ldlm_lock, l_lru);
- list_move_tail(&lock->l_lru, &ns->ns_unused_list);
-
- /* For any flags, stop scanning if @max is reached. */
- if (max && added >= max)
- break;
-
- /* Somebody is already doing CANCEL or there is a
- * blocking request will send cancel. Let's not count
- * this lock. */
- if ((lock->l_flags & LDLM_FL_CANCELING) ||
- (lock->l_flags & LDLM_FL_BL_AST))
- continue;
-
- LDLM_LOCK_GET(lock);
- spin_unlock(&ns->ns_unused_lock);
- lu_ref_add(&lock->l_reference, __FUNCTION__, cfs_current());
-
- /* Pass the lock through the policy filter and see if it
- * should stay in lru. */
- if (pf(ns, lock, unused, added, count) == LDLM_POLICY_KEEP_LOCK)
- loop_stop = 1;
-
- lu_ref_del(&lock->l_reference, __FUNCTION__, cfs_current());
- LDLM_LOCK_RELEASE(lock);
- spin_lock(&ns->ns_unused_lock);
- if (loop_stop)
- break;
-
- added++;
- unused--;
- }
- list_splice(&disp, ns->ns_unused_list.prev);
- spin_unlock(&ns->ns_unused_lock);
- RETURN(added);
-}
-
/* when called with LDLM_ASYNC the blocking callback will be handled
* in a thread and this function will return after the thread has been
* asked to call the callback. when called with LDLM_SYNC the blocking
RETURN(count);
}
- /* If an error occured in ASYNC mode, or
- * this is SYNC mode, cancel the list. */
+ /* If an error occurred in ASYNC mode, or this is SYNC mode,
+ * cancel the list. */
ldlm_cli_cancel_list(&cancels, count, NULL, 0);
RETURN(count);
}
lock_vars[0].write_fptr = lprocfs_wr_lru_size;
lprocfs_add_vars(ldlm_ns_proc_dir, lock_vars, 0);
- snprintf(lock_name, MAX_STRING_SIZE, "%s/shrink_thumb",
- ns->ns_name);
- lock_vars[0].data = ns;
- lock_vars[0].read_fptr = lprocfs_rd_uint;
- lock_vars[0].write_fptr = lprocfs_wr_uint;
- lprocfs_add_vars(ldlm_ns_proc_dir, lock_vars, 0);
-
snprintf(lock_name, MAX_STRING_SIZE, "%s/lru_max_age",
ns->ns_name);
lock_vars[0].data = &ns->ns_max_age;
if (!ns->ns_hash)
GOTO(out_ns, NULL);
- ns->ns_shrink_thumb = LDLM_LOCK_SHRINK_THUMB;
ns->ns_appetite = apt;
LASSERT(obd != NULL);
struct lustre_cfg *lcfg;
char *peer = "MGS_UUID";
struct obd_device *obd;
- struct lustre_handle mgc_conn = {0, };
struct obd_export *exp;
char *name = "mgc_dev";
class_uuid_t uuid;
#endif
ocd->ocd_version = LUSTRE_VERSION_CODE;
- rc = obd_connect(NULL, &mgc_conn, obd, &mgc_uuid, ocd, NULL);
+ rc = obd_connect(NULL, &exp, obd, &mgc_uuid, ocd, NULL);
if (rc) {
CERROR("cannot connect to %s at %s: rc = %d\n",
LUSTRE_MGS_OBDNAME, mgsnid, rc);
GOTO(out_cleanup, rc);
}
- exp = class_conn2export(&mgc_conn);
-
ctxt = llog_get_context(exp->exp_obd, LLOG_CONFIG_REPL_CTXT);
cfg->cfg_flags |= CFG_F_COMPAT146;
rc = class_config_parse_llog(ctxt, profile, cfg);
struct obd_statfs osfs;
static struct qstr noname = { NULL, 0, 0 };
struct ptlrpc_request *request = NULL;
- struct lustre_handle md_conn = {0, };
- struct lustre_handle dt_conn = {0, };
struct lustre_md md;
class_uuid_t uuid;
struct config_llog_instance cfg = {0, };
ocd.ocd_version = LUSTRE_VERSION_CODE;
/* setup mdc */
- err = obd_connect(NULL, &md_conn, obd, &sbi->ll_sb_uuid, &ocd, NULL);
+ err = obd_connect(NULL, &sbi->ll_md_exp, obd, &sbi->ll_sb_uuid, &ocd, NULL);
if (err) {
CERROR("cannot connect to %s: rc = %d\n", mdc, err);
GOTO(out_free, err);
}
- sbi->ll_md_exp = class_conn2export(&md_conn);
err = obd_statfs(obd, &osfs, 100000000, 0);
if (err)
OBD_CONNECT_VERSION | OBD_CONNECT_TRUNCLOCK |
OBD_CONNECT_FID | OBD_CONNECT_AT;
ocd.ocd_version = LUSTRE_VERSION_CODE;
- err = obd_connect(NULL, &dt_conn, obd, &sbi->ll_sb_uuid, &ocd, NULL);
+ err = obd_connect(NULL, &sbi->ll_dt_exp, obd, &sbi->ll_sb_uuid, &ocd, NULL);
if (err) {
CERROR("cannot connect to %s: rc = %d\n", osc, err);
GOTO(out_md, err);
}
- sbi->ll_dt_exp = class_conn2export(&dt_conn);
sbi->ll_lco.lco_flags = ocd.ocd_connect_flags;
sbi->ll_lco.lco_md_exp = sbi->ll_md_exp;
sbi->ll_lco.lco_dt_exp = sbi->ll_dt_exp;
llite_lloop-objs := lloop.o
+EXTRA_DIST := $(lustre-objs:.o=.c) llite_internal.h rw26.c super25.c
+EXTRA_DIST += $(llite_lloop-objs:.o=.c)
+EXTRA_DIST += vvp_internal.h
+
@INCLUDE_RULES@
modulefs_DATA = lustre$(KMODEXT) llite_lloop$(KMODEXT)
endif
-DIST_SOURCES := $(lustre-objs:.o=.c) llite_internal.h rw26.c super25.c
-DIST_SOURCES += $(llite_lloop-objs:.o=.c)
-DIST_SOURCES += vvp_internal.h
MOSTLYCLEANFILES := @MOSTLYCLEANFILES@
if (rc != -ESTALE) {
CDEBUG(D_INFO, "ll_intent_lock: rc %d : it->it_status "
"%d\n", rc, it->d.lustre.it_status);
+ } else {
+#ifndef HAVE_VFS_INTENT_PATCHES
+ if (it_disposition(it, DISP_OPEN_OPEN) &&
+ !it_open_error(DISP_OPEN_OPEN, it))
+ /* server has a valid open handle - close the file first */
+ ll_release_openhandle(de, it);
+#endif
}
GOTO(out, rc = 0);
}
* nd->intent.open.file for error, so we need to return it as lookup's result
* instead */
if (IS_ERR(filp))
- rc = 0;
+ rc = PTR_ERR(filp);
#endif
}
#else
return rc;
}
-/* Fills the obdo with the attributes for the inode defined by lsm */
-int ll_inode_getattr(struct inode *inode, struct obdo *obdo)
+/* Fills the obdo with the attributes for the lsm */
+static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp,
+ struct obd_capa *capa, struct obdo *obdo)
{
struct ptlrpc_request_set *set;
- struct ll_inode_info *lli = ll_i2info(inode);
- struct lov_stripe_md *lsm = lli->lli_smd;
+ struct obd_info oinfo = { { { 0 } } };
+ int rc;
- struct obd_info oinfo = { { { 0 } } };
- int rc;
ENTRY;
LASSERT(lsm != NULL);
OBD_MD_FLBLKSZ | OBD_MD_FLATIME |
OBD_MD_FLMTIME | OBD_MD_FLCTIME |
OBD_MD_FLGROUP;
- oinfo.oi_capa = ll_mdscapa_get(inode);
+ oinfo.oi_capa = capa;
set = ptlrpc_prep_set();
if (set == NULL) {
CERROR("can't allocate ptlrpc set\n");
rc = -ENOMEM;
} else {
- rc = obd_getattr_async(ll_i2dtexp(inode), &oinfo, set);
+ rc = obd_getattr_async(exp, &oinfo, set);
if (rc == 0)
rc = ptlrpc_set_wait(set);
ptlrpc_set_destroy(set);
}
- capa_put(oinfo.oi_capa);
- if (rc)
- RETURN(rc);
+ if (rc == 0)
+ oinfo.oi_oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
+ OBD_MD_FLATIME | OBD_MD_FLMTIME |
+ OBD_MD_FLCTIME | OBD_MD_FLSIZE);
+ RETURN(rc);
+}
- oinfo.oi_oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
- OBD_MD_FLATIME | OBD_MD_FLMTIME |
- OBD_MD_FLCTIME | OBD_MD_FLSIZE);
+/* Fills the obdo with the attributes for the inode defined by lsm */
+int ll_inode_getattr(struct inode *inode, struct obdo *obdo)
+{
+ struct ll_inode_info *lli = ll_i2info(inode);
+ struct obd_capa *capa = ll_mdscapa_get(inode);
+ int rc;
+ ENTRY;
- obdo_refresh_inode(inode, oinfo.oi_oa, oinfo.oi_oa->o_valid);
- CDEBUG(D_INODE, "objid "LPX64" size %Lu, blocks %llu, blksize %lu\n",
- lli->lli_smd->lsm_object_id, i_size_read(inode),
- (unsigned long long)inode->i_blocks,
- (unsigned long)ll_inode_blksize(inode));
- RETURN(0);
+ rc = ll_lsm_getattr(lli->lli_smd, ll_i2dtexp(inode), capa, obdo);
+ capa_put(capa);
+ if (rc == 0) {
+ obdo_refresh_inode(inode, obdo, obdo->o_valid);
+ CDEBUG(D_INODE,
+ "objid "LPX64" size %Lu, blocks %llu, blksize %lu\n",
+ lli->lli_smd->lsm_object_id, i_size_read(inode),
+ (unsigned long long)inode->i_blocks,
+ (unsigned long)ll_inode_blksize(inode));
+ }
+ RETURN(rc);
}
int ll_merge_lvb(struct inode *inode)
int ll_glimpse_ioctl(struct ll_sb_info *sbi, struct lov_stripe_md *lsm,
lstat_t *st)
{
- /* XXX */
- return -ENOSYS;
+ struct obdo obdo = { 0 };
+ int rc;
+
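+ /* fetch up-to-date attributes from the OSTs and translate
+ * them into the stat buffer */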
+ rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, &obdo);
+ if (rc == 0) {
+ st->st_size = obdo.o_size;
+ st->st_blocks = obdo.o_blocks;
+ st->st_mtime = obdo.o_mtime;
+ st->st_atime = obdo.o_atime;
+ st->st_ctime = obdo.o_ctime;
+ }
+ return rc;
}
void ll_io_init(struct cl_io *io, const struct file *file, int write)
struct lustre_handle *lockh);
int ll_file_open(struct inode *inode, struct file *file);
int ll_file_release(struct inode *inode, struct file *file);
-int ll_lsm_getattr(struct obd_export *, struct lov_stripe_md *, struct obdo *);
int ll_glimpse_ioctl(struct ll_sb_info *sbi,
struct lov_stripe_md *lsm, lstat_t *st);
int ll_local_open(struct file *file,
struct obd_capa *cl_capa_lookup(struct inode *inode, enum cl_req_type crt);
+/** direct IO pages */
+struct ll_dio_pages {
+ /** page array to be written. We don't support
+ * partial pages except the last one. */
+ struct page **ldp_pages;
+ /** offset of each page */
+ loff_t *ldp_offsets;
+ /** if ldp_offsets is NULL, the pages are written
+ * sequentially, and this is the file offset of
+ * the first page. */
+ loff_t ldp_start_offset;
+ /** how many bytes are to be written. */
+ size_t ldp_size;
+ /** # of pages in the array. */
+ int ldp_nr;
+};
+
+extern ssize_t ll_direct_rw_pages(const struct lu_env *env, struct cl_io *io,
+ int rw, struct inode *inode,
+ struct ll_dio_pages *pv);
+
#endif /* LLITE_INTERNAL_H */
struct obd_capa *oc = NULL;
struct obd_statfs osfs;
struct ptlrpc_request *request = NULL;
- struct lustre_handle dt_conn = {0, };
- struct lustre_handle md_conn = {0, };
struct obd_connect_data *data = NULL;
struct obd_uuid *uuid;
struct lustre_md lmd;
if (sbi->ll_flags & LL_SBI_RMT_CLIENT)
data->ocd_connect_flags |= OBD_CONNECT_RMT_CLIENT_FORCE;
- err = obd_connect(NULL, &md_conn, obd, &sbi->ll_sb_uuid, data, NULL);
+ err = obd_connect(NULL, &sbi->ll_md_exp, obd, &sbi->ll_sb_uuid, data, NULL);
if (err == -EBUSY) {
LCONSOLE_ERROR_MSG(0x14f, "An MDT (md %s) is performing "
"recovery, of which this client is not a "
CERROR("cannot connect to %s: rc = %d\n", md, err);
GOTO(out, err);
}
- sbi->ll_md_exp = class_conn2export(&md_conn);
err = obd_fid_init(sbi->ll_md_exp);
if (err) {
obd->obd_upcall.onu_upcall = cl_ocd_update;
data->ocd_brw_size = PTLRPC_MAX_BRW_PAGES << CFS_PAGE_SHIFT;
- err = obd_connect(NULL, &dt_conn, obd, &sbi->ll_sb_uuid, data, NULL);
+ err = obd_connect(NULL, &sbi->ll_dt_exp, obd, &sbi->ll_sb_uuid, data, NULL);
if (err == -EBUSY) {
LCONSOLE_ERROR_MSG(0x150, "An OST (dt %s) is performing "
"recovery, of which this client is not a "
GOTO(out_md_fid, err);
}
- sbi->ll_dt_exp = class_conn2export(&dt_conn);
-
err = obd_fid_init(sbi->ll_dt_exp);
if (err) {
CERROR("Can't init data layer FID infrastructure, "
* Copyright 1993 by Theodore Ts'o. Redistribution of this file is
* permitted under the GNU General Public License.
*
- * DES encryption plus some minor changes by Werner Almesberger, 30-MAY-1993
- * more DES encryption plus IDEA encryption by Nicholas J. Leon, June 20, 1996
- *
* Modularized and updated for 1.1.16 kernel - Mitch Dsouza 28th May 1994
* Adapted for 1.3.59 kernel - Andries Brouwer, 1 Feb 1996
*
*
* Loadable modules and other fixes by AK, 1998
*
- * Make real block number available to downstream transfer functions, enables
- * CBC (and relatives) mode encryption requiring unique IVs per data block.
- * Reed H. Petty, rhp@draper.net
- *
* Maximum number of loop devices now dynamic via max_loop module parameter.
* Russell Kroll <rkroll@exploits.org> 19990701
*
};
struct lloop_device {
- int lo_number;
- int lo_refcnt;
- loff_t lo_offset;
- loff_t lo_sizelimit;
- int lo_flags;
+ int lo_number;
+ int lo_refcnt;
+ loff_t lo_offset;
+ loff_t lo_sizelimit;
+ int lo_flags;
int (*ioctl)(struct lloop_device *, int cmd,
- unsigned long arg);
+ unsigned long arg);
- struct file * lo_backing_file;
+ struct file *lo_backing_file;
struct block_device *lo_device;
- unsigned lo_blocksize;
+ unsigned lo_blocksize;
- int old_gfp_mask;
+ int old_gfp_mask;
- spinlock_t lo_lock;
- struct bio *lo_bio;
- struct bio *lo_biotail;
- int lo_state;
- struct semaphore lo_sem;
- struct semaphore lo_ctl_mutex;
- struct semaphore lo_bh_mutex;
- atomic_t lo_pending;
+ spinlock_t lo_lock;
+ struct bio *lo_bio;
+ struct bio *lo_biotail;
+ int lo_state;
+ struct semaphore lo_sem;
+ struct semaphore lo_ctl_mutex;
+ atomic_t lo_pending;
+ wait_queue_head_t lo_bh_wait;
- request_queue_t *lo_queue;
+ request_queue_t *lo_queue;
+
+ const struct lu_env *lo_env;
+ struct cl_io lo_io;
+ struct ll_dio_pages lo_pvec;
/* data to handle bio for lustre. */
struct lo_request_data {
- struct brw_page lrd_pages[LLOOP_MAX_SEGMENTS];
- struct obdo lrd_oa;
+ struct page *lrd_pages[LLOOP_MAX_SEGMENTS];
+ loff_t lrd_offsets[LLOOP_MAX_SEGMENTS];
} lo_requests[1];
-
};
/*
};
static int lloop_major;
-static int max_loop = 8;
+#define MAX_LOOP_DEFAULT 16
+static int max_loop = MAX_LOOP_DEFAULT;
static struct lloop_device *loop_dev;
static struct gendisk **disks;
static struct semaphore lloop_mutex;
return loopsize >> 9;
}
-static int do_bio_filebacked(struct lloop_device *lo, struct bio *bio)
+static int do_bio_lustrebacked(struct lloop_device *lo, struct bio *head)
{
- struct inode *inode = lo->lo_backing_file->f_dentry->d_inode;
- struct ll_inode_info *lli = ll_i2info(inode);
- struct lov_stripe_md *lsm = lli->lli_smd;
- struct obd_info oinfo = {{{ 0 }}};
- struct brw_page *pg = lo->lo_requests[0].lrd_pages;
- struct obdo *oa = &lo->lo_requests[0].lrd_oa;
- pgoff_t offset;
- int ret, cmd, i, opc;
- struct bio_vec *bvec;
-
- BUG_ON(bio->bi_hw_segments > LLOOP_MAX_SEGMENTS);
-
- offset = (pgoff_t)(bio->bi_sector << 9) + lo->lo_offset;
- bio_for_each_segment(bvec, bio, i) {
- BUG_ON(bvec->bv_offset != 0);
- BUG_ON(bvec->bv_len != CFS_PAGE_SIZE);
-
- pg->pg = bvec->bv_page;
- pg->off = offset;
- pg->count = bvec->bv_len;
- pg->flag = OBD_BRW_SRVLOCK;
-
- pg++;
- offset += bvec->bv_len;
+ const struct lu_env *env = lo->lo_env;
+ struct cl_io *io = &lo->lo_io;
+ struct inode *inode = lo->lo_backing_file->f_dentry->d_inode;
+ struct cl_object *obj = ll_i2info(inode)->lli_clob;
+ pgoff_t offset;
+ int ret;
+ int i;
+ int rw;
+ obd_count page_count = 0;
+ struct bio_vec *bvec;
+ struct bio *bio;
+ ssize_t bytes;
+
+ struct ll_dio_pages *pvec = &lo->lo_pvec;
+ struct page **pages = pvec->ldp_pages;
+ loff_t *offsets = pvec->ldp_offsets;
+
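+ /* drop any cached pages of the backing file so the transient
+ * direct IO below does not alias the page cache */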
+ truncate_inode_pages(inode->i_mapping, 0);
+
+ /* initialize the IO */
+ memset(io, 0, sizeof(*io));
+ io->ci_obj = obj;
+ ret = cl_io_init(env, io, CIT_MISC, obj);
+ if (ret)
+ return io->ci_result;
+ io->ci_lockreq = CILR_NEVER;
+
+ LASSERT(head != NULL);
+ rw = head->bi_rw;
+ for (bio = head; bio != NULL; bio = bio->bi_next) {
+ LASSERT(rw == bio->bi_rw);
+
+ offset = (pgoff_t)(bio->bi_sector << 9) + lo->lo_offset;
+ bio_for_each_segment(bvec, bio, i) {
+ BUG_ON(bvec->bv_offset != 0);
+ BUG_ON(bvec->bv_len != CFS_PAGE_SIZE);
+
+ pages[page_count] = bvec->bv_page;
+ offsets[page_count] = offset;
+ page_count++;
+ offset += bvec->bv_len;
+ }
+ LASSERT(page_count <= LLOOP_MAX_SEGMENTS);
}
- oa->o_mode = inode->i_mode;
- oa->o_id = lsm->lsm_object_id;
- oa->o_gr = lsm->lsm_object_gr;
- oa->o_valid = OBD_MD_FLID | OBD_MD_FLMODE |
- OBD_MD_FLTYPE |OBD_MD_FLGROUP;
- obdo_from_inode(oa, inode, OBD_MD_FLFID | OBD_MD_FLGENER);
-
- cmd = OBD_BRW_READ;
- if (bio_rw(bio) == WRITE)
- cmd = OBD_BRW_WRITE;
-
- if (cmd == OBD_BRW_WRITE)
- ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_BRW_WRITE, bio->bi_size);
- else
- ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_BRW_READ, bio->bi_size);
- oinfo.oi_oa = oa;
- oinfo.oi_md = lsm;
- opc = cmd & OBD_BRW_WRITE ? CAPA_OPC_OSS_WRITE : CAPA_OPC_OSS_RW;
- oinfo.oi_capa = ll_osscapa_get(inode, opc);
- ret = obd_brw(cmd, ll_i2dtexp(inode), &oinfo,
- (obd_count)(i - bio->bi_idx),
- lo->lo_requests[0].lrd_pages, NULL);
- capa_put(oinfo.oi_capa);
- if (ret == 0)
- obdo_to_inode(inode, oa, OBD_MD_FLBLOCKS);
- return ret;
+ ll_stats_ops_tally(ll_i2sbi(inode),
+ (rw == WRITE) ? LPROC_LL_BRW_WRITE : LPROC_LL_BRW_READ,
+ page_count << PAGE_CACHE_SHIFT);
+
+ pvec->ldp_size = page_count << PAGE_CACHE_SHIFT;
+ pvec->ldp_nr = page_count;
+
+ /* FIXME: ll_direct_rw_pages has to allocate many cl_page{}s in
+ * order to write these pages out to the OST. An even worse case
+ * is when more pages must be written out to swap space, which
+ * eventually brings us back here again.
+ * Unfortunately this is NOT easy to fix.
+ * Thoughts on a solution:
+ * 0. Define a reserved pool for cl_pages, which could be a list
+ * of pre-allocated cl_pages from cl_page_kmem;
+ * 1. Define a new operation in cl_object_operations{}, say
+ * clo_depth, which measures how many layers this lustre object
+ * has. Generally speaking, the depth would be 2, one for llite
+ * and one for lovsub. However, for SNS there will be more, since
+ * we need an additional page to store parity;
+ * 2. Reserve (page_count * depth) cl_pages from the reserved
+ * pool. Afterwards, clio would allocate pages from the reserved
+ * pool; this guarantees we needn't allocate cl_pages from the
+ * generic cl_page slab cache.
+ * Of course, if there are NOT enough pages in the pool, we might
+ * be asked to write fewer pages at a time; this purely depends
+ * on the implementation. Anyway, we should be careful to avoid
+ * deadlocks.
+ */
+ LOCK_INODE_MUTEX(inode);
+ bytes = ll_direct_rw_pages(env, io, rw, inode, pvec);
+ UNLOCK_INODE_MUTEX(inode);
+ cl_io_fini(env, io);
+ return (bytes == pvec->ldp_size) ? 0 : (int)bytes;
}
-
/*
* Add bio to back of pending list
*/
lo->lo_bio = lo->lo_biotail = bio;
spin_unlock_irqrestore(&lo->lo_lock, flags);
- up(&lo->lo_bh_mutex);
+ atomic_inc(&lo->lo_pending);
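+ /* wake the worker thread only if it is actually sleeping */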
+ if (waitqueue_active(&lo->lo_bh_wait))
+ wake_up(&lo->lo_bh_wait);
}
/*
* Grab first pending buffer
*/
-static struct bio *loop_get_bio(struct lloop_device *lo)
+static unsigned int loop_get_bio(struct lloop_device *lo, struct bio **req)
{
- struct bio *bio;
+ struct bio *first;
+ struct bio **bio;
+ unsigned int count = 0;
+ unsigned int page_count = 0;
+ int rw;
spin_lock_irq(&lo->lo_lock);
- if ((bio = lo->lo_bio)) {
- if (bio == lo->lo_biotail)
- lo->lo_biotail = NULL;
- lo->lo_bio = bio->bi_next;
- bio->bi_next = NULL;
+ first = lo->lo_bio;
+ if (unlikely(first == NULL)) {
+ spin_unlock_irq(&lo->lo_lock);
+ return 0;
}
- spin_unlock_irq(&lo->lo_lock);
- return bio;
+ /* TODO: need to split the bio, too bad. */
+ LASSERT(first->bi_vcnt <= LLOOP_MAX_SEGMENTS);
+
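+ /* gather consecutive bios of the same direction, up to
+ * LLOOP_MAX_SEGMENTS pages in total, into a single request */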
+ rw = first->bi_rw;
+ bio = &lo->lo_bio;
+ while (*bio && (*bio)->bi_rw == rw) {
+ CDEBUG(D_INFO, "bio sector %llu size %u count %u vcnt%u \n",
+ (unsigned long long)(*bio)->bi_sector, (*bio)->bi_size,
+ page_count, (*bio)->bi_vcnt);
+ if (page_count + (*bio)->bi_vcnt > LLOOP_MAX_SEGMENTS)
+ break;
+
+ page_count += (*bio)->bi_vcnt;
+ count++;
+ bio = &(*bio)->bi_next;
+ }
+ if (*bio) {
+ /* Some of the bios can't be merged. */
+ lo->lo_bio = *bio;
+ *bio = NULL;
+ } else {
+ /* Hit the end of queue */
+ lo->lo_biotail = NULL;
+ lo->lo_bio = NULL;
+ }
+ *req = first;
+ spin_unlock_irq(&lo->lo_lock);
+ return count;
}
static int loop_make_request(request_queue_t *q, struct bio *old_bio)
{
struct lloop_device *lo = q->queuedata;
int rw = bio_rw(old_bio);
+ int inactive;
if (!lo)
- goto out;
+ goto err;
+
+ CDEBUG(D_INFO, "submit bio sector %llu size %u\n",
+ (unsigned long long)old_bio->bi_sector, old_bio->bi_size);
spin_lock_irq(&lo->lo_lock);
- if (lo->lo_state != LLOOP_BOUND)
- goto inactive;
- atomic_inc(&lo->lo_pending);
+ inactive = (lo->lo_state != LLOOP_BOUND);
spin_unlock_irq(&lo->lo_lock);
+ if (inactive)
+ goto err;
if (rw == WRITE) {
if (lo->lo_flags & LO_FLAGS_READ_ONLY)
loop_add_bio(lo, old_bio);
return 0;
err:
- if (atomic_dec_and_test(&lo->lo_pending))
- up(&lo->lo_bh_mutex);
-out:
bio_io_error(old_bio, old_bio->bi_size);
return 0;
-inactive:
- spin_unlock_irq(&lo->lo_lock);
- goto out;
}
/*
static inline void loop_handle_bio(struct lloop_device *lo, struct bio *bio)
{
int ret;
- ret = do_bio_filebacked(lo, bio);
- bio_endio(bio, bio->bi_size, ret);
+ ret = do_bio_lustrebacked(lo, bio);
+ while (bio) {
+ struct bio *tmp = bio->bi_next;
+ bio->bi_next = NULL;
+ bio_endio(bio, bio->bi_size, ret);
+ bio = tmp;
+ }
+}
+
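+/* the worker has work if bios are pending or teardown was requested */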
+static inline int loop_active(struct lloop_device *lo)
+{
+ return atomic_read(&lo->lo_pending) || (lo->lo_state == LLOOP_RUNDOWN);
}
/*
* worker thread that handles reads/writes to file backed loop devices,
- * to avoid blocking in our make_request_fn. it also does loop decrypting
- * on reads for block backed loop, as that is too heavy to do from
- * b_end_io context where irqs may be disabled.
+ * to avoid blocking in our make_request_fn.
*/
static int loop_thread(void *data)
{
struct lloop_device *lo = data;
struct bio *bio;
+ unsigned int count;
+ unsigned long times = 0;
+ unsigned long total_count = 0;
+
+ struct lu_env *env;
+ int refcheck;
+ int ret = 0;
daemonize("lloop%d", lo->lo_number);
set_user_nice(current, -20);
lo->lo_state = LLOOP_BOUND;
- atomic_inc(&lo->lo_pending);
+
+ env = cl_env_get(&refcheck);
+ if (IS_ERR(env))
+ GOTO(out, ret = PTR_ERR(env));
+
+ lo->lo_env = env;
+ memset(&lo->lo_pvec, 0, sizeof(lo->lo_pvec));
+ lo->lo_pvec.ldp_pages = lo->lo_requests[0].lrd_pages;
+ lo->lo_pvec.ldp_offsets = lo->lo_requests[0].lrd_offsets;
/*
* up sem, we are running
up(&lo->lo_sem);
for (;;) {
- down_interruptible(&lo->lo_bh_mutex);
- /*
- * could be upped because of tear-down, not because of
- * pending work
- */
- if (!atomic_read(&lo->lo_pending))
- break;
+ wait_event(lo->lo_bh_wait, loop_active(lo));
+ if (!atomic_read(&lo->lo_pending)) {
+ int exiting = 0;
+ spin_lock_irq(&lo->lo_lock);
+ exiting = (lo->lo_state == LLOOP_RUNDOWN);
+ spin_unlock_irq(&lo->lo_lock);
+ if (exiting)
+ break;
+ }
- bio = loop_get_bio(lo);
- if (!bio) {
+ bio = NULL;
+ count = loop_get_bio(lo, &bio);
+ if (!count) {
CWARN("lloop(minor: %d): missing bio\n", lo->lo_number);
continue;
}
- loop_handle_bio(lo, bio);
- /*
- * upped both for pending work and tear-down, lo_pending
- * will hit zero then
- */
- if (atomic_dec_and_test(&lo->lo_pending))
- break;
+ total_count += count;
+ if (total_count < count) { /* overflow */
+ total_count = count;
+ times = 1;
+ } else {
+ times++;
+ }
+ if ((times & 127) == 0) {
+ CDEBUG(D_INFO, "total: %lu, count: %lu, avg: %lu\n",
+ total_count, times, total_count / times);
+ }
+
+ LASSERT(bio != NULL);
+ LASSERT(count <= atomic_read(&lo->lo_pending));
+ loop_handle_bio(lo, bio);
+ atomic_sub(count, &lo->lo_pending);
}
+ cl_env_put(env, &refcheck);
+out:
up(&lo->lo_sem);
- return 0;
+ return ret;
}
static int loop_set_fd(struct lloop_device *lo, struct file *unused,
struct block_device *bdev, struct file *file)
{
- struct inode *inode;
+ struct inode *inode;
struct address_space *mapping;
- int lo_flags = 0;
- int error;
+ int lo_flags = 0;
+ int error;
loff_t size;
if (!try_module_get(THIS_MODULE))
/* queue parameters */
blk_queue_hardsect_size(lo->lo_queue, CFS_PAGE_SIZE);
- blk_queue_max_sectors(lo->lo_queue, LLOOP_MAX_SEGMENTS);
+ blk_queue_max_sectors(lo->lo_queue,
+ LLOOP_MAX_SEGMENTS << (CFS_PAGE_SHIFT - 9));
blk_queue_max_phys_segments(lo->lo_queue, LLOOP_MAX_SEGMENTS);
+ blk_queue_max_hw_segments(lo->lo_queue, LLOOP_MAX_SEGMENTS);
set_capacity(disks[lo->lo_number], size);
bd_set_size(bdev, size << 9);
spin_lock_irq(&lo->lo_lock);
lo->lo_state = LLOOP_RUNDOWN;
- if (atomic_dec_and_test(&lo->lo_pending))
- up(&lo->lo_bh_mutex);
spin_unlock_irq(&lo->lo_lock);
+ wake_up(&lo->lo_bh_wait);
down(&lo->lo_sem);
lo->lo_backing_file = NULL;
/* lloop device node's ioctl function. */
static int lo_ioctl(struct inode *inode, struct file *unused,
- unsigned int cmd, unsigned long arg)
+ unsigned int cmd, unsigned long arg)
{
struct lloop_device *lo = inode->i_bdev->bd_disk->private_data;
struct block_device *bdev = inode->i_bdev;
/* dynamic iocontrol callback.
* This callback is registered in lloop_init and will be called by
* ll_iocontrol_call.
+ *
* This is a llite regular file ioctl function. It takes the responsibility
- * of attaching a file, and detaching a file by a lloop's device numner.
+ * of attaching or detaching a file by its lloop device number.
*/
static enum llioc_iter lloop_ioctl(struct inode *unused, struct file *file,
- unsigned int cmd, unsigned long arg,
- void *magic, int *rcp)
+ unsigned int cmd, unsigned long arg,
+ void *magic, int *rcp)
{
struct lloop_device *lo = NULL;
struct block_device *bdev = NULL;
};
if (max_loop < 1 || max_loop > 256) {
+ max_loop = MAX_LOOP_DEFAULT;
CWARN("lloop: invalid max_loop (must be between"
- " 1 and 256), using default (8)\n");
- max_loop = 8;
+ " 1 and 256), using default (%u)\n", max_loop);
}
lloop_major = register_blkdev(0, "lloop");
if (lloop_major < 0)
return -EIO;
+ CDEBUG(D_CONFIG, "registered lloop major %d with %u minors\n",
+ lloop_major, max_loop);
+
ll_iocontrol_magic = ll_iocontrol_register(lloop_ioctl, 2, cmdlist);
if (ll_iocontrol_magic == NULL)
goto out_mem1;
- loop_dev = kmalloc(max_loop * sizeof(struct lloop_device), GFP_KERNEL);
+ OBD_ALLOC_WAIT(loop_dev, max_loop * sizeof(*loop_dev));
if (!loop_dev)
goto out_mem1;
- memset(loop_dev, 0, max_loop * sizeof(struct lloop_device));
- disks = kmalloc(max_loop * sizeof(struct gendisk *), GFP_KERNEL);
+ OBD_ALLOC_WAIT(disks, max_loop * sizeof(*disks));
if (!disks)
goto out_mem2;
struct lloop_device *lo = &loop_dev[i];
struct gendisk *disk = disks[i];
- memset(lo, 0, sizeof(*lo));
lo->lo_queue = blk_alloc_queue(GFP_KERNEL);
if (!lo->lo_queue)
goto out_mem4;
init_MUTEX(&lo->lo_ctl_mutex);
init_MUTEX_LOCKED(&lo->lo_sem);
- init_MUTEX_LOCKED(&lo->lo_bh_mutex);
+ init_waitqueue_head(&lo->lo_bh_wait);
lo->lo_number = i;
spin_lock_init(&lo->lo_lock);
disk->major = lloop_major;
out_mem3:
while (i--)
put_disk(disks[i]);
- kfree(disks);
+ OBD_FREE(disks, max_loop * sizeof(*disks));
out_mem2:
- kfree(loop_dev);
+ OBD_FREE(loop_dev, max_loop * sizeof(*loop_dev));
out_mem1:
unregister_blkdev(lloop_major, "lloop");
ll_iocontrol_unregister(ll_iocontrol_magic);
}
if (ll_unregister_blkdev(lloop_major, "lloop"))
CWARN("lloop: cannot unregister blkdev\n");
+ else
+ CDEBUG(D_CONFIG, "unregistered lloop major %d\n", lloop_major);
- kfree(disks);
- kfree(loop_dev);
+ OBD_FREE(disks, max_loop * sizeof(*disks));
+ OBD_FREE(loop_dev, max_loop * sizeof(*loop_dev));
}
module_init(lloop_init);
OBD_FREE(pages, npages * sizeof(*pages));
}
-static ssize_t ll_direct_IO_26_seg(const struct lu_env *env, struct cl_io *io,
- int rw, struct inode *inode,
- struct address_space *mapping,
- size_t size, loff_t file_offset,
- struct page **pages, int page_count)
+ssize_t ll_direct_rw_pages(const struct lu_env *env, struct cl_io *io,
+ int rw, struct inode *inode,
+ struct ll_dio_pages *pv)
{
struct cl_page *clp;
struct ccc_page *clup;
struct cl_sync_io *anchor = &ccc_env_info(env)->cti_sync_io;
int i;
ssize_t rc = 0;
- ssize_t size_orig = size;
- size_t page_size = cl_page_size(obj);
+ loff_t file_offset = pv->ldp_start_offset;
+ size_t size = pv->ldp_size;
+ int page_count = pv->ldp_nr;
+ struct page **pages = pv->ldp_pages;
+ size_t page_size = cl_page_size(obj);
ENTRY;
cl_sync_io_init(anchor, page_count);
queue = &io->ci_queue;
cl_2queue_init(queue);
for (i = 0; i < page_count; i++) {
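+ /* with per-page offsets each page may map anywhere in the
+ * file, but every offset must still be page aligned */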
+ if (pv->ldp_offsets)
+ file_offset = pv->ldp_offsets[i];
+ LASSERT(!(file_offset & (page_size - 1)));
clp = cl_page_find(env, obj, cl_index(obj, file_offset),
- pages[i], CPT_TRANSIENT);
+ pv->ldp_pages[i], CPT_TRANSIENT);
if (IS_ERR(clp)) {
rc = PTR_ERR(clp);
break;
cl_sync_io_note(anchor, +1);
/* wait for the IO to be finished. */
rc = cl_sync_io_wait(env, io, &queue->c2_qout,
- anchor) ?: size_orig;
+ anchor) ?: pv->ldp_size;
}
}
cl_2queue_fini(env, queue);
RETURN(rc);
}
+EXPORT_SYMBOL(ll_direct_rw_pages);
+
+static ssize_t ll_direct_IO_26_seg(const struct lu_env *env, struct cl_io *io,
+ int rw, struct inode *inode,
+ struct address_space *mapping,
+ size_t size, loff_t file_offset,
+ struct page **pages, int page_count)
+{
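+ /* wrap the flat page array in a ll_dio_pages vector; a NULL
+ * ldp_offsets marks the pages as sequential from file_offset */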
+ struct ll_dio_pages pvec = { .ldp_pages = pages,
+ .ldp_nr = page_count,
+ .ldp_size = size,
+ .ldp_offsets = NULL,
+ .ldp_start_offset = file_offset
+ };
+
+ return ll_direct_rw_pages(env, io, rw, inode, &pvec);
+}
/* This is the maximum size of a single O_DIRECT request, based on a 128kB
* kmalloc limit. We need to fit all of the brw_page structs, each one
struct cl_sync_io *anchor = cp->cpg_sync_io;
LINVRNT(cl_page_is_vmlocked(env, clp));
- KLASSERT(!PageWriteback(vmpage));
+
+ /* Don't assert the page writeback bit here because the lustre file
+ * may be used as a backing store for swap space. In that case the
+ * writeback bit is set by the VM, and obviously we must not clear
+ * it. Fortunately, pages of this type are all TRANSIENT pages. */
+ KLASSERT(ergo(clp->cp_type == CPT_CACHEABLE, !PageWriteback(vmpage)));
vvp_vmpage_error(inode, vmpage, ioret);
* caller that everything is okay. Real connection will be performed later.
*/
static int lmv_connect(const struct lu_env *env,
- struct lustre_handle *conn, struct obd_device *obd,
+ struct obd_export **exp, struct obd_device *obd,
struct obd_uuid *cluuid, struct obd_connect_data *data,
void *localdata)
{
struct proc_dir_entry *lmv_proc_dir;
#endif
struct lmv_obd *lmv = &obd->u.lmv;
- struct obd_export *exp;
+ struct lustre_handle conn = { 0 };
int rc = 0;
ENTRY;
- rc = class_connect(conn, obd, cluuid);
- if (rc) {
- CERROR("class_connection() returned %d\n", rc);
- RETURN(rc);
- }
-
- exp = class_conn2export(conn);
-
/*
* We don't want to actually do the underlying connections more than
* once, so keep track.
*/
lmv->refcount++;
if (lmv->refcount > 1) {
- class_export_put(exp);
+ *exp = NULL;
RETURN(0);
}
- lmv->exp = exp;
+ rc = class_connect(&conn, obd, cluuid);
+ if (rc) {
+ CERROR("class_connection() returned %d\n", rc);
+ RETURN(rc);
+ }
+
+ *exp = class_conn2export(&conn);
+ class_export_get(*exp);
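+ /* take an extra reference: the export is cached in lmv->exp
+ * as well as returned to the caller */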
+
+ lmv->exp = *exp;
lmv->connected = 0;
lmv->cluuid = *cluuid;
struct obd_uuid *cluuid = &lmv->cluuid;
struct obd_connect_data *mdc_data = NULL;
struct obd_uuid lmv_mdc_uuid = { "LMV_MDC_UUID" };
- struct lustre_handle conn = {0, };
struct obd_device *mdc_obd;
struct obd_export *mdc_exp;
struct lu_fld_target target;
RETURN(-EINVAL);
}
- rc = obd_connect(NULL, &conn, mdc_obd, &lmv_mdc_uuid,
+ rc = obd_connect(NULL, &mdc_exp, mdc_obd, &lmv_mdc_uuid,
&lmv->conn_data, NULL);
if (rc) {
CERROR("target %s connect error %d\n", tgt->ltd_uuid.uuid, rc);
RETURN(rc);
}
- mdc_exp = class_conn2export(&conn);
-
/*
* Init fid sequence client for this mdc and add new fld target.
*/
MODULES := lov
lov-objs := lov_log.o lov_obd.o lov_pack.o lproc_lov.o lov_offset.o lov_merge.o lov_request.o lov_qos.o lov_ea.o lov_dev.o lov_object.o lov_page.o lov_lock.o lov_io.o lovsub_dev.o lovsub_object.o lovsub_page.o lovsub_lock.o lovsub_io.o lov_pool.o
+EXTRA_DIST = $(lov-objs:.o=.c) lov_internal.h lov_cl_internal.h
+
@INCLUDE_RULES@
install-data-hook: $(install_data_hook)
-DIST_SOURCES = $(lov-objs:.o=.c) lov_internal.h lov_cl_internal.h
MOSTLYCLEANFILES := @MOSTLYCLEANFILES@
* cl_lock::cll_guard, and will be automatically cleared by the sub-lock
* when the latter is destroyed. When a sub-lock is canceled, a
* reference to it is removed from the top-lock array, and top-lock is
- * moved into CLS_NEW state. It is guaranteed that all sub-locks exits
+ * moved into CLS_NEW state. It is guaranteed that all sub-locks exist
* while their top-lock is in CLS_HELD or CLS_CACHED states.
*
* - IO's are not reference counted.
*
* To implement a connection between top and sub entities, lov layer is split
* into two pieces: lov ("upper half"), and lovsub ("bottom half"), both
- * implementing full set of cl-interfaces. For example, top-object has clu and
+ * implementing full set of cl-interfaces. For example, top-object has vvp and
* lov layers, and it's sub-object has lovsub and osc layers. lovsub layer is
* used to track child-parent relationship.
*
#define LOV_USES_ASSIGNED_STRIPE 0
#define LOV_USES_DEFAULT_STRIPE 1
int qos_add_tgt(struct obd_device *obd, __u32 index);
-int qos_del_tgt(struct obd_device *obd, __u32 index);
+int qos_del_tgt(struct obd_device *obd, struct lov_tgt_desc *tgt);
void qos_shrink_lsm(struct lov_request_set *set);
int qos_prep_create(struct obd_export *exp, struct lov_request_set *set);
void qos_update(struct lov_obd *lov);
void lov_dump_pool(int level, struct pool_desc *pool);
struct pool_desc *lov_find_pool(struct lov_obd *lov, char *poolname);
int lov_check_index_in_pool(__u32 idx, struct pool_desc *pool);
+void lov_pool_putref(struct pool_desc *pool);
#endif
return;
}
-static void __lov_del_obd(struct obd_device *obd, __u32 index);
+static void __lov_del_obd(struct obd_device *obd, struct lov_tgt_desc *tgt);
void lov_putref(struct obd_device *obd)
{
struct lov_obd *lov = &obd->u.lov;
+
mutex_down(&lov->lov_lock);
/* ok to dec to 0 more than once -- ltd_exp's will be null */
if (atomic_dec_and_test(&lov->lov_refcount) && lov->lov_death_row) {
+ CFS_LIST_HEAD(kill);
int i;
+ struct lov_tgt_desc *tgt, *n;
CDEBUG(D_CONFIG, "destroying %d lov targets\n",
lov->lov_death_row);
for (i = 0; i < lov->desc.ld_tgt_count; i++) {
- if (!lov->lov_tgts[i] || !lov->lov_tgts[i]->ltd_reap)
+ tgt = lov->lov_tgts[i];
+
+ if (!tgt || !tgt->ltd_reap)
continue;
- /* Disconnect and delete from list */
- __lov_del_obd(obd, i);
+ list_add(&tgt->ltd_kill, &kill);
+ /* XXX - right now there is a dependency on ld_tgt_count
+ * being the maximum tgt index for computing the
+ * mds_max_easize. So we can't shrink it. */
+ lov_ost_pool_remove(&lov->lov_packed, i);
+ lov->lov_tgts[i] = NULL;
lov->lov_death_row--;
}
+ mutex_up(&lov->lov_lock);
+
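+ /* lov_lock has been dropped, so these potentially slow
+ * disconnects no longer block other lov users */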
+ list_for_each_entry_safe(tgt, n, &kill, ltd_kill) {
+ list_del(&tgt->ltd_kill);
+ /* Disconnect */
+ __lov_del_obd(obd, tgt);
+ }
+ } else {
+ mutex_up(&lov->lov_lock);
}
- mutex_up(&lov->lov_lock);
}
static int lov_set_osc_active(struct obd_device *obd, struct obd_uuid *uuid,
struct obd_uuid tgt_uuid;
struct obd_device *tgt_obd;
struct obd_uuid lov_osc_uuid = { "LOV_OSC_UUID" };
- struct lustre_handle conn = {0, };
struct obd_import *imp;
#ifdef __KERNEL__
ptlrpc_activate_import(imp);
}
+ rc = obd_register_observer(tgt_obd, obd);
+ if (rc) {
+ CERROR("Target %s register_observer error %d\n",
+ obd_uuid2str(&tgt_uuid), rc);
+ RETURN(rc);
+ }
+
if (imp->imp_invalid) {
CERROR("not connecting OSC %s; administratively "
"disabled\n", obd_uuid2str(&tgt_uuid));
- rc = obd_register_observer(tgt_obd, obd);
- if (rc) {
- CERROR("Target %s register_observer error %d; "
- "will not be able to reactivate\n",
- obd_uuid2str(&tgt_uuid), rc);
- }
RETURN(0);
}
- rc = obd_connect(NULL, &conn, tgt_obd, &lov_osc_uuid, data, NULL);
- if (rc) {
+ rc = obd_connect(NULL, &lov->lov_tgts[index]->ltd_exp, tgt_obd,
+ &lov_osc_uuid, data, NULL);
+ if (rc || !lov->lov_tgts[index]->ltd_exp) {
CERROR("Target %s connect error %d\n",
obd_uuid2str(&tgt_uuid), rc);
- RETURN(rc);
- }
- lov->lov_tgts[index]->ltd_exp = class_conn2export(&conn);
- if (!lov->lov_tgts[index]->ltd_exp) {
- CERROR("Target %s: null export!\n", obd_uuid2str(&tgt_uuid));
RETURN(-ENODEV);
}
- rc = obd_register_observer(tgt_obd, obd);
- if (rc) {
- CERROR("Target %s register_observer error %d\n",
- obd_uuid2str(&tgt_uuid), rc);
- obd_disconnect(lov->lov_tgts[index]->ltd_exp);
- lov->lov_tgts[index]->ltd_exp = NULL;
- RETURN(rc);
- }
-
lov->lov_tgts[index]->ltd_reap = 0;
if (activate) {
lov->lov_tgts[index]->ltd_active = 1;
#ifdef __KERNEL__
lov_proc_dir = lprocfs_srch(obd->obd_proc_entry, "target_obds");
if (lov_proc_dir) {
- struct obd_device *osc_obd = class_conn2obd(&conn);
+ struct obd_device *osc_obd = lov->lov_tgts[index]->ltd_exp->exp_obd;
cfs_proc_dir_entry_t *osc_symlink;
char name[MAX_STRING_SIZE];
}
static int lov_connect(const struct lu_env *env,
- struct lustre_handle *conn, struct obd_device *obd,
+ struct obd_export **exp, struct obd_device *obd,
struct obd_uuid *cluuid, struct obd_connect_data *data,
void *localdata)
{
struct lov_obd *lov = &obd->u.lov;
struct lov_tgt_desc *tgt;
+ struct lustre_handle conn;
int i, rc;
ENTRY;
CDEBUG(D_CONFIG, "connect #%d\n", lov->lov_connects);
- rc = class_connect(conn, obd, cluuid);
+ rc = class_connect(&conn, obd, cluuid);
if (rc)
RETURN(rc);
+ *exp = class_conn2export(&conn);
+
/* Why should there ever be more than 1 connect? */
lov->lov_connects++;
LASSERT(lov->lov_connects == 1);
continue;
rc = lov_notify(obd, lov->lov_tgts[i]->ltd_exp->exp_obd,
- OBD_NOTIFY_ACTIVE, (void *)&i);
+ OBD_NOTIFY_CONNECT, (void *)&i);
if (rc) {
CERROR("%s error sending notify %d\n",
obd->obd_name, rc);
RETURN(0);
}
-static int lov_disconnect_obd(struct obd_device *obd, __u32 index)
+static int lov_disconnect_obd(struct obd_device *obd, struct lov_tgt_desc *tgt)
{
cfs_proc_dir_entry_t *lov_proc_dir;
struct lov_obd *lov = &obd->u.lov;
struct obd_device *osc_obd;
int rc;
-
ENTRY;
- if (lov->lov_tgts[index] == NULL)
- RETURN(-EINVAL);
-
- osc_obd = class_exp2obd(lov->lov_tgts[index]->ltd_exp);
+ osc_obd = class_exp2obd(tgt->ltd_exp);
CDEBUG(D_CONFIG, "%s: disconnecting target %s\n",
obd->obd_name, osc_obd->obd_name);
- if (lov->lov_tgts[index]->ltd_active) {
- lov->lov_tgts[index]->ltd_active = 0;
+ if (tgt->ltd_active) {
+ tgt->ltd_active = 0;
lov->desc.ld_active_tgt_count--;
- lov->lov_tgts[index]->ltd_exp->exp_obd->obd_inactive = 1;
+ tgt->ltd_exp->exp_obd->obd_inactive = 1;
}
lov_proc_dir = lprocfs_srch(obd->obd_proc_entry, "target_obds");
obd_register_observer(osc_obd, NULL);
- rc = obd_disconnect(lov->lov_tgts[index]->ltd_exp);
+ rc = obd_disconnect(tgt->ltd_exp);
if (rc) {
CERROR("Target %s disconnect error %d\n",
- lov_uuid2str(lov, index), rc);
+ tgt->ltd_uuid.uuid, rc);
rc = 0;
}
- qos_del_tgt(obd, index);
+ qos_del_tgt(obd, tgt);
- lov->lov_tgts[index]->ltd_exp = NULL;
+ tgt->ltd_exp = NULL;
RETURN(0);
}
GOTO(out, rc = 0);
rc = lov_notify(obd, tgt->ltd_exp->exp_obd,
- active ? OBD_NOTIFY_ACTIVE : OBD_NOTIFY_INACTIVE,
+ active ? OBD_NOTIFY_CONNECT : OBD_NOTIFY_INACTIVE,
(void *)&index);
out:
RETURN(rc);
}
-/* We are holding lov_lock */
-static void __lov_del_obd(struct obd_device *obd, __u32 index)
+static void __lov_del_obd(struct obd_device *obd, struct lov_tgt_desc *tgt)
{
- struct lov_obd *lov = &obd->u.lov;
struct obd_device *osc_obd;
- struct lov_tgt_desc *tgt = lov->lov_tgts[index];
LASSERT(tgt);
LASSERT(tgt->ltd_reap);
osc_obd = class_exp2obd(tgt->ltd_exp);
CDEBUG(D_CONFIG, "Removing tgt %s : %s\n",
- lov_uuid2str(lov, index),
+ tgt->ltd_uuid.uuid,
osc_obd ? osc_obd->obd_name : "<no obd>");
if (tgt->ltd_exp)
- lov_disconnect_obd(obd, index);
-
- /* XXX - right now there is a dependency on ld_tgt_count being the
- * maximum tgt index for computing the mds_max_easize. So we can't
- * shrink it. */
+ lov_disconnect_obd(obd, tgt);
- lov_ost_pool_remove(&lov->lov_packed, index);
- lov->lov_tgts[index] = NULL;
OBD_FREE_PTR(tgt);
/* Manual cleanup - no cleanup logs to clean up the osc's. We must
static int lov_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage)
{
int rc = 0;
+ struct lov_obd *lov = &obd->u.lov;
+
ENTRY;
switch (stage) {
case OBD_CLEANUP_EARLY: {
- struct lov_obd *lov = &obd->u.lov;
int i;
for (i = 0; i < lov->desc.ld_tgt_count; i++) {
if (!lov->lov_tgts[i] || !lov->lov_tgts[i]->ltd_active)
struct list_head *pos, *tmp;
struct pool_desc *pool;
- lprocfs_obd_cleanup(obd);
-
- /* Delete hash entries and kill hash table before freeing pools
- * and get to use after free issue. */
- lustre_hash_exit(lov->lov_pools_hash_body);
-
list_for_each_safe(pos, tmp, &lov->lov_pool_list) {
pool = list_entry(pos, struct pool_desc, pool_list);
/* free pool structs */
+ CDEBUG(D_INFO, "delete pool %p\n", pool);
lov_pool_del(obd, pool->pool_name);
}
+ lustre_hash_exit(lov->lov_pools_hash_body);
lov_ost_pool_free(&(lov->lov_qos.lq_rr.lqr_pool));
lov_ost_pool_free(&lov->lov_packed);
if (lov->lov_tgts) {
int i;
+ lov_getref(obd);
for (i = 0; i < lov->desc.ld_tgt_count; i++) {
if (!lov->lov_tgts[i])
continue;
atomic_read(&lov->lov_refcount));
lov_del_target(obd, i, 0, 0);
}
+ lov_putref(obd);
OBD_FREE(lov->lov_tgts, sizeof(*lov->lov_tgts) *
lov->lov_tgt_size);
lov->lov_tgt_size = 0;
}
+ /* remove the pools' parent proc entry only after all pools are deleted */
+ lprocfs_obd_cleanup(obd);
+
RETURN(0);
}
rc = lov_check_index_in_pool(lumv3.lmm_stripe_offset,
pool);
if (rc < 0) {
- lh_put(lov->lov_pools_hash_body,
- &pool->pool_hash);
+ lov_pool_putref(pool);
RETURN(-EINVAL);
}
}
if (stripe_count > pool_tgt_count(pool))
stripe_count = pool_tgt_count(pool);
- lh_put(lov->lov_pools_hash_body, &pool->pool_hash);
+ lov_pool_putref(pool);
}
if ((__u64)lumv1->lmm_stripe_size * stripe_count > ~0UL) {
CLASSERT(sizeof lum.lmm_objects[0] ==
sizeof lmmk->lmm_objects[0]);
+ if ((cpu_to_le32(LOV_MAGIC) != LOV_MAGIC) &&
+ (lmmk->lmm_magic == cpu_to_le32(LOV_MAGIC)))
+ lustre_swab_lov_mds_md(lmmk);
/* User wasn't expecting this many OST entries */
if (lum.lmm_stripe_count == 0) {
- if (copy_to_user(lump, lmmk, lum_size))
+ copy_lov_mds2user(&lum, lmmk);
+ if (copy_to_user(lump, &lum, lum_size))
rc = -EFAULT;
} else if (lum.lmm_stripe_count < lmmk->lmm_stripe_count) {
rc = -EOVERFLOW;
- } else if (copy_to_user(lump, lmmk, lmm_size))
- rc = -EFAULT;
+ } else {
+ copy_lov_mds2user(&lum, lmmk);
+ if (copy_to_user(lump, &lum, lmm_size))
+ rc = -EFAULT;
+ }
obd_free_diskmd(exp, &lmmk);
}
* OST pool methods
*
* Author: Jacques-Charles LAFOUCRIERE <jc.lafoucriere@cea.fr>
+ * Author: Alex Lyashkov <Alexey.Lyashkov@Sun.COM>
+ * Author: Nathaniel Rutman <Nathan.Rutman@Sun.COM>
*/
#define DEBUG_SUBSYSTEM S_LOV
#include <obd.h>
#include "lov_internal.h"
-static void lov_pool_getref(struct pool_desc *pool) {
+static void lov_pool_getref(struct pool_desc *pool)
+{
+ CDEBUG(D_INFO, "pool %p\n", pool);
atomic_inc(&pool->pool_refcount);
}
-static void lov_pool_putref(struct pool_desc *pool) {
+void lov_pool_putref(struct pool_desc *pool)
+{
+ CDEBUG(D_INFO, "pool %p\n", pool);
if (atomic_dec_and_test(&pool->pool_refcount)) {
+ LASSERT(hlist_unhashed(&pool->pool_hash));
+ LASSERT(list_empty(&pool->pool_list));
+ LASSERT(pool->pool_proc_entry == NULL);
lov_ost_pool_free(&(pool->pool_rr.lqr_pool));
lov_ost_pool_free(&(pool->pool_obds));
OBD_FREE_PTR(pool);
+ EXIT;
}
}
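With pool lifetime now driven purely by pool_refcount, a typical lookup/use/release cycle looks roughly like this (a sketch; that lustre_hash_lookup() takes a reference on the found entry is assumed from the hunks below):

        /* sketch: lookup takes a reference, lov_pool_putref() drops it */
        pool = lustre_hash_lookup(lov->lov_pools_hash_body, poolname);
        if (pool != NULL) {
                /* ... use the pool ... */
                lov_pool_putref(pool);
        }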
#define LOV_POOL_INIT_COUNT 2
int lov_ost_pool_init(struct ost_pool *op, unsigned int count)
{
+ ENTRY;
+
if (count == 0)
count = LOV_POOL_INIT_COUNT;
op->op_array = NULL;
OBD_ALLOC(op->op_array, op->op_size * sizeof(op->op_array[0]));
if (op->op_array == NULL) {
op->op_size = 0;
- return -ENOMEM;
+ RETURN(-ENOMEM);
}
+ EXIT;
return 0;
}
/* ost not found we add it */
op->op_array[op->op_count] = idx;
op->op_count++;
+ EXIT;
out:
up_write(&op->op_rw_sem);
return rc;
int lov_ost_pool_remove(struct ost_pool *op, __u32 idx)
{
int i;
+ ENTRY;
down_write(&op->op_rw_sem);
(op->op_count - i - 1) * sizeof(op->op_array[0]));
op->op_count--;
up_write(&op->op_rw_sem);
+ EXIT;
return 0;
}
}
up_write(&op->op_rw_sem);
- return -EINVAL;
+ RETURN(-EINVAL);
}
int lov_ost_pool_free(struct ost_pool *op)
{
+ ENTRY;
+
if (op->op_size == 0)
- return 0;
+ RETURN(0);
down_write(&op->op_rw_sem);
op->op_size = 0;
up_write(&op->op_rw_sem);
- return 0;
+ RETURN(0);
}
memset(&(new_pool->pool_rr), 0, sizeof(struct lov_qos_rr));
rc = lov_ost_pool_init(&new_pool->pool_rr.lqr_pool, 0);
- if (rc) {
- lov_ost_pool_free(&new_pool->pool_obds);
- GOTO(out_err, rc);
- }
+ if (rc)
+ GOTO(out_free_pool_obds, rc);
INIT_HLIST_NODE(&new_pool->pool_hash);
- rc = lustre_hash_add_unique(lov->lov_pools_hash_body, poolname,
- &new_pool->pool_hash);
- if (rc) {
- lov_ost_pool_free(&new_pool->pool_rr.lqr_pool);
- lov_ost_pool_free(&new_pool->pool_obds);
- GOTO(out_err, rc = -EEXIST);
- }
-
- spin_lock(&obd->obd_dev_lock);
- list_add_tail(&new_pool->pool_list, &lov->lov_pool_list);
- lov->lov_pool_count++;
-
- spin_unlock(&obd->obd_dev_lock);
-
- CDEBUG(D_CONFIG, LOV_POOLNAMEF" is pool #%d\n",
- poolname, lov->lov_pool_count);
#ifdef LPROCFS
- /* ifdef needed for liblustre */
+ /* the ifdef is needed because seq_file is not implemented for liblustre */
/* get ref for /proc file */
lov_pool_getref(new_pool);
new_pool->pool_proc_entry = lprocfs_add_simple(lov->lov_pool_proc_entry,
poolname, NULL, NULL,
new_pool,
&pool_proc_operations);
-#endif
-
if (IS_ERR(new_pool->pool_proc_entry)) {
CWARN("Cannot add proc pool entry "LOV_POOLNAMEF"\n", poolname);
new_pool->pool_proc_entry = NULL;
lov_pool_putref(new_pool);
}
+ CDEBUG(D_INFO, "pool %p - proc %p\n", new_pool, new_pool->pool_proc_entry);
+#endif
+
+ spin_lock(&obd->obd_dev_lock);
+ list_add_tail(&new_pool->pool_list, &lov->lov_pool_list);
+ lov->lov_pool_count++;
+ spin_unlock(&obd->obd_dev_lock);
+
+ /* make the pool visible to lookups only once it is fully ready */
+ rc = lustre_hash_add_unique(lov->lov_pools_hash_body, poolname,
+ &new_pool->pool_hash);
+ if (rc)
+ GOTO(out_err, rc = -EEXIST);
+
+ CDEBUG(D_CONFIG, LOV_POOLNAMEF" is pool #%d\n",
+ poolname, lov->lov_pool_count);
RETURN(0);
out_err:
+ spin_lock(&obd->obd_dev_lock);
+ list_del_init(&new_pool->pool_list);
+ lov->lov_pool_count--;
+ spin_unlock(&obd->obd_dev_lock);
+
+ lprocfs_remove(&new_pool->pool_proc_entry);
+
+ lov_ost_pool_free(&new_pool->pool_rr.lqr_pool);
+out_free_pool_obds:
+ lov_ost_pool_free(&new_pool->pool_obds);
OBD_FREE_PTR(new_pool);
return rc;
}
lov = &(obd->u.lov);
- spin_lock(&obd->obd_dev_lock);
-
- pool = lustre_hash_lookup(lov->lov_pools_hash_body, poolname);
- if (pool == NULL) {
- spin_unlock(&obd->obd_dev_lock);
+ /* lookup and kill hash reference */
+ pool = lustre_hash_del_key(lov->lov_pools_hash_body, poolname);
+ if (pool == NULL)
RETURN(-ENOENT);
- }
-#ifdef LPROCFS
if (pool->pool_proc_entry != NULL) {
- remove_proc_entry(pool->pool_proc_entry->name,
- pool->pool_proc_entry->parent);
- /* remove ref for /proc file */
+ CDEBUG(D_INFO, "proc entry %p\n", pool->pool_proc_entry);
+ lprocfs_remove(&pool->pool_proc_entry);
lov_pool_putref(pool);
}
-#endif
- lustre_hash_del_key(lov->lov_pools_hash_body, poolname);
+ spin_lock(&obd->obd_dev_lock);
list_del_init(&pool->pool_list);
-
lov->lov_pool_count--;
- lh_put(lov->lov_pools_hash_body, &pool->pool_hash);
spin_unlock(&obd->obd_dev_lock);
- /* remove ref got when pool was created in memory
- * pool will be freed when refount will reach 0
- */
+ /* release last reference */
lov_pool_putref(pool);
RETURN(0);
struct obd_uuid ost_uuid;
struct lov_obd *lov;
struct pool_desc *pool;
- unsigned int i, lov_idx;
+ unsigned int lov_idx;
int rc;
ENTRY;
/* search ost in lov array */
- mutex_down(&lov->lov_lock);
- for (i = 0; i < lov->desc.ld_tgt_count; i++) {
- if (!lov->lov_tgts[i])
+ lov_getref(obd);
+ for (lov_idx = 0; lov_idx < lov->desc.ld_tgt_count; lov_idx++) {
+ if (!lov->lov_tgts[lov_idx])
continue;
- if (obd_uuid_equals(&ost_uuid, &(lov->lov_tgts[i]->ltd_uuid)))
+ if (obd_uuid_equals(&ost_uuid,
+ &(lov->lov_tgts[lov_idx]->ltd_uuid)))
break;
}
-
/* test if ost found in lov */
- if (i == lov->desc.ld_tgt_count) {
- mutex_up(&lov->lov_lock);
+ if (lov_idx == lov->desc.ld_tgt_count)
GOTO(out, rc = -EINVAL);
- }
- mutex_up(&lov->lov_lock);
-
- lov_idx = i;
rc = lov_ost_pool_add(&pool->pool_obds, lov_idx, lov->lov_tgt_size);
if (rc)
EXIT;
out:
- lh_put(lov->lov_pools_hash_body, &pool->pool_hash);
+ lov_putref(obd);
+ lov_pool_putref(pool);
return rc;
}
struct obd_uuid ost_uuid;
struct lov_obd *lov;
struct pool_desc *pool;
- unsigned int i, lov_idx;
+ unsigned int lov_idx;
int rc = 0;
ENTRY;
lov = &(obd->u.lov);
- spin_lock(&obd->obd_dev_lock);
pool = lustre_hash_lookup(lov->lov_pools_hash_body, poolname);
- if (pool == NULL) {
- spin_unlock(&obd->obd_dev_lock);
+ if (pool == NULL)
RETURN(-ENOENT);
- }
obd_str2uuid(&ost_uuid, ostname);
+ lov_getref(obd);
/* search ost in lov array, to get index */
- for (i = 0; i < lov->desc.ld_tgt_count; i++) {
- if (!lov->lov_tgts[i])
+ for (lov_idx = 0; lov_idx < lov->desc.ld_tgt_count; lov_idx++) {
+ if (!lov->lov_tgts[lov_idx])
continue;
- if (obd_uuid_equals(&ost_uuid, &(lov->lov_tgts[i]->ltd_uuid)))
+ if (obd_uuid_equals(&ost_uuid,
+ &(lov->lov_tgts[lov_idx]->ltd_uuid)))
break;
}
/* test if ost found in lov */
- if (i == lov->desc.ld_tgt_count) {
- spin_unlock(&obd->obd_dev_lock);
+ if (lov_idx == lov->desc.ld_tgt_count)
GOTO(out, rc = -EINVAL);
- }
-
- spin_unlock(&obd->obd_dev_lock);
-
- lov_idx = i;
lov_ost_pool_remove(&pool->pool_obds, lov_idx);
EXIT;
out:
- lh_put(lov->lov_pools_hash_body, &pool->pool_hash);
+ lov_putref(obd);
+ lov_pool_putref(pool);
return rc;
}
CWARN("Request for an empty pool ("LOV_POOLNAMEF")\n",
poolname);
/* pool is ignored, so we remove ref on it */
- lh_put(lov->lov_pools_hash_body, &pool->pool_hash);
+ lov_pool_putref(pool);
pool = NULL;
}
}
RETURN(rc);
}
-int qos_del_tgt(struct obd_device *obd, __u32 index)
+int qos_del_tgt(struct obd_device *obd, struct lov_tgt_desc *tgt)
{
struct lov_obd *lov = &obd->u.lov;
struct lov_qos_oss *oss;
int rc = 0;
ENTRY;
- if (!lov->lov_tgts[index])
- RETURN(0);
-
down_write(&lov->lov_qos.lq_rw_sem);
- oss = lov->lov_tgts[index]->ltd_qos.ltq_oss;
+ oss = tgt->ltd_qos.ltq_oss;
if (!oss)
GOTO(out, rc = -ENOENT);
if (pool != NULL) {
up_read(&pool_tgt_rw_sem(pool));
/* put back ref got by lov_find_pool() */
- lh_put(lov->lov_pools_hash_body, &pool->pool_hash);
+ lov_pool_putref(pool);
}
RETURN(rc);
if (pool != NULL) {
up_read(&pool_tgt_rw_sem(pool));
/* put back ref got by lov_find_pool() */
- lh_put(lov->lov_pools_hash_body, &pool->pool_hash);
+ lov_pool_putref(pool);
}
RETURN(rc);
if (pool != NULL) {
up_read(&pool_tgt_rw_sem(pool));
/* put back ref got by lov_find_pool() */
- lh_put(lov->lov_pools_hash_body, &pool->pool_hash);
+ lov_pool_putref(pool);
}
if (rc == -EAGAIN)
$(obj)/fsfilt-%.c: $(obj)/fsfilt_%.c
ln -s $< $@
+EXTRA_DIST = $(lvfs-objs:.o=.c) $(quotafmt-objs:.o=.c) \
+ fsfilt_ext3.c fsfilt_reiserfs.c \
+ lvfs_internal.h lvfs_userfs.c \
+ lustre_quota_fmt.c lustre_quota_fmt.h quotafmt_test.c
+
# for <ext3/xattr.h> on 2.6
EXTRA_PRE_CFLAGS := -I@LINUX@/fs -I@LDISKFS_DIR@ -I@LDISKFS_DIR@/ldiskfs
install-data-hook: $(install_data_hook)
-DIST_SOURCES = fsfilt.c fsfilt_ext3.c fsfilt_reiserfs.c lvfs_common.c \
- lvfs_internal.h lvfs_linux.c lvfs_userfs.c \
- upcall_cache.c prng.c lvfs_lib.c \
- lustre_quota_fmt.c lustre_quota_fmt.h quotafmt_test.c
-
MOSTLYCLEANFILES := @MOSTLYCLEANFILES@
CLEANFILES = fsfilt-*.c fsfilt_ldiskfs*.c fsfilt_extN.c sources
}
EXPORT_SYMBOL(l_readdir);
+int l_notify_change(struct vfsmount *mnt, struct dentry *dchild,
+ struct iattr *newattrs)
+{
+ int rc;
+
+ LOCK_INODE_MUTEX(dchild->d_inode);
+#ifdef HAVE_SECURITY_PLUG
+ rc = notify_change(dchild, mnt, newattrs);
+#else
+ rc = notify_change(dchild, newattrs);
+#endif
+ UNLOCK_INODE_MUTEX(dchild->d_inode);
+ return rc;
+}
+EXPORT_SYMBOL(l_notify_change);
+
+/* utility to truncate a file */
+int simple_truncate(struct dentry *dir, struct vfsmount *mnt,
+ char *name, loff_t length)
+{
+ struct dentry *dchild;
+ struct iattr newattrs;
+ int err = 0;
+ ENTRY;
+
+ CDEBUG(D_INODE, "truncating file %.*s to %lld\n", (int)strlen(name),
+ name, (long long)length);
+ dchild = ll_lookup_one_len(name, dir, strlen(name));
+ if (IS_ERR(dchild))
+ GOTO(out, err = PTR_ERR(dchild));
+
+ if (dchild->d_inode) {
+ int old_mode = dchild->d_inode->i_mode;
+ if (S_ISDIR(old_mode)) {
+ CERROR("found %s (%lu/%u) is mode %o\n", name,
+ dchild->d_inode->i_ino,
+ dchild->d_inode->i_generation, old_mode);
+ GOTO(out_dput, err = -EISDIR);
+ }
+
+ newattrs.ia_size = length;
+ newattrs.ia_valid = ATTR_SIZE;
+ err = l_notify_change(mnt, dchild, &newattrs);
+ }
+ EXIT;
+out_dput:
+ dput(dchild);
+out:
+ return err;
+}
+EXPORT_SYMBOL(simple_truncate);
+
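simple_truncate() gets its first user later in this patch: when a server is mounted with abort_recov, the stale client slots in last_rcvd are discarded by truncating the file to LR_CLIENT_START, the offset at which per-client records begin (copied from a later hunk in this patch):

        if (lmd->lmd_flags & LMD_FLG_ABORT_RECOV)
                simple_truncate(mnt->mnt_sb->s_root, mnt, LAST_RCVD,
                                LR_CLIENT_START);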
#ifdef LUSTRE_KERNEL_VERSION
#ifndef HAVE_CLEAR_RDONLY_ON_PUT
#error rdonly patchset must be updated [cfs bz11248]
MODULES := mdc
mdc-objs := mdc_request.o mdc_reint.o lproc_mdc.o mdc_lib.o mdc_locks.o
+EXTRA_DIST = $(mdc-objs:.o=.c) mdc_internal.h
+
@INCLUDE_RULES@
modulefs_DATA = mdc$(KMODEXT)
endif
-DIST_SOURCES = $(mdc-objs:.o=.c) mdc_internal.h
MOSTLYCLEANFILES := @MOSTLYCLEANFILES@
ptlrpc_request_set_replen(req);
rc = ptlrpc_queue_wait(req);
- GOTO(out, rc);
+ if (rc)
+ GOTO(out, rc);
+
+ rc = sptlrpc_cli_unwrap_bulk_write(req, req->rq_bulk);
out:
ptlrpc_req_finished(req);
return rc;
RETURN(rc);
}
+ rc = sptlrpc_cli_unwrap_bulk_read(req, req->rq_bulk,
+ req->rq_bulk->bd_nob_transferred);
+ if (rc < 0) {
+ ptlrpc_req_finished(req);
+ RETURN(rc);
+ }
+
if (req->rq_bulk->bd_nob_transferred != CFS_PAGE_SIZE) {
CERROR("Unexpected # bytes transferred: %d (%ld expected)\n",
req->rq_bulk->bd_nob_transferred, CFS_PAGE_SIZE);
}
static int mdc_connect(const struct lu_env *env,
- struct lustre_handle *dlm_handle,
+ struct obd_export **exp,
struct obd_device *obd, struct obd_uuid *cluuid,
struct obd_connect_data *data,
void *localdata)
obd->obd_name);
}
- return client_connect_import(env, dlm_handle, obd, cluuid, data, NULL);
+ return client_connect_import(env, exp, obd, cluuid, data, NULL);
}
struct obd_ops mdc_obd_ops = {
#include <lustre/lustre_idl.h>
#include <lustre_disk.h> /* for changelogs */
#include <lustre_param.h>
+#include <lustre_fid.h>
#include "mdd_internal.h"
const struct md_device_operations mdd_ops;
+static struct lu_device_type mdd_device_type;
static const char mdd_root_dir_name[] = "ROOT";
+static const char mdd_obf_dir_name[] = "fid";
+static const char mdd_dot_lustre_name[] = ".lustre";
static int mdd_device_init(const struct lu_env *env, struct lu_device *d,
const char *name, struct lu_device *next)
ENTRY;
mdd_changelog_fini(env, m);
dt_txn_callback_del(m->mdd_child, &m->mdd_txn_cb);
+ mdd_object_put(env, m->mdd_dot_lustre_objs.mdd_obf);
+ mdd_object_put(env, m->mdd_dot_lustre);
if (m->mdd_obd_dev)
mdd_fini_obd(env, m, cfg);
orph_index_fini(env, m);
RETURN(rc);
}
+/**
+ * Create ".lustre" directory.
+ */
+static int create_dot_lustre_dir(const struct lu_env *env, struct mdd_device *m)
+{
+ struct lu_fid *fid = &mdd_env_info(env)->mti_fid;
+ struct md_object *mdo;
+ int rc;
+
+ memcpy(fid, &LU_DOT_LUSTRE_FID, sizeof(struct lu_fid));
+ mdo = llo_store_create_index(env, &m->mdd_md_dev, m->mdd_child,
+ mdd_root_dir_name, mdd_dot_lustre_name,
+ fid, &dt_directory_features);
+ /* .lustre dir may be already present */
+ if (IS_ERR(mdo) && PTR_ERR(mdo) != -EEXIST) {
+ rc = PTR_ERR(mdo);
+ CERROR("creating obj [%s] fid = "DFID" rc = %d\n",
+ mdd_dot_lustre_name, PFID(fid), rc);
+ RETURN(rc);
+ }
+
+ return 0;
+}
+
+static int dot_lustre_attr_get(const struct lu_env *env, struct md_object *obj,
+ struct md_attr *ma)
+{
+ struct mdd_object *mdd_obj = md2mdd_obj(obj);
+
+ return mdd_attr_get_internal_locked(env, mdd_obj, ma);
+}
+
+static int dot_lustre_attr_set(const struct lu_env *env, struct md_object *obj,
+ const struct md_attr *ma)
+{
+ return -EPERM;
+}
+
+static int dot_lustre_xattr_get(const struct lu_env *env,
+ struct md_object *obj, struct lu_buf *buf,
+ const char *name)
+{
+ return 0;
+}
+
+/**
+ * Direct access to the ".lustre" directory is not allowed.
+ */
+static int dot_lustre_mdd_open(const struct lu_env *env, struct md_object *obj,
+ int flags)
+{
+ return -EPERM;
+}
+
+static int dot_lustre_path(const struct lu_env *env, struct md_object *obj,
+ char *path, int pathlen, __u64 recno, int *linkno)
+{
+ return -ENOSYS;
+}
+
+static struct md_object_operations mdd_dot_lustre_obj_ops = {
+ .moo_attr_get = dot_lustre_attr_get,
+ .moo_attr_set = dot_lustre_attr_set,
+ .moo_xattr_get = dot_lustre_xattr_get,
+ .moo_open = dot_lustre_mdd_open,
+ .moo_path = dot_lustre_path
+};
+
+static int dot_lustre_lookup(const struct lu_env *env, struct md_object *p,
+ const struct lu_name *lname, struct lu_fid *f,
+ struct md_op_spec *spec)
+{
+ if (strcmp(lname->ln_name, mdd_obf_dir_name) == 0)
+ *f = LU_OBF_FID;
+ else
+ return -ENOENT;
+
+ return 0;
+}
+
+static int dot_lustre_create(const struct lu_env *env, struct md_object *pobj,
+ const struct lu_name *lname,
+ struct md_object *child, struct md_op_spec *spec,
+ struct md_attr* ma)
+{
+ return -EPERM;
+}
+
+static int dot_lustre_rename(const struct lu_env *env,
+ struct md_object *src_pobj,
+ struct md_object *tgt_pobj,
+ const struct lu_fid *lf,
+ const struct lu_name *lsname,
+ struct md_object *tobj,
+ const struct lu_name *ltname, struct md_attr *ma)
+{
+ return -EPERM;
+}
+
+static int dot_lustre_link(const struct lu_env *env, struct md_object *tgt_obj,
+ struct md_object *src_obj,
+ const struct lu_name *lname, struct md_attr *ma)
+{
+ return -EPERM;
+}
+
+static int dot_lustre_unlink(const struct lu_env *env, struct md_object *pobj,
+ struct md_object *cobj, const struct lu_name *lname,
+ struct md_attr *ma)
+{
+ return -EPERM;
+}
+
+static struct md_dir_operations mdd_dot_lustre_dir_ops = {
+ .mdo_lookup = dot_lustre_lookup,
+ .mdo_create = dot_lustre_create,
+ .mdo_rename = dot_lustre_rename,
+ .mdo_link = dot_lustre_link,
+ .mdo_unlink = dot_lustre_unlink,
+};
+
+static int obf_attr_get(const struct lu_env *env, struct md_object *obj,
+ struct md_attr *ma)
+{
+ int rc = 0;
+
+ if (ma->ma_need & MA_INODE) {
+ struct mdd_device *mdd = mdo2mdd(obj);
+
+ /* "fid" is a virtual object and hence does not have any "real"
+ * attributes. So we reuse attributes of .lustre for "fid" dir */
+ ma->ma_need |= MA_INODE;
+ rc = dot_lustre_attr_get(env, &mdd->mdd_dot_lustre->mod_obj, ma);
+ if (rc)
+ return rc;
+ ma->ma_valid |= MA_INODE;
+ }
+
+ /* "fid" directory does not have any striping information. */
+ if (ma->ma_need & MA_LOV) {
+ struct mdd_object *mdd_obj = md2mdd_obj(obj);
+
+ if (ma->ma_valid & MA_LOV)
+ return 0;
+
+ if (!(S_ISREG(mdd_object_type(mdd_obj)) ||
+ S_ISDIR(mdd_object_type(mdd_obj))))
+ return 0;
+
+ if (ma->ma_need & MA_LOV_DEF) {
+ rc = mdd_get_default_md(mdd_obj, ma->ma_lmm,
+ &ma->ma_lmm_size);
+ if (rc > 0) {
+ ma->ma_valid |= MA_LOV;
+ rc = 0;
+ }
+ }
+ }
+
+ return rc;
+}
+
+static int obf_attr_set(const struct lu_env *env, struct md_object *obj,
+ const struct md_attr *ma)
+{
+ return -EPERM;
+}
+
+static int obf_xattr_get(const struct lu_env *env,
+ struct md_object *obj, struct lu_buf *buf,
+ const char *name)
+{
+ return 0;
+}
+
+static int obf_mdd_open(const struct lu_env *env, struct md_object *obj,
+ int flags)
+{
+ struct mdd_object *mdd_obj = md2mdd_obj(obj);
+
+ mdd_write_lock(env, mdd_obj, MOR_TGT_CHILD);
+ mdd_obj->mod_count++;
+ mdd_write_unlock(env, mdd_obj);
+
+ return 0;
+}
+
+static int obf_mdd_close(const struct lu_env *env, struct md_object *obj,
+ struct md_attr *ma)
+{
+ struct mdd_object *mdd_obj = md2mdd_obj(obj);
+
+ mdd_write_lock(env, mdd_obj, MOR_TGT_CHILD);
+ mdd_obj->mod_count--;
+ mdd_write_unlock(env, mdd_obj);
+
+ return 0;
+}
+
+/** Nothing to list in "fid" directory */
+static int obf_mdd_readpage(const struct lu_env *env, struct md_object *obj,
+ const struct lu_rdpg *rdpg)
+{
+ return -EPERM;
+}
+
+static int obf_path(const struct lu_env *env, struct md_object *obj,
+ char *path, int pathlen, __u64 recno, int *linkno)
+{
+ return -ENOSYS;
+}
+
+static struct md_object_operations mdd_obf_obj_ops = {
+ .moo_attr_get = obf_attr_get,
+ .moo_attr_set = obf_attr_set,
+ .moo_xattr_get = obf_xattr_get,
+ .moo_open = obf_mdd_open,
+ .moo_close = obf_mdd_close,
+ .moo_readpage = obf_mdd_readpage,
+ .moo_path = obf_path
+};
+
+/**
+ * Lookup method for the "fid" object. Only names in valid [SEQ:OID:VER] fid
+ * format are accepted. We also check whether an object with the given fid
+ * exists.
+ */
+static int obf_lookup(const struct lu_env *env, struct md_object *p,
+ const struct lu_name *lname, struct lu_fid *f,
+ struct md_op_spec *spec)
+{
+ char *name = (char *)lname->ln_name;
+ struct mdd_device *mdd = mdo2mdd(p);
+ struct mdd_object *child;
+ int rc = 0;
+
+ while (*name == '[')
+ name++;
+
+ sscanf(name, SFID, &(f->f_seq), &(f->f_oid),
+ &(f->f_ver));
+ if (!fid_is_sane(f)) {
+ CWARN("bad FID format [%s], should be "DFID"\n", lname->ln_name,
+ (__u64)1, 2, 0);
+ GOTO(out, rc = -EINVAL);
+ }
+
+ /* Check if object with this fid exists */
+ child = mdd_object_find(env, mdd, f);
+ if (child == NULL)
+ GOTO(out, rc = 0);
+ if (IS_ERR(child))
+ GOTO(out, rc = PTR_ERR(child));
+
+ if (mdd_object_exists(child) == 0)
+ rc = -ENOENT;
+
+ mdd_object_put(env, child);
+
+out:
+ return rc;
+}
+
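Putting the pieces together, a file can now be opened through the virtual <mntpt>/.lustre/fid/ directory by naming its fid. A hypothetical user-space sketch (the mount point and the literal fid string are made up for illustration; the exact string format is whatever the SFID scan above accepts):

        #include <fcntl.h>
        #include <stdio.h>
        #include <unistd.h>

        int main(void)
        {
                /* sketch only: mount point and fid value are hypothetical */
                int fd = open("/mnt/lustre/.lustre/fid/0x200000400:0x1:0x0",
                              O_RDONLY);
                if (fd < 0)
                        perror("open by fid");
                else
                        close(fd);
                return 0;
        }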
+static int obf_create(const struct lu_env *env, struct md_object *pobj,
+ const struct lu_name *lname, struct md_object *child,
+ struct md_op_spec *spec, struct md_attr* ma)
+{
+ return -EPERM;
+}
+
+static int obf_rename(const struct lu_env *env,
+ struct md_object *src_pobj, struct md_object *tgt_pobj,
+ const struct lu_fid *lf, const struct lu_name *lsname,
+ struct md_object *tobj, const struct lu_name *ltname,
+ struct md_attr *ma)
+{
+ return -EPERM;
+}
+
+static int obf_link(const struct lu_env *env, struct md_object *tgt_obj,
+ struct md_object *src_obj, const struct lu_name *lname,
+ struct md_attr *ma)
+{
+ return -EPERM;
+}
+
+static int obf_unlink(const struct lu_env *env, struct md_object *pobj,
+ struct md_object *cobj, const struct lu_name *lname,
+ struct md_attr *ma)
+{
+ return -EPERM;
+}
+
+static struct md_dir_operations mdd_obf_dir_ops = {
+ .mdo_lookup = obf_lookup,
+ .mdo_create = obf_create,
+ .mdo_rename = obf_rename,
+ .mdo_link = obf_link,
+ .mdo_unlink = obf_unlink
+};
+
+/**
+ * Create special in-memory "fid" object for open-by-fid.
+ */
+static int mdd_obf_setup(const struct lu_env *env, struct mdd_device *m)
+{
+ struct mdd_object *mdd_obf;
+ struct lu_object *obf_lu_obj;
+ int rc = 0;
+
+ m->mdd_dot_lustre_objs.mdd_obf = mdd_object_find(env, m,
+ &LU_OBF_FID);
+ if (m->mdd_dot_lustre_objs.mdd_obf == NULL ||
+ IS_ERR(m->mdd_dot_lustre_objs.mdd_obf))
+ GOTO(out, rc = -ENOENT);
+
+ mdd_obf = m->mdd_dot_lustre_objs.mdd_obf;
+ mdd_obf->mod_obj.mo_dir_ops = &mdd_obf_dir_ops;
+ mdd_obf->mod_obj.mo_ops = &mdd_obf_obj_ops;
+ /* Don't allow objects to be created in "fid" dir */
+ mdd_obf->mod_flags |= IMMUTE_OBJ;
+
+ obf_lu_obj = mdd2lu_obj(mdd_obf);
+ obf_lu_obj->lo_header->loh_attr |= (LOHA_EXISTS | S_IFDIR);
+
+out:
+ return rc;
+}
+
+/** Setup ".lustre" directory object */
+static int mdd_dot_lustre_setup(const struct lu_env *env, struct mdd_device *m)
+{
+ struct dt_object *dt_dot_lustre;
+ struct lu_fid *fid = &mdd_env_info(env)->mti_fid;
+ int rc;
+
+ rc = create_dot_lustre_dir(env, m);
+ if (rc)
+ return rc;
+
+ dt_dot_lustre = dt_store_open(env, m->mdd_child, mdd_root_dir_name,
+ mdd_dot_lustre_name, fid);
+ if (IS_ERR(dt_dot_lustre)) {
+ rc = PTR_ERR(dt_dot_lustre);
+ GOTO(out, rc);
+ }
+
+ /* references are released in mdd_device_shutdown() */
+ m->mdd_dot_lustre = lu2mdd_obj(lu_object_locate(dt_dot_lustre->do_lu.lo_header,
+ &mdd_device_type));
+
+ lu_object_put(env, &dt_dot_lustre->do_lu);
+
+ m->mdd_dot_lustre->mod_obj.mo_dir_ops = &mdd_dot_lustre_dir_ops;
+ m->mdd_dot_lustre->mod_obj.mo_ops = &mdd_dot_lustre_obj_ops;
+
+ rc = mdd_obf_setup(env, m);
+ if (rc)
+ CERROR("Error initializing \"fid\" object - %d.\n", rc);
+
+out:
+ RETURN(rc);
+}
+
static int mdd_process_config(const struct lu_env *env,
struct lu_device *d, struct lustre_cfg *cfg)
{
LASSERT(root != NULL);
lu_object_put(env, &root->do_lu);
rc = orph_index_init(env, mdd);
- } else
+ } else {
rc = PTR_ERR(root);
+ }
+ if (rc)
+ GOTO(out, rc);
+
+ rc = mdd_dot_lustre_setup(env, mdd);
+ if (rc) {
+ CERROR("Error(%d) initializing .lustre objects\n", rc);
+ GOTO(out, rc);
+ }
out:
RETURN(rc);
__u64 mc_starttime;
};
+/** Objects in .lustre dir */
+struct mdd_dot_lustre_objs {
+ struct mdd_object *mdd_obf;
+};
+
struct mdd_device {
struct md_device mdd_md_dev;
struct dt_device *mdd_child;
struct mdd_txn_op_descr mdd_tod[MDD_TXN_LAST_OP];
struct mdd_changelog mdd_cl;
unsigned long mdd_atime_diff;
+ struct mdd_object *mdd_dot_lustre;
+ struct mdd_dot_lustre_objs mdd_dot_lustre_objs;
};
enum mod_flags {
struct mdd_object *mdd_object_find(const struct lu_env *env,
struct mdd_device *d,
const struct lu_fid *f);
+int mdd_get_default_md(struct mdd_object *mdd_obj, struct lov_mds_md *lmm,
+ int *size);
/* mdd_quota.c*/
#ifdef HAVE_QUOTA_SUPPORT
RETURN(rc);
}
-static int mdd_get_default_md(struct mdd_object *mdd_obj,
- struct lov_mds_md *lmm, int *size)
+int mdd_get_default_md(struct mdd_object *mdd_obj, struct lov_mds_md *lmm,
+ int *size)
{
struct lov_desc *ldesc;
struct mdd_device *mdd = mdo2mdd(&mdd_obj->mod_obj);
MODULES := mds
mds-objs := handler.o lproc_mds.o mds_fs.o mds_log.o mds_lov.o
+EXTRA_DIST := $(mds-objs:%.o=%.c) mds_internal.h
+
@INCLUDE_RULES@
endif
MOSTLYCLEANFILES := @MOSTLYCLEANFILES@
-DIST_SOURCES := $(mds-objs:%.o=%.c) mds_internal.h
int mds_lov_connect(struct obd_device *obd, char * lov_name)
{
struct mds_obd *mds = &obd->u.mds;
- struct lustre_handle conn = {0,};
struct obd_connect_data *data;
int rc;
ENTRY;
/* send the list of supported checksum types */
data->ocd_cksum_types = OBD_CKSUM_ALL;
/* NB: lov_connect() needs to fill in .ocd_index for each OST */
- rc = obd_connect(NULL, &conn, mds->mds_osc_obd, &obd->obd_uuid, data, NULL);
+ rc = obd_connect(NULL, &mds->mds_osc_exp, mds->mds_osc_obd, &obd->obd_uuid, data, NULL);
OBD_FREE(data, sizeof(*data));
if (rc) {
CERROR("MDS cannot connect to LOV %s (%d)\n", lov_name, rc);
mds->mds_osc_obd = ERR_PTR(rc);
RETURN(rc);
}
- mds->mds_osc_exp = class_conn2export(&conn);
/* I want to see a callback happen when the OBD moves to a
* "For General Use" state, and that's when we'll call
}
LASSERT(desc->bd_nob == rdpg->rp_count);
+ rc = sptlrpc_svc_wrap_bulk(req, desc);
+ if (rc)
+ GOTO(free_desc, rc);
+
rc = ptlrpc_start_bulk_transfer(desc);
if (rc)
GOTO(free_desc, rc);
ptlrpc_prep_bulk_page(desc, page, (int)reqbody->size,
(int)reqbody->nlink);
+ rc = sptlrpc_svc_prep_bulk(req, desc);
+ if (rc != 0)
+ GOTO(cleanup_page, rc);
/*
* Check if client was evicted while we were doing i/o before touching
* network.
if (likely(rc == 0)) {
rc = mdt_recovery(info);
if (likely(rc == +1)) {
+ switch (lustre_msg_get_opc(msg)) {
+ case MDS_READPAGE:
+ req->rq_bulk_read = 1;
+ break;
+ case MDS_WRITEPAGE:
+ req->rq_bulk_write = 1;
+ break;
+ }
+
h = mdt_handler_find(lustre_msg_get_opc(msg),
supported);
if (likely(h != NULL)) {
/* mds_connect copy */
static int mdt_obd_connect(const struct lu_env *env,
- struct lustre_handle *conn, struct obd_device *obd,
+ struct obd_export **exp, struct obd_device *obd,
struct obd_uuid *cluuid,
struct obd_connect_data *data,
void *localdata)
{
struct mdt_thread_info *info;
struct lsd_client_data *lcd;
- struct obd_export *exp;
+ struct obd_export *lexp;
+ struct lustre_handle conn = { 0 };
struct mdt_device *mdt;
struct ptlrpc_request *req;
int rc;
ENTRY;
LASSERT(env != NULL);
- if (!conn || !obd || !cluuid)
+ if (!exp || !obd || !cluuid)
RETURN(-EINVAL);
info = lu_context_key_get(&env->le_ctx, &mdt_thread_key);
req = info->mti_pill->rc_req;
mdt = mdt_dev(obd->obd_lu_dev);
- rc = class_connect(conn, obd, cluuid);
+ rc = class_connect(&conn, obd, cluuid);
if (rc)
RETURN(rc);
- exp = class_conn2export(conn);
- LASSERT(exp != NULL);
+ lexp = class_conn2export(&conn);
+ LASSERT(lexp != NULL);
- rc = mdt_connect_check_sptlrpc(mdt, exp, req);
+ rc = mdt_connect_check_sptlrpc(mdt, lexp, req);
if (rc)
GOTO(out, rc);
- rc = mdt_connect_internal(exp, mdt, data);
+ rc = mdt_connect_internal(lexp, mdt, data);
if (rc == 0) {
OBD_ALLOC_PTR(lcd);
if (lcd != NULL) {
mti = lu_context_key_get(&env->le_ctx,
&mdt_thread_key);
LASSERT(mti != NULL);
- mti->mti_exp = exp;
+ mti->mti_exp = lexp;
memcpy(lcd->lcd_uuid, cluuid, sizeof lcd->lcd_uuid);
- exp->exp_mdt_data.med_lcd = lcd;
+ lexp->exp_mdt_data.med_lcd = lcd;
rc = mdt_client_new(env, mdt);
if (rc != 0) {
OBD_FREE_PTR(lcd);
- exp->exp_mdt_data.med_lcd = NULL;
+ lexp->exp_mdt_data.med_lcd = NULL;
} else {
- mdt_export_stats_init(obd, exp, localdata);
+ mdt_export_stats_init(obd, lexp, localdata);
}
} else
rc = -ENOMEM;
out:
if (rc != 0)
- class_disconnect(exp);
+ class_disconnect(lexp);
else
- class_export_put(exp);
+ *exp = lexp;
RETURN(rc);
}
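This is the same conversion applied to every connect handler in this patch: obd_connect() now hands back the export itself instead of a lustre_handle, so the class_conn2export() step moves inside the handler. Callers reduce to roughly the following (cf. the llog_test and lustre_start_mgc hunks below; the destination variable is illustrative):

        struct obd_export *exp = NULL;

        rc = obd_connect(NULL, &exp, obd, &obd->obd_uuid, data, NULL);
        if (rc == 0)
                /* exp already carries the reference that callers
                 * previously obtained via class_conn2export() */
                obd->u.cli.cl_mgc_mgsexp = exp;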
LASSERT(info->mti_pill->rc_fmt == &RQF_LDLM_INTENT_OPEN);
ldlm_rep = req_capsule_server_get(info->mti_pill, &RMF_DLM_REP);
- /* TODO: JOIN file */
+ /* The JOIN file feature has been deprecated since 1.6.5, but may be revived one day */
if (create_flags & MDS_OPEN_JOIN_FILE) {
- CERROR("JOIN file will be supported soon\n");
+ CERROR("file join is unsupported in this version of Lustre\n");
GOTO(out, result = err_serious(-EOPNOTSUPP));
}
msg_flags = lustre_msg_get_flags(req->rq_reqmsg);
req_capsule_set_size(info->mti_pill, &RMF_LOGCOOKIES, RCL_SERVER,
info->mti_mdt->mdt_max_cookiesize);
rc = req_capsule_server_pack(info->mti_pill);
- if (mdt_check_resent(info, mdt_reconstruct_generic, NULL))
+ if (mdt_check_resent(info, mdt_reconstruct_generic, NULL)) {
+ if (rc == 0)
+ mdt_shrink_reply(info);
RETURN(lustre_msg_get_status(req->rq_repmsg));
+ }
/* Continue to close handle even if we can not pack reply */
if (rc == 0) {
#include "mdt_internal.h"
static int mdt_server_data_update(const struct lu_env *env,
- struct mdt_device *mdt);
+ struct mdt_device *mdt,
+ int need_sync);
struct lu_buf *mdt_buf(const struct lu_env *env, void *area, ssize_t len)
{
return rc;
}
+static void mdt_client_cb(const struct mdt_device *mdt, __u64 transno,
+ void *data, int err)
+{
+ struct obd_device *obd = mdt2obd_dev(mdt);
+ target_client_add_cb(obd, transno, data, err);
+}
+
static inline int mdt_last_rcvd_header_write(const struct lu_env *env,
- struct mdt_device *mdt)
+ struct mdt_device *mdt,
+ int need_sync)
{
struct mdt_thread_info *mti;
struct thandle *th;
mti = lu_context_key_get(&env->le_ctx, &mdt_thread_key);
+ if (mti->mti_exp) {
+ spin_lock(&mti->mti_exp->exp_lock);
+ mti->mti_exp->exp_need_sync = need_sync;
+ spin_unlock(&mti->mti_exp->exp_lock);
+ }
mdt_trans_credit_init(env, mdt, MDT_TXN_LAST_RCVD_WRITE_OP);
th = mdt_trans_start(env, mdt);
if (IS_ERR(th))
mti->mti_off = 0;
lsd_cpu_to_le(&mdt->mdt_lsd, &mti->mti_lsd);
+ if (need_sync && mti->mti_exp)
+ mdt_trans_add_cb(th, mdt_client_cb, mti->mti_exp);
+
rc = mdt_record_write(env, mdt->mdt_last_rcvd,
mdt_buf_const(env, &mti->mti_lsd,
sizeof(mti->mti_lsd)),
lsd->lsd_mount_count = mdt->mdt_mount_count;
/* save it, so mount count and last_transno is current */
- rc = mdt_server_data_update(env, mdt);
+ rc = mdt_server_data_update(env, mdt, (mti->mti_exp &&
+ mti->mti_exp->exp_need_sync));
if (rc)
GOTO(err_client, rc);
}
static int mdt_server_data_update(const struct lu_env *env,
- struct mdt_device *mdt)
+ struct mdt_device *mdt,
+ int need_sync)
{
int rc = 0;
ENTRY;
* mdt->mdt_last_rcvd may be NULL that time.
*/
if (mdt->mdt_last_rcvd != NULL)
- rc = mdt_last_rcvd_header_write(env, mdt);
+ rc = mdt_last_rcvd_header_write(env, mdt, need_sync);
RETURN(rc);
}
-void mdt_cb_new_client(const struct mdt_device *mdt, __u64 transno,
- void *data, int err)
-{
- struct obd_device *obd = mdt2obd_dev(mdt);
-
- target_client_add_cb(obd, transno, data, err);
-}
-
int mdt_client_new(const struct lu_env *env, struct mdt_device *mdt)
{
unsigned long *bitmap = mdt->mdt_client_bitmap;
init_mutex(&med->med_lcd_lock);
LASSERTF(med->med_lr_off > 0, "med_lr_off = %llu\n", med->med_lr_off);
- /* write new client data */
+
+ /* Write new client data. */
off = med->med_lr_off;
mdt_trans_credit_init(env, mdt, MDT_TXN_LAST_RCVD_WRITE_OP);
+
th = mdt_trans_start(env, mdt);
if (IS_ERR(th))
RETURN(PTR_ERR(th));
- /* until this operations will be committed the sync is needed for this
- * export */
- mdt_trans_add_cb(th, mdt_cb_new_client, mti->mti_exp);
+ /*
+ * Until this operation is committed, sync is needed for
+ * this export. This must be done _after_ starting the
+ * transaction so that many connecting clients will not
+ * bring the server down with a flood of sync writes.
+ */
+ mdt_trans_add_cb(th, mdt_client_cb, mti->mti_exp);
spin_lock(&mti->mti_exp->exp_lock);
mti->mti_exp->exp_need_sync = 1;
spin_unlock(&mti->mti_exp->exp_lock);
struct mdt_export_data *med;
struct lsd_client_data *lcd;
struct obd_device *obd = mdt2obd_dev(mdt);
- struct thandle *th;
- loff_t off;
- int rc = 0;
+ struct obd_export *exp;
+ struct thandle *th;
+ int need_sync;
+ loff_t off;
+ int rc = 0;
ENTRY;
mti = lu_context_key_get(&env->le_ctx, &mdt_thread_key);
LASSERT(mti != NULL);
- med = &mti->mti_exp->exp_mdt_data;
+ exp = mti->mti_exp;
+ med = &exp->exp_mdt_data;
lcd = med->med_lcd;
if (!lcd)
RETURN(0);
/* XXX: If lcd_uuid were a real obd_uuid, I could use obd_uuid_equals */
- if (!strcmp(med->med_lcd->lcd_uuid, obd->obd_uuid.uuid))
+ if (!strcmp(lcd->lcd_uuid, obd->obd_uuid.uuid))
GOTO(free, 0);
CDEBUG(D_INFO, "freeing client at idx %u, offset %lld\n",
LBUG();
}
+ /* Don't force sync on disconnect if aborting recovery,
+ * or it costs num_clients * num_osts sync writes. b=17194 */
+ need_sync = (!exp->exp_libclient || exp->exp_need_sync) &&
+ !(exp->exp_flags & OBD_OPT_ABORT_RECOV);
+
/*
* This may be called from difficult reply handler path and
* mdt->mdt_last_rcvd may be NULL that time.
*/
if (mdt->mdt_last_rcvd != NULL) {
mdt_trans_credit_init(env, mdt, MDT_TXN_LAST_RCVD_WRITE_OP);
+
+ spin_lock(&exp->exp_lock);
+ exp->exp_need_sync = need_sync;
+ spin_unlock(&exp->exp_lock);
+
th = mdt_trans_start(env, mdt);
if (IS_ERR(th))
GOTO(free, rc = PTR_ERR(th));
+ if (need_sync) {
+ /*
+ * Until this operation is committed, sync is
+ * needed for this export.
+ */
+ mdt_trans_add_cb(th, mdt_client_cb, exp);
+ }
+
mutex_down(&med->med_lcd_lock);
memset(lcd, 0, sizeof *lcd);
}
CDEBUG(rc == 0 ? D_INFO : D_ERROR, "Zeroing out client idx %u in "
- "%s rc %d\n", med->med_lr_idx, LAST_RCVD, rc);
+ "%s %ssync rc %d\n", med->med_lr_idx, LAST_RCVD,
+ need_sync ? "" : "a", rc);
spin_lock(&mdt->mdt_client_bitmap_lock);
clear_bit(med->med_lr_idx, mdt->mdt_client_bitmap);
spin_unlock(&mdt->mdt_client_bitmap_lock);
- /*
- * Make sure the server's last_transno is up to date. Do this after the
- * client is freed so we know all the client's transactions have been
- * committed.
+ /*
+ * Make sure the server's last_transno is up to date. Do this
+ * after the client is freed so we know all the client's
+ * transactions have been committed.
*/
- mdt_server_data_update(env, mdt);
+ mdt_server_data_update(env, mdt, need_sync);
+
EXIT;
free:
OBD_FREE_PTR(lcd);
*/
if (mti->mti_transno == 0 &&
*transno_p == mdt->mdt_last_transno)
- mdt_server_data_update(mti->mti_env, mdt);
+ mdt_server_data_update(mti->mti_env, mdt,
+ (mti->mti_exp &&
+ mti->mti_exp->exp_need_sync));
*transno_p = mti->mti_transno;
MODULES := mgc
mgc-objs := mgc_request.o lproc_mgc.o
+EXTRA_DIST := $(mgc-objs:%.o=%.c) libmgc.c mgc_internal.h
+
@INCLUDE_RULES@
endif
MOSTLYCLEANFILES := @MOSTLYCLEANFILES@
-DIST_SOURCES := $(mgc-objs:%.o=%.c) libmgc.c mgc_internal.h
MODULES := mgs
mgs-objs := mgs_handler.o mgs_fs.o mgs_llog.o lproc_mgs.o
+EXTRA_DIST := $(mgs-objs:%.o=%.c) mgs_internal.h
+
@INCLUDE_RULES@
endif
MOSTLYCLEANFILES := @MOSTLYCLEANFILES@
-DIST_SOURCES := $(mgs-objs:%.o=%.c) mgs_internal.h
/* Establish a connection to the MGS.*/
static int mgs_connect(const struct lu_env *env,
- struct lustre_handle *conn, struct obd_device *obd,
+ struct obd_export **exp, struct obd_device *obd,
struct obd_uuid *cluuid, struct obd_connect_data *data,
void *localdata)
{
- struct obd_export *exp;
+ struct obd_export *lexp;
+ struct lustre_handle conn = { 0 };
int rc;
ENTRY;
- if (!conn || !obd || !cluuid)
+ if (!exp || !obd || !cluuid)
RETURN(-EINVAL);
- rc = class_connect(conn, obd, cluuid);
+ rc = class_connect(&conn, obd, cluuid);
if (rc)
RETURN(rc);
- exp = class_conn2export(conn);
- LASSERT(exp);
- mgs_counter_incr(exp, LPROC_MGS_CONNECT);
+ lexp = class_conn2export(&conn);
+ LASSERT(lexp);
+
+ mgs_counter_incr(lexp, LPROC_MGS_CONNECT);
if (data != NULL) {
data->ocd_connect_flags &= MGS_CONNECT_SUPPORTED;
- exp->exp_connect_flags = data->ocd_connect_flags;
+ lexp->exp_connect_flags = data->ocd_connect_flags;
data->ocd_version = LUSTRE_VERSION_CODE;
}
- rc = mgs_client_add(obd, exp, localdata);
+ rc = mgs_client_add(obd, lexp, localdata);
if (rc) {
- class_disconnect(exp);
+ class_disconnect(lexp);
} else {
- class_export_put(exp);
+ *exp = lexp;
}
RETURN(rc);
ptlrpc_init_client(LDLM_CB_REQUEST_PORTAL, LDLM_CB_REPLY_PORTAL,
"mgs_ldlm_client", &obd->obd_ldlm_client);
- LASSERT(!lvfs_check_rdonly(lvfs_sbdev(mnt->mnt_sb)));
+ if (lvfs_check_rdonly(lvfs_sbdev(mnt->mnt_sb))) {
+ CERROR("%s: Underlying device is marked as read-only. "
+ "Setup failed\n", obd->obd_name);
+ GOTO(err_ops, rc = -EROFS);
+ }
rc = mgs_fs_setup(obd, mnt);
if (rc) {
rset = &fsdb->fsdb_srpc_gen;
}
- rc = sptlrpc_rule_set_merge(rset, &rule, 1);
+ rc = sptlrpc_rule_set_merge(rset, &rule);
RETURN(rc);
}
int rc, copy_size;
ENTRY;
+#ifndef HAVE_GSS
+ RETURN(-EINVAL);
+#endif
/* keep a copy of the original param, which could be destroyed
* during parsing */
copy_size = strlen(param) + 1;
$(obj)/llog-test.c: $(obj)/llog_test.c
ln -sf $< $@
+EXTRA_DIST = $(filter-out llog-test.c,$(obdclass-all-objs:.o=.c)) $(llog-test-objs:.o=.c) llog_test.c llog_internal.h
+EXTRA_DIST += cl_internal.h
+
@INCLUDE_RULES@
MOSTLYCLEANFILES := @MOSTLYCLEANFILES@ llog-test.c
MOSTLYCLEANFILES += linux/*.o darwin/*.o
-DIST_SOURCES = $(filter-out llog-test.c,$(obdclass-all-objs:.o=.c)) $(llog-test-objs:.o=.c) llog_test.c llog_internal.h cl_internal.h
(const struct lu_env *,
const struct cl_page_slice *, int), ioret);
- KLASSERT(!PageWriteback(cl_page_vmpage(env, pg)));
+ /* Don't assert the page writeback bit here, because the Lustre file
+ * may be used as a backing store for swap space. In that case the
+ * writeback bit is set by the VM, and obviously we must not clear it.
+ * Fortunately, pages of this type are always TRANSIENT pages. */
+ KLASSERT(ergo(pg->cp_type == CPT_CACHEABLE,
+ !PageWriteback(cl_page_vmpage(env, pg))));
EXIT;
}
EXPORT_SYMBOL(cl_page_completion);
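For reference, the ergo() used in the assertion above is libcfs's logical-implication helper, defined (to the best of my recollection of the libcfs headers) as:

        #define ergo(a, b) (!(a) || (b))   /* "a implies b" */

So the assertion reads: a CPT_CACHEABLE page must not have the writeback bit set, while a transient page may.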
RETURN(0);
}
-static void class_disconnect_export_list(struct list_head *list, int flags)
+static void class_disconnect_export_list(struct list_head *list,
+ enum obd_option flags)
{
int rc;
struct lustre_handle fake_conn;
EXIT;
}
-static inline int get_exp_flags_from_obd(struct obd_device *obd)
-{
- return ((obd->obd_fail ? OBD_OPT_FAILOVER : 0) |
- (obd->obd_force ? OBD_OPT_FORCE : 0));
-}
-
void class_disconnect_exports(struct obd_device *obd)
{
struct list_head work_list;
CDEBUG(D_HA, "OBD device %d (%p) has exports, "
"disconnecting them\n", obd->obd_minor, obd);
class_disconnect_export_list(&work_list,
- get_exp_flags_from_obd(obd));
+ exp_flags_from_obd(obd));
} else
CDEBUG(D_HA, "OBD device %d (%p) has no exports\n",
obd->obd_minor, obd);
/* Remove exports that have not completed recovery.
*/
int class_disconnect_stale_exports(struct obd_device *obd,
- int (*test_export)(struct obd_export *))
+ int (*test_export)(struct obd_export *),
+ enum obd_option flags)
{
struct list_head work_list;
struct list_head *pos, *n;
CDEBUG(D_ERROR, "%s: disconnecting %d stale clients\n",
obd->obd_name, cnt);
- class_disconnect_export_list(&work_list, get_exp_flags_from_obd(obd));
+ class_disconnect_export_list(&work_list, flags);
RETURN(cnt);
}
EXPORT_SYMBOL(class_disconnect_stale_exports);
struct obd_device *mgc_obd;
struct llog_ctxt *ctxt = llog_get_context(obd, LLOG_TEST_ORIG_CTXT);
struct obd_uuid *mgs_uuid = &ctxt->loc_exp->exp_obd->obd_uuid;
- struct lustre_handle exph = {0, };
struct obd_export *exp;
struct obd_uuid uuid = {"LLOG_TEST6_UUID"};
struct llog_handle *llh = NULL;
GOTO(ctxt_release, rc = -ENOENT);
}
- rc = obd_connect(NULL, &exph, mgc_obd, &uuid,
+ rc = obd_connect(NULL, &exp, mgc_obd, &uuid,
NULL /* obd_connect_data */, NULL);
if (rc) {
CERROR("6: failed to connect to MGC: %s\n", mgc_obd->obd_name);
GOTO(ctxt_release, rc);
}
- exp = class_conn2export(&exph);
+ LASSERTF(exp->exp_obd == mgc_obd, "%p - %p - %p\n", exp, exp->exp_obd, mgc_obd);
nctxt = llog_get_context(mgc_obd, LLOG_CONFIG_REPL_CTXT);
rc = llog_create(nctxt, &llh, NULL, name);
if (rc) {
CERROR("6: llog_close failed: rc = %d\n", rc);
}
+ CDEBUG(D_INFO, "obd %p - %p - %p - %p\n",
+ mgc_obd, exp, exp->exp_obd, exp->exp_obd->obd_type);
rc = obd_disconnect(exp);
ctxt_release:
llog_ctxt_put(ctxt);
{
LINVRNT(ctx->lc_state == LCS_ENTERED);
LINVRNT(0 <= key->lct_index && key->lct_index < ARRAY_SIZE(lu_keys));
+ LASSERT(lu_keys[key->lct_index] == key);
return ctx->lc_value[key->lct_index];
}
EXPORT_SYMBOL(lu_context_key_get);
be no more in-progress ops by this point.*/
spin_lock(&obd->obd_self_export->exp_lock);
- obd->obd_self_export->exp_flags |=
- (obd->obd_fail ? OBD_OPT_FAILOVER : 0) |
- (obd->obd_force ? OBD_OPT_FORCE : 0);
+ obd->obd_self_export->exp_flags |= exp_flags_from_obd(obd);
spin_unlock(&obd->obd_self_export->exp_lock);
/* note that we'll recurse into class_decref again */
*/
static int lustre_start_mgc(struct super_block *sb)
{
- struct lustre_handle mgc_conn = {0, };
struct obd_connect_data *data = NULL;
struct lustre_sb_info *lsi = s2lsi(sb);
struct obd_device *obd;
data->ocd_connect_flags = OBD_CONNECT_VERSION | OBD_CONNECT_FID |
OBD_CONNECT_AT;
data->ocd_version = LUSTRE_VERSION_CODE;
- rc = obd_connect(NULL, &mgc_conn, obd, &(obd->obd_uuid), data, NULL);
+ rc = obd_connect(NULL, &exp, obd, &(obd->obd_uuid), data, NULL);
OBD_FREE_PTR(data);
if (rc) {
CERROR("connect failed %d\n", rc);
GOTO(out, rc);
}
- exp = class_conn2export(&mgc_conn);
obd->u.cli.cl_mgc_mgsexp = exp;
out:
GOTO(out_free, rc);
}
+ if (lmd->lmd_flags & LMD_FLG_ABORT_RECOV)
+ simple_truncate(mnt->mnt_sb->s_root, mnt, LAST_RCVD,
+ LR_CLIENT_START);
+
OBD_PAGE_FREE(__page);
lsi->lsi_ldd = ldd; /* freed at lsi cleanup */
CDEBUG(D_SUPER, "%s: mnt = %p\n", lmd->lmd_dev, mnt);
MODULES := obdecho
obdecho-objs := echo.o echo_client.o lproc_echo.o
+EXTRA_DIST = $(obdecho-objs:%.o=%.c) echo_internal.h
+
@INCLUDE_RULES@
install-data-hook: $(install_data_hook)
MOSTLYCLEANFILES := @MOSTLYCLEANFILES@
-DIST_SOURCES = $(obdecho-objs:%.o=%.c) echo_internal.h
};
static int echo_connect(const struct lu_env *env,
- struct lustre_handle *conn, struct obd_device *obd,
+ struct obd_export **exp, struct obd_device *obd,
struct obd_uuid *cluuid, struct obd_connect_data *data,
void *localdata)
{
+ struct lustre_handle conn = { 0 };
+ int rc;
+
data->ocd_connect_flags &= ECHO_CONNECT_SUPPORTED;
- return class_connect(conn, obd, cluuid);
+ rc = class_connect(&conn, obd, cluuid);
+ if (rc) {
+ CERROR("can't connect %d\n", rc);
+ return rc;
+ }
+ *exp = class_conn2export(&conn);
+
+ return 0;
}
static int echo_disconnect(struct obd_export *exp)
{
struct echo_client_obd *ec = &obddev->u.echo_client;
struct obd_device *tgt;
- struct lustre_handle conn = {0, };
struct obd_uuid echo_uuid = { "ECHO_UUID" };
struct obd_connect_data *ocd = NULL;
int rc;
ocd->ocd_version = LUSTRE_VERSION_CODE;
ocd->ocd_group = FILTER_GROUP_ECHO;
- rc = obd_connect(NULL, &conn, tgt, &echo_uuid, ocd, NULL);
+ rc = obd_connect(NULL, &ec->ec_exp, tgt, &echo_uuid, ocd, NULL);
OBD_FREE(ocd, sizeof(*ocd));
lustre_cfg_string(lcfg, 1));
return (rc);
}
- ec->ec_exp = class_conn2export(&conn);
RETURN(rc);
}
}
static int echo_client_connect(const struct lu_env *env,
- struct lustre_handle *conn,
+ struct obd_export **exp,
struct obd_device *src, struct obd_uuid *cluuid,
struct obd_connect_data *data, void *localdata)
{
- struct obd_export *exp;
int rc;
+ struct lustre_handle conn = { 0 };
ENTRY;
- rc = class_connect(conn, src, cluuid);
+ rc = class_connect(&conn, src, cluuid);
if (rc == 0) {
- exp = class_conn2export(conn);
- class_export_put(exp);
+ *exp = class_conn2export(&conn);
}
RETURN (rc);
obdfilter-objs += lproc_obdfilter.o filter_lvb.o filter_capa.o
obdfilter-objs += filter_io_26.o
+EXTRA_DIST = $(obdfilter-objs:%.o=%.c) filter_io_26.c filter_internal.h
+
@INCLUDE_RULES@
endif
MOSTLYCLEANFILES := @MOSTLYCLEANFILES@
-DIST_SOURCES = $(obdfilter-objs:%.o=%.c) filter_io_26.c filter_internal.h
struct obd_export *exp,
void *client_nid)
{
- struct filter_export_data *fed = &exp->exp_filter_data;
int rc, newnid = 0;
ENTRY;
- init_brw_stats(&fed->fed_brw_stats);
-
if (obd_uuid_equals(&exp->exp_client_uuid, &obd->obd_uuid))
/* Self-export gets no proc entry */
RETURN(0);
RETURN(0);
}
+struct lsd_client_data zero_lcd; /* globals are implicitly zeroed */
+
static int filter_client_free(struct obd_export *exp)
{
struct filter_export_data *fed = &exp->exp_filter_data;
struct filter_obd *filter = &exp->exp_obd->u.filter;
struct obd_device *obd = exp->exp_obd;
- struct lsd_client_data zero_lcd;
struct lvfs_run_ctxt saved;
int rc;
loff_t off;
}
if (!(exp->exp_flags & OBD_OPT_FAILOVER)) {
- memset(&zero_lcd, 0, sizeof zero_lcd);
+ /* Don't force sync on disconnect if aborting recovery,
+ * or it costs num_clients * num_osts sync writes. b=17194 */
+ int need_sync = (!exp->exp_libclient || exp->exp_need_sync) &&
+ !(exp->exp_flags&OBD_OPT_ABORT_RECOV);
push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
rc = fsfilt_write_record(obd, filter->fo_rcvd_filp, &zero_lcd,
- sizeof(zero_lcd), &off,
- (!exp->exp_libclient ||
- exp->exp_need_sync));
+ sizeof(zero_lcd), &off, 0);
+
+ /* Make sure the server's last_transno is up to date. Do this
+ * after the client is freed so we know all the client's
+ * transactions have been committed. */
if (rc == 0)
- /* update server's transno */
filter_update_server_data(obd, filter->fo_rcvd_filp,
- filter->fo_fsd,
- !exp->exp_libclient);
+ filter->fo_fsd, need_sync);
pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
CDEBUG(rc == 0 ? D_INFO : D_ERROR,
- "zeroing out client %s at idx %u (%llu) in %s rc %d\n",
+ "zero out client %s at idx %u/%llu in %s %ssync rc %d\n",
fed->fed_lcd->lcd_uuid, fed->fed_lr_idx, fed->fed_lr_off,
- LAST_RCVD, rc);
+ LAST_RCVD, need_sync ? "" : "a", rc);
}
if (!test_and_clear_bit(fed->fed_lr_idx, filter->fo_last_rcvd_slots)) {
if (rc != 0)
GOTO(err_ops, rc);
- LASSERT(!lvfs_check_rdonly(lvfs_sbdev(mnt->mnt_sb)));
+ if (lvfs_check_rdonly(lvfs_sbdev(mnt->mnt_sb))) {
+ CERROR("%s: Underlying device is marked as read-only. "
+ "Setup failed\n", obd->obd_name);
+ GOTO(err_ops, rc = -EROFS);
+ }
/* failover is the default */
obd->obd_replayable = 1;
/* nearly identical to mds_connect */
static int filter_connect(const struct lu_env *env,
- struct lustre_handle *conn, struct obd_device *obd,
+ struct obd_export **exp, struct obd_device *obd,
struct obd_uuid *cluuid,
struct obd_connect_data *data, void *localdata)
{
struct lvfs_run_ctxt saved;
- struct obd_export *exp;
+ struct lustre_handle conn = { 0 };
+ struct obd_export *lexp;
struct filter_export_data *fed;
struct lsd_client_data *lcd = NULL;
__u32 group;
int rc;
ENTRY;
- if (conn == NULL || obd == NULL || cluuid == NULL)
+ if (exp == NULL || obd == NULL || cluuid == NULL)
RETURN(-EINVAL);
- rc = class_connect(conn, obd, cluuid);
+ rc = class_connect(&conn, obd, cluuid);
if (rc)
RETURN(rc);
- exp = class_conn2export(conn);
- LASSERT(exp != NULL);
+ lexp = class_conn2export(&conn);
+ LASSERT(lexp != NULL);
- fed = &exp->exp_filter_data;
+ fed = &lexp->exp_filter_data;
- rc = filter_connect_internal(exp, data);
+ rc = filter_connect_internal(lexp, data);
if (rc)
GOTO(cleanup, rc);
- filter_export_stats_init(obd, exp, localdata);
+ filter_export_stats_init(obd, lexp, localdata);
if (obd->obd_replayable) {
OBD_ALLOC(lcd, sizeof(*lcd));
if (!lcd) {
memcpy(lcd->lcd_uuid, cluuid, sizeof(lcd->lcd_uuid));
fed->fed_lcd = lcd;
- rc = filter_client_add(obd, exp, -1);
+ rc = filter_client_add(obd, lexp, -1);
if (rc)
GOTO(cleanup, rc);
}
group = data->ocd_group;
CWARN("%s: Received MDS connection ("LPX64"); group %d\n",
- obd->obd_name, exp->exp_handle.h_cookie, group);
+ obd->obd_name, lexp->exp_handle.h_cookie, group);
push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
rc = filter_read_groups(obd, group, 1);
OBD_FREE_PTR(lcd);
fed->fed_lcd = NULL;
}
- class_disconnect(exp);
+ class_disconnect(lexp);
+ *exp = NULL;
} else {
- class_export_put(exp);
+ *exp = lexp;
}
RETURN(rc);
/* Flush any remaining cancel messages out to the target */
filter_sync_llogs(obd, exp);
+ lquota_clearinfo(filter_quota_interface_ref, exp, exp->exp_obd);
+
/* Disconnect early so that clients can't keep using export */
rc = class_disconnect(exp);
if (exp->exp_obd->obd_namespace != NULL)
obd->u.filter.fo_mdc_conn.cookie = exp->exp_handle.h_cookie;
/* setup llog imports */
- LASSERT(val != NULL);
+ if (val != NULL)
+ group = (int)(*(__u32 *)val);
+ else
+ group = 0; /* default value */
- group = (int)(*(__u32 *)val);
LASSERT_MDS_GROUP(group);
rc = filter_setup_llog_group(exp, obd, group);
if (rc)
atomic_inc(&filter->fo_r_in_flight);
lprocfs_oh_tally(&filter->fo_filter_stats.hist[BRW_R_RPC_HIST],
atomic_read(&filter->fo_r_in_flight));
- lprocfs_oh_tally_log2(&filter->fo_filter_stats.hist[BRW_R_DISK_IOSIZE],
+ lprocfs_oh_tally_log2(&filter->
+ fo_filter_stats.hist[BRW_R_DISK_IOSIZE],
size);
- lprocfs_oh_tally(&exp->exp_filter_data.fed_brw_stats.hist[BRW_R_RPC_HIST],
- atomic_read(&filter->fo_r_in_flight));
- lprocfs_oh_tally_log2(&exp->exp_filter_data.fed_brw_stats.hist[BRW_R_DISK_IOSIZE], size);
+ if (exp->exp_nid_stats && exp->exp_nid_stats->nid_brw_stats) {
+ lprocfs_oh_tally(&exp->exp_nid_stats->nid_brw_stats->
+ hist[BRW_R_RPC_HIST],
+ atomic_read(&filter->fo_r_in_flight));
+ lprocfs_oh_tally_log2(&exp->exp_nid_stats->
+ nid_brw_stats->hist[BRW_R_DISK_IOSIZE],
+ size);
+ }
} else {
atomic_inc(&filter->fo_w_in_flight);
lprocfs_oh_tally(&filter->fo_filter_stats.hist[BRW_W_RPC_HIST],
atomic_read(&filter->fo_w_in_flight));
- lprocfs_oh_tally_log2(&filter->fo_filter_stats.hist[BRW_W_DISK_IOSIZE],
+ lprocfs_oh_tally_log2(&filter->
+ fo_filter_stats.hist[BRW_W_DISK_IOSIZE],
size);
- lprocfs_oh_tally(&exp->exp_filter_data.fed_brw_stats.hist[BRW_W_RPC_HIST],
- atomic_read(&filter->fo_w_in_flight));
- lprocfs_oh_tally_log2(&exp->exp_filter_data.fed_brw_stats.hist[BRW_W_DISK_IOSIZE], size);
+ if (exp->exp_nid_stats && exp->exp_nid_stats->nid_brw_stats) {
+ lprocfs_oh_tally(&exp->exp_nid_stats->nid_brw_stats->
+ hist[BRW_W_RPC_HIST],
+ atomic_read(&filter->fo_w_in_flight));
+ lprocfs_oh_tally_log2(&exp->exp_nid_stats->
+ nid_brw_stats->hist[BRW_W_DISK_IOSIZE],
+ size);
+ }
}
}
wait_event(iobuf->dr_wait, atomic_read(&iobuf->dr_numreqs) == 0);
if (rw == OBD_BRW_READ) {
- lprocfs_oh_tally(&obd->u.filter.fo_filter_stats.hist[BRW_R_DIO_FRAGS],
- frags);
- lprocfs_oh_tally(&exp->exp_filter_data.fed_brw_stats.hist[BRW_R_DIO_FRAGS],
+ lprocfs_oh_tally(&obd->u.filter.fo_filter_stats.
+ hist[BRW_R_DIO_FRAGS],
frags);
- lprocfs_oh_tally_log2(&obd->u.filter.fo_filter_stats.hist[BRW_R_IO_TIME],
+ lprocfs_oh_tally_log2(&obd->u.filter.
+ fo_filter_stats.hist[BRW_R_IO_TIME],
jiffies - start_time);
- lprocfs_oh_tally_log2(&exp->exp_filter_data.fed_brw_stats.hist[BRW_R_IO_TIME], jiffies - start_time);
if (exp->exp_nid_stats && exp->exp_nid_stats->nid_brw_stats) {
- lprocfs_oh_tally(&exp->exp_nid_stats->nid_brw_stats->hist[BRW_R_DIO_FRAGS],
+ lprocfs_oh_tally(&exp->exp_nid_stats->nid_brw_stats->
+ hist[BRW_R_DIO_FRAGS],
frags);
- lprocfs_oh_tally_log2(&exp->exp_nid_stats->nid_brw_stats->hist[BRW_R_IO_TIME],
+ lprocfs_oh_tally_log2(&exp->exp_nid_stats->
+ nid_brw_stats->hist[BRW_R_IO_TIME],
jiffies - start_time);
}
} else {
- lprocfs_oh_tally(&obd->u.filter.fo_filter_stats.hist[BRW_W_DIO_FRAGS],
- frags);
- lprocfs_oh_tally(&exp->exp_filter_data.fed_brw_stats.hist[BRW_W_DIO_FRAGS],
- frags);
- lprocfs_oh_tally_log2(&obd->u.filter.fo_filter_stats.hist[BRW_W_IO_TIME],
+ lprocfs_oh_tally(&obd->u.filter.fo_filter_stats.
+ hist[BRW_W_DIO_FRAGS], frags);
+ lprocfs_oh_tally_log2(&obd->u.filter.fo_filter_stats.
+ hist[BRW_W_IO_TIME],
jiffies - start_time);
- lprocfs_oh_tally_log2(&exp->exp_filter_data.fed_brw_stats.hist[BRW_W_IO_TIME], jiffies - start_time);
if (exp->exp_nid_stats && exp->exp_nid_stats->nid_brw_stats) {
- lprocfs_oh_tally(&exp->exp_nid_stats->nid_brw_stats->hist[BRW_W_DIO_FRAGS],
+ lprocfs_oh_tally(&exp->exp_nid_stats->nid_brw_stats->
+ hist[BRW_W_DIO_FRAGS],
frags);
- lprocfs_oh_tally_log2(&exp->exp_nid_stats->nid_brw_stats->hist[BRW_W_IO_TIME],
+ lprocfs_oh_tally_log2(&exp->exp_nid_stats->
+ nid_brw_stats->hist[BRW_W_IO_TIME],
jiffies - start_time);
}
}
unsigned long *blocks, int blocks_per_page, int wr)
{
struct filter_obd *filter = &exp->exp_obd->u.filter;
- struct filter_export_data *fed = &exp->exp_filter_data;
struct page *last_page = NULL;
unsigned long *last_block = NULL;
unsigned long discont_pages = 0;
lprocfs_oh_tally_log2(&filter->fo_filter_stats.hist[BRW_R_PAGES + wr],
nr_pages);
- lprocfs_oh_tally_log2(&fed->fed_brw_stats.hist[BRW_R_PAGES + wr],
- nr_pages);
if (exp->exp_nid_stats && exp->exp_nid_stats->nid_brw_stats)
lprocfs_oh_tally_log2(&exp->exp_nid_stats->nid_brw_stats->
hist[BRW_R_PAGES + wr], nr_pages);
lprocfs_oh_tally(&filter->fo_filter_stats.hist[BRW_R_DISCONT_PAGES +wr],
discont_pages);
- lprocfs_oh_tally(&fed->fed_brw_stats.hist[BRW_R_DISCONT_PAGES + wr],
- discont_pages);
lprocfs_oh_tally(&filter->fo_filter_stats.hist[BRW_R_DISCONT_BLOCKS+wr],
discont_blocks);
- lprocfs_oh_tally(&fed->fed_brw_stats.hist[BRW_R_DISCONT_BLOCKS + wr],
- discont_blocks);
if (exp->exp_nid_stats && exp->exp_nid_stats->nid_brw_stats) {
lprocfs_oh_tally_log2(&exp->exp_nid_stats->nid_brw_stats->
&filter_brw_stats_fops, dev);
}
-static int filter_per_export_stats_seq_show(struct seq_file *seq, void *v)
-{
- struct filter_export_data *fed = seq->private;
-
- brw_stats_show(seq, &fed->fed_brw_stats);
-
- return 0;
-}
-
-static ssize_t filter_per_export_stats_seq_write(struct file *file,
- const char *buf, size_t len, loff_t *off)
-{
- struct seq_file *seq = file->private_data;
- struct filter_export_data *fed = seq->private;
- int i;
-
- for (i = 0; i < BRW_LAST; i++)
- lprocfs_oh_clear(&fed->fed_brw_stats.hist[i]);
-
- return len;
-}
-
-LPROC_SEQ_FOPS(filter_per_export_stats);
-
void lprocfs_filter_init_vars(struct lprocfs_static_vars *lvars)
{
lvars->module_vars = lprocfs_filter_module_vars;
static int filter_per_nid_stats_seq_show(struct seq_file *seq, void *v)
{
- nid_stat_t *tmp = seq->private;
+ nid_stat_t *stat = seq->private;
- if (tmp->nid_brw_stats)
- brw_stats_show(seq, tmp->nid_brw_stats);
+ if (stat->nid_brw_stats)
+ brw_stats_show(seq, stat->nid_brw_stats);
return 0;
}
const char *buf, size_t len,
loff_t *off)
{
- struct seq_file *seq = file->private_data;
- nid_stat_t *tmp = seq->private;
+ struct seq_file *seq = file->private_data;
+ nid_stat_t *stat = seq->private;
int i;
- if (tmp->nid_brw_stats)
+ if (stat->nid_brw_stats)
for (i = 0; i < BRW_LAST; i++)
- lprocfs_oh_clear(&tmp->nid_brw_stats->hist[i]);
+ lprocfs_oh_clear(&stat->nid_brw_stats->hist[i]);
return len;
}
MODULES := osc
osc-objs := osc_request.o lproc_osc.o osc_create.o osc_dev.o osc_object.o osc_page.o osc_lock.o osc_io.o
+EXTRA_DIST = $(osc-objs:%.o=%.c) osc_internal.h osc_cl_internal.h
+
@INCLUDE_RULES@
install-data-hook: $(install_data_hook)
MOSTLYCLEANFILES := @MOSTLYCLEANFILES@
-DIST_SOURCES = $(osc-objs:%.o=%.c) osc_internal.h osc_cl_internal.h
*
* This can be optimized to not update attributes when lock is a result of a
* local match.
+ *
+ * Called under lock and resource spin-locks.
*/
static void osc_lock_lvb_update(const struct lu_env *env, struct osc_lock *olck,
int rc)
dlmlock = olck->ols_lock;
LASSERT(dlmlock != NULL);
+ /* re-grab the LVB from the dlm lock while holding the DLM spin-locks */
+ *lvb = *(struct ost_lvb *)dlmlock->l_lvb_data;
size = lvb->lvb_size;
/* Extend KMS up to the end of this lock and no further
* A lock on [x,y] means a KMS of up to y + 1 bytes! */
lvb->lvb_size, oinfo->loi_kms,
dlmlock->l_policy_data.l_extent.end);
}
- ldlm_lock_allow_match(dlmlock);
+ ldlm_lock_allow_match_locked(dlmlock);
} else if (rc == -ENAVAIL && olck->ols_glimpse) {
CDEBUG(D_INODE, "glimpsed, setting rss="LPU64"; leaving"
" kms="LPU64"\n", lvb->lvb_size, oinfo->loi_kms);
EXIT;
}
+/**
+ * Called when a lock is granted, from an upcall (when server returned a
+ * granted lock), or from completion AST, when server returned a blocked lock.
+ *
+ * Called under lock and resource spin-locks, that are released temporarily
+ * here.
+ */
static void osc_lock_granted(const struct lu_env *env, struct osc_lock *olck,
struct ldlm_lock *dlmlock, int rc)
{
* tell upper layers the extent of the lock that was actually
* granted
*/
- cl_lock_modify(env, lock, descr);
LINVRNT(osc_lock_invariant(olck));
olck->ols_state = OLS_GRANTED;
osc_lock_lvb_update(env, olck, rc);
+
+ /* release the DLM spin-locks to allow cl_lock_{modify,signal}()
+ * to take a semaphore on a parent lock. This is safe because the
+ * spin-locks are only needed to protect the consistency of
+ * dlmlock->l_*_mode and the LVB, and we have finished processing
+ * them. */
+ unlock_res_and_lock(dlmlock);
+ cl_lock_modify(env, lock, descr);
cl_lock_signal(env, lock);
+ lock_res_and_lock(dlmlock);
}
EXIT;
}
LASSERT(olck->ols_lock == NULL);
olck->ols_lock = dlmlock;
spin_unlock(&osc_ast_guard);
- unlock_res_and_lock(dlmlock);
/*
* Lock might be not yet granted. In this case, completion ast
*/
if (dlmlock->l_granted_mode == dlmlock->l_req_mode)
osc_lock_granted(env, olck, dlmlock, 0);
+ unlock_res_and_lock(dlmlock);
+
/*
* osc_enqueue_interpret() decrefs asynchronous locks, counter
* this.
* to lock->l_lvb_data, store it in osc_lock.
*/
LASSERT(dlmlock->l_lvb_data != NULL);
+ lock_res_and_lock(dlmlock);
olck->ols_lvb = *(struct ost_lvb *)dlmlock->l_lvb_data;
if (olck->ols_lock == NULL)
/*
osc_lock_granted(env, olck, dlmlock, dlmrc);
if (dlmrc != 0)
cl_lock_error(env, lock, dlmrc);
+ unlock_res_and_lock(dlmlock);
cl_lock_mutex_put(env, lock);
osc_ast_data_put(env, olck);
result = 0;
slice->cls_ops = &osc_lock_lockless_ops;
}
}
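+ /* ergo(a, b) reads "a implies b": a glimpse lock must never end up
+ * lockless */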
+ LASSERT(ergo(ols->ols_glimpse, !osc_lock_is_lockless(ols)));
}
/**
ols->ols_state = OLS_GRANTED;
}
}
-
+ LASSERT(ergo(ols->ols_glimpse, !osc_lock_is_lockless(ols)));
RETURN(result);
}
/* size[REQ_REC_OFF] still sizeof (*body) */
if (opc == OST_WRITE) {
if (unlikely(cli->cl_checksum) &&
- req->rq_flvr.sf_bulk_hash == BULK_HASH_ALG_NULL) {
+ !sptlrpc_flavor_has_bulk(&req->rq_flvr)) {
/* store cl_cksum_type in a local variable since
* it can be changed via lprocfs */
cksum_type_t cksum_type = cli->cl_cksum_type;
sizeof(__u32) * niocount);
} else {
if (unlikely(cli->cl_checksum) &&
- req->rq_flvr.sf_bulk_hash == BULK_HASH_ALG_NULL) {
+ !sptlrpc_flavor_has_bulk(&req->rq_flvr)) {
if ((body->oa.o_valid & OBD_MD_FLFLAGS) == 0)
body->oa.o_flags = 0;
body->oa.o_flags |= cksum_type_pack(cli->cl_cksum_type);
}
LASSERT(req->rq_bulk->bd_nob == aa->aa_requested_nob);
+ if (sptlrpc_cli_unwrap_bulk_write(req, req->rq_bulk))
+ RETURN(-EAGAIN);
+
if ((aa->aa_oa->o_valid & OBD_MD_FLCKSUM) && client_cksum &&
check_write_checksum(&body->oa, peer, client_cksum,
body->oa.o_cksum, aa->aa_requested_nob,
cksum_type_unpack(aa->aa_oa->o_flags)))
RETURN(-EAGAIN);
- if (sptlrpc_cli_unwrap_bulk_write(req, req->rq_bulk))
- RETURN(-EAGAIN);
-
rc = check_write_rcs(req, aa->aa_requested_nob,aa->aa_nio_count,
aa->aa_page_count, aa->aa_ppga);
GOTO(out, rc);
}
/* The rest of this function executes only for OST_READs */
+
+ rc = sptlrpc_cli_unwrap_bulk_read(req, req->rq_bulk, rc);
+ if (rc < 0)
+ GOTO(out, rc);
+
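+ /* on success rc now holds the clear-text size reported by the unwrap
+ * above, i.e. the number of plain bytes actually received */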
if (rc > aa->aa_requested_nob) {
CERROR("Unexpected rc %d (%d requested)\n", rc,
aa->aa_requested_nob);
if (rc < aa->aa_requested_nob)
handle_short_read(rc, aa->aa_page_count, aa->aa_ppga);
- if (sptlrpc_cli_unwrap_bulk_read(req, rc, aa->aa_page_count,
- aa->aa_ppga))
- GOTO(out, rc = -EAGAIN);
-
if (body->oa.o_valid & OBD_MD_FLCKSUM) {
static int cksum_counter;
__u32 server_cksum = body->oa.o_cksum;
{
struct osd_it_ea *it = (struct osd_it_ea *)di;
struct osd_object *obj = it->oie_obj;
-
+ struct inode *inode = obj->oo_inode;
ENTRY;
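+ /* close the file handle that backed this directory iterator */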
+ it->oie_file.f_op->release(inode, &it->oie_file);
lu_object_put(env, &obj->oo_dt.do_lu);
EXIT;
}
{
struct osd_it_ea *it = (struct osd_it_ea *)buf;
struct dirent64 *dirent = &it->oie_dirent64;
- int reclen = LDISKFS_DIR_REC_LEN(namelen);
-
ENTRY;
if (it->oie_namelen)
strncpy(dirent->d_name, name, LDISKFS_NAME_LEN);
dirent->d_name[namelen] = 0;
dirent->d_ino = ino;
- dirent->d_off = offset;
- dirent->d_reclen = reclen;
it->oie_namelen = namelen;
it->oie_curr_pos = offset;
it->oie_next_pos = it->oie_file.f_pos;
- if(!result && it->oie_namelen == 0)
+ if (it->oie_namelen == 0)
result = -EIO;
RETURN(result);
}
rc = osd_ea_fid_get(env, dentry, (struct dt_rec*) rec);
+ if (rc != 0)
+ rec = ERR_PTR(rc);
iput(inode);
RETURN((struct dt_rec *)rec);
int rc;
ENTRY;
- it->oie_curr_pos = it->oie_next_pos = hash;
+ it->oie_curr_pos = hash;
rc = osd_ldiskfs_it_fill(di);
if (rc == 0)
MODULES := ost
ost-objs := ost_handler.o lproc_ost.o
+EXTRA_DIST = $(ost-objs:%.o=%.c) ost_internal.h
+
@INCLUDE_RULES@
endif
MOSTLYCLEANFILES := @MOSTLYCLEANFILES@
-DIST_SOURCES = $(ost-objs:%.o=%.c) ost_internal.h
if (exp->exp_failed)
rc = -ENOTCONN;
else {
- sptlrpc_svc_wrap_bulk(req, desc);
-
- rc = ptlrpc_start_bulk_transfer(desc);
+ rc = sptlrpc_svc_wrap_bulk(req, desc);
+ if (rc == 0)
+ rc = ptlrpc_start_bulk_transfer(desc);
}
if (rc == 0) {
local_nb[i].offset & ~CFS_PAGE_MASK,
local_nb[i].len);
+ rc = sptlrpc_svc_prep_bulk(req, desc);
+ if (rc != 0)
+ GOTO(out_lock, rc);
+
/* Check if client was evicted while we were doing i/o before touching
network */
if (desc->bd_export->exp_failed)
DEBUG_REQ(D_ERROR, req, "Eviction on bulk GET");
rc = -ENOTCONN;
ptlrpc_abort_bulk(desc);
- } else if (!desc->bd_success ||
- desc->bd_nob_transferred != desc->bd_nob) {
- DEBUG_REQ(D_ERROR, req, "%s bulk GET %d(%d)",
- desc->bd_success ?
- "truncated" : "network error on",
- desc->bd_nob_transferred, desc->bd_nob);
+ } else if (!desc->bd_success) {
+ DEBUG_REQ(D_ERROR, req, "network error on bulk GET");
/* XXX should this be a different errno? */
rc = -ETIMEDOUT;
+ } else {
+ rc = sptlrpc_svc_unwrap_bulk(req, desc);
}
} else {
DEBUG_REQ(D_ERROR, req, "ptlrpc_bulk_get failed: rc %d", rc);
}
no_reply = rc != 0;
- if (rc == 0)
- sptlrpc_svc_unwrap_bulk(req, desc);
-
repbody = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF,
sizeof(*repbody));
memcpy(&repbody->oa, &body->oa, sizeof(repbody->oa));
end = (nb[ioo->ioo_bufcnt - 1].offset +
nb[ioo->ioo_bufcnt - 1].len - 1) | ~CFS_PAGE_MASK;
+ LASSERT(lock->l_resource != NULL);
+ if (!osc_res_name_eq(ioo->ioo_id, ioo->ioo_gr,
+ &lock->l_resource->lr_name))
+ RETURN(0);
+
if (!(lock->l_granted_mode & mode))
RETURN(0);
interval_tree.c: @LUSTRE@/ldlm/interval_tree.c
ln -sf $< $@
+EXTRA_DIST = $(ptlrpc_objs:.o=.c) ptlrpc_internal.h
EXTRA_PRE_CFLAGS := -I@LUSTRE@/ldlm
@INCLUDE_RULES@
endif
install-data-hook: $(install_data_hook)
-DIST_SOURCES = $(ptlrpc_objs:.o=.c) ptlrpc_internal.h
MOSTLYCLEANFILES := @MOSTLYCLEANFILES@ ldlm_*.c l_lock.c interval_tree.c
spin_unlock(&imp->imp_lock);
set->set_remaining--;
- cfs_waitq_signal(&imp->imp_recovery_waitq);
+ cfs_waitq_broadcast(&imp->imp_recovery_waitq);
}
/* If we hit an error, we want to recover promptly. */
LASSERT(!req->rq_receiving_reply);
ptlrpc_rqphase_move(req, RQ_PHASE_INTERPRET);
- cfs_waitq_signal(&imp->imp_recovery_waitq);
+ cfs_waitq_broadcast(&imp->imp_recovery_waitq);
RETURN(rc);
}
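
Both call sites above move from waking a single sleeper to waking every thread blocked on imp_recovery_waitq. A minimal pthread sketch of why that distinction matters when two threads can sleep on the same queue (stand-in names, not the Lustre wait-queue API):

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t recovery_waitq = PTHREAD_COND_INITIALIZER;
    static int done;

    static void *waiter(void *arg)
    {
            pthread_mutex_lock(&lock);
            while (!done)                   /* two threads sleep here */
                    pthread_cond_wait(&recovery_waitq, &lock);
            pthread_mutex_unlock(&lock);
            printf("waiter %ld woken\n", (long)arg);
            return NULL;
    }

    int main(void)
    {
            pthread_t t1, t2;

            pthread_create(&t1, NULL, waiter, (void *)1L);
            pthread_create(&t2, NULL, waiter, (void *)2L);

            pthread_mutex_lock(&lock);
            done = 1;
            /* pthread_cond_signal() would wake only one waiter;
             * broadcast wakes both, like cfs_waitq_broadcast() */
            pthread_cond_broadcast(&recovery_waitq);
            pthread_mutex_unlock(&lock);

            pthread_join(t1, NULL);
            pthread_join(t2, NULL);
            return 0;
    }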
desc->bd_sender = ev->sender;
}
- sptlrpc_enc_pool_put_pages(desc);
+ /* release the encrypted pages for write */
+ if (desc->bd_req->rq_bulk_write)
+ sptlrpc_enc_pool_put_pages(desc);
/* NB don't unlock till after wakeup; desc can disappear under us
* otherwise */
struct gss_ctx *ctx,
int msgcnt,
rawobj_t *msgs,
+ int iovcnt,
+ lnet_kiov_t *iovs,
rawobj_t *mic_token);
__u32 lgss_verify_mic(
struct gss_ctx *ctx,
int msgcnt,
rawobj_t *msgs,
+ int iovcnt,
+ lnet_kiov_t *iovs,
rawobj_t *mic_token);
__u32 lgss_wrap(
struct gss_ctx *ctx,
rawobj_t *gsshdr,
rawobj_t *token,
rawobj_t *out_msg);
-__u32 lgss_plain_encrypt(
- struct gss_ctx *ctx,
- int decrypt,
- int length,
- void *in_buf,
- void *out_buf);
+__u32 lgss_prep_bulk(
+ struct gss_ctx *gctx,
+ struct ptlrpc_bulk_desc *desc);
+__u32 lgss_wrap_bulk(
+ struct gss_ctx *gctx,
+ struct ptlrpc_bulk_desc *desc,
+ rawobj_t *token,
+ int adj_nob);
+__u32 lgss_unwrap_bulk(
+ struct gss_ctx *gctx,
+ struct ptlrpc_bulk_desc *desc,
+ rawobj_t *token);
__u32 lgss_delete_sec_context(
struct gss_ctx **ctx);
int lgss_display(
struct gss_ctx *ctx,
int msgcnt,
rawobj_t *msgs,
+ int iovcnt,
+ lnet_kiov_t *iovs,
rawobj_t *mic_token);
__u32 (*gss_verify_mic)(
struct gss_ctx *ctx,
int msgcnt,
rawobj_t *msgs,
+ int iovcnt,
+ lnet_kiov_t *iovs,
rawobj_t *mic_token);
__u32 (*gss_wrap)(
struct gss_ctx *ctx,
rawobj_t *gsshdr,
rawobj_t *token,
rawobj_t *out_msg);
- __u32 (*gss_plain_encrypt)(
- struct gss_ctx *ctx,
- int decrypt,
- int length,
- void *in_buf,
- void *out_buf);
+ __u32 (*gss_prep_bulk)(
+ struct gss_ctx *gctx,
+ struct ptlrpc_bulk_desc *desc);
+ __u32 (*gss_wrap_bulk)(
+ struct gss_ctx *gctx,
+ struct ptlrpc_bulk_desc *desc,
+ rawobj_t *token,
+ int adj_nob);
+ __u32 (*gss_unwrap_bulk)(
+ struct gss_ctx *gctx,
+ struct ptlrpc_bulk_desc *desc,
+ rawobj_t *token);
void (*gss_delete_sec_context)(
void *ctx);
int (*gss_display)(
#include "gss_internal.h"
#include "gss_api.h"
-static __u8 zero_iv[CIPHER_MAX_BLKSIZE] = { 0, };
-
-static void buf_to_sl(struct scatterlist *sl,
- void *buf, unsigned int len)
-{
- sl->page = virt_to_page(buf);
- sl->offset = offset_in_page(buf);
- sl->length = len;
-}
-
-/*
- * CTS CBC encryption:
- * 1. X(n-1) = P(n-1)
- * 2. E(n-1) = Encrypt(K, X(n-1))
- * 3. C(n) = HEAD(E(n-1))
- * 4. P = P(n) | 0
- * 5. D(n) = E(n-1) XOR P
- * 6. C(n-1) = Encrypt(K, D(n))
- *
- * CTS encryption using standard CBC interface:
- * 1. pad the last partial block with 0.
- * 2. do CBC encryption.
- * 3. swap the last two ciphertext blocks.
- * 4. truncate to original plaintext size.
- */
-static int cbc_cts_encrypt(struct ll_crypto_cipher *tfm,
- struct scatterlist *sld,
- struct scatterlist *sls)
-{
- struct scatterlist slst, sldt;
- struct blkcipher_desc desc;
- void *data;
- __u8 sbuf[CIPHER_MAX_BLKSIZE];
- __u8 dbuf[CIPHER_MAX_BLKSIZE];
- unsigned int blksize, blks, tail;
- int rc;
-
- blksize = ll_crypto_blkcipher_blocksize(tfm);
- blks = sls->length / blksize;
- tail = sls->length % blksize;
- LASSERT(blks > 0 && tail > 0);
-
- /* pad tail block with 0, copy to sbuf */
- data = cfs_kmap(sls->page);
- memcpy(sbuf, data + sls->offset + blks * blksize, tail);
- memset(sbuf + tail, 0, blksize - tail);
- cfs_kunmap(sls->page);
-
- buf_to_sl(&slst, sbuf, blksize);
- buf_to_sl(&sldt, dbuf, blksize);
- desc.tfm = tfm;
- desc.flags = 0;
-
- /* encrypt head */
- rc = ll_crypto_blkcipher_encrypt(&desc, sld, sls, sls->length - tail);
- if (unlikely(rc)) {
- CERROR("encrypt head (%u) data: %d\n", sls->length - tail, rc);
- return rc;
- }
- /* encrypt tail */
- rc = ll_crypto_blkcipher_encrypt(&desc, &sldt, &slst, blksize);
- if (unlikely(rc)) {
- CERROR("encrypt tail (%u) data: %d\n", slst.length, rc);
- return rc;
- }
-
- /* swab C(n) and C(n-1), if n == 1, then C(n-1) is the IV */
- data = cfs_kmap(sld->page);
-
- memcpy(data + sld->offset + blks * blksize,
- data + sld->offset + (blks - 1) * blksize, tail);
- memcpy(data + sld->offset + (blks - 1) * blksize, dbuf, blksize);
- cfs_kunmap(sld->page);
-
- return 0;
-}
-
-/*
- * CTS CBC decryption:
- * 1. D(n) = Decrypt(K, C(n-1))
- * 2. C = C(n) | 0
- * 3. X(n) = D(n) XOR C
- * 4. P(n) = HEAD(X(n))
- * 5. E(n-1) = C(n) | TAIL(X(n))
- * 6. X(n-1) = Decrypt(K, E(n-1))
- * 7. P(n-1) = X(n-1) XOR C(n-2)
- *
- * CTS decryption using standard CBC interface:
- * 1. D(n) = Decrypt(K, C(n-1))
- * 2. C(n) = C(n) | TAIL(D(n))
- * 3. swap the last two ciphertext blocks.
- * 4. do CBC decryption.
- * 5. truncate to original ciphertext size.
- */
-static int cbc_cts_decrypt(struct ll_crypto_cipher *tfm,
- struct scatterlist *sld,
- struct scatterlist *sls)
-{
- struct blkcipher_desc desc;
- struct scatterlist slst, sldt;
- void *data;
- __u8 sbuf[CIPHER_MAX_BLKSIZE];
- __u8 dbuf[CIPHER_MAX_BLKSIZE];
- unsigned int blksize, blks, tail;
- int rc;
-
- blksize = ll_crypto_blkcipher_blocksize(tfm);
- blks = sls->length / blksize;
- tail = sls->length % blksize;
- LASSERT(blks > 0 && tail > 0);
-
- /* save current IV, and set IV to zero */
- ll_crypto_blkcipher_get_iv(tfm, sbuf, blksize);
- ll_crypto_blkcipher_set_iv(tfm, zero_iv, blksize);
-
- /* D(n) = Decrypt(K, C(n-1)) */
- slst = *sls;
- slst.offset += (blks - 1) * blksize;
- slst.length = blksize;
-
- buf_to_sl(&sldt, dbuf, blksize);
- desc.tfm = tfm;
- desc.flags = 0;
-
- rc = ll_crypto_blkcipher_decrypt(&desc, &sldt, &slst, blksize);
- if (unlikely(rc)) {
- CERROR("decrypt C(n-1) (%u): %d\n", slst.length, rc);
- return rc;
- }
-
- /* restore IV */
- ll_crypto_blkcipher_set_iv(tfm, sbuf, blksize);
-
- data = cfs_kmap(sls->page);
- /* C(n) = C(n) | TAIL(D(n)) */
- memcpy(dbuf, data + sls->offset + blks * blksize, tail);
- /* swab C(n) and C(n-1) */
- memcpy(sbuf, data + sls->offset + (blks - 1) * blksize, blksize);
- memcpy(data + sls->offset + (blks - 1) * blksize, dbuf, blksize);
- cfs_kunmap(sls->page);
-
- /* do cbc decrypt */
- buf_to_sl(&slst, sbuf, blksize);
- buf_to_sl(&sldt, dbuf, blksize);
-
- /* decrypt head */
- rc = ll_crypto_blkcipher_decrypt(&desc, sld, sls, sls->length - tail);
- if (unlikely(rc)) {
- CERROR("decrypt head (%u) data: %d\n", sls->length - tail, rc);
- return rc;
- }
- /* decrypt tail */
- rc = ll_crypto_blkcipher_decrypt(&desc, &sldt, &slst, blksize);
- if (unlikely(rc)) {
- CERROR("decrypt tail (%u) data: %d\n", slst.length, rc);
- return rc;
- }
-
- /* truncate to original ciphertext size */
- data = cfs_kmap(sld->page);
- memcpy(data + sld->offset + blks * blksize, dbuf, tail);
- cfs_kunmap(sld->page);
-
- return 0;
-}
-
-static inline int do_cts_tfm(struct ll_crypto_cipher *tfm,
- int encrypt,
- struct scatterlist *sld,
- struct scatterlist *sls)
-{
-#ifndef HAVE_ASYNC_BLOCK_CIPHER
- LASSERT(tfm->crt_cipher.cit_mode == CRYPTO_TFM_MODE_CBC);
-#endif
-
- if (encrypt)
- return cbc_cts_encrypt(tfm, sld, sls);
- else
- return cbc_cts_decrypt(tfm, sld, sls);
-}
-
-/*
- * normal encrypt/decrypt of data of even blocksize
- */
-static inline int do_cipher_tfm(struct ll_crypto_cipher *tfm,
- int encrypt,
- struct scatterlist *sld,
- struct scatterlist *sls)
-{
- struct blkcipher_desc desc;
- desc.tfm = tfm;
- desc.flags = 0;
- if (encrypt)
- return ll_crypto_blkcipher_encrypt(&desc, sld, sls, sls->length);
- else
- return ll_crypto_blkcipher_decrypt(&desc, sld, sls, sls->length);
-}
-
-static struct ll_crypto_cipher *get_stream_cipher(__u8 *key, unsigned int keylen)
-{
- const struct sptlrpc_ciph_type *ct;
- struct ll_crypto_cipher *tfm;
- int rc;
-
- /* using ARC4, the only stream cipher in linux for now */
- ct = sptlrpc_get_ciph_type(BULK_CIPH_ALG_ARC4);
- LASSERT(ct);
-
- tfm = ll_crypto_alloc_blkcipher(ct->sct_tfm_name, 0, 0);
- if (tfm == NULL) {
- CERROR("Failed to allocate stream TFM %s\n", ct->sct_name);
- return NULL;
- }
- LASSERT(ll_crypto_blkcipher_blocksize(tfm));
-
- if (keylen > ct->sct_keysize)
- keylen = ct->sct_keysize;
-
- LASSERT(keylen >= crypto_tfm_alg_min_keysize(tfm));
- LASSERT(keylen <= crypto_tfm_alg_max_keysize(tfm));
-
- rc = ll_crypto_blkcipher_setkey(tfm, key, keylen);
- if (rc) {
- CERROR("Failed to set key for TFM %s: %d\n", ct->sct_name, rc);
- ll_crypto_free_blkcipher(tfm);
- return NULL;
- }
-
- return tfm;
-}
-
-static int do_bulk_privacy(struct gss_ctx *gctx,
- struct ptlrpc_bulk_desc *desc,
- int encrypt, __u32 alg,
- struct ptlrpc_bulk_sec_desc *bsd)
-{
- const struct sptlrpc_ciph_type *ct = sptlrpc_get_ciph_type(alg);
- struct ll_crypto_cipher *tfm;
- struct ll_crypto_cipher *stfm = NULL; /* backup stream cipher */
- struct scatterlist sls, sld, *sldp;
- unsigned int blksize, keygen_size;
- int i, rc;
- __u8 key[CIPHER_MAX_KEYSIZE];
-
- LASSERT(ct);
-
- if (encrypt)
- bsd->bsd_ciph_alg = BULK_CIPH_ALG_NULL;
-
- if (alg == BULK_CIPH_ALG_NULL)
- return 0;
-
- if (desc->bd_iov_count <= 0) {
- if (encrypt)
- bsd->bsd_ciph_alg = alg;
- return 0;
- }
-
- tfm = ll_crypto_alloc_blkcipher(ct->sct_tfm_name, 0, 0 );
- if (tfm == NULL) {
- CERROR("Failed to allocate TFM %s\n", ct->sct_name);
- return -ENOMEM;
- }
- blksize = ll_crypto_blkcipher_blocksize(tfm);
-
- LASSERT(crypto_tfm_alg_max_keysize(tfm) >= ct->sct_keysize);
- LASSERT(crypto_tfm_alg_min_keysize(tfm) <= ct->sct_keysize);
- LASSERT(ct->sct_ivsize == 0 ||
- ll_crypto_blkcipher_ivsize(tfm) == ct->sct_ivsize);
- LASSERT(ct->sct_keysize <= CIPHER_MAX_KEYSIZE);
- LASSERT(blksize <= CIPHER_MAX_BLKSIZE);
-
- /* generate ramdom key seed and compute the secret key based on it.
- * note determined by algorithm which lgss_plain_encrypt use, it
- * might require the key size be its (blocksize * n). so here for
- * simplicity, we force it's be n * MAX_BLKSIZE by padding 0 */
- keygen_size = (ct->sct_keysize + CIPHER_MAX_BLKSIZE - 1) &
- ~(CIPHER_MAX_BLKSIZE - 1);
- if (encrypt) {
- get_random_bytes(bsd->bsd_key, ct->sct_keysize);
- if (ct->sct_keysize < keygen_size)
- memset(bsd->bsd_key + ct->sct_keysize, 0,
- keygen_size - ct->sct_keysize);
- }
-
- rc = lgss_plain_encrypt(gctx, 0, keygen_size, bsd->bsd_key, key);
- if (rc) {
- CERROR("failed to compute secret key: %d\n", rc);
- goto out;
- }
-
- rc = ll_crypto_blkcipher_setkey(tfm, key, ct->sct_keysize);
- if (rc) {
- CERROR("Failed to set key for TFM %s: %d\n", ct->sct_name, rc);
- goto out;
- }
-
- /* stream cipher doesn't need iv */
- if (blksize > 1)
- ll_crypto_blkcipher_set_iv(tfm, zero_iv, blksize);
-
- for (i = 0; i < desc->bd_iov_count; i++) {
- sls.page = desc->bd_iov[i].kiov_page;
- sls.offset = desc->bd_iov[i].kiov_offset;
- sls.length = desc->bd_iov[i].kiov_len;
-
- if (unlikely(sls.length == 0)) {
- CWARN("page %d with 0 length data?\n", i);
- continue;
- }
-
- if (unlikely(sls.offset % blksize)) {
- CERROR("page %d with odd offset %u, TFM %s\n",
- i, sls.offset, ct->sct_name);
- rc = -EINVAL;
- goto out;
- }
-
- if (desc->bd_enc_pages) {
- sld.page = desc->bd_enc_pages[i];
- sld.offset = desc->bd_iov[i].kiov_offset;
- sld.length = desc->bd_iov[i].kiov_len;
-
- sldp = &sld;
- } else {
- sldp = &sls;
- }
-
- if (likely(sls.length % blksize == 0)) {
- /* data length is n * blocksize, do the normal tfm */
- rc = do_cipher_tfm(tfm, encrypt, sldp, &sls);
- } else if (sls.length < blksize) {
- /* odd data length, and smaller than 1 block, CTS
- * doesn't work in this case because it requires
- * transfer a modified IV to peer. here we use a
- * "backup" stream cipher to do the tfm */
- if (stfm == NULL) {
- stfm = get_stream_cipher(key, ct->sct_keysize);
- if (tfm == NULL) {
- rc = -ENOMEM;
- goto out;
- }
- }
- rc = do_cipher_tfm(stfm, encrypt, sldp, &sls);
- } else {
- /* odd data length but > 1 block, do CTS tfm */
- rc = do_cts_tfm(tfm, encrypt, sldp, &sls);
- }
-
- if (unlikely(rc)) {
- CERROR("error %s page %d/%d: %d\n",
- encrypt ? "encrypt" : "decrypt",
- i + 1, desc->bd_iov_count, rc);
- goto out;
- }
-
- if (desc->bd_enc_pages)
- desc->bd_iov[i].kiov_page = desc->bd_enc_pages[i];
- }
-
- if (encrypt)
- bsd->bsd_ciph_alg = alg;
-
-out:
- if (stfm)
- ll_crypto_free_blkcipher(stfm);
-
- ll_crypto_free_blkcipher(tfm);
- return rc;
-}
-
int gss_cli_ctx_wrap_bulk(struct ptlrpc_cli_ctx *ctx,
struct ptlrpc_request *req,
struct ptlrpc_bulk_desc *desc)
{
struct gss_cli_ctx *gctx;
struct lustre_msg *msg;
- struct ptlrpc_bulk_sec_desc *bsdr;
- int offset, rc;
+ struct ptlrpc_bulk_sec_desc *bsd;
+ rawobj_t token;
+ __u32 maj;
+ int offset;
+ int rc;
ENTRY;
LASSERT(req->rq_pack_bulk);
LASSERT(req->rq_bulk_read || req->rq_bulk_write);
- switch (RPC_FLVR_SVC(req->rq_flvr.sf_rpc)) {
+ gctx = container_of(ctx, struct gss_cli_ctx, gc_base);
+ LASSERT(gctx->gc_mechctx);
+
+ switch (SPTLRPC_FLVR_SVC(req->rq_flvr.sf_rpc)) {
case SPTLRPC_SVC_NULL:
LASSERT(req->rq_reqbuf->lm_bufcount >= 3);
msg = req->rq_reqbuf;
LBUG();
}
- /* make checksum */
- rc = bulk_csum_cli_request(desc, req->rq_bulk_read,
- req->rq_flvr.sf_bulk_hash, msg, offset);
- if (rc) {
- CERROR("client bulk %s: failed to generate checksum: %d\n",
- req->rq_bulk_read ? "read" : "write", rc);
- RETURN(rc);
- }
+ bsd = lustre_msg_buf(msg, offset, sizeof(*bsd));
+ bsd->bsd_version = 0;
+ bsd->bsd_flags = 0;
+ bsd->bsd_type = SPTLRPC_BULK_DEFAULT;
+ bsd->bsd_svc = SPTLRPC_FLVR_BULK_SVC(req->rq_flvr.sf_rpc);
- if (req->rq_flvr.sf_bulk_ciph == BULK_CIPH_ALG_NULL)
+ if (bsd->bsd_svc == SPTLRPC_BULK_SVC_NULL)
RETURN(0);
- /* previous bulk_csum_cli_request() has verified bsdr is good */
- bsdr = lustre_msg_buf(msg, offset, 0);
+ LASSERT(bsd->bsd_svc == SPTLRPC_BULK_SVC_INTG ||
+ bsd->bsd_svc == SPTLRPC_BULK_SVC_PRIV);
if (req->rq_bulk_read) {
- bsdr->bsd_ciph_alg = req->rq_flvr.sf_bulk_ciph;
- RETURN(0);
- }
-
- /* it turn out to be bulk write */
- rc = sptlrpc_enc_pool_get_pages(desc);
- if (rc) {
- CERROR("bulk write: failed to allocate encryption pages\n");
- RETURN(rc);
- }
+ /*
+ * bulk read: prepare receiving pages only for privacy mode.
+ */
+ if (bsd->bsd_svc == SPTLRPC_BULK_SVC_PRIV)
+ return gss_cli_prep_bulk(req, desc);
+ } else {
+ /*
+ * bulk write: sign or encrypt bulk pages.
+ */
+ bsd->bsd_nob = desc->bd_nob;
+
+ if (bsd->bsd_svc == SPTLRPC_BULK_SVC_INTG) {
+ /* integrity mode */
+ token.data = bsd->bsd_data;
+ token.len = lustre_msg_buflen(msg, offset) -
+ sizeof(*bsd);
+
+ maj = lgss_get_mic(gctx->gc_mechctx, 0, NULL,
+ desc->bd_iov_count, desc->bd_iov,
+ &token);
+ if (maj != GSS_S_COMPLETE) {
+ CWARN("failed to sign bulk data: %x\n", maj);
+ RETURN(-EACCES);
+ }
+ } else {
+ /* privacy mode */
+ if (desc->bd_iov_count == 0)
+ RETURN(0);
+
+ rc = sptlrpc_enc_pool_get_pages(desc);
+ if (rc) {
+ CERROR("bulk write: failed to allocate "
+ "encryption pages: %d\n", rc);
+ RETURN(rc);
+ }
- gctx = container_of(ctx, struct gss_cli_ctx, gc_base);
- LASSERT(gctx->gc_mechctx);
+ token.data = bsd->bsd_data;
+ token.len = lustre_msg_buflen(msg, offset) -
+ sizeof(*bsd);
- rc = do_bulk_privacy(gctx->gc_mechctx, desc, 1,
- req->rq_flvr.sf_bulk_ciph, bsdr);
- if (rc)
- CERROR("bulk write: client failed to encrypt pages\n");
+ maj = lgss_wrap_bulk(gctx->gc_mechctx, desc, &token, 0);
+ if (maj != GSS_S_COMPLETE) {
+ CWARN("fail to encrypt bulk data: %x\n", maj);
+ RETURN(-EACCES);
+ }
+ }
+ }
- RETURN(rc);
+ RETURN(0);
}
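
The rewritten gss_cli_ctx_wrap_bulk() above dispatches on the bulk service level carried in the flavor. Schematically, with stand-in names (a sketch of the decision table, not Lustre code):

    #include <stdio.h>

    /* stand-ins for the SPTLRPC_BULK_SVC_* levels handled above */
    enum bulk_svc { BULK_SVC_NULL, BULK_SVC_INTG, BULK_SVC_PRIV };

    static const char *wrap_action(enum bulk_svc svc, int bulk_read)
    {
            switch (svc) {
            case BULK_SVC_NULL:
                    return "nothing: plain bulk";
            case BULK_SVC_INTG:
                    return bulk_read ? "nothing now; verify MIC on reply"
                                     : "sign the pages (MIC) into the token";
            case BULK_SVC_PRIV:
                    return bulk_read ? "prepare bounce pages for decryption"
                                     : "encrypt the pages into bounce pages";
            }
            return "unknown";
    }

    int main(void)
    {
            printf("bulk write, privacy: %s\n",
                   wrap_action(BULK_SVC_PRIV, 0));
            return 0;
    }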
int gss_cli_ctx_unwrap_bulk(struct ptlrpc_cli_ctx *ctx,
struct gss_cli_ctx *gctx;
struct lustre_msg *rmsg, *vmsg;
struct ptlrpc_bulk_sec_desc *bsdr, *bsdv;
- int roff, voff, rc;
+ rawobj_t token;
+ __u32 maj;
+ int roff, voff;
ENTRY;
LASSERT(req->rq_pack_bulk);
LASSERT(req->rq_bulk_read || req->rq_bulk_write);
- switch (RPC_FLVR_SVC(req->rq_flvr.sf_rpc)) {
+ switch (SPTLRPC_FLVR_SVC(req->rq_flvr.sf_rpc)) {
case SPTLRPC_SVC_NULL:
vmsg = req->rq_repdata;
voff = vmsg->lm_bufcount - 1;
LBUG();
}
- if (req->rq_bulk_read) {
- bsdr = lustre_msg_buf(rmsg, roff, 0);
- if (bsdr->bsd_ciph_alg == BULK_CIPH_ALG_NULL)
- goto verify_csum;
-
- bsdv = lustre_msg_buf(vmsg, voff, 0);
- if (bsdr->bsd_ciph_alg != bsdv->bsd_ciph_alg) {
- CERROR("bulk read: cipher algorithm mismatch: client "
- "request %s but server reply with %s. try to "
- "use the new one for decryption\n",
- sptlrpc_get_ciph_name(bsdr->bsd_ciph_alg),
- sptlrpc_get_ciph_name(bsdv->bsd_ciph_alg));
+ bsdr = lustre_msg_buf(rmsg, roff, sizeof(*bsdr));
+ bsdv = lustre_msg_buf(vmsg, voff, sizeof(*bsdv));
+ LASSERT(bsdr && bsdv);
+
+ if (bsdr->bsd_version != bsdv->bsd_version ||
+ bsdr->bsd_type != bsdv->bsd_type ||
+ bsdr->bsd_svc != bsdv->bsd_svc) {
+ CERROR("bulk security descriptor mismatch: "
+ "(%u,%u,%u) != (%u,%u,%u)\n",
+ bsdr->bsd_version, bsdr->bsd_type, bsdr->bsd_svc,
+ bsdv->bsd_version, bsdv->bsd_type, bsdv->bsd_svc);
+ RETURN(-EPROTO);
+ }
+
+ LASSERT(bsdv->bsd_svc == SPTLRPC_BULK_SVC_NULL ||
+ bsdv->bsd_svc == SPTLRPC_BULK_SVC_INTG ||
+ bsdv->bsd_svc == SPTLRPC_BULK_SVC_PRIV);
+
+ /*
+ * in privacy mode, upon successful return make sure
+ * bd_nob_transferred is the actual size of the clear text;
+ * otherwise the upper layer may be confused.
+ */
+ if (req->rq_bulk_write) {
+ if (bsdv->bsd_flags & BSD_FL_ERR) {
+ CERROR("server reported bulk i/o failure\n");
+ RETURN(-EIO);
}
+ if (bsdv->bsd_svc == SPTLRPC_BULK_SVC_PRIV)
+ desc->bd_nob_transferred = desc->bd_nob;
+ } else {
+ /*
+ * bulk read: upon successful return, bd_nob_transferred is
+ * the size of the plain text actually received.
+ */
gctx = container_of(ctx, struct gss_cli_ctx, gc_base);
LASSERT(gctx->gc_mechctx);
- rc = do_bulk_privacy(gctx->gc_mechctx, desc, 0,
- bsdv->bsd_ciph_alg, bsdv);
- if (rc) {
- CERROR("bulk read: client failed to decrypt data\n");
- RETURN(rc);
+ if (bsdv->bsd_svc == SPTLRPC_BULK_SVC_INTG) {
+ int i, nob;
+
+ /* fix the actual data size */
+ for (i = 0, nob = 0; i < desc->bd_iov_count; i++) {
+ if (desc->bd_iov[i].kiov_len + nob >
+ desc->bd_nob_transferred) {
+ desc->bd_iov[i].kiov_len =
+ desc->bd_nob_transferred - nob;
+ }
+ nob += desc->bd_iov[i].kiov_len;
+ }
+
+ token.data = bsdv->bsd_data;
+ token.len = lustre_msg_buflen(vmsg, voff) -
+ sizeof(*bsdv);
+
+ maj = lgss_verify_mic(gctx->gc_mechctx, 0, NULL,
+ desc->bd_iov_count, desc->bd_iov,
+ &token);
+ if (maj != GSS_S_COMPLETE) {
+ CERROR("failed to verify bulk read: %x\n", maj);
+ RETURN(-EACCES);
+ }
+ } else if (bsdv->bsd_svc == SPTLRPC_BULK_SVC_PRIV) {
+ desc->bd_nob = bsdv->bsd_nob;
+ if (desc->bd_nob == 0)
+ RETURN(0);
+
+ token.data = bsdv->bsd_data;
+ token.len = lustre_msg_buflen(vmsg, voff) -
+ sizeof(*bsdv);
+
+ maj = lgss_unwrap_bulk(gctx->gc_mechctx, desc, &token);
+ if (maj != GSS_S_COMPLETE) {
+ CERROR("failed to decrypt bulk read: %x\n",
+ maj);
+ RETURN(-EACCES);
+ }
+
+ desc->bd_nob_transferred = desc->bd_nob;
}
}
-verify_csum:
- rc = bulk_csum_cli_reply(desc, req->rq_bulk_read,
- rmsg, roff, vmsg, voff);
+ RETURN(0);
+}
+
+static int gss_prep_bulk(struct ptlrpc_bulk_desc *desc,
+ struct gss_ctx *mechctx)
+{
+ int rc;
+
+ if (desc->bd_iov_count == 0)
+ return 0;
+
+ rc = sptlrpc_enc_pool_get_pages(desc);
+ if (rc)
+ return rc;
+
+ if (lgss_prep_bulk(mechctx, desc) != GSS_S_COMPLETE)
+ return -EACCES;
+
+ return 0;
+}
+
+int gss_cli_prep_bulk(struct ptlrpc_request *req,
+ struct ptlrpc_bulk_desc *desc)
+{
+ int rc;
+ ENTRY;
+
+ LASSERT(req->rq_cli_ctx);
+ LASSERT(req->rq_pack_bulk);
+ LASSERT(req->rq_bulk_read);
+
+ if (SPTLRPC_FLVR_BULK_SVC(req->rq_flvr.sf_rpc) != SPTLRPC_BULK_SVC_PRIV)
+ RETURN(0);
+
+ rc = gss_prep_bulk(desc, ctx2gctx(req->rq_cli_ctx)->gc_mechctx);
+ if (rc)
+ CERROR("bulk read: failed to prepare encryption "
+ "pages: %d\n", rc);
+
+ RETURN(rc);
+}
+
+int gss_svc_prep_bulk(struct ptlrpc_request *req,
+ struct ptlrpc_bulk_desc *desc)
+{
+ struct gss_svc_reqctx *grctx;
+ struct ptlrpc_bulk_sec_desc *bsd;
+ int rc;
+ ENTRY;
+
+ LASSERT(req->rq_svc_ctx);
+ LASSERT(req->rq_pack_bulk);
+ LASSERT(req->rq_bulk_write);
+
+ grctx = gss_svc_ctx2reqctx(req->rq_svc_ctx);
+ LASSERT(grctx->src_reqbsd);
+ LASSERT(grctx->src_repbsd);
+ LASSERT(grctx->src_ctx);
+ LASSERT(grctx->src_ctx->gsc_mechctx);
+
+ bsd = grctx->src_reqbsd;
+ if (bsd->bsd_svc != SPTLRPC_BULK_SVC_PRIV)
+ RETURN(0);
+
+ rc = gss_prep_bulk(desc, grctx->src_ctx->gsc_mechctx);
+ if (rc)
+ CERROR("bulk write: failed to prepare encryption "
+ "pages: %d\n", rc);
+
RETURN(rc);
}
struct ptlrpc_bulk_desc *desc)
{
struct gss_svc_reqctx *grctx;
- int rc;
+ struct ptlrpc_bulk_sec_desc *bsdr, *bsdv;
+ rawobj_t token;
+ __u32 maj;
ENTRY;
LASSERT(req->rq_svc_ctx);
LASSERT(grctx->src_ctx);
LASSERT(grctx->src_ctx->gsc_mechctx);
- /* decrypt bulk data if it's encrypted */
- if (grctx->src_reqbsd->bsd_ciph_alg != BULK_CIPH_ALG_NULL) {
- rc = do_bulk_privacy(grctx->src_ctx->gsc_mechctx, desc, 0,
- grctx->src_reqbsd->bsd_ciph_alg,
- grctx->src_reqbsd);
- if (rc) {
- CERROR("bulk write: server failed to decrypt data\n");
- RETURN(rc);
+ bsdr = grctx->src_reqbsd;
+ bsdv = grctx->src_repbsd;
+
+ /* bsdr has been sanity checked during unpacking */
+ bsdv->bsd_version = 0;
+ bsdv->bsd_type = SPTLRPC_BULK_DEFAULT;
+ bsdv->bsd_svc = bsdr->bsd_svc;
+ bsdv->bsd_flags = 0;
+
+ switch (bsdv->bsd_svc) {
+ case SPTLRPC_BULK_SVC_INTG:
+ token.data = bsdr->bsd_data;
+ token.len = grctx->src_reqbsd_size - sizeof(*bsdr);
+
+ maj = lgss_verify_mic(grctx->src_ctx->gsc_mechctx, 0, NULL,
+ desc->bd_iov_count, desc->bd_iov, &token);
+ if (maj != GSS_S_COMPLETE) {
+ bsdv->bsd_flags |= BSD_FL_ERR;
+ CERROR("failed to verify bulk signature: %x\n", maj);
+ RETURN(-EACCES);
+ }
+ break;
+ case SPTLRPC_BULK_SVC_PRIV:
+ if (bsdr->bsd_nob != desc->bd_nob) {
+ bsdv->bsd_flags |= BSD_FL_ERR;
+ CERROR("prepared nob %d doesn't match the actual "
+ "nob %d\n", desc->bd_nob, bsdr->bsd_nob);
+ RETURN(-EPROTO);
}
- }
- /* verify bulk data checksum */
- rc = bulk_csum_svc(desc, req->rq_bulk_read,
- grctx->src_reqbsd, grctx->src_reqbsd_size,
- grctx->src_repbsd, grctx->src_repbsd_size);
+ if (desc->bd_iov_count == 0) {
+ LASSERT(desc->bd_nob == 0);
+ break;
+ }
- RETURN(rc);
+ token.data = bsdr->bsd_data;
+ token.len = grctx->src_reqbsd_size - sizeof(*bsdr);
+
+ maj = lgss_unwrap_bulk(grctx->src_ctx->gsc_mechctx,
+ desc, &token);
+ if (maj != GSS_S_COMPLETE) {
+ bsdv->bsd_flags |= BSD_FL_ERR;
+ CERROR("failed decrypt bulk data: %x\n", maj);
+ RETURN(-EACCES);
+ }
+ break;
+ }
+
+ RETURN(0);
}
int gss_svc_wrap_bulk(struct ptlrpc_request *req,
struct ptlrpc_bulk_desc *desc)
{
struct gss_svc_reqctx *grctx;
+ struct ptlrpc_bulk_sec_desc *bsdr, *bsdv;
+ rawobj_t token;
+ __u32 maj;
int rc;
ENTRY;
LASSERT(grctx->src_ctx);
LASSERT(grctx->src_ctx->gsc_mechctx);
- /* generate bulk data checksum */
- rc = bulk_csum_svc(desc, req->rq_bulk_read,
- grctx->src_reqbsd, grctx->src_reqbsd_size,
- grctx->src_repbsd, grctx->src_repbsd_size);
- if (rc)
- RETURN(rc);
-
- /* encrypt bulk data if required */
- if (grctx->src_reqbsd->bsd_ciph_alg != BULK_CIPH_ALG_NULL) {
- rc = do_bulk_privacy(grctx->src_ctx->gsc_mechctx, desc, 1,
- grctx->src_reqbsd->bsd_ciph_alg,
- grctx->src_repbsd);
- if (rc)
- CERROR("bulk read: server failed to encrypt data: "
- "rc %d\n", rc);
+ bsdr = grctx->src_reqbsd;
+ bsdv = grctx->src_repbsd;
+
+ /* bsdr has been sanity checked during unpacking */
+ bsdv->bsd_version = 0;
+ bsdv->bsd_type = SPTLRPC_BULK_DEFAULT;
+ bsdv->bsd_svc = bsdr->bsd_svc;
+ bsdv->bsd_flags = 0;
+
+ switch (bsdv->bsd_svc) {
+ case SPTLRPC_BULK_SVC_INTG:
+ token.data = bsdv->bsd_data;
+ token.len = grctx->src_repbsd_size - sizeof(*bsdv);
+
+ maj = lgss_get_mic(grctx->src_ctx->gsc_mechctx, 0, NULL,
+ desc->bd_iov_count, desc->bd_iov, &token);
+ if (maj != GSS_S_COMPLETE) {
+ bsdv->bsd_flags |= BSD_FL_ERR;
+ CERROR("failed to sign bulk data: %x\n", maj);
+ RETURN(-EACCES);
+ }
+ break;
+ case SPTLRPC_BULK_SVC_PRIV:
+ bsdv->bsd_nob = desc->bd_nob;
+
+ if (desc->bd_iov_count == 0) {
+ LASSERT(desc->bd_nob == 0);
+ break;
+ }
+
+ rc = sptlrpc_enc_pool_get_pages(desc);
+ if (rc) {
+ bsdv->bsd_flags |= BSD_FL_ERR;
+ CERROR("bulk read: failed to allocate encryption "
+ "pages: %d\n", rc);
+ RETURN(rc);
+ }
+
+ token.data = bsdv->bsd_data;
+ token.len = grctx->src_repbsd_size - sizeof(*bsdv);
+
+ maj = lgss_wrap_bulk(grctx->src_ctx->gsc_mechctx,
+ desc, &token, 1);
+ if (maj != GSS_S_COMPLETE) {
+ bsdv->bsd_flags |= BSD_FL_ERR;
+ CERROR("failed to encrypt bulk data: %x\n", maj);
+ RETURN(-EACCES);
+ }
+ break;
}
- RETURN(rc);
+ RETURN(0);
}
void __exit gss_exit_pipefs(void);
/* gss_bulk.c */
+int gss_cli_prep_bulk(struct ptlrpc_request *req,
+ struct ptlrpc_bulk_desc *desc);
int gss_cli_ctx_wrap_bulk(struct ptlrpc_cli_ctx *ctx,
struct ptlrpc_request *req,
struct ptlrpc_bulk_desc *desc);
int gss_cli_ctx_unwrap_bulk(struct ptlrpc_cli_ctx *ctx,
struct ptlrpc_request *req,
struct ptlrpc_bulk_desc *desc);
+int gss_svc_prep_bulk(struct ptlrpc_request *req,
+ struct ptlrpc_bulk_desc *desc);
int gss_svc_unwrap_bulk(struct ptlrpc_request *req,
struct ptlrpc_bulk_desc *desc);
int gss_svc_wrap_bulk(struct ptlrpc_request *req,
.authorize = gss_svc_authorize,
.free_rs = gss_svc_free_rs,
.free_ctx = gss_svc_free_ctx,
+ .prep_bulk = gss_svc_prep_bulk,
.unwrap_bulk = gss_svc_unwrap_bulk,
.wrap_bulk = gss_svc_wrap_bulk,
.install_rctx = gss_svc_install_rctx_kr,
}
static
-void buf_to_sg(struct scatterlist *sg, char *ptr, int len)
+void buf_to_sg(struct scatterlist *sg, void *ptr, int len)
{
sg->page = virt_to_page(ptr);
sg->offset = offset_in_page(ptr);
return(ret);
}
+#ifdef HAVE_ASYNC_BLOCK_CIPHER
+
static inline
int krb5_digest_hmac(struct ll_crypto_hash *tfm,
rawobj_t *key,
struct krb5_header *khdr,
int msgcnt, rawobj_t *msgs,
+ int iovcnt, lnet_kiov_t *iovs,
rawobj_t *cksum)
-#ifdef HAVE_ASYNC_BLOCK_CIPHER
{
struct hash_desc desc;
struct scatterlist sg[1];
ll_crypto_hash_update(&desc, sg, msgs[i].len);
}
+ for (i = 0; i < iovcnt; i++) {
+ if (iovs[i].kiov_len == 0)
+ continue;
+ sg[0].page = iovs[i].kiov_page;
+ sg[0].offset = iovs[i].kiov_offset;
+ sg[0].length = iovs[i].kiov_len;
+ ll_crypto_hash_update(&desc, sg, iovs[i].kiov_len);
+ }
+
if (khdr) {
buf_to_sg(sg, (char *) khdr, sizeof(*khdr));
ll_crypto_hash_update(&desc, sg, sizeof(*khdr));
return ll_crypto_hash_final(&desc, cksum->data);
}
-#else /* HAVE_ASYNC_BLOCK_CIPHER */
+
+#else /* ! HAVE_ASYNC_BLOCK_CIPHER */
+
+static inline
+int krb5_digest_hmac(struct ll_crypto_hash *tfm,
+ rawobj_t *key,
+ struct krb5_header *khdr,
+ int msgcnt, rawobj_t *msgs,
+ int iovcnt, lnet_kiov_t *iovs,
+ rawobj_t *cksum)
{
struct scatterlist sg[1];
__u32 keylen = key->len, i;
crypto_hmac_update(tfm, sg, 1);
}
+ for (i = 0; i < iovcnt; i++) {
+ if (iovs[i].kiov_len == 0)
+ continue;
+ sg[0].page = iovs[i].kiov_page;
+ sg[0].offset = iovs[i].kiov_offset;
+ sg[0].length = iovs[i].kiov_len;
+ crypto_hmac_update(tfm, sg, 1);
+ }
+
if (khdr) {
buf_to_sg(sg, (char *) khdr, sizeof(*khdr));
crypto_hmac_update(tfm, sg, 1);
crypto_hmac_final(tfm, key->data, &keylen, cksum->data);
return 0;
}
+
#endif /* HAVE_ASYNC_BLOCK_CIPHER */
static inline
struct krb5_keyblock *kb,
struct krb5_header *khdr,
int msgcnt, rawobj_t *msgs,
+ int iovcnt, lnet_kiov_t *iovs,
rawobj_t *cksum)
{
struct hash_desc desc;
ll_crypto_hash_update(&desc, sg, msgs[i].len);
}
+ for (i = 0; i < iovcnt; i++) {
+ if (iovs[i].kiov_len == 0)
+ continue;
+ sg[0].page = iovs[i].kiov_page;
+ sg[0].offset = iovs[i].kiov_offset;
+ sg[0].length = iovs[i].kiov_len;
+ ll_crypto_hash_update(&desc, sg, iovs[i].kiov_len);
+ }
+
if (khdr) {
buf_to_sg(sg, (char *) khdr, sizeof(*khdr));
ll_crypto_hash_update(&desc, sg, sizeof(*khdr));
struct krb5_keyblock *kb,
struct krb5_header *khdr,
int msgcnt, rawobj_t *msgs,
+ int iovcnt, lnet_kiov_t *iovs,
rawobj_t *cksum)
{
struct krb5_enctype *ke = &enctypes[enctype];
if (ke->ke_hash_hmac)
rc = krb5_digest_hmac(tfm, &kb->kb_key,
- khdr, msgcnt, msgs, cksum);
+ khdr, msgcnt, msgs, iovcnt, iovs, cksum);
else
rc = krb5_digest_norm(tfm, kb,
- khdr, msgcnt, msgs, cksum);
+ khdr, msgcnt, msgs, iovcnt, iovs, cksum);
if (rc == 0)
code = GSS_S_COMPLETE;
return code;
}
+static void fill_krb5_header(struct krb5_ctx *kctx,
+ struct krb5_header *khdr,
+ int privacy)
+{
+ unsigned char acceptor_flag;
+
+ acceptor_flag = kctx->kc_initiate ? 0 : FLAG_SENDER_IS_ACCEPTOR;
+
+ if (privacy) {
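+ /* token ids per RFC 4121: KG_TOK_WRAP_MSG = 0x0504,
+ * KG_TOK_MIC_MSG = 0x0404; MIC tokens carry 0xffff filler in the
+ * EC/RRC slots while wrap tokens use real (here zero) values */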
+ khdr->kh_tok_id = cpu_to_be16(KG_TOK_WRAP_MSG);
+ khdr->kh_flags = acceptor_flag | FLAG_WRAP_CONFIDENTIAL;
+ khdr->kh_ec = cpu_to_be16(0);
+ khdr->kh_rrc = cpu_to_be16(0);
+ } else {
+ khdr->kh_tok_id = cpu_to_be16(KG_TOK_MIC_MSG);
+ khdr->kh_flags = acceptor_flag;
+ khdr->kh_ec = cpu_to_be16(0xffff);
+ khdr->kh_rrc = cpu_to_be16(0xffff);
+ }
+
+ khdr->kh_filler = 0xff;
+ spin_lock(&krb5_seq_lock);
+ khdr->kh_seq = cpu_to_be64(kctx->kc_seq_send++);
+ spin_unlock(&krb5_seq_lock);
+}
+
+static __u32 verify_krb5_header(struct krb5_ctx *kctx,
+ struct krb5_header *khdr,
+ int privacy)
+{
+ unsigned char acceptor_flag;
+ __u16 tok_id, ec_rrc;
+
+ acceptor_flag = kctx->kc_initiate ? FLAG_SENDER_IS_ACCEPTOR : 0;
+
+ if (privacy) {
+ tok_id = KG_TOK_WRAP_MSG;
+ ec_rrc = 0x0;
+ } else {
+ tok_id = KG_TOK_MIC_MSG;
+ ec_rrc = 0xffff;
+ }
+
+ /* sanity checks */
+ if (be16_to_cpu(khdr->kh_tok_id) != tok_id) {
+ CERROR("bad token id\n");
+ return GSS_S_DEFECTIVE_TOKEN;
+ }
+ if ((khdr->kh_flags & FLAG_SENDER_IS_ACCEPTOR) != acceptor_flag) {
+ CERROR("bad direction flag\n");
+ return GSS_S_BAD_SIG;
+ }
+ if (privacy && (khdr->kh_flags & FLAG_WRAP_CONFIDENTIAL) == 0) {
+ CERROR("missing confidential flag\n");
+ return GSS_S_BAD_SIG;
+ }
+ if (khdr->kh_filler != 0xff) {
+ CERROR("bad filler\n");
+ return GSS_S_DEFECTIVE_TOKEN;
+ }
+ if (be16_to_cpu(khdr->kh_ec) != ec_rrc ||
+ be16_to_cpu(khdr->kh_rrc) != ec_rrc) {
+ CERROR("bad EC or RRC\n");
+ return GSS_S_DEFECTIVE_TOKEN;
+ }
+ return GSS_S_COMPLETE;
+}
+
static
__u32 gss_get_mic_kerberos(struct gss_ctx *gctx,
int msgcnt,
rawobj_t *msgs,
+ int iovcnt,
+ lnet_kiov_t *iovs,
rawobj_t *token)
{
struct krb5_ctx *kctx = gctx->internal_ctx_id;
struct krb5_enctype *ke = &enctypes[kctx->kc_enctype];
struct krb5_header *khdr;
- unsigned char acceptor_flag;
rawobj_t cksum = RAWOBJ_EMPTY;
- __u32 rc = GSS_S_FAILURE;
-
- acceptor_flag = kctx->kc_initiate ? 0 : FLAG_SENDER_IS_ACCEPTOR;
/* fill krb5 header */
LASSERT(token->len >= sizeof(*khdr));
khdr = (struct krb5_header *) token->data;
-
- khdr->kh_tok_id = cpu_to_be16(KG_TOK_MIC_MSG);
- khdr->kh_flags = acceptor_flag;
- khdr->kh_filler = 0xff;
- khdr->kh_ec = cpu_to_be16(0xffff);
- khdr->kh_rrc = cpu_to_be16(0xffff);
- spin_lock(&krb5_seq_lock);
- khdr->kh_seq = cpu_to_be64(kctx->kc_seq_send++);
- spin_unlock(&krb5_seq_lock);
+ fill_krb5_header(kctx, khdr, 0);
/* checksum */
if (krb5_make_checksum(kctx->kc_enctype, &kctx->kc_keyc,
- khdr, msgcnt, msgs, &cksum))
- goto out_err;
+ khdr, msgcnt, msgs, iovcnt, iovs, &cksum))
+ return GSS_S_FAILURE;
LASSERT(cksum.len >= ke->ke_hash_size);
LASSERT(token->len >= sizeof(*khdr) + ke->ke_hash_size);
ke->ke_hash_size);
token->len = sizeof(*khdr) + ke->ke_hash_size;
- rc = GSS_S_COMPLETE;
-out_err:
rawobj_free(&cksum);
- return rc;
+ return GSS_S_COMPLETE;
}
static
__u32 gss_verify_mic_kerberos(struct gss_ctx *gctx,
int msgcnt,
rawobj_t *msgs,
+ int iovcnt,
+ lnet_kiov_t *iovs,
rawobj_t *token)
{
struct krb5_ctx *kctx = gctx->internal_ctx_id;
struct krb5_enctype *ke = &enctypes[kctx->kc_enctype];
struct krb5_header *khdr;
- unsigned char acceptor_flag;
rawobj_t cksum = RAWOBJ_EMPTY;
- __u32 rc = GSS_S_FAILURE;
-
- acceptor_flag = kctx->kc_initiate ? FLAG_SENDER_IS_ACCEPTOR : 0;
+ __u32 major;
if (token->len < sizeof(*khdr)) {
CERROR("short signature: %u\n", token->len);
khdr = (struct krb5_header *) token->data;
- /* sanity checks */
- if (be16_to_cpu(khdr->kh_tok_id) != KG_TOK_MIC_MSG) {
- CERROR("bad token id\n");
- return GSS_S_DEFECTIVE_TOKEN;
- }
- if ((khdr->kh_flags & FLAG_SENDER_IS_ACCEPTOR) != acceptor_flag) {
- CERROR("bad direction flag\n");
- return GSS_S_BAD_SIG;
- }
- if (khdr->kh_filler != 0xff) {
- CERROR("bad filler\n");
- return GSS_S_DEFECTIVE_TOKEN;
- }
- if (be16_to_cpu(khdr->kh_ec) != 0xffff ||
- be16_to_cpu(khdr->kh_rrc) != 0xffff) {
- CERROR("bad EC or RRC\n");
- return GSS_S_DEFECTIVE_TOKEN;
+ major = verify_krb5_header(kctx, khdr, 0);
+ if (major != GSS_S_COMPLETE) {
+ CERROR("bad krb5 header\n");
+ return major;
}
if (token->len < sizeof(*khdr) + ke->ke_hash_size) {
CERROR("short signature: %u, require %d\n",
token->len, (int) sizeof(*khdr) + ke->ke_hash_size);
- goto out;
+ return GSS_S_FAILURE;
}
if (krb5_make_checksum(kctx->kc_enctype, &kctx->kc_keyc,
- khdr, msgcnt, msgs, &cksum))
+ khdr, msgcnt, msgs, iovcnt, iovs, &cksum)) {
+ CERROR("failed to make checksum\n");
return GSS_S_FAILURE;
+ }
LASSERT(cksum.len >= ke->ke_hash_size);
if (memcmp(khdr + 1, cksum.data + cksum.len - ke->ke_hash_size,
ke->ke_hash_size)) {
CERROR("checksum mismatch\n");
- rc = GSS_S_BAD_SIG;
- goto out;
+ rawobj_free(&cksum);
+ return GSS_S_BAD_SIG;
}
- rc = GSS_S_COMPLETE;
-out:
rawobj_free(&cksum);
- return rc;
+ return GSS_S_COMPLETE;
}
static
}
static
+int krb5_encrypt_bulk(struct ll_crypto_cipher *tfm,
+ struct krb5_header *khdr,
+ char *confounder,
+ struct ptlrpc_bulk_desc *desc,
+ rawobj_t *cipher,
+ int adj_nob)
+{
+ struct blkcipher_desc ciph_desc;
+ __u8 local_iv[16] = {0};
+ struct scatterlist src, dst;
+ int blocksize, i, rc, nob = 0;
+
+ LASSERT(desc->bd_iov_count);
+ LASSERT(desc->bd_enc_iov);
+
+ blocksize = ll_crypto_blkcipher_blocksize(tfm);
+ LASSERT(blocksize > 1);
+ LASSERT(cipher->len == blocksize + sizeof(*khdr));
+
+ ciph_desc.tfm = tfm;
+ ciph_desc.info = local_iv;
+ ciph_desc.flags = 0;
+
+ /* encrypt confounder */
+ buf_to_sg(&src, confounder, blocksize);
+ buf_to_sg(&dst, cipher->data, blocksize);
+
+ rc = ll_crypto_blkcipher_encrypt_iv(&ciph_desc, &dst, &src, blocksize);
+ if (rc) {
+ CERROR("error to encrypt confounder: %d\n", rc);
+ return rc;
+ }
+
+ /* encrypt clear pages */
+ for (i = 0; i < desc->bd_iov_count; i++) {
+ src.page = desc->bd_iov[i].kiov_page;
+ src.offset = desc->bd_iov[i].kiov_offset;
+ src.length = (desc->bd_iov[i].kiov_len + blocksize - 1) &
+ (~(blocksize - 1));
+
+ if (adj_nob)
+ nob += src.length;
+
+ dst.page = desc->bd_enc_iov[i].kiov_page;
+ dst.offset = src.offset;
+ dst.length = src.length;
+
+ desc->bd_enc_iov[i].kiov_offset = dst.offset;
+ desc->bd_enc_iov[i].kiov_len = dst.length;
+
+ rc = ll_crypto_blkcipher_encrypt_iv(&ciph_desc, &dst, &src,
+ src.length);
+ if (rc) {
+ CERROR("error to encrypt page: %d\n", rc);
+ return rc;
+ }
+ }
+
+ /* encrypt krb5 header */
+ buf_to_sg(&src, khdr, sizeof(*khdr));
+ buf_to_sg(&dst, cipher->data + blocksize, sizeof(*khdr));
+
+ rc = ll_crypto_blkcipher_encrypt_iv(&ciph_desc,
+ &dst, &src, sizeof(*khdr));
+ if (rc) {
+ CERROR("error to encrypt krb5 header: %d\n", rc);
+ return rc;
+ }
+
+ if (adj_nob)
+ desc->bd_nob = nob;
+
+ return 0;
+}
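
The per-page src.length computation above is the usual power-of-two round-up, so every page is encrypted in whole cipher blocks. A tiny standalone check of the arithmetic (illustrative only; blocksize must be a power of two):

    #include <assert.h>
    #include <stdio.h>

    /* round len up to a power-of-two blocksize, as krb5_encrypt_bulk()
     * does for each page */
    static unsigned int blk_round_up(unsigned int len, unsigned int bs)
    {
            assert(bs && (bs & (bs - 1)) == 0);   /* power of two */
            return (len + bs - 1) & ~(bs - 1);
    }

    int main(void)
    {
            printf("%u %u %u\n",
                   blk_round_up(1000, 16),   /* 1008 */
                   blk_round_up(1008, 16),   /* 1008, already aligned */
                   blk_round_up(1, 16));     /* 16 */
            return 0;
    }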
+
+/*
+ * desc->bd_nob_transferred is the size of cipher text received.
+ * desc->bd_nob is the expected size of the resulting plain text.
+ */
+static
+int krb5_decrypt_bulk(struct ll_crypto_cipher *tfm,
+ struct krb5_header *khdr,
+ struct ptlrpc_bulk_desc *desc,
+ rawobj_t *cipher,
+ rawobj_t *plain)
+{
+ struct blkcipher_desc ciph_desc;
+ __u8 local_iv[16] = {0};
+ struct scatterlist src, dst;
+ int ct_nob = 0, pt_nob = 0;
+ int blocksize, i, rc;
+
+ LASSERT(desc->bd_iov_count);
+ LASSERT(desc->bd_enc_iov);
+ LASSERT(desc->bd_nob_transferred);
+
+ blocksize = ll_crypto_blkcipher_blocksize(tfm);
+ LASSERT(blocksize > 1);
+ LASSERT(cipher->len == blocksize + sizeof(*khdr));
+
+ ciph_desc.tfm = tfm;
+ ciph_desc.info = local_iv;
+ ciph_desc.flags = 0;
+
+ if (desc->bd_nob_transferred % blocksize) {
+ CERROR("odd transferred nob: %d\n", desc->bd_nob_transferred);
+ return -EPROTO;
+ }
+
+ /* decrypt head (confounder) */
+ buf_to_sg(&src, cipher->data, blocksize);
+ buf_to_sg(&dst, plain->data, blocksize);
+
+ rc = ll_crypto_blkcipher_decrypt_iv(&ciph_desc, &dst, &src, blocksize);
+ if (rc) {
+ CERROR("error to decrypt confounder: %d\n", rc);
+ return rc;
+ }
+
+ /*
+ * decrypt clear pages. note the enc_iov was prepared by prep_bulk(),
+ * which has already done some sanity checks.
+ *
+ * desc->bd_nob is the actual plain text size supposed to be
+ * transferred. desc->bd_nob_transferred is the actual cipher
+ * text received.
+ */
+ for (i = 0; i < desc->bd_iov_count && ct_nob < desc->bd_nob_transferred;
+ i++) {
+ if (desc->bd_enc_iov[i].kiov_len == 0)
+ continue;
+
+ if (ct_nob + desc->bd_enc_iov[i].kiov_len >
+ desc->bd_nob_transferred)
+ desc->bd_enc_iov[i].kiov_len =
+ desc->bd_nob_transferred - ct_nob;
+
+ desc->bd_iov[i].kiov_len = desc->bd_enc_iov[i].kiov_len;
+ if (pt_nob + desc->bd_enc_iov[i].kiov_len > desc->bd_nob)
+ desc->bd_iov[i].kiov_len = desc->bd_nob - pt_nob;
+
+ src.page = desc->bd_enc_iov[i].kiov_page;
+ src.offset = desc->bd_enc_iov[i].kiov_offset;
+ src.length = desc->bd_enc_iov[i].kiov_len;
+
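+ /* decrypt in place in the bounce page by default; when the plain
+ * page is blocksize-aligned, decrypt straight into it and skip the
+ * copy-out below */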
+ dst = src;
+
+ if (desc->bd_iov[i].kiov_offset % blocksize == 0)
+ dst.page = desc->bd_iov[i].kiov_page;
+
+ rc = ll_crypto_blkcipher_decrypt_iv(&ciph_desc, &dst, &src,
+ src.length);
+ if (rc) {
+ CERROR("error to decrypt page: %d\n", rc);
+ return rc;
+ }
+
+ if (desc->bd_iov[i].kiov_offset % blocksize) {
+ memcpy(cfs_page_address(desc->bd_iov[i].kiov_page) +
+ desc->bd_iov[i].kiov_offset,
+ cfs_page_address(desc->bd_enc_iov[i].kiov_page) +
+ desc->bd_iov[i].kiov_offset,
+ desc->bd_iov[i].kiov_len);
+ }
+
+ ct_nob += desc->bd_enc_iov[i].kiov_len;
+ pt_nob += desc->bd_iov[i].kiov_len;
+ }
+
+ /* decrypt tail (krb5 header) */
+ buf_to_sg(&src, cipher->data + blocksize, sizeof(*khdr));
+ buf_to_sg(&dst, cipher->data + blocksize, sizeof(*khdr));
+
+ rc = ll_crypto_blkcipher_decrypt_iv(&ciph_desc,
+ &dst, &src, sizeof(*khdr));
+ if (rc) {
+ CERROR("error to decrypt tail: %d\n", rc);
+ return rc;
+ }
+
+ if (memcmp(cipher->data + blocksize, khdr, sizeof(*khdr))) {
+ CERROR("krb5 header doesn't match\n");
+ return -EACCES;
+ }
+
+ return 0;
+}
+
+static
__u32 gss_wrap_kerberos(struct gss_ctx *gctx,
rawobj_t *gsshdr,
rawobj_t *msg,
struct krb5_ctx *kctx = gctx->internal_ctx_id;
struct krb5_enctype *ke = &enctypes[kctx->kc_enctype];
struct krb5_header *khdr;
- unsigned char acceptor_flag;
int blocksize;
rawobj_t cksum = RAWOBJ_EMPTY;
- rawobj_t data_desc[4], cipher;
+ rawobj_t data_desc[3], cipher;
__u8 conf[GSS_MAX_CIPHER_BLOCK];
- int enc_rc = 0;
+ int rc = 0;
LASSERT(ke);
LASSERT(ke->ke_conf_size <= GSS_MAX_CIPHER_BLOCK);
/* fill krb5 header */
LASSERT(token->len >= sizeof(*khdr));
khdr = (struct krb5_header *) token->data;
- acceptor_flag = kctx->kc_initiate ? 0 : FLAG_SENDER_IS_ACCEPTOR;
-
- khdr->kh_tok_id = cpu_to_be16(KG_TOK_WRAP_MSG);
- khdr->kh_flags = acceptor_flag | FLAG_WRAP_CONFIDENTIAL;
- khdr->kh_filler = 0xff;
- khdr->kh_ec = cpu_to_be16(0);
- khdr->kh_rrc = cpu_to_be16(0);
- spin_lock(&krb5_seq_lock);
- khdr->kh_seq = cpu_to_be64(kctx->kc_seq_send++);
- spin_unlock(&krb5_seq_lock);
+ fill_krb5_header(kctx, khdr, 1);
/* generate confounder */
get_random_bytes(conf, ke->ke_conf_size);
data_desc[1].len = gsshdr->len;
data_desc[2].data = msg->data;
data_desc[2].len = msg->len;
- data_desc[3].data = (__u8 *) khdr;
- data_desc[3].len = sizeof(*khdr);
/* compute checksum */
if (krb5_make_checksum(kctx->kc_enctype, &kctx->kc_keyi,
- khdr, 4, data_desc, &cksum))
+ khdr, 3, data_desc, 0, NULL, &cksum))
return GSS_S_FAILURE;
LASSERT(cksum.len >= ke->ke_hash_size);
struct ll_crypto_cipher *arc4_tfm;
if (krb5_make_checksum(ENCTYPE_ARCFOUR_HMAC, &kctx->kc_keyi,
- NULL, 1, &cksum, &arc4_keye)) {
+ NULL, 1, &cksum, 0, NULL, &arc4_keye)) {
CERROR("failed to obtain arc4 enc key\n");
- GOTO(arc4_out, enc_rc = -EACCES);
+ GOTO(arc4_out, rc = -EACCES);
}
arc4_tfm = ll_crypto_alloc_blkcipher("ecb(arc4)", 0, 0);
if (arc4_tfm == NULL) {
CERROR("failed to alloc tfm arc4 in ECB mode\n");
- GOTO(arc4_out_key, enc_rc = -EACCES);
+ GOTO(arc4_out_key, rc = -EACCES);
}
if (ll_crypto_blkcipher_setkey(arc4_tfm, arc4_keye.data,
arc4_keye.len)) {
CERROR("failed to set arc4 key, len %d\n",
arc4_keye.len);
- GOTO(arc4_out_tfm, enc_rc = -EACCES);
+ GOTO(arc4_out_tfm, rc = -EACCES);
}
- enc_rc = krb5_encrypt_rawobjs(arc4_tfm, 1,
- 3, data_desc, &cipher, 1);
+ rc = krb5_encrypt_rawobjs(arc4_tfm, 1,
+ 3, data_desc, &cipher, 1);
arc4_out_tfm:
ll_crypto_free_blkcipher(arc4_tfm);
arc4_out_key:
arc4_out:
do {} while(0); /* just to avoid compile warning */
} else {
- enc_rc = krb5_encrypt_rawobjs(kctx->kc_keye.kb_tfm, 0,
- 3, data_desc, &cipher, 1);
+ rc = krb5_encrypt_rawobjs(kctx->kc_keye.kb_tfm, 0,
+ 3, data_desc, &cipher, 1);
+ }
+
+ if (rc != 0) {
+ rawobj_free(&cksum);
+ return GSS_S_FAILURE;
+ }
+
+ /* fill in checksum */
+ LASSERT(token->len >= sizeof(*khdr) + cipher.len + ke->ke_hash_size);
+ memcpy((char *)(khdr + 1) + cipher.len,
+ cksum.data + cksum.len - ke->ke_hash_size,
+ ke->ke_hash_size);
+ rawobj_free(&cksum);
+
+ /* final token length */
+ token->len = sizeof(*khdr) + cipher.len + ke->ke_hash_size;
+ return GSS_S_COMPLETE;
+}
+
+static
+__u32 gss_prep_bulk_kerberos(struct gss_ctx *gctx,
+ struct ptlrpc_bulk_desc *desc)
+{
+ struct krb5_ctx *kctx = gctx->internal_ctx_id;
+ int blocksize, i;
+
+ LASSERT(desc->bd_iov_count);
+ LASSERT(desc->bd_enc_iov);
+ LASSERT(kctx->kc_keye.kb_tfm);
+
+ blocksize = ll_crypto_blkcipher_blocksize(kctx->kc_keye.kb_tfm);
+
+ for (i = 0; i < desc->bd_iov_count; i++) {
+ LASSERT(desc->bd_enc_iov[i].kiov_page);
+ /*
+ * the offset should always start at a page boundary, on both the
+ * client and the server side.
+ */
+ if (desc->bd_iov[i].kiov_offset & (blocksize - 1)) {
+ CERROR("odd offset %d in page %d\n",
+ desc->bd_iov[i].kiov_offset, i);
+ return GSS_S_FAILURE;
+ }
+
+ desc->bd_enc_iov[i].kiov_offset = desc->bd_iov[i].kiov_offset;
+ desc->bd_enc_iov[i].kiov_len = (desc->bd_iov[i].kiov_len +
+ blocksize - 1) & (~(blocksize - 1));
+ }
+
+ return GSS_S_COMPLETE;
+}
+
+static
+__u32 gss_wrap_bulk_kerberos(struct gss_ctx *gctx,
+ struct ptlrpc_bulk_desc *desc,
+ rawobj_t *token, int adj_nob)
+{
+ struct krb5_ctx *kctx = gctx->internal_ctx_id;
+ struct krb5_enctype *ke = &enctypes[kctx->kc_enctype];
+ struct krb5_header *khdr;
+ int blocksize;
+ rawobj_t cksum = RAWOBJ_EMPTY;
+ rawobj_t data_desc[1], cipher;
+ __u8 conf[GSS_MAX_CIPHER_BLOCK];
+ int rc = 0;
+
+ LASSERT(ke);
+ LASSERT(ke->ke_conf_size <= GSS_MAX_CIPHER_BLOCK);
+
+ /*
+ * final token format:
+ * --------------------------------------------------
+ * | krb5 header | head/tail cipher text | checksum |
+ * --------------------------------------------------
+ */
+
+ /* fill krb5 header */
+ LASSERT(token->len >= sizeof(*khdr));
+ khdr = (struct krb5_header *) token->data;
+ fill_krb5_header(kctx, khdr, 1);
+
+ /* generate confounder */
+ get_random_bytes(conf, ke->ke_conf_size);
+
+ /* get encryption blocksize. note kc_keye might not be associated
+ * with a tfm; currently only arcfour-hmac lacks one */
+ if (kctx->kc_enctype == ENCTYPE_ARCFOUR_HMAC) {
+ LASSERT(kctx->kc_keye.kb_tfm == NULL);
+ blocksize = 1;
+ } else {
+ LASSERT(kctx->kc_keye.kb_tfm);
+ blocksize = ll_crypto_blkcipher_blocksize(kctx->kc_keye.kb_tfm);
+ }
+
+ /*
+ * we assume sizeof(krb5_header) (16 bytes) is a multiple of the
+ * blocksize, so the bulk token size is exactly sizeof(krb5_header) +
+ * blocksize + sizeof(krb5_header) + hashsize.
+ */
+ LASSERT(blocksize <= ke->ke_conf_size);
+ LASSERT(sizeof(*khdr) >= blocksize && sizeof(*khdr) % blocksize == 0);
+ LASSERT(token->len >= sizeof(*khdr) + blocksize + sizeof(*khdr) + 16);
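+ /*
+ * worked example (assumed sizes, illustration only): with
+ * blocksize = 16 and a 16-byte krb5 header the assertion reserves
+ * at least 16 + 16 + 16 + 16 = 64 bytes; the final token is
+ * sizeof(*khdr) + (blocksize + sizeof(*khdr)) + hashsize bytes.
+ */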
+
+ /*
+ * clear text layout for checksum:
+ * ------------------------------------------
+ * | confounder | clear pages | krb5 header |
+ * ------------------------------------------
+ */
+ data_desc[0].data = conf;
+ data_desc[0].len = ke->ke_conf_size;
+
+ /* compute checksum */
+ if (krb5_make_checksum(kctx->kc_enctype, &kctx->kc_keyi,
+ khdr, 1, data_desc,
+ desc->bd_iov_count, desc->bd_iov,
+ &cksum))
+ return GSS_S_FAILURE;
+ LASSERT(cksum.len >= ke->ke_hash_size);
+
+ /*
+ * clear text layout for encryption:
+ * ------------------------------------------
+ * | confounder | clear pages | krb5 header |
+ * ------------------------------------------
+ * | | |
+ * ---------- (cipher pages) |
+ * result token: | |
+ * -------------------------------------------
+ * | krb5 header | cipher text | cipher text |
+ * -------------------------------------------
+ */
+ data_desc[0].data = conf;
+ data_desc[0].len = ke->ke_conf_size;
+
+ cipher.data = (__u8 *) (khdr + 1);
+ cipher.len = blocksize + sizeof(*khdr);
+
+ if (kctx->kc_enctype == ENCTYPE_ARCFOUR_HMAC) {
+ LBUG();
+ rc = 0;
+ } else {
+ rc = krb5_encrypt_bulk(kctx->kc_keye.kb_tfm, khdr,
+ conf, desc, &cipher, adj_nob);
}
- if (enc_rc != 0) {
+ if (rc != 0) {
rawobj_free(&cksum);
return GSS_S_FAILURE;
}
struct krb5_ctx *kctx = gctx->internal_ctx_id;
struct krb5_enctype *ke = &enctypes[kctx->kc_enctype];
struct krb5_header *khdr;
- unsigned char acceptor_flag;
unsigned char *tmpbuf;
int blocksize, bodysize;
rawobj_t cksum = RAWOBJ_EMPTY;
rawobj_t cipher_in, plain_out;
rawobj_t hash_objs[3];
- __u32 rc = GSS_S_FAILURE, enc_rc = 0;
+ int rc = 0;
+ __u32 major;
LASSERT(ke);
- acceptor_flag = kctx->kc_initiate ? FLAG_SENDER_IS_ACCEPTOR : 0;
-
if (token->len < sizeof(*khdr)) {
CERROR("short signature: %u\n", token->len);
return GSS_S_DEFECTIVE_TOKEN;
khdr = (struct krb5_header *) token->data;
- /* sanity check header */
- if (be16_to_cpu(khdr->kh_tok_id) != KG_TOK_WRAP_MSG) {
- CERROR("bad token id\n");
- return GSS_S_DEFECTIVE_TOKEN;
- }
- if ((khdr->kh_flags & FLAG_SENDER_IS_ACCEPTOR) != acceptor_flag) {
- CERROR("bad direction flag\n");
- return GSS_S_BAD_SIG;
- }
- if ((khdr->kh_flags & FLAG_WRAP_CONFIDENTIAL) == 0) {
- CERROR("missing confidential flag\n");
- return GSS_S_BAD_SIG;
- }
- if (khdr->kh_filler != 0xff) {
- CERROR("bad filler\n");
- return GSS_S_DEFECTIVE_TOKEN;
- }
- if (be16_to_cpu(khdr->kh_ec) != 0x0 ||
- be16_to_cpu(khdr->kh_rrc) != 0x0) {
- CERROR("bad EC or RRC\n");
- return GSS_S_DEFECTIVE_TOKEN;
+ major = verify_krb5_header(kctx, khdr, 1);
+ if (major != GSS_S_COMPLETE) {
+ CERROR("bad krb5 header\n");
+ return major;
}
/* block size */
if (!tmpbuf)
return GSS_S_FAILURE;
+ major = GSS_S_FAILURE;
+
cipher_in.data = (__u8 *) (khdr + 1);
cipher_in.len = bodysize;
plain_out.data = tmpbuf;
cksum.len = ke->ke_hash_size;
if (krb5_make_checksum(ENCTYPE_ARCFOUR_HMAC, &kctx->kc_keyi,
- NULL, 1, &cksum, &arc4_keye)) {
+ NULL, 1, &cksum, 0, NULL, &arc4_keye)) {
CERROR("failed to obtain arc4 enc key\n");
- GOTO(arc4_out, enc_rc = -EACCES);
+ GOTO(arc4_out, rc = -EACCES);
}
arc4_tfm = ll_crypto_alloc_blkcipher("ecb(arc4)", 0, 0);
if (arc4_tfm == NULL) {
CERROR("failed to alloc tfm arc4 in ECB mode\n");
- GOTO(arc4_out_key, enc_rc = -EACCES);
+ GOTO(arc4_out_key, rc = -EACCES);
}
if (ll_crypto_blkcipher_setkey(arc4_tfm,
arc4_keye.data, arc4_keye.len)) {
CERROR("failed to set arc4 key, len %d\n",
arc4_keye.len);
- GOTO(arc4_out_tfm, enc_rc = -EACCES);
+ GOTO(arc4_out_tfm, rc = -EACCES);
}
- enc_rc = krb5_encrypt_rawobjs(arc4_tfm, 1,
- 1, &cipher_in, &plain_out, 0);
+ rc = krb5_encrypt_rawobjs(arc4_tfm, 1,
+ 1, &cipher_in, &plain_out, 0);
arc4_out_tfm:
ll_crypto_free_blkcipher(arc4_tfm);
arc4_out_key:
arc4_out:
cksum = RAWOBJ_EMPTY;
} else {
- enc_rc = krb5_encrypt_rawobjs(kctx->kc_keye.kb_tfm, 0,
- 1, &cipher_in, &plain_out, 0);
+ rc = krb5_encrypt_rawobjs(kctx->kc_keye.kb_tfm, 0,
+ 1, &cipher_in, &plain_out, 0);
}
- if (enc_rc != 0) {
+ if (rc != 0) {
CERROR("error decrypt\n");
goto out_free;
}
hash_objs[0].data = plain_out.data;
hash_objs[1].len = gsshdr->len;
hash_objs[1].data = gsshdr->data;
- hash_objs[2].len = plain_out.len - ke->ke_conf_size;
+ hash_objs[2].len = plain_out.len - ke->ke_conf_size - sizeof(*khdr);
hash_objs[2].data = plain_out.data + ke->ke_conf_size;
if (krb5_make_checksum(kctx->kc_enctype, &kctx->kc_keyi,
- khdr, 3, hash_objs, &cksum))
+ khdr, 3, hash_objs, 0, NULL, &cksum))
goto out_free;
LASSERT(cksum.len >= ke->ke_hash_size);
if (memcmp((char *)(khdr + 1) + bodysize,
cksum.data + cksum.len - ke->ke_hash_size,
ke->ke_hash_size)) {
- CERROR("cksum mismatch\n");
+ CERROR("checksum mismatch\n");
goto out_free;
}
msg->len = bodysize - ke->ke_conf_size - sizeof(*khdr);
memcpy(msg->data, tmpbuf + ke->ke_conf_size, msg->len);
- rc = GSS_S_COMPLETE;
+ major = GSS_S_COMPLETE;
out_free:
OBD_FREE(tmpbuf, bodysize);
rawobj_free(&cksum);
- return rc;
+ return major;
}
static
-__u32 gss_plain_encrypt_kerberos(struct gss_ctx *ctx,
- int decrypt,
- int length,
- void *in_buf,
- void *out_buf)
+__u32 gss_unwrap_bulk_kerberos(struct gss_ctx *gctx,
+ struct ptlrpc_bulk_desc *desc,
+ rawobj_t *token)
{
- struct krb5_ctx *kctx = ctx->internal_ctx_id;
- __u32 rc;
+ struct krb5_ctx *kctx = gctx->internal_ctx_id;
+ struct krb5_enctype *ke = &enctypes[kctx->kc_enctype];
+ struct krb5_header *khdr;
+ int blocksize;
+ rawobj_t cksum = RAWOBJ_EMPTY;
+ rawobj_t cipher, plain;
+ rawobj_t data_desc[1];
+ int rc;
+ __u32 major;
+
+ LASSERT(ke);
+
+ if (token->len < sizeof(*khdr)) {
+ CERROR("short signature: %u\n", token->len);
+ return GSS_S_DEFECTIVE_TOKEN;
+ }
+
+ khdr = (struct krb5_header *) token->data;
+
+ major = verify_krb5_header(kctx, khdr, 1);
+ if (major != GSS_S_COMPLETE) {
+ CERROR("bad krb5 header\n");
+ return major;
+ }
+
+ /* block size */
+ if (kctx->kc_enctype == ENCTYPE_ARCFOUR_HMAC) {
+ LASSERT(kctx->kc_keye.kb_tfm == NULL);
+ blocksize = 1;
+ LBUG();
+ } else {
+ LASSERT(kctx->kc_keye.kb_tfm);
+ blocksize = ll_crypto_blkcipher_blocksize(kctx->kc_keye.kb_tfm);
+ }
+ LASSERT(sizeof(*khdr) >= blocksize && sizeof(*khdr) % blocksize == 0);
+
+ /*
+ * token format is expected as:
+ * -----------------------------------------------
+ * | krb5 header | head/tail cipher text | cksum |
+ * -----------------------------------------------
+ */
+ if (token->len < sizeof(*khdr) + blocksize + sizeof(*khdr) +
+ ke->ke_hash_size) {
+ CERROR("short token size: %u\n", token->len);
+ return GSS_S_DEFECTIVE_TOKEN;
+ }
+
+ cipher.data = (__u8 *) (khdr + 1);
+ cipher.len = blocksize + sizeof(*khdr);
+ plain.data = cipher.data;
+ plain.len = cipher.len;
- rc = krb5_encrypt(kctx->kc_keye.kb_tfm, decrypt,
- NULL, in_buf, out_buf, length);
+ rc = krb5_decrypt_bulk(kctx->kc_keye.kb_tfm, khdr,
+ desc, &cipher, &plain);
if (rc)
- CERROR("plain encrypt error: %d\n", rc);
+ return GSS_S_DEFECTIVE_TOKEN;
+
+ /*
+ * verify checksum, compose clear text as layout:
+ * ------------------------------------------
+ * | confounder | clear pages | krb5 header |
+ * ------------------------------------------
+ */
+ data_desc[0].data = plain.data;
+ data_desc[0].len = blocksize;
+
+ if (krb5_make_checksum(kctx->kc_enctype, &kctx->kc_keyi,
+ khdr, 1, data_desc,
+ desc->bd_iov_count, desc->bd_iov,
+ &cksum))
+ return GSS_S_FAILURE;
+ LASSERT(cksum.len >= ke->ke_hash_size);
+
+ if (memcmp(plain.data + blocksize + sizeof(*khdr),
+ cksum.data + cksum.len - ke->ke_hash_size,
+ ke->ke_hash_size)) {
+ CERROR("checksum mismatch\n");
+ rawobj_free(&cksum);
+ return GSS_S_BAD_SIG;
+ }
- return rc;
+ rawobj_free(&cksum);
+ return GSS_S_COMPLETE;
}
int gss_display_kerberos(struct gss_ctx *ctx,
.gss_verify_mic = gss_verify_mic_kerberos,
.gss_wrap = gss_wrap_kerberos,
.gss_unwrap = gss_unwrap_kerberos,
- .gss_plain_encrypt = gss_plain_encrypt_kerberos,
+ .gss_prep_bulk = gss_prep_bulk_kerberos,
+ .gss_wrap_bulk = gss_wrap_bulk_kerberos,
+ .gss_unwrap_bulk = gss_unwrap_bulk_kerberos,
.gss_delete_sec_context = gss_delete_sec_context_kerberos,
.gss_display = gss_display_kerberos,
};
__u32 lgss_get_mic(struct gss_ctx *context_handle,
int msgcnt,
rawobj_t *msg,
+ int iovcnt,
+ lnet_kiov_t *iovs,
rawobj_t *mic_token)
{
LASSERT(context_handle);
->gss_get_mic(context_handle,
msgcnt,
msg,
+ iovcnt,
+ iovs,
mic_token);
}
__u32 lgss_verify_mic(struct gss_ctx *context_handle,
int msgcnt,
rawobj_t *msg,
+ int iovcnt,
+ lnet_kiov_t *iovs,
rawobj_t *mic_token)
{
LASSERT(context_handle);
->gss_verify_mic(context_handle,
msgcnt,
msg,
+ iovcnt,
+ iovs,
mic_token);
}
}
-__u32 lgss_plain_encrypt(struct gss_ctx *ctx,
- int decrypt,
- int length,
- void *in_buf,
- void *out_buf)
+__u32 lgss_prep_bulk(struct gss_ctx *context_handle,
+ struct ptlrpc_bulk_desc *desc)
{
- LASSERT(ctx);
- LASSERT(ctx->mech_type);
- LASSERT(ctx->mech_type->gm_ops);
- LASSERT(ctx->mech_type->gm_ops->gss_plain_encrypt);
+ LASSERT(context_handle);
+ LASSERT(context_handle->mech_type);
+ LASSERT(context_handle->mech_type->gm_ops);
+ LASSERT(context_handle->mech_type->gm_ops->gss_prep_bulk);
- return ctx->mech_type->gm_ops
- ->gss_plain_encrypt(ctx, decrypt, length, in_buf, out_buf);
+ return context_handle->mech_type->gm_ops
+ ->gss_prep_bulk(context_handle, desc);
+}
+
+__u32 lgss_wrap_bulk(struct gss_ctx *context_handle,
+ struct ptlrpc_bulk_desc *desc,
+ rawobj_t *token,
+ int adj_nob)
+{
+ LASSERT(context_handle);
+ LASSERT(context_handle->mech_type);
+ LASSERT(context_handle->mech_type->gm_ops);
+ LASSERT(context_handle->mech_type->gm_ops->gss_wrap_bulk);
+
+ return context_handle->mech_type->gm_ops
+ ->gss_wrap_bulk(context_handle, desc, token, adj_nob);
+}
+
+__u32 lgss_unwrap_bulk(struct gss_ctx *context_handle,
+ struct ptlrpc_bulk_desc *desc,
+ rawobj_t *token)
+{
+ LASSERT(context_handle);
+ LASSERT(context_handle->mech_type);
+ LASSERT(context_handle->mech_type->gm_ops);
+ LASSERT(context_handle->mech_type->gm_ops->gss_unwrap_bulk);
+
+ return context_handle->mech_type->gm_ops
+ ->gss_unwrap_bulk(context_handle, desc, token);
}
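
The lgss_*_bulk entry points only dispatch through the mechanism ops
table installed above. A caller sketch, illustration only (the function
and variable names are hypothetical):

static int example_unwrap_read_bulk(struct gss_ctx *gctx,
                                    struct ptlrpc_bulk_desc *desc,
                                    rawobj_t *token)
{
        __u32 major = lgss_unwrap_bulk(gctx, desc, token);

        if (major != GSS_S_COMPLETE) {
                CERROR("bulk unwrap failed: %08x\n", major);
                return -EACCES;
        }
        return 0;
}
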
/* gss_delete_sec_context: free all resources associated with context_handle.
rawobj_t *handle)
{
struct gss_header *ghdr;
- rawobj_t text[3], mic;
+ rawobj_t text[4], mic;
int textcnt, max_textcnt, mic_idx;
__u32 major;
mic.len = msg->lm_buflens[mic_idx];
mic.data = lustre_msg_buf(msg, mic_idx, 0);
- major = lgss_get_mic(mechctx, textcnt, text, &mic);
+ major = lgss_get_mic(mechctx, textcnt, text, 0, NULL, &mic);
if (major != GSS_S_COMPLETE) {
CERROR("fail to generate MIC: %08x\n", major);
return -EPERM;
struct gss_ctx *mechctx,
__u32 svc)
{
- rawobj_t text[3], mic;
+ rawobj_t text[4], mic;
int textcnt, max_textcnt;
int mic_idx;
__u32 major;
mic.len = msg->lm_buflens[mic_idx];
mic.data = lustre_msg_buf(msg, mic_idx, 0);
- major = lgss_verify_mic(mechctx, textcnt, text, &mic);
+ major = lgss_verify_mic(mechctx, textcnt, text, 0, NULL, &mic);
if (major != GSS_S_COMPLETE)
CERROR("mic verify error: %08x\n", major);
return gss_mech_payload(NULL, msgsize, privacy);
}
+static int gss_cli_bulk_payload(struct ptlrpc_cli_ctx *ctx,
+ struct sptlrpc_flavor *flvr,
+ int reply, int read)
+{
+ int payload = sizeof(struct ptlrpc_bulk_sec_desc);
+
+ LASSERT(SPTLRPC_FLVR_BULK_TYPE(flvr->sf_rpc) == SPTLRPC_BULK_DEFAULT);
+
+ if ((!reply && !read) || (reply && read)) {
+ switch (SPTLRPC_FLVR_BULK_SVC(flvr->sf_rpc)) {
+ case SPTLRPC_BULK_SVC_NULL:
+ break;
+ case SPTLRPC_BULK_SVC_INTG:
+ payload += gss_cli_payload(ctx, 0, 0);
+ break;
+ case SPTLRPC_BULK_SVC_PRIV:
+ payload += gss_cli_payload(ctx, 0, 1);
+ break;
+ case SPTLRPC_BULK_SVC_AUTH:
+ default:
+ LBUG();
+ }
+ }
+
+ return payload;
+}
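
The (reply, read) condition above selects the two message directions that
actually carry bulk data, and therefore the extra integrity or privacy
payload. An illustrative truth table, not part of the patch:

        reply  read   extra payload?
          0     0     yes  (bulk write request: client sends the data)
          0     1     no   (bulk read request)
          1     0     no   (bulk write reply)
          1     1     yes  (bulk read reply: server sends the data)
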
+
int gss_cli_ctx_match(struct ptlrpc_cli_ctx *ctx, struct vfs_cred *vcred)
{
return (ctx->cc_vcred.vc_uid == vcred->vc_uid);
if (req->rq_ctx_init)
RETURN(0);
- svc = RPC_FLVR_SVC(req->rq_flvr.sf_rpc);
+ svc = SPTLRPC_FLVR_SVC(req->rq_flvr.sf_rpc);
if (req->rq_pack_bulk)
flags |= LUSTRE_GSS_PACK_BULK;
if (req->rq_pack_udesc)
gss_header_swabber(ghdr);
major = gss_verify_msg(msg, gctx->gc_mechctx, reqhdr->gh_svc);
- if (major != GSS_S_COMPLETE)
+ if (major != GSS_S_COMPLETE) {
+ CERROR("failed to verify reply: %x\n", major);
RETURN(-EPERM);
+ }
if (req->rq_early && reqhdr->gh_svc == SPTLRPC_SVC_NULL) {
__u32 cksum;
major = gss_unseal_msg(gctx->gc_mechctx, msg,
&msglen, req->rq_repdata_len);
if (major != GSS_S_COMPLETE) {
+ CERROR("failed to unwrap reply: %x\n", major);
rc = -EPERM;
break;
}
}
/* bulk checksum is the last segment */
- if (bulk_sec_desc_unpack(msg, msg->lm_bufcount-1))
+ if (bulk_sec_desc_unpack(msg, msg->lm_bufcount - 1))
RETURN(-EPROTO);
}
struct ptlrpc_sec *sec;
LASSERT(imp);
- LASSERT(RPC_FLVR_POLICY(sf->sf_rpc) == SPTLRPC_POLICY_GSS);
+ LASSERT(SPTLRPC_FLVR_POLICY(sf->sf_rpc) == SPTLRPC_POLICY_GSS);
- gsec->gs_mech = lgss_subflavor_to_mech(RPC_FLVR_SUB(sf->sf_rpc));
+ gsec->gs_mech = lgss_subflavor_to_mech(
+ SPTLRPC_FLVR_BASE_SUB(sf->sf_rpc));
if (!gsec->gs_mech) {
CERROR("gss backend 0x%x not found\n",
- RPC_FLVR_SUB(sf->sf_rpc));
+ SPTLRPC_FLVR_BASE_SUB(sf->sf_rpc));
return -EOPNOTSUPP;
}
sec->ps_gc_interval = 0;
}
- if (sec->ps_flvr.sf_bulk_ciph != BULK_CIPH_ALG_NULL &&
- sec->ps_flvr.sf_flags & PTLRPC_SEC_FL_BULK)
+ if (SPTLRPC_FLVR_BULK_SVC(sec->ps_flvr.sf_rpc) == SPTLRPC_BULK_SVC_PRIV)
sptlrpc_enc_pool_add_user();
CDEBUG(D_SEC, "create %s%s@%p\n", (svcctx ? "reverse " : ""),
class_import_put(sec->ps_import);
- if (sec->ps_flvr.sf_bulk_ciph != BULK_CIPH_ALG_NULL &&
- sec->ps_flvr.sf_flags & PTLRPC_SEC_FL_BULK)
+ if (SPTLRPC_FLVR_BULK_SVC(sec->ps_flvr.sf_rpc) == SPTLRPC_BULK_SVC_PRIV)
sptlrpc_enc_pool_del_user();
EXIT;
}
if (req->rq_pack_bulk) {
- buflens[bufcnt] = bulk_sec_desc_size(
- req->rq_flvr.sf_bulk_hash, 1,
- req->rq_bulk_read);
+ buflens[bufcnt] = gss_cli_bulk_payload(req->rq_cli_ctx,
+ &req->rq_flvr,
+ 0, req->rq_bulk_read);
if (svc == SPTLRPC_SVC_INTG)
txtsize += buflens[bufcnt];
bufcnt++;
if (req->rq_pack_udesc)
ibuflens[ibufcnt++] = sptlrpc_current_user_desc_size();
if (req->rq_pack_bulk)
- ibuflens[ibufcnt++] = bulk_sec_desc_size(
- req->rq_flvr.sf_bulk_hash, 1,
- req->rq_bulk_read);
+ ibuflens[ibufcnt++] = gss_cli_bulk_payload(req->rq_cli_ctx,
+ &req->rq_flvr, 0,
+ req->rq_bulk_read);
clearsize = lustre_msg_size_v2(ibufcnt, ibuflens);
/* to allow append padding during encryption */
struct ptlrpc_request *req,
int msgsize)
{
- int svc = RPC_FLVR_SVC(req->rq_flvr.sf_rpc);
+ int svc = SPTLRPC_FLVR_SVC(req->rq_flvr.sf_rpc);
LASSERT(!req->rq_pack_bulk ||
(req->rq_bulk_read || req->rq_bulk_write));
ENTRY;
LASSERT(!req->rq_pool || req->rq_reqbuf);
- privacy = RPC_FLVR_SVC(req->rq_flvr.sf_rpc) == SPTLRPC_SVC_PRIV;
+ privacy = SPTLRPC_FLVR_SVC(req->rq_flvr.sf_rpc) == SPTLRPC_SVC_PRIV;
if (!req->rq_clrbuf)
goto release_reqbuf;
txtsize += buflens[1];
if (req->rq_pack_bulk) {
- buflens[bufcnt] = bulk_sec_desc_size(
- req->rq_flvr.sf_bulk_hash, 0,
- req->rq_bulk_read);
+ buflens[bufcnt] = gss_cli_bulk_payload(req->rq_cli_ctx,
+ &req->rq_flvr,
+ 1, req->rq_bulk_read);
if (svc == SPTLRPC_SVC_INTG)
txtsize += buflens[bufcnt];
bufcnt++;
buflens[0] = msgsize;
if (req->rq_pack_bulk)
- buflens[bufcnt++] = bulk_sec_desc_size(
- req->rq_flvr.sf_bulk_hash, 0,
- req->rq_bulk_read);
+ buflens[bufcnt++] = gss_cli_bulk_payload(req->rq_cli_ctx,
+ &req->rq_flvr,
+ 1, req->rq_bulk_read);
txtsize = lustre_msg_size_v2(bufcnt, buflens);
txtsize += GSS_MAX_CIPHER_BLOCK;
struct ptlrpc_request *req,
int msgsize)
{
- int svc = RPC_FLVR_SVC(req->rq_flvr.sf_rpc);
+ int svc = SPTLRPC_FLVR_SVC(req->rq_flvr.sf_rpc);
ENTRY;
LASSERT(!req->rq_pack_bulk ||
struct ptlrpc_request *req,
int segment, int newsize)
{
- int svc = RPC_FLVR_SVC(req->rq_flvr.sf_rpc);
+ int svc = SPTLRPC_FLVR_SVC(req->rq_flvr.sf_rpc);
LASSERT(!req->rq_ctx_init && !req->rq_ctx_fini);
}
*major = gss_verify_msg(msg, gctx->gsc_mechctx, gw->gw_svc);
- if (*major != GSS_S_COMPLETE)
+ if (*major != GSS_S_COMPLETE) {
+ CERROR("failed to verify request: %x\n", *major);
RETURN(-EACCES);
+ }
if (gctx->gsc_reverse == 0 &&
gss_check_seq_num(&gctx->gsc_seqdata, gw->gw_seq, 1)) {
offset++;
}
- /* check bulk cksum data */
+ /* check bulk_sec_desc data */
if (gw->gw_flags & LUSTRE_GSS_PACK_BULK) {
if (msg->lm_bufcount < (offset + 1)) {
- CERROR("no bulk checksum included\n");
+ CERROR("missing bulk sec descriptor\n");
RETURN(-EINVAL);
}
*major = gss_unseal_msg(gctx->gsc_mechctx, msg,
&msglen, req->rq_reqdata_len);
- if (*major != GSS_S_COMPLETE)
+ if (*major != GSS_S_COMPLETE) {
+ CERROR("failed to unwrap request: %x\n", *major);
RETURN(-EACCES);
+ }
if (gss_check_seq_num(&gctx->gsc_seqdata, gw->gw_seq, 1)) {
CERROR("phase 1+: discard replayed req: seq %u\n", gw->gw_seq);
return gss_mech_payload(NULL, msgsize, privacy);
}
+static int gss_svc_bulk_payload(struct gss_svc_ctx *gctx,
+ struct sptlrpc_flavor *flvr,
+ int read)
+{
+ int payload = sizeof(struct ptlrpc_bulk_sec_desc);
+
+ if (read) {
+ switch (SPTLRPC_FLVR_BULK_SVC(flvr->sf_rpc)) {
+ case SPTLRPC_BULK_SVC_NULL:
+ break;
+ case SPTLRPC_BULK_SVC_INTG:
+ payload += gss_mech_payload(NULL, 0, 0);
+ break;
+ case SPTLRPC_BULK_SVC_PRIV:
+ payload += gss_mech_payload(NULL, 0, 1);
+ break;
+ case SPTLRPC_BULK_SVC_AUTH:
+ default:
+ LBUG();
+ }
+ }
+
+ return payload;
+}
+
int gss_svc_alloc_rs(struct ptlrpc_request *req, int msglen)
{
struct gss_svc_reqctx *grctx;
RETURN(-EPROTO);
}
- svc = RPC_FLVR_SVC(req->rq_flvr.sf_rpc);
+ svc = SPTLRPC_FLVR_SVC(req->rq_flvr.sf_rpc);
early = (req->rq_packed_final == 0);
grctx = gss_svc_ctx2reqctx(req->rq_svc_ctx);
LASSERT(grctx->src_reqbsd);
bsd_off = ibufcnt;
- ibuflens[ibufcnt++] = bulk_sec_desc_size(
- grctx->src_reqbsd->bsd_hash_alg,
- 0, req->rq_bulk_read);
+ ibuflens[ibufcnt++] = gss_svc_bulk_payload(
+ grctx->src_ctx,
+ &req->rq_flvr,
+ req->rq_bulk_read);
}
txtsize = lustre_msg_size_v2(ibufcnt, ibuflens);
LASSERT(grctx->src_reqbsd);
bsd_off = bufcnt;
- buflens[bufcnt] = bulk_sec_desc_size(
- grctx->src_reqbsd->bsd_hash_alg,
- 0, req->rq_bulk_read);
+ buflens[bufcnt] = gss_svc_bulk_payload(
+ grctx->src_ctx,
+ &req->rq_flvr,
+ req->rq_bulk_read);
if (svc == SPTLRPC_SVC_INTG)
txtsize += buflens[bufcnt];
bufcnt++;
sptlrpc_import_flush_all_ctx(imp);
atomic_dec(&imp->imp_inval_count);
- cfs_waitq_signal(&imp->imp_recovery_waitq);
+ cfs_waitq_broadcast(&imp->imp_recovery_waitq);
}
/* unset imp_invalid */
IMPORT_SET_STATE(imp, LUSTRE_IMP_REPLAY_LOCKS);
} else {
IMPORT_SET_STATE(imp, LUSTRE_IMP_FULL);
- }
-
- spin_lock(&imp->imp_lock);
- if (imp->imp_invalid) {
- spin_unlock(&imp->imp_lock);
ptlrpc_activate_import(imp);
- } else {
- spin_unlock(&imp->imp_lock);
}
GOTO(finish, rc = 0);
imp->imp_last_recon = 0;
spin_unlock(&imp->imp_lock);
- cfs_waitq_signal(&imp->imp_recovery_waitq);
+ cfs_waitq_broadcast(&imp->imp_recovery_waitq);
RETURN(rc);
}
}
if (imp->imp_state == LUSTRE_IMP_FULL) {
- cfs_waitq_signal(&imp->imp_recovery_waitq);
+ cfs_waitq_broadcast(&imp->imp_recovery_waitq);
ptlrpc_wake_delayed(imp);
}
{ LLOG_CATINFO, "llog_catinfo" },
{ LLOG_ORIGIN_HANDLE_PREV_BLOCK, "llog_origin_handle_prev_block" },
{ LLOG_ORIGIN_HANDLE_DESTROY, "llog_origin_handle_destroy" },
- { FLD_QUERY, "fld_query" },
+ { QUOTA_DQACQ, "quota_acquire" },
+ { QUOTA_DQREL, "quota_release" },
{ SEQ_QUERY, "seq_query" },
{ SEC_CTX_INIT, "sec_ctx_init" },
{ SEC_CTX_INIT_CONT,"sec_ctx_init_cont" },
{ SEC_CTX_FINI, "sec_ctx_fini" },
- { QUOTA_DQACQ, "quota_acquire" },
- { QUOTA_DQREL, "quota_release" }
+ { FLD_QUERY, "fld_query" }
};
struct ll_eopcode {
lustre_msghdr_set_flags(request->rq_reqmsg,
request->rq_import->imp_msghdr_flags);
+ if (request->rq_resend)
+ lustre_msg_add_flags(request->rq_reqmsg, MSG_RESENT);
+
rc = sptlrpc_cli_wrap_request(request);
if (rc)
RETURN(rc);
RETURN(rc);
}
- if (request->rq_resend)
- lustre_msg_add_flags(request->rq_reqmsg, MSG_RESENT);
-
if (!noreply) {
LASSERT (request->rq_replen != 0);
if (request->rq_repbuf == NULL) {
buflen = m->lm_buflens[n];
if (unlikely(buflen < min_size)) {
- CERROR("msg %p buffer[%d] size %d too small (required %d)\n",
- m, n, buflen, min_size);
+ CERROR("msg %p buffer[%d] size %d too small "
+ "(required %d, opc=%d)\n",
+ m, n, buflen, min_size, lustre_msg_get_opc(m));
return NULL;
}
/* uuid endian insensitive */
}
-/*begin adding MDT by huanghua@clusterfs.com*/
void lustre_swab_lmv_desc (struct lmv_desc *ld)
{
__swab32s (&ld->ld_tgt_count);
__swab32s (&ld->ld_active_tgt_count);
+ __swab32s (&ld->ld_default_stripe_count);
+ __swab32s (&ld->ld_pattern);
+ __swab64s (&ld->ld_default_hash_size);
+ __swab32s (&ld->ld_qos_maxage);
/* uuid endian insensitive */
}
+void lustre_swab_lmv_stripe_md (struct lmv_stripe_md *mea)
+{
+ __swab32s(&mea->mea_magic);
+ __swab32s(&mea->mea_count);
+ __swab32s(&mea->mea_master);
+ CLASSERT(offsetof(typeof(*mea), mea_padding) != 0);
+}
+
static void print_lum (struct lov_user_md *lum)
{
CDEBUG(D_OTHER, "lov_user_md %p:\n", lum);
EXIT;
}
+void lustre_swab_lov_mds_md(struct lov_mds_md *lmm)
+{
+ ENTRY;
+ CDEBUG(D_IOCTL, "swabbing lov_mds_md\n");
+ __swab32s(&lmm->lmm_magic);
+ __swab32s(&lmm->lmm_pattern);
+ __swab64s(&lmm->lmm_object_id);
+ __swab64s(&lmm->lmm_object_gr);
+ __swab32s(&lmm->lmm_stripe_size);
+ __swab32s(&lmm->lmm_stripe_count);
+ EXIT;
+}
+
void lustre_swab_lov_user_md_join(struct lov_user_md_join *lumj)
{
ENTRY;
LASSERT (!(md->options & (LNET_MD_IOVEC | LNET_MD_KIOV | LNET_MD_PHYS)));
md->options |= LNET_MD_KIOV;
- md->start = &desc->bd_iov[0];
md->length = desc->bd_iov_count;
+ if (desc->bd_enc_iov)
+ md->start = desc->bd_enc_iov;
+ else
+ md->start = desc->bd_iov;
}
void ptlrpc_add_bulk_page(struct ptlrpc_bulk_desc *desc, cfs_page_t *page,
EXPORT_SYMBOL(lustre_swab_lov_user_md_v3);
EXPORT_SYMBOL(lustre_swab_lov_user_md_objects);
EXPORT_SYMBOL(lustre_swab_lov_user_md_join);
+EXPORT_SYMBOL(lustre_swab_lov_mds_md);
EXPORT_SYMBOL(lustre_swab_ldlm_res_id);
EXPORT_SYMBOL(lustre_swab_ldlm_policy_data);
EXPORT_SYMBOL(lustre_swab_ldlm_intent);
mutex_down(&ctxt->loc_sem);
lcm = ctxt->loc_lcm;
+ CDEBUG(D_INFO, "cancel on lcm %p\n", lcm);
/*
* Let's check if we have all structures alive. We also check for
EXPORT_SYMBOL(sptlrpc_unregister_policy);
static
-struct ptlrpc_sec_policy * sptlrpc_rpcflavor2policy(__u16 flavor)
+struct ptlrpc_sec_policy * sptlrpc_wireflavor2policy(__u32 flavor)
{
static DECLARE_MUTEX(load_mutex);
static atomic_t loaded = ATOMIC_INIT(0);
struct ptlrpc_sec_policy *policy;
- __u16 number = RPC_FLVR_POLICY(flavor), flag = 0;
+ __u16 number = SPTLRPC_FLVR_POLICY(flavor);
+ __u16 flag = 0;
if (number >= SPTLRPC_POLICY_MAX)
return NULL;
return policy;
}
-__u16 sptlrpc_name2rpcflavor(const char *name)
+__u32 sptlrpc_name2flavor_base(const char *name)
{
if (!strcmp(name, "null"))
return SPTLRPC_FLVR_NULL;
return SPTLRPC_FLVR_INVALID;
}
-EXPORT_SYMBOL(sptlrpc_name2rpcflavor);
+EXPORT_SYMBOL(sptlrpc_name2flavor_base);
-const char *sptlrpc_rpcflavor2name(__u16 flavor)
+const char *sptlrpc_flavor2name_base(__u32 flvr)
{
- switch (flavor) {
- case SPTLRPC_FLVR_NULL:
+ __u32 base = SPTLRPC_FLVR_BASE(flvr);
+
+ if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_NULL))
return "null";
- case SPTLRPC_FLVR_PLAIN:
+ else if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_PLAIN))
return "plain";
- case SPTLRPC_FLVR_KRB5N:
+ else if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_KRB5N))
return "krb5n";
- case SPTLRPC_FLVR_KRB5A:
+ else if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_KRB5A))
return "krb5a";
- case SPTLRPC_FLVR_KRB5I:
+ else if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_KRB5I))
return "krb5i";
- case SPTLRPC_FLVR_KRB5P:
+ else if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_KRB5P))
return "krb5p";
- default:
- CERROR("invalid rpc flavor 0x%x(p%u,s%u,v%u)\n", flavor,
- RPC_FLVR_POLICY(flavor), RPC_FLVR_MECH(flavor),
- RPC_FLVR_SVC(flavor));
- }
- return "unknown";
+
+ CERROR("invalid wire flavor 0x%x\n", flvr);
+ return "invalid";
}
-EXPORT_SYMBOL(sptlrpc_rpcflavor2name);
+EXPORT_SYMBOL(sptlrpc_flavor2name_base);
-int sptlrpc_flavor2name(struct sptlrpc_flavor *sf, char *buf, int bufsize)
+char *sptlrpc_flavor2name_bulk(struct sptlrpc_flavor *sf,
+ char *buf, int bufsize)
{
- char *bulk;
-
- if (sf->sf_bulk_ciph != BULK_CIPH_ALG_NULL)
- bulk = "bulkp";
- else if (sf->sf_bulk_hash != BULK_HASH_ALG_NULL)
- bulk = "bulki";
+ if (SPTLRPC_FLVR_POLICY(sf->sf_rpc) == SPTLRPC_POLICY_PLAIN)
+ snprintf(buf, bufsize, "hash:%s",
+ sptlrpc_get_hash_name(sf->u_bulk.hash.hash_alg));
else
- bulk = "bulkn";
+ snprintf(buf, bufsize, "%s",
+ sptlrpc_flavor2name_base(sf->sf_rpc));
- snprintf(buf, bufsize, "%s-%s:%s/%s",
- sptlrpc_rpcflavor2name(sf->sf_rpc), bulk,
- sptlrpc_get_hash_name(sf->sf_bulk_hash),
- sptlrpc_get_ciph_name(sf->sf_bulk_ciph));
- return 0;
+ buf[bufsize - 1] = '\0';
+ return buf;
+}
+EXPORT_SYMBOL(sptlrpc_flavor2name_bulk);
+
+char *sptlrpc_flavor2name(struct sptlrpc_flavor *sf, char *buf, int bufsize)
+{
+ snprintf(buf, bufsize, "%s", sptlrpc_flavor2name_base(sf->sf_rpc));
+
+ /*
+ * currently we don't support a customized bulk specification for
+ * flavors other than plain.
+ */
+ if (SPTLRPC_FLVR_POLICY(sf->sf_rpc) == SPTLRPC_POLICY_PLAIN) {
+ char bspec[16];
+
+ bspec[0] = '-';
+ sptlrpc_flavor2name_bulk(sf, &bspec[1], sizeof(bspec) - 1);
+ strncat(buf, bspec, bufsize);
+ }
+
+ buf[bufsize - 1] = '\0';
+ return buf;
}
EXPORT_SYMBOL(sptlrpc_flavor2name);
+char *sptlrpc_secflags2str(__u32 flags, char *buf, int bufsize)
+{
+ buf[0] = '\0';
+
+ if (flags & PTLRPC_SEC_FL_REVERSE)
+ strncat(buf, "reverse,", bufsize);
+ if (flags & PTLRPC_SEC_FL_ROOTONLY)
+ strncat(buf, "rootonly,", bufsize);
+ if (flags & PTLRPC_SEC_FL_UDESC)
+ strncat(buf, "udesc,", bufsize);
+ if (flags & PTLRPC_SEC_FL_BULK)
+ strncat(buf, "bulk,", bufsize);
+ if (buf[0] == '\0')
+ strncat(buf, "-,", bufsize);
+
+ buf[bufsize - 1] = '\0';
+ return buf;
+}
+EXPORT_SYMBOL(sptlrpc_secflags2str);
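+
A usage sketch for the new helper, illustration only (the wrapper
function is hypothetical; the buffer size is arbitrary but must leave
room for the trailing NUL):

static void example_print_sec_flags(__u32 flags)
{
        char buf[40];

        CDEBUG(D_SEC, "sec flags: %s\n",
               sptlrpc_secflags2str(flags, buf, sizeof(buf)));
}
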
+
/**************************************************
* client context APIs *
**************************************************/
/* special security flags according to opcode */
switch (opcode) {
case OST_READ:
+ case MDS_READPAGE:
req->rq_bulk_read = 1;
break;
case OST_WRITE:
+ case MDS_WRITEPAGE:
req->rq_bulk_write = 1;
break;
case SEC_CTX_INIT:
/* force SVC_NULL for context initiation rpc, SVC_INTG for context
* destruction rpc */
if (unlikely(req->rq_ctx_init))
- rpc_flvr_set_svc(&req->rq_flvr.sf_rpc, SPTLRPC_SVC_NULL);
+ flvr_set_svc(&req->rq_flvr.sf_rpc, SPTLRPC_SVC_NULL);
else if (unlikely(req->rq_ctx_fini))
- rpc_flvr_set_svc(&req->rq_flvr.sf_rpc, SPTLRPC_SVC_INTG);
+ flvr_set_svc(&req->rq_flvr.sf_rpc, SPTLRPC_SVC_INTG);
/* user descriptor flag, null security can't do it anyway */
if ((sec->ps_flvr.sf_flags & PTLRPC_SEC_FL_UDESC) &&
/* bulk security flag */
if ((req->rq_bulk_read || req->rq_bulk_write) &&
- (req->rq_flvr.sf_bulk_ciph != BULK_CIPH_ALG_NULL ||
- req->rq_flvr.sf_bulk_hash != BULK_HASH_ALG_NULL))
+ sptlrpc_flavor_has_bulk(&req->rq_flvr))
req->rq_pack_bulk = 1;
}
void sptlrpc_request_out_callback(struct ptlrpc_request *req)
{
- if (RPC_FLVR_SVC(req->rq_flvr.sf_rpc) != SPTLRPC_SVC_PRIV)
+ if (SPTLRPC_FLVR_SVC(req->rq_flvr.sf_rpc) != SPTLRPC_SVC_PRIV)
return;
LASSERT(req->rq_clrbuf);
RETURN(rc);
}
- switch (RPC_FLVR_SVC(req->rq_flvr.sf_rpc)) {
+ switch (SPTLRPC_FLVR_SVC(req->rq_flvr.sf_rpc)) {
case SPTLRPC_SVC_NULL:
case SPTLRPC_SVC_AUTH:
case SPTLRPC_SVC_INTG:
{
struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
int rc;
- __u16 rpc_flvr;
+ __u32 flvr;
ENTRY;
LASSERT(ctx);
}
/* v2 message, check request/reply policy match */
- rpc_flvr = WIRE_FLVR_RPC(req->rq_repdata->lm_secflvr);
+ flvr = WIRE_FLVR(req->rq_repdata->lm_secflvr);
if (req->rq_repdata->lm_magic == LUSTRE_MSG_MAGIC_V2_SWABBED)
- __swab16s(&rpc_flvr);
+ __swab32s(&flvr);
- if (RPC_FLVR_POLICY(rpc_flvr) !=
- RPC_FLVR_POLICY(req->rq_flvr.sf_rpc)) {
+ if (SPTLRPC_FLVR_POLICY(flvr) !=
+ SPTLRPC_FLVR_POLICY(req->rq_flvr.sf_rpc)) {
CERROR("request policy was %u while reply with %u\n",
- RPC_FLVR_POLICY(req->rq_flvr.sf_rpc),
- RPC_FLVR_POLICY(rpc_flvr));
+ SPTLRPC_FLVR_POLICY(req->rq_flvr.sf_rpc),
+ SPTLRPC_FLVR_POLICY(flvr));
RETURN(-EPROTO);
}
/* do nothing if it's null policy; otherwise unpack the
* wrapper message */
- if (RPC_FLVR_POLICY(rpc_flvr) != SPTLRPC_POLICY_NULL &&
+ if (SPTLRPC_FLVR_POLICY(flvr) != SPTLRPC_POLICY_NULL &&
lustre_unpack_msg(req->rq_repdata, req->rq_repdata_len))
RETURN(-EPROTO);
- switch (RPC_FLVR_SVC(req->rq_flvr.sf_rpc)) {
+ switch (SPTLRPC_FLVR_SVC(req->rq_flvr.sf_rpc)) {
case SPTLRPC_SVC_NULL:
case SPTLRPC_SVC_AUTH:
case SPTLRPC_SVC_INTG:
EXPORT_SYMBOL(sptlrpc_sec_put);
/*
- * it's policy module responsible for taking refrence of import
+ * policy module is responsible for taking a reference of the import
*/
static
struct ptlrpc_sec * sptlrpc_sec_create(struct obd_import *imp,
{
struct ptlrpc_sec_policy *policy;
struct ptlrpc_sec *sec;
+ char str[32];
ENTRY;
if (svc_ctx) {
CDEBUG(D_SEC, "%s %s: reverse sec using flavor %s\n",
imp->imp_obd->obd_type->typ_name,
imp->imp_obd->obd_name,
- sptlrpc_rpcflavor2name(sf->sf_rpc));
+ sptlrpc_flavor2name(sf, str, sizeof(str)));
policy = sptlrpc_policy_get(svc_ctx->sc_policy);
sf->sf_flags |= PTLRPC_SEC_FL_REVERSE | PTLRPC_SEC_FL_ROOTONLY;
CDEBUG(D_SEC, "%s %s: select security flavor %s\n",
imp->imp_obd->obd_type->typ_name,
imp->imp_obd->obd_name,
- sptlrpc_rpcflavor2name(sf->sf_rpc));
+ sptlrpc_flavor2name(sf, str, sizeof(str)));
- policy = sptlrpc_rpcflavor2policy(sf->sf_rpc);
+ policy = sptlrpc_wireflavor2policy(sf->sf_rpc);
if (!policy) {
CERROR("invalid flavor 0x%x\n", sf->sf_rpc);
RETURN(NULL);
}
}
+static inline
+int flavor_equal(struct sptlrpc_flavor *sf1, struct sptlrpc_flavor *sf2)
+{
+ return (memcmp(sf1, sf2, sizeof(*sf1)) == 0);
+}
+
+static inline
+void flavor_copy(struct sptlrpc_flavor *dst, struct sptlrpc_flavor *src)
+{
+ *dst = *src;
+}
+
static void sptlrpc_import_sec_adapt_inplace(struct obd_import *imp,
struct ptlrpc_sec *sec,
struct sptlrpc_flavor *sf)
{
- if (sf->sf_bulk_ciph != sec->ps_flvr.sf_bulk_ciph ||
- sf->sf_bulk_hash != sec->ps_flvr.sf_bulk_hash) {
- CWARN("imp %p (%s->%s): changing bulk flavor %s/%s -> %s/%s\n",
- imp, imp->imp_obd->obd_name,
- obd_uuid2str(&imp->imp_connection->c_remote_uuid),
- sptlrpc_get_ciph_name(sec->ps_flvr.sf_bulk_ciph),
- sptlrpc_get_hash_name(sec->ps_flvr.sf_bulk_hash),
- sptlrpc_get_ciph_name(sf->sf_bulk_ciph),
- sptlrpc_get_hash_name(sf->sf_bulk_hash));
-
- spin_lock(&sec->ps_lock);
- sec->ps_flvr.sf_bulk_ciph = sf->sf_bulk_ciph;
- sec->ps_flvr.sf_bulk_hash = sf->sf_bulk_hash;
- spin_unlock(&sec->ps_lock);
- }
+ char str1[32], str2[32];
- if (!equi(sf->sf_flags & PTLRPC_SEC_FL_UDESC,
- sec->ps_flvr.sf_flags & PTLRPC_SEC_FL_UDESC)) {
- CWARN("imp %p (%s->%s): %s shipping user descriptor\n",
- imp, imp->imp_obd->obd_name,
- obd_uuid2str(&imp->imp_connection->c_remote_uuid),
- (sf->sf_flags & PTLRPC_SEC_FL_UDESC) ? "start" : "stop");
+ if (sec->ps_flvr.sf_flags != sf->sf_flags)
+ CWARN("changing sec flags: %s -> %s\n",
+ sptlrpc_secflags2str(sec->ps_flvr.sf_flags,
+ str1, sizeof(str1)),
+ sptlrpc_secflags2str(sf->sf_flags,
+ str2, sizeof(str2)));
- spin_lock(&sec->ps_lock);
- sec->ps_flvr.sf_flags &= ~PTLRPC_SEC_FL_UDESC;
- sec->ps_flvr.sf_flags |= sf->sf_flags & PTLRPC_SEC_FL_UDESC;
- spin_unlock(&sec->ps_lock);
- }
+ spin_lock(&sec->ps_lock);
+ flavor_copy(&sec->ps_flvr, sf);
+ spin_unlock(&sec->ps_lock);
}
/*
- * for normal import, @svc_ctx should be NULL and @rpc_flavor is ignored;
- * for reverse import, @svc_ctx and @rpc_flavor is from incoming request.
+ * for normal import, @svc_ctx should be NULL and @flvr is ignored;
+ * for reverse import, @svc_ctx and @flvr are from the incoming request.
*/
int sptlrpc_import_sec_adapt(struct obd_import *imp,
struct ptlrpc_svc_ctx *svc_ctx,
- __u16 rpc_flavor)
+ struct sptlrpc_flavor *flvr)
{
struct ptlrpc_connection *conn;
struct sptlrpc_flavor sf;
struct ptlrpc_sec *sec, *newsec;
enum lustre_sec_part sp;
+ char str[24];
int rc;
might_sleep();
sp = imp->imp_obd->u.cli.cl_sp_me;
} else {
/* reverse import, determine flavor from incoming request */
- sf.sf_rpc = rpc_flavor;
- sf.sf_bulk_ciph = BULK_CIPH_ALG_NULL;
- sf.sf_bulk_hash = BULK_HASH_ALG_NULL;
- sf.sf_flags = PTLRPC_SEC_FL_REVERSE | PTLRPC_SEC_FL_ROOTONLY;
+ sf = *flvr;
+
+ if (sf.sf_rpc != SPTLRPC_FLVR_NULL)
+ sf.sf_flags = PTLRPC_SEC_FL_REVERSE |
+ PTLRPC_SEC_FL_ROOTONLY;
sp = sptlrpc_target_sec_part(imp->imp_obd);
}
sec = sptlrpc_import_sec_ref(imp);
if (sec) {
- if (svc_ctx == NULL) {
- /* normal import, only check rpc flavor, if just bulk
- * flavor or flags changed, we can handle it on the fly
- * without switching sec. */
- if (sf.sf_rpc == sec->ps_flvr.sf_rpc) {
- sptlrpc_import_sec_adapt_inplace(imp, sec, &sf);
-
- rc = 0;
- goto out;
- }
- } else {
- /* reverse import, do not compare bulk flavor */
- if (sf.sf_rpc == sec->ps_flvr.sf_rpc) {
- rc = 0;
- goto out;
- }
- }
+ char str2[24];
+
+ if (flavor_equal(&sf, &sec->ps_flvr))
+ goto out;
CWARN("%simport %p (%s%s%s): changing flavor "
- "(%s, %s/%s) -> (%s, %s/%s)\n",
- svc_ctx ? "reverse " : "",
+ "%s -> %s\n", svc_ctx ? "reverse " : "",
imp, imp->imp_obd->obd_name,
svc_ctx == NULL ? "->" : "<-",
obd_uuid2str(&conn->c_remote_uuid),
- sptlrpc_rpcflavor2name(sec->ps_flvr.sf_rpc),
- sptlrpc_get_hash_name(sec->ps_flvr.sf_bulk_hash),
- sptlrpc_get_ciph_name(sec->ps_flvr.sf_bulk_ciph),
- sptlrpc_rpcflavor2name(sf.sf_rpc),
- sptlrpc_get_hash_name(sf.sf_bulk_hash),
- sptlrpc_get_ciph_name(sf.sf_bulk_ciph));
+ sptlrpc_flavor2name(&sec->ps_flvr, str, sizeof(str)),
+ sptlrpc_flavor2name(&sf, str2, sizeof(str2)));
+
+ if (SPTLRPC_FLVR_POLICY(sf.sf_rpc) ==
+ SPTLRPC_FLVR_POLICY(sec->ps_flvr.sf_rpc) &&
+ SPTLRPC_FLVR_MECH(sf.sf_rpc) ==
+ SPTLRPC_FLVR_MECH(sec->ps_flvr.sf_rpc)) {
+ sptlrpc_import_sec_adapt_inplace(imp, sec, &sf);
+ goto out;
+ }
} else {
- CWARN("%simport %p (%s%s%s) netid %x: "
- "select initial flavor (%s, %s/%s)\n",
+ CWARN("%simport %p (%s%s%s) netid %x: select flavor %s\n",
svc_ctx == NULL ? "" : "reverse ",
imp, imp->imp_obd->obd_name,
svc_ctx == NULL ? "->" : "<-",
obd_uuid2str(&conn->c_remote_uuid),
LNET_NIDNET(conn->c_self),
- sptlrpc_rpcflavor2name(sf.sf_rpc),
- sptlrpc_get_hash_name(sf.sf_bulk_hash),
- sptlrpc_get_ciph_name(sf.sf_bulk_ciph));
+ sptlrpc_flavor2name(&sf, str, sizeof(str)));
}
mutex_down(&imp->imp_sec_mutex);
return 1;
if ((req->rq_ctx_init || req->rq_ctx_fini) &&
- RPC_FLVR_POLICY(exp->sf_rpc) == RPC_FLVR_POLICY(flvr->sf_rpc) &&
- RPC_FLVR_MECH(exp->sf_rpc) == RPC_FLVR_MECH(flvr->sf_rpc))
+ SPTLRPC_FLVR_POLICY(exp->sf_rpc) ==
+ SPTLRPC_FLVR_POLICY(flvr->sf_rpc) &&
+ SPTLRPC_FLVR_MECH(exp->sf_rpc) == SPTLRPC_FLVR_MECH(flvr->sf_rpc))
return 1;
return 0;
spin_unlock(&exp->exp_lock);
return sptlrpc_import_sec_adapt(exp->exp_imp_reverse,
- req->rq_svc_ctx, flavor.sf_rpc);
+ req->rq_svc_ctx, &flavor);
}
/* if it equals to the current flavor, we accept it, but need to
return sptlrpc_import_sec_adapt(exp->exp_imp_reverse,
req->rq_svc_ctx,
- flavor.sf_rpc);
+ &flavor);
} else {
CDEBUG(D_SEC, "exp %p (%x|%x|%x): is current flavor, "
"install rvs ctx\n", exp, exp->exp_flvr.sf_rpc,
exp->exp_connection->c_peer.nid,
&new_flvr);
if (exp->exp_flvr_changed ||
- memcmp(&new_flvr, &exp->exp_flvr, sizeof(new_flvr))) {
+ !flavor_equal(&new_flvr, &exp->exp_flvr)) {
exp->exp_flvr_old[1] = new_flvr;
exp->exp_flvr_expire[1] = 0;
exp->exp_flvr_changed = 1;
int sptlrpc_svc_unwrap_request(struct ptlrpc_request *req)
{
struct ptlrpc_sec_policy *policy;
- struct lustre_msg *msg = req->rq_reqbuf;
- int rc;
+ struct lustre_msg *msg = req->rq_reqbuf;
+ int rc;
ENTRY;
LASSERT(msg);
LASSERT(req->rq_reqmsg == NULL);
LASSERT(req->rq_repmsg == NULL);
+ LASSERT(req->rq_svc_ctx == NULL);
req->rq_sp_from = LUSTRE_SP_ANY;
req->rq_auth_uid = INVALID_UID;
}
/*
- * v2 message.
+ * only a v2 message is expected.
*/
- if (msg->lm_magic == LUSTRE_MSG_MAGIC_V2)
- req->rq_flvr.sf_rpc = WIRE_FLVR_RPC(msg->lm_secflvr);
- else
- req->rq_flvr.sf_rpc = WIRE_FLVR_RPC(__swab32(msg->lm_secflvr));
+ switch (msg->lm_magic) {
+ case LUSTRE_MSG_MAGIC_V2:
+ req->rq_flvr.sf_rpc = WIRE_FLVR(msg->lm_secflvr);
+ break;
+ case LUSTRE_MSG_MAGIC_V2_SWABBED:
+ req->rq_flvr.sf_rpc = WIRE_FLVR(__swab32(msg->lm_secflvr));
+ break;
+ default:
+ CERROR("invalid magic %x\n", msg->lm_magic);
+ RETURN(SECSVC_DROP);
+ }
/* unpack the wrapper message if the policy is not null */
- if ((RPC_FLVR_POLICY(req->rq_flvr.sf_rpc) != SPTLRPC_POLICY_NULL) &&
- lustre_unpack_msg(msg, req->rq_reqdata_len))
+ if (SPTLRPC_FLVR_POLICY(req->rq_flvr.sf_rpc) != SPTLRPC_POLICY_NULL &&
+ lustre_unpack_msg(msg, req->rq_reqdata_len)) {
+ CERROR("invalid wrapper msg format\n");
RETURN(SECSVC_DROP);
+ }
- policy = sptlrpc_rpcflavor2policy(req->rq_flvr.sf_rpc);
+ policy = sptlrpc_wireflavor2policy(req->rq_flvr.sf_rpc);
if (!policy) {
CERROR("unsupported rpc flavor %x\n", req->rq_flvr.sf_rpc);
RETURN(SECSVC_DROP);
rc = policy->sp_sops->accept(req);
LASSERT(req->rq_reqmsg || rc != SECSVC_OK);
+ LASSERT(req->rq_svc_ctx || rc == SECSVC_DROP);
sptlrpc_policy_put(policy);
/* sanity check for the request source */
rc = sptlrpc_svc_check_from(req, rc);
-
- /* FIXME move to proper place */
- if (rc == SECSVC_OK) {
- __u32 opc = lustre_msg_get_opc(req->rq_reqmsg);
-
- if (opc == OST_WRITE)
- req->rq_bulk_write = 1;
- else if (opc == OST_READ)
- req->rq_bulk_read = 1;
- }
-
- LASSERT(req->rq_svc_ctx || rc == SECSVC_DROP);
RETURN(rc);
}
{
struct ptlrpc_cli_ctx *ctx;
+ LASSERT(req->rq_bulk_read || req->rq_bulk_write);
+
if (!req->rq_pack_bulk)
return 0;
- LASSERT(req->rq_bulk_read || req->rq_bulk_write);
-
ctx = req->rq_cli_ctx;
if (ctx->cc_ops->wrap_bulk)
return ctx->cc_ops->wrap_bulk(ctx, req, desc);
}
EXPORT_SYMBOL(sptlrpc_cli_wrap_bulk);
-static
-void pga_to_bulk_desc(int nob, obd_count pg_count, struct brw_page **pga,
- struct ptlrpc_bulk_desc *desc)
-{
- int i;
-
- LASSERT(pga);
- LASSERT(*pga);
-
- for (i = 0; i < pg_count && nob > 0; i++) {
-#ifdef __KERNEL__
- desc->bd_iov[i].kiov_page = pga[i]->pg;
- desc->bd_iov[i].kiov_len = pga[i]->count > nob ?
- nob : pga[i]->count;
- desc->bd_iov[i].kiov_offset = pga[i]->off & ~CFS_PAGE_MASK;
-#else
- /* FIXME currently liblustre doesn't support bulk encryption.
- * if we do, check again following may not be right. */
- LASSERTF(0, "Bulk encryption not implemented for liblustre\n");
- desc->bd_iov[i].iov_base = pga[i]->pg->addr;
- desc->bd_iov[i].iov_len = pga[i]->count > nob ?
- nob : pga[i]->count;
-#endif
-
- desc->bd_iov_count++;
- nob -= pga[i]->count;
- }
-}
-
+/*
+ * return the nob of plain text actually received, or an error code.
+ */
int sptlrpc_cli_unwrap_bulk_read(struct ptlrpc_request *req,
- int nob, obd_count pg_count,
- struct brw_page **pga)
+ struct ptlrpc_bulk_desc *desc,
+ int nob)
{
- struct ptlrpc_bulk_desc *desc;
- struct ptlrpc_cli_ctx *ctx;
- int rc = 0;
-
- if (!req->rq_pack_bulk)
- return 0;
+ struct ptlrpc_cli_ctx *ctx;
+ int rc;
LASSERT(req->rq_bulk_read && !req->rq_bulk_write);
- OBD_ALLOC(desc, offsetof(struct ptlrpc_bulk_desc, bd_iov[pg_count]));
- if (desc == NULL) {
- CERROR("out of memory, can't verify bulk read data\n");
- return -ENOMEM;
- }
-
- pga_to_bulk_desc(nob, pg_count, pga, desc);
+ if (!req->rq_pack_bulk)
+ return desc->bd_nob_transferred;
ctx = req->rq_cli_ctx;
- if (ctx->cc_ops->unwrap_bulk)
+ if (ctx->cc_ops->unwrap_bulk) {
rc = ctx->cc_ops->unwrap_bulk(ctx, req, desc);
-
- OBD_FREE(desc, offsetof(struct ptlrpc_bulk_desc, bd_iov[pg_count]));
-
- return rc;
+ if (rc < 0)
+ return rc;
+ }
+ return desc->bd_nob_transferred;
}
EXPORT_SYMBOL(sptlrpc_cli_unwrap_bulk_read);
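
The calling convention changes here: on success the function now returns
the number of plain-text bytes received rather than 0. A caller sketch,
illustration only (the function and variable names are hypothetical):

static int example_check_read(struct ptlrpc_request *req,
                              struct ptlrpc_bulk_desc *desc,
                              int requested)
{
        int rc = sptlrpc_cli_unwrap_bulk_read(req, desc, requested);

        if (rc < 0)
                return rc;      /* unwrap or verification failed */
        if (rc < requested)
                CDEBUG(D_SEC, "short read: %d < %d\n", rc, requested);
        return 0;
}
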
+/*
+ * return 0 on success, or an error code on failure.
+ */
int sptlrpc_cli_unwrap_bulk_write(struct ptlrpc_request *req,
struct ptlrpc_bulk_desc *desc)
{
- struct ptlrpc_cli_ctx *ctx;
+ struct ptlrpc_cli_ctx *ctx;
+ int rc;
+
+ LASSERT(!req->rq_bulk_read && req->rq_bulk_write);
if (!req->rq_pack_bulk)
return 0;
- LASSERT(!req->rq_bulk_read && req->rq_bulk_write);
-
ctx = req->rq_cli_ctx;
- if (ctx->cc_ops->unwrap_bulk)
- return ctx->cc_ops->unwrap_bulk(ctx, req, desc);
+ if (ctx->cc_ops->unwrap_bulk) {
+ rc = ctx->cc_ops->unwrap_bulk(ctx, req, desc);
+ if (rc < 0)
+ return rc;
+ }
+
+ /*
+ * if everything went right, nob should equal nob_transferred.
+ * in case of privacy mode, nob_transferred needs to be adjusted.
+ */
+ if (desc->bd_nob != desc->bd_nob_transferred) {
+ CERROR("nob %d doesn't match transferred nob %d",
+ desc->bd_nob, desc->bd_nob_transferred);
+ return -EPROTO;
+ }
return 0;
}
{
struct ptlrpc_svc_ctx *ctx;
+ LASSERT(req->rq_bulk_read);
+
if (!req->rq_pack_bulk)
return 0;
- LASSERT(req->rq_bulk_read || req->rq_bulk_write);
-
ctx = req->rq_svc_ctx;
if (ctx->sc_policy->sp_sops->wrap_bulk)
return ctx->sc_policy->sp_sops->wrap_bulk(req, desc);
struct ptlrpc_bulk_desc *desc)
{
struct ptlrpc_svc_ctx *ctx;
+ int rc;
+
+ LASSERT(req->rq_bulk_write);
+
+ if (desc->bd_nob_transferred != desc->bd_nob &&
+ SPTLRPC_FLVR_BULK_SVC(req->rq_flvr.sf_rpc) !=
+ SPTLRPC_BULK_SVC_PRIV) {
+ DEBUG_REQ(D_ERROR, req, "truncated bulk GET %d(%d)",
+ desc->bd_nob_transferred, desc->bd_nob);
+ return -ETIMEDOUT;
+ }
if (!req->rq_pack_bulk)
return 0;
- LASSERT(req->rq_bulk_read || req->rq_bulk_write);
-
ctx = req->rq_svc_ctx;
- if (ctx->sc_policy->sp_sops->unwrap_bulk);
- return ctx->sc_policy->sp_sops->unwrap_bulk(req, desc);
+ if (ctx->sc_policy->sp_sops->unwrap_bulk) {
+ rc = ctx->sc_policy->sp_sops->unwrap_bulk(req, desc);
+ if (rc)
+ CERROR("error unwrap bulk: %d\n", rc);
+ }
+ /* return 0 to allow the reply to be sent */
return 0;
}
EXPORT_SYMBOL(sptlrpc_svc_unwrap_bulk);
+int sptlrpc_svc_prep_bulk(struct ptlrpc_request *req,
+ struct ptlrpc_bulk_desc *desc)
+{
+ struct ptlrpc_svc_ctx *ctx;
+
+ LASSERT(req->rq_bulk_write);
+
+ if (!req->rq_pack_bulk)
+ return 0;
+
+ ctx = req->rq_svc_ctx;
+ if (ctx->sc_policy->sp_sops->prep_bulk)
+ return ctx->sc_policy->sp_sops->prep_bulk(req, desc);
+
+ return 0;
+}
+EXPORT_SYMBOL(sptlrpc_svc_prep_bulk);
/****************************************
* user descriptor helpers *
}
EXPORT_SYMBOL(sec2target_str);
+/*
+ * return true if the bulk data is protected
+ */
+int sptlrpc_flavor_has_bulk(struct sptlrpc_flavor *flvr)
+{
+ switch (SPTLRPC_FLVR_BULK_SVC(flvr->sf_rpc)) {
+ case SPTLRPC_BULK_SVC_INTG:
+ case SPTLRPC_BULK_SVC_PRIV:
+ return 1;
+ default:
+ return 0;
+ }
+}
+EXPORT_SYMBOL(sptlrpc_flavor_has_bulk);
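+
This predicate mirrors the rq_pack_bulk logic set up earlier in the
patch. An illustrative sketch of how the two fit together (the wrapper
function is hypothetical):

static int example_needs_bulk_desc(struct ptlrpc_request *req)
{
        return (req->rq_bulk_read || req->rq_bulk_write) &&
               sptlrpc_flavor_has_bulk(&req->rq_flvr);
}
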
+
/****************************************
* crypto API helper/alloc blkciper *
****************************************/
static inline void enc_pools_wakeup(void)
{
+ LASSERT_SPIN_LOCKED(&page_pools.epp_lock);
+ LASSERT(page_pools.epp_waitqlen >= 0);
+
if (unlikely(page_pools.epp_waitqlen)) {
- LASSERT(page_pools.epp_waitqlen > 0);
LASSERT(cfs_waitq_active(&page_pools.epp_waitq));
cfs_waitq_broadcast(&page_pools.epp_waitq);
}
if (page_pools.epp_total_pages < page_needed)
return 1;
- /* if we just did a shrink due to memory tight, we'd better
- * wait a while to grow again.
+ /*
+ * we wanted to return 0 here if a shrink happened just a moment
+ * ago, but that may cause a deadlock if both the client and the OST
+ * live on a single node.
*/
+#if 0
if (now - page_pools.epp_last_shrink < 2)
return 0;
+#endif
/*
* here we perhaps need to consider other factors like wait queue
int p_idx, g_idx;
int i;
- LASSERT(desc->bd_max_iov > 0);
- LASSERT(desc->bd_max_iov <= page_pools.epp_max_pages);
+ LASSERT(desc->bd_iov_count > 0);
+ LASSERT(desc->bd_iov_count <= page_pools.epp_max_pages);
- /* resent bulk, enc pages might have been allocated previously */
- if (desc->bd_enc_pages != NULL)
+ /* resent bulk, enc iov might have been allocated previously */
+ if (desc->bd_enc_iov != NULL)
return 0;
- OBD_ALLOC(desc->bd_enc_pages,
- desc->bd_max_iov * sizeof(*desc->bd_enc_pages));
- if (desc->bd_enc_pages == NULL)
+ OBD_ALLOC(desc->bd_enc_iov,
+ desc->bd_iov_count * sizeof(*desc->bd_enc_iov));
+ if (desc->bd_enc_iov == NULL)
return -ENOMEM;
spin_lock(&page_pools.epp_lock);
page_pools.epp_st_access++;
again:
- if (unlikely(page_pools.epp_free_pages < desc->bd_max_iov)) {
+ if (unlikely(page_pools.epp_free_pages < desc->bd_iov_count)) {
if (tick == 0)
tick = cfs_time_current();
now = cfs_time_current_sec();
page_pools.epp_st_missings++;
- page_pools.epp_pages_short += desc->bd_max_iov;
+ page_pools.epp_pages_short += desc->bd_iov_count;
- if (enc_pools_should_grow(desc->bd_max_iov, now)) {
+ if (enc_pools_should_grow(desc->bd_iov_count, now)) {
page_pools.epp_growing = 1;
spin_unlock(&page_pools.epp_lock);
spin_lock(&page_pools.epp_lock);
page_pools.epp_growing = 0;
+
+ enc_pools_wakeup();
} else {
if (++page_pools.epp_waitqlen >
page_pools.epp_st_max_wqlen)
spin_unlock(&page_pools.epp_lock);
cfs_waitq_wait(&waitlink, CFS_TASK_UNINT);
cfs_waitq_del(&page_pools.epp_waitq, &waitlink);
- spin_lock(&page_pools.epp_lock);
-
LASSERT(page_pools.epp_waitqlen > 0);
+ spin_lock(&page_pools.epp_lock);
page_pools.epp_waitqlen--;
}
- LASSERT(page_pools.epp_pages_short >= desc->bd_max_iov);
- page_pools.epp_pages_short -= desc->bd_max_iov;
+ LASSERT(page_pools.epp_pages_short >= desc->bd_iov_count);
+ page_pools.epp_pages_short -= desc->bd_iov_count;
this_idle = 0;
goto again;
}
/* proceed with rest of allocation */
- page_pools.epp_free_pages -= desc->bd_max_iov;
+ page_pools.epp_free_pages -= desc->bd_iov_count;
p_idx = page_pools.epp_free_pages / PAGES_PER_POOL;
g_idx = page_pools.epp_free_pages % PAGES_PER_POOL;
- for (i = 0; i < desc->bd_max_iov; i++) {
+ for (i = 0; i < desc->bd_iov_count; i++) {
LASSERT(page_pools.epp_pools[p_idx][g_idx] != NULL);
- desc->bd_enc_pages[i] = page_pools.epp_pools[p_idx][g_idx];
+ desc->bd_enc_iov[i].kiov_page =
+ page_pools.epp_pools[p_idx][g_idx];
page_pools.epp_pools[p_idx][g_idx] = NULL;
if (++g_idx == PAGES_PER_POOL) {
int p_idx, g_idx;
int i;
- if (desc->bd_enc_pages == NULL)
- return;
- if (desc->bd_max_iov == 0)
+ if (desc->bd_enc_iov == NULL)
return;
+ LASSERT(desc->bd_iov_count > 0);
+
spin_lock(&page_pools.epp_lock);
p_idx = page_pools.epp_free_pages / PAGES_PER_POOL;
g_idx = page_pools.epp_free_pages % PAGES_PER_POOL;
- LASSERT(page_pools.epp_free_pages + desc->bd_max_iov <=
+ LASSERT(page_pools.epp_free_pages + desc->bd_iov_count <=
page_pools.epp_total_pages);
LASSERT(page_pools.epp_pools[p_idx]);
- for (i = 0; i < desc->bd_max_iov; i++) {
- LASSERT(desc->bd_enc_pages[i] != NULL);
+ for (i = 0; i < desc->bd_iov_count; i++) {
+ LASSERT(desc->bd_enc_iov[i].kiov_page != NULL);
LASSERT(g_idx != 0 || page_pools.epp_pools[p_idx]);
LASSERT(page_pools.epp_pools[p_idx][g_idx] == NULL);
- page_pools.epp_pools[p_idx][g_idx] = desc->bd_enc_pages[i];
+ page_pools.epp_pools[p_idx][g_idx] =
+ desc->bd_enc_iov[i].kiov_page;
if (++g_idx == PAGES_PER_POOL) {
p_idx++;
}
}
- page_pools.epp_free_pages += desc->bd_max_iov;
+ page_pools.epp_free_pages += desc->bd_iov_count;
enc_pools_wakeup();
spin_unlock(&page_pools.epp_lock);
- OBD_FREE(desc->bd_enc_pages,
- desc->bd_max_iov * sizeof(*desc->bd_enc_pages));
- desc->bd_enc_pages = NULL;
+ OBD_FREE(desc->bd_enc_iov,
+ desc->bd_iov_count * sizeof(*desc->bd_enc_iov));
+ desc->bd_enc_iov = NULL;
}
EXPORT_SYMBOL(sptlrpc_enc_pool_put_pages);
spin_unlock(&page_pools.epp_lock);
if (need_grow) {
- enc_pools_add_pages(PTLRPC_MAX_BRW_PAGES);
+ enc_pools_add_pages(2 * PTLRPC_MAX_BRW_PAGES);
spin_lock(&page_pools.epp_lock);
page_pools.epp_growing = 0;
[BULK_HASH_ALG_SHA256] = { "sha256", "sha256", 32 },
[BULK_HASH_ALG_SHA384] = { "sha384", "sha384", 48 },
[BULK_HASH_ALG_SHA512] = { "sha512", "sha512", 64 },
- [BULK_HASH_ALG_WP256] = { "wp256", "wp256", 32 },
- [BULK_HASH_ALG_WP384] = { "wp384", "wp384", 48 },
- [BULK_HASH_ALG_WP512] = { "wp512", "wp512", 64 },
};
const struct sptlrpc_hash_type *sptlrpc_get_hash_type(__u8 hash_alg)
}
EXPORT_SYMBOL(sptlrpc_get_hash_name);
-int bulk_sec_desc_size(__u8 hash_alg, int request, int read)
+__u8 sptlrpc_get_hash_alg(const char *algname)
{
- int size = sizeof(struct ptlrpc_bulk_sec_desc);
-
- LASSERT(hash_alg < BULK_HASH_ALG_MAX);
-
- /* read request don't need extra data */
- if (!(read && request))
- size += hash_types[hash_alg].sht_size;
+ int i;
- return size;
+ for (i = 0; i < BULK_HASH_ALG_MAX; i++)
+ if (!strcmp(hash_types[i].sht_name, algname))
+ break;
+ return i;
}
-EXPORT_SYMBOL(bulk_sec_desc_size);
+EXPORT_SYMBOL(sptlrpc_get_hash_alg);
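+
Note that the lookup returns BULK_HASH_ALG_MAX when the name is unknown,
so callers must range-check the result, as sptlrpc_parse_flavor does
later in this patch. A sketch, illustration only (the wrapper function
is hypothetical):

static int example_lookup_hash(const char *name)
{
        __u8 alg = sptlrpc_get_hash_alg(name);

        return alg < BULK_HASH_ALG_MAX ? alg : -1;
}
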
int bulk_sec_desc_unpack(struct lustre_msg *msg, int offset)
{
struct ptlrpc_bulk_sec_desc *bsd;
- int size = msg->lm_buflens[offset];
+ int size = msg->lm_buflens[offset];
bsd = lustre_msg_buf(msg, offset, sizeof(*bsd));
if (bsd == NULL) {
return -EINVAL;
}
- /* nothing to swab */
+ if (lustre_msg_swabbed(msg)) {
+ __swab32s(&bsd->bsd_nob);
+ }
if (unlikely(bsd->bsd_version != 0)) {
CERROR("Unexpected version %u\n", bsd->bsd_version);
return -EPROTO;
}
- if (unlikely(bsd->bsd_flags != 0)) {
- CERROR("Unexpected flags %x\n", bsd->bsd_flags);
+ if (unlikely(bsd->bsd_type >= SPTLRPC_BULK_MAX)) {
+ CERROR("Invalid type %u\n", bsd->bsd_type);
return -EPROTO;
}
- if (unlikely(!sptlrpc_get_hash_type(bsd->bsd_hash_alg))) {
- CERROR("Unsupported checksum algorithm %u\n",
- bsd->bsd_hash_alg);
- return -EINVAL;
- }
+ /* FIXME more sanity check here */
- if (unlikely(!sptlrpc_get_ciph_type(bsd->bsd_ciph_alg))) {
- CERROR("Unsupported cipher algorithm %u\n",
- bsd->bsd_ciph_alg);
- return -EINVAL;
- }
-
- if (unlikely(size > sizeof(*bsd)) &&
- size < sizeof(*bsd) + hash_types[bsd->bsd_hash_alg].sht_size) {
- CERROR("Mal-formed checksum data: csum alg %u, size %d\n",
- bsd->bsd_hash_alg, size);
- return -EINVAL;
+ if (unlikely(bsd->bsd_svc != SPTLRPC_BULK_SVC_NULL &&
+ bsd->bsd_svc != SPTLRPC_BULK_SVC_INTG &&
+ bsd->bsd_svc != SPTLRPC_BULK_SVC_PRIV)) {
+ CERROR("Invalid svc %u\n", bsd->bsd_svc);
+ return -EPROTO;
}
return 0;
return 0;
}
-static int do_bulk_checksum(struct ptlrpc_bulk_desc *desc, __u32 alg, void *buf)
+int sptlrpc_get_bulk_checksum(struct ptlrpc_bulk_desc *desc, __u8 alg,
+ void *buf, int buflen)
{
struct hash_desc hdesc;
- struct scatterlist *sl;
- int i, rc = 0, bytes = 0;
+ int hashsize;
+ char hashbuf[64];
+ struct scatterlist sl;
+ int i;
- LASSERT(alg > BULK_HASH_ALG_NULL &&
- alg < BULK_HASH_ALG_MAX);
+ LASSERT(alg > BULK_HASH_ALG_NULL && alg < BULK_HASH_ALG_MAX);
+ LASSERT(buflen >= 4);
switch (alg) {
case BULK_HASH_ALG_ADLER32:
CERROR("Unable to allocate TFM %s\n", hash_types[alg].sht_name);
return -ENOMEM;
}
+
hdesc.flags = 0;
+ ll_crypto_hash_init(&hdesc);
- OBD_ALLOC(sl, sizeof(*sl) * desc->bd_iov_count);
- if (sl == NULL) {
- rc = -ENOMEM;
- goto out_tfm;
- }
+ hashsize = ll_crypto_hash_digestsize(hdesc.tfm);
for (i = 0; i < desc->bd_iov_count; i++) {
- sl[i].page = desc->bd_iov[i].kiov_page;
- sl[i].offset = desc->bd_iov[i].kiov_offset & ~CFS_PAGE_MASK;
- sl[i].length = desc->bd_iov[i].kiov_len;
- bytes += desc->bd_iov[i].kiov_len;
+ sl.page = desc->bd_iov[i].kiov_page;
+ sl.offset = desc->bd_iov[i].kiov_offset;
+ sl.length = desc->bd_iov[i].kiov_len;
+ ll_crypto_hash_update(&hdesc, &sl, sl.length);
}
- ll_crypto_hash_init(&hdesc);
- ll_crypto_hash_update(&hdesc, sl, bytes);
- ll_crypto_hash_final(&hdesc, buf);
-
- OBD_FREE(sl, sizeof(*sl) * desc->bd_iov_count);
+ if (hashsize > buflen) {
+ ll_crypto_hash_final(&hdesc, hashbuf);
+ memcpy(buf, hashbuf, buflen);
+ } else {
+ ll_crypto_hash_final(&hdesc, buf);
+ }
-out_tfm:
ll_crypto_free_hash(hdesc.tfm);
- return rc;
+ return 0;
}
+EXPORT_SYMBOL(sptlrpc_get_bulk_checksum);
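+
A usage sketch for the exported checksum helper, illustration only (the
wrapper function is hypothetical): the digest is truncated if the buffer
is smaller than the digest size, and buflen must be at least 4 per the
LASSERT above.

static int example_bulk_csum(struct ptlrpc_bulk_desc *desc,
                             void *buf, int buflen)
{
        return sptlrpc_get_bulk_checksum(desc, BULK_HASH_ALG_ADLER32,
                                         buf, buflen);
}
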
#else /* !__KERNEL__ */
-static int do_bulk_checksum(struct ptlrpc_bulk_desc *desc, __u32 alg, void *buf)
+int sptlrpc_get_bulk_checksum(struct ptlrpc_bulk_desc *desc, __u8 alg,
+ void *buf, int buflen)
{
__u32 csum32;
int i;
}
#endif /* __KERNEL__ */
-
-/*
- * perform algorithm @alg checksum on @desc, store result in @buf.
- * if anything goes wrong, leave 'alg' be BULK_HASH_ALG_NULL.
- */
-static
-int generate_bulk_csum(struct ptlrpc_bulk_desc *desc, __u32 alg,
- struct ptlrpc_bulk_sec_desc *bsd, int bsdsize)
-{
- int rc;
-
- LASSERT(bsd);
- LASSERT(alg < BULK_HASH_ALG_MAX);
-
- bsd->bsd_hash_alg = BULK_HASH_ALG_NULL;
-
- if (alg == BULK_HASH_ALG_NULL)
- return 0;
-
- LASSERT(bsdsize >= sizeof(*bsd) + hash_types[alg].sht_size);
-
- rc = do_bulk_checksum(desc, alg, bsd->bsd_csum);
- if (rc == 0)
- bsd->bsd_hash_alg = alg;
-
- return rc;
-}
-
-static
-int verify_bulk_csum(struct ptlrpc_bulk_desc *desc, int read,
- struct ptlrpc_bulk_sec_desc *bsdv, int bsdvsize,
- struct ptlrpc_bulk_sec_desc *bsdr, int bsdrsize)
-{
- char *csum_p;
- char *buf = NULL;
- int csum_size, rc = 0;
-
- LASSERT(bsdv);
- LASSERT(bsdv->bsd_hash_alg < BULK_HASH_ALG_MAX);
-
- if (bsdr)
- bsdr->bsd_hash_alg = BULK_HASH_ALG_NULL;
-
- if (bsdv->bsd_hash_alg == BULK_HASH_ALG_NULL)
- return 0;
-
- /* for all supported algorithms */
- csum_size = hash_types[bsdv->bsd_hash_alg].sht_size;
-
- if (bsdvsize < sizeof(*bsdv) + csum_size) {
- CERROR("verifier size %d too small, require %d\n",
- bsdvsize, (int) sizeof(*bsdv) + csum_size);
- return -EINVAL;
- }
-
- if (bsdr) {
- LASSERT(bsdrsize >= sizeof(*bsdr) + csum_size);
- csum_p = (char *) bsdr->bsd_csum;
- } else {
- OBD_ALLOC(buf, csum_size);
- if (buf == NULL)
- return -EINVAL;
- csum_p = buf;
- }
-
- rc = do_bulk_checksum(desc, bsdv->bsd_hash_alg, csum_p);
-
- if (memcmp(bsdv->bsd_csum, csum_p, csum_size)) {
- CERROR("BAD %s CHECKSUM (%s), data mutated during "
- "transfer!\n", read ? "READ" : "WRITE",
- hash_types[bsdv->bsd_hash_alg].sht_name);
- rc = -EINVAL;
- } else {
- CDEBUG(D_SEC, "bulk %s checksum (%s) verified\n",
- read ? "read" : "write",
- hash_types[bsdv->bsd_hash_alg].sht_name);
- }
-
- if (bsdr) {
- bsdr->bsd_hash_alg = bsdv->bsd_hash_alg;
- memcpy(bsdr->bsd_csum, csum_p, csum_size);
- } else {
- LASSERT(buf);
- OBD_FREE(buf, csum_size);
- }
-
- return rc;
-}
-
-int bulk_csum_cli_request(struct ptlrpc_bulk_desc *desc, int read,
- __u32 alg, struct lustre_msg *rmsg, int roff)
-{
- struct ptlrpc_bulk_sec_desc *bsdr;
- int rsize, rc = 0;
-
- rsize = rmsg->lm_buflens[roff];
- bsdr = lustre_msg_buf(rmsg, roff, sizeof(*bsdr));
-
- LASSERT(bsdr);
- LASSERT(rsize >= sizeof(*bsdr));
- LASSERT(alg < BULK_HASH_ALG_MAX);
-
- if (read) {
- bsdr->bsd_hash_alg = alg;
- } else {
- rc = generate_bulk_csum(desc, alg, bsdr, rsize);
- if (rc)
- CERROR("bulk write: client failed to compute "
- "checksum: %d\n", rc);
-
- /* For sending we only compute the wrong checksum instead
- * of corrupting the data so it is still correct on a redo */
- if (rc == 0 && OBD_FAIL_CHECK(OBD_FAIL_OSC_CHECKSUM_SEND) &&
- bsdr->bsd_hash_alg != BULK_HASH_ALG_NULL)
- bsdr->bsd_csum[0] ^= 0x1;
- }
-
- return rc;
-}
-EXPORT_SYMBOL(bulk_csum_cli_request);
-
-int bulk_csum_cli_reply(struct ptlrpc_bulk_desc *desc, int read,
- struct lustre_msg *rmsg, int roff,
- struct lustre_msg *vmsg, int voff)
-{
- struct ptlrpc_bulk_sec_desc *bsdv, *bsdr;
- int rsize, vsize;
-
- rsize = rmsg->lm_buflens[roff];
- vsize = vmsg->lm_buflens[voff];
- bsdr = lustre_msg_buf(rmsg, roff, 0);
- bsdv = lustre_msg_buf(vmsg, voff, 0);
-
- if (bsdv == NULL || vsize < sizeof(*bsdv)) {
- CERROR("Invalid checksum verifier from server: size %d\n",
- vsize);
- return -EINVAL;
- }
-
- LASSERT(bsdr);
- LASSERT(rsize >= sizeof(*bsdr));
- LASSERT(vsize >= sizeof(*bsdv));
-
- if (bsdr->bsd_hash_alg != bsdv->bsd_hash_alg) {
- CERROR("bulk %s: checksum algorithm mismatch: client request "
- "%s but server reply with %s. try to use the new one "
- "for checksum verification\n",
- read ? "read" : "write",
- hash_types[bsdr->bsd_hash_alg].sht_name,
- hash_types[bsdv->bsd_hash_alg].sht_name);
- }
-
- if (read)
- return verify_bulk_csum(desc, 1, bsdv, vsize, NULL, 0);
- else {
- char *cli, *srv, *new = NULL;
- int csum_size = hash_types[bsdr->bsd_hash_alg].sht_size;
-
- LASSERT(bsdr->bsd_hash_alg < BULK_HASH_ALG_MAX);
- if (bsdr->bsd_hash_alg == BULK_HASH_ALG_NULL)
- return 0;
-
- if (vsize < sizeof(*bsdv) + csum_size) {
- CERROR("verifier size %d too small, require %d\n",
- vsize, (int) sizeof(*bsdv) + csum_size);
- return -EINVAL;
- }
-
- cli = (char *) (bsdr + 1);
- srv = (char *) (bsdv + 1);
-
- if (!memcmp(cli, srv, csum_size)) {
- /* checksum confirmed */
- CDEBUG(D_SEC, "bulk write checksum (%s) confirmed\n",
- hash_types[bsdr->bsd_hash_alg].sht_name);
- return 0;
- }
-
- /* checksum mismatch, re-compute a new one and compare with
- * others, give out proper warnings. */
- OBD_ALLOC(new, csum_size);
- if (new == NULL)
- return -ENOMEM;
-
- do_bulk_checksum(desc, bsdr->bsd_hash_alg, new);
-
- if (!memcmp(new, srv, csum_size)) {
- CERROR("BAD WRITE CHECKSUM (%s): pages were mutated "
- "on the client after we checksummed them\n",
- hash_types[bsdr->bsd_hash_alg].sht_name);
- } else if (!memcmp(new, cli, csum_size)) {
- CERROR("BAD WRITE CHECKSUM (%s): pages were mutated "
- "in transit\n",
- hash_types[bsdr->bsd_hash_alg].sht_name);
- } else {
- CERROR("BAD WRITE CHECKSUM (%s): pages were mutated "
- "in transit, and the current page contents "
- "don't match the originals and what the server "
- "received\n",
- hash_types[bsdr->bsd_hash_alg].sht_name);
- }
- OBD_FREE(new, csum_size);
-
- return -EINVAL;
- }
-}
-EXPORT_SYMBOL(bulk_csum_cli_reply);
-
-#ifdef __KERNEL__
-static void corrupt_bulk_data(struct ptlrpc_bulk_desc *desc)
-{
- char *ptr;
- unsigned int off, i;
-
- for (i = 0; i < desc->bd_iov_count; i++) {
- if (desc->bd_iov[i].kiov_len == 0)
- continue;
-
- ptr = cfs_kmap(desc->bd_iov[i].kiov_page);
- off = desc->bd_iov[i].kiov_offset & ~CFS_PAGE_MASK;
- ptr[off] ^= 0x1;
- cfs_kunmap(desc->bd_iov[i].kiov_page);
- return;
- }
-}
-#else
-static void corrupt_bulk_data(struct ptlrpc_bulk_desc *desc)
-{
-}
-#endif /* __KERNEL__ */
-
-int bulk_csum_svc(struct ptlrpc_bulk_desc *desc, int read,
- struct ptlrpc_bulk_sec_desc *bsdv, int vsize,
- struct ptlrpc_bulk_sec_desc *bsdr, int rsize)
-{
- int rc;
-
- LASSERT(vsize >= sizeof(*bsdv));
- LASSERT(rsize >= sizeof(*bsdr));
- LASSERT(bsdv && bsdr);
-
- if (read) {
- rc = generate_bulk_csum(desc, bsdv->bsd_hash_alg, bsdr, rsize);
- if (rc)
- CERROR("bulk read: server failed to generate %s "
- "checksum: %d\n",
- hash_types[bsdv->bsd_hash_alg].sht_name, rc);
-
- /* corrupt the data after we compute the checksum, to
- * simulate an OST->client data error */
- if (rc == 0 && OBD_FAIL_CHECK(OBD_FAIL_OSC_CHECKSUM_RECEIVE))
- corrupt_bulk_data(desc);
- } else {
- rc = verify_bulk_csum(desc, 0, bsdv, vsize, bsdr, rsize);
- }
-
- return rc;
-}
-EXPORT_SYMBOL(bulk_csum_svc);
-
-/****************************************
- * Helpers to assist policy modules to *
- * implement encryption funcationality *
- ****************************************/
-
-/* FIXME */
-#ifndef __KERNEL__
-#define CRYPTO_TFM_MODE_ECB (0)
-#define CRYPTO_TFM_MODE_CBC (1)
-#endif
-
-static struct sptlrpc_ciph_type cipher_types[] = {
- [BULK_CIPH_ALG_NULL] = {
- "null", "null", 0, 0, 0
- },
- [BULK_CIPH_ALG_ARC4] = {
- "arc4", "ecb(arc4)", 0, 0, 16
- },
- [BULK_CIPH_ALG_AES128] = {
- "aes128", "cbc(aes)", 0, 16, 16
- },
- [BULK_CIPH_ALG_AES192] = {
- "aes192", "cbc(aes)", 0, 16, 24
- },
- [BULK_CIPH_ALG_AES256] = {
- "aes256", "cbc(aes)", 0, 16, 32
- },
- [BULK_CIPH_ALG_CAST128] = {
- "cast128", "cbc(cast5)", 0, 8, 16
- },
- [BULK_CIPH_ALG_CAST256] = {
- "cast256", "cbc(cast6)", 0, 16, 32
- },
- [BULK_CIPH_ALG_TWOFISH128] = {
- "twofish128", "cbc(twofish)", 0, 16, 16
- },
- [BULK_CIPH_ALG_TWOFISH256] = {
- "twofish256", "cbc(twofish)", 0, 16, 32
- },
-};
-
-const struct sptlrpc_ciph_type *sptlrpc_get_ciph_type(__u8 ciph_alg)
-{
- struct sptlrpc_ciph_type *ct;
-
- if (ciph_alg < BULK_CIPH_ALG_MAX) {
- ct = &cipher_types[ciph_alg];
- if (ct->sct_tfm_name)
- return ct;
- }
- return NULL;
-}
-EXPORT_SYMBOL(sptlrpc_get_ciph_type);
-
-const char *sptlrpc_get_ciph_name(__u8 ciph_alg)
-{
- const struct sptlrpc_ciph_type *ct;
-
- ct = sptlrpc_get_ciph_type(ciph_alg);
- if (ct)
- return ct->sct_name;
- else
- return "unknown";
-}
-EXPORT_SYMBOL(sptlrpc_get_ciph_name);
* user supplied flavor string parsing *
****************************************/
-#ifdef HAVE_ADLER
-#define BULK_HASH_ALG_DEFAULT BULK_HASH_ALG_ADLER32
-#else
-#define BULK_HASH_ALG_DEFAULT BULK_HASH_ALG_CRC32
-#endif
-
-typedef enum {
- BULK_TYPE_N = 0,
- BULK_TYPE_I = 1,
- BULK_TYPE_P = 2
-} bulk_type_t;
-
-static void get_default_flavor(struct sptlrpc_flavor *sf)
-{
- sf->sf_rpc = SPTLRPC_FLVR_NULL;
- sf->sf_bulk_ciph = BULK_CIPH_ALG_NULL;
- sf->sf_bulk_hash = BULK_HASH_ALG_NULL;
- sf->sf_flags = 0;
-}
-
-static void get_flavor_by_rpc(struct sptlrpc_flavor *flvr, __u16 rpc_flavor)
-{
- get_default_flavor(flvr);
-
- flvr->sf_rpc = rpc_flavor;
-
- switch (rpc_flavor) {
- case SPTLRPC_FLVR_NULL:
- break;
- case SPTLRPC_FLVR_PLAIN:
- case SPTLRPC_FLVR_KRB5N:
- case SPTLRPC_FLVR_KRB5A:
- flvr->sf_bulk_hash = BULK_HASH_ALG_DEFAULT;
- break;
- case SPTLRPC_FLVR_KRB5P:
- flvr->sf_bulk_ciph = BULK_CIPH_ALG_AES128;
- /* fall through */
- case SPTLRPC_FLVR_KRB5I:
- flvr->sf_bulk_hash = BULK_HASH_ALG_SHA1;
- break;
- default:
- LBUG();
- }
-}
-
-static void get_flavor_by_bulk(struct sptlrpc_flavor *flvr,
- __u16 rpc_flavor, bulk_type_t bulk_type)
-{
- switch (bulk_type) {
- case BULK_TYPE_N:
- flvr->sf_bulk_hash = BULK_HASH_ALG_NULL;
- flvr->sf_bulk_ciph = BULK_CIPH_ALG_NULL;
- break;
- case BULK_TYPE_I:
- switch (rpc_flavor) {
- case SPTLRPC_FLVR_PLAIN:
- case SPTLRPC_FLVR_KRB5N:
- case SPTLRPC_FLVR_KRB5A:
- flvr->sf_bulk_hash = BULK_HASH_ALG_DEFAULT;
- break;
- case SPTLRPC_FLVR_KRB5I:
- case SPTLRPC_FLVR_KRB5P:
- flvr->sf_bulk_hash = BULK_HASH_ALG_SHA1;
- break;
- default:
- LBUG();
- }
- flvr->sf_bulk_ciph = BULK_CIPH_ALG_NULL;
- break;
- case BULK_TYPE_P:
- flvr->sf_bulk_hash = BULK_HASH_ALG_SHA1;
- flvr->sf_bulk_ciph = BULK_CIPH_ALG_AES128;
- break;
- default:
- LBUG();
- }
-}
-
-static __u16 __flavors[] = {
- SPTLRPC_FLVR_NULL,
- SPTLRPC_FLVR_PLAIN,
- SPTLRPC_FLVR_KRB5N,
- SPTLRPC_FLVR_KRB5A,
- SPTLRPC_FLVR_KRB5I,
- SPTLRPC_FLVR_KRB5P,
-};
-
-#define __nflavors ARRAY_SIZE(__flavors)
-
/*
- * flavor string format: rpc[-bulk{n|i|p}[:cksum/enc]]
- * for examples:
- * null
- * plain-bulki
- * krb5p-bulkn
- * krb5i-bulkp
- * krb5i-bulkp:sha512/arc4
+ * format: <base_flavor>[-<bulk_type:alg_spec>]
*/
int sptlrpc_parse_flavor(const char *str, struct sptlrpc_flavor *flvr)
{
- const char *f;
- char *bulk, *alg, *enc;
- char buf[64];
- bulk_type_t bulk_type;
- __u8 i;
- ENTRY;
+ char buf[32];
+ char *bulk, *alg;
+
+ memset(flvr, 0, sizeof(*flvr));
if (str == NULL || str[0] == '\0') {
flvr->sf_rpc = SPTLRPC_FLVR_INVALID;
- goto out;
+ return 0;
}
- for (i = 0; i < __nflavors; i++) {
- f = sptlrpc_rpcflavor2name(__flavors[i]);
- if (strncmp(str, f, strlen(f)) == 0)
- break;
- }
-
- if (i >= __nflavors)
- GOTO(invalid, -EINVAL);
+ strncpy(buf, str, sizeof(buf));
+ buf[sizeof(buf) - 1] = '\0';
- /* prepare local buffer thus we can modify it as we want */
- strncpy(buf, str, 64);
- buf[64 - 1] = '\0';
-
- /* find bulk string */
bulk = strchr(buf, '-');
if (bulk)
*bulk++ = '\0';
- /* now the first part must equal to rpc flavor name */
- if (strcmp(buf, f) != 0)
- GOTO(invalid, -EINVAL);
-
- get_flavor_by_rpc(flvr, __flavors[i]);
-
- if (bulk == NULL)
- goto out;
-
- /* find bulk algorithm string */
- alg = strchr(bulk, ':');
- if (alg)
- *alg++ = '\0';
-
- /* verify bulk section */
- if (strcmp(bulk, "bulkn") == 0) {
- flvr->sf_bulk_hash = BULK_HASH_ALG_NULL;
- flvr->sf_bulk_ciph = BULK_CIPH_ALG_NULL;
- bulk_type = BULK_TYPE_N;
- } else if (strcmp(bulk, "bulki") == 0)
- bulk_type = BULK_TYPE_I;
- else if (strcmp(bulk, "bulkp") == 0)
- bulk_type = BULK_TYPE_P;
- else
- GOTO(invalid, -EINVAL);
-
- /* null flavor don't support bulk i/p */
- if (__flavors[i] == SPTLRPC_FLVR_NULL && bulk_type != BULK_TYPE_N)
- GOTO(invalid, -EINVAL);
-
- /* plain policy dosen't support bulk p */
- if (__flavors[i] == SPTLRPC_FLVR_PLAIN && bulk_type == BULK_TYPE_P)
- GOTO(invalid, -EINVAL);
-
- get_flavor_by_bulk(flvr, __flavors[i], bulk_type);
-
- if (alg == NULL)
- goto out;
-
- /* find encryption algorithm string */
- enc = strchr(alg, '/');
- if (enc)
- *enc++ = '\0';
-
- /* checksum algorithm */
- for (i = 0; i < BULK_HASH_ALG_MAX; i++) {
- if (strcmp(alg, sptlrpc_get_hash_name(i)) == 0) {
- flvr->sf_bulk_hash = i;
- break;
- }
- }
- if (i >= BULK_HASH_ALG_MAX)
- GOTO(invalid, -EINVAL);
-
- /* privacy algorithm */
- if (enc) {
- for (i = 0; i < BULK_CIPH_ALG_MAX; i++) {
- if (strcmp(enc, sptlrpc_get_ciph_name(i)) == 0) {
- flvr->sf_bulk_ciph = i;
- break;
- }
- }
- if (i >= BULK_CIPH_ALG_MAX)
- GOTO(invalid, -EINVAL);
- }
+ flvr->sf_rpc = sptlrpc_name2flavor_base(buf);
+ if (flvr->sf_rpc == SPTLRPC_FLVR_INVALID)
+ goto err_out;
/*
- * bulk combination sanity checks
+ * currently only the base flavor "plain" can carry a bulk specification.
*/
- if (bulk_type == BULK_TYPE_P &&
- flvr->sf_bulk_ciph == BULK_CIPH_ALG_NULL)
- GOTO(invalid, -EINVAL);
-
- if (bulk_type == BULK_TYPE_I &&
- (flvr->sf_bulk_hash == BULK_HASH_ALG_NULL ||
- flvr->sf_bulk_ciph != BULK_CIPH_ALG_NULL))
- GOTO(invalid, -EINVAL);
+ if (flvr->sf_rpc == SPTLRPC_FLVR_PLAIN) {
+ flvr->u_bulk.hash.hash_alg = BULK_HASH_ALG_ADLER32;
+ if (bulk) {
+ /*
+ * format: plain-hash:<hash_alg>
+ */
+ alg = strchr(bulk, ':');
+ if (alg == NULL)
+ goto err_out;
+ *alg++ = '\0';
+
+ if (strcmp(bulk, "hash"))
+ goto err_out;
+
+ flvr->u_bulk.hash.hash_alg = sptlrpc_get_hash_alg(alg);
+ if (flvr->u_bulk.hash.hash_alg >= BULK_HASH_ALG_MAX)
+ goto err_out;
+ }
- if (bulk_type == BULK_TYPE_N &&
- (flvr->sf_bulk_hash != BULK_HASH_ALG_NULL ||
- flvr->sf_bulk_ciph != BULK_CIPH_ALG_NULL))
- GOTO(invalid, -EINVAL);
+ if (flvr->u_bulk.hash.hash_alg == BULK_HASH_ALG_NULL)
+ flvr_set_bulk_svc(&flvr->sf_rpc, SPTLRPC_BULK_SVC_NULL);
+ else
+ flvr_set_bulk_svc(&flvr->sf_rpc, SPTLRPC_BULK_SVC_INTG);
+ } else {
+ if (bulk)
+ goto err_out;
+ }
-out:
+ flvr->sf_flags = 0;
return 0;
-invalid:
+
+err_out:
CERROR("invalid flavor string: %s\n", str);
return -EINVAL;
}
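/*
 * A minimal usage sketch of the new format (illustrative only; assumes
 * sptlrpc_name2flavor_base() and sptlrpc_get_hash_alg() resolve the
 * usual names):
 *
 *   "null"            -> sf_rpc = SPTLRPC_FLVR_NULL; a bulk spec is rejected
 *   "plain"           -> sf_rpc = SPTLRPC_FLVR_PLAIN; hash defaults to
 *                        BULK_HASH_ALG_ADLER32, bulk svc SPTLRPC_BULK_SVC_INTG
 *   "plain-hash:null" -> hash alg BULK_HASH_ALG_NULL, bulk svc
 *                        SPTLRPC_BULK_SVC_NULL
 *   "krb5i-hash:sha1" -> -EINVAL: only the "plain" base flavor accepts a
 *                        bulk specification here
 */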
* configure rules *
****************************************/
+static void get_default_flavor(struct sptlrpc_flavor *sf)
+{
+ memset(sf, 0, sizeof(*sf));
+
+ sf->sf_rpc = SPTLRPC_FLVR_NULL;
+ sf->sf_flags = 0;
+}
+
static void sptlrpc_rule_init(struct sptlrpc_rule *rule)
{
rule->sr_netid = LNET_NIDNET(LNET_NID_ANY);
/*
* return 0 if the rule set can accommodate one more rule.
- * if @expand != 0, the rule set might be expanded.
*/
-int sptlrpc_rule_set_expand(struct sptlrpc_rule_set *rset, int expand)
+int sptlrpc_rule_set_expand(struct sptlrpc_rule_set *rset)
{
struct sptlrpc_rule *rules;
int nslot;
+ might_sleep();
+
if (rset->srs_nrule < rset->srs_nslot)
return 0;
- if (expand == 0)
- return -E2BIG;
-
nslot = rset->srs_nslot + 8;
/* better use realloc() if available */
/*
* merge @rule into @rset.
- * if @expand != 0 then @rset slots might be expanded.
+ * the @rset slots might be expanded.
*/
int sptlrpc_rule_set_merge(struct sptlrpc_rule_set *rset,
- struct sptlrpc_rule *rule,
- int expand)
+ struct sptlrpc_rule *rule)
{
struct sptlrpc_rule *p = rset->srs_rules;
int spec_dir, spec_net;
int rc, n, match = 0;
+ might_sleep();
+
spec_net = rule_spec_net(rule);
spec_dir = rule_spec_dir(rule);
LASSERT(n >= 0 && n <= rset->srs_nrule);
if (rule->sr_flvr.sf_rpc != SPTLRPC_FLVR_INVALID) {
- rc = sptlrpc_rule_set_expand(rset, expand);
+ rc = sptlrpc_rule_set_expand(rset);
if (rc)
return rc;
struct sptlrpc_rule *rule;
int i, n, rc;
+ might_sleep();
+
/* merge general rules first, then target-specific rules */
for (i = 0; i < 2; i++) {
if (src[i] == NULL)
rule->sr_to != to)
continue;
- rc = sptlrpc_rule_set_merge(rset, rule, 1);
+ rc = sptlrpc_rule_set_merge(rset, rule);
if (rc) {
CERROR("can't merge: %d\n", rc);
return rc;
}
}
- return sptlrpc_rule_set_merge(rule_set, rule, 1);
+ return sptlrpc_rule_set_merge(rule_set, rule);
}
/**
RETURN(-EINVAL);
}
- CDEBUG(D_SEC, "got one rule: %s.%s\n", target, param);
+ CDEBUG(D_SEC, "processing rule: %s.%s\n", target, param);
/* parse rule to make sure the format is correct */
if (strncmp(param, PARAM_SRPC_FLVR, sizeof(PARAM_SRPC_FLVR) - 1) != 0) {
enum lustre_sec_part to,
unsigned int fl_udesc)
{
+ /*
+ * the null flavor needs no flags set, and in fact we'd better
+ * not set any, because everybody shares a single sec.
+ */
+ if (sf->sf_rpc == SPTLRPC_FLVR_NULL)
+ return;
+
if (from == LUSTRE_SP_MDT) {
/* MDT->MDT; MDT->OST */
sf->sf_flags |= PTLRPC_SEC_FL_ROOTONLY;
struct proc_dir_entry *sptlrpc_proc_root = NULL;
EXPORT_SYMBOL(sptlrpc_proc_root);
-void sec_flags2str(unsigned long flags, char *buf, int bufsize)
+char *sec_flags2str(unsigned long flags, char *buf, int bufsize)
{
buf[0] = '\0';
strncat(buf, "-,", bufsize);
buf[strlen(buf) - 1] = '\0';
-
+ return buf;
}
static int sptlrpc_info_lprocfs_seq_show(struct seq_file *seq, void *v)
struct obd_device *dev = seq->private;
struct client_obd *cli = &dev->u.cli;
struct ptlrpc_sec *sec = NULL;
- char flags_str[32];
+ char str[32];
LASSERT(strcmp(dev->obd_type->typ_name, LUSTRE_OSC_NAME) == 0 ||
strcmp(dev->obd_type->typ_name, LUSTRE_MDC_NAME) == 0 ||
if (sec == NULL)
goto out;
- sec_flags2str(sec->ps_flvr.sf_flags, flags_str, sizeof(flags_str));
+ sec_flags2str(sec->ps_flvr.sf_flags, str, sizeof(str));
seq_printf(seq, "rpc flavor: %s\n",
- sptlrpc_rpcflavor2name(sec->ps_flvr.sf_rpc));
- seq_printf(seq, "bulk flavor: %s/%s\n",
- sptlrpc_get_hash_name(sec->ps_flvr.sf_bulk_hash),
- sptlrpc_get_ciph_name(sec->ps_flvr.sf_bulk_ciph));
- seq_printf(seq, "flags: %s\n", flags_str);
+ sptlrpc_flavor2name_base(sec->ps_flvr.sf_rpc));
+ seq_printf(seq, "bulk flavor: %s\n",
+ sptlrpc_flavor2name_bulk(&sec->ps_flvr, str, sizeof(str)));
+ seq_printf(seq, "flags: %s\n",
+ sec_flags2str(sec->ps_flvr.sf_flags, str, sizeof(str)));
seq_printf(seq, "id: %d\n", sec->ps_id);
seq_printf(seq, "refcount: %d\n", atomic_read(&sec->ps_refcount));
seq_printf(seq, "nctx: %d\n", atomic_read(&sec->ps_nctx));
static struct ptlrpc_svc_ctx null_svc_ctx;
/*
- * null sec temporarily use the third byte of lm_secflvr to identify
+ * we can temporarily use the topmost 8 bits of lm_secflvr to identify
* the source sec part.
*/
static inline
void null_encode_sec_part(struct lustre_msg *msg, enum lustre_sec_part sp)
{
- msg->lm_secflvr |= (((__u32) sp) & 0xFF) << 16;
+ msg->lm_secflvr |= (((__u32) sp) & 0xFF) << 24;
}
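/*
 * Why the swabbed case in the decoder below masks the low byte: byte-
 * swapping a 32-bit word moves bits 24..31 down to bits 0..7. A
 * self-contained sketch in plain C (the sec part value is hypothetical):
 */
#include <stdint.h>
#include <assert.h>

static uint32_t bswap32(uint32_t v)
{
        return ((v & 0x000000ffU) << 24) | ((v & 0x0000ff00U) << 8) |
               ((v & 0x00ff0000U) >> 8)  | ((v & 0xff000000U) >> 24);
}

int main(void)
{
        uint32_t secflvr = 0;
        uint32_t sp = 0x2a;                      /* hypothetical sec part */

        secflvr |= (sp & 0xFF) << 24;            /* sender side, as above */
        assert(((secflvr >> 24) & 0xFF) == sp);  /* same-endian receiver */
        assert((bswap32(secflvr) & 0xFF) == sp); /* swabbed message */
        return 0;
}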
static inline
{
switch (msg->lm_magic) {
case LUSTRE_MSG_MAGIC_V2:
- return (msg->lm_secflvr >> 16) & 0xFF;
+ return (msg->lm_secflvr >> 24) & 0xFF;
case LUSTRE_MSG_MAGIC_V2_SWABBED:
- return (msg->lm_secflvr >> 8) & 0xFF;
+ return (msg->lm_secflvr) & 0xFF;
default:
return LUSTRE_SP_ANY;
}
struct ptlrpc_svc_ctx *svc_ctx,
struct sptlrpc_flavor *sf)
{
- LASSERT(RPC_FLVR_POLICY(sf->sf_rpc) == SPTLRPC_POLICY_NULL);
-
- if (sf->sf_bulk_ciph != BULK_CIPH_ALG_NULL ||
- sf->sf_bulk_hash != BULK_HASH_ALG_NULL) {
- CERROR("null sec don't support bulk algorithm: %u/%u\n",
- sf->sf_bulk_ciph, sf->sf_bulk_hash);
- return NULL;
- }
+ LASSERT(SPTLRPC_FLVR_POLICY(sf->sf_rpc) == SPTLRPC_POLICY_NULL);
/* the general layer has taken a module reference for us; because we never
* really destroy the sec, simply release the reference here.
static
int null_accept(struct ptlrpc_request *req)
{
- LASSERT(RPC_FLVR_POLICY(req->rq_flvr.sf_rpc) == SPTLRPC_POLICY_NULL);
+ LASSERT(SPTLRPC_FLVR_POLICY(req->rq_flvr.sf_rpc) ==
+ SPTLRPC_POLICY_NULL);
if (req->rq_flvr.sf_rpc != SPTLRPC_FLVR_NULL) {
CERROR("Invalid rpc flavor 0x%x\n", req->rq_flvr.sf_rpc);
null_sec.ps_id = -1;
null_sec.ps_import = NULL;
null_sec.ps_flvr.sf_rpc = SPTLRPC_FLVR_NULL;
- null_sec.ps_flvr.sf_bulk_ciph = BULK_CIPH_ALG_NULL;
- null_sec.ps_flvr.sf_bulk_hash = BULK_HASH_ALG_NULL;
null_sec.ps_flvr.sf_flags = 0;
null_sec.ps_part = LUSTRE_SP_ANY;
null_sec.ps_dying = 0;
static unsigned int plain_at_offset;
/*
- * flavor flags (maximum 8 flags)
+ * for simplicity, plain policy rpcs use a fixed layout.
*/
-#define PLAIN_WFLVR_FLAGS_OFFSET (12)
-#define PLAIN_WFLVR_FLAG_BULK (1 << (0 + PLAIN_WFLVR_FLAGS_OFFSET))
-#define PLAIN_WFLVR_FLAG_USER (1 << (1 + PLAIN_WFLVR_FLAGS_OFFSET))
+#define PLAIN_PACK_SEGMENTS (4)
+
+#define PLAIN_PACK_HDR_OFF (0)
+#define PLAIN_PACK_MSG_OFF (1)
+#define PLAIN_PACK_USER_OFF (2)
+#define PLAIN_PACK_BULK_OFF (3)
+
+#define PLAIN_FL_USER (0x01)
+#define PLAIN_FL_BULK (0x02)
+
+struct plain_header {
+ __u8 ph_ver; /* 0 */
+ __u8 ph_flags;
+ __u8 ph_sp; /* source */
+ __u8 ph_bulk_hash_alg; /* complete flavor desc */
+ __u8 ph_pad[4];
+};
-#define PLAIN_WFLVR_HAS_BULK(wflvr) \
- (((wflvr) & PLAIN_WFLVR_FLAG_BULK) != 0)
-#define PLAIN_WFLVR_HAS_USER(wflvr) \
- (((wflvr) & PLAIN_WFLVR_FLAG_USER) != 0)
+struct plain_bulk_token {
+ __u8 pbt_hash[8];
+};
-#define PLAIN_WFLVR_TO_RPC(wflvr) \
- ((wflvr) & ((1 << PLAIN_WFLVR_FLAGS_OFFSET) - 1))
+#define PLAIN_BSD_SIZE \
+ (sizeof(struct ptlrpc_bulk_sec_desc) + sizeof(struct plain_bulk_token))
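+
+/*
+ * For reference, the resulting message layout (derived from the defines
+ * above): every plain request/reply carries exactly PLAIN_PACK_SEGMENTS
+ * buffers:
+ *
+ *   seg 0 (HDR)  struct plain_header: version, flags, source sec part,
+ *                bulk hash algorithm
+ *   seg 1 (MSG)  the embedded lustre message itself
+ *   seg 2 (USER) user descriptor; empty unless PLAIN_FL_USER
+ *   seg 3 (BULK) PLAIN_BSD_SIZE bytes (bulk sec desc + 8-byte token);
+ *                empty unless PLAIN_FL_BULK
+ */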
-/*
- * similar to null sec, temporarily use the third byte of lm_secflvr to identify
- * the source sec part.
- */
-static inline
-void plain_encode_sec_part(struct lustre_msg *msg, enum lustre_sec_part sp)
+/****************************************
+ * bulk checksum helpers *
+ ****************************************/
+
+static int plain_unpack_bsd(struct lustre_msg *msg)
{
- msg->lm_secflvr |= (((__u32) sp) & 0xFF) << 16;
+ struct ptlrpc_bulk_sec_desc *bsd;
+
+ if (bulk_sec_desc_unpack(msg, PLAIN_PACK_BULK_OFF))
+ return -EPROTO;
+
+ bsd = lustre_msg_buf(msg, PLAIN_PACK_BULK_OFF, PLAIN_BSD_SIZE);
+ if (bsd == NULL) {
+ CERROR("bulk sec desc has short size %d\n",
+ lustre_msg_buflen(msg, PLAIN_PACK_BULK_OFF));
+ return -EPROTO;
+ }
+
+ if (bsd->bsd_svc != SPTLRPC_BULK_SVC_NULL &&
+ bsd->bsd_svc != SPTLRPC_BULK_SVC_INTG) {
+ CERROR("invalid bulk svc %u\n", bsd->bsd_svc);
+ return -EPROTO;
+ }
+
+ return 0;
}
-static inline
-enum lustre_sec_part plain_decode_sec_part(struct lustre_msg *msg)
+static int plain_generate_bulk_csum(struct ptlrpc_bulk_desc *desc,
+ __u8 hash_alg,
+ struct plain_bulk_token *token)
{
- return (msg->lm_secflvr >> 16) & 0xFF;
+ if (hash_alg == BULK_HASH_ALG_NULL)
+ return 0;
+
+ memset(token->pbt_hash, 0, sizeof(token->pbt_hash));
+ return sptlrpc_get_bulk_checksum(desc, hash_alg, token->pbt_hash,
+ sizeof(token->pbt_hash));
}
-/*
- * for simplicity, plain policy rpc use fixed layout.
- */
-#define PLAIN_PACK_SEGMENTS (3)
+static int plain_verify_bulk_csum(struct ptlrpc_bulk_desc *desc,
+ __u8 hash_alg,
+ struct plain_bulk_token *tokenr)
+{
+ struct plain_bulk_token tokenv;
+ int rc;
+
+ if (hash_alg == BULK_HASH_ALG_NULL)
+ return 0;
-#define PLAIN_PACK_MSG_OFF (0)
-#define PLAIN_PACK_USER_OFF (1)
-#define PLAIN_PACK_BULK_OFF (2)
+ memset(&tokenv.pbt_hash, 0, sizeof(tokenv.pbt_hash));
+ rc = sptlrpc_get_bulk_checksum(desc, hash_alg, tokenv.pbt_hash,
+ sizeof(tokenv.pbt_hash));
+ if (rc)
+ return rc;
+
+ if (memcmp(tokenr->pbt_hash, tokenv.pbt_hash, sizeof(tokenr->pbt_hash)))
+ return -EACCES;
+ return 0;
+}
+
+#ifdef __KERNEL__
+static void corrupt_bulk_data(struct ptlrpc_bulk_desc *desc)
+{
+ char *ptr;
+ unsigned int off, i;
+
+ for (i = 0; i < desc->bd_iov_count; i++) {
+ if (desc->bd_iov[i].kiov_len == 0)
+ continue;
+
+ ptr = cfs_kmap(desc->bd_iov[i].kiov_page);
+ off = desc->bd_iov[i].kiov_offset & ~CFS_PAGE_MASK;
+ ptr[off] ^= 0x1;
+ cfs_kunmap(desc->bd_iov[i].kiov_page);
+ return;
+ }
+}
+#else
+static void corrupt_bulk_data(struct ptlrpc_bulk_desc *desc)
+{
+ unsigned int i;
+
+ for (i = 0; i < desc->bd_iov_count; i++) {
+ if (desc->bd_iov[i].iov_len == 0)
+ continue;
+
+ ((char *)desc->bd_iov[i].iov_base)[0] ^= 0x1;
+ return;
+ }
+}
+#endif /* __KERNEL__ */
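+
+/*
+ * Note: both variants above flip a single bit in the first non-empty
+ * fragment, which is enough for the OBD_FAIL_OSC_CHECKSUM_RECEIVE
+ * fault-injection path below to yield a detectable checksum mismatch
+ * without wholesale data corruption.
+ */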
/****************************************
* cli_ctx apis *
static
int plain_ctx_sign(struct ptlrpc_cli_ctx *ctx, struct ptlrpc_request *req)
{
- struct lustre_msg_v2 *msg = req->rq_reqbuf;
+ struct lustre_msg *msg = req->rq_reqbuf;
+ struct plain_header *phdr;
ENTRY;
msg->lm_secflvr = req->rq_flvr.sf_rpc;
- if (req->rq_pack_bulk)
- msg->lm_secflvr |= PLAIN_WFLVR_FLAG_BULK;
- if (req->rq_pack_udesc)
- msg->lm_secflvr |= PLAIN_WFLVR_FLAG_USER;
- plain_encode_sec_part(msg, ctx->cc_sec->ps_part);
+ phdr = lustre_msg_buf(msg, PLAIN_PACK_HDR_OFF, 0);
+ phdr->ph_ver = 0;
+ phdr->ph_flags = 0;
+ phdr->ph_sp = ctx->cc_sec->ps_part;
+ phdr->ph_bulk_hash_alg = req->rq_flvr.u_bulk.hash.hash_alg;
+
+ if (req->rq_pack_udesc)
+ phdr->ph_flags |= PLAIN_FL_USER;
+ if (req->rq_pack_bulk)
+ phdr->ph_flags |= PLAIN_FL_BULK;
req->rq_reqdata_len = lustre_msg_size_v2(msg->lm_bufcount,
msg->lm_buflens);
static
int plain_ctx_verify(struct ptlrpc_cli_ctx *ctx, struct ptlrpc_request *req)
{
- struct lustre_msg *msg = req->rq_repdata;
- __u32 cksum;
+ struct lustre_msg *msg = req->rq_repdata;
+ struct plain_header *phdr;
+ __u32 cksum;
ENTRY;
if (msg->lm_bufcount != PLAIN_PACK_SEGMENTS) {
RETURN(-EPROTO);
}
+ phdr = lustre_msg_buf(msg, PLAIN_PACK_HDR_OFF, sizeof(*phdr));
+ if (phdr == NULL) {
+ CERROR("missing plain header\n");
+ RETURN(-EPROTO);
+ }
+
+ if (phdr->ph_ver != 0) {
+ CERROR("Invalid header version\n");
+ RETURN(-EPROTO);
+ }
+
/* expect no user desc in reply */
- if (PLAIN_WFLVR_HAS_USER(msg->lm_secflvr)) {
+ if (phdr->ph_flags & PLAIN_FL_USER) {
CERROR("Unexpected udesc flag in reply\n");
RETURN(-EPROTO);
}
+ if (phdr->ph_bulk_hash_alg != req->rq_flvr.u_bulk.hash.hash_alg) {
+ CERROR("reply bulk flavor %u != %u\n", phdr->ph_bulk_hash_alg,
+ req->rq_flvr.u_bulk.hash.hash_alg);
+ RETURN(-EPROTO);
+ }
+
if (unlikely(req->rq_early)) {
cksum = crc32_le(!(__u32) 0,
lustre_msg_buf(msg, PLAIN_PACK_MSG_OFF, 0),
* in reply, except for early reply */
if (!req->rq_early &&
!equi(req->rq_pack_bulk == 1,
- PLAIN_WFLVR_HAS_BULK(msg->lm_secflvr))) {
+ phdr->ph_flags & PLAIN_FL_BULK)) {
CERROR("%s bulk checksum in reply\n",
req->rq_pack_bulk ? "Missing" : "Unexpected");
RETURN(-EPROTO);
}
- if (PLAIN_WFLVR_HAS_BULK(msg->lm_secflvr) &&
- bulk_sec_desc_unpack(msg, PLAIN_PACK_BULK_OFF)) {
- CERROR("Mal-formed bulk checksum reply\n");
- RETURN(-EINVAL);
+ if (phdr->ph_flags & PLAIN_FL_BULK) {
+ if (plain_unpack_bsd(msg))
+ RETURN(-EPROTO);
}
}
struct ptlrpc_request *req,
struct ptlrpc_bulk_desc *desc)
{
+ struct ptlrpc_bulk_sec_desc *bsd;
+ struct plain_bulk_token *token;
+ int rc;
+
LASSERT(req->rq_pack_bulk);
LASSERT(req->rq_reqbuf->lm_bufcount == PLAIN_PACK_SEGMENTS);
- return bulk_csum_cli_request(desc, req->rq_bulk_read,
- req->rq_flvr.sf_bulk_hash,
- req->rq_reqbuf,
- PLAIN_PACK_BULK_OFF);
+ bsd = lustre_msg_buf(req->rq_reqbuf, PLAIN_PACK_BULK_OFF, 0);
+ token = (struct plain_bulk_token *) bsd->bsd_data;
+
+ bsd->bsd_version = 0;
+ bsd->bsd_flags = 0;
+ bsd->bsd_type = SPTLRPC_BULK_DEFAULT;
+ bsd->bsd_svc = SPTLRPC_FLVR_BULK_SVC(req->rq_flvr.sf_rpc);
+
+ if (bsd->bsd_svc == SPTLRPC_BULK_SVC_NULL)
+ RETURN(0);
+
+ if (req->rq_bulk_read)
+ RETURN(0);
+
+ rc = plain_generate_bulk_csum(desc, req->rq_flvr.u_bulk.hash.hash_alg,
+ token);
+ if (rc) {
+ CERROR("bulk write: failed to compute checksum: %d\n", rc);
+ } else {
+ /*
+ * when sending we only compute a wrong checksum instead of
+ * corrupting the data, so the data is still correct on a redo
+ */
+ if (OBD_FAIL_CHECK(OBD_FAIL_OSC_CHECKSUM_SEND) &&
+ req->rq_flvr.u_bulk.hash.hash_alg != BULK_HASH_ALG_NULL)
+ token->pbt_hash[0] ^= 0x1;
+ }
+
+ return rc;
}
static
struct ptlrpc_request *req,
struct ptlrpc_bulk_desc *desc)
{
+ struct ptlrpc_bulk_sec_desc *bsdr, *bsdv;
+ struct plain_bulk_token *tokenr, *tokenv;
+ int rc;
+#ifdef __KERNEL__
+ int i, nob;
+#endif
+
LASSERT(req->rq_pack_bulk);
LASSERT(req->rq_reqbuf->lm_bufcount == PLAIN_PACK_SEGMENTS);
LASSERT(req->rq_repdata->lm_bufcount == PLAIN_PACK_SEGMENTS);
- return bulk_csum_cli_reply(desc, req->rq_bulk_read,
- req->rq_reqbuf, PLAIN_PACK_BULK_OFF,
- req->rq_repdata, PLAIN_PACK_BULK_OFF);
+ bsdr = lustre_msg_buf(req->rq_reqbuf, PLAIN_PACK_BULK_OFF, 0);
+ tokenr = (struct plain_bulk_token *) bsdr->bsd_data;
+ bsdv = lustre_msg_buf(req->rq_repdata, PLAIN_PACK_BULK_OFF, 0);
+ tokenv = (struct plain_bulk_token *) bsdv->bsd_data;
+
+ if (req->rq_bulk_write) {
+ if (bsdv->bsd_flags & BSD_FL_ERR)
+ return -EIO;
+ return 0;
+ }
+
+#ifdef __KERNEL__
+ /* fix the actual data size */
+ for (i = 0, nob = 0; i < desc->bd_iov_count; i++) {
+ if (desc->bd_iov[i].kiov_len + nob > desc->bd_nob_transferred) {
+ desc->bd_iov[i].kiov_len =
+ desc->bd_nob_transferred - nob;
+ }
+ nob += desc->bd_iov[i].kiov_len;
+ }
+#endif
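+
+ /*
+ * A worked example of the trim above: with three 4096-byte fragments
+ * and bd_nob_transferred == 9000, the loop leaves kiov_len values
+ * 4096, 4096 and 808, so the verify below checksums only the bytes
+ * actually received rather than stale tail data.
+ */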
+
+ rc = plain_verify_bulk_csum(desc, req->rq_flvr.u_bulk.hash.hash_alg,
+ tokenv);
+ if (rc)
+ CERROR("bulk read: client verify failed: %d\n", rc);
+
+ return rc;
}
/****************************************
struct ptlrpc_cli_ctx *ctx;
ENTRY;
- LASSERT(RPC_FLVR_POLICY(sf->sf_rpc) == SPTLRPC_POLICY_PLAIN);
-
- if (sf->sf_bulk_ciph != BULK_CIPH_ALG_NULL) {
- CERROR("plain policy don't support bulk cipher: %u\n",
- sf->sf_bulk_ciph);
- RETURN(NULL);
- }
+ LASSERT(SPTLRPC_FLVR_POLICY(sf->sf_rpc) == SPTLRPC_POLICY_PLAIN);
OBD_ALLOC_PTR(plsec);
if (plsec == NULL)
int msgsize)
{
__u32 buflens[PLAIN_PACK_SEGMENTS] = { 0, };
- int alloc_len;
+ int alloc_len;
ENTRY;
+ buflens[PLAIN_PACK_HDR_OFF] = sizeof(struct plain_header);
buflens[PLAIN_PACK_MSG_OFF] = msgsize;
if (req->rq_pack_udesc)
if (req->rq_pack_bulk) {
LASSERT(req->rq_bulk_read || req->rq_bulk_write);
-
- buflens[PLAIN_PACK_BULK_OFF] = bulk_sec_desc_size(
- req->rq_flvr.sf_bulk_hash, 1,
- req->rq_bulk_read);
+ buflens[PLAIN_PACK_BULK_OFF] = PLAIN_BSD_SIZE;
}
alloc_len = lustre_msg_size_v2(PLAIN_PACK_SEGMENTS, buflens);
}
lustre_init_msg_v2(req->rq_reqbuf, PLAIN_PACK_SEGMENTS, buflens, NULL);
- req->rq_reqmsg = lustre_msg_buf_v2(req->rq_reqbuf, 0, 0);
+ req->rq_reqmsg = lustre_msg_buf(req->rq_reqbuf, PLAIN_PACK_MSG_OFF, 0);
if (req->rq_pack_udesc)
sptlrpc_pack_user_desc(req->rq_reqbuf, PLAIN_PACK_USER_OFF);
int alloc_len;
ENTRY;
+ buflens[PLAIN_PACK_HDR_OFF] = sizeof(struct plain_header);
buflens[PLAIN_PACK_MSG_OFF] = msgsize;
if (req->rq_pack_bulk) {
LASSERT(req->rq_bulk_read || req->rq_bulk_write);
- buflens[PLAIN_PACK_BULK_OFF] = bulk_sec_desc_size(
- req->rq_flvr.sf_bulk_hash, 0,
- req->rq_bulk_read);
+ buflens[PLAIN_PACK_BULK_OFF] = PLAIN_BSD_SIZE;
}
alloc_len = lustre_msg_size_v2(PLAIN_PACK_SEGMENTS, buflens);
static
int plain_accept(struct ptlrpc_request *req)
{
- struct lustre_msg *msg = req->rq_reqbuf;
+ struct lustre_msg *msg = req->rq_reqbuf;
+ struct plain_header *phdr;
ENTRY;
- LASSERT(RPC_FLVR_POLICY(req->rq_flvr.sf_rpc) == SPTLRPC_POLICY_PLAIN);
+ LASSERT(SPTLRPC_FLVR_POLICY(req->rq_flvr.sf_rpc) ==
+ SPTLRPC_POLICY_PLAIN);
+
+ if (SPTLRPC_FLVR_BASE(req->rq_flvr.sf_rpc) !=
+ SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_PLAIN) ||
+ SPTLRPC_FLVR_BULK_TYPE(req->rq_flvr.sf_rpc) !=
+ SPTLRPC_FLVR_BULK_TYPE(SPTLRPC_FLVR_PLAIN)) {
+ CERROR("Invalid rpc flavor %x\n", req->rq_flvr.sf_rpc);
+ RETURN(SECSVC_DROP);
+ }
if (msg->lm_bufcount < PLAIN_PACK_SEGMENTS) {
CERROR("unexpected request buf count %u\n", msg->lm_bufcount);
RETURN(SECSVC_DROP);
}
- if (req->rq_flvr.sf_rpc != SPTLRPC_FLVR_PLAIN) {
- CERROR("Invalid rpc flavor %x\n", req->rq_flvr.sf_rpc);
- RETURN(SECSVC_DROP);
+ phdr = lustre_msg_buf(msg, PLAIN_PACK_HDR_OFF, sizeof(*phdr));
+ if (phdr == NULL) {
+ CERROR("missing plain header\n");
+ RETURN(-EPROTO);
}
- req->rq_sp_from = plain_decode_sec_part(msg);
+ if (phdr->ph_ver != 0) {
+ CERROR("Invalid header version\n");
+ RETURN(-EPROTO);
+ }
- if (PLAIN_WFLVR_HAS_USER(msg->lm_secflvr)) {
+ if (phdr->ph_bulk_hash_alg >= BULK_HASH_ALG_MAX) {
+ CERROR("invalid hash algorithm: %u\n", phdr->ph_bulk_hash_alg);
+ RETURN(-EPROTO);
+ }
+
+ req->rq_sp_from = phdr->ph_sp;
+ req->rq_flvr.u_bulk.hash.hash_alg = phdr->ph_bulk_hash_alg;
+
+ if (phdr->ph_flags & PLAIN_FL_USER) {
if (sptlrpc_unpack_user_desc(msg, PLAIN_PACK_USER_OFF)) {
CERROR("Mal-formed user descriptor\n");
RETURN(SECSVC_DROP);
req->rq_user_desc = lustre_msg_buf(msg, PLAIN_PACK_USER_OFF, 0);
}
- if (PLAIN_WFLVR_HAS_BULK(msg->lm_secflvr)) {
- if (bulk_sec_desc_unpack(msg, PLAIN_PACK_BULK_OFF)) {
- CERROR("Mal-formed bulk checksum request\n");
+ if (phdr->ph_flags & PLAIN_FL_BULK) {
+ if (plain_unpack_bsd(msg))
RETURN(SECSVC_DROP);
- }
req->rq_pack_bulk = 1;
}
int plain_alloc_rs(struct ptlrpc_request *req, int msgsize)
{
struct ptlrpc_reply_state *rs;
- struct ptlrpc_bulk_sec_desc *bsd;
__u32 buflens[PLAIN_PACK_SEGMENTS] = { 0, };
int rs_size = sizeof(*rs);
ENTRY;
LASSERT(msgsize % 8 == 0);
+ buflens[PLAIN_PACK_HDR_OFF] = sizeof(struct plain_header);
buflens[PLAIN_PACK_MSG_OFF] = msgsize;
- if (req->rq_pack_bulk && (req->rq_bulk_read || req->rq_bulk_write)) {
- bsd = lustre_msg_buf(req->rq_reqbuf,
- PLAIN_PACK_BULK_OFF, sizeof(*bsd));
- LASSERT(bsd);
+ if (req->rq_pack_bulk && (req->rq_bulk_read || req->rq_bulk_write))
+ buflens[PLAIN_PACK_BULK_OFF] = PLAIN_BSD_SIZE;
- buflens[PLAIN_PACK_BULK_OFF] = bulk_sec_desc_size(
- bsd->bsd_hash_alg, 0,
- req->rq_bulk_read);
- }
rs_size += lustre_msg_size_v2(PLAIN_PACK_SEGMENTS, buflens);
rs = req->rq_reply_state;
{
struct ptlrpc_reply_state *rs = req->rq_reply_state;
struct lustre_msg_v2 *msg = rs->rs_repbuf;
+ struct plain_header *phdr;
int len;
ENTRY;
len = lustre_msg_size_v2(msg->lm_bufcount, msg->lm_buflens);
msg->lm_secflvr = req->rq_flvr.sf_rpc;
+
+ phdr = lustre_msg_buf(msg, PLAIN_PACK_HDR_OFF, 0);
+ phdr->ph_ver = 0;
+ phdr->ph_flags = 0;
+ phdr->ph_bulk_hash_alg = req->rq_flvr.u_bulk.hash.hash_alg;
+
if (req->rq_pack_bulk)
- msg->lm_secflvr |= PLAIN_WFLVR_FLAG_BULK;
+ phdr->ph_flags |= PLAIN_FL_BULK;
rs->rs_repdata_len = len;
int plain_svc_unwrap_bulk(struct ptlrpc_request *req,
struct ptlrpc_bulk_desc *desc)
{
- struct ptlrpc_reply_state *rs = req->rq_reply_state;
+ struct ptlrpc_reply_state *rs = req->rq_reply_state;
+ struct ptlrpc_bulk_sec_desc *bsdr, *bsdv;
+ struct plain_bulk_token *tokenr, *tokenv;
+ int rc;
- LASSERT(rs);
+ LASSERT(req->rq_bulk_write);
LASSERT(req->rq_pack_bulk);
- LASSERT(req->rq_reqbuf->lm_bufcount >= PLAIN_PACK_SEGMENTS);
- LASSERT(rs->rs_repbuf->lm_bufcount == PLAIN_PACK_SEGMENTS);
- return bulk_csum_svc(desc, req->rq_bulk_read,
- lustre_msg_buf(req->rq_reqbuf,
- PLAIN_PACK_BULK_OFF, 0),
- lustre_msg_buflen(req->rq_reqbuf,
- PLAIN_PACK_BULK_OFF),
- lustre_msg_buf(rs->rs_repbuf,
- PLAIN_PACK_BULK_OFF, 0),
- lustre_msg_buflen(rs->rs_repbuf,
- PLAIN_PACK_BULK_OFF));
+ bsdr = lustre_msg_buf(req->rq_reqbuf, PLAIN_PACK_BULK_OFF, 0);
+ tokenr = (struct plain_bulk_token *) bsdr->bsd_data;
+ bsdv = lustre_msg_buf(rs->rs_repbuf, PLAIN_PACK_BULK_OFF, 0);
+ tokenv = (struct plain_bulk_token *) bsdv->bsd_data;
+
+ bsdv->bsd_version = 0;
+ bsdv->bsd_type = SPTLRPC_BULK_DEFAULT;
+ bsdv->bsd_svc = bsdr->bsd_svc;
+ bsdv->bsd_flags = 0;
+
+ if (bsdr->bsd_svc == SPTLRPC_BULK_SVC_NULL)
+ return 0;
+
+ rc = plain_verify_bulk_csum(desc, req->rq_flvr.u_bulk.hash.hash_alg,
+ tokenr);
+ if (rc) {
+ bsdv->bsd_flags |= BSD_FL_ERR;
+ CERROR("bulk write: server verify failed: %d\n", rc);
+ }
+
+ return rc;
}
static
int plain_svc_wrap_bulk(struct ptlrpc_request *req,
struct ptlrpc_bulk_desc *desc)
{
- struct ptlrpc_reply_state *rs = req->rq_reply_state;
+ struct ptlrpc_reply_state *rs = req->rq_reply_state;
+ struct ptlrpc_bulk_sec_desc *bsdr, *bsdv;
+ struct plain_bulk_token *tokenr, *tokenv;
+ int rc;
- LASSERT(rs);
+ LASSERT(req->rq_bulk_read);
LASSERT(req->rq_pack_bulk);
- LASSERT(req->rq_reqbuf->lm_bufcount >= PLAIN_PACK_SEGMENTS);
- LASSERT(rs->rs_repbuf->lm_bufcount == PLAIN_PACK_SEGMENTS);
- return bulk_csum_svc(desc, req->rq_bulk_read,
- lustre_msg_buf(req->rq_reqbuf,
- PLAIN_PACK_BULK_OFF, 0),
- lustre_msg_buflen(req->rq_reqbuf,
- PLAIN_PACK_BULK_OFF),
- lustre_msg_buf(rs->rs_repbuf,
- PLAIN_PACK_BULK_OFF, 0),
- lustre_msg_buflen(rs->rs_repbuf,
- PLAIN_PACK_BULK_OFF));
+ bsdr = lustre_msg_buf(req->rq_reqbuf, PLAIN_PACK_BULK_OFF, 0);
+ tokenr = (struct plain_bulk_token *) bsdr->bsd_data;
+ bsdv = lustre_msg_buf(rs->rs_repbuf, PLAIN_PACK_BULK_OFF, 0);
+ tokenv = (struct plain_bulk_token *) bsdv->bsd_data;
+
+ bsdv->bsd_version = 0;
+ bsdv->bsd_type = SPTLRPC_BULK_DEFAULT;
+ bsdv->bsd_svc = bsdr->bsd_svc;
+ bsdv->bsd_flags = 0;
+
+ if (bsdr->bsd_svc == SPTLRPC_BULK_SVC_NULL)
+ return 0;
+
+ rc = plain_generate_bulk_csum(desc, req->rq_flvr.u_bulk.hash.hash_alg,
+ tokenv);
+ if (rc) {
+ CERROR("bulk read: server failed to compute "
+ "checksum: %d\n", rc);
+ } else {
+ if (OBD_FAIL_CHECK(OBD_FAIL_OSC_CHECKSUM_RECEIVE))
+ corrupt_bulk_data(desc);
+ }
+
+ return rc;
}
static struct ptlrpc_ctx_ops plain_ctx_ops = {
.release_ctx = plain_release_ctx,
.flush_ctx_cache = plain_flush_ctx_cache,
.alloc_reqbuf = plain_alloc_reqbuf,
- .alloc_repbuf = plain_alloc_repbuf,
.free_reqbuf = plain_free_reqbuf,
+ .alloc_repbuf = plain_alloc_repbuf,
.free_repbuf = plain_free_repbuf,
.enlarge_reqbuf = plain_enlarge_reqbuf,
};
goto err_req;
}
+ switch(lustre_msg_get_opc(req->rq_reqmsg)) {
+ case MDS_WRITEPAGE:
+ case OST_WRITE:
+ req->rq_bulk_write = 1;
+ break;
+ case MDS_READPAGE:
+ case OST_READ:
+ req->rq_bulk_read = 1;
+ break;
+ }
+
CDEBUG(D_NET, "got req "LPD64"\n", req->rq_xid);
req->rq_export = class_conn2export(
{
/* Wire protocol assertions generated by 'wirecheck'
* (make -C lustre/utils newwiretest)
- * running on Linux lin2 2.6.18-92.1.17-prep #3 Sun Nov 23 14:29:36 IST 2008 i686 i686 i386 G
- * with gcc version 3.4.6 20060404 (Red Hat 3.4.6-10) */
+ * running on Linux localhost.localdomain 2.6.18-prep #3 SMP Sun Nov 23 08:04:44 EST 2008 i68
+ * with gcc version 4.1.1 20061011 (Red Hat 4.1.1-30) */
/* Constants... */
(long long)OBD_QC_CALLBACK);
LASSERTF(OBD_LAST_OPC == 403, " found %lld\n",
(long long)OBD_LAST_OPC);
- LASSERTF(QUOTA_DQACQ == 901, " found %lld\n",
+ LASSERTF(QUOTA_DQACQ == 601, " found %lld\n",
(long long)QUOTA_DQACQ);
- LASSERTF(QUOTA_DQREL == 902, " found %lld\n",
+ LASSERTF(QUOTA_DQREL == 602, " found %lld\n",
(long long)QUOTA_DQREL);
LASSERTF(MGS_CONNECT == 250, " found %lld\n",
(long long)MGS_CONNECT);
(long long)(int)offsetof(struct obd_connect_data, padding2));
LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding2) == 8, " found %lld\n",
(long long)(int)sizeof(((struct obd_connect_data *)0)->padding2));
- CLASSERT(OBD_CONNECT_RDONLY == 0x00000001ULL);
- CLASSERT(OBD_CONNECT_INDEX == 0x00000002ULL);
- CLASSERT(OBD_CONNECT_GRANT == 0x00000008ULL);
- CLASSERT(OBD_CONNECT_SRVLOCK == 0x00000010ULL);
- CLASSERT(OBD_CONNECT_VERSION == 0x00000020ULL);
- CLASSERT(OBD_CONNECT_REQPORTAL == 0x00000040ULL);
- CLASSERT(OBD_CONNECT_ACL == 0x00000080ULL);
- CLASSERT(OBD_CONNECT_XATTR == 0x00000100ULL);
+ CLASSERT(OBD_CONNECT_RDONLY == 0x1ULL);
+ CLASSERT(OBD_CONNECT_INDEX == 0x2ULL);
+ CLASSERT(OBD_CONNECT_GRANT == 0x8ULL);
+ CLASSERT(OBD_CONNECT_SRVLOCK == 0x10ULL);
+ CLASSERT(OBD_CONNECT_VERSION == 0x20ULL);
+ CLASSERT(OBD_CONNECT_REQPORTAL == 0x40ULL);
+ CLASSERT(OBD_CONNECT_ACL == 0x80ULL);
+ CLASSERT(OBD_CONNECT_XATTR == 0x100ULL);
CLASSERT(OBD_CONNECT_REAL == 0x08000000ULL);
CLASSERT(OBD_CONNECT_CKSUM == 0x20000000ULL);
- CLASSERT(OBD_CONNECT_TRUNCLOCK == 0x00000400ULL);
- CLASSERT(OBD_CONNECT_IBITS == 0x00001000ULL);
- CLASSERT(OBD_CONNECT_JOIN == 0x00002000ULL);
- CLASSERT(OBD_CONNECT_ATTRFID == 0x00004000ULL);
- CLASSERT(OBD_CONNECT_NODEVOH == 0x00008000ULL);
+ CLASSERT(OBD_CONNECT_TRUNCLOCK == 0x400ULL);
+ CLASSERT(OBD_CONNECT_IBITS == 0x1000ULL);
+ CLASSERT(OBD_CONNECT_JOIN == 0x2000ULL);
+ CLASSERT(OBD_CONNECT_ATTRFID == 0x4000ULL);
+ CLASSERT(OBD_CONNECT_NODEVOH == 0x8000ULL);
CLASSERT(OBD_CONNECT_RMT_CLIENT == 0x00010000ULL);
CLASSERT(OBD_CONNECT_RMT_CLIENT_FORCE == 0x00020000ULL);
- CLASSERT(OBD_CONNECT_BRW_SIZE == 0x00040000ULL);
- CLASSERT(OBD_CONNECT_QUOTA64 == 0x00080000ULL);
- CLASSERT(OBD_CONNECT_MDS_CAPA == 0x00100000ULL);
- CLASSERT(OBD_CONNECT_OSS_CAPA == 0x00200000ULL);
+ CLASSERT(OBD_CONNECT_BRW_SIZE == 0x40000ULL);
+ CLASSERT(OBD_CONNECT_QUOTA64 == 0x80000ULL);
+ CLASSERT(OBD_CONNECT_MDS_CAPA == 0x100000ULL);
+ CLASSERT(OBD_CONNECT_OSS_CAPA == 0x200000ULL);
CLASSERT(OBD_CONNECT_MDS_MDS == 0x04000000ULL);
CLASSERT(OBD_CONNECT_SOM == 0x00800000ULL);
CLASSERT(OBD_CONNECT_AT == 0x01000000ULL);
- CLASSERT(OBD_CONNECT_CANCELSET == 0x00400000ULL);
+ CLASSERT(OBD_CONNECT_CANCELSET == 0x400000ULL);
CLASSERT(OBD_CONNECT_LRU_RESIZE == 0x02000000ULL);
/* Checks for struct obdo */
CLASSERT(FIEMAP_FLAG_DEVICE_ORDER == 0x40000000);
/* Checks for struct ll_fiemap_extent */
- LASSERTF((int)sizeof(struct ll_fiemap_extent) == 32, " found %lld\n",
+ LASSERTF((int)sizeof(struct ll_fiemap_extent) == 56, " found %lld\n",
(long long)(int)sizeof(struct ll_fiemap_extent));
LASSERTF((int)offsetof(struct ll_fiemap_extent, fe_logical) == 0, " found %lld\n",
(long long)(int)offsetof(struct ll_fiemap_extent, fe_logical));
(long long)(int)offsetof(struct ll_fiemap_extent, fe_length));
LASSERTF((int)sizeof(((struct ll_fiemap_extent *)0)->fe_length) == 8, " found %lld\n",
(long long)(int)sizeof(((struct ll_fiemap_extent *)0)->fe_length));
- LASSERTF((int)offsetof(struct ll_fiemap_extent, fe_flags) == 24, " found %lld\n",
+ LASSERTF((int)offsetof(struct ll_fiemap_extent, fe_flags) == 40, " found %lld\n",
(long long)(int)offsetof(struct ll_fiemap_extent, fe_flags));
LASSERTF((int)sizeof(((struct ll_fiemap_extent *)0)->fe_flags) == 4, " found %lld\n",
(long long)(int)sizeof(((struct ll_fiemap_extent *)0)->fe_flags));
- LASSERTF((int)offsetof(struct ll_fiemap_extent, fe_device) == 28, " found %lld\n",
+ LASSERTF((int)offsetof(struct ll_fiemap_extent, fe_device) == 44, " found %lld\n",
(long long)(int)offsetof(struct ll_fiemap_extent, fe_device));
LASSERTF((int)sizeof(((struct ll_fiemap_extent *)0)->fe_device) == 4, " found %lld\n",
(long long)(int)sizeof(((struct ll_fiemap_extent *)0)->fe_device));
CLASSERT(FIEMAP_EXTENT_LAST == 0x00000001);
CLASSERT(FIEMAP_EXTENT_UNKNOWN == 0x00000002);
CLASSERT(FIEMAP_EXTENT_DELALLOC == 0x00000004);
- CLASSERT(FIEMAP_EXTENT_NO_DIRECT == 0x00000008);
- CLASSERT(FIEMAP_EXTENT_SECONDARY == 0x00000010);
- CLASSERT(FIEMAP_EXTENT_NET == 0x00000020);
- CLASSERT(FIEMAP_EXTENT_DATA_COMPRESSED == 0x00000040);
+ CLASSERT(FIEMAP_EXTENT_ENCODED == 0x00000008);
CLASSERT(FIEMAP_EXTENT_DATA_ENCRYPTED == 0x00000080);
CLASSERT(FIEMAP_EXTENT_NOT_ALIGNED == 0x00000100);
CLASSERT(FIEMAP_EXTENT_DATA_INLINE == 0x00000200);
CLASSERT(FIEMAP_EXTENT_DATA_TAIL == 0x00000400);
CLASSERT(FIEMAP_EXTENT_UNWRITTEN == 0x00000800);
CLASSERT(FIEMAP_EXTENT_MERGED == 0x00001000);
+ CLASSERT(FIEMAP_EXTENT_NO_DIRECT == 0x40000000);
+ CLASSERT(FIEMAP_EXTENT_NET == 0x80000000);
#ifdef LIBLUSTRE_POSIX_ACL
/* Checks for type posix_acl_xattr_entry */
lquota-objs := quota_check.o quota_context.o quota_ctl.o quota_interface.o
lquota-objs += quota_master.o quota_adjust_qunit.o lproc_quota.o
+EXTRA_DIST := $(lquota-objs:%.o=%.c) $(quotactl-objs:%.o=%.c) $(quotacheck-objs:%.o=%.c) quota_internal.h
+
@INCLUDE_RULES@
endif
MOSTLYCLEANFILES := @MOSTLYCLEANFILES@
-DIST_SOURCES := $(lquota-objs:%.o=%.c) quota_internal.h
if (rc > 0) {
rc = qctxt_adjust_qunit(obd, qctxt, uid, gid, 1, 0, NULL);
- if (rc == -EDQUOT || rc == -EBUSY || rc == -EAGAIN) {
+ if (rc == -EDQUOT || rc == -EBUSY ||
+ rc == QUOTA_REQ_RETURNED || rc == -EAGAIN) {
CDEBUG(D_QUOTA, "rc: %d.\n", rc);
rc = 0;
}
pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
rc = target_quotacheck_callback(exp, oqctl);
+ class_export_put(exp);
atomic_inc(qta->qta_sem);
}
}
+ /* take a reference on exp because target_quotacheck_callback() will
+ * use this export later b=18126 */
+ class_export_get(exp);
rc = kernel_thread(target_quotacheck_thread, qta, CLONE_VM|CLONE_FILES);
if (rc >= 0) {
CDEBUG(D_INFO, "%s: target_quotacheck_thread: %d\n",
RETURN(0);
}
+ class_export_put(exp);
CERROR("%s: error starting quotacheck_thread: %d\n",
obd->obd_name, rc);
OBD_FREE_PTR(qta);
ENTRY;
for (i = 0; i < lov->desc.ld_tgt_count; i++) {
- int err;
-
if (!lov->lov_tgts[i] || !lov->lov_tgts[i]->ltd_active) {
CERROR("lov idx %d inactive\n", i);
RETURN(-EIO);
}
+ }
+
+ for (i = 0; i < lov->desc.ld_tgt_count; i++) {
+ int err;
err = obd_quotacheck(lov->lov_tgts[i]->ltd_exp, oqctl);
if (err && !rc)
compute_lqs_after_removing_qunit(qunit);
- /* wake up all waiters */
+ if (rc == 0)
+ rc = QUOTA_REQ_RETURNED;
QUNIT_SET_STATE_AND_RC(qunit, QUNIT_FINISHED, rc);
+ /* wake up all waiters */
wake_up_all(&qunit->lq_waitq);
/* this is for dqacq_in_flight() */
CERROR("adjust slave's qunit size failed!(rc:%d)\n", rc1);
RETURN(rc1);
}
- if (err || (rc && rc != -EBUSY && rc1 == 0) || is_master(qctxt))
+ if (err || (rc < 0 && rc != -EBUSY && rc1 == 0) || is_master(qctxt))
RETURN(err);
/* reschedule another dqacq/dqrel if needed */
RETURN(rc);
}
+/* wake up all waiting threads when lqc_import is NULL */
+void dqacq_interrupt(struct lustre_quota_ctxt *qctxt)
+{
+ struct lustre_qunit *qunit, *tmp;
+ int i;
+ ENTRY;
+
+ spin_lock(&qunit_hash_lock);
+ for (i = 0; i < NR_DQHASH; i++) {
+ list_for_each_entry_safe(qunit, tmp, &qunit_hash[i], lq_hash) {
+ if (qunit->lq_ctxt != qctxt)
+ continue;
+
+ /* Wake up all waiters. Do not change lq_state.
+ * The waiters will check lq_rc, which is left at 0 if
+ * nobody else changes it; in that case the waiters return
+ * -EAGAIN to the caller, which can redo the quota
+ * acq/rel if necessary. */
+ wake_up_all(&qunit->lq_waitq);
+ }
+ }
+ spin_unlock(&qunit_hash_lock);
+ EXIT;
+}
+
static int got_qunit(struct lustre_qunit *qunit)
{
- int rc;
+ struct lustre_quota_ctxt *qctxt = qunit->lq_ctxt;
+ int rc = 0;
ENTRY;
spin_lock(&qunit->lq_lock);
switch (qunit->lq_state) {
case QUNIT_IN_HASH:
case QUNIT_RM_FROM_HASH:
- rc = 0;
break;
case QUNIT_FINISHED:
rc = 1;
break;
default:
- rc = 0;
CERROR("invalid qunit state %d\n", qunit->lq_state);
}
spin_unlock(&qunit->lq_lock);
+
+ if (!rc) {
+ spin_lock(&qctxt->lqc_lock);
+ rc = !qctxt->lqc_import || !qctxt->lqc_valid;
+ spin_unlock(&qctxt->lqc_lock);
+ }
+
RETURN(rc);
}
QDATA_DEBUG(p, "qunit(%p) is waiting for dqacq.\n", qunit);
l_wait_event(qunit->lq_waitq, got_qunit(qunit), &lwi);
- /* rc = -EAGAIN, it means a quota req is finished;
+ /* rc = -EAGAIN, it means the quota master isn't ready yet
+ * rc = QUOTA_REQ_RETURNED, it means a quota req is finished;
* rc = -EDQUOT, it means out of quota
* rc = -EBUSY, it means recovery is happening
* other rc < 0, it means real errors; functions that call
* schedule_dqacq should take care of this */
spin_lock(&qunit->lq_lock);
- if (qunit->lq_rc == 0)
- rc = -EAGAIN;
- else
- rc = qunit->lq_rc;
+ rc = qunit->lq_rc;
spin_unlock(&qunit->lq_lock);
CDEBUG(D_QUOTA, "qunit(%p) finishes waiting. (rc:%d)\n",
qunit, rc);
qunit, qunit->lq_rc);
/* keep same as schedule_dqacq() b=17030 */
spin_lock(&qunit->lq_lock);
- if (qunit->lq_rc == 0)
- rc = -EAGAIN;
- else
- rc = qunit->lq_rc;
+ rc = qunit->lq_rc;
spin_unlock(&qunit->lq_lock);
/* this is for dqacq_in_flight() */
qunit_put(qunit);
spin_lock(&qctxt->lqc_lock);
qctxt->lqc_import = NULL;
spin_unlock(&qctxt->lqc_lock);
+ dqacq_interrupt(qctxt);
CDEBUG(D_QUOTA, "%s: lqc_import of obd(%p) is invalid now.\n",
obd->obd_name, obd);
}
/* please reference to dqacq_completion for the below */
/* a new request is finished, try again */
- if (rc == -EAGAIN) {
+ if (rc == QUOTA_REQ_RETURNED) {
CDEBUG(D_QUOTA, "finish a quota req, try again\n");
continue;
}
struct lustre_quota_ctxt *qctxt, struct qunit_data *qdata,
int isblk);
int check_qm(struct lustre_quota_ctxt *qctxt);
+void dqacq_interrupt(struct lustre_quota_ctxt *qctxt);
/* quota_master.c */
int lustre_dquot_init(void);
void lustre_dquot_exit(void);
#define LQS_INO_DECREASE 4
#define LQS_INO_INCREASE 8
+/* the return status of quota operation */
+#define QUOTA_REQ_RETURNED 1
#endif
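/*
 * A self-contained sketch (stub names hypothetical) of how the new
 * QUOTA_REQ_RETURNED convention is consumed: positive means "a request
 * completed, retry", zero means "no result yet", negative is a real error.
 */
#include <stdio.h>

#define QUOTA_REQ_RETURNED 1            /* mirrors the define above */

static int fake_dqacq(int *rounds)      /* hypothetical stand-in */
{
        return (*rounds)-- > 0 ? QUOTA_REQ_RETURNED : 0;
}

int main(void)
{
        int rounds = 2, rc;

        do {
                rc = fake_dqacq(&rounds);
        } while (rc == QUOTA_REQ_RETURNED);     /* a req finished: retry */

        /* in the kernel code rc may instead end as -EDQUOT, -EBUSY or
         * another real error, which callers of schedule_dqacq handle */
        printf("final rc = %d\n", rc);
        return 0;
}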
int client_quota_adjust_qunit(struct obd_export *exp,
}
if (rc2)
- CDEBUG(rc2 == -EAGAIN ? D_QUOTA: D_ERROR,
- "mds adjust qunit failed! (opc:%d rc:%d)\n", opc, rc2);
+ CDEBUG(rc2 == QUOTA_REQ_RETURNED ? D_QUOTA: D_ERROR,
+ "mds adjust qunit %ssuccessfully! (opc:%d rc:%d)\n",
+ rc2 == QUOTA_REQ_RETURNED ? "" : "un", opc, rc2);
RETURN(0);
}
if (rc || rc2) {
if (!rc)
rc = rc2;
- CDEBUG(rc == -EAGAIN ? D_QUOTA: D_ERROR,
- "filter adjust qunit failed! (opc:%d rc%d)\n",
- opc, rc);
+ CDEBUG(rc == QUOTA_REQ_RETURNED ? D_QUOTA: D_ERROR,
+ "filter adjust qunit %ssuccessfully! (opc:%d rc%d)\n",
+ QUOTA_REQ_RETURNED ? "" : "un", opc, rc);
}
RETURN(0);
local MOUNTED=$(mounted_lustre_filesystems)
if $(echo $MOUNTED | grep -w -q $MOUNT); then
check_config $MOUNT
- init_versions_vars
+ init_param_vars
return
fi
check_mount || return 41
echo "check journal size..."
- local FOUNDSIZE=`do_facet mds "$$DEBUGFS -c -R 'stat <8>' $MDSDEV" | awk '/Size: / { print $NF; exit;}'`
+ local FOUNDSIZE=`do_facet mds "$DEBUGFS -c -R 'stat <8>' $MDSDEV" | awk '/Size: / { print $NF; exit;}'`
if [ $FOUNDSIZE -gt $((32 * 1024 * 1024)) ]; then
log "Success: mkfs creates large journals. Size: $((FOUNDSIZE >> 20))M"
else
}
run_test 26 "MDT startup failure cleans LOV (should return errs)"
-wait_update () {
- local node=$1
- local TEST=$2
- local FINAL=$3
-
- local RESULT
- local MAX=90
- local WAIT=0
- local sleep=5
- while [ $WAIT -lt $MAX ]; do
- RESULT=$(do_node $node "$TEST")
- if [ $RESULT -eq $FINAL ]; then
- echo "Updated config after $WAIT sec: wanted $FINAL got $RESULT"
- return 0
- fi
- WAIT=$((WAIT + sleep))
- echo "Waiting $((MAX - WAIT)) secs for config update"
- sleep $sleep
- done
- echo "Config update not seen after $MAX sec: wanted $FINAL got $RESULT"
- return 3
-}
-
set_and_check() {
local myfacet=$1
local TEST=$2
TESTDIR=$MOUNT
# Requirements
+# set NUM_FILES=0 to force TIME_PERIOD to take effect
+NUM_FILES=${NUM_FILES:-1000000}
TIME_PERIOD=${TIME_PERIOD:-600} # seconds
-SINGLE_TARGET_RATE=$((1300 / OSTCOUNT)) # ops/sec
-AGGREGATE_TARGET_RATE=$((7000 / OSTCOUNT)) # ops/sec
# Local test variables
TESTDIR_SINGLE="${TESTDIR}/single"
check_and_setup_lustre
+IFree=$(inodes_available)
+if [ $IFree -lt $NUM_FILES ]; then
+ NUM_FILES=$IFree
+fi
+
generate_machine_file $NODES_TO_USE $MACHINEFILE || error "can not generate machinefile"
$LFS setstripe $TESTDIR -c -1
echo "Running creates on 1 node(s)."
COMMAND="${MDSRATE} ${MDSRATE_DEBUG} --create --time ${TIME_PERIOD}
- --dir ${TESTDIR_SINGLE} --filefmt 'f%%d'"
+ --nfiles ${NUM_FILES} --dir ${TESTDIR_SINGLE} --filefmt 'f%%d'"
echo "+ ${COMMAND}"
mpi_run -np 1 -machinefile ${MACHINEFILE} ${COMMAND} | tee ${LOG}
error "mpirun ... mdsrate ... failed, aborting"
fi
- check_rate create ${SINGLE_TARGET_RATE} 1 ${LOG} || true
-
log "===== $0 ### 1 NODE UNLINK ###"
echo "Running unlinks on 1 node(s)."
- let NUM_FILES=${SINGLE_TARGET_RATE}\*${TIME_PERIOD}
COMMAND="${MDSRATE} ${MDSRATE_DEBUG} --unlink --time ${TIME_PERIOD}
- --nfiles ${NUM_FILES} --dir ${TESTDIR_SINGLE} --filefmt 'f%%d'"
+ --nfiles ${NUM_FILES} --dir ${TESTDIR_SINGLE} --filefmt 'f%%d'"
echo "+ ${COMMAND}"
mpi_run -np 1 -machinefile ${MACHINEFILE} ${COMMAND} | tee ${LOG}
[ -f $LOG ] && cat $LOG
error "mpirun ... mdsrate ... failed, aborting"
fi
+fi
- check_rate unlink ${SINGLE_TARGET_RATE} 1 ${LOG} || true
+IFree=$(inodes_available)
+if [ $IFree -lt $NUM_FILES ]; then
+ NUM_FILES=$IFree
fi
if [ -n "$NOMULTI" ]; then
echo "Running creates on ${NUM_CLIENTS} node(s)."
COMMAND="${MDSRATE} ${MDSRATE_DEBUG} --create --time ${TIME_PERIOD}
- --dir ${TESTDIR_MULTI} --filefmt 'f%%d'"
+ --nfiles $NUM_FILES --dir ${TESTDIR_MULTI} --filefmt 'f%%d'"
echo "+ ${COMMAND}"
mpi_run -np ${NUM_CLIENTS} -machinefile ${MACHINEFILE} ${COMMAND} | tee ${LOG}
error "mpirun ... mdsrate ... failed, aborting"
fi
- check_rate create ${AGGREGATE_TARGET_RATE} ${NUM_CLIENTS} ${LOG} || true
-
echo "Running unlinks on ${NUM_CLIENTS} node(s)."
- let NUM_FILES=${AGGREGATE_TARGET_RATE}\*${TIME_PERIOD}
COMMAND="${MDSRATE} ${MDSRATE_DEBUG} --unlink --time ${TIME_PERIOD}
- --nfiles ${NUM_FILES} --dir ${TESTDIR_MULTI} --filefmt 'f%%d'"
+ --nfiles ${NUM_FILES} --dir ${TESTDIR_MULTI} --filefmt 'f%%d'"
echo "+ ${COMMAND}"
mpi_run -np ${NUM_CLIENTS} -machinefile ${MACHINEFILE} ${COMMAND} | tee ${LOG}
error "mpirun ... mdsrate ... failed, aborting"
fi
- check_rate unlink ${AGGREGATE_TARGET_RATE} ${NUM_CLIENTS} ${LOG} || true
fi
equals_msg `basename $0`: test complete, cleaning up
rm -f $MACHINEFILE
-zconf_umount_clients $NODES_TO_USE $MOUNT
check_and_cleanup_lustre
#rm -f $LOG
TESTDIR=$MOUNT
# Requirements
-# The default number of stripes per file is set to 1 in test3/run_test.sh.
+NUM_FILES=${NUM_FILES:-1000000}
TIME_PERIOD=${TIME_PERIOD:-600} # seconds
-SINGLE_TARGET_RATE=1400 # ops/sec
-AGGREGATE_TARGET_RATE=10000 # ops/sec
# Local test variables
TESTDIR_SINGLE="${TESTDIR}/single"
check_and_setup_lustre
+IFree=$(inodes_available)
+if [ $IFree -lt $NUM_FILES ]; then
+ NUM_FILES=$IFree
+fi
+
generate_machine_file $NODES_TO_USE $MACHINEFILE || error "can not generate machinefile"
$LFS setstripe $TESTDIR -i 0 -c 1
echo "Running creates on 1 node(s)."
COMMAND="${MDSRATE} ${MDSRATE_DEBUG} --create --time ${TIME_PERIOD}
- --dir ${TESTDIR_SINGLE} --filefmt 'f%%d'"
+ --nfiles $NUM_FILES --dir ${TESTDIR_SINGLE} --filefmt 'f%%d'"
echo "+ ${COMMAND}"
mpi_run -np 1 -machinefile ${MACHINEFILE} ${COMMAND} | tee ${LOG}
[ -f $LOG ] && cat $LOG
error "mpirun ... mdsrate ... failed, aborting"
fi
- check_rate create ${SINGLE_TARGET_RATE} 1 ${LOG} || true
fi
if [ -n "$NOUNLINK" ]; then
log "===== $0 ### 1 NODE UNLINK ###"
echo "Running unlinks on 1 node(s)."
- let NUM_FILES=${SINGLE_TARGET_RATE}\*${TIME_PERIOD}
COMMAND="${MDSRATE} ${MDSRATE_DEBUG} --unlink --time ${TIME_PERIOD}
--nfiles ${NUM_FILES} --dir ${TESTDIR_SINGLE} --filefmt 'f%%d'"
echo "+ ${COMMAND}"
[ -f $LOG ] && cat $LOG
error "mpirun ... mdsrate ... failed, aborting"
fi
- check_rate unlink ${SINGLE_TARGET_RATE} 1 ${LOG} || true
fi
fi
+IFree=$(inodes_available)
+if [ $IFree -lt $NUM_FILES ]; then
+ NUM_FILES=$IFree
+fi
+
if [ -n "$NOMULTI" ]; then
echo "NO tests on multiple nodes."
else
echo "Running creates on ${NUM_CLIENTS} node(s) with $THREADS_PER_CLIENT threads per client."
COMMAND="${MDSRATE} ${MDSRATE_DEBUG} --create --time ${TIME_PERIOD}
- --dir ${TESTDIR_MULTI} --filefmt 'f%%d'"
+ --nfiles $NUM_FILES --dir ${TESTDIR_MULTI} --filefmt 'f%%d'"
echo "+ ${COMMAND}"
mpi_run -np $((NUM_CLIENTS * THREADS_PER_CLIENT)) -machinefile ${MACHINEFILE} \
${COMMAND} | tee ${LOG}
[ -f $LOG ] && cat $LOG
error "mpirun ... mdsrate ... failed, aborting"
fi
- check_rate create ${AGGREGATE_TARGET_RATE} ${NUM_CLIENTS} ${LOG} || true
fi
if [ -n "$NOUNLINK" ]; then
log "===== $0 ### $NUM_CLIENTS NODES UNLINK ###"
echo "Running unlinks on ${NUM_CLIENTS} node(s) with $THREADS_PER_CLIENT threads per client."
- let NUM_FILES=${AGGREGATE_TARGET_RATE}\*${TIME_PERIOD}
COMMAND="${MDSRATE} ${MDSRATE_DEBUG} --unlink --time ${TIME_PERIOD}
--nfiles ${NUM_FILES} --dir ${TESTDIR_MULTI} --filefmt 'f%%d'"
echo "+ ${COMMAND}"
[ -f $LOG ] && cat $LOG
error "mpirun ... mdsrate ... failed, aborting"
fi
- check_rate unlink ${AGGREGATE_TARGET_RATE} ${NUM_CLIENTS} ${LOG} || true
fi
fi
equals_msg `basename $0`: test complete, cleaning up
rm -f $MACHINEFILE
-zconf_umount_clients $NODES_TO_USE $MOUNT
check_and_cleanup_lustre
#rm -f $LOG
# Requirements
NUM_FILES=${NUM_FILES:-1000000}
TIME_PERIOD=${TIME_PERIOD:-600} # seconds
-SINGLE_TARGET_RATE=5900 # ops/sec
-AGGREGATE_TARGET_RATE=62000 # ops/sec
LOG=${TESTSUITELOG:-$TMP/$(basename $0 .sh).log}
CLIENT=$SINGLECLIENT
check_and_setup_lustre
+IFree=$(inodes_available)
+if [ $IFree -lt $NUM_FILES ]; then
+ NUM_FILES=$IFree
+fi
+
generate_machine_file $NODES_TO_USE $MACHINEFILE || error "can not generate machinefile"
$LFS setstripe $TESTDIR -c 1
[ -f $LOG ] && cat $LOG
error "mpirun ... mdsrate ... failed, aborting"
fi
- check_rate lookup ${SINGLE_TARGET_RATE} 1 ${LOG} || true
fi
# 2
[ -f $LOG ] && cat $LOG
error "mpirun ... mdsrate ... failed, aborting"
fi
- check_rate lookup ${AGGREGATE_TARGET_RATE} ${NUM_CLIENTS} ${LOG} || true
fi
equals_msg `basename $0`: test complete, cleaning up
+mdsrate_cleanup $NUM_CLIENTS $MACHINEFILE $NUM_FILES $TESTDIR 'f%%d'
rm -f $MACHINEFILE
-zconf_umount_clients $NODES_TO_USE $MOUNT
check_and_cleanup_lustre
#rm -f $LOG
# Requirements
NUM_FILES=${NUM_FILES:-1000000}
TIME_PERIOD=${TIME_PERIOD:-600} # seconds
-SINGLE_TARGET_RATE=$((3300 / OSTCOUNT)) # ops/sec
-AGGREGATE_TARGET_RATE=$((28500 / OSTCOUNT)) # ops/sec
# --random_order (default) -OR- --readdir_order
DIR_ORDER=${DIR_ORDER:-"--readdir_order"}
check_and_setup_lustre
+IFree=$(inodes_available)
+if [ $IFree -lt $NUM_FILES ]; then
+ NUM_FILES=$IFree
+fi
+
generate_machine_file $NODES_TO_USE $MACHINEFILE || error "can not generate machinefile"
$LFS setstripe $TESTDIR -c -1
[ -f $LOG ] && cat $LOG
error "mpirun ... mdsrate ... failed, aborting"
fi
- check_rate stat ${SINGLE_TARGET_RATE} 1 ${LOG} || true
fi
# 2
[ -f $LOG ] && cat $LOG
error "mpirun ... mdsrate ... failed, aborting"
fi
- check_rate stat ${AGGREGATE_TARGET_RATE} ${NUM_CLIENTS} ${LOG} || true
fi
equals_msg `basename $0`: test complete, cleaning up
+mdsrate_cleanup $NUM_CLIENTS $MACHINEFILE $NUM_FILES $TESTDIR 'f%%d'
rm -f $MACHINEFILE
-zconf_umount_clients $NODES_TO_USE $MOUNT
check_and_cleanup_lustre
#rm -f $LOG
# Requirements
NUM_FILES=${NUM_FILES:-1000000}
TIME_PERIOD=${TIME_PERIOD:-600} # seconds
-SINGLE_TARGET_RATE=3200 # ops/sec
-AGGREGATE_TARGET_RATE=29000 # ops/sec
# --random_order (default) -OR- --readdir_order
DIR_ORDER=${DIR_ORDER:-"--readdir_order"}
check_and_setup_lustre
+IFree=$(inodes_available)
+if [ $IFree -lt $NUM_FILES ]; then
+ NUM_FILES=$IFree
+fi
+
generate_machine_file $NODES_TO_USE $MACHINEFILE || error "can not generate machinefile"
$LFS setstripe $TESTDIR -i 0 -c 1
[ -f $LOG ] && cat $LOG
error "mpirun ... mdsrate ... failed, aborting"
fi
- check_rate stat ${SINGLE_TARGET_RATE} 1 ${LOG} || true
fi
# 2
[ -f $LOG ] && cat $LOG
error "mpirun ... mdsrate ... failed, aborting"
fi
- check_rate stat ${AGGREGATE_TARGET_RATE} ${NUM_CLIENTS} ${LOG} || true
fi
equals_msg `basename $0`: test complete, cleaning up
+mdsrate_cleanup $NUM_CLIENTS $MACHINEFILE $NUM_FILES $TESTDIR 'f%%d'
rm -f $MACHINEFILE
-zconf_umount_clients $NODES_TO_USE $MOUNT
check_and_cleanup_lustre
#rm -f $LOG
}
run_test 24 "fsync error (should return error)"
+wait_client_evicted () {
+ local facet=$1
+ local exports=$2
+ local varsvc=${facet}_svc
+
+ wait_update $(facet_host $facet) "lctl get_param -n *.${!varsvc}.num_exports | cut -d' ' -f2" $((exports - 1)) $3
+}
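+
+# Worked example of the bound passed below: with TIMEOUT=20 the polling
+# window $((TIMEOUT * 2 + TIMEOUT * 3 / 4)) is 55s, which covers the
+# evictor's PING_EVICT_TIMEOUT plus a ping interval or two (PING_INTERVAL
+# is max(obd_timeout / 4, 1)).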
+
test_26a() { # was test_26 bug 5921 - evict dead exports by pinger
# this test can only run from a client on a separate node.
remote_ost || { skip "local OST" && return 0; }
remote_ost_nodsh && skip "remote OST with nodsh" && return 0
remote_mds || { skip "local MDS" && return 0; }
- OST_FILE=obdfilter.${ost1_svc}.num_exports
- OST_EXP="`do_facet ost1 lctl get_param -n $OST_FILE`"
- OST_NEXP1=`echo $OST_EXP | cut -d' ' -f2`
- echo starting with $OST_NEXP1 OST exports
+
+ check_timeout || return 1
+
+ local OST_NEXP=$(do_facet ost1 lctl get_param -n obdfilter.${ost1_svc}.num_exports | cut -d' ' -f2)
+
+ echo starting with $OST_NEXP OST exports
# OBD_FAIL_PTLRPC_DROP_RPC 0x505
do_facet client lctl set_param fail_loc=0x505
# evictor takes PING_EVICT_TIMEOUT + 3 * PING_INTERVAL to evict.
# But if there's a race to start the evictor from various obds,
# the loser might have to wait for the next ping.
- echo Waiting for $(($TIMEOUT * 8)) secs
- sleep $(($TIMEOUT * 8))
- OST_EXP="`do_facet ost1 lctl get_param -n $OST_FILE`"
- OST_NEXP2=`echo $OST_EXP | cut -d' ' -f2`
- echo ending with $OST_NEXP2 OST exports
+
+ local rc=0
+ wait_client_evicted ost1 $OST_NEXP $((TIMEOUT * 2 + TIMEOUT * 3 / 4))
+ rc=$?
do_facet client lctl set_param fail_loc=0x0
- [ $OST_NEXP1 -le $OST_NEXP2 ] && error "client not evicted"
- return 0
+ [ $rc -eq 0 ] || error "client not evicted from OST"
}
run_test 26a "evict dead exports"
test_26b() { # bug 10140 - evict dead exports by pinger
remote_ost_nodsh && skip "remote OST with nodsh" && return 0
+ check_timeout || return 1
client_df
- zconf_mount `hostname` $MOUNT2 || error "Failed to mount $MOUNT2"
- sleep 1 # wait connections being established
- MDS_FILE=mdt.${mds1_svc}.num_exports
- MDS_NEXP1="`do_facet $SINGLEMDS lctl get_param -n $MDS_FILE | cut -d' ' -f2`"
- OST_FILE=obdfilter.${ost1_svc}.num_exports
- OST_NEXP1="`do_facet ost1 lctl get_param -n $OST_FILE | cut -d' ' -f2`"
- echo starting with $OST_NEXP1 OST and $MDS_NEXP1 MDS exports
- zconf_umount `hostname` $MOUNT2 -f
- # evictor takes PING_EVICT_TIMEOUT + 3 * PING_INTERVAL to evict.
- # But if there's a race to start the evictor from various obds,
- # the loser might have to wait for the next ping.
- echo Waiting for $(($TIMEOUT * 3)) secs
- sleep $(($TIMEOUT * 3))
- OST_NEXP2="`do_facet ost1 lctl get_param -n $OST_FILE | cut -d' ' -f2`"
- MDS_NEXP2="`do_facet $SINGLEMDS lctl get_param -n $MDS_FILE | cut -d' ' -f2`"
- echo ending with $OST_NEXP2 OST and $MDS_NEXP2 MDS exports
- [ $OST_NEXP1 -le $OST_NEXP2 ] && error "client not evicted from OST"
- [ $MDS_NEXP1 -le $MDS_NEXP2 ] && error "client not evicted from MDS"
- return 0
+ zconf_mount `hostname` $MOUNT2 || error "Failed to mount $MOUNT2"
+ sleep 1 # wait connections being established
+
+ local MDS_NEXP=$(do_facet $SINGLEMDS lctl get_param -n mdt.${mds1_svc}.num_exports | cut -d' ' -f2)
+ local OST_NEXP=$(do_facet ost1 lctl get_param -n obdfilter.${ost1_svc}.num_exports | cut -d' ' -f2)
+
+ echo starting with $OST_NEXP OST and $MDS_NEXP MDS exports
+
+ zconf_umount `hostname` $MOUNT2 -f
+
+ # evictor takes PING_EVICT_TIMEOUT + 3 * PING_INTERVAL to evict.
+ # But if there's a race to start the evictor from various obds,
+ # the loser might have to wait for the next ping.
+ # PING_INTERVAL max(obd_timeout / 4, 1U)
+ # sleep (2*PING_INTERVAL)
+
+ local rc=0
+ wait_client_evicted ost1 $OST_NEXP $((TIMEOUT * 2 + TIMEOUT * 3 / 4)) || \
+ error "Client was not evicted by ost" rc=1
+ wait_client_evicted $SINGLEMDS $MDS_NEXP $((TIMEOUT * 2 + TIMEOUT * 3 / 4)) || \
+ error "Client was not evicted by mds"
}
run_test 26b "evict dead exports"
remote_mds_nodsh && log "SKIP: remote MDS with nodsh" && exit 0
# Skip these tests
-# bug number: 17466
-ALWAYS_EXCEPT="61d $REPLAY_SINGLE_EXCEPT"
+# bug number: 17466 15962
+ALWAYS_EXCEPT="61d 33b $REPLAY_SINGLE_EXCEPT"
if [ "$FAILURE_MODE" = "HARD" ] && mixed_ost_devs; then
CONFIG_EXCEPTIONS="0b 42 47 61a 61c"
}
run_test 33a "abort recovery before client does replay"
-# Stale FID sequence
+# Stale FID sequence bug 15962
test_33b() { # was test_33a
replay_barrier $SINGLEMDS
createmany -o $DIR/$tfile-%d 10
run_test 53a "|X| close request while two MDC requests in flight"
test_53b() {
+ rm -rf $DIR/${tdir}-1 $DIR/${tdir}-2
+
mkdir -p $DIR/${tdir}-1
mkdir -p $DIR/${tdir}-2
multiop $DIR/${tdir}-1/f O_c &
run_test 53b "|X| open request while two MDC requests in flight"
test_53c() {
+ rm -rf $DIR/${tdir}-1 $DIR/${tdir}-2
+
mkdir -p $DIR/${tdir}-1
mkdir -p $DIR/${tdir}-2
multiop $DIR/${tdir}-1/f O_c &
run_test 53c "|X| open request and close request while two MDC requests in flight"
test_53d() {
+ rm -rf $DIR/${tdir}-1 $DIR/${tdir}-2
+
mkdir -p $DIR/${tdir}-1
mkdir -p $DIR/${tdir}-2
multiop $DIR/${tdir}-1/f O_c &
run_test 53d "|X| close reply while two MDC requests in flight"
test_53e() {
+ rm -rf $DIR/${tdir}-1 $DIR/${tdir}-2
+
mkdir -p $DIR/${tdir}-1
mkdir -p $DIR/${tdir}-2
multiop $DIR/${tdir}-1/f O_c &
run_test 53e "|X| open reply while two MDC requests in flight"
test_53f() {
+ rm -rf $DIR/${tdir}-1 $DIR/${tdir}-2
+
mkdir -p $DIR/${tdir}-1
mkdir -p $DIR/${tdir}-2
multiop $DIR/${tdir}-1/f O_c &
run_test 53f "|X| open reply and close reply while two MDC requests in flight"
test_53g() {
+ rm -rf $DIR/${tdir}-1 $DIR/${tdir}-2
+
mkdir -p $DIR/${tdir}-1
mkdir -p $DIR/${tdir}-2
multiop $DIR/${tdir}-1/f O_c &
run_test 53g "|X| drop open reply and close request while close and open are both in flight"
test_53h() {
+ rm -rf $DIR/${tdir}-1 $DIR/${tdir}-2
+
mkdir -p $DIR/${tdir}-1
mkdir -p $DIR/${tdir}-2
multiop $DIR/${tdir}-1/f O_c &
#Adaptive Timeouts (bug 3055)
AT_MAX_SET=0
-# Suppose that all osts have the same at_max
-for facet in mds client ost; do
- eval AT_MAX_SAVE_${facet}=$(at_max_get $facet)
-done
+
+at_cleanup () {
+ local var
+ local facet
+ local at_new
+
+ echo "Cleaning up AT ..."
+ if [ -n "$ATOLDBASE" ]; then
+ local at_history=$(do_facet mds "find /sys/ -name at_history")
+ do_facet mds "echo $ATOLDBASE >> $at_history" || true
+ do_facet ost1 "echo $ATOLDBASE >> $at_history" || true
+ fi
+
+ if [ $AT_MAX_SET -ne 0 ]; then
+ for facet in mds client ost; do
+ var=AT_MAX_SAVE_${facet}
+ echo restore AT on $facet to saved value ${!var}
+ at_max_set ${!var} $facet
+ at_new=$(at_max_get $facet)
+ echo Restored AT value on $facet $at_new
+ [ $at_new -eq ${!var} ] || \
+ error "$facet : AT value was not restored SAVED ${!var} NEW $at_new"
+ done
+ fi
+}
at_start()
{
return 1
fi
+ # Save at_max original values
+ local facet
+ if [ $AT_MAX_SET -eq 0 ]; then
+ # Suppose that all osts have the same at_max
+ for facet in mds client ost; do
+ eval AT_MAX_SAVE_${facet}=$(at_max_get $facet)
+ done
+ fi
local at_max
-
for facet in mds client ost; do
at_max=$(at_max_get $facet)
if [ $at_max -ne $at_max_new ]; then
}
run_test 68 "AT: verify slowing locks"
-if [ -n "$ATOLDBASE" ]; then
- at_history=$(do_facet mds "find /sys/ -name at_history")
- do_facet mds "echo $ATOLDBASE >> $at_history" || true
- do_facet ost1 "echo $ATOLDBASE >> $at_history" || true
-fi
-
-if [ $AT_MAX_SET -ne 0 ]; then
- for facet in mds client ost; do
- var=AT_MAX_SAVE_${facet}
- echo restore AT on $facet to saved value ${!var}
- at_max_set ${!var} $facet
- AT_NEW=$(at_max_get $facet)
- echo Restored AT value on $facet $AT_NEW
- [ $AT_NEW -ne ${!var} ] && \
- error "$facet : AT value was not restored SAVED ${!var} NEW $AT_NEW"
- done
-fi
-
+at_cleanup
# end of AT tests includes above lines
rm -rf $DIR/[df][0-9]*
-check_runas_id $RUNAS_ID $RUNAS
+check_runas_id $RUNAS_ID $RUNAS_ID $RUNAS
build_test_filter
test_8()
{
- sleep $TIMEOUT
+ local ATHISTORY=$(do_facet mds "find /sys/ -name at_history")
+ local ATOLDBASE=$(do_facet mds "cat $ATHISTORY")
+ do_facet mds "echo 8 >> $ATHISTORY"
+
$LCTL dk > /dev/null
debugsave
sysctl -w lnet.debug="+other"
+ mkdir -p $DIR/d8
+ chmod a+w $DIR/d8
+
+ REQ_DELAY=`lctl get_param -n mdc.${FSNAME}-MDT0000-mdc-*.timeouts |
+ awk '/portal 12/ {print $5}' | tail -1`
+ REQ_DELAY=$((${REQ_DELAY} + ${REQ_DELAY} / 4 + 5))
+
# sleep sometime in ctx handle
- do_facet mds lctl set_param fail_val=30
+ do_facet mds lctl set_param fail_val=$REQ_DELAY
#define OBD_FAIL_SEC_CTX_HDL_PAUSE 0x1204
do_facet mds lctl set_param fail_loc=0x1204
$RUNAS $LFS flushctx || error "can't flush ctx"
- $RUNAS df $DIR &
- DFPID=$!
- echo "waiting df (pid $TOUCHPID) to finish..."
- sleep 2 # give df a chance to really trigger context init rpc
+ $RUNAS touch $DIR/d8/f &
+ TOUCHPID=$!
+ echo "waiting for touch (pid $TOUCHPID) to finish..."
+ sleep 2 # give it a chance to really trigger context init rpc
do_facet mds sysctl -w lustre.fail_loc=0
- wait $DFPID || error "df should have succeeded"
+ wait $TOUCHPID || error "touch should have succeeded"
$LCTL dk | grep "Early reply #" || error "No early reply"
+
debugrestore
+ do_facet mds "echo $ATOLDBASE >> $ATHISTORY" || true
}
run_test 8 "Early reply sent for slow gss context negotiation"
# so each test should not assume any start flavor.
#
-test_50() {
- local sample=$TMP/sanity-gss-8
- local tdir=$MOUNT/dir8
- local iosize="256K"
- local hash_algs="adler32 crc32 md5 sha1 sha256 sha384 sha512 wp256 wp384 wp512"
-
- # create sample file with aligned size for direct i/o
- dd if=/dev/zero of=$sample bs=$iosize count=1 || error
- dd conv=notrunc if=/etc/termcap of=$sample bs=$iosize count=1 || error
-
- rm -rf $tdir
- mkdir $tdir || error "create dir $tdir"
-
- restore_to_default_flavor
-
- for alg in $hash_algs; do
- echo "Testing $alg..."
- flavor=krb5i-bulki:$alg/null
- set_rule $FSNAME any cli2ost $flavor
- wait_flavor cli2ost $flavor $cnt_cli2ost
-
- dd if=$sample of=$tdir/$alg oflag=direct,dsync bs=$iosize || error "$alg write"
- diff $sample $tdir/$alg || error "$alg read"
- done
-
- rm -rf $tdir
- rm -f $sample
-}
-run_test 50 "verify bulk hash algorithms works"
-
-test_51() {
- local s1=$TMP/sanity-gss-9.1
- local s2=$TMP/sanity-gss-9.2
- local s3=$TMP/sanity-gss-9.3
- local s4=$TMP/sanity-gss-9.4
- local tdir=$MOUNT/dir9
- local s1_size=4194304 # n * pagesize (4M)
- local s2_size=512 # n * blksize
- local s3_size=111 # n * blksize + m
- local s4_size=5 # m
- local cipher_algs="arc4 aes128 aes192 aes256 cast128 cast256 twofish128 twofish256"
-
- # create sample files for each situation
- rm -f $s1 $s2 $s2 $s4
- dd if=/dev/urandom of=$s1 bs=1M count=4 || error
- dd if=/dev/urandom of=$s2 bs=$s2_size count=1 || error
- dd if=/dev/urandom of=$s3 bs=$s3_size count=1 || error
- dd if=/dev/urandom of=$s4 bs=$s4_size count=1 || error
-
- rm -rf $tdir
- mkdir $tdir || error "create dir $tdir"
-
- restore_to_default_flavor
-
- #
- # different bulk data alignment will lead to different behavior of
- # the implementation: (n > 0; 0 < m < encryption_block_size)
- # - full page i/o
- # - partial page, size = n * encryption_block_size
- # - partial page, size = n * encryption_block_size + m
- # - partial page, size = m
- #
- for alg in $cipher_algs; do
- echo "Testing $alg..."
- flavor=krb5p-bulkp:sha1/$alg
- set_rule $FSNAME any cli2ost $flavor
- wait_flavor cli2ost $flavor $cnt_cli2ost
-
- # sync write
- dd if=$s1 of=$tdir/$alg.1 oflag=dsync bs=1M || error "write $alg.1"
- dd if=$s2 of=$tdir/$alg.2 oflag=dsync || error "write $alg.2"
- dd if=$s3 of=$tdir/$alg.3 oflag=dsync || error "write $alg.3"
- dd if=$s4 of=$tdir/$alg.4 oflag=dsync || error "write $alg.4"
-
- # remount client
- umount_client $MOUNT
- umount_client $MOUNT2
- mount_client $MOUNT
- mount_client $MOUNT2
-
- # read & compare
- diff $tdir/$alg.1 $s1 || error "read $alg.1"
- diff $tdir/$alg.2 $s2 || error "read $alg.2"
- diff $tdir/$alg.3 $s3 || error "read $alg.3"
- diff $tdir/$alg.4 $s4 || error "read $alg.4"
- done
-
- rm -rf $tdir
- rm -f $sample
-}
-run_test 51 "bulk data alignment test under encryption mode"
-
test_90() {
if [ "$SLOW" = "no" ]; then
total=10
lustre_fail() {
local fail_node=$1
local fail_loc=$2
-
- case $fail_node in
- "mds" )
- do_facet $SINGLEMDS "lctl set_param fail_loc=$fail_loc" ;;
- "ost" )
- for num in `seq $OSTCOUNT`; do
- do_facet ost$num "lctl set_param fail_loc=$fail_loc"
- done ;;
- "mds_ost" )
- do_facet $SINGLEMDS "lctl set_param fail_loc=$fail_loc" ;
- for num in `seq $OSTCOUNT`; do
- do_facet ost$num "lctl set_param fail_loc=$fail_loc"
- done ;;
- * ) echo "usage: lustre_fail fail_node fail_loc" ;
- return 1 ;;
- esac
+ local fail_val=${3:-0}
+
+ if [ $fail_node == "mds" ] || [ $fail_node == "mds_ost" ]; then
+ if [ $((fail_loc & 0x10000000)) -ne 0 -a $fail_val -gt 0 ] || \
+ [ $((fail_loc)) -eq 0 ]; then
+ do_facet $SINGLEMDS "lctl set_param fail_val=$fail_val"
+ fi
+ do_facet $SINGLEMDS "lctl set_param fail_loc=$fail_loc"
+ fi
+ if [ $fail_node == "ost" ] || [ $fail_node == "mds_ost" ]; then
+ for num in `seq $OSTCOUNT`; do
+ if [ $((fail_loc & 0x10000000)) -ne 0 -a $fail_val -gt 0 ] || \
+ [ $((fail_loc)) -eq 0 ]; then
+ do_facet ost$num "lctl set_param fail_val=$fail_val"
+ fi
+ do_facet ost$num "lctl set_param fail_loc=$fail_loc"
+ done
+ fi
}
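
Editor's note: the reworked lustre_fail() treats fail_loc as a bit field. OR-ing in OBD_FAIL_SOME (0x10000000) turns a plain failure site into a "fail the next fail_val times" site, and fail_val is only pushed to the facet when that bit is set or when fail_loc is being cleared. A minimal C sketch of that composition, using only the two constants quoted in the quota hunk below:

#include <stdio.h>

/* Values copied from the quota hunk below. */
#define OBD_FAIL_OST_HOLD_WRITE_RPC 0x0000021f
#define OBD_FAIL_SOME               0x10000000  /* fail N times */

int main(void)
{
        unsigned int fail_loc = OBD_FAIL_OST_HOLD_WRITE_RPC | OBD_FAIL_SOME;

        /* Same test the shell helper performs with
         * $((fail_loc & 0x10000000)): fail_val only matters when the
         * fail-N-times modifier bit is set (or fail_loc is cleared). */
        if ((fail_loc & OBD_FAIL_SOME) != 0)
                printf("fail_loc=0x%08x takes a fail_val\n", fail_loc);
        return 0;
}
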
RUNAS="runas -u $TSTID -g $TSTID"
[ "$(grep $DIR2 /proc/mounts)" ] || mount_client $DIR2 || \
{ skip "Need lustre mounted on $MOUNT2 " && retutn 0; }
- if [ $OSTCOUNT -lt 2 ]; then
- skip "$OSTCOUNT < 2, too few osts"
- return 0;
- fi
-
LIMIT=$(( $BUNIT_SZ * $(($OSTCOUNT + 1)) * 10)) # 10 bunits each server
TESTFILE="$DIR/$tdir/$tfile-0"
TESTFILE2="$DIR2/$tdir/$tfile-1"
$LFS setstripe $TESTFILE -i 0 -c 1
chown $TSTUSR.$TSTUSR $TESTFILE
- $LFS setstripe $TESTFILE2 -i 1 -c 1
+ $LFS setstripe $TESTFILE2 -i 0 -c 1
chown $TSTUSR2.$TSTUSR2 $TESTFILE2
#define OBD_FAIL_OST_HOLD_WRITE_RPC 0x21f
- lustre_fail ost 0x0000021f
+ #define OBD_FAIL_SOME 0x10000000 /* fail N times */
+ lustre_fail ost $((0x0000021f | 0x10000000)) 1
echo " step1: write out of block quota ..."
$RUNAS2 dd if=/dev/zero of=$TESTFILE2 bs=$BLK_SZ count=102400 &
chmod 0777 $DIR/$tdir
TESTFILE="$DIR/$tdir/$tfile-0"
rm -f $TESTFILE
+ LIMIT=$(( $BUNIT_SZ * ($OSTCOUNT + 1) + 4096 ))
wait_delete_completed
# set quota for $TSTUSR
log "setquota for $TSTUSR"
- $LFS setquota $1 $TSTUSR -b 10240 -B 10240 -i 10 -I 10 $DIR
+ $LFS setquota $1 $TSTUSR -b $LIMIT -B $LIMIT -i 10 -I 10 $DIR
sleep 3
show_quota $1 $TSTUSR
# set quota for $TSTUSR2
log "setquota for $TSTUSR2"
- $LFS setquota $1 $TSTUSR2 -b 10240 -B 10240 -i 10 -I 10 $DIR
+ $LFS setquota $1 $TSTUSR2 -b $LIMIT -B $LIMIT -i 10 -I 10 $DIR
sleep 3
show_quota $1 $TSTUSR2
# set stripe index to 0
log "setstripe for $DIR/$tdir to 0"
- $LFS setstripe $DIR/$tdir -i 0
+ $LFS setstripe $DIR/$tdir -c 1 -i 0
MDS_UUID=`do_facet $SINGLEMDS $LCTL dl | grep -m1 " mdt " | awk '{print $((NF-1))}'`
OST0_UUID=`do_facet ost1 $LCTL dl | grep -m1 obdfilter | awk '{print $((NF-1))}'`
MDS_QUOTA_USED_OLD=`$LFS quota -o $MDS_UUID $1 $TSTUSR $DIR | awk '/^.*[[:digit:]+][[:space:]+]/ { print $4 }'`
ONLY=${ONLY:-"$*"}
# bug number for skipped test: 13297 2108 9789 3637 9789 3561 12622 12653 12653 5188 10764 16260
-ALWAYS_EXCEPT=" 27u 42a 42b 42c 42d 45 51d 65a 65e 68 75 119d $SANITY_EXCEPT"
+ALWAYS_EXCEPT=" 27u 42a 42b 42c 42d 45 51d 65a 65e 68b 75 119d $SANITY_EXCEPT"
# bug number for skipped test: 2108 9789 3637 9789 3561 5188/5749 1443
#ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-"27m 42a 42b 42c 42d 45 68 76"}
# UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
cleanup_68() {
trap 0
if [ ! -z "$LLOOP" ]; then
- swapoff $LLOOP || error "swapoff failed"
+ if swapon -s | grep -q $LLOOP; then
+ swapoff $LLOOP || error "swapoff failed"
+ fi
+
$LCTL blockdev_detach $LLOOP || error "detach failed"
rm -f $LLOOP
unset LLOOP
fi
- rm -f $DIR/f68
+ rm -f $DIR/f68*
}
meminfo() {
swapon -s | awk '($1 == "'$1'") { print $4 }'
}
+# test case for lloop driver, basic functionality
+test_68a() {
+ [ "$UID" != 0 ] && skip "must run as root" && return
+
+ grep -q llite_lloop /proc/modules
+ [ $? -ne 0 ] && skip "can't find module llite_lloop" && return
+
+ LLOOP=$TMP/lloop.`date +%s`.`date +%N`
+ dd if=/dev/zero of=$DIR/f68a bs=4k count=1024
+ $LCTL blockdev_attach $DIR/f68a $LLOOP || error "attach failed"
+
+ trap cleanup_68 EXIT
+
+ directio rdwr $LLOOP 0 1024 4096 || error "direct write failed"
+ directio rdwr $LLOOP 0 1025 4096 && error "direct write should fail"
+
+ cleanup_68
+}
+run_test 68a "lloop driver - basic test ========================"
# exercise swapping to Lustre by adding a high priority swapfile entry
# and then consuming memory until it is used.
-test_68() {
+test_68b() { # was test_68
[ "$UID" != 0 ] && skip "must run as root" && return
lctl get_param -n devices | grep -q obdfilter && \
skip "local OST" && return
[[ $NR_BLOCKS -le 2048 ]] && NR_BLOCKS=2048
LLOOP=$TMP/lloop.`date +%s`.`date +%N`
- dd if=/dev/zero of=$DIR/f68 bs=64k seek=$NR_BLOCKS count=1
- mkswap $DIR/f68
+ dd if=/dev/zero of=$DIR/f68b bs=64k seek=$NR_BLOCKS count=1
+ mkswap $DIR/f68b
- $LCTL blockdev_attach $DIR/f68 $LLOOP || error "attach failed"
+ $LCTL blockdev_attach $DIR/f68b $LLOOP || error "attach failed"
trap cleanup_68 EXIT
[ $SWAPUSED -eq 0 ] && echo "no swap used???" || true
}
-run_test 68 "support swapping to Lustre ========================"
+run_test 68b "support swapping to Lustre ========================"
# bug5265, obdfilter oa2dentry return -ENOENT
# #define OBD_FAIL_OST_ENOENT 0x217
}
test_77a() { # bug 10889
+ $GSS && skip "could not run with gss" && return
[ ! -f $F77_TMP ] && setup_f77
set_checksums 1
dd if=$F77_TMP of=$DIR/$tfile bs=1M count=$F77SZ || error "dd error"
run_test 77a "normal checksum read/write operation ============="
test_77b() { # bug 10889
+ $GSS && skip "could not run with gss" && return
[ ! -f $F77_TMP ] && setup_f77
#define OBD_FAIL_OSC_CHECKSUM_SEND 0x409
lctl set_param fail_loc=0x80000409
run_test 77b "checksum error on client write ===================="
test_77c() { # bug 10889
+ $GSS && skip "could not run with gss" && return
[ ! -f $DIR/f77b ] && skip "requires 77b - skipping" && return
set_checksums 1
for algo in $CKSUM_TYPES; do
run_test 77c "checksum error on client read ==================="
test_77d() { # bug 10889
+ $GSS && skip "could not run with gss" && return
#define OBD_FAIL_OSC_CHECKSUM_SEND 0x409
lctl set_param fail_loc=0x80000409
set_checksums 1
run_test 77d "checksum error on OST direct write ==============="
test_77e() { # bug 10889
+ $GSS && skip "could not run with gss" && return
[ ! -f $DIR/f77 ] && skip "requires 77d - skipping" && return
#define OBD_FAIL_OSC_CHECKSUM_RECEIVE 0x408
lctl set_param fail_loc=0x80000408
run_test 77e "checksum error on OST direct read ================"
test_77f() { # bug 10889
+ $GSS && skip "could not run with gss" && return
set_checksums 1
for algo in $CKSUM_TYPES; do
cancel_lru_locks osc
run_test 77f "repeat checksum error on write (expect error) ===="
test_77g() { # bug 10889
+ $GSS && skip "could not run with gss" && return
remote_ost_nodsh && skip "remote OST with nodsh" && return
[ ! -f $F77_TMP ] && setup_f77
run_test 77g "checksum error on OST write ======================"
test_77h() { # bug 10889
+ $GSS && skip "could not run with gss" && return
remote_ost_nodsh && skip "remote OST with nodsh" && return
[ ! -f $DIR/f77g ] && skip "requires 77g - skipping" && return
run_test 77h "checksum error on OST read ======================="
test_77i() { # bug 13805
+ $GSS && skip "could not run with gss" && return
#define OBD_FAIL_OSC_CONNECT_CKSUM 0x40b
lctl set_param fail_loc=0x40b
remount_client $MOUNT
run_test 77i "client not supporting OSD_CONNECT_CKSUM =========="
test_77j() { # bug 13805
+ $GSS && skip "could not run with gss" && return
#define OBD_FAIL_OSC_CKSUM_ADLER_ONLY 0x40c
lctl set_param fail_loc=0x40c
remount_client $MOUNT
trap cleanup_test102 EXIT
cd $DIR
- # $1 = runas
$1 $SETSTRIPE $tdir -s $STRIPE_SIZE -i $STRIPE_OFFSET -c $STRIPE_COUNT
cd $DIR/$tdir
for num in 1 2 3 4
done
cd $DIR
- if [ "$TAR" == "tar" ]; then
- TAR_OPTS="--xattrs"
- fi
- $1 $TAR cf $TMP/f102.tar $tdir $TAR_OPTS
+ $1 $TAR cf $TMP/f102.tar $tdir --xattrs
SETUP_TEST102=yes
}
}
find_lustre_tar() {
- [ -n "$(which star 2>/dev/null)" ] && strings $(which star) | grep -q lustre && echo star && return
[ -n "$(which tar 2>/dev/null)" ] && strings $(which tar) | grep -q lustre && echo tar
}
test_102d() {
- # b10930: (s)tar test for trusted.lov xattr
+ # b10930: tar test for trusted.lov xattr
TAR=$(find_lustre_tar)
- [ -z "$TAR" ] && skip "lustre-aware (s)tar is not installed" && return
+ [ -z "$TAR" ] && skip "lustre-aware tar is not installed" && return
[ "$OSTCOUNT" -lt "2" ] && skip "skipping N-stripe test" && return
setup_test102
mkdir -p $DIR/d102d
- if [ "$TAR" == "tar" ]; then
- TAR_OPTS="--xattrs"
- fi
- $TAR xf $TMP/f102.tar -C $DIR/d102d $TAR_OPTS
+ $TAR xf $TMP/f102.tar -C $DIR/d102d --xattrs
cd $DIR/d102d/$tdir
compare_stripe_info1
}
-run_test 102d "(s)tar restore stripe info from tarfile,not keep osts ==========="
-
-test_102e() {
- # b10930: star test for trusted.lov xattr
- TAR=$(find_lustre_tar)
- [ "$TAR" != star ] && skip "lustre-aware star is not installed" && return
- [ "$OSTCOUNT" -lt "2" ] && skip "skipping N-stripe test" && return
- setup_test102
- mkdir -p $DIR/d102e
- star -x -preserve-osts f=$TMP/f102.tar -C $DIR/d102e
- cd $DIR/d102e/$tdir
- compare_stripe_info2
-}
-run_test 102e "star restore stripe info from tarfile, keep osts ==========="
+run_test 102d "tar restore stripe info from tarfile,not keep osts ==========="
test_102f() {
- # b10930: (s)tar test for trusted.lov xattr
+ # b10930: tar test for trusted.lov xattr
TAR=$(find_lustre_tar)
- [ -z "$TAR" ] && skip "lustre-aware (s)tar is not installed" && return
+ [ -z "$TAR" ] && skip "lustre-aware tar is not installed" && return
[ "$OSTCOUNT" -lt "2" ] && skip "skipping N-stripe test" && return
setup_test102
mkdir -p $DIR/d102f
cd $DIR
- if [ "$TAR" == "tar" ]; then
- TAR_OPTS="--xattrs"
- fi
- $TAR cf - $TAR_OPTS . | $TAR xf - $TAR_OPTS -C $DIR/d102f
+ $TAR cf - --xattrs $tdir | $TAR xf - --xattrs -C $DIR/d102f
cd $DIR/d102f/$tdir
compare_stripe_info1
}
-run_test 102f "(s)tar copy files, not keep osts ==========="
-
-test_102g() {
- # b10930: star test for trusted.lov xattr
- TAR=$(find_lustre_tar)
- [ "$TAR" != star ] && skip "lustre-aware star is not installed" && return
- [ "$OSTCOUNT" -lt "2" ] && skip "skipping N-stripe test" && return
- setup_test102
- mkdir -p $DIR/d102g
- cd $DIR
- star -copy -preserve-osts $tdir $DIR/d102g
- cd $DIR/d102g/$tdir
- compare_stripe_info2
- cleanup_test102
-}
-run_test 102g "star copy files, keep osts ==========="
+run_test 102f "tar copy files, not keep osts ==========="
test_102h() { # bug 15777
[ -z $(lctl get_param -n mdc.*.connect_flags | grep xattr) ] &&
test_102j() {
TAR=$(find_lustre_tar)
- [ -z "$TAR" ] && skip "lustre-aware (s)tar is not installed" && return
+ [ -z "$TAR" ] && skip "lustre-aware tar is not installed" && return
[ "$OSTCOUNT" -lt "2" ] && skip "skipping N-stripe test" && return
setup_test102 "$RUNAS"
mkdir -p $DIR/d102j
chown $RUNAS_ID $DIR/d102j
- if [ "$TAR" == "tar" ]; then
- TAR_OPTS="--xattrs"
- fi
- $RUNAS $TAR xf $TMP/f102.tar -C $DIR/d102j $TAR_OPTS
+ $RUNAS $TAR xf $TMP/f102.tar -C $DIR/d102j --xattrs
cd $DIR/d102j/$tdir
compare_stripe_info1 "$RUNAS"
}
-run_test 102j "non-root (s)tar restore stripe info from tarfile,not keep osts ="
+run_test 102j "non-root tar restore stripe info from tarfile, not keep osts ==="
run_acl_subtest()
{
cancel_lru_locks mdc
cancel_lru_locks osc
stime=`date +%s`
- time ls -l $DIR/$tdir > /dev/null
+ time ls -l $DIR/$tdir | wc -l
etime=`date +%s`
delta=$((etime - stime))
log "ls $i files without statahead: $delta sec"
cancel_lru_locks mdc
cancel_lru_locks osc
stime=`date +%s`
- time ls -l $DIR/$tdir > /dev/null
+ time ls -l $DIR/$tdir | wc -l
etime=`date +%s`
delta_sa=$((etime - stime))
- log "ls $i files with statahead: $delta_sa sec"
+ log "ls $i files with statahead: $delta_sa sec"
lctl get_param -n llite.*.statahead_stats
ewrong=`lctl get_param -n llite.*.statahead_stats | grep "statahead wrong:" | awk '{print $3}'`
log "statahead was stopped, maybe too many locks held!"
fi
+ [ $delta -eq 0 ] && continue
+
if [ $((delta_sa * 100)) -gt $((delta * 105)) ]; then
if [ $SLOWOK -eq 0 ]; then
error "ls $i files is slower with statahead!"
+
+ max=`lctl get_param -n llite.*.statahead_max | head -n 1`
+ lctl set_param -n llite.*.statahead_max 0
+ lctl get_param llite.*.statahead_max
+ cancel_lru_locks mdc
+ cancel_lru_locks osc
+ $LCTL dk > /dev/null
+ stime=`date +%s`
+ time ls -l $DIR/$tdir | wc -l
+ etime=`date +%s`
+ $LCTL dk > $TMP/sanity_test_123a_${i}_disable_${etime}.log
+ delta=$((etime - stime))
+ log "ls $i files without statahead: $delta sec, dump to $TMP/sanity_test_123a_${i}_disable_${etime}.log"
+ lctl set_param llite.*.statahead_max=$max
+
+ lctl get_param -n llite.*.statahead_max | grep '[0-9]'
+ cancel_lru_locks mdc
+ cancel_lru_locks osc
+ $LCTL dk > /dev/null
+ stime=`date +%s`
+ time ls -l $DIR/$tdir | wc -l
+ etime=`date +%s`
+ $LCTL dk > $TMP/sanity_test_123a_${i}_enable_${etime}.log
+ delta_sa=$((etime - stime))
+ log "ls $i files with statahead: $delta_sa sec, dump to $TMP/sanity_test_123a_${i}_enable_${etime}.log"
+ lctl get_param -n llite.*.statahead_stats
else
log "ls $i files is slower with statahead!"
fi
- break;
+ break
fi
[ $delta -gt 20 ] && break
fi
}
+test_154() {
+ cp /etc/hosts $DIR/$tfile
+
+ fid=`$LFS path2fid $DIR/$tfile`
+ rc=$?
+ [ $rc -ne 0 ] && error "error: could not get fid for $DIR/$tfile."
+
+ diff $DIR/$tfile $DIR/.lustre/fid/$fid || error "open by fid failed: did not find expected data in file."
+
+ echo "Opening a file by FID succeeded"
+}
+run_test 154 "Opening a file by FID"
+
#Changelogs
test_160() {
remote_mds && skip "remote MDS" && return
echo writing on client1
dd if=/dev/zero of=$DIR1/$tfile count=100 conv=notrunc > /dev/null 2>&1
sync &
- # wait for the flush
- sleep 1
echo reading on client2
dd of=/dev/null if=$DIR2/$tfile > /dev/null 2>&1
# wait for a lock timeout
}
run_test 35 "-EINTR cp_ast vs. bl_ast race does not evict client"
+test_36() { #bug 16417
+ local SIZE
+ mkdir -p $MOUNT1/$tdir
+ lfs setstripe -c -1 $MOUNT1/$tdir
+ i=0
+ SIZE=100
+
+ while [ $i -le 10 ]; do
+ lctl mark "start test"
+ before=$($LFS df | awk '{if ($1 ~/^filesystem/) {print $5; exit} }')
+ dd if=/dev/zero of=$MOUNT1/$tdir/file000 bs=1M count=$SIZE
+ dd if=$MOUNT2/$tdir/file000 of=/dev/null bs=1M count=$SIZE &
+ read_pid=$!
+ sleep 0.1
+ rm -f $MOUNT1/$tdir/file000
+ wait $read_pid
+ after=$($LFS df | awk '{if ($1 ~/^filesystem/) {print $5; exit} }')
+ if [ $before -gt $after ]; then
+ error "space leaked"
+ exit;
+ fi
+ let i=i+1
+ done
+}
+run_test 36 "handle ESTALE/open-unlink corectly"
+
log "cleanup: ======================================================"
check_and_cleanup_lustre
export TUNEFS=${TUNEFS:-"$LUSTRE/utils/tunefs.lustre"}
[ ! -f "$TUNEFS" ] && export TUNEFS=$(which tunefs.lustre)
export CHECKSTAT="${CHECKSTAT:-"checkstat -v"} "
- export FSYTPE=${FSTYPE:-"ldiskfs"}
+ export FSTYPE=${FSTYPE:-"ldiskfs"}
export NAME=${NAME:-local}
export LGSSD=${LGSSD:-"$LUSTRE/utils/gss/lgssd"}
[ "$GSS_PIPEFS" = "true" ] && [ ! -f "$LGSSD" ] && \
load_module mgc/mgc
if [ -z "$CLIENTONLY" ] && [ -z "$CLIENTMODSONLY" ]; then
grep -q crc16 /proc/kallsyms || { modprobe crc16 2>/dev/null || true; }
+ grep -q jbd /proc/kallsyms || { modprobe jbd 2>/dev/null || true; }
[ "$FSTYPE" = "ldiskfs" ] && load_module ../ldiskfs/ldiskfs/ldiskfs
load_module mgs/mgs
load_module mds/mds
init_gss() {
if $GSS; then
start_gss_daemons
+
+ if [ -n "$LGSS_KEYRING_DEBUG" ]; then
+ echo $LGSS_KEYRING_DEBUG > /proc/fs/lustre/sptlrpc/gss/lgss_keyring/debug_level
+ fi
fi
}
return 0
}
+wait_update () {
+ local node=$1
+ local TEST=$2
+ local FINAL=$3
+ local MAX=${4:-90}
+
+ local RESULT
+ local WAIT=0
+ local sleep=5
+ while [ $WAIT -lt $MAX ]; do
+ sleep $sleep
+ RESULT=$(do_node $node "$TEST")
+ if [ $RESULT -eq $FINAL ]; then
+ echo "Updated after $WAIT sec: wanted $FINAL got $RESULT"
+ return 0
+ fi
+ WAIT=$((WAIT + sleep))
+ echo "Waiting $((MAX - WAIT)) secs for update"
+ done
+ echo "Update not seen after $MAX sec: wanted $FINAL got $RESULT"
+ return 3
+}
+
+wait_update_facet () {
+ local facet=$1
+    wait_update $(facet_host $facet) "$@"
+}
+
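Editor's note: the new wait_update() is a poll-until-match loop with a sleep-first cadence. A C rendering of the same contract, for illustration (check() stands in for running "$TEST" on the node via do_node):

#include <stdio.h>
#include <unistd.h>

/* Sleep, run the check, stop once it returns the wanted value or the
 * time budget is spent; mirrors the shell helper's messages and its
 * return codes (0 on success, 3 on timeout). */
static int wait_update_c(int (*check)(void), int final, int max)
{
        int waited = 0, step = 5, result = -1;

        while (waited < max) {
                sleep(step);
                result = check();
                if (result == final) {
                        printf("Updated after %d sec: wanted %d got %d\n",
                               waited, final, result);
                        return 0;
                }
                waited += step;
                printf("Waiting %d secs for update\n", max - waited);
        }
        printf("Update not seen after %d sec: wanted %d got %d\n",
               max, final, result);
        return 3;
}
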
wait_delete_completed () {
local TOTALPREV=`lctl get_param -n osc.*.kbytesavail | \
awk 'BEGIN{total=0}; {total+=$1}; END{print total}'`
}
wait_for_host() {
- HOST=$1
+ local HOST=$1
check_network "$HOST" 900
while ! do_node $HOST "ls -d $LUSTRE " > /dev/null; do sleep 5; done
}
wait_for() {
- facet=$1
- HOST=`facet_active_host $facet`
+ local facet=$1
+ local HOST=`facet_active_host $facet`
wait_for_host $HOST
}
#define OBD_RECOVERY_TIMEOUT (obd_timeout * 5 / 2)
# as we are in process of changing obd_timeout in different ways
# let's set MAX longer than that
- MAX=$(( timeout * 4 ))
- WAIT=0
+ local MAX=$(( timeout * 4 ))
+ local WAIT=0
while [ $WAIT -lt $MAX ]; do
STATUS=`do_facet $SINGLEMDS "lctl get_param -n mdt.*-MDT0000.recovery_status | grep status"`
echo $STATUS | grep COMPLETE && return 0
}
facet_failover() {
- facet=$1
- sleep_time=$2
+ local facet=$1
+ local sleep_time=$2
echo "Failing $facet on node `facet_active_host $facet`"
shutdown_facet $facet
[ -n "$sleep_time" ] && sleep $sleep_time
zconf_mount `hostname` $1 || error "mount failed"
}
-set_obd_timeout() {
- local facet=$1
- local timeout=$2
-
- do_facet $facet lsmod | grep -q obdclass || \
- do_facet $facet "modprobe obdclass"
-
- do_facet $facet "lctl set_param timeout=$timeout"
-}
-
writeconf_facet () {
local facet=$1
local dev=$2
writeconf_all
for num in `seq $MDSCOUNT`; do
DEVNAME=$(mdsdevname $num)
- set_obd_timeout mds$num $TIMEOUT
start mds$num $DEVNAME $MDS_MOUNT_OPTS
# We started mds, now we should set failover variables properly.
done
for num in `seq $OSTCOUNT`; do
DEVNAME=$(ostdevname $num)
- set_obd_timeout ost$num $TIMEOUT
start ost$num $DEVNAME $OST_MOUNT_OPTS
# We started ost$num, now we should set ost${num}failover variable properly.
[ -n "$CLIENTS" ] && zconf_mount_clients $CLIENTS $MOUNT2
fi
- init_versions_vars
+ init_param_vars
# by remounting mdt before ost, initial connect from mdt to ost might
# timeout because ost is not ready yet. wait some time for its full
done
}
-init_versions_vars () {
+init_param_vars () {
export MDSVER=$(do_facet $SINGLEMDS "lctl get_param version" | cut -d. -f1,2)
export OSTVER=$(do_facet ost1 "lctl get_param version" | cut -d. -f1,2)
export CLIVER=$(lctl get_param version | cut -d. -f 1,2)
+
+ TIMEOUT=$(do_facet $SINGLEMDS "lctl get_param -n timeout")
+ log "Using TIMEOUT=$TIMEOUT"
}
check_config () {
fi
}
+check_timeout () {
+ local mdstimeout=$(do_facet $SINGLEMDS "lctl get_param -n timeout")
+ local cltimeout=$(lctl get_param -n timeout)
+ if [ $mdstimeout -ne $TIMEOUT ] || [ $mdstimeout -ne $cltimeout ]; then
+ error "timeouts are wrong! mds: $mdstimeout, client: $cltimeout, TIMEOUT=$TIMEOUT"
+ return 1
+ fi
+}
+
check_and_setup_lustre() {
local MOUNTED=$(mounted_lustre_filesystems)
if [ -z "$MOUNTED" ] || ! $(echo $MOUNTED | grep -w -q $MOUNT); then
else
check_config $MOUNT
init_facets_vars
- init_versions_vars
+ init_param_vars
fi
if [ "$ONLY" == "setup" ]; then
exit 0
return 0
}
-check_rate() {
- local OP=$1
- local TARGET_RATE=$2
- local NUM_CLIENTS=$3
- local LOG=$4
-
- local RATE=$(awk '/^Rate: [0-9\.]+ '"${OP}"'s\/sec/ { print $2}' ${LOG})
-
- # We need to use bc since the rate is a floating point number
- local RES=$(echo "${RATE} < ${TARGET_RATE}" | bc -l )
- if [ "${RES}" = 0 ]; then
- echo "Success: ${RATE} ${OP}s/sec met target rate" \
- "${TARGET_RATE} ${OP}s/sec for ${NUM_CLIENTS} client(s)."
- return 0
- else
- echo "Failure: ${RATE} ${OP}s/sec did not meet target rate" \
- "${TARGET_RATE} ${OP}s/sec for ${NUM_CLIENTS} client(s)."
- return 1
- fi
+inodes_available () {
+ local IFree=$($LFS df -i $MOUNT | grep ^$FSNAME | awk '{print $4}' | sort -un | head -1) || return 1
+ echo $IFree
}
# reset llite stat counters
eval $command
}
+mdsrate_cleanup () {
+ mpi_run -np $1 -machinefile $2 ${MDSRATE} --unlink --nfiles $3 --dir $4 --filefmt $5
+}
+
}
obd_matches:
- /* If file still fits the request, ask osd for updated info.
- The regulat stat is almost of the same speed as some new
+ /* If file still fits the request, ask ost for updated info.
+ The regular stat is almost of the same speed as some new
'glimpse-size-ioctl'. */
if (!decision && S_ISREG(st->st_mode) &&
(param->lmd->lmd_lmm.lmm_stripe_count || param->size)) {
const int nthreads = 1;
#endif
-static char rawbuf[8192];
-static char *buf = rawbuf;
-static int max = sizeof(rawbuf);
+#define MAX_IOC_BUFLEN 8192
static int cur_device = -1;
return l_ioctl(dev_id, opc, buf);
}
-#define IOC_INIT(data) \
-do { \
- memset(&data, 0, sizeof(data)); \
- data.ioc_dev = cur_device; \
-} while (0)
-
-#define IOC_PACK(func, data) \
-do { \
- memset(buf, 0, sizeof(rawbuf)); \
- if (obd_ioctl_pack(&data, &buf, max)) { \
- fprintf(stderr, "error: %s: invalid ioctl\n", \
- jt_cmdname(func)); \
- return -2; \
- } \
-} while (0)
-
-#define IOC_UNPACK(func, data) \
-do { \
- if (obd_ioctl_unpack(&data, buf, max)) { \
- fprintf(stderr, "error: %s: invalid reply\n", \
- jt_cmdname(func)); \
- return -2; \
- } \
-} while (0)
-
int lcfg_ioctl(char * func, int dev_id, struct lustre_cfg *lcfg)
{
struct obd_ioctl_data data;
+ char rawbuf[MAX_IOC_BUFLEN], *buf = rawbuf;
int rc;
- IOC_INIT(data);
+ memset(&data, 0x00, sizeof(data));
+ data.ioc_dev = cur_device;
data.ioc_type = LUSTRE_CFG_TYPE;
data.ioc_plen1 = lustre_cfg_len(lcfg->lcfg_bufcount,
lcfg->lcfg_buflens);
data.ioc_pbuf1 = (void *)lcfg;
- IOC_PACK(func, data);
+ memset(buf, 0, sizeof(rawbuf));
+ rc = obd_ioctl_pack(&data, &buf, sizeof(rawbuf));
+ if (rc) {
+ fprintf(stderr, "error: %s: invalid ioctl\n",
+ jt_cmdname(func));
+ return rc;
+ }
rc = l_ioctl(dev_id, OBD_IOC_PROCESS_CFG, buf);
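
Editor's note: every hunk of this obd.c change does two things: the process-wide static rawbuf becomes a per-call "char rawbuf[MAX_IOC_BUFLEN]" on the stack, and the IOC_INIT/IOC_PACK/IOC_UNPACK macros (which hid a return statement inside a macro) are open-coded so each call site owns its buffer and its error path. A hypothetical helper showing the repeated pack-and-report idiom in one place (ioc_pack_checked is an illustrative name; obd_ioctl_data, obd_ioctl_pack and jt_cmdname are the real declarations this assumes from the headers the file already includes):

#include <stdio.h>
#include <string.h>

/* Zero the caller's stack buffer, pack the ioctl data into it, and
 * report exactly as the open-coded hunks do; returns pack's rc. */
static int ioc_pack_checked(struct obd_ioctl_data *data, char **pbuf,
                            int buflen, char *func)
{
        int rc;

        memset(*pbuf, 0, buflen);
        rc = obd_ioctl_pack(data, pbuf, buflen);
        if (rc)
                fprintf(stderr, "error: %s: invalid ioctl\n",
                        jt_cmdname(func));
        return rc;
}

A call site would then reduce to: if ((rc = ioc_pack_checked(&data, &buf, sizeof(rawbuf), argv[0]))) return rc;
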
int lcfg_mgs_ioctl(char *func, int dev_id, struct lustre_cfg *lcfg)
{
struct obd_ioctl_data data;
+ char rawbuf[MAX_IOC_BUFLEN], *buf = rawbuf;
int rc;
- IOC_INIT(data);
+ memset(&data, 0x00, sizeof(data));
rc = data.ioc_dev = get_mgs_device();
if (rc < 0)
goto out;
data.ioc_plen1 = lustre_cfg_len(lcfg->lcfg_bufcount,
lcfg->lcfg_buflens);
data.ioc_pbuf1 = (void *)lcfg;
- IOC_PACK(func, data);
+ memset(buf, 0, sizeof(rawbuf));
+ rc = obd_ioctl_pack(&data, &buf, sizeof(rawbuf));
+ if (rc) {
+ fprintf(stderr, "error: %s: invalid ioctl\n",
+ jt_cmdname(func));
+ return rc;
+ }
rc = l_ioctl(dev_id, OBD_IOC_PARAM, buf);
out:
static int do_name2dev(char *func, char *name)
{
struct obd_ioctl_data data;
+ char rawbuf[MAX_IOC_BUFLEN], *buf = rawbuf;
int rc;
- IOC_INIT(data);
-
+ memset(&data, 0x00, sizeof(data));
+ data.ioc_dev = cur_device;
data.ioc_inllen1 = strlen(name) + 1;
data.ioc_inlbuf1 = name;
- IOC_PACK(func, data);
+ memset(buf, 0, sizeof(rawbuf));
+ rc = obd_ioctl_pack(&data, &buf, sizeof(rawbuf));
+ if (rc) {
+ fprintf(stderr, "error: %s: invalid ioctl\n",
+ jt_cmdname(func));
+ return rc;
+ }
rc = l2_ioctl(OBD_DEV_ID, OBD_IOC_NAME2DEV, buf);
if (rc < 0)
return errno;
- IOC_UNPACK(func, data);
+ rc = obd_ioctl_unpack(&data, buf, sizeof(rawbuf));
+ if (rc) {
+ fprintf(stderr, "error: %s: invalid reply\n",
+ jt_cmdname(func));
+ return rc;
+ }
return data.ioc_dev + N2D_OFF;
}
// printf("Name %s is device %d\n", name, ret);
} else {
fprintf(stderr, "No device found for name %s: %s\n",
- name, strerror(rc));
+ name, strerror(rc));
}
}
return ret;
int jt_obd_no_transno(int argc, char **argv)
{
struct obd_ioctl_data data;
+ char rawbuf[MAX_IOC_BUFLEN], *buf = rawbuf;
int rc;
- IOC_INIT(data);
+ memset(&data, 0x00, sizeof(data));
+ data.ioc_dev = cur_device;
if (argc != 1)
return CMD_HELP;
- IOC_PACK(argv[0], data);
+ memset(buf, 0, sizeof(rawbuf));
+ rc = obd_ioctl_pack(&data, &buf, sizeof(rawbuf));
+ if (rc) {
+ fprintf(stderr, "error: %s: invalid ioctl\n",
+ jt_cmdname(argv[0]));
+ return rc;
+ }
rc = l2_ioctl(OBD_DEV_ID, OBD_IOC_NO_TRANSNO, buf);
if (rc < 0)
fprintf(stderr, "error: %s: %s\n", jt_cmdname(argv[0]),
int jt_obd_set_readonly(int argc, char **argv)
{
struct obd_ioctl_data data;
+ char rawbuf[MAX_IOC_BUFLEN], *buf = rawbuf;
int rc;
- IOC_INIT(data);
+ memset(&data, 0x00, sizeof(data));
+ data.ioc_dev = cur_device;
if (argc != 1)
return CMD_HELP;
- IOC_PACK(argv[0], data);
+ memset(buf, 0, sizeof(rawbuf));
+ rc = obd_ioctl_pack(&data, &buf, sizeof(rawbuf));
+ if (rc) {
+ fprintf(stderr, "error: %s: invalid ioctl\n",
+ jt_cmdname(argv[0]));
+ return rc;
+ }
rc = l2_ioctl(OBD_DEV_ID, OBD_IOC_SET_READONLY, buf);
if (rc < 0)
fprintf(stderr, "error: %s: %s\n", jt_cmdname(argv[0]),
int jt_obd_abort_recovery(int argc, char **argv)
{
struct obd_ioctl_data data;
+ char rawbuf[MAX_IOC_BUFLEN], *buf = rawbuf;
int rc;
- IOC_INIT(data);
+ memset(&data, 0x00, sizeof(data));
+ data.ioc_dev = cur_device;
if (argc != 1)
return CMD_HELP;
- IOC_PACK(argv[0], data);
+ memset(buf, 0, sizeof(rawbuf));
+ rc = obd_ioctl_pack(&data, &buf, sizeof(rawbuf));
+ if (rc) {
+ fprintf(stderr, "error: %s: invalid ioctl\n",
+ jt_cmdname(argv[0]));
+ return rc;
+ }
rc = l2_ioctl(OBD_DEV_ID, OBD_IOC_ABORT_RECOVERY, buf);
if (rc < 0)
fprintf(stderr, "error: %s: %s\n", jt_cmdname(argv[0]),
int jt_get_version(int argc, char **argv)
{
int rc;
- char buf[8192];
+ char rawbuf[MAX_IOC_BUFLEN], *buf = rawbuf;
struct obd_ioctl_data *data = (struct obd_ioctl_data *)buf;
if (argc != 1)
return CMD_HELP;
- memset(buf, 0, sizeof(buf));
+ memset(buf, 0, sizeof(rawbuf));
data->ioc_version = OBD_IOCTL_VERSION;
- data->ioc_inllen1 = sizeof(buf) - size_round(sizeof(*data));
+ data->ioc_inllen1 = sizeof(rawbuf) - size_round(sizeof(*data));
data->ioc_inlbuf1 = buf + size_round(sizeof(*data));
data->ioc_len = obd_ioctl_packlen(data);
int jt_obd_list_ioctl(int argc, char **argv)
{
int rc, index;
- char buf[8192];
+ char rawbuf[MAX_IOC_BUFLEN], *buf = rawbuf;
struct obd_ioctl_data *data = (struct obd_ioctl_data *)buf;
if (argc > 2)
return CMD_HELP;
for (index = 0;; index++) {
- memset(buf, 0, sizeof(buf));
+ memset(buf, 0, sizeof(rawbuf));
data->ioc_version = OBD_IOCTL_VERSION;
- data->ioc_inllen1 = sizeof(buf) - size_round(sizeof(*data));
+ data->ioc_inllen1 = sizeof(rawbuf) - size_round(sizeof(*data));
data->ioc_inlbuf1 = buf + size_round(sizeof(*data));
data->ioc_len = obd_ioctl_packlen(data);
data->ioc_count = index;
rc = 0;
else
fprintf(stderr, "Error getting device list: %s: "
- "check dmesg.\n",
- strerror(errno));
+ "check dmesg.\n", strerror(errno));
}
return rc;
}
return 0;
}
-
-
-
/* Create one or more objects, arg[4] may describe stripe meta-data. If
* not, defaults assumed. This echo-client instance stashes the stripe
* object ids. Use get_stripe on this node to print full lsm and
/* create <count> [<file_create_mode>] [q|v|# verbosity] [striping] */
int jt_obd_create(int argc, char **argv)
{
+ char rawbuf[MAX_IOC_BUFLEN], *buf = rawbuf;
struct obd_ioctl_data data;
struct timeval next_time;
__u64 count = 1, next_count, base_id = 0;
int verbose = 1, mode = 0100644, rc = 0, i, valid_lsm = 0;
char *end;
- IOC_INIT(data);
+ memset(&data, 0x00, sizeof(data));
+ data.ioc_dev = cur_device;
if (argc < 2 || argc > 5)
return CMD_HELP;
data.ioc_pbuf1 = (char *)&lsm_buffer;
}
- IOC_PACK(argv[0], data);
+ memset(buf, 0, sizeof(rawbuf));
+ rc = obd_ioctl_pack(&data, &buf, sizeof(rawbuf));
+ if (rc) {
+ fprintf(stderr, "error: %s: invalid ioctl\n",
+ jt_cmdname(argv[0]));
+ return rc;
+ }
rc = l2_ioctl(OBD_DEV_ID, OBD_IOC_CREATE, buf);
- IOC_UNPACK(argv[0], data);
+ obd_ioctl_unpack(&data, buf, sizeof(rawbuf));
shmem_bump();
if (rc < 0) {
fprintf(stderr, "error: %s: #%d - %s\n",
int jt_obd_setattr(int argc, char **argv)
{
struct obd_ioctl_data data;
+ char rawbuf[MAX_IOC_BUFLEN], *buf = rawbuf;
char *end;
int rc;
- IOC_INIT(data);
+ memset(&data, 0x00, sizeof(data));
+ data.ioc_dev = cur_device;
if (argc != 2)
return CMD_HELP;
}
data.ioc_obdo1.o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLMODE;
- IOC_PACK(argv[0], data);
+ memset(buf, 0, sizeof(rawbuf));
+ rc = obd_ioctl_pack(&data, &buf, sizeof(rawbuf));
+ if (rc) {
+ fprintf(stderr, "error: %s: invalid ioctl\n",
+ jt_cmdname(argv[0]));
+ return rc;
+ }
rc = l2_ioctl(OBD_DEV_ID, OBD_IOC_SETATTR, buf);
if (rc < 0)
fprintf(stderr, "error: %s: %s\n", jt_cmdname(argv[0]),
struct obd_ioctl_data data;
struct timeval start, next_time;
__u64 i, count, next_count;
+ char rawbuf[MAX_IOC_BUFLEN], *buf = rawbuf;
int verbose = 1;
obd_id objid = 3;
char *end;
if (argc < 2 || argc > 4)
return CMD_HELP;
- IOC_INIT(data);
+ memset(&data, 0x00, sizeof(data));
+ data.ioc_dev = cur_device;
count = strtoull(argv[1], &end, 0);
if (*end) {
fprintf(stderr, "error: %s: invalid iteration count '%s'\n",
data.ioc_obdo1.o_id = objid;
data.ioc_obdo1.o_mode = S_IFREG;
data.ioc_obdo1.o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLMODE;
- IOC_PACK(argv[0], data);
+ memset(buf, 0x00, sizeof(rawbuf));
+ rc = obd_ioctl_pack(&data, &buf, sizeof(rawbuf));
+ if (rc) {
+ fprintf(stderr, "error: %s: invalid ioctl\n",
+ jt_cmdname(argv[0]));
+ return rc;
+ }
rc = l2_ioctl(OBD_DEV_ID, OBD_IOC_SETATTR, &data);
shmem_bump();
if (rc < 0) {
{
struct obd_ioctl_data data;
struct timeval next_time;
+ char rawbuf[MAX_IOC_BUFLEN], *buf = rawbuf;
__u64 count = 1, next_count;
int verbose = 1;
__u64 id;
char *end;
int rc = 0, i;
- IOC_INIT(data);
+ memset(&data, 0x00, sizeof(data));
+ data.ioc_dev = cur_device;
if (argc < 2 || argc > 4)
return CMD_HELP;
data.ioc_obdo1.o_mode = S_IFREG | 0644;
data.ioc_obdo1.o_valid = OBD_MD_FLID | OBD_MD_FLMODE;
- IOC_PACK(argv[0], data);
+ memset(buf, 0, sizeof(rawbuf));
+ rc = obd_ioctl_pack(&data, &buf, sizeof(rawbuf));
+ if (rc) {
+ fprintf(stderr, "error: %s: invalid ioctl\n",
+ jt_cmdname(argv[0]));
+ return rc;
+ }
rc = l2_ioctl(OBD_DEV_ID, OBD_IOC_DESTROY, buf);
- IOC_UNPACK(argv[0], data);
+ obd_ioctl_unpack(&data, buf, sizeof(rawbuf));
shmem_bump();
if (rc < 0) {
fprintf(stderr, "error: %s: objid "LPX64": %s\n",
int jt_obd_getattr(int argc, char **argv)
{
struct obd_ioctl_data data;
+ char rawbuf[MAX_IOC_BUFLEN], *buf = rawbuf;
char *end;
int rc;
if (argc != 2)
return CMD_HELP;
- IOC_INIT(data);
+ memset(&data, 0x00, sizeof(data));
+ data.ioc_dev = cur_device;
data.ioc_obdo1.o_id = strtoull(argv[1], &end, 0);
if (*end) {
fprintf(stderr, "error: %s: invalid objid '%s'\n",
data.ioc_obdo1.o_valid = 0xffffffff;
printf("%s: object id "LPX64"\n", jt_cmdname(argv[0]),data.ioc_obdo1.o_id);
- IOC_PACK(argv[0], data);
+ memset(buf, 0, sizeof(rawbuf));
+ rc = obd_ioctl_pack(&data, &buf, sizeof(rawbuf));
+ if (rc) {
+ fprintf(stderr, "error: %s: invalid ioctl\n",
+ jt_cmdname(argv[0]));
+ return rc;
+ }
rc = l2_ioctl(OBD_DEV_ID, OBD_IOC_GETATTR, buf);
- IOC_UNPACK(argv[0], data);
+ obd_ioctl_unpack(&data, buf, sizeof(rawbuf));
if (rc) {
fprintf(stderr, "error: %s: %s\n", jt_cmdname(argv[0]),
strerror(rc = errno));
{
struct obd_ioctl_data data;
struct timeval start, next_time;
+ char rawbuf[MAX_IOC_BUFLEN], *buf = rawbuf;
__u64 i, count, next_count;
int verbose = 1;
obd_id objid = 3;
if (argc < 2 || argc > 4)
return CMD_HELP;
- IOC_INIT(data);
+ memset(&data, 0x00, sizeof(data));
+ data.ioc_dev = cur_device;
count = strtoull(argv[1], &end, 0);
if (*end) {
fprintf(stderr, "error: %s: invalid iteration count '%s'\n",
data.ioc_obdo1.o_id = objid;
data.ioc_obdo1.o_mode = S_IFREG;
data.ioc_obdo1.o_valid = 0xffffffff;
- IOC_PACK(argv[0], data);
+ memset(buf, 0x00, sizeof(rawbuf));
+ rc = obd_ioctl_pack(&data, &buf, sizeof(rawbuf));
+ if (rc) {
+ fprintf(stderr, "error: %s: invalid ioctl\n",
+ jt_cmdname(argv[0]));
+ return rc;
+ }
rc = l2_ioctl(OBD_DEV_ID, OBD_IOC_GETATTR, &data);
shmem_bump();
if (rc < 0) {
{
struct obd_ioctl_data data;
struct timeval start, next_time;
+ char rawbuf[MAX_IOC_BUFLEN], *buf = rawbuf;
__u64 count, next_count, len, stride, thr_offset = 0, objid = 3;
int write = 0, verbose = 1, cmd, i, rc = 0, pages = 1;
int offset_pages = 0;
}
}
- IOC_INIT(data);
+ memset(&data, 0x00, sizeof(data));
+ data.ioc_dev = cur_device;
/* communicate the 'type' of brw test and batching to echo_client.
* don't start. we'd love to refactor this lctl->echo_client
cmd = write ? OBD_IOC_BRW_WRITE : OBD_IOC_BRW_READ;
for (i = 1, next_count = verbose; i <= count && shmem_running(); i++) {
data.ioc_obdo1.o_valid &= ~(OBD_MD_FLBLOCKS|OBD_MD_FLGRANT);
- IOC_PACK(argv[0], data);
+ memset(buf, 0x00, sizeof(rawbuf));
+ rc = obd_ioctl_pack(&data, &buf, sizeof(rawbuf));
+ if (rc) {
+ fprintf(stderr, "error: %s: invalid ioctl\n",
+ jt_cmdname(argv[0]));
+ return rc;
+ }
rc = l2_ioctl(OBD_DEV_ID, cmd, buf);
shmem_bump();
if (rc) {
struct obd_ioctl_data data;
struct lov_desc desc;
struct obd_uuid *uuidarray;
+ char rawbuf[MAX_IOC_BUFLEN], *buf = rawbuf;
__u32 *obdgens;
char *path;
int rc, fd;
- IOC_INIT(data);
+ memset(&data, 0x00, sizeof(data));
+ data.ioc_dev = cur_device;
if (argc != 2)
return CMD_HELP;
desc.ld_tgt_count = ((OBD_MAX_IOCTL_BUFFER-sizeof(data)-sizeof(desc)) /
(sizeof(*uuidarray) + sizeof(*obdgens)));
-
repeat:
uuidarray = calloc(desc.ld_tgt_count, sizeof(*uuidarray));
if (!uuidarray) {
goto out_uuidarray;
}
+ memset(buf, 0x00, sizeof(rawbuf));
data.ioc_inllen1 = sizeof(desc);
data.ioc_inlbuf1 = (char *)&desc;
data.ioc_inllen2 = desc.ld_tgt_count * sizeof(*uuidarray);
data.ioc_inllen3 = desc.ld_tgt_count * sizeof(*obdgens);
data.ioc_inlbuf3 = (char *)obdgens;
- if (obd_ioctl_pack(&data, &buf, max)) {
+ if (obd_ioctl_pack(&data, &buf, sizeof(rawbuf))) {
fprintf(stderr, "error: %s: invalid ioctl\n",
jt_cmdname(argv[0]));
rc = -EINVAL;
__u32 *genp;
int i;
- if (obd_ioctl_unpack(&data, buf, max)) {
+ if (obd_ioctl_unpack(&data, buf, sizeof(rawbuf))) {
fprintf(stderr, "error: %s: invalid reply\n",
jt_cmdname(argv[0]));
rc = -EINVAL;
{
int rc;
struct obd_ioctl_data data;
+ char rawbuf[MAX_IOC_BUFLEN], *buf = rawbuf;
char argstring[200];
int i, count = sizeof(argstring) - 1;
- IOC_INIT(data);
+ memset(&data, 0x00, sizeof(data));
+ data.ioc_dev = cur_device;
if (argc > 5)
return CMD_HELP;
data.ioc_inllen1 = strlen(argstring) + 1;
}
- IOC_PACK(argv[0], data);
+ memset(buf, 0, sizeof(rawbuf));
+ rc = obd_ioctl_pack(&data, &buf, sizeof(rawbuf));
+ if (rc) {
+ fprintf(stderr, "error: %s: invalid ioctl\n",
+ jt_cmdname(argv[0]));
+ return rc;
+ }
rc = l2_ioctl(OBD_DEV_ID, IOC_LDLM_REGRESS_START, buf);
if (rc)
fprintf(stderr, "error: %s: test failed: %s\n",
int jt_obd_ldlm_regress_stop(int argc, char **argv)
{
int rc;
+ char rawbuf[MAX_IOC_BUFLEN], *buf = rawbuf;
struct obd_ioctl_data data;
- IOC_INIT(data);
+
+ memset(&data, 0x00, sizeof(data));
+ data.ioc_dev = cur_device;
if (argc != 1)
return CMD_HELP;
- IOC_PACK(argv[0], data);
+ memset(buf, 0, sizeof(rawbuf));
+ rc = obd_ioctl_pack(&data, &buf, sizeof(rawbuf));
+ if (rc) {
+ fprintf(stderr, "error: %s: invalid ioctl\n",
+ jt_cmdname(argv[0]));
+ return rc;
+ }
rc = l2_ioctl(OBD_DEV_ID, IOC_LDLM_REGRESS_STOP, buf);
if (rc)
static int do_activate(int argc, char **argv, int flag)
{
struct obd_ioctl_data data;
+ char rawbuf[MAX_IOC_BUFLEN], *buf = rawbuf;
int rc;
- IOC_INIT(data);
+ memset(&data, 0x00, sizeof(data));
+ data.ioc_dev = cur_device;
if (argc != 1)
return CMD_HELP;
/* reuse offset for 'active' */
data.ioc_offset = flag;
- IOC_PACK(argv[0], data);
+ memset(buf, 0, sizeof(rawbuf));
+ rc = obd_ioctl_pack(&data, &buf, sizeof(rawbuf));
+ if (rc) {
+ fprintf(stderr, "error: %s: invalid ioctl\n",
+ jt_cmdname(argv[0]));
+ return rc;
+ }
rc = l2_ioctl(OBD_DEV_ID, IOC_OSC_SET_ACTIVE, buf);
if (rc)
fprintf(stderr, "error: %s: failed: %s\n",
int jt_obd_recover(int argc, char **argv)
{
int rc;
+ char rawbuf[MAX_IOC_BUFLEN], *buf = rawbuf;
struct obd_ioctl_data data;
- IOC_INIT(data);
+ memset(&data, 0x00, sizeof(data));
+ data.ioc_dev = cur_device;
if (argc > 2)
return CMD_HELP;
data.ioc_inlbuf1 = argv[1];
}
- IOC_PACK(argv[0], data);
+ memset(buf, 0, sizeof(rawbuf));
+ rc = obd_ioctl_pack(&data, &buf, sizeof(rawbuf));
+ if (rc) {
+ fprintf(stderr, "error: %s: invalid ioctl\n",
+ jt_cmdname(argv[0]));
+ return rc;
+ }
rc = l2_ioctl(OBD_DEV_ID, OBD_IOC_CLIENT_RECOVER, buf);
if (rc < 0) {
fprintf(stderr, "error: %s: %s\n", jt_cmdname(argv[0]),
int jt_obd_mdc_lookup(int argc, char **argv)
{
struct obd_ioctl_data data;
+ char rawbuf[MAX_IOC_BUFLEN], *buf = rawbuf;
char *parent, *child;
int rc, fd, verbose = 1;
if (argc == 4)
verbose = get_verbose(argv[0], argv[3]);
- IOC_INIT(data);
+ memset(&data, 0x00, sizeof(data));
+ data.ioc_dev = cur_device;
data.ioc_inllen1 = strlen(child) + 1;
data.ioc_inlbuf1 = child;
- IOC_PACK(argv[0], data);
+ memset(buf, 0, sizeof(rawbuf));
+ rc = obd_ioctl_pack(&data, &buf, sizeof(rawbuf));
+ if (rc) {
+ fprintf(stderr, "error: %s: invalid ioctl\n",
+ jt_cmdname(argv[0]));
+ return rc;
+ }
fd = open(parent, O_RDONLY);
if (fd < 0) {
close(fd);
if (verbose) {
- IOC_UNPACK(argv[0], data);
+ rc = obd_ioctl_unpack(&data, buf, sizeof(rawbuf));
+ if (rc) {
+ fprintf(stderr, "error: %s: invalid reply\n",
+ jt_cmdname(argv[0]));
+ return rc;
+ }
printf("%s: mode %o uid %d gid %d\n", child,
data.ioc_obdo1.o_mode, data.ioc_obdo1.o_uid,
data.ioc_obdo1.o_gid);
int jt_cfg_dump_log(int argc, char **argv)
{
struct obd_ioctl_data data;
+ char rawbuf[MAX_IOC_BUFLEN], *buf = rawbuf;
int rc;
if (argc != 2)
return CMD_HELP;
- IOC_INIT(data);
+ memset(&data, 0x00, sizeof(data));
+ data.ioc_dev = cur_device;
data.ioc_inllen1 = strlen(argv[1]) + 1;
data.ioc_inlbuf1 = argv[1];
- IOC_PACK(argv[0], data);
+ memset(buf, 0, sizeof(rawbuf));
+ rc = obd_ioctl_pack(&data, &buf, sizeof(rawbuf));
+ if (rc) {
+ fprintf(stderr, "error: %s: invalid ioctl\n",
+ jt_cmdname(argv[0]));
+ return rc;
+ }
rc = l_ioctl(OBD_DEV_ID, OBD_IOC_DUMP_LOG, buf);
if (rc < 0)
fprintf(stderr, "OBD_IOC_DUMP_LOG failed: %s\n",
int jt_llog_catlist(int argc, char **argv)
{
struct obd_ioctl_data data;
+ char rawbuf[MAX_IOC_BUFLEN], *buf = rawbuf;
int rc;
if (argc != 1)
return CMD_HELP;
- IOC_INIT(data);
- data.ioc_inllen1 = max - size_round(sizeof(data));
- IOC_PACK(argv[0], data);
-
+ memset(&data, 0x00, sizeof(data));
+ data.ioc_dev = cur_device;
+ data.ioc_inllen1 = sizeof(rawbuf) - size_round(sizeof(data));
+ memset(buf, 0, sizeof(rawbuf));
+ rc = obd_ioctl_pack(&data, &buf, sizeof(rawbuf));
+ if (rc) {
+ fprintf(stderr, "error: %s: invalid ioctl\n",
+ jt_cmdname(argv[0]));
+ return rc;
+ }
rc = l_ioctl(OBD_DEV_ID, OBD_IOC_CATLOGLIST, buf);
if (rc == 0)
fprintf(stdout, "%s", ((struct obd_ioctl_data*)buf)->ioc_bulk);
int jt_llog_info(int argc, char **argv)
{
struct obd_ioctl_data data;
+ char rawbuf[MAX_IOC_BUFLEN], *buf = rawbuf;
int rc;
if (argc != 2)
return CMD_HELP;
- IOC_INIT(data);
+ memset(&data, 0x00, sizeof(data));
+ data.ioc_dev = cur_device;
data.ioc_inllen1 = strlen(argv[1]) + 1;
data.ioc_inlbuf1 = argv[1];
- data.ioc_inllen2 = max - size_round(sizeof(data)) -
+ data.ioc_inllen2 = sizeof(rawbuf) - size_round(sizeof(data)) -
size_round(data.ioc_inllen1);
- IOC_PACK(argv[0], data);
+ memset(buf, 0, sizeof(rawbuf));
+ rc = obd_ioctl_pack(&data, &buf, sizeof(rawbuf));
+ if (rc) {
+ fprintf(stderr, "error: %s: invalid ioctl\n",
+ jt_cmdname(argv[0]));
+ return rc;
+ }
rc = l_ioctl(OBD_DEV_ID, OBD_IOC_LLOG_INFO, buf);
if (rc == 0)
int jt_llog_print(int argc, char **argv)
{
struct obd_ioctl_data data;
+ char rawbuf[MAX_IOC_BUFLEN], *buf = rawbuf;
int rc;
if (argc != 2 && argc != 4)
return CMD_HELP;
- IOC_INIT(data);
+ memset(&data, 0x00, sizeof(data));
+ data.ioc_dev = cur_device;
data.ioc_inllen1 = strlen(argv[1]) + 1;
data.ioc_inlbuf1 = argv[1];
if (argc == 4) {
data.ioc_inllen3 = strlen(to) + 1;
data.ioc_inlbuf3 = to;
}
- data.ioc_inllen4 = max - size_round(sizeof(data)) -
+ data.ioc_inllen4 = sizeof(rawbuf) - size_round(sizeof(data)) -
size_round(data.ioc_inllen1) -
size_round(data.ioc_inllen2) -
size_round(data.ioc_inllen3);
- IOC_PACK(argv[0], data);
+ memset(buf, 0, sizeof(rawbuf));
+ rc = obd_ioctl_pack(&data, &buf, sizeof(rawbuf));
+ if (rc) {
+ fprintf(stderr, "error: %s: invalid ioctl\n",
+ jt_cmdname(argv[0]));
+ return rc;
+ }
rc = l_ioctl(OBD_DEV_ID, OBD_IOC_LLOG_PRINT, buf);
if (rc == 0)
int jt_llog_cancel(int argc, char **argv)
{
struct obd_ioctl_data data;
+ char rawbuf[MAX_IOC_BUFLEN], *buf = rawbuf;
int rc;
if (argc != 4)
return CMD_HELP;
- IOC_INIT(data);
+ memset(&data, 0x00, sizeof(data));
+ data.ioc_dev = cur_device;
data.ioc_inllen1 = strlen(argv[1]) + 1;
data.ioc_inlbuf1 = argv[1];
data.ioc_inllen2 = strlen(argv[2]) + 1;
data.ioc_inlbuf2 = argv[2];
data.ioc_inllen3 = strlen(argv[3]) + 1;
data.ioc_inlbuf3 = argv[3];
- IOC_PACK(argv[0], data);
+ memset(buf, 0, sizeof(rawbuf));
+ rc = obd_ioctl_pack(&data, &buf, sizeof(rawbuf));
+ if (rc) {
+ fprintf(stderr, "error: %s: invalid ioctl\n",
+ jt_cmdname(argv[0]));
+ return rc;
+ }
rc = l_ioctl(OBD_DEV_ID, OBD_IOC_LLOG_CANCEL, buf);
if (rc == 0)
int jt_llog_check(int argc, char **argv)
{
struct obd_ioctl_data data;
+ char rawbuf[MAX_IOC_BUFLEN], *buf = rawbuf;
int rc;
if (argc != 2 && argc != 4)
return CMD_HELP;
- IOC_INIT(data);
+ memset(&data, 0x00, sizeof(data));
+ data.ioc_dev = cur_device;
data.ioc_inllen1 = strlen(argv[1]) + 1;
data.ioc_inlbuf1 = argv[1];
if (argc == 4) {
data.ioc_inllen3 = strlen(to) + 1;
data.ioc_inlbuf3 = to;
}
- data.ioc_inllen4 = max - size_round(sizeof(data)) -
+ data.ioc_inllen4 = sizeof(rawbuf) - size_round(sizeof(data)) -
size_round(data.ioc_inllen1) -
size_round(data.ioc_inllen2) -
size_round(data.ioc_inllen3);
- IOC_PACK(argv[0], data);
+ memset(buf, 0, sizeof(rawbuf));
+ rc = obd_ioctl_pack(&data, &buf, sizeof(rawbuf));
+ if (rc) {
+ fprintf(stderr, "error: %s: invalid ioctl\n",
+ jt_cmdname(argv[0]));
+ return rc;
+ }
rc = l_ioctl(OBD_DEV_ID, OBD_IOC_LLOG_CHECK, buf);
if (rc == 0)
int jt_llog_remove(int argc, char **argv)
{
struct obd_ioctl_data data;
+ char rawbuf[MAX_IOC_BUFLEN], *buf = rawbuf;
int rc;
if (argc != 3 && argc != 2)
return CMD_HELP;
- IOC_INIT(data);
+ memset(&data, 0x00, sizeof(data));
+ data.ioc_dev = cur_device;
data.ioc_inllen1 = strlen(argv[1]) + 1;
data.ioc_inlbuf1 = argv[1];
if (argc == 3){
data.ioc_inllen2 = strlen(argv[2]) + 1;
data.ioc_inlbuf2 = argv[2];
}
- IOC_PACK(argv[0], data);
+ memset(buf, 0, sizeof(rawbuf));
+ rc = obd_ioctl_pack(&data, &buf, sizeof(rawbuf));
+ if (rc) {
+ fprintf(stderr, "error: %s: invalid ioctl\n",
+ jt_cmdname(argv[0]));
+ return rc;
+ }
rc = l_ioctl(OBD_DEV_ID, OBD_IOC_LLOG_REMOVE, buf);
if (rc == 0) {
{
FILE *fp;
int found = 0;
- char modname[256];
+ char buf[1024];
fp = fopen("/proc/modules", "r");
if (fp == NULL)
return -1;
- while (fscanf(fp, "%s %*s %*s %*s %*s %*s", modname) == 1) {
- if (strcmp(module, modname) == 0) {
+        while (fgets(buf, sizeof(buf), fp) != NULL) {
+                char *sp = strchr(buf, ' ');
+                if (sp == NULL)         /* malformed line, skip it */
+                        continue;
+                *sp = 0;
+                if (strcmp(module, buf) == 0) {
found = 1;
break;
}
struct obd_ioctl_data data;
struct lustre_cfg_bufs bufs;
struct lustre_cfg *lcfg;
+ char rawbuf[MAX_IOC_BUFLEN], *buf = rawbuf;
rc = check_pool_cmd(cmd, fsname, poolname, ostname);
if (rc)
return rc;
}
- IOC_INIT(data);
+ memset(&data, 0x00, sizeof(data));
rc = data.ioc_dev = get_mgs_device();
if (rc < 0)
goto out;
data.ioc_plen1 = lustre_cfg_len(lcfg->lcfg_bufcount,
lcfg->lcfg_buflens);
data.ioc_pbuf1 = (void *)lcfg;
- IOC_PACK(cmdname, data);
+ memset(buf, 0, sizeof(rawbuf));
+ rc = obd_ioctl_pack(&data, &buf, sizeof(rawbuf));
+ if (rc) {
+ fprintf(stderr, "error: %s: invalid ioctl\n",
+ jt_cmdname(cmdname));
+ return rc;
+ }
rc = l_ioctl(OBD_DEV_ID, OBD_IOC_POOL, buf);
out:
if (rc)
{
int rc;
struct obd_ioctl_data data;
+ char rawbuf[MAX_IOC_BUFLEN], *buf = rawbuf;
memset(&data, 0, sizeof(data));
data.ioc_inlbuf4 = obd_name;
data.ioc_inllen4 = strlen(obd_name) + 1;
data.ioc_dev = OBD_DEV_BY_DEVNAME;
memset(buf, 0, sizeof(rawbuf));
- if (obd_ioctl_pack(&data, &buf, max)) {
+ if (obd_ioctl_pack(&data, &buf, sizeof(rawbuf))) {
fprintf(stderr, "error: invalid ioctl\n");
return;
}
CHECK_CDEFINE(FIEMAP_EXTENT_LAST);
CHECK_CDEFINE(FIEMAP_EXTENT_UNKNOWN);
CHECK_CDEFINE(FIEMAP_EXTENT_DELALLOC);
- CHECK_CDEFINE(FIEMAP_EXTENT_NO_DIRECT);
- CHECK_CDEFINE(FIEMAP_EXTENT_SECONDARY);
- CHECK_CDEFINE(FIEMAP_EXTENT_NET);
- CHECK_CDEFINE(FIEMAP_EXTENT_DATA_COMPRESSED);
+ CHECK_CDEFINE(FIEMAP_EXTENT_ENCODED);
CHECK_CDEFINE(FIEMAP_EXTENT_DATA_ENCRYPTED);
CHECK_CDEFINE(FIEMAP_EXTENT_NOT_ALIGNED);
CHECK_CDEFINE(FIEMAP_EXTENT_DATA_INLINE);
CHECK_CDEFINE(FIEMAP_EXTENT_DATA_TAIL);
CHECK_CDEFINE(FIEMAP_EXTENT_UNWRITTEN);
CHECK_CDEFINE(FIEMAP_EXTENT_MERGED);
+ CHECK_CDEFINE(FIEMAP_EXTENT_NO_DIRECT);
+ CHECK_CDEFINE(FIEMAP_EXTENT_NET);
}
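
Editor's note: the CHECK_CDEFINE list above and the CLASSERT block below freeze these FIEMAP flag values at compile time, so the renumbering in this patch (NO_DIRECT and NET moving to the top bits 0x40000000/0x80000000, ENCODED taking 0x00000008) is caught by the build rather than by wire-protocol interop failures. A sketch of the general compile-time-assert idiom (Lustre's actual CLASSERT definition may differ):

#include <stdio.h>

/* A false condition produces a duplicate "case 0" label, so a wrong
 * wire constant breaks the compile instead of the protocol. */
#define STATIC_ASSERT(cond) \
        do { switch (0) { case 0: case (cond): break; } } while (0)

int main(void)
{
        STATIC_ASSERT(0x40000000 == 0x40000000);  /* compiles */
        /* STATIC_ASSERT(1 == 2); would fail: duplicate case 0 */
        puts("wire constants verified at compile time");
        return 0;
}
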
static void
{
/* Wire protocol assertions generated by 'wirecheck'
* (make -C lustre/utils newwiretest)
- * running on Linux lin2 2.6.18-92.1.17-prep #3 Sun Nov 23 14:29:36 IST 2008 i686 i686 i386 G
- * with gcc version 3.4.6 20060404 (Red Hat 3.4.6-10) */
+ * running on Linux localhost.localdomain 2.6.18-prep #3 SMP Sun Nov 23 08:04:44 EST 2008 i68
+ * with gcc version 4.1.1 20061011 (Red Hat 4.1.1-30) */
/* Constants... */
(long long)OBD_QC_CALLBACK);
LASSERTF(OBD_LAST_OPC == 403, " found %lld\n",
(long long)OBD_LAST_OPC);
- LASSERTF(QUOTA_DQACQ == 901, " found %lld\n",
+ LASSERTF(QUOTA_DQACQ == 601, " found %lld\n",
(long long)QUOTA_DQACQ);
- LASSERTF(QUOTA_DQREL == 902, " found %lld\n",
+ LASSERTF(QUOTA_DQREL == 602, " found %lld\n",
(long long)QUOTA_DQREL);
LASSERTF(MGS_CONNECT == 250, " found %lld\n",
(long long)MGS_CONNECT);
(long long)(int)offsetof(struct obd_connect_data, padding2));
LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding2) == 8, " found %lld\n",
(long long)(int)sizeof(((struct obd_connect_data *)0)->padding2));
- CLASSERT(OBD_CONNECT_RDONLY == 0x00000001ULL);
- CLASSERT(OBD_CONNECT_INDEX == 0x00000002ULL);
- CLASSERT(OBD_CONNECT_GRANT == 0x00000008ULL);
- CLASSERT(OBD_CONNECT_SRVLOCK == 0x00000010ULL);
- CLASSERT(OBD_CONNECT_VERSION == 0x00000020ULL);
- CLASSERT(OBD_CONNECT_REQPORTAL == 0x00000040ULL);
- CLASSERT(OBD_CONNECT_ACL == 0x00000080ULL);
- CLASSERT(OBD_CONNECT_XATTR == 0x00000100ULL);
+ CLASSERT(OBD_CONNECT_RDONLY == 0x1ULL);
+ CLASSERT(OBD_CONNECT_INDEX == 0x2ULL);
+ CLASSERT(OBD_CONNECT_GRANT == 0x8ULL);
+ CLASSERT(OBD_CONNECT_SRVLOCK == 0x10ULL);
+ CLASSERT(OBD_CONNECT_VERSION == 0x20ULL);
+ CLASSERT(OBD_CONNECT_REQPORTAL == 0x40ULL);
+ CLASSERT(OBD_CONNECT_ACL == 0x80ULL);
+ CLASSERT(OBD_CONNECT_XATTR == 0x100ULL);
CLASSERT(OBD_CONNECT_REAL == 0x08000000ULL);
CLASSERT(OBD_CONNECT_CKSUM == 0x20000000ULL);
- CLASSERT(OBD_CONNECT_TRUNCLOCK == 0x00000400ULL);
- CLASSERT(OBD_CONNECT_IBITS == 0x00001000ULL);
- CLASSERT(OBD_CONNECT_JOIN == 0x00002000ULL);
- CLASSERT(OBD_CONNECT_ATTRFID == 0x00004000ULL);
- CLASSERT(OBD_CONNECT_NODEVOH == 0x00008000ULL);
+ CLASSERT(OBD_CONNECT_TRUNCLOCK == 0x400ULL);
+ CLASSERT(OBD_CONNECT_IBITS == 0x1000ULL);
+ CLASSERT(OBD_CONNECT_JOIN == 0x2000ULL);
+ CLASSERT(OBD_CONNECT_ATTRFID == 0x4000ULL);
+ CLASSERT(OBD_CONNECT_NODEVOH == 0x8000ULL);
CLASSERT(OBD_CONNECT_RMT_CLIENT == 0x00010000ULL);
CLASSERT(OBD_CONNECT_RMT_CLIENT_FORCE == 0x00020000ULL);
- CLASSERT(OBD_CONNECT_BRW_SIZE == 0x00040000ULL);
- CLASSERT(OBD_CONNECT_QUOTA64 == 0x00080000ULL);
- CLASSERT(OBD_CONNECT_MDS_CAPA == 0x00100000ULL);
- CLASSERT(OBD_CONNECT_OSS_CAPA == 0x00200000ULL);
+ CLASSERT(OBD_CONNECT_BRW_SIZE == 0x40000ULL);
+ CLASSERT(OBD_CONNECT_QUOTA64 == 0x80000ULL);
+ CLASSERT(OBD_CONNECT_MDS_CAPA == 0x100000ULL);
+ CLASSERT(OBD_CONNECT_OSS_CAPA == 0x200000ULL);
CLASSERT(OBD_CONNECT_MDS_MDS == 0x04000000ULL);
CLASSERT(OBD_CONNECT_SOM == 0x00800000ULL);
CLASSERT(OBD_CONNECT_AT == 0x01000000ULL);
- CLASSERT(OBD_CONNECT_CANCELSET == 0x00400000ULL);
+ CLASSERT(OBD_CONNECT_CANCELSET == 0x400000ULL);
CLASSERT(OBD_CONNECT_LRU_RESIZE == 0x02000000ULL);
/* Checks for struct obdo */
CLASSERT(FIEMAP_FLAG_DEVICE_ORDER == 0x40000000);
/* Checks for struct ll_fiemap_extent */
- LASSERTF((int)sizeof(struct ll_fiemap_extent) == 32, " found %lld\n",
+ LASSERTF((int)sizeof(struct ll_fiemap_extent) == 56, " found %lld\n",
(long long)(int)sizeof(struct ll_fiemap_extent));
LASSERTF((int)offsetof(struct ll_fiemap_extent, fe_logical) == 0, " found %lld\n",
(long long)(int)offsetof(struct ll_fiemap_extent, fe_logical));
(long long)(int)offsetof(struct ll_fiemap_extent, fe_length));
LASSERTF((int)sizeof(((struct ll_fiemap_extent *)0)->fe_length) == 8, " found %lld\n",
(long long)(int)sizeof(((struct ll_fiemap_extent *)0)->fe_length));
- LASSERTF((int)offsetof(struct ll_fiemap_extent, fe_flags) == 24, " found %lld\n",
+ LASSERTF((int)offsetof(struct ll_fiemap_extent, fe_flags) == 40, " found %lld\n",
(long long)(int)offsetof(struct ll_fiemap_extent, fe_flags));
LASSERTF((int)sizeof(((struct ll_fiemap_extent *)0)->fe_flags) == 4, " found %lld\n",
(long long)(int)sizeof(((struct ll_fiemap_extent *)0)->fe_flags));
- LASSERTF((int)offsetof(struct ll_fiemap_extent, fe_device) == 28, " found %lld\n",
+ LASSERTF((int)offsetof(struct ll_fiemap_extent, fe_device) == 44, " found %lld\n",
(long long)(int)offsetof(struct ll_fiemap_extent, fe_device));
LASSERTF((int)sizeof(((struct ll_fiemap_extent *)0)->fe_device) == 4, " found %lld\n",
(long long)(int)sizeof(((struct ll_fiemap_extent *)0)->fe_device));
CLASSERT(FIEMAP_EXTENT_LAST == 0x00000001);
CLASSERT(FIEMAP_EXTENT_UNKNOWN == 0x00000002);
CLASSERT(FIEMAP_EXTENT_DELALLOC == 0x00000004);
- CLASSERT(FIEMAP_EXTENT_NO_DIRECT == 0x00000008);
- CLASSERT(FIEMAP_EXTENT_SECONDARY == 0x00000010);
- CLASSERT(FIEMAP_EXTENT_NET == 0x00000020);
- CLASSERT(FIEMAP_EXTENT_DATA_COMPRESSED == 0x00000040);
+ CLASSERT(FIEMAP_EXTENT_ENCODED == 0x00000008);
CLASSERT(FIEMAP_EXTENT_DATA_ENCRYPTED == 0x00000080);
CLASSERT(FIEMAP_EXTENT_NOT_ALIGNED == 0x00000100);
CLASSERT(FIEMAP_EXTENT_DATA_INLINE == 0x00000200);
CLASSERT(FIEMAP_EXTENT_DATA_TAIL == 0x00000400);
CLASSERT(FIEMAP_EXTENT_UNWRITTEN == 0x00000800);
CLASSERT(FIEMAP_EXTENT_MERGED == 0x00001000);
-#if defined(LIBLUSTRE_POSIX_ACL) && defined(CONFIG_FS_POSIX_ACL)
+ CLASSERT(FIEMAP_EXTENT_NO_DIRECT == 0x40000000);
+ CLASSERT(FIEMAP_EXTENT_NET == 0x80000000);
+#ifdef LIBLUSTRE_POSIX_ACL
/* Checks for type posix_acl_xattr_entry */
LASSERTF((int)sizeof(xattr_acl_entry) == 8, " found %lld\n",