#include <linux/user_namespace.h>
#include <linux/uidgid.h>
#include <linux/falloc.h>
+#include <linux/ktime.h>
#include <uapi/linux/lustre/lustre_ioctl.h>
#include <uapi/linux/llcrypt.h>
lli->lli_async_rc = 0;
}
+ lli->lli_close_fd_time = ktime_get();
+
rc = ll_md_close(inode, file);
if (CFS_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_DUMP_LOG, cfs_fail_val))
RETURN(0);
}
+void ll_track_file_opens(struct inode *inode)
+{
+ struct ll_inode_info *lli = ll_i2info(inode);
+ struct ll_sb_info *sbi = ll_i2sbi(inode);
+
+ /* do not skew results with delays from never-opened inodes */
+ if (ktime_to_ns(lli->lli_close_fd_time))
+ ll_stats_ops_tally(sbi, LPROC_LL_INODE_OPCLTM,
+ ktime_us_delta(ktime_get(), lli->lli_close_fd_time));
+
+ if (ktime_after(ktime_get(),
+ ktime_add_ms(lli->lli_close_fd_time,
+ sbi->ll_oc_max_ms))) {
+ lli->lli_open_fd_count = 1;
+ lli->lli_close_fd_time = ns_to_ktime(0);
+ } else {
+ lli->lli_open_fd_count++;
+ }
+
+ ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_OCOUNT,
+ lli->lli_open_fd_count);
+}
+
/* Open a file, and (for the very first open) create objects on the OSTs at
* this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
* creation or open until ll_lov_setstripe() ioctl is called.
if (S_ISDIR(inode->i_mode))
ll_authorize_statahead(inode, fd);
+ ll_track_file_opens(inode);
if (is_root_inode(inode)) {
file->private_data = fd;
RETURN(0);
LASSERT(*och_usecount == 0);
if (!it->it_disposition) {
struct dentry *dentry = file_dentry(file);
+ struct ll_sb_info *sbi = ll_i2sbi(inode);
struct ll_dentry_data *ldd;
/* We cannot just request lock handle now, new ELC code
* handle to be returned from LOOKUP|OPEN request,
* for example if the target entry was a symlink.
*
- * Only fetch MDS_OPEN_LOCK if this is in NFS path,
- * marked by a bit set in ll_iget_for_nfs. Clear the
- * bit so that it's not confusing later callers.
+ * In NFS path we know there's pathologic behavior
+ * so we always enable open lock caching when coming
+ * from there. It's detected by setting a flag in
+ * ll_iget_for_nfs.
*
- * NB; when ldd is NULL, it must have come via normal
- * lookup path only, since ll_iget_for_nfs always calls
- * ll_d_init().
+ * After reaching number of opens of this inode
+ * we always ask for an open lock on it to handle
+ * bad userspace actors that open and close files
+ * in a loop for absolutely no good reason
*/
+
ldd = ll_d2d(dentry);
- if (ldd && ldd->lld_nfs_dentry) {
+ if (filename_is_volatile(dentry->d_name.name,
+ dentry->d_name.len,
+ NULL)) {
+ /* There really is nothing here, but this
+ * make this more readable I think.
+ * We do not want openlock for volatile
+ * files under any circumstances
+ */
+ } else if (ldd && ldd->lld_nfs_dentry) {
+ /* NFS path. This also happens to catch
+ * open by fh files I guess
+ */
+ it->it_flags |= MDS_OPEN_LOCK;
+ /* clear the flag for future lookups */
ldd->lld_nfs_dentry = 0;
- if (!filename_is_volatile(dentry->d_name.name,
- dentry->d_name.len,
- NULL))
+ } else if (sbi->ll_oc_thrsh_count > 0) {
+ /* Take MDS_OPEN_LOCK with many opens */
+ if (lli->lli_open_fd_count >=
+ sbi->ll_oc_thrsh_count)
+ it->it_flags |= MDS_OPEN_LOCK;
+
+ /* If this is open after we just closed */
+ else if (ktime_before(ktime_get(),
+ ktime_add_ms(lli->lli_close_fd_time,
+ sbi->ll_oc_thrsh_ms)))
it->it_flags |= MDS_OPEN_LOCK;
}
__u64 lli_open_fd_read_count;
__u64 lli_open_fd_write_count;
__u64 lli_open_fd_exec_count;
+
+ /* Number of times this inode was opened */
+ u64 lli_open_fd_count;
+ /* When last close was performed on this inode */
+ ktime_t lli_close_fd_time;
+
/* Protects access to och pointers and their usage counters */
struct mutex lli_och_mutex;
unsigned int ll_heat_decay_weight;
unsigned int ll_heat_period_second;
+ /* Opens of the same inode before we start requesting open lock */
+ u32 ll_oc_thrsh_count;
+
+ /* Time in ms between last inode close and next open to be considered
+ * instant back to back and would trigger an open lock request
+ */
+ u32 ll_oc_thrsh_ms;
+
+ /* Time in ms after last file close that we no longer count prior opens*/
+ u32 ll_oc_max_ms;
+
/* filesystem fsname */
char ll_fsname[LUSTRE_MAXFSNAME + 1];
#define SBI_DEFAULT_HEAT_DECAY_WEIGHT ((80 * 256 + 50) / 100)
#define SBI_DEFAULT_HEAT_PERIOD_SECOND (60)
+
+#define SBI_DEFAULT_OPENCACHE_THRESHOLD_COUNT (5)
+#define SBI_DEFAULT_OPENCACHE_THRESHOLD_MS (100) /* 0.1 second */
+#define SBI_DEFAULT_OPENCACHE_THRESHOLD_MAX_MS (60000) /* 1 minute */
+
/*
* per file-descriptor read-ahead data.
*/
LPROC_LL_REMOVEXATTR,
LPROC_LL_INODE_PERM,
LPROC_LL_FALLOCATE,
+ LPROC_LL_INODE_OCOUNT,
+ LPROC_LL_INODE_OPCLTM,
LPROC_LL_FILE_OPCODES
};
int ll_file_release(struct inode *inode, struct file *file);
int ll_release_openhandle(struct dentry *, struct lookup_intent *);
int ll_md_real_close(struct inode *inode, fmode_t fmode);
+void ll_track_file_opens(struct inode *inode);
extern void ll_rw_stats_tally(struct ll_sb_info *sbi, pid_t pid,
struct ll_file_data *file, loff_t pos,
size_t count, int rw);
/* Per-filesystem file heat */
sbi->ll_heat_decay_weight = SBI_DEFAULT_HEAT_DECAY_WEIGHT;
sbi->ll_heat_period_second = SBI_DEFAULT_HEAT_PERIOD_SECOND;
+
+ /* Per-fs open heat level before requesting open lock */
+ sbi->ll_oc_thrsh_count = SBI_DEFAULT_OPENCACHE_THRESHOLD_COUNT;
+ sbi->ll_oc_max_ms = SBI_DEFAULT_OPENCACHE_THRESHOLD_MAX_MS;
+ sbi->ll_oc_thrsh_ms = SBI_DEFAULT_OPENCACHE_THRESHOLD_MS;
RETURN(sbi);
out_destroy_ra:
if (sbi->ll_foreign_symlink_prefix)
}
LUSTRE_RW_ATTR(heat_period_second);
+static ssize_t opencache_threshold_count_show(struct kobject *kobj,
+ struct attribute *attr,
+ char *buf)
+{
+ struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+ ll_kset.kobj);
+
+ if (sbi->ll_oc_thrsh_count)
+ return snprintf(buf, PAGE_SIZE, "%u\n",
+ sbi->ll_oc_thrsh_count);
+ else
+ return snprintf(buf, PAGE_SIZE, "off\n");
+}
+
+static ssize_t opencache_threshold_count_store(struct kobject *kobj,
+ struct attribute *attr,
+ const char *buffer,
+ size_t count)
+{
+ struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+ ll_kset.kobj);
+ unsigned int val;
+ int rc;
+
+ rc = kstrtouint(buffer, 10, &val);
+ if (rc) {
+ bool enable;
+ /* also accept "off" to disable and "on" to always cache */
+ rc = kstrtobool(buffer, &enable);
+ if (rc)
+ return rc;
+ val = enable;
+ }
+ sbi->ll_oc_thrsh_count = val;
+
+ return count;
+}
+LUSTRE_RW_ATTR(opencache_threshold_count);
+
+static ssize_t opencache_threshold_ms_show(struct kobject *kobj,
+ struct attribute *attr,
+ char *buf)
+{
+ struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+ ll_kset.kobj);
+
+ return snprintf(buf, PAGE_SIZE, "%u\n", sbi->ll_oc_thrsh_ms);
+}
+
+static ssize_t opencache_threshold_ms_store(struct kobject *kobj,
+ struct attribute *attr,
+ const char *buffer,
+ size_t count)
+{
+ struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+ ll_kset.kobj);
+ unsigned int val;
+ int rc;
+
+ rc = kstrtouint(buffer, 10, &val);
+ if (rc)
+ return rc;
+
+ sbi->ll_oc_thrsh_ms = val;
+
+ return count;
+}
+LUSTRE_RW_ATTR(opencache_threshold_ms);
+
+static ssize_t opencache_max_ms_show(struct kobject *kobj,
+ struct attribute *attr,
+ char *buf)
+{
+ struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+ ll_kset.kobj);
+
+ return snprintf(buf, PAGE_SIZE, "%u\n", sbi->ll_oc_max_ms);
+}
+
+static ssize_t opencache_max_ms_store(struct kobject *kobj,
+ struct attribute *attr,
+ const char *buffer,
+ size_t count)
+{
+ struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+ ll_kset.kobj);
+ unsigned int val;
+ int rc;
+
+ rc = kstrtouint(buffer, 10, &val);
+ if (rc)
+ return rc;
+
+ sbi->ll_oc_max_ms = val;
+
+ return count;
+}
+LUSTRE_RW_ATTR(opencache_max_ms);
+
static int ll_unstable_stats_seq_show(struct seq_file *m, void *v)
{
struct super_block *sb = m->private;
&lustre_attr_file_heat.attr,
&lustre_attr_heat_decay_percentage.attr,
&lustre_attr_heat_period_second.attr,
+ &lustre_attr_opencache_threshold_count.attr,
+ &lustre_attr_opencache_threshold_ms.attr,
+ &lustre_attr_opencache_max_ms.attr,
NULL,
};
{ LPROC_LL_LLSEEK, LPROCFS_TYPE_LATENCY, "seek" },
{ LPROC_LL_FSYNC, LPROCFS_TYPE_LATENCY, "fsync" },
{ LPROC_LL_READDIR, LPROCFS_TYPE_LATENCY, "readdir" },
+ { LPROC_LL_INODE_OCOUNT,LPROCFS_TYPE_REQS |
+ LPROCFS_CNTR_AVGMINMAX |
+ LPROCFS_CNTR_STDDEV, "opencount" },
+ { LPROC_LL_INODE_OPCLTM,LPROCFS_TYPE_LATENCY, "openclosetime" },
/* inode operation */
{ LPROC_LL_SETATTR, LPROCFS_TYPE_LATENCY, "setattr" },
{ LPROC_LL_TRUNC, LPROCFS_TYPE_LATENCY, "truncate" },
OBD_FAIL_TIMEOUT(OBD_FAIL_LLITE_CREATE_FILE_PAUSE2, cfs_fail_val);
+ /* We can only arrive at this path when we have no inode, so
+ * we only need to request open lock if it was requested
+ * for every open
+ */
+ if (ll_i2sbi(dir)->ll_oc_thrsh_count == 1)
+ it->it_flags |= MDS_OPEN_LOCK;
+
/* Dentry added to dcache tree in ll_lookup_it */
de = ll_lookup_it(dir, dentry, it, &secctx, &secctxlen, &pca, encrypt,
&encctx, &encctxlen);
[ $MDS1_VERSION -lt $(version_code 2.10.58) ] &&
skip "need MDS version at least 2.10.58"
+ stack_trap "restore_opencache" EXIT
+ disable_opencache
+
mkdir -p $DIR/$tdir
local f=$DIR/$tdir/$tfile
[ $MDS1_VERSION -lt $(version_code 2.10.58) ] &&
skip "need MDS version at least 2.10.58"
+ stack_trap "restore_opencache" EXIT
+ disable_opencache
+
mkdir -p $DIR/$tdir
local f=$DIR/$tdir/$tfile
[ "$MDS1_VERSION" -lt $(version_code 2.11.56) ] &&
skip "Need MDS >= 2.11.56"
+ stack_trap "restore_opencache" EXIT
+ disable_opencache
+
local sepol
mkdir -p $DIR/$tdir || error "failed to create $DIR/$tdir"
}
run_test 428 "large block size IO should not hang"
+test_429() { # LU-7915 / LU-10948
+ local ll_opencache_threshold_count="llite.*.opencache_threshold_count"
+ local testfile=$DIR/$tfile
+ local mdc_rpcstats="mdc.$FSNAME-MDT0000-*.stats"
+ local new_flag=1
+ local first_rpc
+ local second_rpc
+ local third_rpc
+
+ $LCTL get_param $ll_opencache_threshold_count ||
+ skip "client does not have opencache parameter"
+
+ set_opencache $new_flag
+ stack_trap "restore_opencache"
+ [ $($LCTL get_param -n $ll_opencache_threshold_count) == $new_flag ] ||
+ error "enable opencache failed"
+ touch $testfile
+ # drop MDC DLM locks
+ cancel_lru_locks mdc
+ # clear MDC RPC stats counters
+ $LCTL set_param $mdc_rpcstats=clear
+
+ # According to the current implementation, we need to run 3 times
+ # open & close file to verify if opencache is enabled correctly.
+ # 1st, RPCs are sent for lookup/open and open handle is released on
+ # close finally.
+ # 2nd, RPC is sent for open, MDS_OPEN_LOCK is fetched automatically,
+ # so open handle won't be released thereafter.
+ # 3rd, No RPC is sent out.
+ $MULTIOP $testfile oc || error "multiop failed"
+ first_rpc=$(calc_stats $mdc_rpcstats ldlm_ibits_enqueue)
+ echo "1st: $first_rpc RPCs in flight"
+
+ $MULTIOP $testfile oc || error "multiop failed"
+ second_rpc=$(calc_stats $mdc_rpcstats ldlm_ibits_enqueue)
+ echo "2nd: $second_rpc RPCs in flight"
+
+ $MULTIOP $testfile oc || error "multiop failed"
+ third_rpc=$(calc_stats $mdc_rpcstats ldlm_ibits_enqueue)
+ echo "3rd: $third_rpc RPCs in flight"
+
+ #verify no MDC RPC is sent
+ [[ $second_rpc == $third_rpc ]] || error "MDC RPC is still sent"
+}
+run_test 429 "verify if opencache flag on client side does work"
+
lseek_test_430() {
local offset
local file=$1
check_lsom_data()
{
local file=$1
- local size=$($LFS getsom -s $file)
local expect=$(stat -c %s $file)
- [[ $size == $expect ]] ||
- error "$file expected size: $expect, got: $size"
+ check_lsom_size $1 $expect
local blocks=$($LFS getsom -b $file)
expect=$(stat -c %b $file)
check_lsom_size()
{
- local size=$($LFS getsom -s $1)
+ local size
local expect=$2
+ cancel_lru_locks mdc
+
+ size=$($LFS getsom -s $1)
[[ $size == $expect ]] ||
error "$file expected size: $expect, got: $size"
}
check_set_fallocate || skip "need at least 2.13.57 for fallocate"
}
+function disable_opencache()
+{
+ local state=$($LCTL get_param -n "llite.*.opencache_threshold_count" | head -1)
+
+ test -z "${saved_OPENCACHE_value}" &&
+ export saved_OPENCACHE_value="$state"
+
+ [[ "$state" = "off" ]] && return
+
+ $LCTL set_param -n "llite.*.opencache_threshold_count"=off
+}
+
+function set_opencache()
+{
+ local newvalue="$1"
+ local state=$($LCTL get_param -n "llite.*.opencache_threshold_count")
+
+ [[ -n "$newvalue" ]] || return
+
+ [[ -n "${saved_OPENCACHE_value}" ]] ||
+ export saved_OPENCACHE_value="$state"
+
+ $LCTL set_param -n "llite.*.opencache_threshold_count"=$newvalue
+}
+
+
+
+function restore_opencache()
+{
+ [[ -z "${saved_OPENCACHE_value}" ]] ||
+ $LCTL set_param -n "llite.*.opencache_threshold_count"=${saved_OPENCACHE_value}
+}