#include <linux/fs.h>
/* XATTR_{REPLACE,CREATE} */
#include <linux/xattr.h>
+#include <linux/workqueue.h>
#include <ldiskfs/ldiskfs.h>
#include <ldiskfs/xattr.h>
module_param(ldiskfs_track_declares_assert, int, 0644);
MODULE_PARM_DESC(ldiskfs_track_declares_assert, "LBUG during tracking of declares");
+struct work_struct flush_fput;
+atomic_t descriptors_cnt;
+unsigned int ldiskfs_flush_descriptors_cnt = 5000;
+unsigned int ldiskfs_flush_descriptors_seconds = 10;
+
/* 1 GiB in 512-byte sectors */
int ldiskfs_delayed_unlink_blocks = (1 << (30 - 9));
oti->oti_obj_dentry.d_inode = inode;
oti->oti_obj_dentry.d_sb = inode->i_sb;
- filp = alloc_file_pseudo(inode, dev->od_mnt, "/", O_NOATIME,
- inode->i_fop);
+ filp = osd_alloc_file_pseudo(inode, dev->od_mnt, "/", O_NOATIME,
+ inode->i_fop);
if (IS_ERR(filp))
RETURN(-ENOMEM);
int rc;
ENTRY;
- file = alloc_file_pseudo(inode, dev->od_mnt, "/", O_NOATIME,
- inode->i_fop);
+ file = osd_alloc_file_pseudo(inode, dev->od_mnt, "/", O_NOATIME,
+ inode->i_fop);
if (IS_ERR(file))
RETURN(PTR_ERR(file));
struct file *file;
ENTRY;
- file = alloc_file_pseudo(inode, dev->od_mnt, "/", O_NOATIME,
- inode->i_fop);
+ file = osd_alloc_file_pseudo(inode, dev->od_mnt, "/", O_NOATIME,
+ inode->i_fop);
if (IS_ERR(file))
RETURN(ERR_CAST(file));
}
LUSTRE_RW_ATTR(track_declares_assert);
+/* Show the pseudo-file flush threshold: once this many files have been
+ * handed out by osd_alloc_file_pseudo() a delayed-fput flush is queued.
+ */
+static ssize_t flush_descriptors_cnt_show(struct kobject *kobj,
+					   struct attribute *attr, char *buf)
+{
+	return scnprintf(buf, PAGE_SIZE, "%u\n", ldiskfs_flush_descriptors_cnt);
+}
+
+/* Store a new flush threshold.  kstrtou32() only writes the destination on
+ * a successful parse, so ldiskfs_flush_descriptors_cnt is never left with a
+ * partial/garbage value when the input is rejected.
+ */
+static ssize_t flush_descriptors_cnt_store(struct kobject *kobj,
+					    struct attribute *attr,
+					    const char *buffer, size_t count)
+{
+	int rc;
+
+	rc = kstrtou32(buffer, 0, &ldiskfs_flush_descriptors_cnt);
+	if (rc)
+		return rc;
+	return count;
+}
+LUSTRE_RW_ATTR(flush_descriptors_cnt);
+
+/* Work handler (runs on system_long_wq, queued from osd_alloc_file_pseudo())
+ * that flushes all pending delayed fputs once too many pseudo files have
+ * accumulated.  @work is the global flush_fput work item; it carries no
+ * payload, so the argument is unused.
+ */
+static void osd_flush_fput(struct work_struct *work)
+{
+	/* flush file descriptors when too many files */
+	CDEBUG_LIMIT(D_HA, "Flushing file descriptors limit %d\n",
+		     ldiskfs_flush_descriptors_cnt);
+
+	/* descriptors_cnt triggers the threshold when a flush is started,
+	 * but all pending descriptors will be flushed each time, so it
+	 * doesn't need to exactly match the number of descriptors.
+	 */
+	atomic_set(&descriptors_cnt, 0);
+	cfs_flush_delayed_fput();
+}
+
static int __init osd_init(void)
{
struct kobject *kobj;
if (rc)
return rc;
+ atomic_set(&descriptors_cnt, 0);
osd_oi_mod_init();
rc = lu_kmem_init(ldiskfs_caches);
rc = 0;
}
+ rc = sysfs_create_file(kobj,
+ &lustre_attr_flush_descriptors_cnt.attr);
+ if (rc) {
+ CWARN("%s: flush_descriptors_cnt registration failed: rc = %d\n",
+ "osd-ldiskfs", rc);
+ rc = 0;
+ }
+
kobject_put(kobj);
}
cfs_kallsyms_lookup_name("flush_delayed_fput");
#endif
+ INIT_WORK(&flush_fput, osd_flush_fput);
+
return rc;
}
{
struct kobject *kobj;
+ cancel_work_sync(&flush_fput);
kobj = kset_find_obj(lustre_kset, LUSTRE_OSD_LDISKFS_NAME);
if (kobj) {
sysfs_remove_file(kobj,
&lustre_attr_track_declares_assert.attr);
+ sysfs_remove_file(kobj,
+ &lustre_attr_flush_descriptors_cnt.attr);
kobject_put(kobj);
}
class_unregister_type(LUSTRE_OSD_LDISKFS_NAME);
#define osd_dirty_inode(inode, flag) (inode)->i_sb->s_op->dirty_inode((inode), flag)
#define osd_i_blocks(inode, size) ((size) >> (inode)->i_blkbits)
+extern atomic_t descriptors_cnt;
+extern unsigned int ldiskfs_flush_descriptors_cnt;
+extern struct work_struct flush_fput;
+/* Wrapper for alloc_file_pseudo() that counts files handed out and, once
+ * the count reaches ldiskfs_flush_descriptors_cnt, queues osd_flush_fput()
+ * on system_long_wq to drain the delayed-fput list.  Evaluates to the
+ * struct file * (or ERR_PTR) from alloc_file_pseudo().
+ * NOTE(review): the counter is incremented even when alloc_file_pseudo()
+ * fails; the over-count only makes the next flush fire slightly early,
+ * which is harmless given the approximate-threshold semantics.
+ */
+#define osd_alloc_file_pseudo(inode, mnt, name, flags, fops)	\
+({								\
+	struct file *__f;					\
+	int __descriptors_cnt;					\
+	__f = alloc_file_pseudo(inode, mnt, name, flags, fops);	\
+	__descriptors_cnt = atomic_inc_return(&descriptors_cnt); \
+	if (unlikely(__descriptors_cnt >= ldiskfs_flush_descriptors_cnt)) {\
+		/* reset now so concurrent callers see a low count	\
+		 * and skip queueing duplicate flush work		\
+		 */						\
+		atomic_set(&descriptors_cnt, 0);		\
+		queue_work(system_long_wq, &flush_fput);	\
+	}							\
+	__f;							\
+})
#if defined HAVE_INODE_TIMESPEC64 || defined HAVE_INODE_GET_MTIME_SEC
# define osd_timespec timespec64
return 0;
}
-#ifdef HAVE_SERVER_SUPPORT
-# ifdef HAVE_FLUSH_DELAYED_FPUT
-# define cfs_flush_delayed_fput() flush_delayed_fput()
-# else
-void (*cfs_flush_delayed_fput)(void);
-# endif /* HAVE_FLUSH_DELAYED_FPUT */
-#else /* !HAVE_SERVER_SUPPORT */
-#define cfs_flush_delayed_fput() do {} while (0)
-#endif /* HAVE_SERVER_SUPPORT */
-
/**
* Main thread body for service threads.
* Waits in a loop waiting for new requests to process to appear.
CDEBUG(D_NET, "service thread %d (#%d) started\n", thread->t_id,
svcpt->scp_nthrs_running);
-#ifdef HAVE_SERVER_SUPPORT
-#ifndef HAVE_FLUSH_DELAYED_FPUT
- if (unlikely(cfs_flush_delayed_fput == NULL))
- cfs_flush_delayed_fput =
- cfs_kallsyms_lookup_name("flush_delayed_fput");
-#endif
-#endif
/* XXX maintain a list of all managed devices: insert here */
while (!ptlrpc_thread_stopping(thread)) {
- bool idle = true;
if (ptlrpc_wait_event(svcpt, thread))
break;
if (ptlrpc_threads_need_create(svcpt)) {
/* Ignore return code - we tried... */
ptlrpc_start_thread(svcpt, 0);
- idle = false;
}
/* reset le_ses to initial state */
if (counter++ < 100)
continue;
counter = 0;
- idle = false;
}
if (ptlrpc_at_check(svcpt))
lu_context_enter(&env->le_ctx);
ptlrpc_server_handle_request(svcpt, thread);
lu_context_exit(&env->le_ctx);
- idle = false;
}
if (ptlrpc_rqbd_pending(svcpt) &&
svcpt->scp_rqbd_timeout = cfs_time_seconds(1) / 10;
CDEBUG(D_RPCTRACE, "Posted buffers: %d\n",
svcpt->scp_nrqbds_posted);
- idle = false;
}
- /* If nothing to do, flush old alloc_file_pseudo() descriptors.
- * This has internal atomicity so it is OK to call often.
- * We could also do other idle tasks at this time.
- */
- if (idle)
- cfs_flush_delayed_fput();
-
/*
* If the number of threads has been tuned downward and this
* thread should be stopped, then stop in reverse order so the