removal, now use "all default" to signal dir stripe removal
as a directory striping of "all default" is not useful
+Severity : minor
+Frequency : common for large clusters running liblustre clients
+Bugzilla : 7198
+Description: doing an ls when liblustre clients are running is slow
+Details : sending a glimpse AST to a liblustre client means waiting for
+ every AST to time out, as liblustre clients will not respond. Since
+ they cannot cache data, we refresh the OST lock LVB from disk instead.
+
------------------------------------------------------------------------------
08-26-2005 Cluster File Systems, Inc. <info@clusterfs.com>
struct list_head *tmp;
ldlm_error_t err;
int tmpflags = 0, rc, repsize[2] = {sizeof(*rep), sizeof(*reply_lvb)};
+ int only_liblustre = 0;
ENTRY;
policy = ldlm_get_processing_policy(res);
if (tmplock->l_policy_data.l_extent.end <= reply_lvb->lvb_size)
continue;
+ /* Don't send glimpse ASTs to liblustre clients. They aren't
+ * listening for them, and they do entirely synchronous I/O
+ * anyway. */
+ if (tmplock->l_export == NULL ||
+ tmplock->l_export->exp_libclient == 1) {
+ only_liblustre = 1;
+ continue;
+ }
+
if (l == NULL) {
l = LDLM_LOCK_GET(tmplock);
continue;
l_unlock(&res->lr_namespace->ns_lock);
/* There were no PW locks beyond the size in the LVB; finished. */
- if (l == NULL)
+ if (l == NULL) {
+ if (only_liblustre) {
+ /* If we discovered a liblustre client with a PW lock,
+ * however, the LVB may be out of date! The LVB is
+ * updated only on glimpse (which we don't do for
+ * liblustre clients) and cancel (which the client
+ * obviously has not yet done). So if it has written
+ * data but kept the lock, the LVB is stale and needs
+ * to be updated from disk.
+ *
+ * Of course, this will all disappear when we switch to
+ * taking liblustre locks on the OST. */
+ if (res->lr_namespace->ns_lvbo &&
+ res->lr_namespace->ns_lvbo->lvbo_update) {
+ res->lr_namespace->ns_lvbo->lvbo_update
+ (res, NULL, 0, 1);
+ }
+ }
RETURN(ELDLM_LOCK_ABORTED);
+ }
if (l->l_glimpse_ast == NULL) {
/* We are racing with unlink(); just return -ENOENT */
LASSERTF(l->l_glimpse_ast != NULL, "l == %p", l);
rc = l->l_glimpse_ast(l, NULL); /* this will update the LVB */
+ /* Update the LVB from disk if the AST failed (this is a legal race) */
if (rc != 0 && res->lr_namespace->ns_lvbo &&
res->lr_namespace->ns_lvbo->lvbo_update) {
res->lr_namespace->ns_lvbo->lvbo_update(res, NULL, 0, 1);
obd = class_exp2obd(exp);
if (obd == NULL) {
- CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n",
- exp->exp_handle.h_cookie);
+ CDEBUG(D_IOCTL, "invalid client export %p\n", exp);
RETURN(-EINVAL);
}
obd = class_exp2obd(exp);
if (obd == NULL) {
- CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n",
- exp->exp_handle.h_cookie);
+ CDEBUG(D_IOCTL, "invalid client export %p\n", exp);
RETURN(-EINVAL);
}
void *key, __u32 vallen, void *val)
{
struct obd_device *obd;
- struct lustre_handle conn;
struct llog_ctxt *ctxt;
+ char str[PTL_NALFMT_SIZE];
int rc = 0;
ENTRY;
- conn.cookie = exp->exp_handle.h_cookie;
-
obd = exp->exp_obd;
if (obd == NULL) {
- CDEBUG(D_IOCTL, "invalid exp %p cookie "LPX64"\n",
- exp, conn.cookie);
+ CDEBUG(D_IOCTL, "invalid export %p\n", exp);
RETURN(-EINVAL);
}
memcmp(key, "mds_conn", keylen) != 0)
RETURN(-EINVAL);
- CWARN("%s: received MDS connection ("LPX64")\n",
- obd->obd_name, conn.cookie);
- memcpy(&obd->u.filter.fo_mdc_conn, &conn, sizeof(conn));
+ CWARN("%s: received MDS connection from %s\n", obd->obd_name,
+ ptlrpc_peernid2str(&exp->exp_connection->c_peer, str));
+ obd->u.filter.fo_mdc_conn.cookie = exp->exp_handle.h_cookie;
/* setup llog imports */
ctxt = llog_get_context(obd, LLOG_MDS_OST_REPL_CTXT);
rc = llog_receptor_accept(ctxt, exp->exp_imp_reverse);
-
+
filter_quota_set_info(exp, obd);
RETURN(rc);
* m != NULL : called by the DLM itself after a glimpse callback
* m == NULL : called by the filter after a disk write
*
- * If 'increase' is true, don't allow values to move backwards.
+ * If 'increase_only' is true, don't allow values to move backwards.
*/
static int filter_lvbo_update(struct ldlm_resource *res, struct lustre_msg *m,
- int buf_idx, int increase)
+ int buf_idx, int increase_only)
{
int rc = 0;
struct ost_lvb *lvb;
lustre_swab_ost_lvb);
if (new == NULL) {
CERROR("lustre_swab_buf failed\n");
- //GOTO(out, rc = -EPROTO);
- GOTO(out, rc = 0);
+ goto disk_update;
}
- if (new->lvb_size > lvb->lvb_size || !increase) {
+ if (new->lvb_size > lvb->lvb_size || !increase_only) {
CDEBUG(D_DLMTRACE, "res: "LPU64" updating lvb size: "
LPU64" -> "LPU64"\n", res->lr_name.name[0],
lvb->lvb_size, new->lvb_size);
lvb->lvb_size = new->lvb_size;
}
- if (new->lvb_mtime > lvb->lvb_mtime || !increase) {
+ if (new->lvb_mtime > lvb->lvb_mtime || !increase_only) {
CDEBUG(D_DLMTRACE, "res: "LPU64" updating lvb mtime: "
LPU64" -> "LPU64"\n", res->lr_name.name[0],
lvb->lvb_mtime, new->lvb_mtime);
lvb->lvb_mtime = new->lvb_mtime;
}
- if (new->lvb_atime > lvb->lvb_atime || !increase) {
+ if (new->lvb_atime > lvb->lvb_atime || !increase_only) {
CDEBUG(D_DLMTRACE, "res: "LPU64" updating lvb atime: "
LPU64" -> "LPU64"\n", res->lr_name.name[0],
lvb->lvb_atime, new->lvb_atime);
lvb->lvb_atime = new->lvb_atime;
}
- if (new->lvb_ctime > lvb->lvb_ctime || !increase) {
+ if (new->lvb_ctime > lvb->lvb_ctime || !increase_only) {
CDEBUG(D_DLMTRACE, "res: "LPU64" updating lvb ctime: "
LPU64" -> "LPU64"\n", res->lr_name.name[0],
lvb->lvb_ctime, new->lvb_ctime);
}
}
+ disk_update:
/* Update the LVB from the disk inode */
obd = res->lr_namespace->ns_lvbp;
LASSERT(obd);
if (dentry->d_inode == NULL)
GOTO(out_dentry, rc = -ENOENT);
- if (dentry->d_inode->i_size > lvb->lvb_size || !increase) {
+ if (dentry->d_inode->i_size > lvb->lvb_size || !increase_only) {
CDEBUG(D_DLMTRACE, "res: "LPU64" updating lvb size from disk: "
LPU64" -> %llu\n", res->lr_name.name[0],
lvb->lvb_size, dentry->d_inode->i_size);
lvb->lvb_size = dentry->d_inode->i_size;
}
- if (LTIME_S(dentry->d_inode->i_mtime) > lvb->lvb_mtime || !increase) {
+ if (LTIME_S(dentry->d_inode->i_mtime) >lvb->lvb_mtime|| !increase_only){
CDEBUG(D_DLMTRACE, "res: "LPU64" updating lvb mtime from disk: "
LPU64" -> %lu\n", res->lr_name.name[0],
lvb->lvb_mtime, LTIME_S(dentry->d_inode->i_mtime));
lvb->lvb_mtime = LTIME_S(dentry->d_inode->i_mtime);
}
- if (LTIME_S(dentry->d_inode->i_atime) > lvb->lvb_atime || !increase) {
+ if (LTIME_S(dentry->d_inode->i_atime) >lvb->lvb_atime|| !increase_only){
CDEBUG(D_DLMTRACE, "res: "LPU64" updating lvb atime from disk: "
LPU64" -> %lu\n", res->lr_name.name[0],
lvb->lvb_atime, LTIME_S(dentry->d_inode->i_atime));
lvb->lvb_atime = LTIME_S(dentry->d_inode->i_atime);
}
- if (LTIME_S(dentry->d_inode->i_ctime) > lvb->lvb_ctime || !increase) {
+ if (LTIME_S(dentry->d_inode->i_ctime) >lvb->lvb_ctime|| !increase_only){
CDEBUG(D_DLMTRACE, "res: "LPU64" updating lvb ctime from disk: "
LPU64" -> %lu\n", res->lr_name.name[0],
lvb->lvb_ctime, LTIME_S(dentry->d_inode->i_ctime));