- HEAD version of both patches in this one.
int lustre_fwrite(struct file *file, const void *buf, int len, loff_t *off);
int lustre_fsync(struct file *file);
long l_readdir(struct file * file, struct list_head *dentry_list);
+int l_notify_change(struct vfsmount *mnt, struct dentry *dchild,
+ struct iattr *newattrs);
+int simple_truncate(struct dentry *dir, struct vfsmount *mnt,
+ char *name, loff_t length);
static inline void l_dput(struct dentry *de)
{
int nid_exp_ref_count;
}nid_stat_t;
+/*
+ * Option bits stored in obd_export::exp_flags and handed to the export
+ * disconnect helpers.  Each value is a distinct bit so they can be OR-ed
+ * together; exp_flags_from_obd() derives them from the obd device state.
+ */
+enum obd_option {
+        OBD_OPT_FORCE       = 1 << 0, /* mirrors obd->obd_force */
+        OBD_OPT_FAILOVER    = 1 << 1, /* mirrors obd->obd_fail */
+        OBD_OPT_ABORT_RECOV = 1 << 2, /* mirrors obd->obd_abort_recovery */
+};
+
struct obd_export {
struct portals_handle exp_handle;
atomic_t exp_refcount;
spinlock_t exp_lock; /* protects flags int below */
/* ^ protects exp_outstanding_replies too */
__u64 exp_connect_flags;
- int exp_flags;
+ enum obd_option exp_flags;
unsigned long exp_failed:1,
exp_in_recovery:1,
exp_disconnected:1,
struct lu_ref obd_reference;
};
-#define OBD_OPT_FORCE 0x0001
-#define OBD_OPT_FAILOVER 0x0002
-
#define OBD_LLOG_FL_SENDNOW 0x0001
enum obd_cleanup_stage {
int class_disconnect(struct obd_export *exp);
void class_fail_export(struct obd_export *exp);
void class_disconnect_exports(struct obd_device *obddev);
-int class_disconnect_stale_exports(struct obd_device *,
- int (*test_export)(struct obd_export *));
int class_manual_cleanup(struct obd_device *obd);
+int class_disconnect_stale_exports(struct obd_device *,
+ int (*test_export)(struct obd_export *),
+ enum obd_option flags);
+
+/*
+ * Translate the state bits of @obd into an obd_option mask:
+ * obd_fail -> OBD_OPT_FAILOVER, obd_force -> OBD_OPT_FORCE,
+ * obd_abort_recovery -> OBD_OPT_ABORT_RECOV.  Returns 0 when none is set.
+ */
+static inline enum obd_option exp_flags_from_obd(struct obd_device *obd)
+{
+        int flags = 0;
+
+        if (obd->obd_fail)
+                flags |= OBD_OPT_FAILOVER;
+        if (obd->obd_force)
+                flags |= OBD_OPT_FORCE;
+        if (obd->obd_abort_recovery)
+                flags |= OBD_OPT_ABORT_RECOV;
+
+        return flags;
+}
void obdo_cpy_md(struct obdo *dst, struct obdo *src, obd_flag valid);
void obdo_to_ioobj(struct obdo *oa, struct obd_ioobj *ioobj);
"evict them\n", obd->obd_connected_clients,
obd->obd_max_recoverable_clients);
obd->obd_abort_recovery = obd->obd_stopping;
- class_disconnect_stale_exports(obd, connect_done);
+ class_disconnect_stale_exports(obd, connect_done,
+ exp_flags_from_obd(obd) |
+ OBD_OPT_ABORT_RECOV);
}
/* next stage: replay requests */
delta = jiffies;
if (obd->obd_abort_recovery) {
CDEBUG(D_ERROR, "req replay timed out, aborting ...\n");
obd->obd_abort_recovery = obd->obd_stopping;
- class_disconnect_stale_exports(obd, req_replay_done);
+ class_disconnect_stale_exports(obd, req_replay_done,
+ exp_flags_from_obd(obd) |
+ OBD_OPT_ABORT_RECOV);
abort_req_replay_queue(obd);
}
int stale;
CERROR("lock replay timed out, aborting ...\n");
obd->obd_abort_recovery = obd->obd_stopping;
- stale = class_disconnect_stale_exports(obd, lock_replay_done);
+ stale = class_disconnect_stale_exports(obd, lock_replay_done,
+ exp_flags_from_obd(obd) |
+ OBD_OPT_ABORT_RECOV);
abort_lock_replay_queue(obd);
}
}
EXPORT_SYMBOL(l_readdir);
+/*
+ * Apply @newattrs to @dchild through notify_change(), bracketed by
+ * LOCK_INODE_MUTEX/UNLOCK_INODE_MUTEX on the dentry's inode.  @mnt is only
+ * forwarded to notify_change() when HAVE_SECURITY_PLUG is defined.
+ *
+ * Returns whatever notify_change() returns.
+ */
+int l_notify_change(struct vfsmount *mnt, struct dentry *dchild,
+                    struct iattr *newattrs)
+{
+        struct inode *inode = dchild->d_inode;
+        int rc;
+
+        LOCK_INODE_MUTEX(inode);
+#ifdef HAVE_SECURITY_PLUG
+        rc = notify_change(dchild, mnt, newattrs);
+#else
+        rc = notify_change(dchild, newattrs);
+#endif
+        UNLOCK_INODE_MUTEX(inode);
+
+        return rc;
+}
+EXPORT_SYMBOL(l_notify_change);
+
+/*
+ * Utility to truncate the file called @name inside directory @dir to
+ * @length bytes.
+ *
+ * The name is looked up under @dir.  If the lookup yields a dentry with no
+ * inode, nothing is done and 0 is returned.  A directory is refused with
+ * -EISDIR.  Otherwise the new size is applied via l_notify_change() using
+ * @mnt.
+ *
+ * Returns 0 on success, or a negative errno from the lookup or from the
+ * attribute change.
+ */
+int simple_truncate(struct dentry *dir, struct vfsmount *mnt,
+                    char *name, loff_t length)
+{
+        struct dentry *dchild;
+        struct inode *inode;
+        struct iattr newattrs;
+        int namelen = strlen(name);
+        int err = 0;
+        ENTRY;
+
+        CDEBUG(D_INODE, "truncating file %.*s to %lld\n", namelen,
+               name, (long long)length);
+
+        dchild = ll_lookup_one_len(name, dir, namelen);
+        if (IS_ERR(dchild))
+                GOTO(out, err = PTR_ERR(dchild));
+
+        inode = dchild->d_inode;
+        if (inode != NULL) {
+                int mode = inode->i_mode;
+
+                /* Only the size of non-directories may be changed here. */
+                if (S_ISDIR(mode)) {
+                        CERROR("found %s (%lu/%u) is mode %o\n", name,
+                               inode->i_ino, inode->i_generation, mode);
+                        GOTO(out_dput, err = -EISDIR);
+                }
+
+                newattrs.ia_valid = ATTR_SIZE;
+                newattrs.ia_size = length;
+                err = l_notify_change(mnt, dchild, &newattrs);
+        }
+        EXIT;
+out_dput:
+        dput(dchild);
+out:
+        return err;
+}
+EXPORT_SYMBOL(simple_truncate);
+
#ifdef LUSTRE_KERNEL_VERSION
#ifndef HAVE_CLEAR_RDONLY_ON_PUT
#error rdonly patchset must be updated [cfs bz11248]
#include "mdt_internal.h"
static int mdt_server_data_update(const struct lu_env *env,
- struct mdt_device *mdt);
+ struct mdt_device *mdt,
+ int need_sync);
struct lu_buf *mdt_buf(const struct lu_env *env, void *area, ssize_t len)
{
return rc;
}
+/*
+ * Callback registered with mdt_trans_add_cb() for last_rcvd client updates:
+ * resolves the obd device behind @mdt and forwards @transno, @data and @err
+ * unchanged to target_client_add_cb().
+ */
+static void mdt_client_cb(const struct mdt_device *mdt, __u64 transno,
+                          void *data, int err)
+{
+        target_client_add_cb(mdt2obd_dev(mdt), transno, data, err);
+}
+
static inline int mdt_last_rcvd_header_write(const struct lu_env *env,
- struct mdt_device *mdt)
+ struct mdt_device *mdt,
+ int need_sync)
{
struct mdt_thread_info *mti;
struct thandle *th;
mti = lu_context_key_get(&env->le_ctx, &mdt_thread_key);
+ if (mti->mti_exp) {
+ spin_lock(&mti->mti_exp->exp_lock);
+ mti->mti_exp->exp_need_sync = need_sync;
+ spin_unlock(&mti->mti_exp->exp_lock);
+ }
mdt_trans_credit_init(env, mdt, MDT_TXN_LAST_RCVD_WRITE_OP);
th = mdt_trans_start(env, mdt);
if (IS_ERR(th))
mti->mti_off = 0;
lsd_cpu_to_le(&mdt->mdt_lsd, &mti->mti_lsd);
+ if (need_sync && mti->mti_exp)
+ mdt_trans_add_cb(th, mdt_client_cb, mti->mti_exp);
+
rc = mdt_record_write(env, mdt->mdt_last_rcvd,
mdt_buf_const(env, &mti->mti_lsd,
sizeof(mti->mti_lsd)),
lsd->lsd_mount_count = mdt->mdt_mount_count;
/* save it, so mount count and last_transno is current */
- rc = mdt_server_data_update(env, mdt);
+ rc = mdt_server_data_update(env, mdt, (mti->mti_exp &&
+ mti->mti_exp->exp_need_sync));
if (rc)
GOTO(err_client, rc);
}
static int mdt_server_data_update(const struct lu_env *env,
- struct mdt_device *mdt)
+ struct mdt_device *mdt,
+ int need_sync)
{
int rc = 0;
ENTRY;
* mdt->mdt_last_rcvd may be NULL that time.
*/
if (mdt->mdt_last_rcvd != NULL)
- rc = mdt_last_rcvd_header_write(env, mdt);
+ rc = mdt_last_rcvd_header_write(env, mdt, need_sync);
RETURN(rc);
}
-void mdt_cb_new_client(const struct mdt_device *mdt, __u64 transno,
- void *data, int err)
-{
- struct obd_device *obd = mdt2obd_dev(mdt);
-
- target_client_add_cb(obd, transno, data, err);
-}
-
int mdt_client_new(const struct lu_env *env, struct mdt_device *mdt)
{
unsigned long *bitmap = mdt->mdt_client_bitmap;
init_mutex(&med->med_lcd_lock);
LASSERTF(med->med_lr_off > 0, "med_lr_off = %llu\n", med->med_lr_off);
- /* write new client data */
+
+ /* Write new client data. */
off = med->med_lr_off;
mdt_trans_credit_init(env, mdt, MDT_TXN_LAST_RCVD_WRITE_OP);
+
th = mdt_trans_start(env, mdt);
if (IS_ERR(th))
RETURN(PTR_ERR(th));
- /* until this operations will be committed the sync is needed for this
- * export */
- mdt_trans_add_cb(th, mdt_cb_new_client, mti->mti_exp);
+        /*
+         * Until this operation is committed, a sync is needed for
+         * this export. This should be done _after_ starting the
+         * transaction so that many connecting clients do not bring
+         * the server down with lots of sync writes.
+         */
+ mdt_trans_add_cb(th, mdt_client_cb, mti->mti_exp);
spin_lock(&mti->mti_exp->exp_lock);
mti->mti_exp->exp_need_sync = 1;
spin_unlock(&mti->mti_exp->exp_lock);
struct mdt_export_data *med;
struct lsd_client_data *lcd;
struct obd_device *obd = mdt2obd_dev(mdt);
- struct thandle *th;
- loff_t off;
- int rc = 0;
+ struct obd_export *exp;
+ struct thandle *th;
+ int need_sync;
+ loff_t off;
+ int rc = 0;
ENTRY;
mti = lu_context_key_get(&env->le_ctx, &mdt_thread_key);
LASSERT(mti != NULL);
- med = &mti->mti_exp->exp_mdt_data;
+ exp = mti->mti_exp;
+ med = &exp->exp_mdt_data;
lcd = med->med_lcd;
if (!lcd)
RETURN(0);
/* XXX: If lcd_uuid were a real obd_uuid, I could use obd_uuid_equals */
- if (!strcmp(med->med_lcd->lcd_uuid, obd->obd_uuid.uuid))
+ if (!strcmp(lcd->lcd_uuid, obd->obd_uuid.uuid))
GOTO(free, 0);
CDEBUG(D_INFO, "freeing client at idx %u, offset %lld\n",
LBUG();
}
+ /* Don't force sync on disconnect if aborting recovery,
+ * or it does num_clients * num_osts. b=17194 */
+ need_sync = (!exp->exp_libclient || exp->exp_need_sync) &&
+ !(exp->exp_flags & OBD_OPT_ABORT_RECOV);
+
/*
* This may be called from difficult reply handler path and
* mdt->mdt_last_rcvd may be NULL that time.
*/
if (mdt->mdt_last_rcvd != NULL) {
mdt_trans_credit_init(env, mdt, MDT_TXN_LAST_RCVD_WRITE_OP);
+
+ spin_lock(&exp->exp_lock);
+ exp->exp_need_sync = need_sync;
+ spin_unlock(&exp->exp_lock);
+
th = mdt_trans_start(env, mdt);
if (IS_ERR(th))
GOTO(free, rc = PTR_ERR(th));
+ if (need_sync) {
+                /*
+                 * Until this operation is committed, a sync
+                 * is needed for this export.
+                 */
+ mdt_trans_add_cb(th, mdt_client_cb, exp);
+ }
+
mutex_down(&med->med_lcd_lock);
memset(lcd, 0, sizeof *lcd);
}
CDEBUG(rc == 0 ? D_INFO : D_ERROR, "Zeroing out client idx %u in "
- "%s rc %d\n", med->med_lr_idx, LAST_RCVD, rc);
+ "%s %ssync rc %d\n", med->med_lr_idx, LAST_RCVD,
+ need_sync ? "" : "a", rc);
spin_lock(&mdt->mdt_client_bitmap_lock);
clear_bit(med->med_lr_idx, mdt->mdt_client_bitmap);
spin_unlock(&mdt->mdt_client_bitmap_lock);
- /*
- * Make sure the server's last_transno is up to date. Do this after the
- * client is freed so we know all the client's transactions have been
- * committed.
+ /*
+ * Make sure the server's last_transno is up to date. Do this
+ * after the client is freed so we know all the client's
+ * transactions have been committed.
*/
- mdt_server_data_update(env, mdt);
+ mdt_server_data_update(env, mdt, need_sync);
+
EXIT;
free:
OBD_FREE_PTR(lcd);
*/
if (mti->mti_transno == 0 &&
*transno_p == mdt->mdt_last_transno)
- mdt_server_data_update(mti->mti_env, mdt);
+ mdt_server_data_update(mti->mti_env, mdt,
+ (mti->mti_exp &&
+ mti->mti_exp->exp_need_sync));
*transno_p = mti->mti_transno;
RETURN(0);
}
-static void class_disconnect_export_list(struct list_head *list, int flags)
+static void class_disconnect_export_list(struct list_head *list,
+ enum obd_option flags)
{
int rc;
struct lustre_handle fake_conn;
EXIT;
}
-static inline int get_exp_flags_from_obd(struct obd_device *obd)
-{
- return ((obd->obd_fail ? OBD_OPT_FAILOVER : 0) |
- (obd->obd_force ? OBD_OPT_FORCE : 0));
-}
-
void class_disconnect_exports(struct obd_device *obd)
{
struct list_head work_list;
CDEBUG(D_HA, "OBD device %d (%p) has exports, "
"disconnecting them\n", obd->obd_minor, obd);
class_disconnect_export_list(&work_list,
- get_exp_flags_from_obd(obd));
+ exp_flags_from_obd(obd));
} else
CDEBUG(D_HA, "OBD device %d (%p) has no exports\n",
obd->obd_minor, obd);
/* Remove exports that have not completed recovery.
*/
int class_disconnect_stale_exports(struct obd_device *obd,
- int (*test_export)(struct obd_export *))
+ int (*test_export)(struct obd_export *),
+ enum obd_option flags)
{
struct list_head work_list;
struct list_head *pos, *n;
CDEBUG(D_ERROR, "%s: disconnecting %d stale clients\n",
obd->obd_name, cnt);
- class_disconnect_export_list(&work_list, get_exp_flags_from_obd(obd));
+ class_disconnect_export_list(&work_list, flags);
RETURN(cnt);
}
EXPORT_SYMBOL(class_disconnect_stale_exports);
be no more in-progress ops by this point.*/
spin_lock(&obd->obd_self_export->exp_lock);
- obd->obd_self_export->exp_flags |=
- (obd->obd_fail ? OBD_OPT_FAILOVER : 0) |
- (obd->obd_force ? OBD_OPT_FORCE : 0);
+ obd->obd_self_export->exp_flags |= exp_flags_from_obd(obd);
spin_unlock(&obd->obd_self_export->exp_lock);
/* note that we'll recurse into class_decref again */
GOTO(out_free, rc);
}
+ if (lmd->lmd_flags & LMD_FLG_ABORT_RECOV)
+ simple_truncate(mnt->mnt_sb->s_root, mnt, LAST_RCVD,
+ LR_CLIENT_START);
+
OBD_PAGE_FREE(__page);
lsi->lsi_ldd = ldd; /* freed at lsi cleanup */
CDEBUG(D_SUPER, "%s: mnt = %p\n", lmd->lmd_dev, mnt);
RETURN(0);
}
+/*
+ * All-zero client record that filter_client_free() writes over a
+ * disconnecting client's entry in the last_rcvd file.  Objects with static
+ * storage duration are implicitly zeroed, so no memset is required.  Kept
+ * file-local so the name does not leak into the global namespace.
+ */
+static struct lsd_client_data zero_lcd;
+
static int filter_client_free(struct obd_export *exp)
{
struct filter_export_data *fed = &exp->exp_filter_data;
struct filter_obd *filter = &exp->exp_obd->u.filter;
struct obd_device *obd = exp->exp_obd;
- struct lsd_client_data zero_lcd;
struct lvfs_run_ctxt saved;
int rc;
loff_t off;
}
if (!(exp->exp_flags & OBD_OPT_FAILOVER)) {
- memset(&zero_lcd, 0, sizeof zero_lcd);
+ /* Don't force sync on disconnect if aborting recovery,
+ * or it does num_clients * num_osts. b=17194 */
+ int need_sync = (!exp->exp_libclient || exp->exp_need_sync) &&
+ !(exp->exp_flags&OBD_OPT_ABORT_RECOV);
push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
rc = fsfilt_write_record(obd, filter->fo_rcvd_filp, &zero_lcd,
- sizeof(zero_lcd), &off,
- (!exp->exp_libclient ||
- exp->exp_need_sync));
+ sizeof(zero_lcd), &off, 0);
+
+ /* Make sure the server's last_transno is up to date. Do this
+ * after the client is freed so we know all the client's
+ * transactions have been committed. */
if (rc == 0)
- /* update server's transno */
filter_update_server_data(obd, filter->fo_rcvd_filp,
- filter->fo_fsd,
- !exp->exp_libclient);
+ filter->fo_fsd, need_sync);
pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
CDEBUG(rc == 0 ? D_INFO : D_ERROR,
- "zeroing out client %s at idx %u (%llu) in %s rc %d\n",
+ "zero out client %s at idx %u/%llu in %s %ssync rc %d\n",
fed->fed_lcd->lcd_uuid, fed->fed_lr_idx, fed->fed_lr_off,
- LAST_RCVD, rc);
+ LAST_RCVD, need_sync ? "" : "a", rc);
}
if (!test_and_clear_bit(fed->fed_lr_idx, filter->fo_last_rcvd_slots)) {
remote_mds_nodsh && log "SKIP: remote MDS with nodsh" && exit 0
# Skip these tests
-# bug number: 17466
-ALWAYS_EXCEPT="61d $REPLAY_SINGLE_EXCEPT"
+# bug number: 17466 15962
+ALWAYS_EXCEPT="61d 33b $REPLAY_SINGLE_EXCEPT"
if [ "$FAILURE_MODE" = "HARD" ] && mixed_ost_devs; then
CONFIG_EXCEPTIONS="0b 42 47 61a 61c"
}
run_test 33a "abort recovery before client does replay"
-# Stale FID sequence
+# Stale FID sequence bug 15962
test_33b() { # was test_33a
replay_barrier $SINGLEMDS
createmany -o $DIR/$tfile-%d 10