update from b1_4.
+tbd Cluster File Systems, Inc. <info@clusterfs.com>
+ * version 1.4.7
+ * bug fixes
+
+Severity : major
+Frequency : rare
+Bugzilla : 5719, 9635, 9792, 9684
+Description: OST (or MDS) trips assertions in (re)connection under heavy load
+Details : If a server is under heavy load and cannot reply to new
+ connection requests before the client resends the (re)connect,
+ the connection handling code can behave badly if two service
+ threads are concurrently handling separate (re)connections from
+ the same client. Add better locking to the connection handling
+ code, and ensure that only a single connection will be processed
+ for a given client UUID, even if the lock is dropped.
+
+------------------------------------------------------------------------------
+
02-14-2006 Cluster File Systems, Inc. <info@clusterfs.com>
* version 1.4.6
* WIRE PROTOCOL CHANGE. This version of Lustre networking WILL NOT
* bug fixes
* Support for newer kernels: 2.6.9-22.0.2.EL (RHEL 4),
2.6.5-7.244 (SLES 9) - same as 1.4.5.2.
+ 2.6.12.6 vanilla (kernel.org)
Severity : enhancement
-Bugzilla : 9461
-Description: Implement 'lfs df' to report actual free space on per-OST basis
-Details : Add sub-command 'df' on 'lfs' to report the disk space usage of
- MDS/OSDs. Usage: lfs df [-i][-h]. Command Options: '-i' to report
- usage of objects; '-h' to report in human readable format.
-
-Severity : enhancement
Bugzilla : 7981/8208
Description: Introduced Lustre Networking (LNET)
Details : LNET is new networking infrastructure for Lustre, it includes
considers this an error and immediately begins cleaning up the
lov, just after starting the mds_lov process
+Severity : enhancement
+Bugzilla : 9461
+Description: Implement 'lfs df' to report actual free space on per-OST basis
+Details : Add sub-command 'df' on 'lfs' to report the disk space usage of
+ MDS/OSDs. Usage: lfs df [-i][-h]. Command Options: '-i' to report
+ usage of objects; '-h' to report in human readable format.
+
------------------------------------------------------------------------------
08-26-2005 Cluster File Systems, Inc. <info@clusterfs.com>
int exp_flags;
unsigned int exp_failed:1,
exp_disconnected:1,
+ exp_connecting:1,
exp_replay_needed:1,
exp_libclient:1; /* liblustre client? */
union {
void class_notify_import_observers(struct obd_import *imp, int event,
void *event_arg);
-#define IMP_EVENT_ACTIVE 1
-#define IMP_EVENT_INACTIVE 2
-
/* genops.c */
struct obd_export;
extern struct obd_import *class_exp2cliimp(struct obd_export *);
spin_unlock(&lco->lco_lock);
result = 0;
} else {
- CERROR("unexpected notification of %s %s!\n",
+ CERROR("unexpected notification from %s %s!\n",
watched->obd_type->typ_name,
watched->obd_name);
result = -EINVAL;
__class_export_put(exp); \
} while (0)
void __class_export_put(struct obd_export *);
-struct obd_export *class_new_export(struct obd_device *obddev);
+struct obd_export *class_new_export(struct obd_device *obddev,
+ struct obd_uuid *cluuid);
void class_unlink_export(struct obd_export *exp);
void class_update_export_timer(struct obd_export *exp, time_t extra_delay);
#define OBD_FAIL_TGT_REPLY_NET 0x700
#define OBD_FAIL_TGT_CONN_RACE 0x701
+#define OBD_FAIL_TGT_FORCE_RECONNECT 0x702
+#define OBD_FAIL_TGT_DELAY_CONNECT 0x703
+#define OBD_FAIL_TGT_DELAY_RECONNECT 0x704
#define OBD_FAIL_MDC_REVALIDATE_PAUSE 0x800
--- /dev/null
+Index: linux+rhel4+chaos/include/linux/sysctl.h
+===================================================================
+--- linux+rhel4+chaos.orig/include/linux/sysctl.h
++++ linux+rhel4+chaos/include/linux/sysctl.h
+@@ -348,6 +348,8 @@ enum
+ NET_TCP_TSO_WIN_DIVISOR=107,
+ NET_TCP_BIC_BETA=108,
+ NET_IPV4_ICMP_ERRORS_USE_INBOUND_IFADDR=109,
++ NET_TCP_RTO_MAX=110,
++ NET_TCP_RTO_INIT=111,
+ };
+
+ enum {
+Index: linux+rhel4+chaos/net/ipv4/sysctl_net_ipv4.c
+===================================================================
+--- linux+rhel4+chaos.orig/net/ipv4/sysctl_net_ipv4.c
++++ linux+rhel4+chaos/net/ipv4/sysctl_net_ipv4.c
+@@ -49,6 +49,10 @@ extern int inet_peer_maxttl;
+ extern int inet_peer_gc_mintime;
+ extern int inet_peer_gc_maxtime;
+
++/* From tcp_timer.c */
++extern unsigned sysctl_tcp_rto_max;
++extern unsigned sysctl_tcp_rto_init;
++
+ #ifdef CONFIG_SYSCTL
+ static int tcp_retr1_max = 255;
+ static int ip_local_port_range_min[] = { 1, 1 };
+@@ -699,6 +703,22 @@ ctl_table ipv4_table[] = {
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
++ {
++ .ctl_name = NET_TCP_RTO_MAX,
++ .procname = "tcp_rto_max",
++ .data = &sysctl_tcp_rto_max,
++ .maxlen = sizeof(unsigned),
++ .mode = 0644,
++ .proc_handler = &proc_dointvec
++ },
++ {
++ .ctl_name = NET_TCP_RTO_INIT,
++ .procname = "tcp_rto_init",
++ .data = &sysctl_tcp_rto_init,
++ .maxlen = sizeof(unsigned),
++ .mode = 0644,
++ .proc_handler = &proc_dointvec
++ },
+ { .ctl_name = 0 }
+ };
+
+Index: linux+rhel4+chaos/net/ipv4/tcp_timer.c
+===================================================================
+--- linux+rhel4+chaos.orig/net/ipv4/tcp_timer.c
++++ linux+rhel4+chaos/net/ipv4/tcp_timer.c
+@@ -32,6 +32,9 @@ int sysctl_tcp_retries1 = TCP_RETR1;
+ int sysctl_tcp_retries2 = TCP_RETR2;
+ int sysctl_tcp_orphan_retries;
+
++unsigned sysctl_tcp_rto_max = TCP_RTO_MAX;
++unsigned sysctl_tcp_rto_init = TCP_TIMEOUT_INIT;
++
+ static void tcp_write_timer(unsigned long);
+ static void tcp_delack_timer(unsigned long);
+ static void tcp_keepalive_timer (unsigned long data);
+@@ -104,7 +107,7 @@ static int tcp_out_of_resources(struct s
+
+ /* If peer does not open window for long time, or did not transmit
+ * anything for long time, penalize it. */
+- if ((s32)(tcp_time_stamp - tp->lsndtime) > 2*TCP_RTO_MAX || !do_reset)
++ if ((s32)(tcp_time_stamp - tp->lsndtime) > 2*sysctl_tcp_rto_max || !do_reset)
+ orphans <<= 1;
+
+ /* If some dubious ICMP arrived, penalize even more. */
+@@ -186,7 +189,7 @@ static int tcp_write_timeout(struct sock
+
+ retry_until = sysctl_tcp_retries2;
+ if (sock_flag(sk, SOCK_DEAD)) {
+- int alive = (tp->rto < TCP_RTO_MAX);
++ int alive = (tp->rto < sysctl_tcp_rto_max);
+
+ retry_until = tcp_orphan_retries(sk, alive);
+
+@@ -292,7 +295,7 @@ static void tcp_probe_timer(struct sock
+ max_probes = sysctl_tcp_retries2;
+
+ if (sock_flag(sk, SOCK_DEAD)) {
+- int alive = ((tp->rto<<tp->backoff) < TCP_RTO_MAX);
++ int alive = ((tp->rto<<tp->backoff) < sysctl_tcp_rto_max);
+
+ max_probes = tcp_orphan_retries(sk, alive);
+
+@@ -336,7 +339,7 @@ static void tcp_retransmit_timer(struct
+ inet->num, tp->snd_una, tp->snd_nxt);
+ }
+ #endif
+- if (tcp_time_stamp - tp->rcv_tstamp > TCP_RTO_MAX) {
++ if (tcp_time_stamp - tp->rcv_tstamp > sysctl_tcp_rto_max) {
+ tcp_write_err(sk);
+ goto out;
+ }
+@@ -405,7 +408,7 @@ static void tcp_retransmit_timer(struct
+ tp->retransmits++;
+
+ out_reset_timer:
+- tp->rto = min(tp->rto << 1, TCP_RTO_MAX);
++ tp->rto = min(tp->rto << 1, sysctl_tcp_rto_max);
+ tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
+ if (tp->retransmits > sysctl_tcp_retries1)
+ __sk_dst_reset(sk);
+@@ -502,7 +505,7 @@ static void tcp_synack_timer(struct sock
+ if (tp->defer_accept)
+ max_retries = tp->defer_accept;
+
+- budget = 2*(TCP_SYNQ_HSIZE/(TCP_TIMEOUT_INIT/TCP_SYNQ_INTERVAL));
++ budget = 2*(TCP_SYNQ_HSIZE/(sysctl_tcp_rto_init/TCP_SYNQ_INTERVAL));
+ i = lopt->clock_hand;
+
+ do {
+@@ -516,8 +519,8 @@ static void tcp_synack_timer(struct sock
+
+ if (req->retrans++ == 0)
+ lopt->qlen_young--;
+- timeo = min((TCP_TIMEOUT_INIT << req->retrans),
+- TCP_RTO_MAX);
++ timeo = min((sysctl_tcp_rto_init << req->retrans),
++ sysctl_tcp_rto_max);
+ req->expires = now + timeo;
+ reqp = &req->dl_next;
+ continue;
/* Offset between the two in bytes */
offset = data - skb->head;
--- linux-2.6.9-org/net/ipv4/tcp.c 2005-05-20 10:09:34.000000000 +0100
-+++ ./linux-2.6.9/net/ipv4/tcp.c 2005-05-20 10:22:14.000000000 +0100
++++ linux-2.6.9/net/ipv4/tcp.c 2005-05-20 10:22:14.000000000 +0100
@@ -628,8 +628,9 @@
}
}
linux-2.6-binutils-2.16.patch
compile-fixes-2.6.9-rhel4-22.patch
vm-tunables-rhel4.patch
+tcp-zero-copy-2.6.9-rhel4.patch
spin_lock_init(&cli->cl_write_page_hist.oh_lock);
spin_lock_init(&cli->cl_read_offset_hist.oh_lock);
spin_lock_init(&cli->cl_write_offset_hist.oh_lock);
- if (num_physpages >> (20 - PAGE_SHIFT) <= 128) { /* <= 128 MB */
- cli->cl_max_pages_per_rpc = PTLRPC_MAX_BRW_PAGES / 4;
- cli->cl_max_rpcs_in_flight = OSC_MAX_RIF_DEFAULT / 4;
- } else if (num_physpages >> (20 - PAGE_SHIFT) <= 256) { /* <= 256 MB */
- cli->cl_max_pages_per_rpc = PTLRPC_MAX_BRW_PAGES / 2;
- cli->cl_max_rpcs_in_flight = OSC_MAX_RIF_DEFAULT / 2;
+ cli->cl_max_pages_per_rpc = PTLRPC_MAX_BRW_PAGES;
+ if (num_physpages >> (20 - PAGE_SHIFT) <= 128 /* MB */) {
+ cli->cl_max_rpcs_in_flight = 2;
+ } else if (num_physpages >> (20 - PAGE_SHIFT) <= 256 /* MB */) {
+ cli->cl_max_rpcs_in_flight = 3;
+ } else if (num_physpages >> (20 - PAGE_SHIFT) <= 512 /* MB */) {
+ cli->cl_max_rpcs_in_flight = 4;
} else {
- cli->cl_max_pages_per_rpc = PTLRPC_MAX_BRW_PAGES;
cli->cl_max_rpcs_in_flight = OSC_MAX_RIF_DEFAULT;
}
struct obd_uuid *cluuid)
{
ENTRY;
- if (exp->exp_connection) {
+ if (exp->exp_connection && exp->exp_imp_reverse) {
struct lustre_handle *hdl;
hdl = &exp->exp_imp_reverse->imp_remote_handle;
/* Might be a re-connect after a partition. */
if (!memcmp(&conn->cookie, &hdl->cookie, sizeof conn->cookie)) {
- CWARN("%s reconnecting\n", cluuid->uuid);
+ CWARN("%s: %s reconnecting\n", exp->exp_obd->obd_name,
+ cluuid->uuid);
conn->cookie = exp->exp_handle.h_cookie;
/* target_handle_connect() treats EALREADY and
* -EALREADY differently. EALREADY means we are
list_for_each(p, &target->obd_exports) {
export = list_entry(p, struct obd_export, exp_obd_chain);
if (obd_uuid_equals(&cluuid, &export->exp_client_uuid)) {
+ if (export->exp_connecting) { /* bug 9635, et. al. */
+ CWARN("%s: exp %p already connecting\n",
+ export->exp_obd->obd_name, export);
+ export = NULL;
+ rc = -EALREADY;
+ break;
+ }
+ export->exp_connecting = 1;
spin_unlock(&target->obd_dev_lock);
LASSERT(export->exp_obd == target);
/* If we found an export, we already unlocked. */
if (!export) {
spin_unlock(&target->obd_dev_lock);
+ OBD_FAIL_TIMEOUT(OBD_FAIL_TGT_DELAY_CONNECT, 2 * obd_timeout);
} else if (req->rq_reqmsg->conn_cnt == 1) {
CERROR("%s: NID %s (%s) reconnected with 1 conn_cnt; "
"cookies not random?\n", target->obd_name,
libcfs_nid2str(req->rq_peer.nid), cluuid.uuid);
GOTO(out, rc = -EALREADY);
+ } else {
+ OBD_FAIL_TIMEOUT(OBD_FAIL_TGT_DELAY_RECONNECT, 2 * obd_timeout);
}
- /* We indicate the reconnection in a flag, not an error code. */
+ /* We want to handle EALREADY but *not* -EALREADY from
+ * target_handle_reconnect(), return reconnection state in a flag */
if (rc == EALREADY) {
lustre_msg_add_op_flags(req->rq_repmsg, MSG_CONNECT_RECONNECT);
rc = 0;
+ } else if (rc) {
+ GOTO(out, rc);
}
/* Tell the client if we're in recovery. */
rc = obd_reconnect(export, target, &cluuid, data);
}
- /* we want to handle EALREADY but *not* -EALREADY from
- * target_handle_reconnect() */
- if (rc && rc != EALREADY)
+ if (rc)
GOTO(out, rc);
/* Return only the parts of obd_connect_data that we understand, so the
revimp->imp_state = LUSTRE_IMP_FULL;
class_import_put(revimp);
out:
+ if (export)
+ export->exp_connecting = 0;
if (rc)
req->rq_status = rc;
RETURN(rc);
if (body->valid & OBD_MD_FLID)
st->st_ino = body->ino;
- if (body->valid & OBD_MD_FLATIME &&
+ if (body->valid & OBD_MD_FLMTIME &&
body->mtime > LTIME_S(st->st_mtime))
LTIME_S(st->st_mtime) = body->mtime;
- if (body->valid & OBD_MD_FLMTIME &&
+ if (body->valid & OBD_MD_FLATIME &&
body->atime > LTIME_S(st->st_atime))
LTIME_S(st->st_atime) = body->atime;
if (body->valid & OBD_MD_FLCTIME &&
RETURN(PTR_ERR(filename));
ll_inode2fid(&fid, inode);
-
+
rc = ll_get_max_mdsize(sbi, &lmmsize);
- if (rc)
+ if (rc)
RETURN(rc);
rc = mdc_getattr_name(sbi->ll_mdc_exp, &fid, filename,
int lmj_size, i, aindex = 0, rc;
rc = obd_unpackmd(sbi->ll_osc_exp, &lsm, lmm, lmmsize);
- if (rc < 0)
+ if (rc < 0)
GOTO(out_req, rc = -ENOMEM);
rc = obd_checkmd(sbi->ll_osc_exp, sbi->ll_mdc_exp, lsm);
- if (rc)
- GOTO(out_free_memmd, rc);
-
+ if (rc)
+ GOTO(out_free_memmd, rc);
+
lmj_size = sizeof(struct lov_user_md_join) +
lsm->lsm_stripe_count *
sizeof(struct lov_user_ost_data_join);
OBD_ALLOC(lmj, lmj_size);
- if (!lmj)
+ if (!lmj)
GOTO(out_free_memmd, rc = -ENOMEM);
-
+
memcpy(lmj, lmm, sizeof(struct lov_user_md_join));
- for(i = 0; i < lsm->lsm_stripe_count; i++) {
+ for (i = 0; i < lsm->lsm_stripe_count; i++) {
struct lov_array_info *lai = lsm->lsm_array;
if ((lai->lai_ext_array[aindex].le_loi_idx +
lai->lai_ext_array[aindex].le_stripe_count)<=i){
aindex ++;
}
- CDEBUG(D_INFO, "aindex %d i %d l_extent_start"LPU64""
- "len %d \n", aindex, i,
- lai->lai_ext_array[aindex].le_start,
- (int)lai->lai_ext_array[aindex].le_len);
+ CDEBUG(D_INFO, "aindex %d i %d l_extent_start"
+ LPU64"len %d \n", aindex, i,
+ lai->lai_ext_array[aindex].le_start,
+ (int)lai->lai_ext_array[aindex].le_len);
lmj->lmm_objects[i].l_extent_start =
lai->lai_ext_array[aindex].le_start;
-
+
if ((int)lai->lai_ext_array[aindex].le_len == -1) {
lmj->lmm_objects[i].l_extent_end = -1;
} else {
- lmj->lmm_objects[i].l_extent_end =
- lai->lai_ext_array[aindex].le_start +
- lai->lai_ext_array[aindex].le_len;
+ lmj->lmm_objects[i].l_extent_end =
+ lai->lai_ext_array[aindex].le_start +
+ lai->lai_ext_array[aindex].le_len;
}
lmj->lmm_objects[i].l_object_id =
lsm->lsm_oinfo[i].loi_id;
RETURN(rc);
}
-static int mds_init_export(struct obd_export *exp)
+int mds_init_export(struct obd_export *exp)
{
struct mds_export_data *med = &exp->exp_mds_data;
INIT_LIST_HEAD(&med->med_open_head);
spin_lock_init(&med->med_open_lock);
+ exp->exp_connecting = 1;
RETURN(0);
}
struct mds_obd *mds = &obd->u.mds;
*eof = 1;
- return snprintf(page, count, "%u\n", mds->mds_atime_diff);
+ return snprintf(page, count, "%lu\n", mds->mds_atime_diff);
}
struct lprocfs_vars lprocfs_mds_obd_vars[] = {
last_transno, le64_to_cpu(msd->msd_last_transno),
le64_to_cpu(mcd->mcd_last_xid));
- exp = class_new_export(obd);
- if (exp == NULL)
- GOTO(err_client, rc = -ENOMEM);
+ exp = class_new_export(obd, (struct obd_uuid *)mcd->mcd_uuid);
+ if (IS_ERR(exp))
+ GOTO(err_client, rc = PTR_ERR(exp));
- memcpy(&exp->exp_client_uuid.uuid, mcd->mcd_uuid,
- sizeof exp->exp_client_uuid.uuid);
med = &exp->exp_mds_data;
med->med_mcd = mcd;
rc = mds_client_add(obd, mds, med, cl_idx);
LASSERTF(rc == 0, "rc = %d\n", rc); /* can't fail existing */
- /* create helper if export init gets more complex */
- INIT_LIST_HEAD(&med->med_open_head);
- spin_lock_init(&med->med_open_lock);
mcd = NULL;
exp->exp_replay_needed = 1;
+ exp->exp_connecting = 0;
obd->obd_recoverable_clients++;
obd->obd_max_recoverable_clients++;
class_export_put(exp);
extern int mds_iocontrol(unsigned int cmd, struct obd_export *exp,
int len, void *karg, void *uarg);
int mds_postrecov(struct obd_device *obd);
+int mds_init_export(struct obd_export *exp);
#ifdef __KERNEL__
int mds_get_md(struct obd_device *, struct inode *, void *md, int *size,
int lock);
/* Creates a new export, adds it to the hash table, and returns a
* pointer to it. The refcount is 2: one for the hash reference, and
* one for the pointer returned by this function. */
-struct obd_export *class_new_export(struct obd_device *obd)
+struct obd_export *class_new_export(struct obd_device *obd,
+ struct obd_uuid *cluuid)
{
- struct obd_export *export;
+ struct obd_export *export, *tmp;
OBD_ALLOC(export, sizeof(*export));
- if (!export) {
- CERROR("no memory! (minor %d)\n", obd->obd_minor);
- return NULL;
- }
+ if (!export)
+ return ERR_PTR(-ENOMEM);
export->exp_conn_cnt = 0;
atomic_set(&export->exp_refcount, 2);
export->exp_last_request_time = CURRENT_SECONDS;
spin_lock_init(&export->exp_lock);
+ export->exp_client_uuid = *cluuid;
+ obd_init_export(export);
+
spin_lock(&obd->obd_dev_lock);
+ if (!obd_uuid_equals(cluuid, &obd->obd_uuid)) {
+ list_for_each_entry(tmp, &obd->obd_exports, exp_obd_chain) {
+ if (obd_uuid_equals(cluuid, &tmp->exp_client_uuid)) {
+ spin_unlock(&obd->obd_dev_lock);
+ CWARN("%s: denying duplicate export for %s\n",
+ obd->obd_name, cluuid->uuid);
+ class_handle_unhash(&export->exp_handle);
+ OBD_FREE_PTR(export);
+ return ERR_PTR(-EALREADY);
+ }
+ }
+ }
LASSERT(!obd->obd_stopping); /* shouldn't happen, but might race */
atomic_inc(&obd->obd_refcount);
list_add(&export->exp_obd_chain, &export->exp_obd->obd_exports);
export->exp_obd->obd_num_exports++;
spin_unlock(&obd->obd_dev_lock);
- obd_init_export(export);
return export;
}
EXPORT_SYMBOL(class_new_export);
LASSERT(cluuid != NULL);
ENTRY;
- export = class_new_export(obd);
- if (export == NULL)
- RETURN(-ENOMEM);
+ export = class_new_export(obd, cluuid);
+ if (IS_ERR(export))
+ RETURN(PTR_ERR(export));
conn->cookie = export->exp_handle.h_cookie;
- memcpy(&export->exp_client_uuid, cluuid,
- sizeof(export->exp_client_uuid));
class_export_put(export);
CDEBUG(D_IOCTL, "connect: client %s, cookie "LPX64"\n",
obd->obd_starting = 1;
spin_unlock(&obd->obd_dev_lock);
- exp = class_new_export(obd);
- if (exp == NULL)
- RETURN(err);
- memcpy(&exp->exp_client_uuid, &obd->obd_uuid,
- sizeof(exp->exp_client_uuid));
+ exp = class_new_export(obd, &obd->obd_uuid);
+ if (IS_ERR(exp))
+ RETURN(PTR_ERR(exp));
obd->obd_self_export = exp;
list_del_init(&exp->exp_obd_chain_timed);
class_export_put(exp);
#include <obd_support.h>
#include <obd_class.h>
#include <obd_echo.h>
+#include <lustre_ver.h>
#include <lustre_debug.h>
#include <lprocfs_status.h>
struct obd_device *tgt;
struct lustre_handle conn = {0, };
struct obd_uuid echo_uuid = { "ECHO_UUID" };
+ struct obd_connect_data *ocd = NULL;
int rc;
ENTRY;
CFS_INIT_LIST_HEAD (&ec->ec_objects);
ec->ec_unique = 0;
- rc = obd_connect(&conn, tgt, &echo_uuid, NULL /* obd_connect_data */);
- if (rc) {
+ OBD_ALLOC(ocd, sizeof(*ocd));
+ if (ocd == NULL) {
+ CERROR("Can't alloc ocd connecting to %s\n",
+ lustre_cfg_string(lcfg, 1));
+ return -ENOMEM;
+ }
+
+ ocd->ocd_version = LUSTRE_VERSION_CODE;
+
+ rc = obd_connect(&conn, tgt, &echo_uuid, ocd);
+
+ OBD_FREE(ocd, sizeof(*ocd));
+
+ if (rc != 0) {
CERROR("fail to connect to device %s\n",
lustre_cfg_string(lcfg, 1));
return (rc);
return 0;
}
+static int filter_init_export(struct obd_export *exp) /* export-init hook for the filter */
+{
+ spin_lock_init(&exp->exp_filter_data.fed_lock); /* per-export filter data lock */
+ exp->exp_connecting = 1; /* new exports start flagged as connecting */
+
+ return 0; /* no failure path: always reports success */
+}
+
static int filter_free_server_data(struct filter_obd *filter)
{
OBD_FREE(filter->fo_fsd, sizeof(*filter->fo_fsd));
/* These exports are cleaned up by filter_disconnect(), so they
* need to be set up like real exports as filter_connect() does.
*/
- exp = class_new_export(obd);
+ exp = class_new_export(obd, (struct obd_uuid *)fcd->fcd_uuid);
CDEBUG(D_HA, "RCVRNG CLIENT uuid: %s idx: %d lr: "LPU64
" srv lr: "LPU64"\n", fcd->fcd_uuid, cl_idx,
last_rcvd, le64_to_cpu(fsd->fsd_last_transno));
- if (exp == NULL)
- GOTO(err_client, rc = -ENOMEM);
+ if (IS_ERR(exp))
+ GOTO(err_client, rc = PTR_ERR(exp));
- memcpy(&exp->exp_client_uuid.uuid, fcd->fcd_uuid,
- sizeof exp->exp_client_uuid.uuid);
fed = &exp->exp_filter_data;
fed->fed_fcd = fcd;
rc = filter_client_add(obd, filter, fed, cl_idx);
LASSERTF(rc == 0, "rc = %d\n", rc); /* can't fail existing */
- /* create helper if export init gets more complex */
- spin_lock_init(&fed->fed_lock);
fcd = NULL;
exp->exp_replay_needed = 1;
+ exp->exp_connecting = 0;
obd->obd_recoverable_clients++;
obd->obd_max_recoverable_clients++;
class_export_put(exp);
.o_connect = filter_connect,
.o_reconnect = filter_reconnect,
.o_disconnect = filter_disconnect,
+ .o_init_export = filter_init_export,
+ .o_destroy_export = filter_destroy_export,
.o_statfs = filter_statfs,
.o_getattr = filter_getattr,
.o_unpackmd = filter_unpackmd,
.o_sync = filter_sync,
.o_preprw = filter_preprw,
.o_commitrw = filter_commitrw,
- .o_destroy_export = filter_destroy_export,
.o_llog_init = filter_llog_init,
.o_llog_finish = filter_llog_finish,
.o_iocontrol = filter_iocontrol,
.o_connect = filter_connect,
.o_reconnect = filter_reconnect,
.o_disconnect = filter_disconnect,
+ .o_init_export = filter_init_export,
+ .o_destroy_export = filter_destroy_export,
.o_statfs = filter_statfs,
.o_getattr = filter_getattr,
.o_unpackmd = filter_unpackmd,
.o_preprw = filter_preprw,
.o_commitrw = filter_commitrw,
.o_san_preprw = filter_san_preprw,
- .o_destroy_export = filter_destroy_export,
.o_llog_init = filter_llog_init,
.o_llog_finish = filter_llog_finish,
.o_iocontrol = filter_iocontrol,
deuuidify(imp->imp_target_uuid.uuid, NULL,
&target_start, &target_len);
- LCONSOLE_ERROR("Connection to service %.*s via nid %s was "
+ LCONSOLE_ERROR("%s: Connection to service %.*s via nid %s was "
"lost; in progress operations using this "
- "service will %s.\n",
+ "service will %s.\n", imp->imp_obd->obd_name,
target_len, target_start,
libcfs_nid2str(imp->imp_connection->c_peer.nid),
imp->imp_replayable ?
- "wait for recovery to complete" : "fail");
+ "wait for recovery to complete" : "fail");
if (obd_dump_on_timeout)
libcfs_debug_dumplog();
- CDEBUG(D_HA, "%s: connection lost to %s@%s\n",
- imp->imp_obd->obd_name,
- imp->imp_target_uuid.uuid,
- imp->imp_connection->c_remote_uuid.uuid);
IMPORT_SET_STATE_NOLOCK(imp, LUSTRE_IMP_DISCON);
spin_unlock_irqrestore(&imp->imp_lock, flags);
obd_import_event(imp->imp_obd, imp, IMP_EVENT_DISCON);
/* Sigh, some compilers do not like #ifdef in the middle
of macro arguments */
#ifdef __KERNEL__
- char *action = "upgrading this client";
+ const char *action = "upgrading this client";
#else
- char *action = "recompiling this application";
+ const char *action = "recompiling this application";
#endif
CWARN("Server %s version (%d.%d.%d.%d) is much newer. "
OBD_OCD_VERSION_PATCH(ocd->ocd_version),
OBD_OCD_VERSION_FIX(ocd->ocd_version),
LUSTRE_VERSION_STRING);
+ ptlrpc_deactivate_import(imp);
IMPORT_SET_STATE(imp, LUSTRE_IMP_CLOSED);
}
RETURN(-EPROTO);
else
pc = &ptlrpcd_recovery_pc;
- ptlrpc_set_add_new_req(pc->pc_set, req);
req->rq_ptlrpcd_data = pc;
-
- ptlrpcd_wake(req);
+ ptlrpc_set_add_new_req(pc->pc_set, req);
+ wake_up(&pc->pc_waitq);
}
static int ptlrpcd_check(struct ptlrpcd_ctl *pc)
/* Initialize quota limit to MIN_QLIMIT */
LASSERT(oqctl->qc_dqblk.dqb_valid == QIF_BLIMITS);
- LASSERT(oqctl->qc_dqblk.dqb_bhardlimit == MIN_QLIMIT);
LASSERT(oqctl->qc_dqblk.dqb_bsoftlimit == 0);
/* There might be a pending dqacq/dqrel (which is going to
qctxt_wait_pending_dqacq(&obd->u.obt.obt_qctxt,
oqctl->qc_id, oqctl->qc_type, 1);
+ if (!oqctl->qc_dqblk.dqb_bhardlimit)
+ goto adjust;
+
+ LASSERT(oqctl->qc_dqblk.dqb_bhardlimit == MIN_QLIMIT);
push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
rc = fsfilt_quotactl(obd, obd->u.obt.obt_sb, oqctl);
if (rc)
RETURN(rc);
-
+adjust:
/* Trigger qunit pre-acquire */
if (oqctl->qc_type == USRQUOTA)
uid = oqctl->qc_id;
}
static int mds_init_slave_ilimits(struct obd_device *obd,
- struct obd_quotactl *oqctl)
+ struct obd_quotactl *oqctl, int set)
{
/* XXX: for file limits only adjust local now */
unsigned int uid = 0, gid = 0;
- struct obd_quotactl *ioqc;
+ struct obd_quotactl *ioqc = NULL;
int rc;
ENTRY;
/* if we are going to set zero limit, needn't init slaves */
if (!oqctl->qc_dqblk.dqb_ihardlimit && !oqctl->qc_dqblk.dqb_isoftlimit)
RETURN(0);
+
+ if (!set)
+ goto acquire;
OBD_ALLOC_PTR(ioqc);
if (!ioqc)
rc = fsfilt_quotactl(obd, obd->u.obt.obt_sb, ioqc);
if (rc)
GOTO(out, rc);
-
+acquire:
/* trigger local qunit pre-acquire */
if (oqctl->qc_type == USRQUOTA)
uid = oqctl->qc_id;
/* FIXME initialize all slaves in CMD */
EXIT;
out:
- OBD_FREE_PTR(ioqc);
+ if (ioqc)
+ OBD_FREE_PTR(ioqc);
return rc;
}
static int mds_init_slave_blimits(struct obd_device *obd,
- struct obd_quotactl *oqctl)
+ struct obd_quotactl *oqctl, int set)
{
struct mds_obd *mds = &obd->u.mds;
struct obd_quotactl *ioqc;
ioqc->qc_id = oqctl->qc_id;
ioqc->qc_type = oqctl->qc_type;
ioqc->qc_dqblk.dqb_valid = QIF_BLIMITS;
- ioqc->qc_dqblk.dqb_bhardlimit = MIN_QLIMIT;
+ ioqc->qc_dqblk.dqb_bhardlimit = set ? MIN_QLIMIT : 0;
/* set local limit to MIN_QLIMIT */
- rc = fsfilt_quotactl(obd, obd->u.obt.obt_sb, ioqc);
- if (rc)
- GOTO(out, rc);
+ if (set) {
+ rc = fsfilt_quotactl(obd, obd->u.obt.obt_sb, ioqc);
+ if (rc)
+ GOTO(out, rc);
+ }
/* trigger local qunit pre-acquire */
if (oqctl->qc_type == USRQUOTA)
time_t btime, itime;
struct lustre_dquot *dquot;
struct obd_dqblk *dqblk = &oqctl->qc_dqblk;
- int rc;
+ int set, rc;
ENTRY;
down(&mds->mds_qonoff_sem);
}
up(&mds->mds_qonoff_sem);
- if (dqblk->dqb_valid & QIF_ILIMITS && !(ihardlimit || isoftlimit)) {
- rc = mds_init_slave_ilimits(obd, oqctl);
+ if (dqblk->dqb_valid & QIF_ILIMITS) {
+ set = !(ihardlimit || isoftlimit);
+ rc = mds_init_slave_ilimits(obd, oqctl, set);
if (rc) {
CERROR("init slave ilimits failed! (rc:%d)\n", rc);
goto revoke_out;
}
}
- if (dqblk->dqb_valid & QIF_BLIMITS && !(bhardlimit || bsoftlimit)) {
- rc = mds_init_slave_blimits(obd, oqctl);
+ if (dqblk->dqb_valid & QIF_BLIMITS) {
+ set = !(bhardlimit || bsoftlimit);
+ rc = mds_init_slave_blimits(obd, oqctl, set);
if (rc) {
CERROR("init slave blimits failed! (rc:%d)\n", rc);
goto revoke_out;
POWER_DOWN=${POWER_DOWN:-"powerman --off"}
POWER_UP=${POWER_UP:-"powerman --on"}
-PDSH=no_dsh
+PDSH=${PDSH:-no_dsh}
start_ost
start_mds
mount_client $MOUNT
- CHECK_PTLDEBUG="`cat /proc/sys/lnet/debug`"
- if [ $CHECK_PTLDEBUG = "1" ]; then
+ CHECK_PTLDEBUG="`do_facet mds sysctl lnet.debug | sed -e 's/.* = //'`"
+ if [ "$CHECK_PTLDEBUG" = "1" ]; then
echo "lmc --debug success"
else
echo "lmc --debug: want 1, have $CHECK_PTLDEBUG"
return 1
fi
- CHECK_SUBSYSTEM="`cat /proc/sys/lnet/subsystem_debug`"
- if [ $CHECK_SUBSYSTEM = "2" ]; then
+ # again with the pdsh prefix
+ CHECK_SUBSYSTEM="`do_facet mds sysctl lnet.subsystem_debug | cut -d= -f2`"
+ if [ "$CHECK_SUBSYSTEM" = "2" ]; then
echo "lmc --subsystem success"
else
echo "lmc --subsystem: want 2, have $CHECK_SUBSYSTEM"
echo "lconf --debug: want 3, have $CHECK_PTLDEBUG"
return 1
fi
- CHECK_SUBSYS="`do_facet mds sysctl lnet.subsystem_debug|cut -d= -f2`"
+ CHECK_SUBSYS="`do_facet mds sysctl lnet.subsystem_debug | cut -d= -f2`"
if [ $CHECK_SUBSYS = "20" ]; then
echo "lconf --subsystem success"
else
FOUNDSTRING=`awk -F"<" '/<mkfsoptions>/{print $2}' $XMLCONFIG`
EXPECTEDSTRING="mkfsoptions>-Llabel_conf_14"
- if [ $EXPECTEDSTRING != $FOUNDSTRING ]; then
+ if [ "$EXPECTEDSTRING" != "$FOUNDSTRING" ]; then
echo "Error: expected: $EXPECTEDSTRING; found: $FOUNDSTRING"
return 1
fi
start_ost
start_mds
mount_client $MOUNT || return $?
- if [ -z "`dumpe2fs -h $OSTDEV | grep label_conf_14`" ]; then
+ if [ -z "`do_facet ost dumpe2fs -h $OSTDEV | grep label_conf_14`" ]; then
echo "Error: the mkoptions not applied to mke2fs of ost."
return 1
fi
[ -f "$MOUNTLUSTRE" ] && echo "can't move $MOUNTLUSTRE" && return 40
trap cleanup_15 EXIT INT
[ ! `cp $(which llmount) $MOUNTLUSTRE` ] || return $?
- do_node `hostname` mkdir -p $MOUNT 2> /dev/null
+ do_facet client "mkdir -p $MOUNT 2> /dev/null"
# load llite module on the client if it isn't in /lib/modules
- do_node `hostname` lconf --nosetup --node client_facet $XMLCONFIG
- do_node `hostname` mount -t lustre -o nettype=$NETTYPE,$MOUNTOPT \
- `facet_nid mds`:/mds_svc/client_facet $MOUNT ||return $?
+ do_facet client "$LCONF --nosetup --node client_facet $XMLCONFIG"
+ do_facet client "mount -t lustre -o $MOUNTOPT \
+ `facet_nid mds`:/mds_svc/client_facet $MOUNT" ||return $?
echo "mount lustre on $MOUNT with $MOUNTLUSTRE: success"
[ -d /r ] && $LCTL modules > /r/tmp/ogdb-`hostname`
check_mount || return 41
fi
echo "change the mode of $MDSDEV/OBJECTS,LOGS,PENDING to 555"
- [ -d $TMPMTPT ] || mkdir -p $TMPMTPT
- mount -o loop -t ext3 $MDSDEV $TMPMTPT || return $?
- chmod 555 $TMPMTPT/OBJECTS || return $?
- chmod 555 $TMPMTPT/LOGS || return $?
- chmod 555 $TMPMTPT/PENDING || return $?
- umount $TMPMTPT || return $?
-
+ # The quoted script runs on the mds node outside of any shell function,
+ # so it cannot bail out with "return"; chain the steps with && so the
+ # first failure becomes the command's exit status.  The three paths are
+ # spelled out because brace expansion is not available in every sh.
+ do_facet mds "mkdir -p $TMPMTPT &&
+ mount -o loop -t ext3 $MDSDEV $TMPMTPT &&
+ chmod 555 $TMPMTPT/OBJECTS $TMPMTPT/LOGS $TMPMTPT/PENDING &&
+ umount $TMPMTPT" || return $?
+
echo "mount Lustre to change the mode of OBJECTS/LOGS/PENDING, then umount Lustre"
start_ost
start_mds
cleanup || return $?
echo "read the mode of OBJECTS/LOGS/PENDING and check if they has been changed properly"
- EXPECTEDOBJECTSMODE=`debugfs -R "stat OBJECTS" $MDSDEV 2> /dev/null | awk '/Mode: /{print $6}'`
- EXPECTEDLOGSMODE=`debugfs -R "stat LOGS" $MDSDEV 2> /dev/null | awk '/Mode: /{print $6}'`
- EXPECTEDPENDINGMODE=`debugfs -R "stat PENDING" $MDSDEV 2> /dev/null | awk '/Mode: /{print $6}'`
+ EXPECTEDOBJECTSMODE=`do_facet mds "debugfs -R 'stat OBJECTS' $MDSDEV 2> /dev/null" | awk '/Mode: /{print $NF}'`
+ EXPECTEDLOGSMODE=`do_facet mds "debugfs -R 'stat LOGS' $MDSDEV 2> /dev/null" | awk '/Mode: /{print $NF}'`
+ EXPECTEDPENDINGMODE=`do_facet mds "debugfs -R 'stat PENDING' $MDSDEV 2> /dev/null" | awk '/Mode: /{print $NF}'`
if [ "$EXPECTEDOBJECTSMODE" = "0777" ]; then
echo "Success:Lustre change the mode of OBJECTS correctly"
fi
echo "Remove mds config log"
- [ -d $TMPMTPT ] || mkdir -p $TMPMTPT
- mount -o loop -t ext3 $MDSDEV $TMPMTPT || return $?
- rm -f $TMPMTPT/LOGS/mds_svc || return $?
- umount $TMPMTPT || return $?
+ # debugfs' own exit status is what do_facet reports; a "return" inside
+ # the remote command string would run outside of any function.
+ do_facet mds "debugfs -w -R 'unlink LOGS/mds_svc' $MDSDEV" || return $?
start_ost
start mds $MDSLCONFARGS && return 42
check_mount || return 41
echo "check journal size..."
- FOUNDJOURNALSIZE=`debugfs -R "stat <8>" $MDSDEV | awk '/Size: / { print $6; exit;}'`
+ FOUNDJOURNALSIZE=`do_facet mds "debugfs -R 'stat <8>' $MDSDEV" | awk '/Size: / { print $NF; exit;}'`
if [ "$FOUNDJOURNALSIZE" = "79691776" ]; then
echo "Success:lconf creates large journals"
else
echo "$LEAK_LUSTRE" 1>&2
echo "$LEAK_PORTALS" 1>&2
mv $TMP/debug $TMP/debug-leak.`date +%s`
+ echo "Memory leaks detected"
exit 254
fi
lsmod | grep lnet && echo "modules still loaded" && exit 1
stop_read_ahead
#define OBD_FAIL_PTLRPC_BULK_PUT_NET 0x504 | OBD_FAIL_ONCE
- sysctl -w lustre.fail_loc=0x80000504
+ do_facet ost sysctl -w lustre.fail_loc=0x80000504
cancel_lru_locks OSC
# will get evicted here
do_facet client "cmp /etc/termcap $MOUNT/termcap" && return 1
test_44() {
mdcdev=`awk '/mds_svc_MNT/ {print $1}' < /proc/fs/lustre/devices`
- do_facet mds "sysctl -w lustre.fail_loc=0x80000701"
- $LCTL --device $mdcdev recover
- df $MOUNT
+ for i in `seq 1 10`; do
+ #define OBD_FAIL_TGT_CONN_RACE 0x701
+ do_facet mds "sysctl -w lustre.fail_loc=0x80000701"
+ $LCTL --device $mdcdev recover
+ df $MOUNT
+ done
do_facet mds "sysctl -w lustre.fail_loc=0"
return 0
}
run_test 44 "race in target handle connect"
+# Ten times over: arm the delayed-reconnect fail_loc on the mds, force the
+# client MDC to recover, and run df on $MOUNT.  The fail_loc is cleared
+# afterwards.  Always returns 0.
+test_44b() {
+ # keep the helper variables out of the test script's global scope
+ local mdcdev=`awk '/mds_svc_MNT/ {print $1}' < /proc/fs/lustre/devices`
+ local i
+ for i in `seq 1 10`; do
+ #define OBD_FAIL_TGT_DELAY_RECONNECT 0x704
+ do_facet mds "sysctl -w lustre.fail_loc=0x80000704"
+ $LCTL --device $mdcdev recover
+ df $MOUNT
+ done
+ do_facet mds "sysctl -w lustre.fail_loc=0"
+ return 0
+}
+run_test 44b "race in target handle connect (delayed reconnect)"
+
# Handle failed close
test_45() {
mdcdev=`awk '/mds_svc_MNT/ {print $1}' < /proc/fs/lustre/devices`
clean() {
echo -n "cln.."
- sh llmountcleanup.sh ${FORCE} > /dev/null || exit 20
+ sh llmountcleanup.sh ${FORCE} > /dev/null || { echo "FAILed to clean up"; exit 20; }
}
CLEAN=${CLEAN:-:}
[ "$UID" != 0 ] && echo "skipping $TESTNAME (must run as root)" && return
[ -z "`mount | grep " $DIR .*\<acl\>"`" ] && echo "skipping $TESTNAME (must have acl)" && return
[ -z "`grep acl $LPROC/mdc/MDC*MNT*/connect_flags`" ] && echo "skipping $TESTNAME (must have acl)" && return
+ # only test for the setfacl binary; do not execute it, and skip the
+ # test only when it is really missing
+ [ -z "$(which setfacl 2>/dev/null)" ] && echo "skipping $TESTNAME (could not find setfacl)" && return
echo "performing cp ..."
run_acl_subtest cp || error
if [ -x /sbin/mount.lustre ] ; then
do_node $client mount -t lustre $OPTIONS \
`facet_nid mds`:/mds_svc/client_facet $mnt || return 1
+ do_node $client "sysctl -w lnet.debug=$PTLDEBUG; sysctl -w lnet.subsystem_debug=${SUBSYSTEM# }"
else
# this is so cheating
do_node $client $LCONF --nosetup --node client_facet $XMLCONFIG > \
do_node() {
HOST=$1
shift
+ local myPDSH=$PDSH
+ if [ "$HOST" = "$(hostname)" ]; then
+ myPDSH="no_dsh"
+ fi
if $VERBOSE; then
echo "CMD: $HOST $@"
- $PDSH $HOST $LCTL mark "$@" > /dev/null 2>&1 || :
+ $myPDSH $HOST $LCTL mark "$@" > /dev/null 2>&1 || :
fi
- $PDSH $HOST "(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests:/sbin:/usr/sbin; cd $RPWD; sh -c \"$@\")"
+ $myPDSH $HOST "(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests:/sbin:/usr/sbin; cd $RPWD; sh -c \"$@\")"
}
do_facet() {
##################################
# Test interface
error() {
- sysctl -w lustre.fail_loc=0
+ sysctl -w lustre.fail_loc=0 || true
echo "${TESTSUITE}: **** FAIL:" $@
log "FAIL: $@"
exit 1
for (i = 0, uuidp = uuids; i < obdcount; i++, uuidp++) {
memcpy(&qctl->obd_uuid, uuidp, sizeof(*uuidp));
+ /* XXX clear this flag to get quota from osts */
+ qctl->qc_dqblk.dqb_valid = 0;
rc = llapi_quotactl(mnt, qctl);
if (rc) {
fprintf(stderr, "%s quotactl failed: %s\n",