Merging of changes from b1_2 into b1_4.
- don't ASSERT in ptl_send_rpc() if we run out of memory (5119)
- lock /proc/sys/portals/routes internal state, avoiding oops (4827)
- debugging for client eviction looping (4908)
tbd Cluster File Systems, Inc. <info@clusterfs.com>
* version 1.3.4
* bug fixes
+ - changes from 1.2.9
- flock/lockf fixes (but it's still disabled, pending 5135)
- don't use EXT3 constants in llite code (5094)
- - return async write errors to application if possible (2248)
- - return last_committed value from OST to avoid OOM (4966)
- memory shortage at startup could cause assertion (5176)
- - the watchdog thread now runs as interruptible (5246)
* miscellania
- reorganization of lov code
- single portals codebase
- Infiniband NAL
- - add pid to ldlm debugging output (4922)
- - return last_committed value from OST to avoid OOM (4966)
- add extents/mballoc support (5025)
- direct I/O reads in the obdfilter (4048)
tbd Cluster File Systems, Inc. <info@clusterfs.com>
+ * version 1.2.9
+ - send OST transaction number in read/write reply to free req (4966)
+ - don't ASSERT in ptl_send_rpc() if we run out of memory (5119)
+ - lock /proc/sys/portals/routes internal state, avoiding oops (4827)
+ - the watchdog thread now runs as interruptible (5246)
+ * miscellania
+ - add pid to ldlm debugging output (4922)
+
+2004-11-17 Cluster File Systems, Inc. <info@clusterfs.com>
* version 1.2.8
* bug fixes
- allocate qswnal tx descriptors singly to avoid fragmentation (4504)
- add software watchdogs to catch hung threads quickly (4941)
- make lustrefs init script start after nfs is mounted
- fix CWARN/ERROR duplication (4930)
+ - return async write errors to application if possible (2248)
       - add /proc/sys/portals/memused (bytes allocated by PORTALS_ALLOC)
- print NAL number in %x format (4645)
$(MAKE) sources -C lvfs
modules: lustre_build_version $(DEP) $(LDISKFS) lvfs-sources
- $(MAKE) $(ARCH_UM) CC=$(CC) -C $(LINUX) -f $(PWD)/kernel-tests/Makefile LUSTRE_LINUX_CONFIG=$(LINUX_CONFIG) $(MODULE_TARGET)=$(PWD) -o tmp_include_depends -o scripts -o include/config/MARKER $@
+ $(MAKE) $(ARCH_UM) CC="$(CC)" -C $(LINUX) -f $(PWD)/kernel-tests/Makefile LUSTRE_LINUX_CONFIG=$(LINUX_CONFIG) $(MODULE_TARGET)=$(PWD) -o tmp_include_depends -o scripts -o include/config/MARKER $@
endif # MODULES
LDLM_DEBUG(lock, "client-side enqueue returned a blocked lock, "
"sleeping");
- ldlm_lock_dump(D_DLMTRACE, lock, 0);
+ ldlm_lock_dump(D_OTHER, lock, 0);
fwd.fwd_lock = lock;
obd = class_exp2obd(lock->l_conn_export);
static void waiting_locks_callback(unsigned long unused)
{
- struct ldlm_lock *lock;
+ struct ldlm_lock *lock, *last = NULL;
char str[PTL_NALFMT_SIZE];
spin_lock_bh(&waiting_locks_spinlock);
lock->l_export->exp_connection->c_remote_uuid.uuid,
ptlrpc_peernid2str(&lock->l_export->exp_connection->c_peer, str));
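+               /* the same lock is still at the head of the waiting list, so
+                * the previous iteration failed to move it off; the list is
+                * corrupted, dump its pointers and LBUG below */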
+ if (lock == last) {
+ LDLM_ERROR(lock, "waiting on lock multiple times");
+ CERROR("wll %p .prev %p, l_pending.next %p .prev %p\n",
+ waiting_locks_list.next, waiting_locks_list.prev,
+ lock->l_pending_chain.next,
+ lock->l_pending_chain.prev);
+ spin_unlock(&waiting_locks_spinlock);
+ LBUG();
+ }
+ last = lock;
+
spin_lock_bh(&expired_lock_thread.elt_lock);
list_del(&lock->l_pending_chain);
list_add(&lock->l_pending_chain,
lock->l_ast_data = NULL;
if (lock->l_req_mode != lock->l_granted_mode)
- LDLM_ERROR(lock,"clearing inode with ungranted lock\n"); }
+ LDLM_ERROR(lock,"clearing inode with ungranted lock\n");
+ }
return LDLM_ITER_CONTINUE;
}
GOTO(out, rc);
/* make full-page requests if we are not at EOF (bug 4410) */
- if (llap->llap_page->index < size_index) {
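+        /* only widen the write to a full page if the caller did not
+         * already request one (to == PAGE_SIZE) */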
+ if (to != PAGE_SIZE && llap->llap_page->index < size_index) {
LL_CDEBUG_PAGE(D_PAGE, llap->llap_page,
"sync write before EOF: size_index %lu, to %d\n",
size_index, to);
to = PAGE_SIZE;
- } else if (llap->llap_page->index == size_index) {
+ } else if (to != PAGE_SIZE && llap->llap_page->index == size_index) {
int size_to = inode->i_size & ~PAGE_MASK;
LL_CDEBUG_PAGE(D_PAGE, llap->llap_page,
"sync write at EOF: size_index %lu, to %d/%d\n",
int lap_magic;
int lap_stripe;
obd_off lap_sub_offset;
+ obd_id lap_loi_id;
void *lap_sub_cookie;
struct obd_async_page_ops *lap_caller_ops;
void *lap_caller_data;
- obd_id lap_loi_id;
};
#define LAP_FROM_COOKIE(c) \
memset(&zero_fcd, 0, sizeof zero_fcd);
push_ctxt(&saved, &obd->obd_ctxt, NULL);
rc = fsfilt_write_record(obd, filter->fo_rcvd_filp, &zero_fcd,
- sizeof(zero_fcd), &off, 1);
+ sizeof(zero_fcd), &off, 0);
+
+ if (rc == 0)
+ /* update server's transno */
+ filter_update_server_data(obd, filter->fo_rcvd_filp,
+ filter->fo_fsd, 1);
pop_ctxt(&saved, &obd->obd_ctxt, NULL);
CDEBUG(rc == 0 ? D_INFO : D_ERROR,
ENTRY;
if (inode->i_nlink != 1 || atomic_read(&inode->i_count) != 1) {
- CERROR("destroying objid %.*s nlink = %lu, count = %d\n",
- dchild->d_name.len, dchild->d_name.name,
+ CERROR("destroying objid %.*s ino %lu nlink %lu count %d\n",
+ dchild->d_name.len, dchild->d_name.name, inode->i_ino,
(unsigned long)inode->i_nlink,
atomic_read(&inode->i_count));
}
obd_size maxsize = obd->obd_osfs.os_blocks * obd->obd_osfs.os_bsize;
obd_size tot_dirty = 0, tot_pending = 0, tot_granted = 0;
obd_size fo_tot_dirty, fo_tot_pending, fo_tot_granted;
- int level = D_CACHE;
if (list_empty(&obd->obd_exports))
return;
spin_lock(&obd->obd_osfs_lock);
spin_lock(&obd->obd_dev_lock);
list_for_each_entry(exp, &obd->obd_exports, exp_obd_chain) {
+ int error = 0;
fed = &exp->exp_filter_data;
if (fed->fed_grant < 0 || fed->fed_pending < 0 ||
fed->fed_dirty < 0)
- level = D_ERROR;
+ error = 1;
if (maxsize > 0) { /* we may not have done a statfs yet */
LASSERTF(fed->fed_grant + fed->fed_pending <= maxsize,
"%s: cli %s/%p %ld+%ld > "LPU64"\n", func,
exp->exp_client_uuid.uuid, exp,
fed->fed_dirty, maxsize);
}
- CDEBUG(level, "%s: cli %s/%p dirty %ld pend %ld grant %ld\n",
- obd->obd_name, exp->exp_client_uuid.uuid, exp,
- fed->fed_dirty, fed->fed_pending, fed->fed_grant);
+ if (error)
+ CERROR("%s: cli %s/%p dirty %ld pend %ld grant %ld\n",
+ obd->obd_name, exp->exp_client_uuid.uuid, exp,
+ fed->fed_dirty, fed->fed_pending,fed->fed_grant);
+ else
+ CDEBUG(D_CACHE, "%s: cli %s/%p dirty %ld pend %ld grant %ld\n",
+ obd->obd_name, exp->exp_client_uuid.uuid, exp,
+ fed->fed_dirty, fed->fed_pending,fed->fed_grant);
tot_granted += fed->fed_grant + fed->fed_pending;
tot_pending += fed->fed_pending;
tot_dirty += fed->fed_dirty;
if (left >= tot_granted) {
left -= tot_granted;
} else {
- static unsigned long next;
- if (left < tot_granted - obd->u.filter.fo_tot_pending &&
- time_after(jiffies, next)) {
- spin_unlock(&obd->obd_osfs_lock);
+ if (left < tot_granted - obd->u.filter.fo_tot_pending + 65536) {
CERROR("%s: cli %s/%p grant "LPU64" > available "
LPU64" and pending "LPU64"\n", obd->obd_name,
exp->exp_client_uuid.uuid, exp, tot_granted,
left, obd->u.filter.fo_tot_pending);
- if (next == 0)
- portals_debug_dumplog();
- next = jiffies + 20 * HZ;
- spin_lock(&obd->obd_osfs_lock);
}
left = 0;
}
struct osc_async_page {
int oap_magic;
+ int oap_cmd;
struct list_head oap_pending_item;
struct list_head oap_urgent_item;
struct list_head oap_rpc_item;
- struct page *oap_page;
- int oap_cmd;
obd_off oap_obj_off;
obd_off oap_page_off;
enum async_flags oap_async_flags;
unsigned long oap_interrupted:1;
- struct obd_io_group *oap_oig;
struct oig_callback_context oap_occ;
+ struct page *oap_page;
+ struct obd_io_group *oap_oig;
struct ptlrpc_request *oap_request;
struct client_obd *oap_cli;
struct lov_oinfo *oap_loi;
struct list_head *curr;
unsigned int generation;
off_t skip;
+ rwlock_t proc_route_rwlock;
} kpr_read_routes_data;
/* nal2name support re-used from utils/portals.c */
static int kpr_proc_routes_read(char *page, char **start, off_t off,
int count, int *eof, void *data)
{
- struct proc_route_data *prd = data;
- kpr_route_entry_t *re;
- kpr_gateway_entry_t *ge;
- int chunk_len = 0;
- int line_len = 0;
- int user_len = 0;
+ struct proc_route_data *prd = data;
+ kpr_route_entry_t *re;
+ kpr_gateway_entry_t *ge;
+ int chunk_len = 0;
+ int line_len = 0;
+ int user_len = 0;
+ int rc = 0;
*eof = 1;
*start = page;
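+       /* serialize concurrent /proc readers: prd->curr and prd->skip are
+        * shared walk state, so only one reader may touch them at a time */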
+ write_lock(&(prd->proc_route_rwlock));
+
if (prd->curr == NULL) {
if (off != 0)
- return 0;
+ goto routes_read_exit;
/* First pass, initialize our private data */
prd->curr = kpr_routes.next;
/* Abort route list generation change */
if (prd->generation != kpr_routes_generation) {
prd->curr = NULL;
- return sprintf(page, "\nError: Routes Changed\n");
+ rc = sprintf(page, "\nError: Routes Changed\n");
+ goto routes_read_exit;
}
/* All the routes have been walked */
if (prd->curr == &kpr_routes) {
prd->curr = NULL;
- return 0;
+ goto routes_read_exit;
}
}
if (prd->curr->next == NULL) {
prd->curr = NULL;
read_unlock(&kpr_rwlock);
- return sprintf(page, "\nError: Routes Changed\n");
+ rc = sprintf(page, "\nError: Routes Changed\n");
+ goto routes_read_exit;
}
prd->curr = prd->curr->next;
prd->curr = prd->curr->prev;
prd->skip = line_len - (user_len - count);
read_unlock(&kpr_rwlock);
- return count;
+ rc = count;
+ goto routes_read_exit;
}
        /* Not enough data to entirely satisfy the caller's request */
prd->skip = 0;
read_unlock(&kpr_rwlock);
- return user_len;
+ rc = user_len;
+
+routes_read_exit:
+ write_unlock(&(prd->proc_route_rwlock));
+ return rc;
}
static int kpr_proc_routes_write(struct file *file, const char *ubuffer,
kpr_read_routes_data.curr = NULL;
kpr_read_routes_data.generation = 0;
kpr_read_routes_data.skip = 0;
+ kpr_read_routes_data.proc_route_rwlock = RW_LOCK_UNLOCKED;
routes_entry->data = &kpr_read_routes_data;
routes_entry->read_proc = kpr_proc_routes_read;
if (rc != PTL_OK) {
CERROR("PtlMDAttach failed: %d\n", rc);
LASSERT (rc == PTL_NO_SPACE);
+ spin_lock_irqsave (&request->rq_lock, flags);
+ /* ...but the MD attach didn't succeed... */
+ request->rq_receiving_reply = 0;
+ spin_unlock_irqrestore (&request->rq_lock, flags);
                GOTO(cleanup_me, rc = -ENOMEM);
}
request->rq_reply_portal,
connection->c_peer.peer_ni->pni_name);
- ptlrpc_request_addref(request); /* +1 ref for the SENT callback */
+ ptlrpc_request_addref(request); /* +1 ref for the SENT callback */
request->rq_sent = CURRENT_SECONDS;
ptlrpc_pinger_sending_on_import(request->rq_import);
int ptlrpc_resend(struct obd_import *imp)
{
- struct list_head *tmp, *pos;
- struct ptlrpc_request *req;
+ struct ptlrpc_request *req, *next;
unsigned long flags;
ENTRY;
}
spin_unlock_irqrestore(&imp->imp_lock, flags);
- list_for_each_safe(tmp, pos, &imp->imp_sending_list) {
- req = list_entry(tmp, struct ptlrpc_request, rq_list);
+ list_for_each_entry_safe(req, next, &imp->imp_sending_list, rq_list) {
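+               /* sanity check: catch a request that has already been freed
+                * (poisoned memory) before handing it to ptlrpc_resend_req() */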
+ LASSERTF((long)req > PAGE_SIZE && req != LP_POISON,
+ "req %p bad\n", req);
+ LASSERTF(req->rq_type != LI_POISON, "req %p freed\n", req);
ptlrpc_resend_req(req);
}
run_test 10 "finish request on server after client eviction (bug 1521)"
#bug 2460
-# wake up a thead waiting for completion after eviction
+# wake up a thread waiting for completion after eviction
test_11(){
do_facet client multiop $MOUNT/$tfile Ow || return 1
do_facet client multiop $MOUNT/$tfile or || return 2
do_facet client munlink $MOUNT/$tfile || return 4
}
-run_test 11 "wake up a thead waiting for completion after eviction (b=2460)"
+run_test 11 "wake up a thread waiting for completion after eviction (b=2460)"
#b=2494
test_12(){
iface, star = string.split(wildcard, ':')
local = if2addr(iface)
if not local:
- panic ("unable to determine ip for:", wildcard)
+ panic("unable to determine ip for:", wildcard)
else:
host = socket.gethostname()
local = socket.gethostbyname(host)