From 24c89ee151b88587daab7d0ed60889e7e18e040d Mon Sep 17 00:00:00 2001 From: adilger Date: Thu, 18 Nov 2004 20:57:47 +0000 Subject: [PATCH] Branch: b1_4 Merging of changes from b1_2 into b1_4. - don't ASSERT in ptl_send_rpc() if we run out of memory (5119) - lock /proc/sys/portals/routes internal state, avoiding oops (4827) - debugging for client eviction looping (4908) --- lustre/ChangeLog | 16 +++++++++++----- lustre/autoMakefile.am | 2 +- lustre/ldlm/ldlm_flock.c | 2 +- lustre/ldlm/ldlm_lockd.c | 13 ++++++++++++- lustre/llite/llite_lib.c | 3 ++- lustre/llite/rw.c | 4 ++-- lustre/lov/lov_internal.h | 2 +- lustre/obdfilter/filter.c | 26 ++++++++++++++++++-------- lustre/obdfilter/filter_io.c | 9 +-------- lustre/osc/osc_internal.h | 6 +++--- lustre/portals/router/proc.c | 36 ++++++++++++++++++++++++------------ lustre/ptlrpc/niobuf.c | 6 +++++- lustre/ptlrpc/recover.c | 9 +++++---- lustre/tests/recovery-small.sh | 4 ++-- lustre/utils/lconf | 2 +- 15 files changed, 89 insertions(+), 51 deletions(-) diff --git a/lustre/ChangeLog b/lustre/ChangeLog index 04449b8..85f9bae 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -1,22 +1,27 @@ tbd Cluster File Systems, Inc. * version 1.3.4 * bug fixes + - changes from 1.2.9 - flock/lockf fixes (but it's still disabled, pending 5135) - don't use EXT3 constants in llite code (5094) - - return async write errors to application if possible (2248) - - return last_committed value from OST to avoid OOM (4966) - memory shortage at startup could cause assertion (5176) - - the watchdog thread now runs as interruptible (5246) * miscellania - reorganization of lov code - single portals codebase - Infiniband NAL - - add pid to ldlm debugging output (4922) - - return last_committed value from OST to avoid OOM (4966) - add extents/mballoc support (5025) - direct I/O reads in the obdfilter (4048) tbd Cluster File Systems, Inc. + * version 1.2.9 + - send OST transaction number in read/write reply to free req (4966) + - don't ASSERT in ptl_send_rpc() if we run out of memory (5119) + - lock /proc/sys/portals/routes internal state, avoiding oops (4827) + - the watchdog thread now runs as interruptible (5246) + * miscellania + - add pid to ldlm debugging output (4922) + +2004-11-17 Cluster File Systems, Inc. * version 1.2.8 * bug fixes - allocate qswnal tx descriptors singly to avoid fragmentation (4504) @@ -39,6 +44,7 @@ tbd Cluster File Systems, Inc. - add software watchdogs to catch hung threads quickly (4941) - make lustrefs init script start after nfs is mounted - fix CWARN/ERROR duplication (4930) + - return async write errors to application if possible (2248) - add /proc/sys/portal/memused (bytes allocated by PORTALS_ALLOC) - print NAL number in %x format (4645) diff --git a/lustre/autoMakefile.am b/lustre/autoMakefile.am index 7830e54..81bde4b 100644 --- a/lustre/autoMakefile.am +++ b/lustre/autoMakefile.am @@ -56,7 +56,7 @@ lvfs-sources: $(MAKE) sources -C lvfs modules: lustre_build_version $(DEP) $(LDISKFS) lvfs-sources - $(MAKE) $(ARCH_UM) CC=$(CC) -C $(LINUX) -f $(PWD)/kernel-tests/Makefile LUSTRE_LINUX_CONFIG=$(LINUX_CONFIG) $(MODULE_TARGET)=$(PWD) -o tmp_include_depends -o scripts -o include/config/MARKER $@ + $(MAKE) $(ARCH_UM) CC="$(CC)" -C $(LINUX) -f $(PWD)/kernel-tests/Makefile LUSTRE_LINUX_CONFIG=$(LINUX_CONFIG) $(MODULE_TARGET)=$(PWD) -o tmp_include_depends -o scripts -o include/config/MARKER $@ endif # MODULES diff --git a/lustre/ldlm/ldlm_flock.c b/lustre/ldlm/ldlm_flock.c index 1dacf83..ac41bfe 100644 --- a/lustre/ldlm/ldlm_flock.c +++ b/lustre/ldlm/ldlm_flock.c @@ -469,7 +469,7 @@ ldlm_flock_completion_ast(struct ldlm_lock *lock, int flags, void *data) LDLM_DEBUG(lock, "client-side enqueue returned a blocked lock, " "sleeping"); - ldlm_lock_dump(D_DLMTRACE, lock, 0); + ldlm_lock_dump(D_OTHER, lock, 0); fwd.fwd_lock = lock; obd = class_exp2obd(lock->l_conn_export); diff --git a/lustre/ldlm/ldlm_lockd.c b/lustre/ldlm/ldlm_lockd.c index 4074048..39d24ac 100644 --- a/lustre/ldlm/ldlm_lockd.c +++ b/lustre/ldlm/ldlm_lockd.c @@ -170,7 +170,7 @@ static int expired_lock_main(void *arg) static void waiting_locks_callback(unsigned long unused) { - struct ldlm_lock *lock; + struct ldlm_lock *lock, *last = NULL; char str[PTL_NALFMT_SIZE]; spin_lock_bh(&waiting_locks_spinlock); @@ -187,6 +187,17 @@ static void waiting_locks_callback(unsigned long unused) lock->l_export->exp_connection->c_remote_uuid.uuid, ptlrpc_peernid2str(&lock->l_export->exp_connection->c_peer, str)); + if (lock == last) { + LDLM_ERROR(lock, "waiting on lock multiple times"); + CERROR("wll %p .prev %p, l_pending.next %p .prev %p\n", + waiting_locks_list.next, waiting_locks_list.prev, + lock->l_pending_chain.next, + lock->l_pending_chain.prev); + spin_unlock(&waiting_locks_spinlock); + LBUG(); + } + last = lock; + spin_lock_bh(&expired_lock_thread.elt_lock); list_del(&lock->l_pending_chain); list_add(&lock->l_pending_chain, diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c index 33d93c2..2d09beb 100644 --- a/lustre/llite/llite_lib.c +++ b/lustre/llite/llite_lib.c @@ -742,7 +742,8 @@ static int null_if_equal(struct ldlm_lock *lock, void *data) lock->l_ast_data = NULL; if (lock->l_req_mode != lock->l_granted_mode) - LDLM_ERROR(lock,"clearing inode with ungranted lock\n"); } + LDLM_ERROR(lock,"clearing inode with ungranted lock\n"); + } return LDLM_ITER_CONTINUE; } diff --git a/lustre/llite/rw.c b/lustre/llite/rw.c index 3d3c7ec..9798c3e 100644 --- a/lustre/llite/rw.c +++ b/lustre/llite/rw.c @@ -434,12 +434,12 @@ static int queue_or_sync_write(struct obd_export *exp, struct inode *inode, GOTO(out, rc); /* make full-page requests if we are not at EOF (bug 4410) */ - if (llap->llap_page->index < size_index) { + if (to != PAGE_SIZE && llap->llap_page->index < size_index) { LL_CDEBUG_PAGE(D_PAGE, llap->llap_page, "sync write before EOF: size_index %lu, to %d\n", size_index, to); to = PAGE_SIZE; - } else if (llap->llap_page->index == size_index) { + } else if (to != PAGE_SIZE && llap->llap_page->index == size_index) { int size_to = inode->i_size & ~PAGE_MASK; LL_CDEBUG_PAGE(D_PAGE, llap->llap_page, "sync write at EOF: size_index %lu, to %d/%d\n", diff --git a/lustre/lov/lov_internal.h b/lustre/lov/lov_internal.h index e6ecc5e..752d074 100644 --- a/lustre/lov/lov_internal.h +++ b/lustre/lov/lov_internal.h @@ -56,10 +56,10 @@ struct lov_async_page { int lap_magic; int lap_stripe; obd_off lap_sub_offset; + obd_id lap_loi_id; void *lap_sub_cookie; struct obd_async_page_ops *lap_caller_ops; void *lap_caller_data; - obd_id lap_loi_id; }; #define LAP_FROM_COOKIE(c) \ diff --git a/lustre/obdfilter/filter.c b/lustre/obdfilter/filter.c index 67df476..ddeba4d 100644 --- a/lustre/obdfilter/filter.c +++ b/lustre/obdfilter/filter.c @@ -258,7 +258,12 @@ static int filter_client_free(struct obd_export *exp, int flags) memset(&zero_fcd, 0, sizeof zero_fcd); push_ctxt(&saved, &obd->obd_ctxt, NULL); rc = fsfilt_write_record(obd, filter->fo_rcvd_filp, &zero_fcd, - sizeof(zero_fcd), &off, 1); + sizeof(zero_fcd), &off, 0); + + if (rc == 0) + /* update server's transno */ + filter_update_server_data(obd, filter->fo_rcvd_filp, + filter->fo_fsd, 1); pop_ctxt(&saved, &obd->obd_ctxt, NULL); CDEBUG(rc == 0 ? D_INFO : D_ERROR, @@ -1033,8 +1038,8 @@ static int filter_destroy_internal(struct obd_device *obd, obd_id objid, ENTRY; if (inode->i_nlink != 1 || atomic_read(&inode->i_count) != 1) { - CERROR("destroying objid %.*s nlink = %lu, count = %d\n", - dchild->d_name.len, dchild->d_name.name, + CERROR("destroying objid %.*s ino %lu nlink %lu count %d\n", + dchild->d_name.len, dchild->d_name.name, inode->i_ino, (unsigned long)inode->i_nlink, atomic_read(&inode->i_count)); } @@ -1420,7 +1425,6 @@ static void filter_grant_sanity_check(struct obd_device *obd, const char *func) obd_size maxsize = obd->obd_osfs.os_blocks * obd->obd_osfs.os_bsize; obd_size tot_dirty = 0, tot_pending = 0, tot_granted = 0; obd_size fo_tot_dirty, fo_tot_pending, fo_tot_granted; - int level = D_CACHE; if (list_empty(&obd->obd_exports)) return; @@ -1428,10 +1432,11 @@ static void filter_grant_sanity_check(struct obd_device *obd, const char *func) spin_lock(&obd->obd_osfs_lock); spin_lock(&obd->obd_dev_lock); list_for_each_entry(exp, &obd->obd_exports, exp_obd_chain) { + int error = 0; fed = &exp->exp_filter_data; if (fed->fed_grant < 0 || fed->fed_pending < 0 || fed->fed_dirty < 0) - level = D_ERROR; + error = 1; if (maxsize > 0) { /* we may not have done a statfs yet */ LASSERTF(fed->fed_grant + fed->fed_pending <= maxsize, "%s: cli %s/%p %ld+%ld > "LPU64"\n", func, @@ -1442,9 +1447,14 @@ static void filter_grant_sanity_check(struct obd_device *obd, const char *func) exp->exp_client_uuid.uuid, exp, fed->fed_dirty, maxsize); } - CDEBUG(level, "%s: cli %s/%p dirty %ld pend %ld grant %ld\n", - obd->obd_name, exp->exp_client_uuid.uuid, exp, - fed->fed_dirty, fed->fed_pending, fed->fed_grant); + if (error) + CERROR("%s: cli %s/%p dirty %ld pend %ld grant %ld\n", + obd->obd_name, exp->exp_client_uuid.uuid, exp, + fed->fed_dirty, fed->fed_pending,fed->fed_grant); + else + CDEBUG(D_CACHE, "%s: cli %s/%p dirty %ld pend %ld grant %ld\n", + obd->obd_name, exp->exp_client_uuid.uuid, exp, + fed->fed_dirty, fed->fed_pending,fed->fed_grant); tot_granted += fed->fed_grant + fed->fed_pending; tot_pending += fed->fed_pending; tot_dirty += fed->fed_dirty; diff --git a/lustre/obdfilter/filter_io.c b/lustre/obdfilter/filter_io.c index 9764996..599e9dc 100644 --- a/lustre/obdfilter/filter_io.c +++ b/lustre/obdfilter/filter_io.c @@ -187,18 +187,11 @@ restat: if (left >= tot_granted) { left -= tot_granted; } else { - static unsigned long next; - if (left < tot_granted - obd->u.filter.fo_tot_pending && - time_after(jiffies, next)) { - spin_unlock(&obd->obd_osfs_lock); + if (left < tot_granted - obd->u.filter.fo_tot_pending + 65536) { CERROR("%s: cli %s/%p grant "LPU64" > available " LPU64" and pending "LPU64"\n", obd->obd_name, exp->exp_client_uuid.uuid, exp, tot_granted, left, obd->u.filter.fo_tot_pending); - if (next == 0) - portals_debug_dumplog(); - next = jiffies + 20 * HZ; - spin_lock(&obd->obd_osfs_lock); } left = 0; } diff --git a/lustre/osc/osc_internal.h b/lustre/osc/osc_internal.h index f7ac79a..f36d6ce 100644 --- a/lustre/osc/osc_internal.h +++ b/lustre/osc/osc_internal.h @@ -14,11 +14,10 @@ struct osc_async_page { int oap_magic; + int oap_cmd; struct list_head oap_pending_item; struct list_head oap_urgent_item; struct list_head oap_rpc_item; - struct page *oap_page; - int oap_cmd; obd_off oap_obj_off; obd_off oap_page_off; @@ -27,8 +26,9 @@ struct osc_async_page { enum async_flags oap_async_flags; unsigned long oap_interrupted:1; - struct obd_io_group *oap_oig; struct oig_callback_context oap_occ; + struct page *oap_page; + struct obd_io_group *oap_oig; struct ptlrpc_request *oap_request; struct client_obd *oap_cli; struct lov_oinfo *oap_loi; diff --git a/lustre/portals/router/proc.c b/lustre/portals/router/proc.c index a1397d2..9a3d4f2 100644 --- a/lustre/portals/router/proc.c +++ b/lustre/portals/router/proc.c @@ -31,6 +31,7 @@ struct proc_route_data { struct list_head *curr; unsigned int generation; off_t skip; + rwlock_t proc_route_rwlock; } kpr_read_routes_data; /* nal2name support re-used from utils/portals.c */ @@ -96,19 +97,22 @@ static int kpr_proc_router_write(struct file *file, const char *ubuffer, static int kpr_proc_routes_read(char *page, char **start, off_t off, int count, int *eof, void *data) { - struct proc_route_data *prd = data; - kpr_route_entry_t *re; - kpr_gateway_entry_t *ge; - int chunk_len = 0; - int line_len = 0; - int user_len = 0; + struct proc_route_data *prd = data; + kpr_route_entry_t *re; + kpr_gateway_entry_t *ge; + int chunk_len = 0; + int line_len = 0; + int user_len = 0; + int rc = 0; *eof = 1; *start = page; + write_lock(&(prd->proc_route_rwlock)); + if (prd->curr == NULL) { if (off != 0) - return 0; + goto routes_read_exit; /* First pass, initialize our private data */ prd->curr = kpr_routes.next; @@ -118,13 +122,14 @@ static int kpr_proc_routes_read(char *page, char **start, off_t off, /* Abort route list generation change */ if (prd->generation != kpr_routes_generation) { prd->curr = NULL; - return sprintf(page, "\nError: Routes Changed\n"); + rc = sprintf(page, "\nError: Routes Changed\n"); + goto routes_read_exit; } /* All the routes have been walked */ if (prd->curr == &kpr_routes) { prd->curr = NULL; - return 0; + goto routes_read_exit; } } @@ -148,7 +153,8 @@ static int kpr_proc_routes_read(char *page, char **start, off_t off, if (prd->curr->next == NULL) { prd->curr = NULL; read_unlock(&kpr_rwlock); - return sprintf(page, "\nError: Routes Changed\n"); + rc = sprintf(page, "\nError: Routes Changed\n"); + goto routes_read_exit; } prd->curr = prd->curr->next; @@ -169,13 +175,18 @@ static int kpr_proc_routes_read(char *page, char **start, off_t off, prd->curr = prd->curr->prev; prd->skip = line_len - (user_len - count); read_unlock(&kpr_rwlock); - return count; + rc = count; + goto routes_read_exit; } /* Not enough data to entirely satify callers request */ prd->skip = 0; read_unlock(&kpr_rwlock); - return user_len; + rc = user_len; + +routes_read_exit: + write_unlock(&(prd->proc_route_rwlock)); + return rc; } static int kpr_proc_routes_write(struct file *file, const char *ubuffer, @@ -215,6 +226,7 @@ void kpr_proc_init(void) kpr_read_routes_data.curr = NULL; kpr_read_routes_data.generation = 0; kpr_read_routes_data.skip = 0; + kpr_read_routes_data.proc_route_rwlock = RW_LOCK_UNLOCKED; routes_entry->data = &kpr_read_routes_data; routes_entry->read_proc = kpr_proc_routes_read; diff --git a/lustre/ptlrpc/niobuf.c b/lustre/ptlrpc/niobuf.c index 1171fb5..df2110d 100644 --- a/lustre/ptlrpc/niobuf.c +++ b/lustre/ptlrpc/niobuf.c @@ -447,6 +447,10 @@ int ptl_send_rpc(struct ptlrpc_request *request) if (rc != PTL_OK) { CERROR("PtlMDAttach failed: %d\n", rc); LASSERT (rc == PTL_NO_SPACE); + spin_lock_irqsave (&request->rq_lock, flags); + /* ...but the MD attach didn't succeed... */ + request->rq_receiving_reply = 0; + spin_unlock_irqrestore (&request->rq_lock, flags); GOTO(cleanup_me, rc -ENOMEM); } @@ -456,7 +460,7 @@ int ptl_send_rpc(struct ptlrpc_request *request) request->rq_reply_portal, connection->c_peer.peer_ni->pni_name); - ptlrpc_request_addref(request); /* +1 ref for the SENT callback */ + ptlrpc_request_addref(request); /* +1 ref for the SENT callback */ request->rq_sent = CURRENT_SECONDS; ptlrpc_pinger_sending_on_import(request->rq_import); diff --git a/lustre/ptlrpc/recover.c b/lustre/ptlrpc/recover.c index 72924fc..b6bae88 100644 --- a/lustre/ptlrpc/recover.c +++ b/lustre/ptlrpc/recover.c @@ -224,8 +224,7 @@ int ptlrpc_replay_next(struct obd_import *imp, int *inflight) int ptlrpc_resend(struct obd_import *imp) { - struct list_head *tmp, *pos; - struct ptlrpc_request *req; + struct ptlrpc_request *req, *next; unsigned long flags; ENTRY; @@ -243,8 +242,10 @@ int ptlrpc_resend(struct obd_import *imp) } spin_unlock_irqrestore(&imp->imp_lock, flags); - list_for_each_safe(tmp, pos, &imp->imp_sending_list) { - req = list_entry(tmp, struct ptlrpc_request, rq_list); + list_for_each_entry_safe(req, next, &imp->imp_sending_list, rq_list) { + LASSERTF((long)req > PAGE_SIZE && req != LP_POISON, + "req %p bad\n", req); + LASSERTF(req->rq_type != LI_POISON, "req %p freed\n", req); ptlrpc_resend_req(req); } diff --git a/lustre/tests/recovery-small.sh b/lustre/tests/recovery-small.sh index 6865e6c..157bca1 100755 --- a/lustre/tests/recovery-small.sh +++ b/lustre/tests/recovery-small.sh @@ -148,7 +148,7 @@ test_10() { run_test 10 "finish request on server after client eviction (bug 1521)" #bug 2460 -# wake up a thead waiting for completion after eviction +# wake up a thread waiting for completion after eviction test_11(){ do_facet client multiop $MOUNT/$tfile Ow || return 1 do_facet client multiop $MOUNT/$tfile or || return 2 @@ -161,7 +161,7 @@ test_11(){ do_facet client munlink $MOUNT/$tfile || return 4 } -run_test 11 "wake up a thead waiting for completion after eviction (b=2460)" +run_test 11 "wake up a thread waiting for completion after eviction (b=2460)" #b=2494 test_12(){ diff --git a/lustre/utils/lconf b/lustre/utils/lconf index 96cf611..9fe97fa 100755 --- a/lustre/utils/lconf +++ b/lustre/utils/lconf @@ -969,7 +969,7 @@ def sys_get_local_address(net_type, wildcard, cluster_id): iface, star = string.split(wildcard, ':') local = if2addr(iface) if not local: - panic ("unable to determine ip for:", wildcard) + panic("unable to determine ip for:", wildcard) else: host = socket.gethostname() local = socket.gethostbyname(host) -- 1.8.3.1