Branch: b1_4

author adilger <adilger> Thu, 18 Nov 2004 20:57:47 +0000 (20:57 +0000)
committer adilger <adilger> Thu, 18 Nov 2004 20:57:47 +0000 (20:57 +0000)
diff --git a/lustre/ChangeLog b/lustre/ChangeLog

index 04449b8..85f9bae 100644 (file)
--- a/lustre/ChangeLog
+++ b/lustre/ChangeLog
@@ -1,22 +1,27 @@
  tbd         Cluster File Systems, Inc. <info@clusterfs.com>
         * version 1.3.4
         * bug fixes
+        - changes from 1.2.9
         - flock/lockf fixes (but it's still disabled, pending 5135)
         - don't use EXT3 constants in llite code (5094)
-       - return async write errors to application if possible (2248)
-       - return last_committed value from OST to avoid OOM (4966)
         - memory shortage at startup could cause assertion (5176)
-       - the watchdog thread now runs as interruptible (5246)
         * miscellania
         - reorganization of lov code
         - single portals codebase
         - Infiniband NAL
-       - add pid to ldlm debugging output (4922)
-       - return last_committed value from OST to avoid OOM (4966)
         - add extents/mballoc support (5025)
         - direct I/O reads in the obdfilter (4048)
  
  tbd         Cluster File Systems, Inc. <info@clusterfs.com>
+       * version 1.2.9
+       - send OST transaction number in read/write reply to free req (4966)
+       - don't ASSERT in ptl_send_rpc() if we run out of memory (5119)
+       - lock /proc/sys/portals/routes internal state, avoiding oops (4827)
+       - the watchdog thread now runs as interruptible (5246)
+       * miscellania
+       - add pid to ldlm debugging output (4922)
+
+2004-11-17  Cluster File Systems, Inc. <info@clusterfs.com>
         * version 1.2.8
         * bug fixes
         - allocate qswnal tx descriptors singly to avoid fragmentation (4504)
@@ -39,6 +44,7 @@ tbd         Cluster File Systems, Inc. <info@clusterfs.com>
         - add software watchdogs to catch hung threads quickly (4941)
         - make lustrefs init script start after nfs is mounted
         - fix CWARN/ERROR duplication (4930)
+       - return async write errors to application if possible (2248)
         - add /proc/sys/portal/memused (bytes allocated by PORTALS_ALLOC)
         - print NAL number in %x format (4645)
  
diff --git a/lustre/autoMakefile.am b/lustre/autoMakefile.am

index 7830e54..81bde4b 100644 (file)
--- a/lustre/autoMakefile.am
+++ b/lustre/autoMakefile.am
@@ -56,7 +56,7 @@ lvfs-sources:
         $(MAKE) sources -C lvfs
  
  modules: lustre_build_version $(DEP) $(LDISKFS) lvfs-sources
-       $(MAKE) $(ARCH_UM) CC=$(CC) -C $(LINUX) -f $(PWD)/kernel-tests/Makefile LUSTRE_LINUX_CONFIG=$(LINUX_CONFIG) $(MODULE_TARGET)=$(PWD) -o tmp_include_depends -o scripts -o include/config/MARKER $@
+       $(MAKE) $(ARCH_UM) CC="$(CC)" -C $(LINUX) -f $(PWD)/kernel-tests/Makefile LUSTRE_LINUX_CONFIG=$(LINUX_CONFIG) $(MODULE_TARGET)=$(PWD) -o tmp_include_depends -o scripts -o include/config/MARKER $@
  
  endif # MODULES
  
diff --git a/lustre/ldlm/ldlm_flock.c b/lustre/ldlm/ldlm_flock.c

index 1dacf83..ac41bfe 100644 (file)
--- a/lustre/ldlm/ldlm_flock.c
+++ b/lustre/ldlm/ldlm_flock.c
@@ -469,7 +469,7 @@ ldlm_flock_completion_ast(struct ldlm_lock *lock, int flags, void *data)
          LDLM_DEBUG(lock, "client-side enqueue returned a blocked lock, "
                     "sleeping");
  
-        ldlm_lock_dump(D_DLMTRACE, lock, 0);
+        ldlm_lock_dump(D_OTHER, lock, 0);
  
          fwd.fwd_lock = lock;
          obd = class_exp2obd(lock->l_conn_export);
diff --git a/lustre/ldlm/ldlm_lockd.c b/lustre/ldlm/ldlm_lockd.c

index 4074048..39d24ac 100644 (file)
--- a/lustre/ldlm/ldlm_lockd.c
+++ b/lustre/ldlm/ldlm_lockd.c
@@ -170,7 +170,7 @@ static int expired_lock_main(void *arg)
  
  static void waiting_locks_callback(unsigned long unused)
  {
-        struct ldlm_lock *lock;
+        struct ldlm_lock *lock, *last = NULL;
          char str[PTL_NALFMT_SIZE];
  
          spin_lock_bh(&waiting_locks_spinlock);
@@ -187,6 +187,17 @@ static void waiting_locks_callback(unsigned long unused)
                             lock->l_export->exp_connection->c_remote_uuid.uuid,
                             ptlrpc_peernid2str(&lock->l_export->exp_connection->c_peer, str));
  
+                if (lock == last) {
+                        LDLM_ERROR(lock, "waiting on lock multiple times");
+                        CERROR("wll %p .prev %p, l_pending.next %p .prev %p\n",
+                               waiting_locks_list.next, waiting_locks_list.prev,
+                               lock->l_pending_chain.next,
+                               lock->l_pending_chain.prev);
+                        spin_unlock(&waiting_locks_spinlock);
+                        LBUG();
+                }
+                last = lock;
+
                  spin_lock_bh(&expired_lock_thread.elt_lock);
                  list_del(&lock->l_pending_chain);
                  list_add(&lock->l_pending_chain,
diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c

index 33d93c2..2d09beb 100644 (file)
--- a/lustre/llite/llite_lib.c
+++ b/lustre/llite/llite_lib.c
@@ -742,7 +742,8 @@ static int null_if_equal(struct ldlm_lock *lock, void *data)
                  lock->l_ast_data = NULL;
  
                  if (lock->l_req_mode != lock->l_granted_mode)
-                        LDLM_ERROR(lock,"clearing inode with ungranted lock\n");        }
+                        LDLM_ERROR(lock,"clearing inode with ungranted lock\n");
+        }
  
          return LDLM_ITER_CONTINUE;
  }
diff --git a/lustre/llite/rw.c b/lustre/llite/rw.c

index 3d3c7ec..9798c3e 100644 (file)
--- a/lustre/llite/rw.c
+++ b/lustre/llite/rw.c
@@ -434,12 +434,12 @@ static int queue_or_sync_write(struct obd_export *exp, struct inode *inode,
                  GOTO(out, rc);
  
          /* make full-page requests if we are not at EOF (bug 4410) */
-        if (llap->llap_page->index < size_index) {
+        if (to != PAGE_SIZE && llap->llap_page->index < size_index) {
                  LL_CDEBUG_PAGE(D_PAGE, llap->llap_page,
                                 "sync write before EOF: size_index %lu, to %d\n",
                                 size_index, to);
                  to = PAGE_SIZE;
-        } else if (llap->llap_page->index == size_index) {
+        } else if (to != PAGE_SIZE && llap->llap_page->index == size_index) {
                  int size_to = inode->i_size & ~PAGE_MASK;
                  LL_CDEBUG_PAGE(D_PAGE, llap->llap_page,
                                 "sync write at EOF: size_index %lu, to %d/%d\n",
diff --git a/lustre/lov/lov_internal.h b/lustre/lov/lov_internal.h

index e6ecc5e..752d074 100644 (file)
--- a/lustre/lov/lov_internal.h
+++ b/lustre/lov/lov_internal.h
@@ -56,10 +56,10 @@ struct lov_async_page {
          int                             lap_magic;
          int                             lap_stripe;
          obd_off                         lap_sub_offset;
+        obd_id                          lap_loi_id;
          void                            *lap_sub_cookie;
          struct obd_async_page_ops       *lap_caller_ops;
          void                            *lap_caller_data;
-        obd_id                          lap_loi_id;
  };
  
  #define LAP_FROM_COOKIE(c)                                                      \
diff --git a/lustre/obdfilter/filter.c b/lustre/obdfilter/filter.c

index 67df476..ddeba4d 100644 (file)
--- a/lustre/obdfilter/filter.c
+++ b/lustre/obdfilter/filter.c
@@ -258,7 +258,12 @@ static int filter_client_free(struct obd_export *exp, int flags)
          memset(&zero_fcd, 0, sizeof zero_fcd);
          push_ctxt(&saved, &obd->obd_ctxt, NULL);
          rc = fsfilt_write_record(obd, filter->fo_rcvd_filp, &zero_fcd,
-                                 sizeof(zero_fcd), &off, 1);
+                                 sizeof(zero_fcd), &off, 0);
+
+        if (rc == 0)
+                /* update server's transno */
+                filter_update_server_data(obd, filter->fo_rcvd_filp,
+                                          filter->fo_fsd, 1);
          pop_ctxt(&saved, &obd->obd_ctxt, NULL);
  
          CDEBUG(rc == 0 ? D_INFO : D_ERROR,
@@ -1033,8 +1038,8 @@ static int filter_destroy_internal(struct obd_device *obd, obd_id objid,
          ENTRY;
  
          if (inode->i_nlink != 1 || atomic_read(&inode->i_count) != 1) {
-                CERROR("destroying objid %.*s nlink = %lu, count = %d\n",
-                       dchild->d_name.len, dchild->d_name.name,
+                CERROR("destroying objid %.*s ino %lu nlink %lu count %d\n",
+                       dchild->d_name.len, dchild->d_name.name, inode->i_ino,
                         (unsigned long)inode->i_nlink,
                         atomic_read(&inode->i_count));
          }
@@ -1420,7 +1425,6 @@ static void filter_grant_sanity_check(struct obd_device *obd, const char *func)
          obd_size maxsize = obd->obd_osfs.os_blocks * obd->obd_osfs.os_bsize;
          obd_size tot_dirty = 0, tot_pending = 0, tot_granted = 0;
          obd_size fo_tot_dirty, fo_tot_pending, fo_tot_granted;
-        int level = D_CACHE;
  
          if (list_empty(&obd->obd_exports))
                  return;
@@ -1428,10 +1432,11 @@ static void filter_grant_sanity_check(struct obd_device *obd, const char *func)
          spin_lock(&obd->obd_osfs_lock);
          spin_lock(&obd->obd_dev_lock);
          list_for_each_entry(exp, &obd->obd_exports, exp_obd_chain) {
+                int error = 0;
                  fed = &exp->exp_filter_data;
                  if (fed->fed_grant < 0 || fed->fed_pending < 0 ||
                      fed->fed_dirty < 0)
-                        level = D_ERROR;
+                        error = 1;
                  if (maxsize > 0) { /* we may not have done a statfs yet */
                          LASSERTF(fed->fed_grant + fed->fed_pending <= maxsize,
                                   "%s: cli %s/%p %ld+%ld > "LPU64"\n", func,
@@ -1442,9 +1447,14 @@ static void filter_grant_sanity_check(struct obd_device *obd, const char *func)
                                   exp->exp_client_uuid.uuid, exp,
                                   fed->fed_dirty, maxsize);
                  }
-                CDEBUG(level, "%s: cli %s/%p dirty %ld pend %ld grant %ld\n",
-                       obd->obd_name, exp->exp_client_uuid.uuid, exp,
-                       fed->fed_dirty, fed->fed_pending, fed->fed_grant);
+                if (error)
+                        CERROR("%s: cli %s/%p dirty %ld pend %ld grant %ld\n",
+                               obd->obd_name, exp->exp_client_uuid.uuid, exp,
+                               fed->fed_dirty, fed->fed_pending,fed->fed_grant);
+                else
+                        CDEBUG(D_CACHE, "%s: cli %s/%p dirty %ld pend %ld grant %ld\n",
+                               obd->obd_name, exp->exp_client_uuid.uuid, exp,
+                               fed->fed_dirty, fed->fed_pending,fed->fed_grant);
                  tot_granted += fed->fed_grant + fed->fed_pending;
                  tot_pending += fed->fed_pending;
                  tot_dirty += fed->fed_dirty;
diff --git a/lustre/obdfilter/filter_io.c b/lustre/obdfilter/filter_io.c

index 9764996..599e9dc 100644 (file)
--- a/lustre/obdfilter/filter_io.c
+++ b/lustre/obdfilter/filter_io.c
@@ -187,18 +187,11 @@ restat:
          if (left >= tot_granted) {
                  left -= tot_granted;
          } else {
-                static unsigned long next;
-                if (left < tot_granted - obd->u.filter.fo_tot_pending &&
-                    time_after(jiffies, next)) {
-                        spin_unlock(&obd->obd_osfs_lock);
+                if (left < tot_granted - obd->u.filter.fo_tot_pending + 65536) {
                          CERROR("%s: cli %s/%p grant "LPU64" > available "
                                 LPU64" and pending "LPU64"\n", obd->obd_name,
                                 exp->exp_client_uuid.uuid, exp, tot_granted,
                                 left, obd->u.filter.fo_tot_pending);
-                        if (next == 0)
-                                portals_debug_dumplog();
-                        next = jiffies + 20 * HZ;
-                        spin_lock(&obd->obd_osfs_lock);
                  }
                  left = 0;
          }
diff --git a/lustre/osc/osc_internal.h b/lustre/osc/osc_internal.h

index f7ac79a..f36d6ce 100644 (file)
--- a/lustre/osc/osc_internal.h
+++ b/lustre/osc/osc_internal.h
@@ -14,11 +14,10 @@
  
  struct osc_async_page {
          int                     oap_magic;
+        int                     oap_cmd;
          struct list_head        oap_pending_item;
          struct list_head        oap_urgent_item;
          struct list_head        oap_rpc_item;
-        struct page             *oap_page;
-        int                     oap_cmd;
  
          obd_off                 oap_obj_off;
          obd_off                 oap_page_off;
@@ -27,8 +26,9 @@ struct osc_async_page {
          enum async_flags        oap_async_flags;
  
          unsigned long           oap_interrupted:1;
-        struct obd_io_group     *oap_oig;
          struct oig_callback_context oap_occ;
+        struct page             *oap_page;
+        struct obd_io_group     *oap_oig;
          struct ptlrpc_request   *oap_request;
          struct client_obd       *oap_cli;
          struct lov_oinfo        *oap_loi;
diff --git a/lustre/portals/router/proc.c b/lustre/portals/router/proc.c

index a1397d2..9a3d4f2 100644 (file)
--- a/lustre/portals/router/proc.c
+++ b/lustre/portals/router/proc.c
@@ -31,6 +31,7 @@ struct proc_route_data {
          struct list_head *curr;
          unsigned int generation;
          off_t skip;
+        rwlock_t proc_route_rwlock;
  } kpr_read_routes_data;
  
  /* nal2name support re-used from utils/portals.c */
@@ -96,19 +97,22 @@ static int kpr_proc_router_write(struct file *file, const char *ubuffer,
  static int kpr_proc_routes_read(char *page, char **start, off_t off,
                                  int count, int *eof, void *data)
  {
-        struct proc_route_data *prd = data;
-        kpr_route_entry_t     *re;
-        kpr_gateway_entry_t *ge;
-        int                 chunk_len = 0;
-        int                 line_len = 0;
-        int                 user_len = 0;
+        struct proc_route_data  *prd = data;
+        kpr_route_entry_t       *re;
+        kpr_gateway_entry_t     *ge;
+        int                     chunk_len = 0;
+        int                     line_len = 0;
+        int                     user_len = 0;
+        int                     rc = 0;
  
          *eof = 1;
          *start = page;
  
+        write_lock(&(prd->proc_route_rwlock));
+
          if (prd->curr == NULL) {
                  if (off != 0)
-                        return 0;
+                        goto routes_read_exit;
  
                  /* First pass, initialize our private data */
                  prd->curr = kpr_routes.next;
@@ -118,13 +122,14 @@ static int kpr_proc_routes_read(char *page, char **start, off_t off,
                  /* Abort route list generation change */
                  if (prd->generation != kpr_routes_generation) {
                          prd->curr = NULL;
-                        return sprintf(page, "\nError: Routes Changed\n");
+                        rc = sprintf(page, "\nError: Routes Changed\n");
+                        goto routes_read_exit;
                  }
  
                  /* All the routes have been walked */
                  if (prd->curr == &kpr_routes) {
                          prd->curr = NULL;
-                        return 0;
+                        goto routes_read_exit;
                  }
          }
  
@@ -148,7 +153,8 @@ static int kpr_proc_routes_read(char *page, char **start, off_t off,
                  if (prd->curr->next == NULL) {
                          prd->curr = NULL;
                          read_unlock(&kpr_rwlock);
-                        return sprintf(page, "\nError: Routes Changed\n");
+                        rc = sprintf(page, "\nError: Routes Changed\n");
+                        goto routes_read_exit;
                  }
  
                  prd->curr = prd->curr->next;
@@ -169,13 +175,18 @@ static int kpr_proc_routes_read(char *page, char **start, off_t off,
                  prd->curr = prd->curr->prev;
                  prd->skip = line_len - (user_len - count);
                  read_unlock(&kpr_rwlock);
-                return count;
+                rc = count;
+                goto routes_read_exit;
          }
  
          /* Not enough data to entirely satify callers request */
          prd->skip = 0;
          read_unlock(&kpr_rwlock);
-        return user_len;
+        rc = user_len;
+
+routes_read_exit:
+        write_unlock(&(prd->proc_route_rwlock));
+        return rc;
  }
  
  static int kpr_proc_routes_write(struct file *file, const char *ubuffer,
@@ -215,6 +226,7 @@ void kpr_proc_init(void)
          kpr_read_routes_data.curr = NULL;
          kpr_read_routes_data.generation = 0;
          kpr_read_routes_data.skip = 0;
+        kpr_read_routes_data.proc_route_rwlock = RW_LOCK_UNLOCKED;
  
          routes_entry->data = &kpr_read_routes_data;
          routes_entry->read_proc = kpr_proc_routes_read;
diff --git a/lustre/ptlrpc/niobuf.c b/lustre/ptlrpc/niobuf.c

index 1171fb5..df2110d 100644 (file)
--- a/lustre/ptlrpc/niobuf.c
+++ b/lustre/ptlrpc/niobuf.c
@@ -447,6 +447,10 @@ int ptl_send_rpc(struct ptlrpc_request *request)
          if (rc != PTL_OK) {
                  CERROR("PtlMDAttach failed: %d\n", rc);
                  LASSERT (rc == PTL_NO_SPACE);
+                spin_lock_irqsave (&request->rq_lock, flags);
+                /* ...but the MD attach didn't succeed... */
+                request->rq_receiving_reply = 0;
+                spin_unlock_irqrestore (&request->rq_lock, flags);
                  GOTO(cleanup_me, rc -ENOMEM);
          }
  
@@ -456,7 +460,7 @@ int ptl_send_rpc(struct ptlrpc_request *request)
                 request->rq_reply_portal,
                 connection->c_peer.peer_ni->pni_name);
  
-        ptlrpc_request_addref(request);        /* +1 ref for the SENT callback */
+        ptlrpc_request_addref(request);       /* +1 ref for the SENT callback */
  
          request->rq_sent = CURRENT_SECONDS;
          ptlrpc_pinger_sending_on_import(request->rq_import);
diff --git a/lustre/ptlrpc/recover.c b/lustre/ptlrpc/recover.c

index 72924fc..b6bae88 100644 (file)
--- a/lustre/ptlrpc/recover.c
+++ b/lustre/ptlrpc/recover.c
@@ -224,8 +224,7 @@ int ptlrpc_replay_next(struct obd_import *imp, int *inflight)
  
  int ptlrpc_resend(struct obd_import *imp)
  {
-        struct list_head *tmp, *pos;
-        struct ptlrpc_request *req;
+        struct ptlrpc_request *req, *next;
          unsigned long flags;
  
          ENTRY;
@@ -243,8 +242,10 @@ int ptlrpc_resend(struct obd_import *imp)
          }
          spin_unlock_irqrestore(&imp->imp_lock, flags);
  
-        list_for_each_safe(tmp, pos, &imp->imp_sending_list) {
-                req = list_entry(tmp, struct ptlrpc_request, rq_list);
+        list_for_each_entry_safe(req, next, &imp->imp_sending_list, rq_list) {
+                LASSERTF((long)req > PAGE_SIZE && req != LP_POISON,
+                         "req %p bad\n", req);
+                LASSERTF(req->rq_type != LI_POISON, "req %p freed\n", req);
                  ptlrpc_resend_req(req);
          }
  
diff --git a/lustre/tests/recovery-small.sh b/lustre/tests/recovery-small.sh

index 6865e6c..157bca1 100755 (executable)
--- a/lustre/tests/recovery-small.sh
+++ b/lustre/tests/recovery-small.sh
@@ -148,7 +148,7 @@ test_10() {
  run_test 10 "finish request on server after client eviction (bug 1521)"
  
  #bug 2460
-# wake up a thead waiting for completion after eviction
+# wake up a thread waiting for completion after eviction
  test_11(){
      do_facet client multiop $MOUNT/$tfile Ow  || return 1
      do_facet client multiop $MOUNT/$tfile or  || return 2
@@ -161,7 +161,7 @@ test_11(){
  
      do_facet client munlink $MOUNT/$tfile  || return 4
  }
-run_test 11 "wake up a thead waiting for completion after eviction (b=2460)"
+run_test 11 "wake up a thread waiting for completion after eviction (b=2460)"
  
  #b=2494
  test_12(){
diff --git a/lustre/utils/lconf b/lustre/utils/lconf

index 96cf611..9fe97fa 100755 (executable)
--- a/lustre/utils/lconf
+++ b/lustre/utils/lconf
@@ -969,7 +969,7 @@ def sys_get_local_address(net_type, wildcard, cluster_id):
              iface, star = string.split(wildcard, ':')
              local = if2addr(iface)
              if not local:
-                panic ("unable to determine ip for:", wildcard)
+                panic("unable to determine ip for:", wildcard)
          else:
              host = socket.gethostname()
              local = socket.gethostbyname(host)
author	adilger <adilger>
	Thu, 18 Nov 2004 20:57:47 +0000 (20:57 +0000)
committer	adilger <adilger>
	Thu, 18 Nov 2004 20:57:47 +0000 (20:57 +0000)
lustre/ChangeLog		patch \| blob \| history
lustre/autoMakefile.am		patch \| blob \| history
lustre/ldlm/ldlm_flock.c		patch \| blob \| history
lustre/ldlm/ldlm_lockd.c		patch \| blob \| history
lustre/llite/llite_lib.c		patch \| blob \| history
lustre/llite/rw.c		patch \| blob \| history
lustre/lov/lov_internal.h		patch \| blob \| history
lustre/obdfilter/filter.c		patch \| blob \| history
lustre/obdfilter/filter_io.c		patch \| blob \| history
lustre/osc/osc_internal.h		patch \| blob \| history
lustre/portals/router/proc.c		patch \| blob \| history
lustre/ptlrpc/niobuf.c		patch \| blob \| history
lustre/ptlrpc/recover.c		patch \| blob \| history
lustre/tests/recovery-small.sh		patch \| blob \| history
lustre/utils/lconf		patch \| blob \| history