Whamcloud - gitweb
Handle SF IOR better by working harder to avoid granting conflicting locks.
author adilger <adilger>
Wed, 24 Mar 2004 00:36:19 +0000 (00:36 +0000)
committer adilger <adilger>
Wed, 24 Mar 2004 00:36:19 +0000 (00:36 +0000)
From b1_2.
b=2919

lustre/ChangeLog
lustre/include/linux/obd.h
lustre/ldlm/ldlm_extent.c
lustre/ldlm/ldlm_lock.c
lustre/llite/file.c
lustre/llite/namei.c
lustre/obdfilter/filter.c
lustre/obdfilter/filter_io.c

index f70d5d1..753aa40 100644 (file)
@@ -26,6 +26,7 @@ tbd  Cluster File Systems, Inc. <info@clusterfs.com>
        - don't oops on a deleted current working directory (2399)
        - handle hard links to targets without a parent properly (2517)
        - don't dereference NULL lock when racing during eviction (2867)
        - don't oops on a deleted current working directory (2399)
        - handle hard links to targets without a parent properly (2517)
        - don't dereference NULL lock when racing during eviction (2867)
+       - don't grow lock extents when lots of conflicting locks (2919)
 
 2004-03-04  Cluster File Systems, Inc. <info@clusterfs.com>
        * version 1.2.0
 
 2004-03-04  Cluster File Systems, Inc. <info@clusterfs.com>
        * version 1.2.0
index 2f23fb8..c5de023 100644 (file)
@@ -58,9 +58,9 @@ struct lov_oinfo {                 /* per-stripe data structure */
         struct list_head loi_read_item;
 
         int loi_kms_valid:1;
         struct list_head loi_read_item;
 
         int loi_kms_valid:1;
-        __u64 loi_kms; /* known minimum size */
-        __u64 loi_rss; /* recently seen size */
-        __u64 loi_mtime; /* recently seen mtime */
+        __u64 loi_kms;             /* known minimum size */
+        __u64 loi_rss;             /* recently seen size */
+        __u64 loi_mtime;           /* recently seen mtime */
 };
 
 static inline void loi_init(struct lov_oinfo *loi)
 };
 
 static inline void loi_init(struct lov_oinfo *loi)
index 16a27f9..312f5de 100644 (file)
@@ -45,6 +45,7 @@ ldlm_extent_internal_policy(struct list_head *queue, struct ldlm_lock *req,
         ldlm_mode_t req_mode = req->l_req_mode;
         __u64 req_start = req->l_req_extent.start;
         __u64 req_end = req->l_req_extent.end;
         ldlm_mode_t req_mode = req->l_req_mode;
         __u64 req_start = req->l_req_extent.start;
         __u64 req_end = req->l_req_extent.end;
+        int conflicting = 0;
         ENTRY;
 
         lockmode_verify(req_mode);
         ENTRY;
 
         lockmode_verify(req_mode);
@@ -65,15 +66,21 @@ ldlm_extent_internal_policy(struct list_head *queue, struct ldlm_lock *req,
                 if (req == lock)
                         continue;
 
                 if (req == lock)
                         continue;
 
+                /* Locks are compatible, overlap doesn't matter */
+                if (lockmode_compat(lock->l_req_mode, req_mode))
+                        continue;
+
+                /* If this is a high-traffic lock, don't grow downwards at all
+                 * or grow upwards too much */
+                ++conflicting;
+                if (conflicting > 4)
+                        new_ex->start = req_start;
+
                 /* If lock doesn't overlap new_ex, skip it. */
                 if (l_extent->end < new_ex->start ||
                     l_extent->start > new_ex->end)
                         continue;
 
                 /* If lock doesn't overlap new_ex, skip it. */
                 if (l_extent->end < new_ex->start ||
                     l_extent->start > new_ex->end)
                         continue;
 
-                /* Locks are compatible, overlap doesn't matter */
-                if (lockmode_compat(lock->l_req_mode, req_mode))
-                        continue;
-
                 /* Locks conflicting in requested extents and we can't satisfy
                  * both locks, so ignore it.  Either we will ping-pong this
                  * extent (we would regardless of what extent we granted) or
                 /* Locks conflicting in requested extents and we can't satisfy
                  * both locks, so ignore it.  Either we will ping-pong this
                  * extent (we would regardless of what extent we granted) or
@@ -85,10 +92,10 @@ ldlm_extent_internal_policy(struct list_head *queue, struct ldlm_lock *req,
                 /* We grow extents downwards only as far as they don't overlap
                  * with already-granted locks, on the assumtion that clients
                  * will be writing beyond the initial requested end and would
                 /* We grow extents downwards only as far as they don't overlap
                  * with already-granted locks, on the assumtion that clients
                  * will be writing beyond the initial requested end and would
-                 * then need to enqueue a new lock beyond the previous request.
-                 * We don't grow downwards if there are lots of lockers. */
-                if (l_extent->start < req_start) {
-                        if (atomic_read(&req->l_resource->lr_refcount) > 20)
+                 * then need to enqueue a new lock beyond previous request.
+                 * l_req_extent->end strictly < req_start, checked above. */
+                if (l_extent->start < req_start && new_ex->start != req_start) {
+                        if (l_extent->end >= req_start)
                                 new_ex->start = req_start;
                         else
                                 new_ex->start = min(l_extent->end+1, req_start);
                                 new_ex->start = req_start;
                         else
                                 new_ex->start = min(l_extent->end+1, req_start);
@@ -107,6 +114,13 @@ ldlm_extent_internal_policy(struct list_head *queue, struct ldlm_lock *req,
                                 new_ex->end = max(l_extent->start - 1, req_end);
                 }
         }
                                 new_ex->end = max(l_extent->start - 1, req_end);
                 }
         }
+
+#define LDLM_MAX_GROWN_EXTENT (32 * 1024 * 1024 - 1)
+        if (conflicting > 32 && (req_mode == LCK_PW || req_mode == LCK_CW)) {
+                if (req_end < req_start + LDLM_MAX_GROWN_EXTENT)
+                        new_ex->end = min(req_start + LDLM_MAX_GROWN_EXTENT,
+                                          new_ex->end);
+        }
         EXIT;
 }
 
         EXIT;
 }
 
index 9ffc201..1b5b3fb 100644 (file)
@@ -1040,8 +1040,7 @@ void ldlm_lock_cancel(struct ldlm_lock *lock)
         /* Please do not, no matter how tempting, remove this LBUG without
          * talking to me first. -phik */
         if (lock->l_readers || lock->l_writers) {
         /* Please do not, no matter how tempting, remove this LBUG without
          * talking to me first. -phik */
         if (lock->l_readers || lock->l_writers) {
-                LDLM_DEBUG(lock, "lock still has references");
-                ldlm_lock_dump(D_OTHER, lock, 0);
+                LDLM_ERROR(lock, "lock still has references");
                 LBUG();
         }
 
                 LBUG();
         }
 
@@ -1176,8 +1175,9 @@ void ldlm_lock_dump(int level, struct ldlm_lock *lock, int pos)
         CDEBUG(level, "  Resource: %p ("LPU64"/"LPU64")\n", lock->l_resource,
                lock->l_resource->lr_name.name[0],
                lock->l_resource->lr_name.name[1]);
         CDEBUG(level, "  Resource: %p ("LPU64"/"LPU64")\n", lock->l_resource,
                lock->l_resource->lr_name.name[0],
                lock->l_resource->lr_name.name[1]);
-        CDEBUG(level, "  Req mode: %d, grant mode: %d, rc: %u, read: %d, "
-               "write: %d\n", (int)lock->l_req_mode, (int)lock->l_granted_mode,
+        CDEBUG(level, "  Req mode: %s, grant mode: %s, rc: %u, read: %d, "
+               "write: %d\n", ldlm_lockname[lock->l_req_mode],
+               ldlm_lockname[lock->l_granted_mode],
                atomic_read(&lock->l_refc), lock->l_readers, lock->l_writers);
         if (lock->l_resource->lr_type == LDLM_EXTENT)
                 CDEBUG(level, "  Extent: "LPU64" -> "LPU64
                atomic_read(&lock->l_refc), lock->l_readers, lock->l_writers);
         if (lock->l_resource->lr_type == LDLM_EXTENT)
                 CDEBUG(level, "  Extent: "LPU64" -> "LPU64
index 574f958..21fcfce 100644 (file)
@@ -964,7 +964,7 @@ static int ll_lov_setea(struct inode *inode, struct file *file,
 {
         int flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
         struct lov_user_md  *lump;
 {
         int flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
         struct lov_user_md  *lump;
-        int lum_size = sizeof(struct lov_user_md) + 
+        int lum_size = sizeof(struct lov_user_md) +
                        sizeof(struct lov_user_ost_data);
         int rc;
         ENTRY;
                        sizeof(struct lov_user_ost_data);
         int rc;
         ENTRY;
@@ -976,8 +976,7 @@ static int ll_lov_setea(struct inode *inode, struct file *file,
         if (lump == NULL) {
                 RETURN(-ENOMEM);
         }
         if (lump == NULL) {
                 RETURN(-ENOMEM);
         }
-        rc = copy_from_user(lump, (struct lov_user_md  *)arg, 
-                            lum_size);
+        rc = copy_from_user(lump, (struct lov_user_md  *)arg, lum_size);
         if (rc) {
                 OBD_FREE(lump, lum_size);
                 RETURN(-EFAULT);
         if (rc) {
                 OBD_FREE(lump, lum_size);
                 RETURN(-EFAULT);
@@ -1115,7 +1114,7 @@ int ll_file_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
         case LL_IOC_LOV_SETSTRIPE:
                 RETURN(ll_lov_setstripe(inode, file, arg));
         case LL_IOC_LOV_SETEA:
         case LL_IOC_LOV_SETSTRIPE:
                 RETURN(ll_lov_setstripe(inode, file, arg));
         case LL_IOC_LOV_SETEA:
-                RETURN( ll_lov_setea(inode, file, arg) ); 
+                RETURN(ll_lov_setea(inode, file, arg));
         case LL_IOC_LOV_GETSTRIPE:
                 RETURN(ll_lov_getstripe(inode, arg));
         case LL_IOC_RECREATE_OBJ:
         case LL_IOC_LOV_GETSTRIPE:
                 RETURN(ll_lov_getstripe(inode, arg));
         case LL_IOC_RECREATE_OBJ:
index 258c3b8..23b193c 100644 (file)
@@ -300,7 +300,7 @@ static int lookup_it_finish(struct ptlrpc_request *request, int offset,
                         rc = ll_glimpse_size(inode, &lvb);
                         if (rc) {
                                 iput(inode);
                         rc = ll_glimpse_size(inode, &lvb);
                         if (rc) {
                                 iput(inode);
-                                RETURN(-EIO);
+                                RETURN(rc);
                         }
                         inode->i_size = lvb.lvb_size;
                 }
                         }
                         inode->i_size = lvb.lvb_size;
                 }
index e36921a..f1f5f1c 100644 (file)
@@ -1113,8 +1113,7 @@ static int filter_intent_policy(struct ldlm_namespace *ns,
                 if (tmplock->l_granted_mode == LCK_PR)
                         continue;
 
                 if (tmplock->l_granted_mode == LCK_PR)
                         continue;
 
-                if (tmplock->l_policy_data.l_extent.end <=
-                    reply_lvb->lvb_size)
+                if (tmplock->l_policy_data.l_extent.end <= reply_lvb->lvb_size)
                         continue;
 
                 if (l == NULL) {
                         continue;
 
                 if (l == NULL) {
index 773baf4..14eaf42 100644 (file)
@@ -123,7 +123,7 @@ static void filter_grant_incoming(struct obd_export *exp, struct obdo *oa)
          * out-or-order and have already consumed some grant.  We want to
          * leave this here in case there is a large error in accounting. */
         CDEBUG(oa->o_grant > fed->fed_grant + FILTER_GRANT_CHUNK ?
          * out-or-order and have already consumed some grant.  We want to
          * leave this here in case there is a large error in accounting. */
         CDEBUG(oa->o_grant > fed->fed_grant + FILTER_GRANT_CHUNK ?
-               D_ERROR : D_CACHE,
+               D_WARNING : D_CACHE,
                "%s: cli %s/%p reports grant: "LPU64" dropped: %u, local: %lu\n",
                obd->obd_name, exp->exp_client_uuid.uuid, exp, oa->o_grant,
                oa->o_dropped, fed->fed_grant);
                "%s: cli %s/%p reports grant: "LPU64" dropped: %u, local: %lu\n",
                obd->obd_name, exp->exp_client_uuid.uuid, exp, oa->o_grant,
                oa->o_dropped, fed->fed_grant);
@@ -625,7 +625,7 @@ static int filter_preprw_write(int cmd, struct obd_export *exp, struct obdo *oa,
 
                 rc = filter_start_page_write(dentry->d_inode, lnb);
                 if (rc) {
 
                 rc = filter_start_page_write(dentry->d_inode, lnb);
                 if (rc) {
-                        CDEBUG(D_ERROR, "page err %u@"LPU64" %u/%u %p: rc %d\n",
+                        CERROR("page err %u@"LPU64" %u/%u %p: rc %d\n",
                                lnb->len, lnb->offset,
                                i, obj->ioo_bufcnt, dentry, rc);
                         while (lnb-- > res)
                                lnb->len, lnb->offset,
                                i, obj->ioo_bufcnt, dentry, rc);
                         while (lnb-- > res)