Whamcloud - gitweb
Branch HEAD
author anserper <anserper>
Thu, 3 Jul 2008 01:09:21 +0000 (01:09 +0000)
committer anserper <anserper>
Thu, 3 Jul 2008 01:09:21 +0000 (01:09 +0000)
b=14010
i=shadow
i=zam

Short read patch.

lustre/include/lustre_dlm.h
lustre/include/obd.h
lustre/include/obd_class.h
lustre/ldlm/ldlm_lock.c
lustre/ldlm/ldlm_lockd.c
lustre/llite/file.c
lustre/llite/llite_internal.h
lustre/llite/llite_mmap.c
lustre/lov/lov_obd.c
lustre/obdclass/lprocfs_status.c
lustre/osc/osc_request.c

index ef02088..e6cac14 100644 (file)
@@ -781,6 +781,8 @@ void ldlm_lock_addref(struct lustre_handle *lockh, __u32 mode);
 void ldlm_lock_decref(struct lustre_handle *lockh, __u32 mode);
 void ldlm_lock_decref_and_cancel(struct lustre_handle *lockh, __u32 mode);
 void ldlm_lock_allow_match(struct ldlm_lock *lock);
+/* Take a reference on the given lock if its granted mode and extent can
+ * serve the requested rw access to the [start, end] range; returns 1 and
+ * stores the lock in the cookie on success, 0 otherwise. */
+int ldlm_lock_fast_match(struct ldlm_lock *, int, obd_off, obd_off, void **);
+/* Drop the reference held on the lock stored in the cookie, using the mode
+ * that corresponds to rw. */
+void ldlm_lock_fast_release(void *, int);
 ldlm_mode_t ldlm_lock_match(struct ldlm_namespace *ns, int flags,
                             const struct ldlm_res_id *, ldlm_type_t type,
                             ldlm_policy_data_t *, ldlm_mode_t mode,
index 0db6e51..7cc09ec 100644 (file)
@@ -1197,6 +1197,14 @@ struct obd_ops {
                                  struct obd_async_page_ops *ops, void *data,
                                  void **res, int nocache,
                                  struct lustre_handle *lockh);
+        int (*o_reget_short_lock)(struct obd_export *exp,
+                                  struct lov_stripe_md *lsm,
+                                  void **res, int rw,
+                                  obd_off start, obd_off end,
+                                  void **cookie);
+        int (*o_release_short_lock)(struct obd_export *exp,
+                                    struct lov_stripe_md *lsm, obd_off end,
+                                    void *cookie, int rw);
         int (*o_queue_async_io)(struct obd_export *exp,
                                 struct lov_stripe_md *lsm,
                                 struct lov_oinfo *loi, void *cookie,
index 6fcbd52..d734870 100644 (file)
@@ -1212,6 +1212,34 @@ static inline  int obd_prep_async_page(struct obd_export *exp,
         RETURN(ret);
 }
 
+/*
+ * Forward a "reget short lock" request to the reget_short_lock method of
+ * the device behind @exp.
+ *
+ * EXP_CHECK_DT_OP() and EXP_COUNTER_INCREMENT() are applied for the
+ * reget_short_lock operation before the method is invoked; all other
+ * arguments are passed through unchanged.
+ *
+ * Returns whatever the method returns.
+ */
+static inline int obd_reget_short_lock(struct obd_export *exp,
+                                       struct lov_stripe_md *lsm,
+                                       void **res, int rw,
+                                       obd_off start, obd_off end,
+                                       void **cookie)
+{
+        int rc;
+        ENTRY;
+
+        EXP_CHECK_DT_OP(exp, reget_short_lock);
+        EXP_COUNTER_INCREMENT(exp, reget_short_lock);
+
+        rc = OBP(exp->exp_obd, reget_short_lock)(exp, lsm, res, rw,
+                                                 start, end, cookie);
+        RETURN(rc);
+}
+
+/*
+ * Forward a "release short lock" request to the release_short_lock method
+ * of the device behind @exp.
+ *
+ * EXP_CHECK_DT_OP() and EXP_COUNTER_INCREMENT() are applied for the
+ * release_short_lock operation before the method is invoked; @lsm, @end,
+ * @cookie and @rw are passed through unchanged.
+ *
+ * Returns whatever the method returns.
+ */
+static inline int obd_release_short_lock(struct obd_export *exp,
+                                         struct lov_stripe_md *lsm, obd_off end,
+                                         void *cookie, int rw)
+{
+        int rc;
+        ENTRY;
+
+        EXP_CHECK_DT_OP(exp, release_short_lock);
+        EXP_COUNTER_INCREMENT(exp, release_short_lock);
+
+        rc = OBP(exp->exp_obd, release_short_lock)(exp, lsm, end, cookie, rw);
+        RETURN(rc);
+}
+
 static inline int obd_queue_async_io(struct obd_export *exp,
                                      struct lov_stripe_md *lsm,
                                      struct lov_oinfo *loi, void *cookie,
index 11ac6ca..4b690a9 100644 (file)
@@ -919,6 +919,34 @@ void ldlm_lock_allow_match(struct ldlm_lock *lock)
         unlock_res_and_lock(lock);
 }
 
+/*
+ * Check whether @lock can serve an @rw access to the extent [@start, @end]
+ * and, if it can, take a reference on it.
+ *
+ * A NULL @lock never matches.  A read is accepted whatever the granted
+ * mode is; a write requires the granted mode to be LCK_PW or LCK_GROUP.
+ * The lock extent must contain the whole requested range.
+ *
+ * On a match the lock is referenced with LCK_PW (write) or LCK_PR (read),
+ * stored in *@cookie, and 1 is returned.  Otherwise 0 is returned and
+ * *@cookie is left untouched.
+ */
+int ldlm_lock_fast_match(struct ldlm_lock *lock, int rw,
+                         obd_off start, obd_off end,
+                         void **cookie)
+{
+        LASSERT(rw == OBD_BRW_READ || rw == OBD_BRW_WRITE);
+
+        if (lock == NULL)
+                return 0;
+
+        /* should LCK_GROUP be handled in a special way? */
+        if (rw != OBD_BRW_READ &&
+            !(lock->l_granted_mode & (LCK_PW | LCK_GROUP)))
+                return 0;
+
+        /* the lock extent has to cover the whole requested range */
+        if (lock->l_policy_data.l_extent.start > start ||
+            lock->l_policy_data.l_extent.end < end)
+                return 0;
+
+        if (rw == OBD_BRW_WRITE)
+                ldlm_lock_addref_internal(lock, LCK_PW);
+        else
+                ldlm_lock_addref_internal(lock, LCK_PR);
+
+        *cookie = (void *)lock;
+        /* plain constants are returned to avoid an rc local (stack relief) */
+        return 1;
+}
+
+/*
+ * Drop the reference held on the lock stored in @cookie.
+ *
+ * @cookie must be a non-NULL struct ldlm_lock pointer and @rw must be
+ * OBD_BRW_READ or OBD_BRW_WRITE; for a write the granted mode must be
+ * LCK_PW or LCK_GROUP.  All of this is asserted.  The reference is dropped
+ * in LCK_PW mode for a write and in LCK_PR mode for a read.
+ */
+void ldlm_lock_fast_release(void *cookie, int rw)
+{
+        struct ldlm_lock *lock = cookie;
+
+        LASSERT(lock != NULL);
+        LASSERT(rw == OBD_BRW_READ || rw == OBD_BRW_WRITE);
+        LASSERT(rw == OBD_BRW_READ ||
+                (lock->l_granted_mode & (LCK_PW | LCK_GROUP)));
+
+        if (rw == OBD_BRW_WRITE)
+                ldlm_lock_decref_internal(lock, LCK_PW);
+        else
+                ldlm_lock_decref_internal(lock, LCK_PR);
+}
+
 /* Can be called in two ways:
  *
  * If 'ns' is NULL, then lockh describes an existing lock that we want to look
index e24c831..7b3b159 100644 (file)
@@ -2252,6 +2252,8 @@ EXPORT_SYMBOL(ldlm_lock2handle);
 EXPORT_SYMBOL(__ldlm_handle2lock);
 EXPORT_SYMBOL(ldlm_lock_get);
 EXPORT_SYMBOL(ldlm_lock_put);
+EXPORT_SYMBOL(ldlm_lock_fast_match);
+EXPORT_SYMBOL(ldlm_lock_fast_release);
 EXPORT_SYMBOL(ldlm_lock_match);
 EXPORT_SYMBOL(ldlm_lock_cancel);
 EXPORT_SYMBOL(ldlm_lock_addref);
index e51a9b1..82f78c7 100644 (file)
@@ -1318,7 +1318,7 @@ static int ll_file_get_tree_lock(struct ll_lock_tree *tree, struct file *file,
         struct inode * inode = file->f_dentry->d_inode;
         ENTRY;
 
-        append = (rw == WRITE) && (file->f_flags & O_APPEND);
+        append = (rw == OBD_BRW_WRITE) && (file->f_flags & O_APPEND);
 
         if (append || !ll_is_file_contended(file)) {
                 struct ll_lock_tree_node *node;
@@ -1328,7 +1328,7 @@ static int ll_file_get_tree_lock(struct ll_lock_tree *tree, struct file *file,
                 if (file->f_flags & O_NONBLOCK)
                         ast_flags |= LDLM_FL_BLOCK_NOWAIT;
                 node = ll_node_from_inode(inode, start, end,
-                                          (rw == WRITE) ? LCK_PW : LCK_PR);
+                                          (rw == OBD_BRW_WRITE) ? LCK_PW : LCK_PR);
                 if (IS_ERR(node)) {
                         rc = PTR_ERR(node);
                         GOTO(out, rc);
@@ -1347,6 +1347,123 @@ out:
         return rc;
 }
 
+/*
+ * Ask the data export of the inode owning @page to re-use a lock for an
+ * @rw access to [@start, @end].
+ *
+ * The llap_cookie of the page's ll_async_page is handed down as the
+ * per-page resource.  Returns 0 when the inode has no data export or the
+ * page has no ll_async_page attached; otherwise returns the result of
+ * obd_reget_short_lock(), which fills in *@cookie.
+ */
+static int ll_reget_short_lock(struct page *page, int rw,
+                               obd_off start, obd_off end,
+                               void **cookie)
+{
+        struct inode *inode = page->mapping->host;
+        struct obd_export *exp;
+        struct ll_async_page *llap;
+        int rc = 0;
+
+        ENTRY;
+
+        exp = ll_i2dtexp(inode);
+        if (exp != NULL) {
+                llap = llap_cast_private(page);
+                if (llap != NULL)
+                        rc = obd_reget_short_lock(exp,
+                                                  ll_i2info(inode)->lli_smd,
+                                                  &llap->llap_cookie, rw,
+                                                  start, end, cookie);
+        }
+
+        RETURN(rc);
+}
+
+/*
+ * Hand @cookie back to the data export of @inode through
+ * obd_release_short_lock().
+ *
+ * Nothing is done when the inode has no data export.  A negative return
+ * code from the release is only logged; there is no way to report it to
+ * the caller.
+ */
+static void ll_release_short_lock(struct inode *inode, obd_off end,
+                                  void *cookie, int rw)
+{
+        struct obd_export *exp = ll_i2dtexp(inode);
+        int rc;
+
+        if (exp != NULL) {
+                rc = obd_release_short_lock(exp, ll_i2info(inode)->lli_smd,
+                                            end, cookie, rw);
+                if (rc < 0)
+                        CERROR("unlock failed (%d)\n", rc);
+        }
+}
+
+/*
+ * Try to lock the file range [@ppos, @end] (both inclusive) by re-using
+ * the lock associated with the cached page that holds @ppos.
+ *
+ * The fast path is skipped when ll_region_mapped() reports the user
+ * buffer @buf as mapped, and when no page is cached at @ppos.
+ *
+ * Returns 1 with *@cookie filled in when a lock was re-used, 0 otherwise.
+ * The page is only locked and referenced for the duration of the lookup.
+ */
+static inline int ll_file_get_fast_lock(struct file *file,
+                                        obd_off ppos, obd_off end,
+                                        char *buf, void **cookie, int rw)
+{
+        int rc = 0;
+        struct page *page;
+
+        ENTRY;
+
+        /* @end is the last byte of the range, so the buffer is
+         * end - ppos + 1 bytes long; a length of end - ppos would leave
+         * its last byte out of the mapping check */
+        if (!ll_region_mapped((unsigned long)buf, end - ppos + 1)) {
+                page = find_lock_page(file->f_dentry->d_inode->i_mapping,
+                                      ppos >> CFS_PAGE_SHIFT);
+                if (page) {
+                        if (ll_reget_short_lock(page, rw, ppos, end, cookie))
+                                rc = 1;
+
+                        unlock_page(page);
+                        page_cache_release(page);
+                }
+        }
+
+        RETURN(rc);
+}
+
+/*
+ * Counterpart of ll_file_get_fast_lock(): forwards all of its arguments to
+ * ll_release_short_lock() unchanged.
+ */
+static inline void ll_file_put_fast_lock(struct inode *inode, obd_off end,
+                                         void *cookie, int rw)
+{
+        ll_release_short_lock(inode, end, cookie, rw);
+}
+
+/*
+ * How a file range was locked for I/O, as returned by ll_file_get_lock()
+ * and consumed by ll_file_put_lock().
+ */
+enum ll_lock_style {
+        /* ll_file_get_tree_lock() returned 0: no lock is held */
+        LL_LOCK_STYLE_NOLOCK   = 0,
+        /* ll_file_get_fast_lock() succeeded: release through the cookie */
+        LL_LOCK_STYLE_FASTLOCK = 1,
+        /* ll_file_get_tree_lock() returned 1: release with ll_tree_unlock() */
+        LL_LOCK_STYLE_TREELOCK = 2
+};
+
+/*
+ * Lock the file range [@ppos, @end] (both inclusive) for an @rw access.
+ *
+ * The fast path (ll_file_get_fast_lock()) is tried first; when it does not
+ * succeed, the request falls back to ll_file_get_tree_lock() on @tree.
+ *
+ * Returns LL_LOCK_STYLE_FASTLOCK (with *@cookie set), LL_LOCK_STYLE_TREELOCK
+ * or LL_LOCK_STYLE_NOLOCK, or the negative error code reported by
+ * ll_file_get_tree_lock().
+ */
+static inline int ll_file_get_lock(struct file *file, obd_off ppos,
+                                   obd_off end, char *buf, void **cookie,
+                                   struct ll_lock_tree *tree, int rw)
+{
+        int rc;
+
+        ENTRY;
+
+        if (ll_file_get_fast_lock(file, ppos, end, buf, cookie, rw))
+                RETURN(LL_LOCK_STYLE_FASTLOCK);
+
+        /* @end is the last byte of the range, so the byte count is
+         * end - ppos + 1; "ppos - end" had the operands reversed and was
+         * never a valid count */
+        rc = ll_file_get_tree_lock(tree, file, buf, end - ppos + 1,
+                                   ppos, end, rw);
+        /* rc: 1 for tree lock, 0 for no lock, <0 for error */
+        switch (rc) {
+        case 1:
+                RETURN(LL_LOCK_STYLE_TREELOCK);
+        case 0:
+                RETURN(LL_LOCK_STYLE_NOLOCK);
+        default:
+                break;
+        }
+
+        /* an error happened if we reached this point, rc = -errno here */
+        RETURN(rc);
+}
+
+/*
+ * Release a lock according to the style it was taken with.
+ *
+ * LL_LOCK_STYLE_TREELOCK unlocks @tree, LL_LOCK_STYLE_FASTLOCK hands
+ * @cookie to ll_file_put_fast_lock().  Any other value, including
+ * LL_LOCK_STYLE_NOLOCK, is reported as an error and otherwise ignored.
+ */
+static inline void ll_file_put_lock(struct inode *inode, obd_off end,
+                                    enum ll_lock_style lock_style,
+                                    void *cookie, struct ll_lock_tree *tree,
+                                    int rw)
+{
+        if (lock_style == LL_LOCK_STYLE_TREELOCK)
+                ll_tree_unlock(tree);
+        else if (lock_style == LL_LOCK_STYLE_FASTLOCK)
+                ll_file_put_fast_lock(inode, end, cookie, rw);
+        else
+                CERROR("invalid locking style (%d)\n", lock_style);
+}
+
 static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
                             loff_t *ppos)
 {
@@ -1358,9 +1475,10 @@ static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
         struct ost_lvb lvb;
         struct ll_ra_read bead;
         int ra = 0;
-        loff_t end;
+        obd_off end;
         ssize_t retval, chunk, sum = 0;
-        int tree_locked;
+        int lock_style;
+        void *cookie;
 
         __u64 kms;
         ENTRY;
@@ -1402,8 +1520,7 @@ repeat:
         if (sbi->ll_max_rw_chunk != 0) {
                 /* first, let's know the end of the current stripe */
                 end = *ppos;
-                obd_extent_calc(sbi->ll_dt_exp, lsm, OBD_CALC_STRIPE_END, 
-                                (obd_off *)&end);
+                obd_extent_calc(sbi->ll_dt_exp, lsm, OBD_CALC_STRIPE_END, &end);
 
                 /* correct, the end is beyond the request */
                 if (end > *ppos + count - 1)
@@ -1416,10 +1533,10 @@ repeat:
                 end = *ppos + count - 1;
         }
 
-        tree_locked = ll_file_get_tree_lock(&tree, file, buf,
-                                            count, *ppos, end, READ);
-        if (tree_locked < 0)
-                GOTO(out, retval = tree_locked);
+        lock_style = ll_file_get_lock(file, (obd_off)(*ppos), end,
+                                      buf, &cookie, &tree, OBD_BRW_READ);
+        if (lock_style < 0)
+                GOTO(out, retval = lock_style);
 
         ll_inode_size_lock(inode, 1);
         /*
@@ -1450,8 +1567,9 @@ repeat:
                 ll_inode_size_unlock(inode, 1);
                 retval = ll_glimpse_size(inode, LDLM_FL_BLOCK_GRANTED);
                 if (retval) {
-                        if (tree_locked)
-                                ll_tree_unlock(&tree);
+                        if (lock_style != LL_LOCK_STYLE_NOLOCK)
+                                ll_file_put_lock(inode, end, lock_style,
+                                                 cookie, &tree, OBD_BRW_READ);
                         goto out;
                 }
         } else {
@@ -1470,7 +1588,7 @@ repeat:
         CDEBUG(D_INODE, "Read ino %lu, "LPSZ" bytes, offset %lld, i_size %llu\n",
                inode->i_ino, chunk, *ppos, i_size_read(inode));
 
-        if (tree_locked) {
+        if (lock_style != LL_LOCK_STYLE_NOLOCK) {
                 /* turn off the kernel's read-ahead */
                 file->f_ra.ra_pages = 0;
 
@@ -1485,7 +1603,8 @@ repeat:
                 /* BUG: 5972 */
                 file_accessed(file);
                 retval = generic_file_read(file, buf, chunk, ppos);
-                ll_tree_unlock(&tree);
+                ll_file_put_lock(inode, end, lock_style, cookie, &tree, 
+                                 OBD_BRW_READ);
         } else {
                 retval = ll_file_lockless_io(file, buf, chunk, ppos, READ);
         }
@@ -1570,7 +1689,7 @@ repeat:
         }
 
         tree_locked = ll_file_get_tree_lock(&tree, file, buf, count,
-                                            lock_start, lock_end, WRITE);
+                                            lock_start, lock_end, OBD_BRW_WRITE);
         if (tree_locked < 0)
                 GOTO(out, retval = tree_locked);
 
index e4871e8..c37af6b 100644 (file)
@@ -611,6 +611,7 @@ extern int ll_inode_revalidate_it(struct dentry *, struct lookup_intent *);
 extern int ll_have_md_lock(struct inode *inode, __u64 bits);
 extern ldlm_mode_t ll_take_md_lock(struct inode *inode, __u64 bits,
                                    struct lustre_handle *lockh);
+int ll_region_mapped(unsigned long addr, size_t count);
 int ll_extent_lock(struct ll_file_data *, struct inode *,
                    struct lov_stripe_md *, int mode, ldlm_policy_data_t *,
                    struct lustre_handle *, int ast_flags);
index d6c2230..64ef728 100644 (file)
@@ -290,6 +290,11 @@ static struct vm_area_struct * our_vma(unsigned long addr, size_t count)
         RETURN(ret);
 }
 
+/*
+ * Report whether our_vma() finds a vma for the @count bytes starting at
+ * @addr: 1 when it returns one, 0 when it returns NULL.
+ */
+int ll_region_mapped(unsigned long addr, size_t count)
+{
+        return our_vma(addr, count) != NULL;
+}
+
 int lt_get_mmap_locks(struct ll_lock_tree *tree,
                       unsigned long addr, size_t count)
 {
index 4466bd1..43b18e7 100644 (file)
@@ -2836,6 +2836,47 @@ void lov_stripe_unlock(struct lov_stripe_md *md)
 }
 EXPORT_SYMBOL(lov_stripe_unlock);
 
+/*
+ * LOV implementation of o_reget_short_lock.
+ *
+ * *@res is taken to be the struct lov_async_page of the page involved.
+ * The request is refused (0 is returned) unless @end lies strictly below
+ * the stripe end that lov_extent_calc() computes for @start.  Otherwise
+ * @start and @end are converted to object offsets for the page's stripe
+ * and the request is passed, with the page's lap_sub_cookie, to the export
+ * of the target that backs that stripe.
+ *
+ * Returns 0 when refused, else the result of the target's
+ * obd_reget_short_lock().
+ */
+static int lov_reget_short_lock(struct obd_export *exp,
+                                struct lov_stripe_md *lsm,
+                                void **res, int rw,
+                                obd_off start, obd_off end,
+                                void **cookie)
+{
+        struct lov_async_page *lap = *res;
+        struct obd_export *tgt_exp;
+        obd_off boundary = start;
+        obd_off obj_start, obj_end;
+        int rc;
+
+        ENTRY;
+
+        /* ensure we don't cross stripe boundaries */
+        lov_extent_calc(exp, lsm, OBD_CALC_STRIPE_END, &boundary);
+        if (boundary <= end)
+                RETURN(0);
+
+        /* map the region limits to the object limits */
+        lov_stripe_offset(lsm, start, lap->lap_stripe, &obj_start);
+        lov_stripe_offset(lsm, end, lap->lap_stripe, &obj_end);
+
+        /* export of the target that backs the page's stripe */
+        tgt_exp = exp->exp_obd->u.lov.lov_tgts[
+                        lsm->lsm_oinfo[lap->lap_stripe]->loi_ost_idx]->ltd_exp;
+
+        rc = obd_reget_short_lock(tgt_exp, NULL, &lap->lap_sub_cookie,
+                                  rw, obj_start, obj_end, cookie);
+        RETURN(rc);
+}
+
+/*
+ * LOV implementation of o_release_short_lock.
+ *
+ * The stripe is derived from @end with lov_stripe_number(), and the
+ * release is forwarded to the export of the target that backs that
+ * stripe.  @end, @cookie and @rw are passed through unchanged; no stripe
+ * metadata is passed down.
+ *
+ * Returns the result of the target's obd_release_short_lock().
+ */
+static int lov_release_short_lock(struct obd_export *exp,
+                                  struct lov_stripe_md *lsm, obd_off end,
+                                  void *cookie, int rw)
+{
+        struct obd_export *tgt_exp;
+        int stripe;
+        int rc;
+
+        ENTRY;
+
+        stripe = lov_stripe_number(lsm, end);
+        tgt_exp = exp->exp_obd->u.lov.lov_tgts[
+                        lsm->lsm_oinfo[stripe]->loi_ost_idx]->ltd_exp;
+
+        rc = obd_release_short_lock(tgt_exp, NULL, end, cookie, rw);
+        RETURN(rc);
+}
+
 struct obd_ops lov_obd_ops = {
         .o_owner               = THIS_MODULE,
         .o_setup               = lov_setup,
@@ -2858,6 +2899,8 @@ struct obd_ops lov_obd_ops = {
         .o_brw                 = lov_brw,
         .o_brw_async           = lov_brw_async,
         .o_prep_async_page     = lov_prep_async_page,
+        .o_reget_short_lock    = lov_reget_short_lock,
+        .o_release_short_lock  = lov_release_short_lock,
         .o_queue_async_io      = lov_queue_async_io,
         .o_set_async_flags     = lov_set_async_flags,
         .o_queue_group_io      = lov_queue_group_io,
index 01e0654..c9fa3db 100644 (file)
@@ -1068,6 +1068,8 @@ void lprocfs_init_ops_stats(int num_private_stats, struct lprocfs_stats *stats)
         LPROCFS_OBD_OP_INIT(num_private_stats, stats, brw);
         LPROCFS_OBD_OP_INIT(num_private_stats, stats, brw_async);
         LPROCFS_OBD_OP_INIT(num_private_stats, stats, prep_async_page);
+        LPROCFS_OBD_OP_INIT(num_private_stats, stats, reget_short_lock);
+        LPROCFS_OBD_OP_INIT(num_private_stats, stats, release_short_lock);
         LPROCFS_OBD_OP_INIT(num_private_stats, stats, queue_async_io);
         LPROCFS_OBD_OP_INIT(num_private_stats, stats, queue_group_io);
         LPROCFS_OBD_OP_INIT(num_private_stats, stats, trigger_group_io);
index 0899811..06fabd3 100644 (file)
@@ -2572,6 +2572,35 @@ static int osc_enter_cache(struct client_obd *cli, struct lov_oinfo *loi,
         RETURN(-EDQUOT);
 }
 
+/*
+ * OSC implementation of o_reget_short_lock.
+ *
+ * *res is taken to be the struct osc_async_page of the page involved.  The
+ * ldlm lock recorded in it (oap_ldlm_lock) is offered to
+ * ldlm_lock_fast_match() for an rw access to [start, end].  exp and lsm
+ * are not used at this layer.
+ *
+ * Returns the result of ldlm_lock_fast_match(): 1 with *cookie set when
+ * the lock could be re-used, 0 otherwise.
+ */
+static int osc_reget_short_lock(struct obd_export *exp,
+                                struct lov_stripe_md *lsm,
+                                void **res, int rw,
+                                obd_off start, obd_off end,
+                                void **cookie)
+{
+        struct osc_async_page *oap = *res;
+        int rc;
+
+        ENTRY;
+
+        /* oap_lock is held across the match, which reads oap_ldlm_lock */
+        spin_lock(&oap->oap_lock);
+        rc = ldlm_lock_fast_match(oap->oap_ldlm_lock, rw,
+                                  start, end, cookie);
+        spin_unlock(&oap->oap_lock);
+
+        RETURN(rc);
+}
+
+/*
+ * OSC implementation of o_release_short_lock.
+ *
+ * Passes cookie and rw to ldlm_lock_fast_release(); exp, lsm and end are
+ * not used at this layer.  Always returns 0.
+ */
+static int osc_release_short_lock(struct obd_export *exp,
+                                  struct lov_stripe_md *lsm, obd_off end,
+                                  void *cookie, int rw)
+{
+        ENTRY;
+        ldlm_lock_fast_release(cookie, rw);
+        /* no error could have happened at this layer */
+        RETURN(0);
+}
+
 int osc_prep_async_page(struct obd_export *exp, struct lov_stripe_md *lsm,
                         struct lov_oinfo *loi, cfs_page_t *page,
                         obd_off offset, struct obd_async_page_ops *ops,
@@ -4076,6 +4105,8 @@ struct obd_ops osc_obd_ops = {
         .o_brw                  = osc_brw,
         .o_brw_async            = osc_brw_async,
         .o_prep_async_page      = osc_prep_async_page,
+        .o_reget_short_lock     = osc_reget_short_lock,
+        .o_release_short_lock   = osc_release_short_lock,
         .o_queue_async_io       = osc_queue_async_io,
         .o_set_async_flags      = osc_set_async_flags,
         .o_queue_group_io       = osc_queue_group_io,