Whamcloud - gitweb
Update snapfs: 1) some fix on clonefs read in lustre
[fs/lustre-release.git] / lustre / mdc / mdc_locks.c
index 7b1aa8b..8c04ca2 100644 (file)
@@ -36,6 +36,7 @@
 #include <linux/obd_class.h>
 #include <linux/lustre_mds.h>
 #include <linux/lustre_dlm.h>
+#include <linux/lustre_snap.h>
 #include <linux/lprocfs_status.h>
 #include "mdc_internal.h"
 
@@ -76,7 +77,8 @@ static int it_to_lock_mode(struct lookup_intent *it)
         /* CREAT needs to be tested before open (both could be set) */
         if (it->it_op & IT_CREAT)
                 return LCK_PW;
-        else if (it->it_op & (IT_READDIR | IT_GETATTR | IT_OPEN | IT_LOOKUP))
+        else if (it->it_op & (IT_READDIR | IT_GETATTR | IT_OPEN | IT_LOOKUP |
+                              IT_CHDIR))
                 return LCK_PR;
 
         LBUG();
@@ -120,7 +122,7 @@ int it_open_error(int phase, struct lookup_intent *it)
 EXPORT_SYMBOL(it_open_error);
 
 /* this must be called on a lockh that is known to have a referenced lock */
-void mdc_set_lock_data(__u64 *l, void *data)
+int mdc_set_lock_data(struct obd_export *exp, __u64 *l, void *data)
 {
         struct ldlm_lock *lock;
         struct lustre_handle *lockh = (struct lustre_handle *)l;
@@ -128,7 +130,7 @@ void mdc_set_lock_data(__u64 *l, void *data)
 
         if (!*l) {
                 EXIT;
-                return;
+                return 0;
         }
 
         lock = ldlm_handle2lock(lockh);
@@ -152,6 +154,7 @@ void mdc_set_lock_data(__u64 *l, void *data)
         LDLM_LOCK_PUT(lock);
 
         EXIT;
+        return 0;
 }
 EXPORT_SYMBOL(mdc_set_lock_data);
 
@@ -166,11 +169,27 @@ int mdc_change_cbdata(struct obd_export *exp, struct ll_fid *fid,
 
         ldlm_change_cbdata(class_exp2obd(exp)->obd_namespace, &res_id, it, 
                            data);
+
         EXIT;
         return 0;
 }
 
-
+#if CONFIG_SNAPFS
+/* Copy the client's clonefs info into buffer @offset of request @msg.
+ *
+ * Does nothing when the client obd has no cl_clone_info attached.
+ * Returns 0 on success (or when there is nothing to copy), -EFAULT when
+ * lustre_msg_buf() yields no buffer for @offset. */
+int mdc_set_clone_info(struct obd_export *exp, struct lustre_msg *msg,
+                       int offset)
+{
+        struct client_obd *cli_obd = &exp->exp_obd->u.cli;
+        struct clonefs_info *cl_info;
+        ENTRY;
+
+        if (cli_obd->cl_clone_info) {
+                cl_info = lustre_msg_buf(msg, offset, sizeof(*cl_info));
+                /* never memcpy() into a buffer the message does not have */
+                if (cl_info == NULL)
+                        RETURN(-EFAULT);
+                memcpy(cl_info, cli_obd->cl_clone_info, sizeof(*cl_info));
+        }
+        RETURN(0);
+}
+#endif
 
 /* We always reserve enough space in the reply packet for a stripe MD, because
  * we don't know in advance the file type. */
@@ -190,6 +209,7 @@ int mdc_enqueue(struct obd_export *exp,
         struct obd_device *obddev = class_exp2obd(exp);
         struct ldlm_res_id res_id =
                 { .name = {data->fid1.id, data->fid1.generation} };
+        ldlm_policy_data_t policy = { .l_inodebits = { MDS_INODELOCK_LOOKUP } };
         int size[6] = {sizeof(struct ldlm_request), sizeof(struct ldlm_intent)};
         int rc, flags = LDLM_FL_HAS_INTENT;
         int repsize[4] = {sizeof(struct ldlm_reply),
@@ -228,35 +248,27 @@ int mdc_enqueue(struct obd_export *exp,
                 lit->opc = (__u64)it->it_op;
 
                 /* pack the intended request */
-                mdc_open_pack(req, 2, data, it->it_create_mode, 0,
+                mdc_open_pack(req->rq_reqmsg, 2, data, it->it_create_mode, 0,
                               it->it_flags, lmm, lmmsize);
                 /* get ready for the reply */
                 reply_buffers = 3;
                 req->rq_replen = lustre_msg_size(3, repsize);
-        } else if (it->it_op & IT_UNLINK) {
-                size[2] = sizeof(struct mds_rec_unlink);
-                size[3] = data->namelen + 1;
-                req = ptlrpc_prep_req(class_exp2cliimp(exp), LDLM_ENQUEUE, 4,
-                                      size, NULL);
-                if (!req)
-                        RETURN(-ENOMEM);
-
-                /* pack the intent */
-                lit = lustre_msg_buf(req->rq_reqmsg, 1, sizeof (*lit));
-                lit->opc = (__u64)it->it_op;
-
-                /* pack the intended request */
-                mdc_unlink_pack(req, 2, data);
-                /* get ready for the reply */
-                reply_buffers = 4;
-                req->rq_replen = lustre_msg_size(4, repsize);
-        } else if (it->it_op & (IT_GETATTR | IT_LOOKUP)) {
+        } else if (it->it_op & (IT_GETATTR | IT_LOOKUP | IT_CHDIR)) {
                 int valid = OBD_MD_FLNOTOBD | OBD_MD_FLEASIZE;
                 size[2] = sizeof(struct mds_body);
                 size[3] = data->namelen + 1;
 
+                if (it->it_op & IT_GETATTR)
+                        policy.l_inodebits.bits = MDS_INODELOCK_UPDATE;
+#if CONFIG_SNAPFS                
+                size[4] = sizeof(struct clonefs_info); 
+                req = ptlrpc_prep_req(class_exp2cliimp(exp), LDLM_ENQUEUE, 5,
+                                      size, NULL);
+#else
                 req = ptlrpc_prep_req(class_exp2cliimp(exp), LDLM_ENQUEUE, 4,
                                       size, NULL);
+
+#endif
                 if (!req)
                         RETURN(-ENOMEM);
 
@@ -265,27 +277,48 @@ int mdc_enqueue(struct obd_export *exp,
                 lit->opc = (__u64)it->it_op;
 
                 /* pack the intended request */
-                mdc_getattr_pack(req, valid, 2, it->it_flags, data);
+                mdc_getattr_pack(req->rq_reqmsg, valid, 2, it->it_flags, data);
+#if CONFIG_SNAPFS               
+                mdc_set_clone_info(exp, req->rq_reqmsg, 4); 
+#endif                 
                 /* get ready for the reply */
                 reply_buffers = 3;
                 req->rq_replen = lustre_msg_size(3, repsize);
         } else if (it->it_op == IT_READDIR) {
+                policy.l_inodebits.bits = MDS_INODELOCK_UPDATE;
                 req = ptlrpc_prep_req(class_exp2cliimp(exp), LDLM_ENQUEUE, 1,
                                       size, NULL);
+                
                 if (!req)
                         RETURN(-ENOMEM);
-
                 /* get ready for the reply */
                 reply_buffers = 1;
                 req->rq_replen = lustre_msg_size(1, repsize);
-        }  else {
+        } else if (it->it_op == IT_UNLINK) {
+                size[2] = sizeof(struct mds_body);
+                policy.l_inodebits.bits = MDS_INODELOCK_UPDATE;
+                req = ptlrpc_prep_req(class_exp2cliimp(exp), LDLM_ENQUEUE, 3,
+                                      size, NULL);
+                if (!req)
+                        RETURN(-ENOMEM);
+
+                /* pack the intended request */
+                mdc_getattr_pack(req->rq_reqmsg, 0,  2, 0, data);
+
+                /* pack the intent */
+                lit = lustre_msg_buf(req->rq_reqmsg, 1, sizeof (*lit));
+                lit->opc = (__u64)it->it_op;
+
+                /* get ready for the reply */
+                reply_buffers = 3;
+                req->rq_replen = lustre_msg_size(3, repsize);
+        } else {
                 LBUG();
                 RETURN(-EINVAL);
         }
-
         mdc_get_rpc_lock(obddev->u.cli.cl_rpc_lock, it);
         rc = ldlm_cli_enqueue(exp, req, obddev->obd_namespace, res_id,
-                              lock_type, NULL, lock_mode, &flags, cb_blocking,
+                              lock_type, &policy, lock_mode, &flags,cb_blocking,
                               cb_completion, NULL, cb_data, NULL, 0, NULL,
                               lockh);
         mdc_put_rpc_lock(obddev->u.cli.cl_rpc_lock, it);
@@ -341,6 +374,9 @@ int mdc_enqueue(struct obd_export *exp,
                 spin_unlock(&req->rq_lock);
         }
 
+        DEBUG_REQ(D_RPCTRACE, req, "disposition: %x, status: %d",
+                  it->d.lustre.it_disposition, it->d.lustre.it_status);
+
         /* We know what to expect, so we do any byte flipping required here */
         LASSERT(reply_buffers == 4 || reply_buffers == 3 || reply_buffers == 1);
         if (reply_buffers >= 3) {
@@ -406,9 +442,9 @@ EXPORT_SYMBOL(mdc_enqueue);
  */
 int mdc_intent_lock(struct obd_export *exp, struct ll_uctxt *uctxt,
                     struct ll_fid *pfid, const char *name, int len,
-                    void *lmm, int lmmsize,
-                    struct ll_fid *cfid, struct lookup_intent *it,
-                    int lookup_flags, struct ptlrpc_request **reqp,
+                    void *lmm, int lmmsize, struct ll_fid *cfid,
+                    struct lookup_intent *it, int lookup_flags,
+                    struct ptlrpc_request **reqp,
                     ldlm_blocking_callback cb_blocking)
 {
         struct lustre_handle lockh;
@@ -421,25 +457,36 @@ int mdc_intent_lock(struct obd_export *exp, struct ll_uctxt *uctxt,
         LASSERT(it);
 
         CDEBUG(D_DLMTRACE, "name: %*s in %ld, intent: %s\n", len, name,
-               (unsigned long)pfid->id, ldlm_it2str(it->it_op));
+               pfid ? (unsigned long) pfid->id : 0 , ldlm_it2str(it->it_op));
 
-        if (cfid && (it->it_op == IT_LOOKUP || it->it_op == IT_GETATTR)) {
+        if (cfid && (it->it_op == IT_LOOKUP || it->it_op == IT_GETATTR ||
+                     it->it_op == IT_CHDIR)) {
                 /* We could just return 1 immediately, but since we should only
                  * be called in revalidate_it if we already have a lock, let's
                  * verify that. */
                 struct ldlm_res_id res_id ={.name = {cfid->id,
                                                      cfid->generation}};
                 struct lustre_handle lockh;
+                ldlm_policy_data_t policy;
                 int mode = LCK_PR;
 
+                /* For the GETATTR case, ll_revalidate_it issues two separate
+                   queries - for LOOKUP and for UPDATE lock because it cannot
+                   check them together - those two bits might be present in
+                   two separate granted locks */
+                policy.l_inodebits.bits = 
+                                 (it->it_op == IT_GETATTR)?MDS_INODELOCK_UPDATE:
+                                                           MDS_INODELOCK_LOOKUP;
+                mode = LCK_PR;
                 rc = ldlm_lock_match(exp->exp_obd->obd_namespace,
                                      LDLM_FL_BLOCK_GRANTED, &res_id,
-                                     LDLM_PLAIN, NULL, LCK_PR, &lockh);
+                                     LDLM_IBITS, &policy, LCK_PR, &lockh);
                 if (!rc) {
                         mode = LCK_PW;
                         rc = ldlm_lock_match(exp->exp_obd->obd_namespace,
                                              LDLM_FL_BLOCK_GRANTED, &res_id,
-                                             LDLM_PLAIN, NULL, LCK_PW, &lockh);
+                                             LDLM_IBITS, &policy, LCK_PW,
+                                             &lockh);
                 }
                 if (rc) {
                         memcpy(&it->d.lustre.it_lock_handle, &lockh,
@@ -461,7 +508,7 @@ int mdc_intent_lock(struct obd_export *exp, struct ll_uctxt *uctxt,
                 struct mdc_op_data op_data;
                 mdc_fid2mdc_op_data(&op_data, uctxt, pfid, cfid, name, len, 0);
 
-                rc = mdc_enqueue(exp, LDLM_PLAIN, it, it_to_lock_mode(it),
+                rc = mdc_enqueue(exp, LDLM_IBITS, it, it_to_lock_mode(it),
                                  &op_data, &lockh, lmm, lmmsize,
                                  ldlm_completion_ast, cb_blocking, NULL);
                 if (rc < 0)
@@ -490,7 +537,12 @@ int mdc_intent_lock(struct obd_export *exp, struct ll_uctxt *uctxt,
         if (cfid != NULL) {
                 it_set_disposition(it, DISP_ENQ_COMPLETE);
                 /* Also: did we find the same inode? */
-                if (memcmp(cfid, &mds_body->fid1, sizeof(*cfid)))
+                /* we have to compare all the fields but type, because
+                 * MDS can return mds/ino/generation triple if inode
+                 * lives on another MDS -bzzz */
+                if (cfid->generation != mds_body->fid1.generation ||
+                                cfid->id != mds_body->fid1.id ||
+                                cfid->mds != mds_body->fid1.mds)
                         RETURN(-ESTALE);
         }
 
@@ -527,7 +579,7 @@ int mdc_intent_lock(struct obd_export *exp, struct ll_uctxt *uctxt,
         } else if (it->it_op == IT_OPEN) {
                 LASSERT(!it_disposition(it, DISP_OPEN_CREATE));
         } else {
-                LASSERT(it->it_op & (IT_GETATTR | IT_LOOKUP));
+                LASSERT(it->it_op & (IT_GETATTR | IT_LOOKUP | IT_CHDIR));
         }
 
         /* If we already have a matching lock, then cancel the new
@@ -537,11 +589,12 @@ int mdc_intent_lock(struct obd_export *exp, struct ll_uctxt *uctxt,
          * intent_finish has performed the iget().) */
         lock = ldlm_handle2lock(&lockh);
         if (lock) {
+                ldlm_policy_data_t policy = lock->l_policy_data;
                 LDLM_DEBUG(lock, "matching against this");
                 LDLM_LOCK_PUT(lock);
                 memcpy(&old_lock, &lockh, sizeof(lockh));
                 if (ldlm_lock_match(NULL, LDLM_FL_BLOCK_GRANTED, NULL,
-                                    LDLM_PLAIN, NULL, LCK_NL, &old_lock)) {
+                                    LDLM_IBITS, &policy, LCK_NL, &old_lock)) {
                         ldlm_lock_decref_and_cancel(&lockh,
                                                     it->d.lustre.it_lock_mode);
                         memcpy(&lockh, &old_lock, sizeof(old_lock));