Whamcloud - gitweb
1)add .snap namespace to smfs
[fs/lustre-release.git] / lustre / mdc / mdc_locks.c
index f102439..09a709c 100644 (file)
@@ -36,6 +36,8 @@
 #include <linux/obd_class.h>
 #include <linux/lustre_mds.h>
 #include <linux/lustre_dlm.h>
+//#include <linux/lustre_smfs.h>
+//#include <linux/lustre_snap.h>
 #include <linux/lprocfs_status.h>
 #include "mdc_internal.h"
 
@@ -76,7 +78,8 @@ static int it_to_lock_mode(struct lookup_intent *it)
         /* CREAT needs to be tested before open (both could be set) */
         if (it->it_op & IT_CREAT)
                 return LCK_PW;
-        else if (it->it_op & (IT_READDIR | IT_GETATTR | IT_OPEN | IT_LOOKUP))
+        else if (it->it_op & (IT_READDIR | IT_GETATTR | IT_OPEN | IT_LOOKUP |
+                              IT_CHDIR))
                 return LCK_PR;
 
         LBUG();
@@ -120,7 +123,7 @@ int it_open_error(int phase, struct lookup_intent *it)
 EXPORT_SYMBOL(it_open_error);
 
 /* this must be called on a lockh that is known to have a referenced lock */
-void mdc_set_lock_data(__u64 *l, void *data)
+int mdc_set_lock_data(struct obd_export *exp, __u64 *l, void *data)
 {
         struct ldlm_lock *lock;
         struct lustre_handle *lockh = (struct lustre_handle *)l;
@@ -128,7 +131,7 @@ void mdc_set_lock_data(__u64 *l, void *data)
 
         if (!*l) {
                 EXIT;
-                return;
+                return 0;
         }
 
         lock = ldlm_handle2lock(lockh);
@@ -139,12 +142,12 @@ void mdc_set_lock_data(__u64 *l, void *data)
         if (lock->l_ast_data && lock->l_ast_data != data) {
                 struct inode *new_inode = data;
                 struct inode *old_inode = lock->l_ast_data;
-                unsigned long state = old_inode->i_state & I_FREEING;
-                CERROR("Found existing inode %p/%lu/%u state %lu in lock: "
-                       "setting data to %p/%lu/%u\n", old_inode,
-                       old_inode->i_ino, old_inode->i_generation, state,
-                       new_inode, new_inode->i_ino, new_inode->i_generation);
-                LASSERT(state);
+                LASSERTF(old_inode->i_state & I_FREEING,
+                         "Found existing inode %p/%lu/%u state %lu in lock: "
+                         "setting data to %p/%lu/%u\n", old_inode,
+                         old_inode->i_ino, old_inode->i_generation,
+                         old_inode->i_state,
+                         new_inode, new_inode->i_ino, new_inode->i_generation);
         }
 #endif
         lock->l_ast_data = data;
@@ -152,6 +155,7 @@ void mdc_set_lock_data(__u64 *l, void *data)
         LDLM_LOCK_PUT(lock);
 
         EXIT;
+        return 0;
 }
 EXPORT_SYMBOL(mdc_set_lock_data);
 
@@ -166,12 +170,11 @@ int mdc_change_cbdata(struct obd_export *exp, struct ll_fid *fid,
 
         ldlm_change_cbdata(class_exp2obd(exp)->obd_namespace, &res_id, it, 
                            data);
+
         EXIT;
         return 0;
 }
 
-
-
 /* We always reserve enough space in the reply packet for a stripe MD, because
  * we don't know in advance the file type. */
 int mdc_enqueue(struct obd_export *exp,
@@ -190,6 +193,7 @@ int mdc_enqueue(struct obd_export *exp,
         struct obd_device *obddev = class_exp2obd(exp);
         struct ldlm_res_id res_id =
                 { .name = {data->fid1.id, data->fid1.generation} };
+        ldlm_policy_data_t policy = { .l_inodebits = { MDS_INODELOCK_LOOKUP } };
         int size[6] = {sizeof(struct ldlm_request), sizeof(struct ldlm_intent)};
         int rc, flags = LDLM_FL_HAS_INTENT;
         int repsize[4] = {sizeof(struct ldlm_reply),
@@ -228,35 +232,21 @@ int mdc_enqueue(struct obd_export *exp,
                 lit->opc = (__u64)it->it_op;
 
                 /* pack the intended request */
-                mdc_open_pack(req, 2, data, it->it_create_mode, 0,
+                mdc_open_pack(req->rq_reqmsg, 2, data, it->it_create_mode, 0,
                               it->it_flags, lmm, lmmsize);
                 /* get ready for the reply */
                 reply_buffers = 3;
                 req->rq_replen = lustre_msg_size(3, repsize);
-        } else if (it->it_op & IT_UNLINK) {
-                size[2] = sizeof(struct mds_rec_unlink);
-                size[3] = data->namelen + 1;
-                req = ptlrpc_prep_req(class_exp2cliimp(exp), LDLM_ENQUEUE, 4,
-                                      size, NULL);
-                if (!req)
-                        RETURN(-ENOMEM);
-
-                /* pack the intent */
-                lit = lustre_msg_buf(req->rq_reqmsg, 1, sizeof (*lit));
-                lit->opc = (__u64)it->it_op;
-
-                /* pack the intended request */
-                mdc_unlink_pack(req, 2, data);
-                /* get ready for the reply */
-                reply_buffers = 4;
-                req->rq_replen = lustre_msg_size(4, repsize);
-        } else if (it->it_op & (IT_GETATTR | IT_LOOKUP)) {
+        } else if (it->it_op & (IT_GETATTR | IT_LOOKUP | IT_CHDIR)) {
                 int valid = OBD_MD_FLNOTOBD | OBD_MD_FLEASIZE;
                 size[2] = sizeof(struct mds_body);
                 size[3] = data->namelen + 1;
 
+                if (it->it_op & IT_GETATTR)
+                        policy.l_inodebits.bits = MDS_INODELOCK_UPDATE;
                 req = ptlrpc_prep_req(class_exp2cliimp(exp), LDLM_ENQUEUE, 4,
                                       size, NULL);
+
                 if (!req)
                         RETURN(-ENOMEM);
 
@@ -265,27 +255,45 @@ int mdc_enqueue(struct obd_export *exp,
                 lit->opc = (__u64)it->it_op;
 
                 /* pack the intended request */
-                mdc_getattr_pack(req, valid, 2, it->it_flags, data);
+                mdc_getattr_pack(req->rq_reqmsg, valid, 2, it->it_flags, data);
                 /* get ready for the reply */
                 reply_buffers = 3;
                 req->rq_replen = lustre_msg_size(3, repsize);
         } else if (it->it_op == IT_READDIR) {
+                policy.l_inodebits.bits = MDS_INODELOCK_UPDATE;
                 req = ptlrpc_prep_req(class_exp2cliimp(exp), LDLM_ENQUEUE, 1,
                                       size, NULL);
+                
                 if (!req)
                         RETURN(-ENOMEM);
-
                 /* get ready for the reply */
                 reply_buffers = 1;
                 req->rq_replen = lustre_msg_size(1, repsize);
-        }  else {
+        } else if (it->it_op == IT_UNLINK) {
+                size[2] = sizeof(struct mds_body);
+                policy.l_inodebits.bits = MDS_INODELOCK_UPDATE;
+                req = ptlrpc_prep_req(class_exp2cliimp(exp), LDLM_ENQUEUE, 3,
+                                      size, NULL);
+                if (!req)
+                        RETURN(-ENOMEM);
+
+                /* pack the intended request */
+                mdc_getattr_pack(req->rq_reqmsg, 0,  2, 0, data);
+
+                /* pack the intent */
+                lit = lustre_msg_buf(req->rq_reqmsg, 1, sizeof (*lit));
+                lit->opc = (__u64)it->it_op;
+
+                /* get ready for the reply */
+                reply_buffers = 3;
+                req->rq_replen = lustre_msg_size(3, repsize);
+        } else {
                 LBUG();
                 RETURN(-EINVAL);
         }
-
         mdc_get_rpc_lock(obddev->u.cli.cl_rpc_lock, it);
         rc = ldlm_cli_enqueue(exp, req, obddev->obd_namespace, res_id,
-                              lock_type, NULL, lock_mode, &flags, cb_blocking,
+                              lock_type, &policy, lock_mode, &flags,cb_blocking,
                               cb_completion, NULL, cb_data, NULL, 0, NULL,
                               lockh);
         mdc_put_rpc_lock(obddev->u.cli.cl_rpc_lock, it);
@@ -333,6 +341,17 @@ int mdc_enqueue(struct obd_export *exp,
         it->d.lustre.it_lock_mode = lock_mode;
         it->d.lustre.it_data = req;
 
+        if (it->d.lustre.it_status < 0 && req->rq_replay) {
+                LASSERT(req->rq_transno == 0);
+                /* Don't hold error requests for replay. */
+                spin_lock(&req->rq_lock);
+                req->rq_replay = 0;
+                spin_unlock(&req->rq_lock);
+        }
+
+        DEBUG_REQ(D_RPCTRACE, req, "disposition: %x, status: %d",
+                  it->d.lustre.it_disposition, it->d.lustre.it_status);
+
         /* We know what to expect, so we do any byte flipping required here */
         LASSERT(reply_buffers == 4 || reply_buffers == 3 || reply_buffers == 1);
         if (reply_buffers >= 3) {
@@ -346,10 +365,8 @@ int mdc_enqueue(struct obd_export *exp,
                 }
 
                 if ((body->valid & OBD_MD_FLEASIZE) != 0) {
-                        void *replayea;
-                        /* The eadata is opaque; just check that it is
-                         * there.  Eventually, obd_unpackmd() will check
-                         * the contents */
+                        /* The eadata is opaque; just check that it is there.
+                         * Eventually, obd_unpackmd() will check the contents */
                         eadata = lustre_swab_repbuf(req, 2, body->eadatasize,
                                                     NULL);
                         if (eadata == NULL) {
@@ -357,10 +374,17 @@ int mdc_enqueue(struct obd_export *exp,
                                 RETURN (-EPROTO);
                         }
                         if (it->it_op & IT_OPEN) {
-                                replayea = lustre_msg_buf(req->rq_reqmsg, 4, 
-                                                          obddev->u.cli.cl_max_mds_easize);
+                                void *replayea;
+
+                                replayea = lustre_msg_buf(req->rq_reqmsg, 4,
+                                                          body->eadatasize);
                                 LASSERT(replayea);
                                 memcpy(replayea, eadata, body->eadatasize);
+
+                                LASSERT(req->rq_reqmsg->bufcount == 5);
+                                req->rq_reqmsg->buflens[4] = body->eadatasize;
+                                /* If this isn't the last buffer, we might
+                                 * have to shift other data around. */
                         }
                 }
         }
@@ -398,8 +422,8 @@ EXPORT_SYMBOL(mdc_enqueue);
  */
 int mdc_intent_lock(struct obd_export *exp, struct ll_uctxt *uctxt,
                     struct ll_fid *pfid, const char *name, int len,
-                    void *lmm, int lmmsize,
-                    struct ll_fid *cfid, struct lookup_intent *it, int flags,
+                    void *lmm, int lmmsize, struct ll_fid *cfid,
+                    struct lookup_intent *it, int lookup_flags,
                     struct ptlrpc_request **reqp,
                     ldlm_blocking_callback cb_blocking)
 {
@@ -413,28 +437,39 @@ int mdc_intent_lock(struct obd_export *exp, struct ll_uctxt *uctxt,
         LASSERT(it);
 
         CDEBUG(D_DLMTRACE, "name: %*s in %ld, intent: %s\n", len, name,
-               (unsigned long) pfid->id, ldlm_it2str(it->it_op));
+               pfid ? (unsigned long) pfid->id : 0 , ldlm_it2str(it->it_op));
 
-        if (cfid && (it->it_op == IT_LOOKUP || it->it_op == IT_GETATTR)) {
+        if (cfid && (it->it_op == IT_LOOKUP || it->it_op == IT_GETATTR ||
+                     it->it_op == IT_CHDIR)) {
                 /* We could just return 1 immediately, but since we should only
                  * be called in revalidate_it if we already have a lock, let's
                  * verify that. */
-                struct ldlm_res_id res_id ={.name = {cfid->id, 
+                struct ldlm_res_id res_id ={.name = {cfid->id,
                                                      cfid->generation}};
                 struct lustre_handle lockh;
-                int mode, flags = LDLM_FL_BLOCK_GRANTED;
-
+                ldlm_policy_data_t policy;
+                int mode = LCK_PR;
+
+                /* For the GETATTR case, ll_revalidate_it issues two separate
+                   queries - for LOOKUP and for UPDATE lock - because it cannot
+                   check them together: those two bits might be present in
+                   two separate granted locks */
+                policy.l_inodebits.bits = 
+                                 (it->it_op == IT_GETATTR)?MDS_INODELOCK_UPDATE:
+                                                           MDS_INODELOCK_LOOKUP;
                 mode = LCK_PR;
-                rc = ldlm_lock_match(exp->exp_obd->obd_namespace, flags,
-                                     &res_id, LDLM_PLAIN, NULL, LCK_PR, &lockh);
+                rc = ldlm_lock_match(exp->exp_obd->obd_namespace,
+                                     LDLM_FL_BLOCK_GRANTED, &res_id,
+                                     LDLM_IBITS, &policy, LCK_PR, &lockh);
                 if (!rc) {
                         mode = LCK_PW;
-                        rc = ldlm_lock_match(exp->exp_obd->obd_namespace, flags,
-                                             &res_id, LDLM_PLAIN, NULL, LCK_PW,
+                        rc = ldlm_lock_match(exp->exp_obd->obd_namespace,
+                                             LDLM_FL_BLOCK_GRANTED, &res_id,
+                                             LDLM_IBITS, &policy, LCK_PW,
                                              &lockh);
                 }
                 if (rc) {
-                        memcpy(&it->d.lustre.it_lock_handle, &lockh, 
+                        memcpy(&it->d.lustre.it_lock_handle, &lockh,
                                sizeof(lockh));
                         it->d.lustre.it_lock_mode = mode;
                 }
@@ -453,7 +488,7 @@ int mdc_intent_lock(struct obd_export *exp, struct ll_uctxt *uctxt,
                 struct mdc_op_data op_data;
                 mdc_fid2mdc_op_data(&op_data, uctxt, pfid, cfid, name, len, 0);
 
-                rc = mdc_enqueue(exp, LDLM_PLAIN, it, it_to_lock_mode(it),
+                rc = mdc_enqueue(exp, LDLM_IBITS, it, it_to_lock_mode(it),
                                  &op_data, &lockh, lmm, lmmsize,
                                  ldlm_completion_ast, cb_blocking, NULL);
                 if (rc < 0)
@@ -482,7 +517,12 @@ int mdc_intent_lock(struct obd_export *exp, struct ll_uctxt *uctxt,
         if (cfid != NULL) {
                 it_set_disposition(it, DISP_ENQ_COMPLETE);
                 /* Also: did we find the same inode? */
-                if (memcmp(cfid, &mds_body->fid1, sizeof(*cfid)))
+                /* we have to compare all the fields but type, because
+                 * MDS can return mds/ino/generation triple if inode
+                 * lives on another MDS -bzzz */
+                if (cfid->generation != mds_body->fid1.generation ||
+                                cfid->id != mds_body->fid1.id ||
+                                cfid->mds != mds_body->fid1.mds)
                         RETURN(-ESTALE);
         }
 
@@ -492,11 +532,11 @@ int mdc_intent_lock(struct obd_export *exp, struct ll_uctxt *uctxt,
         if (it->it_op & IT_OPEN) {
                 if (!it_disposition(it, DISP_OPEN_OPEN) ||
                     it->d.lustre.it_status != 0) {
-                        unsigned long flags;
+                        unsigned long irqflags;
 
-                        spin_lock_irqsave(&request->rq_lock, flags);
+                        spin_lock_irqsave(&request->rq_lock, irqflags);
                         request->rq_replay = 0;
-                        spin_unlock_irqrestore(&request->rq_lock, flags);
+                        spin_unlock_irqrestore(&request->rq_lock, irqflags);
                 }
         }
 
@@ -519,7 +559,7 @@ int mdc_intent_lock(struct obd_export *exp, struct ll_uctxt *uctxt,
         } else if (it->it_op == IT_OPEN) {
                 LASSERT(!it_disposition(it, DISP_OPEN_CREATE));
         } else {
-                LASSERT(it->it_op & (IT_GETATTR | IT_LOOKUP));
+                LASSERT(it->it_op & (IT_GETATTR | IT_LOOKUP | IT_CHDIR));
         }
 
         /* If we already have a matching lock, then cancel the new
@@ -529,11 +569,12 @@ int mdc_intent_lock(struct obd_export *exp, struct ll_uctxt *uctxt,
          * intent_finish has performed the iget().) */
         lock = ldlm_handle2lock(&lockh);
         if (lock) {
+                ldlm_policy_data_t policy = lock->l_policy_data;
                 LDLM_DEBUG(lock, "matching against this");
                 LDLM_LOCK_PUT(lock);
                 memcpy(&old_lock, &lockh, sizeof(lockh));
                 if (ldlm_lock_match(NULL, LDLM_FL_BLOCK_GRANTED, NULL,
-                                    LDLM_PLAIN, NULL, LCK_NL, &old_lock)) {
+                                    LDLM_IBITS, &policy, LCK_NL, &old_lock)) {
                         ldlm_lock_decref_and_cancel(&lockh,
                                                     it->d.lustre.it_lock_mode);
                         memcpy(&lockh, &old_lock, sizeof(old_lock));