Whamcloud - gitweb
b=3063
[fs/lustre-release.git] / lustre / liblustre / super.c
index 2bd8248..25ffc0f 100644 (file)
@@ -43,6 +43,8 @@
 #include <inode.h>
 #include <file.h>
 
+#undef LIST_HEAD
+
 #include "llite_lib.h"
 
 static void llu_fsop_gone(struct filesys *fs)
@@ -293,14 +295,6 @@ int llu_inode_getattr(struct inode *inode, struct lov_stripe_md *lsm)
 
         obdo_refresh_inode(inode, &oa, refresh_valid);
 
-/*
-        if (inode->i_blksize < PAGE_CACHE_SIZE)
-                inode->i_blksize = PAGE_CACHE_SIZE;
-
-        CDEBUG(D_INODE, "objid "LPX64" size %Lu, blocks %lu, blksize %lu\n",
-               lsm->lsm_object_id, inode->i_size, inode->i_blocks,
-               inode->i_blksize);
-*/
         RETURN(0);
 }
 
@@ -343,49 +337,6 @@ static struct inode* llu_new_inode(struct filesys *fs,
         return inode;
 }
 
-#if 0
-static int ll_intent_to_lock_mode(struct lookup_intent *it)
-{
-        /* CREAT needs to be tested before open (both could be set) */
-        if (it->it_op & IT_CREAT)
-                return LCK_PW;
-        else if (it->it_op & (IT_READDIR | IT_GETATTR | IT_OPEN | IT_LOOKUP))
-                return LCK_PR;
-
-        LBUG();
-        RETURN(-EINVAL);
-}
-#endif
-
-#if 0
-int ll_it_open_error(int phase, struct lookup_intent *it)
-{
-        if (it_disposition(it, DISP_OPEN_OPEN)) {
-                if (phase == DISP_OPEN_OPEN)
-                        return it->d.lustre.it_status;
-                else
-                        return 0;
-        }
-
-        if (it_disposition(it, DISP_OPEN_CREATE)) {
-                if (phase == DISP_OPEN_CREATE)
-                        return it->d.lustre.it_status;
-                else
-                        return 0;
-        }
-
-        if (it_disposition(it, DISP_LOOKUP_EXECD)) {
-                if (phase == DISP_LOOKUP_EXECD)
-                        return it->d.lustre.it_status;
-                else
-                        return 0;
-        }
-        CERROR("it disp: %X, status: %d\n", it->d.lustre.it_disposition, it->d.lustre.it_status);
-        LBUG();
-        return 0;
-}
-#endif
-
 static int llu_have_md_lock(struct inode *inode)
 {
         struct llu_sb_info *sbi = llu_i2sbi(inode);
@@ -404,15 +355,16 @@ static int llu_have_md_lock(struct inode *inode)
 
         CDEBUG(D_INFO, "trying to match res "LPU64"\n", res_id.name[0]);
 
+        /* FIXME use LDLM_FL_TEST_LOCK instead */
         flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING;
         if (ldlm_lock_match(obddev->obd_namespace, flags, &res_id, LDLM_PLAIN,
-                            NULL, 0, LCK_PR, &lockh)) {
+                            NULL, LCK_PR, &lockh)) {
                 ldlm_lock_decref(&lockh, LCK_PR);
                 RETURN(1);
         }
 
         if (ldlm_lock_match(obddev->obd_namespace, flags, &res_id, LDLM_PLAIN,
-                            NULL, 0, LCK_PW, &lockh)) {
+                            NULL, LCK_PW, &lockh)) {
                 ldlm_lock_decref(&lockh, LCK_PW);
                 RETURN(1);
         }
@@ -481,22 +433,14 @@ static int llu_inode_revalidate(struct inode *inode)
         if (!lsm)       /* object not yet allocated, don't validate size */
                 RETURN(0);
 
-        /*
-         * unfortunately stat comes in through revalidate and we don't
-         * differentiate this use from initial instantiation.  we're
-         * also being wildly conservative and flushing write caches
-         * so that stat really returns the proper size.
-         */
+        /* llu_glimpse_size will prefer locally cached writes if they extend
+         * the file */
         {
-                struct ldlm_extent extent = {0, OBD_OBJECT_EOF};
-                struct lustre_handle lockh = {0};
+                struct ost_lvb lvb;
                 ldlm_error_t err;
 
-                err = llu_extent_lock(NULL, inode, lsm, LCK_PR, &extent, &lockh);
-                if (err != ELDLM_OK)
-                        RETURN(err);
-
-                llu_extent_unlock(NULL, inode, lsm, LCK_PR, &lockh);
+                err = llu_glimpse_size(inode, &lvb);
+                lli->lli_st_size = lvb.lvb_size;
         }
         RETURN(0);
 }
@@ -541,14 +485,7 @@ static int llu_iop_getattr(struct pnode *pno,
         rc = llu_inode_revalidate(ino);
         if (!rc) {
                 copy_stat_buf(ino, b);
-
-                if (llu_i2info(ino)->lli_it) {
-                        struct lookup_intent *it;
-
-                        LL_GET_INTENT(ino, it);
-                        it->it_op_release(it);
-                        OBD_FREE(it, sizeof(*it));
-                }
+                LASSERT(!llu_i2info(ino)->lli_it);
         }
 
         RETURN(rc);
@@ -653,8 +590,6 @@ out:
  * I don't believe it is possible to get e.g. ATTR_MTIME_SET and ATTR_SIZE
  * at the same time.
  */
-#define OST_ATTR (ATTR_MTIME | ATTR_MTIME_SET | ATTR_CTIME | \
-                  ATTR_ATIME | ATTR_ATIME_SET | ATTR_SIZE)
 int llu_setattr_raw(struct inode *inode, struct iattr *attr)
 {
         struct lov_stripe_md *lsm = llu_i2info(inode)->lli_smd;
@@ -702,7 +637,7 @@ int llu_setattr_raw(struct inode *inode, struct iattr *attr)
         /* If only OST attributes being set on objects, don't do MDS RPC.
          * In that case, we need to check permissions and update the local
          * inode ourselves so we can call obdo_from_inode() always. */
-        if (ia_valid & (lsm ? ~(OST_ATTR | ATTR_FROM_OPEN | ATTR_RAW) : ~0)) {
+        if (ia_valid & (lsm ? ~(ATTR_SIZE | ATTR_FROM_OPEN | ATTR_RAW) : ~0)) {
                 struct lustre_md md;
                 llu_prepare_mdc_op_data(&op_data, inode, NULL, NULL, 0, 0);
 
@@ -752,29 +687,18 @@ int llu_setattr_raw(struct inode *inode, struct iattr *attr)
         }
 
         if (ia_valid & ATTR_SIZE) {
-                struct ldlm_extent extent = { .start = attr->ia_size,
-                                              .end = OBD_OBJECT_EOF };
+                ldlm_policy_data_t policy = { .l_extent = {attr->ia_size,
+                                                           OBD_OBJECT_EOF} };
                 struct lustre_handle lockh = { 0 };
                 int err, ast_flags = 0;
                 /* XXX when we fix the AST intents to pass the discard-range
                  * XXX extent, make ast_flags always LDLM_AST_DISCARD_DATA
                  * XXX here. */
-
-                /* Writeback uses inode->i_size to determine how far out
-                 * its cached pages go.  ll_truncate gets a PW lock, canceling
-                 * our lock, _after_ it has updated i_size.  this can confuse
-                 *
-                 * We really need to get our PW lock before we change
-                 * inode->i_size.  If we don't we can race with other
-                 * i_size updaters on our node, like ll_file_read.  We
-                 * can also race with i_size propogation to other
-                 * nodes through dirtying and writeback of final cached
-                 * pages.  This last one is especially bad for racing
-                 * o_append users on other nodes. */
-                if (extent.start == 0)
+                if (attr->ia_size == 0)
                         ast_flags = LDLM_AST_DISCARD_DATA;
-                rc = llu_extent_lock_no_validate(NULL, inode, lsm, LCK_PW,
-                                                 &extent, &lockh, ast_flags);
+
+                rc = llu_extent_lock(NULL, inode, lsm, LCK_PW, &policy,
+                                     &lockh, ast_flags);
                 if (rc != ELDLM_OK) {
                         if (rc > 0)
                                 RETURN(-ENOLCK);
@@ -810,7 +734,7 @@ int llu_setattr_raw(struct inode *inode, struct iattr *attr)
         RETURN(rc);
 }
 
-/* FIXME here we simply act as a thin layer to glue it with
+/* here we simply act as a thin layer to glue it with
  * llu_setattr_raw(), which is copy from kernel
  */
 static int llu_iop_setattr(struct pnode *pno,
@@ -844,7 +768,7 @@ static int llu_iop_setattr(struct pnode *pno,
                 iattr.ia_valid |= ATTR_GID;
         }
         if (mask & SETATTR_LEN) {
-                iattr.ia_size = stbuf->st_size; /* FIXME signed expansion problem */
+                iattr.ia_size = stbuf->st_size; /* XXX signed expansion problem */
                 iattr.ia_valid |= ATTR_SIZE;
         }
 
@@ -950,10 +874,6 @@ static int llu_iop_readlink(struct pnode *pno, char *data, size_t bufsize)
         int rc;
         ENTRY;
 
-        /* on symlinks lli_open_sem protects lli_symlink_name allocation/data */
-/*
-        down(&lli->lli_open_sem);
-*/
         rc = llu_readlink_internal(inode, &request, &symname);
         if (rc)
                 GOTO(out, rc);
@@ -963,9 +883,6 @@ static int llu_iop_readlink(struct pnode *pno, char *data, size_t bufsize)
 
         ptlrpc_req_finished(request);
  out:
-/*
-        up(&lli->lli_open_sem);
-*/
         RETURN(rc);
 }
 
@@ -1013,80 +930,6 @@ static int llu_iop_mknod_raw(struct pnode *pno,
         RETURN(err);
 }
 
-#if 0
-static int llu_mdc_unlink(struct inode *dir, struct inode *child, __u32 mode,
-                         const char *name, int len)
-{
-        struct ptlrpc_request *request = NULL;
-        struct mds_body *body;
-        struct lov_mds_md *eadata;
-        struct lov_stripe_md *lsm = NULL;
-        struct obd_trans_info oti = { 0 };
-        struct mdc_op_data op_data;
-        struct obdo *oa;
-        int rc;
-        ENTRY;
-
-        llu_prepare_mdc_op_data(&op_data, dir, child, name, len, mode);
-        rc = mdc_unlink(&llu_i2sbi(dir)->ll_mdc_conn, &op_data, &request);
-        if (rc)
-                GOTO(out, rc);
-        /* req is swabbed so this is safe */
-        body = lustre_msg_buf(request->rq_repmsg, 0, sizeof(*body));
-
-        if (!(body->valid & OBD_MD_FLEASIZE))
-                GOTO(out, rc = 0);
-
-        if (body->eadatasize == 0) {
-                CERROR("OBD_MD_FLEASIZE set but eadatasize zero\n");
-                GOTO(out, rc = -EPROTO);
-        }
-
-        /* The MDS sent back the EA because we unlinked the last reference
-         * to this file. Use this EA to unlink the objects on the OST.
-         * It's opaque so we don't swab here; we leave it to obd_unpackmd() to
-         * check it is complete and sensible. */
-        eadata = lustre_swab_repbuf(request, 1, body->eadatasize, NULL);
-        LASSERT(eadata != NULL);
-        if (eadata == NULL) {
-                CERROR("Can't unpack MDS EA data\n");
-                GOTO(out, rc = -EPROTO);
-        }
-
-        rc = obd_unpackmd(llu_i2obdconn(dir), &lsm, eadata, body->eadatasize);
-        if (rc < 0) {
-                CERROR("obd_unpackmd: %d\n", rc);
-                GOTO(out, rc);
-        }
-        LASSERT(rc >= sizeof(*lsm));
-
-        oa = obdo_alloc();
-        if (oa == NULL)
-                GOTO(out_free_memmd, rc = -ENOMEM);
-
-        oa->o_id = lsm->lsm_object_id;
-        oa->o_mode = body->mode & S_IFMT;
-        oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE;
-
-        if (body->valid & OBD_MD_FLCOOKIE) {
-                oa->o_valid |= OBD_MD_FLCOOKIE;
-                oti.oti_logcookies = lustre_msg_buf(request->rq_repmsg, 3,
-                                                    body->eadatasize);
-        }
-
-        rc = obd_destroy(llu_i2obdconn(dir), oa, lsm, &oti);
-        obdo_free(oa);
-        if (rc)
-                CERROR("obd destroy objid 0x"LPX64" error %d\n",
-                       lsm->lsm_object_id, rc);
- out_free_memmd:
-        obd_free_memmd(llu_i2obdconn(dir), &lsm);
- out:
-        ptlrpc_req_finished(request);
-        return rc;
-}
-#endif
-
 static int llu_iop_link_raw(struct pnode *old, struct pnode *new)
 {
         struct inode *src = old->p_base->pb_ino;
@@ -1176,7 +1019,7 @@ static int llu_iop_rename_raw(struct pnode *old, struct pnode *new)
         RETURN(rc);
 }
 
-#if 0
+#ifdef _HAVE_STATVFS
 static int llu_statfs_internal(struct llu_sb_info *sbi,
                                struct obd_statfs *osfs,
                                unsigned long max_age)
@@ -1221,7 +1064,7 @@ static int llu_statfs_internal(struct llu_sb_info *sbi,
         RETURN(rc);
 }
 
-static int llu_statfs(struct llu_sb_info *sbi, struct kstatfs *sfs)
+static int llu_statfs(struct llu_sb_info *sbi, struct statfs *sfs)
 {
         struct obd_statfs osfs;
         int rc;
@@ -1284,7 +1127,7 @@ static int llu_iop_statvfs(struct pnode *pno,
 
         RETURN(0);
 }
-#endif
+#endif /* _HAVE_STATVFS */
 
 static int llu_iop_mkdir_raw(struct pnode *pno, mode_t mode)
 {
@@ -1344,10 +1187,78 @@ static int llu_iop_fcntl(struct inode *ino, int cmd, va_list ap)
         return -ENOSYS;
 }
 
+static int llu_get_cwlock(struct inode *inode, unsigned long arg) /* LL_IOC_CW_LOCK handler */
+{
+        struct llu_inode_info *lli = llu_i2info(inode);
+        struct ll_file_data *fd = lli->lli_file_data; /* NOTE(review): used below without a NULL check */
+        ldlm_policy_data_t policy = { .l_extent = { .start = 0, /* extent spans 0..OBD_OBJECT_EOF */
+                                                    .end = OBD_OBJECT_EOF}};
+        struct lustre_handle lockh = { 0 };
+        struct lov_stripe_md *lsm = lli->lli_smd;
+        ldlm_error_t err;
+        int flags = 0;
+        ENTRY;
+
+        if (fd->fd_flags & LL_FILE_CW_LOCKED) { /* fd already marked as holding a CW lock */
+                RETURN(-EINVAL);
+        }
+
+        policy.l_extent.gid = arg; /* caller's value is stored as the extent gid */
+        if (lli->lli_open_flags & O_NONBLOCK) /* O_NONBLOCK open => LDLM_FL_BLOCK_NOWAIT */
+                flags = LDLM_FL_BLOCK_NOWAIT;
+
+        err = llu_extent_lock(fd, inode, lsm, LCK_CW, &policy, &lockh, flags);
+        if (err)
+                RETURN(err); /* lock error is passed back unchanged */
+
+        fd->fd_flags |= LL_FILE_CW_LOCKED|LL_FILE_IGNORE_LOCK; /* both bits are cleared again in llu_put_cwlock() */
+        fd->fd_gid = arg; /* llu_put_cwlock() compares its argument against this */
+        memcpy(&fd->fd_cwlockh, &lockh, sizeof(lockh)); /* handle kept for the later unlock */
+
+        RETURN(0);
+}
+
+static int llu_put_cwlock(struct inode *inode, unsigned long arg) /* LL_IOC_CW_UNLOCK handler */
+{
+        struct llu_inode_info *lli = llu_i2info(inode);
+        struct ll_file_data *fd = lli->lli_file_data; /* NOTE(review): used below without a NULL check */
+        struct lov_stripe_md *lsm = lli->lli_smd;
+        ldlm_error_t err;
+        ENTRY;
+
+        if (!(fd->fd_flags & LL_FILE_CW_LOCKED)) /* no CW lock recorded on this fd */
+                RETURN(-EINVAL);
+
+        if (fd->fd_gid != arg) /* argument must equal the gid saved by llu_get_cwlock() */
+                RETURN(-EINVAL);
+
+        fd->fd_flags &= ~(LL_FILE_CW_LOCKED|LL_FILE_IGNORE_LOCK); /* flags are dropped before the unlock is attempted */
+
+        err = llu_extent_unlock(fd, inode, lsm, LCK_CW, &fd->fd_cwlockh);
+        if (err)
+                RETURN(err); /* NOTE(review): flags already cleared here, fd_gid/fd_cwlockh still set */
+
+        fd->fd_gid = 0; /* forget the saved gid and lock handle */
+        memset(&fd->fd_cwlockh, 0, sizeof(fd->fd_cwlockh));
+
+        RETURN(0);
+}       
+
 static int llu_iop_ioctl(struct inode *ino, unsigned long int request,
                          va_list ap)
 {
-        CERROR("liblustre did not support ioctl\n");
+        unsigned long arg; /* the one unsigned long argument read from ap */
+
+        switch (request) { /* only the two CW lock requests are handled */
+        case LL_IOC_CW_LOCK:
+                arg = va_arg(ap, unsigned long);
+                return llu_get_cwlock(ino, arg); /* result returned to the caller as-is */
+        case LL_IOC_CW_UNLOCK:
+                arg = va_arg(ap, unsigned long);
+                return llu_put_cwlock(ino, arg); /* result returned to the caller as-is */
+        } /* no default case: any other request falls through to the error below */
+
+        CERROR("did not support ioctl cmd %lx\n", request);
         return -ENOSYS;
 }
 
@@ -1387,12 +1298,15 @@ struct inode *llu_iget(struct filesys *fs, struct lustre_md *md)
 
         inode = _sysio_i_find(fs, &fileid);
         if (inode) {
-                if (llu_i2info(inode)->lli_st_generation ==
-                    md->body->generation) {
+                struct llu_inode_info *lli = llu_i2info(inode);
+
+                if (lli->lli_stale_flag ||
+                    lli->lli_st_generation != md->body->generation)
+                        I_RELE(inode);
+                else {
                         llu_update_inode(inode, md->body, md->lsm);
                         return inode;
-                } else
-                        I_RELE(inode);
+                }
         }
 
         inode = llu_new_inode(fs, &fid);
@@ -1449,7 +1363,6 @@ llu_fsswop_mount(const char *source,
                         GOTO(out_free, err = -EINVAL);
                 }
 
-                /* XXX */
                 /* generate a string unique to this super, let's try
                  the address of the super itself.*/
                 len = (sizeof(sbi) * 2) + 1; 
@@ -1460,7 +1373,7 @@ llu_fsswop_mount(const char *source,
 
                 cfg.cfg_instance = sbi->ll_instance;
                 cfg.cfg_uuid = sbi->ll_sb_uuid;
-                err = liblustre_process_log(&cfg);
+                err = liblustre_process_log(&cfg, 1);
                 if (err < 0) {
                         CERROR("Unable to process log: %s\n", g_zconf_profile);
 
@@ -1622,7 +1535,7 @@ static struct inode_ops llu_inode_ops = {
         inop_lookup:    llu_iop_lookup,
         inop_getattr:   llu_iop_getattr,
         inop_setattr:   llu_iop_setattr,
-        inop_getdirentries:     NULL,
+        inop_getdirentries:     llu_iop_getdirentries,
         inop_mkdir:     llu_iop_mkdir_raw,
         inop_rmdir:     llu_iop_rmdir_raw,
         inop_symlink:   llu_iop_symlink_raw,
@@ -1640,9 +1553,10 @@ static struct inode_ops llu_inode_ops = {
         inop_datasync:  llu_iop_datasync,
         inop_ioctl:     llu_iop_ioctl,
         inop_mknod:     llu_iop_mknod_raw,
-#if 0
+#ifdef _HAVE_STATVFS
         inop_statvfs:   llu_iop_statvfs,
 #endif
         inop_gone:      llu_iop_gone,
 };
 
+#warning "time_after() defined in liblustre.h need to be rewrite in userspace"