Whamcloud - gitweb
LU-10030 llite: extend inode flags into GET/SETXATTR ioctl
[fs/lustre-release.git] / lustre / llite / file.c
index 2607591..d640b0d 100644 (file)
@@ -45,9 +45,8 @@
 #ifdef HAVE_UIDGID_HEADER
 # include <linux/uidgid.h>
 #endif
-#include <lustre/ll_fiemap.h>
 
-#include <uapi/linux/lustre_ioctl.h>
+#include <uapi/linux/lustre/lustre_ioctl.h>
 #include <lustre_swab.h>
 
 #include "cl_object.h"
@@ -60,10 +59,6 @@ ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg);
 static int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
                          bool *lease_broken);
 
-static enum llioc_iter
-ll_iocontrol_call(struct inode *inode, struct file *file,
-                 unsigned int cmd, unsigned long arg, int *rcp);
-
 static struct ll_file_data *ll_file_data_get(void)
 {
        struct ll_file_data *fd;
@@ -1029,7 +1024,7 @@ int ll_merge_attr(const struct lu_env *env, struct inode *inode)
        cl_object_attr_unlock(obj);
 
        if (rc != 0)
-               GOTO(out_size_unlock, rc);
+               GOTO(out_size_unlock, rc = (rc == -ENODATA ? 0 : rc));
 
        if (atime < attr->cat_atime)
                atime = attr->cat_atime;
@@ -1088,12 +1083,15 @@ static int ll_file_io_ptask(struct cfs_ptask *ptask);
 static void ll_io_init(struct cl_io *io, struct file *file, enum cl_io_type iot)
 {
        struct inode *inode = file_inode(file);
+       struct ll_file_data *fd  = LUSTRE_FPRIVATE(file);
 
        memset(&io->u.ci_rw.rw_iter, 0, sizeof(io->u.ci_rw.rw_iter));
        init_sync_kiocb(&io->u.ci_rw.rw_iocb, file);
        io->u.ci_rw.rw_file = file;
        io->u.ci_rw.rw_ptask = ll_file_io_ptask;
        io->u.ci_rw.rw_nonblock = !!(file->f_flags & O_NONBLOCK);
+       io->ci_lock_no_expand = fd->ll_lock_no_expand;
+
        if (iot == CIT_WRITE) {
                io->u.ci_rw.rw_append = !!(file->f_flags & O_APPEND);
                io->u.ci_rw.rw_sync   = !!(file->f_flags & O_SYNC ||
@@ -1735,20 +1733,20 @@ int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
                                        (struct lov_user_md_v3 *)lmm);
                        if (S_ISREG(body->mbo_mode))
                                lustre_swab_lov_user_md_objects(
-                                ((struct lov_user_md_v3 *)lmm)->lmm_objects,
-                                stripe_count);
+                                   ((struct lov_user_md_v3 *)lmm)->lmm_objects,
+                                   stripe_count);
                } else if (lmm->lmm_magic ==
                           cpu_to_le32(LOV_MAGIC_COMP_V1)) {
                        lustre_swab_lov_comp_md_v1(
                                        (struct lov_comp_md_v1 *)lmm);
                }
-        }
+       }
 
 out:
-        *lmmp = lmm;
-        *lmm_size = lmmsize;
-        *request = req;
-        return rc;
+       *lmmp = lmm;
+       *lmm_size = lmmsize;
+       *request = req;
+       return rc;
 }
 
 static int ll_lov_setea(struct inode *inode, struct file *file,
@@ -2440,6 +2438,189 @@ static int ll_file_futimes_3(struct file *file, const struct ll_futimes_3 *lfu)
        RETURN(rc);
 }
 
+/* Translate a user-space lockahead mode into the matching cl_lock mode.
+ *
+ * The result is returned as an int rather than as enum cl_lock_mode so that
+ * the error case stays negative: an enum without negative enumerators may be
+ * given an unsigned type by the compiler, in which case a "< 0" test on the
+ * returned value could never be true.
+ *
+ * \param[in] mode     lock mode requested by user space
+ *
+ * \retval CLM_READ    for MODE_READ_USER
+ * \retval CLM_WRITE   for MODE_WRITE_USER
+ * \retval -EINVAL     for any other mode
+ */
+static int cl_mode_user_to_kernel(enum lock_mode_user mode)
+{
+       switch (mode) {
+       case MODE_READ_USER:
+               return CLM_READ;
+       case MODE_WRITE_USER:
+               return CLM_WRITE;
+       default:
+               return -EINVAL;
+       }
+}
+
+static const char *const user_lockname[] = LOCK_MODE_NAMES;
+
+/* Used to allow the upper layers of the client to request an LDLM lock
+ * without doing an actual read or write.
+ *
+ * Used for ladvise lockahead to manually request specific locks.
+ *
+ * The requested mode is validated before anything else, so an unsupported
+ * mode is rejected with -EINVAL before it is used to index user_lockname[]
+ * or stored in the lock descriptor.
+ *
+ * \param[in] file     file this ladvise lock request is on
+ * \param[in] ladvise  ladvise struct describing this lock request
+ *
+ * \retval 0           success, no detailed result available (sync requests
+ *                     and requests sent to the server [not handled locally]
+ *                     cannot return detailed results)
+ * \retval LLA_RESULT_{SAME,DIFFERENT} - detailed result of the lock request,
+ *                                      see definitions for details.
+ * \retval negative    negative errno on error
+ */
+int ll_file_lock_ahead(struct file *file, struct llapi_lu_ladvise *ladvise)
+{
+       struct lu_env *env = NULL;
+       struct cl_io *io  = NULL;
+       struct cl_lock *lock = NULL;
+       struct cl_lock_descr *descr = NULL;
+       struct dentry *dentry = file->f_path.dentry;
+       struct inode *inode = dentry->d_inode;
+       /* loff_t keeps the full 64-bit user offsets on 32-bit kernels */
+       loff_t start = ladvise->lla_start;
+       loff_t end = ladvise->lla_end;
+       /* int, not enum cl_lock_mode, so that -EINVAL can be detected */
+       int cl_mode;
+       int result;
+       __u16 refcheck;
+
+       ENTRY;
+
+       /* Validate the mode first: it indexes user_lockname[] below */
+       cl_mode = cl_mode_user_to_kernel(ladvise->lla_lockahead_mode);
+       if (cl_mode < 0)
+               GOTO(out, result = cl_mode);
+
+       CDEBUG(D_VFSTRACE, "Lock request: file=%.*s, inode=%p, mode=%s "
+              "start=%llu, end=%llu\n", dentry->d_name.len,
+              dentry->d_name.name, inode,
+              user_lockname[ladvise->lla_lockahead_mode], (__u64) start,
+              (__u64) end);
+
+       /* Get IO environment */
+       result = cl_io_get(inode, &env, &io, &refcheck);
+       if (result <= 0)
+               GOTO(out, result);
+
+       result = cl_io_init(env, io, CIT_MISC, io->ci_obj);
+       if (result > 0) {
+               /*
+                * nothing to do for this io. This currently happens when
+                * stripe sub-objects are not yet created.
+                */
+               result = io->ci_result;
+       } else if (result == 0) {
+               lock = vvp_env_lock(env);
+               descr = &lock->cll_descr;
+
+               descr->cld_obj   = io->ci_obj;
+               /* Convert byte offsets to pages */
+               descr->cld_start = cl_index(io->ci_obj, start);
+               descr->cld_end   = cl_index(io->ci_obj, end);
+               descr->cld_mode  = cl_mode;
+               /* CEF_MUST is used because we do not want to convert a
+                * lockahead request to a lockless lock */
+               descr->cld_enq_flags = CEF_MUST | CEF_LOCK_NO_EXPAND |
+                                      CEF_NONBLOCK;
+
+               if (ladvise->lla_peradvice_flags & LF_ASYNC)
+                       descr->cld_enq_flags |= CEF_SPECULATIVE;
+
+               result = cl_lock_request(env, io, lock);
+
+               /* On success, we need to release the lock */
+               if (result >= 0)
+                       cl_lock_release(env, lock);
+       }
+       cl_io_fini(env, io);
+       cl_env_put(env, &refcheck);
+
+       /* -ECANCELED indicates a matching lock with a different extent
+        * was already present, and -EEXIST indicates a matching lock
+        * on exactly the same extent was already present.
+        * We convert them to positive values for userspace to make
+        * recognizing true errors easier.
+        * Note we can only return these detailed results on async requests,
+        * as sync requests look the same as i/o requests for locking. */
+       if (result == -ECANCELED)
+               result = LLA_RESULT_DIFFERENT;
+       else if (result == -EEXIST)
+               result = LLA_RESULT_SAME;
+
+out:
+       RETURN(result);
+}
+static const char *const ladvise_names[] = LU_LADVISE_NAMES;
+
+/* Validate one ladvise request before it is acted upon.
+ *
+ * The advice type must be a known one: not LU_LADVISE_INVALID and below
+ * LU_LADVISE_MAX (the last supported advice is LU_LADVISE_MAX - 1).
+ * The remaining checks depend on the advice:
+ *  - LU_LADVISE_LOCKNOEXPAND: flags must lie within LF_LOCKNOEXPAND_MASK;
+ *  - LU_LADVISE_LOCKAHEAD: the lock mode must be non-zero and below
+ *    MODE_MAX_USER, and the common checks below also apply;
+ *  - all other advices: flags must lie within LF_DEFAULT_MASK and
+ *    lla_start must be strictly less than lla_end.
+ *
+ * \param[in] inode    inode the advice applies to (used for log messages)
+ * \param[in] ladvise  the advice to validate
+ *
+ * \retval 0           the advice passed all checks
+ * \retval -EINVAL     the advice type, flags, mode or range is invalid
+ */
+static int ll_ladvise_sanity(struct inode *inode,
+                            struct llapi_lu_ladvise *ladvise)
+{
+       enum lu_ladvise_type advice = ladvise->lla_advice;
+       /* Note the peradvice flags is a 32 bit field, so per advice flags must
+        * be in the first 32 bits of enum ladvise_flags */
+       __u32 flags = ladvise->lla_peradvice_flags;
+       int rc = 0;
+
+       /* LU_LADVISE_MAX itself is not a valid advice, and it must not be
+        * used to index ladvise_names[] in the messages below */
+       if (advice >= LU_LADVISE_MAX || advice == LU_LADVISE_INVALID) {
+               rc = -EINVAL;
+               CDEBUG(D_VFSTRACE, "%s: advice with value '%d' not recognized, "
+                      "last supported advice is %s (value '%d'): rc = %d\n",
+                      ll_get_fsname(inode->i_sb, NULL, 0), advice,
+                      ladvise_names[LU_LADVISE_MAX - 1], LU_LADVISE_MAX - 1,
+                      rc);
+               GOTO(out, rc);
+       }
+
+       /* Per-advice checks */
+       switch (advice) {
+       case LU_LADVISE_LOCKNOEXPAND:
+               if (flags & ~LF_LOCKNOEXPAND_MASK) {
+                       rc = -EINVAL;
+                       CDEBUG(D_VFSTRACE, "%s: Invalid flags (%x) for %s: "
+                              "rc = %d\n",
+                              ll_get_fsname(inode->i_sb, NULL, 0), flags,
+                              ladvise_names[advice], rc);
+                       GOTO(out, rc);
+               }
+               break;
+       case LU_LADVISE_LOCKAHEAD:
+               /* Currently only READ and WRITE modes can be requested */
+               if (ladvise->lla_lockahead_mode >= MODE_MAX_USER ||
+                   ladvise->lla_lockahead_mode == 0) {
+                       rc = -EINVAL;
+                       CDEBUG(D_VFSTRACE, "%s: Invalid mode (%d) for %s: "
+                              "rc = %d\n",
+                              ll_get_fsname(inode->i_sb, NULL, 0),
+                              ladvise->lla_lockahead_mode,
+                              ladvise_names[advice], rc);
+                       GOTO(out, rc);
+               }
+               /* fallthrough */
+       case LU_LADVISE_WILLREAD:
+       case LU_LADVISE_DONTNEED:
+       default:
+               /* Note fall through above - These checks apply to all advices
+                * except LOCKNOEXPAND */
+               if (flags & ~LF_DEFAULT_MASK) {
+                       rc = -EINVAL;
+                       CDEBUG(D_VFSTRACE, "%s: Invalid flags (%x) for %s: "
+                              "rc = %d\n",
+                              ll_get_fsname(inode->i_sb, NULL, 0), flags,
+                              ladvise_names[advice], rc);
+                       GOTO(out, rc);
+               }
+               /* An empty or inverted range is rejected */
+               if (ladvise->lla_start >= ladvise->lla_end) {
+                       rc = -EINVAL;
+                       CDEBUG(D_VFSTRACE, "%s: Invalid range (%llu to %llu) "
+                              "for %s: rc = %d\n",
+                              ll_get_fsname(inode->i_sb, NULL, 0),
+                              ladvise->lla_start, ladvise->lla_end,
+                              ladvise_names[advice], rc);
+                       GOTO(out, rc);
+               }
+               break;
+       }
+
+out:
+       return rc;
+}
+#undef ERRSIZE
+
 /*
  * Give file access advices
  *
@@ -2489,6 +2670,15 @@ static int ll_ladvise(struct inode *inode, struct file *file, __u64 flags,
        RETURN(rc);
 }
 
+/* Switch the per-open-file "no lock expansion" setting on or off.
+ *
+ * Stores the setting in the ll_lock_no_expand field of the file's private
+ * data: it is cleared when LF_UNSET is present in \a flags and set otherwise.
+ *
+ * \param[in] file     open file whose private data is updated
+ * \param[in] flags    per-advice flags; only LF_UNSET is examined here
+ *
+ * \retval 0           always
+ */
+static int ll_lock_noexpand(struct file *file, int flags)
+{
+       struct ll_file_data *fdata = LUSTRE_FPRIVATE(file);
+
+       if (flags & LF_UNSET)
+               fdata->ll_lock_no_expand = 0;
+       else
+               fdata->ll_lock_no_expand = 1;
+
+       return 0;
+}
+
 int ll_ioctl_fsgetxattr(struct inode *inode, unsigned int cmd,
                        unsigned long arg)
 {
@@ -2499,6 +2689,7 @@ int ll_ioctl_fsgetxattr(struct inode *inode, unsigned int cmd,
                           sizeof(fsxattr)))
                RETURN(-EFAULT);
 
+       fsxattr.fsx_xflags = ll_inode_to_ext_flags(inode->i_flags);
        fsxattr.fsx_projid = ll_i2info(inode)->lli_projid;
        if (copy_to_user((struct fsxattr __user *)arg,
                         &fsxattr, sizeof(fsxattr)))
@@ -2515,6 +2706,7 @@ int ll_ioctl_fssetxattr(struct inode *inode, unsigned int cmd,
        struct ptlrpc_request *req = NULL;
        int rc = 0;
        struct fsxattr fsxattr;
+       struct cl_object *obj;
 
        /* only root could change project ID */
        if (!cfs_capable(CFS_CAP_SYS_ADMIN))
@@ -2530,12 +2722,26 @@ int ll_ioctl_fssetxattr(struct inode *inode, unsigned int cmd,
                           sizeof(fsxattr)))
                GOTO(out_fsxattr1, rc = -EFAULT);
 
+       op_data->op_attr_flags = fsxattr.fsx_xflags;
        op_data->op_projid = fsxattr.fsx_projid;
-       op_data->op_attr.ia_valid |= MDS_ATTR_PROJID;
+       op_data->op_attr.ia_valid |= (MDS_ATTR_PROJID | ATTR_ATTR_FLAG);
        rc = md_setattr(ll_i2sbi(inode)->ll_md_exp, op_data, NULL,
                        0, &req);
        ptlrpc_req_finished(req);
 
+       obj = ll_i2info(inode)->lli_clob;
+       if (obj) {
+               struct iattr *attr;
+
+               inode->i_flags = ll_ext_to_inode_flags(fsxattr.fsx_xflags);
+               OBD_ALLOC_PTR(attr);
+               if (attr == NULL)
+                       GOTO(out_fsxattr1, rc = -ENOMEM);
+               attr->ia_valid = ATTR_ATTR_FLAG;
+               rc = cl_setattr_ost(obj, attr, fsxattr.fsx_xflags);
+
+               OBD_FREE_PTR(attr);
+       }
 out_fsxattr1:
        ll_finish_md_op_data(op_data);
        RETURN(rc);
@@ -2890,70 +3096,93 @@ out:
                RETURN(ll_file_futimes_3(file, &lfu));
        }
        case LL_IOC_LADVISE: {
-               struct llapi_ladvise_hdr *ladvise_hdr;
+               struct llapi_ladvise_hdr *k_ladvise_hdr;
+               struct llapi_ladvise_hdr __user *u_ladvise_hdr;
                int i;
                int num_advise;
-               int alloc_size = sizeof(*ladvise_hdr);
+               int alloc_size = sizeof(*k_ladvise_hdr);
 
                rc = 0;
-               OBD_ALLOC_PTR(ladvise_hdr);
-               if (ladvise_hdr == NULL)
+               u_ladvise_hdr = (void __user *)arg;
+               OBD_ALLOC_PTR(k_ladvise_hdr);
+               if (k_ladvise_hdr == NULL)
                        RETURN(-ENOMEM);
 
-               if (copy_from_user(ladvise_hdr,
-                                  (const struct llapi_ladvise_hdr __user *)arg,
-                                  alloc_size))
+               if (copy_from_user(k_ladvise_hdr, u_ladvise_hdr, alloc_size))
                        GOTO(out_ladvise, rc = -EFAULT);
 
-               if (ladvise_hdr->lah_magic != LADVISE_MAGIC ||
-                   ladvise_hdr->lah_count < 1)
+               if (k_ladvise_hdr->lah_magic != LADVISE_MAGIC ||
+                   k_ladvise_hdr->lah_count < 1)
                        GOTO(out_ladvise, rc = -EINVAL);
 
-               num_advise = ladvise_hdr->lah_count;
+               num_advise = k_ladvise_hdr->lah_count;
                if (num_advise >= LAH_COUNT_MAX)
                        GOTO(out_ladvise, rc = -EFBIG);
 
-               OBD_FREE_PTR(ladvise_hdr);
-               alloc_size = offsetof(typeof(*ladvise_hdr),
+               OBD_FREE_PTR(k_ladvise_hdr);
+               alloc_size = offsetof(typeof(*k_ladvise_hdr),
                                      lah_advise[num_advise]);
-               OBD_ALLOC(ladvise_hdr, alloc_size);
-               if (ladvise_hdr == NULL)
+               OBD_ALLOC(k_ladvise_hdr, alloc_size);
+               if (k_ladvise_hdr == NULL)
                        RETURN(-ENOMEM);
 
                /*
                 * TODO: submit multiple advices to one server in a single RPC
                 */
-               if (copy_from_user(ladvise_hdr,
-                                  (const struct llapi_ladvise_hdr __user *)arg,
-                                  alloc_size))
+               if (copy_from_user(k_ladvise_hdr, u_ladvise_hdr, alloc_size))
                        GOTO(out_ladvise, rc = -EFAULT);
 
                for (i = 0; i < num_advise; i++) {
-                       rc = ll_ladvise(inode, file, ladvise_hdr->lah_flags,
-                                       &ladvise_hdr->lah_advise[i]);
+                       struct llapi_lu_ladvise *k_ladvise =
+                                       &k_ladvise_hdr->lah_advise[i];
+                       struct llapi_lu_ladvise __user *u_ladvise =
+                                       &u_ladvise_hdr->lah_advise[i];
+
+                       rc = ll_ladvise_sanity(inode, k_ladvise);
                        if (rc)
+                               GOTO(out_ladvise, rc);
+
+                       switch (k_ladvise->lla_advice) {
+                       case LU_LADVISE_LOCKNOEXPAND:
+                               rc = ll_lock_noexpand(file,
+                                              k_ladvise->lla_peradvice_flags);
+                               GOTO(out_ladvise, rc);
+                       case LU_LADVISE_LOCKAHEAD:
+
+                               rc = ll_file_lock_ahead(file, k_ladvise);
+
+                               if (rc < 0)
+                                       GOTO(out_ladvise, rc);
+
+                               if (put_user(rc,
+                                            &u_ladvise->lla_lockahead_result))
+                                       GOTO(out_ladvise, rc = -EFAULT);
                                break;
+                       default:
+                               rc = ll_ladvise(inode, file,
+                                               k_ladvise_hdr->lah_flags,
+                                               k_ladvise);
+                               if (rc)
+                                       GOTO(out_ladvise, rc);
+                               break;
+                       }
+
                }
 
 out_ladvise:
-               OBD_FREE(ladvise_hdr, alloc_size);
+               OBD_FREE(k_ladvise_hdr, alloc_size);
                RETURN(rc);
        }
        case LL_IOC_FSGETXATTR:
                RETURN(ll_ioctl_fsgetxattr(inode, cmd, arg));
        case LL_IOC_FSSETXATTR:
                RETURN(ll_ioctl_fssetxattr(inode, cmd, arg));
-       default: {
-               int err;
-
-               if (LLIOC_STOP ==
-                    ll_iocontrol_call(inode, file, cmd, arg, &err))
-                       RETURN(err);
-
+       case BLKSSZGET:
+               RETURN(put_user(PAGE_SIZE, (int __user *)arg));
+       default:
                RETURN(obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
                                     (void __user *)arg));
        }
-       }
 }
 
 #ifndef HAVE_FILE_LLSEEK_SIZE
@@ -4147,107 +4376,6 @@ struct inode_operations ll_file_inode_operations = {
 #endif
 };
 
-/* dynamic ioctl number support routins */
-static struct llioc_ctl_data {
-       struct rw_semaphore     ioc_sem;
-       struct list_head        ioc_head;
-} llioc = {
-       __RWSEM_INITIALIZER(llioc.ioc_sem),
-       LIST_HEAD_INIT(llioc.ioc_head)
-};
-
-
-struct llioc_data {
-       struct list_head        iocd_list;
-        unsigned int            iocd_size;
-        llioc_callback_t        iocd_cb;
-        unsigned int            iocd_count;
-        unsigned int            iocd_cmd[0];
-};
-
-void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd)
-{
-        unsigned int size;
-        struct llioc_data *in_data = NULL;
-        ENTRY;
-
-        if (cb == NULL || cmd == NULL ||
-            count > LLIOC_MAX_CMD || count < 0)
-                RETURN(NULL);
-
-        size = sizeof(*in_data) + count * sizeof(unsigned int);
-        OBD_ALLOC(in_data, size);
-        if (in_data == NULL)
-                RETURN(NULL);
-
-        memset(in_data, 0, sizeof(*in_data));
-        in_data->iocd_size = size;
-        in_data->iocd_cb = cb;
-        in_data->iocd_count = count;
-        memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count);
-
-       down_write(&llioc.ioc_sem);
-       list_add_tail(&in_data->iocd_list, &llioc.ioc_head);
-       up_write(&llioc.ioc_sem);
-
-        RETURN(in_data);
-}
-
-void ll_iocontrol_unregister(void *magic)
-{
-        struct llioc_data *tmp;
-
-        if (magic == NULL)
-                return;
-
-       down_write(&llioc.ioc_sem);
-       list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) {
-                if (tmp == magic) {
-                        unsigned int size = tmp->iocd_size;
-
-                       list_del(&tmp->iocd_list);
-                       up_write(&llioc.ioc_sem);
-
-                        OBD_FREE(tmp, size);
-                        return;
-                }
-        }
-       up_write(&llioc.ioc_sem);
-
-        CWARN("didn't find iocontrol register block with magic: %p\n", magic);
-}
-
-EXPORT_SYMBOL(ll_iocontrol_register);
-EXPORT_SYMBOL(ll_iocontrol_unregister);
-
-static enum llioc_iter
-ll_iocontrol_call(struct inode *inode, struct file *file,
-                 unsigned int cmd, unsigned long arg, int *rcp)
-{
-        enum llioc_iter ret = LLIOC_CONT;
-        struct llioc_data *data;
-        int rc = -EINVAL, i;
-
-       down_read(&llioc.ioc_sem);
-       list_for_each_entry(data, &llioc.ioc_head, iocd_list) {
-                for (i = 0; i < data->iocd_count; i++) {
-                        if (cmd != data->iocd_cmd[i])
-                                continue;
-
-                        ret = data->iocd_cb(inode, file, cmd, arg, data, &rc);
-                        break;
-                }
-
-                if (ret == LLIOC_STOP)
-                        break;
-        }
-       up_read(&llioc.ioc_sem);
-
-        if (rcp)
-                *rcp = rc;
-        return ret;
-}
-
 int ll_layout_conf(struct inode *inode, const struct cl_object_conf *conf)
 {
        struct ll_inode_info *lli = ll_i2info(inode);