Whamcloud - gitweb
LU-14459 lmv: change default hash type to crush
[fs/lustre-release.git] / lustre / llite / dir.c
index ce9758b..5db622e 100644 (file)
@@ -27,7 +27,6 @@
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
  *
  * lustre/llite/dir.c
  *
  * returned page, page hash collision has to be handled. Pages in the
  * hash chain, except first one, are termed "overflow pages".
  *
- * Solution to index uniqueness problem is to not cache overflow
- * pages. Instead, when page hash collision is detected, all overflow pages
- * from emerging chain are immediately requested from the server and placed in
- * a special data structure (struct ll_dir_chain). This data structure is used
- * by ll_readdir() to process entries from overflow pages. When readdir
- * invocation finishes, overflow pages are discarded. If page hash collision
- * chain weren't completely processed, next call to readdir will again detect
- * page hash collision, again read overflow pages in, process next portion of
- * entries and again discard the pages. This is not as wasteful as it looks,
- * because, given reasonable hash, page hash collisions are extremely rare.
+ * Proposed (unimplemented) solution to index uniqueness problem is to
+ * not cache overflow pages.  Instead, when page hash collision is
+ * detected, all overflow pages from emerging chain should be
+ * immediately requested from the server and placed in a special data
+ * structure.  This data structure can be used by ll_readdir() to
+ * process entries from overflow pages.  When readdir invocation
+ * finishes, overflow pages are discarded.  If page hash collision chain
+ * wasn't completely processed, next call to readdir will again detect
+ * page hash collision, again read overflow pages in, process next
+ * portion of entries and again discard the pages.  This is not as
+ * wasteful as it looks, because, given reasonable hash, page hash
+ * collisions are extremely rare.
  *
  * 1. directory positioning
  *
  *
  */
 struct page *ll_get_dir_page(struct inode *dir, struct md_op_data *op_data,
-                            __u64 offset, struct ll_dir_chain *chain)
+                            __u64 offset)
 {
        struct md_callback      cb_op;
        struct page             *page;
@@ -190,14 +191,11 @@ int ll_dir_read(struct inode *inode, __u64 *ppos, struct md_op_data *op_data,
        bool                  is_api32 = ll_need_32bit_api(sbi);
        bool                  is_hash64 = sbi->ll_flags & LL_SBI_64BIT_HASH;
        struct page          *page;
-       struct ll_dir_chain   chain;
        bool                  done = false;
        int                   rc = 0;
        ENTRY;
 
-       ll_dir_chain_init(&chain);
-
-       page = ll_get_dir_page(inode, op_data, pos, &chain);
+       page = ll_get_dir_page(inode, op_data, pos);
 
        while (rc == 0 && !done) {
                struct lu_dirpage *dp;
@@ -234,7 +232,7 @@ int ll_dir_read(struct inode *inode, __u64 *ppos, struct md_op_data *op_data,
                                lhash = hash;
                        fid_le_to_cpu(&fid, &ent->lde_fid);
                        ino = cl_fid_build_ino(&fid, is_api32);
-                       type = IFTODT(lu_dirent_type_get(ent));
+                       type = S_DT(lu_dirent_type_get(ent));
                        /* For ll_nfs_get_name_filldir(), it will try to access
                         * 'ent' through 'lde_name', so the parameter 'name'
                         * for 'filldir()' must be part of the 'ent'. */
@@ -271,8 +269,7 @@ int ll_dir_read(struct inode *inode, __u64 *ppos, struct md_op_data *op_data,
                                        le32_to_cpu(dp->ldp_flags) &
                                        LDF_COLLIDE);
                        next = pos;
-                       page = ll_get_dir_page(inode, op_data, pos,
-                                              &chain);
+                       page = ll_get_dir_page(inode, op_data, pos);
                }
        }
 #ifdef HAVE_DIR_CONTEXT
@@ -280,7 +277,6 @@ int ll_dir_read(struct inode *inode, __u64 *ppos, struct md_op_data *op_data,
 #else
        *ppos = pos;
 #endif
-       ll_dir_chain_fini(&chain);
        RETURN(rc);
 }
 
@@ -402,7 +398,8 @@ out:
  *                      <0 if the creation is failed.
  */
 static int ll_dir_setdirstripe(struct dentry *dparent, struct lmv_user_md *lump,
-                              size_t len, const char *dirname, umode_t mode)
+                              size_t len, const char *dirname, umode_t mode,
+                              bool createonly)
 {
        struct inode *parent = dparent->d_inode;
        struct ptlrpc_request *request = NULL;
@@ -447,23 +444,17 @@ static int ll_dir_setdirstripe(struct dentry *dparent, struct lmv_user_md *lump,
            !OBD_FAIL_CHECK(OBD_FAIL_LLITE_NO_CHECK_DEAD))
                RETURN(-ENOENT);
 
+       /* MDS < 2.14 doesn't support 'crush' hash type, and cannot handle
+        * unknown hash if client doesn't set a valid one. Switch to fnv_1a_64.
+        */
        if (!(exp_connect_flags2(sbi->ll_md_exp) & OBD_CONNECT2_CRUSH)) {
-               if ((lump->lum_hash_type & LMV_HASH_TYPE_MASK) ==
-                    LMV_HASH_TYPE_CRUSH) {
-                       /* if server doesn't support 'crush' hash type,
-                        * switch to fnv_1a_64.
-                        */
-                       lump->lum_hash_type &= ~LMV_HASH_TYPE_MASK;
-                       lump->lum_hash_type |= LMV_HASH_TYPE_FNV_1A_64;
-               } else if ((lump->lum_hash_type & LMV_HASH_TYPE_MASK) ==
-                    LMV_HASH_TYPE_UNKNOWN) {
-                       /* from 2.14 MDT will choose default hash type if client
-                        * doesn't set a valid one, while old server doesn't
-                        * handle it.
-                        */
-                       lump->lum_hash_type &= ~LMV_HASH_TYPE_MASK;
-                       lump->lum_hash_type |= LMV_HASH_TYPE_DEFAULT;
-               }
+               enum lmv_hash_type type = lump->lum_hash_type &
+                                         LMV_HASH_TYPE_MASK;
+
+               if (type == LMV_HASH_TYPE_CRUSH ||
+                   type == LMV_HASH_TYPE_UNKNOWN)
+                       lump->lum_hash_type = (lump->lum_hash_type ^ type) |
+                                             LMV_HASH_TYPE_FNV_1A_64;
        }
 
        if (unlikely(!lmv_user_magic_supported(cpu_to_le32(lump->lum_magic))))
@@ -508,6 +499,9 @@ static int ll_dir_setdirstripe(struct dentry *dparent, struct lmv_user_md *lump,
        }
 
        op_data->op_cli_flags |= CLI_SET_MEA;
+       if (createonly)
+               op_data->op_bias |= MDS_SETSTRIPE_CREATE;
+
        err = md_create(sbi->ll_md_exp, op_data, lump, len, mode,
                        from_kuid(&init_user_ns, current_fsuid()),
                        from_kgid(&init_user_ns, current_fsgid()),
@@ -545,8 +539,7 @@ static int ll_dir_setdirstripe(struct dentry *dparent, struct lmv_user_md *lump,
        }
 
 out_inode:
-       if (inode != NULL)
-               iput(inode);
+       iput(inode);
 out_request:
        ptlrpc_req_finished(request);
 out_op_data:
@@ -1142,14 +1135,16 @@ int quotactl_ioctl(struct ll_sb_info *sbi, struct if_quotactl *qctl)
        case LUSTRE_Q_SETDEFAULT:
        case LUSTRE_Q_SETQUOTAPOOL:
        case LUSTRE_Q_SETINFOPOOL:
-               if (!cfs_capable(CFS_CAP_SYS_ADMIN))
+       case LUSTRE_Q_SETDEFAULT_POOL:
+               if (!capable(CAP_SYS_ADMIN))
                        RETURN(-EPERM);
                break;
        case Q_GETQUOTA:
        case LUSTRE_Q_GETDEFAULT:
        case LUSTRE_Q_GETQUOTAPOOL:
+       case LUSTRE_Q_GETDEFAULT_POOL:
                if (check_owner(type, id) &&
-                   (!cfs_capable(CFS_CAP_SYS_ADMIN)))
+                   (!capable(CAP_SYS_ADMIN)))
                        RETURN(-EPERM);
                break;
        case Q_GETINFO:
@@ -1278,7 +1273,7 @@ int ll_rmfid(struct file *file, void __user *arg)
        int i, rc, *rcs = NULL;
        ENTRY;
 
-       if (!cfs_capable(CFS_CAP_DAC_READ_SEARCH) &&
+       if (!capable(CAP_DAC_READ_SEARCH) &&
            !(ll_i2sbi(file_inode(file))->ll_flags & LL_SBI_USER_FID2PATH))
                RETURN(-EPERM);
        /* Only need to get the buflen */
@@ -1350,7 +1345,7 @@ static long ll_dir_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
        struct dentry *dentry = file_dentry(file);
        struct inode *inode = file_inode(file);
        struct ll_sb_info *sbi = ll_i2sbi(inode);
-       struct obd_ioctl_data *data;
+       struct obd_ioctl_data *data = NULL;
        int rc = 0;
        ENTRY;
 
@@ -1388,14 +1383,12 @@ static long ll_dir_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
                return 0;
        }
        case IOC_MDC_LOOKUP: {
-                                    int namelen, len = 0;
-               char *buf = NULL;
+               int namelen, len = 0;
                char *filename;
 
-               rc = obd_ioctl_getdata(&buf, &len, (void __user *)arg);
+               rc = obd_ioctl_getdata(&data, &len, (void __user *)arg);
                if (rc != 0)
                        RETURN(rc);
-               data = (void *)buf;
 
                filename = data->ioc_inlbuf1;
                namelen = strlen(filename);
@@ -1411,24 +1404,23 @@ static long ll_dir_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
                        GOTO(out_free, rc);
                }
 out_free:
-               OBD_FREE_LARGE(buf, len);
-                return rc;
-        }
+               OBD_FREE_LARGE(data, len);
+               return rc;
+       }
        case LL_IOC_LMV_SETSTRIPE: {
                struct lmv_user_md  *lum;
-               char            *buf = NULL;
-               char            *filename;
-               int              namelen = 0;
-               int              lumlen = 0;
-               umode_t          mode;
-               int              len;
-               int              rc;
+               char *filename;
+               int namelen = 0;
+               int lumlen = 0;
+               umode_t mode;
+               bool createonly = false;
+               int len;
+               int rc;
 
-               rc = obd_ioctl_getdata(&buf, &len, (void __user *)arg);
+               rc = obd_ioctl_getdata(&data, &len, (void __user *)arg);
                if (rc)
                        RETURN(rc);
 
-               data = (void *)buf;
                if (data->ioc_inlbuf1 == NULL || data->ioc_inlbuf2 == NULL ||
                    data->ioc_inllen1 == 0 || data->ioc_inllen2 == 0)
                        GOTO(lmv_out_free, rc = -EINVAL);
@@ -1465,9 +1457,11 @@ out_free:
                }
 
                mode = data->ioc_type;
-               rc = ll_dir_setdirstripe(dentry, lum, lumlen, filename, mode);
+               createonly = data->ioc_obdo1.o_flags & OBD_FL_OBDMDEXISTS;
+               rc = ll_dir_setdirstripe(dentry, lum, lumlen, filename, mode,
+                                        createonly);
 lmv_out_free:
-               OBD_FREE_LARGE(buf, len);
+               OBD_FREE_LARGE(data, len);
                RETURN(rc);
 
        }
@@ -1507,7 +1501,7 @@ lmv_out_free:
                if (copy_from_user(&lumv1, lumv1p, sizeof(lumv1)))
                        RETURN(-EFAULT);
 
-               if (inode->i_sb->s_root == file_dentry(file))
+               if (is_root_inode(inode))
                        set_default = 1;
 
                switch (lumv1.lmm_magic) {
@@ -1662,6 +1656,16 @@ finish_req:
                return rc;
        }
 
+       case LL_IOC_UNLOCK_FOREIGN:
+               /* if not a foreign symlink do nothing */
+               if (ll_foreign_is_removable(dentry, true)) {
+                       CDEBUG(D_INFO,
+                              "prevent rmdir of non-foreign dir ("DFID")\n",
+                              PFID(ll_inode2fid(inode)));
+                       RETURN(-EOPNOTSUPP);
+               }
+               RETURN(0);
+
        case LL_IOC_REMOVE_ENTRY: {
                char            *filename = NULL;
                int              namelen = 0;
@@ -1697,10 +1701,10 @@ out_rmdir:
                RETURN(ll_obd_statfs(inode, (void __user *)arg));
        case LL_IOC_LOV_GETSTRIPE:
        case LL_IOC_LOV_GETSTRIPE_NEW:
-       case LL_IOC_MDC_GETINFO:
-       case LL_IOC_MDC_GETINFO_OLD:
-       case IOC_MDC_GETFILEINFO:
-       case IOC_MDC_GETFILEINFO_OLD:
+       case LL_IOC_MDC_GETINFO_V1:
+       case LL_IOC_MDC_GETINFO_V2:
+       case IOC_MDC_GETFILEINFO_V1:
+       case IOC_MDC_GETFILEINFO_V2:
        case IOC_MDC_GETFILESTRIPE: {
                struct ptlrpc_request *request = NULL;
                struct ptlrpc_request *root_request = NULL;
@@ -1715,8 +1719,8 @@ out_rmdir:
                struct lu_fid __user *fidp = NULL;
                int lmmsize;
 
-               if (cmd == IOC_MDC_GETFILEINFO_OLD ||
-                   cmd == IOC_MDC_GETFILEINFO ||
+               if (cmd == IOC_MDC_GETFILEINFO_V1 ||
+                   cmd == IOC_MDC_GETFILEINFO_V2 ||
                    cmd == IOC_MDC_GETFILESTRIPE) {
                        filename = ll_getname((const char __user *)arg);
                        if (IS_ERR(filename))
@@ -1738,10 +1742,10 @@ out_rmdir:
                        GOTO(out_req, rc);
                }
 
-               if (rc == -ENODATA && (cmd == IOC_MDC_GETFILEINFO ||
-                                      cmd == LL_IOC_MDC_GETINFO ||
-                                      cmd == IOC_MDC_GETFILEINFO_OLD ||
-                                      cmd == LL_IOC_MDC_GETINFO_OLD)) {
+               if (rc == -ENODATA && (cmd == IOC_MDC_GETFILEINFO_V1 ||
+                                      cmd == LL_IOC_MDC_GETINFO_V1 ||
+                                      cmd == IOC_MDC_GETFILEINFO_V2 ||
+                                      cmd == LL_IOC_MDC_GETINFO_V2)) {
                        lmmsize = 0;
                        rc = 0;
                }
@@ -1753,8 +1757,8 @@ out_rmdir:
                    cmd == LL_IOC_LOV_GETSTRIPE ||
                    cmd == LL_IOC_LOV_GETSTRIPE_NEW) {
                        lump = (struct lov_user_md __user *)arg;
-               } else if (cmd == IOC_MDC_GETFILEINFO_OLD ||
-                          cmd == LL_IOC_MDC_GETINFO_OLD){
+               } else if (cmd == IOC_MDC_GETFILEINFO_V1 ||
+                          cmd == LL_IOC_MDC_GETINFO_V1){
                        struct lov_user_mds_data_v1 __user *lmdp;
 
                        lmdp = (struct lov_user_mds_data_v1 __user *)arg;
@@ -1782,8 +1786,8 @@ out_rmdir:
                        rc = -EOVERFLOW;
                }
 
-               if (cmd == IOC_MDC_GETFILEINFO_OLD ||
-                   cmd == LL_IOC_MDC_GETINFO_OLD) {
+               if (cmd == IOC_MDC_GETFILEINFO_V1 ||
+                   cmd == LL_IOC_MDC_GETINFO_V1) {
                        lstat_t st = { 0 };
 
                        st.st_dev       = inode->i_sb->s_dev;
@@ -1804,8 +1808,8 @@ out_rmdir:
 
                        if (copy_to_user(statp, &st, sizeof(st)))
                                GOTO(out_req, rc = -EFAULT);
-               } else if (cmd == IOC_MDC_GETFILEINFO ||
-                          cmd == LL_IOC_MDC_GETINFO) {
+               } else if (cmd == IOC_MDC_GETFILEINFO_V2 ||
+                          cmd == LL_IOC_MDC_GETINFO_V2) {
                        lstatx_t stx = { 0 };
                        __u64 valid = body->mbo_valid;
 
@@ -1839,7 +1843,7 @@ out_rmdir:
                         * However, this whould be better decided by the MDS
                         * instead of the client.
                         */
-                       if (cmd == LL_IOC_MDC_GETINFO &&
+                       if (cmd == LL_IOC_MDC_GETINFO_V2 &&
                            ll_i2info(inode)->lli_lsm_md != NULL)
                                valid &= ~(OBD_MD_FLSIZE | OBD_MD_FLBLOCKS);
 
@@ -1900,8 +1904,11 @@ out_quotactl:
                OBD_FREE(qctl, qctl_len);
                RETURN(rc);
         }
-        case OBD_IOC_GETDTNAME:
-        case OBD_IOC_GETMDNAME:
+       case OBD_IOC_GETNAME_OLD:
+               /* fall through */
+       case OBD_IOC_GETDTNAME:
+               /* fall through */
+       case OBD_IOC_GETMDNAME:
                 RETURN(ll_get_obd_name(inode, cmd, arg));
         case LL_IOC_FLUSHCTX:
                 RETURN(ll_flush_ctx(inode));
@@ -2038,7 +2045,7 @@ out_hur:
                RETURN(rc);
        }
        case LL_IOC_HSM_CT_START:
-               if (!cfs_capable(CFS_CAP_SYS_ADMIN))
+               if (!capable(CAP_SYS_ADMIN))
                        RETURN(-EPERM);
 
                rc = copy_and_ct_start(cmd, sbi->ll_md_exp,
@@ -2085,17 +2092,15 @@ out_hur:
        }
        case LL_IOC_MIGRATE: {
                struct lmv_user_md *lum;
-               char *buf = NULL;
                int len;
                char *filename;
                int namelen = 0;
                int rc;
 
-               rc = obd_ioctl_getdata(&buf, &len, (void __user *)arg);
+               rc = obd_ioctl_getdata(&data, &len, (void __user *)arg);
                if (rc)
                        RETURN(rc);
 
-               data = (struct obd_ioctl_data *)buf;
                if (data->ioc_inlbuf1 == NULL || data->ioc_inlbuf2 == NULL ||
                    data->ioc_inllen1 == 0 || data->ioc_inllen2 == 0)
                        GOTO(migrate_free, rc = -EINVAL);
@@ -2119,7 +2124,7 @@ out_hur:
 
                rc = ll_migrate(inode, file, lum, filename);
 migrate_free:
-               OBD_FREE_LARGE(buf, len);
+               OBD_FREE_LARGE(data, len);
 
                RETURN(rc);
        }