Whamcloud - gitweb
LU-9846 obd: Add overstriping CONNECT flag
[fs/lustre-release.git] / lustre / llite / llite_lib.c
index 9766890..273af31 100644 (file)
@@ -142,6 +142,9 @@ static struct ll_sb_info *ll_init_sbi(void)
        INIT_LIST_HEAD(&sbi->ll_squash.rsi_nosquash_nids);
        init_rwsem(&sbi->ll_squash.rsi_sem);
 
+       /* Per-filesystem file heat */
+       sbi->ll_heat_decay_weight = SBI_DEFAULT_HEAT_DECAY_WEIGHT;
+       sbi->ll_heat_period_second = SBI_DEFAULT_HEAT_PERIOD_SECOND;
        RETURN(sbi);
 }
 
@@ -221,11 +224,12 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
                                  OBD_CONNECT_GRANT_PARAM |
                                  OBD_CONNECT_SHORTIO | OBD_CONNECT_FLAGS2;
 
-       data->ocd_connect_flags2 = OBD_CONNECT2_FLR |
-                                  OBD_CONNECT2_LOCK_CONVERT |
-                                  OBD_CONNECT2_DIR_MIGRATE |
+       data->ocd_connect_flags2 = OBD_CONNECT2_DIR_MIGRATE |
                                   OBD_CONNECT2_SUM_STATFS |
-                                  OBD_CONNECT2_ARCHIVE_ID_ARRAY;
+                                  OBD_CONNECT2_FLR |
+                                  OBD_CONNECT2_LOCK_CONVERT |
+                                  OBD_CONNECT2_ARCHIVE_ID_ARRAY |
+                                  OBD_CONNECT2_LSOM;
 
 #ifdef HAVE_LRU_RESIZE_SUPPORT
         if (sbi->ll_flags & LL_SBI_LRU_RESIZE)
@@ -271,10 +275,14 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
 
        obd_connect_set_secctx(data);
 
+#if defined(CONFIG_SECURITY)
+       data->ocd_connect_flags2 |= OBD_CONNECT2_SELINUX_POLICY;
+#endif
+
        data->ocd_brw_size = MD_MAX_BRW_SIZE;
 
        err = obd_connect(NULL, &sbi->ll_md_exp, sbi->ll_md_obd,
-                         &sbi->ll_sb_uuid, data, NULL);
+                         &sbi->ll_sb_uuid, data, sbi->ll_cache);
         if (err == -EBUSY) {
                 LCONSOLE_ERROR_MSG(0x14f, "An MDT (md %s) is performing "
                                    "recovery, of which this client is not a "
@@ -462,7 +470,7 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
        data->ocd_brw_size = DT_MAX_BRW_SIZE;
 
        err = obd_connect(NULL, &sbi->ll_dt_exp, sbi->ll_dt_obd,
-                         &sbi->ll_sb_uuid, data, NULL);
+                         &sbi->ll_sb_uuid, data, sbi->ll_cache);
        if (err == -EBUSY) {
                LCONSOLE_ERROR_MSG(0x150, "An OST (dt %s) is performing "
                                   "recovery, of which this client is not a "
@@ -584,20 +592,12 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
        }
        cl_sb_init(sb);
 
-       err = obd_set_info_async(NULL, sbi->ll_dt_exp, sizeof(KEY_CACHE_SET),
-                                KEY_CACHE_SET, sizeof(*sbi->ll_cache),
-                                sbi->ll_cache, NULL);
-       if (err) {
-               CERROR("%s: Set cache_set failed: rc = %d\n",
-                      sbi->ll_dt_exp->exp_obd->obd_name, err);
-               GOTO(out_root, err);
-       }
-
        sb->s_root = d_make_root(root);
        if (sb->s_root == NULL) {
-               CERROR("%s: can't make root dentry\n",
-                       ll_get_fsname(sb, NULL, 0));
-               GOTO(out_root, err = -ENOMEM);
+               err = -ENOMEM;
+               CERROR("%s: can't make root dentry: rc = %d\n",
+                      sbi->ll_fsname, err);
+               GOTO(out_root, err);
        }
 #ifdef HAVE_DCACHE_LOCK
        sb->s_root->d_op = &ll_d_ops;
@@ -625,7 +625,7 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
                                        sbi->ll_dt_obd->obd_type->typ_name);
                if (err < 0) {
                        CERROR("%s: could not register %s in llite: rc = %d\n",
-                              dt, ll_get_fsname(sb, NULL, 0), err);
+                              dt, sbi->ll_fsname, err);
                        err = 0;
                }
        }
@@ -636,7 +636,7 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
                                        sbi->ll_md_obd->obd_type->typ_name);
                if (err < 0) {
                        CERROR("%s: could not register %s in llite: rc = %d\n",
-                              md, ll_get_fsname(sb, NULL, 0), err);
+                              md, sbi->ll_fsname, err);
                        err = 0;
                }
        }
@@ -678,12 +678,16 @@ int ll_get_max_mdsize(struct ll_sb_info *sbi, int *lmmsize)
                RETURN(rc);
        }
 
+       CDEBUG(D_INFO, "max LOV ea size: %d\n", *lmmsize);
+
        size = sizeof(int);
        rc = obd_get_info(NULL, sbi->ll_md_exp, sizeof(KEY_MAX_EASIZE),
                          KEY_MAX_EASIZE, &size, lmmsize);
        if (rc)
                CERROR("Get max mdsize error rc %d\n", rc);
 
+       CDEBUG(D_INFO, "max LMV ea size: %d\n", *lmmsize);
+
        RETURN(rc);
 }
 
@@ -969,6 +973,9 @@ void ll_lli_init(struct ll_inode_info *lli)
                INIT_LIST_HEAD(&lli->lli_agl_list);
                lli->lli_agl_index = 0;
                lli->lli_async_rc = 0;
+               spin_lock_init(&lli->lli_heat_lock);
+               obd_heat_clear(lli->lli_heat_instances, OBD_HEAT_COUNT);
+               lli->lli_heat_flags = 0;
        }
        mutex_init(&lli->lli_layout_mutex);
        memset(lli->lli_jobid, 0, sizeof(lli->lli_jobid));
@@ -1062,6 +1069,19 @@ int ll_fill_super(struct super_block *sb, struct vfsmount *mnt)
        if (ptr && (strcmp(ptr, "-client") == 0))
                len -= 7;
 
+       if (len > LUSTRE_MAXFSNAME) {
+               if (unlikely(len >= MAX_STRING_SIZE))
+                       len = MAX_STRING_SIZE - 1;
+               strncpy(name, profilenm, len);
+               name[len] = '\0';
+               err = -ENAMETOOLONG;
+               CERROR("%s: fsname longer than %u characters: rc = %d\n",
+                      name, LUSTRE_MAXFSNAME, err);
+               GOTO(out_free_cfg, err);
+       }
+       strncpy(sbi->ll_fsname, profilenm, len);
+       sbi->ll_fsname[len] = '\0';
+
        /* Mount info */
        snprintf(name, MAX_STRING_SIZE, "%.*s-%016lx", len,
                 profilenm, cfg_instance);
@@ -1072,7 +1092,7 @@ int ll_fill_super(struct super_block *sb, struct vfsmount *mnt)
        err = ll_debugfs_register_super(sb, name);
        if (err < 0) {
                CERROR("%s: could not register mountpoint in llite: rc = %d\n",
-                      ll_get_fsname(sb, NULL, 0), err);
+                      sbi->ll_fsname, err);
                err = 0;
        }
 
@@ -1280,7 +1300,7 @@ static struct inode *ll_iget_anon_dir(struct super_block *sb,
        inode = iget_locked(sb, ino);
        if (inode == NULL) {
                CERROR("%s: failed get simple inode "DFID": rc = -ENOENT\n",
-                      ll_get_fsname(sb, NULL, 0), PFID(fid));
+                      sbi->ll_fsname, PFID(fid));
                RETURN(ERR_PTR(-ENOENT));
        }
 
@@ -1329,8 +1349,7 @@ static int ll_init_lsm_md(struct inode *inode, struct lustre_md *md)
        LASSERT(lsm != NULL);
 
        CDEBUG(D_INODE, "%s: "DFID" set dir layout:\n",
-               ll_get_fsname(inode->i_sb, NULL, 0),
-               PFID(&lli->lli_fid));
+              ll_i2sbi(inode)->ll_fsname, PFID(&lli->lli_fid));
        lsm_md_dump(D_INODE, lsm);
 
        /* XXX sigh, this lsm_root initialization should be in
@@ -1398,8 +1417,8 @@ static int ll_update_lsm_md(struct inode *inode, struct lustre_md *md)
                if (lsm->lsm_md_layout_version <=
                    lli->lli_lsm_md->lsm_md_layout_version) {
                        CERROR("%s: "DFID" dir layout mismatch:\n",
-                               ll_get_fsname(inode->i_sb, NULL, 0),
-                               PFID(&lli->lli_fid));
+                              ll_i2sbi(inode)->ll_fsname,
+                              PFID(&lli->lli_fid));
                        lsm_md_dump(D_ERROR, lli->lli_lsm_md);
                        lsm_md_dump(D_ERROR, lsm);
                        GOTO(unlock, rc = -EINVAL);
@@ -1605,7 +1624,7 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr,
 
        CDEBUG(D_VFSTRACE, "%s: setattr inode "DFID"(%p) from %llu to %llu, "
               "valid %x, hsm_import %d\n",
-              ll_get_fsname(inode->i_sb, NULL, 0), PFID(&lli->lli_fid),
+              ll_i2sbi(inode)->ll_fsname, PFID(&lli->lli_fid),
               inode, i_size_read(inode), attr->ia_size, attr->ia_valid,
               hsm_import);
 
@@ -1797,57 +1816,54 @@ int ll_setattr(struct dentry *de, struct iattr *attr)
 int ll_statfs_internal(struct ll_sb_info *sbi, struct obd_statfs *osfs,
                        u32 flags)
 {
-       struct obd_statfs obd_osfs;
+       struct obd_statfs obd_osfs = { 0 }; /* filled from the DT export */
        time64_t max_age;
        int rc;
 
        ENTRY;
        max_age = ktime_get_seconds() - OBD_STATFS_CACHE_SECONDS;
 
-        rc = obd_statfs(NULL, sbi->ll_md_exp, osfs, max_age, flags);
-        if (rc) {
-                CERROR("md_statfs fails: rc = %d\n", rc);
-                RETURN(rc);
-        }
+       rc = obd_statfs(NULL, sbi->ll_md_exp, osfs, max_age, flags);
+       if (rc) /* MD export statfs failed: pass the error up */
+               RETURN(rc);
 
        osfs->os_type = LL_SUPER_MAGIC;
 
        CDEBUG(D_SUPER, "MDC blocks %llu/%llu objects %llu/%llu\n",
-               osfs->os_bavail, osfs->os_blocks, osfs->os_ffree,osfs->os_files);
+             osfs->os_bavail, osfs->os_blocks, osfs->os_ffree, osfs->os_files);
 
        if (osfs->os_state & OS_STATE_SUM)
                GOTO(out, rc);
 
-        if (sbi->ll_flags & LL_SBI_LAZYSTATFS)
-                flags |= OBD_STATFS_NODELAY;
+       if (sbi->ll_flags & LL_SBI_LAZYSTATFS)
+               flags |= OBD_STATFS_NODELAY;
 
        rc = obd_statfs(NULL, sbi->ll_dt_exp, &obd_osfs, max_age, flags);
-        if (rc) {
-                CERROR("obd_statfs fails: rc = %d\n", rc);
-                RETURN(rc);
-        }
+       if (rc) /* Possibly a filesystem with no OSTs.  Report MDT totals. */
+               GOTO(out, rc = 0); /* the DT statfs error is dropped */
 
        CDEBUG(D_SUPER, "OSC blocks %llu/%llu objects %llu/%llu\n",
-               obd_osfs.os_bavail, obd_osfs.os_blocks, obd_osfs.os_ffree,
-               obd_osfs.os_files);
-
-        osfs->os_bsize = obd_osfs.os_bsize;
-        osfs->os_blocks = obd_osfs.os_blocks;
-        osfs->os_bfree = obd_osfs.os_bfree;
-        osfs->os_bavail = obd_osfs.os_bavail;
-
-        /* If we don't have as many objects free on the OST as inodes
-         * on the MDS, we reduce the total number of inodes to
-         * compensate, so that the "inodes in use" number is correct.
-         */
-        if (obd_osfs.os_ffree < osfs->os_ffree) {
-                osfs->os_files = (osfs->os_files - osfs->os_ffree) +
-                        obd_osfs.os_ffree;
-                osfs->os_ffree = obd_osfs.os_ffree;
-        }
+              obd_osfs.os_bavail, obd_osfs.os_blocks, obd_osfs.os_ffree,
+              obd_osfs.os_files);
+
+       osfs->os_bsize = obd_osfs.os_bsize; /* block figures: use the DT's */
+       osfs->os_blocks = obd_osfs.os_blocks;
+       osfs->os_bfree = obd_osfs.os_bfree;
+       osfs->os_bavail = obd_osfs.os_bavail;
+
+       /* If the OSTs report any objects (os_files != 0) but fewer free ones
+        * than the MDTs have free inodes, cap os_ffree at the OST value and
+        * shrink os_files to match, keeping in-use (os_files - os_ffree)
+        * unchanged.  Keep this in sync with lod_statfs() behaviour.
+        */
+       if (obd_osfs.os_files && obd_osfs.os_ffree < osfs->os_ffree) {
+               osfs->os_files = (osfs->os_files - osfs->os_ffree) +
+                                obd_osfs.os_ffree;
+               osfs->os_ffree = obd_osfs.os_ffree;
+       }
 
 out:
-        RETURN(rc);
+       RETURN(rc);
 }
 int ll_statfs(struct dentry *de, struct kstatfs *sfs)
 {
@@ -2123,7 +2139,7 @@ void ll_delete_inode(struct inode *inode)
 
        LASSERTF(nrpages == 0, "%s: inode="DFID"(%p) nrpages=%lu, "
                 "see https://jira.whamcloud.com/browse/LU-118\n",
-                ll_get_fsname(inode->i_sb, NULL, 0),
+                ll_i2sbi(inode)->ll_fsname,
                 PFID(ll_inode2fid(inode)), inode, nrpages);
 
 #ifdef HAVE_SBOPS_EVICT_INODE
@@ -2352,8 +2368,7 @@ void ll_open_cleanup(struct super_block *sb, struct ptlrpc_request *open_req)
        OBD_ALLOC_PTR(op_data);
        if (op_data == NULL) {
                CWARN("%s: cannot allocate op_data to release open handle for "
-                     DFID"\n",
-                     ll_get_fsname(sb, NULL, 0), PFID(&body->mbo_fid1));
+                     DFID"\n", ll_s2sbi(sb)->ll_fsname, PFID(&body->mbo_fid1));
 
                RETURN_EXIT;
        }
@@ -2396,7 +2411,7 @@ int ll_prep_inode(struct inode **inode, struct ptlrpc_request *req,
                 */
                if (!fid_is_sane(&md.body->mbo_fid1)) {
                        CERROR("%s: Fid is insane "DFID"\n",
-                               ll_get_fsname(sb, NULL, 0),
+                               sbi->ll_fsname,
                                PFID(&md.body->mbo_fid1));
                        GOTO(out, rc = -EINVAL);
                }
@@ -2668,39 +2683,6 @@ int ll_get_obd_name(struct inode *inode, unsigned int cmd, unsigned long arg)
        RETURN(0);
 }
 
-/**
- * Get lustre file system name by \a sbi. If \a buf is provided(non-NULL), the
- * fsname will be returned in this buffer; otherwise, a static buffer will be
- * used to store the fsname and returned to caller.
- */
-char *ll_get_fsname(struct super_block *sb, char *buf, int buflen)
-{
-       static char fsname_static[MTI_NAME_MAXLEN];
-       struct lustre_sb_info *lsi = s2lsi(sb);
-       char *ptr;
-       int len;
-
-       if (buf == NULL) {
-               /* this means the caller wants to use static buffer
-                * and it doesn't care about race. Usually this is
-                * in error reporting path */
-               buf = fsname_static;
-               buflen = sizeof(fsname_static);
-       }
-
-       len = strlen(lsi->lsi_lmd->lmd_profile);
-       ptr = strrchr(lsi->lsi_lmd->lmd_profile, '-');
-       if (ptr && (strcmp(ptr, "-client") == 0))
-               len -= 7;
-
-       if (unlikely(len >= buflen))
-               len = buflen - 1;
-       strncpy(buf, lsi->lsi_lmd->lmd_profile, len);
-       buf[len] = '\0';
-
-       return buf;
-}
-
 static char* ll_d_path(struct dentry *dentry, char *buf, int bufsize)
 {
        char *path = NULL;
@@ -2731,7 +2713,7 @@ void ll_dirty_page_discard_warn(struct page *page, int ioret)
 
        CDEBUG(D_WARNING,
               "%s: dirty page discard: %s/fid: "DFID"/%s may get corrupted "
-              "(rc %d)\n", ll_get_fsname(page->mapping->host->i_sb, NULL, 0),
+              "(rc %d)\n", ll_i2sbi(inode)->ll_fsname,
               s2lsi(page->mapping->host->i_sb)->lsi_lmd->lmd_dev,
               PFID(ll_inode2fid(inode)),
               (path && !IS_ERR(path)) ? path : "", ioret);