LU-12137 llite: use ->iterate_shared() for readdir

[fs/lustre-release.git] / lustre / osd-ldiskfs / osd_handler.c
diff --git a/lustre/osd-ldiskfs/osd_handler.c b/lustre/osd-ldiskfs/osd_handler.c

index ea81256..185e3d3 100644 (file)
--- a/lustre/osd-ldiskfs/osd_handler.c
+++ b/lustre/osd-ldiskfs/osd_handler.c
@@ -423,7 +423,7 @@ struct inode *osd_iget(struct osd_thread_info *info, struct osd_device *dev,
          */
          /* LASSERT(current->journal_info == NULL); */
  
-       inode = ldiskfs_iget(osd_sb(dev), id->oii_ino);
+       inode = osd_ldiskfs_iget(osd_sb(dev), id->oii_ino);
         if (IS_ERR(inode)) {
                 CDEBUG(D_INODE, "no inode: ino = %u, rc = %ld\n",
                        id->oii_ino, PTR_ERR(inode));
@@ -495,13 +495,13 @@ int osd_ldiskfs_add_entry(struct osd_thread_info *info, struct osd_device *osd,
                 }
  
                 if (fid != NULL)
-                       CWARN("%s: directory (inode: %lu, FID: "DFID") %s "
-                             "maximum entry limit\n",
+                       /* below message is checked in sanity.sh test_129 */
+                       CWARN("%s: directory (inode: %lu, FID: "DFID") %s maximum entry limit\n",
                               osd_name(osd), parent->i_ino, PFID(fid),
                               rc == -ENOSPC ? "has reached" : "is approaching");
                 else
-                       CWARN("%s: directory (inode: %lu, FID: unknown) %s "
-                             "maximum entry limit\n",
+                       /* below message is checked in sanity.sh test_129 */
+                       CWARN("%s: directory (inode: %lu, FID: unknown) %s maximum entry limit\n",
                               osd_name(osd), parent->i_ino,
                               rc == -ENOSPC ? "has reached" : "is approaching");
  
@@ -559,7 +559,7 @@ static struct inode *osd_iget_check(struct osd_thread_info *info,
          */
  
  again:
-       inode = ldiskfs_iget(osd_sb(dev), id->oii_ino);
+       inode = osd_ldiskfs_iget(osd_sb(dev), id->oii_ino);
         if (IS_ERR(inode)) {
                 rc = PTR_ERR(inode);
                 if (!trusted && (rc == -ENOENT || rc == -ESTALE))
@@ -1023,11 +1023,7 @@ again:
                 oclb.oclb_items = 0;
  #ifdef HAVE_DIR_CONTEXT
                 oclb.ctx.pos = filp->f_pos;
-#ifdef HAVE_ITERATE_SHARED
                 rc = fops->iterate_shared(filp, &oclb.ctx);
-#else
-               rc = fops->iterate(filp, &oclb.ctx);
-#endif
                 filp->f_pos = oclb.ctx.pos;
  #else
                 rc = fops->readdir(filp, &oclb, osd_stripe_dir_filldir);
@@ -1128,6 +1124,12 @@ static int osd_fid_lookup(const struct lu_env *env, struct osd_object *obj,
                 goto trigger;
         }
  
+       /* -ESTALE is returned if the inode of an OST object doesn't exist */
+       if (result == -ESTALE &&
+           fid_is_on_ost(info, dev, fid, OI_CHECK_FLD)) {
+               GOTO(out, result = 0);
+       }
+
         if (result)
                 GOTO(out, result);
  
@@ -1290,6 +1292,19 @@ check_lma:
  
         LASSERT(!updated);
  
+       /*
+        * If two OST objects map to the same inode and the inode mode is
+        * (S_IFREG | S_ISUID | S_ISGID | S_ISVTX | 0666), the inode was
+        * reserved by precreate and not written yet. In that case, don't set
+        * the inode for the object whose FID mismatches, so that it can
+        * create its own inode and not block precreate.
+        */
+       if (fid_is_on_ost(info, dev, fid, OI_CHECK_FLD) &&
+           inode->i_mode == (S_IFREG | S_ISUID | S_ISGID | S_ISVTX | 0666)) {
+               obj->oo_inode = NULL;
+               GOTO(out, result = 0);
+       }
+
         result = osd_oi_lookup(info, dev, fid, id, OI_CHECK_FLD);
         /*
          * "result == -ENOENT" means the cached OI mapping has been removed
@@ -1864,7 +1879,7 @@ static int osd_trans_start(const struct lu_env *env, struct dt_device *d,
                  */
                 if (last_credits != oh->ot_credits &&
                     time_after(jiffies, last_printed +
-                              msecs_to_jiffies(60 * MSEC_PER_SEC)) &&
+                              cfs_time_seconds(60)) &&
                     osd_transaction_size(dev) > 512) {
                         CWARN("%s: credits %u > trans_max %u\n", osd_name(dev),
                               oh->ot_credits, osd_transaction_size(dev));
@@ -2171,7 +2186,7 @@ static int osd_object_print(const struct lu_env *env, void *cookie,
   * Concurrency: shouldn't matter.
   */
  int osd_statfs(const struct lu_env *env, struct dt_device *d,
-               struct obd_statfs *sfs)
+               struct obd_statfs *sfs, struct obd_statfs_info *info)
  {
         struct osd_device *osd = osd_dt_dev(d);
         struct super_block *sb = osd_sb(osd);
@@ -2196,8 +2211,11 @@ int osd_statfs(const struct lu_env *env, struct dt_device *d,
                 goto out;
  
         statfs_pack(sfs, ksfs);
-       if (unlikely(sb->s_flags & MS_RDONLY))
+       if (unlikely(sb->s_flags & SB_RDONLY))
                 sfs->os_state |= OS_STATE_READONLY;
+
+       sfs->os_state |= osd->od_nonrotational ? OS_STATE_NONROT : 0;
+
         if (ldiskfs_has_feature_extents(sb))
                 sfs->os_maxbytes = sb->s_maxbytes;
         else
@@ -2267,12 +2285,12 @@ static void osd_conf_get(const struct lu_env *env,
          */
         param->ddp_inodespace     = PER_OBJ_USAGE;
         /*
-        * EXT_INIT_MAX_LEN is the theoretical maximum extent size  (32k blocks
-        * = 128MB) which is unlikely to be hit in real life. Report a smaller
-        * maximum length to not under count the actual number of extents
-        * needed for writing a file.
+        * EXT_INIT_MAX_LEN is the theoretical maximum extent size (32k blocks
+        * is 128MB) which is unlikely to be hit in real life. Report a smaller
+        * maximum length to not under-count the actual number of extents
+        * needed for writing a file if there are sub-optimal block allocations.
          */
-       param->ddp_max_extent_blks = EXT_INIT_MAX_LEN >> 2;
+       param->ddp_max_extent_blks = EXT_INIT_MAX_LEN >> 1;
         /* worst-case extent insertion metadata overhead */
         param->ddp_extent_tax = 6 * LDISKFS_BLOCK_SIZE(sb);
         param->ddp_mntopts = 0;
@@ -2297,8 +2315,8 @@ static void osd_conf_get(const struct lu_env *env,
  #endif
                 param->ddp_max_ea_size = sb->s_blocksize - ea_overhead;
  
-       if (param->ddp_max_ea_size > OBD_MAX_EA_SIZE - ea_overhead)
-               param->ddp_max_ea_size = OBD_MAX_EA_SIZE - ea_overhead;
+       if (param->ddp_max_ea_size > OBD_MAX_EA_SIZE)
+               param->ddp_max_ea_size = OBD_MAX_EA_SIZE;
  
         /*
          * Preferred RPC size for efficient disk IO.  4MB shows good
@@ -2360,6 +2378,11 @@ static void osd_conf_get(const struct lu_env *env,
         }
  }
  
+static struct super_block *osd_mnt_sb_get(const struct dt_device *d)
+{
+       return osd_sb(osd_dt_dev(d));
+}
+
  /*
   * Concurrency: shouldn't matter.
   */
@@ -2522,6 +2545,7 @@ static const struct dt_device_operations osd_dt_ops = {
         .dt_trans_stop     = osd_trans_stop,
         .dt_trans_cb_add   = osd_trans_cb_add,
         .dt_conf_get       = osd_conf_get,
+       .dt_mnt_sb_get     = osd_mnt_sb_get,
         .dt_sync           = osd_sync,
         .dt_ro             = osd_ro,
         .dt_commit_async   = osd_commit_async,
@@ -2593,18 +2617,6 @@ static int osd_write_locked(const struct lu_env *env, struct dt_object *dt)
         return obj->oo_owner == env;
  }
  
-static struct timespec *osd_inode_time(const struct lu_env *env,
-                                      struct inode *inode, __u64 seconds)
-{
-       struct osd_thread_info *oti = osd_oti_get(env);
-       struct timespec *t = &oti->oti_time;
-
-       t->tv_sec = seconds;
-       t->tv_nsec = 0;
-       *t = timespec_trunc(*t, inode->i_sb->s_time_gran);
-       return t;
-}
-
  static void osd_inode_getattr(const struct lu_env *env,
                               struct inode *inode, struct lu_attr *attr)
  {
@@ -2820,11 +2832,11 @@ static int osd_inode_setattr(const struct lu_env *env,
                 return 0;
  
         if (bits & LA_ATIME)
-               inode->i_atime  = *osd_inode_time(env, inode, attr->la_atime);
+               inode->i_atime = osd_inode_time(inode, attr->la_atime);
         if (bits & LA_CTIME)
-               inode->i_ctime  = *osd_inode_time(env, inode, attr->la_ctime);
+               inode->i_ctime = osd_inode_time(inode, attr->la_ctime);
         if (bits & LA_MTIME)
-               inode->i_mtime  = *osd_inode_time(env, inode, attr->la_mtime);
+               inode->i_mtime = osd_inode_time(inode, attr->la_mtime);
         if (bits & LA_SIZE) {
                 spin_lock(&inode->i_lock);
                 LDISKFS_I(inode)->i_disksize = attr->la_size;
@@ -2927,7 +2939,7 @@ static int osd_quota_transfer(struct inode *inode, const struct lu_attr *attr)
             (attr->la_valid & LA_GID && attr->la_gid != i_gid_read(inode))) {
                 struct iattr iattr;
  
-               ll_vfs_dq_init(inode);
+               dquot_initialize(inode);
                 iattr.ia_valid = 0;
                 if (attr->la_valid & LA_UID)
                         iattr.ia_valid |= ATTR_UID;
@@ -2936,7 +2948,7 @@ static int osd_quota_transfer(struct inode *inode, const struct lu_attr *attr)
                 iattr.ia_uid = make_kuid(&init_user_ns, attr->la_uid);
                 iattr.ia_gid = make_kgid(&init_user_ns, attr->la_gid);
  
-               rc = ll_vfs_dq_transfer(inode, &iattr);
+               rc = dquot_transfer(inode, &iattr);
                 if (rc) {
                         CERROR("%s: quota transfer failed: rc = %d. Is quota "
                                "enforcement enabled on the ldiskfs "
@@ -3395,6 +3407,19 @@ static int __osd_oi_insert(const struct lu_env *env, struct osd_object *obj,
         osd_id_gen(id, obj->oo_inode->i_ino, obj->oo_inode->i_generation);
         rc = osd_oi_insert(info, osd, fid, id, oh->ot_handle,
                            OI_CHECK_FLD, NULL);
+       if (CFS_FAIL_CHECK(OBD_FAIL_OSD_DUPLICATE_MAP) && osd->od_is_ost) {
+               struct lu_fid next_fid = *fid;
+
+               /* insert next object in advance, and map to the same inode */
+               next_fid.f_oid++;
+               if (next_fid.f_oid != 0) {
+                       osd_trans_exec_op(env, th, OSD_OT_INSERT);
+                       osd_oi_insert(info, osd, &next_fid, id, oh->ot_handle,
+                                     OI_CHECK_FLD, NULL);
+                       osd_trans_exec_check(env, th, OSD_OT_INSERT);
+               }
+       }
+
         osd_trans_exec_check(env, th, OSD_OT_INSERT);
  
         return rc;
@@ -3459,6 +3484,9 @@ static int osd_declare_create(const struct lu_env *env, struct dt_object *dt,
          */
         osd_trans_declare_op(env, oh, OSD_OT_INSERT,
                              osd_dto_credits_noquota[DTO_INDEX_INSERT] + 1);
+       if (CFS_FAIL_CHECK(OBD_FAIL_OSD_DUPLICATE_MAP))
+               osd_trans_declare_op(env, oh, OSD_OT_INSERT,
+                            osd_dto_credits_noquota[DTO_INDEX_INSERT] + 1);
  
         /* will help to find FID->ino mapping at dt_insert() */
         rc = osd_idc_find_and_init(env, osd_obj2dev(osd_dt_obj(dt)),
@@ -4405,7 +4433,7 @@ static int osd_xattr_set_pfid(const struct lu_env *env, struct osd_object *obj,
                         RETURN(fl);
  
                 /* Remove old PFID EA entry firstly. */
-               ll_vfs_dq_init(inode);
+               dquot_initialize(inode);
                 rc = osd_removexattr(dentry, inode, XATTR_NAME_FID);
                 if (rc == -ENODATA) {
                         if ((fl & LU_XATTR_REPLACE) && !(fl & LU_XATTR_CREATE))
@@ -4709,7 +4737,7 @@ static int osd_xattr_del(const struct lu_env *env, struct dt_object *dt,
                                 obj->oo_pfid_in_lma = 0;
                 }
         } else {
-               ll_vfs_dq_init(inode);
+               dquot_initialize(inode);
                 dentry->d_inode = inode;
                 dentry->d_sb = inode->i_sb;
                 rc = osd_removexattr(dentry, inode, name);
@@ -4744,7 +4772,7 @@ static int osd_object_sync(const struct lu_env *env, struct dt_object *dt,
         file->f_op = inode->i_fop;
         set_file_inode(file, inode);
  
-       rc = ll_vfs_fsync_range(file, start, end, 0);
+       rc = vfs_fsync_range(file, start, end, 0);
  
         RETURN(rc);
  }
@@ -5160,7 +5188,7 @@ static int osd_index_ea_delete(const struct lu_env *env, struct dt_object *dt,
         LASSERT(oh->ot_handle != NULL);
         LASSERT(oh->ot_handle->h_transaction != NULL);
  
-       ll_vfs_dq_init(dir);
+       dquot_initialize(dir);
         dentry = osd_child_dentry_get(env, obj,
                                       (char *)key, strlen((char *)key));
  
@@ -5395,7 +5423,7 @@ static int __osd_ea_add_rec(struct osd_thread_info *info,
                 osd_get_ldiskfs_dirent_param(ldp, fid);
         child = osd_child_dentry_get(info->oti_env, pobj, name, strlen(name));
         child->d_fsdata = (void *)ldp;
-       ll_vfs_dq_init(pobj->oo_inode);
+       dquot_initialize(pobj->oo_inode);
         rc = osd_ldiskfs_add_entry(info, osd_obj2dev(pobj), oth->ot_handle,
                                    child, cinode, hlock);
         if (rc == 0 && OBD_FAIL_CHECK(OBD_FAIL_LFSCK_BAD_TYPE)) {
@@ -6599,11 +6627,7 @@ static int osd_ldiskfs_it_fill(const struct lu_env *env,
  
  #ifdef HAVE_DIR_CONTEXT
         buf.ctx.pos = filp->f_pos;
-#ifdef HAVE_ITERATE_SHARED
         rc = inode->i_fop->iterate_shared(filp, &buf.ctx);
-#else
-       rc = inode->i_fop->iterate(filp, &buf.ctx);
-#endif
         filp->f_pos = buf.ctx.pos;
  #else
         rc = inode->i_fop->readdir(filp, &buf, osd_ldiskfs_filldir);
@@ -6762,7 +6786,7 @@ osd_dirent_reinsert(const struct lu_env *env, struct osd_device *dev,
         ldp = (struct ldiskfs_dentry_param *)osd_oti_get(env)->oti_ldp;
         osd_get_ldiskfs_dirent_param(ldp, fid);
         dentry->d_fsdata = (void *)ldp;
-       ll_vfs_dq_init(dir);
+       dquot_initialize(dir);
         rc = osd_ldiskfs_add_entry(info, dev, jh, dentry, inode, hlock);
         /*
          * It is too bad, we cannot reinsert the name entry back.
@@ -7586,7 +7610,7 @@ static int osd_mount(const struct lu_env *env,
                         "force_over_512tb",
                         NULL
                 };
-               strcat(options, opts);
+               strncat(options, opts, PAGE_SIZE);
                 for (rc = 0, str = options; sout[rc]; ) {
                         char *op = strstr(str, sout[rc]);
  
@@ -7606,13 +7630,13 @@ static int osd_mount(const struct lu_env *env,
                                 ;
                 }
         } else {
-               strncat(options, "user_xattr,acl", 14);
+               strncat(options, "user_xattr,acl", PAGE_SIZE);
         }
  
         /* Glom up mount options */
         if (*options != '\0')
-               strcat(options, ",");
-       strlcat(options, "no_mbcache,nodelalloc", PAGE_SIZE);
+               strncat(options, ",", PAGE_SIZE);
+       strncat(options, "no_mbcache,nodelalloc", PAGE_SIZE);
  
         type = get_fs_type("ldiskfs");
         if (!type) {
@@ -7751,6 +7775,7 @@ static int osd_device_init0(const struct lu_env *env,
         o->od_read_cache = 1;
         o->od_writethrough_cache = 1;
         o->od_readcache_max_filesize = OSD_MAX_CACHE_SIZE;
+
         o->od_auto_scrub_interval = AS_DEFAULT;
  
         cplen = strlcpy(o->od_svname, lustre_cfg_string(cfg, 4),
@@ -7772,6 +7797,10 @@ static int osd_device_init0(const struct lu_env *env,
         if (rc != 0)
                 GOTO(out, rc);
  
+       /* Can only check block device after mount */
+       o->od_nonrotational =
+               blk_queue_nonrot(bdev_get_queue(osd_sb(o)->s_bdev));
+
         rc = osd_obj_map_init(env, o);
         if (rc != 0)
                 GOTO(out_mnt, rc);
@@ -8110,7 +8139,7 @@ static int osd_health_check(const struct lu_env *env, struct obd_device *obd)
         struct osd_device *osd = osd_dev(obd->obd_lu_dev);
         struct super_block *sb = osd_sb(osd);
  
-       return (osd->od_mnt == NULL || sb->s_flags & MS_RDONLY);
+       return (osd->od_mnt == NULL || sb->s_flags & SB_RDONLY);
  }
  
  /*