X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Fosd-ldiskfs%2Fosd_handler.c;h=584c8c147c49de53bb2014ad9bee82fce730797d;hp=d7eb0e11b31b38236f79f5f1cfe001005a5df35f;hb=6df76d3357fc5896b6902399ed7ce6d7c7835f58;hpb=546993d587c5fc380e9745eae98f863e02e68575 diff --git a/lustre/osd-ldiskfs/osd_handler.c b/lustre/osd-ldiskfs/osd_handler.c index d7eb0e1..584c8c1 100644 --- a/lustre/osd-ldiskfs/osd_handler.c +++ b/lustre/osd-ldiskfs/osd_handler.c @@ -39,12 +39,11 @@ #define DEBUG_SUBSYSTEM S_OSD +#include #include #include #include -#ifdef HAVE_UIDGID_HEADER -# include -#endif +#include /* prerequisite for linux/xattr.h */ #include @@ -211,14 +210,14 @@ osd_idc_add(const struct lu_env *env, struct osd_device *osd, i = oti->oti_ins_cache_size * 2; if (i == 0) i = OSD_INS_CACHE_SIZE; - OBD_ALLOC(idc, sizeof(*idc) * i); + OBD_ALLOC_PTR_ARRAY(idc, i); if (idc == NULL) return ERR_PTR(-ENOMEM); if (oti->oti_ins_cache != NULL) { memcpy(idc, oti->oti_ins_cache, oti->oti_ins_cache_used * sizeof(*idc)); - OBD_FREE(oti->oti_ins_cache, - oti->oti_ins_cache_used * sizeof(*idc)); + OBD_FREE_PTR_ARRAY(oti->oti_ins_cache, + oti->oti_ins_cache_used); } oti->oti_ins_cache = idc; oti->oti_ins_cache_size = i; @@ -286,6 +285,76 @@ osd_idc_find_or_init(const struct lu_env *env, struct osd_device *osd, return idc; } +static void osd_idc_dump_lma(const struct lu_env *env, + struct osd_device *osd, + unsigned long ino, + bool check_in_oi) +{ + struct osd_thread_info *info = osd_oti_get(env); + struct lustre_ost_attrs *loa = &info->oti_ost_attrs; + const struct lu_fid *fid; + struct osd_inode_id lid; + struct inode *inode; + int rc; + + inode = osd_ldiskfs_iget(osd_sb(osd), ino); + if (IS_ERR(inode)) { + CERROR("%s: can't get inode %lu: rc = %d\n", + osd->od_svname, ino, (int)PTR_ERR(inode)); + return; + } + if (is_bad_inode(inode)) { + CERROR("%s: bad inode %lu\n", osd->od_svname, ino); + goto put; + } + rc = osd_get_lma(info, inode, &info->oti_obj_dentry, loa); + if (rc) { + CERROR("%s: can't get LMA for %lu: rc = %d\n", + osd->od_svname, ino, rc); + goto put; + } + fid = &loa->loa_lma.lma_self_fid; + LCONSOLE(D_INFO, "%s: "DFID" in inode %lu/%u\n", osd->od_svname, + PFID(fid), ino, (unsigned)inode->i_generation); + if (!check_in_oi) + goto put; + rc = osd_oi_lookup(osd_oti_get(env), osd, fid, &lid, 0); + if (rc) { + CERROR("%s: can't lookup "DFID": rc = %d\n", + osd->od_svname, PFID(fid), rc); + goto put; + } + LCONSOLE(D_INFO, "%s: "DFID" maps to %u/%u\n", osd->od_svname, + PFID(fid), lid.oii_ino, lid.oii_gen); +put: + iput(inode); +} + +static void osd_idc_dump_debug(const struct lu_env *env, + struct osd_device *osd, + const struct lu_fid *fid, + unsigned long ino1, + unsigned long ino2) +{ + struct osd_inode_id lid; + + int rc; + + rc = osd_oi_lookup(osd_oti_get(env), osd, fid, &lid, 0); + if (!rc) { + LCONSOLE(D_INFO, "%s: "DFID" maps to %u/%u\n", + osd->od_svname, PFID(fid), lid.oii_ino, lid.oii_gen); + osd_idc_dump_lma(env, osd, lid.oii_ino, false); + } else { + CERROR("%s: can't lookup "DFID": rc = %d\n", + osd->od_svname, PFID(fid), rc); + } + if (ino1) + osd_idc_dump_lma(env, osd, ino1, true); + if (ino2) + osd_idc_dump_lma(env, osd, ino2, true); +} + /* * lookup mapping for given FID and fill it from the given object. * the object is lolcal by definition. @@ -303,7 +372,12 @@ static int osd_idc_find_and_init(const struct lu_env *env, if (obj->oo_inode == NULL) return 0; if (idc->oic_lid.oii_ino != obj->oo_inode->i_ino) { - LASSERT(idc->oic_lid.oii_ino == 0); + if (idc->oic_lid.oii_ino) { + osd_idc_dump_debug(env, osd, fid, + idc->oic_lid.oii_ino, + obj->oo_inode->i_ino); + return -EINVAL; + } idc->oic_lid.oii_ino = obj->oo_inode->i_ino; idc->oic_lid.oii_gen = obj->oo_inode->i_generation; } @@ -393,12 +467,11 @@ int osd_get_lma(struct osd_thread_info *info, struct inode *inode, lustre_loa_swab(loa, true); /* Check LMA compatibility */ if (lma->lma_incompat & ~LMA_INCOMPAT_SUPP) { - CWARN("%s: unsupported incompat LMA feature(s) %#x " - "for fid = "DFID", ino = %lu\n", + rc = -EOPNOTSUPP; + CWARN("%s: unsupported incompat LMA feature(s) %#x for fid = "DFID", ino = %lu: rc = %d\n", osd_ino2name(inode), lma->lma_incompat & ~LMA_INCOMPAT_SUPP, - PFID(&lma->lma_self_fid), inode->i_ino); - rc = -EOPNOTSUPP; + PFID(&lma->lma_self_fid), inode->i_ino, rc); } } else if (rc == 0) { rc = -ENODATA; @@ -443,10 +516,11 @@ struct inode *osd_iget(struct osd_thread_info *info, struct osd_device *dev, iput(inode); inode = ERR_PTR(-ESTALE); } else if (is_bad_inode(inode)) { - CWARN("%s: bad inode: ino = %u\n", - osd_dev2name(dev), id->oii_ino); + rc = -ENOENT; + CWARN("%s: bad inode: ino = %u: rc = %d\n", + osd_dev2name(dev), id->oii_ino, rc); iput(inode); - inode = ERR_PTR(-ENOENT); + inode = ERR_PTR(rc); } else if ((rc = osd_attach_jinode(inode))) { iput(inode); inode = ERR_PTR(rc); @@ -479,6 +553,7 @@ int osd_ldiskfs_add_entry(struct osd_thread_info *info, struct osd_device *osd, struct lustre_ost_attrs *loa = &info->oti_ost_attrs; struct inode *parent = child->d_parent->d_inode; struct lu_fid *fid = NULL; + char fidstr[FID_LEN + 1] = "unknown"; rc2 = osd_get_lma(info, parent, child->d_parent, loa); if (!rc2) { @@ -495,19 +570,18 @@ int osd_ldiskfs_add_entry(struct osd_thread_info *info, struct osd_device *osd, } if (fid != NULL) - /* below message is checked in sanity.sh test_129 */ - CWARN("%s: directory (inode: %lu, FID: "DFID") %s maximum entry limit\n", - osd_name(osd), parent->i_ino, PFID(fid), - rc == -ENOSPC ? "has reached" : "is approaching"); - else - /* below message is checked in sanity.sh test_129 */ - CWARN("%s: directory (inode: %lu, FID: unknown) %s maximum entry limit\n", - osd_name(osd), parent->i_ino, - rc == -ENOSPC ? "has reached" : "is approaching"); + snprintf(fidstr, sizeof(fidstr), DFID, PFID(fid)); + + /* below message is checked in sanity.sh test_129 */ + if (rc == -ENOSPC) { + CWARN("%s: directory (inode: %lu, FID: %s) has reached max size limit\n", + osd_name(osd), parent->i_ino, fidstr); + } else { + rc = 0; /* ignore such error now */ + CWARN("%s: directory (inode: %lu, FID: %s) is approaching max size limit\n", + osd_name(osd), parent->i_ino, fidstr); + } - /* ignore such error now */ - if (rc == -ENOBUFS) - rc = 0; } return rc; @@ -855,10 +929,8 @@ static int osd_check_lma(const struct lu_env *env, struct osd_object *obj) } struct osd_check_lmv_buf { -#ifdef HAVE_DIR_CONTEXT /* please keep it as first member */ struct dir_context ctx; -#endif struct osd_thread_info *oclb_info; struct osd_device *oclb_dev; struct osd_idmap_cache *oclb_oic; @@ -867,7 +939,7 @@ struct osd_check_lmv_buf { }; /** - * It is called internally by ->readdir() to filter out the + * It is called internally by ->iterate*() to filter out the * local slave object's FID of the striped directory. * * \retval 1 found the local slave's FID @@ -960,13 +1032,10 @@ static int osd_check_lmv(struct osd_thread_info *oti, struct osd_device *dev, { struct lu_buf *buf = &oti->oti_big_buf; struct dentry *dentry = &oti->oti_obj_dentry; - struct file *filp = &oti->oti_file; - const struct file_operations *fops; + struct file *filp; struct lmv_mds_md_v1 *lmv1; struct osd_check_lmv_buf oclb = { -#ifdef HAVE_DIR_CONTEXT .ctx.actor = osd_stripe_dir_filldir, -#endif .oclb_info = oti, .oclb_dev = dev, .oclb_oic = oic, @@ -1008,35 +1077,23 @@ again: if (le32_to_cpu(lmv1->lmv_magic) != LMV_MAGIC_V1) GOTO(out, rc = 0); - fops = inode->i_fop; - dentry->d_inode = inode; - dentry->d_sb = inode->i_sb; - filp->f_pos = 0; - filp->f_path.dentry = dentry; - filp->f_mode = FMODE_64BITHASH; - filp->f_mapping = inode->i_mapping; - filp->f_op = fops; - filp->private_data = NULL; - set_file_inode(filp, inode); + filp = osd_quasi_file(oti->oti_env, inode); + rc = osd_security_file_alloc(filp); + if (rc) + goto out; do { oclb.oclb_items = 0; -#ifdef HAVE_DIR_CONTEXT - oclb.ctx.pos = filp->f_pos; - rc = fops->iterate_shared(filp, &oclb.ctx); - filp->f_pos = oclb.ctx.pos; -#else - rc = fops->readdir(filp, &oclb, osd_stripe_dir_filldir); -#endif + rc = iterate_dir(filp, &oclb.ctx); } while (rc >= 0 && oclb.oclb_items > 0 && !oclb.oclb_found && filp->f_pos != LDISKFS_HTREE_EOF_64BIT); - fops->release(inode, filp); + inode->i_fop->release(inode, filp); out: if (rc < 0) - CDEBUG(D_LFSCK, "%s: fail to check LMV EA, inode = %lu/%u," - DFID": rc = %d\n", osd_ino2name(inode), - inode->i_ino, inode->i_generation, + CDEBUG(D_LFSCK, + "%s: cannot check LMV, ino = %lu/%u "DFID": rc = %d\n", + osd_ino2name(inode), inode->i_ino, inode->i_generation, PFID(&oic->oic_fid), rc); else rc = 0; @@ -1070,7 +1127,13 @@ static int osd_fid_lookup(const struct lu_env *env, struct osd_object *obj, LINVRNT(osd_invariant(obj)); LASSERT(obj->oo_inode == NULL); - LASSERTF(fid_is_sane(fid) || fid_is_idif(fid), DFID"\n", PFID(fid)); + + if (fid_is_sane(fid) == 0) { + CERROR("%s: invalid FID "DFID"\n", ldev->ld_obd->obd_name, + PFID(fid)); + dump_stack(); + RETURN(-EINVAL); + } dev = osd_dev(ldev); scrub = &dev->od_scrub.os_scrub; @@ -1185,17 +1248,19 @@ trigger: } } - if (thread_is_running(&scrub->os_thread)) { + if (scrub->os_running) { if (scrub->os_partial_scan && !scrub->os_in_join) goto join; - if (IS_ERR_OR_NULL(inode) || result) + osd_add_oi_cache(info, dev, id, fid); + if (IS_ERR_OR_NULL(inode) || result) { + osd_oii_insert(dev, oic, result == -ENOENT); GOTO(out, result = -EINPROGRESS); + } LASSERT(remote); LASSERT(obj->oo_inode == inode); - osd_add_oi_cache(info, dev, id, fid); osd_oii_insert(dev, oic, true); goto found; } @@ -1218,13 +1283,15 @@ join: if (rc1 && rc1 != -EALREADY) GOTO(out, result = -EREMCHG); - if (IS_ERR_OR_NULL(inode) || result) + osd_add_oi_cache(info, dev, id, fid); + if (IS_ERR_OR_NULL(inode) || result) { + osd_oii_insert(dev, oic, result == -ENOENT); GOTO(out, result = -EINPROGRESS); + } LASSERT(remote); LASSERT(obj->oo_inode == inode); - osd_add_oi_cache(info, dev, id, fid); osd_oii_insert(dev, oic, true); goto found; @@ -1407,6 +1474,13 @@ static int osd_object_init(const struct lu_env *env, struct lu_object *l, LINVRNT(osd_invariant(obj)); + if (OBD_FAIL_PRECHECK(OBD_FAIL_MDS_LLOG_UMOUNT_RACE) && + cfs_fail_val == 2) { + struct osd_thread_info *info = osd_oti_get(env); + struct osd_idmap_cache *oic = &info->oti_cache; + /* invalidate thread cache */ + memset(&oic->oic_fid, 0, sizeof(oic->oic_fid)); + } if (fid_is_otable_it(&l->lo_header->loh_fid)) { obj->oo_dt.do_ops = &osd_obj_otable_it_ops; l->lo_header->loh_attr |= LOHA_EXISTS; @@ -1436,6 +1510,7 @@ static int osd_object_init(const struct lu_env *env, struct lu_object *l, result = 0; } } + obj->oo_dirent_count = LU_DIRENT_COUNT_UNSET; LINVRNT(osd_invariant(obj)); return result; @@ -1462,8 +1537,6 @@ static int osd_oxc_get(struct osd_object *obj, const char *name, size_t namelen = strlen(name); int rc; - ENTRY; - rcu_read_lock(); list_for_each_entry_rcu(tmp, &obj->oo_xattr_list, oxe_list) { if (namelen == tmp->oxe_namelen && @@ -1490,7 +1563,6 @@ static int osd_oxc_get(struct osd_object *obj, const char *name, GOTO(out, rc = -ERANGE); memcpy(buf->lb_buf, &oxe->oxe_buf[namelen + 1], rc); - EXIT; out: rcu_read_unlock(); @@ -1590,11 +1662,10 @@ static void osd_object_free(const struct lu_env *env, struct lu_object *l) dt_object_fini(&obj->oo_dt); if (obj->oo_hl_head != NULL) ldiskfs_htree_lock_head_free(obj->oo_hl_head); + /* obj doesn't contain an lu_object_header, so we don't need call_rcu */ OBD_FREE_PTR(obj); - if (unlikely(h)) { - lu_object_header_fini(h); - OBD_FREE_PTR(h); - } + if (unlikely(h)) + lu_object_header_free(h); } /* @@ -1616,16 +1687,6 @@ static void osd_index_fini(struct osd_object *o) } } -/* - * Concurrency: no concurrent access is possible that late in object - * life-cycle (for all existing callers, that is. New callers have to provide - * their own locking.) - */ -static int osd_inode_unlinked(const struct inode *inode) -{ - return inode->i_nlink == 0; -} - enum { OSD_TXN_OI_DELETE_CREDITS = 20, OSD_TXN_INODE_DELETE_CREDITS = 20 @@ -1712,9 +1773,10 @@ static int osd_param_is_not_sane(const struct osd_device *dev, static void osd_trans_commit_cb(struct super_block *sb, struct ldiskfs_journal_cb_entry *jcb, int error) { - struct osd_thandle *oh = container_of0(jcb, struct osd_thandle, ot_jcb); + struct osd_thandle *oh = container_of(jcb, struct osd_thandle, ot_jcb); struct thandle *th = &oh->ot_super; struct lu_device *lud = &th->th_dev->dd_lu_dev; + struct osd_device *osd = osd_dev(lud); struct dt_txn_commit_cb *dcb, *tmp; LASSERT(oh->ot_handle == NULL); @@ -1734,17 +1796,13 @@ static void osd_trans_commit_cb(struct super_block *sb, } lu_ref_del_at(&lud->ld_reference, &oh->ot_dev_link, "osd-tx", th); - lu_device_put(lud); + if (atomic_dec_and_test(&osd->od_commit_cb_in_flight)) + wake_up(&osd->od_commit_cb_done); th->th_dev = NULL; OBD_FREE_PTR(oh); } -#ifndef HAVE_SB_START_WRITE -# define sb_start_write(sb) do {} while (0) -# define sb_end_write(sb) do {} while (0) -#endif - static struct thandle *osd_trans_create(const struct lu_env *env, struct dt_device *d) { @@ -1780,6 +1838,7 @@ static struct thandle *osd_trans_create(const struct lu_env *env, th->th_dev = d; th->th_result = 0; oh->ot_credits = 0; + oh->oh_declared_ext = 0; INIT_LIST_HEAD(&oh->ot_commit_dcb_list); INIT_LIST_HEAD(&oh->ot_stop_dcb_list); INIT_LIST_HEAD(&oh->ot_trunc_locks); @@ -1802,7 +1861,7 @@ void osd_trans_dump_creds(const struct lu_env *env, struct thandle *th) struct osd_thread_info *oti = osd_oti_get(env); struct osd_thandle *oh; - oh = container_of0(th, struct osd_thandle, ot_super); + oh = container_of(th, struct osd_thandle, ot_super); LASSERT(oh != NULL); CWARN(" create: %u/%u/%u, destroy: %u/%u/%u\n", @@ -1861,7 +1920,7 @@ static int osd_trans_start(const struct lu_env *env, struct dt_device *d, LASSERT(current->journal_info == NULL); - oh = container_of0(th, struct osd_thandle, ot_super); + oh = container_of(th, struct osd_thandle, ot_super); LASSERT(oh != NULL); LASSERT(oh->ot_handle == NULL); @@ -1922,7 +1981,7 @@ static int osd_trans_start(const struct lu_env *env, struct dt_device *d, oh->ot_handle = jh; LASSERT(oti->oti_txns == 0); - lu_device_get(&d->dd_lu_dev); + atomic_inc(&dev->od_commit_cb_in_flight); lu_ref_add_at(&d->dd_lu_dev.ld_reference, &oh->ot_dev_link, "osd-tx", th); oti->oti_txns++; @@ -1990,7 +2049,7 @@ static int osd_trans_stop(const struct lu_env *env, struct dt_device *dt, ENTRY; - oh = container_of0(th, struct osd_thandle, ot_super); + oh = container_of(th, struct osd_thandle, ot_super); remove_agents = oh->ot_remove_agents; @@ -2039,7 +2098,7 @@ static int osd_trans_stop(const struct lu_env *env, struct dt_device *dt, OBD_FREE_PTR(oh); } - osd_trunc_unlock_all(&truncates); + osd_trunc_unlock_all(env, &truncates); /* inform the quota slave device that the transaction is stopping */ qsd_op_end(env, qsd, qtrans); @@ -2063,6 +2122,7 @@ static int osd_trans_stop(const struct lu_env *env, struct dt_device *dt, if (unlikely(remove_agents != 0)) osd_process_scheduled_agent_removals(env, osd); + LASSERT(oti->oti_ins_cache_depth > 0); oti->oti_ins_cache_depth--; /* reset OI cache for safety */ if (oti->oti_ins_cache_depth == 0) @@ -2075,8 +2135,8 @@ static int osd_trans_stop(const struct lu_env *env, struct dt_device *dt, static int osd_trans_cb_add(struct thandle *th, struct dt_txn_commit_cb *dcb) { - struct osd_thandle *oh = container_of0(th, struct osd_thandle, - ot_super); + struct osd_thandle *oh = container_of(th, struct osd_thandle, + ot_super); LASSERT(dcb->dcb_magic == TRANS_COMMIT_CB_MAGIC); LASSERT(&dcb->dcb_func != NULL); @@ -2115,6 +2175,9 @@ static void osd_object_delete(const struct lu_env *env, struct lu_object *l) if (!inode) return; + if (osd_has_index(obj) && obj->oo_dt.do_index_ops == &osd_index_iam_ops) + ldiskfs_set_inode_flag(inode, LDISKFS_INODE_JOURNAL_DATA); + uid = i_uid_read(inode); gid = i_gid_read(inode); projid = i_projid_read(inode); @@ -2212,9 +2275,9 @@ int osd_statfs(const struct lu_env *env, struct dt_device *d, statfs_pack(sfs, ksfs); if (unlikely(sb->s_flags & SB_RDONLY)) - sfs->os_state |= OS_STATE_READONLY; + sfs->os_state |= OS_STATFS_READONLY; - sfs->os_state |= osd->od_nonrotational ? OS_STATE_NONROT : 0; + sfs->os_state |= osd->od_nonrotational ? OS_STATFS_NONROT : 0; if (ldiskfs_has_feature_extents(sb)) sfs->os_maxbytes = sb->s_maxbytes; @@ -2229,7 +2292,7 @@ int osd_statfs(const struct lu_env *env, struct dt_device *d, * * Reserve 0.78% of total space, at least 8MB for small filesystems. */ - CLASSERT(OSD_STATFS_RESERVED > LDISKFS_MAX_BLOCK_SIZE); + BUILD_BUG_ON(OSD_STATFS_RESERVED <= LDISKFS_MAX_BLOCK_SIZE); reserved = OSD_STATFS_RESERVED >> sb->s_blocksize_bits; if (likely(sfs->os_blocks >= reserved << OSD_STATFS_RESERVED_SHIFT)) reserved = sfs->os_blocks >> OSD_STATFS_RESERVED_SHIFT; @@ -2376,6 +2439,8 @@ static void osd_conf_get(const struct lu_env *env, d->od_svname, name); } } + + param->ddp_has_lseek_data_hole = true; } static struct super_block *osd_mnt_sb_get(const struct dt_device *d) @@ -2430,10 +2495,14 @@ static int osd_commit_async(const struct lu_env *env, RETURN(rc); } -/* Our own copy of the set readonly functions if present, or NU if not. */ -static int (*priv_dev_set_rdonly)(struct block_device *bdev); -static int (*priv_dev_check_rdonly)(struct block_device *bdev); -/* static int (*priv_dev_clear_rdonly)(struct block_device *bdev); */ +static int (*priv_security_file_alloc)(struct file *file); + +int osd_security_file_alloc(struct file *file) +{ + if (priv_security_file_alloc) + return priv_security_file_alloc(file); + return 0; +} /* * Concurrency: shouldn't matter. @@ -2446,35 +2515,8 @@ static int osd_ro(const struct lu_env *env, struct dt_device *d) ENTRY; - if (priv_dev_set_rdonly) { - struct block_device *jdev = LDISKFS_SB(sb)->journal_bdev; - - rc = 0; - CERROR("*** setting %s read-only ***\n", - osd_dt_dev(d)->od_svname); - - if (sb->s_op->freeze_fs) { - rc = sb->s_op->freeze_fs(sb); - if (rc) - goto out; - } - - if (jdev && (jdev != dev)) { - CDEBUG(D_IOCTL | D_HA, "set journal dev %lx rdonly\n", - (long)jdev); - priv_dev_set_rdonly(jdev); - } - CDEBUG(D_IOCTL | D_HA, "set dev %lx rdonly\n", (long)dev); - priv_dev_set_rdonly(dev); - - if (sb->s_op->unfreeze_fs) - sb->s_op->unfreeze_fs(sb); - } - -out: - if (rc) - CERROR("%s: %lx CANNOT BE SET READONLY: rc = %d\n", - osd_dt_dev(d)->od_svname, (long)dev, rc); + CERROR("%s: %lx CANNOT BE SET READONLY: rc = %d\n", + osd_dt_dev(d)->od_svname, (long)dev, rc); RETURN(rc); } @@ -2623,11 +2665,12 @@ static void osd_inode_getattr(const struct lu_env *env, attr->la_valid |= LA_ATIME | LA_MTIME | LA_CTIME | LA_MODE | LA_SIZE | LA_BLOCKS | LA_UID | LA_GID | LA_PROJID | LA_FLAGS | LA_NLINK | LA_RDEV | - LA_BLKSIZE | LA_TYPE; + LA_BLKSIZE | LA_TYPE | LA_BTIME; attr->la_atime = inode->i_atime.tv_sec; attr->la_mtime = inode->i_mtime.tv_sec; attr->la_ctime = inode->i_ctime.tv_sec; + attr->la_btime = LDISKFS_I(inode)->i_crtime.tv_sec; attr->la_mode = inode->i_mode; attr->la_size = i_size_read(inode); attr->la_blocks = inode->i_blocks; @@ -2648,10 +2691,71 @@ static void osd_inode_getattr(const struct lu_env *env, attr->la_flags |= LUSTRE_PROJINHERIT_FL; } +static int osd_dirent_count(const struct lu_env *env, struct dt_object *dt, + u64 *count) +{ + struct osd_object *obj = osd_dt_obj(dt); + const struct dt_it_ops *iops; + struct dt_it *it; + int rc; + + ENTRY; + + LASSERT(S_ISDIR(obj->oo_inode->i_mode)); + LASSERT(fid_is_namespace_visible(lu_object_fid(&obj->oo_dt.do_lu))); + + if (obj->oo_dirent_count != LU_DIRENT_COUNT_UNSET) { + *count = obj->oo_dirent_count; + RETURN(0); + } + + /* directory not initialized yet */ + if (!dt->do_index_ops) { + *count = 0; + RETURN(0); + } + + iops = &dt->do_index_ops->dio_it; + it = iops->init(env, dt, LUDA_64BITHASH); + if (IS_ERR(it)) + RETURN(PTR_ERR(it)); + + rc = iops->load(env, it, 0); + if (rc < 0) { + if (rc == -ENODATA) { + rc = 0; + *count = 0; + } + GOTO(out, rc); + } + if (rc > 0) + rc = iops->next(env, it); + + for (*count = 0; rc == 0 || rc == -ESTALE; rc = iops->next(env, it)) { + if (rc == -ESTALE) + continue; + + if (iops->key_size(env, it) == 0) + continue; + + (*count)++; + } + if (rc == 1) { + obj->oo_dirent_count = *count; + rc = 0; + } +out: + iops->put(env, it); + iops->fini(env, it); + + RETURN(rc); +} + static int osd_attr_get(const struct lu_env *env, struct dt_object *dt, struct lu_attr *attr) { struct osd_object *obj = osd_dt_obj(dt); + int rc = 0; if (unlikely(!dt_object_exists(dt))) return -ENOENT; @@ -2667,9 +2771,17 @@ static int osd_attr_get(const struct lu_env *env, struct dt_object *dt, attr->la_valid |= LA_FLAGS; attr->la_flags |= LUSTRE_ORPHAN_FL; } + if (obj->oo_lma_flags & LUSTRE_ENCRYPT_FL) { + attr->la_valid |= LA_FLAGS; + attr->la_flags |= LUSTRE_ENCRYPT_FL; + } spin_unlock(&obj->oo_guard); - return 0; + if (S_ISDIR(obj->oo_inode->i_mode) && + fid_is_namespace_visible(lu_object_fid(&dt->do_lu))) + rc = osd_dirent_count(env, dt, &attr->la_dirent_count); + + return rc; } static int osd_declare_attr_qid(const struct lu_env *env, @@ -2752,7 +2864,7 @@ static int osd_declare_attr_set(const struct lu_env *env, obj = osd_dt_obj(dt); LASSERT(osd_invariant(obj)); - oh = container_of0(handle, struct osd_thandle, ot_super); + oh = container_of(handle, struct osd_thandle, ot_super); LASSERT(oh->ot_handle == NULL); osd_trans_declare_op(env, oh, OSD_OT_ATTR_SET, @@ -2779,9 +2891,9 @@ static int osd_declare_attr_set(const struct lu_env *env, bool ignore_edquot = !(attr->la_flags & LUSTRE_SET_SYNC_FL); if (!ignore_edquot) - CDEBUG(D_QUOTA, "%s: enforce quota on UID %u, GID %u" - "(the quota space is %lld)\n", - obj->oo_inode->i_sb->s_id, attr->la_uid, + CDEBUG(D_QUOTA, + "%s: enforce quota on UID %u, GID %u (quota space is %lld)\n", + osd_ino2name(obj->oo_inode), attr->la_uid, attr->la_gid, bspace); /* USERQUOTA */ @@ -2794,9 +2906,10 @@ static int osd_declare_attr_set(const struct lu_env *env, RETURN(rc); gid = i_gid_read(obj->oo_inode); + CDEBUG(D_QUOTA, "declare uid %d -> %d gid %d -> %d\n", uid, + attr->la_uid, gid, attr->la_gid); enforce = (attr->la_valid & LA_GID) && (attr->la_gid != gid); - rc = osd_declare_attr_qid(env, obj, oh, bspace, - i_gid_read(obj->oo_inode), + rc = osd_declare_attr_qid(env, obj, oh, bspace, gid, attr->la_gid, enforce, GRPQUOTA, ignore_edquot); if (rc) @@ -2866,6 +2979,13 @@ static int osd_inode_setattr(const struct lu_env *env, /* always keep S_NOCMTIME */ inode->i_flags = ll_ext_to_inode_flags(attr->la_flags) | S_NOCMTIME; +#if defined(S_ENCRYPTED) + /* Always remove S_ENCRYPTED, because ldiskfs must not be + * aware of encryption status. It is just stored into LMA + * so that it can be forwared to client side. + */ + inode->i_flags &= ~S_ENCRYPTED; +#endif /* * Ext4 did not transfer inherit flags from * @inode->i_flags to raw inode i_flags when writing @@ -2880,7 +3000,8 @@ static int osd_inode_setattr(const struct lu_env *env, } #ifdef HAVE_PROJECT_QUOTA -static int osd_transfer_project(struct inode *inode, __u32 projid) +static int osd_transfer_project(struct inode *inode, __u32 projid, + struct thandle *handle) { struct super_block *sb = inode->i_sb; struct ldiskfs_inode_info *ei = LDISKFS_I(inode); @@ -2912,9 +3033,18 @@ static int osd_transfer_project(struct inode *inode, __u32 projid) raw_inode = ldiskfs_raw_inode(&iloc); if (!LDISKFS_FITS_IN_INODE(raw_inode, ei, i_projid)) { - err = -EOVERFLOW; - brelse(iloc.bh); - return err; + struct osd_thandle *oh = container_of(handle, + struct osd_thandle, + ot_super); + /** + * try to expand inode size automatically. + */ + ldiskfs_mark_inode_dirty(oh->ot_handle, inode); + if (!LDISKFS_FITS_IN_INODE(raw_inode, ei, i_projid)) { + err = -EOVERFLOW; + brelse(iloc.bh); + return err; + } } brelse(iloc.bh); @@ -2931,7 +3061,8 @@ static int osd_transfer_project(struct inode *inode, __u32 projid) } #endif -static int osd_quota_transfer(struct inode *inode, const struct lu_attr *attr) +static int osd_quota_transfer(struct inode *inode, const struct lu_attr *attr, + struct thandle *handle) { int rc; @@ -2939,6 +3070,11 @@ static int osd_quota_transfer(struct inode *inode, const struct lu_attr *attr) (attr->la_valid & LA_GID && attr->la_gid != i_gid_read(inode))) { struct iattr iattr; + CDEBUG(D_QUOTA, + "executing dquot_transfer inode %ld uid %d -> %d gid %d -> %d\n", + inode->i_ino, i_uid_read(inode), attr->la_uid, + i_gid_read(inode), attr->la_gid); + dquot_initialize(inode); iattr.ia_valid = 0; if (attr->la_valid & LA_UID) @@ -2950,9 +3086,8 @@ static int osd_quota_transfer(struct inode *inode, const struct lu_attr *attr) rc = dquot_transfer(inode, &iattr); if (rc) { - CERROR("%s: quota transfer failed: rc = %d. Is quota " - "enforcement enabled on the ldiskfs " - "filesystem?\n", inode->i_sb->s_id, rc); + CERROR("%s: quota transfer failed. Is quota enforcement enabled on the ldiskfs filesystem? rc = %d\n", + osd_ino2name(inode), rc); return rc; } } @@ -2961,14 +3096,13 @@ static int osd_quota_transfer(struct inode *inode, const struct lu_attr *attr) if (attr->la_valid & LA_PROJID && attr->la_projid != i_projid_read(inode)) { #ifdef HAVE_PROJECT_QUOTA - rc = osd_transfer_project(inode, attr->la_projid); + rc = osd_transfer_project(inode, attr->la_projid, handle); #else rc = -ENOTSUPP; #endif if (rc) { - CERROR("%s: quota transfer failed: rc = %d. Is project " - "enforcement enabled on the ldiskfs " - "filesystem?\n", inode->i_sb->s_id, rc); + CERROR("%s: quota transfer failed. Is project enforcement enabled on the ldiskfs filesystem? rc = %d\n", + osd_ino2name(inode), rc); return rc; } } @@ -3012,7 +3146,7 @@ static int osd_attr_set(const struct lu_env *env, if (unlikely(ipd == NULL)) RETURN(-ENOMEM); - oh = container_of0(handle, struct osd_thandle, ot_super); + oh = container_of(handle, struct osd_thandle, ot_super); rc = iam_update(oh->ot_handle, bag, (const struct iam_key *)fid1, (const struct iam_rec *)id, ipd); @@ -3022,7 +3156,7 @@ static int osd_attr_set(const struct lu_env *env, inode = obj->oo_inode; - rc = osd_quota_transfer(inode, attr); + rc = osd_quota_transfer(inode, attr, handle); if (rc) return rc; @@ -3032,7 +3166,9 @@ static int osd_attr_set(const struct lu_env *env, if (rc != 0) GOTO(out, rc); - ll_dirty_inode(inode, I_DIRTY_DATASYNC); + osd_dirty_inode(inode, I_DIRTY_DATASYNC); + + osd_trans_exec_check(env, handle, OSD_OT_ATTR_SET); if (!(attr->la_valid & LA_FLAGS)) GOTO(out, rc); @@ -3052,6 +3188,9 @@ static int osd_attr_set(const struct lu_env *env, lma->lma_incompat |= lustre_to_lma_flags(attr->la_flags); lustre_lma_swab(lma); + + osd_trans_exec_op(env, handle, OSD_OT_XATTR_SET); + rc = __osd_xattr_set(info, inode, XATTR_NAME_LMA, lma, sizeof(*lma), XATTR_REPLACE); if (rc != 0) { @@ -3067,7 +3206,6 @@ static int osd_attr_set(const struct lu_env *env, osd_trans_exec_check(env, handle, OSD_OT_XATTR_SET); } out: - osd_trans_exec_check(env, handle, OSD_OT_ATTR_SET); return rc; } @@ -3158,6 +3296,8 @@ static int osd_mkdir(struct osd_thread_info *info, struct osd_object *obj, oth = container_of(th, struct osd_thandle, ot_super); LASSERT(oth->ot_handle->h_transaction != NULL); + if (fid_is_namespace_visible(lu_object_fid(&obj->oo_dt.do_lu))) + obj->oo_dirent_count = 0; result = osd_mkfile(info, obj, mode, hint, th, attr); return result; @@ -3304,7 +3444,8 @@ static void osd_ah_init(const struct lu_env *env, struct dt_allocation_hint *ah, } static void osd_attr_init(struct osd_thread_info *info, struct osd_object *obj, - struct lu_attr *attr, struct dt_object_format *dof) + struct lu_attr *attr, struct dt_object_format *dof, + struct thandle *handle) { struct inode *inode = obj->oo_inode; __u64 valid = attr->la_valid; @@ -3321,7 +3462,7 @@ static void osd_attr_init(struct osd_thread_info *info, struct osd_object *obj, if ((valid & LA_MTIME) && (attr->la_mtime == inode->i_mtime.tv_sec)) attr->la_valid &= ~LA_MTIME; - result = osd_quota_transfer(inode, attr); + result = osd_quota_transfer(inode, attr, handle); if (result) return; @@ -3335,7 +3476,7 @@ static void osd_attr_init(struct osd_thread_info *info, struct osd_object *obj, * enabled on ldiskfs (lquota takes care of it). */ LASSERTF(result == 0, "%d\n", result); - ll_dirty_inode(inode, I_DIRTY_DATASYNC); + osd_dirty_inode(inode, I_DIRTY_DATASYNC); } attr->la_valid = valid; @@ -3372,7 +3513,7 @@ static int __osd_create(struct osd_thread_info *info, struct osd_object *obj, } if (likely(result == 0)) { - osd_attr_init(info, obj, attr, dof); + osd_attr_init(info, obj, attr, dof, th); osd_object_init0(obj); } @@ -3400,7 +3541,10 @@ static int __osd_oi_insert(const struct lu_env *env, struct osd_object *obj, LASSERT(obj->oo_inode != NULL); - oh = container_of0(th, struct osd_thandle, ot_super); + if (CFS_FAIL_CHECK(OBD_FAIL_OSD_OI_ENOSPC)) + return -ENOSPC; + + oh = container_of(th, struct osd_thandle, ot_super); LASSERT(oh->ot_handle); osd_trans_exec_op(env, th, OSD_OT_INSERT); @@ -3466,7 +3610,7 @@ static int osd_declare_create(const struct lu_env *env, struct dt_object *dt, LASSERT(handle != NULL); - oh = container_of0(handle, struct osd_thandle, ot_super); + oh = container_of(handle, struct osd_thandle, ot_super); LASSERT(oh->ot_handle == NULL); /* @@ -3524,7 +3668,7 @@ static int osd_declare_destroy(const struct lu_env *env, struct dt_object *dt, if (inode == NULL) RETURN(-ENOENT); - oh = container_of0(th, struct osd_thandle, ot_super); + oh = container_of(th, struct osd_thandle, ot_super); LASSERT(oh->ot_handle == NULL); osd_trans_declare_op(env, oh, OSD_OT_DESTROY, @@ -3575,7 +3719,7 @@ static int osd_destroy(const struct lu_env *env, struct dt_object *dt, ENTRY; - oh = container_of0(th, struct osd_thandle, ot_super); + oh = container_of(th, struct osd_thandle, ot_super); LASSERT(oh->ot_handle); LASSERT(inode); LASSERT(!lu_object_is_dying(dt->do_lu.lo_header)); @@ -3591,13 +3735,15 @@ static int osd_destroy(const struct lu_env *env, struct dt_object *dt, } if (S_ISDIR(inode->i_mode)) { - LASSERT(osd_inode_unlinked(inode) || inode->i_nlink == 1 || - inode->i_nlink == 2); + if (inode->i_nlink > 2) + CERROR("%s: directory "DFID" ino %lu link count is %u at unlink. run e2fsck to repair\n", + osd_name(osd), PFID(fid), inode->i_ino, + inode->i_nlink); spin_lock(&obj->oo_guard); clear_nlink(inode); spin_unlock(&obj->oo_guard); - ll_dirty_inode(inode, I_DIRTY_DATASYNC); + osd_dirty_inode(inode, I_DIRTY_DATASYNC); } osd_trans_exec_op(env, th, OSD_OT_DESTROY); @@ -3756,6 +3902,9 @@ static int osd_add_dot_dotdot_internal(struct osd_thread_info *info, __u32 saved_nlink = dir->i_nlink; int rc; + if (OBD_FAIL_CHECK(OBD_FAIL_OSD_DOTDOT_ENOSPC)) + return -ENOSPC; + dot_dot_ldp = (struct ldiskfs_dentry_param *)info->oti_ldp2; osd_get_ldiskfs_dirent_param(dot_dot_ldp, dot_dot_fid); @@ -3796,6 +3945,7 @@ static struct inode *osd_create_local_agent_inode(const struct lu_env *env, struct osd_thread_info *info = osd_oti_get(env); struct inode *local; struct osd_thandle *oh; + uid_t own[2] = {0, 0}; int rc; ENTRY; @@ -3804,8 +3954,7 @@ static struct inode *osd_create_local_agent_inode(const struct lu_env *env, oh = container_of(th, struct osd_thandle, ot_super); LASSERT(oh->ot_handle->h_transaction != NULL); - local = ldiskfs_create_inode(oh->ot_handle, pobj->oo_inode, type, - NULL); + local = ldiskfs_create_inode(oh->ot_handle, pobj->oo_inode, type, own); if (IS_ERR(local)) { CERROR("%s: create local error %d\n", osd_name(osd), (int)PTR_ERR(local)); @@ -3824,7 +3973,7 @@ static struct inode *osd_create_local_agent_inode(const struct lu_env *env, * debugging if we need to determine where this symlink came from. */ if (S_ISLNK(type)) { - CLASSERT(LDISKFS_N_BLOCKS * 4 >= FID_LEN + 1); + BUILD_BUG_ON(LDISKFS_N_BLOCKS * 4 < FID_LEN + 1); rc = snprintf((char *)LDISKFS_I(local)->i_data, LDISKFS_N_BLOCKS * 4, DFID, PFID(fid)); @@ -3837,11 +3986,10 @@ static struct inode *osd_create_local_agent_inode(const struct lu_env *env, #ifdef HAVE_PROJECT_QUOTA if (LDISKFS_I(pobj->oo_inode)->i_flags & LUSTRE_PROJINHERIT_FL && i_projid_read(pobj->oo_inode) != 0) { - rc = osd_transfer_project(local, 0); + rc = osd_transfer_project(local, 0, th); if (rc) { - CERROR("%s: quota transfer failed: rc = %d. Is project " - "quota enforcement enabled on the ldiskfs " - "filesystem?\n", local->i_sb->s_id, rc); + CERROR("%s: quota transfer failed:. Is project quota enforcement enabled on the ldiskfs filesystem? rc = %d\n", + osd_ino2name(local), rc); RETURN(ERR_PTR(rc)); } } @@ -3908,20 +4056,16 @@ static int osd_process_scheduled_agent_removals(const struct lu_env *env, struct osd_thread_info *info = osd_oti_get(env); struct osd_obj_orphan *oor, *tmp; struct osd_inode_id id; - struct list_head list; + LIST_HEAD(list); struct inode *inode; struct lu_fid fid; handle_t *jh; __u32 ino; - INIT_LIST_HEAD(&list); - spin_lock(&osd->od_osfs_lock); list_for_each_entry_safe(oor, tmp, &osd->od_orphan_list, oor_list) { - if (oor->oor_env == env) { - list_del(&oor->oor_list); - list_add(&oor->oor_list, &list); - } + if (oor->oor_env == env) + list_move(&oor->oor_list, &list); } spin_unlock(&osd->od_osfs_lock); @@ -4009,8 +4153,21 @@ static int osd_create(const struct lu_env *env, struct dt_object *dt, obj->oo_dt.do_body_ops = &osd_body_ops; } - if (!result && !CFS_FAIL_CHECK(OBD_FAIL_OSD_NO_OI_ENTRY)) + if (!result && !CFS_FAIL_CHECK(OBD_FAIL_OSD_NO_OI_ENTRY)) { + struct inode *inode = obj->oo_inode; + result = __osd_oi_insert(env, obj, fid, th); + if (result && inode) { + spin_lock(&obj->oo_guard); + clear_nlink(inode); + spin_unlock(&obj->oo_guard); + osd_dirty_inode(inode, I_DIRTY_DATASYNC); + ldiskfs_set_inode_state(inode, + LDISKFS_STATE_LUSTRE_DESTROY); + iput(inode); + obj->oo_inode = NULL; + } + } /* * a small optimization - dt_insert() isn't usually applied @@ -4034,19 +4191,21 @@ static int osd_declare_ref_add(const struct lu_env *env, struct dt_object *dt, struct thandle *handle) { struct osd_thandle *oh; + int rc; /* it's possible that object doesn't exist yet */ LASSERT(handle != NULL); - oh = container_of0(handle, struct osd_thandle, ot_super); + oh = container_of(handle, struct osd_thandle, ot_super); LASSERT(oh->ot_handle == NULL); osd_trans_declare_op(env, oh, OSD_OT_REF_ADD, osd_dto_credits_noquota[DTO_ATTR_SET_BASE]); - osd_idc_find_and_init(env, osd_dev(dt->do_lu.lo_dev), osd_dt_obj(dt)); + rc = osd_idc_find_and_init(env, osd_dev(dt->do_lu.lo_dev), + osd_dt_obj(dt)); - return 0; + return rc; } /* @@ -4068,7 +4227,7 @@ static int osd_ref_add(const struct lu_env *env, struct dt_object *dt, LASSERT(osd_is_write_locked(env, obj)); LASSERT(th != NULL); - oh = container_of0(th, struct osd_thandle, ot_super); + oh = container_of(th, struct osd_thandle, ot_super); LASSERT(oh->ot_handle != NULL); osd_trans_exec_op(env, th, OSD_OT_REF_ADD); @@ -4097,7 +4256,7 @@ static int osd_ref_add(const struct lu_env *env, struct dt_object *dt, } spin_unlock(&obj->oo_guard); - ll_dirty_inode(inode, I_DIRTY_DATASYNC); + osd_dirty_inode(inode, I_DIRTY_DATASYNC); LINVRNT(osd_invariant(obj)); osd_trans_exec_check(env, th, OSD_OT_REF_ADD); @@ -4116,7 +4275,7 @@ static int osd_declare_ref_del(const struct lu_env *env, struct dt_object *dt, LASSERT(!dt_object_remote(dt)); LASSERT(handle != NULL); - oh = container_of0(handle, struct osd_thandle, ot_super); + oh = container_of(handle, struct osd_thandle, ot_super); LASSERT(oh->ot_handle == NULL); osd_trans_declare_op(env, oh, OSD_OT_REF_DEL, @@ -4144,7 +4303,10 @@ static int osd_ref_del(const struct lu_env *env, struct dt_object *dt, LASSERT(osd_is_write_locked(env, obj)); LASSERT(th != NULL); - oh = container_of0(th, struct osd_thandle, ot_super); + if (OBD_FAIL_CHECK(OBD_FAIL_OSD_REF_DEL)) + return -EIO; + + oh = container_of(th, struct osd_thandle, ot_super); LASSERT(oh->ot_handle != NULL); osd_trans_exec_op(env, th, OSD_OT_REF_DEL); @@ -4170,7 +4332,7 @@ static int osd_ref_del(const struct lu_env *env, struct dt_object *dt, ldiskfs_dec_count(oh->ot_handle, inode); spin_unlock(&obj->oo_guard); - ll_dirty_inode(inode, I_DIRTY_DATASYNC); + osd_dirty_inode(inode, I_DIRTY_DATASYNC); LINVRNT(osd_invariant(obj)); osd_trans_exec_check(env, th, OSD_OT_REF_DEL); @@ -4295,7 +4457,7 @@ static int osd_declare_xattr_set(const struct lu_env *env, LASSERT(handle != NULL); - oh = container_of0(handle, struct osd_thandle, ot_super); + oh = container_of(handle, struct osd_thandle, ot_super); LASSERT(oh->ot_handle == NULL); if (strcmp(name, XATTR_NAME_LMA) == 0) { @@ -4434,7 +4596,7 @@ static int osd_xattr_set_pfid(const struct lu_env *env, struct osd_object *obj, /* Remove old PFID EA entry firstly. */ dquot_initialize(inode); - rc = osd_removexattr(dentry, inode, XATTR_NAME_FID); + rc = ll_vfs_removexattr(dentry, inode, XATTR_NAME_FID); if (rc == -ENODATA) { if ((fl & LU_XATTR_REPLACE) && !(fl & LU_XATTR_CREATE)) RETURN(rc); @@ -4514,7 +4676,7 @@ static int osd_xattr_handle_linkea(const struct lu_env *env, ENTRY; - oh = container_of0(handle, struct osd_thandle, ot_super); + oh = container_of(handle, struct osd_thandle, ot_super); LASSERT(oh->ot_handle != NULL); rc = linkea_init_with_rec(&ldata); @@ -4587,7 +4749,7 @@ static int osd_xattr_set(const struct lu_env *env, struct dt_object *dt, * Version is set after all inode operations are finished, * so we should mark it dirty here */ - ll_dirty_inode(inode, I_DIRTY_DATASYNC); + osd_dirty_inode(inode, I_DIRTY_DATASYNC); RETURN(0); } @@ -4683,7 +4845,7 @@ static int osd_declare_xattr_del(const struct lu_env *env, LASSERT(!dt_object_remote(dt)); LASSERT(handle != NULL); - oh = container_of0(handle, struct osd_thandle, ot_super); + oh = container_of(handle, struct osd_thandle, ot_super); LASSERT(oh->ot_handle == NULL); osd_trans_declare_op(env, oh, OSD_OT_XATTR_SET, @@ -4740,7 +4902,7 @@ static int osd_xattr_del(const struct lu_env *env, struct dt_object *dt, dquot_initialize(inode); dentry->d_inode = inode; dentry->d_sb = inode->i_sb; - rc = osd_removexattr(dentry, inode, name); + rc = ll_vfs_removexattr(dentry, inode, name); } osd_trans_exec_check(env, handle, OSD_OT_XATTR_SET); @@ -4758,20 +4920,11 @@ static int osd_object_sync(const struct lu_env *env, struct dt_object *dt, { struct osd_object *obj = osd_dt_obj(dt); struct inode *inode = obj->oo_inode; - struct osd_thread_info *info = osd_oti_get(env); - struct dentry *dentry = &info->oti_obj_dentry; - struct file *file = &info->oti_file; + struct file *file = osd_quasi_file(env, inode); int rc; ENTRY; - dentry->d_inode = inode; - dentry->d_sb = inode->i_sb; - file->f_path.dentry = dentry; - file->f_mapping = inode->i_mapping; - file->f_op = inode->i_fop; - set_file_inode(file, inode); - rc = vfs_fsync_range(file, start, end, 0); RETURN(rc); @@ -4984,7 +5137,7 @@ static int osd_index_declare_iam_delete(const struct lu_env *env, { struct osd_thandle *oh; - oh = container_of0(handle, struct osd_thandle, ot_super); + oh = container_of(handle, struct osd_thandle, ot_super); LASSERT(oh->ot_handle == NULL); /* Recycle may cause additional three blocks to be changed. */ @@ -5032,7 +5185,7 @@ static int osd_index_iam_delete(const struct lu_env *env, struct dt_object *dt, if (unlikely(ipd == NULL)) RETURN(-ENOMEM); - oh = container_of0(handle, struct osd_thandle, ot_super); + oh = container_of(handle, struct osd_thandle, ot_super); LASSERT(oh->ot_handle != NULL); LASSERT(oh->ot_handle->h_transaction != NULL); @@ -5063,7 +5216,7 @@ static int osd_index_declare_ea_delete(const struct lu_env *env, LASSERT(!dt_object_remote(dt)); LASSERT(handle != NULL); - oh = container_of0(handle, struct osd_thandle, ot_super); + oh = container_of(handle, struct osd_thandle, ot_super); LASSERT(oh->ot_handle == NULL); credits = osd_dto_credits_noquota[DTO_INDEX_DELETE]; @@ -5225,11 +5378,21 @@ static int osd_index_ea_delete(const struct lu_env *env, struct dt_object *dt, } else { rc = PTR_ERR(bh); } + + if (!rc && fid_is_namespace_visible(lu_object_fid(&dt->do_lu)) && + obj->oo_dirent_count != LU_DIRENT_COUNT_UNSET) { + /* NB, dirent count may not be accurate, because it's counted + * without lock. + */ + if (obj->oo_dirent_count) + obj->oo_dirent_count--; + else + obj->oo_dirent_count = LU_DIRENT_COUNT_UNSET; + } if (hlock != NULL) ldiskfs_htree_unlock(hlock); else up_write(&obj->oo_ext_idx_sem); - GOTO(out, rc); out: LASSERT(osd_invariant(obj)); @@ -5316,7 +5479,7 @@ static int osd_index_declare_iam_insert(const struct lu_env *env, LASSERT(handle != NULL); - oh = container_of0(handle, struct osd_thandle, ot_super); + oh = container_of(handle, struct osd_thandle, ot_super); LASSERT(oh->ot_handle == NULL); osd_trans_declare_op(env, oh, OSD_OT_INSERT, @@ -5366,7 +5529,7 @@ static int osd_index_iam_insert(const struct lu_env *env, struct dt_object *dt, if (unlikely(ipd == NULL)) RETURN(-ENOMEM); - oh = container_of0(th, struct osd_thandle, ot_super); + oh = container_of(th, struct osd_thandle, ot_super); LASSERT(oh->ot_handle != NULL); LASSERT(oh->ot_handle->h_transaction != NULL); if (S_ISDIR(obj->oo_inode->i_mode)) { @@ -5564,6 +5727,10 @@ static int osd_ea_add_rec(const struct lu_env *env, struct osd_object *pobj, hlock, th); } } + if (!rc && fid_is_namespace_visible(lu_object_fid(&pobj->oo_dt.do_lu)) + && pobj->oo_dirent_count != LU_DIRENT_COUNT_UNSET) + pobj->oo_dirent_count++; + if (hlock != NULL) ldiskfs_htree_unlock(hlock); else @@ -5589,8 +5756,7 @@ osd_consistency_check(struct osd_thread_info *oti, struct osd_device *dev, if (!fid_is_norm(fid) && !fid_is_igif(fid)) RETURN(0); - if (thread_is_running(&scrub->os_thread) && - scrub->os_pos_current > id->oii_ino) + if (scrub->os_running && scrub->os_pos_current > id->oii_ino) RETURN(0); if (dev->od_auto_scrub_interval == AS_NEVER || @@ -5631,7 +5797,7 @@ again: insert = false; trigger: - if (thread_is_running(&scrub->os_thread)) { + if (scrub->os_running) { if (inode == NULL) { inode = osd_iget(oti, dev, id); /* The inode has been removed (by race maybe). */ @@ -5664,8 +5830,7 @@ trigger: rc = osd_scrub_start(oti->oti_env, dev, SS_AUTO_PARTIAL | SS_CLEAR_DRYRUN | SS_CLEAR_FAILOUT); CDEBUG(D_LFSCK | D_CONSOLE | D_WARNING, - "%s: trigger partial OI scrub for RPC inconsistency " - "checking FID "DFID": rc = %d\n", + "%s: trigger partial OI scrub for RPC inconsistency checking FID "DFID": rc = %d\n", osd_dev2name(dev), PFID(fid), rc); if (rc == 0 || rc == -EALREADY) goto again; @@ -5957,7 +6122,7 @@ static int osd_index_declare_ea_insert(const struct lu_env *env, LASSERT(fid != NULL); LASSERT(rec1->rec_type != 0); - oh = container_of0(handle, struct osd_thandle, ot_super); + oh = container_of(handle, struct osd_thandle, ot_super); LASSERT(oh->ot_handle == NULL); credits = osd_dto_credits_noquota[DTO_INDEX_INSERT]; @@ -6117,6 +6282,7 @@ static int osd_index_ea_insert(const struct lu_env *env, struct dt_object *dt, iput(child_inode); LASSERT(osd_invariant(obj)); osd_trans_exec_check(env, th, OSD_OT_INSERT); + RETURN(rc); } @@ -6453,7 +6619,7 @@ static struct dt_it *osd_it_ea_init(const struct lu_env *env, file->f_path.dentry = obj_dentry; file->f_mapping = obj->oo_inode->i_mapping; file->f_op = obj->oo_inode->i_fop; - set_file_inode(file, obj->oo_inode); + file->f_inode = obj->oo_inode; lu_object_get(lo); RETURN((struct dt_it *)oie); @@ -6515,13 +6681,11 @@ static void osd_it_ea_put(const struct lu_env *env, struct dt_it *di) } struct osd_filldir_cbs { -#ifdef HAVE_DIR_CONTEXT struct dir_context ctx; -#endif struct osd_it_ea *it; }; /** - * It is called internally by ->readdir(). It fills the + * It is called internally by ->iterate*(). It fills the * iterator's in-memory data structure with required * information i.e. name, namelen, rec_size etc. * @@ -6588,7 +6752,7 @@ static int osd_ldiskfs_filldir(void *buf, } /** - * Calls ->readdir() to load a directory entry at a time + * Calls ->iterate*() to load a directory entry at a time * and stored it in iterator's in-memory data structure. * * \param di iterator's in memory structure @@ -6607,9 +6771,7 @@ static int osd_ldiskfs_it_fill(const struct lu_env *env, struct file *filp = &it->oie_file; int rc = 0; struct osd_filldir_cbs buf = { -#ifdef HAVE_DIR_CONTEXT .ctx.actor = osd_ldiskfs_filldir, -#endif .it = it }; @@ -6625,18 +6787,16 @@ static int osd_ldiskfs_it_fill(const struct lu_env *env, down_read(&obj->oo_ext_idx_sem); } -#ifdef HAVE_DIR_CONTEXT - buf.ctx.pos = filp->f_pos; - rc = inode->i_fop->iterate_shared(filp, &buf.ctx); - filp->f_pos = buf.ctx.pos; -#else - rc = inode->i_fop->readdir(filp, &buf, osd_ldiskfs_filldir); -#endif + filp->f_cred = current_cred(); + rc = osd_security_file_alloc(filp); + if (rc) + GOTO(unlock, rc); - if (hlock != NULL) - ldiskfs_htree_unlock(hlock); - else - up_read(&obj->oo_ext_idx_sem); + filp->f_flags |= O_NOATIME; + filp->f_mode |= FMODE_NONOTIFY; + rc = iterate_dir(filp, &buf.ctx); + if (rc) + GOTO(unlock, rc); if (it->oie_rd_dirent == 0) { /* @@ -6650,12 +6810,17 @@ static int osd_ldiskfs_it_fill(const struct lu_env *env, it->oie_dirent = it->oie_buf; it->oie_it_dirent = 1; } +unlock: + if (hlock != NULL) + ldiskfs_htree_unlock(hlock); + else + up_read(&obj->oo_ext_idx_sem); RETURN(rc); } /** - * It calls osd_ldiskfs_it_fill() which will use ->readdir() + * It calls osd_ldiskfs_it_fill() which will use ->iterate*() * to load a directory entry at a time and stored it in * iterator's in-memory data structure. * @@ -6718,6 +6883,11 @@ static int osd_it_ea_key_size(const struct lu_env *env, const struct dt_it *di) return it->oie_dirent->oied_namelen; } +#if defined LDISKFS_DIR_ENTRY_LEN && defined LDISKFS_DIR_ENTRY_LEN_ +#undef LDISKFS_DIR_REC_LEN +#define LDISKFS_DIR_REC_LEN(de) LDISKFS_DIR_ENTRY_LEN_((de)) +#endif + static inline bool osd_dotdot_has_space(struct ldiskfs_dir_entry_2 *de) { if (LDISKFS_DIR_REC_LEN(de) >= @@ -6793,11 +6963,10 @@ osd_dirent_reinsert(const struct lu_env *env, struct osd_device *dev, * That means we lose it! */ if (rc != 0) - CDEBUG(D_LFSCK, "%s: fail to reinsert the dirent, " - "dir = %lu/%u, name = %.*s, "DFID": rc = %d\n", - osd_ino2name(inode), - dir->i_ino, dir->i_generation, namelen, - dentry->d_name.name, PFID(fid), rc); + CDEBUG(D_LFSCK, + "%s: fail to reinsert the dirent, dir = %lu/%u, name = %.*s, "DFID": rc = %d\n", + osd_ino2name(inode), dir->i_ino, dir->i_generation, + namelen, dentry->d_name.name, PFID(fid), rc); RETURN(rc); } @@ -7208,8 +7377,6 @@ static inline int osd_it_ea_rec(const struct lu_env *env, rc = osd_ea_fid_get(env, obj, ino, fid, id); } - } else { - osd_id_gen(id, ino, OSD_OII_NOGEN); } } @@ -7219,15 +7386,6 @@ static inline int osd_it_ea_rec(const struct lu_env *env, it->oie_dirent->oied_namelen, it->oie_dirent->oied_type, attr); - if (rc < 0) - RETURN(rc); - - if (osd_remote_fid(env, dev, fid)) - RETURN(0); - - if (likely(!(attr & (LUDA_IGNORE | LUDA_UNKNOWN)) && rc == 0)) - osd_add_oi_cache(oti, dev, id, fid); - RETURN(rc > 0 ? 0 : rc); } @@ -7268,7 +7426,7 @@ static __u64 osd_it_ea_store(const struct lu_env *env, const struct dt_it *di) } /** - * It calls osd_ldiskfs_it_fill() which will use ->readdir() + * It calls osd_ldiskfs_it_fill() which will use ->iterate*() * to load a directory entry at a time and stored it i inn, * in iterator's in-memory data structure. * @@ -7384,11 +7542,17 @@ static void osd_key_fini(const struct lu_context *ctx, if (info->oti_dio_pages) { int i; for (i = 0; i < PTLRPC_MAX_BRW_PAGES; i++) { - if (info->oti_dio_pages[i]) - __free_page(info->oti_dio_pages[i]); + struct page *page = info->oti_dio_pages[i]; + if (page) { + LASSERT(PagePrivate2(page)); + LASSERT(PageLocked(page)); + ClearPagePrivate2(page); + unlock_page(page); + __free_page(page); + } } - OBD_FREE(info->oti_dio_pages, - sizeof(struct page *) * PTLRPC_MAX_BRW_PAGES); + OBD_FREE_PTR_ARRAY_LARGE(info->oti_dio_pages, + PTLRPC_MAX_BRW_PAGES); } if (info->oti_inode != NULL) @@ -7402,7 +7566,7 @@ static void osd_key_fini(const struct lu_context *ctx, lu_buf_free(&info->oti_big_buf); if (idc != NULL) { LASSERT(info->oti_ins_cache_size > 0); - OBD_FREE(idc, sizeof(*idc) * info->oti_ins_cache_size); + OBD_FREE_PTR_ARRAY(idc, info->oti_ins_cache_size); info->oti_ins_cache = NULL; info->oti_ins_cache_size = 0; } @@ -7444,7 +7608,7 @@ static int osd_device_init(const struct lu_env *env, struct lu_device *d, static int osd_fid_init(const struct lu_env *env, struct osd_device *osd) { struct seq_server_site *ss = osd_seq_site(osd); - int rc; + int rc = 0; ENTRY; @@ -7458,13 +7622,8 @@ static int osd_fid_init(const struct lu_env *env, struct osd_device *osd) if (osd->od_cl_seq == NULL) RETURN(-ENOMEM); - rc = seq_client_init(osd->od_cl_seq, NULL, LUSTRE_SEQ_METADATA, - osd->od_svname, ss->ss_server_seq); - if (rc != 0) { - OBD_FREE_PTR(osd->od_cl_seq); - osd->od_cl_seq = NULL; - RETURN(rc); - } + seq_client_init(osd->od_cl_seq, NULL, LUSTRE_SEQ_METADATA, + osd->od_svname, ss->ss_server_seq); if (ss->ss_node_id == 0) { /* @@ -7521,6 +7680,8 @@ static void osd_umount(const struct lu_env *env, struct osd_device *o) if (o->od_mnt != NULL) { shrink_dcache_sb(osd_sb(o)); osd_sync(env, &o->od_dt_dev); + wait_event(o->od_commit_cb_done, + !atomic_read(&o->od_commit_cb_in_flight)); mntput(o->od_mnt); o->od_mnt = NULL; @@ -7543,7 +7704,7 @@ static int osd_mount(const struct lu_env *env, struct osd_thread_info *info = osd_oti_get(env); struct lu_fid *fid = &info->oti_fid; struct inode *inode; - int rc = 0, force_over_512tb = 0; + int rc = 0, force_over_1024tb = 0; ENTRY; @@ -7570,23 +7731,22 @@ static int osd_mount(const struct lu_env *env, #endif #if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 0, 53, 0) if (opts != NULL && strstr(opts, "force_over_128tb") != NULL) { - CWARN("force_over_128tb option is deprecated. " - "Filesystems less than 512TB can be created without any " - "force options. Use force_over_512tb option for " - "filesystems greater than 512TB.\n"); + CWARN("force_over_128tb option is deprecated. Filesystems smaller than 1024TB can be created without any force option. Use force_over_1024tb option for filesystems larger than 1024TB.\n"); } #endif #if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 1, 53, 0) if (opts != NULL && strstr(opts, "force_over_256tb") != NULL) { - CWARN("force_over_256tb option is deprecated. " - "Filesystems less than 512TB can be created without any " - "force options. Use force_over_512tb option for " - "filesystems greater than 512TB.\n"); + CWARN("force_over_256tb option is deprecated. Filesystems smaller than 1024TB can be created without any force options. Use force_over_1024tb option for filesystems larger than 1024TB.\n"); + } +#endif +#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 2, 53, 0) + if (opts != NULL && strstr(opts, "force_over_512tb") != NULL) { + CWARN("force_over_512tb option is deprecated. Filesystems smaller than 1024TB can be created without any force options. Use force_over_1024tb option for filesystems larger than 1024TB.\n"); } #endif - if (opts != NULL && strstr(opts, "force_over_512tb") != NULL) - force_over_512tb = 1; + if (opts != NULL && strstr(opts, "force_over_1024tb") != NULL) + force_over_1024tb = 1; __page = alloc_page(GFP_KERNEL); if (__page == NULL) @@ -7596,7 +7756,7 @@ static int osd_mount(const struct lu_env *env, *options = '\0'; if (opts != NULL) { /* strip out the options for back compatiblity */ - static char *sout[] = { + static const char * const sout[] = { "mballoc", "iopen", "noiopen", @@ -7608,6 +7768,7 @@ static int osd_mount(const struct lu_env *env, "force_over_128tb", "force_over_256tb", "force_over_512tb", + "force_over_1024tb", NULL }; strncat(options, opts, PAGE_SIZE); @@ -7655,33 +7816,18 @@ static int osd_mount(const struct lu_env *env, } if (ldiskfs_blocks_count(LDISKFS_SB(osd_sb(o))->s_es) << - osd_sb(o)->s_blocksize_bits > 512ULL << 40 && - force_over_512tb == 0) { - CERROR("%s: device %s LDISKFS does not support filesystems " - "greater than 512TB and can cause data corruption. " - "Use \"force_over_512tb\" mount option to override.\n", + osd_sb(o)->s_blocksize_bits > 1024ULL << 40 && + force_over_1024tb == 0) { + CERROR("%s: device %s LDISKFS has not been tested on filesystems larger than 1024TB and may cause data corruption. Use 'force_over_1024tb' mount option to override.\n", name, dev); GOTO(out_mnt, rc = -EINVAL); } if (lmd_flags & LMD_FLG_DEV_RDONLY) { - if (priv_dev_set_rdonly) { - priv_dev_set_rdonly(osd_sb(o)->s_bdev); - o->od_dt_dev.dd_rdonly = 1; - LCONSOLE_WARN("%s: set dev_rdonly on this device\n", - name); - } else { - LCONSOLE_WARN("%s: not support dev_rdonly on this device", - name); - - GOTO(out_mnt, rc = -EOPNOTSUPP); - } - } else if (priv_dev_check_rdonly && - priv_dev_check_rdonly(osd_sb(o)->s_bdev)) { - CERROR("%s: underlying device %s is marked as " - "read-only. Setup failed\n", name, dev); + LCONSOLE_WARN("%s: not support dev_rdonly on this device", + name); - GOTO(out_mnt, rc = -EROFS); + GOTO(out_mnt, rc = -EOPNOTSUPP); } if (!ldiskfs_has_feature_journal(o->od_mnt->mnt_sb)) { @@ -7738,6 +7884,10 @@ static struct lu_device *osd_device_fini(const struct lu_env *env, osd_index_backup(env, o, false); osd_shutdown(env, o); osd_procfs_fini(o); + if (o->od_oi_table != NULL) + osd_oi_fini(osd_oti_get(env), o); + if (o->od_extent_bytes_percpu) + free_percpu(o->od_extent_bytes_percpu); osd_obj_map_fini(o); osd_umount(env, o); @@ -7771,12 +7921,16 @@ static int osd_device_init0(const struct lu_env *env, spin_lock_init(&o->od_lock); o->od_index_backup_policy = LIBP_NONE; o->od_t10_type = 0; + init_waitqueue_head(&o->od_commit_cb_done); o->od_read_cache = 1; o->od_writethrough_cache = 1; o->od_readcache_max_filesize = OSD_MAX_CACHE_SIZE; - + o->od_readcache_max_iosize = OSD_READCACHE_MAX_IO_MB << 20; + o->od_writethrough_max_iosize = OSD_WRITECACHE_MAX_IO_MB << 20; o->od_auto_scrub_interval = AS_DEFAULT; + /* default fallocate to unwritten extents: LU-14326/LU-14333 */ + o->od_fallocate_zero_blocks = 0; cplen = strlcpy(o->od_svname, lustre_cfg_string(cfg, 4), sizeof(o->od_svname)); @@ -7859,6 +8013,12 @@ static int osd_device_init0(const struct lu_env *env, GOTO(out_procfs, rc); } + o->od_extent_bytes_percpu = alloc_percpu(unsigned int); + if (!o->od_extent_bytes_percpu) { + rc = -ENOMEM; + GOTO(out_procfs, rc); + } + RETURN(0); out_procfs: @@ -7914,10 +8074,8 @@ static struct lu_device *osd_device_free(const struct lu_env *env, /* XXX: make osd top device in order to release reference */ d->ld_site->ls_top_dev = d; lu_site_purge(env, d->ld_site, -1); - if (!cfs_hash_is_empty(d->ld_site->ls_obj_hash)) { - LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, D_ERROR, NULL); - lu_site_print(env, d->ld_site, &msgdata, lu_cdebug_printer); - } + lu_site_print(env, d->ld_site, &d->ld_site->ls_obj_hash.nelems, + D_ERROR, lu_cdebug_printer); lu_site_fini(&o->od_site); dt_device_fini(&o->od_dt_dev); OBD_FREE_PTR(o); @@ -8089,10 +8247,18 @@ static int osd_prepare(const struct lu_env *env, struct lu_device *pdev, RETURN(result); } -static int osd_fid_alloc(const struct lu_env *env, struct obd_export *exp, - struct lu_fid *fid, struct md_op_data *op_data) +/** + * Implementation of lu_device_operations::ldo_fid_alloc() for OSD + * + * Allocate FID. + * + * see include/lu_object.h for the details. + */ +static int osd_fid_alloc(const struct lu_env *env, struct lu_device *d, + struct lu_fid *fid, struct lu_object *parent, + const struct lu_name *name) { - struct osd_device *osd = osd_dev(exp->exp_obd->obd_lu_dev); + struct osd_device *osd = osd_dev(d); return seq_client_alloc_fid(env, osd->od_cl_seq, fid); } @@ -8111,6 +8277,7 @@ const struct lu_device_operations osd_lu_ops = { .ldo_process_config = osd_process_config, .ldo_recovery_complete = osd_recovery_complete, .ldo_prepare = osd_prepare, + .ldo_fid_alloc = osd_fid_alloc, }; static const struct lu_device_type_operations osd_device_type_ops = { @@ -8145,11 +8312,10 @@ static int osd_health_check(const struct lu_env *env, struct obd_device *obd) /* * lprocfs legacy support. */ -static struct obd_ops osd_obd_device_ops = { +static const struct obd_ops osd_obd_device_ops = { .o_owner = THIS_MODULE, .o_connect = osd_obd_connect, .o_disconnect = osd_obd_disconnect, - .o_fid_alloc = osd_fid_alloc, .o_health_check = osd_health_check, }; @@ -8182,10 +8348,11 @@ static int __init osd_init(void) struct kobject *kobj; int rc; - CLASSERT(BH_DXLock < sizeof(((struct buffer_head *)0)->b_state) * 8); + BUILD_BUG_ON(BH_DXLock >= + sizeof(((struct buffer_head *)0)->b_state) * 8); #if !defined(CONFIG_DEBUG_MUTEXES) && !defined(CONFIG_DEBUG_SPINLOCK) /* please, try to keep osd_thread_info smaller than a page */ - CLASSERT(sizeof(struct osd_thread_info) <= PAGE_SIZE); + BUILD_BUG_ON(sizeof(struct osd_thread_info) > PAGE_SIZE); #endif osd_oi_mod_init(); @@ -8195,9 +8362,8 @@ static int __init osd_init(void) return rc; #ifdef CONFIG_KALLSYMS - priv_dev_set_rdonly = (void *)kallsyms_lookup_name("dev_set_rdonly"); - priv_dev_check_rdonly = - (void *)kallsyms_lookup_name("dev_check_rdonly"); + priv_security_file_alloc = + (void *)kallsyms_lookup_name("security_file_alloc"); #endif rc = class_register_type(&osd_obd_device_ops, NULL, true, NULL,