X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Fosd-ldiskfs%2Fosd_handler.c;h=a928e77c3643c38e9511801f6ab34db0be44c448;hp=b6a871095d78b1fcbd143b62060c4916b04c4974;hb=cfa981f3bf06d602aee998e64d4758e13f48aab8;hpb=c3947b14e5fa88b25d4e2a8e1c44b27d6397d814 diff --git a/lustre/osd-ldiskfs/osd_handler.c b/lustre/osd-ldiskfs/osd_handler.c index b6a8710..a928e77 100644 --- a/lustre/osd-ldiskfs/osd_handler.c +++ b/lustre/osd-ldiskfs/osd_handler.c @@ -23,7 +23,7 @@ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. * - * Copyright (c) 2011, 2015, Intel Corporation. + * Copyright (c) 2011, 2016, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -385,6 +385,7 @@ int osd_get_lma(struct osd_thread_info *info, struct inode *inode, struct inode *osd_iget(struct osd_thread_info *info, struct osd_device *dev, struct osd_inode_id *id) { + int rc; struct inode *inode = NULL; /* if we look for an inode withing a running @@ -414,6 +415,9 @@ struct inode *osd_iget(struct osd_thread_info *info, struct osd_device *dev, LDISKFS_SB(osd_sb(dev))->s_es->s_volume_name, id->oii_ino); iput(inode); inode = ERR_PTR(-ENOENT); + } else if ((rc = osd_attach_jinode(inode))) { + iput(inode); + inode = ERR_PTR(rc); } else { ldiskfs_clear_inode_state(inode, LDISKFS_STATE_LUSTRE_DESTROY); if (id->oii_gen == OSD_OII_NOGEN) @@ -1191,6 +1195,13 @@ found: (flags & SS_AUTO_PARTIAL || sf->sf_status == SS_SCANNING)) osd_check_lmv(info, dev, inode, oic); + result = osd_attach_jinode(inode); + if (result) { + obj->oo_inode = NULL; + iput(inode); + GOTO(out, result); + } + if (!ldiskfs_pdo) GOTO(out, result = 0); @@ -1455,7 +1466,7 @@ enum { */ static void osd_th_alloced(struct osd_thandle *oth) { - oth->oth_alloced = cfs_time_current(); + oth->oth_alloced = ktime_get(); } /** @@ -1463,58 +1474,42 @@ static void osd_th_alloced(struct osd_thandle *oth) */ static void osd_th_started(struct osd_thandle *oth) { - oth->oth_started = cfs_time_current(); + oth->oth_started = ktime_get(); } /** - * Helper function to convert time interval to microseconds packed in - * long int. + * Check whether the we deal with this handle for too long. */ -static long interval_to_usec(cfs_time_t start, cfs_time_t end) +static void __osd_th_check_slow(void *oth, struct osd_device *dev, + ktime_t alloced, ktime_t started, + ktime_t closed) { - struct timeval val; + ktime_t now = ktime_get(); - cfs_duration_usec(cfs_time_sub(end, start), &val); - return val.tv_sec * 1000000 + val.tv_usec; -} + LASSERT(dev != NULL); -/** - * Check whether the we deal with this handle for too long. - */ -static void __osd_th_check_slow(void *oth, struct osd_device *dev, - cfs_time_t alloced, cfs_time_t started, - cfs_time_t closed) -{ - cfs_time_t now = cfs_time_current(); - - LASSERT(dev != NULL); - - lprocfs_counter_add(dev->od_stats, LPROC_OSD_THANDLE_STARTING, - interval_to_usec(alloced, started)); - lprocfs_counter_add(dev->od_stats, LPROC_OSD_THANDLE_OPEN, - interval_to_usec(started, closed)); - lprocfs_counter_add(dev->od_stats, LPROC_OSD_THANDLE_CLOSING, - interval_to_usec(closed, now)); - - if (cfs_time_before(cfs_time_add(alloced, cfs_time_seconds(30)), now)) { - CWARN("transaction handle %p was open for too long: " - "now "CFS_TIME_T" ," - "alloced "CFS_TIME_T" ," - "started "CFS_TIME_T" ," - "closed "CFS_TIME_T"\n", + lprocfs_counter_add(dev->od_stats, LPROC_OSD_THANDLE_STARTING, + ktime_us_delta(started, alloced)); + lprocfs_counter_add(dev->od_stats, LPROC_OSD_THANDLE_OPEN, + ktime_us_delta(closed, started)); + lprocfs_counter_add(dev->od_stats, LPROC_OSD_THANDLE_CLOSING, + ktime_us_delta(now, closed)); + + if (ktime_before(ktime_add_ns(alloced, 30 * NSEC_PER_SEC), now)) { + CWARN("transaction handle %p was open for too long: now %lld, alloced %lld, started %lld, closed %lld\n", oth, now, alloced, started, closed); libcfs_debug_dumpstack(NULL); } } -#define OSD_CHECK_SLOW_TH(oth, dev, expr) \ -{ \ - cfs_time_t __closed = cfs_time_current(); \ - cfs_time_t __alloced = oth->oth_alloced; \ - cfs_time_t __started = oth->oth_started; \ - \ - expr; \ - __osd_th_check_slow(oth, dev, __alloced, __started, __closed); \ +#define OSD_CHECK_SLOW_TH(oth, dev, expr) \ +{ \ + ktime_t __closed = ktime_get(); \ + ktime_t __alloced = oth->oth_alloced; \ + ktime_t __started = oth->oth_started; \ + \ + expr; \ + __osd_th_check_slow(oth, dev, __alloced, __started, __closed); \ } #else /* OSD_THANDLE_STATS */ @@ -1804,6 +1799,7 @@ static int osd_trans_stop(const struct lu_env *env, struct dt_device *dt, oh->ot_quota_trans = NULL; if (oh->ot_handle != NULL) { + int rc2; handle_t *hdl = oh->ot_handle; /* @@ -1827,10 +1823,12 @@ static int osd_trans_stop(const struct lu_env *env, struct dt_device *dt, hdl->h_sync = th->th_sync; oh->ot_handle = NULL; - OSD_CHECK_SLOW_TH(oh, osd, rc = ldiskfs_journal_stop(hdl)); - if (rc != 0) + OSD_CHECK_SLOW_TH(oh, osd, rc2 = ldiskfs_journal_stop(hdl)); + if (rc2 != 0) CERROR("%s: failed to stop transaction: rc = %d\n", - osd_name(osd), rc); + osd_name(osd), rc2); + if (!rc) + rc = rc2; } else { osd_trans_stop_cb(oh, th->th_result); OBD_FREE_PTR(oh); @@ -2735,22 +2733,22 @@ enum { }; static int osd_mkdir(struct osd_thread_info *info, struct osd_object *obj, - struct lu_attr *attr, - struct dt_allocation_hint *hint, - struct dt_object_format *dof, - struct thandle *th) + struct lu_attr *attr, + struct dt_allocation_hint *hint, + struct dt_object_format *dof, + struct thandle *th) { - int result; - struct osd_thandle *oth; - __u32 mode = (attr->la_mode & (S_IFMT | S_IRWXUGO | S_ISVTX)); + int result; + struct osd_thandle *oth; + __u32 mode = (attr->la_mode & (S_IFMT | S_IRWXUGO | S_ISVTX | S_ISGID)); - LASSERT(S_ISDIR(attr->la_mode)); + LASSERT(S_ISDIR(attr->la_mode)); - oth = container_of(th, struct osd_thandle, ot_super); - LASSERT(oth->ot_handle->h_transaction != NULL); - result = osd_mkfile(info, obj, mode, hint, th); + oth = container_of(th, struct osd_thandle, ot_super); + LASSERT(oth->ot_handle->h_transaction != NULL); + result = osd_mkfile(info, obj, mode, hint, th); - return result; + return result; } static int osd_mk_index(struct osd_thread_info *info, struct osd_object *obj, @@ -3375,6 +3373,9 @@ static struct inode *osd_create_local_agent_inode(const struct lu_env *env, RETURN(local); } + /* restore i_gid in case S_ISGID is set, we will inherit S_ISGID and set + * correct gid on remote file, not agent here */ + local->i_gid = current_fsgid(); ldiskfs_set_inode_state(local, LDISKFS_STATE_LUSTRE_NOSCRUB); unlock_new_inode(local); @@ -3895,7 +3896,6 @@ static int osd_xattr_set(const struct lu_env *env, struct dt_object *dt, if (strcmp(name, XATTR_NAME_LMV) == 0) { struct lustre_mdt_attrs *lma = &info->oti_mdt_attrs; - int rc; rc = osd_get_lma(info, inode, &info->oti_obj_dentry, lma); if (rc != 0) @@ -3909,10 +3909,6 @@ static int osd_xattr_set(const struct lu_env *env, struct dt_object *dt, RETURN(rc); } - if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_LINKEA_OVERFLOW) && - strcmp(name, XATTR_NAME_LINK) == 0) - return -ENOSPC; - rc = __osd_xattr_set(info, inode, name, buf->lb_buf, buf->lb_len, fs_flags); osd_trans_exec_check(env, handle, OSD_OT_XATTR_SET); @@ -4323,7 +4319,7 @@ static int osd_index_declare_ea_delete(const struct lu_env *env, { struct osd_thandle *oh; struct inode *inode; - int rc; + int rc, credits; ENTRY; LASSERT(!dt_object_remote(dt)); @@ -4332,10 +4328,14 @@ static int osd_index_declare_ea_delete(const struct lu_env *env, oh = container_of0(handle, struct osd_thandle, ot_super); LASSERT(oh->ot_handle == NULL); - /* due to DNE we may need to remove an agent inode */ - osd_trans_declare_op(env, oh, OSD_OT_DELETE, - osd_dto_credits_noquota[DTO_INDEX_DELETE] + - osd_dto_credits_noquota[DTO_OBJECT_DELETE]); + credits = osd_dto_credits_noquota[DTO_INDEX_DELETE]; + if (key != NULL && unlikely(strcmp((char *)key, dotdot) == 0)) { + /* '..' to a remote object has a local representative */ + credits += osd_dto_credits_noquota[DTO_INDEX_DELETE]; + /* to reset LMAI_REMOTE_PARENT */ + credits += 1; + } + osd_trans_declare_op(env, oh, OSD_OT_DELETE, credits); inode = osd_dt_obj(dt)->oo_inode; if (inode == NULL) @@ -5032,8 +5032,8 @@ again: } ldata.ld_buf = buf; - rc = linkea_init(&ldata); - if (rc == 0) { + rc = linkea_init_with_rec(&ldata); + if (!rc) { linkea_first_entry(&ldata); linkea_entry_unpack(ldata.ld_lee, &ldata.ld_reclen, NULL, fid); } @@ -5076,8 +5076,8 @@ again: } ldata.ld_buf = buf; - rc = linkea_init(&ldata); - if (rc == 0) + rc = linkea_init_with_rec(&ldata); + if (!rc) rc = linkea_links_find(&ldata, &cname, pfid); RETURN(rc); @@ -6722,7 +6722,7 @@ static int osd_mount(const struct lu_env *env, struct osd_thread_info *info = osd_oti_get(env); struct lu_fid *fid = &info->oti_fid; struct inode *inode; - int rc = 0, force_over_256tb = 0; + int rc = 0, force_over_512tb = 0; ENTRY; if (o->od_mnt != NULL) @@ -6746,15 +6746,25 @@ static int osd_mount(const struct lu_env *env, RETURN(-EINVAL); } #endif +#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 0, 53, 0) if (opts != NULL && strstr(opts, "force_over_128tb") != NULL) { - CWARN("force_over_128tb option is depricated." - "Filesystems less then 256TB can be created without any" - "force options. Use force_over_256tb option for" - "filesystems greather then 256TB.\n"); + CWARN("force_over_128tb option is deprecated. " + "Filesystems less than 512TB can be created without any " + "force options. Use force_over_512tb option for " + "filesystems greater than 512TB.\n"); } +#endif +#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 1, 53, 0) + if (opts != NULL && strstr(opts, "force_over_256tb") != NULL) { + CWARN("force_over_256tb option is deprecated. " + "Filesystems less than 512TB can be created without any " + "force options. Use force_over_512tb option for " + "filesystems greater than 512TB.\n"); + } +#endif - if (opts != NULL && strstr(opts, "force_over_256tb") != NULL) - force_over_256tb = 1; + if (opts != NULL && strstr(opts, "force_over_512tb") != NULL) + force_over_512tb = 1; __page = alloc_page(GFP_KERNEL); if (__page == NULL) @@ -6773,12 +6783,9 @@ static int osd_mount(const struct lu_env *env, "noextents", /* strip out option we processed in osd */ "bigendian_extents", -#if LUSTRE_VERSION_CODE >= OBD_OCD_VERSION(3,0,53,0) -#warning "remove force_over_128 option" -#else - "force_over_128tb (deprecated)", -#endif + "force_over_128tb", "force_over_256tb", + "force_over_512tb", NULL }; strcat(options, opts); @@ -6824,11 +6831,12 @@ static int osd_mount(const struct lu_env *env, GOTO(out, rc); } - if (ldiskfs_blocks_count(LDISKFS_SB(osd_sb(o))->s_es) > (64ULL << 30) && - force_over_256tb == 0) { + if (ldiskfs_blocks_count(LDISKFS_SB(osd_sb(o))->s_es) << + osd_sb(o)->s_blocksize_bits > 512ULL << 40 && + force_over_512tb == 0) { CERROR("%s: device %s LDISKFS does not support filesystems " - "greater than 256TB and can cause data corruption. " - "Use \"force_over_256tb\" mount option to override.\n", + "greater than 512TB and can cause data corruption. " + "Use \"force_over_512tb\" mount option to override.\n", name, dev); GOTO(out, rc = -EINVAL); }