There is a race condition inside osd-zfs that may cause deadlock.
Consider the following scenarios:
1) The Thread1 calls osd_attr_set() to set flags on the object.
The osd_attr_set() will call the osd_xattr_get() with holding
the read mode semaphore on the object::oo_guard.
2) The Thread2 calls the osd_declare_destroy() to destroy such
object, it will down_write() on the object::oo_guard, but be
blocked by the Thread1's granted read mode semaphore.
3) The osd_xattr_get() triggered by the osd_attr_set() will also
down_read() on the object::oo_guard. But it will be blocked by
the Thread2's pending down_write() request.
Then the Thread1 and the Thread2 deadlock.
This patch makes the osd_attr_set() call the lockless xattr_get
variant, osd_xattr_get_internal(), to avoid such deadlock.
Signed-off-by: Fan Yong <fan.yong@intel.com>
Change-Id: Iaac2e414b5f1fd197303bb7ec7d5e2763b6f3e9a
Reviewed-on: https://review.whamcloud.com/31511
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Reviewed-by: Nathaniel Clark <nathaniel.l.clark@intel.com>
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
struct lu_attr *la, uint64_t parent, nvlist_t *);
int osd_find_new_dnode(const struct lu_env *env, dmu_tx_t *tx,
uint64_t oid, dnode_t **dnp);
struct lu_attr *la, uint64_t parent, nvlist_t *);
int osd_find_new_dnode(const struct lu_env *env, dmu_tx_t *tx,
uint64_t oid, dnode_t **dnp);
-int osd_object_init0(const struct lu_env *env, struct osd_object *obj);
/* osd_oi.c */
int osd_oi_init(const struct lu_env *env, struct osd_device *o);
/* osd_oi.c */
int osd_oi_init(const struct lu_env *env, struct osd_device *o);
/*
* Retrieve the attributes of a DMU object
*/
/*
* Retrieve the attributes of a DMU object
*/
-int __osd_object_attr_get(const struct lu_env *env, struct osd_device *o,
- struct osd_object *obj, struct lu_attr *la)
+static int __osd_object_attr_get(const struct lu_env *env, struct osd_device *o,
+ struct osd_object *obj, struct lu_attr *la)
{
struct osa_attr *osa = &osd_oti_get(env)->oti_osa;
sa_bulk_attr_t *bulk = osd_oti_get(env)->oti_attr_bulk;
{
struct osa_attr *osa = &osd_oti_get(env)->oti_osa;
sa_bulk_attr_t *bulk = osd_oti_get(env)->oti_attr_bulk;
/*
* Concurrency: shouldn't matter.
*/
/*
* Concurrency: shouldn't matter.
*/
-int osd_object_init0(const struct lu_env *env, struct osd_object *obj)
+static int osd_object_init0(const struct lu_env *env, struct osd_object *obj)
{
struct osd_device *osd = osd_obj2dev(obj);
const struct lu_fid *fid = lu_object_fid(&obj->oo_dt.do_lu);
{
struct osd_device *osd = osd_obj2dev(obj);
const struct lu_fid *fid = lu_object_fid(&obj->oo_dt.do_lu);
rc = -zap_update(osd->od_os, zapid, buf, 8,
sizeof(*zde) / 8, zde, oh->ot_tx);
}
rc = -zap_update(osd->od_os, zapid, buf, 8,
sizeof(*zde) / 8, zde, oh->ot_tx);
}
- up_read(&obj->oo_guard);
-
- RETURN(rc > 0 ? 0 : rc);
+ if (rc > 0)
+ rc = 0;
+ GOTO(out, rc);
}
/* Only allow set size for regular file */
}
/* Only allow set size for regular file */
if (valid & LA_FLAGS) {
struct lustre_mdt_attrs *lma;
struct lu_buf buf;
if (valid & LA_FLAGS) {
struct lustre_mdt_attrs *lma;
struct lu_buf buf;
if (la->la_flags & LUSTRE_LMA_FL_MASKS) {
LASSERT(!obj->oo_pfid_in_lma);
if (la->la_flags & LUSTRE_LMA_FL_MASKS) {
LASSERT(!obj->oo_pfid_in_lma);
lma = (struct lustre_mdt_attrs *)&info->oti_buf;
buf.lb_buf = lma;
buf.lb_len = sizeof(info->oti_buf);
lma = (struct lustre_mdt_attrs *)&info->oti_buf;
buf.lb_buf = lma;
buf.lb_len = sizeof(info->oti_buf);
- rc = osd_xattr_get(env, &obj->oo_dt, &buf,
- XATTR_NAME_LMA);
- if (rc > 0) {
+
+ /* Please do NOT call osd_xattr_get() directly, that
+ * will cause recursive down_read() on oo_guard. */
+ rc = osd_xattr_get_internal(env, obj, &buf,
+ XATTR_NAME_LMA, &size);
+ if (!rc && unlikely(size < sizeof(*lma))) {
+ rc = -EINVAL;
+ } else if (!rc) {
lma->lma_incompat =
le32_to_cpu(lma->lma_incompat);
lma->lma_incompat |=
lma->lma_incompat =
le32_to_cpu(lma->lma_incompat);
lma->lma_incompat |=