* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
- * Copyright (c) 2011, 2012, Whamcloud, Inc.
+ * Copyright (c) 2011, 2012, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
#include <md_object.h>
#include <lustre_quota.h>
-#ifdef HAVE_LDISKFS_PDO
int ldiskfs_pdo = 1;
CFS_MODULE_PARM(ldiskfs_pdo, "i", int, 0644,
"ldiskfs with parallel directory operations");
-#else
-int ldiskfs_pdo = 0;
-#endif
static const char dot[] = ".";
static const char dotdot[] = "..";
dt_object_init(&mo->oo_dt, NULL, d);
mo->oo_dt.do_ops = &osd_obj_ea_ops;
l->lo_ops = &osd_lu_obj_ops;
- cfs_init_rwsem(&mo->oo_sem);
- cfs_init_rwsem(&mo->oo_ext_idx_sem);
- cfs_spin_lock_init(&mo->oo_guard);
+ init_rwsem(&mo->oo_sem);
+ init_rwsem(&mo->oo_ext_idx_sem);
+ spin_lock_init(&mo->oo_guard);
return l;
} else {
return NULL;
}
}
-static int osd_get_lma(struct inode *inode, struct dentry *dentry,
- struct lustre_mdt_attrs *lma)
+static int osd_get_lma(struct osd_thread_info *info, struct inode *inode,
+ struct dentry *dentry, struct lustre_mdt_attrs *lma)
{
int rc;
dentry->d_inode = inode;
rc = inode->i_op->getxattr(dentry, XATTR_NAME_LMA, (void *)lma,
sizeof(*lma));
+ if (rc == -ERANGE) {
+ /* try with old lma size */
+ rc = inode->i_op->getxattr(dentry, XATTR_NAME_LMA,
+ info->oti_mdt_attrs_old,
+ LMA_OLD_SIZE);
+ if (rc > 0)
+ memcpy(lma, info->oti_mdt_attrs_old, sizeof(*lma));
+ }
if (rc > 0) {
/* Check LMA compatibility */
if (lma->lma_incompat & ~cpu_to_le32(LMA_INCOMPAT_SUPP)) {
if (IS_ERR(inode))
return inode;
- rc = osd_get_lma(inode, &info->oti_obj_dentry, lma);
+ rc = osd_get_lma(info, inode, &info->oti_obj_dentry, lma);
if (rc == 0) {
*fid = lma->lma_self_fid;
} else if (rc == -ENODATA) {
if (IS_ERR(inode))
return inode;
- rc = osd_get_lma(inode, &info->oti_obj_dentry, lma);
+ rc = osd_get_lma(info, inode, &info->oti_obj_dentry, lma);
if (rc == -ENODATA)
return inode;
/**
* Helper function to convert time interval to microseconds packed in
- * long int (default time units for the counter in "stats" initialized
- * by lu_time_init() )
+ * long int.
*/
static long interval_to_usec(cfs_time_t start, cfs_time_t end)
{
/* Release granted quota to master if necessary */
qi->lqi_id.qid_uid = uid;
- qsd_adjust_quota(env, qsd, &qi->lqi_id, USRQUOTA);
+ qsd_op_adjust(env, qsd, &qi->lqi_id, USRQUOTA);
qi->lqi_id.qid_uid = gid;
- qsd_adjust_quota(env, qsd, &qi->lqi_id, GRPQUOTA);
+ qsd_op_adjust(env, qsd, &qi->lqi_id, GRPQUOTA);
}
}
}
ksfs = &osd_oti_get(env)->oti_ksfs;
}
- cfs_spin_lock(&osd->od_osfs_lock);
+ spin_lock(&osd->od_osfs_lock);
/* cache 1 second */
if (cfs_time_before_64(osd->od_osfs_age, cfs_time_shift_64(-1))) {
result = sb->s_op->statfs(sb->s_root, ksfs);
}
}
- if (likely(result == 0))
- *sfs = osd->od_statfs;
- cfs_spin_unlock(&osd->od_osfs_lock);
+ if (likely(result == 0))
+ *sfs = osd->od_statfs;
+ spin_unlock(&osd->od_osfs_lock);
if (unlikely(env == NULL))
OBD_FREE_PTR(ksfs);
LINVRNT(osd_invariant(obj));
LASSERT(obj->oo_owner != env);
- cfs_down_read_nested(&obj->oo_sem, role);
+ down_read_nested(&obj->oo_sem, role);
LASSERT(obj->oo_owner == NULL);
oti->oti_r_locks++;
LINVRNT(osd_invariant(obj));
LASSERT(obj->oo_owner != env);
- cfs_down_write_nested(&obj->oo_sem, role);
+ down_write_nested(&obj->oo_sem, role);
LASSERT(obj->oo_owner == NULL);
obj->oo_owner = env;
LASSERT(oti->oti_r_locks > 0);
oti->oti_r_locks--;
- cfs_up_read(&obj->oo_sem);
+ up_read(&obj->oo_sem);
}
static void osd_object_write_unlock(const struct lu_env *env,
LASSERT(oti->oti_w_locks > 0);
oti->oti_w_locks--;
obj->oo_owner = NULL;
- cfs_up_write(&obj->oo_sem);
+ up_write(&obj->oo_sem);
}
static int osd_object_write_locked(const struct lu_env *env,
RETURN(-ESTALE);
}
- cfs_spin_lock(&capa_lock);
- for (i = 0; i < 2; i++) {
- if (keys[i].lk_keyid == capa->lc_keyid) {
- oti->oti_capa_key = keys[i];
- break;
- }
- }
- cfs_spin_unlock(&capa_lock);
+ spin_lock(&capa_lock);
+ for (i = 0; i < 2; i++) {
+ if (keys[i].lk_keyid == capa->lc_keyid) {
+ oti->oti_capa_key = keys[i];
+ break;
+ }
+ }
+ spin_unlock(&capa_lock);
if (i == 2) {
DEBUG_CAPA(D_ERROR, capa, "no matched capa key");
if (osd_object_auth(env, dt, capa, CAPA_OPC_META_READ))
return -EACCES;
- cfs_spin_lock(&obj->oo_guard);
- osd_inode_getattr(env, obj->oo_inode, attr);
- cfs_spin_unlock(&obj->oo_guard);
- return 0;
+ spin_lock(&obj->oo_guard);
+ osd_inode_getattr(env, obj->oo_inode, attr);
+ spin_unlock(&obj->oo_guard);
+ return 0;
}
static int osd_declare_attr_set(const struct lu_env *env,
if (bits & LA_GID)
inode->i_gid = attr->la_gid;
if (bits & LA_NLINK)
- inode->i_nlink = attr->la_nlink;
+ set_nlink(inode, attr->la_nlink);
if (bits & LA_RDEV)
inode->i_rdev = attr->la_rdev;
if (rc)
return rc;
- cfs_spin_lock(&obj->oo_guard);
- rc = osd_inode_setattr(env, inode, attr);
- cfs_spin_unlock(&obj->oo_guard);
+ spin_lock(&obj->oo_guard);
+ rc = osd_inode_setattr(env, inode, attr);
+ spin_unlock(&obj->oo_guard);
if (!rc)
inode->i_sb->s_op->dirty_inode(inode);
* This inode should be marked dirty for i_rdev. Currently
* that is done in the osd_attr_init().
*/
- init_special_inode(obj->oo_inode, mode, attr->la_rdev);
+ init_special_inode(obj->oo_inode, obj->oo_inode->i_mode,
+ attr->la_rdev);
}
LINVRNT(osd_invariant(obj));
return result;
struct thandle *th)
{
int result;
- __u32 umask;
-
- /* we drop umask so that permissions we pass are not affected */
- umask = current->fs->umask;
- current->fs->umask = 0;
result = osd_create_type_f(dof->dof_type)(info, obj, attr, hint, dof,
th);
unlock_new_inode(obj->oo_inode);
}
- /* restore previous umask value */
- current->fs->umask = umask;
-
return result;
}
/* Parallel control for OI scrub. For most of cases, there is no
* lock contention. So it will not affect unlink performance. */
- cfs_mutex_lock(&inode->i_mutex);
- if (S_ISDIR(inode->i_mode)) {
- LASSERT(osd_inode_unlinked(inode) ||
- inode->i_nlink == 1);
- cfs_spin_lock(&obj->oo_guard);
- inode->i_nlink = 0;
- cfs_spin_unlock(&obj->oo_guard);
- inode->i_sb->s_op->dirty_inode(inode);
- } else {
- LASSERT(osd_inode_unlinked(inode));
- }
+ mutex_lock(&inode->i_mutex);
+ if (S_ISDIR(inode->i_mode)) {
+ LASSERT(osd_inode_unlinked(inode) ||
+ inode->i_nlink == 1);
+ spin_lock(&obj->oo_guard);
+ clear_nlink(inode);
+ spin_unlock(&obj->oo_guard);
+ inode->i_sb->s_op->dirty_inode(inode);
+ } else {
+ LASSERT(osd_inode_unlinked(inode));
+ }
OSD_EXEC_OP(th, destroy);
result = osd_oi_delete(osd_oti_get(env), osd, fid, th);
- cfs_mutex_unlock(&inode->i_mutex);
+ mutex_unlock(&inode->i_mutex);
/* XXX: add to ext3 orphan list */
/* rc = ext3_orphan_add(handle_t *handle, struct inode *inode) */
OSD_EXEC_OP(th, ref_add);
- /*
- * DIR_NLINK feature is set for compatibility reasons if:
- * 1) nlinks > LDISKFS_LINK_MAX, or
- * 2) nlinks == 2, since this indicates i_nlink was previously 1.
- *
- * It is easier to always set this flag (rather than check and set),
- * since it has less overhead, and the superblock will be dirtied
- * at some point. Both e2fsprogs and any Lustre-supported ldiskfs
- * do not actually care whether this flag is set or not.
- */
- cfs_spin_lock(&obj->oo_guard);
- inode->i_nlink++;
- if (S_ISDIR(inode->i_mode) && inode->i_nlink > 1) {
- if (inode->i_nlink >= LDISKFS_LINK_MAX ||
- inode->i_nlink == 2)
- inode->i_nlink = 1;
- }
- LASSERT(inode->i_nlink <= LDISKFS_LINK_MAX);
- cfs_spin_unlock(&obj->oo_guard);
- inode->i_sb->s_op->dirty_inode(inode);
- LINVRNT(osd_invariant(obj));
+ /*
+ * DIR_NLINK feature is set for compatibility reasons if:
+ * 1) nlinks > LDISKFS_LINK_MAX, or
+ * 2) nlinks == 2, since this indicates i_nlink was previously 1.
+ *
+ * It is easier to always set this flag (rather than check and set),
+ * since it has less overhead, and the superblock will be dirtied
+ * at some point. Both e2fsprogs and any Lustre-supported ldiskfs
+ * do not actually care whether this flag is set or not.
+ */
+ spin_lock(&obj->oo_guard);
+ /* inc_nlink from 0 may cause WARN_ON */
+	if (inode->i_nlink == 0)
+ set_nlink(inode, 1);
+ else
+ inc_nlink(inode);
+ if (S_ISDIR(inode->i_mode) && inode->i_nlink > 1) {
+ if (inode->i_nlink >= LDISKFS_LINK_MAX ||
+ inode->i_nlink == 2)
+ set_nlink(inode, 1);
+ }
+ LASSERT(inode->i_nlink <= LDISKFS_LINK_MAX);
+ spin_unlock(&obj->oo_guard);
+ inode->i_sb->s_op->dirty_inode(inode);
+ LINVRNT(osd_invariant(obj));
- return 0;
+ return 0;
}
static int osd_declare_object_ref_del(const struct lu_env *env,
OSD_EXEC_OP(th, ref_del);
- cfs_spin_lock(&obj->oo_guard);
- LASSERT(inode->i_nlink > 0);
- inode->i_nlink--;
- /* If this is/was a many-subdir directory (nlink > LDISKFS_LINK_MAX)
- * then the nlink count is 1. Don't let it be set to 0 or the directory
- * inode will be deleted incorrectly. */
- if (S_ISDIR(inode->i_mode) && inode->i_nlink == 0)
- inode->i_nlink++;
- cfs_spin_unlock(&obj->oo_guard);
- inode->i_sb->s_op->dirty_inode(inode);
- LINVRNT(osd_invariant(obj));
+ spin_lock(&obj->oo_guard);
+ LASSERT(inode->i_nlink > 0);
+ drop_nlink(inode);
+ /* If this is/was a many-subdir directory (nlink > LDISKFS_LINK_MAX)
+ * then the nlink count is 1. Don't let it be set to 0 or the directory
+ * inode will be deleted incorrectly. */
+ if (S_ISDIR(inode->i_mode) && inode->i_nlink == 0)
+ set_nlink(inode, 1);
+ spin_unlock(&obj->oo_guard);
+ inode->i_sb->s_op->dirty_inode(inode);
+ LINVRNT(osd_invariant(obj));
- return 0;
+ return 0;
}
/*
RETURN(oc);
}
- cfs_spin_lock(&capa_lock);
- *key = dev->od_capa_keys[1];
- cfs_spin_unlock(&capa_lock);
+ spin_lock(&capa_lock);
+ *key = dev->od_capa_keys[1];
+ spin_unlock(&capa_lock);
capa->lc_keyid = key->lk_keyid;
capa->lc_expiry = cfs_time_current_sec() + dev->od_capa_timeout;
OBD_ALLOC_PTR(dir);
if (dir != NULL) {
- cfs_spin_lock(&obj->oo_guard);
- if (obj->oo_dir == NULL)
- obj->oo_dir = dir;
- else
- /*
- * Concurrent thread allocated container data.
- */
- OBD_FREE_PTR(dir);
- cfs_spin_unlock(&obj->oo_guard);
- /*
- * Now, that we have container data, serialize its
- * initialization.
- */
- cfs_down_write(&obj->oo_ext_idx_sem);
- /*
- * recheck under lock.
- */
- if (!osd_has_index(obj))
- result = osd_iam_container_init(env, obj, dir);
- else
- result = 0;
- cfs_up_write(&obj->oo_ext_idx_sem);
+ spin_lock(&obj->oo_guard);
+ if (obj->oo_dir == NULL)
+ obj->oo_dir = dir;
+ else
+ /*
+ * Concurrent thread allocated container data.
+ */
+ OBD_FREE_PTR(dir);
+ spin_unlock(&obj->oo_guard);
+ /*
+ * Now, that we have container data, serialize its
+ * initialization.
+ */
+ down_write(&obj->oo_ext_idx_sem);
+ /*
+ * recheck under lock.
+ */
+ if (!osd_has_index(obj))
+ result = osd_iam_container_init(env, obj, dir);
+ else
+ result = 0;
+ up_write(&obj->oo_ext_idx_sem);
} else {
result = -ENOMEM;
}
ldiskfs_htree_lock(hlock, obj->oo_hl_head,
dir, LDISKFS_HLOCK_DEL);
} else {
- cfs_down_write(&obj->oo_ext_idx_sem);
+ down_write(&obj->oo_ext_idx_sem);
}
bh = osd_ldiskfs_find_entry(dir, dentry, &de, hlock);
if (hlock != NULL)
ldiskfs_htree_unlock(hlock);
else
- cfs_up_write(&obj->oo_ext_idx_sem);
+ up_write(&obj->oo_ext_idx_sem);
LASSERT(osd_invariant(obj));
RETURN(rc);
ldiskfs_htree_lock(hlock, pobj->oo_hl_head,
pobj->oo_inode, 0);
} else {
- cfs_down_write(&pobj->oo_ext_idx_sem);
+ down_write(&pobj->oo_ext_idx_sem);
}
rc = osd_add_dot_dotdot(info, pobj, cinode, name,
(struct dt_rec *)lu_object_fid(&pobj->oo_dt.do_lu),
ldiskfs_htree_lock(hlock, pobj->oo_hl_head,
pobj->oo_inode, LDISKFS_HLOCK_ADD);
} else {
- cfs_down_write(&pobj->oo_ext_idx_sem);
+ down_write(&pobj->oo_ext_idx_sem);
}
rc = __osd_ea_add_rec(info, pobj, cinode, name, fid,
if (hlock != NULL)
ldiskfs_htree_unlock(hlock);
else
- cfs_up_write(&pobj->oo_ext_idx_sem);
+ up_write(&pobj->oo_ext_idx_sem);
return rc;
}
ldiskfs_htree_lock(hlock, obj->oo_hl_head,
dir, LDISKFS_HLOCK_LOOKUP);
} else {
- cfs_down_read(&obj->oo_ext_idx_sem);
+ down_read(&obj->oo_ext_idx_sem);
}
bh = osd_ldiskfs_find_entry(dir, dentry, &de, hlock);
if (hlock != NULL)
ldiskfs_htree_unlock(hlock);
else
- cfs_up_read(&obj->oo_ext_idx_sem);
+ up_read(&obj->oo_ext_idx_sem);
return rc;
}
ldiskfs_htree_lock(hlock, obj->oo_hl_head,
inode, LDISKFS_HLOCK_READDIR);
} else {
- cfs_down_read(&obj->oo_ext_idx_sem);
+ down_read(&obj->oo_ext_idx_sem);
}
result = inode->i_fop->readdir(&it->oie_file, it,
if (hlock != NULL)
ldiskfs_htree_unlock(hlock);
else
- cfs_up_read(&obj->oo_ext_idx_sem);
+ up_read(&obj->oo_ext_idx_sem);
if (it->oie_rd_dirent == 0) {
result = -EIO;
if (o->od_mnt != NULL)
RETURN(0);
+ if (strlen(dev) >= sizeof(o->od_mntdev))
+ RETURN(-E2BIG);
+ strcpy(o->od_mntdev, dev);
+
o->od_fsops = fsfilt_get_ops(mt_str(LDD_MT_LDISKFS));
if (o->od_fsops == NULL) {
CERROR("Can't find fsfilt_ldiskfs\n");
rc = osd_shutdown(env, osd_dev(d));
- osd_compat_fini(osd_dev(d));
+ osd_obj_map_fini(osd_dev(d));
shrink_dcache_sb(osd_sb(osd_dev(d)));
osd_sync(env, lu2dt_dev(d));
l->ld_ops = &osd_lu_ops;
o->od_dt_dev.dd_ops = &osd_dt_ops;
- cfs_spin_lock_init(&o->od_osfs_lock);
- cfs_mutex_init(&o->od_otable_mutex);
+ spin_lock_init(&o->od_osfs_lock);
+ mutex_init(&o->od_otable_mutex);
o->od_osfs_age = cfs_time_shift_64(-1000);
o->od_capa_hash = init_capa_hash();
strncpy(o->od_svname, lustre_cfg_string(cfg, 4),
sizeof(o->od_svname) - 1);
- rc = osd_compat_init(o);
+ rc = osd_obj_map_init(o);
if (rc != 0)
GOTO(out_scrub, rc);
out_site:
lu_site_fini(&o->od_site);
out_compat:
- osd_compat_fini(o);
+ osd_obj_map_fini(o);
out_scrub:
osd_scrub_cleanup(env, o);
out_mnt:
rc = dt_device_init(&o->od_dt_dev, t);
if (rc == 0) {
+ /* Because the ctx might be revived in dt_device_init,
+ * refill the env here */
+ lu_env_refill((struct lu_env *)env);
rc = osd_device_init0(env, o, cfg);
if (rc)
dt_device_fini(&o->od_dt_dev);
*exp = class_conn2export(&conn);
- cfs_spin_lock(&osd->od_osfs_lock);
+ spin_lock(&osd->od_osfs_lock);
osd->od_connects++;
- cfs_spin_unlock(&osd->od_osfs_lock);
+ spin_unlock(&osd->od_osfs_lock);
RETURN(0);
}
ENTRY;
/* Only disconnect the underlying layers on the final disconnect. */
- cfs_spin_lock(&osd->od_osfs_lock);
+ spin_lock(&osd->od_osfs_lock);
osd->od_connects--;
if (osd->od_connects == 0)
release = 1;
- cfs_spin_unlock(&osd->od_osfs_lock);
+ spin_unlock(&osd->od_osfs_lock);
rc = class_disconnect(exp); /* bz 9811 */