X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Fobdfilter%2Ffilter.c;h=530762dd8b6b72dc1d987730514efa9a2e6d4ae5;hb=d750891e478804bc495ffa075d771d1816369958;hp=e3f13b71a1b2df71e86c85dfa3ddff2700cb54f7;hpb=08b2bc314e2eef39edebd87616ea3a372718b310;p=fs%2Flustre-release.git diff --git a/lustre/obdfilter/filter.c b/lustre/obdfilter/filter.c index e3f13b7..530762d 100644 --- a/lustre/obdfilter/filter.c +++ b/lustre/obdfilter/filter.c @@ -26,7 +26,7 @@ * GPL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved + * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. */ /* @@ -241,6 +241,7 @@ static int lprocfs_init_rw_stats(struct obd_device *obd, plus the procfs overhead :( */ static int filter_export_stats_init(struct obd_device *obd, struct obd_export *exp, + int reconnect, void *client_nid) { int rc, newnid = 0; @@ -250,7 +251,7 @@ static int filter_export_stats_init(struct obd_device *obd, /* Self-export gets no proc entry */ RETURN(0); - rc = lprocfs_exp_setup(exp, client_nid, &newnid); + rc = lprocfs_exp_setup(exp, client_nid, reconnect, &newnid); if (rc) { /* Mask error for already created * /proc entries */ @@ -282,23 +283,13 @@ static int filter_export_stats_init(struct obd_device *obd, tmp->nid_stats); if (rc) GOTO(clean, rc); - /* Always add in ldlm_stats */ - tmp->nid_ldlm_stats = - lprocfs_alloc_stats(LDLM_LAST_OPC - LDLM_FIRST_OPC, - LPROCFS_STATS_FLAG_NOPERCPU); - if (tmp->nid_ldlm_stats == NULL) - GOTO(clean, rc = -ENOMEM); - - lprocfs_init_ldlm_stats(tmp->nid_ldlm_stats); - rc = lprocfs_register_stats(tmp->nid_proc, "ldlm_stats", - tmp->nid_ldlm_stats); + rc = lprocfs_nid_ldlm_stats_init(tmp); if (rc) GOTO(clean, rc); } RETURN(0); clean: - lprocfs_exp_cleanup(exp); return rc; } @@ -643,14 +634,18 @@ static void filter_fmd_cleanup(struct obd_export *exp) static int filter_init_export(struct obd_export *exp) { + int rc; cfs_spin_lock_init(&exp->exp_filter_data.fed_lock); CFS_INIT_LIST_HEAD(&exp->exp_filter_data.fed_mod_list); cfs_spin_lock(&exp->exp_lock); exp->exp_connecting = 1; cfs_spin_unlock(&exp->exp_lock); + rc = lut_client_alloc(exp); + if (rc == 0) + rc = ldlm_init_export(exp); - return ldlm_init_export(exp); + return rc; } static int filter_free_server_data(struct obd_device_target *obt) @@ -698,7 +693,7 @@ int filter_update_last_objid(struct obd_device *obd, obd_seq group, } CDEBUG(D_INODE, "%s: server last_objid for "POSTID"\n", - obd->obd_name, group, filter->fo_last_objids[group]); + obd->obd_name, filter->fo_last_objids[group], group); tmp = cpu_to_le64(filter->fo_last_objids[group]); rc = fsfilt_write_record(obd, filter->fo_last_objid_files[group], @@ -821,18 +816,16 @@ static int filter_init_server_data(struct obd_device *obd, struct file * filp) GOTO(out, rc = 0); } + OBD_ALLOC_PTR(lcd); + if (!lcd) + GOTO(err_client, rc = -ENOMEM); + for (cl_idx = 0, off = le32_to_cpu(lsd->lsd_client_start); off < last_rcvd_size; cl_idx++) { __u64 last_rcvd; struct obd_export *exp; struct filter_export_data *fed; - if (!lcd) { - OBD_ALLOC_PTR(lcd); - if (!lcd) - GOTO(err_client, rc = -ENOMEM); - } - /* Don't assume off is incremented properly by * fsfilt_read_record(), in case sizeof(*lcd) * isn't the same as lsd->lsd_client_size. */ @@ -855,60 +848,50 @@ static int filter_init_server_data(struct obd_device *obd, struct file * filp) last_rcvd = le64_to_cpu(lcd->lcd_last_transno); + CDEBUG(D_HA, "RCVRNG CLIENT uuid: %s idx: %d lr: "LPU64 + " srv lr: "LPU64"\n", lcd->lcd_uuid, cl_idx, + last_rcvd, le64_to_cpu(lsd->lsd_last_transno)); + /* These exports are cleaned up by filter_disconnect(), so they * need to be set up like real exports as filter_connect() does. */ exp = class_new_export(obd, (struct obd_uuid *)lcd->lcd_uuid); - - CDEBUG(D_HA, "RCVRNG CLIENT uuid: %s idx: %d lr: "LPU64 - " srv lr: "LPU64"\n", lcd->lcd_uuid, cl_idx, - last_rcvd, le64_to_cpu(lsd->lsd_last_transno)); if (IS_ERR(exp)) { if (PTR_ERR(exp) == -EALREADY) { /* export already exists, zero out this one */ - CERROR("Zeroing out duplicate export due to " - "bug 10479.\n"); - lcd->lcd_uuid[0] = '\0'; - } else { - GOTO(err_client, rc = PTR_ERR(exp)); + CERROR("Duplicate export %s!\n", lcd->lcd_uuid); + continue; } - } else { - fed = &exp->exp_filter_data; - fed->fed_ted.ted_lcd = lcd; - fed->fed_group = 0; /* will be assigned at connect */ - filter_export_stats_init(obd, exp, NULL); - rc = filter_client_add(obd, exp, cl_idx); - /* can't fail for existing client */ - LASSERTF(rc == 0, "rc = %d\n", rc); - - /* VBR: set export last committed */ - exp->exp_last_committed = last_rcvd; - cfs_spin_lock(&exp->exp_lock); - exp->exp_connecting = 0; - exp->exp_in_recovery = 0; - cfs_spin_unlock(&exp->exp_lock); - cfs_spin_lock_bh(&obd->obd_processing_task_lock); - obd->obd_max_recoverable_clients++; - cfs_spin_unlock_bh(&obd->obd_processing_task_lock); - lcd = NULL; - class_export_put(exp); + OBD_FREE_PTR(lcd); + GOTO(err_client, rc = PTR_ERR(exp)); } - /* Need to check last_rcvd even for duplicated exports. */ - CDEBUG(D_OTHER, "client at idx %d has last_rcvd = "LPU64"\n", - cl_idx, last_rcvd); + fed = &exp->exp_filter_data; + *fed->fed_ted.ted_lcd = *lcd; + fed->fed_group = 0; /* will be assigned at connect */ + filter_export_stats_init(obd, exp, 0, NULL); + rc = filter_client_add(obd, exp, cl_idx); + /* can't fail for existing client */ + LASSERTF(rc == 0, "rc = %d\n", rc); + + /* VBR: set export last committed */ + exp->exp_last_committed = last_rcvd; + cfs_spin_lock(&exp->exp_lock); + exp->exp_connecting = 0; + exp->exp_in_recovery = 0; + cfs_spin_unlock(&exp->exp_lock); + obd->obd_max_recoverable_clients++; + class_export_put(exp); if (last_rcvd > le64_to_cpu(lsd->lsd_last_transno)) lsd->lsd_last_transno = cpu_to_le64(last_rcvd); } - - if (lcd) - OBD_FREE_PTR(lcd); + OBD_FREE_PTR(lcd); obd->obd_last_committed = le64_to_cpu(lsd->lsd_last_transno); out: - lut->lut_mount_count = mount_count + 1; - lsd->lsd_mount_count = cpu_to_le64(lut->lut_mount_count); + obd->u.obt.obt_mount_count = mount_count + 1; + lsd->lsd_mount_count = cpu_to_le64(obd->u.obt.obt_mount_count); /* save it, so mount count and last_transno is current */ rc = filter_update_server_data(obd); @@ -1212,7 +1195,7 @@ static int filter_prep_groups(struct obd_device *obd) loff_t off = 0; ENTRY; - O_dentry = simple_mkdir(current->fs->pwd, obd->u.obt.obt_vfsmnt, + O_dentry = simple_mkdir(cfs_fs_pwd(current->fs), obd->u.obt.obt_vfsmnt, "O", 0700, 1); CDEBUG(D_INODE, "got/created O: %p\n", O_dentry); if (IS_ERR(O_dentry)) { @@ -1501,8 +1484,8 @@ struct dentry *filter_fid2dentry(struct obd_device *obd, if (dir_dentry == NULL) filter_parent_unlock(dparent); if (IS_ERR(dchild)) { - CERROR("%s: child lookup error %ld\n", obd->obd_name, - PTR_ERR(dchild)); + CERROR("%s: object "LPU64":"LPU64" lookup error: rc %ld\n", + obd->obd_name, id, group, PTR_ERR(dchild)); RETURN(dchild); } @@ -1578,7 +1561,7 @@ int filter_vfs_unlink(struct inode *dir, struct dentry *dentry, GOTO(out, rc = -EPERM); /* check_sticky() */ - if ((dentry->d_inode->i_uid != current->fsuid && + if ((dentry->d_inode->i_uid != cfs_curproc_fsuid() && !cfs_capable(CFS_CAP_FOWNER)) || IS_APPEND(dentry->d_inode) || IS_IMMUTABLE(dentry->d_inode)) GOTO(out, rc = -EPERM); @@ -1586,7 +1569,7 @@ int filter_vfs_unlink(struct inode *dir, struct dentry *dentry, /* NOTE: This might need to go outside i_mutex, though it isn't clear if * that was done because of journal_start (which is already done * here) or some other ordering issue. */ - DQUOT_INIT(dir); + ll_vfs_dq_init(dir); rc = ll_security_inode_unlink(dir, dentry, mnt); if (rc) @@ -1612,7 +1595,10 @@ static int filter_destroy_internal(struct obd_device *obd, obd_id objid, struct inode *inode = dchild->d_inode; int rc; - if (inode->i_nlink != 1 || atomic_read(&inode->i_count) != 1) { + /* There should be 2 references to the inode: + * 1) taken by filter_prepare_destroy + * 2) taken by filter_destroy */ + if (inode->i_nlink != 1 || atomic_read(&inode->i_count) != 2) { CERROR("destroying objid %.*s ino %lu nlink %lu count %d\n", dchild->d_name.len, dchild->d_name.name, inode->i_ino, (unsigned long)inode->i_nlink, @@ -1716,7 +1702,7 @@ static int filter_intent_policy(struct ldlm_namespace *ns, * lock, and should not be granted if the lock will be blocked. */ - LASSERT(ns == res->lr_namespace); + LASSERT(ns == ldlm_res_to_ns(res)); lock_res(res); rc = policy(lock, &tmpflags, 0, &err, &rpc_list); check_res_locked(res); @@ -1738,7 +1724,7 @@ static int filter_intent_policy(struct ldlm_namespace *ns, if (rc == LDLM_ITER_CONTINUE) { /* do not grant locks to the liblustre clients: they cannot * handle ASTs robustly. We need to do this while still - * holding ns_lock to avoid the lock remaining on the res_link + * holding lr_lock to avoid the lock remaining on the res_link * list (and potentially being added to l_pending_list by an * AST) when we are going to drop this lock ASAP. */ if (lock->l_export->exp_libclient || @@ -1761,7 +1747,7 @@ static int filter_intent_policy(struct ldlm_namespace *ns, *reply_lvb = *res_lvb; /* - * ->ns_lock guarantees that no new locks are granted, and, + * lr_lock guarantees that no new locks are granted, and, * therefore, that res->lr_lvb_data cannot increase beyond the * end of already granted lock. As a result, it is safe to * check against "stale" reply_lvb->lvb_size value without @@ -1814,13 +1800,6 @@ static int filter_intent_policy(struct ldlm_namespace *ns, LASSERTF(l->l_glimpse_ast != NULL, "l == %p", l); rc = l->l_glimpse_ast(l, NULL); /* this will update the LVB */ - /* Update the LVB from disk if the AST failed (this is a legal race) */ - /* - * XXX nikita: situation when ldlm_server_glimpse_ast() failed before - * sending ast is not handled. This can result in lost client writes. - */ - if (rc != 0) - ldlm_res_lvbo_update(res, NULL, 1); lock_res(res); *reply_lvb = *res_lvb; @@ -1969,7 +1948,7 @@ int filter_common_setup(struct obd_device *obd, struct lustre_cfg* lcfg, __u8 *uuid_ptr; char *str, *label; char ns_name[48]; - request_queue_t *q; + struct request_queue *q; int rc, i; ENTRY; @@ -2034,6 +2013,7 @@ int filter_common_setup(struct obd_device *obd, struct lustre_cfg* lcfg, obd->u.obt.obt_vfsmnt = mnt; obd->u.obt.obt_sb = mnt->mnt_sb; + obd->u.obt.obt_magic = OBT_MAGIC; filter->fo_fstype = mnt->mnt_sb->s_type->name; CDEBUG(D_SUPER, "%s: mnt = %p\n", filter->fo_fstype, mnt); @@ -2055,11 +2035,14 @@ int filter_common_setup(struct obd_device *obd, struct lustre_cfg* lcfg, CFS_INIT_LIST_HEAD(&filter->fo_export_list); cfs_sema_init(&filter->fo_alloc_lock, 1); init_brw_stats(&filter->fo_filter_stats); + cfs_spin_lock_init(&filter->fo_flags_lock); filter->fo_read_cache = 1; /* enable read-only cache by default */ filter->fo_writethrough_cache = 1; /* enable writethrough cache */ filter->fo_readcache_max_filesize = FILTER_MAX_CACHE_SIZE; filter->fo_fmd_max_num = FILTER_FMD_MAX_NUM_DEFAULT; filter->fo_fmd_max_age = FILTER_FMD_MAX_AGE_DEFAULT; + filter->fo_syncjournal = 0; /* Don't sync journals on i/o by default */ + filter_slc_set(filter); /* initialize sync on lock cancel */ rc = filter_prep(obd); if (rc) @@ -2076,8 +2059,10 @@ int filter_common_setup(struct obd_device *obd, struct lustre_cfg* lcfg, GOTO(err_post, rc = -ENOMEM); sprintf(ns_name, "filter-%s", obd->obd_uuid.uuid); - obd->obd_namespace = ldlm_namespace_new(obd, ns_name, LDLM_NAMESPACE_SERVER, - LDLM_NAMESPACE_GREEDY); + obd->obd_namespace = ldlm_namespace_new(obd, ns_name, + LDLM_NAMESPACE_SERVER, + LDLM_NAMESPACE_GREEDY, + LDLM_NS_TYPE_OST); if (obd->obd_namespace == NULL) GOTO(err_post, rc = -ENOMEM); obd->obd_namespace->ns_lvbp = obd; @@ -2103,13 +2088,13 @@ int filter_common_setup(struct obd_device *obd, struct lustre_cfg* lcfg, GOTO(err_post, rc); q = bdev_get_queue(mnt->mnt_sb->s_bdev); - if (q->max_sectors < q->max_hw_sectors && - q->max_sectors < PTLRPC_MAX_BRW_SIZE >> 9) + if (queue_max_sectors(q) < queue_max_hw_sectors(q) && + queue_max_sectors(q) < PTLRPC_MAX_BRW_SIZE >> 9) LCONSOLE_INFO("%s: underlying device %s should be tuned " "for larger I/O requests: max_sectors = %u " "could be up to max_hw_sectors=%u\n", obd->obd_name, mnt->mnt_sb->s_id, - q->max_sectors, q->max_hw_sectors); + queue_max_sectors(q), queue_max_hw_sectors(q)); uuid_ptr = fsfilt_uuid(obd, obd->u.obt.obt_sb); if (uuid_ptr != NULL) { @@ -2522,9 +2507,9 @@ static int filter_llog_connect(struct obd_export *exp, obd->obd_name, body->lgdc_logid.lgl_oid, body->lgdc_logid.lgl_oseq, body->lgdc_logid.lgl_ogen); - cfs_spin_lock_bh(&obd->obd_processing_task_lock); + cfs_spin_lock(&obd->u.filter.fo_flags_lock); obd->u.filter.fo_mds_ost_sync = 1; - cfs_spin_unlock_bh(&obd->obd_processing_task_lock); + cfs_spin_unlock(&obd->u.filter.fo_flags_lock); rc = llog_connect(ctxt, &body->lgdc_logid, &body->lgdc_gen, NULL); llog_ctxt_put(ctxt); @@ -2617,7 +2602,7 @@ static int filter_cleanup(struct obd_device *obd) filter_post(obd); - LL_DQUOT_OFF(obd->u.obt.obt_sb); + ll_vfs_dq_off(obd->u.obt.obt_sb, 0); shrink_dcache_sb(obd->u.obt.obt_sb); server_put_mount(obd->obd_name, obd->u.obt.obt_vfsmnt); @@ -2761,7 +2746,7 @@ static int filter_reconnect(const struct lu_env *env, rc = filter_connect_internal(exp, data, 1); if (rc == 0) - filter_export_stats_init(obd, exp, localdata); + filter_export_stats_init(obd, exp, 1, localdata); RETURN(rc); } @@ -2775,8 +2760,6 @@ static int filter_connect(const struct lu_env *env, struct lvfs_run_ctxt saved; struct lustre_handle conn = { 0 }; struct obd_export *lexp; - struct tg_export_data *ted; - struct lsd_client_data *lcd = NULL; __u32 group; int rc; ENTRY; @@ -2790,22 +2773,15 @@ static int filter_connect(const struct lu_env *env, lexp = class_conn2export(&conn); LASSERT(lexp != NULL); - ted = &lexp->exp_target_data; - rc = filter_connect_internal(lexp, data, 0); if (rc) GOTO(cleanup, rc); - filter_export_stats_init(obd, lexp, localdata); + filter_export_stats_init(obd, lexp, 0, localdata); if (obd->obd_replayable) { - OBD_ALLOC(lcd, sizeof(*lcd)); - if (!lcd) { - CERROR("filter: out of memory for client data\n"); - GOTO(cleanup, rc = -ENOMEM); - } - + struct lsd_client_data *lcd = lexp->exp_target_data.ted_lcd; + LASSERT(lcd); memcpy(lcd->lcd_uuid, cluuid, sizeof(lcd->lcd_uuid)); - ted->ted_lcd = lcd; rc = filter_client_add(obd, lexp, -1); if (rc) GOTO(cleanup, rc); @@ -2829,7 +2805,6 @@ static int filter_connect(const struct lu_env *env, cleanup: if (rc) { class_disconnect(lexp); - lprocfs_exp_cleanup(lexp); *exp = NULL; } else { *exp = lexp; @@ -2960,12 +2935,11 @@ static int filter_destroy_export(struct obd_export *exp) target_destroy_export(exp); ldlm_destroy_export(exp); + lut_client_free(exp); if (obd_uuid_equals(&exp->exp_client_uuid, &exp->exp_obd->obd_uuid)) RETURN(0); - lut_client_free(exp); - if (!exp->exp_obd->obd_replayable) fsfilt_sync(exp->exp_obd, exp->exp_obd->u.obt.obt_sb); @@ -3105,7 +3079,7 @@ struct dentry *__filter_oa2dentry(struct obd_device *obd, struct ost_id *ostid, if (dchild->d_inode == NULL) { if (!quiet) CERROR("%s: %s on non-existent object: "POSTID" \n", - obd->obd_name, what, ostid->oi_seq,ostid->oi_id); + obd->obd_name, what, ostid->oi_id,ostid->oi_seq); f_dput(dchild); RETURN(ERR_PTR(-ENOENT)); } @@ -3120,10 +3094,6 @@ static int filter_getattr(struct obd_export *exp, struct obd_info *oinfo) int rc = 0; ENTRY; - rc = filter_validate_obdo(oinfo->oi_oa, exp); - if (rc) - RETURN(rc); - rc = filter_auth_capa(exp, NULL, oinfo->oi_oa->o_seq, oinfo_capa(oinfo), CAPA_OPC_META_READ); if (rc) @@ -3218,13 +3188,15 @@ int filter_setattr_internal(struct obd_export *exp, struct dentry *dentry, *fcc = oa->o_lcookie; } if (ia_valid & (ATTR_SIZE | ATTR_UID | ATTR_GID)) { - DQUOT_INIT(inode); + unsigned long now = jiffies; + ll_vfs_dq_init(inode); /* Filter truncates and writes are serialized by * i_alloc_sem, see the comment in * filter_preprw_write.*/ if (ia_valid & ATTR_SIZE) down_write(&inode->i_alloc_sem); LOCK_INODE_MUTEX(inode); + fsfilt_check_slow(exp->exp_obd, now, "i_alloc_sem and i_mutex"); old_size = i_size_read(inode); } @@ -3302,11 +3274,14 @@ int filter_setattr_internal(struct obd_export *exp, struct dentry *dentry, if (OBD_FAIL_CHECK(OBD_FAIL_OST_SETATTR_CREDITS)) fsfilt_extend(exp->exp_obd, inode, 0, handle); - /* The truncate might have used up our transaction credits. Make - * sure we have one left for the last_rcvd update. */ - err = fsfilt_extend(exp->exp_obd, inode, 1, handle); + /* The truncate might have used up our transaction credits. Make sure + * we have two left for the last_rcvd and VBR inode version updates. */ + err = fsfilt_extend(exp->exp_obd, inode, 2, handle); + + /* Update inode version only if data has changed => size has changed */ + rc = filter_finish_transno(exp, ia_valid & ATTR_SIZE ? inode : NULL, + oti, rc, sync); - rc = filter_finish_transno(exp, inode, oti, rc, sync); if (sync) { filter_cancel_cookies_cb(exp->exp_obd, 0, fcc, rc); fcc = NULL; @@ -3372,10 +3347,6 @@ int filter_setattr(struct obd_export *exp, struct obd_info *oinfo, int rc; ENTRY; - rc = filter_validate_obdo(oinfo->oi_oa, exp); - if (rc) - RETURN(rc); - if (oa->o_valid & OBD_FL_TRUNC) opc |= CAPA_OPC_OSS_TRUNC; @@ -3405,7 +3376,7 @@ int filter_setattr(struct obd_export *exp, struct obd_info *oinfo, CERROR("%s: setattr from %s trying to truncate objid "POSTID "%s\n", exp->exp_obd->obd_name, obd_export_nid2str(exp), - oa->o_seq, oa->o_id, mdsinum); + oa->o_id, oa->o_seq, mdsinum); RETURN(-EPERM); } @@ -3425,7 +3396,9 @@ int filter_setattr(struct obd_export *exp, struct obd_info *oinfo, */ if (oa->o_valid & (OBD_MD_FLMTIME | OBD_MD_FLATIME | OBD_MD_FLCTIME)) { + unsigned long now = jiffies; down_write(&dentry->d_inode->i_alloc_sem); + fsfilt_check_slow(exp->exp_obd, now, "i_alloc_sem"); fmd = filter_fmd_get(exp, oa->o_id, oa->o_seq); if (fmd && fmd->fmd_mactime_xid < oti->oti_xid) fmd->fmd_mactime_xid = oti->oti_xid; @@ -3527,10 +3500,6 @@ static int filter_destroy_precreated(struct obd_export *exp, struct obdo *oa, int skip_orphan; ENTRY; - rc = filter_validate_obdo(oa, exp); - if (rc) - RETURN(rc); - LASSERT(down_trylock(&filter->fo_create_locks[oa->o_seq]) != 0); memset(&doa, 0, sizeof(doa)); @@ -3575,7 +3544,14 @@ static int filter_destroy_precreated(struct obd_export *exp, struct obdo *oa, filter_set_last_id(filter, id, doa.o_seq); rc = filter_update_last_objid(exp->exp_obd, doa.o_seq, 1); } else { - /* don't reuse orphan object, return last used objid */ + /* + * We have destroyed orphan objects, but don't want to reuse + * them. Therefore we don't reset last_id to the last created + * objects. Instead, we report back to the MDS the object id + * of the last orphan, so that the MDS can restart allocating + * objects from this id + 1 and thus skip the whole orphan + * object id range + */ oa->o_id = last; rc = 0; } @@ -3795,7 +3771,8 @@ static int filter_precreate(struct obd_device *obd, struct obdo *oa, OBD_ALLOC(osfs, sizeof(*osfs)); if (osfs == NULL) RETURN(-ENOMEM); - rc = filter_statfs(obd, osfs, cfs_time_current_64() - CFS_HZ, + rc = filter_statfs(obd, osfs, + cfs_time_shift_64(-OBD_STATFS_CACHE_SECONDS), 0); if (rc == 0 && osfs->os_bavail < (osfs->os_blocks >> 10)) { CDEBUG(D_RPCTRACE,"%s: not enough space for create " @@ -3835,11 +3812,16 @@ static int filter_precreate(struct obd_device *obd, struct obdo *oa, } else next_id = filter_last_id(filter, group) + 1; - /* Temporary solution for oid in CMD before fid-on-OST */ - if ((fid_seq_is_mdt0(oa->o_seq) && next_id >= IDIF_MAX_OID) && - (fid_seq_is_cmd(oa->o_seq) && next_id >= OBIF_MAX_OID)) { - CERROR("%s:"POSTID" hit the max IDIF_MAX_OID(1 << 48) !\n", - obd->obd_name, group, next_id); + /* Don't create objects beyond the valid range for this SEQ */ + if (unlikely(fid_seq_is_mdt0(group) && + next_id >= IDIF_MAX_OID)) { + CERROR("%s:"POSTID" hit the IDIF_MAX_OID (1<<48)!\n", + obd->obd_name, next_id, group); + GOTO(cleanup, rc = -ENOSPC); + } else if (unlikely(!fid_seq_is_mdt0(group) && + next_id >= OBIF_MAX_OID)) { + CERROR("%s:"POSTID" hit the OBIF_MAX_OID (1<<32)!\n", + obd->obd_name, next_id, group); GOTO(cleanup, rc = -ENOSPC); } @@ -3941,6 +3923,7 @@ set_last_id: if (rc) break; if (cfs_time_after(jiffies, enough_time)) { + i++; CDEBUG(D_RPCTRACE, "%s: precreate slow - want %d got %d \n", obd->obd_name, *num, i); @@ -3951,13 +3934,13 @@ set_last_id: CDEBUG(D_RPCTRACE, "%s: created %d objects for group "POSTID" rc %d\n", - obd->obd_name, i, group, filter->fo_last_objids[group], rc); + obd->obd_name, i, filter->fo_last_objids[group], group, rc); RETURN(rc); } -static int filter_create(struct obd_export *exp, struct obdo *oa, - struct lov_stripe_md **ea, struct obd_trans_info *oti) +int filter_create(struct obd_export *exp, struct obdo *oa, + struct lov_stripe_md **ea, struct obd_trans_info *oti) { struct obd_device *obd = exp->exp_obd; struct filter_export_data *fed; @@ -3967,10 +3950,6 @@ static int filter_create(struct obd_export *exp, struct obdo *oa, int rc = 0, diff; ENTRY; - rc = filter_validate_obdo(oa, exp); - if (rc) - RETURN(rc); - CDEBUG(D_INODE, "%s: filter_create(group="LPU64",id=" LPU64")\n", obd->obd_name, oa->o_seq, oa->o_id); @@ -4000,7 +3979,8 @@ static int filter_create(struct obd_export *exp, struct obdo *oa, if ((oa->o_valid & OBD_MD_FLFLAGS) && (oa->o_flags & OBD_FL_RECREATE_OBJS)) { - if (oa->o_id > filter_last_id(filter, oa->o_seq)) { + if (!obd->obd_recovering || + oa->o_id > filter_last_id(filter, oa->o_seq)) { CERROR("recreate objid "LPU64" > last id "LPU64"\n", oa->o_id, filter_last_id(filter, oa->o_seq)); rc = -EINVAL; @@ -4043,11 +4023,9 @@ int filter_destroy(struct obd_export *exp, struct obdo *oa, struct llog_cookie *fcc = NULL; int rc, rc2, cleanup_phase = 0, sync = 0; struct iattr iattr; + unsigned long now; ENTRY; - rc = filter_validate_obdo(oa, exp); - if (rc) - RETURN(rc); rc = filter_auth_capa(exp, NULL, oa->o_seq, (struct lustre_capa *)capa, CAPA_OPC_OSS_DESTROY); if (rc) @@ -4069,7 +4047,7 @@ int filter_destroy(struct obd_export *exp, struct obdo *oa, if (dchild->d_inode == NULL) { CDEBUG(D_INODE, "destroying non-existent object "POSTID"\n", - oa->o_seq, oa->o_id); + oa->o_id, oa->o_seq); /* If object already gone, cancel cookie right now */ if (oa->o_valid & OBD_MD_FLCOOKIE) { struct llog_ctxt *ctxt; @@ -4100,7 +4078,7 @@ int filter_destroy(struct obd_export *exp, struct obdo *oa, if (fcc != NULL) *fcc = oa->o_lcookie; } - DQUOT_INIT(dchild->d_inode); + ll_vfs_dq_init(dchild->d_inode); /* we're gonna truncate it first in order to avoid possible deadlock: * P1 P2 @@ -4114,8 +4092,10 @@ int filter_destroy(struct obd_export *exp, struct obdo *oa, * between page lock, i_mutex & starting new journal handle. * (see bug 20321) -johann */ + now = jiffies; down_write(&dchild->d_inode->i_alloc_sem); LOCK_INODE_MUTEX(dchild->d_inode); + fsfilt_check_slow(exp->exp_obd, now, "i_alloc_sem and i_mutex"); /* VBR: version recovery check */ rc = filter_version_get_check(exp, oti, dchild->d_inode); @@ -4257,10 +4237,6 @@ static int filter_sync(struct obd_export *exp, struct obdo *oa, int rc, rc2; ENTRY; - rc = filter_validate_obdo(oa, exp); - if (rc) - RETURN(rc); - rc = filter_auth_capa(exp, NULL, oa->o_seq, (struct lustre_capa *)capa, CAPA_OPC_OSS_WRITE); if (rc) @@ -4362,9 +4338,6 @@ static int filter_get_info(struct obd_export *exp, __u32 keylen, struct lvfs_run_ctxt saved; int rc; - rc = filter_validate_obdo(&fm_key->oa, exp); - if (rc) - RETURN(rc); if (fiemap == NULL) { *vallen = fiemap_count_to_size( fm_key->fiemap.fm_extent_count); @@ -4386,6 +4359,12 @@ static int filter_get_info(struct obd_export *exp, __u32 keylen, RETURN(rc); } + if (KEY_IS(KEY_SYNC_LOCK_CANCEL)) { + *((__u32 *) val) = obd->u.filter.fo_sync_lock_cancel; + *vallen = sizeof(__u32); + RETURN(0); + } + CDEBUG(D_IOCTL, "invalid key\n"); RETURN(-EINVAL); }