X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;ds=sidebyside;f=lustre%2Fobdfilter%2Ffilter.c;h=13c2530618bc3eee320b65974713eb06fe40e49a;hb=e67c6e366752611ffd2baeb7cefa24c9f289eb78;hp=8d664db432f1665d2f1241e13903c4b3ab8dbcd1;hpb=35c1f53f2403b16415cb445927c02d141eac8555;p=fs%2Flustre-release.git
diff --git a/lustre/obdfilter/filter.c b/lustre/obdfilter/filter.c
index 8d664db..13c2530 100644
--- a/lustre/obdfilter/filter.c
+++ b/lustre/obdfilter/filter.c
@@ -79,24 +79,50 @@
#include "filter_internal.h"
-/* Group 0 is no longer a legal group, to catch uninitialized IDs */
-#define FILTER_MIN_GROUPS FILTER_GROUP_MDS0
static struct lvfs_callback_ops filter_lvfs_ops;
cfs_mem_cache_t *ll_fmd_cachep;
static void filter_commit_cb(struct obd_device *obd, __u64 transno,
void *cb_data, int error)
{
- obd_transno_commit_cb(obd, transno, error);
+ struct obd_export *exp = cb_data;
+ LASSERT(exp->exp_obd == obd);
+ obd_transno_commit_cb(obd, transno, exp, error);
+ class_export_cb_put(exp);
+}
+
+int filter_version_get_check(struct obd_export *exp,
+ struct obd_trans_info *oti, struct inode *inode)
+{
+ __u64 curr_version;
+
+ if (inode == NULL || oti == NULL)
+ RETURN(0);
+
+ curr_version = fsfilt_get_version(exp->exp_obd, inode);
+ if ((__s64)curr_version == -EOPNOTSUPP)
+ RETURN(0);
+ /* VBR: version is checked always because costs nothing */
+ if (oti->oti_pre_version != 0 &&
+ oti->oti_pre_version != curr_version) {
+ CDEBUG(D_INODE, "Version mismatch "LPX64" != "LPX64"\n",
+ oti->oti_pre_version, curr_version);
+ cfs_spin_lock(&exp->exp_lock);
+ exp->exp_vbr_failed = 1;
+ cfs_spin_unlock(&exp->exp_lock);
+ RETURN (-EOVERFLOW);
+ }
+ oti->oti_pre_version = curr_version;
+ RETURN(0);
}
/* Assumes caller has already pushed us into the kernel context. */
-int filter_finish_transno(struct obd_export *exp, struct obd_trans_info *oti,
- int rc, int force_sync)
+int filter_finish_transno(struct obd_export *exp, struct inode *inode,
+ struct obd_trans_info *oti, int rc, int force_sync)
{
struct filter_obd *filter = &exp->exp_obd->u.filter;
struct filter_export_data *fed = &exp->exp_filter_data;
- struct lsd_client_data *lcd = fed->fed_lcd;
+ struct lsd_client_data *lcd;
__u64 last_rcvd;
loff_t off;
int err, log_pri = D_RPCTRACE;
@@ -108,25 +134,41 @@ int filter_finish_transno(struct obd_export *exp, struct obd_trans_info *oti,
if (!exp->exp_obd->obd_replayable || oti == NULL)
RETURN(rc);
+ cfs_mutex_down(&fed->fed_lcd_lock);
+ lcd = fed->fed_lcd;
+ /* if the export has already been disconnected, we have no last_rcvd slot,
+ * update server data with latest transno then */
+ if (lcd == NULL) {
+ cfs_mutex_up(&fed->fed_lcd_lock);
+ CWARN("commit transaction for disconnected client %s: rc %d\n",
+ exp->exp_client_uuid.uuid, rc);
+ err = filter_update_server_data(exp->exp_obd,
+ filter->fo_rcvd_filp,
+ filter->fo_fsd);
+ RETURN(err);
+ }
+
/* we don't allocate new transnos for replayed requests */
+ cfs_spin_lock(&filter->fo_translock);
if (oti->oti_transno == 0) {
- spin_lock(&filter->fo_translock);
last_rcvd = le64_to_cpu(filter->fo_fsd->lsd_last_transno) + 1;
filter->fo_fsd->lsd_last_transno = cpu_to_le64(last_rcvd);
- spin_unlock(&filter->fo_translock);
- oti->oti_transno = last_rcvd;
} else {
- spin_lock(&filter->fo_translock);
last_rcvd = oti->oti_transno;
if (last_rcvd > le64_to_cpu(filter->fo_fsd->lsd_last_transno))
filter->fo_fsd->lsd_last_transno =
cpu_to_le64(last_rcvd);
- spin_unlock(&filter->fo_translock);
}
+ oti->oti_transno = last_rcvd;
+
+ LASSERT(last_rcvd >= le64_to_cpu(lcd->lcd_last_transno));
lcd->lcd_last_transno = cpu_to_le64(last_rcvd);
+ lcd->lcd_pre_versions[0] = cpu_to_le64(oti->oti_pre_version);
+ lcd->lcd_last_xid = cpu_to_le64(oti->oti_xid);
+ cfs_spin_unlock(&filter->fo_translock);
- /* could get xid from oti, if it's ever needed */
- lcd->lcd_last_xid = 0;
+ if (inode)
+ fsfilt_set_version(exp->exp_obd, inode, last_rcvd);
off = fed->fed_lr_off;
if (off <= 0) {
@@ -134,18 +176,19 @@ int filter_finish_transno(struct obd_export *exp, struct obd_trans_info *oti,
fed->fed_lr_idx, fed->fed_lr_off);
err = -EINVAL;
} else {
+ class_export_cb_get(exp); /* released when the cb is called */
if (!force_sync)
force_sync = fsfilt_add_journal_cb(exp->exp_obd,
last_rcvd,
oti->oti_handle,
filter_commit_cb,
- NULL);
+ exp);
err = fsfilt_write_record(exp->exp_obd, filter->fo_rcvd_filp,
lcd, sizeof(*lcd), &off,
force_sync | exp->exp_need_sync);
if (force_sync)
- filter_commit_cb(exp->exp_obd, last_rcvd, NULL, err);
+ filter_commit_cb(exp->exp_obd, last_rcvd, exp, err);
}
if (err) {
log_pri = D_ERROR;
@@ -155,7 +198,7 @@ int filter_finish_transno(struct obd_export *exp, struct obd_trans_info *oti,
CDEBUG(log_pri, "wrote trans "LPU64" for client %s at #%d: err = %d\n",
last_rcvd, lcd->lcd_uuid, fed->fed_lr_idx, err);
-
+ cfs_mutex_up(&fed->fed_lcd_lock);
RETURN(rc);
}
@@ -173,7 +216,7 @@ static void init_brw_stats(struct brw_stats *brw_stats)
{
int i;
for (i = 0; i < BRW_LAST; i++)
- spin_lock_init(&brw_stats->hist[i].oh_lock);
+ cfs_spin_lock_init(&brw_stats->hist[i].oh_lock);
}
static int lprocfs_init_rw_stats(struct obd_device *obd,
@@ -183,7 +226,7 @@ static int lprocfs_init_rw_stats(struct obd_device *obd,
num_stats = (sizeof(*obd->obd_type->typ_dt_ops) / sizeof(void *)) +
LPROC_FILTER_LAST - 1;
- *stats = lprocfs_alloc_stats(num_stats, 0);
+ *stats = lprocfs_alloc_stats(num_stats, LPROCFS_STATS_FLAG_NOPERCPU);
if (*stats == NULL)
return -ENOMEM;
@@ -202,19 +245,21 @@ static int filter_export_stats_init(struct obd_device *obd,
struct obd_export *exp,
void *client_nid)
{
- struct filter_export_data *fed = &exp->exp_filter_data;
int rc, newnid = 0;
ENTRY;
- init_brw_stats(&fed->fed_brw_stats);
-
if (obd_uuid_equals(&exp->exp_client_uuid, &obd->obd_uuid))
/* Self-export gets no proc entry */
RETURN(0);
rc = lprocfs_exp_setup(exp, client_nid, &newnid);
- if (rc)
+ if (rc) {
+ /* Mask error for already created
+ * /proc entries */
+ if (rc == -EALREADY)
+ rc = 0;
RETURN(rc);
+ }
if (newnid) {
struct nid_stat *tmp = exp->exp_nid_stats;
@@ -222,7 +267,7 @@ static int filter_export_stats_init(struct obd_device *obd,
OBD_ALLOC(tmp->nid_brw_stats, sizeof(struct brw_stats));
if (tmp->nid_brw_stats == NULL)
- RETURN(-ENOMEM);
+ GOTO(clean, rc = -ENOMEM);
init_brw_stats(tmp->nid_brw_stats);
rc = lprocfs_seq_create(exp->exp_nid_stats->nid_proc, "brw_stats",
@@ -233,15 +278,30 @@ static int filter_export_stats_init(struct obd_device *obd,
rc = lprocfs_init_rw_stats(obd, &exp->exp_nid_stats->nid_stats);
if (rc)
- RETURN(rc);
+ GOTO(clean, rc);
rc = lprocfs_register_stats(tmp->nid_proc, "stats",
tmp->nid_stats);
if (rc)
- RETURN(rc);
+ GOTO(clean, rc);
+ /* Always add in ldlm_stats */
+ tmp->nid_ldlm_stats =
+ lprocfs_alloc_stats(LDLM_LAST_OPC - LDLM_FIRST_OPC,
+ LPROCFS_STATS_FLAG_NOPERCPU);
+ if (tmp->nid_ldlm_stats == NULL)
+ GOTO(clean, rc = -ENOMEM);
+
+ lprocfs_init_ldlm_stats(tmp->nid_ldlm_stats);
+ rc = lprocfs_register_stats(tmp->nid_proc, "ldlm_stats",
+ tmp->nid_ldlm_stats);
+ if (rc)
+ GOTO(clean, rc);
}
RETURN(0);
+ clean:
+ lprocfs_exp_cleanup(exp);
+ return rc;
}
/* Add client data to the FILTER. We use a bitmap to locate a free space
@@ -269,20 +329,20 @@ static int filter_client_add(struct obd_device *obd, struct obd_export *exp,
* there's no need for extra complication here
*/
if (new_client) {
- cl_idx = find_first_zero_bit(bitmap, LR_MAX_CLIENTS);
+ cl_idx = cfs_find_first_zero_bit(bitmap, LR_MAX_CLIENTS);
repeat:
if (cl_idx >= LR_MAX_CLIENTS) {
CERROR("no room for %u client - fix LR_MAX_CLIENTS\n",
cl_idx);
RETURN(-EOVERFLOW);
}
- if (test_and_set_bit(cl_idx, bitmap)) {
- cl_idx = find_next_zero_bit(bitmap, LR_MAX_CLIENTS,
- cl_idx);
+ if (cfs_test_and_set_bit(cl_idx, bitmap)) {
+ cl_idx = cfs_find_next_zero_bit(bitmap, LR_MAX_CLIENTS,
+ cl_idx);
goto repeat;
}
} else {
- if (test_and_set_bit(cl_idx, bitmap)) {
+ if (cfs_test_and_set_bit(cl_idx, bitmap)) {
CERROR("FILTER client %d: bit already set in bitmap!\n",
cl_idx);
LBUG();
@@ -292,6 +352,7 @@ static int filter_client_add(struct obd_device *obd, struct obd_export *exp,
fed->fed_lr_idx = cl_idx;
fed->fed_lr_off = le32_to_cpu(filter->fo_fsd->lsd_client_start) +
cl_idx * le16_to_cpu(filter->fo_fsd->lsd_client_size);
+ cfs_init_mutex(&fed->fed_lcd_lock);
LASSERTF(fed->fed_lr_off > 0, "fed_lr_off = %llu\n", fed->fed_lr_off);
CDEBUG(D_INFO, "client at index %d (%llu) with UUID '%s' added\n",
@@ -315,12 +376,16 @@ static int filter_client_add(struct obd_device *obd, struct obd_export *exp,
rc = PTR_ERR(handle);
CERROR("unable to start transaction: rc %d\n", rc);
} else {
+ fed->fed_lcd->lcd_last_epoch =
+ filter->fo_fsd->lsd_start_epoch;
+ exp->exp_last_request_time = cfs_time_current_sec();
rc = fsfilt_add_journal_cb(obd, 0, handle,
- target_client_add_cb, exp);
+ target_client_add_cb,
+ class_export_cb_get(exp));
if (rc == 0) {
- spin_lock(&exp->exp_lock);
+ cfs_spin_lock(&exp->exp_lock);
exp->exp_need_sync = 1;
- spin_unlock(&exp->exp_lock);
+ cfs_spin_unlock(&exp->exp_lock);
}
rc = fsfilt_write_record(obd, filter->fo_rcvd_filp,
fed->fed_lcd,
@@ -341,31 +406,33 @@ static int filter_client_add(struct obd_device *obd, struct obd_export *exp,
RETURN(0);
}
+struct lsd_client_data zero_lcd; /* globals are implicitly zeroed */
+
static int filter_client_free(struct obd_export *exp)
{
struct filter_export_data *fed = &exp->exp_filter_data;
struct filter_obd *filter = &exp->exp_obd->u.filter;
struct obd_device *obd = exp->exp_obd;
- struct lsd_client_data zero_lcd;
struct lvfs_run_ctxt saved;
+ struct lsd_client_data *lcd = fed->fed_lcd;
int rc;
loff_t off;
ENTRY;
- if (fed->fed_lcd == NULL)
+ if (lcd == NULL)
RETURN(0);
/* XXX if lcd_uuid were a real obd_uuid, I could use obd_uuid_equals */
- if (strcmp(fed->fed_lcd->lcd_uuid, obd->obd_uuid.uuid ) == 0)
+ if (strcmp(lcd->lcd_uuid, obd->obd_uuid.uuid ) == 0)
GOTO(free, 0);
- CDEBUG(D_INFO, "freeing client at idx %u, offset %lld with UUID '%s'\n",
- fed->fed_lr_idx, fed->fed_lr_off, fed->fed_lcd->lcd_uuid);
-
LASSERT(filter->fo_last_rcvd_slots != NULL);
off = fed->fed_lr_off;
+ CDEBUG(D_INFO, "freeing client at idx %u, offset %lld with UUID '%s'\n",
+ fed->fed_lr_idx, fed->fed_lr_off, lcd->lcd_uuid);
+
/* Don't clear fed_lr_idx here as it is likely also unset. At worst
* we leak a client slot that will be cleaned on the next recovery. */
if (off <= 0) {
@@ -376,42 +443,43 @@ static int filter_client_free(struct obd_export *exp)
/* Clear the bit _after_ zeroing out the client so we don't
race with filter_client_add and zero out new clients.*/
- if (!test_bit(fed->fed_lr_idx, filter->fo_last_rcvd_slots)) {
+ if (!cfs_test_bit(fed->fed_lr_idx, filter->fo_last_rcvd_slots)) {
CERROR("FILTER client %u: bit already clear in bitmap!!\n",
fed->fed_lr_idx);
LBUG();
}
- if (!(exp->exp_flags & OBD_OPT_FAILOVER)) {
- memset(&zero_lcd, 0, sizeof zero_lcd);
- push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
- rc = fsfilt_write_record(obd, filter->fo_rcvd_filp, &zero_lcd,
- sizeof(zero_lcd), &off,
- (!exp->exp_libclient ||
- exp->exp_need_sync));
- if (rc == 0)
- /* update server's transno */
- filter_update_server_data(obd, filter->fo_rcvd_filp,
- filter->fo_fsd,
- !exp->exp_libclient);
- pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+ push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+ /* Make sure the server's last_transno is up to date.
+ * This should be done before zeroing client slot so last_transno will
+ * be in server data or in client data in case of failure */
+ filter_update_server_data(obd, filter->fo_rcvd_filp, filter->fo_fsd);
+
+ cfs_mutex_down(&fed->fed_lcd_lock);
+ rc = fsfilt_write_record(obd, filter->fo_rcvd_filp, &zero_lcd,
+ sizeof(zero_lcd), &off, 0);
+ fed->fed_lcd = NULL;
+ cfs_mutex_up(&fed->fed_lcd_lock);
+ pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
- CDEBUG(rc == 0 ? D_INFO : D_ERROR,
- "zeroing out client %s at idx %u (%llu) in %s rc %d\n",
- fed->fed_lcd->lcd_uuid, fed->fed_lr_idx, fed->fed_lr_off,
- LAST_RCVD, rc);
- }
+ CDEBUG(rc == 0 ? D_INFO : D_ERROR,
+ "zero out client %s at idx %u/%llu in %s, rc %d\n",
+ lcd->lcd_uuid, fed->fed_lr_idx, fed->fed_lr_off,
+ LAST_RCVD, rc);
- if (!test_and_clear_bit(fed->fed_lr_idx, filter->fo_last_rcvd_slots)) {
+ if (!cfs_test_and_clear_bit(fed->fed_lr_idx,
+ filter->fo_last_rcvd_slots)) {
CERROR("FILTER client %u: bit already clear in bitmap!!\n",
fed->fed_lr_idx);
LBUG();
}
-
- EXIT;
+ OBD_FREE_PTR(lcd);
+ RETURN(0);
free:
- OBD_FREE_PTR(fed->fed_lcd);
+ cfs_mutex_down(&fed->fed_lcd_lock);
fed->fed_lcd = NULL;
+ cfs_mutex_up(&fed->fed_lcd_lock);
+ OBD_FREE_PTR(lcd);
return 0;
}
@@ -425,7 +493,7 @@ static inline void filter_fmd_put_nolock(struct filter_export_data *fed,
/* XXX when we have persistent reservations and the handle
* is stored herein we need to drop it here. */
fed->fed_mod_count--;
- list_del(&fmd->fmd_list);
+ cfs_list_del(&fmd->fmd_list);
OBD_SLAB_FREE(fmd, ll_fmd_cachep, sizeof(*fmd));
}
}
@@ -439,9 +507,9 @@ void filter_fmd_put(struct obd_export *exp, struct filter_mod_data *fmd)
return;
fed = &exp->exp_filter_data;
- spin_lock(&fed->fed_lock);
+ cfs_spin_lock(&fed->fed_lock);
filter_fmd_put_nolock(fed, fmd); /* caller reference */
- spin_unlock(&fed->fed_lock);
+ cfs_spin_unlock(&fed->fed_lock);
}
/* expire entries from the end of the list if there are too many
@@ -452,25 +520,25 @@ static void filter_fmd_expire_nolock(struct filter_obd *filter,
{
struct filter_mod_data *fmd, *tmp;
- list_for_each_entry_safe(fmd, tmp, &fed->fed_mod_list, fmd_list) {
+ cfs_list_for_each_entry_safe(fmd, tmp, &fed->fed_mod_list, fmd_list) {
if (fmd == keep)
break;
- if (time_before(jiffies, fmd->fmd_expire) &&
+ if (cfs_time_before(jiffies, fmd->fmd_expire) &&
fed->fed_mod_count < filter->fo_fmd_max_num)
break;
- list_del_init(&fmd->fmd_list);
+ cfs_list_del_init(&fmd->fmd_list);
filter_fmd_put_nolock(fed, fmd); /* list reference */
}
}
void filter_fmd_expire(struct obd_export *exp)
{
- spin_lock(&exp->exp_filter_data.fed_lock);
+ cfs_spin_lock(&exp->exp_filter_data.fed_lock);
filter_fmd_expire_nolock(&exp->exp_obd->u.filter,
&exp->exp_filter_data, NULL);
- spin_unlock(&exp->exp_filter_data.fed_lock);
+ cfs_spin_unlock(&exp->exp_filter_data.fed_lock);
}
/* find specified objid, group in export fmd list.
@@ -483,11 +551,11 @@ static struct filter_mod_data *filter_fmd_find_nolock(struct filter_obd *filter,
LASSERT_SPIN_LOCKED(&fed->fed_lock);
- list_for_each_entry_reverse(fmd, &fed->fed_mod_list, fmd_list) {
+ cfs_list_for_each_entry_reverse(fmd, &fed->fed_mod_list, fmd_list) {
if (fmd->fmd_id == objid && fmd->fmd_gr == group) {
found = fmd;
- list_del(&fmd->fmd_list);
- list_add_tail(&fmd->fmd_list, &fed->fed_mod_list);
+ cfs_list_del(&fmd->fmd_list);
+ cfs_list_add_tail(&fmd->fmd_list, &fed->fed_mod_list);
fmd->fmd_expire = jiffies + filter->fo_fmd_max_age;
break;
}
@@ -504,12 +572,12 @@ struct filter_mod_data *filter_fmd_find(struct obd_export *exp,
{
struct filter_mod_data *fmd;
- spin_lock(&exp->exp_filter_data.fed_lock);
+ cfs_spin_lock(&exp->exp_filter_data.fed_lock);
fmd = filter_fmd_find_nolock(&exp->exp_obd->u.filter,
&exp->exp_filter_data, objid, group);
if (fmd)
fmd->fmd_refcount++; /* caller reference */
- spin_unlock(&exp->exp_filter_data.fed_lock);
+ cfs_spin_unlock(&exp->exp_filter_data.fed_lock);
return fmd;
}
@@ -525,13 +593,14 @@ struct filter_mod_data *filter_fmd_get(struct obd_export *exp,
struct filter_export_data *fed = &exp->exp_filter_data;
struct filter_mod_data *found = NULL, *fmd_new = NULL;
- OBD_SLAB_ALLOC(fmd_new, ll_fmd_cachep, CFS_ALLOC_IO, sizeof(*fmd_new));
+ OBD_SLAB_ALLOC_PTR_GFP(fmd_new, ll_fmd_cachep, CFS_ALLOC_IO);
- spin_lock(&fed->fed_lock);
+ cfs_spin_lock(&fed->fed_lock);
found = filter_fmd_find_nolock(&exp->exp_obd->u.filter,fed,objid,group);
if (fmd_new) {
if (found == NULL) {
- list_add_tail(&fmd_new->fmd_list, &fed->fed_mod_list);
+ cfs_list_add_tail(&fmd_new->fmd_list,
+ &fed->fed_mod_list);
fmd_new->fmd_id = objid;
fmd_new->fmd_gr = group;
fmd_new->fmd_refcount++; /* list reference */
@@ -547,7 +616,7 @@ struct filter_mod_data *filter_fmd_get(struct obd_export *exp,
exp->exp_obd->u.filter.fo_fmd_max_age;
}
- spin_unlock(&fed->fed_lock);
+ cfs_spin_unlock(&fed->fed_lock);
return found;
}
@@ -561,13 +630,13 @@ static void filter_fmd_drop(struct obd_export *exp, obd_id objid, obd_gr group)
{
struct filter_mod_data *found = NULL;
- spin_lock(&exp->exp_filter_data.fed_lock);
+ cfs_spin_lock(&exp->exp_filter_data.fed_lock);
found = filter_fmd_find_nolock(&exp->exp_filter_data, objid, group);
if (found) {
- list_del_init(&found->fmd_list);
+ cfs_list_del_init(&found->fmd_list);
filter_fmd_put_nolock(&exp->exp_filter_data, found);
}
- spin_unlock(&exp->exp_filter_data.fed_lock);
+ cfs_spin_unlock(&exp->exp_filter_data.fed_lock);
}
#else
#define filter_fmd_drop(exp, objid, group)
@@ -579,29 +648,29 @@ static void filter_fmd_cleanup(struct obd_export *exp)
struct filter_export_data *fed = &exp->exp_filter_data;
struct filter_mod_data *fmd = NULL, *tmp;
- spin_lock(&fed->fed_lock);
- list_for_each_entry_safe(fmd, tmp, &fed->fed_mod_list, fmd_list) {
- list_del_init(&fmd->fmd_list);
+ cfs_spin_lock(&fed->fed_lock);
+ cfs_list_for_each_entry_safe(fmd, tmp, &fed->fed_mod_list, fmd_list) {
+ cfs_list_del_init(&fmd->fmd_list);
filter_fmd_put_nolock(fed, fmd);
}
- spin_unlock(&fed->fed_lock);
+ cfs_spin_unlock(&fed->fed_lock);
}
static int filter_init_export(struct obd_export *exp)
{
- spin_lock_init(&exp->exp_filter_data.fed_lock);
+ cfs_spin_lock_init(&exp->exp_filter_data.fed_lock);
CFS_INIT_LIST_HEAD(&exp->exp_filter_data.fed_mod_list);
- spin_lock(&exp->exp_lock);
+ cfs_spin_lock(&exp->exp_lock);
exp->exp_connecting = 1;
- spin_unlock(&exp->exp_lock);
+ cfs_spin_unlock(&exp->exp_lock);
return ldlm_init_export(exp);
}
static int filter_free_server_data(struct filter_obd *filter)
{
- OBD_FREE(filter->fo_fsd, sizeof(*filter->fo_fsd));
+ OBD_FREE_PTR(filter->fo_fsd);
filter->fo_fsd = NULL;
OBD_FREE(filter->fo_last_rcvd_slots, LR_MAX_CLIENTS / 8);
filter->fo_last_rcvd_slots = NULL;
@@ -610,7 +679,7 @@ static int filter_free_server_data(struct filter_obd *filter)
/* assumes caller is already in kernel ctxt */
int filter_update_server_data(struct obd_device *obd, struct file *filp,
- struct lr_server_data *fsd, int force_sync)
+ struct lr_server_data *fsd)
{
loff_t off = 0;
int rc;
@@ -622,8 +691,7 @@ int filter_update_server_data(struct obd_device *obd, struct file *filp,
CDEBUG(D_INODE, "server last_mount: "LPU64"\n",
le64_to_cpu(fsd->lsd_mount_count));
- fsd->lsd_compat14 = fsd->lsd_last_transno;
- rc = fsfilt_write_record(obd, filp, fsd, sizeof(*fsd), &off, force_sync);
+ rc = fsfilt_write_record(obd, filp, fsd, sizeof(*fsd), &off, 0);
if (rc)
CERROR("error writing lr_server_data: rc = %d\n", rc);
@@ -666,6 +734,7 @@ static int filter_init_server_data(struct obd_device *obd, struct file * filp)
struct inode *inode = filp->f_dentry->d_inode;
unsigned long last_rcvd_size = i_size_read(inode);
__u64 mount_count;
+ __u32 start_epoch;
int cl_idx;
loff_t off = 0;
int rc;
@@ -698,7 +767,10 @@ static int filter_init_server_data(struct obd_device *obd, struct file * filp)
fsd->lsd_client_size = cpu_to_le16(LR_CLIENT_SIZE);
fsd->lsd_subdir_count = cpu_to_le16(FILTER_SUBDIR_COUNT);
filter->fo_subdir_count = FILTER_SUBDIR_COUNT;
- fsd->lsd_feature_incompat = cpu_to_le32(OBD_INCOMPAT_OST);
+ /* OBD_COMPAT_OST is set in filter_connect_internal when the
+ * MDS first connects and assigns the OST index number. */
+ fsd->lsd_feature_incompat = cpu_to_le32(OBD_INCOMPAT_COMMON_LR|
+ OBD_INCOMPAT_OST);
} else {
rc = fsfilt_read_record(obd, filp, fsd, sizeof(*fsd), &off);
if (rc) {
@@ -721,6 +793,10 @@ static int filter_init_server_data(struct obd_device *obd, struct file * filp)
cpu_to_le32(OBD_INCOMPAT_COMMON_LR)))
fsd->lsd_last_transno = fsd->lsd_compat14;
/* end COMPAT_146 */
+ /* OBD_COMPAT_OST is set in filter_connect_internal when the
+ * MDS first connects and assigns the OST index number. */
+ fsd->lsd_feature_incompat |= cpu_to_le32(OBD_INCOMPAT_COMMON_LR|
+ OBD_INCOMPAT_OST);
}
if (fsd->lsd_feature_incompat & ~cpu_to_le32(FILTER_INCOMPAT_SUPP)) {
@@ -737,7 +813,11 @@ static int filter_init_server_data(struct obd_device *obd, struct file * filp)
GOTO(err_fsd, rc = -EINVAL);
}
- CDEBUG(D_INODE, "%s: server last_transno : "LPU64"\n",
+ start_epoch = le32_to_cpu(fsd->lsd_start_epoch);
+
+ CDEBUG(D_INODE, "%s: server start_epoch : %#x\n",
+ obd->obd_name, start_epoch);
+ CDEBUG(D_INODE, "%s: server last_transno : "LPX64"\n",
obd->obd_name, le64_to_cpu(fsd->lsd_last_transno));
CDEBUG(D_INODE, "%s: server mount_count: "LPU64"\n",
obd->obd_name, mount_count + 1);
@@ -789,6 +869,8 @@ static int filter_init_server_data(struct obd_device *obd, struct file * filp)
continue;
}
+ check_lcd(obd->obd_name, cl_idx, lcd);
+
last_rcvd = le64_to_cpu(lcd->lcd_last_transno);
/* These exports are cleaned up by filter_disconnect(), so they
@@ -817,12 +899,16 @@ static int filter_init_server_data(struct obd_device *obd, struct file * filp)
/* can't fail for existing client */
LASSERTF(rc == 0, "rc = %d\n", rc);
- lcd = NULL;
- spin_lock(&exp->exp_lock);
+ /* VBR: set export last committed */
+ exp->exp_last_committed = last_rcvd;
+ cfs_spin_lock(&exp->exp_lock);
exp->exp_connecting = 0;
exp->exp_in_recovery = 0;
- spin_unlock(&exp->exp_lock);
+ cfs_spin_unlock(&exp->exp_lock);
+ cfs_spin_lock_bh(&obd->obd_processing_task_lock);
obd->obd_max_recoverable_clients++;
+ cfs_spin_unlock_bh(&obd->obd_processing_task_lock);
+ lcd = NULL;
class_export_put(exp);
}
@@ -838,22 +924,19 @@ static int filter_init_server_data(struct obd_device *obd, struct file * filp)
OBD_FREE_PTR(lcd);
obd->obd_last_committed = le64_to_cpu(fsd->lsd_last_transno);
-
- target_recovery_init(obd, ost_handle);
-
out:
filter->fo_mount_count = mount_count + 1;
fsd->lsd_mount_count = cpu_to_le64(filter->fo_mount_count);
/* save it, so mount count and last_transno is current */
- rc = filter_update_server_data(obd, filp, filter->fo_fsd, 1);
+ rc = filter_update_server_data(obd, filp, filter->fo_fsd);
if (rc)
GOTO(err_client, rc);
RETURN(0);
err_client:
- target_recovery_fini(obd);
+ class_disconnect_exports(obd);
err_fsd:
filter_free_server_data(filter);
RETURN(rc);
@@ -939,7 +1022,7 @@ static int filter_update_last_group(struct obd_device *obd, int group)
CDEBUG(D_INODE, "error reading LAST_GROUP: rc %d\n",rc);
GOTO(cleanup, rc);
}
- LASSERT(off == 0 || last_group >= FILTER_MIN_GROUPS);
+
CDEBUG(D_INODE, "%s: previous %d, new %d\n",
obd->obd_name, last_group, group);
@@ -1012,7 +1095,7 @@ static int filter_read_group_internal(struct obd_device *obd, int group,
GOTO(cleanup, rc);
}
- if (filter->fo_subdir_count) {
+ if (filter->fo_subdir_count && filter_group_is_mds(group)) {
OBD_ALLOC(tmp_subdirs, sizeof(*tmp_subdirs));
if (tmp_subdirs == NULL)
GOTO(cleanup, rc = -ENOMEM);
@@ -1076,7 +1159,7 @@ static int filter_read_group_internal(struct obd_device *obd, int group,
filter->fo_dentry_O_groups[group] = dentry;
filter->fo_last_objid_files[group] = filp;
- if (filter->fo_subdir_count) {
+ if (filter->fo_subdir_count && filter_group_is_mds(group)) {
filter->fo_dentry_O_sub[group] = *tmp_subdirs;
OBD_FREE(tmp_subdirs, sizeof(*tmp_subdirs));
}
@@ -1105,7 +1188,7 @@ static int filter_read_group_internal(struct obd_device *obd, int group,
if (new_files != NULL)
OBD_FREE(new_files, len * sizeof(*new_files));
case 3:
- if (filter->fo_subdir_count) {
+ if (filter->fo_subdir_count && filter_group_is_mds(group)) {
for (i = 0; i < filter->fo_subdir_count; i++) {
if (tmp_subdirs->dentry[i] != NULL)
dput(tmp_subdirs->dentry[i]);
@@ -1126,17 +1209,14 @@ static int filter_read_groups(struct obd_device *obd, int last_group,
struct filter_obd *filter = &obd->u.filter;
int old_count, group, rc = 0;
- down(&filter->fo_init_lock);
+ cfs_down(&filter->fo_init_lock);
old_count = filter->fo_group_count;
for (group = old_count; group <= last_group; group++) {
- if (group == 0)
- continue; /* no group zero */
-
rc = filter_read_group_internal(obd, group, create);
if (rc != 0)
break;
}
- up(&filter->fo_init_lock);
+ cfs_up(&filter->fo_init_lock);
return rc;
}
@@ -1144,7 +1224,7 @@ static int filter_read_groups(struct obd_device *obd, int last_group,
static int filter_prep_groups(struct obd_device *obd)
{
struct filter_obd *filter = &obd->u.filter;
- struct dentry *dentry, *O_dentry;
+ struct dentry *O_dentry;
struct file *filp;
int last_group, rc = 0, cleanup_phase = 0;
loff_t off = 0;
@@ -1161,57 +1241,6 @@ static int filter_prep_groups(struct obd_device *obd)
filter->fo_dentry_O = O_dentry;
cleanup_phase = 1; /* O_dentry */
- /* Lookup "R" to tell if we're on an old OST FS and need to convert
- * from O/R/<dir>/<objid> to O/0/<dir>/<objid>. This can be removed
- * some time post 1.0 when all old-style OSTs have converted along
- * with the init_objid hack. */
- dentry = ll_lookup_one_len("R", O_dentry, 1);
- if (IS_ERR(dentry))
- GOTO(cleanup, rc = PTR_ERR(dentry));
- if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) {
- struct dentry *O0_dentry = lookup_one_len("0", O_dentry, 1);
- ENTRY;
-
- CWARN("converting OST to new object layout\n");
- if (IS_ERR(O0_dentry)) {
- rc = PTR_ERR(O0_dentry);
- CERROR("error looking up O/0: rc %d\n", rc);
- GOTO(cleanup_R, rc);
- }
-
- if (O0_dentry->d_inode) {
- CERROR("Both O/R and O/0 exist. Fix manually.\n");
- GOTO(cleanup_O0, rc = -EEXIST);
- }
-
- LOCK_INODE_MUTEX(O_dentry->d_inode);
- rc = ll_vfs_rename(O_dentry->d_inode, dentry, filter->fo_vfsmnt,
- O_dentry->d_inode, O0_dentry,
- filter->fo_vfsmnt);
- UNLOCK_INODE_MUTEX(O_dentry->d_inode);
-
- if (rc) {
- CERROR("error renaming O/R to O/0: rc %d\n", rc);
- GOTO(cleanup_O0, rc);
- }
- filter->fo_fsd->lsd_feature_incompat |=
- cpu_to_le32(OBD_INCOMPAT_GROUPS);
- rc = filter_update_server_data(obd, filter->fo_rcvd_filp,
- filter->fo_fsd, 1);
- GOTO(cleanup_O0, rc);
-
- cleanup_O0:
- f_dput(O0_dentry);
- cleanup_R:
- f_dput(dentry);
- if (rc)
- GOTO(cleanup, rc);
- } else {
- f_dput(dentry);
- }
-
- cleanup_phase = 2; /* groups */
-
/* we have to initialize all groups before first connections from
* clients because they may send create/destroy for any group -bzzz */
filp = filp_open("LAST_GROUP", O_CREAT | O_RDWR, 0700);
@@ -1219,21 +1248,19 @@ static int filter_prep_groups(struct obd_device *obd)
CERROR("cannot create LAST_GROUP: rc = %ld\n", PTR_ERR(filp));
GOTO(cleanup, rc = PTR_ERR(filp));
}
- cleanup_phase = 3; /* filp */
+ cleanup_phase = 2; /* filp */
rc = fsfilt_read_record(obd, filp, &last_group, sizeof(__u32), &off);
if (rc) {
CDEBUG(D_INODE, "error reading LAST_GROUP: rc %d\n", rc);
GOTO(cleanup, rc);
}
- if (off == 0) {
- last_group = FILTER_MIN_GROUPS;
- } else {
- LASSERT(last_group >= FILTER_MIN_GROUPS);
- }
+
+ if (off == 0)
+ last_group = FILTER_GROUP_MDS0;
CWARN("%s: initialize groups [%d,%d]\n", obd->obd_name,
- FILTER_MIN_GROUPS, last_group);
+ FILTER_GROUP_MDS0, last_group);
filter->fo_committed_group = last_group;
rc = filter_read_groups(obd, last_group, 1);
if (rc)
@@ -1244,11 +1271,10 @@ static int filter_prep_groups(struct obd_device *obd)
cleanup:
switch (cleanup_phase) {
- case 3:
- filp_close(filp, 0);
case 2:
- filter_cleanup_groups(obd);
+ filp_close(filp, 0);
case 1:
+ filter_cleanup_groups(obd);
f_dput(filter->fo_dentry_O);
filter->fo_dentry_O = NULL;
default:
@@ -1291,11 +1317,17 @@ static int filter_prep(struct obd_device *obd)
GOTO(err_filp, rc = -EOPNOTSUPP);
}
+ /** lu_target has very limited use in filter now */
+ lut_init(NULL, &filter->fo_lut, obd, NULL);
+
rc = filter_init_server_data(obd, file);
if (rc) {
CERROR("cannot read %s: rc = %d\n", LAST_RCVD, rc);
GOTO(err_filp, rc);
}
+
+ target_recovery_init(&filter->fo_lut, ost_handle);
+
/* open and create health check io file*/
file = filp_open(HEALTH_CHECK, O_RDWR | O_CREAT, 0644);
if (IS_ERR(file)) {
@@ -1304,7 +1336,7 @@ static int filter_prep(struct obd_device *obd)
HEALTH_CHECK, rc);
GOTO(err_server_data, rc);
}
- filter->fo_health_check_filp = file;
+ filter->fo_obt.obt_health_check_filp = file;
if (!S_ISREG(file->f_dentry->d_inode->i_mode)) {
CERROR("%s is not a regular file!: mode = %o\n", HEALTH_CHECK,
file->f_dentry->d_inode->i_mode);
@@ -1323,9 +1355,9 @@ out:
return(rc);
err_health_check:
- if (filp_close(filter->fo_health_check_filp, 0))
+ if (filp_close(filter->fo_obt.obt_health_check_filp, 0))
CERROR("can't close %s after error\n", HEALTH_CHECK);
- filter->fo_health_check_filp = NULL;
+ filter->fo_obt.obt_health_check_filp = NULL;
err_server_data:
target_recovery_fini(obd);
filter_free_server_data(filter);
@@ -1349,11 +1381,11 @@ static void filter_post(struct obd_device *obd)
push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
rc = filter_update_server_data(obd, filter->fo_rcvd_filp,
- filter->fo_fsd, 0);
+ filter->fo_fsd);
if (rc)
CERROR("error writing server data: rc = %d\n", rc);
- for (i = 1; i < filter->fo_group_count; i++) {
+ for (i = 0; i < filter->fo_group_count; i++) {
rc = filter_update_last_objid(obd, i,
(i == filter->fo_group_count - 1));
if (rc)
@@ -1366,8 +1398,8 @@ static void filter_post(struct obd_device *obd)
if (rc)
CERROR("error closing %s: rc = %d\n", LAST_RCVD, rc);
- rc = filp_close(filter->fo_health_check_filp, 0);
- filter->fo_health_check_filp = NULL;
+ rc = filp_close(filter->fo_obt.obt_health_check_filp, 0);
+ filter->fo_obt.obt_health_check_filp = NULL;
if (rc)
CERROR("error closing %s: rc = %d\n", HEALTH_CHECK, rc);
@@ -1385,9 +1417,9 @@ static void filter_set_last_id(struct filter_obd *filter,
LASSERT(filter->fo_fsd != NULL);
LASSERT(group <= filter->fo_group_count);
- spin_lock(&filter->fo_objidlock);
+ cfs_spin_lock(&filter->fo_objidlock);
filter->fo_last_objids[group] = id;
- spin_unlock(&filter->fo_objidlock);
+ cfs_spin_unlock(&filter->fo_objidlock);
}
obd_id filter_last_id(struct filter_obd *filter, obd_gr group)
@@ -1395,12 +1427,12 @@ obd_id filter_last_id(struct filter_obd *filter, obd_gr group)
obd_id id;
LASSERT(filter->fo_fsd != NULL);
LASSERT(group <= filter->fo_group_count);
+ LASSERT(filter->fo_last_objids != NULL);
/* FIXME: object groups */
- spin_lock(&filter->fo_objidlock);
+ cfs_spin_lock(&filter->fo_objidlock);
id = filter->fo_last_objids[group];
- spin_unlock(&filter->fo_objidlock);
-
+ cfs_spin_unlock(&filter->fo_objidlock);
return id;
}
@@ -1417,8 +1449,7 @@ struct dentry *filter_parent(struct obd_device *obd, obd_gr group, obd_id objid)
struct filter_subdirs *subdirs;
LASSERT(group < filter->fo_group_count); /* FIXME: object groups */
- if ((group > 0 && group < FILTER_GROUP_MDS0) ||
- filter->fo_subdir_count == 0)
+ if (!filter_group_is_mds(group) || filter->fo_subdir_count == 0)
return filter->fo_dentry_O_groups[group];
subdirs = &filter->fo_dentry_O_sub[group];
@@ -1515,9 +1546,8 @@ struct dentry *filter_fid2dentry(struct obd_device *obd,
}
static int filter_prepare_destroy(struct obd_device *obd, obd_id objid,
- obd_id group)
+ obd_id group, struct lustre_handle *lockh)
{
- struct lustre_handle lockh;
int flags = LDLM_AST_DISCARD_DATA, rc;
struct ldlm_res_id res_id;
ldlm_policy_data_t policy = { .l_extent = { 0, OBD_OBJECT_EOF } };
@@ -1529,15 +1559,19 @@ static int filter_prepare_destroy(struct obd_device *obd, obd_id objid,
rc = ldlm_cli_enqueue_local(obd->obd_namespace, &res_id, LDLM_EXTENT,
&policy, LCK_PW, &flags, ldlm_blocking_ast,
ldlm_completion_ast, NULL, NULL, 0, NULL,
- &lockh);
-
- /* We only care about the side-effects, just drop the lock. */
- if (rc == ELDLM_OK)
- ldlm_lock_decref(&lockh, LCK_PW);
-
+ lockh);
+ if (rc != ELDLM_OK)
+ lockh->cookie = 0;
RETURN(rc);
}
+static void filter_fini_destroy(struct obd_device *obd,
+ struct lustre_handle *lockh)
+{
+ if (lustre_handle_is_used(lockh))
+ ldlm_lock_decref(lockh, LCK_PW);
+}
+
/* This is vfs_unlink() without down(i_sem). If we call regular vfs_unlink()
* we have 2.6 lock ordering issues with filter_commitrw_write() as it takes
* i_sem before starting a handle, while filter_destroy() + vfs_unlink do the
@@ -1637,7 +1671,7 @@ static enum interval_iter filter_intent_cb(struct interval_node *n,
if (interval_high(n) <= size)
return INTERVAL_ITER_STOP;
- list_for_each_entry(lck, &node->li_group, l_sl_policy) {
+ cfs_list_for_each_entry(lck, &node->li_group, l_sl_policy) {
/* Don't send glimpse ASTs to liblustre clients.
* They aren't listening for them, and they do
* entirely synchronous I/O anyways. */
@@ -1714,13 +1748,14 @@ static int filter_intent_policy(struct ldlm_namespace *ns,
/* FIXME: we should change the policy function slightly, to not make
* this list at all, since we just turn around and free it */
- while (!list_empty(&rpc_list)) {
+ while (!cfs_list_empty(&rpc_list)) {
struct ldlm_lock *wlock =
- list_entry(rpc_list.next, struct ldlm_lock, l_cp_ast);
+ cfs_list_entry(rpc_list.next, struct ldlm_lock,
+ l_cp_ast);
LASSERT((lock->l_flags & LDLM_FL_AST_SENT) == 0);
LASSERT(lock->l_flags & LDLM_FL_CP_REQD);
lock->l_flags &= ~LDLM_FL_CP_REQD;
- list_del_init(&wlock->l_cp_ast);
+ cfs_list_del_init(&wlock->l_cp_ast);
LDLM_LOCK_RELEASE(wlock);
}
@@ -1783,7 +1818,7 @@ static int filter_intent_policy(struct ldlm_namespace *ns,
*
* Of course, this will all disappear when we switch to
* taking liblustre locks on the OST. */
- ldlm_res_lvbo_update(res, NULL, 0, 1);
+ ldlm_res_lvbo_update(res, NULL, 1);
}
RETURN(ELDLM_LOCK_ABORTED);
}
@@ -1810,7 +1845,7 @@ static int filter_intent_policy(struct ldlm_namespace *ns,
* sending ast is not handled. This can result in lost client writes.
*/
if (rc != 0)
- ldlm_res_lvbo_update(res, NULL, 0, 1);
+ ldlm_res_lvbo_update(res, NULL, 1);
lock_res(res);
*reply_lvb = *res_lvb;
@@ -1873,6 +1908,30 @@ static void filter_iobuf_pool_done(struct filter_obd *filter)
EXIT;
}
+static int filter_adapt_sptlrpc_conf(struct obd_device *obd, int initial)
+{
+ struct filter_obd *filter = &obd->u.filter;
+ struct sptlrpc_rule_set tmp_rset;
+ int rc;
+
+ sptlrpc_rule_set_init(&tmp_rset);
+ rc = sptlrpc_conf_target_get_rules(obd, &tmp_rset, initial);
+ if (rc) {
+ CERROR("obd %s: failed get sptlrpc rules: %d\n",
+ obd->obd_name, rc);
+ return rc;
+ }
+
+ sptlrpc_target_update_exp_flavor(obd, &tmp_rset);
+
+ cfs_write_lock(&filter->fo_sptlrpc_lock);
+ sptlrpc_rule_set_free(&filter->fo_sptlrpc_rset);
+ filter->fo_sptlrpc_rset = tmp_rset;
+ cfs_write_unlock(&filter->fo_sptlrpc_lock);
+
+ return 0;
+}
+
/*
* pre-allocate pool of iobuf's to be used by filter_{prep,commit}rw_write().
*/
@@ -1899,7 +1958,8 @@ static int filter_iobuf_pool_init(struct filter_obd *filter)
* If we haven't allocated a pool entry for this thread before, do so now. */
void *filter_iobuf_get(struct filter_obd *filter, struct obd_trans_info *oti)
{
- int thread_id = oti ? oti->oti_thread_id : -1;
+ int thread_id = (oti && oti->oti_thread) ?
+ oti->oti_thread->t_id : -1;
struct filter_iobuf *pool = NULL;
struct filter_iobuf **pool_place = NULL;
@@ -1972,7 +2032,11 @@ int filter_common_setup(struct obd_device *obd, struct lustre_cfg* lcfg,
if (rc != 0)
GOTO(err_ops, rc);
- LASSERT(!lvfs_check_rdonly(lvfs_sbdev(mnt->mnt_sb)));
+ if (lvfs_check_rdonly(lvfs_sbdev(mnt->mnt_sb))) {
+ CERROR("%s: Underlying device is marked as read-only. "
+ "Setup failed\n", obd->obd_name);
+ GOTO(err_ops, rc = -EROFS);
+ }
/* failover is the default */
obd->obd_replayable = 1;
@@ -1998,39 +2062,36 @@ int filter_common_setup(struct obd_device *obd, struct lustre_cfg* lcfg,
obd->obd_lvfs_ctxt.fs = get_ds();
obd->obd_lvfs_ctxt.cb_ops = filter_lvfs_ops;
- init_mutex(&filter->fo_init_lock);
+ cfs_init_mutex(&filter->fo_init_lock);
filter->fo_committed_group = 0;
-
- rc = filter_prep(obd);
- if (rc)
- GOTO(err_ops, rc);
-
filter->fo_destroys_in_progress = 0;
for (i = 0; i < 32; i++)
- sema_init(&filter->fo_create_locks[i], 1);
+ cfs_sema_init(&filter->fo_create_locks[i], 1);
- spin_lock_init(&filter->fo_translock);
- spin_lock_init(&filter->fo_objidlock);
+ cfs_spin_lock_init(&filter->fo_translock);
+ cfs_spin_lock_init(&filter->fo_objidlock);
CFS_INIT_LIST_HEAD(&filter->fo_export_list);
- sema_init(&filter->fo_alloc_lock, 1);
+ cfs_sema_init(&filter->fo_alloc_lock, 1);
init_brw_stats(&filter->fo_filter_stats);
filter->fo_read_cache = 1; /* enable read-only cache by default */
- filter->fo_writethrough_cache = 0; /* disable writethrough cache */
+ filter->fo_writethrough_cache = 1; /* enable writethrough cache */
filter->fo_readcache_max_filesize = FILTER_MAX_CACHE_SIZE;
filter->fo_fmd_max_num = FILTER_FMD_MAX_NUM_DEFAULT;
filter->fo_fmd_max_age = FILTER_FMD_MAX_AGE_DEFAULT;
+ rc = filter_prep(obd);
+ if (rc)
+ GOTO(err_ops, rc);
+
CFS_INIT_LIST_HEAD(&filter->fo_llog_list);
- spin_lock_init(&filter->fo_llog_list_lock);
+ cfs_spin_lock_init(&filter->fo_llog_list_lock);
- filter->fo_sptlrpc_lock = RW_LOCK_UNLOCKED;
- sptlrpc_rule_set_init(&filter->fo_sptlrpc_rset);
+ filter->fo_fl_oss_capa = 1;
- filter->fo_fl_oss_capa = 0;
CFS_INIT_LIST_HEAD(&filter->fo_capa_keys);
filter->fo_capa_hash = init_capa_hash();
if (filter->fo_capa_hash == NULL)
- GOTO(err_ops, rc = -ENOMEM);
+ GOTO(err_post, rc = -ENOMEM);
sprintf(ns_name, "filter-%s", obd->obd_uuid.uuid);
obd->obd_namespace = ldlm_namespace_new(obd, ns_name, LDLM_NAMESPACE_SERVER,
@@ -2044,12 +2105,17 @@ int filter_common_setup(struct obd_device *obd, struct lustre_cfg* lcfg,
ptlrpc_init_client(LDLM_CB_REQUEST_PORTAL, LDLM_CB_REPLY_PORTAL,
"filter_ldlm_cb_client", &obd->obd_ldlm_client);
- rc = obd_llog_init(obd, &obd->obd_olg, obd, 1, NULL, NULL);
+ rc = obd_llog_init(obd, &obd->obd_olg, obd, NULL);
if (rc) {
CERROR("failed to setup llogging subsystems\n");
GOTO(err_post, rc);
}
+ cfs_rwlock_init(&filter->fo_sptlrpc_lock);
+ sptlrpc_rule_set_init(&filter->fo_sptlrpc_rset);
+ /* do this after llog has been initialized */
+ filter_adapt_sptlrpc_conf(obd, 1);
+
rc = lquota_setup(filter_quota_interface_ref, obd);
if (rc)
GOTO(err_post, rc);
@@ -2076,17 +2142,14 @@ int filter_common_setup(struct obd_device *obd, struct lustre_cfg* lcfg,
if (obd->obd_recovering) {
LCONSOLE_WARN("OST %s now serving %s (%s%s%s), but will be in "
"recovery for at least %d:%.02d, or until %d "
- "client%s reconnect. During this time new clients"
- " will not be allowed to connect. "
- "Recovery progress can be monitored by watching "
- "/proc/fs/lustre/obdfilter/%s/recovery_status.\n",
+ "client%s reconnect%s.\n",
obd->obd_name, lustre_cfg_string(lcfg, 1),
label ?: "", label ? "/" : "", str,
obd->obd_recovery_timeout / 60,
obd->obd_recovery_timeout % 60,
obd->obd_max_recoverable_clients,
(obd->obd_max_recoverable_clients == 1) ? "":"s",
- obd->obd_name);
+ (obd->obd_max_recoverable_clients == 1) ? "s":"");
} else {
LCONSOLE_INFO("OST %s now serving %s (%s%s%s) with recovery "
"%s\n", obd->obd_name, lustre_cfg_string(lcfg, 1),
@@ -2205,10 +2268,18 @@ static int filter_olg_fini(struct obd_llog_group *olg)
rc = llog_cleanup(ctxt);
ctxt = llog_group_get_ctxt(olg, LLOG_SIZE_ORIG_CTXT);
- if (ctxt)
+ if (ctxt) {
rc2 = llog_cleanup(ctxt);
- if (!rc)
- rc = rc2;
+ if (!rc)
+ rc = rc2;
+ }
+
+ ctxt = llog_group_get_ctxt(olg, LLOG_CONFIG_ORIG_CTXT);
+ if (ctxt) {
+ rc2 = llog_cleanup(ctxt);
+ if (!rc)
+ rc = rc2;
+ }
RETURN(rc);
}
@@ -2261,13 +2332,18 @@ filter_default_olg_init(struct obd_device *obd, struct obd_llog_group *olg,
if (rc)
GOTO(cleanup_lcm, rc);
+ rc = llog_setup(obd, olg, LLOG_CONFIG_ORIG_CTXT, tgt, 0, NULL,
+ &llog_lvfs_ops);
+ if (rc)
+ GOTO(cleanup_olg, rc);
+
ctxt = llog_group_get_ctxt(olg, LLOG_MDS_OST_REPL_CTXT);
if (!ctxt) {
CERROR("Can't get ctxt for %p:%x\n", olg,
LLOG_MDS_OST_REPL_CTXT);
GOTO(cleanup_olg, rc = -ENODEV);
}
- ctxt->loc_lcm = filter->fo_lcm;
+ ctxt->loc_lcm = lcm_get(filter->fo_lcm);
ctxt->llog_proc_cb = filter_recov_log_mds_ost_cb;
llog_ctxt_put(ctxt);
@@ -2282,8 +2358,7 @@ cleanup_lcm:
static int
filter_llog_init(struct obd_device *obd, struct obd_llog_group *olg,
- struct obd_device *tgt, int count, struct llog_catid *catid,
- struct obd_uuid *uuid)
+ struct obd_device *tgt, int *index)
{
struct filter_obd *filter = &obd->u.filter;
struct llog_ctxt *ctxt;
@@ -2306,7 +2381,7 @@ filter_llog_init(struct obd_device *obd, struct obd_llog_group *olg,
RETURN(-ENODEV);
}
ctxt->llog_proc_cb = filter_recov_log_mds_ost_cb;
- ctxt->loc_lcm = filter->fo_lcm;
+ ctxt->loc_lcm = lcm_get(filter->fo_lcm);
llog_ctxt_put(ctxt);
RETURN(rc);
}
@@ -2318,23 +2393,42 @@ static int filter_llog_finish(struct obd_device *obd, int count)
ENTRY;
ctxt = llog_group_get_ctxt(&obd->obd_olg, LLOG_MDS_OST_REPL_CTXT);
- LASSERT(ctxt != NULL);
- mutex_down(&ctxt->loc_sem);
- if (ctxt->loc_imp) {
+ if (ctxt) {
/*
- * Balance class_import_get() in llog_receptor_accept(). This
- * is safe to do here, as llog is already synchronized and its
- * import may go.
+ * Make sure that no cached llcds are left in recov_thread.
+ * We actually do sync at disconnect time, but disconnect
+ * may not come being marked rq_no_resend = 1.
*/
- class_import_put(ctxt->loc_imp);
- ctxt->loc_imp = NULL;
+ llog_sync(ctxt, NULL);
+
+ /*
+ * Balance class_import_get() in llog_receptor_accept().
+ * This is safe to do, as llog is already synchronized
+ * and its import may go.
+ */
+ cfs_mutex_down(&ctxt->loc_sem);
+ if (ctxt->loc_imp) {
+ class_import_put(ctxt->loc_imp);
+ ctxt->loc_imp = NULL;
+ }
+ cfs_mutex_up(&ctxt->loc_sem);
}
- mutex_up(&ctxt->loc_sem);
- llog_ctxt_put(ctxt);
if (filter->fo_lcm) {
+ /*
+ * ctxt is NULL when the context lookup above failed, so only
+ * take loc_sem when a context exists. Its reference is still
+ * held at this point.
+ */
+ if (ctxt)
+ cfs_mutex_down(&ctxt->loc_sem);
llog_recov_thread_fini(filter->fo_lcm, obd->obd_force);
filter->fo_lcm = NULL;
+ if (ctxt)
+ cfs_mutex_up(&ctxt->loc_sem);
}
+ /* Drop the reference only after the last use of ctxt->loc_sem. */
+ if (ctxt)
+ llog_ctxt_put(ctxt);
RETURN(filter_olg_fini(&obd->obd_olg));
}
@@ -2348,7 +2433,7 @@ filter_find_olg_internal(struct filter_obd *filter, int group)
struct obd_llog_group *olg;
LASSERT_SPIN_LOCKED(&filter->fo_llog_list_lock);
- list_for_each_entry(olg, &filter->fo_llog_list, olg_list) {
+ cfs_list_for_each_entry(olg, &filter->fo_llog_list, olg_list) {
if (olg->olg_group == group)
RETURN(olg);
}
@@ -2368,9 +2453,9 @@ struct obd_llog_group *filter_find_olg(struct obd_device *obd, int group)
if (group == FILTER_GROUP_LLOG)
RETURN(&obd->obd_olg);
- spin_lock(&filter->fo_llog_list_lock);
+ cfs_spin_lock(&filter->fo_llog_list_lock);
olg = filter_find_olg_internal(filter, group);
- spin_unlock(&filter->fo_llog_list_lock);
+ cfs_spin_unlock(&filter->fo_llog_list_lock);
RETURN(olg);
}
@@ -2390,7 +2475,7 @@ struct obd_llog_group *filter_find_create_olg(struct obd_device *obd, int group)
if (group == FILTER_GROUP_LLOG)
RETURN(&obd->obd_olg);
- spin_lock(&filter->fo_llog_list_lock);
+ cfs_spin_lock(&filter->fo_llog_list_lock);
olg = filter_find_olg_internal(filter, group);
if (olg) {
if (olg->olg_initializing) {
@@ -2404,28 +2489,28 @@ struct obd_llog_group *filter_find_create_olg(struct obd_device *obd, int group)
GOTO(out_unlock, olg = ERR_PTR(-ENOMEM));
llog_group_init(olg, group);
- list_add(&olg->olg_list, &filter->fo_llog_list);
+ cfs_list_add(&olg->olg_list, &filter->fo_llog_list);
olg->olg_initializing = 1;
- spin_unlock(&filter->fo_llog_list_lock);
+ cfs_spin_unlock(&filter->fo_llog_list_lock);
- rc = llog_cat_initialize(obd, olg, 1, NULL);
+ rc = obd_llog_init(obd, olg, obd, NULL);
if (rc) {
- spin_lock(&filter->fo_llog_list_lock);
- list_del(&olg->olg_list);
- spin_unlock(&filter->fo_llog_list_lock);
+ cfs_spin_lock(&filter->fo_llog_list_lock);
+ cfs_list_del(&olg->olg_list);
+ cfs_spin_unlock(&filter->fo_llog_list_lock);
OBD_FREE_PTR(olg);
GOTO(out, olg = ERR_PTR(-ENOMEM));
}
- spin_lock(&filter->fo_llog_list_lock);
+ cfs_spin_lock(&filter->fo_llog_list_lock);
olg->olg_initializing = 0;
- spin_unlock(&filter->fo_llog_list_lock);
+ cfs_spin_unlock(&filter->fo_llog_list_lock);
CDEBUG(D_OTHER, "%s: new llog group %u (0x%p)\n",
obd->obd_name, group, olg);
out:
RETURN(olg);
out_unlock:
- spin_unlock(&filter->fo_llog_list_lock);
+ cfs_spin_unlock(&filter->fo_llog_list_lock);
GOTO(out, olg);
}
@@ -2438,10 +2523,9 @@ static int filter_llog_connect(struct obd_export *exp,
int rc;
ENTRY;
- CDEBUG(D_OTHER, "handle connect for %s: %u/%u/%u\n", obd->obd_name,
- (unsigned) body->lgdc_logid.lgl_ogr,
- (unsigned) body->lgdc_logid.lgl_oid,
- (unsigned) body->lgdc_logid.lgl_ogen);
+ CDEBUG(D_OTHER, "%s: LLog connect for: "LPX64"/"LPX64":%x\n",
+ obd->obd_name, body->lgdc_logid.lgl_oid,
+ body->lgdc_logid.lgl_ogr, body->lgdc_logid.lgl_ogen);
olg = filter_find_olg(obd, body->lgdc_logid.lgl_ogr);
if (!olg) {
@@ -2454,7 +2538,15 @@ static int filter_llog_connect(struct obd_export *exp,
ctxt = llog_group_get_ctxt(olg, body->lgdc_ctxt_idx);
LASSERTF(ctxt != NULL, "ctxt is not null, ctxt idx %d \n",
body->lgdc_ctxt_idx);
- rc = llog_connect(ctxt, 1, &body->lgdc_logid,
+
+ CWARN("%s: Recovery from log "LPX64"/"LPX64":%x\n",
+ obd->obd_name, body->lgdc_logid.lgl_oid,
+ body->lgdc_logid.lgl_ogr, body->lgdc_logid.lgl_ogen);
+
+ cfs_spin_lock_bh(&obd->obd_processing_task_lock);
+ obd->u.filter.fo_mds_ost_sync = 1;
+ cfs_spin_unlock_bh(&obd->obd_processing_task_lock);
+ rc = llog_connect(ctxt, &body->lgdc_logid,
&body->lgdc_gen, NULL);
llog_ctxt_put(ctxt);
if (rc != 0)
@@ -2468,7 +2560,7 @@ static int filter_llog_preclean(struct obd_device *obd)
{
struct obd_llog_group *olg, *tmp;
struct filter_obd *filter;
- struct list_head remove_list;
+ cfs_list_t remove_list;
int rc = 0;
ENTRY;
@@ -2479,17 +2571,17 @@ static int filter_llog_preclean(struct obd_device *obd)
filter = &obd->u.filter;
CFS_INIT_LIST_HEAD(&remove_list);
- spin_lock(&filter->fo_llog_list_lock);
- while (!list_empty(&filter->fo_llog_list)) {
- olg = list_entry(filter->fo_llog_list.next,
- struct obd_llog_group, olg_list);
- list_del(&olg->olg_list);
- list_add(&olg->olg_list, &remove_list);
+ cfs_spin_lock(&filter->fo_llog_list_lock);
+ while (!cfs_list_empty(&filter->fo_llog_list)) {
+ olg = cfs_list_entry(filter->fo_llog_list.next,
+ struct obd_llog_group, olg_list);
+ cfs_list_del(&olg->olg_list);
+ cfs_list_add(&olg->olg_list, &remove_list);
}
- spin_unlock(&filter->fo_llog_list_lock);
+ cfs_spin_unlock(&filter->fo_llog_list_lock);
- list_for_each_entry_safe(olg, tmp, &remove_list, olg_list) {
- list_del_init(&olg->olg_list);
+ cfs_list_for_each_entry_safe(olg, tmp, &remove_list, olg_list) {
+ cfs_list_del_init(&olg->olg_list);
rc = filter_olg_fini(olg);
if (rc)
CERROR("failed to cleanup llogging subsystem for %u\n",
@@ -2510,7 +2602,8 @@ static int filter_precleanup(struct obd_device *obd,
case OBD_CLEANUP_EARLY:
break;
case OBD_CLEANUP_EXPORTS:
- target_cleanup_recovery(obd);
+ /* Stop recovery before namespace cleanup. */
+ target_recovery_fini(obd);
rc = filter_llog_preclean(obd);
break;
}
@@ -2526,14 +2619,8 @@ static int filter_cleanup(struct obd_device *obd)
LCONSOLE_WARN("%s: shutting down for failover; client state "
"will be preserved.\n", obd->obd_name);
- if (!list_empty(&obd->obd_exports)) {
- CERROR("%s: still has clients!\n", obd->obd_name);
- class_disconnect_exports(obd);
- if (!list_empty(&obd->obd_exports)) {
- CERROR("still has exports after forced cleanup?\n");
- RETURN(-EBUSY);
- }
- }
+ obd_exports_barrier(obd);
+ obd_zombie_barrier();
lprocfs_remove_proc_entry("clear", obd->obd_proc_exports_entry);
lprocfs_free_per_client_stats(obd);
@@ -2541,10 +2628,6 @@ static int filter_cleanup(struct obd_device *obd)
lprocfs_obd_cleanup(obd);
lquota_cleanup(filter_quota_interface_ref, obd);
- /* Stop recovery before namespace cleanup. */
- target_stop_recovery_thread(obd);
- target_cleanup_recovery(obd);
-
ldlm_namespace_free(obd->obd_namespace, NULL, obd->obd_force);
obd->obd_namespace = NULL;
@@ -2571,7 +2654,8 @@ static int filter_cleanup(struct obd_device *obd)
}
static int filter_connect_internal(struct obd_export *exp,
- struct obd_connect_data *data)
+ struct obd_connect_data *data,
+ int reconnect)
{
struct filter_export_data *fed = &exp->exp_filter_data;
@@ -2598,20 +2682,28 @@ static int filter_connect_internal(struct obd_export *exp,
exp->exp_connect_flags = data->ocd_connect_flags;
data->ocd_version = LUSTRE_VERSION_CODE;
+ /* Kindly make sure the SKIP_ORPHAN flag is from MDS. */
+ if (!ergo(data->ocd_connect_flags & OBD_CONNECT_SKIP_ORPHAN,
+ data->ocd_connect_flags & OBD_CONNECT_MDS))
+ RETURN(-EPROTO);
+
if (exp->exp_connect_flags & OBD_CONNECT_GRANT) {
+ struct filter_obd *filter = &exp->exp_obd->u.filter;
obd_size left, want;
- spin_lock(&exp->exp_obd->obd_osfs_lock);
+ cfs_spin_lock(&exp->exp_obd->obd_osfs_lock);
left = filter_grant_space_left(exp);
want = data->ocd_grant;
- filter_grant(exp, fed->fed_grant, want, left);
+ filter_grant(exp, fed->fed_grant, want, left, (reconnect == 0));
data->ocd_grant = fed->fed_grant;
- spin_unlock(&exp->exp_obd->obd_osfs_lock);
+ cfs_spin_unlock(&exp->exp_obd->obd_osfs_lock);
CDEBUG(D_CACHE, "%s: cli %s/%p ocd_grant: %d want: "
LPU64" left: "LPU64"\n", exp->exp_obd->obd_name,
exp->exp_client_uuid.uuid, exp,
data->ocd_grant, want, left);
+
+ filter->fo_tot_granted_clients ++;
}
if (data->ocd_connect_flags & OBD_CONNECT_INDEX) {
@@ -2624,8 +2716,10 @@ static int filter_connect_internal(struct obd_export *exp,
/* this will only happen on the first connect */
lsd->lsd_ost_index = cpu_to_le32(data->ocd_index);
lsd->lsd_feature_compat |= cpu_to_le32(OBD_COMPAT_OST);
+ /* sync is not needed here as filter_client_add will
+ * set exp_need_sync flag */
filter_update_server_data(exp->exp_obd,
- filter->fo_rcvd_filp, lsd, 1);
+ filter->fo_rcvd_filp, lsd);
} else if (index != data->ocd_index) {
LCONSOLE_ERROR_MSG(0x136, "Connection from %s to index"
" %u doesn't match actual OST index"
@@ -2635,6 +2729,10 @@ static int filter_connect_internal(struct obd_export *exp,
data->ocd_index);
RETURN(-EBADF);
}
+ /* FIXME: Do the same with the MDS UUID and fsd_peeruuid.
+ * FIXME: We don't strictly need the COMPAT flag for that,
+ * FIXME: as fsd_peeruuid[0] will tell us if that is set.
+ * FIXME: We needed it for the index, as index 0 is valid. */
}
if (OBD_FAIL_CHECK(OBD_FAIL_OST_BRW_SIZE)) {
@@ -2669,18 +2767,14 @@ static int filter_connect_internal(struct obd_export *exp,
obd_export_nid2str(exp));
}
- /* FIXME: Do the same with the MDS UUID and fsd_peeruuid.
- * FIXME: We don't strictly need the COMPAT flag for that,
- * FIXME: as fsd_peeruuid[0] will tell us if that is set.
- * FIXME: We needed it for the index, as index 0 is valid. */
-
RETURN(0);
}
static int filter_reconnect(const struct lu_env *env,
struct obd_export *exp, struct obd_device *obd,
struct obd_uuid *cluuid,
- struct obd_connect_data *data)
+ struct obd_connect_data *data,
+ void *localdata)
{
int rc;
ENTRY;
@@ -2688,41 +2782,44 @@ static int filter_reconnect(const struct lu_env *env,
if (exp == NULL || obd == NULL || cluuid == NULL)
RETURN(-EINVAL);
- rc = filter_connect_internal(exp, data);
+ rc = filter_connect_internal(exp, data, 1);
+ if (rc == 0)
+ filter_export_stats_init(obd, exp, localdata);
RETURN(rc);
}
/* nearly identical to mds_connect */
static int filter_connect(const struct lu_env *env,
- struct lustre_handle *conn, struct obd_device *obd,
+ struct obd_export **exp, struct obd_device *obd,
struct obd_uuid *cluuid,
struct obd_connect_data *data, void *localdata)
{
struct lvfs_run_ctxt saved;
- struct obd_export *exp;
+ struct lustre_handle conn = { 0 };
+ struct obd_export *lexp;
struct filter_export_data *fed;
struct lsd_client_data *lcd = NULL;
__u32 group;
int rc;
ENTRY;
- if (conn == NULL || obd == NULL || cluuid == NULL)
+ if (exp == NULL || obd == NULL || cluuid == NULL)
RETURN(-EINVAL);
- rc = class_connect(conn, obd, cluuid);
+ rc = class_connect(&conn, obd, cluuid);
if (rc)
RETURN(rc);
- exp = class_conn2export(conn);
- LASSERT(exp != NULL);
+ lexp = class_conn2export(&conn);
+ LASSERT(lexp != NULL);
- fed = &exp->exp_filter_data;
+ fed = &lexp->exp_filter_data;
- rc = filter_connect_internal(exp, data);
+ rc = filter_connect_internal(lexp, data, 0);
if (rc)
GOTO(cleanup, rc);
- filter_export_stats_init(obd, exp, localdata);
+ filter_export_stats_init(obd, lexp, localdata);
if (obd->obd_replayable) {
OBD_ALLOC(lcd, sizeof(*lcd));
if (!lcd) {
@@ -2732,17 +2829,15 @@ static int filter_connect(const struct lu_env *env,
memcpy(lcd->lcd_uuid, cluuid, sizeof(lcd->lcd_uuid));
fed->fed_lcd = lcd;
- rc = filter_client_add(obd, exp, -1);
+ rc = filter_client_add(obd, lexp, -1);
if (rc)
GOTO(cleanup, rc);
}
group = data->ocd_group;
- if (group == 0)
- GOTO(cleanup, rc);
CWARN("%s: Received MDS connection ("LPX64"); group %d\n",
- obd->obd_name, exp->exp_handle.h_cookie, group);
+ obd->obd_name, lexp->exp_handle.h_cookie, group);
push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
rc = filter_read_groups(obd, group, 1);
@@ -2760,9 +2855,11 @@ cleanup:
OBD_FREE_PTR(lcd);
fed->fed_lcd = NULL;
}
- class_disconnect(exp);
+ class_disconnect(lexp);
+ lprocfs_exp_cleanup(lexp);
+ *exp = NULL;
} else {
- class_export_put(exp);
+ *exp = lexp;
}
RETURN(rc);
@@ -2779,7 +2876,7 @@ static void filter_grant_sanity_check(struct obd_device *obd, const char *func)
obd_size tot_dirty = 0, tot_pending = 0, tot_granted = 0;
obd_size fo_tot_dirty, fo_tot_pending, fo_tot_granted;
- if (list_empty(&obd->obd_exports))
+ if (cfs_list_empty(&obd->obd_exports))
return;
/* We don't want to do this for large machines that do lots of
@@ -2787,9 +2884,9 @@ static void filter_grant_sanity_check(struct obd_device *obd, const char *func)
if (obd->obd_num_exports > 100)
return;
- spin_lock(&obd->obd_osfs_lock);
- spin_lock(&obd->obd_dev_lock);
- list_for_each_entry(exp, &obd->obd_exports, exp_obd_chain) {
+ cfs_spin_lock(&obd->obd_osfs_lock);
+ cfs_spin_lock(&obd->obd_dev_lock);
+ cfs_list_for_each_entry(exp, &obd->obd_exports, exp_obd_chain) {
int error = 0;
fed = &exp->exp_filter_data;
if (fed->fed_grant < 0 || fed->fed_pending < 0 ||
@@ -2820,8 +2917,8 @@ static void filter_grant_sanity_check(struct obd_device *obd, const char *func)
fo_tot_granted = obd->u.filter.fo_tot_granted;
fo_tot_pending = obd->u.filter.fo_tot_pending;
fo_tot_dirty = obd->u.filter.fo_tot_dirty;
- spin_unlock(&obd->obd_dev_lock);
- spin_unlock(&obd->obd_osfs_lock);
+ cfs_spin_unlock(&obd->obd_dev_lock);
+ cfs_spin_unlock(&obd->obd_osfs_lock);
/* Do these assertions outside the spinlocks so we don't kill system */
if (tot_granted != fo_tot_granted)
@@ -2854,11 +2951,7 @@ static void filter_grant_discard(struct obd_export *exp)
struct filter_obd *filter = &obd->u.filter;
struct filter_export_data *fed = &exp->exp_filter_data;
- spin_lock(&obd->obd_osfs_lock);
- spin_lock(&obd->obd_dev_lock);
- list_del_init(&exp->exp_obd_chain);
- spin_unlock(&obd->obd_dev_lock);
-
+ cfs_spin_lock(&obd->obd_osfs_lock);
LASSERTF(filter->fo_tot_granted >= fed->fed_grant,
"%s: tot_granted "LPU64" cli %s/%p fed_grant %ld\n",
obd->obd_name, filter->fo_tot_granted,
@@ -2877,7 +2970,7 @@ static void filter_grant_discard(struct obd_export *exp)
fed->fed_dirty = 0;
fed->fed_grant = 0;
- spin_unlock(&obd->obd_osfs_lock);
+ cfs_spin_unlock(&obd->obd_osfs_lock);
}
static int filter_destroy_export(struct obd_export *exp)
@@ -2889,9 +2982,7 @@ static int filter_destroy_export(struct obd_export *exp)
exp->exp_obd->obd_name, exp->exp_client_uuid.uuid,
exp, exp->exp_filter_data.fed_pending);
- /* Not ported yet the b1_6 quota functionality
- * lquota_clearinfo(filter_quota_interface_ref, exp, exp->exp_obd);
- */
+ lquota_clearinfo(filter_quota_interface_ref, exp, exp->exp_obd);
target_destroy_export(exp);
ldlm_destroy_export(exp);
@@ -2900,14 +2991,18 @@ static int filter_destroy_export(struct obd_export *exp)
RETURN(0);
- if (exp->exp_obd->obd_replayable)
- filter_client_free(exp);
- else
+ if (!exp->exp_obd->obd_replayable)
fsfilt_sync(exp->exp_obd, exp->exp_obd->u.obt.obt_sb);
filter_grant_discard(exp);
filter_fmd_cleanup(exp);
+ if (exp->exp_connect_flags & OBD_CONNECT_GRANT_SHRINK) {
+ struct filter_obd *filter = &exp->exp_obd->u.filter;
+ if (filter->fo_tot_granted_clients > 0)
+ filter->fo_tot_granted_clients --;
+ }
+
if (!(exp->exp_flags & OBD_OPT_FORCE))
filter_grant_sanity_check(exp->exp_obd, __FUNCTION__);
@@ -2918,7 +3013,7 @@ static void filter_sync_llogs(struct obd_device *obd, struct obd_export *dexp)
{
struct obd_llog_group *olg_min, *olg;
struct filter_obd *filter;
- int worked = 0, group;
+ int worked = -1, group;
struct llog_ctxt *ctxt;
ENTRY;
@@ -2931,8 +3026,8 @@ static void filter_sync_llogs(struct obd_device *obd, struct obd_export *dexp)
/* look for group with min. number, but > worked */
olg_min = NULL;
group = 1 << 30;
- spin_lock(&filter->fo_llog_list_lock);
- list_for_each_entry(olg, &filter->fo_llog_list, olg_list) {
+ cfs_spin_lock(&filter->fo_llog_list_lock);
+ cfs_list_for_each_entry(olg, &filter->fo_llog_list, olg_list) {
if (olg->olg_group <= worked) {
/* this group is already synced */
continue;
@@ -2945,7 +3040,7 @@ static void filter_sync_llogs(struct obd_device *obd, struct obd_export *dexp)
olg_min = olg;
group = olg->olg_group;
}
- spin_unlock(&filter->fo_llog_list_lock);
+ cfs_spin_unlock(&filter->fo_llog_list_lock);
if (olg_min == NULL)
break;
@@ -2955,18 +3050,20 @@ static void filter_sync_llogs(struct obd_device *obd, struct obd_export *dexp)
(dexp == olg_min->olg_exp || dexp == NULL)) {
int err;
ctxt = llog_group_get_ctxt(olg_min,
- LLOG_MDS_OST_REPL_CTXT);
- LASSERT(ctxt != NULL);
- err = llog_sync(ctxt, olg_min->olg_exp);
- llog_ctxt_put(ctxt);
- if (err)
- CERROR("error flushing logs to MDS: rc %d\n",
- err);
+ LLOG_MDS_OST_REPL_CTXT);
+ if (ctxt) {
+ err = llog_sync(ctxt, olg_min->olg_exp);
+ llog_ctxt_put(ctxt);
+ if (err) {
+ CERROR("error flushing logs to MDS: "
+ "rc %d\n", err);
+ }
+ }
}
} while (olg_min != NULL);
}
-/* also incredibly similar to mds_disconnect */
+/* Also incredibly similar to mds_disconnect */
static int filter_disconnect(struct obd_export *exp)
{
struct obd_device *obd = exp->exp_obd;
@@ -2980,17 +3077,18 @@ static int filter_disconnect(struct obd_export *exp)
filter_grant_sanity_check(obd, __FUNCTION__);
filter_grant_discard(exp);
- /* Disconnect early so that clients can't keep using export */
- rc = class_disconnect(exp);
- if (exp->exp_obd->obd_namespace != NULL)
- ldlm_cancel_locks_for_export(exp);
+ /* Flush any remaining cancel messages out to the target */
+ filter_sync_llogs(obd, exp);
- fsfilt_sync(obd, obd->u.obt.obt_sb);
+ lquota_clearinfo(filter_quota_interface_ref, exp, exp->exp_obd);
- lprocfs_exp_cleanup(exp);
+ rc = server_disconnect_export(exp);
+
+ if (exp->exp_obd->obd_replayable)
+ filter_client_free(exp);
+ else
+ fsfilt_sync(obd, obd->u.obt.obt_sb);
- /* flush any remaining cancel messages out to the target */
- filter_sync_llogs(obd, exp);
class_export_put(exp);
RETURN(rc);
}
@@ -3012,7 +3110,6 @@ static void filter_revimp_update(struct obd_export *exp)
static int filter_ping(struct obd_export *exp)
{
filter_fmd_expire(exp);
-
return 0;
}
@@ -3051,7 +3148,8 @@ static int filter_getattr(struct obd_export *exp, struct obd_info *oinfo)
int rc = 0;
ENTRY;
- rc = filter_auth_capa(exp, NULL, oinfo_mdsno(oinfo),
+ LASSERT(oinfo->oi_oa->o_valid & OBD_MD_FLGROUP);
+ rc = filter_auth_capa(exp, NULL, oinfo->oi_oa->o_gr,
oinfo_capa(oinfo), CAPA_OPC_META_READ);
if (rc)
RETURN(rc);
@@ -3118,9 +3216,11 @@ int filter_setattr_internal(struct obd_export *exp, struct dentry *dentry,
unsigned int orig_ids[MAXQUOTAS] = {0, 0};
struct llog_cookie *fcc = NULL;
struct filter_obd *filter;
- int rc, err, locked = 0, sync = 0;
+ int rc, err, sync = 0;
+ loff_t old_size = 0;
unsigned int ia_valid;
struct inode *inode;
+ struct page *page = NULL;
struct iattr iattr;
void *handle;
ENTRY;
@@ -3140,11 +3240,31 @@ int filter_setattr_internal(struct obd_export *exp, struct dentry *dentry,
if (fcc != NULL)
*fcc = oa->o_lcookie;
}
-
- if (ia_valid & ATTR_SIZE || ia_valid & (ATTR_UID | ATTR_GID)) {
+ if (ia_valid & (ATTR_SIZE | ATTR_UID | ATTR_GID)) {
DQUOT_INIT(inode);
+ /* Filter truncates and writes are serialized by
+ * i_alloc_sem, see the comment in
+ * filter_preprw_write.*/
+ if (ia_valid & ATTR_SIZE)
+ down_write(&inode->i_alloc_sem);
LOCK_INODE_MUTEX(inode);
- locked = 1;
+ old_size = i_size_read(inode);
+ }
+
+ /* VBR: version recovery check */
+ rc = filter_version_get_check(exp, oti, inode);
+ if (rc)
+ GOTO(out_unlock, rc);
+
+ /* Pin the last page so that ldiskfs_truncate
+ * does not start a GFP_FS allocation. */
+ if (ia_valid & ATTR_SIZE) {
+ page = grab_cache_page(inode->i_mapping,
+ iattr.ia_size >> PAGE_CACHE_SHIFT);
+ if (page == NULL)
+ GOTO(out_unlock, rc = -ENOMEM);
+
+ unlock_page(page);
}
/* If the inode still has SUID+SGID bits set (see filter_precreate())
@@ -3191,8 +3311,8 @@ int filter_setattr_internal(struct obd_export *exp, struct dentry *dentry,
GOTO(out_unlock, rc = PTR_ERR(handle));
}
if (oa->o_valid & OBD_MD_FLFLAGS) {
- rc = fsfilt_iocontrol(exp->exp_obd, inode, NULL,
- EXT3_IOC_SETFLAGS, (long)&oa->o_flags);
+ rc = fsfilt_iocontrol(exp->exp_obd, dentry,
+ FSFILT_IOC_SETFLAGS, (long)&oa->o_flags);
} else {
rc = fsfilt_setattr(exp->exp_obd, dentry, handle, &iattr, 1);
if (fcc != NULL)
@@ -3209,7 +3329,7 @@ int filter_setattr_internal(struct obd_export *exp, struct dentry *dentry,
* sure we have one left for the last_rcvd update. */
err = fsfilt_extend(exp->exp_obd, inode, 1, handle);
- rc = filter_finish_transno(exp, oti, rc, sync);
+ rc = filter_finish_transno(exp, inode, oti, rc, sync);
if (sync) {
filter_cancel_cookies_cb(exp->exp_obd, 0, fcc, rc);
fcc = NULL;
@@ -3224,18 +3344,26 @@ int filter_setattr_internal(struct obd_export *exp, struct dentry *dentry,
fcc = NULL;
}
- if (locked) {
- /* truncate can leave dirty pages in the cache.
- * we'll take care of them in write path -bzzz */
- UNLOCK_INODE_MUTEX(inode);
- locked = 0;
+ /* For a partial-page truncate flush the page to disk immediately
+ * to avoid data corruption during direct disk write. b=17397 */
+ if (!sync && (iattr.ia_valid & ATTR_SIZE) &&
+ old_size != iattr.ia_size && (iattr.ia_size & ~CFS_PAGE_MASK)) {
+ err = filemap_fdatawrite_range(inode->i_mapping, iattr.ia_size,
+ iattr.ia_size + 1);
+ if (!rc)
+ rc = err;
}
EXIT;
+
out_unlock:
- if (locked)
- UNLOCK_INODE_MUTEX(inode);
+ if (page)
+ page_cache_release(page);
+ if (ia_valid & (ATTR_SIZE | ATTR_UID | ATTR_GID))
+ UNLOCK_INODE_MUTEX(inode);
+ if (ia_valid & ATTR_SIZE)
+ up_write(&inode->i_alloc_sem);
if (fcc)
OBD_FREE(fcc, sizeof(*fcc));
@@ -3255,60 +3383,70 @@ out_unlock:
int filter_setattr(struct obd_export *exp, struct obd_info *oinfo,
struct obd_trans_info *oti)
{
+ struct obdo *oa = oinfo->oi_oa;
+ struct lustre_capa *capa = oinfo_capa(oinfo);
struct ldlm_res_id res_id;
struct filter_mod_data *fmd;
struct lvfs_run_ctxt saved;
struct filter_obd *filter;
struct ldlm_resource *res;
struct dentry *dentry;
+ __u64 opc = CAPA_OPC_META_WRITE;
int rc;
ENTRY;
- osc_build_res_name(oinfo->oi_oa->o_id, oinfo->oi_oa->o_gr, &res_id);
- rc = filter_auth_capa(exp, NULL, oinfo_mdsno(oinfo),
- oinfo_capa(oinfo), CAPA_OPC_META_WRITE);
+ if (oa->o_valid & OBD_FL_TRUNC)
+ opc |= CAPA_OPC_OSS_TRUNC;
+
+ LASSERT(oa->o_valid & OBD_MD_FLGROUP);
+ rc = filter_auth_capa(exp, NULL, oa->o_gr, capa, opc);
if (rc)
RETURN(rc);
+ if (oa->o_valid & (OBD_MD_FLUID | OBD_MD_FLGID)) {
+ rc = filter_capa_fixoa(exp, oa, oa->o_gr, capa);
+ if (rc)
+ RETURN(rc);
+ }
+
+ osc_build_res_name(oa->o_id, oa->o_gr, &res_id);
/* This would be very bad - accidentally truncating a file when
* changing the time or similar - bug 12203. */
- if (oinfo->oi_oa->o_valid & OBD_MD_FLSIZE &&
+ if (oa->o_valid & OBD_MD_FLSIZE &&
oinfo->oi_policy.l_extent.end != OBD_OBJECT_EOF) {
static char mdsinum[48];
- if (oinfo->oi_oa->o_valid & OBD_MD_FLFID)
+ if (oa->o_valid & OBD_MD_FLFID)
snprintf(mdsinum, sizeof(mdsinum) - 1,
- " of inode "LPU64"/%u", oinfo->oi_oa->o_fid,
- oinfo->oi_oa->o_generation);
+ " of inode "LPU64"/%u", oa->o_fid,
+ oa->o_generation);
else
mdsinum[0] = '\0';
CERROR("%s: setattr from %s trying to truncate objid "LPU64
" %s\n",
exp->exp_obd->obd_name, obd_export_nid2str(exp),
- oinfo->oi_oa->o_id, mdsinum);
+ oa->o_id, mdsinum);
RETURN(-EPERM);
}
- dentry = __filter_oa2dentry(exp->exp_obd, oinfo->oi_oa,
- __FUNCTION__, 1);
+ dentry = __filter_oa2dentry(exp->exp_obd, oa, __FUNCTION__, 1);
if (IS_ERR(dentry))
RETURN(PTR_ERR(dentry));
filter = &exp->exp_obd->u.filter;
push_ctxt(&saved, &exp->exp_obd->obd_lvfs_ctxt, NULL);
- lock_kernel();
- if (oinfo->oi_oa->o_valid &
+ if (oa->o_valid &
(OBD_MD_FLMTIME | OBD_MD_FLATIME | OBD_MD_FLCTIME)) {
- fmd = filter_fmd_get(exp,oinfo->oi_oa->o_id,oinfo->oi_oa->o_gr);
+ fmd = filter_fmd_get(exp, oa->o_id, oa->o_gr);
if (fmd && fmd->fmd_mactime_xid < oti->oti_xid)
fmd->fmd_mactime_xid = oti->oti_xid;
filter_fmd_put(exp, fmd);
}
/* setting objects attributes (including owner/group) */
- rc = filter_setattr_internal(exp, dentry, oinfo->oi_oa, oti);
+ rc = filter_setattr_internal(exp, dentry, oa, oti);
if (rc)
GOTO(out_unlock, rc);
@@ -3317,20 +3455,19 @@ int filter_setattr(struct obd_export *exp, struct obd_info *oinfo,
if (res != NULL) {
LDLM_RESOURCE_ADDREF(res);
- rc = ldlm_res_lvbo_update(res, NULL, 0, 0);
+ rc = ldlm_res_lvbo_update(res, NULL, 0);
LDLM_RESOURCE_DELREF(res);
ldlm_resource_putref(res);
}
- oinfo->oi_oa->o_valid = OBD_MD_FLID;
+ oa->o_valid = OBD_MD_FLID;
/* Quota release need uid/gid info */
- obdo_from_inode(oinfo->oi_oa, dentry->d_inode,
+ obdo_from_inode(oa, dentry->d_inode,
FILTER_VALID_FLAGS | OBD_MD_FLUID | OBD_MD_FLGID);
EXIT;
out_unlock:
- unlock_kernel();
f_dput(dentry);
pop_ctxt(&saved, &exp->exp_obd->obd_lvfs_ctxt, NULL);
return rc;
@@ -3394,15 +3531,16 @@ static int filter_unpackmd(struct obd_export *exp, struct lov_stripe_md **lsmp,
/* caller must hold fo_create_locks[oa->o_gr] */
static int filter_destroy_precreated(struct obd_export *exp, struct obdo *oa,
- struct filter_obd *filter)
+ struct filter_obd *filter)
{
- struct obdo doa; /* XXX obdo on stack */
+ struct obdo doa = { 0 }; /* XXX obdo on stack */
obd_id last, id;
- int rc;
+ int rc = 0;
+ int skip_orphan;
ENTRY;
LASSERT(oa);
- LASSERT(oa->o_gr != 0);
+ LASSERT_MDS_GROUP(oa->o_gr);
LASSERT(oa->o_valid & OBD_MD_FLGROUP);
LASSERT(down_trylock(&filter->fo_create_locks[oa->o_gr]) != 0);
@@ -3412,7 +3550,7 @@ static int filter_destroy_precreated(struct obd_export *exp, struct obdo *oa,
doa.o_gr = oa->o_gr;
doa.o_mode = S_IFREG;
- if (!test_bit(doa.o_gr, &filter->fo_destroys_in_progress)) {
+ if (!cfs_test_bit(doa.o_gr, &filter->fo_destroys_in_progress)) {
CERROR("%s:["LPU64"] destroys_in_progress already cleared\n",
exp->exp_obd->obd_name, doa.o_gr);
RETURN(0);
@@ -3420,27 +3558,39 @@ static int filter_destroy_precreated(struct obd_export *exp, struct obdo *oa,
last = filter_last_id(filter, doa.o_gr);
- CWARN("%s: deleting orphan objects from "LPU64" to "LPU64"\n",
- exp->exp_obd->obd_name, oa->o_id + 1, last);
+ skip_orphan = !!(exp->exp_connect_flags & OBD_CONNECT_SKIP_ORPHAN);
+
+ CDEBUG(D_HA, "%s: deleting orphan objects from "LPU64" to "LPU64"%s\n",
+ exp->exp_obd->obd_name, oa->o_id + 1, last,
+ skip_orphan ? ", orphan objids won't be reused any more." : ".");
for (id = last; id > oa->o_id; id--) {
doa.o_id = id;
- rc = filter_destroy(exp, &doa, NULL, NULL, NULL);
+ rc = filter_destroy(exp, &doa, NULL, NULL, NULL, NULL);
if (rc && rc != -ENOENT) /* this is pretty fatal... */
CEMERG("error destroying precreate objid "LPU64": %d\n",
id, rc);
- filter_set_last_id(filter, id - 1, doa.o_gr);
+
/* update last_id on disk periodically so that if we restart
* we don't need to re-scan all of the just-deleted objects. */
- if ((id & 511) == 0)
+ if ((id & 511) == 0 && !skip_orphan) {
+ filter_set_last_id(filter, id - 1, doa.o_gr);
filter_update_last_objid(exp->exp_obd, doa.o_gr, 0);
+ }
}
CDEBUG(D_HA, "%s: after destroy: set last_objids["LPU64"] = "LPU64"\n",
exp->exp_obd->obd_name, doa.o_gr, oa->o_id);
- rc = filter_update_last_objid(exp->exp_obd, doa.o_gr, 1);
- clear_bit(doa.o_gr, &filter->fo_destroys_in_progress);
+ if (!skip_orphan) {
+ filter_set_last_id(filter, id, doa.o_gr);
+ rc = filter_update_last_objid(exp->exp_obd, doa.o_gr, 1);
+ } else {
+ /* don't reuse orphan object, return last used objid */
+ oa->o_id = last;
+ rc = 0;
+ }
+ cfs_clear_bit(doa.o_gr, &filter->fo_destroys_in_progress);
RETURN(rc);
}
@@ -3458,28 +3608,30 @@ static int filter_handle_precreate(struct obd_export *exp, struct obdo *oa,
/* delete orphans request */
if ((oa->o_valid & OBD_MD_FLFLAGS) && (oa->o_flags & OBD_FL_DELORPHAN)){
+ obd_id last = filter_last_id(filter, group);
+
if (oti->oti_conn_cnt < exp->exp_conn_cnt) {
CERROR("%s: dropping old orphan cleanup request\n",
obd->obd_name);
RETURN(0);
}
/* This causes inflight precreates to abort and drop lock */
- set_bit(group, &filter->fo_destroys_in_progress);
- down(&filter->fo_create_locks[group]);
- if (!test_bit(group, &filter->fo_destroys_in_progress)) {
+ cfs_set_bit(group, &filter->fo_destroys_in_progress);
+ cfs_down(&filter->fo_create_locks[group]);
+ if (!cfs_test_bit(group, &filter->fo_destroys_in_progress)) {
CERROR("%s:["LPU64"] destroys_in_progress already cleared\n",
exp->exp_obd->obd_name, group);
- up(&filter->fo_create_locks[group]);
+ cfs_up(&filter->fo_create_locks[group]);
RETURN(0);
}
- diff = oa->o_id - filter_last_id(filter, group);
+ diff = oa->o_id - last;
CDEBUG(D_HA, "filter_last_id() = "LPU64" -> diff = %d\n",
- filter_last_id(filter, group), diff);
+ last, diff);
if (-diff > OST_MAX_PRECREATE) {
CERROR("%s: ignoring bogus orphan destroy request: "
"obdid "LPU64" last_id "LPU64"\n", obd->obd_name,
- oa->o_id, filter_last_id(filter, group));
+ oa->o_id, last);
/* FIXME: should reset precreate_next_id on MDS */
GOTO(out, rc = -EINVAL);
}
@@ -3491,17 +3643,17 @@ static int filter_handle_precreate(struct obd_export *exp, struct obdo *oa,
GOTO(out, rc);
} else {
/* XXX: Used by MDS for the first time! */
- clear_bit(group, &filter->fo_destroys_in_progress);
+ cfs_clear_bit(group, &filter->fo_destroys_in_progress);
}
} else {
- down(&filter->fo_create_locks[group]);
+ cfs_down(&filter->fo_create_locks[group]);
if (oti->oti_conn_cnt < exp->exp_conn_cnt) {
CERROR("%s: dropping old precreate request\n",
obd->obd_name);
GOTO(out, rc = 0);
}
- /* only precreate if group == 0 and o_id is specified */
- if (group < FILTER_GROUP_MDS0 || oa->o_id == 0)
+ /* only precreate if group is an MDS group and o_id is specified */
+ if (!filter_group_is_mds(group) || oa->o_id == 0)
diff = 1;
else
diff = oa->o_id - filter_last_id(filter, group);
@@ -3523,7 +3675,7 @@ static int filter_handle_precreate(struct obd_export *exp, struct obdo *oa,
/* else diff == 0 */
GOTO(out, rc = 0);
out:
- up(&filter->fo_create_locks[group]);
+ cfs_up(&filter->fo_create_locks[group]);
return rc;
}
@@ -3538,10 +3690,10 @@ static int filter_statfs(struct obd_device *obd, struct obd_statfs *osfs,
/* at least try to account for cached pages. its still racey and
* might be under-reporting if clients haven't announced their
* caches with brw recently */
- spin_lock(&obd->obd_osfs_lock);
+ cfs_spin_lock(&obd->obd_osfs_lock);
rc = fsfilt_statfs(obd, obd->u.obt.obt_sb, max_age);
memcpy(osfs, &obd->obd_osfs, sizeof(*osfs));
- spin_unlock(&obd->obd_osfs_lock);
+ cfs_spin_unlock(&obd->obd_osfs_lock);
CDEBUG(D_SUPER | D_CACHE, "blocks cached "LPU64" granted "LPU64
" pending "LPU64" free "LPU64" avail "LPU64"\n",
@@ -3555,11 +3707,27 @@ static int filter_statfs(struct obd_device *obd, struct obd_statfs *osfs,
((filter->fo_tot_dirty + filter->fo_tot_pending +
osfs->os_bsize - 1) >> blockbits));
+ if (OBD_FAIL_CHECK(OBD_FAIL_OST_ENOSPC)) {
+ struct lr_server_data *lsd = filter->fo_fsd;
+ int index = le32_to_cpu(lsd->lsd_ost_index);
+
+ if (obd_fail_val == -1 ||
+ index == obd_fail_val)
+ osfs->os_bfree = osfs->os_bavail = 2;
+ else if (obd_fail_loc & OBD_FAIL_ONCE)
+ obd_fail_loc &= ~OBD_FAILED; /* reset flag */
+ }
+
/* set EROFS to state field if FS is mounted as RDONLY. The goal is to
* stop creating files on MDS if OST is not good shape to create
* objects.*/
- osfs->os_state = (filter->fo_obt.obt_sb->s_flags & MS_RDONLY) ?
- EROFS : 0;
+ osfs->os_state = 0;
+
+ if (filter->fo_obt.obt_sb->s_flags & MS_RDONLY)
+ osfs->os_state = OS_STATE_READONLY;
+
+ if (filter->fo_raid_degraded)
+ osfs->os_state |= OS_STATE_DEGRADED;
RETURN(rc);
}
@@ -3587,6 +3755,19 @@ static int filter_use_existing_obj(struct obd_device *obd,
return rc;
}
+static __u64 filter_calc_free_inodes(struct obd_device *obd)
+{
+ int rc;
+ __u64 os_ffree = -1;
+
+ cfs_spin_lock(&obd->obd_osfs_lock);
+ rc = fsfilt_statfs(obd, obd->u.obt.obt_sb, cfs_time_shift_64(1));
+ if (rc == 0)
+ os_ffree = obd->obd_osfs.os_ffree;
+ cfs_spin_unlock(&obd->obd_osfs_lock);
+
+ return os_ffree;
+}
/* We rely on the fact that only one thread will be creating files in a given
* group at a time, which is why we don't need an atomic filter_get_new_id.
@@ -3607,6 +3788,7 @@ static int filter_precreate(struct obd_device *obd, struct obdo *oa,
struct obd_statfs *osfs;
int err = 0, rc = 0, recreate_obj = 0, i;
cfs_time_t enough_time = cfs_time_shift(DISK_TIMEOUT/2);
+ __u64 os_ffree;
obd_id next_id;
void *handle = NULL;
ENTRY;
@@ -3624,7 +3806,8 @@ static int filter_precreate(struct obd_device *obd, struct obdo *oa,
OBD_ALLOC(osfs, sizeof(*osfs));
if (osfs == NULL)
RETURN(-ENOMEM);
- rc = filter_statfs(obd, osfs, cfs_time_current_64() - HZ, 0);
+ rc = filter_statfs(obd, osfs, cfs_time_current_64() - CFS_HZ,
+ 0);
if (rc == 0 && osfs->os_bavail < (osfs->os_blocks >> 10)) {
CDEBUG(D_RPCTRACE,"%s: not enough space for create "
LPU64"\n", obd->obd_name, osfs->os_bavail <<
@@ -3643,7 +3826,7 @@ static int filter_precreate(struct obd_device *obd, struct obdo *oa,
for (i = 0; i < *num && err == 0; i++) {
int cleanup_phase = 0;
- if (test_bit(group, &filter->fo_destroys_in_progress)) {
+ if (cfs_test_bit(group, &filter->fo_destroys_in_progress)) {
CWARN("%s: create aborted by destroy\n",
obd->obd_name);
rc = -EAGAIN;
@@ -3663,8 +3846,6 @@ static int filter_precreate(struct obd_device *obd, struct obdo *oa,
} else
next_id = filter_last_id(filter, group) + 1;
- CDEBUG(D_INFO, "precreate objid "LPU64"\n", next_id);
-
dparent = filter_parent_lock(obd, group, next_id);
if (IS_ERR(dparent))
GOTO(cleanup, rc = PTR_ERR(dparent));
@@ -3710,6 +3891,10 @@ static int filter_precreate(struct obd_device *obd, struct obdo *oa,
GOTO(cleanup, rc = PTR_ERR(handle));
cleanup_phase = 3;
+ CDEBUG(D_INODE, "%s: filter_precreate(od->o_gr="LPU64
+ ",od->o_id="LPU64")\n", obd->obd_name, group,
+ next_id);
+
/* We mark object SUID+SGID to flag it for accepting UID+GID
* from client on first write. Currently the permission bits
* on the OST are never used, so this is OK. */
@@ -3717,9 +3902,19 @@ static int filter_precreate(struct obd_device *obd, struct obdo *oa,
S_IFREG | S_ISUID | S_ISGID | 0666, NULL);
if (rc) {
CERROR("create failed rc = %d\n", rc);
+ if (rc == -ENOSPC) {
+ os_ffree = filter_calc_free_inodes(obd);
+ if (os_ffree != -1)
+ CERROR("%s: free inode "LPU64"\n",
+ obd->obd_name, os_ffree);
+ }
GOTO(cleanup, rc);
}
+ if (dchild->d_inode)
+ CDEBUG(D_INFO, "objid "LPU64" got inum %lu\n", next_id,
+ dchild->d_inode->i_ino);
+
set_last_id:
if (!recreate_obj) {
filter_set_last_id(filter, next_id, group);
@@ -3748,7 +3943,7 @@ set_last_id:
if (rc)
break;
- if (time_after(jiffies, enough_time)) {
+ if (cfs_time_after(jiffies, enough_time)) {
CDEBUG(D_RPCTRACE,
"%s: precreate slow - want %d got %d \n",
obd->obd_name, *num, i);
@@ -3767,21 +3962,23 @@ set_last_id:
static int filter_create(struct obd_export *exp, struct obdo *oa,
struct lov_stripe_md **ea, struct obd_trans_info *oti)
{
+ struct obd_device *obd = exp->exp_obd;
struct filter_export_data *fed;
- struct obd_device *obd = NULL;
struct filter_obd *filter;
struct lvfs_run_ctxt saved;
struct lov_stripe_md *lsm = NULL;
int rc = 0, diff, group = oa->o_gr;
ENTRY;
- if (!(oa->o_valid & OBD_MD_FLGROUP) || group == 0) {
+ CDEBUG(D_INODE, "%s: filter_create(od->o_gr="LPU64",od->o_id="
+ LPU64")\n", obd->obd_name, oa->o_gr, oa->o_id);
+
+ if (!(oa->o_valid & OBD_MD_FLGROUP)) {
CERROR("!!! nid %s sent invalid object group %d\n",
obd_export_nid2str(exp), group);
RETURN(-EINVAL);
}
- obd = exp->exp_obd;
fed = &exp->exp_filter_data;
filter = &obd->u.filter;
@@ -3794,8 +3991,6 @@ static int filter_create(struct obd_export *exp, struct obdo *oa,
RETURN(-ENOTUNIQ);
}
- CDEBUG(D_INFO, "filter_create(od->o_gr="LPU64",od->o_id="LPU64")\n",
- oa->o_gr, oa->o_id);
if (ea != NULL) {
lsm = *ea;
if (lsm == NULL) {
@@ -3817,9 +4012,9 @@ static int filter_create(struct obd_export *exp, struct obdo *oa,
rc = -EINVAL;
} else {
diff = 1;
- down(&filter->fo_create_locks[oa->o_gr]);
+ cfs_down(&filter->fo_create_locks[oa->o_gr]);
rc = filter_precreate(obd, oa, oa->o_gr, &diff);
- up(&filter->fo_create_locks[oa->o_gr]);
+ cfs_up(&filter->fo_create_locks[oa->o_gr]);
}
} else {
rc = filter_handle_precreate(exp, oa, oa->o_gr, oti);
@@ -3842,12 +4037,13 @@ static int filter_create(struct obd_export *exp, struct obdo *oa,
int filter_destroy(struct obd_export *exp, struct obdo *oa,
struct lov_stripe_md *md, struct obd_trans_info *oti,
- struct obd_export *md_exp)
+ struct obd_export *md_exp, void *capa)
{
unsigned int qcids[MAXQUOTAS] = {0, 0};
struct obd_device *obd;
struct filter_obd *filter;
struct dentry *dchild = NULL, *dparent = NULL;
+ struct lustre_handle lockh = { 0 };
struct lvfs_run_ctxt saved;
void *handle = NULL;
struct llog_cookie *fcc = NULL;
@@ -3856,6 +4052,10 @@ int filter_destroy(struct obd_export *exp, struct obdo *oa,
ENTRY;
LASSERT(oa->o_valid & OBD_MD_FLGROUP);
+ rc = filter_auth_capa(exp, NULL, oa->o_gr,
+ (struct lustre_capa *)capa, CAPA_OPC_OSS_DESTROY);
+ if (rc)
+ RETURN(rc);
obd = exp->exp_obd;
filter = &obd->u.filter;
@@ -3863,6 +4063,9 @@ int filter_destroy(struct obd_export *exp, struct obdo *oa,
push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
cleanup_phase = 1;
+ CDEBUG(D_INODE, "%s: filter_destroy(od->o_gr="LPU64",od->o_id="
+ LPU64")\n", obd->obd_name, oa->o_gr, oa->o_id);
+
dchild = filter_fid2dentry(obd, NULL, oa->o_gr, oa->o_id);
if (IS_ERR(dchild))
GOTO(cleanup, rc = PTR_ERR(dchild));
@@ -3875,15 +4078,14 @@ int filter_destroy(struct obd_export *exp, struct obdo *oa,
if (oa->o_valid & OBD_MD_FLCOOKIE) {
struct llog_ctxt *ctxt;
struct obd_llog_group *olg;
- fcc = &oa->o_lcookie;
+
olg = filter_find_olg(obd, oa->o_gr);
if (!olg) {
CERROR(" %s: can not find olg of group %d\n",
obd->obd_name, (int)oa->o_gr);
GOTO(cleanup, rc = PTR_ERR(olg));
}
- llog_group_set_export(olg, exp);
-
+ fcc = &oa->o_lcookie;
ctxt = llog_group_get_ctxt(olg, fcc->lgc_subsys + 1);
llog_cancel(ctxt, NULL, 1, fcc, 0);
llog_ctxt_put(ctxt);
@@ -3892,7 +4094,9 @@ int filter_destroy(struct obd_export *exp, struct obdo *oa,
GOTO(cleanup, rc = -ENOENT);
}
- filter_prepare_destroy(obd, oa->o_id, oa->o_gr);
+ rc = filter_prepare_destroy(obd, oa->o_id, oa->o_gr, &lockh);
+ if (rc)
+ GOTO(cleanup, rc);
/* Our MDC connection is established by the MDS to us */
if (oa->o_valid & OBD_MD_FLCOOKIE) {
@@ -3908,12 +4112,28 @@ int filter_destroy(struct obd_export *exp, struct obdo *oa,
* down(i_zombie) down(i_zombie)
* restart transaction
* (see BUG 4180) -bzzz
+ *
+ * take i_alloc_sem too to prevent other threads from writing to the
+ * file while we are truncating it. This can cause lock ordering issue
+ * between page lock, i_mutex & starting new journal handle.
+ * (see bug 20321) -johann
*/
+ down_write(&dchild->d_inode->i_alloc_sem);
LOCK_INODE_MUTEX(dchild->d_inode);
+
+ /* VBR: version recovery check */
+ rc = filter_version_get_check(exp, oti, dchild->d_inode);
+ if (rc) {
+ UNLOCK_INODE_MUTEX(dchild->d_inode);
+ up_write(&dchild->d_inode->i_alloc_sem);
+ GOTO(cleanup, rc);
+ }
+
handle = fsfilt_start_log(obd, dchild->d_inode, FSFILT_OP_SETATTR,
NULL, 1);
if (IS_ERR(handle)) {
UNLOCK_INODE_MUTEX(dchild->d_inode);
+ up_write(&dchild->d_inode->i_alloc_sem);
GOTO(cleanup, rc = PTR_ERR(handle));
}
@@ -3922,6 +4142,7 @@ int filter_destroy(struct obd_export *exp, struct obdo *oa,
rc = fsfilt_setattr(obd, dchild, handle, &iattr, 1);
rc2 = fsfilt_commit(obd, dchild->d_inode, handle, 0);
UNLOCK_INODE_MUTEX(dchild->d_inode);
+ up_write(&dchild->d_inode->i_alloc_sem);
if (rc)
GOTO(cleanup, rc);
if (rc2)
@@ -3966,7 +4187,7 @@ cleanup:
* on commit. then we call callback directly to free
* the fcc.
*/
- rc = filter_finish_transno(exp, oti, rc, sync);
+ rc = filter_finish_transno(exp, NULL, oti, rc, sync);
if (sync) {
filter_cancel_cookies_cb(obd, 0, fcc, rc);
fcc = NULL;
@@ -3982,6 +4203,8 @@ cleanup:
case 3:
filter_parent_unlock(dparent);
case 2:
+ filter_fini_destroy(obd, &lockh);
+
f_dput(dchild);
if (fcc != NULL)
OBD_FREE(fcc, sizeof(*fcc));
@@ -3998,9 +4221,8 @@ cleanup:
qcids[GRPQUOTA] = oa->o_gid;
rc2 = lquota_adjust(filter_quota_interface_ref, obd, qcids, NULL, rc,
FSFILT_OP_UNLINK);
-
if (rc2)
- CDEBUG(D_QUOTA, "filter adjust qunit! (rc:%d)\n", rc2);
+ CERROR("filter adjust qunit! (rc:%d)\n", rc2);
return rc;
}
@@ -4022,13 +4244,10 @@ static int filter_truncate(struct obd_export *exp, struct obd_info *oinfo,
", o_size = "LPD64"\n", oinfo->oi_oa->o_id,
oinfo->oi_oa->o_valid, oinfo->oi_policy.l_extent.start);
- rc = filter_auth_capa(exp, NULL, oinfo_mdsno(oinfo),
- oinfo_capa(oinfo), CAPA_OPC_OSS_TRUNC);
- if (rc)
- RETURN(rc);
-
oinfo->oi_oa->o_size = oinfo->oi_policy.l_extent.start;
+ oinfo->oi_oa->o_valid |= OBD_FL_TRUNC;
rc = filter_setattr(exp, oinfo, oti);
+ oinfo->oi_oa->o_valid &= ~OBD_FL_TRUNC;
RETURN(rc);
}
@@ -4042,17 +4261,18 @@ static int filter_sync(struct obd_export *exp, struct obdo *oa,
int rc, rc2;
ENTRY;
- rc = filter_auth_capa(exp, NULL, obdo_mdsno(oa),
+ LASSERT(oa->o_valid & OBD_MD_FLGROUP);
+ rc = filter_auth_capa(exp, NULL, oa->o_gr,
(struct lustre_capa *)capa, CAPA_OPC_OSS_WRITE);
if (rc)
RETURN(rc);
filter = &exp->exp_obd->u.filter;
- /* an objid of zero is taken to mean "sync whole filesystem" */
+ /* An objid of zero is taken to mean "sync whole filesystem" */
if (!oa || !(oa->o_valid & OBD_MD_FLID)) {
rc = fsfilt_sync(exp->exp_obd, filter->fo_obt.obt_sb);
- /* flush any remaining cancel messages out to the target */
+ /* Flush any remaining cancel messages out to the target */
filter_sync_llogs(exp->exp_obd, exp);
RETURN(rc);
}
@@ -4156,30 +4376,94 @@ static int filter_get_info(struct obd_export *exp, __u32 keylen,
memcpy(fiemap, &fm_key->fiemap, sizeof(*fiemap));
push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
- rc = fsfilt_iocontrol(obd, dentry->d_inode, NULL,
- EXT3_IOC_FIEMAP, (long)fiemap);
- if (rc) {
- f_dput(dentry);
- RETURN(rc);
- }
+ rc = fsfilt_iocontrol(obd, dentry, FSFILT_IOC_FIEMAP,
+ (long)fiemap);
pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
f_dput(dentry);
- RETURN(0);
+ RETURN(rc);
}
CDEBUG(D_IOCTL, "invalid key\n");
RETURN(-EINVAL);
}
+static inline int filter_setup_llog_group(struct obd_export *exp,
+ struct obd_device *obd,
+ int group)
+{
+ struct obd_llog_group *olg;
+ struct llog_ctxt *ctxt;
+ int rc;
+
+ olg = filter_find_create_olg(obd, group);
+ if (IS_ERR(olg))
+ RETURN(PTR_ERR(olg));
+
+ llog_group_set_export(olg, exp);
+
+ ctxt = llog_group_get_ctxt(olg, LLOG_MDS_OST_REPL_CTXT);
+ LASSERTF(ctxt != NULL, "ctxt is null\n");
+
+ rc = llog_receptor_accept(ctxt, exp->exp_imp_reverse);
+ llog_ctxt_put(ctxt);
+ return rc;
+}
+
+static int filter_set_grant_shrink(struct obd_export *exp,
+ struct ost_body *body)
+{
+ /* handle shrink grant */
+ cfs_spin_lock(&exp->exp_obd->obd_osfs_lock);
+ filter_grant_incoming(exp, &body->oa);
+ cfs_spin_unlock(&exp->exp_obd->obd_osfs_lock);
+
+ RETURN(0);
+
+}
+
+static int filter_set_mds_conn(struct obd_export *exp, void *val)
+{
+ struct obd_device *obd;
+ int rc = 0, group;
+ ENTRY;
+
+ obd = exp->exp_obd;
+ if (obd == NULL) {
+ CDEBUG(D_IOCTL, "invalid export %p\n", exp);
+ RETURN(-EINVAL);
+ }
+
+ LCONSOLE_WARN("%s: received MDS connection from %s\n", obd->obd_name,
+ obd_export_nid2str(exp));
+ obd->u.filter.fo_mdc_conn.cookie = exp->exp_handle.h_cookie;
+
+ /* setup llog imports */
+ if (val != NULL)
+ group = (int)(*(__u32 *)val);
+ else
+ group = 0; /* default value */
+
+ LASSERT_MDS_GROUP(group);
+ rc = filter_setup_llog_group(exp, obd, group);
+ if (rc)
+ goto out;
+
+ if (group == FILTER_GROUP_MDS0) {
+ /* setup llog group 1 for interop */
+ filter_setup_llog_group(exp, obd, FILTER_GROUP_LLOG);
+ }
+
+ lquota_setinfo(filter_quota_interface_ref, obd, exp);
+out:
+ RETURN(rc);
+}
+
static int filter_set_info_async(struct obd_export *exp, __u32 keylen,
void *key, __u32 vallen, void *val,
struct ptlrpc_request_set *set)
{
struct obd_device *obd;
- struct obd_llog_group *olg;
- struct llog_ctxt *ctxt;
- int rc = 0, group;
ENTRY;
obd = exp->exp_obd;
@@ -4189,6 +4473,7 @@ static int filter_set_info_async(struct obd_export *exp, __u32 keylen,
}
if (KEY_IS(KEY_CAPA_KEY)) {
+ int rc;
rc = filter_update_capa_key(obd, (struct lustre_capa_key *)val);
if (rc)
CERROR("filter update capability key failed: %d\n", rc);
@@ -4197,36 +4482,22 @@ static int filter_set_info_async(struct obd_export *exp, __u32 keylen,
if (KEY_IS(KEY_REVIMP_UPD)) {
filter_revimp_update(exp);
+ lquota_clearinfo(filter_quota_interface_ref, exp, exp->exp_obd);
RETURN(0);
}
- if (!KEY_IS(KEY_MDS_CONN))
- RETURN(-EINVAL);
-
- LCONSOLE_WARN("%s: received MDS connection from %s\n", obd->obd_name,
- obd_export_nid2str(exp));
- obd->u.filter.fo_mdc_conn.cookie = exp->exp_handle.h_cookie;
-
- /* setup llog imports */
- LASSERT(val != NULL);
- group = (int)(*(__u32 *)val);
- LASSERT(group >= FILTER_GROUP_MDS0);
-
- olg = filter_find_create_olg(obd, group);
- if (IS_ERR(olg))
- RETURN(PTR_ERR(olg));
-
- llog_group_set_export(olg, exp);
-
- ctxt = llog_group_get_ctxt(olg, LLOG_MDS_OST_REPL_CTXT);
- LASSERTF(ctxt != NULL, "ctxt is null\n");
+ if (KEY_IS(KEY_SPTLRPC_CONF)) {
+ filter_adapt_sptlrpc_conf(obd, 0);
+ RETURN(0);
+ }
- rc = llog_receptor_accept(ctxt, exp->exp_imp_reverse);
- llog_ctxt_put(ctxt);
+ if (KEY_IS(KEY_MDS_CONN))
+ RETURN(filter_set_mds_conn(exp, val));
- lquota_setinfo(filter_quota_interface_ref, exp, obd);
+ if (KEY_IS(KEY_GRANT_SHRINK))
+ RETURN(filter_set_grant_shrink(exp, val));
- RETURN(rc);
+ RETURN(-EINVAL);
}
int filter_iocontrol(unsigned int cmd, struct obd_export *exp,
@@ -4238,7 +4509,7 @@ int filter_iocontrol(unsigned int cmd, struct obd_export *exp,
switch (cmd) {
case OBD_IOC_ABORT_RECOVERY: {
- CERROR("aborting recovery for device %s\n", obd->obd_name);
+ LCONSOLE_WARN("%s: Aborting recovery.\n", obd->obd_name);
target_stop_recovery_thread(obd);
RETURN(0);
}
@@ -4312,8 +4583,8 @@ static int filter_health_check(struct obd_device *obd)
rc = 1;
#ifdef USE_HEALTH_CHECK_WRITE
- LASSERT(filter->fo_health_check_filp != NULL);
- rc |= !!lvfs_check_io_health(obd, filter->fo_health_check_filp);
+ LASSERT(filter->fo_obt.obt_health_check_filp != NULL);
+ rc |= !!lvfs_check_io_health(obd, filter->fo_obt.obt_health_check_filp);
#endif
return rc;
}
@@ -4332,39 +4603,13 @@ static int filter_process_config(struct obd_device *obd, obd_count len,
int rc = 0;
switch (lcfg->lcfg_command) {
- case LCFG_SPTLRPC_CONF: {
- struct filter_obd *filter = &obd->u.filter;
- struct sptlrpc_conf_log *log;
- struct sptlrpc_rule_set tmp_rset;
-
- log = sptlrpc_conf_log_extract(lcfg);
- if (IS_ERR(log)) {
- rc = PTR_ERR(log);
- break;
- }
-
- sptlrpc_rule_set_init(&tmp_rset);
-
- rc = sptlrpc_rule_set_from_log(&tmp_rset, log);
- if (rc) {
- CERROR("obd %s: failed get sptlrpc rules: %d\n",
- obd->obd_name, rc);
- break;
- }
-
- write_lock(&filter->fo_sptlrpc_lock);
- sptlrpc_rule_set_free(&filter->fo_sptlrpc_rset);
- filter->fo_sptlrpc_rset = tmp_rset;
- write_unlock(&filter->fo_sptlrpc_lock);
-
- sptlrpc_target_update_exp_flavor(obd, &tmp_rset);
- break;
- }
default:
lprocfs_filter_init_vars(&lvars);
rc = class_process_proc_param(PARAM_OST, lvars.obd_vars,
lcfg, obd);
+ if (rc > 0)
+ rc = 0;
break;
}
@@ -4413,11 +4658,15 @@ extern quota_interface_t filter_quota_interface;
static int __init obdfilter_init(void)
{
struct lprocfs_static_vars lvars;
- int rc;
+ int rc, i;
+
+ /** sanity check for group<->mdsno conversion */
+ for (i = 0; i < 32; i++)
+ LASSERT(objgrp_to_mdsno(mdt_to_obd_objgrp(i)) == i);
lprocfs_filter_init_vars(&lvars);
- request_module("lquota");
+ cfs_request_module("%s", "lquota");
OBD_ALLOC(obdfilter_created_scratchpad,
OBDFILTER_CREATED_SCRATCHPAD_ENTRIES *
sizeof(*obdfilter_created_scratchpad));