X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Fofd%2Fofd_fs.c;h=25c000942d8f277f06edd14dfa754531aa87e6d0;hp=ea2fd06238f0ffbc494fae2aa155b730b16c7504;hb=94a6bc2a947359a9b03b039a2fb4bb49842666e5;hpb=71eb67f041732093696781cc3ddddb7d9aac66a6 diff --git a/lustre/ofd/ofd_fs.c b/lustre/ofd/ofd_fs.c index ea2fd06..25c0009 100644 --- a/lustre/ofd/ofd_fs.c +++ b/lustre/ofd/ofd_fs.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -27,7 +23,7 @@ * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. * - * Copyright (c) 2011, 2012, Whamcloud, Inc. + * Copyright (c) 2012, 2014 Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -35,151 +31,377 @@ * * lustre/ofd/ofd_fs.c * - * Author: Alexey Zhuravlev - * Author: Mikhail Pershin + * This file provides helper functions to handle various data stored on disk. + * It uses OSD API and works with any OSD. + * + * Note: this file contains also functions for sequence handling, they are + * placed here improperly and will be moved to the ofd_dev.c and ofd_internal.h, + * this comment is to be removed after that. + * + * Author: Alexey Zhuravlev + * Author: Mikhail Pershin */ #define DEBUG_SUBSYSTEM S_FILTER #include "ofd_internal.h" -int ofd_record_write(const struct lu_env *env, struct ofd_device *ofd, - struct dt_object *dt, struct lu_buf *buf, loff_t *off) +/** + * Restrict precreate batch count by its upper limit. + * + * The precreate batch count is a number of precreates to do in + * single transaction. 
It has upper limit - ofd_device::ofd_precreate_batch + * value which shouldn't be exceeded. + * + * \param[in] ofd OFD device + * \param[in] int number of updates in the batch + * + * \retval \a batch limited by ofd_device::ofd_precreate_batch + */ +int ofd_precreate_batch(struct ofd_device *ofd, int batch) { - struct thandle *th; - int rc; - - ENTRY; + int count; - LASSERT(dt); + spin_lock(&ofd->ofd_batch_lock); + count = min(ofd->ofd_precreate_batch, batch); + spin_unlock(&ofd->ofd_batch_lock); - th = dt_trans_create(env, ofd->ofd_osd); - if (IS_ERR(th)) - RETURN(PTR_ERR(th)); + return count; +} - rc = dt_declare_record_write(env, dt, buf->lb_len, *off, th); - if (rc == 0) { - rc = dt_trans_start_local(env, ofd->ofd_osd, th); - if (rc == 0) - rc = dt_record_write(env, dt, buf, off, th); +/** + * Get ofd_seq for \a seq. + * + * Function finds appropriate structure by \a seq number and + * increases the reference counter of that structure. + * + * \param[in] ofd OFD device + * \param[in] seq sequence number, FID sequence number usually + * + * \retval pointer to the requested ofd_seq structure + * \retval NULL if ofd_seq is not found + */ +struct ofd_seq *ofd_seq_get(struct ofd_device *ofd, obd_seq seq) +{ + struct ofd_seq *oseq; + + read_lock(&ofd->ofd_seq_list_lock); + list_for_each_entry(oseq, &ofd->ofd_seq_list, os_list) { + if (ostid_seq(&oseq->os_oi) == seq) { + atomic_inc(&oseq->os_refc); + read_unlock(&ofd->ofd_seq_list_lock); + return oseq; + } } - dt_trans_stop(env, ofd->ofd_osd, th); - - RETURN(rc); + read_unlock(&ofd->ofd_seq_list_lock); + return NULL; } -int ofd_precreate_batch(struct ofd_device *ofd, int batch) +/** + * Drop a reference to ofd_seq. + * + * The paired function to the ofd_seq_get(). It decrease the reference counter + * of the ofd_seq structure and free it if that reference was last one. 
+ * + * \param[in] env execution environment + * \param[in] oseq ofd_seq structure to put + */ +void ofd_seq_put(const struct lu_env *env, struct ofd_seq *oseq) { - int count; - - spin_lock(&ofd->ofd_objid_lock); - count = min(ofd->ofd_precreate_batch, batch); - spin_unlock(&ofd->ofd_objid_lock); + if (atomic_dec_and_test(&oseq->os_refc)) { + LASSERT(list_empty(&oseq->os_list)); + LASSERT(oseq->os_lastid_obj != NULL); + lu_object_put(env, &oseq->os_lastid_obj->do_lu); + OBD_FREE_PTR(oseq); + } +} - return count; +/** + * Add a new ofd_seq to the given OFD device. + * + * First it checks if there is already existent ofd_seq with the same + * sequence number as used by \a new_seq. + * If such ofd_seq is not found then the \a new_seq is added to the list + * of all ofd_seq structures else the \a new_seq is dropped and the found + * ofd_seq is returned back. + * + * \param[in] env execution environment + * \param[in] ofd OFD device + * \param[in] new_seq new ofd_seq to be added + * + * \retval ofd_seq structure + */ +static struct ofd_seq *ofd_seq_add(const struct lu_env *env, + struct ofd_device *ofd, + struct ofd_seq *new_seq) +{ + struct ofd_seq *os = NULL; + + write_lock(&ofd->ofd_seq_list_lock); + list_for_each_entry(os, &ofd->ofd_seq_list, os_list) { + if (ostid_seq(&os->os_oi) == ostid_seq(&new_seq->os_oi)) { + atomic_inc(&os->os_refc); + write_unlock(&ofd->ofd_seq_list_lock); + /* The seq has not been added to the list */ + ofd_seq_put(env, new_seq); + return os; + } + } + atomic_inc(&new_seq->os_refc); + list_add_tail(&new_seq->os_list, &ofd->ofd_seq_list); + ofd->ofd_seq_count++; + write_unlock(&ofd->ofd_seq_list_lock); + return new_seq; } -obd_id ofd_last_id(struct ofd_device *ofd, obd_seq group) +/** + * Get last object ID for the given sequence. 
+ * + * \param[in] oseq OFD sequence structure + * + * \retval the last object ID for this sequence + */ +obd_id ofd_seq_last_oid(struct ofd_seq *oseq) { obd_id id; - LASSERT(group <= ofd->ofd_max_group); - - spin_lock(&ofd->ofd_objid_lock); - id = ofd->ofd_last_objids[group]; - spin_unlock(&ofd->ofd_objid_lock); + spin_lock(&oseq->os_last_oid_lock); + id = ostid_id(&oseq->os_oi); + spin_unlock(&oseq->os_last_oid_lock); return id; } -void ofd_last_id_set(struct ofd_device *ofd, obd_id id, obd_seq group) +/** + * Set new last object ID for the given sequence. + * + * \param[in] oseq OFD sequence + * \param[in] id the new OID to set + */ +void ofd_seq_last_oid_set(struct ofd_seq *oseq, obd_id id) { - LASSERT(group <= ofd->ofd_max_group); - spin_lock(&ofd->ofd_objid_lock); - if (ofd->ofd_last_objids[group] < id) - ofd->ofd_last_objids[group] = id; - spin_unlock(&ofd->ofd_objid_lock); + spin_lock(&oseq->os_last_oid_lock); + if (likely(ostid_id(&oseq->os_oi) < id)) + ostid_set_id(&oseq->os_oi, id); + spin_unlock(&oseq->os_last_oid_lock); } -int ofd_last_id_write(const struct lu_env *env, struct ofd_device *ofd, - obd_seq group) +/** + * Update last used OID on disk for the given sequence. + * + * The last used object ID is stored persistently on disk and + * must be written when updated. This function writes the sequence data. + * The format is just an object ID of the latest used object FID. + * Each ID is stored in per-sequence file.
+ * + * \param[in] env execution environment + * \param[in] ofd OFD device + * \param[in] oseq ofd_seq structure with data to write + * + * \retval 0 on successful write of data from \a oseq + * \retval negative value on error + */ +int ofd_seq_last_oid_write(const struct lu_env *env, struct ofd_device *ofd, + struct ofd_seq *oseq) { struct ofd_thread_info *info = ofd_info(env); obd_id tmp; + struct dt_object *obj = oseq->os_lastid_obj; + struct thandle *th; int rc; ENTRY; + tmp = cpu_to_le64(ofd_seq_last_oid(oseq)); + info->fti_buf.lb_buf = &tmp; info->fti_buf.lb_len = sizeof(tmp); info->fti_off = 0; - CDEBUG(D_INODE, "%s: write last_objid for group "LPU64": "LPU64"\n", - ofd_obd(ofd)->obd_name, group, ofd_last_id(ofd, group)); + LASSERT(obj != NULL); + + th = dt_trans_create(env, ofd->ofd_osd); + if (IS_ERR(th)) + RETURN(PTR_ERR(th)); - tmp = cpu_to_le64(ofd_last_id(ofd, group)); + rc = dt_declare_record_write(env, obj, &info->fti_buf, + info->fti_off, th); + if (rc < 0) + GOTO(out, rc); + rc = dt_trans_start_local(env, ofd->ofd_osd, th); + if (rc < 0) + GOTO(out, rc); + rc = dt_record_write(env, obj, &info->fti_buf, &info->fti_off, + th); + if (rc < 0) + GOTO(out, rc); - rc = ofd_record_write(env, ofd, ofd->ofd_lastid_obj[group], - &info->fti_buf, &info->fti_off); - RETURN(rc); + CDEBUG(D_INODE, "%s: write last_objid "DOSTID": rc = %d\n", + ofd_name(ofd), POSTID(&oseq->os_oi), rc); + EXIT; +out: + dt_trans_stop(env, ofd->ofd_osd, th); + return rc; } -int ofd_last_group_write(const struct lu_env *env, struct ofd_device *ofd) +/** + * Deregister LWP items for FLDB and SEQ client on OFD. + * + * LWP is lightweight proxy - simplified connection between + * servers. It is used for FID Location Database (FLDB) and + * sequence (SEQ) client-server interactions. + * + * This function is used during server cleanup process to free + * LWP items that were previously set up upon OFD start.
+ * + * \param[in] ofd OFD device + */ +static void ofd_deregister_seq_exp(struct ofd_device *ofd) { - struct ofd_thread_info *info = ofd_info(env); - obd_seq tmp; - int rc; + struct seq_server_site *ss = &ofd->ofd_seq_site; - ENTRY; + if (ss->ss_client_seq != NULL) { + lustre_deregister_lwp_item(&ss->ss_client_seq->lcs_exp); + ss->ss_client_seq->lcs_exp = NULL; + } - info->fti_buf.lb_buf = &tmp; - info->fti_buf.lb_len = sizeof(tmp); - info->fti_off = 0; + if (ss->ss_server_fld != NULL) { + lustre_deregister_lwp_item(&ss->ss_server_fld->lsf_control_exp); + ss->ss_server_fld->lsf_control_exp = NULL; + } +} - tmp = cpu_to_le32(ofd->ofd_max_group); +/** + * Stop FLDB server on OFD. + * + * This function is part of OFD cleanup process. + * + * \param[in] env execution environment + * \param[in] ofd OFD device + * + */ +static void ofd_fld_fini(const struct lu_env *env, struct ofd_device *ofd) +{ + struct seq_server_site *ss = &ofd->ofd_seq_site; - rc = ofd_record_write(env, ofd, ofd->ofd_last_group_file, - &info->fti_buf, &info->fti_off); + if (ss != NULL && ss->ss_server_fld != NULL) { + fld_server_fini(env, ss->ss_server_fld); + OBD_FREE_PTR(ss->ss_server_fld); + ss->ss_server_fld = NULL; + } +} - RETURN(rc); +/** + * Free sequence structures on OFD. + * + * This function is part of OFD cleanup process, it goes through + * the list of ofd_seq structures stored in ofd_device structure + * and frees them. 
+ * + * \param[in] env execution environment + * \param[in] ofd OFD device + */ +void ofd_seqs_free(const struct lu_env *env, struct ofd_device *ofd) +{ + struct ofd_seq *oseq; + struct ofd_seq *tmp; + struct list_head dispose; + + INIT_LIST_HEAD(&dispose); + write_lock(&ofd->ofd_seq_list_lock); + list_for_each_entry_safe(oseq, tmp, &ofd->ofd_seq_list, os_list) + list_move(&oseq->os_list, &dispose); + write_unlock(&ofd->ofd_seq_list_lock); + + while (!list_empty(&dispose)) { + oseq = container_of0(dispose.next, struct ofd_seq, os_list); + list_del_init(&oseq->os_list); + ofd_seq_put(env, oseq); + } } -void ofd_group_fini(const struct lu_env *env, struct ofd_device *ofd, - int group) +/** + * Stop FLDB and SEQ services on OFD. + * + * This function is part of OFD cleanup process. + * + * \param[in] env execution environment + * \param[in] ofd OFD device + * + */ +void ofd_seqs_fini(const struct lu_env *env, struct ofd_device *ofd) { - LASSERT(ofd->ofd_lastid_obj[group]); - lu_object_put(env, &ofd->ofd_lastid_obj[group]->do_lu); - ofd->ofd_lastid_obj[group] = NULL; + int rc; + + ofd_deregister_seq_exp(ofd); + + rc = ofd_fid_fini(env, ofd); + if (rc != 0) + CERROR("%s: fid fini error: rc = %d\n", ofd_name(ofd), rc); + + ofd_fld_fini(env, ofd); + + ofd_seqs_free(env, ofd); + + LASSERT(list_empty(&ofd->ofd_seq_list)); } -int ofd_group_load(const struct lu_env *env, struct ofd_device *ofd, int group) +/** + * Return ofd_seq structure filled with valid data. + * + * This function gets the ofd_seq by sequence number and read + * corresponding data from disk. 
+ * + * \param[in] env execution environment + * \param[in] ofd OFD device + * \param[in] seq sequence number + * + * \retval ofd_seq structure filled with data + * \retval ERR_PTR pointer on error + */ +struct ofd_seq *ofd_seq_load(const struct lu_env *env, struct ofd_device *ofd, + obd_seq seq) { struct ofd_thread_info *info = ofd_info(env); + struct ofd_seq *oseq = NULL; struct dt_object *dob; obd_id lastid; int rc; ENTRY; - /* if group is already initialized */ - if (ofd->ofd_lastid_obj[group]) - RETURN(0); + /* if seq is already initialized */ + oseq = ofd_seq_get(ofd, seq); + if (oseq != NULL) + RETURN(oseq); + + OBD_ALLOC_PTR(oseq); + if (oseq == NULL) + RETURN(ERR_PTR(-ENOMEM)); - lu_local_obj_fid(&info->fti_fid, OFD_GROUP0_LAST_OID + group); + lu_last_id_fid(&info->fti_fid, seq, ofd->ofd_lut.lut_lsd.lsd_osd_index); memset(&info->fti_attr, 0, sizeof(info->fti_attr)); info->fti_attr.la_valid = LA_MODE; info->fti_attr.la_mode = S_IFREG | S_IRUGO | S_IWUSR; info->fti_dof.dof_type = dt_mode_to_dft(S_IFREG); - /* create object tracking per-group last created + /* create object tracking per-seq last created * id to be used by orphan recovery mechanism */ dob = dt_find_or_create(env, ofd->ofd_osd, &info->fti_fid, &info->fti_dof, &info->fti_attr); - if (IS_ERR(dob)) - RETURN(PTR_ERR(dob)); + if (IS_ERR(dob)) { + OBD_FREE_PTR(oseq); + RETURN((void *)dob); + } - ofd->ofd_lastid_obj[group] = dob; - mutex_init(&ofd->ofd_create_locks[group]); + oseq->os_lastid_obj = dob; + + INIT_LIST_HEAD(&oseq->os_list); + mutex_init(&oseq->os_create_lock); + spin_lock_init(&oseq->os_last_oid_lock); + ostid_set_seq(&oseq->os_oi, seq); + + atomic_set(&oseq->os_refc, 1); rc = dt_attr_get(env, dob, &info->fti_attr, BYPASS_CAPA); if (rc) @@ -187,10 +409,8 @@ int ofd_group_load(const struct lu_env *env, struct ofd_device *ofd, int group) if (info->fti_attr.la_size == 0) { /* object is just created, initialize last id */ - ofd->ofd_last_objids[group] = OFD_INIT_OBJID; - ofd_last_id_set(ofd, 
OFD_INIT_OBJID, group); - ofd_last_id_write(env, ofd, group); - ofd_last_group_write(env, ofd); + ofd_seq_last_oid_set(oseq, OFD_INIT_OBJID); + ofd_seq_last_oid_write(env, ofd, oseq); } else if (info->fti_attr.la_size == sizeof(lastid)) { info->fti_off = 0; info->fti_buf.lb_buf = &lastid; @@ -198,275 +418,198 @@ int ofd_group_load(const struct lu_env *env, struct ofd_device *ofd, int group) rc = dt_record_read(env, dob, &info->fti_buf, &info->fti_off); if (rc) { - CERROR("can't read last_id: %d\n", rc); + CERROR("%s: can't read last_id: rc = %d\n", + ofd_name(ofd), rc); GOTO(cleanup, rc); } - ofd->ofd_last_objids[group] = le64_to_cpu(lastid); + ofd_seq_last_oid_set(oseq, le64_to_cpu(lastid)); } else { - CERROR("corrupted size %Lu LAST_ID of group %u\n", - (unsigned long long)info->fti_attr.la_size, group); - rc = -EINVAL; + CERROR("%s: corrupted size "LPU64" LAST_ID of seq "LPX64"\n", + ofd_name(ofd), (__u64)info->fti_attr.la_size, seq); + GOTO(cleanup, rc = -EINVAL); } - RETURN(0); + oseq = ofd_seq_add(env, ofd, oseq); + RETURN((oseq != NULL) ? oseq : ERR_PTR(-ENOENT)); cleanup: - ofd_group_fini(env, ofd, group); - RETURN(rc); + ofd_seq_put(env, oseq); + return ERR_PTR(rc); } -/* ofd groups managements */ -int ofd_groups_init(const struct lu_env *env, struct ofd_device *ofd) +/** + * initialize local FLDB server. 
+ * + * \param[in] env execution environment + * \param[in] uuid unique name for this FLDB server + * \param[in] ofd OFD device + * + * \retval 0 on successful initialization + * \retval negative value on error + */ +static int ofd_fld_init(const struct lu_env *env, const char *uuid, + struct ofd_device *ofd) { - struct ofd_thread_info *info = ofd_info(env); - unsigned long groups_size; - obd_seq last_group; - int rc = 0; - int i; + struct seq_server_site *ss = &ofd->ofd_seq_site; + int rc; ENTRY; - spin_lock_init(&ofd->ofd_objid_lock); - - rc = dt_attr_get(env, ofd->ofd_last_group_file, - &info->fti_attr, BYPASS_CAPA); - if (rc) - GOTO(cleanup, rc); - - groups_size = (unsigned long)info->fti_attr.la_size; - - if (groups_size == sizeof(last_group)) { - info->fti_off = 0; - info->fti_buf.lb_buf = &last_group; - info->fti_buf.lb_len = sizeof(last_group); + OBD_ALLOC_PTR(ss->ss_server_fld); + if (ss->ss_server_fld == NULL) + RETURN(rc = -ENOMEM); - rc = dt_record_read(env, ofd->ofd_last_group_file, - &info->fti_buf, &info->fti_off); - if (rc) { - CERROR("can't read LAST_GROUP: %d\n", rc); - GOTO(cleanup, rc); - } - - ofd->ofd_max_group = le32_to_cpu(last_group); - LASSERT(ofd->ofd_max_group <= OFD_MAX_GROUPS); - } else if (groups_size == 0) { - ofd->ofd_max_group = 0; - } else { - CERROR("groups file is corrupted?
size = %lu\n", groups_size); - GOTO(cleanup, rc = -EIO); - } - - for (i = 0; i <= ofd->ofd_max_group; i++) { - rc = ofd_group_load(env, ofd, i); - if (rc) { - CERROR("can't load group %d: %d\n", i, rc); - /* Clean all previously set groups */ - while (i > 0) - ofd_group_fini(env, ofd, --i); - GOTO(cleanup, rc); - } + rc = fld_server_init(env, ss->ss_server_fld, ofd->ofd_osd, uuid, + LU_SEQ_RANGE_OST); + if (rc < 0) { + OBD_FREE_PTR(ss->ss_server_fld); + ss->ss_server_fld = NULL; + RETURN(rc); } - - CDEBUG(D_OTHER, "%s: %u groups initialized\n", - ofd_obd(ofd)->obd_name, ofd->ofd_max_group + 1); -cleanup: - RETURN(rc); + RETURN(0); } -int ofd_clients_data_init(const struct lu_env *env, struct ofd_device *ofd, - unsigned long fsize) +/** + * Update local FLDB copy from master server. + * + * This callback is called when LWP is connected to the server. + * It retrieves its FLDB entries from MDT0, and it only happens + * when upgrading the existing file system to 2.6. + * + * \param[in] data OFD device + * + * \retval 0 on successful FLDB update + * \retval negative value in case of failure + */ +static int ofd_register_lwp_callback(void *data) { - struct obd_device *obd = ofd_obd(ofd); - struct lr_server_data *lsd = &ofd->ofd_lut.lut_lsd; - struct lsd_client_data *lcd = NULL; - struct filter_export_data *fed; - int cl_idx; - int rc = 0; - loff_t off = lsd->lsd_client_start; - - CLASSERT(offsetof(struct lsd_client_data, lcd_padding) + - sizeof(lcd->lcd_padding) == LR_CLIENT_SIZE); - - OBD_ALLOC_PTR(lcd); - if (lcd == NULL) - RETURN(-ENOMEM); - - for (cl_idx = 0; off < fsize; cl_idx++) { - struct obd_export *exp; - __u64 last_rcvd; - - /* Don't assume off is incremented properly by - * fsfilt_read_record(), in case sizeof(*lcd) - * isn't the same as fsd->lsd_client_size.
*/ - off = lsd->lsd_client_start + cl_idx * lsd->lsd_client_size; - rc = tgt_client_data_read(env, &ofd->ofd_lut, lcd, &off, cl_idx); - if (rc) { - CERROR("error reading FILT %s idx %d off %llu: rc %d\n", - LAST_RCVD, cl_idx, off, rc); - rc = 0; - break; /* read error shouldn't cause startup to fail */ - } + struct lu_env *env; + struct ofd_device *ofd = data; + struct lu_server_fld *fld = ofd->ofd_seq_site.ss_server_fld; + int rc; - if (lcd->lcd_uuid[0] == '\0') { - CDEBUG(D_INFO, "skipping zeroed client at offset %d\n", - cl_idx); - continue; - } - - last_rcvd = lcd->lcd_last_transno; + ENTRY; - /* These exports are cleaned up by ofd_disconnect(), so they - * need to be set up like real exports as ofd_connect() does. - */ - exp = class_new_export(obd, (struct obd_uuid *)lcd->lcd_uuid); + if (!likely(fld->lsf_new)) + RETURN(0); - CDEBUG(D_HA, "RCVRNG CLIENT uuid: %s idx: %d lr: "LPU64 - " srv lr: "LPU64"\n", lcd->lcd_uuid, cl_idx, - last_rcvd, lsd->lsd_last_transno); + OBD_ALLOC_PTR(env); + if (env == NULL) + RETURN(-ENOMEM); - if (IS_ERR(exp)) { - if (PTR_ERR(exp) == -EALREADY) { - /* export already exists, zero out this one */ - CERROR("Duplicate export %s!\n", lcd->lcd_uuid); - continue; - } - GOTO(err_out, rc = PTR_ERR(exp)); - } + rc = lu_env_init(env, LCT_DT_THREAD); + if (rc < 0) + GOTO(out, rc); - fed = &exp->exp_filter_data; - *fed->fed_ted.ted_lcd = *lcd; - - rc = tgt_client_add(env, exp, cl_idx); - LASSERTF(rc == 0, "rc = %d\n", rc); /* can't fail existing */ - /* VBR: set export last committed version */ - exp->exp_last_committed = last_rcvd; - spin_lock(&exp->exp_lock); - exp->exp_connecting = 0; - exp->exp_in_recovery = 0; - spin_unlock(&exp->exp_lock); - obd->obd_max_recoverable_clients++; - class_export_put(exp); - - /* Need to check last_rcvd even for duplicated exports. 
*/ - CDEBUG(D_OTHER, "client at idx %d has last_rcvd = "LPU64"\n", - cl_idx, last_rcvd); - - spin_lock(&ofd->ofd_lut.lut_translock); - if (last_rcvd > lsd->lsd_last_transno) - lsd->lsd_last_transno = last_rcvd; - spin_unlock(&ofd->ofd_lut.lut_translock); + rc = fld_update_from_controller(env, fld); + if (rc < 0) { + CERROR("%s: cannot update controller: rc = %d\n", + ofd_name(ofd), rc); + GOTO(out, rc); } - -err_out: - OBD_FREE_PTR(lcd); - RETURN(rc); + EXIT; +out: + lu_env_fini(env); + OBD_FREE_PTR(env); + return rc; } -int ofd_server_data_init(const struct lu_env *env, struct ofd_device *ofd) +/** + * Get LWP exports from LWP connection for local FLDB server and SEQ client. + * + * This function is part of setup process and initialize FLDB server and SEQ + * client, so they may work with remote servers. + * + * \param[in] ofd OFD device + * + * \retval 0 on successful export get + * \retval negative value on error + */ +static int ofd_register_seq_exp(struct ofd_device *ofd) { - struct ofd_thread_info *info = ofd_info(env); - struct lr_server_data *lsd = &ofd->ofd_lut.lut_lsd; - struct obd_device *obd = ofd_obd(ofd); - unsigned long last_rcvd_size; - int rc; - - rc = dt_attr_get(env, ofd->ofd_lut.lut_last_rcvd, &info->fti_attr, - BYPASS_CAPA); - if (rc) - RETURN(rc); - - last_rcvd_size = (unsigned long)info->fti_attr.la_size; + struct seq_server_site *ss = &ofd->ofd_seq_site; + char *lwp_name = NULL; + int rc; + + OBD_ALLOC(lwp_name, MAX_OBD_NAME); + if (lwp_name == NULL) + GOTO(out_free, rc = -ENOMEM); + + rc = tgt_name2lwp_name(ofd_name(ofd), lwp_name, MAX_OBD_NAME, 0); + if (rc != 0) + GOTO(out_free, rc); + + rc = lustre_register_lwp_item(lwp_name, &ss->ss_client_seq->lcs_exp, + NULL, NULL); + if (rc != 0) + GOTO(out_free, rc); + + rc = lustre_register_lwp_item(lwp_name, + &ss->ss_server_fld->lsf_control_exp, + ofd_register_lwp_callback, ofd); + if (rc != 0) { + lustre_deregister_lwp_item(&ss->ss_client_seq->lcs_exp); + ss->ss_client_seq->lcs_exp = NULL; + 
GOTO(out_free, rc); + } +out_free: + if (lwp_name != NULL) + OBD_FREE(lwp_name, MAX_OBD_NAME); - /* ensure padding in the struct is the correct size */ - CLASSERT (offsetof(struct lr_server_data, lsd_padding) + - sizeof(lsd->lsd_padding) == LR_SERVER_SIZE); + return rc; +} - if (last_rcvd_size == 0) { - LCONSOLE_WARN("%s: new disk, initializing\n", obd->obd_name); +/** + * Initialize SEQ and FLD service on OFD. + * + * This is part of OFD setup process. + * + * \param[in] env execution environment + * \param[in] ofd OFD device + * + * \retval 0 on successful services initialization + * \retval negative value on error + */ +int ofd_seqs_init(const struct lu_env *env, struct ofd_device *ofd) +{ + int rc; - memcpy(lsd->lsd_uuid, obd->obd_uuid.uuid, - sizeof(lsd->lsd_uuid)); - lsd->lsd_last_transno = 0; - lsd->lsd_mount_count = 0; - lsd->lsd_server_size = LR_SERVER_SIZE; - lsd->lsd_client_start = LR_CLIENT_START; - lsd->lsd_client_size = LR_CLIENT_SIZE; - lsd->lsd_subdir_count = FILTER_SUBDIR_COUNT; - lsd->lsd_feature_incompat = OBD_INCOMPAT_OST; - } else { - rc = tgt_server_data_read(env, &ofd->ofd_lut); - if (rc) { - CDEBUG(D_INODE,"OBD ofd: error reading %s: rc %d\n", - LAST_RCVD, rc); - GOTO(err_fsd, rc); - } - if (strcmp((char *)lsd->lsd_uuid, - (char *)obd->obd_uuid.uuid)) { - LCONSOLE_ERROR("Trying to start OBD %s using the wrong" - " disk %s. 
Were the /dev/ assignments " - "rearranged?\n", - obd->obd_uuid.uuid, lsd->lsd_uuid); - GOTO(err_fsd, rc = -EINVAL); - } + rc = ofd_fid_init(env, ofd); + if (rc != 0) { + CERROR("%s: fid init error: rc = %d\n", ofd_name(ofd), rc); + return rc; } - lsd->lsd_mount_count++; - obd->u.obt.obt_mount_count = lsd->lsd_mount_count; - obd->u.obt.obt_instance = (__u32)obd->u.obt.obt_mount_count; - ofd->ofd_subdir_count = lsd->lsd_subdir_count; - - if (lsd->lsd_feature_incompat & ~OFD_INCOMPAT_SUPP) { - CERROR("%s: unsupported incompat filesystem feature(s) %x\n", - obd->obd_name, - lsd->lsd_feature_incompat & ~OFD_INCOMPAT_SUPP); - GOTO(err_fsd, rc = -EINVAL); - } - if (lsd->lsd_feature_rocompat & ~OFD_ROCOMPAT_SUPP) { - CERROR("%s: unsupported read-only filesystem feature(s) %x\n", - obd->obd_name, - lsd->lsd_feature_rocompat & ~OFD_ROCOMPAT_SUPP); - /* Do something like remount filesystem read-only */ - GOTO(err_fsd, rc = -EINVAL); + rc = ofd_fld_init(env, ofd_name(ofd), ofd); + if (rc < 0) { + CERROR("%s: Can't init fld, rc %d\n", ofd_name(ofd), rc); + return rc; } - CDEBUG(D_INODE, "%s: server last_transno : "LPU64"\n", - obd->obd_name, lsd->lsd_last_transno); - CDEBUG(D_INODE, "%s: server mount_count: "LPU64"\n", - obd->obd_name, lsd->lsd_mount_count); - CDEBUG(D_INODE, "%s: server data size: %u\n", - obd->obd_name, lsd->lsd_server_size); - CDEBUG(D_INODE, "%s: per-client data start: %u\n", - obd->obd_name, lsd->lsd_client_start); - CDEBUG(D_INODE, "%s: per-client data size: %u\n", - obd->obd_name, lsd->lsd_client_size); - CDEBUG(D_INODE, "%s: server subdir_count: %u\n", - obd->obd_name, lsd->lsd_subdir_count); - CDEBUG(D_INODE, "%s: last_rcvd clients: %lu\n", obd->obd_name, - last_rcvd_size <= lsd->lsd_client_start ? 
0 : - (last_rcvd_size - lsd->lsd_client_start) / - lsd->lsd_client_size); - - if (!obd->obd_replayable) - CWARN("%s: recovery support OFF\n", obd->obd_name); - - rc = ofd_clients_data_init(env, ofd, last_rcvd_size); - - spin_lock(&ofd->ofd_lut.lut_translock); - obd->obd_last_committed = lsd->lsd_last_transno; - ofd->ofd_lut.lut_last_transno = lsd->lsd_last_transno; - spin_unlock(&ofd->ofd_lut.lut_translock); - - /* save it, so mount count and last_transno is current */ - rc = tgt_server_data_update(env, &ofd->ofd_lut, 0); - if (rc) - GOTO(err_fsd, rc); - - RETURN(0); + rc = ofd_register_seq_exp(ofd); + if (rc < 0) { + CERROR("%s: Can't init seq exp, rc %d\n", ofd_name(ofd), rc); + return rc; + } -err_fsd: - class_disconnect_exports(obd); - RETURN(rc); + rwlock_init(&ofd->ofd_seq_list_lock); + INIT_LIST_HEAD(&ofd->ofd_seq_list); + ofd->ofd_seq_count = 0; + return rc; } +/** + * Initialize storage for the OFD. + * + * This function sets up service files for OFD. Currently, the only + * service file is "health_check". 
+ * + * \param[in] env execution environment + * \param[in] ofd OFD device + * \param[in] obd OBD device (unused now) + * + * \retval 0 on successful setup + * \retval negative value on error + */ int ofd_fs_setup(const struct lu_env *env, struct ofd_device *ofd, struct obd_device *obd) { @@ -476,23 +619,13 @@ int ofd_fs_setup(const struct lu_env *env, struct ofd_device *ofd, ENTRY; + rc = ofd_seqs_init(env, ofd); + if (rc) + GOTO(out_hc, rc); + if (OBD_FAIL_CHECK(OBD_FAIL_MDS_FS_SETUP)) RETURN (-ENOENT); - /* prepare transactions callbacks */ - ofd->ofd_txn_cb.dtc_txn_start = NULL; - ofd->ofd_txn_cb.dtc_txn_stop = ofd_txn_stop_cb; - ofd->ofd_txn_cb.dtc_txn_commit = NULL; - ofd->ofd_txn_cb.dtc_cookie = ofd; - ofd->ofd_txn_cb.dtc_tag = LCT_DT_THREAD; - CFS_INIT_LIST_HEAD(&ofd->ofd_txn_cb.dtc_linkage); - - dt_txn_callback_add(ofd->ofd_osd, &ofd->ofd_txn_cb); - - rc = ofd_server_data_init(env, ofd); - if (rc) - GOTO(out, rc); - lu_local_obj_fid(&info->fti_fid, OFD_HEALTH_CHECK_OID); memset(&info->fti_attr, 0, sizeof(info->fti_attr)); info->fti_attr.la_valid = LA_MODE; @@ -506,59 +639,33 @@ int ofd_fs_setup(const struct lu_env *env, struct ofd_device *ofd, ofd->ofd_health_check_file = fo; - lu_local_obj_fid(&info->fti_fid, OFD_LAST_GROUP_OID); - memset(&info->fti_attr, 0, sizeof(info->fti_attr)); - info->fti_attr.la_valid = LA_MODE; - info->fti_attr.la_mode = S_IFREG | S_IRUGO | S_IWUSR; - info->fti_dof.dof_type = dt_mode_to_dft(S_IFREG); - - fo = dt_find_or_create(env, ofd->ofd_osd, &info->fti_fid, - &info->fti_dof, &info->fti_attr); - if (IS_ERR(fo)) - GOTO(out_hc, rc = PTR_ERR(fo)); - - ofd->ofd_last_group_file = fo; - - rc = ofd_groups_init(env, ofd); - if (rc) - GOTO(out_lg, rc); - RETURN(0); -out_lg: - lu_object_put(env, &ofd->ofd_last_group_file->do_lu); out_hc: lu_object_put(env, &ofd->ofd_health_check_file->do_lu); out: - dt_txn_callback_del(ofd->ofd_osd, &ofd->ofd_txn_cb); return rc; } +/** + * Cleanup service files on OFD. 
+ * + * This function syncs the whole OFD device and closes the "health check" file. + * + * \param[in] env execution environment + * \param[in] ofd OFD device + */ void ofd_fs_cleanup(const struct lu_env *env, struct ofd_device *ofd) { - int i; + int rc; ENTRY; - ofd_info_init(env, NULL); + ofd_seqs_fini(env, ofd); - for (i = 0; i <= ofd->ofd_max_group; i++) { - if (ofd->ofd_lastid_obj[i]) { - ofd_last_id_write(env, ofd, i); - ofd_group_fini(env, ofd, i); - } - } - - i = dt_sync(env, ofd->ofd_osd); - if (i) - CERROR("can't sync: %d\n", i); - - /* Remove transaction callback */ - dt_txn_callback_del(ofd->ofd_osd, &ofd->ofd_txn_cb); - - if (ofd->ofd_last_group_file) { - lu_object_put(env, &ofd->ofd_last_group_file->do_lu); - ofd->ofd_last_group_file = NULL; - } + rc = dt_sync(env, ofd->ofd_osd); + if (rc < 0) + CWARN("%s: can't sync OFD upon cleanup: %d\n", + ofd_name(ofd), rc); if (ofd->ofd_health_check_file) { lu_object_put(env, &ofd->ofd_health_check_file->do_lu);