X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Fquota%2Fqsd_lib.c;h=6b827cbf3a03cf8a6914bd125659204de58b42ee;hp=16a5cc427dc744827b170f47e3fb4d636458ea72;hb=d527e812461baf9db2f6ed960a3b6cc12d4ab37c;hpb=318fd8d197ff607a032dac6ed9cb15922e44a53f diff --git a/lustre/quota/qsd_lib.c b/lustre/quota/qsd_lib.c index 16a5cc4..6b827cb 100644 --- a/lustre/quota/qsd_lib.c +++ b/lustre/quota/qsd_lib.c @@ -21,53 +21,313 @@ * GPL HEADER END */ /* - * Copyright (c) 2012 Whamcloud, Inc. + * Copyright (c) 2012, 2017, Intel Corporation. * Use is subject to license terms. * - * Author: Johann Lombardi - * Author: Niu Yawei + * Author: Johann Lombardi + * Author: Niu Yawei */ /* * Quota Slave Driver (QSD) management. + * + * The quota slave feature is implemented under the form of a library called + * QSD. Each OSD device should create a QSD instance via qsd_init() which will + * be used to manage quota enforcement for this device. This implies: + * - completing the reintegration procedure with the quota master (aka QMT, see + * qmt_dev.c) to retrieve the latest quota settings and space distribution. + * - managing quota locks in order to be notified of configuration changes. + * - acquiring space from the QMT when quota space for a given user/group is + * close to exhaustion. + * - allocating quota space to service threads for local request processing. + * + * Once the QSD instance created, the OSD device should invoke qsd_start() + * when recovery is completed. This notifies the QSD that we are about to + * process new requests on which quota should be strictly enforced. + * Then, qsd_op_begin/end can be used to reserve/release/pre-acquire quota space + * for/after each operation until shutdown where the QSD instance should be + * freed via qsd_fini(). */ -#ifndef EXPORT_SYMTAB -# define EXPORT_SYMTAB -#endif - #define DEBUG_SUBSYSTEM S_LQUOTA +#include #include "qsd_internal.h" +struct kmem_cache *upd_kmem; + +struct lu_kmem_descr qsd_caches[] = { + { + .ckd_cache = &upd_kmem, + .ckd_name = "upd_kmem", + .ckd_size = sizeof(struct qsd_upd_rec) + }, + { + .ckd_cache = NULL + } +}; + /* define qsd thread key */ LU_KEY_INIT_FINI(qsd, struct qsd_thread_info); -LU_CONTEXT_KEY_DEFINE(qsd, LCT_MD_THREAD | LCT_DT_THREAD | LCT_LOCAL); +LU_CONTEXT_KEY_DEFINE(qsd, LCT_MD_THREAD | LCT_MG_THREAD | LCT_DT_THREAD | LCT_LOCAL); LU_KEY_INIT_GENERIC(qsd); /* some procfs helpers */ -static int lprocfs_qsd_rd_state(char *page, char **start, off_t off, - int count, int *eof, void *data) +static int qsd_state_seq_show(struct seq_file *m, void *data) +{ + struct qsd_instance *qsd = m->private; + char enabled[5]; + + LASSERT(qsd != NULL); + + memset(enabled, 0, sizeof(enabled)); + if (qsd_type_enabled(qsd, USRQUOTA)) + strcat(enabled, "u"); + if (qsd_type_enabled(qsd, GRPQUOTA)) + strcat(enabled, "g"); + if (qsd_type_enabled(qsd, PRJQUOTA)) + strncat(enabled, "p", 1); + if (strlen(enabled) == 0) + strcat(enabled, "none"); + + /* TODO: further pool ID should be removed or + * replaced with pool Name */ + seq_printf(m, "target name: %s\n" + "pool ID: %d\n" + "type: %s\n" + "quota enabled: %s\n" + "conn to master: %s\n", + qsd->qsd_svname, 0, + qsd->qsd_is_md ? "md" : "dt", enabled, + qsd->qsd_exp_valid ? 
"setup" : "not setup yet"); + + if (qsd->qsd_prepared) { + memset(enabled, 0, sizeof(enabled)); + if (qsd->qsd_type_array[USRQUOTA]->qqi_acct_obj != NULL) + strcat(enabled, "u"); + if (qsd->qsd_type_array[GRPQUOTA]->qqi_acct_obj != NULL) + strcat(enabled, "g"); + if (qsd->qsd_type_array[PRJQUOTA]->qqi_acct_obj != NULL) + strncat(enabled, "p", 1); + if (strlen(enabled) == 0) + strcat(enabled, "none"); + seq_printf(m, "space acct: %s\n" + "user uptodate: glb[%d],slv[%d],reint[%d]\n" + "group uptodate: glb[%d],slv[%d],reint[%d]\n" + "project uptodate: glb[%d],slv[%d],reint[%d]\n", + enabled, + qsd->qsd_type_array[USRQUOTA]->qqi_glb_uptodate, + qsd->qsd_type_array[USRQUOTA]->qqi_slv_uptodate, + qsd->qsd_type_array[USRQUOTA]->qqi_reint, + qsd->qsd_type_array[GRPQUOTA]->qqi_glb_uptodate, + qsd->qsd_type_array[GRPQUOTA]->qqi_slv_uptodate, + qsd->qsd_type_array[GRPQUOTA]->qqi_reint, + qsd->qsd_type_array[PRJQUOTA]->qqi_glb_uptodate, + qsd->qsd_type_array[PRJQUOTA]->qqi_slv_uptodate, + qsd->qsd_type_array[PRJQUOTA]->qqi_reint); + } + return 0; +} +LPROC_SEQ_FOPS_RO(qsd_state); + +static int qsd_enabled_seq_show(struct seq_file *m, void *data) { - struct qsd_instance *qsd = (struct qsd_instance *)data; + struct qsd_instance *qsd = m->private; + char enabled[5]; LASSERT(qsd != NULL); - return snprintf(page, count, - "target name: %s\n" - "pool ID: %d\n" - "type: %s\n" - "quota enabled: none\n", - qsd->qsd_svname, qsd->qsd_pool_id, - qsd->qsd_is_md ? "md" : "dt"); + memset(enabled, 0, sizeof(enabled)); + if (qsd_type_enabled(qsd, USRQUOTA)) + strncat(enabled, "u", sizeof(enabled) - strlen(enabled)); + if (qsd_type_enabled(qsd, GRPQUOTA)) + strncat(enabled, "g", sizeof(enabled) - strlen(enabled)); + if (qsd_type_enabled(qsd, PRJQUOTA)) + strncat(enabled, "p", sizeof(enabled) - strlen(enabled)); + if (strlen(enabled) == 0) + strncat(enabled, "none", sizeof(enabled) - strlen(enabled)); + + seq_printf(m, "%s\n", enabled); + return 0; } +static ssize_t qsd_enabled_seq_write(struct file *file, + const char __user *buffer, + size_t count, loff_t *off) +{ + struct seq_file *m = file->private_data; + struct qsd_instance *qsd = m->private; + char fsname[LUSTRE_MAXFSNAME + 1]; + int enabled = 0; + char valstr[5]; + int pool, rc; + + if (count > 4) + return -E2BIG; + + if (copy_from_user(valstr, buffer, count)) + GOTO(out, count = -EFAULT); + + valstr[sizeof(valstr) - 1] = 0; + if (strchr(valstr, 'u')) + enabled |= BIT(USRQUOTA); + if (strchr(valstr, 'g')) + enabled |= BIT(GRPQUOTA); + if (strchr(valstr, 'p')) + enabled |= BIT(PRJQUOTA); + + if (enabled == 0 && strcmp(valstr, "none")) + GOTO(out, count = -EINVAL); + + if (qsd->qsd_is_md) + pool = LQUOTA_RES_MD; + else + pool = LQUOTA_RES_DT; + + if (server_name2fsname(qsd->qsd_svname, fsname, NULL)) + GOTO(out, count = -EINVAL); + + rc = qsd_config(valstr, fsname, pool); + if (rc) + count = rc; +out: + return count; +} +LPROC_SEQ_FOPS(qsd_enabled); + +/* force reintegration procedure to be executed. 
+ * Used for test/debugging purpose */ +static ssize_t +lprocfs_force_reint_seq_write(struct file *file, const char __user *buffer, + size_t count, loff_t *off) +{ + struct seq_file *m = file->private_data; + struct qsd_instance *qsd = m->private; + int rc = 0, qtype; + + LASSERT(qsd != NULL); + + write_lock(&qsd->qsd_lock); + if (qsd->qsd_stopping) { + /* don't mess up with shutdown procedure, it is already + * complicated enough */ + rc = -ESHUTDOWN; + } else if (!qsd->qsd_prepared) { + rc = -EAGAIN; + } else { + /* mark all indexes as stale */ + for (qtype = USRQUOTA; qtype < LL_MAXQUOTAS; qtype++) { + qsd->qsd_type_array[qtype]->qqi_glb_uptodate = false; + qsd->qsd_type_array[qtype]->qqi_slv_uptodate = false; + } + } + write_unlock(&qsd->qsd_lock); + + if (rc) + return rc; + + /* kick off reintegration */ + for (qtype = USRQUOTA; qtype < LL_MAXQUOTAS; qtype++) { + rc = qsd_start_reint_thread(qsd->qsd_type_array[qtype]); + if (rc) + break; + } + return rc == 0 ? count : rc; +} +LPROC_SEQ_FOPS_WR_ONLY(qsd, force_reint); + +static int qsd_timeout_seq_show(struct seq_file *m, void *data) +{ + struct qsd_instance *qsd = m->private; + LASSERT(qsd != NULL); + + seq_printf(m, "%d\n", qsd_wait_timeout(qsd)); + return 0; +} + +static ssize_t +qsd_timeout_seq_write(struct file *file, const char __user *buffer, + size_t count, loff_t *off) +{ + struct seq_file *m = file->private_data; + struct qsd_instance *qsd = m->private; + time64_t timeout; + int rc; + + LASSERT(qsd != NULL); + rc = kstrtoll_from_user(buffer, count, 0, &timeout); + if (rc) + return rc; + + if (timeout < 0) + return -EINVAL; + + qsd->qsd_timeout = timeout; + return count; +} +LPROC_SEQ_FOPS(qsd_timeout); + static struct lprocfs_vars lprocfs_quota_qsd_vars[] = { - { "info", lprocfs_qsd_rd_state, 0, 0}, + { .name = "info", + .fops = &qsd_state_fops }, + { .name = "enabled", + .fops = &qsd_enabled_fops }, + { .name = "force_reint", + .fops = &qsd_force_reint_fops }, + { .name = "timeout", + .fops = &qsd_timeout_fops }, { NULL } }; /* + * Callback function invoked by the OSP layer when the connection to the master + * has been set up. + * + * \param data - is a pointer to the qsd_instance + * + * \retval - 0 on success, appropriate error on failure + */ +static int qsd_conn_callback(void *data) +{ + struct qsd_instance *qsd = (struct qsd_instance *)data; + int type; + ENTRY; + + /* qsd_exp should now be valid */ + LASSERT(qsd->qsd_exp); + + qsd->qsd_ns = class_exp2obd(qsd->qsd_exp)->obd_namespace; + + write_lock(&qsd->qsd_lock); + /* notify that qsd_exp is now valid */ + qsd->qsd_exp_valid = true; + write_unlock(&qsd->qsd_lock); + + /* Now that the connection to master is setup, we can initiate the + * reintegration procedure for quota types which are enabled. + * It is worth noting that, if the qsd_instance hasn't been started + * already, then we can only complete the first two steps of the + * reintegration procedure (i.e. global lock enqueue and slave + * index transfer) since the space usage reconciliation (i.e. + * step 3) will have to wait for qsd_start() to be called */ + for (type = USRQUOTA; type < LL_MAXQUOTAS; type++) { + struct qsd_qtype_info *qqi = qsd->qsd_type_array[type]; + struct task_struct *t; + + /* qqi_reint_task can be set to NULL at any time, + * so we need to be careful. + */ + rcu_read_lock(); + t = rcu_dereference(qqi->qqi_reint_task); + if (t) + wake_up_process(t); + rcu_read_unlock(); + } + + RETURN(0); +} + +/* * Release qsd_qtype_info structure which contains data associated with a * given quota type. 
This releases the accounting objects. * It's called on OSD cleanup when the qsd instance is released. @@ -81,6 +341,7 @@ static void qsd_qtype_fini(const struct lu_env *env, struct qsd_instance *qsd, int qtype) { struct qsd_qtype_info *qqi; + int repeat = 0; ENTRY; if (qsd->qsd_type_array[qtype] == NULL) @@ -88,26 +349,58 @@ static void qsd_qtype_fini(const struct lu_env *env, struct qsd_instance *qsd, qqi = qsd->qsd_type_array[qtype]; qsd->qsd_type_array[qtype] = NULL; + /* all deferred work lists should be empty */ + LASSERT(list_empty(&qqi->qqi_deferred_glb)); + LASSERT(list_empty(&qqi->qqi_deferred_slv)); + + /* shutdown lquota site */ + if (qqi->qqi_site != NULL && !IS_ERR(qqi->qqi_site)) { + lquota_site_free(env, qqi->qqi_site); + qqi->qqi_site = NULL; + } + + /* The qqi may still be holding by global locks which are being + * canceled asynchronously (LU-4365), see the following steps: + * + * - On server umount, we try to clear all quota locks first by + * disconnecting LWP (which will invalidate import and cleanup + * all locks on it), however, if quota reint process is holding + * the global lock for reintegration at that time, global lock + * will fail to be cleared on LWP disconnection. + * + * - Umount process goes on and stops reint process, the global + * lock will be dropped on reint process exit, however, the lock + * cancel in done in asynchronous way, so the + * qsd_glb_blocking_ast() might haven't been called yet when we + * get here. + */ + while (atomic_read(&qqi->qqi_ref) > 1) { + CDEBUG(D_QUOTA, "qqi reference count %u, repeat: %d\n", + atomic_read(&qqi->qqi_ref), repeat); + repeat++; + schedule_timeout_interruptible(cfs_time_seconds(1)); + } + /* by now, all qqi users should have gone away */ - LASSERT(cfs_atomic_read(&qqi->qqi_ref) == 1); + LASSERT(atomic_read(&qqi->qqi_ref) == 1); lu_ref_fini(&qqi->qqi_reference); /* release accounting object */ if (qqi->qqi_acct_obj != NULL && !IS_ERR(qqi->qqi_acct_obj)) { - lu_object_put(env, &qqi->qqi_acct_obj->do_lu); + dt_object_put(env, qqi->qqi_acct_obj); qqi->qqi_acct_obj = NULL; } /* release slv index */ if (qqi->qqi_slv_obj != NULL && !IS_ERR(qqi->qqi_slv_obj)) { - lu_object_put(env, &qqi->qqi_slv_obj->do_lu); + dt_object_put(env, qqi->qqi_slv_obj); qqi->qqi_slv_obj = NULL; qqi->qqi_slv_ver = 0; } /* release global index */ if (qqi->qqi_glb_obj != NULL && !IS_ERR(qqi->qqi_glb_obj)) { - lu_object_put(env, &qqi->qqi_glb_obj->do_lu); + dt_object_put(env, qqi->qqi_glb_obj); qqi->qqi_glb_obj = NULL; qqi->qqi_glb_ver = 0; } @@ -116,10 +409,45 @@ static void qsd_qtype_fini(const struct lu_env *env, struct qsd_instance *qsd, EXIT; } +static const char *qtype2acct_name(int qtype) +{ + static char unknown[24]; + + switch (qtype) { + case USRQUOTA: + return "acct_user"; + case GRPQUOTA: + return "acct_group"; + case PRJQUOTA: + return "acct_project"; + } + + snprintf(unknown, sizeof(unknown), "acct_unknown_%u", qtype); + return unknown; +} + +static const char *qtype2glb_name(int qtype) +{ + static char unknown[24]; + + switch (qtype) { + case USRQUOTA: + return "limit_user"; + case GRPQUOTA: + return "limit_group"; + case PRJQUOTA: + return "limit_project"; + } + + snprintf(unknown, sizeof(unknown), "acct_unknown_%u", qtype); + return unknown; +} + /* * Allocate and initialize a qsd_qtype_info structure for quota type \qtype. * This opens the accounting object and initializes the proc file. - * It's called on OSD start when the qsd instance is created. 
+ * It's called on OSD start when the qsd_prepare() is invoked on the qsd + * instance. * * \param env - the environment passed by the caller * \param qsd - is the qsd instance which will be in charge of the new @@ -144,28 +472,29 @@ static int qsd_qtype_init(const struct lu_env *env, struct qsd_instance *qsd, if (qqi == NULL) RETURN(-ENOMEM); qsd->qsd_type_array[qtype] = qqi; - cfs_atomic_set(&qqi->qqi_ref, 1); /* referenced from qsd */ + atomic_set(&qqi->qqi_ref, 1); /* referenced from qsd */ /* set backpointer and other parameters */ qqi->qqi_qsd = qsd; qqi->qqi_qtype = qtype; lu_ref_init(&qqi->qqi_reference); - lquota_generate_fid(&qqi->qqi_fid, qsd->qsd_pool_id, QSD_RES_TYPE(qsd), - qtype); qqi->qqi_glb_uptodate = false; qqi->qqi_slv_uptodate = false; qqi->qqi_reint = false; - memset(&qqi->qqi_lockh, 0, sizeof(qqi->qqi_lockh)); - - /* open accounting object */ - LASSERT(qqi->qqi_acct_obj == NULL); - qqi->qqi_acct_obj = acct_obj_lookup(env, qsd->qsd_dev, - qtype == USRQUOTA ? ACCT_USER_OID - : ACCT_GROUP_OID); - /* don't print any error message on failure in order not to confuse - * non-OFD user (e.g. 2.3 MDT stack) */ - if (IS_ERR(qqi->qqi_acct_obj)) + INIT_LIST_HEAD(&qqi->qqi_deferred_glb); + INIT_LIST_HEAD(&qqi->qqi_deferred_slv); + lquota_generate_fid(&qqi->qqi_fid, QSD_RES_TYPE(qsd), qtype); + + /* open accounting object */ + LASSERT(qqi->qqi_acct_obj == NULL); + qqi->qqi_acct_obj = acct_obj_lookup(env, qsd->qsd_dev, qtype); + if (IS_ERR(qqi->qqi_acct_obj)) { + CDEBUG(D_QUOTA, "%s: no %s space accounting support: rc = %ld\n", + qsd->qsd_svname, qtype_name(qtype), + PTR_ERR(qqi->qqi_acct_obj)); qqi->qqi_acct_obj = NULL; + qqi->qqi_acct_failed = true; + } /* open global index copy */ LASSERT(qqi->qqi_glb_obj == NULL); @@ -195,17 +524,32 @@ static int qsd_qtype_init(const struct lu_env *env, struct qsd_instance *qsd, } qqi->qqi_slv_ver = dt_version_get(env, qqi->qqi_slv_obj); - /* register proc entry for accounting object */ - rc = lprocfs_seq_create(qsd->qsd_proc, - qtype == USRQUOTA ? "acct_user" : "acct_group", + /* allocate site */ + qqi->qqi_site = lquota_site_alloc(env, qqi, false, qtype, &qsd_lqe_ops); + if (IS_ERR(qqi->qqi_site)) { + CERROR("%s: can't allocate site "DFID" %ld\n", qsd->qsd_svname, + PFID(&qqi->qqi_fid), PTR_ERR(qqi->qqi_site)); + GOTO(out, rc = PTR_ERR(qqi->qqi_site)); + } + + /* register proc entry for accounting & global index copy objects */ + rc = lprocfs_seq_create(qsd->qsd_proc, qtype2acct_name(qtype), 0444, &lprocfs_quota_seq_fops, qqi->qqi_acct_obj); if (rc) { - CWARN("%s: can't add procfs entry for accounting file %d\n", - qsd->qsd_svname, rc); + CERROR("%s: can't add procfs entry for accounting file %d\n", + qsd->qsd_svname, rc); GOTO(out, rc); } + rc = lprocfs_seq_create(qsd->qsd_proc, qtype2glb_name(qtype), + 0444, &lprocfs_quota_seq_fops, + qqi->qqi_glb_obj); + if (rc) { + CERROR("%s: can't add procfs entry for global index copy %d\n", + qsd->qsd_svname, rc); + GOTO(out, rc); + } EXIT; out: if (rc) @@ -215,7 +559,8 @@ out: /* * Release a qsd_instance. Companion of qsd_init(). This releases all data - * structures associated with the quota slave. + * structures associated with the quota slave (on-disk objects, lquota entry + * tables, ...). * This function should be called when the OSD is shutting down. 
* * \param env - is the environment passed by the caller @@ -226,22 +571,55 @@ void qsd_fini(const struct lu_env *env, struct qsd_instance *qsd) int qtype; ENTRY; + if (unlikely(qsd == NULL)) + RETURN_EXIT; + CDEBUG(D_QUOTA, "%s: initiating QSD shutdown\n", qsd->qsd_svname); + write_lock(&qsd->qsd_lock); qsd->qsd_stopping = true; + write_unlock(&qsd->qsd_lock); /* remove qsd proc entry */ - if (qsd->qsd_proc != NULL && !IS_ERR(qsd->qsd_proc)) { + if (qsd->qsd_proc != NULL) { lprocfs_remove(&qsd->qsd_proc); qsd->qsd_proc = NULL; } + /* stop the writeback thread */ + qsd_stop_upd_thread(qsd); + + /* shutdown the reintegration threads */ + for (qtype = USRQUOTA; qtype < LL_MAXQUOTAS; qtype++) { + if (qsd->qsd_type_array[qtype] == NULL) + continue; + qsd_stop_reint_thread(qsd->qsd_type_array[qtype]); + } + + if (qsd->qsd_ns != NULL) { + qsd->qsd_ns = NULL; + } + + /* release per-filesystem information */ + if (qsd->qsd_fsinfo != NULL) { + mutex_lock(&qsd->qsd_fsinfo->qfs_mutex); + /* remove from the list of fsinfo */ + list_del_init(&qsd->qsd_link); + mutex_unlock(&qsd->qsd_fsinfo->qfs_mutex); + qsd_put_fsinfo(qsd->qsd_fsinfo); + qsd->qsd_fsinfo = NULL; + } + /* free per-quota type data */ - for (qtype = USRQUOTA; qtype < MAXQUOTAS; qtype++) + for (qtype = USRQUOTA; qtype < LL_MAXQUOTAS; qtype++) qsd_qtype_fini(env, qsd, qtype); + /* deregister connection to the quota master */ + qsd->qsd_exp_valid = false; + lustre_deregister_lwp_item(&qsd->qsd_exp); + /* release quota root directory */ - if (qsd->qsd_root != NULL && !IS_ERR(qsd->qsd_root)) { - lu_object_put(env, &qsd->qsd_root->do_lu); + if (qsd->qsd_root != NULL) { + dt_object_put(env, qsd->qsd_root); qsd->qsd_root = NULL; } @@ -252,6 +630,7 @@ void qsd_fini(const struct lu_env *env, struct qsd_instance *qsd) qsd->qsd_dev = NULL; } + CDEBUG(D_QUOTA, "%s: QSD shutdown completed\n", qsd->qsd_svname); OBD_FREE_PTR(qsd); EXIT; } @@ -259,8 +638,7 @@ EXPORT_SYMBOL(qsd_fini); /* * Create a new qsd_instance to be associated with backend osd device - * identified by \dev. For now, this function just create procfs files which - * dumps the accounting information + * identified by \dev. 
* * \param env - the environment passed by the caller * \param svname - is the service name of the OSD device creating this instance @@ -273,60 +651,86 @@ EXPORT_SYMBOL(qsd_fini); */ struct qsd_instance *qsd_init(const struct lu_env *env, char *svname, struct dt_device *dev, - cfs_proc_dir_entry_t *osd_proc) + struct proc_dir_entry *osd_proc, + bool is_md, bool excl) { + struct qsd_thread_info *qti = qsd_info(env); struct qsd_instance *qsd; - int rc, qtype; + int rc, type, idx; ENTRY; + /* only configure qsd for MDT & OST */ + type = server_name2index(svname, &idx, NULL); + if (type != LDD_F_SV_TYPE_MDT && type != LDD_F_SV_TYPE_OST) + RETURN(NULL); + /* allocate qsd instance */ OBD_ALLOC_PTR(qsd); if (qsd == NULL) RETURN(ERR_PTR(-ENOMEM)); - cfs_rwlock_init(&qsd->qsd_lock); + /* generic initializations */ + rwlock_init(&qsd->qsd_lock); + INIT_LIST_HEAD(&qsd->qsd_link); + INIT_LIST_HEAD(&qsd->qsd_upd_list); + spin_lock_init(&qsd->qsd_adjust_lock); + INIT_LIST_HEAD(&qsd->qsd_adjust_list); + qsd->qsd_prepared = false; + qsd->qsd_started = false; + qsd->qsd_is_md = is_md; + qsd->qsd_updating = false; + qsd->qsd_exclusive = excl; + /* copy service name */ - strncpy(qsd->qsd_svname, svname, MAX_OBD_NAME); + if (strlcpy(qsd->qsd_svname, svname, sizeof(qsd->qsd_svname)) + >= sizeof(qsd->qsd_svname)) + GOTO(out, rc = -E2BIG); /* grab reference on osd device */ lu_device_get(&dev->dd_lu_dev); lu_ref_add(&dev->dd_lu_dev.ld_reference, "qsd", qsd); qsd->qsd_dev = dev; - /* we only support pool ID 0 (default data or metadata pool) for the - * time being. A different pool ID could be assigned to this target via - * the configuration log in the future */ - qsd->qsd_pool_id = 0; - - /* Record whether this qsd instance is managing quota enforcement for a - * MDT (i.e. 
inode quota) or OST (block quota) */ - qsd->qsd_is_md = lu_device_is_md(dev->dd_lu_dev.ld_site->ls_top_dev); - - /* look-up on-disk directory for the quota slave */ - qsd->qsd_root = lquota_disk_dir_find_create(env, dev, NULL, QSD_DIR); - if (IS_ERR(qsd->qsd_root)) { - rc = PTR_ERR(qsd->qsd_root); - CERROR("%s: failed to create quota slave root dir (%d)\n", - svname, rc); + /* get fsname from svname */ + rc = server_name2fsname(svname, qti->qti_buf, NULL); + if (rc) { + CERROR("%s: fail to extract filesystem name\n", svname); GOTO(out, rc); } + /* look up quota setting for the filesystem the target belongs to */ + qsd->qsd_fsinfo = qsd_get_fsinfo(qti->qti_buf, 1); + if (qsd->qsd_fsinfo == NULL) { + CERROR("%s: failed to locate filesystem information\n", svname); + GOTO(out, rc = -EINVAL); + } + + /* add in the list of lquota_fsinfo */ + mutex_lock(&qsd->qsd_fsinfo->qfs_mutex); + list_add_tail(&qsd->qsd_link, &qsd->qsd_fsinfo->qfs_qsd_list); + mutex_unlock(&qsd->qsd_fsinfo->qfs_mutex); + /* register procfs directory */ - qsd->qsd_proc = lprocfs_register(QSD_DIR, osd_proc, - lprocfs_quota_qsd_vars, qsd); + if (qsd->qsd_is_md) + qsd->qsd_proc = lprocfs_register(QSD_DIR_MD, osd_proc, + lprocfs_quota_qsd_vars, qsd); + else + qsd->qsd_proc = lprocfs_register(QSD_DIR_DT, osd_proc, + lprocfs_quota_qsd_vars, qsd); + + if (type == LDD_F_SV_TYPE_MDT && qsd->qsd_is_md) + lprocfs_add_symlink(QSD_DIR, osd_proc, "./%s", QSD_DIR_MD); + else if (type == LDD_F_SV_TYPE_OST && !qsd->qsd_is_md) + lprocfs_add_symlink(QSD_DIR, osd_proc, "./%s", QSD_DIR_DT); + if (IS_ERR(qsd->qsd_proc)) { rc = PTR_ERR(qsd->qsd_proc); + qsd->qsd_proc = NULL; CERROR("%s: fail to create quota slave proc entry (%d)\n", svname, rc); GOTO(out, rc); } - - /* initialize per-quota type data */ - for (qtype = USRQUOTA; qtype < MAXQUOTAS; qtype++) { - rc = qsd_qtype_init(env, qsd, qtype); - if (rc) - GOTO(out, rc); - } + EXIT; out: if (rc) { qsd_fini(env, qsd); @@ -337,12 +741,195 @@ out: EXPORT_SYMBOL(qsd_init); /* + * Initialize on-disk structures in order to manage quota enforcement for + * the target associated with the qsd instance \qsd and starts the reintegration + * procedure for each quota type as soon as possible. + * The last step of the reintegration will be completed once qsd_start() is + * called, at which points the space reconciliation with the master will be + * executed. + * This function must be called when the server stack is fully configured, + * typically when ->ldo_prepare is called across the stack. + * + * \param env - the environment passed by the caller + * \param qsd - is qsd_instance to prepare + * + * \retval - 0 on success, appropriate error on failure + */ +int qsd_prepare(const struct lu_env *env, struct qsd_instance *qsd) +{ + struct qsd_thread_info *qti = qsd_info(env); + int qtype, rc = 0; + ENTRY; + + if (unlikely(qsd == NULL)) + RETURN(0); + + read_lock(&qsd->qsd_lock); + if (qsd->qsd_prepared) { + CERROR("%s: qsd instance already prepared\n", qsd->qsd_svname); + rc = -EALREADY; + } + read_unlock(&qsd->qsd_lock); + if (rc) + RETURN(rc); + + /* Record whether this qsd instance is managing quota enforcement for a + * MDT (i.e. 
inode quota) or OST (block quota) */ + if (qsd->qsd_is_md) + qsd->qsd_sync_threshold = LQUOTA_LEAST_QUNIT(LQUOTA_RES_MD); + else + qsd->qsd_sync_threshold = LQUOTA_LEAST_QUNIT(LQUOTA_RES_DT); + + /* look-up on-disk directory for the quota slave */ + qsd->qsd_root = lquota_disk_dir_find_create(env, qsd->qsd_dev, NULL, + QSD_DIR); + if (IS_ERR(qsd->qsd_root)) { + rc = PTR_ERR(qsd->qsd_root); + qsd->qsd_root = NULL; + CERROR("%s: failed to create quota slave root dir (%d)\n", + qsd->qsd_svname, rc); + RETURN(rc); + } + + /* initialize per-quota type data */ + for (qtype = USRQUOTA; qtype < LL_MAXQUOTAS; qtype++) { + rc = qsd_qtype_init(env, qsd, qtype); + if (rc) + RETURN(rc); + } + + /* pools successfully setup, mark the qsd as prepared */ + write_lock(&qsd->qsd_lock); + qsd->qsd_prepared = true; + write_unlock(&qsd->qsd_lock); + + if (qsd->qsd_dev->dd_rdonly) + RETURN(0); + + /* start reintegration thread for each type, if required */ + for (qtype = USRQUOTA; qtype < LL_MAXQUOTAS; qtype++) { + struct qsd_qtype_info *qqi = qsd->qsd_type_array[qtype]; + + if (qsd_type_enabled(qsd, qtype) && + qqi->qqi_acct_failed) { + LCONSOLE_ERROR("%s: can't enable quota enforcement " + "since space accounting isn't functional" + ". Please run tunefs.lustre --quota on " + "an unmounted filesystem if not done " + "already\n", qsd->qsd_svname); + continue; + } + + rc = qsd_start_reint_thread(qqi); + if (rc) { + CERROR("%s: failed to start reint thread for type %s: rc = %d\n", + qsd->qsd_svname, qtype_name(qtype), rc); + RETURN(rc); + } + } + + /* start writeback thread */ + rc = qsd_start_upd_thread(qsd); + if (rc) { + CERROR("%s: failed to start writeback thread (%d)\n", + qsd->qsd_svname, rc); + RETURN(rc); + } + + /* generate osp name */ + rc = tgt_name2lwp_name(qsd->qsd_svname, qti->qti_buf, + MTI_NAME_MAXLEN, 0); + if (rc) { + CERROR("%s: failed to generate ospname (%d)\n", + qsd->qsd_svname, rc); + RETURN(rc); + } + + /* the connection callback will start the reintegration + * procedure if quota is enabled */ + rc = lustre_register_lwp_item(qti->qti_buf, &qsd->qsd_exp, + qsd_conn_callback, (void *)qsd); + if (rc) { + CERROR("%s: fail to get connection to master (%d)\n", + qsd->qsd_svname, rc); + RETURN(rc); + } + + RETURN(0); +} +EXPORT_SYMBOL(qsd_prepare); + +/* + * Start a qsd instance. This will complete the last step of the reintegration + * procedure as soon as possible (provided that the master is reachable). + * This should be called when recovery has been completed and quota should now + * be enforced on every operations. + * + * \param env - the environment passed by the caller + * \param qsd - is the qsd instance associated with the osd device to start + */ +int qsd_start(const struct lu_env *env, struct qsd_instance *qsd) +{ + int type, rc = 0; + ENTRY; + + if (unlikely(qsd == NULL)) + RETURN(0); + + write_lock(&qsd->qsd_lock); + if (!qsd->qsd_prepared) { + CERROR("%s: can't start qsd instance since it wasn't properly " + "initialized\n", qsd->qsd_svname); + rc = -EFAULT; + } else if (qsd->qsd_started) { + CERROR("%s: qsd instance already started\n", qsd->qsd_svname); + rc = -EALREADY; + } else { + /* notify that the qsd_instance is now started */ + qsd->qsd_started = true; + } + write_unlock(&qsd->qsd_lock); + + if (rc) + RETURN(rc); + + /* Trigger the 3rd step of reintegration: If usage > granted, acquire + * up to usage; If usage < granted, release down to usage. 
*/ + for (type = USRQUOTA; type < LL_MAXQUOTAS; type++) { + struct qsd_qtype_info *qqi = qsd->qsd_type_array[type]; + struct task_struct *t; + + /* qqi_reint_task can be set to NULL at any time, + * so we need to be careful. + */ + rcu_read_lock(); + t = rcu_dereference(qqi->qqi_reint_task); + if (t) + wake_up_process(t); + rcu_read_unlock(); + } + + RETURN(rc); +} +EXPORT_SYMBOL(qsd_start); + +void lustre_register_quota_process_config(int (*qpc)(struct lustre_cfg *lcfg)); + +/* * Global initialization performed at module load time */ int qsd_glb_init(void) { + int rc; + + rc = lu_kmem_init(qsd_caches); + if (rc) + return rc; + qsd_key_init_generic(&qsd_thread_key, NULL); lu_context_key_register(&qsd_thread_key); + lustre_register_quota_process_config(qsd_process_config); + return 0; } @@ -351,5 +938,7 @@ int qsd_glb_init(void) */ void qsd_glb_fini(void) { + lustre_register_quota_process_config(NULL); + lu_kmem_fini(qsd_caches); lu_context_key_degister(&qsd_thread_key); }
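
Usage sketch (illustrative, not part of the patch): the new header comment describes
the QSD lifecycle an OSD device is expected to follow: qsd_init() at setup,
qsd_prepare() once the stack is configured, qsd_start() after recovery,
qsd_op_begin()/qsd_op_end() around each operation, and qsd_fini() at shutdown.
The minimal C sketch below drives that lifecycle from a hypothetical OSD-side
caller. The my_osd_* names and fields and the <lustre_quota.h> include are
assumptions; only the qsd_init()/qsd_prepare()/qsd_start()/qsd_fini() prototypes
visible in this patch are used, and qsd_op_begin()/qsd_op_end() are mentioned only
in a comment because their prototypes do not appear here.

#include <lustre_quota.h>	/* assumed to declare struct qsd_instance and the qsd_* entry points */

struct my_osd_device {				/* hypothetical OSD private data */
	struct dt_device	 od_dt_dev;
	struct proc_dir_entry	*od_proc_entry;
	struct qsd_instance	*od_quota_slave;
};

/* device setup, typically run when ->ldo_prepare is called across the stack */
static int my_osd_quota_setup(const struct lu_env *env,
			      struct my_osd_device *osd,
			      char *svname, bool is_md)
{
	int rc;

	/* create the slave instance; qsd_init() returns NULL for targets
	 * that are neither MDT nor OST, which the calls below tolerate */
	osd->od_quota_slave = qsd_init(env, svname, &osd->od_dt_dev,
				       osd->od_proc_entry, is_md, false);
	if (IS_ERR(osd->od_quota_slave)) {
		rc = PTR_ERR(osd->od_quota_slave);
		osd->od_quota_slave = NULL;
		return rc;
	}

	/* set up on-disk structures and kick off reintegration; the last
	 * step (space reconciliation) waits for qsd_start() */
	rc = qsd_prepare(env, osd->od_quota_slave);
	if (rc) {
		qsd_fini(env, osd->od_quota_slave);
		osd->od_quota_slave = NULL;
	}
	return rc;
}

/* called once recovery has completed: quota is strictly enforced from now
 * on, with qsd_op_begin()/qsd_op_end() wrapping each operation (not shown) */
static int my_osd_quota_start(const struct lu_env *env,
			      struct my_osd_device *osd)
{
	return qsd_start(env, osd->od_quota_slave);
}

/* device shutdown: release the slave instance (NULL-safe per this patch) */
static void my_osd_quota_cleanup(const struct lu_env *env,
				 struct my_osd_device *osd)
{
	qsd_fini(env, osd->od_quota_slave);
	osd->od_quota_slave = NULL;
}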