X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Fmdc%2Fmdc_request.c;h=9f3dea4ce3343007e16f6df5d18264f2e7d7242d;hb=16e0fe6eab3e54aa7beddb75bf7dae05645408e9;hp=23f52e569b974220b18563d0f14d068ae5bb51c7;hpb=d2c0f1d2256eecda43da07fd6a526d909c971b08;p=fs%2Flustre-release.git diff --git a/lustre/mdc/mdc_request.c b/lustre/mdc/mdc_request.c index 23f52e5..9f3dea4 100644 --- a/lustre/mdc/mdc_request.c +++ b/lustre/mdc/mdc_request.c @@ -1,6 +1,4 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * +/* * GPL HEADER START * * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -26,17 +24,16 @@ * GPL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved + * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. + * + * Copyright (c) 2011, 2012, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ * Lustre is a trademark of Sun Microsystems, Inc. */ -#ifndef EXPORT_SYMTAB -# define EXPORT_SYMTAB -#endif #define DEBUG_SUBSYSTEM S_MDC #ifdef __KERNEL__ @@ -50,18 +47,19 @@ #include #include -#include #include -#include #include #include +#include + #include "mdc_internal.h" -#include #define REQUEST_MINOR 244 -static quota_interface_t *quota_interface; -extern quota_interface_t mdc_quota_interface; +struct mdc_renew_capa_args { + struct obd_capa *ra_oc; + renew_capa_cb_t ra_cb; +}; static int mdc_cleanup(struct obd_device *obd); @@ -88,6 +86,24 @@ int mdc_unpack_capa(struct obd_export *exp, struct ptlrpc_request *req, } } +static inline int mdc_queue_wait(struct ptlrpc_request *req) +{ + struct client_obd *cli = &req->rq_import->imp_obd->u.cli; + int rc; + + /* mdc_enter_request() ensures that this client has no more + * than cl_max_rpcs_in_flight RPCs simultaneously inf light + * against an MDT. */ + rc = mdc_enter_request(cli); + if (rc != 0) + return rc; + + rc = ptlrpc_queue_wait(req); + mdc_exit_request(cli); + + return rc; +} + /* Helper that implements most of mdc_getstatus and signal_completed_replay. */ /* XXX this should become mdc_get_info("key"), sending MDS_GET_INFO RPC */ static int send_getstatus(struct obd_import *imp, struct lu_fid *rootfid, @@ -693,11 +709,11 @@ void mdc_commit_open(struct ptlrpc_request *req) * be put along with freeing \var mod. */ ptlrpc_request_addref(req); - cfs_spin_lock(&req->rq_lock); - req->rq_committed = 1; - cfs_spin_unlock(&req->rq_lock); - req->rq_cb_data = NULL; - obd_mod_put(mod); + spin_lock(&req->rq_lock); + req->rq_committed = 1; + spin_unlock(&req->rq_lock); + req->rq_cb_data = NULL; + obd_mod_put(mod); } int mdc_set_open_replay_data(struct obd_export *exp, @@ -738,13 +754,13 @@ int mdc_set_open_replay_data(struct obd_export *exp, obd_mod_get(mod); obd_mod_get(mod); - cfs_spin_lock(&open_req->rq_lock); - och->och_mod = mod; - mod->mod_och = och; - mod->mod_open_req = open_req; - open_req->rq_cb_data = mod; - open_req->rq_commit_cb = mdc_commit_open; - cfs_spin_unlock(&open_req->rq_lock); + spin_lock(&open_req->rq_lock); + och->och_mod = mod; + mod->mod_och = och; + mod->mod_open_req = open_req; + open_req->rq_cb_data = mod; + open_req->rq_commit_cb = mdc_commit_open; + spin_unlock(&open_req->rq_lock); } rec->cr_fid2 = body->fid1; @@ -767,7 +783,14 @@ int mdc_clear_open_replay_data(struct obd_export *exp, struct md_open_data *mod = och->och_mod; ENTRY; - LASSERT(mod != LP_POISON && mod != NULL); + /** + * It is possible to not have \var mod in a case of eviction between + * lookup and ll_file_open(). + **/ + if (mod == NULL) + RETURN(0); + + LASSERT(mod != LP_POISON); mod->mod_och = NULL; och->och_mod = NULL; @@ -830,9 +853,9 @@ int mdc_close(struct obd_export *exp, struct md_op_data *op_data, DEBUG_REQ(D_HA, mod->mod_open_req, "matched open"); /* We no longer want to preserve this open for replay even * though the open was committed. b=3632, b=3633 */ - cfs_spin_lock(&mod->mod_open_req->rq_lock); - mod->mod_open_req->rq_replay = 0; - cfs_spin_unlock(&mod->mod_open_req->rq_lock); + spin_lock(&mod->mod_open_req->rq_lock); + mod->mod_open_req->rq_replay = 0; + spin_unlock(&mod->mod_open_req->rq_lock); } else { CDEBUG(D_HA, "couldn't find open req; expecting close error\n"); } @@ -923,9 +946,9 @@ int mdc_done_writing(struct obd_export *exp, struct md_op_data *op_data, DEBUG_REQ(D_HA, mod->mod_open_req, "matched setattr"); /* We no longer want to preserve this setattr for replay even * though the open was committed. b=3632, b=3633 */ - cfs_spin_lock(&mod->mod_open_req->rq_lock); - mod->mod_open_req->rq_replay = 0; - cfs_spin_unlock(&mod->mod_open_req->rq_lock); + spin_lock(&mod->mod_open_req->rq_lock); + mod->mod_open_req->rq_replay = 0; + spin_unlock(&mod->mod_open_req->rq_lock); } mdc_close_pack(req, op_data); @@ -986,9 +1009,9 @@ int mdc_sendpage(struct obd_export *exp, const struct lu_fid *fid, req->rq_request_portal = MDS_READPAGE_PORTAL; ptlrpc_at_set_req_timeout(req); - desc = ptlrpc_prep_bulk_imp(req, 1, BULK_GET_SOURCE, MDS_BULK_PORTAL); - if (desc == NULL) - GOTO(out, rc = -ENOMEM); + desc = ptlrpc_prep_bulk_imp(req, 1, 1,BULK_GET_SOURCE, MDS_BULK_PORTAL); + if (desc == NULL) + GOTO(out, rc = -ENOMEM); /* NB req now owns desc and will free it when it gets freed. */ ptlrpc_prep_bulk_page(desc, (struct page *)page, 0, offset); @@ -1007,21 +1030,27 @@ out: EXPORT_SYMBOL(mdc_sendpage); #endif -int mdc_readpage(struct obd_export *exp, const struct lu_fid *fid, - struct obd_capa *oc, __u64 offset, struct page *page, - struct ptlrpc_request **request) +int mdc_readpage(struct obd_export *exp, struct md_op_data *op_data, + struct page **pages, struct ptlrpc_request **request) { struct ptlrpc_request *req; struct ptlrpc_bulk_desc *desc; + int i; + cfs_waitq_t waitq; + int resends = 0; + struct l_wait_info lwi; int rc; ENTRY; *request = NULL; + cfs_waitq_init(&waitq); + +restart_bulk: req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_MDS_READPAGE); if (req == NULL) RETURN(-ENOMEM); - mdc_set_capa_size(req, &RMF_CAPA1, oc); + mdc_set_capa_size(req, &RMF_CAPA1, op_data->op_capa1); rc = ptlrpc_request_pack(req, LUSTRE_MDS_VERSION, MDS_READPAGE); if (rc) { @@ -1032,21 +1061,37 @@ int mdc_readpage(struct obd_export *exp, const struct lu_fid *fid, req->rq_request_portal = MDS_READPAGE_PORTAL; ptlrpc_at_set_req_timeout(req); - desc = ptlrpc_prep_bulk_imp(req, 1, BULK_PUT_SINK, MDS_BULK_PORTAL); + desc = ptlrpc_prep_bulk_imp(req, op_data->op_npages, 1, BULK_PUT_SINK, + MDS_BULK_PORTAL); if (desc == NULL) { ptlrpc_request_free(req); RETURN(-ENOMEM); } /* NB req now owns desc and will free it when it gets freed */ - ptlrpc_prep_bulk_page(desc, page, 0, CFS_PAGE_SIZE); - mdc_readdir_pack(req, offset, CFS_PAGE_SIZE, fid, oc); + for (i = 0; i < op_data->op_npages; i++) + ptlrpc_prep_bulk_page_pin(desc, pages[i], 0, CFS_PAGE_SIZE); + + mdc_readdir_pack(req, op_data->op_offset, + CFS_PAGE_SIZE * op_data->op_npages, + &op_data->op_fid1, op_data->op_capa1); ptlrpc_request_set_replen(req); rc = ptlrpc_queue_wait(req); if (rc) { ptlrpc_req_finished(req); - RETURN(rc); + if (rc != -ETIMEDOUT) + RETURN(rc); + + resends++; + if (!client_should_resend(resends, &exp->exp_obd->u.cli)) { + CERROR("too many resend retries, returning error\n"); + RETURN(-EIO); + } + lwi = LWI_TIMEOUT_INTR(cfs_time_seconds(resends), NULL, NULL, NULL); + l_wait_event(waitq, 0, &lwi); + + goto restart_bulk; } rc = sptlrpc_cli_unwrap_bulk_read(req, req->rq_bulk, @@ -1056,9 +1101,10 @@ int mdc_readpage(struct obd_export *exp, const struct lu_fid *fid, RETURN(rc); } - if (req->rq_bulk->bd_nob_transferred != CFS_PAGE_SIZE) { + if (req->rq_bulk->bd_nob_transferred & ~LU_PAGE_MASK) { CERROR("Unexpected # bytes transferred: %d (%ld expected)\n", - req->rq_bulk->bd_nob_transferred, CFS_PAGE_SIZE); + req->rq_bulk->bd_nob_transferred, + CFS_PAGE_SIZE * op_data->op_npages); ptlrpc_req_finished(req); RETURN(-EPROTO); } @@ -1067,9 +1113,11 @@ int mdc_readpage(struct obd_export *exp, const struct lu_fid *fid, RETURN(0); } -static int mdc_statfs(struct obd_device *obd, struct obd_statfs *osfs, +static int mdc_statfs(const struct lu_env *env, + struct obd_export *exp, struct obd_statfs *osfs, __u64 max_age, __u32 flags) { + struct obd_device *obd = class_exp2obd(exp); struct ptlrpc_request *req; struct obd_statfs *msfs; struct obd_import *imp = NULL; @@ -1080,10 +1128,10 @@ static int mdc_statfs(struct obd_device *obd, struct obd_statfs *osfs, * Since the request might also come from lprocfs, so we need * sync this with client_disconnect_export Bug15684 */ - cfs_down_read(&obd->u.cli.cl_sem); + down_read(&obd->u.cli.cl_sem); if (obd->u.cli.cl_import) imp = class_import_get(obd->u.cli.cl_import); - cfs_up_read(&obd->u.cli.cl_sem); + up_read(&obd->u.cli.cl_sem); if (!imp) RETURN(-ENODEV); @@ -1149,9 +1197,9 @@ static int mdc_ioc_fid2path(struct obd_export *exp, struct getinfo_fid2path *gf) /* Val is struct getinfo_fid2path result plus path */ vallen = sizeof(*gf) + gf->gf_pathlen; - rc = obd_get_info(exp, keylen, key, &vallen, gf, NULL); - if (rc) - GOTO(out, rc); + rc = obd_get_info(NULL, exp, keylen, key, &vallen, gf, NULL); + if (rc != 0 && rc != -EREMOTE) + GOTO(out, rc); if (vallen <= sizeof(*gf)) GOTO(out, rc = -EPROTO); @@ -1166,6 +1214,280 @@ out: return rc; } +static int mdc_ioc_hsm_progress(struct obd_export *exp, + struct hsm_progress_kernel *hpk) +{ + struct obd_import *imp = class_exp2cliimp(exp); + struct hsm_progress_kernel *req_hpk; + struct ptlrpc_request *req; + int rc; + ENTRY; + + req = ptlrpc_request_alloc_pack(imp, &RQF_MDS_HSM_PROGRESS, + LUSTRE_MDS_VERSION, MDS_HSM_PROGRESS); + if (req == NULL) + GOTO(out, rc = -ENOMEM); + + mdc_pack_body(req, NULL, NULL, OBD_MD_FLRMTPERM, 0, 0, 0); + + /* Copy hsm_progress struct */ + req_hpk = req_capsule_client_get(&req->rq_pill, &RMF_MDS_HSM_PROGRESS); + if (req_hpk == NULL) + GOTO(out, rc = -EPROTO); + + *req_hpk = *hpk; + + ptlrpc_request_set_replen(req); + + rc = mdc_queue_wait(req); + GOTO(out, rc); +out: + ptlrpc_req_finished(req); + return rc; +} + +static int mdc_ioc_hsm_ct_register(struct obd_import *imp, __u32 archives) +{ + __u32 *archive_mask; + struct ptlrpc_request *req; + int rc; + ENTRY; + + req = ptlrpc_request_alloc_pack(imp, &RQF_MDS_HSM_CT_REGISTER, + LUSTRE_MDS_VERSION, + MDS_HSM_CT_REGISTER); + if (req == NULL) + GOTO(out, rc = -ENOMEM); + + mdc_pack_body(req, NULL, NULL, OBD_MD_FLRMTPERM, 0, 0, 0); + + /* Copy hsm_progress struct */ + archive_mask = req_capsule_client_get(&req->rq_pill, + &RMF_MDS_HSM_ARCHIVE); + if (archive_mask == NULL) + GOTO(out, rc = -EPROTO); + + *archive_mask = archives; + + ptlrpc_request_set_replen(req); + + rc = mdc_queue_wait(req); + GOTO(out, rc); +out: + ptlrpc_req_finished(req); + return rc; +} + +static int mdc_ioc_hsm_current_action(struct obd_export *exp, + struct md_op_data *op_data) +{ + struct hsm_current_action *hca = op_data->op_data; + struct hsm_current_action *req_hca; + struct ptlrpc_request *req; + int rc; + ENTRY; + + req = ptlrpc_request_alloc(class_exp2cliimp(exp), + &RQF_MDS_HSM_ACTION); + if (req == NULL) + RETURN(-ENOMEM); + + mdc_set_capa_size(req, &RMF_CAPA1, op_data->op_capa1); + + rc = ptlrpc_request_pack(req, LUSTRE_MDS_VERSION, MDS_HSM_ACTION); + if (rc) { + ptlrpc_request_free(req); + RETURN(rc); + } + + mdc_pack_body(req, &op_data->op_fid1, op_data->op_capa1, + OBD_MD_FLRMTPERM, 0, op_data->op_suppgids[0], 0); + + ptlrpc_request_set_replen(req); + + rc = mdc_queue_wait(req); + if (rc) + GOTO(out, rc); + + req_hca = req_capsule_server_get(&req->rq_pill, + &RMF_MDS_HSM_CURRENT_ACTION); + if (req_hca == NULL) + GOTO(out, rc = -EPROTO); + + *hca = *req_hca; + + EXIT; +out: + ptlrpc_req_finished(req); + return rc; +} + +static int mdc_ioc_hsm_ct_unregister(struct obd_import *imp) +{ + struct ptlrpc_request *req; + int rc; + ENTRY; + + req = ptlrpc_request_alloc_pack(imp, &RQF_MDS_HSM_CT_UNREGISTER, + LUSTRE_MDS_VERSION, + MDS_HSM_CT_UNREGISTER); + if (req == NULL) + GOTO(out, rc = -ENOMEM); + + mdc_pack_body(req, NULL, NULL, OBD_MD_FLRMTPERM, 0, 0, 0); + + ptlrpc_request_set_replen(req); + + rc = mdc_queue_wait(req); + GOTO(out, rc); +out: + ptlrpc_req_finished(req); + return rc; +} + +static int mdc_ioc_hsm_state_get(struct obd_export *exp, + struct md_op_data *op_data) +{ + struct hsm_user_state *hus = op_data->op_data; + struct hsm_user_state *req_hus; + struct ptlrpc_request *req; + int rc; + ENTRY; + + req = ptlrpc_request_alloc(class_exp2cliimp(exp), + &RQF_MDS_HSM_STATE_GET); + if (req == NULL) + RETURN(-ENOMEM); + + mdc_set_capa_size(req, &RMF_CAPA1, op_data->op_capa1); + + rc = ptlrpc_request_pack(req, LUSTRE_MDS_VERSION, MDS_HSM_STATE_GET); + if (rc != 0) { + ptlrpc_request_free(req); + RETURN(rc); + } + + mdc_pack_body(req, &op_data->op_fid1, op_data->op_capa1, + OBD_MD_FLRMTPERM, 0, op_data->op_suppgids[0], 0); + + ptlrpc_request_set_replen(req); + + rc = mdc_queue_wait(req); + if (rc) + GOTO(out, rc); + + req_hus = req_capsule_server_get(&req->rq_pill, &RMF_HSM_USER_STATE); + if (req_hus == NULL) + GOTO(out, rc = -EPROTO); + + *hus = *req_hus; + + EXIT; +out: + ptlrpc_req_finished(req); + return rc; +} + +static int mdc_ioc_hsm_state_set(struct obd_export *exp, + struct md_op_data *op_data) +{ + struct hsm_state_set *hss = op_data->op_data; + struct hsm_state_set *req_hss; + struct ptlrpc_request *req; + int rc; + ENTRY; + + req = ptlrpc_request_alloc(class_exp2cliimp(exp), + &RQF_MDS_HSM_STATE_SET); + if (req == NULL) + RETURN(-ENOMEM); + + mdc_set_capa_size(req, &RMF_CAPA1, op_data->op_capa1); + + rc = ptlrpc_request_pack(req, LUSTRE_MDS_VERSION, MDS_HSM_STATE_SET); + if (rc) { + ptlrpc_request_free(req); + RETURN(rc); + } + + mdc_pack_body(req, &op_data->op_fid1, op_data->op_capa1, + OBD_MD_FLRMTPERM, 0, op_data->op_suppgids[0], 0); + + /* Copy states */ + req_hss = req_capsule_client_get(&req->rq_pill, &RMF_HSM_STATE_SET); + if (req_hss == NULL) + GOTO(out, rc = -EPROTO); + *req_hss = *hss; + + ptlrpc_request_set_replen(req); + + rc = mdc_queue_wait(req); + GOTO(out, rc); + + EXIT; +out: + ptlrpc_req_finished(req); + return rc; +} + +static int mdc_ioc_hsm_request(struct obd_export *exp, + struct hsm_user_request *hur) +{ + struct obd_import *imp = class_exp2cliimp(exp); + struct ptlrpc_request *req; + struct hsm_request *req_hr; + struct hsm_user_item *req_hui; + char *req_opaque; + int rc; + ENTRY; + + req = ptlrpc_request_alloc(imp, &RQF_MDS_HSM_REQUEST); + if (req == NULL) + GOTO(out, rc = -ENOMEM); + + req_capsule_set_size(&req->rq_pill, &RMF_MDS_HSM_USER_ITEM, RCL_CLIENT, + hur->hur_request.hr_itemcount + * sizeof(struct hsm_user_item)); + req_capsule_set_size(&req->rq_pill, &RMF_GENERIC_DATA, RCL_CLIENT, + hur->hur_request.hr_data_len); + + rc = ptlrpc_request_pack(req, LUSTRE_MDS_VERSION, MDS_HSM_REQUEST); + if (rc) { + ptlrpc_request_free(req); + RETURN(rc); + } + + mdc_pack_body(req, NULL, NULL, OBD_MD_FLRMTPERM, 0, 0, 0); + + /* Copy hsm_request struct */ + req_hr = req_capsule_client_get(&req->rq_pill, &RMF_MDS_HSM_REQUEST); + if (req_hr == NULL) + GOTO(out, rc = -EPROTO); + *req_hr = hur->hur_request; + + /* Copy hsm_user_item structs */ + req_hui = req_capsule_client_get(&req->rq_pill, &RMF_MDS_HSM_USER_ITEM); + if (req_hui == NULL) + GOTO(out, rc = -EPROTO); + memcpy(req_hui, hur->hur_user_item, + hur->hur_request.hr_itemcount * sizeof(struct hsm_user_item)); + + /* Copy opaque field */ + req_opaque = req_capsule_client_get(&req->rq_pill, &RMF_GENERIC_DATA); + if (req_opaque == NULL) + GOTO(out, rc = -EPROTO); + memcpy(req_opaque, hur_data(hur), hur->hur_request.hr_data_len); + + ptlrpc_request_set_replen(req); + + rc = mdc_queue_wait(req); + GOTO(out, rc); + +out: + ptlrpc_req_finished(req); + return rc; +} + static struct kuc_hdr *changelog_kuc_hdr(char *buf, int len, int flags) { struct kuc_hdr *lh = (struct kuc_hdr *)buf; @@ -1183,15 +1505,15 @@ static struct kuc_hdr *changelog_kuc_hdr(char *buf, int len, int flags) #define D_CHANGELOG 0 struct changelog_show { - __u64 cs_startrec; - __u32 cs_flags; - cfs_file_t *cs_fp; - char *cs_buf; - struct obd_device *cs_obd; + __u64 cs_startrec; + __u32 cs_flags; + struct file *cs_fp; + char *cs_buf; + struct obd_device *cs_obd; }; -static int changelog_show_cb(struct llog_handle *llh, struct llog_rec_hdr *hdr, - void *data) +static int changelog_show_cb(const struct lu_env *env, struct llog_handle *llh, + struct llog_rec_hdr *hdr, void *data) { struct changelog_show *cs = data; struct llog_changelog_rec *rec = (struct llog_changelog_rec *)hdr; @@ -1213,14 +1535,14 @@ static int changelog_show_cb(struct llog_handle *llh, struct llog_rec_hdr *hdr, RETURN(0); } - CDEBUG(D_CHANGELOG, LPU64" %02d%-5s "LPU64" 0x%x t="DFID" p="DFID - " %.*s\n", rec->cr.cr_index, rec->cr.cr_type, - changelog_type2str(rec->cr.cr_type), rec->cr.cr_time, - rec->cr.cr_flags & CLF_FLAGMASK, - PFID(&rec->cr.cr_tfid), PFID(&rec->cr.cr_pfid), - rec->cr.cr_namelen, rec->cr.cr_name); + CDEBUG(D_CHANGELOG, LPU64" %02d%-5s "LPU64" 0x%x t="DFID" p="DFID + " %.*s\n", rec->cr.cr_index, rec->cr.cr_type, + changelog_type2str(rec->cr.cr_type), rec->cr.cr_time, + rec->cr.cr_flags & CLF_FLAGMASK, + PFID(&rec->cr.cr_tfid), PFID(&rec->cr.cr_pfid), + rec->cr.cr_namelen, changelog_rec_name(&rec->cr)); - len = sizeof(*lh) + sizeof(rec->cr) + rec->cr.cr_namelen; + len = sizeof(*lh) + changelog_rec_size(&rec->cr) + rec->cr.cr_namelen; /* Set up the message */ lh = changelog_kuc_hdr(cs->cs_buf, len, cs->cs_flags); @@ -1243,6 +1565,13 @@ static int mdc_changelog_send_thread(void *csdata) CDEBUG(D_CHANGELOG, "changelog to fp=%p start "LPU64"\n", cs->cs_fp, cs->cs_startrec); + /* + * It's important to daemonize here to close unused FDs. + * The write fd from pipe is already opened by the caller, + * so it's fine to clear all files here + */ + cfs_daemonize("mdc_clg_send_thread"); + OBD_ALLOC(cs->cs_buf, CR_MAXSIZE); if (cs->cs_buf == NULL) GOTO(out, rc = -ENOMEM); @@ -1251,20 +1580,20 @@ static int mdc_changelog_send_thread(void *csdata) ctxt = llog_get_context(cs->cs_obd, LLOG_CHANGELOG_REPL_CTXT); if (ctxt == NULL) GOTO(out, rc = -ENOENT); - rc = llog_create(ctxt, &llh, NULL, CHANGELOG_CATALOG); - if (rc) { - CERROR("llog_create() failed %d\n", rc); - GOTO(out, rc); - } - rc = llog_init_handle(llh, LLOG_F_IS_CAT, NULL); - if (rc) { - CERROR("llog_init_handle failed %d\n", rc); - GOTO(out, rc); - } - - /* We need the pipe fd open, so llog_process can't daemonize */ - rc = llog_cat_process_flags(llh, changelog_show_cb, cs, - LLOG_FLAG_NODEAMON, 0, 0); + rc = llog_open(NULL, ctxt, &llh, NULL, CHANGELOG_CATALOG, + LLOG_OPEN_EXISTS); + if (rc) { + CERROR("%s: fail to open changelog catalog: rc = %d\n", + cs->cs_obd->obd_name, rc); + GOTO(out, rc); + } + rc = llog_init_handle(NULL, llh, LLOG_F_IS_CAT, NULL); + if (rc) { + CERROR("llog_init_handle failed %d\n", rc); + GOTO(out, rc); + } + + rc = llog_cat_process(NULL, llh, changelog_show_cb, cs, 0, 0); /* Send EOF no matter what our result */ if ((kuch = changelog_kuc_hdr(cs->cs_buf, sizeof(*kuch), @@ -1274,14 +1603,16 @@ static int mdc_changelog_send_thread(void *csdata) } out: - cfs_put_file(cs->cs_fp); - if (llh) - llog_cat_put(llh); + fput(cs->cs_fp); + if (llh) + llog_cat_close(NULL, llh); if (ctxt) llog_ctxt_put(ctxt); if (cs->cs_buf) OBD_FREE(cs->cs_buf, CR_MAXSIZE); OBD_FREE_PTR(cs); + /* detach from parent process so we get cleaned up */ + cfs_daemonize("cl_send"); return rc; } @@ -1296,16 +1627,15 @@ static int mdc_ioc_changelog_send(struct obd_device *obd, if (!cs) return -ENOMEM; - cs->cs_obd = obd; - cs->cs_startrec = icc->icc_recno; - /* matching cfs_put_file in mdc_changelog_send_thread */ - cs->cs_fp = cfs_get_fd(icc->icc_id); - cs->cs_flags = icc->icc_flags; + cs->cs_obd = obd; + cs->cs_startrec = icc->icc_recno; + /* matching fput in mdc_changelog_send_thread */ + cs->cs_fp = fget(icc->icc_id); + cs->cs_flags = icc->icc_flags; /* New thread because we should return to user app before writing into our pipe */ - rc = cfs_kernel_thread(mdc_changelog_send_thread, cs, - CLONE_VM | CLONE_FILES); + rc = cfs_create_thread(mdc_changelog_send_thread, cs, CFS_DAEMON_FLAGS); if (rc >= 0) { CDEBUG(D_CHANGELOG, "start changelog thread: %d\n", rc); return 0; @@ -1319,6 +1649,147 @@ static int mdc_ioc_changelog_send(struct obd_device *obd, static int mdc_ioc_hsm_ct_start(struct obd_export *exp, struct lustre_kernelcomm *lk); +static int mdc_quotacheck(struct obd_device *unused, struct obd_export *exp, + struct obd_quotactl *oqctl) +{ + struct client_obd *cli = &exp->exp_obd->u.cli; + struct ptlrpc_request *req; + struct obd_quotactl *body; + int rc; + ENTRY; + + req = ptlrpc_request_alloc_pack(class_exp2cliimp(exp), + &RQF_MDS_QUOTACHECK, LUSTRE_MDS_VERSION, + MDS_QUOTACHECK); + if (req == NULL) + RETURN(-ENOMEM); + + body = req_capsule_client_get(&req->rq_pill, &RMF_OBD_QUOTACTL); + *body = *oqctl; + + ptlrpc_request_set_replen(req); + + /* the next poll will find -ENODATA, that means quotacheck is + * going on */ + cli->cl_qchk_stat = -ENODATA; + rc = ptlrpc_queue_wait(req); + if (rc) + cli->cl_qchk_stat = rc; + ptlrpc_req_finished(req); + RETURN(rc); +} + +static int mdc_quota_poll_check(struct obd_export *exp, + struct if_quotacheck *qchk) +{ + struct client_obd *cli = &exp->exp_obd->u.cli; + int rc; + ENTRY; + + qchk->obd_uuid = cli->cl_target_uuid; + memcpy(qchk->obd_type, LUSTRE_MDS_NAME, strlen(LUSTRE_MDS_NAME)); + + rc = cli->cl_qchk_stat; + /* the client is not the previous one */ + if (rc == CL_NOT_QUOTACHECKED) + rc = -EINTR; + RETURN(rc); +} + +static int mdc_quotactl(struct obd_device *unused, struct obd_export *exp, + struct obd_quotactl *oqctl) +{ + struct ptlrpc_request *req; + struct obd_quotactl *oqc; + int rc; + ENTRY; + + req = ptlrpc_request_alloc_pack(class_exp2cliimp(exp), + &RQF_MDS_QUOTACTL, LUSTRE_MDS_VERSION, + MDS_QUOTACTL); + if (req == NULL) + RETURN(-ENOMEM); + + oqc = req_capsule_client_get(&req->rq_pill, &RMF_OBD_QUOTACTL); + *oqc = *oqctl; + + ptlrpc_request_set_replen(req); + ptlrpc_at_set_req_timeout(req); + req->rq_no_resend = 1; + + rc = ptlrpc_queue_wait(req); + if (rc) + CERROR("ptlrpc_queue_wait failed, rc: %d\n", rc); + + if (req->rq_repmsg && + (oqc = req_capsule_server_get(&req->rq_pill, &RMF_OBD_QUOTACTL))) { + *oqctl = *oqc; + } else if (!rc) { + CERROR ("Can't unpack obd_quotactl\n"); + rc = -EPROTO; + } + ptlrpc_req_finished(req); + + RETURN(rc); +} + +static int mdc_ioc_swap_layouts(struct obd_export *exp, + struct md_op_data *op_data) +{ + CFS_LIST_HEAD(cancels); + struct ptlrpc_request *req; + int rc, count; + struct mdc_swap_layouts *msl, *payload; + ENTRY; + + msl = op_data->op_data; + + /* When the MDT will get the MDS_SWAP_LAYOUTS RPC the + * first thing it will do is to cancel the 2 layout + * locks hold by this client. + * So the client must cancel its layout locks on the 2 fids + * with the request RPC to avoid extra RPC round trips + */ + count = mdc_resource_get_unused(exp, &op_data->op_fid1, &cancels, + LCK_CR, MDS_INODELOCK_LAYOUT); + count += mdc_resource_get_unused(exp, &op_data->op_fid2, &cancels, + LCK_CR, MDS_INODELOCK_LAYOUT); + + req = ptlrpc_request_alloc(class_exp2cliimp(exp), + &RQF_MDS_SWAP_LAYOUTS); + if (req == NULL) { + ldlm_lock_list_put(&cancels, l_bl_ast, count); + RETURN(-ENOMEM); + } + + mdc_set_capa_size(req, &RMF_CAPA1, op_data->op_capa1); + mdc_set_capa_size(req, &RMF_CAPA2, op_data->op_capa2); + + rc = mdc_prep_elc_req(exp, req, MDS_SWAP_LAYOUTS, &cancels, count); + if (rc) { + ptlrpc_request_free(req); + RETURN(rc); + } + + mdc_swap_layouts_pack(req, op_data); + + payload = req_capsule_client_get(&req->rq_pill, &RMF_SWAP_LAYOUTS); + LASSERT(payload); + + *payload = *msl; + + ptlrpc_request_set_replen(req); + + rc = ptlrpc_queue_wait(req); + if (rc) + GOTO(out, rc); + EXIT; + +out: + ptlrpc_req_finished(req); + return rc; +} + static int mdc_iocontrol(unsigned int cmd, struct obd_export *exp, int len, void *karg, void *uarg) { @@ -1334,9 +1805,6 @@ static int mdc_iocontrol(unsigned int cmd, struct obd_export *exp, int len, return -EINVAL; } switch (cmd) { - case LL_IOC_HSM_CT_START: - rc = mdc_ioc_hsm_ct_start(exp, karg); - GOTO(out, rc); case OBD_IOC_CHANGELOG_SEND: rc = mdc_ioc_changelog_send(obd, karg); GOTO(out, rc); @@ -1344,17 +1812,33 @@ static int mdc_iocontrol(unsigned int cmd, struct obd_export *exp, int len, struct ioc_changelog *icc = karg; struct changelog_setinfo cs = {.cs_recno = icc->icc_recno, .cs_id = icc->icc_id}; - rc = obd_set_info_async(exp, strlen(KEY_CHANGELOG_CLEAR), + rc = obd_set_info_async(NULL, exp, strlen(KEY_CHANGELOG_CLEAR), KEY_CHANGELOG_CLEAR, sizeof(cs), &cs, NULL); GOTO(out, rc); } - case OBD_IOC_FID2PATH: { - rc = mdc_ioc_fid2path(exp, karg); - GOTO(out, rc); - } + case OBD_IOC_FID2PATH: + rc = mdc_ioc_fid2path(exp, karg); + GOTO(out, rc); + case LL_IOC_HSM_CT_START: + rc = mdc_ioc_hsm_ct_start(exp, karg); + GOTO(out, rc); + case LL_IOC_HSM_PROGRESS: + rc = mdc_ioc_hsm_progress(exp, karg); + GOTO(out, rc); + case LL_IOC_HSM_STATE_GET: + rc = mdc_ioc_hsm_state_get(exp, karg); + GOTO(out, rc); + case LL_IOC_HSM_STATE_SET: + rc = mdc_ioc_hsm_state_set(exp, karg); + case LL_IOC_HSM_ACTION: + rc = mdc_ioc_hsm_current_action(exp, karg); + GOTO(out, rc); + case LL_IOC_HSM_REQUEST: + rc = mdc_ioc_hsm_request(exp, karg); + GOTO(out, rc); case OBD_IOC_CLIENT_RECOVER: - rc = ptlrpc_recover_import(imp, data->ioc_inlbuf1); + rc = ptlrpc_recover_import(imp, data->ioc_inlbuf1, 0); if (rc < 0) GOTO(out, rc); GOTO(out, rc = 0); @@ -1363,7 +1847,8 @@ static int mdc_iocontrol(unsigned int cmd, struct obd_export *exp, int len, GOTO(out, rc); case OBD_IOC_PARSE: { ctxt = llog_get_context(exp->exp_obd, LLOG_CONFIG_REPL_CTXT); - rc = class_config_parse_llog(ctxt, data->ioc_inlbuf1, NULL); + rc = class_config_parse_llog(NULL, ctxt, data->ioc_inlbuf1, + NULL); llog_ctxt_put(ctxt); GOTO(out, rc); } @@ -1371,23 +1856,23 @@ static int mdc_iocontrol(unsigned int cmd, struct obd_export *exp, int len, case OBD_IOC_LLOG_INFO: case OBD_IOC_LLOG_PRINT: { ctxt = llog_get_context(obd, LLOG_CONFIG_REPL_CTXT); - rc = llog_ioctl(ctxt, cmd, data); + rc = llog_ioctl(NULL, ctxt, cmd, data); llog_ctxt_put(ctxt); GOTO(out, rc); } #endif case OBD_IOC_POLL_QUOTACHECK: - rc = lquota_poll_check(quota_interface, exp, - (struct if_quotacheck *)karg); + rc = mdc_quota_poll_check(exp, (struct if_quotacheck *)karg); GOTO(out, rc); case OBD_IOC_PING_TARGET: rc = ptlrpc_obd_ping(obd); GOTO(out, rc); /* - * Normally IOC_OBD_STATFS iocontrol is handled by LMV instead of MDC. - * But when the cluster is upgraded from 1.8, there'd be no LMV layer - * thus we might be called here. Eventually this code should be removed. - * bz20731. + * Normally IOC_OBD_STATFS, OBD_IOC_QUOTACTL iocontrol are handled by + * LMV instead of MDC. But when the cluster is upgraded from 1.8, + * there'd be no LMV layer thus we might be called here. Eventually + * this code should be removed. + * bz20731, LU-592. */ case IOC_OBD_STATFS: { struct obd_statfs stat_buf = {0}; @@ -1401,8 +1886,9 @@ static int mdc_iocontrol(unsigned int cmd, struct obd_export *exp, int len, (int) sizeof(struct obd_uuid)))) GOTO(out, rc = -EFAULT); - rc = mdc_statfs(obd, &stat_buf, - cfs_time_current_64() - CFS_HZ, 0); + rc = mdc_statfs(NULL, obd->obd_self_export, &stat_buf, + cfs_time_shift_64(-OBD_STATFS_CACHE_SECONDS), + 0); if (rc != 0) GOTO(out, rc); @@ -1413,6 +1899,36 @@ static int mdc_iocontrol(unsigned int cmd, struct obd_export *exp, int len, GOTO(out, rc = 0); } + case OBD_IOC_QUOTACTL: { + struct if_quotactl *qctl = karg; + struct obd_quotactl *oqctl; + + OBD_ALLOC_PTR(oqctl); + if (!oqctl) + RETURN(-ENOMEM); + + QCTL_COPY(oqctl, qctl); + rc = obd_quotactl(exp, oqctl); + if (rc == 0) { + QCTL_COPY(qctl, oqctl); + qctl->qc_valid = QC_MDTIDX; + qctl->obd_uuid = obd->u.cli.cl_target_uuid; + } + OBD_FREE_PTR(oqctl); + break; + } + case LL_IOC_GET_CONNECT_FLAGS: { + if (cfs_copy_to_user(uarg, + exp_connect_flags_ptr(exp), + sizeof(__u64))) + GOTO(out, rc = -EFAULT); + else + GOTO(out, rc = 0); + } + case LL_IOC_LOV_SWAP_LAYOUTS: { + rc = mdc_ioc_swap_layouts(exp, karg); + break; + } default: CERROR("mdc_ioctl(): unrecognised ioctl %#x\n", cmd); GOTO(out, rc = -ENOTTY); @@ -1457,74 +1973,90 @@ int mdc_get_info_rpc(struct obd_export *exp, RCL_SERVER, vallen); ptlrpc_request_set_replen(req); - rc = ptlrpc_queue_wait(req); - if (rc == 0) { - tmp = req_capsule_server_get(&req->rq_pill, &RMF_GETINFO_VAL); - memcpy(val, tmp, vallen); - if (ptlrpc_rep_need_swab(req)) { - if (KEY_IS(KEY_FID2PATH)) { - lustre_swab_fid2path(val); - } - } - } - ptlrpc_req_finished(req); - - RETURN(rc); + rc = ptlrpc_queue_wait(req); + /* -EREMOTE means the get_info result is partial, and it needs to + * continue on another MDT, see fid2path part in lmv_iocontrol */ + if (rc == 0 || rc == -EREMOTE) { + tmp = req_capsule_server_get(&req->rq_pill, &RMF_GETINFO_VAL); + memcpy(val, tmp, vallen); + if (ptlrpc_rep_need_swab(req)) { + if (KEY_IS(KEY_FID2PATH)) + lustre_swab_fid2path(val); + } + } + ptlrpc_req_finished(req); + + RETURN(rc); } static void lustre_swab_hai(struct hsm_action_item *h) { - __swab32s(&h->hai_len); - __swab32s(&h->hai_action); - lustre_swab_lu_fid(&h->hai_fid); - __swab64s(&h->hai_cookie); - __swab64s(&h->hai_extent_start); - __swab64s(&h->hai_extent_end); - __swab64s(&h->hai_gid); + __swab32s(&h->hai_len); + __swab32s(&h->hai_action); + lustre_swab_lu_fid(&h->hai_fid); + lustre_swab_lu_fid(&h->hai_dfid); + __swab64s(&h->hai_cookie); + __swab64s(&h->hai_extent.offset); + __swab64s(&h->hai_extent.length); + __swab64s(&h->hai_gid); } static void lustre_swab_hal(struct hsm_action_list *h) { - struct hsm_action_item *hai; - int i; + struct hsm_action_item *hai; + int i; + + __swab32s(&h->hal_version); + __swab32s(&h->hal_count); + __swab32s(&h->hal_archive_id); + __swab64s(&h->hal_flags); + hai = hai_zero(h); + for (i = 0; i < h->hal_count; i++) { + lustre_swab_hai(hai); + hai = hai_next(hai); + } +} - __swab32s(&h->hal_version); - __swab32s(&h->hal_count); - __swab32s(&h->hal_archive_num); - hai = hai_zero(h); - for (i = 0; i < h->hal_count; i++) { - lustre_swab_hai(hai); - hai = hai_next(hai); - } +static void lustre_swab_kuch(struct kuc_hdr *l) +{ + __swab16s(&l->kuc_magic); + /* __u8 l->kuc_transport */ + __swab16s(&l->kuc_msgtype); + __swab16s(&l->kuc_msglen); } static int mdc_ioc_hsm_ct_start(struct obd_export *exp, - struct lustre_kernelcomm *lk) + struct lustre_kernelcomm *lk) { - int rc = 0; - - if (lk->lk_group != KUC_GRP_HSM) { - CERROR("Bad copytool group %d\n", lk->lk_group); - return -EINVAL; - } - - CDEBUG(D_HSM, "CT start r%d w%d u%d g%d f%#x\n", lk->lk_rfd, lk->lk_wfd, - lk->lk_uid, lk->lk_group, lk->lk_flags); - - if (lk->lk_flags & LK_FLG_STOP) - rc = libcfs_kkuc_group_rem(lk->lk_uid,lk->lk_group); - else { - cfs_file_t *fp = cfs_get_fd(lk->lk_wfd); - rc = libcfs_kkuc_group_add(fp, lk->lk_uid,lk->lk_group); - if (rc && fp) - cfs_put_file(fp); - } - - /* lk_data is archive number mask */ - /* TODO: register archive num with mdt so coordinator can choose - correct agent. */ - - return rc; + struct obd_import *imp = class_exp2cliimp(exp); + __u32 archive = lk->lk_data; + int rc = 0; + + if (lk->lk_group != KUC_GRP_HSM) { + CERROR("Bad copytool group %d\n", lk->lk_group); + return -EINVAL; + } + + CDEBUG(D_HSM, "CT start r%d w%d u%d g%d f%#x\n", lk->lk_rfd, lk->lk_wfd, + lk->lk_uid, lk->lk_group, lk->lk_flags); + + if (lk->lk_flags & LK_FLG_STOP) { + rc = libcfs_kkuc_group_rem(lk->lk_uid, lk->lk_group); + /* Unregister with the coordinator */ + if (rc == 0) + rc = mdc_ioc_hsm_ct_unregister(imp); + } else { + struct file *fp = fget(lk->lk_wfd); + + rc = libcfs_kkuc_group_add(fp, lk->lk_uid, lk->lk_group, + lk->lk_data); + if (rc && fp) + fput(fp); + if (rc == 0) + rc = mdc_ioc_hsm_ct_register(imp, archive); + } + + return rc; } /** @@ -1534,79 +2066,91 @@ static int mdc_ioc_hsm_ct_start(struct obd_export *exp, */ static int mdc_hsm_copytool_send(int len, void *val) { - struct kuc_hdr *lh = (struct kuc_hdr *)val; - struct hsm_action_list *hal = (struct hsm_action_list *)(lh + 1); - int rc; - ENTRY; - - if (len < sizeof(*lh) + sizeof(*hal)) { - CERROR("Short HSM message %d < %d\n", len, - (int) (sizeof(*lh) + sizeof(*hal))); - RETURN(-EPROTO); - } - if (lh->kuc_magic == __swab16(KUC_MAGIC)) { - lustre_swab_kuch(lh); - lustre_swab_hal(hal); - } else if (lh->kuc_magic != KUC_MAGIC) { - CERROR("Bad magic %x!=%x\n", lh->kuc_magic, KUC_MAGIC); - RETURN(-EPROTO); - } + struct kuc_hdr *lh = (struct kuc_hdr *)val; + struct hsm_action_list *hal = (struct hsm_action_list *)(lh + 1); + int rc; + ENTRY; + + if (len < sizeof(*lh) + sizeof(*hal)) { + CERROR("Short HSM message %d < %d\n", len, + (int) (sizeof(*lh) + sizeof(*hal))); + RETURN(-EPROTO); + } + if (lh->kuc_magic == __swab16(KUC_MAGIC)) { + lustre_swab_kuch(lh); + lustre_swab_hal(hal); + } else if (lh->kuc_magic != KUC_MAGIC) { + CERROR("Bad magic %x!=%x\n", lh->kuc_magic, KUC_MAGIC); + RETURN(-EPROTO); + } + + CDEBUG(D_HSM, " Received message mg=%x t=%d m=%d l=%d actions=%d " + "on %s\n", + lh->kuc_magic, lh->kuc_transport, lh->kuc_msgtype, + lh->kuc_msglen, hal->hal_count, hal->hal_fsname); + + /* Broadcast to HSM listeners */ + rc = libcfs_kkuc_group_put(KUC_GRP_HSM, lh); + + RETURN(rc); +} - CDEBUG(D_HSM, " Received message mg=%x t=%d m=%d l=%d actions=%d\n", - lh->kuc_magic, lh->kuc_transport, lh->kuc_msgtype, - lh->kuc_msglen, hal->hal_count); +/** + * callback function passed to kuc for re-registering each HSM copytool + * running on MDC, after MDT shutdown/recovery. + * @param data archive id served by the copytool + * @param cb_arg callback argument (obd_import) + */ +static int mdc_hsm_ct_reregister(__u32 data, void *cb_arg) +{ + struct obd_import *imp = (struct obd_import *)cb_arg; + __u32 archive = data; + int rc; - /* Broadcast to HSM listeners */ - rc = libcfs_kkuc_group_put(KUC_GRP_HSM, lh); + CDEBUG(D_HA, "recover copytool registration to MDT (archive=%#x)\n", + archive); + rc = mdc_ioc_hsm_ct_register(imp, archive); - RETURN(rc); + /* ignore error if the copytool is already registered */ + return ((rc != 0) && (rc != -EEXIST)) ? rc : 0; } -int mdc_set_info_async(struct obd_export *exp, - obd_count keylen, void *key, - obd_count vallen, void *val, - struct ptlrpc_request_set *set) +/** + * Re-establish all kuc contexts with MDT + * after MDT shutdown/recovery. + */ +static int mdc_kuc_reregister(struct obd_import *imp) { - struct obd_import *imp = class_exp2cliimp(exp); - int rc = -EINVAL; - ENTRY; - - if (KEY_IS(KEY_INIT_RECOV)) { - if (vallen != sizeof(int)) - RETURN(-EINVAL); - cfs_spin_lock(&imp->imp_lock); - imp->imp_initial_recov = *(int *)val; - cfs_spin_unlock(&imp->imp_lock); - CDEBUG(D_HA, "%s: set imp_initial_recov = %d\n", - exp->exp_obd->obd_name, imp->imp_initial_recov); - RETURN(0); - } - /* Turn off initial_recov after we try all backup servers once */ - if (KEY_IS(KEY_INIT_RECOV_BACKUP)) { - if (vallen != sizeof(int)) - RETURN(-EINVAL); - cfs_spin_lock(&imp->imp_lock); - imp->imp_initial_recov_bk = *(int *)val; - if (imp->imp_initial_recov_bk) - imp->imp_initial_recov = 1; - cfs_spin_unlock(&imp->imp_lock); - CDEBUG(D_HA, "%s: set imp_initial_recov_bk = %d\n", - exp->exp_obd->obd_name, imp->imp_initial_recov_bk); - RETURN(0); - } - if (KEY_IS(KEY_READ_ONLY)) { - if (vallen != sizeof(int)) - RETURN(-EINVAL); + /* re-register HSM agents */ + return libcfs_kkuc_group_foreach(KUC_GRP_HSM, mdc_hsm_ct_reregister, + (void *)imp); +} - cfs_spin_lock(&imp->imp_lock); - if (*((int *)val)) { - imp->imp_connect_flags_orig |= OBD_CONNECT_RDONLY; - imp->imp_connect_data.ocd_connect_flags |= OBD_CONNECT_RDONLY; - } else { - imp->imp_connect_flags_orig &= ~OBD_CONNECT_RDONLY; - imp->imp_connect_data.ocd_connect_flags &= ~OBD_CONNECT_RDONLY; - } - cfs_spin_unlock(&imp->imp_lock); +int mdc_set_info_async(const struct lu_env *env, + struct obd_export *exp, + obd_count keylen, void *key, + obd_count vallen, void *val, + struct ptlrpc_request_set *set) +{ + struct obd_import *imp = class_exp2cliimp(exp); + int rc; + ENTRY; + + if (KEY_IS(KEY_READ_ONLY)) { + if (vallen != sizeof(int)) + RETURN(-EINVAL); + + spin_lock(&imp->imp_lock); + if (*((int *)val)) { + imp->imp_connect_flags_orig |= OBD_CONNECT_RDONLY; + imp->imp_connect_data.ocd_connect_flags |= + OBD_CONNECT_RDONLY; + } else { + imp->imp_connect_flags_orig &= ~OBD_CONNECT_RDONLY; + imp->imp_connect_data.ocd_connect_flags &= + ~OBD_CONNECT_RDONLY; + } + spin_unlock(&imp->imp_lock); rc = do_set_info_async(imp, MDS_SET_INFO, LUSTRE_MDS_VERSION, keylen, key, vallen, val, set); @@ -1622,9 +2166,9 @@ int mdc_set_info_async(struct obd_export *exp, } if (KEY_IS(KEY_MDS_CONN)) { /* mds-mds import */ - cfs_spin_lock(&imp->imp_lock); - imp->imp_server_timeout = 1; - cfs_spin_unlock(&imp->imp_lock); + spin_lock(&imp->imp_lock); + imp->imp_server_timeout = 1; + spin_unlock(&imp->imp_lock); imp->imp_client->cli_request_portal = MDS_MDS_PORTAL; CDEBUG(D_OTHER, "%s: timeout / 2\n", exp->exp_obd->obd_name); RETURN(0); @@ -1639,11 +2183,13 @@ int mdc_set_info_async(struct obd_export *exp, RETURN(rc); } - RETURN(rc); + CERROR("Unknown key %s\n", (char *)key); + RETURN(-EINVAL); } -int mdc_get_info(struct obd_export *exp, __u32 keylen, void *key, - __u32 *vallen, void *val, struct lov_stripe_md *lsm) +int mdc_get_info(const struct lu_env *env, struct obd_export *exp, + __u32 keylen, void *key, __u32 *vallen, void *val, + struct lov_stripe_md *lsm) { int rc = -EINVAL; @@ -1658,8 +2204,7 @@ int mdc_get_info(struct obd_export *exp, __u32 keylen, void *key, max_easize = val; *max_easize = exp->exp_obd->u.cli.cl_max_mds_easize; RETURN(0); - } - if (KEY_IS(KEY_CONN_DATA)) { + } else if (KEY_IS(KEY_CONN_DATA)) { struct obd_import *imp = class_exp2cliimp(exp); struct obd_connect_data *data = val; @@ -1668,6 +2213,9 @@ int mdc_get_info(struct obd_export *exp, __u32 keylen, void *key, *data = imp->imp_connect_data; RETURN(0); + } else if (KEY_IS(KEY_TGT_COUNT)) { + *((int *)val) = 1; + RETURN(0); } rc = mdc_get_info_rpc(exp, keylen, key, *vallen, val); @@ -1814,12 +2362,9 @@ static int mdc_import_event(struct obd_device *obd, struct obd_import *imp, /* * Flush current sequence to make client obtain new one * from server in case of disconnect/reconnect. - * If range is already empty then no need to flush it. */ - if (cli->cl_seq != NULL && - !range_is_exhausted(&cli->cl_seq->lcs_space)) { + if (cli->cl_seq != NULL) seq_client_flush(cli->cl_seq); - } rc = obd_notify_observer(obd, obd, OBD_NOTIFY_INACTIVE, NULL); break; @@ -1831,14 +2376,18 @@ static int mdc_import_event(struct obd_device *obd, struct obd_import *imp, break; } - case IMP_EVENT_ACTIVE: { - rc = obd_notify_observer(obd, obd, OBD_NOTIFY_ACTIVE, NULL); - break; - } + case IMP_EVENT_ACTIVE: + rc = obd_notify_observer(obd, obd, OBD_NOTIFY_ACTIVE, NULL); + /* restore re-establish kuc registration after reconnecting */ + if (rc == 0) + rc = mdc_kuc_reregister(imp); + break; case IMP_EVENT_OCD: rc = obd_notify_observer(obd, obd, OBD_NOTIFY_OCD, NULL); break; - + case IMP_EVENT_DEACTIVATE: + case IMP_EVENT_ACTIVATE: + break; default: CERROR("Unknown import event %x\n", event); LBUG(); @@ -1846,76 +2395,50 @@ static int mdc_import_event(struct obd_device *obd, struct obd_import *imp, RETURN(rc); } -static int mdc_fid_init(struct obd_export *exp) +int mdc_fid_alloc(struct obd_export *exp, struct lu_fid *fid, + struct md_op_data *op_data) { struct client_obd *cli = &exp->exp_obd->u.cli; - char *prefix; - int rc; + struct lu_client_seq *seq = cli->cl_seq; ENTRY; - - OBD_ALLOC_PTR(cli->cl_seq); - if (cli->cl_seq == NULL) - RETURN(-ENOMEM); - - OBD_ALLOC(prefix, MAX_OBD_NAME + 5); - if (prefix == NULL) - GOTO(out_free_seq, rc = -ENOMEM); - - snprintf(prefix, MAX_OBD_NAME + 5, "srv-%s", - exp->exp_obd->obd_name); - - /* Init client side sequence-manager */ - rc = seq_client_init(cli->cl_seq, exp, - LUSTRE_SEQ_METADATA, - prefix, NULL); - OBD_FREE(prefix, MAX_OBD_NAME + 5); - if (rc) - GOTO(out_free_seq, rc); - - RETURN(rc); -out_free_seq: - OBD_FREE_PTR(cli->cl_seq); - cli->cl_seq = NULL; - return rc; + RETURN(seq_client_alloc_fid(NULL, seq, fid)); } -static int mdc_fid_fini(struct obd_export *exp) -{ +struct obd_uuid *mdc_get_uuid(struct obd_export *exp) { struct client_obd *cli = &exp->exp_obd->u.cli; - ENTRY; - - if (cli->cl_seq != NULL) { - seq_client_fini(cli->cl_seq); - OBD_FREE_PTR(cli->cl_seq); - cli->cl_seq = NULL; - } - - RETURN(0); + return &cli->cl_target_uuid; } -int mdc_fid_alloc(struct obd_export *exp, struct lu_fid *fid, - struct md_op_data *op_data) +/** + * Determine whether the lock can be canceled before replaying it during + * recovery, non zero value will be return if the lock can be canceled, + * or zero returned for not + */ +static int mdc_cancel_for_recovery(struct ldlm_lock *lock) { - struct client_obd *cli = &exp->exp_obd->u.cli; - struct lu_client_seq *seq = cli->cl_seq; - ENTRY; - RETURN(seq_client_alloc_fid(seq, fid)); + if (lock->l_resource->lr_type != LDLM_IBITS) + RETURN(0); + + /* FIXME: if we ever get into a situation where there are too many + * opened files with open locks on a single node, then we really + * should replay these open locks to reget it */ + if (lock->l_policy_data.l_inodebits.bits & MDS_INODELOCK_OPEN) + RETURN(0); + + RETURN(1); } -/* XXX This method is used only to clear current fid seq - * once fld/mds insert failed */ -static int mdc_fid_delete(struct obd_export *exp, const struct lu_fid *fid) +static int mdc_resource_inode_free(struct ldlm_resource *res) { - struct client_obd *cli = &exp->exp_obd->u.cli; + if (res->lr_lvb_inode) + res->lr_lvb_inode = NULL; - seq_client_flush(cli->cl_seq); - return 0; + return 0; } -struct obd_uuid *mdc_get_uuid(struct obd_export *exp) { - struct client_obd *cli = &exp->exp_obd->u.cli; - return &cli->cl_target_uuid; -} +struct ldlm_valblock_ops inode_lvbo = { + lvbo_free: mdc_resource_inode_free +}; static int mdc_setup(struct obd_device *obd, struct lustre_cfg *cfg) { @@ -1931,14 +2454,9 @@ static int mdc_setup(struct obd_device *obd, struct lustre_cfg *cfg) ptlrpcd_addref(); - OBD_ALLOC(cli->cl_setattr_lock, sizeof (*cli->cl_setattr_lock)); - if (!cli->cl_setattr_lock) - GOTO(err_rpc_lock, rc = -ENOMEM); - mdc_init_rpc_lock(cli->cl_setattr_lock); - OBD_ALLOC(cli->cl_close_lock, sizeof (*cli->cl_close_lock)); if (!cli->cl_close_lock) - GOTO(err_setattr_lock, rc = -ENOMEM); + GOTO(err_rpc_lock, rc = -ENOMEM); mdc_init_rpc_lock(cli->cl_close_lock); rc = client_obd_setup(obd, cfg); @@ -1949,6 +2467,10 @@ static int mdc_setup(struct obd_device *obd, struct lustre_cfg *cfg) sptlrpc_lprocfs_cliobd_attach(obd); ptlrpc_lprocfs_register_obd(obd); + ns_register_cancel(obd->obd_namespace, mdc_cancel_for_recovery); + + obd->obd_namespace->ns_lvbo = &inode_lvbo; + rc = obd_llog_init(obd, &obd->obd_olg, obd, NULL); if (rc) { mdc_cleanup(obd); @@ -1959,8 +2481,6 @@ static int mdc_setup(struct obd_device *obd, struct lustre_cfg *cfg) err_close_lock: OBD_FREE(cli->cl_close_lock, sizeof (*cli->cl_close_lock)); -err_setattr_lock: - OBD_FREE(cli->cl_setattr_lock, sizeof (*cli->cl_setattr_lock)); err_rpc_lock: OBD_FREE(cli->cl_rpc_lock, sizeof (*cli->cl_rpc_lock)); ptlrpcd_decref(); @@ -2003,18 +2523,10 @@ static int mdc_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage) if (obd->obd_type->typ_refcnt <= 1) libcfs_kkuc_group_rem(0, KUC_GRP_HSM); - /* If we set up but never connected, the - client import will not have been cleaned. */ - if (obd->u.cli.cl_import) { - struct obd_import *imp; - cfs_down_write(&obd->u.cli.cl_sem); - imp = obd->u.cli.cl_import; - CERROR("client import never connected\n"); - ptlrpc_invalidate_import(imp); - class_destroy_import(imp); - cfs_up_write(&obd->u.cli.cl_sem); - obd->u.cli.cl_import = NULL; - } + obd_cleanup_client_import(obd); + ptlrpc_lprocfs_unregister_obd(obd); + lprocfs_obd_cleanup(obd); + rc = obd_llog_finish(obd, 0); if (rc != 0) CERROR("failed to cleanup llogging subsystems\n"); @@ -2028,11 +2540,8 @@ static int mdc_cleanup(struct obd_device *obd) struct client_obd *cli = &obd->u.cli; OBD_FREE(cli->cl_rpc_lock, sizeof (*cli->cl_rpc_lock)); - OBD_FREE(cli->cl_setattr_lock, sizeof (*cli->cl_setattr_lock)); OBD_FREE(cli->cl_close_lock, sizeof (*cli->cl_close_lock)); - ptlrpc_lprocfs_unregister_obd(obd); - lprocfs_obd_cleanup(obd); ptlrpcd_decref(); return client_obd_cleanup(obd); @@ -2042,47 +2551,36 @@ static int mdc_cleanup(struct obd_device *obd) static int mdc_llog_init(struct obd_device *obd, struct obd_llog_group *olg, struct obd_device *tgt, int *index) { - struct llog_ctxt *ctxt; - int rc; - ENTRY; + struct llog_ctxt *ctxt; + int rc; - LASSERT(olg == &obd->obd_olg); + ENTRY; - rc = llog_setup(obd, olg, LLOG_LOVEA_REPL_CTXT, tgt, 0, NULL, - &llog_client_ops); - if (rc) - RETURN(rc); + LASSERT(olg == &obd->obd_olg); - ctxt = llog_get_context(obd, LLOG_LOVEA_REPL_CTXT); - llog_initiator_connect(ctxt); - llog_ctxt_put(ctxt); + rc = llog_setup(NULL, obd, olg, LLOG_CHANGELOG_REPL_CTXT, tgt, + &llog_client_ops); + if (rc) + RETURN(rc); - rc = llog_setup(obd, olg, LLOG_CHANGELOG_REPL_CTXT, tgt, 0, NULL, - &llog_client_ops); - if (rc == 0) { - ctxt = llog_group_get_ctxt(olg, LLOG_CHANGELOG_REPL_CTXT); - llog_initiator_connect(ctxt); - llog_ctxt_put(ctxt); - } + ctxt = llog_group_get_ctxt(olg, LLOG_CHANGELOG_REPL_CTXT); + llog_initiator_connect(ctxt); + llog_ctxt_put(ctxt); - RETURN(rc); + RETURN(0); } static int mdc_llog_finish(struct obd_device *obd, int count) { - struct llog_ctxt *ctxt; - int rc = 0; - ENTRY; + struct llog_ctxt *ctxt; - ctxt = llog_get_context(obd, LLOG_LOVEA_REPL_CTXT); - if (ctxt) - rc = llog_cleanup(ctxt); + ENTRY; - ctxt = llog_get_context(obd, LLOG_CHANGELOG_REPL_CTXT); - if (ctxt) - rc = llog_cleanup(ctxt); + ctxt = llog_get_context(obd, LLOG_CHANGELOG_REPL_CTXT); + if (ctxt) + llog_cleanup(NULL, ctxt); - RETURN(rc); + RETURN(0); } static int mdc_process_config(struct obd_device *obd, obd_count len, void *buf) @@ -2144,11 +2642,10 @@ int mdc_get_remote_perm(struct obd_export *exp, const struct lu_fid *fid, } static int mdc_interpret_renew_capa(const struct lu_env *env, - struct ptlrpc_request *req, void *unused, + struct ptlrpc_request *req, void *args, int status) { - struct obd_capa *oc = req->rq_async_args.pointer_arg[0]; - renew_capa_cb_t cb = req->rq_async_args.pointer_arg[1]; + struct mdc_renew_capa_args *ra = args; struct mdt_body *body = NULL; struct lustre_capa *capa; ENTRY; @@ -2168,7 +2665,7 @@ static int mdc_interpret_renew_capa(const struct lu_env *env, GOTO(out, capa = ERR_PTR(-EFAULT)); EXIT; out: - cb(oc, capa); + ra->ra_cb(ra->ra_oc, capa); return 0; } @@ -2176,6 +2673,7 @@ static int mdc_renew_capa(struct obd_export *exp, struct obd_capa *oc, renew_capa_cb_t cb) { struct ptlrpc_request *req; + struct mdc_renew_capa_args *ra; ENTRY; req = ptlrpc_request_alloc_pack(class_exp2cliimp(exp), &RQF_MDS_GETATTR, @@ -2189,10 +2687,12 @@ static int mdc_renew_capa(struct obd_export *exp, struct obd_capa *oc, mdc_pack_body(req, &oc->c_capa.lc_fid, oc, OBD_MD_FLOSSCAPA, 0, -1, 0); ptlrpc_request_set_replen(req); - req->rq_async_args.pointer_arg[0] = oc; - req->rq_async_args.pointer_arg[1] = cb; + CLASSERT(sizeof(*ra) <= sizeof(req->rq_async_args)); + ra = ptlrpc_req_async_args(req); + ra->ra_oc = oc; + ra->ra_cb = cb; req->rq_interpret_reply = mdc_interpret_renew_capa; - ptlrpcd_add_req(req, PSCOPE_OTHER); + ptlrpcd_add_req(req, PDL_POLICY_LOCAL, -1); RETURN(0); } @@ -2202,13 +2702,13 @@ static int mdc_connect(const struct lu_env *env, struct obd_connect_data *data, void *localdata) { - struct obd_import *imp = obd->u.cli.cl_import; + struct obd_import *imp = obd->u.cli.cl_import; - /* mds-mds import features */ - if (data && (data->ocd_connect_flags & OBD_CONNECT_MDS_MDS)) { - cfs_spin_lock(&imp->imp_lock); - imp->imp_server_timeout = 1; - cfs_spin_unlock(&imp->imp_lock); + /* mds-mds import features */ + if (data && (data->ocd_connect_flags & OBD_CONNECT_MDS_MDS)) { + spin_lock(&imp->imp_lock); + imp->imp_server_timeout = 1; + spin_unlock(&imp->imp_lock); imp->imp_client->cli_request_portal = MDS_MDS_PORTAL; CDEBUG(D_OTHER, "%s: Set 'mds' portal and timeout\n", obd->obd_name); @@ -2231,21 +2731,22 @@ struct obd_ops mdc_obd_ops = { .o_statfs = mdc_statfs, .o_pin = mdc_pin, .o_unpin = mdc_unpin, - .o_fid_init = mdc_fid_init, - .o_fid_fini = mdc_fid_fini, + .o_fid_init = client_fid_init, + .o_fid_fini = client_fid_fini, .o_fid_alloc = mdc_fid_alloc, - .o_fid_delete = mdc_fid_delete, .o_import_event = mdc_import_event, .o_llog_init = mdc_llog_init, .o_llog_finish = mdc_llog_finish, .o_get_info = mdc_get_info, .o_process_config = mdc_process_config, .o_get_uuid = mdc_get_uuid, + .o_quotactl = mdc_quotactl, + .o_quotacheck = mdc_quotacheck }; struct md_ops mdc_md_ops = { .m_getstatus = mdc_getstatus, - .m_change_cbdata = mdc_change_cbdata, + .m_null_inode = mdc_null_inode, .m_find_cbdata = mdc_find_cbdata, .m_close = mdc_close, .m_create = mdc_create, @@ -2284,24 +2785,14 @@ int __init mdc_init(void) struct lprocfs_static_vars lvars = { 0 }; lprocfs_mdc_init_vars(&lvars); - cfs_request_module("lquota"); - quota_interface = PORTAL_SYMBOL_GET(mdc_quota_interface); - init_obd_quota_ops(quota_interface, &mdc_obd_ops); - rc = class_register_type(&mdc_obd_ops, &mdc_md_ops, lvars.module_vars, LUSTRE_MDC_NAME, NULL); - if (rc && quota_interface) - PORTAL_SYMBOL_PUT(mdc_quota_interface); - RETURN(rc); } #ifdef __KERNEL__ static void /*__exit*/ mdc_exit(void) { - if (quota_interface) - PORTAL_SYMBOL_PUT(mdc_quota_interface); - class_unregister_type(LUSTRE_MDC_NAME); }