From c61de0f6abf620b7ab12955f17c34fe81426f8c1 Mon Sep 17 00:00:00 2001 From: pravins Date: Mon, 19 May 2008 15:17:02 +0000 Subject: [PATCH] b=14230 i=alex.zhuravlev i=h.huang remove lustre 1.6 mds files from cvs. --- lustre/mds/commit_confd.c | 98 -- lustre/mds/mds_join.c | 508 --------- lustre/mds/mds_lib.c | 488 --------- lustre/mds/mds_open.c | 1533 -------------------------- lustre/mds/mds_reint.c | 2419 ------------------------------------------ lustre/mds/mds_unlink_open.c | 287 ----- lustre/mds/mds_xattr.c | 358 ------- 7 files changed, 5691 deletions(-) delete mode 100644 lustre/mds/commit_confd.c delete mode 100644 lustre/mds/mds_join.c delete mode 100644 lustre/mds/mds_lib.c delete mode 100644 lustre/mds/mds_open.c delete mode 100644 lustre/mds/mds_reint.c delete mode 100644 lustre/mds/mds_unlink_open.c delete mode 100644 lustre/mds/mds_xattr.c diff --git a/lustre/mds/commit_confd.c b/lustre/mds/commit_confd.c deleted file mode 100644 index 8dd2fcd..0000000 --- a/lustre/mds/commit_confd.c +++ /dev/null @@ -1,98 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2005 Cluster File Systems, Inc. - * - * This file is part of the Lustre file system, http://www.lustre.org - * Lustre is a trademark of Cluster File Systems, Inc. - * - * You may have signed or agreed to another license before downloading - * this software. If so, you are bound by the terms and conditions - * of that agreement, and the following does not apply to you. See the - * LICENSE file included with this distribution for more information. - * - * If you did not agree to a different license, then this copy of Lustre - * is open source software; you can redistribute it and/or modify it - * under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * In either case, Lustre is distributed in the hope that it will be - * useful, but WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * license text for more details. - */ - -void commit_add(struct ) -{ - struct obd_import *import = commit_uuid2import(rec-> uuid); - - if (!import) { - CERROR("unaware of OST UUID %s - dorpping\n", rec-> uuid); - EXIT; - return; - } - - spin_lock(&import->llcconf_lock); - list_add(&rec-> &import); - spin_unlock(&import->llcconf_lock); - EXIT; - return; -} - -void commit_confd_conf_import(struct obd_import *import, - struct llog_commit_confirm_daemon *lccd) -{ - struct list_head *tmp, *save; - - - list_for_each_safe(&import->import_cc_list, tmp, save) { - struct llog_canceld_ctxt *cd; - - if (atomic_read(import->import_cc_count) <= - lccd->llcconf_lowwater) - break; - - cd = list_entry(tmp, struct llog_canceld_ctxt *, llcconf_entry); - atomic_dec(&import->import_cc_count); - commit_confd_add_and_fire(cd); - } - EXIT; - return; -} - - -int commit_confd_main(void *data) -{ - struct llog_commit_confirm_daemon *lccd = data; - - while (1) { - /* something has happened */ - event_wait(); - - if (lccd->flags & LCCD_STOP) - break; - - - /* lock llccd imporlist */ - spin_lock(&lccd->llcconf_lock); - list_for_each_safe(&lccd->llcconf_list, ) { - struct obd_import *import; - import = list_entry(&lccd->llcconf_list, - struct obd_import, - import_entry); - get_import(import); - spin_unlock(&lccd->llcconf_lock); - if (atomic_read(import->import_cc_count) > - lccd->llcconf_highwater) - commit_confd_conf_import(import); - put_import(import); - spin_lock(&lccd->llcconf_lock); - - } - spin_unlock(&lccd->llcconf_lock); - - } - - lccd->flags = LCCD_STOPPED; - RETURN(0); -} diff --git a/lustre/mds/mds_join.c b/lustre/mds/mds_join.c deleted file mode 100644 index 9de0bb5..0000000 --- a/lustre/mds/mds_join.c +++ /dev/null @@ -1,508 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * linux/mds/mds_join.c - * Lustre Metadata join handler file - * - * Copyright (c) 2001-2005 Cluster File Systems, Inc. - * Author: Wang Di - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#ifndef EXPORT_SYMTAB -# define EXPORT_SYMTAB -#endif -#define DEBUG_SUBSYSTEM S_MDS - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "mds_internal.h" - -struct mdsea_cb_data { - struct llog_handle *mc_llh; - struct lov_mds_md *mc_lmm; - struct lov_mds_md_join *mc_lmm_join; - __u64 mc_offset; - __u64 mc_headfile_sz; -}; - -static int mdsea_iterate(struct llog_handle *llh_tail, llog_cb_t cb, - void *cbdata) -{ - return llog_process(llh_tail, cb, cbdata, NULL); -} - -static int mds_insert_join_lmm(struct llog_handle *llh, - struct lov_mds_md *lmm, - __u64 start, __u64 len, - struct lov_mds_md_join *lmmj) -{ - struct llog_rec_hdr rec; - struct mds_extent_desc *med; - int sz_med, rc; - ENTRY; - - - sz_med = lov_mds_md_size(le32_to_cpu(lmm->lmm_stripe_count)); - sz_med += 2 * sizeof(__u64); - sz_med = size_round(sz_med); - - rec.lrh_len = cpu_to_le32(sz_med); - rec.lrh_type = cpu_to_le32(LLOG_JOIN_REC); - - CDEBUG(D_INFO, "insert extent "LPU64":"LPU64" lmm \n", start, len); - - OBD_ALLOC(med, sz_med); - if (med == NULL) - RETURN(-ENOMEM); - - med->med_start = start; - med->med_len = len; - memcpy(&med->med_lmm, lmm, - lov_mds_md_size(le32_to_cpu(lmm->lmm_stripe_count))); - - rc = llog_write_rec(llh, &rec, NULL, 0, med, -1); - OBD_FREE(med, sz_med); - - if (lmmj) { - /*modify lmmj for join stripe info*/ - lmmj->lmmj_md.lmm_stripe_count += lmm->lmm_stripe_count; - lmmj->lmmj_extent_count ++; - } - - RETURN(rc); -} - -static int mdsea_append_extent(struct llog_handle *llh_tail, - struct llog_rec_hdr *rec_in_tail, - struct mdsea_cb_data *cbdata) -{ - struct mds_extent_desc *med = - &((struct llog_array_rec *)rec_in_tail)->lmr_med; - int rc; - ENTRY; - - CDEBUG(D_INODE, "insert lmm extent: "LPU64":"LPU64" \n", - med->med_start, med->med_len); - rc = mds_insert_join_lmm(cbdata->mc_llh, &med->med_lmm, - med->med_start + cbdata->mc_headfile_sz, - med->med_len, cbdata->mc_lmm_join); - if (rc) { - CERROR("error %d insert the lmm \n", rc); - RETURN(rc); - } - RETURN(LLOG_DEL_RECORD); -} - -static void mds_init_stripe_join(struct lov_mds_md_join *lmmj, - struct lov_mds_md *lmm, - struct llog_logid *logid) -{ - lmmj->lmmj_md.lmm_magic = cpu_to_le32(LOV_MAGIC_JOIN); - lmmj->lmmj_md.lmm_object_id = lmm->lmm_object_id; - lmmj->lmmj_md.lmm_object_gr = lmm->lmm_object_gr; - lmmj->lmmj_md.lmm_pattern = lmm->lmm_pattern; - lmmj->lmmj_md.lmm_stripe_size = lmm->lmm_stripe_size; - lmmj->lmmj_md.lmm_stripe_count = 0; - lmmj->lmmj_extent_count = 0; - lmmj->lmmj_array_id = *logid; -} - -static int mdsea_cancel_last_extent(struct llog_handle *llh_tail, - struct llog_rec_hdr *rec_in_tail, - struct mdsea_cb_data *cbdata) -{ - struct mds_extent_desc *med = - &((struct llog_array_rec *)rec_in_tail)->lmr_med; - - CDEBUG(D_INODE, "extent: "LPU64":"LPU64" \n", med->med_start, - med->med_len); - - LASSERTF(cbdata->mc_offset == med->med_start, - "A hole in the extent "LPU64"--"LPU64"\n", - cbdata->mc_offset, med->med_start); - - if (med->med_len != -1) - cbdata->mc_offset = med->med_start + med->med_len; - - if (med->med_start > cbdata->mc_headfile_sz || (med->med_len == -1)) { - CDEBUG(D_INFO, "del rec offset"LPU64", head size "LPU64" \n", - med->med_start, cbdata->mc_headfile_sz); - if (!cbdata->mc_lmm) { - int stripe = le32_to_cpu(med->med_lmm.lmm_stripe_count); - OBD_ALLOC(cbdata->mc_lmm, lov_mds_md_size(stripe)); - if (!cbdata->mc_lmm) - RETURN(-ENOMEM); - memcpy(cbdata->mc_lmm, &med->med_lmm, - lov_mds_md_size(stripe)); - } - RETURN(LLOG_DEL_RECORD); - } - RETURN(0); -} - -static int mds_adjust_last_extent(struct llog_handle *llh_head, - __u64 head_size) -{ - struct mdsea_cb_data *cbdata; - int rc; - ENTRY; - - OBD_ALLOC_PTR(cbdata); - - if (!cbdata) - RETURN(-ENOMEM); - - cbdata->mc_headfile_sz = head_size; - /*Find the last extent and cancel the record in the lmm*/ - rc = mdsea_iterate(llh_head, (llog_cb_t)mdsea_cancel_last_extent, - cbdata); - - if (rc) { - CERROR("can not find the last extent rc=%d\n", rc); - GOTO(exit, rc); - } - - LASSERT(cbdata->mc_lmm); - - CDEBUG(D_INODE, "insert lmm extent: "LPU64":"LPU64" \n", - cbdata->mc_offset, (head_size - cbdata->mc_offset)); - - rc = mds_insert_join_lmm(llh_head, cbdata->mc_lmm, - cbdata->mc_offset, - (head_size - cbdata->mc_offset), - NULL); - if (rc) - CERROR("error insert the lmm rc %d \n", rc); -exit: - if (cbdata && cbdata->mc_lmm) - OBD_FREE(cbdata->mc_lmm, - lov_mds_md_size(cbdata->mc_lmm->lmm_stripe_count)); - if (cbdata) - OBD_FREE_PTR(cbdata); - - RETURN(rc); -} - -static void mds_finish_join(struct mds_obd *mds, struct ptlrpc_request *req, - struct inode *inode, struct lov_mds_md_join *lmmj) -{ - struct mds_body *body = (struct mds_body *) - lustre_msg_buf(req->rq_repmsg, 1, 0); - int max_cookiesize = lmmj->lmmj_md.lmm_stripe_count * - sizeof(struct llog_cookie); - int max_easize = sizeof(*lmmj); - - CDEBUG(D_INFO, "change the max md size from %d to "LPSZ"\n", - mds->mds_max_mdsize, sizeof(*lmmj)); - - if (mds->mds_max_mdsize < max_easize || - mds->mds_max_cookiesize < max_cookiesize) { - body->max_mdsize = mds->mds_max_mdsize > max_easize ? - mds->mds_max_mdsize : max_easize; - mds->mds_max_mdsize = body->max_mdsize; - body->max_cookiesize = mds->mds_max_cookiesize > max_cookiesize? - mds->mds_max_cookiesize : max_cookiesize; - mds->mds_max_cookiesize = body->max_cookiesize; - body->valid |= OBD_MD_FLMODEASIZE; - } - - if (body->valid & OBD_MD_FLMODEASIZE) - CDEBUG(D_INODE, "updating max_mdsize/max_cookiesize: %d/%d\n", - mds->mds_max_mdsize, mds->mds_max_cookiesize); - - mds_pack_inode2fid(&body->fid1, inode); - mds_pack_inode2body(body, inode); -} - -static int mds_join_unlink_tail_inode(struct mds_update_record *rec, - struct ptlrpc_request *req, - struct mds_rec_join *join_rec, - struct lov_mds_md *tail_lmm, - int lmm_size, struct dentry *dchild, - void **handle,struct lustre_handle *lockh) -{ - struct mds_obd *mds = mds_req2mds(req); - struct obd_device *obd = req->rq_export->exp_obd; - struct inode *tail_inode, *head_inode; - struct dentry *de_tailparent = NULL, *de_tail = NULL, *de_head = NULL; - struct lustre_handle dlm_handles[4] = {{0}, {0}, {0}, {0}}; - struct ll_fid head_fid; - int rc; - ENTRY; - - if (lockh) - ldlm_lock_decref(lockh, LCK_EX); - - head_inode = dchild->d_inode; - - head_fid.id = head_inode->i_ino; - head_fid.generation = head_inode->i_generation; - head_fid.f_type = head_inode->i_mode & S_IFMT; - - rc = mds_get_parents_children_locked(obd, mds, &join_rec->jr_fid, - &de_tailparent, &head_fid, - &de_head, LCK_EX, rec->ur_name, - rec->ur_namelen, &de_tail, - NULL, 0, NULL, dlm_handles, - LCK_EX); - if (rc) - GOTO(cleanup, rc); - - *lockh = dlm_handles[1]; - LASSERT(de_tailparent); - tail_inode = de_tail->d_inode; - if (tail_inode == NULL) { - CERROR("tail inode doesn't exist(dir %lu,name %s)!\n", - de_tailparent? de_tailparent->d_inode->i_ino : 0, - rec->ur_name); - GOTO(cleanup, rc = -ENOENT); - } - - if (!S_ISREG(tail_inode->i_mode)) { - CERROR("tail file is not a regular file (dir %lu, name %s)!\n", - de_tailparent? de_tailparent->d_inode->i_ino : 0, - rec->ur_name); - GOTO(cleanup, rc = -EINVAL); - } - - *handle = fsfilt_start(obd, head_inode, FSFILT_OP_JOIN, NULL); - if (IS_ERR(*handle)) { - rc = PTR_ERR(*handle); - GOTO(cleanup, rc); - } - - rc = mds_get_md(obd, tail_inode, tail_lmm, &lmm_size, 1); - if (rc < 0) /* get md fails */ - GOTO(cleanup, rc); - - LASSERT(le32_to_cpu(tail_lmm->lmm_magic) == LOV_MAGIC_JOIN || - le32_to_cpu(tail_lmm->lmm_magic) == LOV_MAGIC); - - LASSERT(de_tailparent); - rc = vfs_unlink(de_tailparent->d_inode, de_tail); - - if (rc == 0) { - CDEBUG(D_INODE, "delete the tail inode %lu/%u \n", - tail_inode->i_ino, tail_inode->i_generation); - } -cleanup: - if (dlm_handles[2].cookie != 0) - ldlm_lock_decref(&dlm_handles[2], LCK_EX); - - if (dlm_handles[0].cookie != 0) { - if (rc) - ldlm_lock_decref(&dlm_handles[0], LCK_EX); - else - ptlrpc_save_lock(req, &dlm_handles[0], LCK_EX); - } - if (de_tail) - l_dput(de_tail); - - if (de_tailparent) - l_dput(de_tailparent); - - if (de_head) - l_dput(de_head); - - RETURN(rc); -} - -int mds_join_file(struct mds_update_record *rec, struct ptlrpc_request *req, - struct dentry *de_head, struct lustre_handle *lockh) -{ - struct mds_obd *mds = mds_req2mds(req); - struct obd_device *obd = req->rq_export->exp_obd; - struct inode *head_inode = NULL; - struct lvfs_run_ctxt saved; - void *handle = NULL; - struct lov_mds_md *head_lmm, *tail_lmm; - struct lov_mds_md_join *head_lmmj = NULL, *tail_lmmj = NULL; - int lmm_size, rc = 0, cleanup_phase = 0, size; - struct llog_handle *llh_head = NULL, *llh_tail = NULL; - struct llog_ctxt *ctxt = NULL; - struct mds_rec_join *join_rec; - ENTRY; - - join_rec = lustre_swab_reqbuf(req, DLM_INTENT_REC_OFF + 3, - sizeof(*join_rec), - lustre_swab_mds_rec_join); - if (join_rec == NULL) - RETURN (-EFAULT); - - DEBUG_REQ(D_INODE, req,"head "LPU64"/%u, ptail ino "LPU64"/%u, tail %s", - rec->ur_fid1->id, rec->ur_fid1->generation, - join_rec->jr_fid.id, join_rec->jr_fid.generation, - rec->ur_name); - - size = mds->mds_max_mdsize; - lmm_size = mds->mds_max_mdsize; - OBD_ALLOC(head_lmm, lmm_size); - OBD_ALLOC(tail_lmm, lmm_size); - if (!head_lmm || !tail_lmm) - GOTO(cleanup, rc = -ENOMEM); - - /* acquire head's dentry */ - LASSERT(de_head); - head_inode = de_head->d_inode; - if (head_inode == NULL) { - CERROR("head inode doesn't exist!\n"); - GOTO(cleanup, rc = -ENOENT); - } - - /*Unlink tail inode and get the lmm back*/ - rc = mds_join_unlink_tail_inode(rec, req, join_rec, tail_lmm, lmm_size, - de_head, &handle, lockh); - if (rc) { - CERROR("unlink tail_inode error %d\n", rc); - GOTO(cleanup, rc); - } - - LOCK_INODE_MUTEX(head_inode); - cleanup_phase = 1; - rc = mds_get_md(obd, head_inode, head_lmm, &size, 0); - if (rc < 0) - GOTO(cleanup, rc); - - LASSERT(le32_to_cpu(head_lmm->lmm_magic) == LOV_MAGIC_JOIN || - le32_to_cpu(head_lmm->lmm_magic) == LOV_MAGIC); - - push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); - ctxt = llog_get_context(obd, LLOG_LOVEA_ORIG_CTXT); - LASSERT(ctxt != NULL); - cleanup_phase = 2; - if (le32_to_cpu(head_lmm->lmm_magic) == LOV_MAGIC) { /*simple file */ - struct llog_logid *llog_array; - - rc = llog_create(ctxt, &llh_head, NULL, NULL); - if (rc) { - CERROR("cannot create new log, error = %d\n", rc); - GOTO(cleanup, rc); - } - cleanup_phase = 3; - llog_array = &llh_head->lgh_id; - CDEBUG(D_INFO,"create arrary for %lu with id "LPU64":"LPU64"\n", - head_inode->i_ino, llog_array->lgl_oid, - llog_array->lgl_ogr); - rc = llog_init_handle(llh_head, LLOG_F_IS_PLAIN, NULL); - if (rc) - GOTO(cleanup, rc); - OBD_ALLOC_PTR(head_lmmj); - if (head_lmmj == NULL) - GOTO(cleanup, rc = -ENOMEM); - mds_init_stripe_join(head_lmmj, head_lmm, llog_array); - mds_insert_join_lmm(llh_head, head_lmm, 0,join_rec->jr_headsize, - head_lmmj); - } else { /*head lmm is join file */ - head_lmmj = (struct lov_mds_md_join *)head_lmm; - /* construct and fill extent llog object */ - rc = llog_create(ctxt, &llh_head, - &head_lmmj->lmmj_array_id, NULL); - if (rc) { - CERROR("cannot open existing log, error = %d\n", rc); - GOTO(cleanup, rc); - } - cleanup_phase = 3; - rc = llog_init_handle(llh_head, LLOG_F_IS_PLAIN, NULL); - if (rc) - GOTO(cleanup, rc); - rc = mds_adjust_last_extent(llh_head, join_rec->jr_headsize); - if (rc) { - CERROR("can't adjust last extent of obj rc=%d\n", rc); - GOTO(cleanup, rc); - } - } - - if (le32_to_cpu(tail_lmm->lmm_magic) != LOV_MAGIC_JOIN) { - mds_insert_join_lmm(llh_head, tail_lmm, join_rec->jr_headsize, - -1, head_lmmj); - } else { - struct mdsea_cb_data cbdata; - tail_lmmj = (struct lov_mds_md_join *)tail_lmm; - - rc = llog_create(ctxt,&llh_tail,&tail_lmmj->lmmj_array_id,NULL); - if (rc) { - CERROR("cannot open existing log, error = %d\n", rc); - GOTO(cleanup, rc); - } - rc = llog_init_handle(llh_tail, LLOG_F_IS_PLAIN, NULL); - if (rc) { - llog_close(llh_tail); - GOTO(cleanup, rc); - } - cbdata.mc_llh = llh_head; - cbdata.mc_headfile_sz = join_rec->jr_headsize; - cbdata.mc_lmm_join = head_lmmj; - rc = mdsea_iterate(llh_tail, (llog_cb_t)mdsea_append_extent, - &cbdata); - if (rc) { - llog_close(llh_tail); - CERROR("can not append extent log error %d \n", rc); - GOTO(cleanup, rc); - } - rc = llog_destroy(llh_tail); - if (rc) { - llog_close(llh_tail); - CERROR("can not destroy log error %d \n", rc); - GOTO(cleanup, rc); - } - llog_free_handle(llh_tail); - } - LASSERT(head_inode); - CDEBUG(D_INODE, "join finish, set lmm V2 to inode %lu \n", - head_inode->i_ino); - fsfilt_set_md(obd, head_inode, handle, head_lmmj, - sizeof(struct lov_mds_md_join), "lov"); - mds_finish_join(mds, req, head_inode, head_lmmj); -cleanup: - rc = mds_finish_transno(mds, head_inode, handle, req, rc, 0, 0); - switch(cleanup_phase){ - case 3: - llog_close(llh_head); - case 2: - llog_ctxt_put(ctxt); - if (head_lmmj && ((void*)head_lmmj != (void*)head_lmm)) - OBD_FREE_PTR(head_lmmj); - - pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); - case 1: - UNLOCK_INODE_MUTEX(head_inode); - case 0: - if (tail_lmm != NULL) - OBD_FREE(tail_lmm, lmm_size); - if (head_lmm != NULL) - OBD_FREE(head_lmm, lmm_size); - break; - default: - CERROR("invalid cleanup_phase %d\n", cleanup_phase); - LBUG(); - } - req->rq_status = rc; - RETURN(rc); -} - diff --git a/lustre/mds/mds_lib.c b/lustre/mds/mds_lib.c deleted file mode 100644 index 15bf9d6..0000000 --- a/lustre/mds/mds_lib.c +++ /dev/null @@ -1,488 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2003 Cluster File Systems, Inc. - * - * This file is part of the Lustre file system, http://www.lustre.org - * Lustre is a trademark of Cluster File Systems, Inc. - * - * You may have signed or agreed to another license before downloading - * this software. If so, you are bound by the terms and conditions - * of that agreement, and the following does not apply to you. See the - * LICENSE file included with this distribution for more information. - * - * If you did not agree to a different license, then this copy of Lustre - * is open source software; you can redistribute it and/or modify it - * under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * In either case, Lustre is distributed in the hope that it will be - * useful, but WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * license text for more details. - */ - -#define DEBUG_SUBSYSTEM S_MDS - -#ifndef AUTOCONF_INCLUDED -#include -#endif -#include -#include -#include -#include -#include -#include -#include -#include // for wait_on_buffer -#include - -#include -#include - -#include -#include -#include -#include - -#include -#include -#include "mds_internal.h" - -void mds_pack_inode2fid(struct ll_fid *fid, struct inode *inode) -{ - fid->id = inode->i_ino; - fid->generation = inode->i_generation; - fid->f_type = (S_IFMT & inode->i_mode); -} - -/* Note that we can copy all of the fields, just some will not be "valid" */ -void mds_pack_inode2body(struct mds_body *b, struct inode *inode) -{ - b->valid |= OBD_MD_FLID | OBD_MD_FLCTIME | OBD_MD_FLUID | - OBD_MD_FLGID | OBD_MD_FLFLAGS | OBD_MD_FLTYPE | - OBD_MD_FLMODE | OBD_MD_FLNLINK | OBD_MD_FLGENER | - OBD_MD_FLATIME | OBD_MD_FLMTIME; /* bug 2020 */ - - if (!S_ISREG(inode->i_mode)) - b->valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS | OBD_MD_FLATIME | - OBD_MD_FLMTIME | OBD_MD_FLRDEV; - - b->ino = inode->i_ino; - b->atime = LTIME_S(inode->i_atime); - b->mtime = LTIME_S(inode->i_mtime); - b->ctime = LTIME_S(inode->i_ctime); - b->mode = inode->i_mode; - b->size = i_size_read(inode); - b->blocks = inode->i_blocks; - b->uid = inode->i_uid; - b->gid = inode->i_gid; - b->flags = ll_inode_to_ext_flags(b->flags, inode->i_flags); - b->rdev = inode->i_rdev; - /* Return the correct link count for orphan inodes */ - b->nlink = mds_inode_is_orphan(inode) ? 0 : inode->i_nlink; - b->generation = inode->i_generation; - b->suppgid = -1; -} - -static inline unsigned int attr_unpack(__u64 sa_valid) { - unsigned int ia_valid = 0; - - if (sa_valid & MDS_ATTR_MODE) - ia_valid |= ATTR_MODE; - if (sa_valid & MDS_ATTR_UID) - ia_valid |= ATTR_UID; - if (sa_valid & MDS_ATTR_GID) - ia_valid |= ATTR_GID; - if (sa_valid & MDS_ATTR_SIZE) - ia_valid |= ATTR_SIZE; - if (sa_valid & MDS_ATTR_ATIME) - ia_valid |= ATTR_ATIME; - if (sa_valid & MDS_ATTR_MTIME) - ia_valid |= ATTR_MTIME; - if (sa_valid & MDS_ATTR_CTIME) - ia_valid |= ATTR_CTIME; - if (sa_valid & MDS_ATTR_ATIME_SET) - ia_valid |= ATTR_ATIME_SET; - if (sa_valid & MDS_ATTR_MTIME_SET) - ia_valid |= ATTR_MTIME_SET; - if (sa_valid & MDS_ATTR_FORCE) - ia_valid |= ATTR_FORCE; - if (sa_valid & MDS_ATTR_ATTR_FLAG) - ia_valid |= ATTR_ATTR_FLAG; - if (sa_valid & MDS_ATTR_KILL_SUID) - ia_valid |= ATTR_KILL_SUID; - if (sa_valid & MDS_ATTR_KILL_SGID) - ia_valid |= ATTR_KILL_SGID; - if (sa_valid & MDS_ATTR_CTIME_SET) - ia_valid |= ATTR_CTIME_SET; - if (sa_valid & MDS_ATTR_FROM_OPEN) - ia_valid |= ATTR_FROM_OPEN; - if (sa_valid & MDS_ATTR_BLOCKS) - ia_valid |= ATTR_BLOCKS; - if (sa_valid & MDS_OPEN_OWNEROVERRIDE) - ia_valid |= MDS_OPEN_OWNEROVERRIDE; - return ia_valid; -} - -/* unpacking */ -static int mds_setattr_unpack(struct ptlrpc_request *req, int offset, - struct mds_update_record *r) -{ - struct iattr *attr = &r->ur_iattr; - struct mds_rec_setattr *rec; - ENTRY; - - rec = lustre_swab_reqbuf(req, offset, sizeof(*rec), - lustre_swab_mds_rec_setattr); - if (rec == NULL) - RETURN (-EFAULT); - - r->ur_uc.luc_fsuid = rec->sa_fsuid; - r->ur_uc.luc_fsgid = rec->sa_fsgid; - r->ur_uc.luc_cap = rec->sa_cap; -#if 0 - r->ur_uc.luc_suppgid1 = rec->sa_suppgid; - r->ur_uc.luc_suppgid2 = -1; -#endif - r->ur_fid1 = &rec->sa_fid; - attr->ia_valid = attr_unpack(rec->sa_valid); - attr->ia_mode = rec->sa_mode; - attr->ia_uid = rec->sa_uid; - attr->ia_gid = rec->sa_gid; - attr->ia_size = rec->sa_size; - LTIME_S(attr->ia_atime) = rec->sa_atime; - LTIME_S(attr->ia_mtime) = rec->sa_mtime; - LTIME_S(attr->ia_ctime) = rec->sa_ctime; - r->ur_flags = rec->sa_attr_flags; - - lustre_set_req_swabbed(req, offset + 1); - r->ur_eadatalen = lustre_msg_buflen(req->rq_reqmsg, offset + 1); - if (r->ur_eadatalen) { - r->ur_eadata = lustre_msg_buf(req->rq_reqmsg, offset + 1, 0); - if (r->ur_eadata == NULL) - RETURN(-EFAULT); - } - r->ur_cookielen = lustre_msg_buflen(req->rq_reqmsg, offset + 2); - if (r->ur_cookielen) { - r->ur_logcookies = lustre_msg_buf(req->rq_reqmsg, offset + 2,0); - if (r->ur_eadata == NULL) - RETURN (-EFAULT); - } - if (lustre_msg_buflen(req->rq_reqmsg, offset + 3)) { - r->ur_dlm = lustre_swab_reqbuf(req, offset + 3, - sizeof(*r->ur_dlm), - lustre_swab_ldlm_request); - if (r->ur_dlm == NULL) - RETURN (-EFAULT); - } - RETURN(0); -} - -static int mds_create_unpack(struct ptlrpc_request *req, int offset, - struct mds_update_record *r) -{ - struct mds_rec_create *rec; - ENTRY; - - rec = lustre_swab_reqbuf(req, offset, sizeof (*rec), - lustre_swab_mds_rec_create); - if (rec == NULL) - RETURN (-EFAULT); - - r->ur_uc.luc_fsuid = rec->cr_fsuid; - r->ur_uc.luc_fsgid = rec->cr_fsgid; - r->ur_uc.luc_cap = rec->cr_cap; -#if 0 - r->ur_uc.luc_suppgid1 = rec->cr_suppgid; - r->ur_uc.luc_suppgid2 = -1; -#endif - r->ur_fid1 = &rec->cr_fid; - r->ur_fid2 = &rec->cr_replayfid; - r->ur_mode = rec->cr_mode; - r->ur_rdev = rec->cr_rdev; - r->ur_time = rec->cr_time; - r->ur_flags = rec->cr_flags; - - lustre_set_req_swabbed(req, offset + 1); - r->ur_name = lustre_msg_string(req->rq_reqmsg, offset + 1, 0); - if (r->ur_name == NULL) - RETURN (-EFAULT); - r->ur_namelen = lustre_msg_buflen(req->rq_reqmsg, offset + 1); - - lustre_set_req_swabbed(req, offset + 2); - r->ur_tgtlen = lustre_msg_buflen(req->rq_reqmsg, offset + 2); - if (r->ur_tgtlen) { - /* NB for now, we only seem to pass NULL terminated symlink - * target strings here. If this ever changes, we'll have - * to stop checking for a buffer filled completely with a - * NULL terminated string here, and make the callers check - * depending on what they expect. We should probably stash - * it in r->ur_eadata in that case, so it's obvious... -eeb - */ - r->ur_tgt = lustre_msg_string(req->rq_reqmsg, offset + 2, 0); - if (r->ur_tgt == NULL) - RETURN (-EFAULT); - } - if (lustre_msg_buflen(req->rq_reqmsg, offset + 3)) { - r->ur_dlm = lustre_swab_reqbuf(req, offset + 3, - sizeof(*r->ur_dlm), - lustre_swab_ldlm_request); - if (r->ur_dlm == NULL) - RETURN (-EFAULT); - } - RETURN(0); -} - -static int mds_link_unpack(struct ptlrpc_request *req, int offset, - struct mds_update_record *r) -{ - struct mds_rec_link *rec; - ENTRY; - - rec = lustre_swab_reqbuf(req, offset, sizeof (*rec), - lustre_swab_mds_rec_link); - if (rec == NULL) - RETURN (-EFAULT); - - r->ur_uc.luc_fsuid = rec->lk_fsuid; - r->ur_uc.luc_fsgid = rec->lk_fsgid; - r->ur_uc.luc_cap = rec->lk_cap; -#if 0 - r->ur_uc.luc_suppgid1 = rec->lk_suppgid1; - r->ur_uc.luc_suppgid2 = rec->lk_suppgid2; -#endif - r->ur_fid1 = &rec->lk_fid1; - r->ur_fid2 = &rec->lk_fid2; - r->ur_time = rec->lk_time; - - lustre_set_req_swabbed(req, offset + 1); - r->ur_name = lustre_msg_string(req->rq_reqmsg, offset + 1, 0); - if (r->ur_name == NULL) - RETURN (-EFAULT); - r->ur_namelen = lustre_msg_buflen(req->rq_reqmsg, offset + 1); - if (lustre_msg_buflen(req->rq_reqmsg, offset + 2)) { - r->ur_dlm = lustre_swab_reqbuf(req, offset + 2, - sizeof(*r->ur_dlm), - lustre_swab_ldlm_request); - if (r->ur_dlm == NULL) - RETURN (-EFAULT); - } - RETURN(0); -} - -static int mds_unlink_unpack(struct ptlrpc_request *req, int offset, - struct mds_update_record *r) -{ - struct mds_rec_unlink *rec; - ENTRY; - - rec = lustre_swab_reqbuf(req, offset, sizeof (*rec), - lustre_swab_mds_rec_unlink); - if (rec == NULL) - RETURN(-EFAULT); - - r->ur_uc.luc_fsuid = rec->ul_fsuid; - r->ur_uc.luc_fsgid = rec->ul_fsgid; - r->ur_uc.luc_cap = rec->ul_cap; -#if 0 - r->ur_uc.luc_suppgid1 = rec->ul_suppgid; - r->ur_uc.luc_suppgid2 = -1; -#endif - r->ur_mode = rec->ul_mode; - r->ur_fid1 = &rec->ul_fid1; - r->ur_fid2 = &rec->ul_fid2; - r->ur_time = rec->ul_time; - - lustre_set_req_swabbed(req, offset + 1); - r->ur_name = lustre_msg_string(req->rq_reqmsg, offset + 1, 0); - if (r->ur_name == NULL) - RETURN(-EFAULT); - r->ur_namelen = lustre_msg_buflen(req->rq_reqmsg, offset + 1); - - if (lustre_msg_buflen(req->rq_reqmsg, offset + 2)) { - r->ur_dlm = lustre_swab_reqbuf(req, offset + 2, - sizeof(*r->ur_dlm), - lustre_swab_ldlm_request); - if (r->ur_dlm == NULL) - RETURN (-EFAULT); - } - RETURN(0); -} - -static int mds_rename_unpack(struct ptlrpc_request *req, int offset, - struct mds_update_record *r) -{ - struct mds_rec_rename *rec; - ENTRY; - - rec = lustre_swab_reqbuf(req, offset, sizeof (*rec), - lustre_swab_mds_rec_rename); - if (rec == NULL) - RETURN(-EFAULT); - - r->ur_uc.luc_fsuid = rec->rn_fsuid; - r->ur_uc.luc_fsgid = rec->rn_fsgid; - r->ur_uc.luc_cap = rec->rn_cap; -#if 0 - r->ur_uc.luc_suppgid1 = rec->rn_suppgid1; - r->ur_uc.luc_suppgid2 = rec->rn_suppgid2; -#endif - r->ur_fid1 = &rec->rn_fid1; - r->ur_fid2 = &rec->rn_fid2; - r->ur_time = rec->rn_time; - - lustre_set_req_swabbed(req, offset + 1); - r->ur_name = lustre_msg_string(req->rq_reqmsg, offset + 1, 0); - if (r->ur_name == NULL) - RETURN(-EFAULT); - r->ur_namelen = lustre_msg_buflen(req->rq_reqmsg, offset + 1); - - lustre_set_req_swabbed(req, offset + 2); - r->ur_tgt = lustre_msg_string(req->rq_reqmsg, offset + 2, 0); - if (r->ur_tgt == NULL) - RETURN(-EFAULT); - r->ur_tgtlen = lustre_msg_buflen(req->rq_reqmsg, offset + 2); - if (lustre_msg_buflen(req->rq_reqmsg, offset + 3)) { - r->ur_dlm = lustre_swab_reqbuf(req, offset + 3, - sizeof(*r->ur_dlm), - lustre_swab_ldlm_request); - if (r->ur_dlm == NULL) - RETURN (-EFAULT); - } - RETURN(0); -} - -static int mds_open_unpack(struct ptlrpc_request *req, int offset, - struct mds_update_record *r) -{ - struct mds_rec_create *rec; - ENTRY; - - rec = lustre_swab_reqbuf(req, offset, sizeof(*rec), - lustre_swab_mds_rec_create); - if (rec == NULL) - RETURN(-EFAULT); - - r->ur_uc.luc_fsuid = rec->cr_fsuid; - r->ur_uc.luc_fsgid = rec->cr_fsgid; - r->ur_uc.luc_cap = rec->cr_cap; -#if 0 - r->ur_uc.luc_suppgid1 = rec->cr_suppgid; - r->ur_uc.luc_suppgid2 = -1; -#endif - r->ur_fid1 = &rec->cr_fid; - r->ur_fid2 = &rec->cr_replayfid; - r->ur_mode = rec->cr_mode; - r->ur_rdev = rec->cr_rdev; - r->ur_time = rec->cr_time; - r->ur_flags = rec->cr_flags; - - lustre_set_req_swabbed(req, offset + 1); - r->ur_name = lustre_msg_string(req->rq_reqmsg, offset + 1, 0); - if (r->ur_name == NULL) - RETURN(-EFAULT); - r->ur_namelen = lustre_msg_buflen(req->rq_reqmsg, offset + 1); - - lustre_set_req_swabbed(req, offset + 2); - r->ur_eadatalen = lustre_msg_buflen(req->rq_reqmsg, offset + 2); - if (r->ur_eadatalen) { - r->ur_eadata = lustre_msg_buf(req->rq_reqmsg, offset + 2, 0); - if (r->ur_eadata == NULL) - RETURN (-EFAULT); - } - RETURN(0); -} - -typedef int (*update_unpacker)(struct ptlrpc_request *req, int offset, - struct mds_update_record *r); - -static update_unpacker mds_unpackers[REINT_MAX] = { - [REINT_SETATTR] mds_setattr_unpack, - [REINT_CREATE] mds_create_unpack, - [REINT_LINK] mds_link_unpack, - [REINT_UNLINK] mds_unlink_unpack, - [REINT_RENAME] mds_rename_unpack, - [REINT_OPEN] mds_open_unpack, -}; - -int mds_update_unpack(struct ptlrpc_request *req, int offset, - struct mds_update_record *rec) -{ - mds_reint_t opcode, *opcodep; - int rc; - ENTRY; - - /* NB don't lustre_swab_reqbuf() here. We're just taking a peek - * and we want to leave it to the specific unpacker once we've - * identified the message type */ - opcodep = lustre_msg_buf(req->rq_reqmsg, offset, sizeof (*opcodep)); - if (opcodep == NULL) - RETURN(-EFAULT); - - opcode = *opcodep; - if (lustre_msg_swabbed(req->rq_reqmsg)) - __swab32s(&opcode); - - if (opcode >= REINT_MAX || mds_unpackers[opcode] == NULL) { - CERROR("Unexpected opcode %d\n", opcode); - RETURN(-EFAULT); - } - - rec->ur_opcode = opcode; - rc = mds_unpackers[opcode](req, offset, rec); - - RETURN(rc); -} - -int mds_init_ucred(struct lvfs_ucred *ucred, struct ptlrpc_request *req, - int offset) -{ - struct mds_body *body = lustre_msg_buf(req->rq_reqmsg, offset, - sizeof(*body)); -#if 0 - struct mds_obd *mds = mds_req2mds(req); - int rc; -#endif - - LASSERT(body != NULL); /* previously verified & swabbed by caller */ - -#ifdef CRAY_XT3 - if (req->rq_uid != LNET_UID_ANY) { - /* Non-root local cluster client */ - LASSERT (req->rq_uid != 0); - ucred->luc_fsuid = req->rq_uid; - } else -#endif - { - ucred->luc_fsuid = body->fsuid; - ucred->luc_fsgid = body->fsgid; - ucred->luc_cap = body->capability; - } - -#if 0 - ucred->luc_uce = upcall_cache_get_entry(mds->mds_group_hash, - ucred->luc_fsuid, - ucred->luc_fsgid, 1, - &body->suppgid); - if (IS_ERR(ucred->luc_uce)) { - rc = PTR_ERR(ucred->luc_uce); - ucred->luc_uce = NULL; - return rc; - } - -#ifdef CRAY_XT3 - if (ucred->luc_uce) - ucred->luc_fsgid = ucred->luc_uce->ue_primary; -#endif -#endif - - return 0; -} - -void mds_exit_ucred(struct lvfs_ucred *ucred, struct mds_obd *mds) -{ -#if 0 - upcall_cache_put_entry(mds->mds_group_hash, ucred->luc_uce); -#endif -} diff --git a/lustre/mds/mds_open.c b/lustre/mds/mds_open.c deleted file mode 100644 index 61dc455..0000000 --- a/lustre/mds/mds_open.c +++ /dev/null @@ -1,1533 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2003 Cluster File Systems, Inc. - * Author: Peter Braam - * Author: Andreas Dilger - * Author: Phil Schwan - * Author: Mike Shaver - * - * This file is part of the Lustre file system, http://www.lustre.org - * Lustre is a trademark of Cluster File Systems, Inc. - * - * You may have signed or agreed to another license before downloading - * this software. If so, you are bound by the terms and conditions - * of that agreement, and the following does not apply to you. See the - * LICENSE file included with this distribution for more information. - * - * If you did not agree to a different license, then this copy of Lustre - * is open source software; you can redistribute it and/or modify it - * under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * In either case, Lustre is distributed in the hope that it will be - * useful, but WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * license text for more details. - */ - -#ifndef EXPORT_SYMTAB -# define EXPORT_SYMTAB -#endif -#define DEBUG_SUBSYSTEM S_MDS - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include "mds_internal.h" - -/* Exported function from this file are: - * - * mds_open - called by the intent handler - * mds_close - an rpc handling function - * mds_pin - an rpc handling function - which will go away - * mds_mfd_close - for force closing files when a client dies - */ - -/* - * MDS file data handling: file data holds a handle for a file opened - * by a client. - */ - -static void mds_mfd_addref(void *mfdp) -{ - struct mds_file_data *mfd = mfdp; - - atomic_inc(&mfd->mfd_refcount); - CDEBUG(D_INFO, "GETting mfd %p : new refcount %d\n", mfd, - atomic_read(&mfd->mfd_refcount)); -} - -/* Create a new mds_file_data struct. - * One reference for handle+med_open_head list and dropped by mds_mfd_unlink(), - * one reference for the caller of this function. */ -struct mds_file_data *mds_mfd_new(void) -{ - struct mds_file_data *mfd; - - OBD_ALLOC(mfd, sizeof *mfd); - if (mfd == NULL) { - CERROR("mds: out of memory\n"); - return NULL; - } - - atomic_set(&mfd->mfd_refcount, 2); - - CFS_INIT_LIST_HEAD(&mfd->mfd_handle.h_link); - CFS_INIT_LIST_HEAD(&mfd->mfd_list); - class_handle_hash(&mfd->mfd_handle, mds_mfd_addref); - - return mfd; -} - -/* Get a new reference on the mfd pointed to by handle, if handle is still - * valid. Caller must drop reference with mds_mfd_put(). */ -static struct mds_file_data *mds_handle2mfd(struct lustre_handle *handle) -{ - ENTRY; - LASSERT(handle != NULL); - RETURN(class_handle2object(handle->cookie)); -} - -/* Drop mfd reference, freeing struct if this is the last one. */ -static void mds_mfd_put(struct mds_file_data *mfd) -{ - CDEBUG(D_INFO, "PUTting mfd %p : new refcount %d\n", mfd, - atomic_read(&mfd->mfd_refcount) - 1); - LASSERT(atomic_read(&mfd->mfd_refcount) > 0 && - atomic_read(&mfd->mfd_refcount) < 0x5a5a); - if (atomic_dec_and_test(&mfd->mfd_refcount)) { - OBD_FREE_RCU(mfd, sizeof *mfd, &mfd->mfd_handle); - } -} - -/* Remove the mfd handle so that it cannot be found by open/close again. - * Caller must hold med_open_lock for mfd_list manipulation. */ -void mds_mfd_unlink(struct mds_file_data *mfd, int decref) -{ - class_handle_unhash(&mfd->mfd_handle); - list_del_init(&mfd->mfd_list); - if (decref) - mds_mfd_put(mfd); -} - -/* Caller must hold mds->mds_epoch_sem */ -static int mds_alloc_filterdata(struct inode *inode) -{ - LASSERT(inode->i_filterdata == NULL); - OBD_ALLOC(inode->i_filterdata, sizeof(struct mds_filter_data)); - if (inode->i_filterdata == NULL) - return -ENOMEM; - LASSERT(igrab(inode) == inode); - return 0; -} - -/* Caller must hold mds->mds_epoch_sem */ -static void mds_free_filterdata(struct inode *inode) -{ - LASSERT(inode->i_filterdata != NULL); - OBD_FREE(inode->i_filterdata, sizeof(struct mds_filter_data)); - inode->i_filterdata = NULL; - iput(inode); -} - -/* Write access to a file: executors cause a negative count, - * writers a positive count. The semaphore is needed to perform - * a check for the sign and then increment or decrement atomically. - * - * This code is closely tied to the allocation of the d_fsdata and the - * MDS epoch, so we use the same semaphore for the whole lot. - * - * We could use a different semaphore for each file, if it ever shows - * up in a profile, which it won't. - * - * epoch argument is nonzero during recovery */ -static int mds_get_write_access(struct mds_obd *mds, struct inode *inode, - __u64 epoch) -{ - int rc = 0; - - down(&mds->mds_epoch_sem); - - if (atomic_read(&inode->i_writecount) < 0) { - up(&mds->mds_epoch_sem); - RETURN(-ETXTBSY); - } - - - if (MDS_FILTERDATA(inode) && MDS_FILTERDATA(inode)->io_epoch != 0) { - CDEBUG(D_INODE, "continuing MDS epoch "LPU64" for ino %lu/%u\n", - MDS_FILTERDATA(inode)->io_epoch, inode->i_ino, - inode->i_generation); - goto out; - } - - if (inode->i_filterdata == NULL) - mds_alloc_filterdata(inode); - if (inode->i_filterdata == NULL) { - rc = -ENOMEM; - goto out; - } - if (epoch > mds->mds_io_epoch) - mds->mds_io_epoch = epoch; - else - mds->mds_io_epoch++; - MDS_FILTERDATA(inode)->io_epoch = mds->mds_io_epoch; - CDEBUG(D_INODE, "starting MDS epoch "LPU64" for ino %lu/%u\n", - mds->mds_io_epoch, inode->i_ino, inode->i_generation); - out: - if (rc == 0) - atomic_inc(&inode->i_writecount); - up(&mds->mds_epoch_sem); - return rc; -} - -/* Returns EAGAIN if the client needs to get size and/or cookies and close - * again -- which is never true if the file is about to be unlinked. Otherwise - * returns the number of remaining writers. */ -static int mds_put_write_access(struct mds_obd *mds, struct inode *inode, - struct mds_body *body, int unlinking) -{ - int rc = 0; - ENTRY; - - down(&mds->mds_epoch_sem); - atomic_dec(&inode->i_writecount); - rc = atomic_read(&inode->i_writecount); - if (rc > 0) - GOTO(out, rc); -#if 0 - if (!unlinking && !(body->valid & OBD_MD_FLSIZE)) - GOTO(out, rc = EAGAIN); -#endif - mds_free_filterdata(inode); - out: - up(&mds->mds_epoch_sem); - return rc; -} - -static int mds_deny_write_access(struct mds_obd *mds, struct inode *inode) -{ - ENTRY; - down(&mds->mds_epoch_sem); - if (atomic_read(&inode->i_writecount) > 0) { - up(&mds->mds_epoch_sem); - RETURN(-ETXTBSY); - } - atomic_dec(&inode->i_writecount); - up(&mds->mds_epoch_sem); - RETURN(0); -} - -static void mds_allow_write_access(struct inode *inode) -{ - ENTRY; - atomic_inc(&inode->i_writecount); -} - -int mds_query_write_access(struct inode *inode) -{ - ENTRY; - RETURN(atomic_read(&inode->i_writecount)); -} - -/* This replaces the VFS dentry_open, it manages mfd and writecount */ -static struct mds_file_data *mds_dentry_open(struct dentry *dentry, - struct vfsmount *mnt, int flags, - struct ptlrpc_request *req) -{ - struct mds_export_data *med = &req->rq_export->exp_mds_data; - struct mds_obd *mds = mds_req2mds(req); - struct mds_file_data *mfd; - struct mds_body *body; - int error; - ENTRY; - - mfd = mds_mfd_new(); - if (mfd == NULL) { - CERROR("mds: out of memory\n"); - GOTO(cleanup_dentry, error = -ENOMEM); - } - - body = lustre_msg_buf(req->rq_repmsg, DLM_REPLY_REC_OFF, sizeof(*body)); - - if (flags & FMODE_WRITE) { - /* FIXME: in recovery, need to pass old epoch here */ - error = mds_get_write_access(mds, dentry->d_inode, 0); - if (error) - GOTO(cleanup_mfd, error); - body->io_epoch = MDS_FILTERDATA(dentry->d_inode)->io_epoch; - } else if (flags & MDS_FMODE_EXEC) { - error = mds_deny_write_access(mds, dentry->d_inode); - if (error) - GOTO(cleanup_mfd, error); - } - - dget(dentry); - - /* Mark the file as open to handle open-unlink. */ - MDS_DOWN_WRITE_ORPHAN_SEM(dentry->d_inode); - mds_orphan_open_inc(dentry->d_inode); - MDS_UP_WRITE_ORPHAN_SEM(dentry->d_inode); - - mfd->mfd_mode = flags; - mfd->mfd_dentry = dentry; - mfd->mfd_xid = req->rq_xid; - - spin_lock(&med->med_open_lock); - list_add(&mfd->mfd_list, &med->med_open_head); - spin_unlock(&med->med_open_lock); - - body->handle.cookie = mfd->mfd_handle.h_cookie; - - RETURN(mfd); - -cleanup_mfd: - mds_mfd_put(mfd); - mds_mfd_unlink(mfd, 1); -cleanup_dentry: - return ERR_PTR(error); -} - -/* Must be called with i_mutex held */ -static int mds_create_objects(struct ptlrpc_request *req, int offset, - struct mds_update_record *rec, - struct mds_obd *mds, struct obd_device *obd, - struct dentry *dchild, void **handle, - struct lov_mds_md **objid) -{ - struct inode *inode = dchild->d_inode; - struct obd_trans_info oti = { 0 }; - struct lov_mds_md *lmm = NULL; - int rc, lmm_size; - struct mds_body *body; - struct obd_info oinfo = { { { 0 } } }; - void *lmm_buf; - ENTRY; - - if (!S_ISREG(inode->i_mode)) - RETURN(0); - if (!md_should_create(rec->ur_flags)) - RETURN(0); - - body = lustre_msg_buf(req->rq_repmsg, DLM_REPLY_REC_OFF, sizeof(*body)); - - if (body->valid & OBD_MD_FLEASIZE) - RETURN(0); - - oti_init(&oti, req); - - /* replay case */ - if (lustre_msg_get_flags(req->rq_reqmsg) & MSG_REPLAY) { - if (rec->ur_fid2->id == 0) { - DEBUG_REQ(D_ERROR, req, "fid2 not set on open replay"); - RETURN(-EFAULT); - } - - body->valid |= OBD_MD_FLBLKSZ | OBD_MD_FLEASIZE; - lmm_size = rec->ur_eadatalen; - lmm = rec->ur_eadata; - LASSERT(lmm); - - if (*handle == NULL) - *handle = fsfilt_start(obd,inode,FSFILT_OP_CREATE,NULL); - if (IS_ERR(*handle)) { - rc = PTR_ERR(*handle); - *handle = NULL; - GOTO(out_ids, rc); - } - - rc = fsfilt_set_md(obd, inode, *handle, lmm, lmm_size, "lov"); - if (rc) - CERROR("open replay failed to set md:%d\n", rc); - lmm_buf = lustre_msg_buf(req->rq_repmsg, offset, lmm_size); - LASSERT(lmm_buf); - memcpy(lmm_buf, lmm, lmm_size); - - *objid = lmm_buf; - RETURN(rc); - } - - if (OBD_FAIL_CHECK(OBD_FAIL_MDS_ALLOC_OBDO)) - GOTO(out_ids, rc = -ENOMEM); - - OBDO_ALLOC(oinfo.oi_oa); - if (oinfo.oi_oa == NULL) - GOTO(out_ids, rc = -ENOMEM); - oinfo.oi_oa->o_uid = 0; /* must have 0 uid / gid on OST */ - oinfo.oi_oa->o_gid = 0; - oinfo.oi_oa->o_mode = S_IFREG | 0600; - oinfo.oi_oa->o_id = inode->i_ino; - oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLFLAGS | - OBD_MD_FLMODE | OBD_MD_FLUID | OBD_MD_FLGID; - oinfo.oi_oa->o_size = 0; - - obdo_from_inode(oinfo.oi_oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME | - OBD_MD_FLMTIME | OBD_MD_FLCTIME); - - if (!(rec->ur_flags & MDS_OPEN_HAS_OBJS)) { - /* check if things like lfs setstripe are sending us the ea */ - if (rec->ur_flags & MDS_OPEN_HAS_EA) { - rc = obd_iocontrol(OBD_IOC_LOV_SETSTRIPE, - mds->mds_osc_exp, - 0, &oinfo.oi_md, rec->ur_eadata); - if (rc) - GOTO(out_oa, rc); - } else { - OBD_ALLOC(lmm, mds->mds_max_mdsize); - if (lmm == NULL) - GOTO(out_oa, rc = -ENOMEM); - - lmm_size = mds->mds_max_mdsize; - rc = mds_get_md(obd, dchild->d_parent->d_inode, - lmm, &lmm_size, 1); - if (rc > 0) - rc = obd_iocontrol(OBD_IOC_LOV_SETSTRIPE, - mds->mds_osc_exp, - 0, &oinfo.oi_md, lmm); - OBD_FREE(lmm, mds->mds_max_mdsize); - if (rc) - GOTO(out_oa, rc); - } - rc = obd_create(mds->mds_osc_exp, oinfo.oi_oa, - &oinfo.oi_md, &oti); - if (rc) { - int level = D_ERROR; - if (rc == -ENOSPC) - level = D_INODE; - CDEBUG(level, "error creating objects for " - "inode %lu: rc = %d\n", - inode->i_ino, rc); - if (rc > 0) { - CERROR("obd_create returned invalid " - "rc %d\n", rc); - rc = -EIO; - } - GOTO(out_oa, rc); - } - } else { - rc = obd_iocontrol(OBD_IOC_LOV_SETEA, mds->mds_osc_exp, - 0, &oinfo.oi_md, rec->ur_eadata); - if (rc) { - GOTO(out_oa, rc); - } - oinfo.oi_md->lsm_object_id = oinfo.oi_oa->o_id; - oinfo.oi_md->lsm_object_gr = oinfo.oi_oa->o_gr; - } - if (i_size_read(inode)) { - oinfo.oi_oa->o_size = i_size_read(inode); - obdo_from_inode(oinfo.oi_oa, inode, OBD_MD_FLTYPE | - OBD_MD_FLATIME | OBD_MD_FLMTIME | - OBD_MD_FLCTIME | OBD_MD_FLSIZE); - - /* pack lustre id to OST */ - oinfo.oi_oa->o_fid = body->fid1.id; - oinfo.oi_oa->o_generation = body->fid1.generation; - oinfo.oi_oa->o_valid |= OBD_MD_FLFID | OBD_MD_FLGENER; - - rc = obd_setattr_rqset(mds->mds_osc_exp, &oinfo, &oti); - if (rc) { - CERROR("error setting attrs for inode %lu: rc %d\n", - inode->i_ino, rc); - if (rc > 0) { - CERROR("obd_setattr_async returned bad rc %d\n", - rc); - rc = -EIO; - } - GOTO(out_oa, rc); - } - } - - body->valid |= OBD_MD_FLBLKSZ | OBD_MD_FLEASIZE; - obdo_refresh_inode(inode, oinfo.oi_oa, OBD_MD_FLBLKSZ); - - LASSERT(oinfo.oi_md && oinfo.oi_md->lsm_object_id); - lmm = NULL; - rc = obd_packmd(mds->mds_osc_exp, &lmm, oinfo.oi_md); - if (rc < 0) { - CERROR("cannot pack lsm, err = %d\n", rc); - GOTO(out_oa, rc); - } - lmm_size = rc; - body->eadatasize = rc; - - if (*handle == NULL) - *handle = fsfilt_start(obd, inode, FSFILT_OP_CREATE, NULL); - if (IS_ERR(*handle)) { - rc = PTR_ERR(*handle); - *handle = NULL; - GOTO(free_diskmd, rc); - } - - rc = fsfilt_set_md(obd, inode, *handle, lmm, lmm_size, "lov"); - lmm_buf = lustre_msg_buf(req->rq_repmsg, offset, lmm_size); - LASSERT(lmm_buf); - memcpy(lmm_buf, lmm, lmm_size); - - *objid = lmm_buf; // save for mds_lov_update_objid - -free_diskmd: - obd_free_diskmd(mds->mds_osc_exp, &lmm); -out_oa: - oti_free_cookies(&oti); - OBDO_FREE(oinfo.oi_oa); -out_ids: - if (oinfo.oi_md) - obd_free_memmd(mds->mds_osc_exp, &oinfo.oi_md); - RETURN(rc); -} - -static void reconstruct_open(struct mds_update_record *rec, int offset, - struct ptlrpc_request *req, - struct lustre_handle *child_lockh) -{ - struct mds_export_data *med = &req->rq_export->exp_mds_data; - struct mds_client_data *mcd = med->med_mcd; - struct mds_obd *mds = mds_req2mds(req); - struct mds_file_data *mfd; - struct obd_device *obd = req->rq_export->exp_obd; - struct dentry *parent, *dchild; - struct ldlm_reply *rep; - struct mds_body *body; - int rc; - struct list_head *t; - int put_child = 1; - ENTRY; - - LASSERT(offset == DLM_INTENT_REC_OFF); /* only called via intent */ - rep = lustre_msg_buf(req->rq_repmsg, DLM_LOCKREPLY_OFF, sizeof(*rep)); - body = lustre_msg_buf(req->rq_repmsg, DLM_REPLY_REC_OFF, sizeof(*body)); - - /* copy rc, transno and disp; steal locks */ - mds_req_from_mcd(req, mcd); - intent_set_disposition(rep, le32_to_cpu(mcd->mcd_last_data)); - - /* Only replay if create or open actually happened. */ - if (!intent_disposition(rep, DISP_OPEN_CREATE | DISP_OPEN_OPEN) ) { - EXIT; - return; /* error looking up parent or child */ - } - - parent = mds_fid2dentry(mds, rec->ur_fid1, NULL); - LASSERT(!IS_ERR(parent)); - - dchild = ll_lookup_one_len(rec->ur_name, parent, rec->ur_namelen - 1); - LASSERT(!IS_ERR(dchild)); - - if (!dchild->d_inode) - GOTO(out_dput, 0); /* child not present to open */ - - /* At this point, we know we have a child. We'll send - * it back _unless_ it not created and open failed. - */ - if (intent_disposition(rep, DISP_OPEN_OPEN) && - !intent_disposition(rep, DISP_OPEN_CREATE) && - req->rq_status) { - GOTO(out_dput, 0); - } - - mds_pack_inode2fid(&body->fid1, dchild->d_inode); - mds_pack_inode2body(body, dchild->d_inode); - if (S_ISREG(dchild->d_inode->i_mode)) { - rc = mds_pack_md(obd, req->rq_repmsg, DLM_REPLY_REC_OFF + 1, - body, dchild->d_inode, 1); - - if (rc) - LASSERT(rc == req->rq_status); - - /* If we have LOV EA data, the OST holds size, mtime */ - if (!(body->valid & OBD_MD_FLEASIZE)) - body->valid |= (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS | - OBD_MD_FLATIME | OBD_MD_FLMTIME); - } - - if (!(rec->ur_flags & MDS_OPEN_JOIN_FILE)) - lustre_shrink_reply(req, DLM_REPLY_REC_OFF + 1, - body->eadatasize, 0); - - if (req->rq_export->exp_connect_flags & OBD_CONNECT_ACL && - !(rec->ur_flags & MDS_OPEN_JOIN_FILE)) { - int acl_off = DLM_REPLY_REC_OFF + (body->eadatasize ? 2 : 1); - - rc = mds_pack_acl(med, dchild->d_inode, req->rq_repmsg, - body, acl_off); - lustre_shrink_reply(req, acl_off, body->aclsize, 0); - if (!req->rq_status && rc) - req->rq_status = rc; - } - - /* If we have -EEXIST as the status, and we were asked to create - * exclusively, we can tell we failed because the file already existed. - */ - if (req->rq_status == -EEXIST && - ((rec->ur_flags & (MDS_OPEN_CREAT | MDS_OPEN_EXCL)) == - (MDS_OPEN_CREAT | MDS_OPEN_EXCL))) { - GOTO(out_dput, 0); - } - - /* If we didn't get as far as trying to open, then some locking thing - * probably went wrong, and we'll just bail here. - */ - if (!intent_disposition(rep, DISP_OPEN_OPEN)) - GOTO(out_dput, 0); - - /* If we failed, then we must have failed opening, so don't look for - * file descriptor or anything, just give the client the bad news. - */ - if (req->rq_status) - GOTO(out_dput, 0); - - mfd = NULL; - spin_lock(&med->med_open_lock); - list_for_each(t, &med->med_open_head) { - mfd = list_entry(t, struct mds_file_data, mfd_list); - if (mfd->mfd_xid == req->rq_xid) { - mds_mfd_addref(mfd); - break; - } - mfd = NULL; - } - spin_unlock(&med->med_open_lock); - - /* #warning "XXX fixme" bug 2991 */ - /* Here it used to LASSERT(mfd) if exp_outstanding_reply != NULL. - * Now that exp_outstanding_reply is a list, it's just using mfd != NULL - * to detect a re-open */ - if (mfd == NULL) { - if (rec->ur_flags & MDS_OPEN_JOIN_FILE) { - rc = mds_join_file(rec, req, dchild, NULL); - if (rc) - GOTO(out_dput, rc); - } - mntget(mds->mds_vfsmnt); - CERROR("Re-opened file \n"); - mfd = mds_dentry_open(dchild, mds->mds_vfsmnt, - rec->ur_flags & ~MDS_OPEN_TRUNC, req); - if (!mfd) { - CERROR("mds: out of memory\n"); - GOTO(out_dput, req->rq_status = -ENOMEM); - } - put_child = 0; - } else { - body->handle.cookie = mfd->mfd_handle.h_cookie; - CDEBUG(D_INODE, "resend mfd %p, cookie "LPX64"\n", mfd, - mfd->mfd_handle.h_cookie); - } - - mds_mfd_put(mfd); - - out_dput: - if (put_child) - l_dput(dchild); - l_dput(parent); - EXIT; -} - -/* do NOT or the MAY_*'s, you'll get the weakest */ -static int accmode(struct inode *inode, int flags) -{ - int res = 0; - - /* Sadly, NFSD reopens a file repeatedly during operation, so the - * "acc_mode = 0" allowance for newly-created files isn't honoured. - * NFSD uses the MDS_OPEN_OWNEROVERRIDE flag to say that a file - * owner can write to a file even if it is marked readonly to hide - * its brokenness. (bug 5781) */ - if (flags & MDS_OPEN_OWNEROVERRIDE && inode->i_uid == current->fsuid) - return 0; - - if (flags & FMODE_READ) - res = MAY_READ; - if (flags & (FMODE_WRITE|MDS_OPEN_TRUNC)) - res |= MAY_WRITE; - if (flags & MDS_FMODE_EXEC) - res = MAY_EXEC; - return res; -} - -/* Handles object creation, actual opening, and I/O epoch */ -static int mds_finish_open(struct ptlrpc_request *req, struct dentry *dchild, - struct mds_body *body, int flags, void **handle, - struct mds_update_record *rec,struct ldlm_reply *rep, - struct lustre_handle *lockh) -{ - struct mds_obd *mds = mds_req2mds(req); - struct obd_device *obd = req->rq_export->exp_obd; - struct mds_file_data *mfd = NULL; - struct lov_mds_md *lmm = NULL; /* object IDs created */ - int rc = 0; - ENTRY; - - /* atomically create objects if necessary */ - LOCK_INODE_MUTEX(dchild->d_inode); - - if (S_ISREG(dchild->d_inode->i_mode) && - !(body->valid & OBD_MD_FLEASIZE)) { - rc = mds_pack_md(obd, req->rq_repmsg, DLM_REPLY_REC_OFF + 1, - body, dchild->d_inode, 0); - if (rc) { - UNLOCK_INODE_MUTEX(dchild->d_inode); - RETURN(rc); - } - } - if (rec != NULL) { - if ((body->valid & OBD_MD_FLEASIZE) && - (rec->ur_flags & MDS_OPEN_HAS_EA)) { - UNLOCK_INODE_MUTEX(dchild->d_inode); - RETURN(-EEXIST); - } - if (rec->ur_flags & MDS_OPEN_JOIN_FILE) { - UNLOCK_INODE_MUTEX(dchild->d_inode); - rc = mds_join_file(rec, req, dchild, lockh); - if (rc) - RETURN(rc); - LOCK_INODE_MUTEX(dchild->d_inode); - } - if (!(body->valid & OBD_MD_FLEASIZE) && - !(body->valid & OBD_MD_FLMODEASIZE)) { - /* no EA: create objects */ - rc = mds_create_objects(req, DLM_REPLY_REC_OFF + 1, rec, - mds, obd, dchild, handle, &lmm); - if (rc) { - CERROR("mds_create_objects: rc = %d\n", rc); - UNLOCK_INODE_MUTEX(dchild->d_inode); - RETURN(rc); - } - } - } - /* If the inode has no EA data, then MDS holds size, mtime */ - if (S_ISREG(dchild->d_inode->i_mode) && - !(body->valid & OBD_MD_FLEASIZE)) { - body->valid |= (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS | - OBD_MD_FLATIME | OBD_MD_FLMTIME); - } - UNLOCK_INODE_MUTEX(dchild->d_inode); - - if (rec && !(rec->ur_flags & MDS_OPEN_JOIN_FILE)) - lustre_shrink_reply(req, DLM_REPLY_REC_OFF + 1, - body->eadatasize, 0); - - if (req->rq_export->exp_connect_flags & OBD_CONNECT_ACL && - rec && !(rec->ur_flags & MDS_OPEN_JOIN_FILE)) { - int acl_off = DLM_REPLY_REC_OFF + (body->eadatasize ? 2 : 1); - - rc = mds_pack_acl(&req->rq_export->exp_mds_data, - dchild->d_inode, req->rq_repmsg, - body, acl_off); - lustre_shrink_reply(req, acl_off, body->aclsize, 0); - if (rc) - RETURN(rc); - } - - intent_set_disposition(rep, DISP_OPEN_OPEN); - mfd = mds_dentry_open(dchild, mds->mds_vfsmnt, flags, req); - if (IS_ERR(mfd)) - RETURN(PTR_ERR(mfd)); - - CDEBUG(D_INODE, "mfd %p, cookie "LPX64"\n", mfd, - mfd->mfd_handle.h_cookie); - - if (lmm != NULL) - mds_lov_update_objids(obd, lmm); - - if (rc) /* coverity[deadcode] */ - mds_mfd_unlink(mfd, 1); - - mds_mfd_put(mfd); - RETURN(rc); -} - -static int mds_open_by_fid(struct ptlrpc_request *req, struct ll_fid *fid, - struct mds_body *body, int flags, - struct mds_update_record *rec,struct ldlm_reply *rep) -{ - struct mds_obd *mds = mds_req2mds(req); - struct dentry *dchild; - char fidname[LL_FID_NAMELEN]; - int fidlen = 0, rc; - void *handle = NULL; - ENTRY; - - fidlen = ll_fid2str(fidname, fid->id, fid->generation); - dchild = ll_lookup_one_len(fidname, mds->mds_pending_dir, fidlen); - if (IS_ERR(dchild)) { - rc = PTR_ERR(dchild); - CERROR("error looking up %s in PENDING: rc = %d\n",fidname, rc); - RETURN(rc); - } - - if (dchild->d_inode != NULL) { - mds_inode_set_orphan(dchild->d_inode); - CWARN("Orphan %s found and opened in PENDING directory\n", - fidname); - } else { - l_dput(dchild); - - /* We didn't find it in PENDING so it isn't an orphan. See - * if it was a regular inode that was previously created. */ - dchild = mds_fid2dentry(mds, fid, NULL); - if (IS_ERR(dchild)) - RETURN(PTR_ERR(dchild)); - } - - mds_pack_inode2fid(&body->fid1, dchild->d_inode); - mds_pack_inode2body(body, dchild->d_inode); - intent_set_disposition(rep, DISP_LOOKUP_EXECD); - intent_set_disposition(rep, DISP_LOOKUP_POS); - - rc = mds_finish_open(req, dchild, body, flags, &handle, rec, rep, NULL); - rc = mds_finish_transno(mds, dchild->d_inode, handle, - req, rc, rep ? rep->lock_policy_res1 : 0, 0); - /* XXX what do we do here if mds_finish_transno itself failed? */ - - l_dput(dchild); - RETURN(rc); -} - -int mds_pin(struct ptlrpc_request *req, int offset) -{ - struct obd_device *obd = req->rq_export->exp_obd; - struct mds_body *reqbody, *repbody; - struct lvfs_run_ctxt saved; - int rc, size[2] = { sizeof(struct ptlrpc_body), sizeof(*repbody) }; - ENTRY; - - reqbody = lustre_msg_buf(req->rq_reqmsg, offset, sizeof(*reqbody)); - - rc = lustre_pack_reply(req, 2, size, NULL); - if (rc) - RETURN(rc); - - repbody = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, - sizeof(*repbody)); - - push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); - rc = mds_open_by_fid(req, &reqbody->fid1, repbody, reqbody->flags, NULL, - NULL); - pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); - - RETURN(rc); -} - -/* Get an internal lock on the inode number (but not generation) to sync - * new inode creation with inode unlink (bug 2029). If child_lockh is NULL - * we just get the lock as a barrier to wait for other holders of this lock, - * and drop it right away again. */ -int mds_lock_new_child(struct obd_device *obd, struct inode *inode, - struct lustre_handle *child_lockh) -{ - struct ldlm_res_id child_res_id = { .name = { inode->i_ino, 0, 1, 0 } }; - struct lustre_handle lockh; - int lock_flags = LDLM_FL_ATOMIC_CB; - int rc; - - if (child_lockh == NULL) - child_lockh = &lockh; - - rc = ldlm_cli_enqueue_local(obd->obd_namespace, &child_res_id, - LDLM_PLAIN, NULL, LCK_EX, &lock_flags, - ldlm_blocking_ast, ldlm_completion_ast, - NULL, NULL, 0, NULL, child_lockh); - if (rc != ELDLM_OK) - CERROR("ldlm_cli_enqueue_local: %d\n", rc); - else if (child_lockh == &lockh) - ldlm_lock_decref(child_lockh, LCK_EX); - - RETURN(rc); -} - -int mds_open(struct mds_update_record *rec, int offset, - struct ptlrpc_request *req, struct lustre_handle *child_lockh) -{ - /* XXX ALLOCATE _something_ - 464 bytes on stack here */ - struct obd_device *obd = req->rq_export->exp_obd; - struct mds_obd *mds = mds_req2mds(req); - struct ldlm_reply *rep = NULL; - struct mds_body *body = NULL; - struct dentry *dchild = NULL, *dparent = NULL; - struct mds_export_data *med; - struct lustre_handle parent_lockh; - int rc = 0, cleanup_phase = 0, acc_mode, created = 0; - int parent_mode = LCK_CR; - void *handle = NULL; - struct lvfs_dentry_params dp = LVFS_DENTRY_PARAMS_INIT; - unsigned int qcids[MAXQUOTAS] = { current->fsuid, current->fsgid }; - unsigned int qpids[MAXQUOTAS] = { 0, 0 }; - int child_mode = LCK_CR; - /* Always returning LOOKUP lock if open succesful to guard - dentry on client. */ - ldlm_policy_data_t policy = {.l_inodebits={MDS_INODELOCK_LOOKUP}}; - struct ldlm_res_id child_res_id = { .name = {0}}; - int lock_flags = 0; - ENTRY; - - mds_counter_incr(req->rq_export, LPROC_MDS_OPEN); - - OBD_FAIL_TIMEOUT_ORSET(OBD_FAIL_MDS_PAUSE_OPEN, OBD_FAIL_ONCE, - (obd_timeout + 1) / 4); - - CLASSERT(MAXQUOTAS < 4); - if (offset == DLM_INTENT_REC_OFF) { /* intent */ - rep = lustre_msg_buf(req->rq_repmsg, DLM_LOCKREPLY_OFF, - sizeof(*rep)); - body = lustre_msg_buf(req->rq_repmsg, DLM_REPLY_REC_OFF, - sizeof(*body)); - } else if (offset == REQ_REC_OFF) { /* non-intent reint */ - body = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, - sizeof(*body)); - LBUG(); /* XXX: not supported yet? */ - } else { - body = NULL; - LBUG(); - } - - MDS_CHECK_RESENT(req, reconstruct_open(rec, offset, req, child_lockh)); - - /* Step 0: If we are passed a fid, then we assume the client already - * opened this file and is only replaying the RPC, so we open the - * inode by fid (at some large expense in security). */ - /*XXX liblustre use mds_open_by_fid to implement LL_IOC_LOV_SETSTRIPE */ - if (((lustre_msg_get_flags(req->rq_reqmsg) & MSG_REPLAY) || - (req->rq_export->exp_libclient && rec->ur_flags&MDS_OPEN_HAS_EA))&& - !(rec->ur_flags & MDS_OPEN_JOIN_FILE)) { - if (rec->ur_fid2->id == 0) { - struct ldlm_lock *lock = ldlm_handle2lock(child_lockh); - if (lock) { - LDLM_ERROR(lock, "fid2 not set on open replay"); - LDLM_LOCK_PUT(lock); - } - DEBUG_REQ(D_ERROR, req, "fid2 not set on open replay"); - RETURN(-EFAULT); - } - - rc = mds_open_by_fid(req, rec->ur_fid2, body, rec->ur_flags, - rec, rep); - if (rc != -ENOENT) { - if (req->rq_export->exp_libclient && - rec->ur_flags & MDS_OPEN_HAS_EA) - RETURN(0); - - RETURN(rc); - } - - /* We didn't find the correct inode on disk either, so we - * need to re-create it via a regular replay. */ - if (!(rec->ur_flags & MDS_OPEN_CREAT)) { - DEBUG_REQ(D_ERROR, req,"OPEN_CREAT not in open replay"); - RETURN(-EFAULT); - } - } else if (rec->ur_fid2->id) { - DEBUG_REQ(D_ERROR, req, "fid2 "LPU64"/%u on open non-replay", - rec->ur_fid2->id, rec->ur_fid2->generation); - RETURN(-EFAULT); - } - - /* If we got here, we must be called via intent */ - LASSERT(offset == DLM_INTENT_REC_OFF); - - med = &req->rq_export->exp_mds_data; - if (OBD_FAIL_CHECK(OBD_FAIL_MDS_OPEN_PACK)) { - CERROR("test case OBD_FAIL_MDS_OPEN_PACK\n"); - RETURN(-ENOMEM); - } - - /* Step 1: Find and lock the parent */ - if (rec->ur_flags & (MDS_OPEN_CREAT | MDS_OPEN_JOIN_FILE)) - parent_mode = LCK_EX; - dparent = mds_fid2locked_dentry(obd, rec->ur_fid1, NULL, parent_mode, - &parent_lockh, MDS_INODELOCK_UPDATE); - if (IS_ERR(dparent)) { - rc = PTR_ERR(dparent); - if (rc != -ENOENT) { - CERROR("parent "LPU64"/%u lookup error %d\n", - rec->ur_fid1->id, rec->ur_fid1->generation, rc); - } else { - /* Just cannot find parent - make it look like - * usual negative lookup to avoid extra MDS RPC */ - intent_set_disposition(rep, DISP_LOOKUP_EXECD); - intent_set_disposition(rep, DISP_LOOKUP_NEG); - } - GOTO(cleanup, rc); - } - LASSERT(dparent->d_inode != NULL); - - cleanup_phase = 1; /* parent dentry and lock */ - - if (rec->ur_flags & MDS_OPEN_JOIN_FILE) { - dchild = dget(dparent); - cleanup_phase = 2; /* child dentry */ - acc_mode = accmode(dchild->d_inode, rec->ur_flags); - GOTO(found_child, rc); - } - - /* Step 2: Lookup the child */ - - if (!(lustre_msg_get_flags(req->rq_reqmsg) & MSG_REPLAY) && - (rec->ur_flags & MDS_OPEN_LOCK) && (rec->ur_namelen == 1)) { - /* hack for nfsd with no_subtree_check, it will use anon - * dentry w/o filename to open the file. the anon dentry's - * parent was set to itself, so rec->ur_fid1 is the file. - * And in MDC it cannot derive the dentry's parent dentry, - * hence the file's name, so we hack here in MDS, - * refer to bug 13030. */ - dchild = mds_fid2dentry(mds, rec->ur_fid1, NULL); - } else { - dchild = ll_lookup_one_len(rec->ur_name, dparent, - rec->ur_namelen - 1); - } - if (IS_ERR(dchild)) { - rc = PTR_ERR(dchild); - dchild = NULL; /* don't confuse mds_finish_transno() below */ - GOTO(cleanup, rc); - } - - cleanup_phase = 2; /* child dentry */ - - intent_set_disposition(rep, DISP_LOOKUP_EXECD); - if (dchild->d_inode) - intent_set_disposition(rep, DISP_LOOKUP_POS); - else - intent_set_disposition(rep, DISP_LOOKUP_NEG); - - /*Step 3: If the child was negative, and we're supposed to, create it.*/ - if (dchild->d_inode == NULL) { - unsigned long ino = rec->ur_fid2->id; - struct iattr iattr; - struct inode *inode; - - if (!(rec->ur_flags & MDS_OPEN_CREAT)) { - /* It's negative and we weren't supposed to create it */ - GOTO(cleanup, rc = -ENOENT); - } - - if (req->rq_export->exp_connect_flags & OBD_CONNECT_RDONLY) - GOTO(cleanup, rc = -EROFS); - - intent_set_disposition(rep, DISP_OPEN_CREATE); - handle = fsfilt_start(obd, dparent->d_inode, FSFILT_OP_CREATE, - NULL); - if (IS_ERR(handle)) { - rc = PTR_ERR(handle); - handle = NULL; - GOTO(cleanup, rc); - } - dchild->d_fsdata = (void *) &dp; - dp.ldp_ptr = req; - dp.ldp_inum = ino; - - rc = ll_vfs_create(dparent->d_inode, dchild, rec->ur_mode,NULL); - if (dchild->d_fsdata == (void *)(unsigned long)ino) - dchild->d_fsdata = NULL; - - if (rc) { - CDEBUG(D_INODE, "error during create: %d\n", rc); - GOTO(cleanup, rc); - } - inode = dchild->d_inode; - if (ino) { - LASSERT(ino == inode->i_ino); - /* Written as part of setattr */ - inode->i_generation = rec->ur_fid2->generation; - CDEBUG(D_HA, "recreated ino %lu with gen %u\n", - inode->i_ino, inode->i_generation); - } - - created = 1; - LTIME_S(iattr.ia_atime) = rec->ur_time; - LTIME_S(iattr.ia_ctime) = rec->ur_time; - LTIME_S(iattr.ia_mtime) = rec->ur_time; - - iattr.ia_uid = current->fsuid; /* set by push_ctxt already */ - if (dparent->d_inode->i_mode & S_ISGID) - iattr.ia_gid = dparent->d_inode->i_gid; - else - iattr.ia_gid = current->fsgid; - - iattr.ia_valid = ATTR_UID | ATTR_GID | ATTR_ATIME | - ATTR_MTIME | ATTR_CTIME; - - rc = fsfilt_setattr(obd, dchild, handle, &iattr, 0); - if (rc) - CERROR("error on child setattr: rc = %d\n", rc); - - iattr.ia_valid = ATTR_MTIME | ATTR_CTIME; - - rc = fsfilt_setattr(obd, dparent, handle, &iattr, 0); - if (rc) - CERROR("error on parent setattr: rc = %d\n", rc); - - rc = fsfilt_commit(obd, dchild->d_inode, handle, 0); - handle = NULL; - acc_mode = 0; /* Don't check for permissions */ - } else { - acc_mode = accmode(dchild->d_inode, rec->ur_flags); - } - - LASSERTF(!mds_inode_is_orphan(dchild->d_inode), - "dchild %.*s (%p) inode %p/%lu/%u\n", dchild->d_name.len, - dchild->d_name.name, dchild, dchild->d_inode, - dchild->d_inode->i_ino, dchild->d_inode->i_generation); - -found_child: - mds_pack_inode2fid(&body->fid1, dchild->d_inode); - mds_pack_inode2body(body, dchild->d_inode); - - if (S_ISREG(dchild->d_inode->i_mode)) { - /* Check permissions etc */ - rc = ll_permission(dchild->d_inode, acc_mode, NULL); - if (rc != 0) - GOTO(cleanup, rc); - - if ((req->rq_export->exp_connect_flags & OBD_CONNECT_RDONLY) && - (acc_mode & MAY_WRITE)) - GOTO(cleanup, rc = -EROFS); - - /* An append-only file must be opened in append mode for - * writing */ - if (IS_APPEND(dchild->d_inode) && (acc_mode & MAY_WRITE) != 0 && - ((rec->ur_flags & MDS_OPEN_APPEND) == 0 || - (rec->ur_flags & MDS_OPEN_TRUNC) != 0)) - GOTO(cleanup, rc = -EPERM); - } - - if (!created && (rec->ur_flags & MDS_OPEN_CREAT) && - (rec->ur_flags & MDS_OPEN_EXCL)) { - /* File already exists, we didn't just create it, and we - * were passed O_EXCL; err-or. */ - GOTO(cleanup, rc = -EEXIST); // returns a lock to the client - } - - /* if we are following a symlink, don't open */ - if (S_ISLNK(dchild->d_inode->i_mode)) - GOTO(cleanup_no_trans, rc = 0); - - if (S_ISDIR(dchild->d_inode->i_mode)) { - if (rec->ur_flags & MDS_OPEN_CREAT || - rec->ur_flags & FMODE_WRITE) { - /* we are trying to create or write a exist dir */ - GOTO(cleanup, rc = -EISDIR); - } - if (rec->ur_flags & MDS_FMODE_EXEC) { - /* we are trying to exec a directory */ - GOTO(cleanup, rc = -EACCES); - } - if (ll_permission(dchild->d_inode, acc_mode, NULL)) { - intent_set_disposition(rep, DISP_OPEN_OPEN); - GOTO(cleanup, rc = -EACCES); - } - } else if (rec->ur_flags & MDS_OPEN_DIRECTORY) { - GOTO(cleanup, rc = -ENOTDIR); - } - - if (OBD_FAIL_CHECK_RESET(OBD_FAIL_MDS_OPEN_CREATE, - OBD_FAIL_LDLM_REPLY | OBD_FAIL_ONCE)) { - GOTO(cleanup, rc = -EAGAIN); - } - - /* Obtain OPEN lock as well */ - policy.l_inodebits.bits |= MDS_INODELOCK_OPEN; - - /* We cannot use acc_mode here, because it is zeroed in case of - creating a file, so we get wrong lockmode */ - if (accmode(dchild->d_inode, rec->ur_flags) & MAY_WRITE) - child_mode = LCK_CW; - else if (accmode(dchild->d_inode, rec->ur_flags) & MAY_EXEC) - child_mode = LCK_PR; - else - child_mode = LCK_CR; - - if (!(lustre_msg_get_flags(req->rq_reqmsg) & MSG_REPLAY) && - (rec->ur_flags & MDS_OPEN_LOCK)) { - /* In case of replay we do not get a lock assuming that the - caller has it already */ - child_res_id.name[0] = dchild->d_inode->i_ino; - child_res_id.name[1] = dchild->d_inode->i_generation; - - rc = ldlm_cli_enqueue_local(obd->obd_namespace, &child_res_id, - LDLM_IBITS, &policy, child_mode, - &lock_flags, ldlm_blocking_ast, - ldlm_completion_ast, NULL, NULL, - 0, NULL, child_lockh); - if (rc != ELDLM_OK) - GOTO(cleanup, rc); - - /* Let mds_intent_policy know that we have a lock to return */ - intent_set_disposition(rep, DISP_OPEN_LOCK); - cleanup_phase = 3; - } - - if (!S_ISREG(dchild->d_inode->i_mode) && - !S_ISDIR(dchild->d_inode->i_mode) && - (req->rq_export->exp_connect_flags & OBD_CONNECT_NODEVOH)) { - /* If client supports this, do not return open handle for - * special device nodes */ - GOTO(cleanup_no_trans, rc = 0); - } - - /* Step 5: mds_open it */ - rc = mds_finish_open(req, dchild, body, rec->ur_flags, &handle, rec, - rep, &parent_lockh); - GOTO(cleanup, rc); - - cleanup: - rc = mds_finish_transno(mds, dchild ? dchild->d_inode : NULL, handle, - req, rc, rep ? rep->lock_policy_res1 : 0, 0); - - cleanup_no_trans: - switch (cleanup_phase) { - case 3: - if (rc) - /* It is safe to leave IT_OPEN_LOCK set, if rc is not 0, - * mds_intent_policy won't try to return any locks */ - ldlm_lock_decref(child_lockh, child_mode); - case 2: - if (rc && created) { - int err = vfs_unlink(dparent->d_inode, dchild); - if (err) { - CERROR("unlink(%.*s) in error path: %d\n", - dchild->d_name.len, dchild->d_name.name, - err); - } - } else if (created) { - mds_lock_new_child(obd, dchild->d_inode, NULL); - /* save uid/gid for quota acquire/release */ - qpids[USRQUOTA] = dparent->d_inode->i_uid; - qpids[GRPQUOTA] = dparent->d_inode->i_gid; - } - l_dput(dchild); - case 1: - if (dparent == NULL) - break; - - l_dput(dparent); - if (rc) - ldlm_lock_decref(&parent_lockh, parent_mode); - else - ptlrpc_save_lock(req, &parent_lockh, parent_mode); - } - /* trigger dqacq on the owner of child and parent */ - lquota_adjust(mds_quota_interface_ref, obd, qcids, qpids, rc, - FSFILT_OP_CREATE); - - /* If we have not taken the "open" lock, we may not return 0 here, - because caller expects 0 to mean "lock is taken", and it needs - nonzero return here for caller to return EDLM_LOCK_ABORTED to - client. Later caller should rewrite the return value back to zero - if it to be used any further - */ - RETURN(rc); -} - -/* Close a "file descriptor" and possibly unlink an orphan from the - * PENDING directory. Caller must hold child->i_mutex, this drops it. - * - * If we are being called from mds_disconnect() because the client has - * disappeared, then req == NULL and we do not update last_rcvd because - * there is nothing that could be recovered by the client at this stage - * (it will not even _have_ an entry in last_rcvd anymore). - * - * Returns EAGAIN if the client needs to get more data and re-close. */ -int mds_mfd_close(struct ptlrpc_request *req, int offset,struct obd_device *obd, - struct mds_file_data *mfd, int unlink_orphan, - struct lov_mds_md *lmm, int lmm_size, - struct llog_cookie *logcookies, int cookies_size, - __u64 *valid) -{ - struct inode *inode = mfd->mfd_dentry->d_inode; - char fidname[LL_FID_NAMELEN]; - int last_orphan, fidlen, rc = 0, cleanup_phase = 0; - struct dentry *pending_child = NULL; - struct mds_obd *mds = &obd->u.mds; - struct inode *pending_dir = mds->mds_pending_dir->d_inode; - void *handle = NULL; - struct mds_body *request_body = NULL, *reply_body = NULL; - struct lvfs_dentry_params dp = LVFS_DENTRY_PARAMS_INIT; - struct iattr iattr = { 0 }; - ENTRY; - - if (req && req->rq_reqmsg != NULL) - request_body = lustre_msg_buf(req->rq_reqmsg, offset, - sizeof(*request_body)); - if (req && req->rq_repmsg != NULL) - reply_body = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, - sizeof(*reply_body)); - - fidlen = ll_fid2str(fidname, inode->i_ino, inode->i_generation); - - CDEBUG(D_INODE, "inode %p ino %s nlink %d orphan %d\n", inode, fidname, - inode->i_nlink, mds_orphan_open_count(inode)); - - last_orphan = mds_orphan_open_dec_test(inode) && - mds_inode_is_orphan(inode); - - /* this is half of the actual "close" */ - if (mfd->mfd_mode & FMODE_WRITE) { - rc = mds_put_write_access(mds, inode, request_body, - last_orphan && unlink_orphan); - } else if (mfd->mfd_mode & MDS_FMODE_EXEC) { - mds_allow_write_access(inode); - } - /* here writecount change also needs protection from orphan write sem. - * so drop orphan write sem after mds_put_write_access, bz 12888. */ - MDS_UP_WRITE_ORPHAN_SEM(inode); - - if (last_orphan && unlink_orphan) { - int stripe_count = 0; - LASSERT(rc == 0); /* mds_put_write_access must have succeeded */ - - CDEBUG(D_INODE, "destroying orphan object %s\n", fidname); - - if ((S_ISREG(inode->i_mode) && inode->i_nlink != 1) || - (S_ISDIR(inode->i_mode) && inode->i_nlink != 2)) - CERROR("found \"orphan\" %s %s with link count %d\n", - S_ISREG(inode->i_mode) ? "file" : "dir", - fidname, inode->i_nlink); - - /* Sadly, there is no easy way to save pending_child from - * mds_reint_unlink() into mfd, so we need to re-lookup, - * but normally it will still be in the dcache. */ - LOCK_INODE_MUTEX(pending_dir); - cleanup_phase = 1; /* UNLOCK_INODE_MUTEX(pending_dir) when finished */ - pending_child = lookup_one_len(fidname, mds->mds_pending_dir, - fidlen); - if (IS_ERR(pending_child)) - GOTO(cleanup, rc = PTR_ERR(pending_child)); - LASSERT(pending_child->d_inode != NULL); - - cleanup_phase = 2; /* dput(pending_child) when finished */ - if (S_ISDIR(pending_child->d_inode->i_mode)) { - rc = vfs_rmdir(pending_dir, pending_child); - if (rc) - CERROR("error unlinking orphan dir %s: rc %d\n", - fidname,rc); - goto out; - } - - if (lmm != NULL) { - stripe_count = le32_to_cpu(lmm->lmm_stripe_count); - } - - handle = fsfilt_start_log(obd, pending_dir, FSFILT_OP_UNLINK, - NULL, stripe_count); - if (IS_ERR(handle)) { - rc = PTR_ERR(handle); - handle = NULL; - GOTO(cleanup, rc); - } - - if (lmm != NULL && (*valid & OBD_MD_FLEASIZE) && - mds_log_op_unlink(obd, lmm, lmm_size, - logcookies, cookies_size) > 0) { - *valid |= OBD_MD_FLCOOKIE; - } - - dp.ldp_inum = 0; - dp.ldp_ptr = req; - pending_child->d_fsdata = (void *) &dp; - rc = vfs_unlink(pending_dir, pending_child); - if (rc) - CERROR("error unlinking orphan %s: rc %d\n",fidname,rc); - - goto out; /* Don't bother updating attrs on unlinked inode */ - } - -#if 0 - if (request_body != NULL && mfd->mfd_mode & FMODE_WRITE && rc == 0) { - /* Update the on-disk attributes if this was the last write - * close, and all information was provided (i.e., rc == 0) - * - * XXX this should probably be abstracted with mds_reint_setattr - */ - - if (request_body->valid & OBD_MD_FLMTIME && - LTIME_S(iattr.ia_mtime) > LTIME_S(inode->i_mtime)) { - LTIME_S(iattr.ia_mtime) = request_body->mtime; - iattr.ia_valid |= ATTR_MTIME; - } - if (request_body->valid & OBD_MD_FLCTIME && - LTIME_S(iattr.ia_ctime) > LTIME_S(inode->i_ctime)) { - LTIME_S(iattr.ia_ctime) = request_body->ctime; - iattr.ia_valid |= ATTR_CTIME; - } - - /* XXX can't set block count with fsfilt_setattr (!) */ - if (request_body->valid & OBD_MD_FLSIZE) { - iattr.ia_valid |= ATTR_SIZE; - iattr.ia_size = request_body->size; - } - /* iattr.ia_blocks = request_body->blocks */ - - } -#endif - if (request_body != NULL && request_body->valid & OBD_MD_FLATIME) { - /* Only start a transaction to write out only the atime if - * it is more out-of-date than the specified limit. If we - * are already going to write out the atime then do it anyway. - * */ - LTIME_S(iattr.ia_atime) = request_body->atime; - if ((LTIME_S(iattr.ia_atime) > - LTIME_S(inode->i_atime) + mds->mds_atime_diff) || - (iattr.ia_valid != 0 && - LTIME_S(iattr.ia_atime) > LTIME_S(inode->i_atime))) - iattr.ia_valid |= ATTR_ATIME; - } - - if (iattr.ia_valid != 0) { - handle = fsfilt_start(obd, inode, FSFILT_OP_SETATTR, NULL); - if (IS_ERR(handle)) { - rc = PTR_ERR(handle); - handle = NULL; - GOTO(cleanup, rc); - } - rc = fsfilt_setattr(obd, mfd->mfd_dentry, handle, &iattr, 0); - if (rc) - CERROR("error in setattr(%s): rc %d\n", fidname, rc); - } -out: - /* If other clients have this file open for write, rc will be > 0 */ - if (rc > 0) - rc = 0; - l_dput(mfd->mfd_dentry); - mds_mfd_put(mfd); - - cleanup: - if (req != NULL && reply_body != NULL) { - rc = mds_finish_transno(mds, pending_dir, handle, req, rc, 0, 0); - } else if (handle) { - int err = fsfilt_commit(obd, pending_dir, handle, 0); - if (err) { - CERROR("error committing close: %d\n", err); - if (!rc) - rc = err; - } - } - - switch (cleanup_phase) { - case 2: - dput(pending_child); - case 1: - UNLOCK_INODE_MUTEX(pending_dir); - } - RETURN(rc); -} - -int mds_close(struct ptlrpc_request *req, int offset) -{ - struct mds_export_data *med = &req->rq_export->exp_mds_data; - struct obd_device *obd = req->rq_export->exp_obd; - struct mds_body *body; - struct mds_file_data *mfd; - struct lvfs_run_ctxt saved; - struct inode *inode; - int rc, repsize[4] = { sizeof(struct ptlrpc_body), - sizeof(struct mds_body), - obd->u.mds.mds_max_mdsize, - obd->u.mds.mds_max_cookiesize }; - struct mds_body *reply_body; - struct lov_mds_md *lmm; - int lmm_size; - struct llog_cookie *logcookies; - int cookies_size; - ENTRY; - - rc = lustre_pack_reply(req, 4, repsize, NULL); - if (rc) - req->rq_status = rc; - /* continue on to drop local open even if we can't send reply */ - else - MDS_CHECK_RESENT(req, mds_reconstruct_generic(req)); - - CDEBUG(D_INODE, "close req->rep_len %d mdsize %d cookiesize %d\n", - req->rq_replen, - obd->u.mds.mds_max_mdsize, obd->u.mds.mds_max_cookiesize); - mds_counter_incr(req->rq_export, LPROC_MDS_CLOSE); - - body = lustre_swab_reqbuf(req, offset, sizeof(*body), - lustre_swab_mds_body); - if (body == NULL) { - CERROR("Can't unpack body\n"); - req->rq_status = -EFAULT; - RETURN(-EFAULT); - } - - spin_lock(&med->med_open_lock); - mfd = mds_handle2mfd(&body->handle); - if (mfd == NULL) { - spin_unlock(&med->med_open_lock); - DEBUG_REQ(D_ERROR, req, "no handle for file close ino "LPD64 - ": cookie "LPX64, body->fid1.id, body->handle.cookie); - req->rq_status = -ESTALE; - RETURN(-ESTALE); - } - /* Remove mfd handle so it can't be found again. We consume mfd_list - * reference here, but still have mds_handle2mfd ref until mfd_close. */ - mds_mfd_unlink(mfd, 1); - spin_unlock(&med->med_open_lock); - - inode = mfd->mfd_dentry->d_inode; - /* child orphan sem protects orphan_dec_test && is_orphan race */ - MDS_DOWN_WRITE_ORPHAN_SEM(inode); /* mds_mfd_close drops this */ - if (mds_inode_is_orphan(inode) && mds_orphan_open_count(inode) == 1) { - body = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, - sizeof(*body)); - LASSERT(body != NULL); - - mds_pack_inode2fid(&body->fid1, inode); - mds_pack_inode2body(body, inode); - mds_pack_md(obd, req->rq_repmsg, REPLY_REC_OFF + 1, body, inode, - MDS_PACK_MD_LOCK); - } - - push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); - reply_body = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, sizeof(*reply_body)); - lmm = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF + 1, 0); - lmm_size = lustre_msg_buflen(req->rq_repmsg, REPLY_REC_OFF + 1), - logcookies = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF + 2, 0); - cookies_size = lustre_msg_buflen(req->rq_repmsg, REPLY_REC_OFF + 2); - req->rq_status = mds_mfd_close(req, offset, obd, mfd, 1, - lmm, lmm_size, logcookies, cookies_size, - &reply_body->valid); - pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); - - mds_shrink_reply(obd, req, body, REPLY_REC_OFF + 1); - if (OBD_FAIL_CHECK(OBD_FAIL_MDS_CLOSE_PACK)) { - CERROR("test case OBD_FAIL_MDS_CLOSE_PACK\n"); - req->rq_status = -ENOMEM; - RETURN(-ENOMEM); - } - - RETURN(rc); -} - -int mds_done_writing(struct ptlrpc_request *req, int offset) -{ - struct mds_body *body; - int rc, size[2] = { sizeof(struct ptlrpc_body), - sizeof(struct mds_body) }; - ENTRY; - - MDS_CHECK_RESENT(req, mds_reconstruct_generic(req)); - - body = lustre_swab_reqbuf(req, offset, sizeof(*body), - lustre_swab_mds_body); - if (body == NULL) { - CERROR("Can't unpack body\n"); - req->rq_status = -EFAULT; - RETURN(-EFAULT); - } - - rc = lustre_pack_reply(req, 2, size, NULL); - if (rc) - req->rq_status = rc; - - RETURN(0); -} diff --git a/lustre/mds/mds_reint.c b/lustre/mds/mds_reint.c deleted file mode 100644 index bf8b8e5..0000000 --- a/lustre/mds/mds_reint.c +++ /dev/null @@ -1,2419 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * linux/mds/mds_reint.c - * Lustre Metadata Server (mds) reintegration routines - * - * Copyright (C) 2002-2005 Cluster File Systems, Inc. - * Author: Peter Braam - * Author: Andreas Dilger - * Author: Phil Schwan - * - * This file is part of the Lustre file system, http://www.lustre.org - * Lustre is a trademark of Cluster File Systems, Inc. - * - * You may have signed or agreed to another license before downloading - * this software. If so, you are bound by the terms and conditions - * of that agreement, and the following does not apply to you. See the - * LICENSE file included with this distribution for more information. - * - * If you did not agree to a different license, then this copy of Lustre - * is open source software; you can redistribute it and/or modify it - * under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * In either case, Lustre is distributed in the hope that it will be - * useful, but WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * license text for more details. - */ - -#ifndef EXPORT_SYMTAB -# define EXPORT_SYMTAB -#endif -#define DEBUG_SUBSYSTEM S_MDS - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "mds_internal.h" - -void mds_commit_cb(struct obd_device *obd, __u64 transno, void *data, - int error) -{ - obd_transno_commit_cb(obd, transno, error); -} - -struct mds_logcancel_data { - struct lov_mds_md *mlcd_lmm; - int mlcd_size; - int mlcd_cookielen; - int mlcd_eadatalen; - struct llog_cookie mlcd_cookies[0]; -}; - - -static void mds_cancel_cookies_cb(struct obd_device *obd, __u64 transno, - void *cb_data, int error) -{ - struct mds_logcancel_data *mlcd = cb_data; - struct lov_stripe_md *lsm = NULL; - struct llog_ctxt *ctxt; - int rc; - - obd_transno_commit_cb(obd, transno, error); - - CDEBUG(D_RPCTRACE, "cancelling %d cookies\n", - (int)(mlcd->mlcd_cookielen / sizeof(*mlcd->mlcd_cookies))); - - rc = obd_unpackmd(obd->u.mds.mds_osc_exp, &lsm, mlcd->mlcd_lmm, - mlcd->mlcd_eadatalen); - if (rc < 0) { - CERROR("bad LSM cancelling %d log cookies: rc %d\n", - (int)(mlcd->mlcd_cookielen/sizeof(*mlcd->mlcd_cookies)), - rc); - } else { - ///* XXX 0 normally, SENDNOW for debug */); - rc = obd_checkmd(obd->u.mds.mds_osc_exp, obd->obd_self_export, - lsm); - if (rc) - CERROR("Can not revalidate lsm %p \n", lsm); - - ctxt = llog_get_context(obd,mlcd->mlcd_cookies[0].lgc_subsys+1); - rc = llog_cancel(ctxt, lsm, mlcd->mlcd_cookielen / - sizeof(*mlcd->mlcd_cookies), - mlcd->mlcd_cookies, OBD_LLOG_FL_SENDNOW); - llog_ctxt_put(ctxt); - - if (rc) - CERROR("error cancelling %d log cookies: rc %d\n", - (int)(mlcd->mlcd_cookielen / - sizeof(*mlcd->mlcd_cookies)), rc); - } - - OBD_FREE(mlcd, mlcd->mlcd_size); -} - -/* Assumes caller has already pushed us into the kernel context. */ -int mds_finish_transno(struct mds_obd *mds, struct inode *inode, void *handle, - struct ptlrpc_request *req, int rc, __u32 op_data, - int force_sync) -{ - struct mds_export_data *med = &req->rq_export->exp_mds_data; - struct mds_client_data *mcd = med->med_mcd; - struct obd_device *obd = req->rq_export->exp_obd; - int err; - __u64 transno, prev_transno; - loff_t off; - int log_pri = D_RPCTRACE; - ENTRY; - - if (IS_ERR(handle)) { - LASSERT(rc != 0); - RETURN(rc); - } - - /* if the export has already been failed, we have no last_rcvd slot */ - if (req->rq_export->exp_failed || obd->obd_fail) { - CWARN("commit transaction for disconnected client %s: rc %d\n", - req->rq_export->exp_client_uuid.uuid, rc); - if (rc == 0) - rc = -ENOTCONN; - if (handle) - GOTO(commit, rc); - RETURN(rc); - } - - if (handle == NULL) { - /* if we're starting our own xaction, use our own inode */ - inode = mds->mds_rcvd_filp->f_dentry->d_inode; - handle = fsfilt_start(obd, inode, FSFILT_OP_SETATTR, NULL); - if (IS_ERR(handle)) { - CERROR("fsfilt_start: %ld\n", PTR_ERR(handle)); - RETURN(PTR_ERR(handle)); - } - } - - off = med->med_lr_off; - - transno = lustre_msg_get_transno(req->rq_reqmsg); - if (rc != 0) { - if (transno != 0) { - CERROR("%s: replay %s transno "LPU64" failed: rc %d\n", - obd->obd_name, - libcfs_nid2str(req->rq_export->exp_connection->c_peer.nid), - transno, rc); - transno = 0; - } - } else if (transno == 0) { - spin_lock(&mds->mds_transno_lock); - transno = ++mds->mds_last_transno; - spin_unlock(&mds->mds_transno_lock); - } else { - spin_lock(&mds->mds_transno_lock); - if (transno > mds->mds_last_transno) - mds->mds_last_transno = transno; - spin_unlock(&mds->mds_transno_lock); - } - - req->rq_transno = transno; - lustre_msg_set_transno(req->rq_repmsg, transno); - if (lustre_msg_get_opc(req->rq_reqmsg) == MDS_CLOSE) { - prev_transno = le64_to_cpu(mcd->mcd_last_close_transno); - mcd->mcd_last_close_transno = cpu_to_le64(transno); - mcd->mcd_last_close_xid = cpu_to_le64(req->rq_xid); - mcd->mcd_last_close_result = cpu_to_le32(rc); - mcd->mcd_last_close_data = cpu_to_le32(op_data); - } else { - prev_transno = le64_to_cpu(mcd->mcd_last_transno); - if (((lustre_msg_get_flags(req->rq_reqmsg) & - (MSG_RESENT | MSG_REPLAY)) == 0) || - (transno > prev_transno)) { - mcd->mcd_last_transno = cpu_to_le64(transno); - mcd->mcd_last_xid = cpu_to_le64(req->rq_xid); - mcd->mcd_last_result = cpu_to_le32(rc); - mcd->mcd_last_data = cpu_to_le32(op_data); - } - } - /* update the server data to not lose the greatest transno. Bug 11125 */ - if ((transno == 0) && (prev_transno == mds->mds_last_transno)) - mds_update_server_data(obd, 0); - - if (off <= 0) { - CERROR("client idx %d has offset %lld\n", med->med_lr_idx, off); - err = -EINVAL; - } else { - struct obd_export *exp = req->rq_export; - - if (!force_sync) - force_sync = fsfilt_add_journal_cb(exp->exp_obd,transno, - handle, mds_commit_cb, - NULL); - - err = fsfilt_write_record(obd, mds->mds_rcvd_filp, mcd, - sizeof(*mcd), &off, - force_sync | exp->exp_need_sync); - if (force_sync) - mds_commit_cb(obd, transno, NULL, err); - } - - if (err) { - log_pri = D_ERROR; - if (rc == 0) - rc = err; - } - - DEBUG_REQ(log_pri, req, - "wrote trans #"LPU64" rc %d client %s at idx %u: err = %d", - transno, rc, mcd->mcd_uuid, med->med_lr_idx, err); - - err = mds_lov_write_objids(obd); - if (err) { - log_pri = D_ERROR; - if (rc == 0) - rc = err; - } - CDEBUG(log_pri, "wrote objids: err = %d\n", err); - -commit: - err = fsfilt_commit(obd, inode, handle, 0); - if (err) { - CERROR("error committing transaction: %d\n", err); - if (!rc) - rc = err; - } - - RETURN(rc); -} - -/* this gives the same functionality as the code between - * sys_chmod and inode_setattr - * chown_common and inode_setattr - * utimes and inode_setattr - */ -int mds_fix_attr(struct inode *inode, struct mds_update_record *rec) -{ - time_t now = cfs_time_current_sec(); - struct iattr *attr = &rec->ur_iattr; - unsigned int ia_valid = attr->ia_valid; - int error; - ENTRY; - - if (ia_valid & ATTR_RAW) - attr->ia_valid &= ~ATTR_RAW; - - if (!(ia_valid & ATTR_CTIME_SET)) - LTIME_S(attr->ia_ctime) = now; - else - attr->ia_valid &= ~ATTR_CTIME_SET; - if (!(ia_valid & ATTR_ATIME_SET)) - LTIME_S(attr->ia_atime) = now; - if (!(ia_valid & ATTR_MTIME_SET)) - LTIME_S(attr->ia_mtime) = now; - - if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) - RETURN((attr->ia_valid & ~ATTR_ATTR_FLAG) ? -EPERM : 0); - - /* times */ - if ((ia_valid & (ATTR_MTIME|ATTR_ATIME)) == (ATTR_MTIME|ATTR_ATIME)) { - if (current->fsuid != inode->i_uid && - (error = ll_permission(inode, MAY_WRITE, NULL)) != 0) - RETURN(error); - } - - if (ia_valid & ATTR_SIZE && - /* NFSD hack for open(O_CREAT|O_TRUNC)=mknod+truncate (bug 5781) */ - !(rec->ur_uc.luc_fsuid == inode->i_uid && - ia_valid & MDS_OPEN_OWNEROVERRIDE)) { - if ((error = ll_permission(inode, MAY_WRITE, NULL)) != 0) - RETURN(error); - } - - if (ia_valid & (ATTR_UID | ATTR_GID)) { - /* chown */ - error = -EPERM; - if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) - RETURN(-EPERM); - if (attr->ia_uid == (uid_t) -1) - attr->ia_uid = inode->i_uid; - if (attr->ia_gid == (gid_t) -1) - attr->ia_gid = inode->i_gid; - if (!(ia_valid & ATTR_MODE)) - attr->ia_mode = inode->i_mode; - /* - * If the user or group of a non-directory has been - * changed by a non-root user, remove the setuid bit. - * 19981026 David C Niemi - * - * Changed this to apply to all users, including root, - * to avoid some races. This is the behavior we had in - * 2.0. The check for non-root was definitely wrong - * for 2.2 anyway, as it should have been using - * CAP_FSETID rather than fsuid -- 19990830 SD. - */ - if ((inode->i_mode & S_ISUID) == S_ISUID && - !S_ISDIR(inode->i_mode)) { - attr->ia_mode &= ~S_ISUID; - attr->ia_valid |= ATTR_MODE; - } - /* - * Likewise, if the user or group of a non-directory - * has been changed by a non-root user, remove the - * setgid bit UNLESS there is no group execute bit - * (this would be a file marked for mandatory - * locking). 19981026 David C Niemi - * - * Removed the fsuid check (see the comment above) -- - * 19990830 SD. - */ - if (((inode->i_mode & (S_ISGID | S_IXGRP)) == - (S_ISGID | S_IXGRP)) && !S_ISDIR(inode->i_mode)) { - attr->ia_mode &= ~S_ISGID; - attr->ia_valid |= ATTR_MODE; - } - } else if (ia_valid & ATTR_MODE) { - int mode = attr->ia_mode; - /* chmod */ - if (attr->ia_mode == (umode_t)-1) - mode = inode->i_mode; - attr->ia_mode = - (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); - } - RETURN(0); -} - -void mds_steal_ack_locks(struct ptlrpc_request *req) -{ - struct obd_export *exp = req->rq_export; - struct list_head *tmp; - struct ptlrpc_reply_state *oldrep; - struct ptlrpc_service *svc; - int i; - - /* CAVEAT EMPTOR: spinlock order */ - spin_lock(&exp->exp_lock); - list_for_each (tmp, &exp->exp_outstanding_replies) { - oldrep = list_entry(tmp, struct ptlrpc_reply_state,rs_exp_list); - - if (oldrep->rs_xid != req->rq_xid) - continue; - - if (lustre_msg_get_opc(oldrep->rs_msg) != - lustre_msg_get_opc(req->rq_reqmsg)) - CERROR ("Resent req xid "LPX64" has mismatched opc: " - "new %d old %d\n", req->rq_xid, - lustre_msg_get_opc(req->rq_reqmsg), - lustre_msg_get_opc(oldrep->rs_msg)); - - svc = oldrep->rs_service; - spin_lock (&svc->srv_lock); - - list_del_init (&oldrep->rs_exp_list); - - CWARN("Stealing %d locks from rs %p x"LPD64".t"LPD64 - " o%d NID %s\n", - oldrep->rs_nlocks, oldrep, - oldrep->rs_xid, oldrep->rs_transno, - lustre_msg_get_opc(oldrep->rs_msg), - libcfs_nid2str(exp->exp_connection->c_peer.nid)); - - for (i = 0; i < oldrep->rs_nlocks; i++) - ptlrpc_save_lock(req, - &oldrep->rs_locks[i], - oldrep->rs_modes[i]); - oldrep->rs_nlocks = 0; - - DEBUG_REQ(D_HA, req, "stole locks for"); - ptlrpc_schedule_difficult_reply (oldrep); - - spin_unlock (&svc->srv_lock); - break; - } - spin_unlock(&exp->exp_lock); -} -EXPORT_SYMBOL(mds_steal_ack_locks); -void mds_req_from_mcd(struct ptlrpc_request *req, struct mds_client_data *mcd) -{ - if (lustre_msg_get_opc(req->rq_reqmsg) == MDS_CLOSE) { - req->rq_transno = le64_to_cpu(mcd->mcd_last_close_transno); - lustre_msg_set_transno(req->rq_repmsg, req->rq_transno); - req->rq_status = le32_to_cpu(mcd->mcd_last_close_result); - lustre_msg_set_status(req->rq_repmsg, req->rq_status); - } else { - req->rq_transno = le64_to_cpu(mcd->mcd_last_transno); - lustre_msg_set_transno(req->rq_repmsg, req->rq_transno); - req->rq_status = le32_to_cpu(mcd->mcd_last_result); - lustre_msg_set_status(req->rq_repmsg, req->rq_status); - } - DEBUG_REQ(D_HA, req, "restoring transno "LPD64"/status %d", - req->rq_transno, req->rq_status); - - mds_steal_ack_locks(req); -} - -static void reconstruct_reint_setattr(struct mds_update_record *rec, - int offset, struct ptlrpc_request *req) -{ - struct mds_export_data *med = &req->rq_export->exp_mds_data; - struct mds_obd *obd = &req->rq_export->exp_obd->u.mds; - struct dentry *de; - struct mds_body *body; - - mds_req_from_mcd(req, med->med_mcd); - - de = mds_fid2dentry(obd, rec->ur_fid1, NULL); - if (IS_ERR(de)) { - LASSERT(PTR_ERR(de) == req->rq_status); - return; - } - - body = lustre_msg_buf(req->rq_repmsg, offset, sizeof(*body)); - mds_pack_inode2fid(&body->fid1, de->d_inode); - mds_pack_inode2body(body, de->d_inode); - - /* Don't return OST-specific attributes if we didn't just set them */ - if (rec->ur_iattr.ia_valid & ATTR_SIZE) - body->valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS; - if (rec->ur_iattr.ia_valid & (ATTR_MTIME | ATTR_MTIME_SET)) - body->valid |= OBD_MD_FLMTIME; - if (rec->ur_iattr.ia_valid & (ATTR_ATIME | ATTR_ATIME_SET)) - body->valid |= OBD_MD_FLATIME; - - l_dput(de); -} - -int mds_osc_setattr_async(struct obd_device *obd, __u32 uid, __u32 gid, - struct lov_mds_md *lmm, int lmm_size, - struct llog_cookie *logcookies, __u64 id, __u32 gen, - struct obd_capa *oc) -{ - struct mds_obd *mds = &obd->u.mds; - struct obd_trans_info oti = { 0 }; - struct obd_info oinfo = { { { 0 } } }; - int rc; - ENTRY; - - if (OBD_FAIL_CHECK(OBD_FAIL_MDS_OST_SETATTR)) - RETURN(0); - - /* first get memory EA */ - OBDO_ALLOC(oinfo.oi_oa); - if (!oinfo.oi_oa) - RETURN(-ENOMEM); - - LASSERT(lmm); - - rc = obd_unpackmd(mds->mds_osc_exp, &oinfo.oi_md, lmm, lmm_size); - if (rc < 0) { - CERROR("Error unpack md %p for inode "LPU64"\n", lmm, id); - GOTO(out, rc); - } - - rc = obd_checkmd(mds->mds_osc_exp, obd->obd_self_export, oinfo.oi_md); - if (rc) { - CERROR("Error revalidate lsm %p \n", oinfo.oi_md); - GOTO(out, rc); - } - - /* then fill oa */ - oinfo.oi_oa->o_uid = uid; - oinfo.oi_oa->o_gid = gid; - oinfo.oi_oa->o_id = oinfo.oi_md->lsm_object_id; - oinfo.oi_oa->o_gr = oinfo.oi_md->lsm_object_gr; - oinfo.oi_oa->o_valid |= OBD_MD_FLID | OBD_MD_FLGROUP | - OBD_MD_FLUID | OBD_MD_FLGID; - if (logcookies) { - oinfo.oi_oa->o_valid |= OBD_MD_FLCOOKIE; - oti.oti_logcookies = logcookies; - } - - oinfo.oi_oa->o_fid = id; - oinfo.oi_oa->o_generation = gen; - oinfo.oi_oa->o_valid |= OBD_MD_FLFID | OBD_MD_FLGENER; - oinfo.oi_capa = oc; - - /* do async setattr from mds to ost not waiting for responses. */ - rc = obd_setattr_async(mds->mds_osc_exp, &oinfo, &oti, NULL); - if (rc) - CDEBUG(D_INODE, "mds to ost setattr objid 0x"LPX64 - " on ost error %d\n", oinfo.oi_md->lsm_object_id, rc); -out: - if (oinfo.oi_md) - obd_free_memmd(mds->mds_osc_exp, &oinfo.oi_md); - OBDO_FREE(oinfo.oi_oa); - RETURN(rc); -} -EXPORT_SYMBOL(mds_osc_setattr_async); - -/* In the raw-setattr case, we lock the child inode. - * In the write-back case or if being called from open, the client holds a lock - * already. - * - * We use the ATTR_FROM_OPEN flag to tell these cases apart. */ -static int mds_reint_setattr(struct mds_update_record *rec, int offset, - struct ptlrpc_request *req, - struct lustre_handle *lh) -{ - unsigned int ia_valid = rec->ur_iattr.ia_valid; - struct mds_obd *mds = mds_req2mds(req); - struct obd_device *obd = req->rq_export->exp_obd; - struct mds_body *body; - struct dentry *de; - struct inode *inode = NULL; - struct lustre_handle lockh; - void *handle = NULL; - struct mds_logcancel_data *mlcd = NULL; - struct lov_mds_md *lmm = NULL; - struct llog_cookie *logcookies = NULL; - int lmm_size = 0, need_lock = 1, cookie_size = 0; - int rc = 0, cleanup_phase = 0, err, locked = 0, sync = 0; - unsigned int qcids[MAXQUOTAS] = { 0, 0 }; - unsigned int qpids[MAXQUOTAS] = { rec->ur_iattr.ia_uid, - rec->ur_iattr.ia_gid }; - ENTRY; - - LASSERT(offset == REQ_REC_OFF); - offset = REPLY_REC_OFF; - - DEBUG_REQ(D_INODE, req, "setattr "LPU64"/%u %x", rec->ur_fid1->id, - rec->ur_fid1->generation, rec->ur_iattr.ia_valid); - OBD_COUNTER_INCREMENT(obd, setattr); - - MDS_CHECK_RESENT(req, reconstruct_reint_setattr(rec, offset, req)); - - if (rec->ur_dlm) - ldlm_request_cancel(req, rec->ur_dlm, 0); - - if (rec->ur_iattr.ia_valid & ATTR_FROM_OPEN || - (req->rq_export->exp_connect_flags & OBD_CONNECT_RDONLY)) { - de = mds_fid2dentry(mds, rec->ur_fid1, NULL); - if (IS_ERR(de)) - GOTO(cleanup, rc = PTR_ERR(de)); - if (req->rq_export->exp_connect_flags & OBD_CONNECT_RDONLY) - GOTO(cleanup, rc = -EROFS); - } else { - __u64 lockpart = MDS_INODELOCK_UPDATE; - if (rec->ur_iattr.ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) - lockpart |= MDS_INODELOCK_LOOKUP; - - de = mds_fid2locked_dentry(obd, rec->ur_fid1, NULL, LCK_EX, - &lockh, lockpart); - if (IS_ERR(de)) - GOTO(cleanup, rc = PTR_ERR(de)); - locked = 1; - } - - cleanup_phase = 1; - inode = de->d_inode; - LASSERT(inode); - - if ((rec->ur_iattr.ia_valid & ATTR_FROM_OPEN) || - (rec->ur_iattr.ia_valid & ATTR_SIZE)) { - /* Check write access for the O_TRUNC case */ - if (mds_query_write_access(inode) < 0) - GOTO(cleanup, rc = -ETXTBSY); - } - - /* save uid/gid for quota acq/rel */ - qcids[USRQUOTA] = inode->i_uid; - qcids[GRPQUOTA] = inode->i_gid; - - if ((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)) && - rec->ur_eadata != NULL) { - LOCK_INODE_MUTEX(inode); - need_lock = 0; - } - - OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_SETATTR_WRITE, inode->i_sb); - - /* start a log jounal handle if needed */ - if (S_ISREG(inode->i_mode) && - rec->ur_iattr.ia_valid & (ATTR_UID | ATTR_GID)) { - lmm_size = mds->mds_max_mdsize; - OBD_ALLOC(lmm, lmm_size); - if (lmm == NULL) - GOTO(cleanup, rc = -ENOMEM); - - cleanup_phase = 2; - rc = mds_get_md(obd, inode, lmm, &lmm_size, need_lock); - if (rc < 0) - GOTO(cleanup, rc); - rc = 0; - - handle = fsfilt_start_log(obd, inode, FSFILT_OP_SETATTR, NULL, - le32_to_cpu(lmm->lmm_stripe_count)); - } else { - handle = fsfilt_start(obd, inode, FSFILT_OP_SETATTR, NULL); - } - if (IS_ERR(handle)) - GOTO(cleanup, rc = PTR_ERR(handle)); - - if (rec->ur_iattr.ia_valid & (ATTR_MTIME | ATTR_CTIME)) - CDEBUG(D_INODE, "setting mtime %lu, ctime %lu\n", - LTIME_S(rec->ur_iattr.ia_mtime), - LTIME_S(rec->ur_iattr.ia_ctime)); - rc = mds_fix_attr(inode, rec); - if (rc) - GOTO(cleanup, rc); - - if (rec->ur_iattr.ia_valid & ATTR_ATTR_FLAG) { /* ioctl */ - rc = fsfilt_iocontrol(obd, inode, NULL, EXT3_IOC_SETFLAGS, - (long)&rec->ur_flags); - } else if (rec->ur_iattr.ia_valid) { /* setattr */ - rc = fsfilt_setattr(obd, de, handle, &rec->ur_iattr, 0); - /* journal chown/chgrp in llog, just like unlink */ - if (rc == 0 && lmm_size){ - cookie_size = mds_get_cookie_size(obd, lmm); - OBD_ALLOC(logcookies, cookie_size); - if (logcookies == NULL) - GOTO(cleanup, rc = -ENOMEM); - - if (mds_log_op_setattr(obd, inode->i_uid, inode->i_gid, - lmm, lmm_size, - logcookies, cookie_size) <= 0) { - OBD_FREE(logcookies, cookie_size); - logcookies = NULL; - } - } - } - - if (rc == 0 && (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)) && - rec->ur_eadata != NULL) { - struct lov_stripe_md *lsm = NULL; - struct lov_user_md *lum = NULL; - - rc = ll_permission(inode, MAY_WRITE, NULL); - if (rc < 0) - GOTO(cleanup, rc); - - lum = rec->ur_eadata; - /* if { size, offset, count } = { 0, -1, 0 } (i.e. all default - * values specified) then delete default striping from dir. */ - if (S_ISDIR(inode->i_mode) && - ((lum->lmm_stripe_size == 0 && - lum->lmm_stripe_offset == - (typeof(lum->lmm_stripe_offset))(-1) && - lum->lmm_stripe_count == 0) || - /* lmm_stripe_size == -1 is deprecated in 1.4.6 */ - lum->lmm_stripe_size == - (typeof(lum->lmm_stripe_size))(-1))){ - rc = fsfilt_set_md(obd, inode, handle, NULL, 0, "lov"); - if (rc) - GOTO(cleanup, rc); - } else { - rc = obd_iocontrol(OBD_IOC_LOV_SETSTRIPE, - mds->mds_osc_exp, 0, - &lsm, rec->ur_eadata); - if (rc) - GOTO(cleanup, rc); - - obd_free_memmd(mds->mds_osc_exp, &lsm); - - rc = fsfilt_set_md(obd, inode, handle, rec->ur_eadata, - rec->ur_eadatalen, "lov"); - if (rc) - GOTO(cleanup, rc); - } - } - - body = lustre_msg_buf(req->rq_repmsg, offset, sizeof(*body)); - mds_pack_inode2fid(&body->fid1, inode); - mds_pack_inode2body(body, inode); - - /* don't return OST-specific attributes if we didn't just set them. */ - if (ia_valid & ATTR_SIZE) - body->valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS; - if (ia_valid & (ATTR_MTIME | ATTR_MTIME_SET)) - body->valid |= OBD_MD_FLMTIME; - if (ia_valid & (ATTR_ATIME | ATTR_ATIME_SET)) - body->valid |= OBD_MD_FLATIME; - - if (rc == 0 && rec->ur_cookielen && !IS_ERR(mds->mds_osc_obd)) { - OBD_ALLOC(mlcd, sizeof(*mlcd) + rec->ur_cookielen + - rec->ur_eadatalen); - if (mlcd) { - mlcd->mlcd_size = sizeof(*mlcd) + rec->ur_cookielen + - rec->ur_eadatalen; - mlcd->mlcd_eadatalen = rec->ur_eadatalen; - mlcd->mlcd_cookielen = rec->ur_cookielen; - mlcd->mlcd_lmm = (void *)&mlcd->mlcd_cookies + - mlcd->mlcd_cookielen; - memcpy(&mlcd->mlcd_cookies, rec->ur_logcookies, - mlcd->mlcd_cookielen); - memcpy(mlcd->mlcd_lmm, rec->ur_eadata, - mlcd->mlcd_eadatalen); - } else { - CERROR("unable to allocate log cancel data\n"); - } - } - EXIT; - cleanup: - if (mlcd != NULL) - sync = fsfilt_add_journal_cb(req->rq_export->exp_obd, 0, handle, - mds_cancel_cookies_cb, mlcd); - err = mds_finish_transno(mds, inode, handle, req, rc, 0, sync); - /* do mds to ost setattr if needed */ - if (!rc && !err && lmm_size) - mds_osc_setattr_async(obd, inode->i_ino, inode->i_generation, lmm, - lmm_size, logcookies, rec->ur_fid1->id, - rec->ur_fid1->generation, NULL); - - switch (cleanup_phase) { - case 2: - OBD_FREE(lmm, mds->mds_max_mdsize); - if (logcookies) - OBD_FREE(logcookies, cookie_size); - case 1: - if ((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)) && - rec->ur_eadata != NULL) - UNLOCK_INODE_MUTEX(inode); - l_dput(de); - if (locked) { - if (rc) { - ldlm_lock_decref(&lockh, LCK_EX); - } else { - ptlrpc_save_lock (req, &lockh, LCK_EX); - } - } - case 0: - break; - default: - LBUG(); - } - if (err && !rc) - rc = err; - - req->rq_status = rc; - - /* trigger dqrel/dqacq for original owner and new owner */ - if (ia_valid & (ATTR_UID | ATTR_GID)) - lquota_adjust(mds_quota_interface_ref, obd, qcids, qpids, rc, - FSFILT_OP_SETATTR); - - return 0; -} - -static void reconstruct_reint_create(struct mds_update_record *rec, int offset, - struct ptlrpc_request *req) -{ - struct mds_export_data *med = &req->rq_export->exp_mds_data; - struct mds_obd *obd = &req->rq_export->exp_obd->u.mds; - struct dentry *parent, *child; - struct mds_body *body; - - mds_req_from_mcd(req, med->med_mcd); - - if (req->rq_status) - return; - - parent = mds_fid2dentry(obd, rec->ur_fid1, NULL); - LASSERT(!IS_ERR(parent)); - child = ll_lookup_one_len(rec->ur_name, parent, rec->ur_namelen - 1); - LASSERT(!IS_ERR(child)); - - body = lustre_msg_buf(req->rq_repmsg, offset, sizeof(*body)); - mds_pack_inode2fid(&body->fid1, child->d_inode); - mds_pack_inode2body(body, child->d_inode); - - l_dput(parent); - l_dput(child); -} - -static int mds_reint_create(struct mds_update_record *rec, int offset, - struct ptlrpc_request *req, - struct lustre_handle *lh) -{ - struct dentry *dparent = NULL; - struct mds_obd *mds = mds_req2mds(req); - struct obd_device *obd = req->rq_export->exp_obd; - struct dentry *dchild = NULL; - struct inode *dir = NULL; - void *handle = NULL; - struct lustre_handle lockh; - int rc = 0, err, type = rec->ur_mode & S_IFMT, cleanup_phase = 0; - int created = 0; - unsigned int qcids[MAXQUOTAS] = { current->fsuid, current->fsgid }; - unsigned int qpids[MAXQUOTAS] = { 0, 0 }; - struct lvfs_dentry_params dp = LVFS_DENTRY_PARAMS_INIT; - ENTRY; - - LASSERT(offset == REQ_REC_OFF); - offset = REPLY_REC_OFF; - - LASSERT(!strcmp(req->rq_export->exp_obd->obd_type->typ_name, - LUSTRE_MDS_NAME)); - - DEBUG_REQ(D_INODE, req, "parent "LPU64"/%u name %s mode %o", - rec->ur_fid1->id, rec->ur_fid1->generation, - rec->ur_name, rec->ur_mode); - - MDS_CHECK_RESENT(req, reconstruct_reint_create(rec, offset, req)); - - if (OBD_FAIL_CHECK(OBD_FAIL_MDS_REINT_CREATE)) - GOTO(cleanup, rc = -ESTALE); - - if (rec->ur_dlm) - ldlm_request_cancel(req, rec->ur_dlm, 0); - - dparent = mds_fid2locked_dentry(obd, rec->ur_fid1, NULL, LCK_EX, &lockh, - MDS_INODELOCK_UPDATE); - if (IS_ERR(dparent)) { - rc = PTR_ERR(dparent); - if (rc != -ENOENT) - CERROR("parent "LPU64"/%u lookup error %d\n", - rec->ur_fid1->id, rec->ur_fid1->generation, rc); - GOTO(cleanup, rc); - } - cleanup_phase = 1; /* locked parent dentry */ - dir = dparent->d_inode; - LASSERT(dir); - - ldlm_lock_dump_handle(D_OTHER, &lockh); - - dchild = ll_lookup_one_len(rec->ur_name, dparent, rec->ur_namelen - 1); - if (IS_ERR(dchild)) { - rc = PTR_ERR(dchild); - if (rc != -ENAMETOOLONG) - CERROR("child lookup error %d\n", rc); - GOTO(cleanup, rc); - } - - cleanup_phase = 2; /* child dentry */ - - OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_CREATE_WRITE, dir->i_sb); - - if (req->rq_export->exp_connect_flags & OBD_CONNECT_RDONLY) { - if (dchild->d_inode) - GOTO(cleanup, rc = -EEXIST); - GOTO(cleanup, rc = -EROFS); - } - - if (dir->i_mode & S_ISGID && S_ISDIR(rec->ur_mode)) - rec->ur_mode |= S_ISGID; - - dchild->d_fsdata = (void *)&dp; - dp.ldp_inum = (unsigned long)rec->ur_fid2->id; - dp.ldp_ptr = req; - - switch (type) { - case S_IFREG:{ - handle = fsfilt_start(obd, dir, FSFILT_OP_CREATE, NULL); - if (IS_ERR(handle)) - GOTO(cleanup, rc = PTR_ERR(handle)); - rc = ll_vfs_create(dir, dchild, rec->ur_mode, NULL); - mds_counter_incr(req->rq_export, LPROC_MDS_MKNOD); - EXIT; - break; - } - case S_IFDIR:{ - handle = fsfilt_start(obd, dir, FSFILT_OP_MKDIR, NULL); - if (IS_ERR(handle)) - GOTO(cleanup, rc = PTR_ERR(handle)); - rc = vfs_mkdir(dir, dchild, rec->ur_mode); - mds_counter_incr(req->rq_export, LPROC_MDS_MKDIR); - EXIT; - break; - } - case S_IFLNK:{ - handle = fsfilt_start(obd, dir, FSFILT_OP_SYMLINK, NULL); - if (IS_ERR(handle)) - GOTO(cleanup, rc = PTR_ERR(handle)); - if (rec->ur_tgt == NULL) /* no target supplied */ - rc = -EINVAL; /* -EPROTO? */ - else - rc = ll_vfs_symlink(dir, dchild, rec->ur_tgt, S_IALLUGO); - mds_counter_incr(req->rq_export, LPROC_MDS_MKNOD); - EXIT; - break; - } - case S_IFCHR: - case S_IFBLK: - case S_IFIFO: - case S_IFSOCK:{ - int rdev = rec->ur_rdev; - handle = fsfilt_start(obd, dir, FSFILT_OP_MKNOD, NULL); - if (IS_ERR(handle)) - GOTO(cleanup, rc = PTR_ERR(handle)); - rc = vfs_mknod(dir, dchild, rec->ur_mode, rdev); - mds_counter_incr(req->rq_export, LPROC_MDS_MKNOD); - EXIT; - break; - } - default: - CERROR("bad file type %o creating %s\n", type, rec->ur_name); - dchild->d_fsdata = NULL; - GOTO(cleanup, rc = -EINVAL); - } - - /* In case we stored the desired inum in here, we want to clean up. */ - if (dchild->d_fsdata == (void *)(unsigned long)rec->ur_fid2->id) - dchild->d_fsdata = NULL; - - if (rc) { - CDEBUG(D_INODE, "error during create: %d\n", rc); - GOTO(cleanup, rc); - } else { - struct iattr iattr; - struct inode *inode = dchild->d_inode; - struct mds_body *body; - - created = 1; - LTIME_S(iattr.ia_atime) = rec->ur_time; - LTIME_S(iattr.ia_ctime) = rec->ur_time; - LTIME_S(iattr.ia_mtime) = rec->ur_time; - iattr.ia_uid = current->fsuid; /* set by push_ctxt already */ - if (dir->i_mode & S_ISGID) - iattr.ia_gid = dir->i_gid; - else - iattr.ia_gid = current->fsgid; - iattr.ia_valid = ATTR_UID | ATTR_GID | ATTR_ATIME | - ATTR_MTIME | ATTR_CTIME; - - if (rec->ur_fid2->id) { - LASSERT(rec->ur_fid2->id == inode->i_ino); - inode->i_generation = rec->ur_fid2->generation; - /* Dirtied and committed by the upcoming setattr. */ - CDEBUG(D_INODE, "recreated ino %lu with gen %u\n", - inode->i_ino, inode->i_generation); - } else { - CDEBUG(D_INODE, "created ino %lu with gen %x\n", - inode->i_ino, inode->i_generation); - } - - rc = fsfilt_setattr(obd, dchild, handle, &iattr, 0); - if (rc) - CERROR("error on child setattr: rc = %d\n", rc); - - iattr.ia_valid = ATTR_MTIME | ATTR_CTIME; - rc = fsfilt_setattr(obd, dparent, handle, &iattr, 0); - if (rc) - CERROR("error on parent setattr: rc = %d\n", rc); - - if (S_ISDIR(inode->i_mode)) { - struct lov_mds_md lmm; - int lmm_size = sizeof(lmm); - rc = mds_get_md(obd, dir, &lmm, &lmm_size, 1); - if (rc > 0) { - LOCK_INODE_MUTEX(inode); - rc = fsfilt_set_md(obd, inode, handle, - &lmm, lmm_size, "lov"); - UNLOCK_INODE_MUTEX(inode); - } - if (rc) - CERROR("error on copy stripe info: rc = %d\n", - rc); - } - - body = lustre_msg_buf(req->rq_repmsg, offset, sizeof(*body)); - mds_pack_inode2fid(&body->fid1, inode); - mds_pack_inode2body(body, inode); - } - EXIT; - -cleanup: - err = mds_finish_transno(mds, dir, handle, req, rc, 0, 0); - - if (rc && created) { - /* Destroy the file we just created. This should not need - * extra journal credits, as we have already modified all of - * the blocks needed in order to create the file in the first - * place. - */ - switch (type) { - case S_IFDIR: - err = vfs_rmdir(dir, dchild); - if (err) - CERROR("rmdir in error path: %d\n", err); - break; - default: - err = vfs_unlink(dir, dchild); - if (err) - CERROR("unlink in error path: %d\n", err); - break; - } - } else if (created) { - /* The inode we were allocated may have just been freed - * by an unlink operation. We take this lock to - * synchronize against the matching reply-ack-lock taken - * in unlink, to avoid replay problems if this reply - * makes it out to the client but the unlink's does not. - * See bug 2029 for more detail.*/ - mds_lock_new_child(obd, dchild->d_inode, NULL); - /* save uid/gid of create inode and parent */ - qpids[USRQUOTA] = dir->i_uid; - qpids[GRPQUOTA] = dir->i_gid; - } else { - rc = err; - } - - switch (cleanup_phase) { - case 2: /* child dentry */ - l_dput(dchild); - case 1: /* locked parent dentry */ - if (rc) { - ldlm_lock_decref(&lockh, LCK_EX); - } else { - ptlrpc_save_lock (req, &lockh, LCK_EX); - } - l_dput(dparent); - case 0: - break; - default: - CERROR("invalid cleanup_phase %d\n", cleanup_phase); - LBUG(); - } - req->rq_status = rc; - - /* trigger dqacq on the owner of child and parent */ - lquota_adjust(mds_quota_interface_ref, obd, qcids, qpids, rc, - FSFILT_OP_CREATE); - return 0; -} - -int res_gt(const struct ldlm_res_id *res1, const struct ldlm_res_id *res2, - ldlm_policy_data_t *p1, ldlm_policy_data_t *p2) -{ - int i; - - for (i = 0; i < RES_NAME_SIZE; i++) { - /* return 1 here, because enqueue_ordered will skip resources - * of all zeroes if they're sorted to the end of the list. */ - if (res1->name[i] == 0 && res2->name[i] != 0) - return 1; - if (res2->name[i] == 0 && res1->name[i] != 0) - return 0; - - if (res1->name[i] > res2->name[i]) - return 1; - if (res1->name[i] < res2->name[i]) - return 0; - } - if (!p1 || !p2) - return 0; - if (memcmp(p1, p2, sizeof(*p1)) < 0) - return 1; - return 0; -} - -/* This function doesn't use ldlm_match_or_enqueue because we're always called - * with EX or PW locks, and the MDS is no longer allowed to match write locks, - * because they take the place of local semaphores. - * - * One or two locks are taken in numerical order. A res_id->name[0] of 0 means - * no lock is taken for that res_id. Must be at least one non-zero res_id. */ -int enqueue_ordered_locks(struct obd_device *obd, - const struct ldlm_res_id *p1_res_id, - struct lustre_handle *p1_lockh, int p1_lock_mode, - ldlm_policy_data_t *p1_policy, - const struct ldlm_res_id *p2_res_id, - struct lustre_handle *p2_lockh, int p2_lock_mode, - ldlm_policy_data_t *p2_policy) -{ - const struct ldlm_res_id *res_id[2] = { p1_res_id, p2_res_id }; - struct lustre_handle *handles[2] = { p1_lockh, p2_lockh }; - int lock_modes[2] = { p1_lock_mode, p2_lock_mode }; - ldlm_policy_data_t *policies[2] = {p1_policy, p2_policy}; - int rc, flags; - ENTRY; - - LASSERT(p1_res_id != NULL && p2_res_id != NULL); - - CDEBUG(D_INFO, "locks before: "LPU64"/"LPU64"\n", - res_id[0]->name[0], res_id[1]->name[0]); - - if (res_gt(p1_res_id, p2_res_id, p1_policy, p2_policy)) { - handles[1] = p1_lockh; - handles[0] = p2_lockh; - res_id[1] = p1_res_id; - res_id[0] = p2_res_id; - lock_modes[1] = p1_lock_mode; - lock_modes[0] = p2_lock_mode; - policies[1] = p1_policy; - policies[0] = p2_policy; - } - - CDEBUG(D_DLMTRACE, "lock order: "LPU64"/"LPU64"\n", - res_id[0]->name[0], res_id[1]->name[0]); - - flags = LDLM_FL_LOCAL_ONLY | LDLM_FL_ATOMIC_CB; - rc = ldlm_cli_enqueue_local(obd->obd_namespace, res_id[0], - LDLM_IBITS, policies[0], lock_modes[0], - &flags, ldlm_blocking_ast, - ldlm_completion_ast, NULL, NULL, 0, - NULL, handles[0]); - if (rc != ELDLM_OK) - RETURN(-EIO); - ldlm_lock_dump_handle(D_OTHER, handles[0]); - - if (memcmp(res_id[0], res_id[1], sizeof(*res_id[0])) == 0 && - (policies[0]->l_inodebits.bits & policies[1]->l_inodebits.bits)) { - memcpy(handles[1], handles[0], sizeof(*(handles[1]))); - ldlm_lock_addref(handles[1], lock_modes[1]); - } else if (res_id[1]->name[0] != 0) { - flags = LDLM_FL_LOCAL_ONLY | LDLM_FL_ATOMIC_CB; - rc = ldlm_cli_enqueue_local(obd->obd_namespace, res_id[1], - LDLM_IBITS, policies[1], - lock_modes[1], &flags, - ldlm_blocking_ast, - ldlm_completion_ast, NULL, NULL, - 0, NULL, handles[1]); - if (rc != ELDLM_OK) { - ldlm_lock_decref(handles[0], lock_modes[0]); - RETURN(-EIO); - } - ldlm_lock_dump_handle(D_OTHER, handles[1]); - } - - RETURN(0); -} - -static inline int res_eq(const struct ldlm_res_id *res1, - const struct ldlm_res_id *res2) -{ - return !memcmp(res1, res2, sizeof(*res1)); -} - -static inline void -try_to_aggregate_locks(const struct ldlm_res_id *res1, ldlm_policy_data_t *p1, - const struct ldlm_res_id *res2, ldlm_policy_data_t *p2) -{ - if (!res_eq(res1, res2)) - return; - /* XXX: any additional inodebits (to current LOOKUP and UPDATE) - * should be taken with great care here */ - p1->l_inodebits.bits |= p2->l_inodebits.bits; -} - -int enqueue_4ordered_locks(struct obd_device *obd, - const struct ldlm_res_id *p1_res_id, - struct lustre_handle *p1_lockh, int p1_lock_mode, - ldlm_policy_data_t *p1_policy, - const struct ldlm_res_id *p2_res_id, - struct lustre_handle *p2_lockh, int p2_lock_mode, - ldlm_policy_data_t *p2_policy, - const struct ldlm_res_id *c1_res_id, - struct lustre_handle *c1_lockh, int c1_lock_mode, - ldlm_policy_data_t *c1_policy, - const struct ldlm_res_id *c2_res_id, - struct lustre_handle *c2_lockh, int c2_lock_mode, - ldlm_policy_data_t *c2_policy) -{ - const struct ldlm_res_id *res_id[5] = { p1_res_id, p2_res_id, - c1_res_id, c2_res_id }; - struct lustre_handle *dlm_handles[5] = { p1_lockh, p2_lockh, - c1_lockh, c2_lockh }; - int lock_modes[5] = { p1_lock_mode, p2_lock_mode, - c1_lock_mode, c2_lock_mode }; - ldlm_policy_data_t *policies[5] = {p1_policy, p2_policy, - c1_policy, c2_policy}; - int rc, i, j, sorted, flags; - ENTRY; - - CDEBUG(D_DLMTRACE, "locks before: "LPU64"/"LPU64"/"LPU64"/"LPU64"\n", - res_id[0]->name[0], res_id[1]->name[0], res_id[2]->name[0], - res_id[3]->name[0]); - - /* simple insertion sort - we have at most 4 elements */ - for (i = 1; i < 4; i++) { - j = i - 1; - dlm_handles[4] = dlm_handles[i]; - res_id[4] = res_id[i]; - lock_modes[4] = lock_modes[i]; - policies[4] = policies[i]; - - sorted = 0; - do { - if (res_gt(res_id[j], res_id[4], policies[j], - policies[4])) { - dlm_handles[j + 1] = dlm_handles[j]; - res_id[j + 1] = res_id[j]; - lock_modes[j + 1] = lock_modes[j]; - policies[j + 1] = policies[j]; - j--; - } else { - sorted = 1; - } - } while (j >= 0 && !sorted); - - dlm_handles[j + 1] = dlm_handles[4]; - res_id[j + 1] = res_id[4]; - lock_modes[j + 1] = lock_modes[4]; - policies[j + 1] = policies[4]; - } - - CDEBUG(D_DLMTRACE, "lock order: "LPU64"/"LPU64"/"LPU64"/"LPU64"\n", - res_id[0]->name[0], res_id[1]->name[0], res_id[2]->name[0], - res_id[3]->name[0]); - - /* XXX we could send ASTs on all these locks first before blocking? */ - for (i = 0; i < 4; i++) { - flags = LDLM_FL_ATOMIC_CB; - if (res_id[i]->name[0] == 0) - break; - if (i && res_eq(res_id[i], res_id[i-1])) { - memcpy(dlm_handles[i], dlm_handles[i-1], - sizeof(*(dlm_handles[i]))); - ldlm_lock_addref(dlm_handles[i], lock_modes[i]); - } else { - /* we need to enqueue locks with different inodebits - * at once, because otherwise concurrent thread can - * hit the windown between these two locks and we'll - * get to deadlock. see bug 10360. note also, that it - * is impossible to have >2 equal res. */ - if (i < 3) - try_to_aggregate_locks(res_id[i], policies[i], - res_id[i+1], policies[i+1]); - rc = ldlm_cli_enqueue_local(obd->obd_namespace, - res_id[i], LDLM_IBITS, - policies[i], lock_modes[i], - &flags, ldlm_blocking_ast, - ldlm_completion_ast, NULL, - NULL, 0, NULL, - dlm_handles[i]); - if (rc != ELDLM_OK) - GOTO(out_err, rc = -EIO); - ldlm_lock_dump_handle(D_OTHER, dlm_handles[i]); - } - } - - RETURN(0); -out_err: - while (i-- > 0) - ldlm_lock_decref(dlm_handles[i], lock_modes[i]); - - return rc; -} - -/* In the unlikely case that the child changed while we were waiting - * on the lock, we need to drop the lock on the old child and either: - * - if the child has a lower resource name, then we have to also - * drop the parent lock and regain the locks in the right order - * - in the rename case, if the child has a lower resource name than one of - * the other parent/child resources (maxres) we also need to reget the locks - * - if the child has a higher resource name (this is the common case) - * we can just get the lock on the new child (still in lock order) - * - * Returns 0 if the child did not change or if it changed but could be locked. - * Returns 1 if the child changed and we need to re-lock (no locks held). - * Returns -ve error with a valid dchild (no locks held). */ -static int mds_verify_child(struct obd_device *obd, - const struct ldlm_res_id *parent_res_id, - struct lustre_handle *parent_lockh, - struct dentry *dparent, int parent_mode, - struct ldlm_res_id *child_res_id, - struct lustre_handle *child_lockh, - struct dentry **dchildp, int child_mode, - ldlm_policy_data_t *child_policy, - const char *name, int namelen, - const struct ldlm_res_id *maxres) -{ - struct dentry *vchild, *dchild = *dchildp; - int rc = 0, cleanup_phase = 2; /* parent, child locks */ - ENTRY; - - vchild = ll_lookup_one_len(name, dparent, namelen - 1); - if (IS_ERR(vchild)) - GOTO(cleanup, rc = PTR_ERR(vchild)); - - if (likely((vchild->d_inode == NULL && child_res_id->name[0] == 0) || - (vchild->d_inode != NULL && - child_res_id->name[0] == vchild->d_inode->i_ino && - child_res_id->name[1] == vchild->d_inode->i_generation))) { - if (dchild != NULL) - l_dput(dchild); - *dchildp = vchild; - - RETURN(0); - } - - CDEBUG(D_DLMTRACE, "child inode changed: %p != %p (%lu != "LPU64")\n", - vchild->d_inode, dchild ? dchild->d_inode : 0, - vchild->d_inode ? vchild->d_inode->i_ino : 0, - child_res_id->name[0]); - if (child_res_id->name[0] != 0) - ldlm_lock_decref(child_lockh, child_mode); - if (dchild) - l_dput(dchild); - - cleanup_phase = 1; /* parent lock only */ - *dchildp = dchild = vchild; - - if (dchild->d_inode) { - int flags = LDLM_FL_ATOMIC_CB; - child_res_id->name[0] = dchild->d_inode->i_ino; - child_res_id->name[1] = dchild->d_inode->i_generation; - - /* Make sure that we don't try to re-enqueue a lock on the - * same resource if it happens that the source is renamed to - * the target by another thread (bug 9974, thanks racer :-) */ - if (!res_gt(child_res_id, parent_res_id, NULL, NULL) || - !res_gt(child_res_id, maxres, NULL, NULL)) { - CDEBUG(D_DLMTRACE, "relock "LPU64"<("LPU64"|"LPU64")\n", - child_res_id->name[0], parent_res_id->name[0], - maxres->name[0]); - GOTO(cleanup, rc = 1); - } - - rc = ldlm_cli_enqueue_local(obd->obd_namespace, child_res_id, - LDLM_IBITS, child_policy, - child_mode, &flags, - ldlm_blocking_ast, - ldlm_completion_ast, NULL, - NULL, 0, NULL, child_lockh); - if (rc != ELDLM_OK) - GOTO(cleanup, rc = -EIO); - } else { - memset(child_res_id, 0, sizeof(*child_res_id)); - } - - EXIT; -cleanup: - if (rc) { - switch(cleanup_phase) { - case 2: - if (child_res_id->name[0] != 0) - ldlm_lock_decref(child_lockh, child_mode); - case 1: - ldlm_lock_decref(parent_lockh, parent_mode); - } - } - return rc; -} - -#define INODE_CTIME_AGE (10) -#define INODE_CTIME_OLD(inode) (LTIME_S(inode->i_ctime) + \ - INODE_CTIME_AGE < cfs_time_current_sec()) - -int mds_get_parent_child_locked(struct obd_device *obd, struct mds_obd *mds, - struct ll_fid *fid, - struct lustre_handle *parent_lockh, - struct dentry **dparentp, int parent_mode, - __u64 parent_lockpart, - char *name, int namelen, - struct lustre_handle *child_lockh, - struct dentry **dchildp, int child_mode, - __u64 child_lockpart) -{ - struct ldlm_res_id child_res_id = { .name = {0} }; - struct ldlm_res_id parent_res_id = { .name = {0} }; - ldlm_policy_data_t parent_policy = {.l_inodebits = { parent_lockpart }}; - ldlm_policy_data_t child_policy = {.l_inodebits = { child_lockpart }}; - struct inode *inode; - int rc = 0, cleanup_phase = 0; - ENTRY; - - /* Step 1: Lookup parent */ - *dparentp = mds_fid2dentry(mds, fid, NULL); - if (IS_ERR(*dparentp)) { - rc = PTR_ERR(*dparentp); - *dparentp = NULL; - RETURN(rc); - } - - CDEBUG(D_INODE, "parent ino %lu, name %s\n", - (*dparentp)->d_inode->i_ino, name); - - parent_res_id.name[0] = (*dparentp)->d_inode->i_ino; - parent_res_id.name[1] = (*dparentp)->d_inode->i_generation; - - cleanup_phase = 1; /* parent dentry */ - - /* Step 2: Lookup child (without DLM lock, to get resource name) */ - *dchildp = ll_lookup_one_len(name, *dparentp, namelen - 1); - if (IS_ERR(*dchildp)) { - rc = PTR_ERR(*dchildp); - CDEBUG(D_INODE, "child lookup error %d\n", rc); - GOTO(cleanup, rc); - } - - cleanup_phase = 2; /* child dentry */ - inode = (*dchildp)->d_inode; - if (inode != NULL) { - if (is_bad_inode(inode)) { - CERROR("bad inode returned %lu/%u\n", - inode->i_ino, inode->i_generation); - GOTO(cleanup, rc = -ENOENT); - } - inode = igrab(inode); - } - if (inode == NULL) - goto retry_locks; - - child_res_id.name[0] = inode->i_ino; - child_res_id.name[1] = inode->i_generation; - - /* If we want a LCK_CR for a directory, and this directory has not been - changed for some time, we return not only a LOOKUP lock, but also an - UPDATE lock to have negative dentry starts working for this dir. - Also we apply same logic to non-directories. If the file is rarely - changed - we return both locks and this might save us RPC on - later STAT. */ - if ((child_mode & (LCK_CR|LCK_PR|LCK_CW)) && INODE_CTIME_OLD(inode)) - child_policy.l_inodebits.bits |= MDS_INODELOCK_UPDATE; - - iput(inode); - -retry_locks: - cleanup_phase = 2; /* child dentry */ - - /* Step 3: Lock parent and child in resource order. If child doesn't - * exist, we still have to lock the parent and re-lookup. */ - rc = enqueue_ordered_locks(obd,&parent_res_id,parent_lockh,parent_mode, - &parent_policy, - &child_res_id, child_lockh, child_mode, - &child_policy); - if (rc) - GOTO(cleanup, rc); - - if (!(*dchildp)->d_inode) - cleanup_phase = 3; /* parent lock */ - else - cleanup_phase = 4; /* child lock */ - - /* Step 4: Re-lookup child to verify it hasn't changed since locking */ - rc = mds_verify_child(obd, &parent_res_id, parent_lockh, *dparentp, - parent_mode, &child_res_id, child_lockh, dchildp, - child_mode,&child_policy, name, namelen, &parent_res_id); - if (rc > 0) - goto retry_locks; - if (rc < 0) { - cleanup_phase = 2; - GOTO(cleanup, rc); - } - -cleanup: - if (rc) { - switch (cleanup_phase) { - case 4: - ldlm_lock_decref(child_lockh, child_mode); - case 3: - ldlm_lock_decref(parent_lockh, parent_mode); - case 2: - l_dput(*dchildp); - case 1: - l_dput(*dparentp); - default: ; - } - } - return rc; -} - -void mds_reconstruct_generic(struct ptlrpc_request *req) -{ - struct mds_export_data *med = &req->rq_export->exp_mds_data; - - mds_req_from_mcd(req, med->med_mcd); -} - -/* If we are unlinking an open file/dir (i.e. creating an orphan) then - * we instead link the inode into the PENDING directory until it is - * finally released. We can't simply call mds_reint_rename() or some - * part thereof, because we don't have the inode to check for link - * count/open status until after it is locked. - * - * For lock ordering, caller must get child->i_mutex first, then - * pending->i_mutex before starting journal transaction. - * - * returns 1 on success - * returns 0 if we lost a race and didn't make a new link - * returns negative on error - */ -static int mds_orphan_add_link(struct mds_update_record *rec, - struct obd_device *obd, struct dentry *dentry) -{ - struct mds_obd *mds = &obd->u.mds; - struct inode *pending_dir = mds->mds_pending_dir->d_inode; - struct inode *inode = dentry->d_inode; - struct dentry *pending_child; - char fidname[LL_FID_NAMELEN]; - int fidlen = 0, rc, mode; - ENTRY; - - LASSERT(inode != NULL); - LASSERT(!mds_inode_is_orphan(inode)); -#ifndef HAVE_I_ALLOC_SEM - LASSERT(TRYLOCK_INODE_MUTEX(inode) == 0); -#endif - LASSERT(TRYLOCK_INODE_MUTEX(pending_dir) == 0); - - fidlen = ll_fid2str(fidname, inode->i_ino, inode->i_generation); - - CDEBUG(D_INODE, "pending destroy of %dx open %d linked %s %s = %s\n", - mds_orphan_open_count(inode), inode->i_nlink, - S_ISDIR(inode->i_mode) ? "dir" : - S_ISREG(inode->i_mode) ? "file" : "other",rec->ur_name,fidname); - - if (mds_orphan_open_count(inode) == 0 || inode->i_nlink != 0) - RETURN(0); - - pending_child = lookup_one_len(fidname, mds->mds_pending_dir, fidlen); - if (IS_ERR(pending_child)) - RETURN(PTR_ERR(pending_child)); - - if (pending_child->d_inode != NULL) { - CERROR("re-destroying orphan file %s?\n", rec->ur_name); - LASSERT(pending_child->d_inode == inode); - GOTO(out_dput, rc = 0); - } - - /* link() is semanticaly-wrong for S_IFDIR, so we set S_IFREG - * for linking and return real mode back then -bzzz */ - mode = inode->i_mode; - inode->i_mode = S_IFREG; - rc = vfs_link(dentry, pending_dir, pending_child); - if (rc) - CERROR("error linking orphan %s to PENDING: rc = %d\n", - rec->ur_name, rc); - else - mds_inode_set_orphan(inode); - - /* return mode and correct i_nlink if inode is directory */ - inode->i_mode = mode; - LASSERTF(inode->i_nlink == 1, "%s nlink == %d\n", - S_ISDIR(mode) ? "dir" : S_ISREG(mode) ? "file" : "other", - inode->i_nlink); - if (S_ISDIR(mode)) { - inode->i_nlink++; - pending_dir->i_nlink++; - mark_inode_dirty(inode); - mark_inode_dirty(pending_dir); - } - - GOTO(out_dput, rc = 1); -out_dput: - l_dput(pending_child); - RETURN(rc); -} - -int mds_get_cookie_size(struct obd_device *obd, struct lov_mds_md *lmm) -{ - int count = le32_to_cpu(lmm->lmm_stripe_count); - int real_csize = count * sizeof(struct llog_cookie); - return real_csize; -} - -void mds_shrink_reply(struct obd_device *obd, struct ptlrpc_request *req, - struct mds_body *body, int md_off) -{ - int cookie_size = 0, md_size = 0; - - if (body && body->valid & OBD_MD_FLEASIZE) { - md_size = body->eadatasize; - } - if (body && body->valid & OBD_MD_FLCOOKIE) { - LASSERT(body->valid & OBD_MD_FLEASIZE); - cookie_size = mds_get_cookie_size(obd, lustre_msg_buf( - req->rq_repmsg, md_off, 0)); - } - - CDEBUG(D_INFO, "Shrink to md_size %d cookie_size %d \n", md_size, - cookie_size); - - lustre_shrink_reply(req, md_off, md_size, 1); - - lustre_shrink_reply(req, md_off + (md_size > 0), cookie_size, 0); -} - -static int mds_reint_unlink(struct mds_update_record *rec, int offset, - struct ptlrpc_request *req, - struct lustre_handle *lh) -{ - struct dentry *dparent = NULL, *dchild; - struct mds_obd *mds = mds_req2mds(req); - struct obd_device *obd = req->rq_export->exp_obd; - struct mds_body *body = NULL; - struct inode *child_inode = NULL; - struct lustre_handle parent_lockh, child_lockh, child_reuse_lockh; - void *handle = NULL; - int rc = 0, cleanup_phase = 0; - unsigned int qcids[MAXQUOTAS] = { 0, 0 }; - unsigned int qpids[MAXQUOTAS] = { 0, 0 }; - ENTRY; - - LASSERT(offset == REQ_REC_OFF); /* || offset == DLM_INTENT_REC_OFF); */ - offset = REPLY_REC_OFF; - - DEBUG_REQ(D_INODE, req, "parent ino "LPU64"/%u, child %s", - rec->ur_fid1->id, rec->ur_fid1->generation, rec->ur_name); - - MDS_CHECK_RESENT(req, mds_reconstruct_generic(req)); - - if (OBD_FAIL_CHECK(OBD_FAIL_MDS_REINT_UNLINK)) - GOTO(cleanup, rc = -ENOENT); - - if (rec->ur_dlm) - ldlm_request_cancel(req, rec->ur_dlm, 0); - - rc = mds_get_parent_child_locked(obd, mds, rec->ur_fid1, - &parent_lockh, &dparent, LCK_EX, - MDS_INODELOCK_UPDATE, - rec->ur_name, rec->ur_namelen, - &child_lockh, &dchild, LCK_EX, - MDS_INODELOCK_FULL); - if (rc) - GOTO(cleanup, rc); - - cleanup_phase = 1; /* dchild, dparent, locks */ - - dget(dchild); - child_inode = dchild->d_inode; - if (child_inode == NULL) { - CDEBUG(D_INODE, "child doesn't exist (dir %lu, name %s)\n", - dparent->d_inode->i_ino, rec->ur_name); - GOTO(cleanup, rc = -ENOENT); - } - - /* save uid/gid for quota acquire/release */ - qcids[USRQUOTA] = child_inode->i_uid; - qcids[GRPQUOTA] = child_inode->i_gid; - qpids[USRQUOTA] = dparent->d_inode->i_uid; - qpids[GRPQUOTA] = dparent->d_inode->i_gid; - - cleanup_phase = 2; /* dchild has a lock */ - - /* We have to do these checks ourselves, in case we are making an - * orphan. The client tells us whether rmdir() or unlink() was called, - * so we need to return appropriate errors (bug 72). */ - if ((rec->ur_mode & S_IFMT) == S_IFDIR) { - if (!S_ISDIR(child_inode->i_mode)) - GOTO(cleanup, rc = -ENOTDIR); - } else { - if (S_ISDIR(child_inode->i_mode)) - GOTO(cleanup, rc = -EISDIR); - } - - /* Check for EROFS after we check ENODENT, ENOTDIR, and EISDIR */ - if (req->rq_export->exp_connect_flags & OBD_CONNECT_RDONLY) - GOTO(cleanup, rc = -EROFS); - - /* Step 3: Get a lock on the ino to sync with creation WRT inode - * reuse (see bug 2029). */ - rc = mds_lock_new_child(obd, child_inode, &child_reuse_lockh); - if (rc != ELDLM_OK) - GOTO(cleanup, rc); - - cleanup_phase = 3; /* child inum lock */ - - OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_UNLINK_WRITE, dparent->d_inode->i_sb); - - /* ldlm_reply in buf[0] if called via intent */ - if (offset == DLM_INTENT_REC_OFF) - offset = DLM_REPLY_REC_OFF; - - body = lustre_msg_buf(req->rq_repmsg, offset, sizeof(*body)); - LASSERT(body != NULL); - - /* child orphan sem protects orphan_dec_test && is_orphan race */ - MDS_DOWN_READ_ORPHAN_SEM(child_inode); - cleanup_phase = 4; /* MDS_UP_READ_ORPHAN_SEM(new_inode) when finished */ - - /* If this is potentially the last reference to this inode, get the - * OBD EA data first so the client can destroy OST objects. We - * only do the object removal later if no open files/links remain. */ - if ((S_ISDIR(child_inode->i_mode) && child_inode->i_nlink == 2) || - child_inode->i_nlink == 1) { - if (mds_orphan_open_count(child_inode) > 0) { - /* need to lock pending_dir before transaction */ - LOCK_INODE_MUTEX(mds->mds_pending_dir->d_inode); - cleanup_phase = 5; /* UNLOCK_INODE_MUTEX(mds->mds_pending_dir->d_inode); */ - } else if (S_ISREG(child_inode->i_mode)) { - mds_pack_inode2fid(&body->fid1, child_inode); - mds_pack_inode2body(body, child_inode); - mds_pack_md(obd, req->rq_repmsg, offset + 1, body, - child_inode, MDS_PACK_MD_LOCK); - } - } - - /* Step 4: Do the unlink: we already verified ur_mode above (bug 72) */ - switch (child_inode->i_mode & S_IFMT) { - case S_IFDIR: - /* Drop any lingering child directories before we start our - * transaction, to avoid doing multiple inode dirty/delete - * in our compound transaction (bug 1321). */ - shrink_dcache_parent(dchild); - handle = fsfilt_start(obd, dparent->d_inode, FSFILT_OP_RMDIR, - NULL); - if (IS_ERR(handle)) - GOTO(cleanup, rc = PTR_ERR(handle)); - rc = vfs_rmdir(dparent->d_inode, dchild); - mds_counter_incr(req->rq_export, LPROC_MDS_RMDIR); - break; - case S_IFREG: { - struct lov_mds_md *lmm = lustre_msg_buf(req->rq_repmsg, - offset + 1, 0); - handle = fsfilt_start_log(obd, dparent->d_inode, - FSFILT_OP_UNLINK, NULL, - le32_to_cpu(lmm->lmm_stripe_count)); - if (IS_ERR(handle)) - GOTO(cleanup, rc = PTR_ERR(handle)); - rc = vfs_unlink(dparent->d_inode, dchild); - mds_counter_incr(req->rq_export, LPROC_MDS_UNLINK); - break; - } - case S_IFLNK: - case S_IFCHR: - case S_IFBLK: - case S_IFIFO: - case S_IFSOCK: - handle = fsfilt_start(obd, dparent->d_inode, FSFILT_OP_UNLINK, - NULL); - if (IS_ERR(handle)) - GOTO(cleanup, rc = PTR_ERR(handle)); - rc = vfs_unlink(dparent->d_inode, dchild); - mds_counter_incr(req->rq_export, LPROC_MDS_UNLINK); - break; - default: - CERROR("bad file type %o unlinking %s\n", rec->ur_mode, - rec->ur_name); - LBUG(); - GOTO(cleanup, rc = -EINVAL); - } - - if (rc == 0 && child_inode->i_nlink == 0) { - if (mds_orphan_open_count(child_inode) > 0) - rc = mds_orphan_add_link(rec, obd, dchild); - - if (rc == 1) - GOTO(cleanup, rc = 0); - - if (!S_ISREG(child_inode->i_mode)) - GOTO(cleanup, rc); - - if (!(body->valid & OBD_MD_FLEASIZE)) { - body->valid |=(OBD_MD_FLSIZE | OBD_MD_FLBLOCKS | - OBD_MD_FLATIME | OBD_MD_FLMTIME); - } else if (mds_log_op_unlink(obd, - lustre_msg_buf(req->rq_repmsg, offset + 1, 0), - lustre_msg_buflen(req->rq_repmsg, offset + 1), - lustre_msg_buf(req->rq_repmsg, offset + 2, 0), - lustre_msg_buflen(req->rq_repmsg, offset+2)) > - 0) { - body->valid |= OBD_MD_FLCOOKIE; - } - } - - GOTO(cleanup, rc); -cleanup: - if (rc == 0) { - struct iattr iattr; - int err; - - iattr.ia_valid = ATTR_MTIME | ATTR_CTIME; - LTIME_S(iattr.ia_mtime) = rec->ur_time; - LTIME_S(iattr.ia_ctime) = rec->ur_time; - - err = fsfilt_setattr(obd, dparent, handle, &iattr, 0); - if (err) - CERROR("error on parent setattr: rc = %d\n", err); - } - - rc = mds_finish_transno(mds, dparent ? dparent->d_inode : NULL, - handle, req, rc, 0, 0); - if (!rc) - (void)obd_set_info_async(mds->mds_osc_exp, strlen("unlinked"), - "unlinked", 0, NULL, NULL); - switch(cleanup_phase) { - case 5: /* pending_dir semaphore */ - UNLOCK_INODE_MUTEX(mds->mds_pending_dir->d_inode); - case 4: /* child inode semaphore */ - MDS_UP_READ_ORPHAN_SEM(child_inode); - case 3: /* child ino-reuse lock */ - if (rc && body != NULL) { - // Don't unlink the OST objects if the MDS unlink failed - body->valid = 0; - } - if (rc) - ldlm_lock_decref(&child_reuse_lockh, LCK_EX); - else - ptlrpc_save_lock(req, &child_reuse_lockh, LCK_EX); - case 2: /* child lock */ - ldlm_lock_decref(&child_lockh, LCK_EX); - case 1: /* child and parent dentry, parent lock */ - if (rc) - ldlm_lock_decref(&parent_lockh, LCK_EX); - else - ptlrpc_save_lock(req, &parent_lockh, LCK_EX); - l_dput(dchild); - l_dput(dchild); - l_dput(dparent); - case 0: - break; - default: - CERROR("invalid cleanup_phase %d\n", cleanup_phase); - LBUG(); - } - req->rq_status = rc; - - mds_shrink_reply(obd, req, body, REPLY_REC_OFF + 1); - - /* trigger dqrel on the owner of child and parent */ - lquota_adjust(mds_quota_interface_ref, obd, qcids, qpids, rc, FSFILT_OP_UNLINK); - return 0; -} - -static int mds_reint_link(struct mds_update_record *rec, int offset, - struct ptlrpc_request *req, - struct lustre_handle *lh) -{ - struct obd_device *obd = req->rq_export->exp_obd; - struct dentry *de_src = NULL; - struct dentry *de_tgt_dir = NULL; - struct dentry *dchild = NULL; - struct mds_obd *mds = mds_req2mds(req); - struct lustre_handle *handle = NULL, tgt_dir_lockh, src_lockh; - struct ldlm_res_id src_res_id = { .name = {0} }; - struct ldlm_res_id tgt_dir_res_id = { .name = {0} }; - ldlm_policy_data_t src_policy ={.l_inodebits = {MDS_INODELOCK_UPDATE}}; - ldlm_policy_data_t tgt_dir_policy = - {.l_inodebits = {MDS_INODELOCK_UPDATE}}; - int rc = 0, cleanup_phase = 0; - ENTRY; - - LASSERT(offset == REQ_REC_OFF); - - DEBUG_REQ(D_INODE, req, "original "LPU64"/%u to "LPU64"/%u %s", - rec->ur_fid1->id, rec->ur_fid1->generation, - rec->ur_fid2->id, rec->ur_fid2->generation, rec->ur_name); - mds_counter_incr(req->rq_export, LPROC_MDS_LINK); - - MDS_CHECK_RESENT(req, mds_reconstruct_generic(req)); - - if (OBD_FAIL_CHECK(OBD_FAIL_MDS_REINT_LINK)) - GOTO(cleanup, rc = -ENOENT); - - if (rec->ur_dlm) - ldlm_request_cancel(req, rec->ur_dlm, 0); - - /* Step 1: Lookup the source inode and target directory by FID */ - de_src = mds_fid2dentry(mds, rec->ur_fid1, NULL); - if (IS_ERR(de_src)) - GOTO(cleanup, rc = PTR_ERR(de_src)); - - cleanup_phase = 1; /* source dentry */ - - de_tgt_dir = mds_fid2dentry(mds, rec->ur_fid2, NULL); - if (IS_ERR(de_tgt_dir)) { - rc = PTR_ERR(de_tgt_dir); - de_tgt_dir = NULL; - GOTO(cleanup, rc); - } - - cleanup_phase = 2; /* target directory dentry */ - - CDEBUG(D_INODE, "linking %.*s/%s to inode %lu\n", - de_tgt_dir->d_name.len, de_tgt_dir->d_name.name, rec->ur_name, - de_src->d_inode->i_ino); - - /* Step 2: Take the two locks */ - src_res_id.name[0] = de_src->d_inode->i_ino; - src_res_id.name[1] = de_src->d_inode->i_generation; - tgt_dir_res_id.name[0] = de_tgt_dir->d_inode->i_ino; - tgt_dir_res_id.name[1] = de_tgt_dir->d_inode->i_generation; - - rc = enqueue_ordered_locks(obd, &src_res_id, &src_lockh, LCK_EX, - &src_policy, - &tgt_dir_res_id, &tgt_dir_lockh, LCK_EX, - &tgt_dir_policy); - if (rc) - GOTO(cleanup, rc); - - cleanup_phase = 3; /* locks */ - - if (mds_inode_is_orphan(de_src->d_inode)) { - CDEBUG(D_INODE, "an attempt to link an orphan inode %lu/%u\n", - de_src->d_inode->i_ino, - de_src->d_inode->i_generation); - GOTO(cleanup, rc = -ENOENT); - } - - /* Step 3: Lookup the child */ - dchild = ll_lookup_one_len(rec->ur_name, de_tgt_dir, rec->ur_namelen-1); - if (IS_ERR(dchild)) { - rc = PTR_ERR(dchild); - if (rc != -EPERM && rc != -EACCES && rc != -ENAMETOOLONG) - CERROR("child lookup error %d\n", rc); - GOTO(cleanup, rc); - } - - cleanup_phase = 4; /* child dentry */ - - if (dchild->d_inode) { - CDEBUG(D_INODE, "child exists (dir %lu, name %s)\n", - de_tgt_dir->d_inode->i_ino, rec->ur_name); - rc = -EEXIST; - GOTO(cleanup, rc); - } - - /* Step 4: Do it. */ - OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_LINK_WRITE, de_src->d_inode->i_sb); - - if (req->rq_export->exp_connect_flags & OBD_CONNECT_RDONLY) - GOTO(cleanup, rc = -EROFS); - - handle = fsfilt_start(obd, de_tgt_dir->d_inode, FSFILT_OP_LINK, NULL); - if (IS_ERR(handle)) - GOTO(cleanup, rc = PTR_ERR(handle)); - - rc = vfs_link(de_src, de_tgt_dir->d_inode, dchild); - if (rc && rc != -EPERM && rc != -EACCES) - CERROR("vfs_link error %d\n", rc); -cleanup: - rc = mds_finish_transno(mds, de_tgt_dir ? de_tgt_dir->d_inode : NULL, - handle, req, rc, 0, 0); - EXIT; - - switch (cleanup_phase) { - case 4: /* child dentry */ - l_dput(dchild); - case 3: /* locks */ - if (rc) { - ldlm_lock_decref(&src_lockh, LCK_EX); - ldlm_lock_decref(&tgt_dir_lockh, LCK_EX); - } else { - ptlrpc_save_lock(req, &src_lockh, LCK_EX); - ptlrpc_save_lock(req, &tgt_dir_lockh, LCK_EX); - } - case 2: /* target dentry */ - l_dput(de_tgt_dir); - case 1: /* source dentry */ - l_dput(de_src); - case 0: - break; - default: - CERROR("invalid cleanup_phase %d\n", cleanup_phase); - LBUG(); - } - req->rq_status = rc; - return 0; -} - -/* The idea here is that we need to get four locks in the end: - * one on each parent directory, one on each child. We need to take - * these locks in some kind of order (to avoid deadlocks), and the order - * I selected is "increasing resource number" order. We need to look up - * the children, however, before we know what the resource number(s) are. - * Thus the following plan: - * - * 1,2. Look up the parents - * 3,4. Look up the children - * 5. Take locks on the parents and children, in order - * 6. Verify that the children haven't changed since they were looked up - * - * If there was a race and the children changed since they were first looked - * up, it is possible that mds_verify_child() will be able to just grab the - * lock on the new child resource (if it has a higher resource than any other) - * but we need to compare against not only its parent, but also against the - * parent and child of the "other half" of the rename, hence maxres_{src,tgt}. - * - * We need the fancy igrab() on the child inodes because we aren't holding a - * lock on the parent after the lookup is done, so dentry->d_inode may change - * at any time, and igrab() itself doesn't like getting passed a NULL argument. - */ -int mds_get_parents_children_locked(struct obd_device *obd, - struct mds_obd *mds, - struct ll_fid *p1_fid, - struct dentry **de_srcdirp, - struct ll_fid *p2_fid, - struct dentry **de_tgtdirp, - int parent_mode, - const char *old_name, int old_len, - struct dentry **de_oldp, - const char *new_name, int new_len, - struct dentry **de_newp, - struct lustre_handle *dlm_handles, - int child_mode) -{ - struct ldlm_res_id p1_res_id = { .name = {0} }; - struct ldlm_res_id p2_res_id = { .name = {0} }; - struct ldlm_res_id c1_res_id = { .name = {0} }; - struct ldlm_res_id c2_res_id = { .name = {0} }; - ldlm_policy_data_t p_policy = {.l_inodebits = {MDS_INODELOCK_UPDATE}}; - /* Only dentry should disappear, but the inode itself would be - intact otherwise. */ - ldlm_policy_data_t c1_policy = {.l_inodebits = {MDS_INODELOCK_LOOKUP}}; - /* If something is going to be replaced, both dentry and inode locks are needed */ - ldlm_policy_data_t c2_policy = {.l_inodebits = {MDS_INODELOCK_FULL}}; - struct ldlm_res_id *maxres_src, *maxres_tgt; - struct inode *inode; - int rc = 0, cleanup_phase = 0; - ENTRY; - - /* Step 1: Lookup the source directory */ - *de_srcdirp = mds_fid2dentry(mds, p1_fid, NULL); - if (IS_ERR(*de_srcdirp)) - GOTO(cleanup, rc = PTR_ERR(*de_srcdirp)); - - cleanup_phase = 1; /* source directory dentry */ - - p1_res_id.name[0] = (*de_srcdirp)->d_inode->i_ino; - p1_res_id.name[1] = (*de_srcdirp)->d_inode->i_generation; - - /* Step 2: Lookup the target directory */ - if (memcmp(p1_fid, p2_fid, sizeof(*p1_fid)) == 0) { - *de_tgtdirp = dget(*de_srcdirp); - } else { - *de_tgtdirp = mds_fid2dentry(mds, p2_fid, NULL); - if (IS_ERR(*de_tgtdirp)) { - rc = PTR_ERR(*de_tgtdirp); - *de_tgtdirp = NULL; - GOTO(cleanup, rc); - } - } - - cleanup_phase = 2; /* target directory dentry */ - - p2_res_id.name[0] = (*de_tgtdirp)->d_inode->i_ino; - p2_res_id.name[1] = (*de_tgtdirp)->d_inode->i_generation; - - /* Step 3: Lookup the source child entry */ - *de_oldp = ll_lookup_one_len(old_name, *de_srcdirp, old_len - 1); - if (IS_ERR(*de_oldp)) { - rc = PTR_ERR(*de_oldp); - CDEBUG(D_INODE, "old child lookup error (%.*s): %d\n", - old_len - 1, old_name, rc); - GOTO(cleanup, rc); - } - - cleanup_phase = 3; /* original name dentry */ - - inode = (*de_oldp)->d_inode; - if (inode != NULL) - inode = igrab(inode); - if (inode == NULL) - GOTO(cleanup, rc = -ENOENT); - - c1_res_id.name[0] = inode->i_ino; - c1_res_id.name[1] = inode->i_generation; - - iput(inode); - - /* Step 4: Lookup the target child entry */ - if (!new_name) - GOTO(retry_locks, rc); - *de_newp = ll_lookup_one_len(new_name, *de_tgtdirp, new_len - 1); - if (IS_ERR(*de_newp)) { - rc = PTR_ERR(*de_newp); - if (rc != -ENAMETOOLONG) - CERROR("new child lookup error (%.*s): %d\n", - old_len - 1, old_name, rc); - GOTO(cleanup, rc); - } - - cleanup_phase = 4; /* target dentry */ - - inode = (*de_newp)->d_inode; - if (inode != NULL) - inode = igrab(inode); - if (inode == NULL) - goto retry_locks; - - c2_res_id.name[0] = inode->i_ino; - c2_res_id.name[1] = inode->i_generation; - iput(inode); - -retry_locks: - /* Step 5: Take locks on the parents and child(ren) */ - maxres_src = &p1_res_id; - maxres_tgt = &p2_res_id; - cleanup_phase = 4; /* target dentry */ - - if (c1_res_id.name[0] != 0 && res_gt(&c1_res_id, &p1_res_id,NULL,NULL)) - maxres_src = &c1_res_id; - if (c2_res_id.name[0] != 0 && res_gt(&c2_res_id, &p2_res_id,NULL,NULL)) - maxres_tgt = &c2_res_id; - - rc = enqueue_4ordered_locks(obd, &p1_res_id,&dlm_handles[0],parent_mode, - &p_policy, - &p2_res_id, &dlm_handles[1], parent_mode, - &p_policy, - &c1_res_id, &dlm_handles[2], child_mode, - &c1_policy, - &c2_res_id, &dlm_handles[3], child_mode, - &c2_policy); - if (rc) - GOTO(cleanup, rc); - - cleanup_phase = 6; /* parent and child(ren) locks */ - - /* Step 6a: Re-lookup source child to verify it hasn't changed */ - rc = mds_verify_child(obd, &p1_res_id, &dlm_handles[0], *de_srcdirp, - parent_mode, &c1_res_id, &dlm_handles[2], de_oldp, - child_mode, &c1_policy, old_name, old_len, - maxres_tgt); - if (rc) { - if (c2_res_id.name[0] != 0) - ldlm_lock_decref(&dlm_handles[3], child_mode); - ldlm_lock_decref(&dlm_handles[1], parent_mode); - cleanup_phase = 4; - if (rc > 0) - goto retry_locks; - GOTO(cleanup, rc); - } - - if ((*de_oldp)->d_inode == NULL) - GOTO(cleanup, rc = -ENOENT); - - if (!new_name) - GOTO(cleanup, rc); - /* Step 6b: Re-lookup target child to verify it hasn't changed */ - rc = mds_verify_child(obd, &p2_res_id, &dlm_handles[1], *de_tgtdirp, - parent_mode, &c2_res_id, &dlm_handles[3], de_newp, - child_mode, &c2_policy, new_name, new_len, - maxres_src); - if (rc) { - ldlm_lock_decref(&dlm_handles[2], child_mode); - ldlm_lock_decref(&dlm_handles[0], parent_mode); - cleanup_phase = 4; - if (rc > 0) - goto retry_locks; - GOTO(cleanup, rc); - } - - EXIT; -cleanup: - if (rc) { - switch (cleanup_phase) { - case 6: /* child lock(s) */ - if (c2_res_id.name[0] != 0) - ldlm_lock_decref(&dlm_handles[3], child_mode); - if (c1_res_id.name[0] != 0) - ldlm_lock_decref(&dlm_handles[2], child_mode); - case 5: /* parent locks */ - ldlm_lock_decref(&dlm_handles[1], parent_mode); - ldlm_lock_decref(&dlm_handles[0], parent_mode); - case 4: /* target dentry */ - l_dput(*de_newp); - case 3: /* source dentry */ - l_dput(*de_oldp); - case 2: /* target directory dentry */ - l_dput(*de_tgtdirp); - case 1: /* source directry dentry */ - l_dput(*de_srcdirp); - } - } - - return rc; -} - -static int mds_reint_rename(struct mds_update_record *rec, int offset, - struct ptlrpc_request *req, - struct lustre_handle *lockh) -{ - struct obd_device *obd = req->rq_export->exp_obd; - struct dentry *de_srcdir = NULL; - struct dentry *de_tgtdir = NULL; - struct dentry *de_old = NULL; - struct dentry *de_new = NULL; - struct inode *old_inode = NULL, *new_inode = NULL; - struct mds_obd *mds = mds_req2mds(req); - struct lustre_handle dlm_handles[4]; - struct mds_body *body = NULL; - struct lov_mds_md *lmm = NULL; - int rc = 0, lock_count = 3, cleanup_phase = 0; - void *handle = NULL; - unsigned int qcids[MAXQUOTAS] = { 0, 0 }; - unsigned int qpids[4] = { 0, 0, 0, 0 }; - ENTRY; - - LASSERT(offset == REQ_REC_OFF); - offset = REPLY_REC_OFF; - - DEBUG_REQ(D_INODE, req, "parent "LPU64"/%u %s to "LPU64"/%u %s", - rec->ur_fid1->id, rec->ur_fid1->generation, rec->ur_name, - rec->ur_fid2->id, rec->ur_fid2->generation, rec->ur_tgt); - mds_counter_incr(req->rq_export, LPROC_MDS_RENAME); - - MDS_CHECK_RESENT(req, mds_reconstruct_generic(req)); - - if (rec->ur_dlm) - ldlm_request_cancel(req, rec->ur_dlm, 0); - - rc = mds_get_parents_children_locked(obd, mds, rec->ur_fid1, &de_srcdir, - rec->ur_fid2, &de_tgtdir, LCK_EX, - rec->ur_name, rec->ur_namelen, - &de_old, rec->ur_tgt, - rec->ur_tgtlen, &de_new, - dlm_handles, LCK_EX); - if (rc) - GOTO(cleanup, rc); - - cleanup_phase = 1; /* parent(s), children, locks */ - - old_inode = de_old->d_inode; - new_inode = de_new->d_inode; - - if (new_inode != NULL) - lock_count = 4; - - /* sanity check for src inode */ - if (old_inode->i_ino == de_srcdir->d_inode->i_ino || - old_inode->i_ino == de_tgtdir->d_inode->i_ino) - GOTO(cleanup, rc = -EINVAL); - - if (req->rq_export->exp_connect_flags & OBD_CONNECT_RDONLY) - GOTO(cleanup, rc = -EROFS); - - if (new_inode == NULL) - goto no_unlink; - - igrab(new_inode); - cleanup_phase = 2; /* iput(new_inode) when finished */ - - /* sanity check for dest inode */ - if (new_inode->i_ino == de_srcdir->d_inode->i_ino || - new_inode->i_ino == de_tgtdir->d_inode->i_ino) - GOTO(cleanup, rc = -EINVAL); - - if (old_inode == new_inode) - GOTO(cleanup, rc = 0); - - /* save uids/gids for qunit acquire/release */ - qcids[USRQUOTA] = old_inode->i_uid; - qcids[GRPQUOTA] = old_inode->i_gid; - qpids[USRQUOTA] = de_tgtdir->d_inode->i_uid; - qpids[GRPQUOTA] = de_tgtdir->d_inode->i_gid; - qpids[2] = de_srcdir->d_inode->i_uid; - qpids[3] = de_srcdir->d_inode->i_gid; - - /* if we are about to remove the target at first, pass the EA of - * that inode to client to perform and cleanup on OST */ - body = lustre_msg_buf(req->rq_repmsg, offset, sizeof(*body)); - LASSERT(body != NULL); - - /* child orphan sem protects orphan_dec_test && is_orphan race */ - MDS_DOWN_READ_ORPHAN_SEM(new_inode); - cleanup_phase = 3; /* MDS_UP_READ_ORPHAN_SEM(new_inode) when finished */ - - if ((S_ISDIR(new_inode->i_mode) && new_inode->i_nlink == 2) || - new_inode->i_nlink == 1) { - if (mds_orphan_open_count(new_inode) > 0) { - /* need to lock pending_dir before transaction */ - LOCK_INODE_MUTEX(mds->mds_pending_dir->d_inode); - cleanup_phase = 4; /* UNLOCK_INODE_MUTEX(mds->mds_pending_dir->d_inode); */ - } else if (S_ISREG(new_inode->i_mode)) { - mds_pack_inode2fid(&body->fid1, new_inode); - mds_pack_inode2body(body, new_inode); - mds_pack_md(obd, req->rq_repmsg, offset + 1, body, - new_inode, MDS_PACK_MD_LOCK); - } - } - -no_unlink: - OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_RENAME_WRITE, - de_srcdir->d_inode->i_sb); - - /* Check if we are moving old entry into its child. 2.6 does not - check for this in vfs_rename() anymore */ - if (is_subdir(de_new, de_old)) - GOTO(cleanup, rc = -EINVAL); - - lmm = lustre_msg_buf(req->rq_repmsg, offset + 1, 0); - handle = fsfilt_start_log(obd, de_tgtdir->d_inode, FSFILT_OP_RENAME, - NULL, le32_to_cpu(lmm->lmm_stripe_count)); - - if (IS_ERR(handle)) - GOTO(cleanup, rc = PTR_ERR(handle)); - - lock_kernel(); - de_old->d_fsdata = req; - de_new->d_fsdata = req; - - rc = vfs_rename(de_srcdir->d_inode, de_old, de_tgtdir->d_inode, de_new); - unlock_kernel(); - - if (rc == 0 && new_inode != NULL && new_inode->i_nlink == 0) { - if (mds_orphan_open_count(new_inode) > 0) - rc = mds_orphan_add_link(rec, obd, de_new); - - if (rc == 1) - GOTO(cleanup, rc = 0); - - if (!S_ISREG(new_inode->i_mode)) - GOTO(cleanup, rc); - - if (!(body->valid & OBD_MD_FLEASIZE)) { - body->valid |= (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS | - OBD_MD_FLATIME | OBD_MD_FLMTIME); - } else if (mds_log_op_unlink(obd, - lustre_msg_buf(req->rq_repmsg, - offset + 1, 0), - lustre_msg_buflen(req->rq_repmsg, - offset + 1), - lustre_msg_buf(req->rq_repmsg, - offset + 2, 0), - lustre_msg_buflen(req->rq_repmsg, - offset + 2)) - > 0) { - body->valid |= OBD_MD_FLCOOKIE; - } - } - - GOTO(cleanup, rc); -cleanup: - rc = mds_finish_transno(mds, de_tgtdir ? de_tgtdir->d_inode : NULL, - handle, req, rc, 0, 0); - - switch (cleanup_phase) { - case 4: - UNLOCK_INODE_MUTEX(mds->mds_pending_dir->d_inode); - case 3: - MDS_UP_READ_ORPHAN_SEM(new_inode); - case 2: - iput(new_inode); - case 1: - if (rc) { - if (lock_count == 4) - ldlm_lock_decref(&(dlm_handles[3]), LCK_EX); - ldlm_lock_decref(&(dlm_handles[2]), LCK_EX); - ldlm_lock_decref(&(dlm_handles[1]), LCK_EX); - ldlm_lock_decref(&(dlm_handles[0]), LCK_EX); - } else { - if (lock_count == 4) - ptlrpc_save_lock(req,&(dlm_handles[3]), LCK_EX); - ptlrpc_save_lock(req, &(dlm_handles[2]), LCK_EX); - ptlrpc_save_lock(req, &(dlm_handles[1]), LCK_EX); - ptlrpc_save_lock(req, &(dlm_handles[0]), LCK_EX); - } - l_dput(de_new); - l_dput(de_old); - l_dput(de_tgtdir); - l_dput(de_srcdir); - case 0: - break; - default: - CERROR("invalid cleanup_phase %d\n", cleanup_phase); - LBUG(); - } - req->rq_status = rc; - - /* acquire/release qunit */ - lquota_adjust(mds_quota_interface_ref, obd, qcids, qpids, rc, FSFILT_OP_RENAME); - return 0; -} - -typedef int (*mds_reinter)(struct mds_update_record *, int offset, - struct ptlrpc_request *, struct lustre_handle *); - -static mds_reinter reinters[REINT_MAX] = { - [REINT_SETATTR] mds_reint_setattr, - [REINT_CREATE] mds_reint_create, - [REINT_LINK] mds_reint_link, - [REINT_UNLINK] mds_reint_unlink, - [REINT_RENAME] mds_reint_rename, - [REINT_OPEN] mds_open -}; - -int mds_reint_rec(struct mds_update_record *rec, int offset, - struct ptlrpc_request *req, struct lustre_handle *lockh) -{ - struct obd_device *obd = req->rq_export->exp_obd; -#if 0 - struct mds_obd *mds = &obd->u.mds; -#ifdef CRAY_XT3 - gid_t fsgid = rec->ur_uc.luc_fsgid; -#endif -#endif - struct lvfs_run_ctxt saved; - int rc; - ENTRY; - -#ifdef CRAY_XT3 - if (req->rq_uid != LNET_UID_ANY) { - /* non-root local cluster client - * NB root's creds are believed... */ - LASSERT (req->rq_uid != 0); - rec->ur_uc.luc_fsuid = req->rq_uid; - rec->ur_uc.luc_cap = 0; - } -#endif - -#if 0 - /* get group info of this user */ - rec->ur_uc.luc_uce = upcall_cache_get_entry(mds->mds_group_hash, - rec->ur_uc.luc_fsuid, - rec->ur_uc.luc_fsgid, 2, - &rec->ur_uc.luc_suppgid1); - - if (IS_ERR(rec->ur_uc.luc_uce)) { - rc = PTR_ERR(rec->ur_uc.luc_uce); - rec->ur_uc.luc_uce = NULL; - RETURN(rc); - } - - /* checked by unpacker */ - LASSERT(rec->ur_opcode < REINT_MAX && reinters[rec->ur_opcode] != NULL); - -#ifdef CRAY_XT3 - if (rec->ur_uc.luc_uce) - rec->ur_uc.luc_fsgid = rec->ur_uc.luc_uce->ue_primary; -#endif -#endif - - push_ctxt(&saved, &obd->obd_lvfs_ctxt, &rec->ur_uc); - -#if 0 -#ifdef CRAY_XT3 - if (rec->ur_uc.luc_uce && fsgid != rec->ur_uc.luc_fsgid && - in_group_p(fsgid)) { - rec->ur_uc.luc_fsgid = fsgid; - current->fsgid = saved.luc.luc_fsgid = fsgid; - } -#endif -#endif - - rc = reinters[rec->ur_opcode] (rec, offset, req, lockh); - pop_ctxt(&saved, &obd->obd_lvfs_ctxt, &rec->ur_uc); - -#if 0 - upcall_cache_put_entry(mds->mds_group_hash, rec->ur_uc.luc_uce); -#endif - RETURN(rc); -} diff --git a/lustre/mds/mds_unlink_open.c b/lustre/mds/mds_unlink_open.c deleted file mode 100644 index 3c2f4fe..0000000 --- a/lustre/mds/mds_unlink_open.c +++ /dev/null @@ -1,287 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * lustre/mds/mds_orphan.c - * - * Copyright (c) 2001-2003 Cluster File Systems, Inc. - * Author: Peter Braam - * Author: Andreas Dilger - * Author: Phil Schwan - * - * This file is part of the Lustre file system, http://www.lustre.org - * Lustre is a trademark of Cluster File Systems, Inc. - * - * You may have signed or agreed to another license before downloading - * this software. If so, you are bound by the terms and conditions - * of that agreement, and the following does not apply to you. See the - * LICENSE file included with this distribution for more information. - * - * If you did not agree to a different license, then this copy of Lustre - * is open source software; you can redistribute it and/or modify it - * under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * In either case, Lustre is distributed in the hope that it will be - * useful, but WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * license text for more details. - */ - -/* code for handling open unlinked files */ - -#define DEBUG_SUBSYSTEM S_MDS - -#ifndef AUTOCONF_INCLUDED -#include -#endif -#include -#include - -#include -#include -#include -#include -#include -#include - -#include "mds_internal.h" - -int mds_osc_destroy_orphan(struct obd_device *obd, - umode_t mode, - struct lov_mds_md *lmm, - int lmm_size, - struct llog_cookie *logcookies, - int log_unlink) -{ - struct mds_obd *mds = &obd->u.mds; - struct lov_stripe_md *lsm = NULL; - struct obd_trans_info oti = { 0 }; - struct obdo *oa; - int rc; - ENTRY; - - if (lmm_size == 0) - RETURN(0); - - rc = obd_unpackmd(mds->mds_osc_exp, &lsm, lmm, lmm_size); - if (rc < 0) { - CERROR("Error unpack md %p\n", lmm); - RETURN(rc); - } else { - LASSERT(rc >= sizeof(*lsm)); - rc = 0; - } - - rc = obd_checkmd(mds->mds_osc_exp, obd->obd_self_export, lsm); - if (rc) - GOTO(out_free_memmd, rc); - - OBDO_ALLOC(oa); - if (oa == NULL) - GOTO(out_free_memmd, rc = -ENOMEM); - oa->o_id = lsm->lsm_object_id; - oa->o_gr = lsm->lsm_object_gr; - oa->o_mode = mode & S_IFMT; - oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLGROUP; - - if (log_unlink && logcookies) { - oa->o_valid |= OBD_MD_FLCOOKIE; - oti.oti_logcookies = logcookies; - } - rc = obd_destroy(mds->mds_osc_exp, oa, lsm, &oti, obd->obd_self_export); - OBDO_FREE(oa); - if (rc) - CDEBUG(D_INODE, "destroy orphan objid 0x"LPX64" on ost error " - "%d\n", lsm->lsm_object_id, rc); -out_free_memmd: - obd_free_memmd(mds->mds_osc_exp, &lsm); - RETURN(rc); -} - -static int mds_unlink_orphan(struct obd_device *obd, struct dentry *dchild, - struct inode *inode, struct inode *pending_dir) -{ - struct mds_obd *mds = &obd->u.mds; - struct lov_mds_md *lmm = NULL; - struct llog_cookie *logcookies = NULL; - int lmm_size, log_unlink = 0, cookie_size = 0; - void *handle = NULL; - umode_t mode; - int rc, err; - ENTRY; - - LASSERT(mds->mds_osc_obd != NULL); - - /* We don't need to do any of these other things for orhpan dirs, - * especially not mds_get_md (may get a default LOV EA, bug 4554) */ - mode = inode->i_mode; - if (S_ISDIR(mode)) { - rc = vfs_rmdir(pending_dir, dchild); - if (rc) - CERROR("error %d unlinking dir %*s from PENDING\n", - rc, dchild->d_name.len, dchild->d_name.name); - RETURN(rc); - } - - lmm_size = mds->mds_max_mdsize; - OBD_ALLOC(lmm, lmm_size); - if (lmm == NULL) - RETURN(-ENOMEM); - - rc = mds_get_md(obd, inode, lmm, &lmm_size, 1); - if (rc < 0) - GOTO(out_free_lmm, rc); - - handle = fsfilt_start_log(obd, pending_dir, FSFILT_OP_UNLINK, NULL, - le32_to_cpu(lmm->lmm_stripe_count)); - if (IS_ERR(handle)) { - rc = PTR_ERR(handle); - CERROR("error fsfilt_start: %d\n", rc); - handle = NULL; - GOTO(out_free_lmm, rc); - } - - rc = vfs_unlink(pending_dir, dchild); - if (rc) { - CERROR("error %d unlinking orphan %.*s from PENDING\n", - rc, dchild->d_name.len, dchild->d_name.name); - } else if (lmm_size) { - cookie_size = mds_get_cookie_size(obd, lmm); - OBD_ALLOC(logcookies, cookie_size); - if (logcookies == NULL) - rc = -ENOMEM; - else if (mds_log_op_unlink(obd, lmm,lmm_size,logcookies, - cookie_size) > 0) - log_unlink = 1; - } - - err = fsfilt_commit(obd, pending_dir, handle, 0); - if (err) { - CERROR("error committing orphan unlink: %d\n", err); - if (!rc) - rc = err; - } else if (!rc) { - rc = mds_osc_destroy_orphan(obd, mode, lmm, lmm_size, - logcookies, log_unlink); - } - - if (logcookies != NULL) - OBD_FREE(logcookies, cookie_size); -out_free_lmm: - OBD_FREE(lmm, mds->mds_max_mdsize); - RETURN(rc); -} - -/* Delete inodes which were previously open-unlinked but were not reopened - * during MDS recovery for whatever reason (e.g. client also failed, recovery - * aborted, etc). */ -int mds_cleanup_pending(struct obd_device *obd) -{ - struct mds_obd *mds = &obd->u.mds; - struct lvfs_run_ctxt saved; - struct file *file; - struct dentry *dchild, *dentry; - struct vfsmount *mnt; - struct inode *child_inode, *pending_dir = mds->mds_pending_dir->d_inode; - struct l_linux_dirent *dirent, *n; - struct list_head dentry_list; - char d_name[LL_FID_NAMELEN]; - unsigned long inum; - int i = 0, rc = 0, item = 0, namlen; - ENTRY; - - push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); - /* dentry and mnt ref dropped in dentry_open() on error, or - * in filp_close() if dentry_open() succeeds */ - dentry = dget(mds->mds_pending_dir); - if (IS_ERR(dentry)) - GOTO(err_pop, rc = PTR_ERR(dentry)); - mnt = mntget(mds->mds_vfsmnt); - if (IS_ERR(mnt)) - GOTO(err_mntget, rc = PTR_ERR(mnt)); - - file = dentry_open(mds->mds_pending_dir, mds->mds_vfsmnt, - O_RDONLY | O_LARGEFILE); - if (IS_ERR(file)) - GOTO(err_pop, rc = PTR_ERR(file)); - - CFS_INIT_LIST_HEAD(&dentry_list); - rc = l_readdir(file, &dentry_list); - filp_close(file, 0); - if (rc < 0) - GOTO(err_out, rc); - - list_for_each_entry_safe(dirent, n, &dentry_list, lld_list) { - i++; - list_del(&dirent->lld_list); - - namlen = strlen(dirent->lld_name); - LASSERT(sizeof(d_name) >= namlen + 1); - strcpy(d_name, dirent->lld_name); - inum = dirent->lld_ino; - OBD_FREE(dirent, sizeof(*dirent)); - - CDEBUG(D_INODE, "entry %d of PENDING DIR: %s\n", i, d_name); - - if (((namlen == 1) && !strcmp(d_name, ".")) || - ((namlen == 2) && !strcmp(d_name, "..")) || inum == 0) - continue; - - LOCK_INODE_MUTEX(pending_dir); - dchild = lookup_one_len(d_name, mds->mds_pending_dir, namlen); - if (IS_ERR(dchild)) { - UNLOCK_INODE_MUTEX(pending_dir); - GOTO(err_out, rc = PTR_ERR(dchild)); - } - if (!dchild->d_inode) { - CWARN("%s: orphan %s has already been removed\n", - obd->obd_name, d_name); - GOTO(next, rc = 0); - } - - if (is_bad_inode(dchild->d_inode)) { - CERROR("%s: bad orphan inode found %lu/%u\n", - obd->obd_name, dchild->d_inode->i_ino, - dchild->d_inode->i_generation); - GOTO(next, rc = -ENOENT); - } - - child_inode = dchild->d_inode; - MDS_DOWN_READ_ORPHAN_SEM(child_inode); - if (mds_inode_is_orphan(child_inode) && - mds_orphan_open_count(child_inode)) { - MDS_UP_READ_ORPHAN_SEM(child_inode); - CWARN("%s: orphan %s re-opened during recovery\n", - obd->obd_name, d_name); - GOTO(next, rc = 0); - } - MDS_UP_READ_ORPHAN_SEM(child_inode); - - rc = mds_unlink_orphan(obd, dchild, child_inode, pending_dir); - CDEBUG(D_INODE, "%s: removed orphan %s: rc %d\n", - obd->obd_name, d_name, rc); - if (rc == 0) - item++; - else - rc = 0; -next: - l_dput(dchild); - UNLOCK_INODE_MUTEX(pending_dir); - } - rc = 0; -err_out: - list_for_each_entry_safe(dirent, n, &dentry_list, lld_list) { - list_del(&dirent->lld_list); - OBD_FREE(dirent, sizeof(*dirent)); - } -err_pop: - pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); - if (item > 0) - CWARN("%s: removed %d pending open-unlinked files\n", - obd->obd_name, item); - RETURN(rc); - -err_mntget: - l_dput(mds->mds_pending_dir); - goto err_pop; -} diff --git a/lustre/mds/mds_xattr.c b/lustre/mds/mds_xattr.c deleted file mode 100644 index ce3199d..0000000 --- a/lustre/mds/mds_xattr.c +++ /dev/null @@ -1,358 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * linux/mds/mds_xattr.c - * Lustre Metadata Server (mds) extended attributes handling - * - * Copyright (C) 2004-2005 Cluster File Systems, Inc. - * - * This file is part of the Lustre file system, http://www.lustre.org - * Lustre is a trademark of Cluster File Systems, Inc. - * - * You may have signed or agreed to another license before downloading - * this software. If so, you are bound by the terms and conditions - * of that agreement, and the following does not apply to you. See the - * LICENSE file included with this distribution for more information. - * - * If you did not agree to a different license, then this copy of Lustre - * is open source software; you can redistribute it and/or modify it - * under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * In either case, Lustre is distributed in the hope that it will be - * useful, but WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * license text for more details. - */ - -#ifndef EXPORT_SYMTAB -# define EXPORT_SYMTAB -#endif -#define DEBUG_SUBSYSTEM S_MDS - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "mds_internal.h" - -#ifndef XATTR_NAME_ACL_ACCESS -#define XATTR_NAME_ACL_ACCESS "system.posix_acl_access" -#endif - -static int mds_getxattr_pack_msg(struct ptlrpc_request *req, - struct dentry *de, - struct mds_body *body) -{ - struct inode *inode = de->d_inode; - char *xattr_name; - int size[3] = { sizeof(struct ptlrpc_body), sizeof(*body) }; - int bufcnt = 2, rc = -EOPNOTSUPP, rc2; - - if (inode == NULL) - return -ENOENT; - - if (body->valid & OBD_MD_FLXATTR) { - xattr_name = lustre_msg_string(req->rq_reqmsg, REQ_REC_OFF+1,0); - if (!xattr_name) { - CERROR("can't extract xattr name\n"); - return -EFAULT; - } - - if (!(req->rq_export->exp_connect_flags & OBD_CONNECT_XATTR) && - (strncmp(xattr_name, "user.", 5) == 0)) - return -EOPNOTSUPP; - - if (inode->i_op && inode->i_op->getxattr) - rc = inode->i_op->getxattr(de, xattr_name, NULL, 0); - } else if (body->valid & OBD_MD_FLXATTRLS) { - if (inode->i_op && inode->i_op->listxattr) - rc = inode->i_op->listxattr(de, NULL, 0); - } else { - CERROR("valid bits: "LPX64"\n", body->valid); - return -EINVAL; - } - - if (rc < 0) { - if (rc != -ENODATA && rc != -EOPNOTSUPP) - CWARN("get inode %lu EA size error: %d\n", - inode->i_ino, rc); - bufcnt = 1; - } else { - size[bufcnt++] = min_t(int, body->eadatasize, rc); - } - - if (OBD_FAIL_CHECK(OBD_FAIL_MDS_GETXATTR_PACK)) { - CERROR("failed MDS_GETXATTR_PACK test\n"); - req->rq_status = -ENOMEM; - return -ENOMEM; - } - - rc2 = lustre_pack_reply(req, bufcnt, size, NULL); - if (rc2) - return rc2; - - if (rc < 0) - req->rq_status = rc; - return 0; -} - -static int mds_getxattr_internal(struct obd_device *obd, - struct dentry *dentry, - struct ptlrpc_request *req, - struct mds_body *reqbody) -{ - struct mds_body *repbody; - struct inode *inode = dentry->d_inode; - char *xattr_name; - void *buf = NULL; - int buflen, rc = -EOPNOTSUPP; - ENTRY; - - if (inode == NULL) - GOTO(out, rc = -ENOENT); - - repbody = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, - sizeof(*repbody)); - LASSERT(repbody != NULL); - - buflen = lustre_msg_buflen(req->rq_repmsg, REPLY_REC_OFF + 1); - if (buflen) - buf = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF + 1, buflen); - - if (reqbody->valid & OBD_MD_FLXATTR) { - xattr_name = lustre_msg_string(req->rq_reqmsg, REQ_REC_OFF+1,0); - DEBUG_REQ(D_INODE, req, "getxattr %s", xattr_name); - - if (inode->i_op && inode->i_op->getxattr) { - rc = inode->i_op->getxattr(dentry, xattr_name, - buf, buflen); - } - - if (rc < 0 && rc != -ENODATA && rc != -EOPNOTSUPP && - rc != -ERANGE) - CDEBUG(D_OTHER, "getxattr failed: %d\n", rc); - } else if (reqbody->valid & OBD_MD_FLXATTRLS) { - DEBUG_REQ(D_INODE, req, "listxattr"); - - if (inode->i_op && inode->i_op->listxattr) { - rc = inode->i_op->listxattr(dentry, buf, buflen); - } - if (rc < 0) - CDEBUG(D_OTHER, "listxattr failed: %d\n", rc); - } else - LBUG(); - - if (rc >= 0) { - repbody->eadatasize = rc; - rc = 0; - } -out: - req->rq_status = rc; - RETURN(0); -} - -int mds_getxattr(struct ptlrpc_request *req) -{ - struct mds_obd *mds = mds_req2mds(req); - struct obd_device *obd = req->rq_export->exp_obd; - struct lvfs_run_ctxt saved; - struct dentry *de; - struct mds_body *body; - struct lvfs_ucred uc = {0,}; - int rc = 0; - ENTRY; - - mds_counter_incr(req->rq_export, LPROC_MDS_GETXATTR); - - body = lustre_swab_reqbuf(req, REQ_REC_OFF, sizeof(*body), - lustre_swab_mds_body); - if (body == NULL) - RETURN(-EFAULT); - - rc = mds_init_ucred(&uc, req, REQ_REC_OFF); - if (rc) - GOTO(out_ucred, rc); - - push_ctxt(&saved, &obd->obd_lvfs_ctxt, &uc); - de = mds_fid2dentry(mds, &body->fid1, NULL); - if (IS_ERR(de)) { - rc = req->rq_status = PTR_ERR(de); - GOTO(out_pop, rc); - } - - rc = mds_getxattr_pack_msg(req, de, body); - if (rc != 0 || req->rq_status) - GOTO(out_dput, rc); - - rc = mds_getxattr_internal(obd, de, req, body); - -out_dput: - l_dput(de); -out_pop: - pop_ctxt(&saved, &obd->obd_lvfs_ctxt, &uc); -out_ucred: - mds_exit_ucred(&uc, mds); - return rc; -} - -/* - * alwasy return 0, and set req->rq_status as error number in case - * of failures. - */ -static -int mds_setxattr_internal(struct ptlrpc_request *req, struct mds_body *body) -{ - struct mds_obd *mds = mds_req2mds(req); - struct obd_device *obd = req->rq_export->exp_obd; - struct dentry *de; - struct inode *inode = NULL; - struct lustre_handle lockh; - void *handle = NULL; - char *xattr_name; - char *xattr = NULL; - int xattrlen; - int rc = -EOPNOTSUPP, err = 0; - __u64 lockpart; - ENTRY; - - LASSERT(body); - - DEBUG_REQ(D_INODE, req, "setxattr "LPU64"/%u", - body->fid1.id, body->fid1.generation); - - MDS_CHECK_RESENT(req, mds_reconstruct_generic(req)); - - lockpart = MDS_INODELOCK_UPDATE; - - /* various sanity check for xattr name */ - xattr_name = lustre_msg_string(req->rq_reqmsg, REQ_REC_OFF + 1, 0); - if (!xattr_name) { - CERROR("can't extract xattr name\n"); - GOTO(out, rc = -EPROTO); - } - - DEBUG_REQ(D_INODE, req, "%sxattr %s", - body->valid & OBD_MD_FLXATTR ? "set" : "remove", - xattr_name); - - if (strncmp(xattr_name, "trusted.", 8) == 0) { - if (strcmp(xattr_name + 8, XATTR_LUSTRE_MDS_LOV_EA) == 0) - GOTO(out, rc = -EACCES); - } - - if (!(req->rq_export->exp_connect_flags & OBD_CONNECT_XATTR) && - (strncmp(xattr_name, "user.", 5) == 0)) { - GOTO(out, rc = -EOPNOTSUPP); - } - - if (!strcmp(xattr_name, XATTR_NAME_ACL_ACCESS)) - lockpart |= MDS_INODELOCK_LOOKUP; - - de = mds_fid2locked_dentry(obd, &body->fid1, NULL, LCK_EX, - &lockh, lockpart); - if (IS_ERR(de)) - GOTO(out, rc = PTR_ERR(de)); - - inode = de->d_inode; - LASSERT(inode); - - OBD_FAIL_WRITE(OBD_FAIL_MDS_SETXATTR_WRITE, inode->i_sb); - - /* filter_op simply use setattr one */ - handle = fsfilt_start(obd, inode, FSFILT_OP_SETATTR, NULL); - if (IS_ERR(handle)) - GOTO(out_dput, rc = PTR_ERR(handle)); - - if (body->valid & OBD_MD_FLXATTR) { - if (inode->i_op && inode->i_op->setxattr) { - if (lustre_msg_bufcount(req->rq_reqmsg) < 4) { - CERROR("no xattr data supplied\n"); - GOTO(out_trans, rc = -EFAULT); - } - - xattrlen = lustre_msg_buflen(req->rq_reqmsg, - REQ_REC_OFF + 2); - if (xattrlen) - xattr = lustre_msg_buf(req->rq_reqmsg, - REQ_REC_OFF+2, xattrlen); - - LOCK_INODE_MUTEX(inode); - rc = inode->i_op->setxattr(de, xattr_name, xattr, - xattrlen, body->flags); - UNLOCK_INODE_MUTEX(inode); - } - } else if (body->valid & OBD_MD_FLXATTRRM) { - if (inode->i_op && inode->i_op->removexattr) { - LOCK_INODE_MUTEX(inode); - rc = inode->i_op->removexattr(de, xattr_name); - UNLOCK_INODE_MUTEX(inode); - } - } else { - CERROR("valid bits: "LPX64"\n", body->valid); - rc = -EINVAL; - } - - LASSERT(rc <= 0); -out_trans: - err = mds_finish_transno(mds, inode, handle, req, rc, 0, 0); - -out_dput: - l_dput(de); - if (rc) - ldlm_lock_decref(&lockh, LCK_EX); - else - ptlrpc_save_lock (req, &lockh, LCK_EX); - - if (err && !rc) - rc = err; -out: - req->rq_status = rc; - return 0; -} - -int mds_setxattr(struct ptlrpc_request *req) -{ - struct mds_obd *mds = mds_req2mds(req); - struct obd_device *obd = req->rq_export->exp_obd; - struct lvfs_run_ctxt saved; - struct mds_body *body; - struct lvfs_ucred uc = {0,}; - int rc; - ENTRY; - - mds_counter_incr(req->rq_export, LPROC_MDS_SETXATTR); - - body = lustre_swab_reqbuf(req, REQ_REC_OFF, sizeof(*body), - lustre_swab_mds_body); - if (body == NULL) - RETURN(-EFAULT); - - if (lustre_msg_bufcount(req->rq_reqmsg) < 3) - RETURN(-EFAULT); - - rc = mds_init_ucred(&uc, req, REQ_REC_OFF); - if (rc) - GOTO(out_ucred, rc); - - push_ctxt(&saved, &obd->obd_lvfs_ctxt, &uc); - - rc = lustre_pack_reply(req, 1, NULL, NULL); - if (rc) - GOTO(out_pop, rc); - - rc = mds_setxattr_internal(req, body); - -out_pop: - pop_ctxt(&saved, &obd->obd_lvfs_ctxt, &uc); -out_ucred: - mds_exit_ucred(&uc, mds); - return rc; -} -- 1.8.3.1