/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
 * vim:expandtab:shiftwidth=8:tabstop=8:
 *
 * Copyright (C) 2002, 2003 Cluster File Systems, Inc.
 * Author: Phil Schwan <phil@clusterfs.com>
 *         Peter Braam <braam@clusterfs.com>
 *         Mike Shaver <shaver@clusterfs.com>
 *
 * This file is part of Lustre, http://www.lustre.org.
 *
 * Lustre is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * Lustre is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Lustre; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
26 # define EXPORT_SYMTAB
28 #define DEBUG_SUBSYSTEM S_LOV
30 #include <linux/slab.h>
31 #include <linux/module.h>
32 #include <linux/init.h>
33 #include <linux/slab.h>
34 #include <linux/pagemap.h>
35 #include <asm/div64.h>
36 #include <linux/seq_file.h>
38 #include <liblustre.h>
41 #include <linux/obd_support.h>
42 #include <linux/lustre_lib.h>
43 #include <linux/lustre_net.h>
44 #include <linux/lustre_idl.h>
45 #include <linux/lustre_dlm.h>
46 #include <linux/lustre_mds.h>
47 #include <linux/obd_class.h>
48 #include <linux/obd_lov.h>
49 #include <linux/obd_ost.h>
50 #include <linux/lprocfs_status.h>
52 #include "lov_internal.h"
54 /* Add log records for each OSC that this object is striped over, and return
55 * cookies for each one. We _would_ have nice abstraction here, except that
56 * we need to keep cookies in stripe order, even if some are NULL, so that
57 * the right cookies are passed back to the right OSTs at the client side.
58 * Unset cookies should be all-zero (which will never occur naturally). */
59 static int lov_llog_origin_add(struct llog_ctxt *ctxt, struct llog_rec_hdr *rec,
60 void *buf, struct llog_cookie *logcookies,
61 int numcookies, void *data,
62 struct rw_semaphore **lock, int *lock_count)
64 struct obd_device *obd = ctxt->loc_obd;
65 struct lov_obd *lov = &obd->u.lov;
66 struct lov_oinfo *loi;
67 struct llog_unlink_rec *lur;
68 struct lov_stripe_md *lsm = (struct lov_stripe_md *)buf;
72 OBD_ALLOC(lur, sizeof(*lur));
75 lur->lur_hdr.lrh_len = lur->lur_tail.lrt_len = sizeof(*lur);
76 lur->lur_hdr.lrh_type = MDS_UNLINK_REC;
78 LASSERT(logcookies && numcookies >= lsm->lsm_stripe_count);
80 /* We need this to serialize llog records between parallel unlinks so
81 * we can replay llog records in strict transno and llog order. If
82 * and when we want to make this more scalable we need to lock and
83 * write records in strictly ost_idx order not lsm order. Consider
84 * file 1 on ost_idx [1, 2, 3, 4] and file 2 on ost_idx [3, 4, 1, 2] */
85 down(&lov->lov_llog_sem);
86 for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) {
87 struct obd_device *child =
88 lov->tgts[loi->loi_ost_idx].ltd_exp->exp_obd;
89 struct llog_ctxt *cctxt;
90 cctxt = llog_get_context(&child->obd_llogs, ctxt->loc_idx);
92 lur->lur_oid = loi->loi_id;
93 lur->lur_ogen = loi->loi_gr;
94 LASSERT(lsm->lsm_object_gr == loi->loi_gr);
95 rc += llog_add(cctxt, &lur->lur_hdr, NULL, logcookies + rc,
96 numcookies - rc, NULL,
97 lock != NULL ? lock + rc : NULL, lock_count);
99 up(&lov->lov_llog_sem);
100 OBD_FREE(lur, sizeof(*lur));
105 static int lov_llog_origin_connect(struct llog_ctxt *ctxt, int count,
106 struct llog_logid *logid,
107 struct llog_gen *gen, struct obd_uuid *uuid)
109 struct obd_device *obd = ctxt->loc_obd;
110 struct lov_obd *lov = &obd->u.lov;
111 struct lov_tgt_desc *tgt;
115 LASSERT(lov->desc.ld_tgt_count == count);
116 for (i = 0, tgt = lov->tgts; i < lov->desc.ld_tgt_count; i++, tgt++) {
117 struct obd_device *child;
118 struct llog_ctxt *cctxt;
122 child = tgt->ltd_exp->exp_obd;
124 cctxt = llog_get_context(&child->obd_llogs, ctxt->loc_idx);
125 if (uuid && !obd_uuid_equals(uuid, &lov->tgts[i].uuid))
128 rc = llog_connect(cctxt, 1, logid, gen, uuid);
130 CERROR("error osc_llog_connect %d\n", i);
138 /* the replicators commit callback */
139 static int lov_llog_repl_cancel(struct llog_ctxt *ctxt, int count,
140 struct llog_cookie *cookies, int flags,
143 struct lov_stripe_md *lsm = (struct lov_stripe_md *)data;
145 struct obd_device *obd = ctxt->loc_obd;
146 struct lov_oinfo *loi;
150 LASSERT(lsm != NULL);
151 LASSERT(count == lsm->lsm_stripe_count);
153 loi = lsm->lsm_oinfo;
155 for (i = 0; i < count; i++, cookies++, loi++) {
156 struct obd_device *child =
157 lov->tgts[loi->loi_ost_idx].ltd_exp->exp_obd;
158 struct llog_ctxt *cctxt;
161 cctxt = llog_get_context(&child->obd_llogs, ctxt->loc_idx);
162 err = llog_cancel(cctxt, 1, cookies, flags, NULL);
163 if (err && lov->tgts[loi->loi_ost_idx].active) {
164 CERROR("error: objid "LPX64" subobj "LPX64
165 " on OST idx %d: rc = %d\n", lsm->lsm_object_id,
166 loi->loi_id, loi->loi_ost_idx, err);
174 static struct llog_operations lov_unlink_orig_logops = {
175 lop_add: lov_llog_origin_add,
176 lop_connect: lov_llog_origin_connect
179 static struct llog_operations lov_size_repl_logops = {
180 lop_cancel: lov_llog_repl_cancel
183 int lov_llog_init(struct obd_device *obd, struct obd_llogs *llogs,
184 struct obd_device *tgt, int count, struct llog_catid *logid)
186 struct lov_obd *lov = &obd->u.lov;
187 struct lov_tgt_desc *ctgt;
191 rc = obd_llog_setup(obd, llogs, LLOG_UNLINK_ORIG_CTXT, tgt, 0, NULL,
192 &lov_unlink_orig_logops);
196 rc = obd_llog_setup(obd, llogs, LLOG_SIZE_REPL_CTXT, tgt, 0, NULL,
197 &lov_size_repl_logops);
201 LASSERT(lov->desc.ld_tgt_count == count);
202 for (i = 0, ctgt = lov->tgts; i < lov->desc.ld_tgt_count; i++, ctgt++) {
203 struct obd_device *child;
207 child = ctgt->ltd_exp->exp_obd;
208 rc = obd_llog_init(child, &child->obd_llogs, tgt, 1, logid + i);
210 CERROR("error osc_llog_init %d\n", i);
217 int lov_llog_finish(struct obd_device *obd, struct obd_llogs *llogs, int count)
219 struct lov_obd *lov = &obd->u.lov;
220 struct lov_tgt_desc *tgt;
224 rc = obd_llog_cleanup(llog_get_context(llogs, LLOG_UNLINK_ORIG_CTXT));
228 rc = obd_llog_cleanup(llog_get_context(llogs, LLOG_SIZE_REPL_CTXT));
232 if (lov->desc.ld_tgt_count != count) {
233 CERROR("LOV tgt count != passed tgt count (%d != %d)\n",
234 lov->desc.ld_tgt_count, count);
235 count = MIN(lov->desc.ld_tgt_count, count);
237 for (i = 0, tgt = lov->tgts; i < count; i++, tgt++) {
238 struct obd_device *child;
242 child = tgt->ltd_exp->exp_obd;
243 rc = obd_llog_finish(child, &child->obd_llogs, 1);
245 CERROR("osc_llog_finish error; index=%d; rc=%d\n",