/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
 * vim:expandtab:shiftwidth=8:tabstop=8:
 *
 *  Copyright (C) 2002, 2003 Cluster File Systems, Inc.
 *   Author: Phil Schwan <phil@clusterfs.com>
 *           Peter Braam <braam@clusterfs.com>
 *           Mike Shaver <shaver@clusterfs.com>
 *
 *   This file is part of Lustre, http://www.lustre.org.
 *
 *   Lustre is free software; you can redistribute it and/or
 *   modify it under the terms of version 2 of the GNU General Public
 *   License as published by the Free Software Foundation.
 *
 *   Lustre is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with Lustre; if not, write to the Free Software
 *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
26 # define EXPORT_SYMTAB
28 #define DEBUG_SUBSYSTEM S_LOV
30 #include <linux/slab.h>
31 #include <linux/module.h>
32 #include <linux/init.h>
33 #include <linux/slab.h>
34 #include <linux/pagemap.h>
35 #include <asm/div64.h>
36 #include <linux/seq_file.h>
38 #include <liblustre.h>
41 #include <linux/obd_support.h>
42 #include <linux/lustre_lib.h>
43 #include <linux/lustre_net.h>
44 #include <linux/lustre_idl.h>
45 #include <linux/lustre_dlm.h>
46 #include <linux/lustre_mds.h>
47 #include <linux/obd_class.h>
48 #include <linux/obd_lov.h>
49 #include <linux/obd_ost.h>
50 #include <linux/lprocfs_status.h>
52 #include "lov_internal.h"
54 /* Add log records for each OSC that this object is striped over, and return
55 * cookies for each one. We _would_ have nice abstraction here, except that
56 * we need to keep cookies in stripe order, even if some are NULL, so that
57 * the right cookies are passed back to the right OSTs at the client side.
58 * Unset cookies should be all-zero (which will never occur naturally). */
59 static int lov_llog_origin_add(struct llog_ctxt *ctxt, struct llog_rec_hdr *rec,
60 void *buf, struct llog_cookie *logcookies,
61 int numcookies, void *data,
62 struct rw_semaphore **lock, int *lock_count)
64 struct obd_device *obd = ctxt->loc_obd;
65 struct lov_obd *lov = &obd->u.lov;
66 struct lov_oinfo *loi;
67 struct llog_unlink_rec *lur;
68 struct lov_stripe_md *lsm = (struct lov_stripe_md *)buf;
72 OBD_ALLOC(lur, sizeof(*lur));
75 lur->lur_hdr.lrh_len = lur->lur_tail.lrt_len = sizeof(*lur);
76 lur->lur_hdr.lrh_type = MDS_UNLINK_REC;
78 LASSERT(logcookies && numcookies >= lsm->lsm_stripe_count);
80 /* We need this to serialize llog records between parallel unlinks so
81 * we can replay llog records in strict transno and llog order. If
82 * and when we want to make this more scalable we need to lock and
83 * write records in strictly ost_idx order not lsm order. Consider
84 * file 1 on ost_idx [1, 2, 3, 4] and file 2 on ost_idx [3, 4, 1, 2] */
85 down(&lov->lov_llog_sem);
86 for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) {
87 struct obd_device *child;
88 struct llog_ctxt *cctxt;
89 struct lov_tgt_desc *tgt;
91 tgt = lov->tgts + loi->loi_ost_idx;
92 if (!lov_tgt_active(lov, tgt, loi->loi_ost_gen)) {
93 CWARN("lov_llog_origin_add: ost idx %d inactive.\n",
98 child = tgt->ltd_exp->exp_obd;
99 cctxt = llog_get_context(&child->obd_llogs, ctxt->loc_idx);
101 lur->lur_oid = loi->loi_id;
102 lur->lur_ogen = loi->loi_gr;
103 LASSERT(lsm->lsm_object_gr == loi->loi_gr);
104 rc += llog_add(cctxt, &lur->lur_hdr, NULL, logcookies + rc,
105 numcookies - rc, NULL,
106 lock != NULL ? lock + rc : NULL, lock_count);
107 lov_tgt_decref(lov, tgt);
109 up(&lov->lov_llog_sem);
110 OBD_FREE(lur, sizeof(*lur));
115 static int lov_llog_origin_connect(struct llog_ctxt *ctxt, int count,
116 struct llog_logid *logid,
117 struct llog_gen *gen, struct obd_uuid *uuid)
119 struct obd_device *obd = ctxt->loc_obd;
120 struct lov_obd *lov = &obd->u.lov;
121 struct lov_tgt_desc *tgt;
125 LASSERT(lov->desc.ld_tgt_count == count);
126 for (i = 0, tgt = lov->tgts; i < lov->desc.ld_tgt_count; i++, tgt++) {
127 struct obd_device *child;
128 struct llog_ctxt *cctxt;
130 if (!lov_tgt_active(lov, tgt, 0))
133 child = tgt->ltd_exp->exp_obd;
134 cctxt = llog_get_context(&child->obd_llogs, ctxt->loc_idx);
136 if (uuid && !obd_uuid_equals(uuid, &tgt->uuid)) {
137 lov_tgt_decref(lov, tgt);
141 rc = llog_connect(cctxt, 1, logid, gen, uuid);
142 lov_tgt_decref(lov, tgt);
144 CERROR("error osc_llog_connect %d\n", i);
152 /* the replicators commit callback */
153 static int lov_llog_repl_cancel(struct llog_ctxt *ctxt, int count,
154 struct llog_cookie *cookies, int flags,
157 struct lov_stripe_md *lsm = (struct lov_stripe_md *)data;
159 struct obd_device *obd = ctxt->loc_obd;
160 struct lov_oinfo *loi;
164 LASSERT(lsm != NULL);
165 LASSERT(count == lsm->lsm_stripe_count);
167 loi = lsm->lsm_oinfo;
169 for (i = 0; i < count; i++, cookies++, loi++) {
170 struct lov_tgt_desc *tgt = lov->tgts + loi->loi_ost_idx;
171 struct obd_device *child;
172 struct llog_ctxt *cctxt;
175 if (!lov_tgt_ready(lov, tgt, loi->loi_ost_gen)) {
176 CWARN("warning: LOV OST idx %d: inactive.\n",
181 child = tgt->ltd_exp->exp_obd;
182 cctxt = llog_get_context(&child->obd_llogs, ctxt->loc_idx);
183 err = llog_cancel(cctxt, 1, cookies, flags, NULL);
184 lov_tgt_decref(lov, tgt);
186 if (err && lov_tgt_ready(lov, tgt, loi->loi_ost_gen)) {
187 lov_tgt_decref(lov, tgt);
188 CERROR("error: objid "LPX64" subobj "LPX64
189 " on OST idx %d: rc = %d\n", lsm->lsm_object_id,
190 loi->loi_id, loi->loi_ost_idx, err);
198 static struct llog_operations lov_unlink_orig_logops = {
199 lop_add: lov_llog_origin_add,
200 lop_connect: lov_llog_origin_connect
203 static struct llog_operations lov_size_repl_logops = {
204 lop_cancel: lov_llog_repl_cancel
207 int lov_llog_init(struct obd_device *obd, struct obd_llogs *llogs,
208 struct obd_device *tgt, int count, struct llog_catid *logid)
210 struct lov_obd *lov = &obd->u.lov;
211 struct lov_tgt_desc *ctgt;
215 rc = obd_llog_setup(obd, llogs, LLOG_UNLINK_ORIG_CTXT, tgt, 0, NULL,
216 &lov_unlink_orig_logops);
220 rc = obd_llog_setup(obd, llogs, LLOG_SIZE_REPL_CTXT, tgt, 0, NULL,
221 &lov_size_repl_logops);
225 LASSERT(lov->desc.ld_tgt_count == count);
226 for (i = 0, ctgt = lov->tgts; i < lov->desc.ld_tgt_count; i++, ctgt++) {
227 struct obd_device *child;
229 if (!lov_tgt_active(lov, ctgt, 0))
232 child = ctgt->ltd_exp->exp_obd;
233 rc = obd_llog_init(child, &child->obd_llogs, tgt, 1, logid + i);
234 lov_tgt_decref(lov, ctgt);
236 CERROR("error osc_llog_init %d\n", i);
243 int lov_llog_finish(struct obd_device *obd, struct obd_llogs *llogs, int count)
245 struct lov_obd *lov = &obd->u.lov;
246 struct lov_tgt_desc *tgt;
250 rc = obd_llog_cleanup(llog_get_context(llogs, LLOG_UNLINK_ORIG_CTXT));
254 rc = obd_llog_cleanup(llog_get_context(llogs, LLOG_SIZE_REPL_CTXT));
258 if (lov->desc.ld_tgt_count != count) {
259 CERROR("LOV tgt count != passed tgt count (%d != %d)\n",
260 lov->desc.ld_tgt_count, count);
261 count = MIN(lov->desc.ld_tgt_count, count);
263 for (i = 0, tgt = lov->tgts; i < count; i++, tgt++) {
264 struct obd_device *child;
266 if (!lov_tgt_active(lov, tgt, 0))
269 child = tgt->ltd_exp->exp_obd;
270 rc = obd_llog_finish(child, &child->obd_llogs, 1);
271 lov_tgt_decref(lov, tgt);
273 CERROR("osc_llog_finish error; index=%d; rc=%d\n",