1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (c) 2002 Cluster File Systems, Inc. <info@clusterfs.com>
6 * This file is part of Lustre, http://www.lustre.org.
8 * Lustre is free software; you can redistribute it and/or
9 * modify it under the terms of version 2 of the GNU General Public
10 * License as published by the Free Software Foundation.
12 * Lustre is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Lustre; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22 #define DEBUG_SUBSYSTEM S_CMOBD
24 #include <linux/version.h>
25 #include <linux/init.h>
26 #include <linux/obd_support.h>
27 #include <linux/lustre_lib.h>
28 #include <linux/lustre_net.h>
29 #include <linux/lustre_idl.h>
30 #include <linux/obd_class.h>
31 #include <linux/lustre_log.h>
32 #include <linux/lustre_cmobd.h>
33 #include <linux/lustre_fsfilt.h>
34 #include <linux/lustre_smfs.h>
36 #include "cm_internal.h"
/*
 * Forward declarations for routines that are called from this file but
 * whose definitions do not appear in this part of it.
 * lov_alloc_memmd() is the allocator used by cmobd_dummy_lsm().
 * NOTE(review): lov_free_memmd() is presumably its release counterpart
 * and smfs_rec_unpack() presumably decodes one smfs log record into a
 * buffer and an opcode -- confirm against their definitions.
 */
38 void lov_free_memmd(struct lov_stripe_md **lsmp);
40 int lov_alloc_memmd(struct lov_stripe_md **lsmp, int stripe_count,
43 int smfs_rec_unpack(struct smfs_proc_args *args, char *record,
44 char **pbuf, int *opcode);
46 /* helper functions for cmobd to construct pseudo lsm */
/*
 * Build a placeholder stripe descriptor describing the object in @oa.
 *
 * @lsmp:        receives the descriptor allocated by lov_alloc_memmd()
 * @stripe_cnt:  number of stripes to allocate and fill in
 * @oa:          source of the object id and, when OBD_MD_FLGROUP is set
 *               in oa->o_valid, of the object group
 * @stripe_size: value stored in lsm_stripe_size
 *
 * Every stripe gets the same object id (and group) as @oa, and stripe
 * number i is given loi_ost_idx == i.  rc holds the result of
 * lov_alloc_memmd().
 *
 * NOTE(review): lsm_object_id, lsm_object_gr and lsm_stripe_size are
 * descriptor-wide fields but appear to be assigned inside the per-stripe
 * loop, so they are rewritten with the same value on each pass and would
 * stay unset if @stripe_cnt were 0 -- confirm callers never pass 0.
 */
47 int cmobd_dummy_lsm(struct lov_stripe_md **lsmp, int stripe_cnt,
48 struct obdo *oa, __u32 stripe_size)
/* allocate the descriptor with the cmobd-specific striping pattern */
53 rc = lov_alloc_memmd(lsmp, stripe_cnt, LOV_PATTERN_CMOBD);
57 for (i = 0; i < stripe_cnt; i++) {
58 (*lsmp)->lsm_oinfo[i].loi_id = oa->o_id;
59 (*lsmp)->lsm_object_id = oa->o_id;
/* the group is only copied when the obdo marks it as valid */
60 if (oa->o_valid & OBD_MD_FLGROUP) {
61 (*lsmp)->lsm_oinfo[i].loi_gr = oa->o_gr;
62 (*lsmp)->lsm_object_gr = oa->o_gr;
/* stripe i is mapped onto target index i */
64 (*lsmp)->lsm_oinfo[i].loi_ost_idx = i;
65 (*lsmp)->lsm_stripe_size = stripe_size;
/*
 * Release counterpart of cmobd_dummy_lsm(): the reintegration paths in
 * this file call it on the descriptor they obtained from
 * cmobd_dummy_lsm() once they have finished with it.
 * NOTE(review): presumably a thin wrapper around lov_free_memmd() --
 * confirm against the body.
 */
70 void cmobd_free_lsm(struct lov_stripe_md **lsmp)
77 /* reintegration functions */
/*
 * Replay one setattr record against the master export.
 *
 * @obd: the cmobd device; its master export is the target
 * @rec: record payload, interpreted as a struct obdo
 *
 * A pseudo stripe descriptor is built from the master LOV descriptor
 * (ld_tgt_count stripes of ld_default_stripe_size), passed to
 * obd_setattr() together with the obdo, and released afterwards with
 * cmobd_free_lsm().  rc carries the result of those calls.
 *
 * NOTE(review): exp_obd->u.lov is read without any visible check that
 * the master export really is a LOV, although the in-line remark says
 * this should be verified -- confirm.
 */
78 static int cmobd_setattr_reint(struct obd_device *obd, void *rec)
80 struct obdo *oa = (struct obdo*)rec;
81 struct cm_obd *cmobd = &obd->u.cm;
82 struct obd_export *exp = cmobd->master_exp;
83 struct lov_stripe_md *lsm;
89 * nevertheless ost is not used anymore and lov should be always present
90 * as a object storage export, using ost is still possible (just
91 * deprecated) and we should make sure here, that this is really
94 lov = &cmobd->master_exp->exp_obd->u.lov;
95 rc = cmobd_dummy_lsm(&lsm, lov->desc.ld_tgt_count, oa,
96 (__u32)lov->desc.ld_default_stripe_size);
/* apply the attributes on the master, then drop the temporary lsm */
100 rc = obd_setattr(exp, oa, lsm, NULL);
102 cmobd_free_lsm(&lsm);
/*
 * Replay one object-create record against the master export.
 *
 * @obd: the cmobd device; its master export is the target
 * @rec: record payload, interpreted as a struct obdo
 *
 * Builds a pseudo stripe descriptor from the master LOV descriptor.  If
 * the group in the obdo differs from cmobd->master_group, the new group
 * is first pushed to the master with obd_set_info() under the "mds_conn"
 * key and then cached in cmobd->master_group.  obd_create() is then
 * called with a zero-initialised obd_trans_info, and the descriptor is
 * released with cmobd_free_lsm().
 *
 * NOTE(review): the key length passed to obd_set_info() is
 * strlen("mds_conn") without the terminating NUL, whereas the
 * obd_get_info() call in cmobd_write_reint() passes strlen(...) + 1 --
 * confirm which convention the callee expects.
 * NOTE(review): the -EINVAL path presumably reports a failed
 * obd_set_info(); the condition guarding it is not visible here.
 */
107 static int cmobd_create_reint(struct obd_device *obd, void *rec)
109 struct obdo *oa = (struct obdo *)rec;
110 struct cm_obd *cmobd = &obd->u.cm;
111 struct obd_export *exp = cmobd->master_exp;
112 struct lov_stripe_md *lsm;
113 struct obd_trans_info oti = { 0 };
119 * nevertheless ost is not used anymore and lov should be always present
120 * as a object storage export, using ost is still possible (just
121 * deprecated) and we should make sure here, that this is really
124 lov = &cmobd->master_exp->exp_obd->u.lov;
125 rc = cmobd_dummy_lsm(&lsm, lov->desc.ld_tgt_count, oa,
126 (__u32)lov->desc.ld_default_stripe_size);
/* switch the master connection to the record's group if it changed */
129 if (cmobd->master_group != oa->o_gr) {
130 int group = oa->o_gr;
131 int valsize = sizeof(group);
132 rc = obd_set_info(exp, strlen("mds_conn"), "mds_conn",
135 GOTO(out, rc = -EINVAL);
/* remember the group so later records for it skip the set_info call */
136 cmobd->master_group = oa->o_gr;
138 rc = obd_create(exp, oa, NULL, 0, &lsm, &oti);
140 cmobd_free_lsm(&lsm);
145 /* direct cut-n-paste of filter_blocking_ast() */
/*
 * Blocking callback installed on the extent lock that
 * cmobd_write_extents() takes in the cache namespace.
 *
 * @lock: the lock the callback is invoked for
 * @desc: lock descriptor supplied by the caller (not used in the
 *        statements visible here)
 * @data: opaque callback data (not used in the statements visible here)
 * @flag: LDLM_CB_CANCELING is explicitly a no-op; other values go
 *        through the blocking path below
 *
 * Blocking path: under the namespace lock, re-check that this function
 * is still the lock's l_blocking_ast, set LDLM_FL_CBPENDING, and record
 * in do_ast whether the lock currently has neither readers nor writers.
 * After the namespace lock is dropped, a lock that was found unused is
 * cancelled through ldlm_cli_cancel(); otherwise only a debug message
 * is logged.
 */
146 static int cache_blocking_ast(struct ldlm_lock *lock,
147 struct ldlm_lock_desc *desc,
148 void *data, int flag)
153 if (flag == LDLM_CB_CANCELING) {
154 /* Don't need to do anything here. */
158 /* XXX layering violation! -phil */
159 l_lock(&lock->l_resource->lr_namespace->ns_lock);
160 /* Get this: if filter_blocking_ast is racing with ldlm_intent_policy,
161 * such that filter_blocking_ast is called just before l_i_p takes the
162 * ns_lock, then by the time we get the lock, we might not be the
163 * correct blocking function anymore. So check, and return early, if
165 if (lock->l_blocking_ast != cache_blocking_ast) {
166 l_unlock(&lock->l_resource->lr_namespace->ns_lock);
170 lock->l_flags |= LDLM_FL_CBPENDING;
171 do_ast = (!lock->l_readers && !lock->l_writers);
172 l_unlock(&lock->l_resource->lr_namespace->ns_lock);
175 struct lustre_handle lockh;
178 LDLM_DEBUG(lock, "already unused, calling ldlm_cli_cancel");
179 ldlm_lock2handle(lock, &lockh);
180 rc = ldlm_cli_cancel(&lockh);
182 CERROR("ldlm_cli_cancel: %d\n", rc);
184 LDLM_DEBUG(lock, "Lock still has references, will be "
/*
 * Blocking callback passed to obd_enqueue() for the write lock that
 * cmobd_write_extents() takes on the master export.
 *
 * @lock: the lock the callback is invoked for
 * @desc: lock descriptor supplied by the caller (not used in the
 *        statements visible here)
 * @data: opaque callback data (not used in the statements visible here)
 * @flag: LDLM_CB_BLOCKING converts the lock to a handle and cancels it
 *        with ldlm_cli_cancel(), logging the result at D_INODE level;
 *        LDLM_CB_CANCELING does nothing
 */
190 static int master_blocking_ast(struct ldlm_lock *lock,
191 struct ldlm_lock_desc *desc,
192 void *data, int flag)
195 struct lustre_handle lockh;
199 case LDLM_CB_BLOCKING:
200 ldlm_lock2handle(lock, &lockh);
201 rc = ldlm_cli_cancel(&lockh);
203 CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc);
207 case LDLM_CB_CANCELING:
208 /* do nothing here by now */
/*
 * Replay one written extent of an object from the cache to the master.
 *
 * @obd:    the cmobd device (provides the cache and master exports)
 * @oa:     object being replayed; o_id and o_gr form the lock resource
 * @extent: byte range to replay; copied into the lock policy
 *
 * Visible sequence:
 *   1. take an LCK_PR extent lock on the object in the cache namespace
 *      (blocking callback: cache_blocking_ast);
 *   2. build a pseudo stripe descriptor from the master LOV descriptor;
 *   3. take an LCK_PW extent lock on the master through obd_enqueue()
 *      (blocking callback: master_blocking_ast);
 *   4. call cmobd_replay_write() for the locked range;
 *   5. cancel the master lock, cancel unused master locks, free the
 *      descriptor and drop the reference on the cache lock.
 *
 * NOTE(review): the result of cmobd_replay_write() is kept in err while
 * rc is reused by the following obd_cancel() and obd_cancel_unused()
 * calls -- confirm err is propagated on every exit path.
 * NOTE(review): the same flags variable is passed to both enqueue
 * calls; no reset between them is visible -- confirm that is intended.
 * NOTE(review): only name[0] and name[1] of res_id are assigned in the
 * visible statements -- confirm the rest is cleared beforehand.
 */
216 static int cmobd_write_extents(struct obd_device *obd, struct obdo *oa,
217 struct ldlm_extent *extent)
219 struct cm_obd *cmobd = &obd->u.cm;
220 struct obd_device *cache = cmobd->cache_exp->exp_obd;
221 struct lustre_handle lockh_src = { 0 };
222 struct lustre_handle lockh_dst = { 0 };
223 struct ldlm_res_id res_id;
224 ldlm_policy_data_t policy;
225 struct lov_stripe_md *lsm;
226 int flags = 0, err, rc = 0;
230 /* XXX for debug write replay without smfs and kml */
231 res_id.name[0]= oa->o_id;
232 res_id.name[1]= oa->o_gr;
233 policy.l_extent.start = extent->start;
234 policy.l_extent.end = extent->end;
236 /* get extent read lock on the source replay file */
237 rc = ldlm_cli_enqueue(NULL, NULL, cache->obd_namespace, res_id,
238 LDLM_EXTENT, &policy, LCK_PR,
239 &flags, cache_blocking_ast, ldlm_completion_ast,
240 NULL, NULL, NULL, 0, NULL, &lockh_src);
245 * nevertheless ost is not used anymore and lov should be always present
246 * as a object storage export, using ost is still possible (just
247 * deprecated) and we should make sure here, that this is really
250 lov = &cmobd->master_exp->exp_obd->u.lov;
252 /* construct the pseudo lsm */
255 * it is not good to access lov fields like @desc directly. This is
256 * layering violation. It should be accessed via some interface method,
257 * like llite does. --umka
259 rc = cmobd_dummy_lsm(&lsm, lov->desc.ld_tgt_count, oa,
260 (__u32)lov->desc.ld_default_stripe_size);
/* write lock on the destination object for the same extent */
264 rc = obd_enqueue(cmobd->master_exp, lsm, LDLM_EXTENT, &policy,
265 LCK_PW, &flags, master_blocking_ast,
266 ldlm_completion_ast, NULL,
267 NULL, 0, NULL, &lockh_dst);
/* replay result is kept apart from rc, which the cancel calls reuse */
271 err = cmobd_replay_write(obd, oa, &policy.l_extent);
273 rc = obd_cancel(cmobd->master_exp, lsm, LCK_PW, &lockh_dst);
276 /* XXX in fact, I just want to cancel the only lockh_dst
278 rc = obd_cancel_unused(cmobd->master_exp, lsm, 0, NULL);
282 cmobd_free_lsm(&lsm);
284 ldlm_lock_decref(&lockh_src, LCK_PR);
/*
 * Replay a write record: push every written extent of one cached inode
 * to the master.
 *
 * @obd: the cmobd device
 * @rec: record payload, interpreted as a struct obdo; the inode number
 *       is read from the start of oa->o_inline
 *
 * The cache super block pointer is fetched from the cache export with
 * obd_get_info() under the "cache_sb" key, the extent list of the inode
 * is obtained with fsfilt_get_ino_write_extents(), each entry is handed
 * to cmobd_write_extents(), and the list is finally returned with
 * fsfilt_free_write_extents().
 *
 * NOTE(review): the return value of obd_get_info() is not captured, so
 * csb is used unchecked in the visible statements.
 * NOTE(review): the loop counts down size, the variable that is also
 * passed to obd_get_info() as the value length -- confirm it is set to
 * ext_num before the loop.
 * NOTE(review): the inode number is read through an int pointer cast
 * and widened to unsigned long -- confirm the record stores it as int.
 */
288 static int cmobd_write_reint(struct obd_device *obd, void *rec)
290 struct cm_obd *cmobd = &obd->u.cm;
291 struct obd_device *cache = cmobd->cache_exp->exp_obd;
292 struct obdo *oa = (struct obdo *)rec;
293 struct ldlm_extent *extent = NULL;
294 unsigned long csb, ino;
295 char *extents_buf = NULL;
296 int size = 0, rc = 0, ext_num = 0;
/* ask the cache export for its super block; csb is used as a pointer */
301 obd_get_info(cmobd->cache_exp, strlen("cache_sb") + 1,
302 "cache_sb", &size, &csb);
304 ino = *(int*)(&oa->o_inline[0]);
305 rc = fsfilt_get_ino_write_extents(cache, (struct super_block *)csb, ino,
306 &extents_buf, &ext_num);
/* the returned buffer is walked as an array of struct ldlm_extent */
309 extent = (struct ldlm_extent *)extents_buf;
311 while (extent && size --) {
312 rc = cmobd_write_extents(obd, oa, extent);
319 fsfilt_free_write_extents(cache, (struct super_block *)csb,
320 ino, extents_buf, ext_num);
324 int cmobd_reint_oss(struct obd_device *obd, void *record, int opcode)
328 return cmobd_create_reint(obd, record);
330 return cmobd_setattr_reint(obd, record);
332 return cmobd_write_reint(obd, record);
334 CERROR("unrecognized oss reint opcode %d\n",