4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2012, Whamcloud, Inc.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
36 * lustre/ofd/ofd_obd.c
38 * Author: Andreas Dilger <adilger@whamcloud.com>
39 * Author: Alex Zhuravlev <bzzz@whamcloud.com>
40 * Author: Mike Pershin <tappro@whamcloud.com>
43 #define DEBUG_SUBSYSTEM S_FILTER
45 #include "ofd_internal.h"
46 #include <obd_cksum.h>
/**
 * Process the connect data a client sent for export \a exp.
 *
 * Visible steps, in order:
 *  - log the client's connect flags, version, grant and index;
 *  - warn if the export already has a non-zero object group that differs
 *    from data->ocd_group, and store data->ocd_group in fed_group;
 *  - mask data->ocd_connect_flags with OST_CONNECT_SUPPORTED, copy the
 *    result to exp->exp_connect_flags and set data->ocd_version to
 *    LUSTRE_VERSION_CODE;
 *  - handle the OBD_CONNECT_INDEX, OBD_CONNECT_BRW_SIZE, OBD_CONNECT_CKSUM
 *    and OBD_CONNECT_MAXBYTES flags (see the comments below).
 *
 * \param env   execution environment, passed to lut_server_data_update()
 * \param exp   export being connected or reconnected
 * \param data  connect data; read and updated in place
 *
 * NOTE(review): this view of the function has gaps (the embedded original
 * line numbers are not contiguous), so braces, return statements and the
 * tails of some calls are not visible here. Return values of the error
 * branches cannot be confirmed from this view.
 */
48 static int ofd_parse_connect_data(const struct lu_env *env,
49 struct obd_export *exp,
50 struct obd_connect_data *data)
52 struct ofd_device *ofd = ofd_exp(exp);
53 struct filter_export_data *fed = &exp->exp_filter_data;
58 CDEBUG(D_RPCTRACE, "%s: cli %s/%p ocd_connect_flags: "LPX64
59 " ocd_version: %x ocd_grant: %d ocd_index: %u\n",
60 exp->exp_obd->obd_name, exp->exp_client_uuid.uuid, exp,
61 data->ocd_connect_flags, data->ocd_version,
62 data->ocd_grant, data->ocd_index);
/* The export was bound to a different, non-zero object group before. */
64 if (fed->fed_group != 0 && fed->fed_group != data->ocd_group) {
65 CWARN("!!! This export (nid %s) used object group %d "
66 "earlier; now it's trying to use group %d! This could "
67 "be a bug in the MDS. Please report to "
68 "http://bugs.whamcloud.com/\n",
69 obd_export_nid2str(exp), fed->fed_group,
73 fed->fed_group = data->ocd_group;
/* Drop every flag that is not in OST_CONNECT_SUPPORTED, remember the
 * remaining flags on the export and store our version code in \a data. */
75 data->ocd_connect_flags &= OST_CONNECT_SUPPORTED;
76 exp->exp_connect_flags = data->ocd_connect_flags;
77 data->ocd_version = LUSTRE_VERSION_CODE;
79 /* Kindly make sure the SKIP_ORPHAN flag is from MDS. */
80 if (data->ocd_connect_flags & OBD_CONNECT_MDS)
81 CDEBUG(D_HA, "%s: Received MDS connection for group %u\n",
82 exp->exp_obd->obd_name, data->ocd_group);
83 else if (data->ocd_connect_flags & OBD_CONNECT_SKIP_ORPHAN)
/* NOTE(review): the body of the SKIP_ORPHAN branch above is not visible in
 * this view. */
/* OST index handling: while OBD_COMPAT_OST is not yet set in
 * lsd_feature_compat the client-supplied index is adopted and the server
 * data is updated; otherwise an index that differs from lsd_ost_index is
 * reported on the console. */
86 if (data->ocd_connect_flags & OBD_CONNECT_INDEX) {
87 struct lr_server_data *lsd = &ofd->ofd_lut.lut_lsd;
88 int index = lsd->lsd_ost_index;
90 if (!(lsd->lsd_feature_compat & OBD_COMPAT_OST)) {
91 /* this will only happen on the first connect */
92 lsd->lsd_ost_index = data->ocd_index;
93 lsd->lsd_feature_compat |= OBD_COMPAT_OST;
94 /* sync is not needed here as lut_client_add will
95 * set exp_need_sync flag */
96 lut_server_data_update(env, &ofd->ofd_lut, 0);
97 } else if (index != data->ocd_index) {
98 LCONSOLE_ERROR_MSG(0x136, "Connection from %s to index"
99 " %u doesn't match actual OST index"
100 " %u in last_rcvd file, bad "
102 obd_export_nid2str(exp), index,
/* Bulk I/O size: when OBD_FAIL_CHECK(OBD_FAIL_OST_BRW_SIZE) fires the size
 * is forced to 65536 (presumably a fault-injection hook - confirm);
 * otherwise, if the client set OBD_CONNECT_BRW_SIZE, the size is limited
 * to PTLRPC_MAX_BRW_PAGES << CFS_PAGE_SHIFT and a resulting size of zero
 * is logged as an error. */
108 if (OBD_FAIL_CHECK(OBD_FAIL_OST_BRW_SIZE)) {
109 data->ocd_brw_size = 65536;
110 } else if (data->ocd_connect_flags & OBD_CONNECT_BRW_SIZE) {
111 data->ocd_brw_size = min(data->ocd_brw_size,
112 (__u32)(PTLRPC_MAX_BRW_PAGES << CFS_PAGE_SHIFT));
113 if (data->ocd_brw_size == 0) {
114 CERROR("%s: cli %s/%p ocd_connect_flags: "LPX64
115 " ocd_version: %x ocd_grant: %d ocd_index: %u "
116 "ocd_brw_size is unexpectedly zero, "
117 "network data corruption?"
118 "Refusing connection of this client\n",
119 exp->exp_obd->obd_name,
120 exp->exp_client_uuid.uuid,
121 exp, data->ocd_connect_flags, data->ocd_version,
122 data->ocd_grant, data->ocd_index);
/* Checksum negotiation: the client's ocd_cksum_types is saved for logging,
 * then masked with cksum_types_supported(); an empty result is logged as
 * an error. The later CRC32 message covers clients without
 * OBD_CONNECT_CKSUM. */
127 if (data->ocd_connect_flags & OBD_CONNECT_CKSUM) {
128 __u32 cksum_types = data->ocd_cksum_types;
130 /* The client set in ocd_cksum_types the checksum types it
131 * supports. We have to mask off the algorithms that we don't
133 data->ocd_cksum_types &= cksum_types_supported();
135 if (unlikely(data->ocd_cksum_types == 0)) {
136 CERROR("%s: Connect with checksum support but no "
137 "ocd_cksum_types is set\n",
138 exp->exp_obd->obd_name);
142 CDEBUG(D_RPCTRACE, "%s: cli %s supports cksum type %x, return "
143 "%x\n", exp->exp_obd->obd_name, obd_export_nid2str(exp),
144 cksum_types, data->ocd_cksum_types);
146 /* This client does not support OBD_CONNECT_CKSUM
147 * fall back to CRC32 */
148 CDEBUG(D_RPCTRACE, "%s: cli %s does not support "
149 "OBD_CONNECT_CKSUM, CRC32 will be used\n",
150 exp->exp_obd->obd_name, obd_export_nid2str(exp));
/* Report the device's ddp_maxbytes to clients that asked for it. */
153 if (data->ocd_connect_flags & OBD_CONNECT_MAXBYTES)
154 data->ocd_maxbytes = ofd->ofd_dt_conf.ddp_maxbytes;
/**
 * Reconnect handler (installed as .o_reconnect in ofd_obd_ops).
 *
 * Checks that \a exp, \a obd and \a cluuid are non-NULL, refills the
 * environment with lu_env_refill(), initialises the per-thread OFD info
 * for \a exp and re-runs ofd_parse_connect_data() on \a data.
 *
 * \param env        execution environment (const is cast away for the refill)
 * \param exp        existing export being reconnected
 * \param obd        OBD device
 * \param cluuid     client UUID
 * \param data       connect data, passed to ofd_parse_connect_data()
 * \param localdata  not referenced in the visible lines
 *
 * NOTE(review): the statements taken on the NULL check, on refill failure
 * and the final return are not visible in this view.
 */
159 static int ofd_obd_reconnect(const struct lu_env *env, struct obd_export *exp,
160 struct obd_device *obd, struct obd_uuid *cluuid,
161 struct obd_connect_data *data, void *localdata)
167 if (exp == NULL || obd == NULL || cluuid == NULL)
170 rc = lu_env_refill((struct lu_env *)env);
172 CERROR("Failure to refill session: '%d'\n", rc);
176 ofd_info_init(env, exp);
177 rc = ofd_parse_connect_data(env, exp, data);
/**
 * Connect handler (installed as .o_connect in ofd_obd_ops).
 *
 * Visible sequence: validate the pointer arguments, create the connection
 * with class_connect() and turn the handle into an export, refill the
 * environment, initialise the per-thread OFD info, negotiate the connect
 * data with ofd_parse_connect_data(), initialise per-export statistics,
 * register a new client record when the device is replayable, and load
 * the object group when it exceeds ofd_max_group.
 *
 * \param env        execution environment
 * \param _exp       checked for NULL; NOTE(review): where the new export is
 *                   stored through it is not visible in this view
 * \param obd        OBD device being connected to
 * \param cluuid     client UUID, copied into the client record
 * \param data       connect data, negotiated in place
 * \param localdata  passed to ofd_export_stats_init()
 *
 * NOTE(review): declarations of rc/group, the error checks after each call
 * and the return statements are not visible in this view.
 */
182 static int ofd_obd_connect(const struct lu_env *env, struct obd_export **_exp,
183 struct obd_device *obd, struct obd_uuid *cluuid,
184 struct obd_connect_data *data, void *localdata)
186 struct obd_export *exp;
187 struct ofd_device *ofd;
188 struct lustre_handle conn = { 0 };
193 if (_exp == NULL || obd == NULL || cluuid == NULL)
196 ofd = ofd_dev(obd->obd_lu_dev);
198 rc = class_connect(&conn, obd, cluuid);
202 exp = class_conn2export(&conn);
203 LASSERT(exp != NULL);
205 rc = lu_env_refill((struct lu_env *)env);
207 CERROR("Failure to refill session: '%d'\n", rc);
211 ofd_info_init(env, exp);
213 rc = ofd_parse_connect_data(env, exp, data);
217 ofd_export_stats_init(ofd, exp, localdata);
218 group = data->ocd_group;
/* Replayable device: copy the client UUID into the export's client data
 * (ted_lcd) and register it with lut_client_new(). */
219 if (obd->obd_replayable) {
220 struct tg_export_data *ted = &exp->exp_target_data;
222 memcpy(ted->ted_lcd->lcd_uuid, cluuid,
223 sizeof(ted->ted_lcd->lcd_uuid));
224 rc = lut_client_new(env, exp);
/* A group number above the current maximum raises ofd_max_group and loads
 * that group. */
232 if (group > ofd->ofd_max_group) {
233 ofd->ofd_max_group = group;
234 rc = ofd_group_load(env, ofd, group);
/* NOTE(review): presumably the error-path cleanup; the label/condition
 * guarding this call is not visible here - confirm. */
238 class_disconnect(exp);
/**
 * Disconnect handler (installed as .o_disconnect in ofd_obd_ops).
 *
 * Holds an extra reference on \a exp while it calls
 * server_disconnect_export(), sets up a local lu_env with LCT_DT_THREAD
 * and, for a replayable device, deletes the client record with
 * lut_client_del() when the device is not failing over or the export has
 * failed. The extra reference is dropped at the end.
 *
 * \param exp  export being disconnected
 *
 * NOTE(review): error handling between the calls, finalisation of the
 * local env and the return statement are not visible in this view.
 */
246 static int ofd_obd_disconnect(struct obd_export *exp)
254 class_export_get(exp);
256 rc = server_disconnect_export(exp);
258 rc = lu_env_init(&env, LCT_DT_THREAD);
262 /* Do not erase record for recoverable client. */
263 if (exp->exp_obd->obd_replayable &&
264 (!exp->exp_obd->obd_fail || exp->exp_failed))
265 lut_client_del(&env, exp);
268 class_export_put(exp);
/**
 * Export initialisation hook (installed as .o_init_export in ofd_obd_ops).
 *
 * Initialises the filter export data (fed_lock, fed_mod_list), marks the
 * export as connecting under exp_lock, and - unless this is the device's
 * self-export - allocates client data with lut_client_alloc() and calls
 * ldlm_init_export().
 *
 * \param exp  export being initialised
 *
 * NOTE(review): the conditions around ldlm_init_export()/CERROR and the
 * return statements are not visible in this view.
 */
272 static int ofd_init_export(struct obd_export *exp)
276 cfs_spin_lock_init(&exp->exp_filter_data.fed_lock);
277 CFS_INIT_LIST_HEAD(&exp->exp_filter_data.fed_mod_list);
278 cfs_spin_lock(&exp->exp_lock);
279 exp->exp_connecting = 1;
280 cfs_spin_unlock(&exp->exp_lock);
282 /* self-export doesn't need client data and ldlm initialization */
283 if (unlikely(obd_uuid_equals(&exp->exp_obd->obd_uuid,
284 &exp->exp_client_uuid)))
287 rc = lut_client_alloc(exp);
289 ldlm_init_export(exp);
291 CERROR("%s: Can't initialize export: rc %d\n",
292 exp->exp_obd->obd_name, rc);
/**
 * Export destruction hook (installed as .o_destroy_export in ofd_obd_ops).
 *
 * Logs an error if the export still has a non-zero fed_pending, calls
 * target_destroy_export(), and - unless this is the device's self-export
 * (client UUID equals the device UUID) - tears down the LDLM export state
 * and frees the client data. Finally asserts that fed_mod_list is empty.
 *
 * \param exp  export being destroyed
 *
 * NOTE(review): the statement taken for the self-export case and the
 * return statement are not visible in this view.
 */
296 static int ofd_destroy_export(struct obd_export *exp)
298 if (exp->exp_filter_data.fed_pending)
299 CERROR("%s: cli %s/%p has %lu pending on destroyed export"
300 "\n", exp->exp_obd->obd_name, exp->exp_client_uuid.uuid,
301 exp, exp->exp_filter_data.fed_pending);
303 target_destroy_export(exp);
305 if (unlikely(obd_uuid_equals(&exp->exp_obd->obd_uuid,
306 &exp->exp_client_uuid)))
309 ldlm_destroy_export(exp);
310 lut_client_free(exp);
312 LASSERT(cfs_list_empty(&exp->exp_filter_data.fed_mod_list));
/**
 * Post-recovery hook (installed as .o_postrecov in ofd_obd_ops).
 *
 * Initialises a local lu_env with LCT_DT_THREAD, sets up the per-thread
 * OFD info for the device's self-export and forwards to the lu_device's
 * ldo_recovery_complete() operation.
 *
 * \param obd  OBD device whose recovery has finished
 *
 * NOTE(review): the check of the lu_env_init() result, env finalisation
 * and the return statement are not visible in this view.
 */
316 int ofd_obd_postrecov(struct obd_device *obd)
319 struct lu_device *ldev = obd->obd_lu_dev;
324 rc = lu_env_init(&env, LCT_DT_THREAD);
327 ofd_info_init(&env, obd->obd_self_export);
329 rc = ldev->ld_ops->ldo_recovery_complete(&env, ldev);
/**
 * Handle the KEY_MDS_CONN set_info key (called from ofd_set_info_async()).
 *
 * The only visible action is a console warning naming the device and the
 * peer NID.
 *
 * \param exp  export the request arrived on
 * \param val  key value; not referenced in the visible lines
 *
 * NOTE(review): the rest of the body, including the return value, is not
 * visible in this view.
 */
334 static int ofd_set_mds_conn(struct obd_export *exp, void *val)
340 LCONSOLE_WARN("%s: received MDS connection from %s\n",
341 exp->exp_obd->obd_name, obd_export_nid2str(exp));
/**
 * set_info handler (installed as .o_set_info_async in ofd_obd_ops).
 *
 * Dispatches on \a key:
 *  - KEY_CAPA_KEY: update the capability key via ofd_update_capa_key();
 *  - KEY_MDS_CONN: forward to ofd_set_mds_conn();
 *  - anything else: log an "Unsupported key" error.
 *
 * \param env     execution environment (not referenced in the visible lines)
 * \param exp     export the request arrived on
 * \param keylen  key length (presumably consumed by KEY_IS() - confirm)
 * \param key     key string
 * \param vallen  value length (not referenced in the visible lines)
 * \param val     value, passed on to the per-key handler
 * \param set     request set (not referenced in the visible lines)
 *
 * NOTE(review): ofd_exp(exp) is evaluated in the initializer before the
 * exp->exp_obd NULL check below; confirm that ofd_exp() is safe for an
 * export without an obd. Return statements are not visible in this view.
 */
345 static int ofd_set_info_async(const struct lu_env *env, struct obd_export *exp,
346 __u32 keylen, void *key, __u32 vallen, void *val,
347 struct ptlrpc_request_set *set)
349 struct ofd_device *ofd = ofd_exp(exp);
354 if (exp->exp_obd == NULL) {
355 CDEBUG(D_IOCTL, "invalid export %p\n", exp);
359 if (KEY_IS(KEY_CAPA_KEY)) {
360 rc = ofd_update_capa_key(ofd, val);
362 CERROR("ofd update capability key failed: %d\n", rc);
363 } else if (KEY_IS(KEY_MDS_CONN)) {
364 rc = ofd_set_mds_conn(exp, val);
366 CERROR("%s: Unsupported key %s\n",
367 exp->exp_obd->obd_name, (char*)key);
/**
 * get_info handler (installed as .o_get_info in ofd_obd_ops).
 *
 * Supported keys, as visible here:
 *  - KEY_BLOCKSIZE:      1 << ddp_block_shift, returned as __u32;
 *  - KEY_BLOCKSIZE_BITS: ddp_block_shift, returned as __u32;
 *  - KEY_LAST_ID:        ofd_last_id() for the export's fed_group,
 *                        returned as obd_id.
 * For each key *vallen is compared against the size of the result type
 * before the value is written, and *vallen is then set to that size.
 * Any other key is logged as "Not supported key".
 *
 * \param env     execution environment (not referenced in the visible lines)
 * \param exp     export the request arrived on
 * \param keylen  key length (presumably consumed by KEY_IS() - confirm)
 * \param key     key string
 * \param vallen  in: size of the buffer at \a val; out: size of the result
 * \param val     output buffer
 * \param lsm     not referenced in the visible lines
 *
 * NOTE(review): the statements taken when *vallen is too small and the
 * return statements are not visible in this view. As in
 * ofd_set_info_async(), ofd_exp(exp) runs before the exp->exp_obd NULL
 * check.
 */
374 static int ofd_get_info(const struct lu_env *env, struct obd_export *exp,
375 __u32 keylen, void *key, __u32 *vallen, void *val,
376 struct lov_stripe_md *lsm)
378 struct ofd_device *ofd = ofd_exp(exp);
383 if (exp->exp_obd == NULL) {
384 CDEBUG(D_IOCTL, "invalid client export %p\n", exp);
388 if (KEY_IS(KEY_BLOCKSIZE)) {
389 __u32 *blocksize = val;
391 if (*vallen < sizeof(*blocksize))
393 *blocksize = 1 << ofd->ofd_dt_conf.ddp_block_shift;
395 *vallen = sizeof(*blocksize);
396 } else if (KEY_IS(KEY_BLOCKSIZE_BITS)) {
397 __u32 *blocksize_bits = val;
398 if (blocksize_bits) {
399 if (*vallen < sizeof(*blocksize_bits))
401 *blocksize_bits = ofd->ofd_dt_conf.ddp_block_shift;
403 *vallen = sizeof(*blocksize_bits);
404 } else if (KEY_IS(KEY_LAST_ID)) {
405 obd_id *last_id = val;
407 if (*vallen < sizeof(*last_id))
409 *last_id = ofd_last_id(ofd,
410 exp->exp_filter_data.fed_group);
412 *vallen = sizeof(*last_id);
414 CERROR("Not supported key %s\n", (char*)key);
421 /** helper function for statfs, also used by grant code */
/*
 * Fills \a osfs by calling dt_statfs() on the underlying OSD device
 * (ofd->ofd_osd).
 *
 * \param env         execution environment
 * \param ofd         OFD device
 * \param osfs        statfs buffer to fill
 * \param max_age     not referenced in the visible lines
 * \param from_cache  not referenced in the visible lines; ofd_statfs()
 *                    passes NULL for it
 *
 * NOTE(review): only the dt_statfs() call is visible in this view; any
 * handling of max_age/from_cache and the return statement cannot be
 * confirmed from here.
 */
422 int ofd_statfs_internal(const struct lu_env *env, struct ofd_device *ofd,
423 struct obd_statfs *osfs, __u64 max_age, int *from_cache)
427 rc = dt_statfs(env, ofd->ofd_osd, osfs);
/**
 * statfs handler (installed as .o_statfs in ofd_obd_ops).
 *
 * Obtains the statistics through ofd_statfs_internal() (with a NULL
 * from_cache pointer) and then applies two OBD_FAIL_CHECK_VALUE hooks
 * keyed on this OST's index: OBD_FAIL_OST_ENOSPC forces os_bfree and
 * os_bavail to 2; OBD_FAIL_OST_ENOINO is also checked.
 *
 * \param env      execution environment
 * \param exp      export the request arrived on
 * \param osfs     statfs buffer to fill
 * \param max_age  forwarded to ofd_statfs_internal()
 * \param flags    not referenced in the visible lines
 *
 * NOTE(review): the action taken for OBD_FAIL_OST_ENOINO, the check of rc
 * after ofd_statfs_internal() and the return statement are not visible in
 * this view.
 */
434 static int ofd_statfs(const struct lu_env *env, struct obd_export *exp,
435 struct obd_statfs *osfs, __u64 max_age, __u32 flags)
437 struct ofd_device *ofd = ofd_dev(exp->exp_obd->obd_lu_dev);
442 rc = ofd_statfs_internal(env, ofd, osfs, max_age, NULL);
446 if (OBD_FAIL_CHECK_VALUE(OBD_FAIL_OST_ENOSPC,
447 ofd->ofd_lut.lut_lsd.lsd_ost_index))
448 osfs->os_bfree = osfs->os_bavail = 2;
450 if (OBD_FAIL_CHECK_VALUE(OBD_FAIL_OST_ENOINO,
451 ofd->ofd_lut.lut_lsd.lsd_ost_index))
/**
 * Sync handler.
 *
 * When \a oinfo carries no object attributes, or the attributes lack
 * OBD_MD_FLID, the whole underlying OSD device is synced with dt_sync().
 *
 * \param env    execution environment
 * \param exp    export the request arrived on
 * \param oinfo  request info; oi_oa may be NULL
 * \param start  not referenced in the visible lines
 * \param end    not referenced in the visible lines
 * \param set    not referenced in the visible lines
 *
 * NOTE(review): handling of the case where an object id IS specified, and
 * the return statement, are not visible in this view.
 */
458 static int ofd_sync(const struct lu_env *env, struct obd_export *exp,
459 struct obd_info *oinfo, obd_size start, obd_size end,
460 struct ptlrpc_request_set *set)
462 struct ofd_device *ofd = ofd_exp(exp);
467 /* if no objid is specified, it means "sync whole filesystem" */
468 if (oinfo->oi_oa == NULL || !(oinfo->oi_oa->o_valid & OBD_MD_FLID)) {
469 rc = dt_sync(env, ofd->ofd_osd);
/**
 * ioctl handler (installed as .o_iocontrol in ofd_obd_ops).
 *
 * Sets up a local lu_env with LCT_LOCAL and dispatches on \a cmd:
 *  - OBD_IOC_ABORT_RECOVERY: stop the recovery thread of the device;
 *  - a sync command (its case label is not visible here): dt_sync() on
 *    the OSD device;
 *  - OBD_IOC_SET_READONLY: dt_sync() followed by dt_ro();
 *  - otherwise: log "Not supported cmd".
 *
 * \param cmd   ioctl command number
 * \param exp   export the ioctl was issued on
 * \param len   not referenced in the visible lines
 * \param karg  not referenced in the visible lines
 * \param uarg  not referenced in the visible lines
 *
 * NOTE(review): cmd is unsigned int but the "Not supported cmd" message
 * prints it with %d, while the debug message above uses %#x - consider
 * aligning the format. The switch statement itself, break statements, env
 * finalisation and the return are not visible in this view.
 */
478 int ofd_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
479 void *karg, void *uarg)
482 struct ofd_device *ofd = ofd_exp(exp);
483 struct obd_device *obd = ofd_obd(ofd);
488 CDEBUG(D_IOCTL, "handling ioctl cmd %#x\n", cmd);
489 rc = lu_env_init(&env, LCT_LOCAL);
494 case OBD_IOC_ABORT_RECOVERY:
495 CERROR("aborting recovery for device %s\n", obd->obd_name);
496 target_stop_recovery_thread(obd);
499 CDEBUG(D_RPCTRACE, "syncing ost %s\n", obd->obd_name);
500 rc = dt_sync(&env, ofd->ofd_osd);
502 case OBD_IOC_SET_READONLY:
/* Flush first, then switch the OSD device to read-only. */
503 rc = dt_sync(&env, ofd->ofd_osd);
505 rc = dt_ro(&env, ofd->ofd_osd);
508 CERROR("Not supported cmd = %d for device %s\n",
/**
 * Pre-cleanup hook (installed as .o_precleanup in ofd_obd_ops).
 *
 * Dispatches on \a stage; for OBD_CLEANUP_EXPORTS it calls
 * target_cleanup_recovery() on the device.
 *
 * \param obd    OBD device being cleaned up
 * \param stage  cleanup stage
 *
 * NOTE(review): the body of the OBD_CLEANUP_EARLY case, any further cases
 * and the return statement are not visible in this view.
 */
517 static int ofd_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage)
524 case OBD_CLEANUP_EARLY:
526 case OBD_CLEANUP_EXPORTS:
527 target_cleanup_recovery(obd);
/**
 * Ping handler.
 *
 * \param env  execution environment
 * \param exp  export the ping arrived on
 *
 * NOTE(review): only the signature is visible in this view; the body and
 * return value cannot be documented from here.
 */
533 static int ofd_ping(const struct lu_env *env, struct obd_export *exp)
/**
 * Health-check handler (installed as .o_health_check in ofd_obd_ops).
 *
 * Runs dt_statfs() on the OSD device into the per-thread info buffer and
 * fails with -EROFS when the reported os_state equals OS_STATE_READONLY.
 *
 * When built with USE_HEALTH_CHECK_WRITE it additionally writes one page
 * (CFS_PAGE_SIZE bytes) to ofd_health_check_file inside a synchronous
 * local transaction: allocate the buffer (-ENOMEM on failure), create the
 * transaction, declare and start it with th_sync set, write the record,
 * stop the transaction and free the buffer.
 *
 * \param env  execution environment
 * \param obd  OBD device to check
 *
 * NOTE(review): the rc checks between the transaction steps, the matching
 * #endif lines, the "out" label and the return statement are not visible
 * in this view; the initial value of info->fti_off is not shown either.
 */
538 static int ofd_health_check(const struct lu_env *env, struct obd_device *obd)
540 struct ofd_device *ofd = ofd_dev(obd->obd_lu_dev);
541 struct ofd_thread_info *info;
542 #ifdef USE_HEALTH_CHECK_WRITE
547 info = ofd_info_init(env, NULL);
548 rc = dt_statfs(env, ofd->ofd_osd, &info->fti_u.osfs);
552 if (info->fti_u.osfs.os_state == OS_STATE_READONLY)
553 GOTO(out, rc = -EROFS);
555 #ifdef USE_HEALTH_CHECK_WRITE
556 OBD_ALLOC(info->fti_buf.lb_buf, CFS_PAGE_SIZE);
557 if (info->fti_buf.lb_buf == NULL)
558 GOTO(out, rc = -ENOMEM);
560 info->fti_buf.lb_len = CFS_PAGE_SIZE;
563 th = dt_trans_create(env, ofd->ofd_osd);
565 GOTO(out, rc = PTR_ERR(th));
567 rc = dt_declare_record_write(env, ofd->ofd_health_check_file,
568 info->fti_buf.lb_len, info->fti_off, th);
570 th->th_sync = 1; /* sync IO is needed */
571 rc = dt_trans_start_local(env, ofd->ofd_osd, th);
573 rc = dt_record_write(env, ofd->ofd_health_check_file,
574 &info->fti_buf, &info->fti_off,
577 dt_trans_stop(env, ofd->ofd_osd, th);
579 OBD_FREE(info->fti_buf.lb_buf, CFS_PAGE_SIZE);
581 CDEBUG(D_INFO, "write 1 page synchronously for checking io rc %d\n",rc);
/**
 * Notification handler (installed as .o_notify in ofd_obd_ops).
 *
 * On OBD_NOTIFY_CONFIG it asserts that obd_no_conn is currently set and
 * clears it under obd_dev_lock. Other events are only logged as unhandled.
 *
 * \param obd     OBD device receiving the notification
 * \param unused  not referenced in the visible lines
 * \param ev      notification event
 * \param data    not referenced in the visible lines
 *
 * NOTE(review): the switch statement, the remaining arguments of the
 * final CDEBUG and the return statement are not visible in this view.
 */
587 static int ofd_obd_notify(struct obd_device *obd, struct obd_device *unused,
588 enum obd_notify_event ev, void *data)
591 case OBD_NOTIFY_CONFIG:
592 LASSERT(obd->obd_no_conn);
593 cfs_spin_lock(&obd->obd_dev_lock);
594 obd->obd_no_conn = 0;
595 cfs_spin_unlock(&obd->obd_dev_lock);
598 CDEBUG(D_INFO, "%s: Unhandled notification %#x\n",
604 struct obd_ops ofd_obd_ops = {
605 .o_owner = THIS_MODULE,
606 .o_connect = ofd_obd_connect,
607 .o_reconnect = ofd_obd_reconnect,
608 .o_disconnect = ofd_obd_disconnect,
609 .o_set_info_async = ofd_set_info_async,
610 .o_get_info = ofd_get_info,
611 .o_statfs = ofd_statfs,
612 .o_init_export = ofd_init_export,
613 .o_destroy_export = ofd_destroy_export,
614 .o_postrecov = ofd_obd_postrecov,
616 .o_iocontrol = ofd_iocontrol,
617 .o_precleanup = ofd_precleanup,
619 .o_health_check = ofd_health_check,
620 .o_notify = ofd_obd_notify,