4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2012, 2013, Intel Corporation.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
38 * Author: Alexey Zhuravlev <bzzz@whamcloud.com>
39 * Author: Mikhail Pershin <tappro@whamcloud.com>
42 #define DEBUG_SUBSYSTEM S_FILTER
44 #include "ofd_internal.h"
/*
 * Write one record to the dt object \a dt inside a transaction of its own.
 *
 * Visible sequence: create a transaction handle on ofd->ofd_osd, declare the
 * record write of \a buf at the offset currently stored in \a off, start the
 * transaction locally, perform the write (\a off is handed over by pointer),
 * then stop the transaction.
 *
 * NOTE(review): the local declarations, braces, error checks between the
 * calls and the return statement are not visible in this excerpt; confirm
 * them against the full file before reordering anything here.
 */
46 int ofd_record_write(const struct lu_env *env, struct ofd_device *ofd,
47 struct dt_object *dt, struct lu_buf *buf, loff_t *off)
56 th = dt_trans_create(env, ofd->ofd_osd);
/* the write must be declared before the transaction is started */
60 rc = dt_declare_record_write(env, dt, buf, *off, th);
62 rc = dt_trans_start_local(env, ofd->ofd_osd, th);
64 rc = dt_record_write(env, dt, buf, off, th);
66 dt_trans_stop(env, ofd->ofd_osd, th);
/*
 * Clamp a requested batch size against the device setting.
 *
 * Computes the smaller of ofd->ofd_precreate_batch and \a batch while holding
 * ofd->ofd_batch_lock, so the device value is read under that lock.
 *
 * NOTE(review): the return statement is not visible in this excerpt;
 * presumably the computed count is returned - confirm.
 */
71 int ofd_precreate_batch(struct ofd_device *ofd, int batch)
75 spin_lock(&ofd->ofd_batch_lock);
76 count = min(ofd->ofd_precreate_batch, batch);
77 spin_unlock(&ofd->ofd_batch_lock);
/*
 * Look up the ofd_seq whose sequence number equals \a seq.
 *
 * Walks ofd->ofd_seq_list under the read side of ofd_seq_list_lock. On a
 * match the entry's os_refc is incremented before the lock is dropped, so the
 * caller ends up holding a reference on it.
 *
 * NOTE(review): the return statements are not visible in this excerpt;
 * presumably the matching entry is returned on a hit and NULL on a miss -
 * confirm against the full file.
 */
82 struct ofd_seq *ofd_seq_get(struct ofd_device *ofd, obd_seq seq)
86 read_lock(&ofd->ofd_seq_list_lock);
87 cfs_list_for_each_entry(oseq, &ofd->ofd_seq_list, os_list) {
88 if (ostid_seq(&oseq->os_oi) == seq) {
/* take the reference while the list lock is still held */
89 atomic_inc(&oseq->os_refc);
90 read_unlock(&ofd->ofd_seq_list_lock);
/* no entry matched: just drop the lock */
94 read_unlock(&ofd->ofd_seq_list_lock);
/*
 * Tear down an ofd_seq (called from ofd_seq_put() when os_refc reaches zero).
 *
 * Asserts that the entry is no longer linked on any list and that it owns a
 * last-id object, then releases the reference on that object.
 *
 * NOTE(review): the rest of the parameter list and whatever follows the
 * lu_object_put() (e.g. freeing the structure itself) are not visible in this
 * excerpt.
 */
98 static void ofd_seq_destroy(const struct lu_env *env,
101 LASSERT(cfs_list_empty(&oseq->os_list));
102 LASSERT(oseq->os_lastid_obj != NULL);
103 lu_object_put(env, &oseq->os_lastid_obj->do_lu);
/*
 * Drop one reference on \a oseq.
 *
 * os_refc is decremented atomically; the caller that brings it to zero
 * destroys the entry through ofd_seq_destroy().
 */
107 void ofd_seq_put(const struct lu_env *env, struct ofd_seq *oseq)
109 if (atomic_dec_and_test(&oseq->os_refc))
110 ofd_seq_destroy(env, oseq);
/*
 * Unlink \a oseq from whatever list it is on and drop one reference on it.
 *
 * cfs_list_del_init() leaves os_list empty, which is what ofd_seq_destroy()
 * asserts if this put turns out to be the last one.
 *
 * NOTE(review): no lock is taken in the visible lines; the only caller in
 * this excerpt (ofd_seqs_free()) invokes it on a function-local list.
 */
113 static void ofd_seq_delete(const struct lu_env *env, struct ofd_seq *oseq)
115 cfs_list_del_init(&oseq->os_list);
116 ofd_seq_put(env, oseq);
120 * Add a new sequence to the OFD device.
122 * \param ofd OFD device
123 * \param new_seq new sequence to be added
125 * \retval the seq to be added or the existing seq
/*
 * Lookup and insertion below both happen while holding the write side of
 * ofd->ofd_seq_list_lock.
 *
 * NOTE(review): braces and the return statements are not visible in this
 * excerpt; confirm the exact exit paths against the full file.
 */
127 static struct ofd_seq *ofd_seq_add(const struct lu_env *env,
128 struct ofd_device *ofd,
129 struct ofd_seq *new_seq)
131 struct ofd_seq *os = NULL;
133 write_lock(&ofd->ofd_seq_list_lock);
134 cfs_list_for_each_entry(os, &ofd->ofd_seq_list, os_list) {
135 if (ostid_seq(&os->os_oi) == ostid_seq(&new_seq->os_oi)) {
/* an entry with the same sequence number is already listed: take a
 * reference on it and drop the caller's reference on new_seq */
136 atomic_inc(&os->os_refc);
137 write_unlock(&ofd->ofd_seq_list_lock);
138 /* The seq has not been added to the list */
139 ofd_seq_put(env, new_seq);
/* not found: take one more reference on new_seq, link it at the tail of
 * the device list and account for it in ofd_seq_count */
143 atomic_inc(&new_seq->os_refc);
144 cfs_list_add_tail(&new_seq->os_list, &ofd->ofd_seq_list);
145 ofd->ofd_seq_count++;
146 write_unlock(&ofd->ofd_seq_list_lock);
/*
 * Read the object id currently stored in oseq->os_oi.
 *
 * The id is sampled under os_last_oid_lock, the same lock
 * ofd_seq_last_oid_set() holds while updating it.
 *
 * NOTE(review): the declaration of id and the return statement are not
 * visible in this excerpt; presumably id is returned - confirm.
 */
150 obd_id ofd_seq_last_oid(struct ofd_seq *oseq)
154 spin_lock(&oseq->os_last_oid_lock);
155 id = ostid_id(&oseq->os_oi);
156 spin_unlock(&oseq->os_last_oid_lock);
/*
 * Raise the object id stored in oseq->os_oi to \a id.
 *
 * The stored id is only replaced when it is strictly smaller than \a id, so
 * this call never lowers it. Check and update are done under
 * os_last_oid_lock.
 */
161 void ofd_seq_last_oid_set(struct ofd_seq *oseq, obd_id id)
163 spin_lock(&oseq->os_last_oid_lock);
/* growing is the expected case, hence likely() */
164 if (likely(ostid_id(&oseq->os_oi) < id))
165 ostid_set_id(&oseq->os_oi, id);
166 spin_unlock(&oseq->os_last_oid_lock);
/*
 * Store the current last object id of \a oseq in its last-id object.
 *
 * The id is read through ofd_seq_last_oid(), converted to little-endian
 * 64-bit, and written from the per-thread buffer (info->fti_buf) into
 * oseq->os_lastid_obj via ofd_record_write(). The outcome is logged at
 * D_INODE level.
 *
 * NOTE(review): the declaration of tmp, the offset argument passed to
 * ofd_record_write() and the return statement are on lines not visible in
 * this excerpt.
 */
169 int ofd_seq_last_oid_write(const struct lu_env *env, struct ofd_device *ofd,
170 struct ofd_seq *oseq)
172 struct ofd_thread_info *info = ofd_info(env);
178 tmp = cpu_to_le64(ofd_seq_last_oid(oseq));
/* point the thread-info buffer at the converted value */
180 info->fti_buf.lb_buf = &tmp;
181 info->fti_buf.lb_len = sizeof(tmp);
184 rc = ofd_record_write(env, ofd, oseq->os_lastid_obj, &info->fti_buf,
187 CDEBUG(D_INODE, "%s: write last_objid "DOSTID": rc = %d\n",
188 ofd_name(ofd), POSTID(&oseq->os_oi), rc);
/*
 * Undo the LWP item registrations made in ofd_register_seq_exp().
 *
 * For each of the client sequence and the server FLD, if the structure
 * exists, its export slot is deregistered with
 * lustre_deregister_lwp_item() and then reset to NULL.
 */
193 static void ofd_deregister_seq_exp(struct ofd_device *ofd)
195 struct seq_server_site *ss = &ofd->ofd_seq_site;
197 if (ss->ss_client_seq != NULL) {
198 lustre_deregister_lwp_item(&ss->ss_client_seq->lcs_exp);
199 ss->ss_client_seq->lcs_exp = NULL;
202 if (ss->ss_server_fld != NULL) {
203 lustre_deregister_lwp_item(&ss->ss_server_fld->lsf_control_exp);
204 ss->ss_server_fld->lsf_control_exp = NULL;
/*
 * Shut down and free the server FLD of this device, if one was set up.
 *
 * Counterpart of ofd_fld_init(): finalizes ss_server_fld, frees it and
 * clears the pointer so a repeated call finds nothing to do.
 *
 * NOTE(review): ss is the address of a member of \a ofd, so the "ss &&" half
 * of the test cannot be false. The return statement is not visible in this
 * excerpt.
 */
208 static int ofd_fld_fini(const struct lu_env *env,
209 struct ofd_device *ofd)
211 struct seq_server_site *ss = &ofd->ofd_seq_site;
214 if (ss && ss->ss_server_fld) {
215 fld_server_fini(env, ss->ss_server_fld);
216 OBD_FREE_PTR(ss->ss_server_fld);
217 ss->ss_server_fld = NULL;
/*
 * Release every ofd_seq linked on ofd->ofd_seq_list.
 *
 * Done in two phases: all entries are first moved to a function-local
 * "dispose" list under the write side of ofd_seq_list_lock; afterwards, with
 * the lock released, each entry is unlinked and has a reference dropped via
 * ofd_seq_delete().
 *
 * NOTE(review): the declarations of tmp and dispose are not visible in this
 * excerpt. ofd->ofd_seq_count is not adjusted in the visible lines.
 */
223 void ofd_seqs_free(const struct lu_env *env, struct ofd_device *ofd)
225 struct ofd_seq *oseq;
229 CFS_INIT_LIST_HEAD(&dispose);
230 write_lock(&ofd->ofd_seq_list_lock);
231 cfs_list_for_each_entry_safe(oseq, tmp, &ofd->ofd_seq_list, os_list) {
232 cfs_list_move(&oseq->os_list, &dispose);
234 write_unlock(&ofd->ofd_seq_list_lock);
/* ofd_seq_delete() unlinks the head entry, so this loop terminates */
236 while (!cfs_list_empty(&dispose)) {
237 oseq = container_of0(dispose.next, struct ofd_seq, os_list);
238 ofd_seq_delete(env, oseq);
/*
 * Tear down sequence handling for the device.
 *
 * Order in the visible lines: deregister the LWP items, finalize FID
 * handling, finalize the FLD server, free all ofd_seq entries, and finally
 * assert that the device sequence list is empty. Failures of the fid/fld
 * finalizers are reported with CERROR.
 *
 * NOTE(review): the conditions guarding the two CERROR calls are on lines
 * not visible in this excerpt.
 */
242 void ofd_seqs_fini(const struct lu_env *env, struct ofd_device *ofd)
246 ofd_deregister_seq_exp(ofd);
248 rc = ofd_fid_fini(env, ofd);
250 CERROR("%s: fid fini error: rc = %d\n", ofd_name(ofd), rc);
252 rc = ofd_fld_fini(env, ofd);
254 CERROR("%s: fld fini error: rc = %d\n", ofd_name(ofd), rc);
256 ofd_seqs_free(env, ofd);
258 LASSERT(cfs_list_empty(&ofd->ofd_seq_list));
263 * \retval the seq with seq number, or an ERR_PTR()-encoded negative errno (never NULL)
/*
 * Outline of the visible steps:
 *  1. look the sequence up with ofd_seq_get();
 *  2. otherwise build the last-id FID for \a seq and find or create the
 *     backing regular file;
 *  3. initialize a fresh ofd_seq (list head, locks, sequence number, one
 *     reference) around that object;
 *  4. seed the last object id, either with OFD_INIT_OBJID for an empty file
 *     or with the little-endian 64-bit value read from it;
 *  5. publish the entry through ofd_seq_add().
 * Errors are returned as ERR_PTR() values.
 *
 * NOTE(review): the remaining parameters, several declarations (rc, lastid),
 * the allocation of oseq, most error checks and the cleanup label are on
 * lines not visible in this excerpt.
 */
265 struct ofd_seq *ofd_seq_load(const struct lu_env *env, struct ofd_device *ofd,
268 struct ofd_thread_info *info = ofd_info(env);
269 struct ofd_seq *oseq = NULL;
270 struct dt_object *dob;
276 /* if seq is already initialized */
277 oseq = ofd_seq_get(ofd, seq);
283 RETURN(ERR_PTR(-ENOMEM));
/* describe the last-id object: a regular file with owner rw and
 * group/other read permission bits */
285 lu_last_id_fid(&info->fti_fid, seq, ofd->ofd_lut.lut_lsd.lsd_osd_index);
286 memset(&info->fti_attr, 0, sizeof(info->fti_attr));
287 info->fti_attr.la_valid = LA_MODE;
288 info->fti_attr.la_mode = S_IFREG | S_IRUGO | S_IWUSR;
289 info->fti_dof.dof_type = dt_mode_to_dft(S_IFREG);
291 /* create object tracking per-seq last created
292 * id to be used by orphan recovery mechanism */
293 dob = dt_find_or_create(env, ofd->ofd_osd, &info->fti_fid,
294 &info->fti_dof, &info->fti_attr);
/* from here on the seq owns the object reference; ofd_seq_destroy()
 * releases it */
300 oseq->os_lastid_obj = dob;
302 CFS_INIT_LIST_HEAD(&oseq->os_list);
303 mutex_init(&oseq->os_create_lock);
304 spin_lock_init(&oseq->os_last_oid_lock);
305 ostid_set_seq(&oseq->os_oi, seq);
/* the creator's reference */
307 atomic_set(&oseq->os_refc, 1);
/* the file size tells whether a last id has ever been stored */
309 rc = dt_attr_get(env, dob, &info->fti_attr, BYPASS_CAPA);
313 if (info->fti_attr.la_size == 0) {
314 /* object is just created, initialize last id */
315 ofd_seq_last_oid_set(oseq, OFD_INIT_OBJID);
/* NOTE(review): the result of this write is not captured on this line, so a
 * failure to store the initial id would go unnoticed here - confirm this is
 * intended */
316 ofd_seq_last_oid_write(env, ofd, oseq);
317 } else if (info->fti_attr.la_size == sizeof(lastid)) {
319 info->fti_buf.lb_buf = &lastid;
320 info->fti_buf.lb_len = sizeof(lastid);
322 rc = dt_record_read(env, dob, &info->fti_buf, &info->fti_off);
324 CERROR("%s: can't read last_id: rc = %d\n",
/* the stored value is little-endian */
328 ofd_seq_last_oid_set(oseq, le64_to_cpu(lastid));
/* any other file size is treated as corruption */
330 CERROR("%s: corrupted size "LPU64" LAST_ID of seq "LPX64"\n",
331 ofd_name(ofd), (__u64)info->fti_attr.la_size, seq);
332 GOTO(cleanup, rc = -EINVAL);
/* ofd_seq_add() hands back either this entry or one that another caller
 * listed first for the same sequence number */
335 oseq = ofd_seq_add(env, ofd, oseq);
336 RETURN((oseq != NULL) ? oseq : ERR_PTR(-ENOENT));
/* NOTE(review): presumably the cleanup path - drops the creator's reference */
338 ofd_seq_put(env, oseq);
/*
 * Allocate and initialize the server FLD for this device.
 *
 * ss_server_fld is allocated and handed to fld_server_init() together with
 * ofd->ofd_osd and \a uuid. The visible OBD_FREE_PTR()/NULL pair releases the
 * structure again and clears the pointer.
 *
 * \retval -ENOMEM if the allocation fails
 *
 * NOTE(review): the remaining arguments of fld_server_init(), the condition
 * guarding the free (presumably a failed init) and the final return are on
 * lines not visible in this excerpt.
 */
342 static int ofd_fld_init(const struct lu_env *env, const char *uuid,
343 struct ofd_device *ofd)
345 struct seq_server_site *ss = &ofd->ofd_seq_site;
349 OBD_ALLOC_PTR(ss->ss_server_fld);
350 if (ss->ss_server_fld == NULL)
351 RETURN(rc = -ENOMEM);
353 rc = fld_server_init(env, ss->ss_server_fld, ofd->ofd_osd, uuid,
356 OBD_FREE_PTR(ss->ss_server_fld);
357 ss->ss_server_fld = NULL;
364 * It will retrieve its FLDB entries from MDT0, and it only happens
365 * when upgrading an existing FS to 2.6.
/*
 * Callback passed to lustre_register_lwp_item() for the FLD control export
 * (see ofd_register_seq_exp()); \a data is the ofd_device.
 *
 * The visible lines test fld->lsf_new, set up a private lu_env with
 * LCT_DT_THREAD, and call fld_update_from_controller() with it. Failures of
 * either step are reported with CERROR.
 *
 * NOTE(review): the statement executed when lsf_new is not set (presumably
 * an early return), the declaration of env, its finalization and the return
 * statement are on lines not visible in this excerpt.
 */
367 static int ofd_register_lwp_callback(void *data)
370 struct ofd_device *ofd = data;
371 struct lu_server_fld *fld = ofd->ofd_seq_site.ss_server_fld;
375 if (!likely(fld->lsf_new))
378 rc = lu_env_init(&env, LCT_DT_THREAD);
380 CERROR("%s: cannot init env: rc = %d\n", ofd_name(ofd), rc);
384 rc = fld_update_from_controller(&env, fld);
386 CERROR("%s: cannot update controller: rc = %d\n",
/*
 * Register the device's two export slots as LWP items.
 *
 * A MAX_OBD_NAME-sized buffer is allocated for the LWP name, which is derived
 * from the device name by tgt_name2lwp_name(). The client sequence export
 * (lcs_exp) is registered first, then the FLD control export
 * (lsf_control_exp) with ofd_register_lwp_callback() and \a ofd as callback
 * data. The name buffer is freed before leaving.
 *
 * \retval -ENOMEM if the name buffer cannot be allocated
 *
 * NOTE(review): the error checks after each call, the trailing arguments of
 * the first registration, the out_free label and the return statement are on
 * lines not visible in this excerpt.
 */
395 static int ofd_register_seq_exp(struct ofd_device *ofd)
397 struct seq_server_site *ss = &ofd->ofd_seq_site;
398 char *lwp_name = NULL;
401 OBD_ALLOC(lwp_name, MAX_OBD_NAME);
402 if (lwp_name == NULL)
403 GOTO(out_free, rc = -ENOMEM);
405 rc = tgt_name2lwp_name(ofd_name(ofd), lwp_name, MAX_OBD_NAME, 0);
409 rc = lustre_register_lwp_item(lwp_name, &ss->ss_client_seq->lcs_exp,
414 rc = lustre_register_lwp_item(lwp_name,
415 &ss->ss_server_fld->lsf_control_exp,
416 ofd_register_lwp_callback, ofd);
/* NOTE(review): presumably runs only when the second registration failed -
 * it rolls back the first one */
418 lustre_deregister_lwp_item(&ss->ss_client_seq->lcs_exp);
419 ss->ss_client_seq->lcs_exp = NULL;
/* lwp_name is still NULL when the allocation above failed */
423 if (lwp_name != NULL)
424 OBD_FREE(lwp_name, MAX_OBD_NAME);
429 /* object sequence management */
/*
 * Set up sequence handling for the device.
 *
 * Order in the visible lines: FID init, FLD init (using the device name as
 * uuid), LWP item registration, and only then initialization of the
 * per-device sequence list (its rwlock, list head and counter). Each failing
 * step is reported with CERROR.
 *
 * NOTE(review): the conditions guarding the CERROR calls, any unwinding of
 * earlier steps on failure and the return statement are on lines not visible
 * in this excerpt.
 */
430 int ofd_seqs_init(const struct lu_env *env, struct ofd_device *ofd)
434 rc = ofd_fid_init(env, ofd);
436 CERROR("%s: fid init error: rc = %d\n", ofd_name(ofd), rc);
440 rc = ofd_fld_init(env, ofd_name(ofd), ofd);
442 CERROR("%s: Can't init fld, rc %d\n", ofd_name(ofd), rc);
446 rc = ofd_register_seq_exp(ofd);
448 CERROR("%s: Can't init seq exp, rc %d\n", ofd_name(ofd), rc);
/* the sequence list starts out empty */
452 rwlock_init(&ofd->ofd_seq_list_lock);
453 CFS_INIT_LIST_HEAD(&ofd->ofd_seq_list);
454 ofd->ofd_seq_count = 0;
/*
 * Prepare the on-disk state the device needs.
 *
 * The visible lines find or create the health-check object (a regular file
 * identified by the local FID for OFD_HEALTH_CHECK_OID), store it in
 * ofd->ofd_health_check_file, and then run ofd_seqs_init().
 *
 * NOTE(review): the statement taken when the OBD_FAIL_MDS_FS_SETUP check
 * fires, the IS_ERR test on fo, the out label and the return statement are
 * on lines not visible in this excerpt. \a obd is not referenced in the
 * visible lines.
 */
458 int ofd_fs_setup(const struct lu_env *env, struct ofd_device *ofd,
459 struct obd_device *obd)
461 struct ofd_thread_info *info = ofd_info(env);
462 struct dt_object *fo;
467 if (OBD_FAIL_CHECK(OBD_FAIL_MDS_FS_SETUP))
/* describe the health-check object: a regular file with owner rw and
 * group/other read permission bits */
470 lu_local_obj_fid(&info->fti_fid, OFD_HEALTH_CHECK_OID);
471 memset(&info->fti_attr, 0, sizeof(info->fti_attr));
472 info->fti_attr.la_valid = LA_MODE;
473 info->fti_attr.la_mode = S_IFREG | S_IRUGO | S_IWUSR;
474 info->fti_dof.dof_type = dt_mode_to_dft(S_IFREG);
476 fo = dt_find_or_create(env, ofd->ofd_osd, &info->fti_fid,
477 &info->fti_dof, &info->fti_attr);
479 GOTO(out, rc = PTR_ERR(fo));
481 ofd->ofd_health_check_file = fo;
483 rc = ofd_seqs_init(env, ofd);
/* NOTE(review): presumably the failure path of ofd_seqs_init() - releases
 * the health-check object taken above */
489 lu_object_put(env, &ofd->ofd_health_check_file->do_lu);
/*
 * Undo ofd_fs_setup().
 *
 * Re-initializes the per-thread info with no export, tears down sequence
 * handling, syncs the underlying OSD (a failure is only logged), and
 * releases the health-check object if one is held, clearing the pointer
 * afterwards.
 *
 * NOTE(review): the declaration of i and the condition guarding the CERROR
 * are on lines not visible in this excerpt.
 */
494 void ofd_fs_cleanup(const struct lu_env *env, struct ofd_device *ofd)
500 ofd_info_init(env, NULL);
502 ofd_seqs_fini(env, ofd);
504 i = dt_sync(env, ofd->ofd_osd);
506 CERROR("can't sync: %d\n", i);
508 if (ofd->ofd_health_check_file) {
509 lu_object_put(env, &ofd->ofd_health_check_file->do_lu);
510 ofd->ofd_health_check_file = NULL;