4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License version 2 for more details. A copy is
14 * included in the COPYING file that accompanied this code.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 * Copyright (c) 2012 Whamcloud, Inc.
26 * lustre/obdclass/local_storage.c
28 * Local storage for file/objects with fid generation. Works on top of OSD.
30 * Author: Mikhail Pershin <mike.pershin@intel.com>
33 #define DEBUG_SUBSYSTEM S_CLASS
35 #include "local_storage.h"
37 /* all initialized local storages on this node are linked on this list */
38 static CFS_LIST_HEAD(ls_list_head);
/*
 * Taken around every visible walk of, insertion into and removal from
 * ls_list_head (ls_find_dev(), ls_device_get(), ls_device_put()).
 */
39 static CFS_DEFINE_MUTEX(ls_list_mutex);
/*
 * Object init hook of the local-storage layer (installed through
 * ls_lu_obj_ops.loo_object_init).
 *
 * Asks the underlying OSD device to allocate its own lu_object for the same
 * object header and links that object to \a o with lu_object_add().
 *
 * env    - execution environment, handed through to the OSD allocator
 * o      - local-storage layer object being initialized
 * unused - object configuration; not referenced by the visible code
 *
 * NOTE(review): the check of the ldo_object_alloc() result and the return
 * statements are not visible in this view of the function.
 */
41 static int ls_object_init(const struct lu_env *env, struct lu_object *o,
42 const struct lu_object_conf *unused)
45 struct lu_object *below;
46 struct lu_device *under;
/* recover the ls_device that embeds the lu_device this object belongs to */
50 ls = container_of0(o->lo_dev, struct ls_device, ls_top_dev.dd_lu_dev);
/* the device underneath is the OSD dt_device recorded in ls_osd */
51 under = &ls->ls_osd->dd_lu_dev;
52 below = under->ld_ops->ldo_object_alloc(env, o->lo_header, under);
/* attach the OSD-level object to the same compound object as 'o' */
56 lu_object_add(o, below);
/*
 * Object free hook of the local-storage layer (installed through
 * ls_lu_obj_ops.loo_object_free).
 *
 * Finalizes the dt_object embedded in the ls_object and then the object
 * header. The header pointer is read from 'o' up front, before
 * dt_object_fini() runs.
 *
 * NOTE(review): releasing the ls_object memory itself is not visible in this
 * view; presumably it happens on a line not shown here.
 */
61 static void ls_object_free(const struct lu_env *env, struct lu_object *o)
63 struct ls_object *obj = lu2ls_obj(o);
64 struct lu_object_header *h = o->lo_header;
66 dt_object_fini(&obj->ls_obj);
67 lu_object_header_fini(h);
/*
 * Object operations of the local-storage layer; ls_object_alloc() installs
 * this table in lo_ops of every object it sets up.
 */
71 struct lu_object_operations ls_lu_obj_ops = {
72 .loo_object_init = ls_object_init,
73 .loo_object_free = ls_object_free,
/*
 * Object allocation hook of the local-storage device (installed through
 * ls_lu_dev_ops.ldo_object_alloc).
 *
 * The visible part initializes an object header 'h', initializes the embedded
 * dt_object on top of it, makes the lu_object 'l' the top layer of that
 * header and points its operations at ls_lu_obj_ops.
 *
 * NOTE(review): the remaining parameter(s), the allocation of the ls_object
 * and the assignments of 'o', 'l', 'd' and 'h' are not visible in this view;
 * presumably 'h' is a header owned by the new object rather than the
 * caller-supplied '_h' - confirm against the full source.
 */
76 struct lu_object *ls_object_alloc(const struct lu_env *env,
77 const struct lu_object_header *_h,
80 struct lu_object_header *h;
91 lu_object_header_init(h);
92 dt_object_init(&o->ls_obj, h, d);
93 lu_object_add_top(h, l);
/* route init/free of this object to the local-storage hooks */
95 l->lo_ops = &ls_lu_obj_ops;
/*
 * Device operations of the local-storage top device; ls_device_get() stores
 * this table in ld_ops of every ls_device it creates.
 */
103 static struct lu_device_operations ls_lu_dev_ops = {
104 .ldo_object_alloc = ls_object_alloc
/*
 * Search ls_list_head for the ls_device whose ls_osd is \a dev.
 *
 * A reference (ls_refcount) is taken on the matching entry. Both callers
 * visible in this file (ls_find_dev(), ls_device_get()) invoke this with
 * ls_list_mutex held.
 *
 * NOTE(review): the assignment to 'ret' and the return statement are not
 * visible here; presumably the match is returned, or NULL when nothing is
 * found.
 */
107 static struct ls_device *__ls_find_dev(struct dt_device *dev)
109 struct ls_device *ls, *ret = NULL;
111 cfs_list_for_each_entry(ls, &ls_list_head, ls_linkage) {
112 if (ls->ls_osd == dev) {
113 cfs_atomic_inc(&ls->ls_refcount);
/*
 * Locked wrapper around __ls_find_dev(): performs the lookup of the
 * ls_device set up on top of \a dev while holding ls_list_mutex.
 * A found device has had its ls_refcount incremented by __ls_find_dev().
 */
121 struct ls_device *ls_find_dev(struct dt_device *dev)
123 struct ls_device *ls;
125 cfs_mutex_lock(&ls_list_mutex);
126 ls = __ls_find_dev(dev);
127 cfs_mutex_unlock(&ls_list_mutex);
/*
 * Device-type operations for the local-storage device type.
 * NOTE(review): the initializer contents are not visible in this view.
 */
132 static struct lu_device_type_operations ls_device_type_ops = {
/*
 * Device type passed to lu_device_init() for every ls_device created in
 * ls_device_get().
 */
137 static struct lu_device_type ls_lu_type = {
138 .ldt_name = "local_storage",
139 .ldt_ops = &ls_device_type_ops,
/*
 * Find or create the ls_device layered on top of OSD device \a dev.
 *
 * The whole lookup-or-create sequence runs under ls_list_mutex. An existing
 * device is obtained through __ls_find_dev(), which takes a reference; a new
 * one starts with ls_refcount == 1 and is linked onto ls_list_head.
 * The failure path visible here yields ERR_PTR(-ENOMEM).
 *
 * NOTE(review): the "found" early exit, the allocation itself, the assignment
 * of ls->ls_osd, the out_ls label and the return statement are not visible in
 * this view.
 */
142 static struct ls_device *ls_device_get(const struct lu_env *env,
143 struct dt_device *dev)
145 struct ls_device *ls;
149 cfs_mutex_lock(&ls_list_mutex);
150 ls = __ls_find_dev(dev);
154 /* not found, then create */
/* presumably reached when the allocation of the new ls_device fails */
157 GOTO(out_ls, ls = ERR_PTR(-ENOMEM));
/* the creator holds the first reference */
159 cfs_atomic_set(&ls->ls_refcount, 1);
/* per-device list of local_oid_storage instances and the mutex guarding it */
160 CFS_INIT_LIST_HEAD(&ls->ls_los_list);
161 cfs_mutex_init(&ls->ls_los_mutex);
/* the top device shares the lu_site of the OSD, so the OSD must have one */
165 LASSERT(dev->dd_lu_dev.ld_site);
166 lu_device_init(&ls->ls_top_dev.dd_lu_dev, &ls_lu_type);
167 ls->ls_top_dev.dd_lu_dev.ld_ops = &ls_lu_dev_ops;
168 ls->ls_top_dev.dd_lu_dev.ld_site = dev->dd_lu_dev.ld_site;
170 /* finally add ls to the list */
171 cfs_list_add(&ls->ls_linkage, &ls_list_head);
173 cfs_mutex_unlock(&ls_list_mutex);
/*
 * Drop one reference on \a ls and tear the device down when it was the last.
 *
 * The counter is decremented without the list mutex and then re-read under
 * ls_list_mutex: __ls_find_dev() increments ls_refcount under that same
 * mutex, so a lookup may have taken a new reference between the decrement
 * and the lock.
 *
 * NOTE(review): the early return after the decrement and the release of the
 * ls_device memory are not visible in this view.
 */
177 static void ls_device_put(const struct lu_env *env, struct ls_device *ls)
180 if (!cfs_atomic_dec_and_test(&ls->ls_refcount))
183 cfs_mutex_lock(&ls_list_mutex);
184 if (cfs_atomic_read(&ls->ls_refcount) == 0) {
/* every local_oid_storage must already be gone from this device */
185 LASSERT(cfs_list_empty(&ls->ls_los_list));
186 cfs_list_del(&ls->ls_linkage);
/* purge objects from the shared site; ~0 presumably means "no limit" */
187 lu_site_purge(env, ls->ls_top_dev.dd_lu_dev.ld_site, ~0);
188 lu_device_fini(&ls->ls_top_dev.dd_lu_dev);
191 cfs_mutex_unlock(&ls_list_mutex);
195 * local file fid generation
/*
 * Hand out the next FID of the sequence managed by \a los.
 *
 * Under los_id_lock the output fid receives los_seq as its sequence and the
 * current los_last_oid as its object id; los_last_oid is post-incremented, so
 * it afterwards holds the next id to hand out.
 *
 * Requires los->los_dev and los->los_obj to be set (asserted).
 *
 * NOTE(review): the 'fid' output parameter declaration, any other field
 * assignments and the return statement are not visible in this view.
 */
197 int local_object_fid_generate(const struct lu_env *env,
198 struct local_oid_storage *los,
201 LASSERT(los->los_dev);
202 LASSERT(los->los_obj);
206 /* to make it unique after reboot we store
207 * the latest generated fid atomically with
208 * object creation see local_object_create() */
210 cfs_mutex_lock(&los->los_id_lock);
211 fid->f_seq = los->los_seq;
212 fid->f_oid = los->los_last_oid++;
214 cfs_mutex_unlock(&los->los_id_lock);
/*
 * Declare, in transaction \a th, the updates that local_object_create() will
 * perform for object \a o:
 *  - a write of one struct los_ondisk at offset 0 of the OID counter object
 *    los->los_obj,
 *  - creation of \a o with attributes \a attr and format \a dof,
 *  - setting the LMA extended attribute on \a o.
 *
 * NOTE(review): __local_file_create() is called with los == NULL by the
 * *_with_fid helpers in this file; the guard that must protect the
 * los->los_obj accesses, the checks of 'rc' between the steps and the return
 * statement are not visible in this view - confirm against the full source.
 */
219 int local_object_declare_create(const struct lu_env *env,
220 struct local_oid_storage *los,
221 struct dt_object *o, struct lu_attr *attr,
222 struct dt_object_format *dof,
225 struct dt_thread_info *dti = dt_info(env);
230 /* update fid generation file */
232 LASSERT(dt_object_exists(los->los_obj));
233 rc = dt_declare_record_write(env, los->los_obj,
234 sizeof(struct los_ondisk), 0, th);
239 rc = dt_declare_create(env, o, attr, NULL, dof, th);
/* only the size matters for the declaration, so no buffer is supplied */
243 dti->dti_lb.lb_buf = NULL;
244 dti->dti_lb.lb_len = sizeof(dti->dti_lma);
245 rc = dt_declare_xattr_set(env, o, &dti->dti_lb, XATTR_NAME_LMA, 0, th);
/*
 * Create object \a o inside the already started transaction \a th and record
 * the OID counter on disk in the same transaction.
 *
 * Visible steps:
 *  1. dt_create() of \a o with \a attr / \a dof;
 *  2. build an LMA from the object's FID, byte-swap it with
 *     lustre_lma_swab() and store it as the XATTR_NAME_LMA xattr;
 *  3. under los_id_lock, write a los_ondisk record (magic plus the current
 *     los_last_oid, both little-endian) into los->los_obj.
 *
 * NOTE(review): the checks of 'rc' between the steps, the guard for callers
 * that pass los == NULL, the assignment of dti->dti_off before the record
 * write and the return statement are not visible in this view.
 */
250 int local_object_create(const struct lu_env *env,
251 struct local_oid_storage *los,
252 struct dt_object *o, struct lu_attr *attr,
253 struct dt_object_format *dof, struct thandle *th)
255 struct dt_thread_info *dti = dt_info(env);
256 struct los_ondisk losd;
261 rc = dt_create(env, o, attr, NULL, dof, th);
/* LMA carries the object's own FID; the per-thread buffer dti_lb wraps it */
265 lustre_lma_init(&dti->dti_lma, lu_object_fid(&o->do_lu));
266 lustre_lma_swab(&dti->dti_lma);
267 dti->dti_lb.lb_buf = &dti->dti_lma;
268 dti->dti_lb.lb_len = sizeof(dti->dti_lma);
269 rc = dt_xattr_set(env, o, &dti->dti_lb, XATTR_NAME_LMA, 0, th,
275 LASSERT(los->los_obj);
276 LASSERT(dt_object_exists(los->los_obj));
278 /* many threads can be updating this, serialize
279 * them here to avoid the race where one thread
280 * takes the value first, but writes it last */
281 cfs_mutex_lock(&los->los_id_lock);
283 /* update local oid number on disk so that
284 * we know the last one used after reboot */
285 losd.lso_magic = cpu_to_le32(LOS_MAGIC);
286 losd.lso_next_oid = cpu_to_le32(los->los_last_oid);
/* dti_lb is reused, now pointing at the on-stack counter record */
289 dti->dti_lb.lb_buf = &losd;
290 dti->dti_lb.lb_len = sizeof(losd);
291 rc = dt_record_write(env, los->los_obj, &dti->dti_lb, &dti->dti_off,
293 cfs_mutex_unlock(&los->los_id_lock);
299 * Create local named object (file, directory or index) in parent directory.
/*
 * Create object \a fid on local-storage device \a ls and insert it under
 * \a name into directory \a parent, all within one local transaction on
 * ls->ls_osd.
 *
 * env    - execution environment (also supplies the per-thread dt_thread_info)
 * fid    - FID of the object to create
 * los    - OID storage whose counter is updated together with the create;
 *          the *_with_fid callers in this file pass NULL
 * ls     - local-storage device used to locate the object and run the
 *          transaction
 * parent - directory receiving the new name
 * name   - entry name
 * attr   - attributes for the new object
 * dof    - object format for the new object
 *
 * Fails with -EEXIST if the object already exists before the transaction is
 * created, and with -ENOTDIR if a freshly created directory cannot be used
 * as a directory.
 *
 * NOTE(review): the directory test reads dti->dti_dof.dof_type rather than
 * \a dof; the visible callers fill dti->dti_dof before calling and presumably
 * pass its address as \a dof - confirm.
 * NOTE(review): most 'rc' checks, the goto labels (out, trans_stop, unlock,
 * destroy), closing braces and the return statements are not visible in this
 * view, so the exact error-path ordering must be confirmed against the full
 * source.
 */
301 struct dt_object *__local_file_create(const struct lu_env *env,
302 const struct lu_fid *fid,
303 struct local_oid_storage *los,
304 struct ls_device *ls,
305 struct dt_object *parent,
306 const char *name, struct lu_attr *attr,
307 struct dt_object_format *dof)
309 struct dt_thread_info *dti = dt_info(env);
310 struct dt_object *dto;
314 dto = ls_locate(env, ls, fid);
315 if (unlikely(IS_ERR(dto)))
318 LASSERT(dto != NULL);
319 if (dt_object_exists(dto))
320 GOTO(out, rc = -EEXIST);
/* declaration phase: everything the transaction will touch is announced */
322 th = dt_trans_create(env, ls->ls_osd);
324 GOTO(out, rc = PTR_ERR(th));
326 rc = local_object_declare_create(env, los, dto, attr, dof, th);
328 GOTO(trans_stop, rc);
/* a directory needs reference updates on itself and on the parent */
330 if (dti->dti_dof.dof_type == DFT_DIR) {
331 dt_declare_ref_add(env, dto, th);
332 dt_declare_ref_add(env, parent, th);
335 rc = dt_declare_insert(env, parent, (void *)fid, (void *)name, th);
337 GOTO(trans_stop, rc);
339 rc = dt_trans_start_local(env, ls->ls_osd, th);
341 GOTO(trans_stop, rc);
/* execution phase: re-check existence under the object write lock */
343 dt_write_lock(env, dto, 0);
344 if (dt_object_exists(dto))
345 GOTO(unlock, rc = 0);
347 CDEBUG(D_OTHER, "create new object "DFID"\n",
348 PFID(lu_object_fid(&dto->do_lu)));
349 rc = local_object_create(env, los, dto, attr, dof, th);
352 LASSERT(dt_object_exists(dto));
354 if (dti->dti_dof.dof_type == DFT_DIR) {
355 if (!dt_try_as_dir(env, dto))
356 GOTO(destroy, rc = -ENOTDIR);
357 /* Add "." and ".." for newly created dir */
358 rc = dt_insert(env, dto, (void *)fid, (void *)".", th,
362 dt_ref_add(env, dto, th);
363 rc = dt_insert(env, dto, (void *)lu_object_fid(&parent->do_lu),
364 (void *)"..", th, BYPASS_CAPA, 1);
/* link the new object into the parent under the parent's write lock */
369 dt_write_lock(env, parent, 0);
370 rc = dt_insert(env, parent, (const struct dt_rec *)fid,
371 (const struct dt_key *)name, th, BYPASS_CAPA, 1);
372 if (dti->dti_dof.dof_type == DFT_DIR)
373 dt_ref_add(env, parent, th);
374 dt_write_unlock(env, parent);
/* cleanup chain; presumably dt_destroy() only runs on the failure path */
379 dt_destroy(env, dto, th);
381 dt_write_unlock(env, dto);
383 dt_trans_stop(env, ls->ls_osd, th);
/* presumably the object reference is dropped only when rc != 0 */
386 lu_object_put_nocache(env, &dto->do_lu);
/* local FID buffer; only used as the output of the lookup below */
389 struct lu_fid dti_fid;
390 /* since local files FIDs are not in OI the directory entry
391 * is used to get inode number/generation, we need to do lookup
392 * again to cache this data after create */
393 rc = dt_lookup_dir(env, parent, name, &dti_fid);
400 * Look up and create (if it does not exist) a local named file or directory in
/*
 * Return the object named \a name in directory \a parent, creating it with a
 * newly generated FID from \a los when the name does not exist.
 *
 * env    - execution environment
 * los    - OID storage supplying the device and the FID generator
 * parent - directory to search / insert into
 * name   - entry name
 * mode   - file mode; also selects the object format via
 *          dt_mode_to_dft(mode & S_IFMT)
 *
 * A lookup error other than -ENOENT is treated as a failure (the handling
 * itself is not visible in this view).
 *
 * NOTE(review): the 'rc' tests, the final argument of the
 * __local_file_create() call and the return statement are not visible here.
 */
403 struct dt_object *local_file_find_or_create(const struct lu_env *env,
404 struct local_oid_storage *los,
405 struct dt_object *parent,
406 const char *name, __u32 mode)
408 struct dt_thread_info *dti = dt_info(env);
409 struct dt_object *dto;
/* the FID found by the lookup lands in the per-thread dti_fid */
414 rc = dt_lookup_dir(env, parent, name, &dti->dti_fid);
416 /* name is found, get the object */
417 dto = ls_locate(env, dt2ls_dev(los->los_dev), &dti->dti_fid);
418 else if (rc != -ENOENT)
/* name is absent: dti_fid is overwritten with a freshly generated FID */
421 rc = local_object_fid_generate(env, los, &dti->dti_fid);
425 /* create the object */
426 dti->dti_attr.la_valid = LA_MODE;
427 dti->dti_attr.la_mode = mode;
428 dti->dti_dof.dof_type = dt_mode_to_dft(mode & S_IFMT);
429 dto = __local_file_create(env, &dti->dti_fid, los,
430 dt2ls_dev(los->los_dev),
431 parent, name, &dti->dti_attr,
437 EXPORT_SYMBOL(local_file_find_or_create);
/*
 * Return the object named \a name in \a parent, which must carry the
 * caller-chosen \a fid; create it with that FID when the name is absent.
 *
 * env    - execution environment
 * dt     - dt_device the resulting object is located on
 * fid    - FID the object must have
 * parent - directory to search / insert into
 *
 * If the name exists but maps to a different FID the result is
 * ERR_PTR(-EINVAL). Creation goes through a temporary ls_device reference;
 * no OID storage is involved (los is passed as NULL).
 *
 * NOTE(review): the 'name' and 'mode' parameter declarations, the 'rc' and
 * IS_ERR() tests, the final argument of __local_file_create() and the return
 * statement are not visible in this view.
 */
439 struct dt_object *local_file_find_or_create_with_fid(const struct lu_env *env,
440 struct dt_device *dt,
441 const struct lu_fid *fid,
442 struct dt_object *parent,
446 struct dt_thread_info *dti = dt_info(env);
447 struct dt_object *dto;
452 rc = dt_lookup_dir(env, parent, name, &dti->dti_fid);
454 /* name is found, get the object */
455 if (!lu_fid_eq(fid, &dti->dti_fid))
456 dto = ERR_PTR(-EINVAL);
458 dto = dt_locate(env, dt, fid);
459 } else if (rc != -ENOENT) {
462 struct ls_device *ls;
/* temporary reference, released by ls_device_put() below */
464 ls = ls_device_get(env, dt);
/* presumably taken when ls_device_get() returned an error pointer */
466 dto = ERR_PTR(PTR_ERR(ls));
468 /* create the object */
469 dti->dti_attr.la_valid = LA_MODE;
470 dti->dti_attr.la_mode = mode;
471 dti->dti_dof.dof_type = dt_mode_to_dft(mode & S_IFMT);
472 dto = __local_file_create(env, fid, NULL, ls, parent,
473 name, &dti->dti_attr,
475 /* ls_device_put() will finalize the ls device, we
476 * have to open the object in other device stack */
/* remember the FID, drop the ls-stack object, re-locate it through 'dt' */
478 dti->dti_fid = dto->do_lu.lo_header->loh_fid;
479 lu_object_put_nocache(env, &dto->do_lu);
480 dto = dt_locate(env, dt, &dti->dti_fid);
482 ls_device_put(env, ls);
487 EXPORT_SYMBOL(local_file_find_or_create_with_fid);
490 * Look up and create (if it does not exist) a local named index file in parent
/*
 * Return the index object named \a name in directory \a parent, creating it
 * with a newly generated FID from \a los when the name does not exist.
 *
 * env    - execution environment
 * los    - OID storage supplying the device and the FID generator
 * parent - directory to search / insert into
 * name   - entry name
 * mode   - file mode stored in the creation attributes
 * ft     - index features recorded in the object format (di_feat)
 *
 * Differs from local_file_find_or_create() in that the object format is
 * always DFT_INDEX with the supplied features.
 *
 * NOTE(review): the 'rc' tests, the final argument of the
 * __local_file_create() call and the return statement are not visible here.
 */
493 struct dt_object *local_index_find_or_create(const struct lu_env *env,
494 struct local_oid_storage *los,
495 struct dt_object *parent,
496 const char *name, __u32 mode,
497 const struct dt_index_features *ft)
499 struct dt_thread_info *dti = dt_info(env);
500 struct dt_object *dto;
505 rc = dt_lookup_dir(env, parent, name, &dti->dti_fid);
507 /* name is found, get the object */
508 dto = ls_locate(env, dt2ls_dev(los->los_dev), &dti->dti_fid);
509 } else if (rc != -ENOENT) {
/* name is absent: dti_fid is overwritten with a freshly generated FID */
512 rc = local_object_fid_generate(env, los, &dti->dti_fid);
516 /* create the object */
517 dti->dti_attr.la_valid = LA_MODE;
518 dti->dti_attr.la_mode = mode;
519 dti->dti_dof.dof_type = DFT_INDEX;
520 dti->dti_dof.u.dof_idx.di_feat = ft;
521 dto = __local_file_create(env, &dti->dti_fid, los,
522 dt2ls_dev(los->los_dev),
523 parent, name, &dti->dti_attr,
530 EXPORT_SYMBOL(local_index_find_or_create);
/*
 * Return the index object named \a name in \a parent, which must carry the
 * caller-chosen \a fid; create it as a DFT_INDEX object with features \a ft
 * when the name is absent.
 *
 * env    - execution environment
 * dt     - dt_device the resulting object is located on
 * fid    - FID the object must have
 * parent - directory to search / insert into
 * name   - entry name
 * mode   - file mode stored in the creation attributes
 * ft     - index features recorded in the object format (di_feat)
 *
 * If the name exists but maps to a different FID the result is
 * ERR_PTR(-EINVAL). Creation goes through a temporary ls_device reference;
 * no OID storage is involved (los is passed as NULL).
 *
 * NOTE(review): the return-type line, the 'rc' and IS_ERR() tests, the final
 * argument of __local_file_create() and the return statement are not visible
 * in this view.
 */
533 local_index_find_or_create_with_fid(const struct lu_env *env,
534 struct dt_device *dt,
535 const struct lu_fid *fid,
536 struct dt_object *parent,
537 const char *name, __u32 mode,
538 const struct dt_index_features *ft)
540 struct dt_thread_info *dti = dt_info(env);
541 struct dt_object *dto;
546 rc = dt_lookup_dir(env, parent, name, &dti->dti_fid);
548 /* name is found, get the object */
549 if (!lu_fid_eq(fid, &dti->dti_fid))
550 dto = ERR_PTR(-EINVAL);
552 dto = dt_locate(env, dt, fid);
553 } else if (rc != -ENOENT) {
556 struct ls_device *ls;
/* temporary reference, released by ls_device_put() below */
558 ls = ls_device_get(env, dt);
/* presumably taken when ls_device_get() returned an error pointer */
560 dto = ERR_PTR(PTR_ERR(ls));
562 /* create the object */
563 dti->dti_attr.la_valid = LA_MODE;
564 dti->dti_attr.la_mode = mode;
565 dti->dti_dof.dof_type = DFT_INDEX;
566 dti->dti_dof.u.dof_idx.di_feat = ft;
567 dto = __local_file_create(env, fid, NULL, ls, parent,
568 name, &dti->dti_attr,
570 /* ls_device_put() will finalize the ls device, we
571 * have to open the object in other device stack */
/* remember the FID, drop the ls-stack object, re-locate it through 'dt' */
573 dti->dti_fid = dto->do_lu.lo_header->loh_fid;
574 lu_object_put_nocache(env, &dto->do_lu);
575 dto = dt_locate(env, dt, &dti->dti_fid);
577 ls_device_put(env, ls);
582 EXPORT_SYMBOL(local_index_find_or_create_with_fid);
/*
 * Search the ls_los_list of \a ls for the OID storage serving sequence
 * \a seq and take a reference (los_refcount) on a match.
 *
 * The only caller visible in this file, local_oid_storage_init(), invokes
 * this with ls->ls_los_mutex held.
 *
 * NOTE(review): the assignment to 'ret' and the return statement are not
 * visible here; presumably the match is returned, or NULL when nothing is
 * found.
 */
584 static struct local_oid_storage *dt_los_find(struct ls_device *ls, __u64 seq)
586 struct local_oid_storage *los, *ret = NULL;
588 cfs_list_for_each_entry(los, &ls->ls_los_list, los_list) {
589 if (los->los_seq == seq) {
590 cfs_atomic_inc(&los->los_refcount);
599 * Initialize local OID storage for the required sequence.
600 * That may be needed for services that use local files and require
601 * dynamic OID allocation for them.
603 * For each sequence we have an object with the 'first_fid' identifier
604 * containing the counter for OIDs of locally created files with that
607 * It is used now by llog subsystem and MGS for NID tables
609 * Function gets first_fid to create counter object.
610 * All dynamic fids will be generated with the same sequence and incremented
613 * Returned local_oid_storage is in-memory representation of OID storage
/*
 * Set up (or re-use) the in-memory OID storage for the sequence of
 * \a first_fid on device \a dev and return it through \a los.
 *
 * env       - execution environment (also supplies the per-thread scratch
 *             buffers in dt_thread_info)
 * dev       - underlying dt_device
 * first_fid - FID of the on-disk counter object; its sequence identifies the
 *             storage
 * los       - out: the OID storage found or created
 *
 * Visible flow, all under ls->ls_los_mutex:
 *  - look for an existing storage of the same sequence (dt_los_find() takes
 *    a reference on it);
 *  - otherwise set up a new one: refcount 1, an extra reference on the
 *    ls_device, linked onto ls->ls_los_list;
 *  - locate the counter object and the root directory; the root must be
 *    usable as a directory (-ENOTDIR otherwise);
 *  - if the counter object does not exist yet, create it in one local
 *    transaction: create, LMA xattr, initial los_ondisk record with
 *    lso_next_oid = fid_oid(first_fid) + 1, and a root directory entry named
 *    "seq-<sequence in hex>-lastid";
 *  - read the los_ondisk record back and verify its magic;
 *  - initialize los_seq and los_last_oid from first_fid and the record.
 * The ls_device reference taken at entry is dropped again at the end.
 *
 * NOTE(review): the "found" early exit, the allocation, nearly all 'rc' /
 * IS_ERR() checks, the goto labels (out, out_los, out_lock and any
 * transaction label), the assignment of los_obj and dti->dti_off, closing
 * braces and the return statement are not visible in this view; the exact
 * success/error split of the tail must be confirmed against the full source.
 */
615 int local_oid_storage_init(const struct lu_env *env, struct dt_device *dev,
616 const struct lu_fid *first_fid,
617 struct local_oid_storage **los)
619 struct dt_thread_info *dti = dt_info(env);
620 struct ls_device *ls;
621 struct los_ondisk losd;
623 struct dt_object *root = NULL;
/* temporary device reference, dropped by ls_device_put() at the end */
629 ls = ls_device_get(env, dev);
633 cfs_mutex_lock(&ls->ls_los_mutex);
634 *los = dt_los_find(ls, fid_seq(first_fid));
638 /* not found, then create */
/* presumably reached when the allocation of the new storage fails */
641 GOTO(out, rc = -ENOMEM);
643 cfs_atomic_set(&(*los)->los_refcount, 1);
644 cfs_mutex_init(&(*los)->los_id_lock);
645 (*los)->los_dev = &ls->ls_top_dev;
/* the storage pins its device; local_oid_storage_fini() calls ls_device_put() */
646 cfs_atomic_inc(&ls->ls_refcount);
647 cfs_list_add(&(*los)->los_list, &ls->ls_los_list);
649 /* initialize data allowing to generate new fids,
650 * literally we need a sequence */
651 o = ls_locate(env, ls, first_fid);
653 GOTO(out_los, rc = PTR_ERR(o));
/* the root directory will hold the name of the counter object */
655 rc = dt_root_get(env, dev, &dti->dti_fid);
659 root = ls_locate(env, ls, &dti->dti_fid);
661 GOTO(out_los, rc = PTR_ERR(root));
663 if (dt_try_as_dir(env, root) == 0)
664 GOTO(out_los, rc = -ENOTDIR);
666 dt_write_lock(env, o, 0);
667 if (!dt_object_exists(o)) {
/* first use of this sequence: create the counter object on disk */
668 th = dt_trans_create(env, dev);
670 GOTO(out_lock, rc = PTR_ERR(th));
672 dti->dti_attr.la_valid = LA_MODE | LA_TYPE;
673 dti->dti_attr.la_mode = S_IFREG | S_IRUGO | S_IWUSR;
674 dti->dti_dof.dof_type = dt_mode_to_dft(S_IFREG);
676 rc = dt_declare_create(env, o, &dti->dti_attr, NULL,
/* directory entry name, derived from the sequence number in hex */
681 snprintf(dti->dti_buf, sizeof(dti->dti_buf),
682 "seq-%Lx-lastid", fid_seq(first_fid));
683 rc = dt_declare_insert(env, root,
684 (const struct dt_rec *)lu_object_fid(&o->do_lu),
685 (const struct dt_key *)dti->dti_buf,
/* only the size matters for the declaration, so no buffer is supplied */
690 dti->dti_lb.lb_buf = NULL;
691 dti->dti_lb.lb_len = sizeof(dti->dti_lma);
692 rc = dt_declare_xattr_set(env, o, &dti->dti_lb, XATTR_NAME_LMA,
697 rc = dt_declare_record_write(env, o, sizeof(losd), 0, th);
701 rc = dt_trans_start_local(env, dev, th);
/* still absent: the object write lock has been held since the check above */
705 LASSERT(!dt_object_exists(o));
706 rc = dt_create(env, o, &dti->dti_attr, NULL, &dti->dti_dof, th);
709 LASSERT(dt_object_exists(o));
711 lustre_lma_init(&dti->dti_lma, lu_object_fid(&o->do_lu));
712 lustre_lma_swab(&dti->dti_lma);
713 dti->dti_lb.lb_buf = &dti->dti_lma;
714 dti->dti_lb.lb_len = sizeof(dti->dti_lma);
715 rc = dt_xattr_set(env, o, &dti->dti_lb, XATTR_NAME_LMA, 0,
/* initial counter record: the oid following first_fid's own oid */
720 losd.lso_magic = cpu_to_le32(LOS_MAGIC);
721 losd.lso_next_oid = cpu_to_le32(fid_oid(first_fid) + 1);
724 dti->dti_lb.lb_buf = &losd;
725 dti->dti_lb.lb_len = sizeof(losd);
726 rc = dt_record_write(env, o, &dti->dti_lb, &dti->dti_off, th);
729 rc = dt_insert(env, root,
730 (const struct dt_rec *)lu_object_fid(&o->do_lu),
731 (const struct dt_key *)dti->dti_buf, th,
736 dt_trans_stop(env, dev, th);
/* read the counter record back into losd and validate its magic */
739 dti->dti_lb.lb_buf = &losd;
740 dti->dti_lb.lb_len = sizeof(losd);
741 rc = dt_record_read(env, o, &dti->dti_lb, &dti->dti_off);
742 if (rc == 0 && le32_to_cpu(losd.lso_magic) != LOS_MAGIC) {
743 CERROR("local storage file "DFID" is corrupted\n",
749 dt_write_unlock(env, o);
752 lu_object_put_nocache(env, &root->do_lu);
/* presumably 'o' is released only on failure and kept in the storage otherwise */
757 lu_object_put_nocache(env, &o->do_lu);
759 (*los)->los_seq = fid_seq(first_fid);
760 (*los)->los_last_oid = le32_to_cpu(losd.lso_next_oid);
764 cfs_mutex_unlock(&ls->ls_los_mutex);
765 ls_device_put(env, ls);
768 EXPORT_SYMBOL(local_oid_storage_init);
/*
 * Drop one reference on \a los and tear it down when it was the last.
 *
 * The counter is decremented first and re-read under ls->ls_los_mutex:
 * dt_los_find() increments los_refcount, and its visible caller runs it
 * under that mutex, so a concurrent local_oid_storage_init() may have
 * re-acquired the storage in between. On teardown the counter object
 * reference is released and the storage is unlinked from the device's list.
 * The trailing ls_device_put() pairs with the ls_refcount increment done in
 * local_oid_storage_init() when the storage was created.
 *
 * NOTE(review): the early return after the decrement, any NULL check on
 * los_obj and the release of the storage memory are not visible in this
 * view.
 */
770 void local_oid_storage_fini(const struct lu_env *env,
771 struct local_oid_storage *los)
773 struct ls_device *ls;
775 if (!cfs_atomic_dec_and_test(&los->los_refcount))
779 LASSERT(los->los_dev);
780 ls = dt2ls_dev(los->los_dev);
782 cfs_mutex_lock(&ls->ls_los_mutex);
783 if (cfs_atomic_read(&los->los_refcount) == 0) {
785 lu_object_put_nocache(env, &los->los_obj->do_lu);
786 cfs_list_del(&los->los_list);
789 cfs_mutex_unlock(&ls->ls_los_mutex);
790 ls_device_put(env, ls);
792 EXPORT_SYMBOL(local_oid_storage_fini);