4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License version 2 for more details. A copy is
14 * included in the COPYING file that accompanied this code.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 * Copyright (c) 2012, Intel Corporation.
26 * lustre/obdclass/local_storage.c
28 * Local storage for file/objects with fid generation. Works on top of OSD.
30 * Author: Mikhail Pershin <mike.pershin@intel.com>
33 #define DEBUG_SUBSYSTEM S_CLASS
35 #include "local_storage.h"
37 /* all initialized local storages on this node are linked on this list */
38 static CFS_LIST_HEAD(ls_list_head);
/* taken around lookups in and changes to ls_list_head by ls_find_dev(),
 * ls_device_get() and ls_device_put() */
39 static DEFINE_MUTEX(ls_list_mutex);
/*
 * Object init method of the local storage layer (installed as
 * loo_object_init in ls_lu_obj_ops).
 *
 * Resolves the ls_device that owns \a o from o->lo_dev, asks the lu_device
 * of the underlying OSD (ls->ls_osd) to allocate its object for the same
 * header (o->lo_header) and links that object to \a o with lu_object_add().
 * The third argument is not referenced by the visible code.
 */
41 static int ls_object_init(const struct lu_env *env, struct lu_object *o,
42 const struct lu_object_conf *unused)
45 struct lu_object *below;
46 struct lu_device *under;
50 ls = container_of0(o->lo_dev, struct ls_device, ls_top_dev.dd_lu_dev);
51 under = &ls->ls_osd->dd_lu_dev;
52 below = under->ld_ops->ldo_object_alloc(env, o->lo_header, under);
56 lu_object_add(o, below);
/*
 * Object free method of the local storage layer (installed as
 * loo_object_free in ls_lu_obj_ops).
 *
 * Finalizes the dt_object embedded in the ls_object (obj->ls_obj) and then
 * the lu_object_header; the header pointer is read from \a o before the
 * dt_object is finalized.
 */
61 static void ls_object_free(const struct lu_env *env, struct lu_object *o)
63 struct ls_object *obj = lu2ls_obj(o);
64 struct lu_object_header *h = o->lo_header;
66 dt_object_fini(&obj->ls_obj);
67 lu_object_header_fini(h);
/* lu_object operations of local storage objects; ls_object_alloc() installs
 * this table as lo_ops on the objects it sets up */
71 struct lu_object_operations ls_lu_obj_ops = {
72 .loo_object_init = ls_object_init,
73 .loo_object_free = ls_object_free,
/*
 * Set up the top lu_object of a local storage object (installed as
 * ldo_object_alloc in ls_lu_dev_ops).
 *
 * The visible setup initializes the lu_object_header h, initializes the
 * embedded dt_object (o->ls_obj) with that header and device d, adds l to
 * h with lu_object_add_top() and points l->lo_ops at ls_lu_obj_ops.
 * NOTE(review): the declarations/assignments of 'o', 'l', 'h' and 'd' and
 * the return statement are on lines not visible here - confirm against the
 * full file.
 */
76 struct lu_object *ls_object_alloc(const struct lu_env *env,
77 const struct lu_object_header *_h,
80 struct lu_object_header *h;
91 lu_object_header_init(h);
92 dt_object_init(&o->ls_obj, h, d);
93 lu_object_add_top(h, l);
95 l->lo_ops = &ls_lu_obj_ops;
/* lu_device operations of the local storage device: object allocation is
 * routed to ls_object_alloc(); ls_device_get() installs this table as
 * ld_ops of the top device */
103 static struct lu_device_operations ls_lu_dev_ops = {
104 .ldo_object_alloc = ls_object_alloc
/*
 * Search ls_list_head for the ls_device stacked on OSD device \a dev
 * (matched by ls->ls_osd).  A matching entry has its ls_refcount
 * incremented.  Both callers in this file (ls_find_dev() and
 * ls_device_get()) call it with ls_list_mutex held.
 */
107 static struct ls_device *__ls_find_dev(struct dt_device *dev)
109 struct ls_device *ls, *ret = NULL;
111 cfs_list_for_each_entry(ls, &ls_list_head, ls_linkage) {
112 if (ls->ls_osd == dev) {
113 cfs_atomic_inc(&ls->ls_refcount);
/* Locked variant of __ls_find_dev(): the lookup (and the reference it takes
 * on a match) happens while ls_list_mutex is held */
121 struct ls_device *ls_find_dev(struct dt_device *dev)
123 struct ls_device *ls;
125 mutex_lock(&ls_list_mutex);
126 ls = __ls_find_dev(dev);
127 mutex_unlock(&ls_list_mutex);
/* device type operations referenced by ls_lu_type.ldt_ops */
132 static struct lu_device_type_operations ls_device_type_ops = {
/* device type descriptor of the local storage layer; ls_device_get() passes
 * it to lu_device_init() for each top device it creates */
137 static struct lu_device_type ls_lu_type = {
138 .ldt_name = "local_storage",
139 .ldt_ops = &ls_device_type_ops,
/*
 * Find the local storage device stacked on OSD device \a dev, or create and
 * register a new one.
 *
 * Everything runs under ls_list_mutex.  The lookup goes through
 * __ls_find_dev(), which takes a reference on a match.  A newly created
 * device starts with ls_refcount 1, an empty ls_los_list and an initialized
 * ls_los_mutex; its top lu_device is initialized with ls_lu_type, gets
 * ls_lu_dev_ops as ld_ops and the ld_site of \a dev, and is then added to
 * ls_list_head.
 *
 * An allocation failure is reported as ERR_PTR(-ENOMEM).
 */
142 struct ls_device *ls_device_get(struct dt_device *dev)
144 struct ls_device *ls;
148 mutex_lock(&ls_list_mutex);
149 ls = __ls_find_dev(dev);
153 /* not found, then create */
156 GOTO(out_ls, ls = ERR_PTR(-ENOMEM));
158 cfs_atomic_set(&ls->ls_refcount, 1);
159 CFS_INIT_LIST_HEAD(&ls->ls_los_list);
160 mutex_init(&ls->ls_los_mutex);
/* the underlying device must already belong to a site: the new top device
 * is attached to that same site below */
164 LASSERT(dev->dd_lu_dev.ld_site);
165 lu_device_init(&ls->ls_top_dev.dd_lu_dev, &ls_lu_type);
166 ls->ls_top_dev.dd_lu_dev.ld_ops = &ls_lu_dev_ops;
167 ls->ls_top_dev.dd_lu_dev.ld_site = dev->dd_lu_dev.ld_site;
169 /* finally add ls to the list */
170 cfs_list_add(&ls->ls_linkage, &ls_list_head);
172 mutex_unlock(&ls_list_mutex);
/*
 * Drop one reference on \a ls and tear the device down when it was the last.
 *
 * The teardown path is entered only when the decrement reaches zero.  The
 * counter is then read again under ls_list_mutex; presumably this guards
 * against __ls_find_dev() having taken a new reference in between - confirm.
 * Teardown requires that no local_oid_storage is still attached (asserted),
 * unlinks the device from ls_list_head, purges the site and finalizes the
 * top lu_device.
 */
176 void ls_device_put(const struct lu_env *env, struct ls_device *ls)
179 if (!cfs_atomic_dec_and_test(&ls->ls_refcount))
182 mutex_lock(&ls_list_mutex);
183 if (cfs_atomic_read(&ls->ls_refcount) == 0) {
184 LASSERT(cfs_list_empty(&ls->ls_los_list));
185 cfs_list_del(&ls->ls_linkage);
186 lu_site_purge(env, ls->ls_top_dev.dd_lu_dev.ld_site, ~0);
187 lu_device_fini(&ls->ls_top_dev.dd_lu_dev);
190 mutex_unlock(&ls_list_mutex);
194 * local file fid generation
/*
 * Hand out the next fid of the sequence owned by \a los.
 *
 * Under los->los_id_lock the fid gets f_seq = los->los_seq and f_oid = the
 * current los->los_last_oid, which is incremented afterwards.
 * Both los->los_dev and los->los_obj must be set (asserted).
 */
196 int local_object_fid_generate(const struct lu_env *env,
197 struct local_oid_storage *los,
200 LASSERT(los->los_dev);
201 LASSERT(los->los_obj);
205 /* to make it unique after reboot we store
206 * the latest generated fid atomically with
207 * object creation see local_object_create() */
209 mutex_lock(&los->los_id_lock);
210 fid->f_seq = los->los_seq;
211 fid->f_oid = los->los_last_oid++;
213 mutex_unlock(&los->los_id_lock);
/*
 * Declare in transaction \a th the updates needed to create object \a o.
 *
 * Visible declarations, in order:
 *  - a record write of sizeof(struct los_ondisk) bytes at offset 0 to the
 *    oid counter object los->los_obj (which must exist),
 *  - the creation of \a o with \a attr and \a dof,
 *  - setting the XATTR_NAME_LMA xattr on \a o.
 */
218 int local_object_declare_create(const struct lu_env *env,
219 struct local_oid_storage *los,
220 struct dt_object *o, struct lu_attr *attr,
221 struct dt_object_format *dof,
224 struct dt_thread_info *dti = dt_info(env);
229 /* update fid generation file */
231 LASSERT(dt_object_exists(los->los_obj));
232 rc = dt_declare_record_write(env, los->los_obj,
233 sizeof(struct los_ondisk), 0, th);
238 rc = dt_declare_create(env, o, attr, NULL, dof, th);
/* for the xattr declaration only a length is supplied, no data buffer */
242 dti->dti_lb.lb_buf = NULL;
243 dti->dti_lb.lb_len = sizeof(dti->dti_lma);
244 rc = dt_declare_xattr_set(env, o, &dti->dti_lb, XATTR_NAME_LMA, 0, th);
/*
 * Create object \a o in transaction \a th and store the oid counter of
 * \a los on disk.
 *
 * After dt_create() the counter object los->los_obj (which must be set and
 * exist) receives a struct los_ondisk record holding LOS_MAGIC and the
 * current los->los_last_oid, both converted to little-endian.  The record
 * is built and written while los->los_id_lock is held.
 */
249 int local_object_create(const struct lu_env *env,
250 struct local_oid_storage *los,
251 struct dt_object *o, struct lu_attr *attr,
252 struct dt_object_format *dof, struct thandle *th)
254 struct dt_thread_info *dti = dt_info(env);
255 struct los_ondisk losd;
260 rc = dt_create(env, o, attr, NULL, dof, th);
267 LASSERT(los->los_obj);
268 LASSERT(dt_object_exists(los->los_obj));
270 /* many threads can update this, serialize
271 * them here to avoid the race where one thread
272 * takes the value first, but writes it last */
273 mutex_lock(&los->los_id_lock);
275 /* update local oid number on disk so that
276 * we know the last one used after reboot */
277 losd.lso_magic = cpu_to_le32(LOS_MAGIC);
278 losd.lso_next_oid = cpu_to_le32(los->los_last_oid);
281 dti->dti_lb.lb_buf = &losd;
282 dti->dti_lb.lb_len = sizeof(losd);
283 rc = dt_record_write(env, los->los_obj, &dti->dti_lb, &dti->dti_off,
285 mutex_unlock(&los->los_id_lock);
291 * Create local named object (file, directory or index) in parent directory.
/*
 * Create object \a fid and insert it as \a name into \a parent, using one
 * local transaction on ls->ls_osd.
 *
 * Visible steps: locate the object through the ls device (an object that
 * already exists yields -EEXIST), declare the creation and the name
 * insertion (plus reference updates for a directory), start the
 * transaction, then under the object's write lock re-check existence,
 * create the object, add "." and ".." for a directory and finally insert
 * the name into \a parent under the parent's write lock.
 *
 * Directory handling is keyed off dti->dti_dof.dof_type from the thread
 * info, not off the \a dof argument; the callers in this file fill
 * dti->dti_dof before calling.
 */
293 struct dt_object *__local_file_create(const struct lu_env *env,
294 const struct lu_fid *fid,
295 struct local_oid_storage *los,
296 struct ls_device *ls,
297 struct dt_object *parent,
298 const char *name, struct lu_attr *attr,
299 struct dt_object_format *dof)
301 struct dt_thread_info *dti = dt_info(env);
302 struct dt_object *dto;
306 dto = ls_locate(env, ls, fid);
307 if (unlikely(IS_ERR(dto)))
310 LASSERT(dto != NULL);
311 if (dt_object_exists(dto))
312 GOTO(out, rc = -EEXIST);
314 th = dt_trans_create(env, ls->ls_osd);
316 GOTO(out, rc = PTR_ERR(th));
318 rc = local_object_declare_create(env, los, dto, attr, dof, th);
320 GOTO(trans_stop, rc);
/* a directory additionally needs reference updates on itself and on the
 * parent; the matching dt_ref_add() calls follow after the create */
322 if (dti->dti_dof.dof_type == DFT_DIR) {
323 dt_declare_ref_add(env, dto, th);
324 dt_declare_ref_add(env, parent, th);
327 rc = dt_declare_insert(env, parent, (void *)fid, (void *)name, th);
329 GOTO(trans_stop, rc);
331 rc = dt_trans_start_local(env, ls->ls_osd, th);
333 GOTO(trans_stop, rc);
335 dt_write_lock(env, dto, 0);
/* second existence check, now under the write lock: an object that exists
 * by now is not treated as an error (rc = 0) */
336 if (dt_object_exists(dto))
337 GOTO(unlock, rc = 0);
339 CDEBUG(D_OTHER, "create new object "DFID"\n",
340 PFID(lu_object_fid(&dto->do_lu)));
341 rc = local_object_create(env, los, dto, attr, dof, th);
344 LASSERT(dt_object_exists(dto));
346 if (dti->dti_dof.dof_type == DFT_DIR) {
347 if (!dt_try_as_dir(env, dto))
348 GOTO(destroy, rc = -ENOTDIR);
349 /* Add "." and ".." for newly created dir */
350 rc = dt_insert(env, dto, (void *)fid, (void *)".", th,
354 dt_ref_add(env, dto, th);
355 rc = dt_insert(env, dto, (void *)lu_object_fid(&parent->do_lu),
356 (void *)"..", th, BYPASS_CAPA, 1);
/* link the new object into the parent; the parent is write-locked only for
 * the insertion and its reference update */
361 dt_write_lock(env, parent, 0);
362 rc = dt_insert(env, parent, (const struct dt_rec *)fid,
363 (const struct dt_key *)name, th, BYPASS_CAPA, 1);
364 if (dti->dti_dof.dof_type == DFT_DIR)
365 dt_ref_add(env, parent, th);
366 dt_write_unlock(env, parent);
/* NOTE(review): the labels and conditions guarding the cleanup calls below
 * are on lines not visible here - confirm which paths reach each of them */
371 dt_destroy(env, dto, th);
373 dt_write_unlock(env, dto);
375 dt_trans_stop(env, ls->ls_osd, th);
378 lu_object_put_nocache(env, &dto->do_lu);
385 * Look up and create (if it does not exist) a local named file or directory in
/*
 * Find \a name in directory \a parent or create it with a freshly generated
 * fid.
 *
 * A name found by dt_lookup_dir() is opened through the ls device behind
 * los->los_dev.  Otherwise a new fid is generated from \a los and the object
 * is created by __local_file_create() with mode \a mode; the object format
 * is derived from the file type bits of \a mode (mode & S_IFMT).
 * Lookup results other than -ENOENT are handled on lines not visible here.
 */
388 struct dt_object *local_file_find_or_create(const struct lu_env *env,
389 struct local_oid_storage *los,
390 struct dt_object *parent,
391 const char *name, __u32 mode)
393 struct dt_thread_info *dti = dt_info(env);
394 struct dt_object *dto;
399 rc = dt_lookup_dir(env, parent, name, &dti->dti_fid);
401 /* name is found, get the object */
402 dto = ls_locate(env, dt2ls_dev(los->los_dev), &dti->dti_fid);
403 else if (rc != -ENOENT)
406 rc = local_object_fid_generate(env, los, &dti->dti_fid);
410 /* create the object */
411 dti->dti_attr.la_valid = LA_MODE;
412 dti->dti_attr.la_mode = mode;
413 dti->dti_dof.dof_type = dt_mode_to_dft(mode & S_IFMT);
414 dto = __local_file_create(env, &dti->dti_fid, los,
415 dt2ls_dev(los->los_dev),
416 parent, name, &dti->dti_attr,
422 EXPORT_SYMBOL(local_file_find_or_create);
/*
 * Find \a name in directory \a parent or create it with the caller-supplied
 * \a fid on device \a dt.
 *
 * A name found by dt_lookup_dir() is opened with dt_locate() on \a dt.
 * For creation a reference on the ls device of \a dt is taken with
 * ls_device_get(), the object is created by __local_file_create() without a
 * local_oid_storage (los == NULL), and the ls reference is dropped again.
 * The object obtained through the ls stack is released and re-opened with
 * dt_locate() on \a dt before the ls device is put.
 */
424 struct dt_object *local_file_find_or_create_with_fid(const struct lu_env *env,
425 struct dt_device *dt,
426 const struct lu_fid *fid,
427 struct dt_object *parent,
431 struct dt_thread_info *dti = dt_info(env);
432 struct dt_object *dto;
437 rc = dt_lookup_dir(env, parent, name, &dti->dti_fid);
439 dto = dt_locate(env, dt, &dti->dti_fid);
440 } else if (rc != -ENOENT) {
443 struct ls_device *ls;
445 ls = ls_device_get(dt);
/* pass the error code of ls_device_get() on as a dt_object error pointer */
447 dto = ERR_PTR(PTR_ERR(ls));
449 /* create the object */
450 dti->dti_attr.la_valid = LA_MODE;
451 dti->dti_attr.la_mode = mode;
452 dti->dti_dof.dof_type = dt_mode_to_dft(mode & S_IFMT);
453 dto = __local_file_create(env, fid, NULL, ls, parent,
454 name, &dti->dti_attr,
456 /* ls_device_put() will finalize the ls device, we
457 * have to open the object in other device stack */
459 dti->dti_fid = dto->do_lu.lo_header->loh_fid;
460 lu_object_put_nocache(env, &dto->do_lu);
461 dto = dt_locate(env, dt, &dti->dti_fid);
463 ls_device_put(env, ls);
468 EXPORT_SYMBOL(local_file_find_or_create_with_fid);
471 * Look up and create (if it does not exist) a local named index file in parent
/*
 * Find index \a name in directory \a parent or create it with a freshly
 * generated fid.
 *
 * A name found by dt_lookup_dir() is opened through the ls device behind
 * los->los_dev.  Otherwise a new fid is generated from \a los and the object
 * is created by __local_file_create() as DFT_INDEX with index features
 * \a ft and mode \a mode.
 * Lookup results other than -ENOENT are handled on lines not visible here.
 */
474 struct dt_object *local_index_find_or_create(const struct lu_env *env,
475 struct local_oid_storage *los,
476 struct dt_object *parent,
477 const char *name, __u32 mode,
478 const struct dt_index_features *ft)
480 struct dt_thread_info *dti = dt_info(env);
481 struct dt_object *dto;
486 rc = dt_lookup_dir(env, parent, name, &dti->dti_fid);
488 /* name is found, get the object */
489 dto = ls_locate(env, dt2ls_dev(los->los_dev), &dti->dti_fid);
490 } else if (rc != -ENOENT) {
493 rc = local_object_fid_generate(env, los, &dti->dti_fid);
497 /* create the object */
498 dti->dti_attr.la_valid = LA_MODE;
499 dti->dti_attr.la_mode = mode;
500 dti->dti_dof.dof_type = DFT_INDEX;
501 dti->dti_dof.u.dof_idx.di_feat = ft;
502 dto = __local_file_create(env, &dti->dti_fid, los,
503 dt2ls_dev(los->los_dev),
504 parent, name, &dti->dti_attr,
511 EXPORT_SYMBOL(local_index_find_or_create);
/*
 * Find index \a name in directory \a parent or create it with the
 * caller-supplied \a fid on device \a dt.
 *
 * When the name exists, the fid stored in the directory must equal \a fid,
 * otherwise ERR_PTR(-EINVAL) is produced; a matching object is opened with
 * dt_locate().  For creation a reference on the ls device of \a dt is taken
 * with ls_device_get(), the object is created by __local_file_create() as
 * DFT_INDEX with features \a ft and without a local_oid_storage
 * (los == NULL).  The object obtained through the ls stack is released and
 * re-opened with dt_locate() on \a dt before the ls device is put.
 */
514 local_index_find_or_create_with_fid(const struct lu_env *env,
515 struct dt_device *dt,
516 const struct lu_fid *fid,
517 struct dt_object *parent,
518 const char *name, __u32 mode,
519 const struct dt_index_features *ft)
521 struct dt_thread_info *dti = dt_info(env);
522 struct dt_object *dto;
527 rc = dt_lookup_dir(env, parent, name, &dti->dti_fid);
529 /* name is found, get the object */
530 if (!lu_fid_eq(fid, &dti->dti_fid))
531 dto = ERR_PTR(-EINVAL);
533 dto = dt_locate(env, dt, fid);
534 } else if (rc != -ENOENT) {
537 struct ls_device *ls;
539 ls = ls_device_get(dt);
/* pass the error code of ls_device_get() on as a dt_object error pointer */
541 dto = ERR_PTR(PTR_ERR(ls));
543 /* create the object */
544 dti->dti_attr.la_valid = LA_MODE;
545 dti->dti_attr.la_mode = mode;
546 dti->dti_dof.dof_type = DFT_INDEX;
547 dti->dti_dof.u.dof_idx.di_feat = ft;
548 dto = __local_file_create(env, fid, NULL, ls, parent,
549 name, &dti->dti_attr,
551 /* ls_device_put() will finalize the ls device, we
552 * have to open the object in other device stack */
554 dti->dti_fid = dto->do_lu.lo_header->loh_fid;
555 lu_object_put_nocache(env, &dto->do_lu);
556 dto = dt_locate(env, dt, &dti->dti_fid);
558 ls_device_put(env, ls);
563 EXPORT_SYMBOL(local_index_find_or_create_with_fid);
/*
 * Declare in transaction \a th the updates made by local_object_unlink():
 * deletion of \a name from directory p, a reference drop on \a c and the
 * destruction of \a c.  The result of the last declaration is returned
 * directly.
 */
565 static int local_object_declare_unlink(const struct lu_env *env,
566 struct dt_device *dt,
568 struct dt_object *c, const char *name,
573 rc = dt_declare_delete(env, p, (const struct dt_key *)name, th);
577 rc = dt_declare_ref_del(env, c, th);
581 return dt_declare_destroy(env, c, th);
/*
 * Remove \a name from directory \a parent and destroy the object it refers
 * to, in one local transaction on \a dt.
 *
 * Visible steps: look the name up, open the object with dt_locate() (its
 * error is returned as is), create the transaction, declare the updates,
 * start the transaction, then under the object's write lock delete the
 * name, drop the object's reference and destroy it.  The transaction is
 * stopped and the object released with lu_object_put_nocache() at the end.
 */
584 int local_object_unlink(const struct lu_env *env, struct dt_device *dt,
585 struct dt_object *parent, const char *name)
587 struct dt_thread_info *dti = dt_info(env);
588 struct dt_object *dto;
594 rc = dt_lookup_dir(env, parent, name, &dti->dti_fid);
600 dto = dt_locate(env, dt, &dti->dti_fid);
601 if (unlikely(IS_ERR(dto)))
602 RETURN(PTR_ERR(dto));
604 th = dt_trans_create(env, dt);
606 GOTO(out, rc = PTR_ERR(th));
608 rc = local_object_declare_unlink(env, dt, parent, dto, name, th);
612 rc = dt_trans_start_local(env, dt, th);
616 dt_write_lock(env, dto, 0);
617 rc = dt_delete(env, parent, (struct dt_key *)name, th, BYPASS_CAPA);
621 rc = dt_ref_del(env, dto, th);
/* NOTE(review): this re-inserts the name that was just deleted; presumably
 * it is the undo path for a failed dt_ref_del() - the guarding condition is
 * not visible here, confirm */
623 rc = dt_insert(env, parent,
624 (const struct dt_rec *)&dti->dti_fid,
625 (const struct dt_key *)name, th, BYPASS_CAPA, 1);
629 rc = dt_destroy(env, dto, th);
631 dt_write_unlock(env, dto);
633 dt_trans_stop(env, dt, th);
635 lu_object_put_nocache(env, &dto->do_lu);
638 EXPORT_SYMBOL(local_object_unlink);
/*
 * Search ls->ls_los_list for the local_oid_storage serving sequence \a seq.
 * A matching entry has its los_refcount incremented.
 * local_oid_storage_init() calls this with ls->ls_los_mutex held.
 */
640 struct local_oid_storage *dt_los_find(struct ls_device *ls, __u64 seq)
642 struct local_oid_storage *los, *ret = NULL;
644 cfs_list_for_each_entry(los, &ls->ls_los_list, los_list) {
645 if (los->los_seq == seq) {
646 cfs_atomic_inc(&los->los_refcount);
/*
 * Drop one reference on \a los.  Reaching zero here is not an expected
 * outcome, as the comment in the body explains.
 */
654 void dt_los_put(struct local_oid_storage *los)
656 if (cfs_atomic_dec_and_test(&los->los_refcount))
657 /* should never happen, only local_oid_storage_fini should
658 * drop refcount to zero */
664 * Initialize local OID storage for required sequence.
665 * That may be needed for services that use local files and require
666 * dynamic OID allocation for them.
668 * Per each sequence we have an object with 'first_fid' identifier
669 * containing the counter for OIDs of locally created files with that
672 * It is used now by llog subsystem and MGS for NID tables
674 * Function gets first_fid to create counter object.
675 * All dynamic fids will be generated with the same sequence and incremented
678 * Returned local_oid_storage is in-memory representation of OID storage
/*
 * Find or set up the local_oid_storage for the sequence of \a first_fid on
 * device \a dev and return it through \a los.
 *
 * Visible flow:
 *  - get the ls device of \a dev and, under ls->ls_los_mutex, look for an
 *    existing storage of that sequence with dt_los_find();
 *  - otherwise set up a new one (refcount 1, linked on ls->ls_los_list,
 *    holding its own reference on the ls device);
 *  - look up the counter file "seq-<seq>-lastid" in the root directory;
 *    the fid falls back to \a first_fid on a line whose condition is not
 *    visible (the later LASSERT expects rc == -ENOENT when the object does
 *    not exist);
 *  - when the counter object does not exist, create it in a local
 *    transaction, write a los_ondisk record whose next oid is
 *    fid_oid(first_fid) + 1 and insert its name into the root directory;
 *  - read the record back and verify LOS_MAGIC;
 *  - store the sequence and the next oid in the in-memory storage.
 *
 * The ls reference taken by ls_device_get() at the top is dropped again at
 * the end.
 */
680 int local_oid_storage_init(const struct lu_env *env, struct dt_device *dev,
681 const struct lu_fid *first_fid,
682 struct local_oid_storage **los)
684 struct dt_thread_info *dti = dt_info(env);
685 struct ls_device *ls;
686 struct los_ondisk losd;
687 struct dt_object *root = NULL;
688 struct dt_object *o = NULL;
694 ls = ls_device_get(dev);
698 mutex_lock(&ls->ls_los_mutex);
699 *los = dt_los_find(ls, fid_seq(first_fid));
703 /* not found, then create */
706 GOTO(out, rc = -ENOMEM);
708 cfs_atomic_set(&(*los)->los_refcount, 1);
709 mutex_init(&(*los)->los_id_lock);
710 (*los)->los_dev = &ls->ls_top_dev;
/* the new storage holds its own reference on the ls device; the error path
 * further down undoes both this and the list insertion */
711 cfs_atomic_inc(&ls->ls_refcount);
712 cfs_list_add(&(*los)->los_list, &ls->ls_los_list);
714 rc = dt_root_get(env, dev, &dti->dti_fid);
718 root = ls_locate(env, ls, &dti->dti_fid);
720 GOTO(out_los, rc = PTR_ERR(root));
722 /* initialize data allowing to generate new fids,
723 * literally we need a sequence */
724 snprintf(dti->dti_buf, sizeof(dti->dti_buf), "seq-%Lx-lastid",
726 rc = dt_lookup_dir(env, root, dti->dti_buf, &dti->dti_fid);
728 dti->dti_fid = *first_fid;
732 o = ls_locate(env, ls, &dti->dti_fid);
734 GOTO(out_los, rc = PTR_ERR(o));
735 LASSERT(fid_seq(&dti->dti_fid) == fid_seq(first_fid));
736 if (!dt_object_exists(o)) {
737 LASSERT(rc == -ENOENT);
739 th = dt_trans_create(env, dev);
741 GOTO(out_lock, rc = PTR_ERR(th));
743 dti->dti_attr.la_valid = LA_MODE | LA_TYPE;
744 dti->dti_attr.la_mode = S_IFREG | S_IRUGO | S_IWUSR;
745 dti->dti_dof.dof_type = dt_mode_to_dft(S_IFREG);
747 rc = dt_declare_create(env, o, &dti->dti_attr, NULL,
752 rc = dt_declare_insert(env, root,
753 (const struct dt_rec *)&dti->dti_fid,
754 (const struct dt_key *)dti->dti_buf,
759 rc = dt_declare_record_write(env, o, sizeof(losd), 0, th);
763 rc = dt_trans_start_local(env, dev, th);
/* both the root directory and the counter object are write-locked; the
 * existence check is repeated under the locks */
767 dt_write_lock(env, root, 0);
768 dt_write_lock(env, o, 0);
769 if (dt_object_exists(o))
770 GOTO(out_lock, rc = 0);
772 rc = dt_create(env, o, &dti->dti_attr, NULL, &dti->dti_dof,
/* initial on-disk record: the oid following the one used by first_fid */
777 losd.lso_magic = cpu_to_le32(LOS_MAGIC);
778 losd.lso_next_oid = cpu_to_le32(fid_oid(first_fid) + 1);
781 dti->dti_lb.lb_buf = &losd;
782 dti->dti_lb.lb_len = sizeof(losd);
783 rc = dt_record_write(env, o, &dti->dti_lb, &dti->dti_off, th);
786 rc = dt_insert(env, root,
787 (const struct dt_rec *)&dti->dti_fid,
788 (const struct dt_key *)dti->dti_buf,
793 dt_write_unlock(env, o);
794 dt_write_unlock(env, root);
796 dt_trans_stop(env, dev, th);
/* read the on-disk record under the object's read lock; a successful read
 * with a wrong magic is reported as corruption */
799 dti->dti_lb.lb_buf = &losd;
800 dti->dti_lb.lb_len = sizeof(losd);
801 dt_read_lock(env, o, 0);
802 rc = dt_record_read(env, o, &dti->dti_lb, &dti->dti_off);
803 dt_read_unlock(env, o);
804 if (rc == 0 && le32_to_cpu(losd.lso_magic) != LOS_MAGIC) {
805 CERROR("local storage file "DFID" is corrupted\n",
811 if (root != NULL && !IS_ERR(root))
812 lu_object_put_nocache(env, &root->do_lu);
/* NOTE(review): the conditions selecting between the failure cleanup and
 * the success assignments below are on lines not visible here - confirm */
815 cfs_list_del(&(*los)->los_list);
816 cfs_atomic_dec(&ls->ls_refcount);
819 if (o != NULL && !IS_ERR(o))
820 lu_object_put_nocache(env, &o->do_lu);
822 (*los)->los_seq = fid_seq(first_fid);
823 (*los)->los_last_oid = le32_to_cpu(losd.lso_next_oid);
827 mutex_unlock(&ls->ls_los_mutex);
828 ls_device_put(env, ls);
831 EXPORT_SYMBOL(local_oid_storage_init);
/*
 * Drop one reference on \a los and release it when it was the last.
 *
 * The release path is entered only when the decrement reaches zero.  Under
 * ls->ls_los_mutex the counter is read again; if it is still zero the
 * counter object los->los_obj is released and the storage is unlinked from
 * the ls device's list.  Afterwards a reference on the ls device is dropped;
 * presumably this pairs with the one taken for the storage in
 * local_oid_storage_init() - confirm.
 */
833 void local_oid_storage_fini(const struct lu_env *env,
834 struct local_oid_storage *los)
836 struct ls_device *ls;
838 if (!cfs_atomic_dec_and_test(&los->los_refcount))
842 LASSERT(los->los_dev);
843 ls = dt2ls_dev(los->los_dev);
845 mutex_lock(&ls->ls_los_mutex);
846 if (cfs_atomic_read(&los->los_refcount) == 0) {
848 lu_object_put_nocache(env, &los->los_obj->do_lu);
849 cfs_list_del(&los->los_list);
852 mutex_unlock(&ls->ls_los_mutex);
853 ls_device_put(env, ls);
855 EXPORT_SYMBOL(local_oid_storage_fini);