4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.gnu.org/licenses/gpl-2.0.html
23 * Copyright (c) 2013, 2014, Intel Corporation.
26 * lustre/osp/osp_md_object.c
28 * OST/MDT proxy device (OSP) Metadata methods
30 * This file implements methods for remote MD object, which include
31 * dt_object_operations, dt_index_operations and dt_body_operations.
33 * If there are multiple MDTs in one filesystem, one operation might
34 * include modifications in several MDTs. In such cases, clients
35 * send the RPC to the master MDT, then the operation is decomposed into
36 * object updates which will be dispatched to OSD or OSP. The local updates
37 * go to local OSD and the remote updates go to OSP. In OSP, these remote
38 * object updates will be packed into an update RPC, sent to the remote MDT
39 * and handled by Object Update Target (OUT).
41 * In DNE phase I, because a complete recovery solution is missing, updates
42 * will be executed in order and synchronously.
43 * 1. The transaction is created.
44 * 2. In transaction declare, it collects and packs remote
45 * updates (in osp_md_declare_xxx()).
46 * 3. In transaction start, it sends these remote updates
47 * to remote MDTs, which will execute these updates synchronously.
48 * 4. In transaction execute phase, the local updates will be executed
51 * Author: Di Wang <di.wang@intel.com>
54 #define DEBUG_SUBSYSTEM S_MDS
56 #include <lustre_log.h>
57 #include "osp_internal.h"
59 static const char dot[] = ".";
60 static const char dotdot[] = "..";
63 * Add OUT_CREATE sub-request into the OUT RPC.
65 * Note: if the object has already been created, we must add an object
66 * destroy sub-request ahead of the create, so that the object is destroyed
67 * and then re-created.
69 * \param[in] env execution environment
70 * \param[in] dt object to be created
71 * \param[in] attr attribute of the created object
72 * \param[in] hint creation hint
73 * \param[in] dof creation format information
74 * \param[in] th the transaction handle
76 * \retval only return 0 for now
78 static int __osp_md_declare_object_create(const struct lu_env *env,
81 struct dt_allocation_hint *hint,
82 struct dt_object_format *dof,
85 struct dt_update_request *update;
88 update = thandle_to_dt_update_request(th);
89 LASSERT(update != NULL);
91 if (lu_object_exists(&dt->do_lu)) {
92 /* If the object already exists, we need to destroy
93 * this orphan object first.
95 * This scenario can happen in the following case:
97 * 1. client sends remote create to MDT0.
98 * 2. MDT0 sends create update to MDT1.
99 * 3. MDT1 finishes the create synchronously.
100 * 4. MDT0 fails and reboots.
101 * 5. client resends remote create to MDT0.
102 * 6. MDT0 tries to resend create update to MDT1,
103 * but finds that the object already exists
105 CDEBUG(D_HA, "%s: object "DFID" exists, destroy this orphan\n",
106 dt->do_lu.lo_dev->ld_obd->obd_name,
107 PFID(lu_object_fid(&dt->do_lu)));
109 rc = out_ref_del_pack(env, &update->dur_buf,
110 lu_object_fid(&dt->do_lu),
111 update->dur_batchid);
115 if (S_ISDIR(lu_object_attr(&dt->do_lu))) {
116 /* decrease for ".." */
117 rc = out_ref_del_pack(env, &update->dur_buf,
118 lu_object_fid(&dt->do_lu),
119 update->dur_batchid);
124 rc = out_object_destroy_pack(env, &update->dur_buf,
125 lu_object_fid(&dt->do_lu),
126 update->dur_batchid);
130 dt->do_lu.lo_header->loh_attr &= ~LOHA_EXISTS;
131 /* Increase batchid to add this orphan object deletion
132 * to a separate transaction */
133 update_inc_batchid(update);
136 rc = out_create_pack(env, &update->dur_buf,
137 lu_object_fid(&dt->do_lu), attr, hint, dof,
138 update->dur_batchid);
143 CERROR("%s: Insert update error: rc = %d\n",
144 dt->do_lu.lo_dev->ld_obd->obd_name, rc);
150 * Implementation of dt_object_operations::do_declare_create
152 * For a non-remote transaction, it will add an OUT_CREATE sub-request
153 * into the OUT RPC that will be flushed when the transaction starts.
155 * \param[in] env execution environment
156 * \param[in] dt remote object to be created
157 * \param[in] attr attribute of the created object
158 * \param[in] hint creation hint
159 * \param[in] dof creation format information
160 * \param[in] th the transaction handle
162 * \retval 0 if the insertion succeeds.
163 * \retval negative errno if the insertion fails.
165 int osp_md_declare_object_create(const struct lu_env *env,
166 struct dt_object *dt,
167 struct lu_attr *attr,
168 struct dt_allocation_hint *hint,
169 struct dt_object_format *dof,
174 if (!is_only_remote_trans(th)) {
175 rc = __osp_md_declare_object_create(env, dt, attr, hint,
178 CDEBUG(D_INFO, "declare create md_object "DFID": rc = %d\n",
179 PFID(&dt->do_lu.lo_header->loh_fid), rc);
186 * Implementation of dt_object_operations::do_create
188 * For a remote transaction, it will add an OUT_CREATE sub-request into
189 * the OUT RPC that will be flushed when the transaction stops.
191 * It sets necessary flags for created object. In DNE phase I,
192 * remote updates are actually executed during transaction start,
193 * i.e. the object has already been created when calling this method.
195 * \param[in] env execution environment
196 * \param[in] dt object to be created
197 * \param[in] attr attribute of the created object
198 * \param[in] hint creation hint
199 * \param[in] dof creation format information
200 * \param[in] th the transaction handle
202 * \retval only return 0 for now
204 int osp_md_object_create(const struct lu_env *env, struct dt_object *dt,
205 struct lu_attr *attr, struct dt_allocation_hint *hint,
206 struct dt_object_format *dof, struct thandle *th)
210 if (is_only_remote_trans(th)) {
211 rc = __osp_md_declare_object_create(env, dt, attr, hint,
214 CDEBUG(D_INFO, "create md_object "DFID": rc = %d\n",
215 PFID(&dt->do_lu.lo_header->loh_fid), rc);
219 dt->do_lu.lo_header->loh_attr |= LOHA_EXISTS |
220 (attr->la_mode & S_IFMT);
221 dt2osp_obj(dt)->opo_non_exist = 0;
228 * Add OUT_REF_DEL sub-request into the OUT RPC.
230 * \param[in] env execution environment
231 * \param[in] dt object to decrease the reference count.
232 * \param[in] th the transaction handle of refcount decrease.
234 * \retval 0 if the insertion succeeds.
235 * \retval negative errno if the insertion fails.
237 static int __osp_md_ref_del(const struct lu_env *env, struct dt_object *dt,
240 struct dt_update_request *update;
243 update = thandle_to_dt_update_request(th);
244 LASSERT(update != NULL);
246 rc = out_ref_del_pack(env, &update->dur_buf,
247 lu_object_fid(&dt->do_lu),
248 update->dur_batchid);
253 * Implementation of dt_object_operations::do_declare_ref_del
255 * For a non-remote transaction, it will add an OUT_REF_DEL sub-request
256 * into the OUT RPC that will be flushed when the transaction starts.
258 * \param[in] env execution environment
259 * \param[in] dt object to decrease the reference count.
260 * \param[in] th the transaction handle of refcount decrease.
262 * \retval 0 if the insertion succeeds.
263 * \retval negative errno if the insertion fails.
265 static int osp_md_declare_ref_del(const struct lu_env *env,
266 struct dt_object *dt, struct thandle *th)
270 if (!is_only_remote_trans(th)) {
271 rc = __osp_md_ref_del(env, dt, th);
273 CDEBUG(D_INFO, "declare ref del "DFID": rc = %d\n",
274 PFID(&dt->do_lu.lo_header->loh_fid), rc);
281 * Implementation of dt_object_operations::do_ref_del
283 * For a remote transaction, it will add an OUT_REF_DEL sub-request into
284 * the OUT RPC that will be flushed when the transaction stops.
286 * \param[in] env execution environment
287 * \param[in] dt object to decrease the reference count
288 * \param[in] th the transaction handle
290 * \retval only return 0 for now
292 static int osp_md_ref_del(const struct lu_env *env, struct dt_object *dt,
297 if (is_only_remote_trans(th)) {
298 rc = __osp_md_ref_del(env, dt, th);
300 CDEBUG(D_INFO, "ref del "DFID": rc = %d\n",
301 PFID(&dt->do_lu.lo_header->loh_fid), rc);
308 * Add OUT_REF_ADD sub-request into the OUT RPC.
310 * \param[in] env execution environment
311 * \param[in] dt object on which to increase the reference count.
312 * \param[in] th the transaction handle.
314 * \retval 0 if the insertion succeeds.
315 * \retval negative errno if the insertion fails.
317 static int __osp_md_ref_add(const struct lu_env *env, struct dt_object *dt,
320 struct dt_update_request *update;
323 update = thandle_to_dt_update_request(th);
324 LASSERT(update != NULL);
326 rc = out_ref_add_pack(env, &update->dur_buf,
327 lu_object_fid(&dt->do_lu),
328 update->dur_batchid);
334 * Implementation of dt_object_operations::do_declare_ref_add
336 * For a non-remote transaction, it will add an OUT_REF_ADD sub-request
337 * into the OUT RPC that will be flushed when the transaction starts.
339 * \param[in] env execution environment
340 * \param[in] dt object on which to increase the reference count.
341 * \param[in] th the transaction handle.
343 * \retval 0 if the insertion succeeds.
344 * \retval negative errno if the insertion fails.
346 static int osp_md_declare_ref_add(const struct lu_env *env,
347 struct dt_object *dt, struct thandle *th)
351 if (!is_only_remote_trans(th)) {
352 rc = __osp_md_ref_add(env, dt, th);
354 CDEBUG(D_INFO, "declare ref add "DFID": rc = %d\n",
355 PFID(&dt->do_lu.lo_header->loh_fid), rc);
362 * Implementation of dt_object_operations::do_ref_add
364 * For a remote transaction, it will add an OUT_REF_ADD sub-request into
365 * the OUT RPC that will be flushed when the transaction stops.
367 * \param[in] env execution environment
368 * \param[in] dt object on which to increase the reference count
369 * \param[in] th the transaction handle
371 * \retval only return 0 for now
373 static int osp_md_ref_add(const struct lu_env *env, struct dt_object *dt,
378 if (is_only_remote_trans(th)) {
379 rc = __osp_md_ref_add(env, dt, th);
381 CDEBUG(D_INFO, "ref add "DFID": rc = %d\n",
382 PFID(&dt->do_lu.lo_header->loh_fid), rc);
389 * Implementation of dt_object_operations::do_ah_init
391 * Initialize the allocation hint for object creation, which is usually called
392 * before the creation, and these hints (parent and child mode) will be sent to
393 * the remote Object Update Target (OUT) and used in the object create process,
394 * same as OSD object creation.
396 * \param[in] env execution environment
397 * \param[in] ah the hint to be initialized
398 * \param[in] parent the parent of the object
399 * \param[in] child the object to be created
400 * \param[in] child_mode the mode of the created object
402 static void osp_md_ah_init(const struct lu_env *env,
403 struct dt_allocation_hint *ah,
404 struct dt_object *parent,
405 struct dt_object *child,
410 ah->dah_parent = parent;
411 ah->dah_mode = child_mode;
415 * Add OUT_ATTR_SET sub-request into the OUT RPC.
417 * \param[in] env execution environment
418 * \param[in] dt object on which to set attributes
419 * \param[in] attr attributes to be set
420 * \param[in] th the transaction handle
422 * \retval 0 if the insertion succeeds.
423 * \retval negative errno if the insertion fails.
425 int __osp_md_attr_set(const struct lu_env *env, struct dt_object *dt,
426 const struct lu_attr *attr, struct thandle *th)
428 struct dt_update_request *update;
431 update = thandle_to_dt_update_request(th);
432 LASSERT(update != NULL);
434 rc = out_attr_set_pack(env, &update->dur_buf,
435 lu_object_fid(&dt->do_lu), attr,
436 update->dur_batchid);
442 * Implementation of dt_object_operations::do_declare_attr_set
444 * Declare setting attributes to the specified remote object.
446 * If the transaction is a non-remote transaction, then add the OUT_ATTR_SET
447 * sub-request into the OUT RPC that is flushed when the transaction starts.
449 * \param[in] env execution environment
450 * \param[in] dt object on which to set attributes
451 * \param[in] attr attributes to be set
452 * \param[in] th the transaction handle
454 * \retval 0 if the insertion succeeds.
455 * \retval negative errno if the insertion fails.
457 int osp_md_declare_attr_set(const struct lu_env *env, struct dt_object *dt,
458 const struct lu_attr *attr, struct thandle *th)
462 if (!is_only_remote_trans(th)) {
463 rc = __osp_md_attr_set(env, dt, attr, th);
465 CDEBUG(D_INFO, "declare attr set md_object "DFID": rc = %d\n",
466 PFID(&dt->do_lu.lo_header->loh_fid), rc);
473 * Implementation of dt_object_operations::do_attr_set
475 * Set attributes to the specified remote object.
477 * If the transaction is a remote transaction, then add the OUT_ATTR_SET
478 * sub-request into the OUT RPC that is flushed when the transaction stops.
480 * \param[in] env execution environment
481 * \param[in] dt object to set attributes
482 * \param[in] attr attributes to be set
483 * \param[in] th the transaction handle
485 * \retval only return 0 for now
487 int osp_md_attr_set(const struct lu_env *env, struct dt_object *dt,
488 const struct lu_attr *attr, struct thandle *th)
492 if (is_only_remote_trans(th)) {
493 rc = __osp_md_attr_set(env, dt, attr, th);
495 CDEBUG(D_INFO, "attr set md_object "DFID": rc = %d\n",
496 PFID(&dt->do_lu.lo_header->loh_fid), rc);
503 * Implementation of dt_object_operations::do_read_lock
505 * osp_md_object_{read,write}_lock() will only lock the remote object in the
506 * local cache, which uses the semaphore (opo_sem) inside the osp_object to
507 * lock the object. Note: it will not lock the object in the whole cluster,
508 * which relies on the LDLM lock.
510 * \param[in] env execution environment
511 * \param[in] dt object to be locked
512 * \param[in] role lock role from MDD layer, see mdd_object_role().
514 static void osp_md_object_read_lock(const struct lu_env *env,
515 struct dt_object *dt, unsigned role)
517 struct osp_object *obj = dt2osp_obj(dt);
519 LASSERT(obj->opo_owner != env);
520 down_read_nested(&obj->opo_sem, role);
522 LASSERT(obj->opo_owner == NULL);
526 * Implementation of dt_object_operations::do_write_lock
528 * Lock the remote object in write mode.
530 * \param[in] env execution environment
531 * \param[in] dt object to be locked
532 * \param[in] role lock role from MDD layer, see mdd_object_role().
534 static void osp_md_object_write_lock(const struct lu_env *env,
535 struct dt_object *dt, unsigned role)
537 struct osp_object *obj = dt2osp_obj(dt);
539 down_write_nested(&obj->opo_sem, role);
541 LASSERT(obj->opo_owner == NULL);
542 obj->opo_owner = env;
546 * Implementation of dt_object_operations::do_read_unlock
548 * Unlock the read lock of remote object.
550 * \param[in] env execution environment
551 * \param[in] dt object to be unlocked
553 static void osp_md_object_read_unlock(const struct lu_env *env,
554 struct dt_object *dt)
556 struct osp_object *obj = dt2osp_obj(dt);
558 up_read(&obj->opo_sem);
562 * Implementation of dt_object_operations::do_write_unlock
564 * Unlock the write lock of remote object.
566 * \param[in] env execution environment
567 * \param[in] dt object to be unlocked
569 static void osp_md_object_write_unlock(const struct lu_env *env,
570 struct dt_object *dt)
572 struct osp_object *obj = dt2osp_obj(dt);
574 LASSERT(obj->opo_owner == env);
575 obj->opo_owner = NULL;
576 up_write(&obj->opo_sem);
580 * Implementation of dt_object_operations::do_write_locked
582 * Test if the object is locked in write mode.
584 * \param[in] env execution environment
585 * \param[in] dt object to be tested
587 static int osp_md_object_write_locked(const struct lu_env *env,
588 struct dt_object *dt)
590 struct osp_object *obj = dt2osp_obj(dt);
592 return obj->opo_owner == env;
596 * Implementation of dt_index_operations::dio_lookup
598 * Look up record by key under a remote index object. It packs lookup update
599 * into RPC, sends to the remote OUT and waits for the lookup result.
601 * \param[in] env execution environment
602 * \param[in] dt index object to lookup
603 * \param[out] rec record in which to return lookup result
604 * \param[in] key key of index which will be looked up
606 * \retval 1 if the lookup succeeds.
607 * \retval negative errno if the lookup fails.
609 static int osp_md_index_lookup(const struct lu_env *env, struct dt_object *dt,
610 struct dt_rec *rec, const struct dt_key *key)
612 struct lu_buf *lbuf = &osp_env_info(env)->osi_lb2;
613 struct osp_device *osp = lu2osp_dev(dt->do_lu.lo_dev);
614 struct dt_device *dt_dev = &osp->opd_dt_dev;
615 struct dt_update_request *update;
616 struct object_update_reply *reply;
617 struct ptlrpc_request *req = NULL;
622 /* Because it needs to send the update buffer right away,
623 * just create an update buffer instead of attaching it to the
624 * update_remote list of the thandle.
626 update = dt_update_request_create(dt_dev);
628 RETURN(PTR_ERR(update));
630 rc = out_index_lookup_pack(env, &update->dur_buf,
631 lu_object_fid(&dt->do_lu), rec, key);
633 CERROR("%s: Insert update error: rc = %d\n",
634 dt_dev->dd_lu_dev.ld_obd->obd_name, rc);
638 rc = osp_remote_sync(env, osp, update, &req, false);
642 reply = req_capsule_server_sized_get(&req->rq_pill,
643 &RMF_OUT_UPDATE_REPLY,
644 OUT_UPDATE_REPLY_SIZE);
645 if (reply->ourp_magic != UPDATE_REPLY_MAGIC) {
646 CERROR("%s: Wrong version %x expected %x: rc = %d\n",
647 dt_dev->dd_lu_dev.ld_obd->obd_name,
648 reply->ourp_magic, UPDATE_REPLY_MAGIC, -EPROTO);
649 GOTO(out, rc = -EPROTO);
652 rc = object_update_result_data_get(reply, lbuf, 0);
656 if (lbuf->lb_len != sizeof(*fid)) {
657 CERROR("%s: lookup "DFID" %s wrong size %d\n",
658 dt_dev->dd_lu_dev.ld_obd->obd_name,
659 PFID(lu_object_fid(&dt->do_lu)), (char *)key,
661 GOTO(out, rc = -EINVAL);
665 if (ptlrpc_rep_need_swab(req))
666 lustre_swab_lu_fid(fid);
667 if (!fid_is_sane(fid)) {
668 CERROR("%s: lookup "DFID" %s invalid fid "DFID"\n",
669 dt_dev->dd_lu_dev.ld_obd->obd_name,
670 PFID(lu_object_fid(&dt->do_lu)), (char *)key, PFID(fid));
671 GOTO(out, rc = -EINVAL);
674 memcpy(rec, fid, sizeof(*fid));
680 ptlrpc_req_finished(req);
682 dt_update_request_destroy(update);
688 * Add OUT_INDEX_INSERT sub-request into the OUT RPC.
690 * \param[in] env execution environment
691 * \param[in] dt object for which to insert index
692 * \param[in] rec record of the index which will be inserted
693 * \param[in] key key of the index which will be inserted
694 * \param[in] th the transaction handle
696 * \retval 0 if the insertion succeeds.
697 * \retval negative errno if the insertion fails.
699 static int __osp_md_index_insert(const struct lu_env *env,
700 struct dt_object *dt,
701 const struct dt_rec *rec,
702 const struct dt_key *key,
705 struct osp_thandle *oth = thandle_to_osp_thandle(th);
706 struct dt_update_request *update = oth->ot_dur;
710 rc = out_index_insert_pack(env, &update->dur_buf,
711 lu_object_fid(&dt->do_lu), rec, key,
712 update->dur_batchid);
716 /* Until async update is allowed, when inserting a remote
717 * name entry we must make sure the local object is created first,
718 * i.e. the remote update RPC should be sent after the local
719 * update (create object) */
720 oth->ot_send_updates_after_local_trans = true;
726 * Implementation of dt_index_operations::dio_declare_insert
728 * For a non-remote transaction, it will add an OUT_INDEX_INSERT sub-request
729 * into the OUT RPC that will be flushed when the transaction starts.
731 * \param[in] env execution environment
732 * \param[in] dt object for which to insert index
733 * \param[in] rec record of the index which will be inserted
734 * \param[in] key key of the index which will be inserted
735 * \param[in] th the transaction handle
737 * \retval 0 if the insertion succeeds.
738 * \retval negative errno if the insertion fails.
740 static int osp_md_declare_index_insert(const struct lu_env *env,
741 struct dt_object *dt,
742 const struct dt_rec *rec,
743 const struct dt_key *key,
748 if (!is_only_remote_trans(th)) {
749 rc = __osp_md_index_insert(env, dt, rec, key, th);
751 CDEBUG(D_INFO, "declare index insert "DFID" key %s, rec "DFID
752 ": rc = %d\n", PFID(&dt->do_lu.lo_header->loh_fid),
754 PFID(((struct dt_insert_rec *)rec)->rec_fid), rc);
761 * Implementation of dt_index_operations::dio_insert
763 * For a remote transaction, it will add an OUT_INDEX_INSERT sub-request
764 * into the OUT RPC that will be flushed when the transaction stops.
766 * \param[in] env execution environment
767 * \param[in] dt object for which to insert index
768 * \param[in] rec record of the index to be inserted
769 * \param[in] key key of the index to be inserted
770 * \param[in] th the transaction handle
771 * \param[in] ignore_quota quota enforcement for insert
773 * \retval only return 0 for now
775 static int osp_md_index_insert(const struct lu_env *env,
776 struct dt_object *dt,
777 const struct dt_rec *rec,
778 const struct dt_key *key,
784 if (is_only_remote_trans(th)) {
785 rc = __osp_md_index_insert(env, dt, rec, key, th);
787 CDEBUG(D_INFO, "index insert "DFID" key %s, rec "DFID
788 ": rc = %d\n", PFID(&dt->do_lu.lo_header->loh_fid),
790 PFID(((struct dt_insert_rec *)rec)->rec_fid), rc);
797 * Add OUT_INDEX_DELETE sub-request into the OUT RPC.
799 * \param[in] env execution environment
800 * \param[in] dt object for which to delete index
801 * \param[in] key key of the index
802 * \param[in] th the transaction handle
804 * \retval 0 if the insertion succeeds.
805 * \retval negative errno if the insertion fails.
807 static int __osp_md_index_delete(const struct lu_env *env,
808 struct dt_object *dt,
809 const struct dt_key *key,
812 struct dt_update_request *update;
815 update = thandle_to_dt_update_request(th);
816 LASSERT(update != NULL);
818 rc = out_index_delete_pack(env, &update->dur_buf,
819 lu_object_fid(&dt->do_lu), key,
820 update->dur_batchid);
825 * Implementation of dt_index_operations::dio_declare_delete
827 * For a non-remote transaction, it will add an OUT_INDEX_DELETE sub-request
828 * into the OUT RPC that will be flushed when the transaction starts.
830 * \param[in] env execution environment
831 * \param[in] dt object for which to delete index
832 * \param[in] key key of the index
833 * \param[in] th the transaction handle
835 * \retval 0 if the insertion succeeds.
836 * \retval negative errno if the insertion fails.
838 static int osp_md_declare_index_delete(const struct lu_env *env,
839 struct dt_object *dt,
840 const struct dt_key *key,
845 if (!is_only_remote_trans(th)) {
846 rc = __osp_md_index_delete(env, dt, key, th);
848 CDEBUG(D_INFO, "declare index delete "DFID" %s: rc = %d\n",
849 PFID(&dt->do_lu.lo_header->loh_fid), (char *)key, rc);
856 * Implementation of dt_index_operations::dio_delete
858 * For a remote transaction, it will add an OUT_INDEX_DELETE sub-request
859 * into the OUT RPC that will be flushed when the transaction stops.
861 * \param[in] env execution environment
862 * \param[in] dt object for which to delete index
863 * \param[in] key key of the index which will be deleted
864 * \param[in] th the transaction handle
866 * \retval only return 0 for now
868 static int osp_md_index_delete(const struct lu_env *env,
869 struct dt_object *dt,
870 const struct dt_key *key,
875 if (is_only_remote_trans(th)) {
876 rc = __osp_md_index_delete(env, dt, key, th);
878 CDEBUG(D_INFO, "index delete "DFID" %s: rc = %d\n",
879 PFID(&dt->do_lu.lo_header->loh_fid), (char *)key, rc);
886 * Implementation of dt_index_operations::dio_it.next
888 * Advance the pointer of the iterator to the next entry. It shares a similar
889 * internal implementation with osp_orphan_it_next(), which is being used for
890 * remote orphan index object. This method will be used for remote directory.
892 * \param[in] env execution environment
893 * \param[in] di iterator of this iteration
895 * \retval 0 if the pointer is advanced successfully.
896 * \retval 1 if it reaches the end of the index object.
897 * \retval negative errno if the pointer cannot be advanced.
899 static int osp_md_index_it_next(const struct lu_env *env, struct dt_it *di)
901 struct osp_it *it = (struct osp_it *)di;
902 struct lu_idxpage *idxpage;
903 struct lu_dirent *ent = (struct lu_dirent *)it->ooi_ent;
908 idxpage = it->ooi_cur_idxpage;
909 if (idxpage != NULL) {
910 if (idxpage->lip_nr == 0)
916 (struct lu_dirent *)idxpage->lip_entries;
918 } else if (le16_to_cpu(ent->lde_reclen) != 0 &&
919 it->ooi_pos_ent < idxpage->lip_nr) {
920 ent = (struct lu_dirent *)(((char *)ent) +
921 le16_to_cpu(ent->lde_reclen));
929 rc = osp_it_next_page(env, di);
937 * Implementation of dt_index_operations::dio_it.key
939 * Get the key at the current iterator position. These iteration methods
940 * (dio_it) will only be used for iterating the remote directory, so
941 * the key is the name of the directory entry.
943 * \param[in] env execution environment
944 * \param[in] di iterator of this iteration
946 * \retval name of the current entry
948 static struct dt_key *osp_it_key(const struct lu_env *env,
949 const struct dt_it *di)
951 struct osp_it *it = (struct osp_it *)di;
952 struct lu_dirent *ent = (struct lu_dirent *)it->ooi_ent;
954 return (struct dt_key *)ent->lde_name;
958 * Implementation of dt_index_operations::dio_it.key_size
960 * Get the key size at the current iterator position. These iteration methods
961 * (dio_it) will only be used for iterating the remote directory, so the key
962 * size is the name size of the directory entry.
964 * \param[in] env execution environment
965 * \param[in] di iterator of this iteration
967 * \retval name size of the current entry
970 static int osp_it_key_size(const struct lu_env *env, const struct dt_it *di)
972 struct osp_it *it = (struct osp_it *)di;
973 struct lu_dirent *ent = (struct lu_dirent *)it->ooi_ent;
975 return (int)le16_to_cpu(ent->lde_namelen);
979 * Implementation of dt_index_operations::dio_it.rec
981 * Get the record at current iterator position. These iteration methods
982 * (dio_it) will only be used for iterating the remote directory, so it
983 * uses lu_dirent_calc_size() to calculate the record size.
985 * \param[in] env execution environment
986 * \param[in] di iterator of this iteration
987 * \param[out] rec the record to be returned
988 * \param[in] attr attributes of the index object, so it knows
989 * how to pack the entry.
991 * \retval only return 0 for now
993 static int osp_md_index_it_rec(const struct lu_env *env, const struct dt_it *di,
994 struct dt_rec *rec, __u32 attr)
996 struct osp_it *it = (struct osp_it *)di;
997 struct lu_dirent *ent = (struct lu_dirent *)it->ooi_ent;
1000 reclen = lu_dirent_calc_size(le16_to_cpu(ent->lde_namelen), attr);
1001 memcpy(rec, ent, reclen);
1006 * Implementation of dt_index_operations::dio_it.load
1008 * Locate the iteration cursor to the specified position (cookie).
1010 * \param[in] env pointer to the thread context
1011 * \param[in] di pointer to the iteration structure
1012 * \param[in] hash the specified position
1014 * \retval positive number for locating to the exact position
1016 * \retval 0 for arriving at the end of the iteration
1017 * \retval negative error number on failure
1019 static int osp_it_load(const struct lu_env *env, const struct dt_it *di,
1022 struct osp_it *it = (struct osp_it *)di;
1025 it->ooi_next = hash;
1026 rc = osp_md_index_it_next(env, (struct dt_it *)di);
1036 const struct dt_index_operations osp_md_index_ops = {
1037 .dio_lookup = osp_md_index_lookup,
1038 .dio_declare_insert = osp_md_declare_index_insert,
1039 .dio_insert = osp_md_index_insert,
1040 .dio_declare_delete = osp_md_declare_index_delete,
1041 .dio_delete = osp_md_index_delete,
1043 .init = osp_it_init,
1044 .fini = osp_it_fini,
1047 .next = osp_md_index_it_next,
1049 .key_size = osp_it_key_size,
1050 .rec = osp_md_index_it_rec,
1051 .store = osp_it_store,
1052 .load = osp_it_load,
1053 .key_rec = osp_it_key_rec,
1058 * Implementation of dt_object_operations::do_index_try
1060 * Try to initialize the index API pointer for the given object. This
1061 * is the entry point of the index API, i.e. we must call this method
1062 * to initialize the index object before calling other index methods.
1064 * \param[in] env execution environment
1065 * \param[in] dt index object to be initialized
1066 * \param[in] feat the index feature of the object
1068 * \retval 0 if the initialization succeeds.
1069 * \retval negative errno if the initialization fails.
1071 static int osp_md_index_try(const struct lu_env *env,
1072 struct dt_object *dt,
1073 const struct dt_index_features *feat)
1075 dt->do_index_ops = &osp_md_index_ops;
1080 * Implementation of dt_object_operations::do_object_lock
1082 * Enqueue a lock (by ldlm_cli_enqueue()) of remote object on the remote MDT,
1083 * which will lock the object in the global namespace.
1085 * \param[in] env execution environment
1086 * \param[in] dt object to be locked
1087 * \param[out] lh lock handle
1088 * \param[in] einfo enqueue information
1089 * \param[in] policy lock policy
1091 * \retval ELDLM_OK if locking the object succeeds.
1092 * \retval negative errno if locking fails.
1094 static int osp_md_object_lock(const struct lu_env *env,
1095 struct dt_object *dt,
1096 struct lustre_handle *lh,
1097 struct ldlm_enqueue_info *einfo,
1098 ldlm_policy_data_t *policy)
1100 struct ldlm_res_id *res_id;
1101 struct dt_device *dt_dev = lu2dt_dev(dt->do_lu.lo_dev);
1102 struct osp_device *osp = dt2osp_dev(dt_dev);
1103 struct ptlrpc_request *req;
1108 res_id = einfo->ei_res_id;
1109 LASSERT(res_id != NULL);
1111 mode = ldlm_lock_match(osp->opd_obd->obd_namespace,
1112 LDLM_FL_BLOCK_GRANTED, res_id,
1113 einfo->ei_type, policy,
1114 einfo->ei_mode, lh, 0);
1118 req = ldlm_enqueue_pack(osp->opd_exp, 0);
1120 RETURN(PTR_ERR(req));
1122 rc = ldlm_cli_enqueue(osp->opd_exp, &req, einfo, res_id,
1123 (const ldlm_policy_data_t *)policy,
1124 &flags, NULL, 0, LVB_T_NONE, lh, 0);
1126 ptlrpc_req_finished(req);
1128 return rc == ELDLM_OK ? 0 : -EIO;
1132 * Implementation of dt_object_operations::do_object_unlock
1134 * Cancel a lock of a remote object.
1136 * \param[in] env execution environment
1137 * \param[in] dt object to be unlocked
1138 * \param[in] einfo lock enqueue information
1139 * \param[in] policy lock policy
1141 * \retval Only return 0 for now.
1143 static int osp_md_object_unlock(const struct lu_env *env,
1144 struct dt_object *dt,
1145 struct ldlm_enqueue_info *einfo,
1146 ldlm_policy_data_t *policy)
1148 struct lustre_handle *lockh = einfo->ei_cbdata;
1150 /* unlock finally */
1151 ldlm_lock_decref(lockh, einfo->ei_mode);
1156 struct dt_object_operations osp_md_obj_ops = {
1157 .do_read_lock = osp_md_object_read_lock,
1158 .do_write_lock = osp_md_object_write_lock,
1159 .do_read_unlock = osp_md_object_read_unlock,
1160 .do_write_unlock = osp_md_object_write_unlock,
1161 .do_write_locked = osp_md_object_write_locked,
1162 .do_declare_create = osp_md_declare_object_create,
1163 .do_create = osp_md_object_create,
1164 .do_declare_ref_add = osp_md_declare_ref_add,
1165 .do_ref_add = osp_md_ref_add,
1166 .do_declare_ref_del = osp_md_declare_ref_del,
1167 .do_ref_del = osp_md_ref_del,
1168 .do_declare_destroy = osp_declare_object_destroy,
1169 .do_destroy = osp_object_destroy,
1170 .do_ah_init = osp_md_ah_init,
1171 .do_attr_get = osp_attr_get,
1172 .do_declare_attr_set = osp_md_declare_attr_set,
1173 .do_attr_set = osp_md_attr_set,
1174 .do_xattr_get = osp_xattr_get,
1175 .do_declare_xattr_set = osp_declare_xattr_set,
1176 .do_xattr_set = osp_xattr_set,
1177 .do_declare_xattr_del = osp_declare_xattr_del,
1178 .do_xattr_del = osp_xattr_del,
1179 .do_index_try = osp_md_index_try,
1180 .do_object_lock = osp_md_object_lock,
1181 .do_object_unlock = osp_md_object_unlock,
1185 * Implementation of dt_body_operations::dbo_declare_write
1187 * Declare an object write. In DNE phase I, it will pack the write
1188 * object update into the RPC.
1190 * \param[in] env execution environment
1191 * \param[in] dt object to be written
1192 * \param[in] buf buffer to write which includes an embedded size field
1193 * \param[in] pos offset in the object to start writing at
1194 * \param[in] th transaction handle
1196 * \retval 0 if the insertion succeeds.
1197 * \retval negative errno if the insertion fails.
1199 static ssize_t osp_md_declare_write(const struct lu_env *env,
1200 struct dt_object *dt,
1201 const struct lu_buf *buf,
1202 loff_t pos, struct thandle *th)
1204 struct dt_update_request *update;
1207 update = thandle_to_dt_update_request(th);
1208 LASSERT(update != NULL);
1210 rc = out_write_pack(env, &update->dur_buf, lu_object_fid(&dt->do_lu),
1211 buf, pos, update->dur_batchid);
1218 * Implementation of dt_body_operations::dbo_write
1220 * Return the buffer size. In DNE phase I, remote updates
1221 * are actually executed during transaction start, so the buffer has
1222 * already been written when this method is being called.
1224 * \param[in] env execution environment
1225 * \param[in] dt object to be written
1226 * \param[in] buf buffer to write which includes an embedded size field
1227 * \param[in] pos offset in the object to start writing at
1228 * \param[in] th transaction handle
1229 * \param[in] ignore_quota quota enforcement for this write
1231 * \retval the buffer size in bytes.
1233 static ssize_t osp_md_write(const struct lu_env *env, struct dt_object *dt,
1234 const struct lu_buf *buf, loff_t *pos,
1235 struct thandle *handle, int ignore_quota)
1237 *pos += buf->lb_len;
1241 /* These body operations will be used to write symlinks during migration etc. */
1242 struct dt_body_operations osp_md_body_ops = {
1243 .dbo_declare_write = osp_md_declare_write,
1244 .dbo_write = osp_md_write,