4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.gnu.org/licenses/gpl-2.0.html
23 * Copyright (c) 2013, Intel Corporation.
26 * lustre/osp/osp_md_object.c
28 * OST/MDT proxy device (OSP) Metadata methods
30 * This file implements methods for remote MD object, which include
31 * dt_object_operations, dt_index_operations and dt_body_operations.
33 * If there are multiple MDTs in one filesystem, one operation might
34 * include modifications in several MDTs. In such cases, clients
35 * send the RPC to the master MDT, then the operation is decomposed into
36 * object updates which will be dispatched to OSD or OSP. The local updates
37 * go to local OSD and the remote updates go to OSP. In OSP, these remote
38 * object updates will be packed into an update RPC, sent to the remote MDT
39 * and handled by Object Update Target (OUT).
41 * In DNE phase I, because a complete recovery solution is missing, updates
42 * will be executed in order and synchronously.
43 * 1. The transaction is created.
44 * 2. In transaction declare, it collects and packs remote
45 * updates (in osp_md_declare_xxx()).
46 * 3. In transaction start, it sends these remote updates
47 * to remote MDTs, which will execute these updates synchronously.
48 * 4. In transaction execute phase, the local updates will be executed
51 * Author: Di Wang <di.wang@intel.com>
54 #define DEBUG_SUBSYSTEM S_MDS
56 #include <lustre_log.h>
57 #include "osp_internal.h"
// String constants holding the dot and dotdot names.
59 static const char dot[] = ".";
60 static const char dotdot[] = "..";
63 * Implementation of dt_object_operations::do_declare_create
65 * Insert object create update into the RPC, which will be sent during
66 * transaction start. Note: if the object has already been created,
67 * we must add object destroy updates ahead of create updates, so it will
68 * destroy then recreate the object.
70 * \param[in] env execution environment
71 * \param[in] dt remote object to be created
72 * \param[in] attr attribute of the created object
73 * \param[in] hint creation hint
74 * \param[in] dof creation format information
75 * \param[in] th the transaction handle
77 * \retval 0 if the insertion succeeds.
78 * \retval negative errno if the insertion fails.
80 int osp_md_declare_object_create(const struct lu_env *env,
83 struct dt_allocation_hint *hint,
84 struct dt_object_format *dof,
87 struct dt_update_request *update;
// The update request fetched here supplies the dur_buf and dur_batchid used by every pack call below.
90 update = dt_update_request_find_or_create(th, dt);
// PTR_ERR(update) is logged and then returned as the error code.
92 CERROR("%s: Get OSP update buf failed: rc = %d\n",
93 dt->do_lu.lo_dev->ld_obd->obd_name,
94 (int)PTR_ERR(update));
95 return PTR_ERR(update);
98 if (lu_object_exists(&dt->do_lu)) {
99 /* If the object already exists, we need to destroy
100 * this orphan object first.
102 * The scenario might happen in this case:
104 * 1. client sends remote create to MDT0.
105 * 2. MDT0 sends create update to MDT1.
106 * 3. MDT1 finishes the create synchronously.
107 * 4. MDT0 fails and reboots.
108 * 5. client resends remote create to MDT0.
109 * 6. MDT0 tries to resend create update to MDT1,
110 * but finds the object already exists
// Trace the orphan cleanup at D_HA level with the obd name and the FID.
112 CDEBUG(D_HA, "%s: object "DFID" exists, destroy this orphan\n",
113 dt->do_lu.lo_dev->ld_obd->obd_name,
114 PFID(lu_object_fid(&dt->do_lu)));
// First ref-del update packed for the existing object.
116 rc = out_ref_del_pack(env, &update->dur_buf,
117 lu_object_fid(&dt->do_lu),
118 update->dur_batchid);
// Directories get a second ref-del update.
122 if (S_ISDIR(lu_object_attr(&dt->do_lu))) {
123 /* decrease for ".." */
124 rc = out_ref_del_pack(env, &update->dur_buf,
125 lu_object_fid(&dt->do_lu),
126 update->dur_batchid);
// Then pack the destroy of the existing object itself.
131 rc = out_object_destroy_pack(env, &update->dur_buf,
132 lu_object_fid(&dt->do_lu),
133 update->dur_batchid);
// Clear LOHA_EXISTS in the cached object header.
137 dt->do_lu.lo_header->loh_attr &= ~LOHA_EXISTS;
138 /* Increase batchid to add this orphan object deletion
139 * to a separate transaction */
140 update_inc_batchid(update);
// The create is packed under whatever batchid is current at this point.
143 rc = out_create_pack(env, &update->dur_buf,
144 lu_object_fid(&dt->do_lu), attr, hint, dof,
145 update->dur_batchid);
// Log rc together with the obd name of the object device.
150 CERROR("%s: Insert update error: rc = %d\n",
151 dt->do_lu.lo_dev->ld_obd->obd_name, rc);
157 * Implementation of dt_object_operations::do_create
159 * It sets necessary flags for created object. In DNE phase I,
160 * remote updates are actually executed during transaction start,
161 * i.e. the object has already been created when calling this method.
163 * \param[in] env execution environment
164 * \param[in] dt object to be created
165 * \param[in] attr attribute of the created object
166 * \param[in] hint creation hint
167 * \param[in] dof creation format information
168 * \param[in] th the transaction handle
170 * \retval only return 0 for now
172 int osp_md_object_create(const struct lu_env *env, struct dt_object *dt,
173 struct lu_attr *attr, struct dt_allocation_hint *hint,
174 struct dt_object_format *dof, struct thandle *th)
// Trace the FID of the object at D_INFO level.
176 CDEBUG(D_INFO, "create object "DFID"\n",
177 PFID(&dt->do_lu.lo_header->loh_fid));
179 /* Because the create update RPC will be sent during declare phase,
180 * if creation reaches here, it means the object has been created
// Flag the cached header as existing and merge in the file type bits of attr->la_mode.
182 dt->do_lu.lo_header->loh_attr |= LOHA_EXISTS | (attr->la_mode & S_IFMT);
// Clear opo_non_exist on the OSP object.
183 dt2osp_obj(dt)->opo_non_exist = 0;
189 * Implementation of dt_object_operations::do_declare_ref_del
191 * Declare decreasing the reference count of the remote object, i.e. insert
192 * decreasing object reference count update into the RPC, which will be sent
193 * during transaction start.
195 * \param[in] env execution environment
196 * \param[in] dt object to decrease the reference count.
197 * \param[in] th the transaction handle of refcount decrease.
199 * \retval 0 if the insertion succeeds.
200 * \retval negative errno if the insertion fails.
202 static int osp_md_declare_object_ref_del(const struct lu_env *env,
203 struct dt_object *dt,
206 struct dt_update_request *update;
// Fetch the update request tied to this transaction handle and object.
209 update = dt_update_request_find_or_create(th, dt);
// An error pointer is logged and its errno is returned unchanged.
210 if (IS_ERR(update)) {
211 CERROR("%s: Get OSP update buf failed: rc = %d\n",
212 dt->do_lu.lo_dev->ld_obd->obd_name,
213 (int)PTR_ERR(update));
214 return PTR_ERR(update);
// Pack a ref-del update for this FID under the current batchid.
217 rc = out_ref_del_pack(env, &update->dur_buf,
218 lu_object_fid(&dt->do_lu),
219 update->dur_batchid);
224 * Implementation of dt_object_operations::do_ref_del
226 * Do nothing in this method for now. In DNE phase I, remote updates are
227 * actually executed during transaction start, i.e. the object reference
228 * count has already been decreased when calling this method.
230 * \param[in] env execution environment
231 * \param[in] dt object to decrease the reference count
232 * \param[in] th the transaction handle
234 * \retval only return 0 for now
236 static int osp_md_object_ref_del(const struct lu_env *env,
237 struct dt_object *dt,
// Emit a D_INFO trace carrying the object FID.
240 CDEBUG(D_INFO, "ref del object "DFID"\n",
241 PFID(&dt->do_lu.lo_header->loh_fid));
247 * Implementation of dt_object_operations::do_declare_ref_add
249 * Declare increasing the reference count of the remote object,
250 * i.e. insert increasing object reference count update into RPC.
252 * \param[in] env execution environment
253 * \param[in] dt object on which to increase the reference count.
254 * \param[in] th the transaction handle.
256 * \retval 0 if the insertion succeeds.
257 * \retval negative errno if the insertion fails.
259 static int osp_md_declare_ref_add(const struct lu_env *env,
260 struct dt_object *dt, struct thandle *th)
262 struct dt_update_request *update;
// The update request provides the buffer and batchid for the pack call below.
265 update = dt_update_request_find_or_create(th, dt);
// Failure to obtain it is logged and propagated as the errno held in the pointer.
266 if (IS_ERR(update)) {
267 CERROR("%s: Get OSP update buf failed: rc = %d\n",
268 dt->do_lu.lo_dev->ld_obd->obd_name,
269 (int)PTR_ERR(update));
270 return PTR_ERR(update);
// Pack a ref-add update for this FID.
273 rc = out_ref_add_pack(env, &update->dur_buf,
274 lu_object_fid(&dt->do_lu),
275 update->dur_batchid);
281 * Implementation of dt_object_operations::do_ref_add
283 * Do nothing in this method for now. In DNE phase I, remote updates are
284 * actually executed during transaction start, i.e. the object reference
285 * count has already been increased when calling this method.
287 * \param[in] env execution environment
288 * \param[in] dt object on which to increase the reference count
289 * \param[in] th the transaction handle
291 * \retval only return 0 for now
293 static int osp_md_object_ref_add(const struct lu_env *env, struct dt_object *dt,
// Emit a D_INFO trace carrying the object FID.
296 CDEBUG(D_INFO, "ref add object "DFID"\n",
297 PFID(&dt->do_lu.lo_header->loh_fid));
303 * Implementation of dt_object_operations::do_ah_init
305 * Initialize the allocation hint for object creation, which is usually called
306 * before the creation, and these hints (parent and child mode) will be sent to
307 * the remote Object Update Target (OUT) and used in the object create process,
308 * same as OSD object creation.
310 * \param[in] env execution environment
311 * \param[in] ah the hint to be initialized
312 * \param[in] parent the parent of the object
313 * \param[in] child the object to be created
314 * \param[in] child_mode the mode of the created object
316 static void osp_md_ah_init(const struct lu_env *env,
317 struct dt_allocation_hint *ah,
318 struct dt_object *parent,
319 struct dt_object *child,
// Record the parent object and the child mode in the hint.
324 ah->dah_parent = parent;
325 ah->dah_mode = child_mode;
329 * Implementation of dt_object_operations::do_declare_attr_set
331 * Declare setting attributes of the remote object, i.e. insert remote
332 * object attr_set update into RPC.
334 * \param[in] env execution environment
335 * \param[in] dt object on which to set attributes
336 * \param[in] attr attributes to be set
337 * \param[in] th the transaction handle
339 * \retval 0 if the insertion succeeds.
340 * \retval negative errno if the insertion fails.
342 int osp_md_declare_attr_set(const struct lu_env *env, struct dt_object *dt,
343 const struct lu_attr *attr, struct thandle *th)
345 struct dt_update_request *update;
// Fetch the update request tied to this transaction handle and object.
348 update = dt_update_request_find_or_create(th, dt);
349 if (IS_ERR(update)) {
// NOTE(review): this message omits the rc = prefix used by the sibling declare routines.
350 CERROR("%s: Get OSP update buf failed: %d\n",
351 dt->do_lu.lo_dev->ld_obd->obd_name,
352 (int)PTR_ERR(update));
353 return PTR_ERR(update);
// Pack an attr-set update carrying attr for this FID.
356 rc = out_attr_set_pack(env, &update->dur_buf,
357 lu_object_fid(&dt->do_lu), attr,
358 update->dur_batchid);
364 * Implementation of dt_object_operations::do_attr_set
366 * Do nothing in this method for now. In DNE phase I, remote updates
367 * are actually executed during transaction start, i.e. object attributes
368 * have already been set when calling this method.
370 * \param[in] env execution environment
371 * \param[in] dt object to set attributes
372 * \param[in] attr attributes to be set
373 * \param[in] th the transaction handle
374 * \param[in] capa capability of setting attributes (not yet implemented).
376 * \retval only return 0 for now
378 int osp_md_attr_set(const struct lu_env *env, struct dt_object *dt,
379 const struct lu_attr *attr, struct thandle *th,
380 struct lustre_capa *capa)
// Emit a D_INFO trace carrying the object FID.
382 CDEBUG(D_INFO, "attr set object "DFID"\n",
383 PFID(&dt->do_lu.lo_header->loh_fid));
389 * Implementation of dt_object_operations::do_read_lock
391 * osp_md_object_{read,write}_lock() will only lock the remote object in the
392 * local cache, which uses the semaphore (opo_sem) inside the osp_object to
393 * lock the object. Note: it will not lock the object in the whole cluster,
394 * which relies on the LDLM lock.
396 * \param[in] env execution environment
397 * \param[in] dt object to be locked
398 * \param[in] role lock role from MDD layer, see mdd_object_role().
400 static void osp_md_object_read_lock(const struct lu_env *env,
401 struct dt_object *dt, unsigned role)
403 struct osp_object *obj = dt2osp_obj(dt);
// The calling env must not already be recorded as the write owner.
405 LASSERT(obj->opo_owner != env);
// role is handed to the rwsem call as its nesting argument.
406 down_read_nested(&obj->opo_sem, role);
// With the read side held, no write owner may be recorded.
408 LASSERT(obj->opo_owner == NULL);
412 * Implementation of dt_object_operations::do_write_lock
414 * Lock the remote object in write mode.
416 * \param[in] env execution environment
417 * \param[in] dt object to be locked
418 * \param[in] role lock role from MDD layer, see mdd_object_role().
420 static void osp_md_object_write_lock(const struct lu_env *env,
421 struct dt_object *dt, unsigned role)
423 struct osp_object *obj = dt2osp_obj(dt);
// Take opo_sem for writing, with role as the nesting argument.
425 down_write_nested(&obj->opo_sem, role);
// No owner may be recorded yet; record env as the owner.
427 LASSERT(obj->opo_owner == NULL);
428 obj->opo_owner = env;
432 * Implementation of dt_object_operations::do_read_unlock
434 * Unlock the read lock of remote object.
436 * \param[in] env execution environment
437 * \param[in] dt object to be unlocked
439 static void osp_md_object_read_unlock(const struct lu_env *env,
440 struct dt_object *dt)
442 struct osp_object *obj = dt2osp_obj(dt);
// Release the read side of opo_sem.
444 up_read(&obj->opo_sem);
448 * Implementation of dt_object_operations::do_write_unlock
450 * Unlock the write lock of remote object.
452 * \param[in] env execution environment
453 * \param[in] dt object to be unlocked
455 static void osp_md_object_write_unlock(const struct lu_env *env,
456 struct dt_object *dt)
458 struct osp_object *obj = dt2osp_obj(dt);
// Only the recorded owner may unlock; ownership is cleared before opo_sem is released.
460 LASSERT(obj->opo_owner == env);
461 obj->opo_owner = NULL;
462 up_write(&obj->opo_sem);
466 * Implementation of dt_object_operations::do_write_locked
468 * Test if the object is locked in write mode.
470 * \param[in] env execution environment
471 * \param[in] dt object to be tested
473 static int osp_md_object_write_locked(const struct lu_env *env,
474 struct dt_object *dt)
476 struct osp_object *obj = dt2osp_obj(dt);
// Nonzero only when this env is the recorded write owner.
478 return obj->opo_owner == env;
482 * Implementation of dt_index_operations::dio_lookup
484 * Look up record by key under a remote index object. It packs lookup update
485 * into RPC, sends to the remote OUT and waits for the lookup result.
487 * \param[in] env execution environment
488 * \param[in] dt index object to lookup
489 * \param[out] rec record in which to return lookup result
490 * \param[in] key key of index which will be looked up
491 * \param[in] capa capability of lookup (not yet implemented)
493 * \retval 1 if the lookup succeeds.
494 * \retval negative errno if the lookup fails.
496 static int osp_md_index_lookup(const struct lu_env *env, struct dt_object *dt,
497 struct dt_rec *rec, const struct dt_key *key,
498 struct lustre_capa *capa)
// lbuf is scratch space taken from the per-env OSP info.
500 struct lu_buf *lbuf = &osp_env_info(env)->osi_lb2;
501 struct osp_device *osp = lu2osp_dev(dt->do_lu.lo_dev);
502 struct dt_device *dt_dev = &osp->opd_dt_dev;
503 struct dt_update_request *update;
504 struct object_update_reply *reply;
505 struct ptlrpc_request *req = NULL;
510 /* Because it needs send the update buffer right away,
511 * just create an update buffer, instead of attaching the
512 * update_remote list of the thandle.
// A private update request is built on the device instead of going through a transaction handle.
514 update = dt_update_request_create(dt_dev);
516 RETURN(PTR_ERR(update));
// Pack the lookup of key under this index FID into the request buffer.
518 rc = out_index_lookup_pack(env, &update->dur_buf,
519 lu_object_fid(&dt->do_lu), rec, key);
521 CERROR("%s: Insert update error: rc = %d\n",
522 dt_dev->dd_lu_dev.ld_obd->obd_name, rc);
// Send the request over the client import; the reply request comes back through req.
526 rc = out_remote_sync(env, osp->opd_obd->u.cli.cl_import, update, &req);
// NOTE(review): reply is dereferenced below without a visible NULL check - confirm
// that req_capsule_server_sized_get() cannot return NULL on this path.
530 reply = req_capsule_server_sized_get(&req->rq_pill,
531 &RMF_OUT_UPDATE_REPLY,
532 OUT_UPDATE_REPLY_SIZE);
// Reject replies that do not carry the expected magic.
533 if (reply->ourp_magic != UPDATE_REPLY_MAGIC) {
534 CERROR("%s: Wrong version %x expected %x: rc = %d\n",
535 dt_dev->dd_lu_dev.ld_obd->obd_name,
536 reply->ourp_magic, UPDATE_REPLY_MAGIC, -EPROTO);
537 GOTO(out, rc = -EPROTO);
// Pull the data of the result selected by the last argument (0) into lbuf.
540 rc = object_update_result_data_get(reply, lbuf, 0);
// The payload has to be exactly one FID in size.
544 if (lbuf->lb_len != sizeof(*fid)) {
545 CERROR("%s: lookup "DFID" %s wrong size %d\n",
546 dt_dev->dd_lu_dev.ld_obd->obd_name,
547 PFID(lu_object_fid(&dt->do_lu)), (char *)key,
549 GOTO(out, rc = -EINVAL);
// Byte-swap the FID when the reply needs swabbing.
553 if (ptlrpc_rep_need_swab(req))
554 lustre_swab_lu_fid(fid);
// Refuse FIDs that fail the sanity check.
555 if (!fid_is_sane(fid)) {
556 CERROR("%s: lookup "DFID" %s invalid fid "DFID"\n",
557 dt_dev->dd_lu_dev.ld_obd->obd_name,
558 PFID(lu_object_fid(&dt->do_lu)), (char *)key, PFID(fid));
559 GOTO(out, rc = -EINVAL);
// Hand the validated FID back to the caller through rec.
562 memcpy(rec, fid, sizeof(*fid));
// Cleanup: release the reply request and destroy the private update request.
568 ptlrpc_req_finished(req);
570 dt_update_request_destroy(update);
576 * Implementation of dt_index_operations::dio_declare_insert
578 * Declare the index insert of the remote object, i.e. pack index insert update
579 * into the RPC, which will be sent during transaction start.
581 * \param[in] env execution environment
582 * \param[in] dt object for which to insert index
583 * \param[in] rec record of the index which will be inserted
584 * \param[in] key key of the index which will be inserted
585 * \param[in] th the transaction handle
587 * \retval 0 if the insertion succeeds.
588 * \retval negative errno if the insertion fails.
590 static int osp_md_declare_insert(const struct lu_env *env,
591 struct dt_object *dt,
592 const struct dt_rec *rec,
593 const struct dt_key *key,
596 struct dt_update_request *update;
// The update request provides the buffer and batchid for the pack call below.
599 update = dt_update_request_find_or_create(th, dt);
// An error pointer is logged and its errno is returned unchanged.
600 if (IS_ERR(update)) {
601 CERROR("%s: Get OSP update buf failed: rc = %d\n",
602 dt->do_lu.lo_dev->ld_obd->obd_name,
603 (int)PTR_ERR(update));
604 return PTR_ERR(update);
// Pack an index-insert update of (key, rec) for this FID.
607 rc = out_index_insert_pack(env, &update->dur_buf,
608 lu_object_fid(&dt->do_lu), rec, key,
609 update->dur_batchid);
614 * Implementation of dt_index_operations::dio_insert
616 * Do nothing in this method for now. In DNE phase I, remote updates
617 * are actually executed during transaction start, i.e. the index has
618 * already been inserted when calling this method.
620 * \param[in] env execution environment
621 * \param[in] dt object for which to insert index
622 * \param[in] rec record of the index to be inserted
623 * \param[in] key key of the index to be inserted
624 * \param[in] th the transaction handle
625 * \param[in] capa capability of insert (not yet implemented)
626 * \param[in] ignore_quota quota enforcement for insert
628 * \retval only return 0 for now
// Registered as the .dio_insert entry of osp_md_index_ops.
630 static int osp_md_index_insert(const struct lu_env *env,
631 struct dt_object *dt,
632 const struct dt_rec *rec,
633 const struct dt_key *key,
635 struct lustre_capa *capa,
642 * Implementation of dt_index_operations::dio_declare_delete
644 * Declare the index delete of the remote object, i.e. insert index delete
645 * update into the RPC, which will be sent during transaction start.
647 * \param[in] env execution environment
648 * \param[in] dt object for which to delete index
649 * \param[in] key key of the index
650 * \param[in] th the transaction handle
652 * \retval 0 if the insertion succeeds.
653 * \retval negative errno if the insertion fails.
655 static int osp_md_declare_delete(const struct lu_env *env,
656 struct dt_object *dt,
657 const struct dt_key *key,
660 struct dt_update_request *update;
// Fetch the update request tied to this transaction handle and object.
663 update = dt_update_request_find_or_create(th, dt);
// Failure to obtain it is logged and propagated as the errno held in the pointer.
664 if (IS_ERR(update)) {
665 CERROR("%s: Get OSP update buf failed: rc = %d\n",
666 dt->do_lu.lo_dev->ld_obd->obd_name,
667 (int)PTR_ERR(update));
668 return PTR_ERR(update);
// Pack an index-delete update of key for this FID.
671 rc = out_index_delete_pack(env, &update->dur_buf,
672 lu_object_fid(&dt->do_lu), key,
673 update->dur_batchid);
678 * Implementation of dt_index_operations::dio_delete
680 * Do nothing in this method for now. In DNE phase I, remote updates
681 * are actually executed during transaction start, i.e. the index has already
682 * been deleted when calling this method.
684 * \param[in] env execution environment
685 * \param[in] dt object for which to delete index
686 * \param[in] key key of the index which will be deleted
687 * \param[in] th the transaction handle
688 * \param[in] capa capability of delete (not yet implemented)
690 * \retval only return 0 for now
692 static int osp_md_index_delete(const struct lu_env *env,
693 struct dt_object *dt,
694 const struct dt_key *key,
696 struct lustre_capa *capa)
// Trace the FID and the key; the key is printed with %s, so it is read as a C string here.
698 CDEBUG(D_INFO, "index delete "DFID" %s\n",
699 PFID(&dt->do_lu.lo_header->loh_fid), (char *)key);
705 * Implementation of dt_index_operations::dio_it.next
707 * Advance the pointer of the iterator to the next entry. It shares a similar
708 * internal implementation with osp_orphan_it_next(), which is being used for
709 * remote orphan index object. This method will be used for remote directory.
711 * \param[in] env execution environment
712 * \param[in] di iterator of this iteration
714 * \retval 0 if the pointer is advanced successfully.
715 * \retval 1 if it reaches to the end of the index object.
716 * \retval negative errno if the pointer cannot be advanced.
718 int osp_md_index_it_next(const struct lu_env *env, struct dt_it *di)
720 struct osp_it *it = (struct osp_it *)di;
721 struct lu_idxpage *idxpage;
// ent starts out as the entry the iterator currently refers to.
722 struct lu_dirent *ent = (struct lu_dirent *)it->ooi_ent;
// Work on the index page the iterator currently holds, if there is one.
727 idxpage = it->ooi_cur_idxpage;
728 if (idxpage != NULL) {
// lip_nr appears to be the number of entries in the page - confirm against lu_idxpage.
729 if (idxpage->lip_nr == 0)
// lip_entries is reinterpreted as the first lu_dirent of the page.
735 (struct lu_dirent *)idxpage->lip_entries;
// Otherwise step forward while the record length is nonzero and ooi_pos_ent is below lip_nr.
737 } else if (le16_to_cpu(ent->lde_reclen) != 0 &&
738 it->ooi_pos_ent < idxpage->lip_nr) {
// The next entry sits lde_reclen bytes after the current one.
739 ent = (struct lu_dirent *)(((char *)ent) +
740 le16_to_cpu(ent->lde_reclen));
// Ask osp_it_next_page() for another page.
748 rc = osp_it_next_page(env, di);
756 * Implementation of dt_index_operations::dio_it.key
758 * Get the key at current iterator position. These iteration methods
759 * (dio_it) will only be used for iterating the remote directory, so
760 * the key is the name of the directory entry.
762 * \param[in] env execution environment
763 * \param[in] di iterator of this iteration
765 * \retval name of the current entry
767 static struct dt_key *osp_it_key(const struct lu_env *env,
768 const struct dt_it *di)
770 struct osp_it *it = (struct osp_it *)di;
771 struct lu_dirent *ent = (struct lu_dirent *)it->ooi_ent;
// The key is the name field of the current directory entry.
773 return (struct dt_key *)ent->lde_name;
777 * Implementation of dt_index_operations::dio_it.key_size
779 * Get the key size at current iterator position. These iteration methods
780 * (dio_it) will only be used for iterating the remote directory, so the key
781 * size is the name size of the directory entry.
783 * \param[in] env execution environment
784 * \param[in] di iterator of this iteration
786 * \retval name size of the current entry
789 static int osp_it_key_size(const struct lu_env *env, const struct dt_it *di)
791 struct osp_it *it = (struct osp_it *)di;
792 struct lu_dirent *ent = (struct lu_dirent *)it->ooi_ent;
// lde_namelen is converted from little-endian to CPU order before being returned.
794 return (int)le16_to_cpu(ent->lde_namelen);
798 * Implementation of dt_index_operations::dio_it.rec
800 * Get the record at current iterator position. These iteration methods
801 * (dio_it) will only be used for iterating the remote directory, so it
802 * uses lu_dirent_calc_size() to calculate the record size.
804 * \param[in] env execution environment
805 * \param[in] di iterator of this iteration
806 * \param[out] rec the record to be returned
807 * \param[in] attr attributes of the index object, so it knows
808 * how to pack the entry.
810 * \retval only return 0 for now
812 static int osp_md_index_it_rec(const struct lu_env *env, const struct dt_it *di,
813 struct dt_rec *rec, __u32 attr)
815 struct osp_it *it = (struct osp_it *)di;
816 struct lu_dirent *ent = (struct lu_dirent *)it->ooi_ent;
// Size the record from the name length and attr, then copy that many bytes of the entry into rec.
819 reclen = lu_dirent_calc_size(le16_to_cpu(ent->lde_namelen), attr);
820 memcpy(rec, ent, reclen);
825 * Implementation of dt_index_operations::dio_it.load
827 * Locate the iteration cursor to the specified position (cookie).
829 * \param[in] env pointer to the thread context
830 * \param[in] di pointer to the iteration structure
831 * \param[in] hash the specified position
833 * \retval positive number for locating the exact position
835 * \retval 0 for arriving at the end of the iteration
836 * \retval negative error number on failure
838 static int osp_it_load(const struct lu_env *env, const struct dt_it *di,
841 struct osp_it *it = (struct osp_it *)di;
// Advance once via osp_md_index_it_next(); the cast drops the const qualifier of di.
845 rc = osp_md_index_it_next(env, (struct dt_it *)di);
855 const struct dt_index_operations osp_md_index_ops = {
// Index entry points: lookup plus declare and execute pairs for insert and delete.
856 .dio_lookup = osp_md_index_lookup,
857 .dio_declare_insert = osp_md_declare_insert,
858 .dio_insert = osp_md_index_insert,
859 .dio_declare_delete = osp_md_declare_delete,
860 .dio_delete = osp_md_index_delete,
// Iterator callbacks; osp_it_store and osp_it_key_rec are not defined in this part of the file.
866 .next = osp_md_index_it_next,
868 .key_size = osp_it_key_size,
869 .rec = osp_md_index_it_rec,
870 .store = osp_it_store,
872 .key_rec = osp_it_key_rec,
877 * Implementation of dt_object_operations::do_index_try
879 * Try to initialize the index API pointer for the given object. This
880 * is the entry point of the index API, i.e. we must call this method
881 * to initialize the index object before calling other index methods.
883 * \param[in] env execution environment
884 * \param[in] dt index object to be initialized
885 * \param[in] feat the index feature of the object
887 * \retval 0 if the initialization succeeds.
888 * \retval negative errno if the initialization fails.
890 static int osp_md_index_try(const struct lu_env *env,
891 struct dt_object *dt,
892 const struct dt_index_features *feat)
// Install osp_md_index_ops as the index operations of the object.
894 dt->do_index_ops = &osp_md_index_ops;
899 * Implementation of dt_object_operations::do_object_lock
901 * Enqueue a lock (by ldlm_cli_enqueue()) of remote object on the remote MDT,
902 * which will lock the object in the global namespace.
904 * \param[in] env execution environment
905 * \param[in] dt object to be locked
906 * \param[out] lh lock handle
907 * \param[in] einfo enqueue information
908 * \param[in] policy lock policy
910 * \retval ELDLM_OK if locking the object succeeds.
911 * \retval negative errno if locking fails.
913 static int osp_md_object_lock(const struct lu_env *env,
914 struct dt_object *dt,
915 struct lustre_handle *lh,
916 struct ldlm_enqueue_info *einfo,
917 ldlm_policy_data_t *policy)
919 struct ldlm_res_id *res_id;
920 struct dt_device *dt_dev = lu2dt_dev(dt->do_lu.lo_dev);
921 struct osp_device *osp = dt2osp_dev(dt_dev);
922 struct ptlrpc_request *req;
// The resource to lock comes from the enqueue info and must be set.
927 res_id = einfo->ei_res_id;
928 LASSERT(res_id != NULL);
// Try to match a lock in the obd namespace for the requested type, policy and mode.
930 mode = ldlm_lock_match(osp->opd_obd->obd_namespace,
931 LDLM_FL_BLOCK_GRANTED, res_id,
932 einfo->ei_type, policy,
933 einfo->ei_mode, lh, 0);
// Build an enqueue request on the OSP export.
937 req = ldlm_enqueue_pack(osp->opd_exp, 0);
939 RETURN(PTR_ERR(req));
// Enqueue through the export; lh is passed along for the lock handle.
941 rc = ldlm_cli_enqueue(osp->opd_exp, &req, einfo, res_id,
942 (const ldlm_policy_data_t *)policy,
943 &flags, NULL, 0, LVB_T_NONE, lh, 0);
// The request is released once the enqueue call has returned.
945 ptlrpc_req_finished(req);
// Every result other than ELDLM_OK is reported as -EIO.
947 return rc == ELDLM_OK ? 0 : -EIO;
951 * Implementation of dt_object_operations::do_object_unlock
953 * Cancel a lock of a remote object.
955 * \param[in] env execution environment
956 * \param[in] dt object to be unlocked
957 * \param[in] einfo lock enqueue information
958 * \param[in] policy lock policy
960 * \retval Only return 0 for now.
962 static int osp_md_object_unlock(const struct lu_env *env,
963 struct dt_object *dt,
964 struct ldlm_enqueue_info *einfo,
965 ldlm_policy_data_t *policy)
// The lock handle travels in ei_cbdata of the enqueue info.
967 struct lustre_handle *lockh = einfo->ei_cbdata;
// Drop the reference held for ei_mode on that handle.
970 ldlm_lock_decref(lockh, einfo->ei_mode);
975 struct dt_object_operations osp_md_obj_ops = {
// Read and write locking built on opo_sem of the OSP object.
976 .do_read_lock = osp_md_object_read_lock,
977 .do_write_lock = osp_md_object_write_lock,
978 .do_read_unlock = osp_md_object_read_unlock,
979 .do_write_unlock = osp_md_object_write_unlock,
980 .do_write_locked = osp_md_object_write_locked,
// Object creation.
981 .do_declare_create = osp_md_declare_object_create,
982 .do_create = osp_md_object_create,
// Reference count changes.
983 .do_declare_ref_add = osp_md_declare_ref_add,
984 .do_ref_add = osp_md_object_ref_add,
985 .do_declare_ref_del = osp_md_declare_object_ref_del,
986 .do_ref_del = osp_md_object_ref_del,
// Destroy uses osp_* routines that are not defined in this part of the file.
987 .do_declare_destroy = osp_declare_object_destroy,
988 .do_destroy = osp_object_destroy,
989 .do_ah_init = osp_md_ah_init,
// Attribute and extended attribute access; the osp_* routines are not defined in this part of the file.
990 .do_attr_get = osp_attr_get,
991 .do_declare_attr_set = osp_md_declare_attr_set,
992 .do_attr_set = osp_md_attr_set,
993 .do_xattr_get = osp_xattr_get,
994 .do_declare_xattr_set = osp_declare_xattr_set,
995 .do_xattr_set = osp_xattr_set,
996 .do_declare_xattr_del = osp_declare_xattr_del,
997 .do_xattr_del = osp_xattr_del,
// Index setup and lock handling.
998 .do_index_try = osp_md_index_try,
999 .do_object_lock = osp_md_object_lock,
1000 .do_object_unlock = osp_md_object_unlock,
1004 * Implementation of dt_body_operations::dbo_declare_write
1006 * Declare an object write. In DNE phase I, it will pack the write
1007 * object update into the RPC.
1009 * \param[in] env execution environment
1010 * \param[in] dt object to be written
1011 * \param[in] buf buffer to write which includes an embedded size field
1012 * \param[in] pos offset in the object to start writing at
1013 * \param[in] th transaction handle
1015 * \retval 0 if the insertion succeeds.
1016 * \retval negative errno if the insertion fails.
1018 static ssize_t osp_md_declare_write(const struct lu_env *env,
1019 struct dt_object *dt,
1020 const struct lu_buf *buf,
1021 loff_t pos, struct thandle *th)
1023 struct dt_update_request *update;
// The update request provides the buffer and batchid for the pack call below.
1026 update = dt_update_request_find_or_create(th, dt);
// An error pointer is logged and its errno is returned unchanged.
1027 if (IS_ERR(update)) {
1028 CERROR("%s: Get OSP update buf failed: rc = %d\n",
1029 dt->do_lu.lo_dev->ld_obd->obd_name,
1030 (int)PTR_ERR(update));
1031 return PTR_ERR(update);
// Pack a write update of buf at offset pos for this FID.
1034 rc = out_write_pack(env, &update->dur_buf, lu_object_fid(&dt->do_lu),
1035 buf, pos, update->dur_batchid);
1042 * Implementation of dt_body_operations::dbo_write
1044 * Return the buffer size. In DNE phase I, remote updates
1045 * are actually executed during transaction start, the buffer has
1046 * already been written when this method is being called.
1048 * \param[in] env execution environment
1049 * \param[in] dt object to be written
1050 * \param[in] buf buffer to write which includes an embedded size field
1051 * \param[in] pos offset in the object to start writing at
1052 * \param[in] th transaction handle
1053 * \param[in] capa capability of the write (not yet implemented)
1054 * \param[in] ignore_quota quota enforcement for this write
1056 * \retval the buffer size in bytes.
1058 static ssize_t osp_md_write(const struct lu_env *env, struct dt_object *dt,
1059 const struct lu_buf *buf, loff_t *pos,
1060 struct thandle *handle,
1061 struct lustre_capa *capa, int ignore_quota)
// Advance the caller offset by the buffer length.
1063 *pos += buf->lb_len;
1067 /* These body operations will be used to write symlinks during migration etc. */
1068 struct dt_body_operations osp_md_body_ops = {
// Write entry points: declare, then execute.
1069 .dbo_declare_write = osp_md_declare_write,
1070 .dbo_write = osp_md_write,