Whamcloud - gitweb
LU-1770 ptlrpc: introducing OBD_CONNECT_FLOCK_OWNER flag
[fs/lustre-release.git] / lustre / obdecho / echo_client.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
19  *
20  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21  * CA 95054 USA or visit www.sun.com if you need additional information or
22  * have any questions.
23  *
24  * GPL HEADER END
25  */
26 /*
27  * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
28  * Use is subject to license terms.
29  *
30  * Copyright (c) 2011, 2012, Whamcloud, Inc.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  */
36
37 #define DEBUG_SUBSYSTEM S_ECHO
38 #ifdef __KERNEL__
39 #include <libcfs/libcfs.h>
40 #else
41 #include <liblustre.h>
42 #endif
43
44 #include <obd.h>
45 #include <obd_support.h>
46 #include <obd_class.h>
47 #include <lustre_debug.h>
48 #include <lprocfs_status.h>
49 #include <cl_object.h>
50 #include <lustre_fid.h>
51 #include <lustre_acl.h>
52 #include <lustre_net.h>
53 #include <obd_lov.h>
54
55 #include "echo_internal.h"
56
57 /** \defgroup echo_client Echo Client
58  * @{
59  */
60
61 struct echo_device {
62         struct cl_device        ed_cl;
63         struct echo_client_obd *ed_ec;
64
65         struct cl_site          ed_site_myself;
66         struct cl_site         *ed_site;
67         struct lu_device       *ed_next;
68         int                     ed_next_islov;
69         int                     ed_next_ismd;
70         struct lu_client_seq   *ed_cl_seq;
71 };
72
73 struct echo_object {
74         struct cl_object        eo_cl;
75         struct cl_object_header eo_hdr;
76
77         struct echo_device     *eo_dev;
78         cfs_list_t              eo_obj_chain;
79         struct lov_stripe_md   *eo_lsm;
80         cfs_atomic_t            eo_npages;
81         int                     eo_deleted;
82 };
83
84 struct echo_object_conf {
85         struct cl_object_conf  eoc_cl;
86         struct lov_stripe_md **eoc_md;
87 };
88
89 struct echo_page {
90         struct cl_page_slice   ep_cl;
91         cfs_mutex_t            ep_lock;
92         cfs_page_t            *ep_vmpage;
93 };
94
95 struct echo_lock {
96         struct cl_lock_slice   el_cl;
97         cfs_list_t             el_chain;
98         struct echo_object    *el_object;
99         __u64                  el_cookie;
100         cfs_atomic_t           el_refcount;
101 };
102
103 struct echo_io {
104         struct cl_io_slice     ei_cl;
105 };
106
#if 0
/* Compiled out: echo layer slice of a cl_req. */
struct echo_req {
        struct cl_req_slice er_cl;
};
#endif
112
/* Forward declarations; both are used by echo_device_alloc(). */
static int echo_client_setup(const struct lu_env *env,
                             struct obd_device *obddev,
                             struct lustre_cfg *lcfg);

static int echo_client_cleanup(struct obd_device *obddev);
117
118
119 /** \defgroup echo_helpers Helper functions
120  * @{
121  */
122 static inline struct echo_device *cl2echo_dev(const struct cl_device *dev)
123 {
124         return container_of0(dev, struct echo_device, ed_cl);
125 }
126
127 static inline struct cl_device *echo_dev2cl(struct echo_device *d)
128 {
129         return &d->ed_cl;
130 }
131
132 static inline struct echo_device *obd2echo_dev(const struct obd_device *obd)
133 {
134         return cl2echo_dev(lu2cl_dev(obd->obd_lu_dev));
135 }
136
137 static inline struct cl_object *echo_obj2cl(struct echo_object *eco)
138 {
139         return &eco->eo_cl;
140 }
141
142 static inline struct echo_object *cl2echo_obj(const struct cl_object *o)
143 {
144         return container_of(o, struct echo_object, eo_cl);
145 }
146
147 static inline struct echo_page *cl2echo_page(const struct cl_page_slice *s)
148 {
149         return container_of(s, struct echo_page, ep_cl);
150 }
151
152 static inline struct echo_lock *cl2echo_lock(const struct cl_lock_slice *s)
153 {
154         return container_of(s, struct echo_lock, el_cl);
155 }
156
157 static inline struct cl_lock *echo_lock2cl(const struct echo_lock *ecl)
158 {
159         return ecl->el_cl.cls_lock;
160 }
161
162 static struct lu_context_key echo_thread_key;
163 static inline struct echo_thread_info *echo_env_info(const struct lu_env *env)
164 {
165         struct echo_thread_info *info;
166         info = lu_context_key_get(&env->le_ctx, &echo_thread_key);
167         LASSERT(info != NULL);
168         return info;
169 }
170
171 static inline
172 struct echo_object_conf *cl2echo_conf(const struct cl_object_conf *c)
173 {
174         return container_of(c, struct echo_object_conf, eoc_cl);
175 }
176
177 static inline void lsm2fid(struct lov_stripe_md *lsm, struct lu_fid *fid)
178 {
179         fid_zero(fid);
180         fid->f_seq = FID_SEQ_ECHO;
181         /* truncated to 32 bits by assignment */
182         fid->f_oid = lsm->lsm_object_id;
183         fid->f_ver = lsm->lsm_object_id >> 32;
184 }
185 /** @} echo_helpers */
186
187 static struct echo_object *cl_echo_object_find(struct echo_device *d,
188                                                struct lov_stripe_md **lsm);
189 static int cl_echo_object_put(struct echo_object *eco);
190 static int cl_echo_enqueue   (struct echo_object *eco, obd_off start,
191                               obd_off end, int mode, __u64 *cookie);
192 static int cl_echo_cancel    (struct echo_device *d, __u64 cookie);
193 static int cl_echo_object_brw(struct echo_object *eco, int rw, obd_off offset,
194                               cfs_page_t **pages, int npages, int async);
195
196 static struct echo_thread_info *echo_env_info(const struct lu_env *env);
197
198 struct echo_thread_info {
199         struct echo_object_conf eti_conf;
200         struct lustre_md        eti_md;
201
202         struct cl_2queue        eti_queue;
203         struct cl_io            eti_io;
204         struct cl_lock_descr    eti_descr;
205         struct lu_fid           eti_fid;
206         struct lu_fid           eti_fid2;
207         struct md_op_spec       eti_spec;
208         struct lov_mds_md_v3    eti_lmm;
209         struct lov_user_md_v3   eti_lum;
210         struct md_attr          eti_ma;
211         struct lu_name          eti_lname;
212         /* per-thread values, can be re-used */
213         void                    *eti_big_lmm;
214         int                     eti_big_lmmsize;
215         char                    eti_name[20];
216         struct lu_buf           eti_buf;
217         char                    eti_xattr_buf[LUSTRE_POSIX_ACL_MAX_SIZE];
218 };
219
/*
 * Session data placeholder: no session state is used right now, the struct
 * only gives echo_session_kmem something to allocate.
 */
struct echo_session_info {
        unsigned long dummy;
};
224
225 static cfs_mem_cache_t *echo_page_kmem;
226 static cfs_mem_cache_t *echo_lock_kmem;
227 static cfs_mem_cache_t *echo_object_kmem;
228 static cfs_mem_cache_t *echo_thread_kmem;
229 static cfs_mem_cache_t *echo_session_kmem;
230 //static cfs_mem_cache_t *echo_req_kmem;
231
232 static struct lu_kmem_descr echo_caches[] = {
233         {
234                 .ckd_cache = &echo_page_kmem,
235                 .ckd_name  = "echo_page_kmem",
236                 .ckd_size  = sizeof (struct echo_page)
237         },
238         {
239                 .ckd_cache = &echo_lock_kmem,
240                 .ckd_name  = "echo_lock_kmem",
241                 .ckd_size  = sizeof (struct echo_lock)
242         },
243         {
244                 .ckd_cache = &echo_object_kmem,
245                 .ckd_name  = "echo_object_kmem",
246                 .ckd_size  = sizeof (struct echo_object)
247         },
248         {
249                 .ckd_cache = &echo_thread_kmem,
250                 .ckd_name  = "echo_thread_kmem",
251                 .ckd_size  = sizeof (struct echo_thread_info)
252         },
253         {
254                 .ckd_cache = &echo_session_kmem,
255                 .ckd_name  = "echo_session_kmem",
256                 .ckd_size  = sizeof (struct echo_session_info)
257         },
258 #if 0
259         {
260                 .ckd_cache = &echo_req_kmem,
261                 .ckd_name  = "echo_req_kmem",
262                 .ckd_size  = sizeof (struct echo_req)
263         },
264 #endif
265         {
266                 .ckd_cache = NULL
267         }
268 };
269
270 /** \defgroup echo_page Page operations
271  *
272  * Echo page operations.
273  *
274  * @{
275  */
276 static cfs_page_t *echo_page_vmpage(const struct lu_env *env,
277                                     const struct cl_page_slice *slice)
278 {
279         return cl2echo_page(slice)->ep_vmpage;
280 }
281
282 static int echo_page_own(const struct lu_env *env,
283                          const struct cl_page_slice *slice,
284                          struct cl_io *io, int nonblock)
285 {
286         struct echo_page *ep = cl2echo_page(slice);
287
288         if (!nonblock)
289                 cfs_mutex_lock(&ep->ep_lock);
290         else if (!cfs_mutex_trylock(&ep->ep_lock))
291                 return -EAGAIN;
292         return 0;
293 }
294
295 static void echo_page_disown(const struct lu_env *env,
296                              const struct cl_page_slice *slice,
297                              struct cl_io *io)
298 {
299         struct echo_page *ep = cl2echo_page(slice);
300
301         LASSERT(cfs_mutex_is_locked(&ep->ep_lock));
302         cfs_mutex_unlock(&ep->ep_lock);
303 }
304
305 static void echo_page_discard(const struct lu_env *env,
306                               const struct cl_page_slice *slice,
307                               struct cl_io *unused)
308 {
309         cl_page_delete(env, slice->cpl_page);
310 }
311
312 static int echo_page_is_vmlocked(const struct lu_env *env,
313                                  const struct cl_page_slice *slice)
314 {
315         if (cfs_mutex_is_locked(&cl2echo_page(slice)->ep_lock))
316                 return -EBUSY;
317         return -ENODATA;
318 }
319
320 static void echo_page_completion(const struct lu_env *env,
321                                  const struct cl_page_slice *slice,
322                                  int ioret)
323 {
324         LASSERT(slice->cpl_page->cp_sync_io != NULL);
325 }
326
327 static void echo_page_fini(const struct lu_env *env,
328                            struct cl_page_slice *slice)
329 {
330         struct echo_page *ep    = cl2echo_page(slice);
331         struct echo_object *eco = cl2echo_obj(slice->cpl_obj);
332         cfs_page_t *vmpage      = ep->ep_vmpage;
333         ENTRY;
334
335         cfs_atomic_dec(&eco->eo_npages);
336         page_cache_release(vmpage);
337         OBD_SLAB_FREE_PTR(ep, echo_page_kmem);
338         EXIT;
339 }
340
/** cpo_prep for both reads and writes: no preparation needed, always 0. */
static int echo_page_prep(const struct lu_env *env,
                          const struct cl_page_slice *slice,
                          struct cl_io *unused)
{
        (void)env;
        (void)slice;
        (void)unused;

        return 0;
}
347
348 static int echo_page_print(const struct lu_env *env,
349                            const struct cl_page_slice *slice,
350                            void *cookie, lu_printer_t printer)
351 {
352         struct echo_page *ep = cl2echo_page(slice);
353
354         (*printer)(env, cookie, LUSTRE_ECHO_CLIENT_NAME"-page@%p %d vm@%p\n",
355                    ep, cfs_mutex_is_locked(&ep->ep_lock), ep->ep_vmpage);
356         return 0;
357 }
358
359 static const struct cl_page_operations echo_page_ops = {
360         .cpo_own           = echo_page_own,
361         .cpo_disown        = echo_page_disown,
362         .cpo_discard       = echo_page_discard,
363         .cpo_vmpage        = echo_page_vmpage,
364         .cpo_fini          = echo_page_fini,
365         .cpo_print         = echo_page_print,
366         .cpo_is_vmlocked   = echo_page_is_vmlocked,
367         .io = {
368                 [CRT_READ] = {
369                         .cpo_prep        = echo_page_prep,
370                         .cpo_completion  = echo_page_completion,
371                 },
372                 [CRT_WRITE] = {
373                         .cpo_prep        = echo_page_prep,
374                         .cpo_completion  = echo_page_completion,
375                 }
376         }
377 };
378 /** @} echo_page */
379
380 /** \defgroup echo_lock Locking
381  *
382  * echo lock operations
383  *
384  * @{
385  */
386 static void echo_lock_fini(const struct lu_env *env,
387                            struct cl_lock_slice *slice)
388 {
389         struct echo_lock *ecl = cl2echo_lock(slice);
390
391         LASSERT(cfs_list_empty(&ecl->el_chain));
392         OBD_SLAB_FREE_PTR(ecl, echo_lock_kmem);
393 }
394
395 static void echo_lock_delete(const struct lu_env *env,
396                              const struct cl_lock_slice *slice)
397 {
398         struct echo_lock *ecl      = cl2echo_lock(slice);
399
400         LASSERT(cfs_list_empty(&ecl->el_chain));
401 }
402
/** clo_fits_into: an echo lock matches any request, so always return 1. */
static int echo_lock_fits_into(const struct lu_env *env,
                               const struct cl_lock_slice *slice,
                               const struct cl_lock_descr *need,
                               const struct cl_io *unused)
{
        (void)env;
        (void)slice;
        (void)need;
        (void)unused;

        return 1;
}
410
411 static struct cl_lock_operations echo_lock_ops = {
412         .clo_fini      = echo_lock_fini,
413         .clo_delete    = echo_lock_delete,
414         .clo_fits_into = echo_lock_fits_into
415 };
416
417 /** @} echo_lock */
418
419 /** \defgroup echo_cl_ops cl_object operations
420  *
421  * operations for cl_object
422  *
423  * @{
424  */
425 static struct cl_page *echo_page_init(const struct lu_env *env,
426                                       struct cl_object *obj,
427                                       struct cl_page *page, cfs_page_t *vmpage)
428 {
429         struct echo_page *ep;
430         ENTRY;
431
432         OBD_SLAB_ALLOC_PTR_GFP(ep, echo_page_kmem, CFS_ALLOC_IO);
433         if (ep != NULL) {
434                 struct echo_object *eco = cl2echo_obj(obj);
435                 ep->ep_vmpage = vmpage;
436                 page_cache_get(vmpage);
437                 cfs_mutex_init(&ep->ep_lock);
438                 cl_page_slice_add(page, &ep->ep_cl, obj, &echo_page_ops);
439                 cfs_atomic_inc(&eco->eo_npages);
440         }
441         RETURN(ERR_PTR(ep ? 0 : -ENOMEM));
442 }
443
/** coo_io_init: the echo layer adds nothing to an io, always returns 0. */
static int echo_io_init(const struct lu_env *env, struct cl_object *obj,
                        struct cl_io *io)
{
        (void)env;
        (void)obj;
        (void)io;

        return 0;
}
449
450 static int echo_lock_init(const struct lu_env *env,
451                           struct cl_object *obj, struct cl_lock *lock,
452                           const struct cl_io *unused)
453 {
454         struct echo_lock *el;
455         ENTRY;
456
457         OBD_SLAB_ALLOC_PTR_GFP(el, echo_lock_kmem, CFS_ALLOC_IO);
458         if (el != NULL) {
459                 cl_lock_slice_add(lock, &el->el_cl, obj, &echo_lock_ops);
460                 el->el_object = cl2echo_obj(obj);
461                 CFS_INIT_LIST_HEAD(&el->el_chain);
462                 cfs_atomic_set(&el->el_refcount, 0);
463         }
464         RETURN(el == NULL ? -ENOMEM : 0);
465 }
466
/** coo_conf_set: configuration changes are ignored, always returns 0. */
static int echo_conf_set(const struct lu_env *env, struct cl_object *obj,
                         const struct cl_object_conf *conf)
{
        (void)env;
        (void)obj;
        (void)conf;

        return 0;
}
472
473 static const struct cl_object_operations echo_cl_obj_ops = {
474         .coo_page_init = echo_page_init,
475         .coo_lock_init = echo_lock_init,
476         .coo_io_init   = echo_io_init,
477         .coo_conf_set  = echo_conf_set
478 };
479 /** @} echo_cl_ops */
480
481 /** \defgroup echo_lu_ops lu_object operations
482  *
483  * operations for echo lu object.
484  *
485  * @{
486  */
487 static int echo_object_init(const struct lu_env *env, struct lu_object *obj,
488                             const struct lu_object_conf *conf)
489 {
490         struct echo_device *ed         = cl2echo_dev(lu2cl_dev(obj->lo_dev));
491         struct echo_client_obd *ec     = ed->ed_ec;
492         struct echo_object *eco        = cl2echo_obj(lu2cl(obj));
493         ENTRY;
494
495         if (ed->ed_next) {
496                 struct lu_object  *below;
497                 struct lu_device  *under;
498
499                 under = ed->ed_next;
500                 below = under->ld_ops->ldo_object_alloc(env, obj->lo_header,
501                                                         under);
502                 if (below == NULL)
503                         RETURN(-ENOMEM);
504                 lu_object_add(obj, below);
505         }
506
507         if (!ed->ed_next_ismd) {
508                 const struct cl_object_conf *cconf = lu2cl_conf(conf);
509                 struct echo_object_conf *econf = cl2echo_conf(cconf);
510
511                 LASSERT(econf->eoc_md);
512                 eco->eo_lsm = *econf->eoc_md;
513                 /* clear the lsm pointer so that it won't get freed. */
514                 *econf->eoc_md = NULL;
515         } else {
516                 eco->eo_lsm = NULL;
517         }
518
519         eco->eo_dev = ed;
520         cfs_atomic_set(&eco->eo_npages, 0);
521
522         cfs_spin_lock(&ec->ec_lock);
523         cfs_list_add_tail(&eco->eo_obj_chain, &ec->ec_objects);
524         cfs_spin_unlock(&ec->ec_lock);
525
526         RETURN(0);
527 }
528
529 /* taken from osc_unpackmd() */
530 static int echo_alloc_memmd(struct echo_device *ed,
531                             struct lov_stripe_md **lsmp)
532 {
533         int lsm_size;
534
535         ENTRY;
536
537         /* If export is lov/osc then use their obd method */
538         if (ed->ed_next != NULL)
539                 return obd_alloc_memmd(ed->ed_ec->ec_exp, lsmp);
540         /* OFD has no unpackmd method, do everything here */
541         lsm_size = lov_stripe_md_size(1);
542
543         LASSERT(*lsmp == NULL);
544         OBD_ALLOC(*lsmp, lsm_size);
545         if (*lsmp == NULL)
546                 RETURN(-ENOMEM);
547
548         OBD_ALLOC((*lsmp)->lsm_oinfo[0], sizeof(struct lov_oinfo));
549         if ((*lsmp)->lsm_oinfo[0] == NULL) {
550                 OBD_FREE(*lsmp, lsm_size);
551                 RETURN(-ENOMEM);
552         }
553
554         loi_init((*lsmp)->lsm_oinfo[0]);
555         (*lsmp)->lsm_maxbytes = LUSTRE_STRIPE_MAXBYTES;
556
557         RETURN(lsm_size);
558 }
559
560 static int echo_free_memmd(struct echo_device *ed, struct lov_stripe_md **lsmp)
561 {
562         int lsm_size;
563
564         ENTRY;
565
566         /* If export is lov/osc then use their obd method */
567         if (ed->ed_next != NULL)
568                 return obd_free_memmd(ed->ed_ec->ec_exp, lsmp);
569         /* OFD has no unpackmd method, do everything here */
570         lsm_size = lov_stripe_md_size(1);
571
572         LASSERT(*lsmp != NULL);
573         OBD_FREE((*lsmp)->lsm_oinfo[0], sizeof(struct lov_oinfo));
574         OBD_FREE(*lsmp, lsm_size);
575         *lsmp = NULL;
576         RETURN(0);
577 }
578
579 static void echo_object_free(const struct lu_env *env, struct lu_object *obj)
580 {
581         struct echo_object *eco    = cl2echo_obj(lu2cl(obj));
582         struct echo_client_obd *ec = eco->eo_dev->ed_ec;
583         ENTRY;
584
585         LASSERT(cfs_atomic_read(&eco->eo_npages) == 0);
586
587         cfs_spin_lock(&ec->ec_lock);
588         cfs_list_del_init(&eco->eo_obj_chain);
589         cfs_spin_unlock(&ec->ec_lock);
590
591         lu_object_fini(obj);
592         lu_object_header_fini(obj->lo_header);
593
594         if (eco->eo_lsm)
595                 echo_free_memmd(eco->eo_dev, &eco->eo_lsm);
596         OBD_SLAB_FREE_PTR(eco, echo_object_kmem);
597         EXIT;
598 }
599
600 static int echo_object_print(const struct lu_env *env, void *cookie,
601                             lu_printer_t p, const struct lu_object *o)
602 {
603         struct echo_object *obj = cl2echo_obj(lu2cl(o));
604
605         return (*p)(env, cookie, "echoclient-object@%p", obj);
606 }
607
608 static const struct lu_object_operations echo_lu_obj_ops = {
609         .loo_object_init      = echo_object_init,
610         .loo_object_delete    = NULL,
611         .loo_object_release   = NULL,
612         .loo_object_free      = echo_object_free,
613         .loo_object_print     = echo_object_print,
614         .loo_object_invariant = NULL
615 };
616 /** @} echo_lu_ops */
617
618 /** \defgroup echo_lu_dev_ops  lu_device operations
619  *
620  * Operations for echo lu device.
621  *
622  * @{
623  */
624 static struct lu_object *echo_object_alloc(const struct lu_env *env,
625                                            const struct lu_object_header *hdr,
626                                            struct lu_device *dev)
627 {
628         struct echo_object *eco;
629         struct lu_object *obj = NULL;
630         ENTRY;
631
632         /* we're the top dev. */
633         LASSERT(hdr == NULL);
634         OBD_SLAB_ALLOC_PTR_GFP(eco, echo_object_kmem, CFS_ALLOC_IO);
635         if (eco != NULL) {
636                 struct cl_object_header *hdr = &eco->eo_hdr;
637
638                 obj = &echo_obj2cl(eco)->co_lu;
639                 cl_object_header_init(hdr);
640                 lu_object_init(obj, &hdr->coh_lu, dev);
641                 lu_object_add_top(&hdr->coh_lu, obj);
642
643                 eco->eo_cl.co_ops = &echo_cl_obj_ops;
644                 obj->lo_ops       = &echo_lu_obj_ops;
645         }
646         RETURN(obj);
647 }
648
649 static struct lu_device_operations echo_device_lu_ops = {
650         .ldo_object_alloc   = echo_object_alloc,
651 };
652
653 /** @} echo_lu_dev_ops */
654
655 static struct cl_device_operations echo_device_cl_ops = {
656 };
657
658 /** \defgroup echo_init Setup and teardown
659  *
660  * Init and fini functions for echo client.
661  *
662  * @{
663  */
664 static int echo_site_init(const struct lu_env *env, struct echo_device *ed)
665 {
666         struct cl_site *site = &ed->ed_site_myself;
667         int rc;
668
669         /* initialize site */
670         rc = cl_site_init(site, &ed->ed_cl);
671         if (rc) {
672                 CERROR("Cannot initilize site for echo client(%d)\n", rc);
673                 return rc;
674         }
675
676         rc = lu_site_init_finish(&site->cs_lu);
677         if (rc)
678                 return rc;
679
680         ed->ed_site = site;
681         return 0;
682 }
683
684 static void echo_site_fini(const struct lu_env *env, struct echo_device *ed)
685 {
686         if (ed->ed_site) {
687                 if (!ed->ed_next_ismd)
688                         cl_site_fini(ed->ed_site);
689                 ed->ed_site = NULL;
690         }
691 }
692
693 static void *echo_thread_key_init(const struct lu_context *ctx,
694                           struct lu_context_key *key)
695 {
696         struct echo_thread_info *info;
697
698         OBD_SLAB_ALLOC_PTR_GFP(info, echo_thread_kmem, CFS_ALLOC_IO);
699         if (info == NULL)
700                 info = ERR_PTR(-ENOMEM);
701         return info;
702 }
703
704 static void echo_thread_key_fini(const struct lu_context *ctx,
705                          struct lu_context_key *key, void *data)
706 {
707         struct echo_thread_info *info = data;
708         OBD_SLAB_FREE_PTR(info, echo_thread_kmem);
709 }
710
/** lct_exit for the thread key: nothing to do. */
static void echo_thread_key_exit(const struct lu_context *ctx,
                         struct lu_context_key *key, void *data)
{
        (void)ctx;
        (void)key;
        (void)data;
}
715
716 static struct lu_context_key echo_thread_key = {
717         .lct_tags = LCT_CL_THREAD,
718         .lct_init = echo_thread_key_init,
719         .lct_fini = echo_thread_key_fini,
720         .lct_exit = echo_thread_key_exit
721 };
722
723 static void *echo_session_key_init(const struct lu_context *ctx,
724                                   struct lu_context_key *key)
725 {
726         struct echo_session_info *session;
727
728         OBD_SLAB_ALLOC_PTR_GFP(session, echo_session_kmem, CFS_ALLOC_IO);
729         if (session == NULL)
730                 session = ERR_PTR(-ENOMEM);
731         return session;
732 }
733
734 static void echo_session_key_fini(const struct lu_context *ctx,
735                                  struct lu_context_key *key, void *data)
736 {
737         struct echo_session_info *session = data;
738         OBD_SLAB_FREE_PTR(session, echo_session_kmem);
739 }
740
/** lct_exit for the session key: nothing to do. */
static void echo_session_key_exit(const struct lu_context *ctx,
                                 struct lu_context_key *key, void *data)
{
        (void)ctx;
        (void)key;
        (void)data;
}
745
746 static struct lu_context_key echo_session_key = {
747         .lct_tags = LCT_SESSION,
748         .lct_init = echo_session_key_init,
749         .lct_fini = echo_session_key_fini,
750         .lct_exit = echo_session_key_exit
751 };
752
753 LU_TYPE_INIT_FINI(echo, &echo_thread_key, &echo_session_key);
754
755 #define ECHO_SEQ_WIDTH 0xffffffff
756 static int echo_fid_init(struct echo_device *ed, char *obd_name,
757                          struct md_site *ms)
758 {
759         char *prefix;
760         int rc;
761         ENTRY;
762
763         OBD_ALLOC_PTR(ed->ed_cl_seq);
764         if (ed->ed_cl_seq == NULL)
765                 RETURN(-ENOMEM);
766
767         OBD_ALLOC(prefix, MAX_OBD_NAME + 5);
768         if (prefix == NULL)
769                 GOTO(out_free_seq, rc = -ENOMEM);
770
771         snprintf(prefix, MAX_OBD_NAME + 5, "srv-%s", obd_name);
772
773         /* Init client side sequence-manager */
774         rc = seq_client_init(ed->ed_cl_seq, NULL,
775                              LUSTRE_SEQ_METADATA,
776                              prefix, ms->ms_server_seq);
777         ed->ed_cl_seq->lcs_width = ECHO_SEQ_WIDTH;
778         OBD_FREE(prefix, MAX_OBD_NAME + 5);
779         if (rc)
780                 GOTO(out_free_seq, rc);
781
782         RETURN(0);
783
784 out_free_seq:
785         OBD_FREE_PTR(ed->ed_cl_seq);
786         ed->ed_cl_seq = NULL;
787         RETURN(rc);
788 }
789
790 static int echo_fid_fini(struct obd_device *obddev)
791 {
792         struct echo_device *ed = obd2echo_dev(obddev);
793         ENTRY;
794
795         if (ed->ed_cl_seq != NULL) {
796                 seq_client_fini(ed->ed_cl_seq);
797                 OBD_FREE_PTR(ed->ed_cl_seq);
798                 ed->ed_cl_seq = NULL;
799         }
800
801         RETURN(0);
802 }
803
804 static struct lu_device *echo_device_alloc(const struct lu_env *env,
805                                            struct lu_device_type *t,
806                                            struct lustre_cfg *cfg)
807 {
808         struct lu_device   *next;
809         struct echo_device *ed;
810         struct cl_device   *cd;
811         struct obd_device  *obd = NULL; /* to keep compiler happy */
812         struct obd_device  *tgt;
813         const char *tgt_type_name;
814         int rc;
815         int cleanup = 0;
816         ENTRY;
817
818         OBD_ALLOC_PTR(ed);
819         if (ed == NULL)
820                 GOTO(out, rc = -ENOMEM);
821
822         cleanup = 1;
823         cd = &ed->ed_cl;
824         rc = cl_device_init(cd, t);
825         if (rc)
826                 GOTO(out, rc);
827
828         cd->cd_lu_dev.ld_ops = &echo_device_lu_ops;
829         cd->cd_ops = &echo_device_cl_ops;
830
831         cleanup = 2;
832         obd = class_name2obd(lustre_cfg_string(cfg, 0));
833         LASSERT(obd != NULL);
834         LASSERT(env != NULL);
835
836         tgt = class_name2obd(lustre_cfg_string(cfg, 1));
837         if (tgt == NULL) {
838                 CERROR("Can not find tgt device %s\n",
839                         lustre_cfg_string(cfg, 1));
840                 GOTO(out, rc = -ENODEV);
841         }
842
843         next = tgt->obd_lu_dev;
844         if (!strcmp(tgt->obd_type->typ_name, LUSTRE_MDT_NAME)) {
845                 ed->ed_next_ismd = 1;
846         } else {
847                 ed->ed_next_ismd = 0;
848                 rc = echo_site_init(env, ed);
849                 if (rc)
850                         GOTO(out, rc);
851         }
852         cleanup = 3;
853
854         rc = echo_client_setup(env, obd, cfg);
855         if (rc)
856                 GOTO(out, rc);
857
858         ed->ed_ec = &obd->u.echo_client;
859         cleanup = 4;
860
861         if (ed->ed_next_ismd) {
862                 /* Suppose to connect to some Metadata layer */
863                 struct lu_site *ls;
864                 struct lu_device *ld;
865                 int    found = 0;
866
867                 if (next == NULL) {
868                         CERROR("%s is not lu device type!\n",
869                                lustre_cfg_string(cfg, 1));
870                         GOTO(out, rc = -EINVAL);
871                 }
872
873                 tgt_type_name = lustre_cfg_string(cfg, 2);
874                 if (!tgt_type_name) {
875                         CERROR("%s no type name for echo %s setup\n",
876                                 lustre_cfg_string(cfg, 1),
877                                 tgt->obd_type->typ_name);
878                         GOTO(out, rc = -EINVAL);
879                 }
880
881                 ls = next->ld_site;
882
883                 cfs_spin_lock(&ls->ls_ld_lock);
884                 cfs_list_for_each_entry(ld, &ls->ls_ld_linkage, ld_linkage) {
885                         if (strcmp(ld->ld_type->ldt_name, tgt_type_name) == 0) {
886                                 found = 1;
887                                 break;
888                         }
889                 }
890                 cfs_spin_unlock(&ls->ls_ld_lock);
891
892                 if (found == 0) {
893                         CERROR("%s is not lu device type!\n",
894                                lustre_cfg_string(cfg, 1));
895                         GOTO(out, rc = -EINVAL);
896                 }
897
898                 next = ld;
899                 /* For MD echo client, it will use the site in MDS stack */
900                 ed->ed_site_myself.cs_lu = *ls;
901                 ed->ed_site = &ed->ed_site_myself;
902                 ed->ed_cl.cd_lu_dev.ld_site = &ed->ed_site_myself.cs_lu;
903                 rc = echo_fid_init(ed, obd->obd_name, lu_site2md(ls));
904                 if (rc) {
905                         CERROR("echo fid init error %d\n", rc);
906                         GOTO(out, rc);
907                 }
908         } else {
909                  /* if echo client is to be stacked upon ost device, the next is
910                   * NULL since ost is not a clio device so far */
911                 if (next != NULL && !lu_device_is_cl(next))
912                         next = NULL;
913
914                 tgt_type_name = tgt->obd_type->typ_name;
915                 if (next != NULL) {
916                         LASSERT(next != NULL);
917                         if (next->ld_site != NULL)
918                                 GOTO(out, rc = -EBUSY);
919
920                         next->ld_site = &ed->ed_site->cs_lu;
921                         rc = next->ld_type->ldt_ops->ldto_device_init(env, next,
922                                                      next->ld_type->ldt_name,
923                                                      NULL);
924                         if (rc)
925                                 GOTO(out, rc);
926
927                         /* Tricky case, I have to determine the obd type since
928                          * CLIO uses the different parameters to initialize
929                          * objects for lov & osc. */
930                         if (strcmp(tgt_type_name, LUSTRE_LOV_NAME) == 0)
931                                 ed->ed_next_islov = 1;
932                         else
933                                 LASSERT(strcmp(tgt_type_name,
934                                                LUSTRE_OSC_NAME) == 0);
935                 } else
936                         LASSERT(strcmp(tgt_type_name, LUSTRE_OST_NAME) == 0);
937         }
938
939         ed->ed_next = next;
940         RETURN(&cd->cd_lu_dev);
941 out:
942         switch(cleanup) {
943         case 4: {
944                 int rc2;
945                 rc2 = echo_client_cleanup(obd);
946                 if (rc2)
947                         CERROR("Cleanup obd device %s error(%d)\n",
948                                obd->obd_name, rc2);
949         }
950
951         case 3:
952                 echo_site_fini(env, ed);
953         case 2:
954                 cl_device_fini(&ed->ed_cl);
955         case 1:
956                 OBD_FREE_PTR(ed);
957         case 0:
958         default:
959                 break;
960         }
961         return(ERR_PTR(rc));
962 }
963
/**
 * ldto_device_init() slot of the echo client device type.
 *
 * The body is an unconditional LBUG(), i.e. reaching this function is
 * treated as a bug.  The return statement only satisfies the signature.
 */
static int echo_device_init(const struct lu_env *env, struct lu_device *d,
                            const char *name, struct lu_device *next)
{
        LBUG();

        return 0;
}
970
971 static struct lu_device *echo_device_fini(const struct lu_env *env,
972                                           struct lu_device *d)
973 {
974         struct echo_device *ed = cl2echo_dev(lu2cl_dev(d));
975         struct lu_device *next = ed->ed_next;
976
977         while (next && !ed->ed_next_ismd)
978                 next = next->ld_type->ldt_ops->ldto_device_fini(env, next);
979         return NULL;
980 }
981
982 static void echo_lock_release(const struct lu_env *env,
983                               struct echo_lock *ecl,
984                               int still_used)
985 {
986         struct cl_lock *clk = echo_lock2cl(ecl);
987
988         cl_lock_get(clk);
989         cl_unuse(env, clk);
990         cl_lock_release(env, clk, "ec enqueue", ecl->el_object);
991         if (!still_used) {
992                 cl_lock_mutex_get(env, clk);
993                 cl_lock_cancel(env, clk);
994                 cl_lock_delete(env, clk);
995                 cl_lock_mutex_put(env, clk);
996         }
997         cl_lock_put(env, clk);
998 }
999
1000 static struct lu_device *echo_device_free(const struct lu_env *env,
1001                                           struct lu_device *d)
1002 {
1003         struct echo_device     *ed   = cl2echo_dev(lu2cl_dev(d));
1004         struct echo_client_obd *ec   = ed->ed_ec;
1005         struct echo_object     *eco;
1006         struct lu_device       *next = ed->ed_next;
1007
1008         CDEBUG(D_INFO, "echo device:%p is going to be freed, next = %p\n",
1009                ed, next);
1010
1011         lu_site_purge(env, &ed->ed_site->cs_lu, -1);
1012
1013         /* check if there are objects still alive.
1014          * It shouldn't have any object because lu_site_purge would cleanup
1015          * all of cached objects. Anyway, probably the echo device is being
1016          * parallelly accessed.
1017          */
1018         cfs_spin_lock(&ec->ec_lock);
1019         cfs_list_for_each_entry(eco, &ec->ec_objects, eo_obj_chain)
1020                 eco->eo_deleted = 1;
1021         cfs_spin_unlock(&ec->ec_lock);
1022
1023         /* purge again */
1024         lu_site_purge(env, &ed->ed_site->cs_lu, -1);
1025
1026         CDEBUG(D_INFO,
1027                "Waiting for the reference of echo object to be dropped\n");
1028
1029         /* Wait for the last reference to be dropped. */
1030         cfs_spin_lock(&ec->ec_lock);
1031         while (!cfs_list_empty(&ec->ec_objects)) {
1032                 cfs_spin_unlock(&ec->ec_lock);
1033                 CERROR("echo_client still has objects at cleanup time, "
1034                        "wait for 1 second\n");
1035                 cfs_schedule_timeout_and_set_state(CFS_TASK_UNINT,
1036                                                    cfs_time_seconds(1));
1037                 lu_site_purge(env, &ed->ed_site->cs_lu, -1);
1038                 cfs_spin_lock(&ec->ec_lock);
1039         }
1040         cfs_spin_unlock(&ec->ec_lock);
1041
1042         LASSERT(cfs_list_empty(&ec->ec_locks));
1043
1044         CDEBUG(D_INFO, "No object exists, exiting...\n");
1045
1046         echo_client_cleanup(d->ld_obd);
1047         echo_fid_fini(d->ld_obd);
1048         while (next && !ed->ed_next_ismd)
1049                 next = next->ld_type->ldt_ops->ldto_device_free(env, next);
1050
1051         LASSERT(ed->ed_site == lu2cl_site(d->ld_site));
1052         echo_site_fini(env, ed);
1053         cl_device_fini(&ed->ed_cl);
1054         OBD_FREE_PTR(ed);
1055
1056         return NULL;
1057 }
1058
1059 static const struct lu_device_type_operations echo_device_type_ops = {
1060         .ldto_init = echo_type_init,
1061         .ldto_fini = echo_type_fini,
1062
1063         .ldto_start = echo_type_start,
1064         .ldto_stop  = echo_type_stop,
1065
1066         .ldto_device_alloc = echo_device_alloc,
1067         .ldto_device_free  = echo_device_free,
1068         .ldto_device_init  = echo_device_init,
1069         .ldto_device_fini  = echo_device_fini
1070 };
1071
1072 static struct lu_device_type echo_device_type = {
1073         .ldt_tags     = LU_DEVICE_CL,
1074         .ldt_name     = LUSTRE_ECHO_CLIENT_NAME,
1075         .ldt_ops      = &echo_device_type_ops,
1076         .ldt_ctx_tags = LCT_CL_THREAD | LCT_MD_THREAD | LCT_DT_THREAD,
1077 };
1078 /** @} echo_init */
1079
1080 /** \defgroup echo_exports Exported operations
1081  *
1082  * exporting functions to echo client
1083  *
1084  * @{
1085  */
1086
1087 /* Interfaces to echo client obd device */
1088 static struct echo_object *cl_echo_object_find(struct echo_device *d,
1089                                                struct lov_stripe_md **lsmp)
1090 {
1091         struct lu_env *env;
1092         struct echo_thread_info *info;
1093         struct echo_object_conf *conf;
1094         struct lov_stripe_md    *lsm;
1095         struct echo_object *eco;
1096         struct cl_object   *obj;
1097         struct lu_fid *fid;
1098         int refcheck;
1099         ENTRY;
1100
1101         LASSERT(lsmp);
1102         lsm = *lsmp;
1103         LASSERT(lsm);
1104         LASSERT(lsm->lsm_object_id);
1105
1106         /* Never return an object if the obd is to be freed. */
1107         if (echo_dev2cl(d)->cd_lu_dev.ld_obd->obd_stopping)
1108                 RETURN(ERR_PTR(-ENODEV));
1109
1110         env = cl_env_get(&refcheck);
1111         if (IS_ERR(env))
1112                 RETURN((void *)env);
1113
1114         info = echo_env_info(env);
1115         conf = &info->eti_conf;
1116         if (d->ed_next) {
1117                 if (!d->ed_next_islov) {
1118                         struct lov_oinfo *oinfo = lsm->lsm_oinfo[0];
1119                         LASSERT(oinfo != NULL);
1120                         oinfo->loi_id = lsm->lsm_object_id;
1121                         oinfo->loi_seq = lsm->lsm_object_seq;
1122                         conf->eoc_cl.u.coc_oinfo = oinfo;
1123                 } else {
1124                         struct lustre_md *md;
1125                         md = &info->eti_md;
1126                         memset(md, 0, sizeof *md);
1127                         md->lsm = lsm;
1128                         conf->eoc_cl.u.coc_md = md;
1129                 }
1130         }
1131         conf->eoc_md = lsmp;
1132
1133         fid  = &info->eti_fid;
1134         lsm2fid(lsm, fid);
1135
1136         /* In the function below, .hs_keycmp resolves to
1137          * lu_obj_hop_keycmp() */
1138         /* coverity[overrun-buffer-val] */
1139         obj = cl_object_find(env, echo_dev2cl(d), fid, &conf->eoc_cl);
1140         if (IS_ERR(obj))
1141                 GOTO(out, eco = (void*)obj);
1142
1143         eco = cl2echo_obj(obj);
1144         if (eco->eo_deleted) {
1145                 cl_object_put(env, obj);
1146                 eco = ERR_PTR(-EAGAIN);
1147         }
1148
1149 out:
1150         cl_env_put(env, &refcheck);
1151         RETURN(eco);
1152 }
1153
1154 static int cl_echo_object_put(struct echo_object *eco)
1155 {
1156         struct lu_env *env;
1157         struct cl_object *obj = echo_obj2cl(eco);
1158         int refcheck;
1159         ENTRY;
1160
1161         env = cl_env_get(&refcheck);
1162         if (IS_ERR(env))
1163                 RETURN(PTR_ERR(env));
1164
1165         /* an external function to kill an object? */
1166         if (eco->eo_deleted) {
1167                 struct lu_object_header *loh = obj->co_lu.lo_header;
1168                 LASSERT(&eco->eo_hdr == luh2coh(loh));
1169                 cfs_set_bit(LU_OBJECT_HEARD_BANSHEE, &loh->loh_flags);
1170         }
1171
1172         cl_object_put(env, obj);
1173         cl_env_put(env, &refcheck);
1174         RETURN(0);
1175 }
1176
1177 static int cl_echo_enqueue0(struct lu_env *env, struct echo_object *eco,
1178                             obd_off start, obd_off end, int mode,
1179                             __u64 *cookie , __u32 enqflags)
1180 {
1181         struct cl_io *io;
1182         struct cl_lock *lck;
1183         struct cl_object *obj;
1184         struct cl_lock_descr *descr;
1185         struct echo_thread_info *info;
1186         int rc = -ENOMEM;
1187         ENTRY;
1188
1189         info = echo_env_info(env);
1190         io = &info->eti_io;
1191         descr = &info->eti_descr;
1192         obj = echo_obj2cl(eco);
1193
1194         descr->cld_obj   = obj;
1195         descr->cld_start = cl_index(obj, start);
1196         descr->cld_end   = cl_index(obj, end);
1197         descr->cld_mode  = mode == LCK_PW ? CLM_WRITE : CLM_READ;
1198         descr->cld_enq_flags = enqflags;
1199         io->ci_obj = obj;
1200
1201         lck = cl_lock_request(env, io, descr, "ec enqueue", eco);
1202         if (lck) {
1203                 struct echo_client_obd *ec = eco->eo_dev->ed_ec;
1204                 struct echo_lock *el;
1205
1206                 rc = cl_wait(env, lck);
1207                 if (rc == 0) {
1208                         el = cl2echo_lock(cl_lock_at(lck, &echo_device_type));
1209                         cfs_spin_lock(&ec->ec_lock);
1210                         if (cfs_list_empty(&el->el_chain)) {
1211                                 cfs_list_add(&el->el_chain, &ec->ec_locks);
1212                                 el->el_cookie = ++ec->ec_unique;
1213                         }
1214                         cfs_atomic_inc(&el->el_refcount);
1215                         *cookie = el->el_cookie;
1216                         cfs_spin_unlock(&ec->ec_lock);
1217                 } else
1218                         cl_lock_release(env, lck, "ec enqueue", cfs_current());
1219         }
1220         RETURN(rc);
1221 }
1222
1223 static int cl_echo_enqueue(struct echo_object *eco, obd_off start, obd_off end,
1224                            int mode, __u64 *cookie)
1225 {
1226         struct echo_thread_info *info;
1227         struct lu_env *env;
1228         struct cl_io *io;
1229         int refcheck;
1230         int result;
1231         ENTRY;
1232
1233         env = cl_env_get(&refcheck);
1234         if (IS_ERR(env))
1235                 RETURN(PTR_ERR(env));
1236
1237         info = echo_env_info(env);
1238         io = &info->eti_io;
1239
1240         io->ci_ignore_layout = 1;
1241         result = cl_io_init(env, io, CIT_MISC, echo_obj2cl(eco));
1242         if (result < 0)
1243                 GOTO(out, result);
1244         LASSERT(result == 0);
1245
1246         result = cl_echo_enqueue0(env, eco, start, end, mode, cookie, 0);
1247         cl_io_fini(env, io);
1248
1249         EXIT;
1250 out:
1251         cl_env_put(env, &refcheck);
1252         return result;
1253 }
1254
1255 static int cl_echo_cancel0(struct lu_env *env, struct echo_device *ed,
1256                            __u64 cookie)
1257 {
1258         struct echo_client_obd *ec = ed->ed_ec;
1259         struct echo_lock       *ecl = NULL;
1260         cfs_list_t             *el;
1261         int found = 0, still_used = 0;
1262         ENTRY;
1263
1264         LASSERT(ec != NULL);
1265         cfs_spin_lock (&ec->ec_lock);
1266         cfs_list_for_each (el, &ec->ec_locks) {
1267                 ecl = cfs_list_entry (el, struct echo_lock, el_chain);
1268                 CDEBUG(D_INFO, "ecl: %p, cookie: "LPX64"\n", ecl, ecl->el_cookie);
1269                 found = (ecl->el_cookie == cookie);
1270                 if (found) {
1271                         if (cfs_atomic_dec_and_test(&ecl->el_refcount))
1272                                 cfs_list_del_init(&ecl->el_chain);
1273                         else
1274                                 still_used = 1;
1275                         break;
1276                 }
1277         }
1278         cfs_spin_unlock (&ec->ec_lock);
1279
1280         if (!found)
1281                 RETURN(-ENOENT);
1282
1283         echo_lock_release(env, ecl, still_used);
1284         RETURN(0);
1285 }
1286
1287 static int cl_echo_cancel(struct echo_device *ed, __u64 cookie)
1288 {
1289         struct lu_env *env;
1290         int refcheck;
1291         int rc;
1292         ENTRY;
1293
1294         env = cl_env_get(&refcheck);
1295         if (IS_ERR(env))
1296                 RETURN(PTR_ERR(env));
1297
1298         rc = cl_echo_cancel0(env, ed, cookie);
1299
1300         cl_env_put(env, &refcheck);
1301         RETURN(rc);
1302 }
1303
1304 static int cl_echo_async_brw(const struct lu_env *env, struct cl_io *io,
1305                              enum cl_req_type unused, struct cl_2queue *queue)
1306 {
1307         struct cl_page *clp;
1308         struct cl_page *temp;
1309         int result = 0;
1310         ENTRY;
1311
1312         cl_page_list_for_each_safe(clp, temp, &queue->c2_qin) {
1313                 int rc;
1314                 rc = cl_page_cache_add(env, io, clp, CRT_WRITE);
1315                 if (rc == 0)
1316                         continue;
1317                 result = result ?: rc;
1318         }
1319         RETURN(result);
1320 }
1321
1322 static int cl_echo_object_brw(struct echo_object *eco, int rw, obd_off offset,
1323                               cfs_page_t **pages, int npages, int async)
1324 {
1325         struct lu_env           *env;
1326         struct echo_thread_info *info;
1327         struct cl_object        *obj = echo_obj2cl(eco);
1328         struct echo_device      *ed  = eco->eo_dev;
1329         struct cl_2queue        *queue;
1330         struct cl_io            *io;
1331         struct cl_page          *clp;
1332         struct lustre_handle    lh = { 0 };
1333         int page_size = cl_page_size(obj);
1334         int refcheck;
1335         int rc;
1336         int i;
1337         ENTRY;
1338
1339         LASSERT((offset & ~CFS_PAGE_MASK) == 0);
1340         LASSERT(ed->ed_next != NULL);
1341         env = cl_env_get(&refcheck);
1342         if (IS_ERR(env))
1343                 RETURN(PTR_ERR(env));
1344
1345         info    = echo_env_info(env);
1346         io      = &info->eti_io;
1347         queue   = &info->eti_queue;
1348
1349         cl_2queue_init(queue);
1350
1351         io->ci_ignore_layout = 1;
1352         rc = cl_io_init(env, io, CIT_MISC, obj);
1353         if (rc < 0)
1354                 GOTO(out, rc);
1355         LASSERT(rc == 0);
1356
1357
1358         rc = cl_echo_enqueue0(env, eco, offset,
1359                               offset + npages * CFS_PAGE_SIZE - 1,
1360                               rw == READ ? LCK_PR : LCK_PW, &lh.cookie,
1361                               CEF_NEVER);
1362         if (rc < 0)
1363                 GOTO(error_lock, rc);
1364
1365         for (i = 0; i < npages; i++) {
1366                 LASSERT(pages[i]);
1367                 clp = cl_page_find(env, obj, cl_index(obj, offset),
1368                                    pages[i], CPT_TRANSIENT);
1369                 if (IS_ERR(clp)) {
1370                         rc = PTR_ERR(clp);
1371                         break;
1372                 }
1373                 LASSERT(clp->cp_type == CPT_TRANSIENT);
1374
1375                 rc = cl_page_own(env, io, clp);
1376                 if (rc) {
1377                         LASSERT(clp->cp_state == CPS_FREEING);
1378                         cl_page_put(env, clp);
1379                         break;
1380                 }
1381
1382                 cl_2queue_add(queue, clp);
1383
1384                 /* drop the reference count for cl_page_find, so that the page
1385                  * will be freed in cl_2queue_fini. */
1386                 cl_page_put(env, clp);
1387                 cl_page_clip(env, clp, 0, page_size);
1388
1389                 offset += page_size;
1390         }
1391
1392         if (rc == 0) {
1393                 enum cl_req_type typ = rw == READ ? CRT_READ : CRT_WRITE;
1394
1395                 async = async && (typ == CRT_WRITE);
1396                 if (async)
1397                         rc = cl_echo_async_brw(env, io, typ, queue);
1398                 else
1399                         rc = cl_io_submit_sync(env, io, typ, queue, 0);
1400                 CDEBUG(D_INFO, "echo_client %s write returns %d\n",
1401                        async ? "async" : "sync", rc);
1402         }
1403
1404         cl_echo_cancel0(env, ed, lh.cookie);
1405         EXIT;
1406 error_lock:
1407         cl_2queue_discard(env, io, queue);
1408         cl_2queue_disown(env, io, queue);
1409         cl_2queue_fini(env, queue);
1410         cl_io_fini(env, io);
1411 out:
1412         cl_env_put(env, &refcheck);
1413         return rc;
1414 }
1415 /** @} echo_exports */
1416
1417
1418 static obd_id last_object_id;
1419
1420 static int
1421 echo_copyout_lsm (struct lov_stripe_md *lsm, void *_ulsm, int ulsm_nob)
1422 {
1423         struct lov_stripe_md *ulsm = _ulsm;
1424         int nob, i;
1425
1426         nob = offsetof (struct lov_stripe_md, lsm_oinfo[lsm->lsm_stripe_count]);
1427         if (nob > ulsm_nob)
1428                 return (-EINVAL);
1429
1430         if (cfs_copy_to_user (ulsm, lsm, sizeof(ulsm)))
1431                 return (-EFAULT);
1432
1433         for (i = 0; i < lsm->lsm_stripe_count; i++) {
1434                 if (cfs_copy_to_user (ulsm->lsm_oinfo[i], lsm->lsm_oinfo[i],
1435                                       sizeof(lsm->lsm_oinfo[0])))
1436                         return (-EFAULT);
1437         }
1438         return 0;
1439 }
1440
1441 static int
1442 echo_copyin_lsm (struct echo_device *ed, struct lov_stripe_md *lsm,
1443                  void *ulsm, int ulsm_nob)
1444 {
1445         struct echo_client_obd *ec = ed->ed_ec;
1446         int                     i;
1447
1448         if (ulsm_nob < sizeof (*lsm))
1449                 return (-EINVAL);
1450
1451         if (cfs_copy_from_user (lsm, ulsm, sizeof (*lsm)))
1452                 return (-EFAULT);
1453
1454         if (lsm->lsm_stripe_count > ec->ec_nstripes ||
1455             lsm->lsm_magic != LOV_MAGIC ||
1456             (lsm->lsm_stripe_size & (~CFS_PAGE_MASK)) != 0 ||
1457             ((__u64)lsm->lsm_stripe_size * lsm->lsm_stripe_count > ~0UL))
1458                 return (-EINVAL);
1459
1460
1461         for (i = 0; i < lsm->lsm_stripe_count; i++) {
1462                 if (cfs_copy_from_user(lsm->lsm_oinfo[i],
1463                                        ((struct lov_stripe_md *)ulsm)-> \
1464                                        lsm_oinfo[i],
1465                                        sizeof(lsm->lsm_oinfo[0])))
1466                         return (-EFAULT);
1467         }
1468         return (0);
1469 }
1470
1471 static inline void echo_md_build_name(struct lu_name *lname, char *name,
1472                                       __u64 id)
1473 {
1474         sprintf(name, LPU64, id);
1475         lname->ln_name = name;
1476         lname->ln_namelen = strlen(name);
1477 }
1478
1479 /* similar to mdt_attr_get_complex */
1480 static int echo_big_lmm_get(const struct lu_env *env, struct md_object *o,
1481                             struct md_attr *ma)
1482 {
1483         struct echo_thread_info *info = echo_env_info(env);
1484         int                      rc;
1485
1486         ENTRY;
1487
1488         LASSERT(ma->ma_lmm_size > 0);
1489
1490         rc = mo_xattr_get(env, o, &LU_BUF_NULL, XATTR_NAME_LOV);
1491         if (rc < 0)
1492                 RETURN(rc);
1493
1494         /* big_lmm may need to be grown */
1495         if (info->eti_big_lmmsize < rc) {
1496                 int size = size_roundup_power2(rc);
1497
1498                 if (info->eti_big_lmmsize > 0) {
1499                         /* free old buffer */
1500                         LASSERT(info->eti_big_lmm);
1501                         OBD_FREE_LARGE(info->eti_big_lmm,
1502                                        info->eti_big_lmmsize);
1503                         info->eti_big_lmm = NULL;
1504                         info->eti_big_lmmsize = 0;
1505                 }
1506
1507                 OBD_ALLOC_LARGE(info->eti_big_lmm, size);
1508                 if (info->eti_big_lmm == NULL)
1509                         RETURN(-ENOMEM);
1510                 info->eti_big_lmmsize = size;
1511         }
1512         LASSERT(info->eti_big_lmmsize >= rc);
1513
1514         info->eti_buf.lb_buf = info->eti_big_lmm;
1515         info->eti_buf.lb_len = info->eti_big_lmmsize;
1516         rc = mo_xattr_get(env, o, &info->eti_buf, XATTR_NAME_LOV);
1517         if (rc < 0)
1518                 RETURN(rc);
1519
1520         ma->ma_valid |= MA_LOV;
1521         ma->ma_lmm = info->eti_big_lmm;
1522         ma->ma_lmm_size = rc;
1523
1524         RETURN(0);
1525 }
1526
1527 int echo_attr_get_complex(const struct lu_env *env, struct md_object *next,
1528                           struct md_attr *ma)
1529 {
1530         struct echo_thread_info *info = echo_env_info(env);
1531         struct lu_buf           *buf = &info->eti_buf;
1532         cfs_umode_t              mode = lu_object_attr(&next->mo_lu);
1533         int                      need = ma->ma_need;
1534         int                      rc = 0, rc2;
1535
1536         ENTRY;
1537
1538         ma->ma_valid = 0;
1539
1540         if (need & MA_INODE) {
1541                 ma->ma_need = MA_INODE;
1542                 rc = mo_attr_get(env, next, ma);
1543                 if (rc)
1544                         GOTO(out, rc);
1545                 ma->ma_valid |= MA_INODE;
1546         }
1547
1548         if (need & MA_LOV) {
1549                 if (S_ISREG(mode) || S_ISDIR(mode)) {
1550                         LASSERT(ma->ma_lmm_size > 0);
1551                         buf->lb_buf = ma->ma_lmm;
1552                         buf->lb_len = ma->ma_lmm_size;
1553                         rc2 = mo_xattr_get(env, next, buf, XATTR_NAME_LOV);
1554                         if (rc2 > 0) {
1555                                 ma->ma_lmm_size = rc2;
1556                                 ma->ma_valid |= MA_LOV;
1557                         } else if (rc2 == -ENODATA) {
1558                                 /* no LOV EA */
1559                                 ma->ma_lmm_size = 0;
1560                         } else if (rc2 == -ERANGE) {
1561                                 rc2 = echo_big_lmm_get(env, next, ma);
1562                                 if (rc2 < 0)
1563                                         GOTO(out, rc = rc2);
1564                         } else {
1565                                 GOTO(out, rc = rc2);
1566                         }
1567                 }
1568         }
1569
1570 #ifdef CONFIG_FS_POSIX_ACL
1571         if (need & MA_ACL_DEF && S_ISDIR(mode)) {
1572                 buf->lb_buf = ma->ma_acl;
1573                 buf->lb_len = ma->ma_acl_size;
1574                 rc2 = mo_xattr_get(env, next, buf, XATTR_NAME_ACL_DEFAULT);
1575                 if (rc2 > 0) {
1576                         ma->ma_acl_size = rc2;
1577                         ma->ma_valid |= MA_ACL_DEF;
1578                 } else if (rc2 == -ENODATA) {
1579                         /* no ACLs */
1580                         ma->ma_acl_size = 0;
1581                 } else {
1582                         GOTO(out, rc = rc2);
1583                 }
1584         }
1585 #endif
1586 out:
1587         ma->ma_need = need;
1588         CDEBUG(D_INODE, "after getattr rc = %d, ma_valid = "LPX64" ma_lmm=%p\n",
1589                rc, ma->ma_valid, ma->ma_lmm);
1590         RETURN(rc);
1591 }
1592
1593 static int
1594 echo_md_create_internal(const struct lu_env *env, struct echo_device *ed,
1595                         struct md_object *parent, struct lu_fid *fid,
1596                         struct lu_name *lname, struct md_op_spec *spec,
1597                         struct md_attr *ma)
1598 {
1599         struct lu_object        *ec_child, *child;
1600         struct lu_device        *ld = ed->ed_next;
1601         struct echo_thread_info *info = echo_env_info(env);
1602         struct lu_fid           *fid2 = &info->eti_fid2;
1603         struct lu_object_conf    conf = { .loc_flags = LOC_F_NEW };
1604         int                      rc;
1605
1606         ENTRY;
1607
1608         rc = mdo_lookup(env, parent, lname, fid2, spec);
1609         if (rc == 0)
1610                 return -EEXIST;
1611         else if (rc != -ENOENT)
1612                 return rc;
1613
1614         ec_child = lu_object_find_at(env, &ed->ed_cl.cd_lu_dev,
1615                                      fid, &conf);
1616         if (IS_ERR(ec_child)) {
1617                 CERROR("Can not find the child "DFID": rc = %ld\n", PFID(fid),
1618                         PTR_ERR(ec_child));
1619                 RETURN(PTR_ERR(ec_child));
1620         }
1621
1622         child = lu_object_locate(ec_child->lo_header, ld->ld_type);
1623         if (child == NULL) {
1624                 CERROR("Can not locate the child "DFID"\n", PFID(fid));
1625                 GOTO(out_put, rc = -EINVAL);
1626         }
1627
1628         CDEBUG(D_RPCTRACE, "Start creating object "DFID" %s %p\n",
1629                PFID(lu_object_fid(&parent->mo_lu)), lname->ln_name, parent);
1630
1631         /*
1632          * Do not perform lookup sanity check. We know that name does not exist.
1633          */
1634         spec->sp_cr_lookup = 0;
1635         rc = mdo_create(env, parent, lname, lu2md(child), spec, ma);
1636         if (rc) {
1637                 CERROR("Can not create child "DFID": rc = %d\n", PFID(fid), rc);
1638                 GOTO(out_put, rc);
1639         }
1640         CDEBUG(D_RPCTRACE, "End creating object "DFID" %s %p rc  = %d\n",
1641                PFID(lu_object_fid(&parent->mo_lu)), lname->ln_name, parent, rc);
1642         EXIT;
1643 out_put:
1644         lu_object_put(env, ec_child);
1645         return rc;
1646 }
1647
1648 static int echo_set_lmm_size(const struct lu_env *env, struct lu_device *ld,
1649                              struct md_attr *ma)
1650 {
1651         struct echo_thread_info *info = echo_env_info(env);
1652
1653         if (strcmp(ld->ld_type->ldt_name, LUSTRE_MDD_NAME)) {
1654                 ma->ma_lmm = (void *)&info->eti_lmm;
1655                 ma->ma_lmm_size = sizeof(info->eti_lmm);
1656         } else {
1657                 LASSERT(info->eti_big_lmmsize);
1658                 ma->ma_lmm = info->eti_big_lmm;
1659                 ma->ma_lmm_size = info->eti_big_lmmsize;
1660         }
1661
1662         return 0;
1663 }
1664
1665 static int echo_create_md_object(const struct lu_env *env,
1666                                  struct echo_device *ed,
1667                                  struct lu_object *ec_parent,
1668                                  struct lu_fid *fid,
1669                                  char *name, int namelen,
1670                                  __u64 id, __u32 mode, int count,
1671                                  int stripe_count, int stripe_offset)
1672 {
1673         struct lu_object        *parent;
1674         struct echo_thread_info *info = echo_env_info(env);
1675         struct lu_name          *lname = &info->eti_lname;
1676         struct md_op_spec       *spec = &info->eti_spec;
1677         struct md_attr          *ma = &info->eti_ma;
1678         struct lu_device        *ld = ed->ed_next;
1679         int                      rc = 0;
1680         int                      i;
1681
1682         ENTRY;
1683
1684         parent = lu_object_locate(ec_parent->lo_header, ld->ld_type);
1685         if (parent == NULL)
1686                 RETURN(-ENXIO);
1687
1688         memset(ma, 0, sizeof(*ma));
1689         memset(spec, 0, sizeof(*spec));
1690         if (stripe_count != 0) {
1691                 spec->sp_cr_flags |= FMODE_WRITE;
1692                 echo_set_lmm_size(env, ld, ma);
1693                 if (stripe_count != -1) {
1694                         struct lov_user_md_v3 *lum = &info->eti_lum;
1695
1696                         lum->lmm_magic = LOV_USER_MAGIC_V3;
1697                         lum->lmm_stripe_count = stripe_count;
1698                         lum->lmm_stripe_offset = stripe_offset;
1699                         lum->lmm_pattern = 0;
1700                         spec->u.sp_ea.eadata = lum;
1701                         spec->u.sp_ea.eadatalen = sizeof(*lum);
1702                         spec->sp_cr_flags |= MDS_OPEN_HAS_EA;
1703                 }
1704         }
1705
1706         ma->ma_attr.la_mode = mode;
1707         ma->ma_attr.la_valid = LA_CTIME | LA_MODE;
1708         ma->ma_attr.la_ctime = cfs_time_current_64();
1709
1710         if (name != NULL) {
1711                 lname->ln_name = name;
1712                 lname->ln_namelen = namelen;
1713                 /* If name is specified, only create one object by name */
1714                 rc = echo_md_create_internal(env, ed, lu2md(parent), fid, lname,
1715                                              spec, ma);
1716                 RETURN(rc);
1717         }
1718
1719         /* Create multiple object sequenced by id */
1720         for (i = 0; i < count; i++) {
1721                 char *tmp_name = info->eti_name;
1722
1723                 echo_md_build_name(lname, tmp_name, id);
1724
1725                 rc = echo_md_create_internal(env, ed, lu2md(parent), fid, lname,
1726                                              spec, ma);
1727                 if (rc) {
1728                         CERROR("Can not create child %s: rc = %d\n", tmp_name,
1729                                 rc);
1730                         break;
1731                 }
1732                 id++;
1733                 fid->f_oid++;
1734         }
1735
1736         RETURN(rc);
1737 }
1738
1739 static struct lu_object *echo_md_lookup(const struct lu_env *env,
1740                                         struct echo_device *ed,
1741                                         struct md_object *parent,
1742                                         struct lu_name *lname)
1743 {
1744         struct echo_thread_info *info = echo_env_info(env);
1745         struct lu_fid           *fid = &info->eti_fid;
1746         struct lu_object        *child;
1747         int    rc;
1748         ENTRY;
1749
1750         CDEBUG(D_INFO, "lookup %s in parent "DFID" %p\n", lname->ln_name,
1751                PFID(fid), parent);
1752         rc = mdo_lookup(env, parent, lname, fid, NULL);
1753         if (rc) {
1754                 CERROR("lookup %s: rc = %d\n", lname->ln_name, rc);
1755                 RETURN(ERR_PTR(rc));
1756         }
1757
1758         child = lu_object_find_at(env, &ed->ed_cl.cd_lu_dev, fid, NULL);
1759
1760         RETURN(child);
1761 }
1762
1763 static int echo_setattr_object(const struct lu_env *env,
1764                                struct echo_device *ed,
1765                                struct lu_object *ec_parent,
1766                                __u64 id, int count)
1767 {
1768         struct lu_object        *parent;
1769         struct echo_thread_info *info = echo_env_info(env);
1770         struct lu_name          *lname = &info->eti_lname;
1771         char                    *name = info->eti_name;
1772         struct lu_device        *ld = ed->ed_next;
1773         struct lu_buf           *buf = &info->eti_buf;
1774         int                      rc = 0;
1775         int                      i;
1776
1777         ENTRY;
1778
1779         parent = lu_object_locate(ec_parent->lo_header, ld->ld_type);
1780         if (parent == NULL)
1781                 RETURN(-ENXIO);
1782
1783         for (i = 0; i < count; i++) {
1784                 struct lu_object *ec_child, *child;
1785
1786                 echo_md_build_name(lname, name, id);
1787
1788                 ec_child = echo_md_lookup(env, ed, lu2md(parent), lname);
1789                 if (IS_ERR(ec_child)) {
1790                         CERROR("Can't find child %s: rc = %ld\n",
1791                                 lname->ln_name, PTR_ERR(ec_child));
1792                         RETURN(PTR_ERR(ec_child));
1793                 }
1794
1795                 child = lu_object_locate(ec_child->lo_header, ld->ld_type);
1796                 if (child == NULL) {
1797                         CERROR("Can not locate the child %s\n", lname->ln_name);
1798                         lu_object_put(env, ec_child);
1799                         rc = -EINVAL;
1800                         break;
1801                 }
1802
1803                 CDEBUG(D_RPCTRACE, "Start setattr object "DFID"\n",
1804                        PFID(lu_object_fid(child)));
1805
1806                 buf->lb_buf = info->eti_xattr_buf;
1807                 buf->lb_len = sizeof(info->eti_xattr_buf);
1808
1809                 sprintf(name, "%s.test1", XATTR_USER_PREFIX);
1810                 rc = mo_xattr_set(env, lu2md(child), buf, name,
1811                                   LU_XATTR_CREATE);
1812                 if (rc < 0) {
1813                         CERROR("Can not setattr child "DFID": rc = %d\n",
1814                                 PFID(lu_object_fid(child)), rc);
1815                         lu_object_put(env, ec_child);
1816                         break;
1817                 }
1818                 CDEBUG(D_RPCTRACE, "End setattr object "DFID"\n",
1819                        PFID(lu_object_fid(child)));
1820                 id++;
1821                 lu_object_put(env, ec_child);
1822         }
1823         RETURN(rc);
1824 }
1825
1826 static int echo_getattr_object(const struct lu_env *env,
1827                                struct echo_device *ed,
1828                                struct lu_object *ec_parent,
1829                                __u64 id, int count)
1830 {
1831         struct lu_object        *parent;
1832         struct echo_thread_info *info = echo_env_info(env);
1833         struct lu_name          *lname = &info->eti_lname;
1834         char                    *name = info->eti_name;
1835         struct md_attr          *ma = &info->eti_ma;
1836         struct lu_device        *ld = ed->ed_next;
1837         int                      rc = 0;
1838         int                      i;
1839
1840         ENTRY;
1841
1842         parent = lu_object_locate(ec_parent->lo_header, ld->ld_type);
1843         if (parent == NULL)
1844                 RETURN(-ENXIO);
1845
1846         memset(ma, 0, sizeof(*ma));
1847         ma->ma_need |= MA_INODE | MA_LOV | MA_PFID | MA_HSM | MA_ACL_DEF;
1848         ma->ma_acl = info->eti_xattr_buf;
1849         ma->ma_acl_size = sizeof(info->eti_xattr_buf);
1850
1851         for (i = 0; i < count; i++) {
1852                 struct lu_object *ec_child, *child;
1853
1854                 ma->ma_valid = 0;
1855                 echo_md_build_name(lname, name, id);
1856                 echo_set_lmm_size(env, ld, ma);
1857
1858                 ec_child = echo_md_lookup(env, ed, lu2md(parent), lname);
1859                 if (IS_ERR(ec_child)) {
1860                         CERROR("Can't find child %s: rc = %ld\n",
1861                                lname->ln_name, PTR_ERR(ec_child));
1862                         RETURN(PTR_ERR(ec_child));
1863                 }
1864
1865                 child = lu_object_locate(ec_child->lo_header, ld->ld_type);
1866                 if (child == NULL) {
1867                         CERROR("Can not locate the child %s\n", lname->ln_name);
1868                         lu_object_put(env, ec_child);
1869                         RETURN(-EINVAL);
1870                 }
1871
1872                 CDEBUG(D_RPCTRACE, "Start getattr object "DFID"\n",
1873                        PFID(lu_object_fid(child)));
1874                 rc = echo_attr_get_complex(env, lu2md(child), ma);
1875                 if (rc) {
1876                         CERROR("Can not getattr child "DFID": rc = %d\n",
1877                                 PFID(lu_object_fid(child)), rc);
1878                         lu_object_put(env, ec_child);
1879                         break;
1880                 }
1881                 CDEBUG(D_RPCTRACE, "End getattr object "DFID"\n",
1882                        PFID(lu_object_fid(child)));
1883                 id++;
1884                 lu_object_put(env, ec_child);
1885         }
1886
1887         RETURN(rc);
1888 }
1889
1890 static int echo_lookup_object(const struct lu_env *env,
1891                               struct echo_device *ed,
1892                               struct lu_object *ec_parent,
1893                               __u64 id, int count)
1894 {
1895         struct lu_object        *parent;
1896         struct echo_thread_info *info = echo_env_info(env);
1897         struct lu_name          *lname = &info->eti_lname;
1898         char                    *name = info->eti_name;
1899         struct lu_fid           *fid = &info->eti_fid;
1900         struct lu_device        *ld = ed->ed_next;
1901         int                      rc = 0;
1902         int                      i;
1903
1904         parent = lu_object_locate(ec_parent->lo_header, ld->ld_type);
1905         if (parent == NULL)
1906                 return -ENXIO;
1907
1908         /*prepare the requests*/
1909         for (i = 0; i < count; i++) {
1910                 echo_md_build_name(lname, name, id);
1911
1912                 CDEBUG(D_RPCTRACE, "Start lookup object "DFID" %s %p\n",
1913                        PFID(lu_object_fid(parent)), lname->ln_name, parent);
1914
1915                 rc = mdo_lookup(env, lu2md(parent), lname, fid, NULL);
1916                 if (rc) {
1917                         CERROR("Can not lookup child %s: rc = %d\n", name, rc);
1918                         break;
1919                 }
1920                 CDEBUG(D_RPCTRACE, "End lookup object "DFID" %s %p\n",
1921                        PFID(lu_object_fid(parent)), lname->ln_name, parent);
1922
1923                 id++;
1924         }
1925         return rc;
1926 }
1927
1928 static int echo_md_destroy_internal(const struct lu_env *env,
1929                                     struct echo_device *ed,
1930                                     struct md_object *parent,
1931                                     struct lu_name *lname,
1932                                     struct md_attr *ma)
1933 {
1934         struct lu_device   *ld = ed->ed_next;
1935         struct lu_object   *ec_child;
1936         struct lu_object   *child;
1937         int                 rc;
1938
1939         ENTRY;
1940
1941         ec_child = echo_md_lookup(env, ed, parent, lname);
1942         if (IS_ERR(ec_child)) {
1943                 CERROR("Can't find child %s: rc = %ld\n", lname->ln_name,
1944                         PTR_ERR(ec_child));
1945                 RETURN(PTR_ERR(ec_child));
1946         }
1947
1948         child = lu_object_locate(ec_child->lo_header, ld->ld_type);
1949         if (child == NULL) {
1950                 CERROR("Can not locate the child %s\n", lname->ln_name);
1951                 GOTO(out_put, rc = -EINVAL);
1952         }
1953
1954         CDEBUG(D_RPCTRACE, "Start destroy object "DFID" %s %p\n",
1955                PFID(lu_object_fid(&parent->mo_lu)), lname->ln_name, parent);
1956
1957         rc = mdo_unlink(env, parent, lu2md(child), lname, ma);
1958         if (rc) {
1959                 CERROR("Can not unlink child %s: rc = %d\n",
1960                         lname->ln_name, rc);
1961                 GOTO(out_put, rc);
1962         }
1963         CDEBUG(D_RPCTRACE, "End destroy object "DFID" %s %p\n",
1964                PFID(lu_object_fid(&parent->mo_lu)), lname->ln_name, parent);
1965 out_put:
1966         lu_object_put(env, ec_child);
1967         return rc;
1968 }
1969
1970 static int echo_destroy_object(const struct lu_env *env,
1971                                struct echo_device *ed,
1972                                struct lu_object *ec_parent,
1973                                char *name, int namelen,
1974                                __u64 id, __u32 mode,
1975                                int count)
1976 {
1977         struct echo_thread_info *info = echo_env_info(env);
1978         struct lu_name          *lname = &info->eti_lname;
1979         struct md_attr          *ma = &info->eti_ma;
1980         struct lu_device        *ld = ed->ed_next;
1981         struct lu_object        *parent;
1982         int                      rc = 0;
1983         int                      i;
1984         ENTRY;
1985
1986         parent = lu_object_locate(ec_parent->lo_header, ld->ld_type);
1987         if (parent == NULL)
1988                 RETURN(-EINVAL);
1989
1990         memset(ma, 0, sizeof(*ma));
1991         ma->ma_attr.la_mode = mode;
1992         ma->ma_attr.la_valid = LA_CTIME;
1993         ma->ma_attr.la_ctime = cfs_time_current_64();
1994         ma->ma_need = MA_INODE;
1995         ma->ma_valid = 0;
1996
1997         if (name != NULL) {
1998                 lname->ln_name = name;
1999                 lname->ln_namelen = namelen;
2000                 rc = echo_md_destroy_internal(env, ed, lu2md(parent), lname,
2001                                               ma);
2002                 RETURN(rc);
2003         }
2004
2005         /*prepare the requests*/
2006         for (i = 0; i < count; i++) {
2007                 char *tmp_name = info->eti_name;
2008
2009                 ma->ma_valid = 0;
2010                 echo_md_build_name(lname, tmp_name, id);
2011
2012                 rc = echo_md_destroy_internal(env, ed, lu2md(parent), lname,
2013                                               ma);
2014                 if (rc) {
2015                         CERROR("Can not unlink child %s: rc = %d\n", name, rc);
2016                         break;
2017                 }
2018                 id++;
2019         }
2020
2021         RETURN(rc);
2022 }
2023
2024 static struct lu_object *echo_resolve_path(const struct lu_env *env,
2025                                            struct echo_device *ed, char *path,
2026                                            int path_len)
2027 {
2028         struct lu_device        *ld = ed->ed_next;
2029         struct md_device        *md = lu2md_dev(ld);
2030         struct echo_thread_info *info = echo_env_info(env);
2031         struct lu_fid           *fid = &info->eti_fid;
2032         struct lu_name          *lname = &info->eti_lname;
2033         struct lu_object        *parent = NULL;
2034         struct lu_object        *child = NULL;
2035         int rc = 0;
2036         ENTRY;
2037
2038         /*Only support MDD layer right now*/
2039         rc = md->md_ops->mdo_root_get(env, md, fid);
2040         if (rc) {
2041                 CERROR("get root error: rc = %d\n", rc);
2042                 RETURN(ERR_PTR(rc));
2043         }
2044
2045         parent = lu_object_find_at(env, &ed->ed_cl.cd_lu_dev, fid, NULL);
2046         if (IS_ERR(parent)) {
2047                 CERROR("Can not find the parent "DFID": rc = %ld\n",
2048                         PFID(fid), PTR_ERR(parent));
2049                 RETURN(parent);
2050         }
2051
2052         while (1) {
2053                 struct lu_object *ld_parent;
2054                 char *e;
2055
2056                 e = strsep(&path, "/");
2057                 if (e == NULL)
2058                         break;
2059
2060                 if (e[0] == 0) {
2061                         if (!path || path[0] == '\0')
2062                                 break;
2063                         continue;
2064                 }
2065
2066                 lname->ln_name = e;
2067                 lname->ln_namelen = strlen(e);
2068
2069                 ld_parent = lu_object_locate(parent->lo_header, ld->ld_type);
2070                 if (ld_parent == NULL) {
2071                         lu_object_put(env, parent);
2072                         rc = -EINVAL;
2073                         break;
2074                 }
2075
2076                 child = echo_md_lookup(env, ed, lu2md(ld_parent), lname);
2077                 lu_object_put(env, parent);
2078                 if (IS_ERR(child)) {
2079                         rc = (int)PTR_ERR(child);
2080                         CERROR("lookup %s under parent "DFID": rc = %d\n",
2081                                 lname->ln_name, PFID(lu_object_fid(ld_parent)),
2082                                 rc);
2083                         break;
2084                 }
2085                 parent = child;
2086         }
2087         if (rc)
2088                 RETURN(ERR_PTR(rc));
2089
2090         RETURN(parent);
2091 }
2092
2093 #define ECHO_MD_CTX_TAG (LCT_REMEMBER | LCT_MD_THREAD)
2094 #define ECHO_MD_SES_TAG (LCT_REMEMBER | LCT_SESSION)
2095 static int echo_md_handler(struct echo_device *ed, int command,
2096                            char *path, int path_len, int id, int count,
2097                            struct obd_ioctl_data *data)
2098 {
2099         struct echo_thread_info *info;
2100         struct lu_device      *ld = ed->ed_next;
2101         struct lu_env         *env;
2102         int                    refcheck;
2103         struct lu_object      *parent;
2104         char                  *name = NULL;
2105         int                    namelen = data->ioc_plen2;
2106         int                    rc = 0;
2107         ENTRY;
2108
2109         if (ld == NULL) {
2110                 CERROR("MD echo client is not being initialized properly\n");
2111                 RETURN(-EINVAL);
2112         }
2113
2114         if (strcmp(ld->ld_type->ldt_name, LUSTRE_MDD_NAME)) {
2115                 CERROR("Only support MDD layer right now!\n");
2116                 RETURN(-EINVAL);
2117         }
2118
2119         env = cl_env_get(&refcheck);
2120         if (IS_ERR(env))
2121                 RETURN(PTR_ERR(env));
2122
2123         rc = lu_env_refill_by_tags(env, ECHO_MD_CTX_TAG, ECHO_MD_SES_TAG);
2124         if (rc != 0)
2125                 GOTO(out_env, rc);
2126
2127         /* init big_lmm buffer */
2128         info = echo_env_info(env);
2129         LASSERT(info->eti_big_lmm == NULL);
2130         OBD_ALLOC_LARGE(info->eti_big_lmm, MIN_MD_SIZE);
2131         if (info->eti_big_lmm == NULL)
2132                 GOTO(out_env, rc = -ENOMEM);
2133         info->eti_big_lmmsize = MIN_MD_SIZE;
2134
2135         parent = echo_resolve_path(env, ed, path, path_len);
2136         if (IS_ERR(parent)) {
2137                 CERROR("Can not resolve the path %s: rc = %ld\n", path,
2138                         PTR_ERR(parent));
2139                 GOTO(out_free, rc = PTR_ERR(parent));
2140         }
2141
2142         if (namelen > 0) {
2143                 OBD_ALLOC(name, namelen + 1);
2144                 if (name == NULL)
2145                         GOTO(out_put, rc = -ENOMEM);
2146                 if (cfs_copy_from_user(name, data->ioc_pbuf2, namelen))
2147                         GOTO(out_name, rc = -EFAULT);
2148         }
2149
2150         switch (command) {
2151         case ECHO_MD_CREATE:
2152         case ECHO_MD_MKDIR: {
2153                 struct echo_thread_info *info = echo_env_info(env);
2154                 __u32 mode = data->ioc_obdo2.o_mode;
2155                 struct lu_fid *fid = &info->eti_fid;
2156                 int stripe_count = (int)data->ioc_obdo2.o_misc;
2157                 int stripe_index = (int)data->ioc_obdo2.o_stripe_idx;
2158
2159                 fid->f_seq = data->ioc_obdo1.o_seq;
2160                 fid->f_oid = (__u32)data->ioc_obdo1.o_id;
2161                 fid->f_ver = 0;
2162                 /* In the function below, .hs_keycmp resolves to
2163                  * lu_obj_hop_keycmp() */
2164                 /* coverity[overrun-buffer-val] */
2165                 rc = echo_create_md_object(env, ed, parent, fid, name, namelen,
2166                                            id, mode, count, stripe_count,
2167                                            stripe_index);
2168                 break;
2169         }
2170         case ECHO_MD_DESTROY:
2171         case ECHO_MD_RMDIR: {
2172                 __u32 mode = data->ioc_obdo2.o_mode;
2173
2174                 rc = echo_destroy_object(env, ed, parent, name, namelen,
2175                                          id, mode, count);
2176                 break;
2177         }
2178         case ECHO_MD_LOOKUP:
2179                 rc = echo_lookup_object(env, ed, parent, id, count);
2180                 break;
2181         case ECHO_MD_GETATTR:
2182                 rc = echo_getattr_object(env, ed, parent, id, count);
2183                 break;
2184         case ECHO_MD_SETATTR:
2185                 rc = echo_setattr_object(env, ed, parent, id, count);
2186                 break;
2187         default:
2188                 CERROR("unknown command %d\n", command);
2189                 rc = -EINVAL;
2190                 break;
2191         }
2192 out_name:
2193         if (name != NULL)
2194                 OBD_FREE(name, namelen + 1);
2195 out_put:
2196         lu_object_put(env, parent);
2197 out_free:
2198         LASSERT(info->eti_big_lmm);
2199         OBD_FREE_LARGE(info->eti_big_lmm, info->eti_big_lmmsize);
2200         info->eti_big_lmm = NULL;
2201         info->eti_big_lmmsize = 0;
2202 out_env:
2203         cl_env_put(env, &refcheck);
2204         return rc;
2205 }
2206
2207 static int echo_create_object(const struct lu_env *env, struct echo_device *ed,
2208                               int on_target, struct obdo *oa, void *ulsm,
2209                               int ulsm_nob, struct obd_trans_info *oti)
2210 {
2211         struct echo_object     *eco;
2212         struct echo_client_obd *ec = ed->ed_ec;
2213         struct lov_stripe_md   *lsm = NULL;
2214         int                     rc;
2215         int                     created = 0;
2216         ENTRY;
2217
2218         if ((oa->o_valid & OBD_MD_FLID) == 0 && /* no obj id */
2219             (on_target ||                       /* set_stripe */
2220              ec->ec_nstripes != 0)) {           /* LOV */
2221                 CERROR ("No valid oid\n");
2222                 RETURN(-EINVAL);
2223         }
2224
2225         rc = echo_alloc_memmd(ed, &lsm);
2226         if (rc < 0) {
2227                 CERROR("Cannot allocate md: rc = %d\n", rc);
2228                 GOTO(failed, rc);
2229         }
2230
2231         if (ulsm != NULL) {
2232                 int i, idx;
2233
2234                 rc = echo_copyin_lsm (ed, lsm, ulsm, ulsm_nob);
2235                 if (rc != 0)
2236                         GOTO(failed, rc);
2237
2238                 if (lsm->lsm_stripe_count == 0)
2239                         lsm->lsm_stripe_count = ec->ec_nstripes;
2240
2241                 if (lsm->lsm_stripe_size == 0)
2242                         lsm->lsm_stripe_size = CFS_PAGE_SIZE;
2243
2244                 idx = cfs_rand();
2245
2246                 /* setup stripes: indices + default ids if required */
2247                 for (i = 0; i < lsm->lsm_stripe_count; i++) {
2248                         if (lsm->lsm_oinfo[i]->loi_id == 0)
2249                                 lsm->lsm_oinfo[i]->loi_id = lsm->lsm_object_id;
2250
2251                         lsm->lsm_oinfo[i]->loi_ost_idx =
2252                                 (idx + i) % ec->ec_nstripes;
2253                 }
2254         }
2255
2256         /* setup object ID here for !on_target and LOV hint */
2257         if (oa->o_valid & OBD_MD_FLID)
2258                 lsm->lsm_object_id = oa->o_id;
2259
2260         if (lsm->lsm_object_id == 0)
2261                 lsm->lsm_object_id = ++last_object_id;
2262
2263         rc = 0;
2264         if (on_target) {
2265                 /* Only echo objects are allowed to be created */
2266                 LASSERT((oa->o_valid & OBD_MD_FLGROUP) &&
2267                         (oa->o_seq == FID_SEQ_ECHO));
2268                 rc = obd_create(env, ec->ec_exp, oa, &lsm, oti);
2269                 if (rc != 0) {
2270                         CERROR("Cannot create objects: rc = %d\n", rc);
2271                         GOTO(failed, rc);
2272                 }
2273                 created = 1;
2274         }
2275
2276         /* See what object ID we were given */
2277         oa->o_id = lsm->lsm_object_id;
2278         oa->o_valid |= OBD_MD_FLID;
2279
2280         eco = cl_echo_object_find(ed, &lsm);
2281         if (IS_ERR(eco))
2282                 GOTO(failed, rc = PTR_ERR(eco));
2283         cl_echo_object_put(eco);
2284
2285         CDEBUG(D_INFO, "oa->o_id = %lx\n", (long)oa->o_id);
2286         EXIT;
2287
2288  failed:
2289         if (created && rc)
2290                 obd_destroy(env, ec->ec_exp, oa, lsm, oti, NULL, NULL);
2291         if (lsm)
2292                 echo_free_memmd(ed, &lsm);
2293         if (rc)
2294                 CERROR("create object failed with: rc = %d\n", rc);
2295         return (rc);
2296 }
2297
2298 static int echo_get_object(struct echo_object **ecop, struct echo_device *ed,
2299                            struct obdo *oa)
2300 {
2301         struct lov_stripe_md   *lsm = NULL;
2302         struct echo_object     *eco;
2303         int                     rc;
2304         ENTRY;
2305
2306         if ((oa->o_valid & OBD_MD_FLID) == 0 ||
2307             oa->o_id == 0)  /* disallow use of object id 0 */
2308         {
2309                 CERROR ("No valid oid\n");
2310                 RETURN(-EINVAL);
2311         }
2312
2313         rc = echo_alloc_memmd(ed, &lsm);
2314         if (rc < 0)
2315                 RETURN(rc);
2316
2317         lsm->lsm_object_id = oa->o_id;
2318         if (oa->o_valid & OBD_MD_FLGROUP)
2319                 lsm->lsm_object_seq = oa->o_seq;
2320         else
2321                 lsm->lsm_object_seq = FID_SEQ_ECHO;
2322
2323         rc = 0;
2324         eco = cl_echo_object_find(ed, &lsm);
2325         if (!IS_ERR(eco))
2326                 *ecop = eco;
2327         else
2328                 rc = PTR_ERR(eco);
2329         if (lsm)
2330                 echo_free_memmd(ed, &lsm);
2331         RETURN(rc);
2332 }
2333
/**
 * Release \a eco through cl_echo_object_put(), logging an error if that
 * call reports failure.
 *
 * \param eco  echo object to release
 */
static void echo_put_object(struct echo_object *eco)
{
        /* message terminated with a newline like every other CERROR here */
        if (cl_echo_object_put(eco))
                CERROR("echo client: drop an object failed\n");
}
2339
2340 static void
2341 echo_get_stripe_off_id (struct lov_stripe_md *lsm, obd_off *offp, obd_id *idp)
2342 {
2343         unsigned long stripe_count;
2344         unsigned long stripe_size;
2345         unsigned long width;
2346         unsigned long woffset;
2347         int           stripe_index;
2348         obd_off       offset;
2349
2350         if (lsm->lsm_stripe_count <= 1)
2351                 return;
2352
2353         offset       = *offp;
2354         stripe_size  = lsm->lsm_stripe_size;
2355         stripe_count = lsm->lsm_stripe_count;
2356
2357         /* width = # bytes in all stripes */
2358         width = stripe_size * stripe_count;
2359
2360         /* woffset = offset within a width; offset = whole number of widths */
2361         woffset = do_div (offset, width);
2362
2363         stripe_index = woffset / stripe_size;
2364
2365         *idp = lsm->lsm_oinfo[stripe_index]->loi_id;
2366         *offp = offset * stripe_size + woffset % stripe_size;
2367 }
2368
2369 static void
2370 echo_client_page_debug_setup(struct lov_stripe_md *lsm,
2371                              cfs_page_t *page, int rw, obd_id id,
2372                              obd_off offset, obd_off count)
2373 {
2374         char    *addr;
2375         obd_off  stripe_off;
2376         obd_id   stripe_id;
2377         int      delta;
2378
2379         /* no partial pages on the client */
2380         LASSERT(count == CFS_PAGE_SIZE);
2381
2382         addr = cfs_kmap(page);
2383
2384         for (delta = 0; delta < CFS_PAGE_SIZE; delta += OBD_ECHO_BLOCK_SIZE) {
2385                 if (rw == OBD_BRW_WRITE) {
2386                         stripe_off = offset + delta;
2387                         stripe_id = id;
2388                         echo_get_stripe_off_id(lsm, &stripe_off, &stripe_id);
2389                 } else {
2390                         stripe_off = 0xdeadbeef00c0ffeeULL;
2391                         stripe_id = 0xdeadbeef00c0ffeeULL;
2392                 }
2393                 block_debug_setup(addr + delta, OBD_ECHO_BLOCK_SIZE,
2394                                   stripe_off, stripe_id);
2395         }
2396
2397         cfs_kunmap(page);
2398 }
2399
2400 static int echo_client_page_debug_check(struct lov_stripe_md *lsm,
2401                                         cfs_page_t *page, obd_id id,
2402                                         obd_off offset, obd_off count)
2403 {
2404         obd_off stripe_off;
2405         obd_id  stripe_id;
2406         char   *addr;
2407         int     delta;
2408         int     rc;
2409         int     rc2;
2410
2411         /* no partial pages on the client */
2412         LASSERT(count == CFS_PAGE_SIZE);
2413
2414         addr = cfs_kmap(page);
2415
2416         for (rc = delta = 0; delta < CFS_PAGE_SIZE; delta += OBD_ECHO_BLOCK_SIZE) {
2417                 stripe_off = offset + delta;
2418                 stripe_id = id;
2419                 echo_get_stripe_off_id (lsm, &stripe_off, &stripe_id);
2420
2421                 rc2 = block_debug_check("test_brw",
2422                                         addr + delta, OBD_ECHO_BLOCK_SIZE,
2423                                         stripe_off, stripe_id);
2424                 if (rc2 != 0) {
2425                         CERROR ("Error in echo object "LPX64"\n", id);
2426                         rc = rc2;
2427                 }
2428         }
2429
2430         cfs_kunmap(page);
2431         return rc;
2432 }
2433
2434 static int echo_client_kbrw(struct echo_device *ed, int rw, struct obdo *oa,
2435                             struct echo_object *eco, obd_off offset,
2436                             obd_size count, int async,
2437                             struct obd_trans_info *oti)
2438 {
2439         struct lov_stripe_md   *lsm = eco->eo_lsm;
2440         obd_count               npages;
2441         struct brw_page        *pga;
2442         struct brw_page        *pgp;
2443         cfs_page_t            **pages;
2444         obd_off                 off;
2445         int                     i;
2446         int                     rc;
2447         int                     verify;
2448         int                     gfp_mask;
2449         int                     brw_flags = 0;
2450         ENTRY;
2451
2452         verify = ((oa->o_id) != ECHO_PERSISTENT_OBJID &&
2453                   (oa->o_valid & OBD_MD_FLFLAGS) != 0 &&
2454                   (oa->o_flags & OBD_FL_DEBUG_CHECK) != 0);
2455
2456         gfp_mask = ((oa->o_id & 2) == 0) ? CFS_ALLOC_STD : CFS_ALLOC_HIGHUSER;
2457
2458         LASSERT(rw == OBD_BRW_WRITE || rw == OBD_BRW_READ);
2459         LASSERT(lsm != NULL);
2460         LASSERT(lsm->lsm_object_id == oa->o_id);
2461
2462         if (count <= 0 ||
2463             (count & (~CFS_PAGE_MASK)) != 0)
2464                 RETURN(-EINVAL);
2465
2466         /* XXX think again with misaligned I/O */
2467         npages = count >> CFS_PAGE_SHIFT;
2468
2469         if (rw == OBD_BRW_WRITE)
2470                 brw_flags = OBD_BRW_ASYNC;
2471
2472         OBD_ALLOC(pga, npages * sizeof(*pga));
2473         if (pga == NULL)
2474                 RETURN(-ENOMEM);
2475
2476         OBD_ALLOC(pages, npages * sizeof(*pages));
2477         if (pages == NULL) {
2478                 OBD_FREE(pga, npages * sizeof(*pga));
2479                 RETURN(-ENOMEM);
2480         }
2481
2482         for (i = 0, pgp = pga, off = offset;
2483              i < npages;
2484              i++, pgp++, off += CFS_PAGE_SIZE) {
2485
2486                 LASSERT (pgp->pg == NULL);      /* for cleanup */
2487
2488                 rc = -ENOMEM;
2489                 OBD_PAGE_ALLOC(pgp->pg, gfp_mask);
2490                 if (pgp->pg == NULL)
2491                         goto out;
2492
2493                 pages[i] = pgp->pg;
2494                 pgp->count = CFS_PAGE_SIZE;
2495                 pgp->off = off;
2496                 pgp->flag = brw_flags;
2497
2498                 if (verify)
2499                         echo_client_page_debug_setup(lsm, pgp->pg, rw,
2500                                                      oa->o_id, off, pgp->count);
2501         }
2502
2503         /* brw mode can only be used at client */
2504         LASSERT(ed->ed_next != NULL);
2505         rc = cl_echo_object_brw(eco, rw, offset, pages, npages, async);
2506
2507  out:
2508         if (rc != 0 || rw != OBD_BRW_READ)
2509                 verify = 0;
2510
2511         for (i = 0, pgp = pga; i < npages; i++, pgp++) {
2512                 if (pgp->pg == NULL)
2513                         continue;
2514
2515                 if (verify) {
2516                         int vrc;
2517                         vrc = echo_client_page_debug_check(lsm, pgp->pg, oa->o_id,
2518                                                            pgp->off, pgp->count);
2519                         if (vrc != 0 && rc == 0)
2520                                 rc = vrc;
2521                 }
2522                 OBD_PAGE_FREE(pgp->pg);
2523         }
2524         OBD_FREE(pga, npages * sizeof(*pga));
2525         OBD_FREE(pages, npages * sizeof(*pages));
2526         RETURN(rc);
2527 }
2528
2529 static int echo_client_prep_commit(const struct lu_env *env,
2530                                    struct obd_export *exp, int rw,
2531                                    struct obdo *oa, struct echo_object *eco,
2532                                    obd_off offset, obd_size count,
2533                                    obd_size batch, struct obd_trans_info *oti,
2534                                    int async)
2535 {
2536         struct lov_stripe_md *lsm = eco->eo_lsm;
2537         struct obd_ioobj ioo;
2538         struct niobuf_local *lnb;
2539         struct niobuf_remote *rnb;
2540         obd_off off;
2541         obd_size npages, tot_pages;
2542         int i, ret = 0, brw_flags = 0;
2543
2544         ENTRY;
2545
2546         if (count <= 0 || (count & (~CFS_PAGE_MASK)) != 0 ||
2547             (lsm != NULL && lsm->lsm_object_id != oa->o_id))
2548                 RETURN(-EINVAL);
2549
2550         npages = batch >> CFS_PAGE_SHIFT;
2551         tot_pages = count >> CFS_PAGE_SHIFT;
2552
2553         OBD_ALLOC(lnb, npages * sizeof(struct niobuf_local));
2554         OBD_ALLOC(rnb, npages * sizeof(struct niobuf_remote));
2555
2556         if (lnb == NULL || rnb == NULL)
2557                 GOTO(out, ret = -ENOMEM);
2558
2559         if (rw == OBD_BRW_WRITE && async)
2560                 brw_flags |= OBD_BRW_ASYNC;
2561
2562         obdo_to_ioobj(oa, &ioo);
2563
2564         off = offset;
2565
2566         for(; tot_pages; tot_pages -= npages) {
2567                 int lpages;
2568
2569                 if (tot_pages < npages)
2570                         npages = tot_pages;
2571
2572                 for (i = 0; i < npages; i++, off += CFS_PAGE_SIZE) {
2573                         rnb[i].offset = off;
2574                         rnb[i].len = CFS_PAGE_SIZE;
2575                         rnb[i].flags = brw_flags;
2576                 }
2577
2578                 ioo.ioo_bufcnt = npages;
2579                 oti->oti_transno = 0;
2580
2581                 lpages = npages;
2582                 ret = obd_preprw(env, rw, exp, oa, 1, &ioo, rnb, &lpages,
2583                                  lnb, oti, NULL);
2584                 if (ret != 0)
2585                         GOTO(out, ret);
2586                 LASSERT(lpages == npages);
2587
2588                 for (i = 0; i < lpages; i++) {
2589                         cfs_page_t *page = lnb[i].page;
2590
2591                         /* read past eof? */
2592                         if (page == NULL && lnb[i].rc == 0)
2593                                 continue;
2594
2595                         if (async)
2596                                 lnb[i].flags |= OBD_BRW_ASYNC;
2597
2598                         if (oa->o_id == ECHO_PERSISTENT_OBJID ||
2599                             (oa->o_valid & OBD_MD_FLFLAGS) == 0 ||
2600                             (oa->o_flags & OBD_FL_DEBUG_CHECK) == 0)
2601                                 continue;
2602
2603                         if (rw == OBD_BRW_WRITE)
2604                                 echo_client_page_debug_setup(lsm, page, rw,
2605                                                              oa->o_id,
2606                                                              rnb[i].offset,
2607                                                              rnb[i].len);
2608                         else
2609                                 echo_client_page_debug_check(lsm, page,
2610                                                              oa->o_id,
2611                                                              rnb[i].offset,
2612                                                              rnb[i].len);
2613                 }
2614
2615                 ret = obd_commitrw(env, rw, exp, oa, 1, &ioo,
2616                                    rnb, npages, lnb, oti, ret);
2617                 if (ret != 0)
2618                         GOTO(out, ret);
2619
2620                 /* Reset oti otherwise it would confuse ldiskfs. */
2621                 memset(oti, 0, sizeof(*oti));
2622         }
2623
2624 out:
2625         if (lnb)
2626                 OBD_FREE(lnb, npages * sizeof(struct niobuf_local));
2627         if (rnb)
2628                 OBD_FREE(rnb, npages * sizeof(struct niobuf_remote));
2629         RETURN(ret);
2630 }
2631
2632 static int echo_client_brw_ioctl(const struct lu_env *env, int rw,
2633                                  struct obd_export *exp,
2634                                  struct obd_ioctl_data *data,
2635                                  struct obd_trans_info *dummy_oti)
2636 {
2637         struct obd_device *obd = class_exp2obd(exp);
2638         struct echo_device *ed = obd2echo_dev(obd);
2639         struct echo_client_obd *ec = ed->ed_ec;
2640         struct obdo *oa = &data->ioc_obdo1;
2641         struct echo_object *eco;
2642         int rc;
2643         int async = 1;
2644         long test_mode;
2645         ENTRY;
2646
2647         LASSERT(oa->o_valid & OBD_MD_FLGROUP);
2648
2649         rc = echo_get_object(&eco, ed, oa);
2650         if (rc)
2651                 RETURN(rc);
2652
2653         oa->o_valid &= ~OBD_MD_FLHANDLE;
2654
2655         /* OFD/obdfilter works only via prep/commit */
2656         test_mode = (long)data->ioc_pbuf1;
2657         if (test_mode == 1)
2658                 async = 0;
2659
2660         if (ed->ed_next == NULL && test_mode != 3) {
2661                 test_mode = 3;
2662                 data->ioc_plen1 = data->ioc_count;
2663         }
2664
2665         /* Truncate batch size to maximum */
2666         if (data->ioc_plen1 > PTLRPC_MAX_BRW_SIZE)
2667                 data->ioc_plen1 = PTLRPC_MAX_BRW_SIZE;
2668
2669         switch (test_mode) {
2670         case 1:
2671                 /* fall through */
2672         case 2:
2673                 rc = echo_client_kbrw(ed, rw, oa,
2674                                       eco, data->ioc_offset,
2675                                       data->ioc_count, async, dummy_oti);
2676                 break;
2677         case 3:
2678                 rc = echo_client_prep_commit(env, ec->ec_exp, rw, oa,
2679                                              eco, data->ioc_offset,
2680                                              data->ioc_count, data->ioc_plen1,
2681                                              dummy_oti, async);
2682                 break;
2683         default:
2684                 rc = -EINVAL;
2685         }
2686         echo_put_object(eco);
2687         RETURN(rc);
2688 }
2689
2690 static int
2691 echo_client_enqueue(struct obd_export *exp, struct obdo *oa,
2692                     int mode, obd_off offset, obd_size nob)
2693 {
2694         struct echo_device     *ed = obd2echo_dev(exp->exp_obd);
2695         struct lustre_handle   *ulh = &oa->o_handle;
2696         struct echo_object     *eco;
2697         obd_off                 end;
2698         int                     rc;
2699         ENTRY;
2700
2701         if (ed->ed_next == NULL)
2702                 RETURN(-EOPNOTSUPP);
2703
2704         if (!(mode == LCK_PR || mode == LCK_PW))
2705                 RETURN(-EINVAL);
2706
2707         if ((offset & (~CFS_PAGE_MASK)) != 0 ||
2708             (nob & (~CFS_PAGE_MASK)) != 0)
2709                 RETURN(-EINVAL);
2710
2711         rc = echo_get_object (&eco, ed, oa);
2712         if (rc != 0)
2713                 RETURN(rc);
2714
2715         end = (nob == 0) ? ((obd_off) -1) : (offset + nob - 1);
2716         rc = cl_echo_enqueue(eco, offset, end, mode, &ulh->cookie);
2717         if (rc == 0) {
2718                 oa->o_valid |= OBD_MD_FLHANDLE;
2719                 CDEBUG(D_INFO, "Cookie is "LPX64"\n", ulh->cookie);
2720         }
2721         echo_put_object(eco);
2722         RETURN(rc);
2723 }
2724
2725 static int
2726 echo_client_cancel(struct obd_export *exp, struct obdo *oa)
2727 {
2728         struct echo_device *ed     = obd2echo_dev(exp->exp_obd);
2729         __u64               cookie = oa->o_handle.cookie;
2730
2731         if ((oa->o_valid & OBD_MD_FLHANDLE) == 0)
2732                 return -EINVAL;
2733
2734         CDEBUG(D_INFO, "Cookie is "LPX64"\n", cookie);
2735         return cl_echo_cancel(ed, cookie);
2736 }
2737
2738 static int
2739 echo_client_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
2740                       void *karg, void *uarg)
2741 {
2742         struct obd_device      *obd = exp->exp_obd;
2743         struct echo_device     *ed = obd2echo_dev(obd);
2744         struct echo_client_obd *ec = ed->ed_ec;
2745         struct echo_object     *eco;
2746         struct obd_ioctl_data  *data = karg;
2747         struct obd_trans_info   dummy_oti;
2748         struct lu_env          *env;
2749         struct oti_req_ack_lock *ack_lock;
2750         struct obdo            *oa;
2751         struct lu_fid           fid;
2752         int                     rw = OBD_BRW_READ;
2753         int                     rc = 0;
2754         int                     i;
2755         ENTRY;
2756
2757         memset(&dummy_oti, 0, sizeof(dummy_oti));
2758
2759         oa = &data->ioc_obdo1;
2760         if (!(oa->o_valid & OBD_MD_FLGROUP)) {
2761                 oa->o_valid |= OBD_MD_FLGROUP;
2762                 oa->o_seq = FID_SEQ_ECHO;
2763         }
2764
2765         /* This FID is unpacked just for validation at this point */
2766         rc = fid_ostid_unpack(&fid, &oa->o_oi, 0);
2767         if (rc < 0)
2768                 RETURN(rc);
2769
2770         OBD_ALLOC_PTR(env);
2771         if (env == NULL)
2772                 RETURN(-ENOMEM);
2773
2774         rc = lu_env_init(env, LCT_DT_THREAD);
2775         if (rc)
2776                 GOTO(out, rc = -ENOMEM);
2777
2778         switch (cmd) {
2779         case OBD_IOC_CREATE:                    /* may create echo object */
2780                 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
2781                         GOTO (out, rc = -EPERM);
2782
2783                 rc = echo_create_object(env, ed, 1, oa, data->ioc_pbuf1,
2784                                         data->ioc_plen1, &dummy_oti);
2785                 GOTO(out, rc);
2786
2787         case OBD_IOC_ECHO_MD: {
2788                 int count;
2789                 int cmd;
2790                 char *dir = NULL;
2791                 int dirlen;
2792                 __u64 id;
2793
2794                 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
2795                         GOTO(out, rc = -EPERM);
2796
2797                 count = data->ioc_count;
2798                 cmd = data->ioc_command;
2799
2800                 id = data->ioc_obdo2.o_id;
2801
2802                 dirlen = data->ioc_plen1;
2803                 OBD_ALLOC(dir, dirlen + 1);
2804                 if (dir == NULL)
2805                         GOTO(out, rc = -ENOMEM);
2806
2807                 if (cfs_copy_from_user(dir, data->ioc_pbuf1, dirlen)) {
2808                         OBD_FREE(dir, data->ioc_plen1 + 1);
2809                         GOTO(out, rc = -EFAULT);
2810                 }
2811
2812                 rc = echo_md_handler(ed, cmd, dir, dirlen, id, count, data);
2813                 OBD_FREE(dir, dirlen + 1);
2814                 GOTO(out, rc);
2815         }
2816         case OBD_IOC_ECHO_ALLOC_SEQ: {
2817                 struct lu_env   *cl_env;
2818                 int              refcheck;
2819                 __u64            seq;
2820                 int              max_count;
2821
2822                 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
2823                         GOTO(out, rc = -EPERM);
2824
2825                 cl_env = cl_env_get(&refcheck);
2826                 if (IS_ERR(cl_env))
2827                         GOTO(out, rc = PTR_ERR(cl_env));
2828
2829                 rc = lu_env_refill_by_tags(cl_env, ECHO_MD_CTX_TAG,
2830                                             ECHO_MD_SES_TAG);
2831                 if (rc != 0) {
2832                         cl_env_put(cl_env, &refcheck);
2833                         GOTO(out, rc);
2834                 }
2835
2836                 rc = seq_client_get_seq(cl_env, ed->ed_cl_seq, &seq);
2837                 cl_env_put(cl_env, &refcheck);
2838                 if (rc < 0) {
2839                         CERROR("%s: Can not alloc seq: rc = %d\n",
2840                                obd->obd_name, rc);
2841                         GOTO(out, rc);
2842                 }
2843
2844                 if (cfs_copy_to_user(data->ioc_pbuf1, &seq, data->ioc_plen1))
2845                         return -EFAULT;
2846
2847                 max_count = LUSTRE_SEQ_MAX_WIDTH;
2848                 if (cfs_copy_to_user(data->ioc_pbuf2, &max_count,
2849                                      data->ioc_plen2))
2850                         return -EFAULT;
2851                 GOTO(out, rc);
2852         }
2853         case OBD_IOC_DESTROY:
2854                 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
2855                         GOTO (out, rc = -EPERM);
2856
2857                 rc = echo_get_object(&eco, ed, oa);
2858                 if (rc == 0) {
2859                         rc = obd_destroy(env, ec->ec_exp, oa, eco->eo_lsm,
2860                                          &dummy_oti, NULL, NULL);
2861                         if (rc == 0)
2862                                 eco->eo_deleted = 1;
2863                         echo_put_object(eco);
2864                 }
2865                 GOTO(out, rc);
2866
2867         case OBD_IOC_GETATTR:
2868                 rc = echo_get_object(&eco, ed, oa);
2869                 if (rc == 0) {
2870                         struct obd_info oinfo = { { { 0 } } };
2871                         oinfo.oi_md = eco->eo_lsm;
2872                         oinfo.oi_oa = oa;
2873                         rc = obd_getattr(env, ec->ec_exp, &oinfo);
2874                         echo_put_object(eco);
2875                 }
2876                 GOTO(out, rc);
2877
2878         case OBD_IOC_SETATTR:
2879                 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
2880                         GOTO (out, rc = -EPERM);
2881
2882                 rc = echo_get_object(&eco, ed, oa);
2883                 if (rc == 0) {
2884                         struct obd_info oinfo = { { { 0 } } };
2885                         oinfo.oi_oa = oa;
2886                         oinfo.oi_md = eco->eo_lsm;
2887
2888                         rc = obd_setattr(env, ec->ec_exp, &oinfo, NULL);
2889                         echo_put_object(eco);
2890                 }
2891                 GOTO(out, rc);
2892
2893         case OBD_IOC_BRW_WRITE:
2894                 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
2895                         GOTO (out, rc = -EPERM);
2896
2897                 rw = OBD_BRW_WRITE;
2898                 /* fall through */
2899         case OBD_IOC_BRW_READ:
2900                 rc = echo_client_brw_ioctl(env, rw, exp, data, &dummy_oti);
2901                 GOTO(out, rc);
2902
2903         case ECHO_IOC_GET_STRIPE:
2904                 rc = echo_get_object(&eco, ed, oa);
2905                 if (rc == 0) {
2906                         rc = echo_copyout_lsm(eco->eo_lsm, data->ioc_pbuf1,
2907                                               data->ioc_plen1);
2908                         echo_put_object(eco);
2909                 }
2910                 GOTO(out, rc);
2911
2912         case ECHO_IOC_SET_STRIPE:
2913                 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
2914                         GOTO (out, rc = -EPERM);
2915
2916                 if (data->ioc_pbuf1 == NULL) {  /* unset */
2917                         rc = echo_get_object(&eco, ed, oa);
2918                         if (rc == 0) {
2919                                 eco->eo_deleted = 1;
2920                                 echo_put_object(eco);
2921                         }
2922                 } else {
2923                         rc = echo_create_object(env, ed, 0, oa,
2924                                                 data->ioc_pbuf1,
2925                                                 data->ioc_plen1, &dummy_oti);
2926                 }
2927                 GOTO (out, rc);
2928
2929         case ECHO_IOC_ENQUEUE:
2930                 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
2931                         GOTO (out, rc = -EPERM);
2932
2933                 rc = echo_client_enqueue(exp, oa,
2934                                          data->ioc_conn1, /* lock mode */
2935                                          data->ioc_offset,
2936                                          data->ioc_count);/*extent*/
2937                 GOTO (out, rc);
2938
2939         case ECHO_IOC_CANCEL:
2940                 rc = echo_client_cancel(exp, oa);
2941                 GOTO (out, rc);
2942
2943         default:
2944                 CERROR ("echo_ioctl(): unrecognised ioctl %#x\n", cmd);
2945                 GOTO (out, rc = -ENOTTY);
2946         }
2947
2948         EXIT;
2949 out:
2950         lu_env_fini(env);
2951         OBD_FREE_PTR(env);
2952
2953         /* XXX this should be in a helper also called by target_send_reply */
2954         for (ack_lock = dummy_oti.oti_ack_locks, i = 0; i < 4;
2955              i++, ack_lock++) {
2956                 if (!ack_lock->mode)
2957                         break;
2958                 ldlm_lock_decref(&ack_lock->lock, ack_lock->mode);
2959         }
2960
2961         return rc;
2962 }
2963
2964 static int echo_client_setup(const struct lu_env *env,
2965                              struct obd_device *obddev, struct lustre_cfg *lcfg)
2966 {
2967         struct echo_client_obd *ec = &obddev->u.echo_client;
2968         struct obd_device *tgt;
2969         struct obd_uuid echo_uuid = { "ECHO_UUID" };
2970         struct obd_connect_data *ocd = NULL;
2971         int rc;
2972         ENTRY;
2973
2974         if (lcfg->lcfg_bufcount < 2 || LUSTRE_CFG_BUFLEN(lcfg, 1) < 1) {
2975                 CERROR("requires a TARGET OBD name\n");
2976                 RETURN(-EINVAL);
2977         }
2978
2979         tgt = class_name2obd(lustre_cfg_string(lcfg, 1));
2980         if (!tgt || !tgt->obd_attached || !tgt->obd_set_up) {
2981                 CERROR("device not attached or not set up (%s)\n",
2982                        lustre_cfg_string(lcfg, 1));
2983                 RETURN(-EINVAL);
2984         }
2985
2986         cfs_spin_lock_init (&ec->ec_lock);
2987         CFS_INIT_LIST_HEAD (&ec->ec_objects);
2988         CFS_INIT_LIST_HEAD (&ec->ec_locks);
2989         ec->ec_unique = 0;
2990         ec->ec_nstripes = 0;
2991
2992         if (!strcmp(tgt->obd_type->typ_name, LUSTRE_MDT_NAME)) {
2993                 lu_context_tags_update(ECHO_MD_CTX_TAG);
2994                 lu_session_tags_update(ECHO_MD_SES_TAG);
2995                 RETURN(0);
2996         }
2997
2998         OBD_ALLOC(ocd, sizeof(*ocd));
2999         if (ocd == NULL) {
3000                 CERROR("Can't alloc ocd connecting to %s\n",
3001                        lustre_cfg_string(lcfg, 1));
3002                 return -ENOMEM;
3003         }
3004
3005         ocd->ocd_connect_flags = OBD_CONNECT_VERSION | OBD_CONNECT_REQPORTAL |
3006                                  OBD_CONNECT_GRANT | OBD_CONNECT_FULL20 |
3007                                  OBD_CONNECT_64BITHASH;
3008         ocd->ocd_version = LUSTRE_VERSION_CODE;
3009         ocd->ocd_group = FID_SEQ_ECHO;
3010
3011         rc = obd_connect(env, &ec->ec_exp, tgt, &echo_uuid, ocd, NULL);
3012         if (rc == 0) {
3013                 /* Turn off pinger because it connects to tgt obd directly. */
3014                 cfs_spin_lock(&tgt->obd_dev_lock);
3015                 cfs_list_del_init(&ec->ec_exp->exp_obd_chain_timed);
3016                 cfs_spin_unlock(&tgt->obd_dev_lock);
3017         }
3018
3019         OBD_FREE(ocd, sizeof(*ocd));
3020
3021         if (rc != 0) {
3022                 CERROR("fail to connect to device %s\n",
3023                        lustre_cfg_string(lcfg, 1));
3024                 return (rc);
3025         }
3026
3027         RETURN(rc);
3028 }
3029
3030 static int echo_client_cleanup(struct obd_device *obddev)
3031 {
3032         struct echo_device *ed = obd2echo_dev(obddev);
3033         struct echo_client_obd *ec = &obddev->u.echo_client;
3034         int rc;
3035         ENTRY;
3036
3037         /*Do nothing for Metadata echo client*/
3038         if (ed == NULL )
3039                 RETURN(0);
3040
3041         if (ed->ed_next_ismd) {
3042                 lu_context_tags_clear(ECHO_MD_CTX_TAG);
3043                 lu_session_tags_clear(ECHO_MD_SES_TAG);
3044                 RETURN(0);
3045         }
3046
3047         if (!cfs_list_empty(&obddev->obd_exports)) {
3048                 CERROR("still has clients!\n");
3049                 RETURN(-EBUSY);
3050         }
3051
3052         LASSERT(cfs_atomic_read(&ec->ec_exp->exp_refcount) > 0);
3053         rc = obd_disconnect(ec->ec_exp);
3054         if (rc != 0)
3055                 CERROR("fail to disconnect device: %d\n", rc);
3056
3057         RETURN(rc);
3058 }
3059
3060 static int echo_client_connect(const struct lu_env *env,
3061                                struct obd_export **exp,
3062                                struct obd_device *src, struct obd_uuid *cluuid,
3063                                struct obd_connect_data *data, void *localdata)
3064 {
3065         int                rc;
3066         struct lustre_handle conn = { 0 };
3067
3068         ENTRY;
3069         rc = class_connect(&conn, src, cluuid);
3070         if (rc == 0) {
3071                 *exp = class_conn2export(&conn);
3072         }
3073
3074         RETURN (rc);
3075 }
3076
3077 static int echo_client_disconnect(struct obd_export *exp)
3078 {
3079 #if 0
3080         struct obd_device      *obd;
3081         struct echo_client_obd *ec;
3082         struct ec_lock         *ecl;
3083 #endif
3084         int                     rc;
3085         ENTRY;
3086
3087         if (exp == NULL)
3088                 GOTO(out, rc = -EINVAL);
3089
3090 #if 0
3091         obd = exp->exp_obd;
3092         ec = &obd->u.echo_client;
3093
3094         /* no more contention on export's lock list */
3095         while (!cfs_list_empty (&exp->exp_ec_data.eced_locks)) {
3096                 ecl = cfs_list_entry (exp->exp_ec_data.eced_locks.next,
3097                                       struct ec_lock, ecl_exp_chain);
3098                 cfs_list_del (&ecl->ecl_exp_chain);
3099
3100                 rc = obd_cancel(ec->ec_exp, ecl->ecl_object->eco_lsm,
3101                                  ecl->ecl_mode, &ecl->ecl_lock_handle);
3102
3103                 CDEBUG (D_INFO, "Cancel lock on object "LPX64" on disconnect "
3104                         "(%d)\n", ecl->ecl_object->eco_id, rc);
3105
3106                 echo_put_object (ecl->ecl_object);
3107                 OBD_FREE (ecl, sizeof (*ecl));
3108         }
3109 #endif
3110
3111         rc = class_disconnect(exp);
3112         GOTO(out, rc);
3113  out:
3114         return rc;
3115 }
3116
3117 static struct obd_ops echo_client_obd_ops = {
3118         .o_owner       = THIS_MODULE,
3119
3120 #if 0
3121         .o_setup       = echo_client_setup,
3122         .o_cleanup     = echo_client_cleanup,
3123 #endif
3124
3125         .o_iocontrol   = echo_client_iocontrol,
3126         .o_connect     = echo_client_connect,
3127         .o_disconnect  = echo_client_disconnect
3128 };
3129
3130 int echo_client_init(void)
3131 {
3132         struct lprocfs_static_vars lvars = { 0 };
3133         int rc;
3134
3135         lprocfs_echo_init_vars(&lvars);
3136
3137         rc = lu_kmem_init(echo_caches);
3138         if (rc == 0) {
3139                 rc = class_register_type(&echo_client_obd_ops, NULL,
3140                                          lvars.module_vars,
3141                                          LUSTRE_ECHO_CLIENT_NAME,
3142                                          &echo_device_type);
3143                 if (rc)
3144                         lu_kmem_fini(echo_caches);
3145         }
3146         return rc;
3147 }
3148
3149 void echo_client_exit(void)
3150 {
3151         class_unregister_type(LUSTRE_ECHO_CLIENT_NAME);
3152         lu_kmem_fini(echo_caches);
3153 }
3154
3155 #ifdef __KERNEL__
3156 static int __init obdecho_init(void)
3157 {
3158         struct lprocfs_static_vars lvars;
3159         int rc;
3160
3161         ENTRY;
3162         LCONSOLE_INFO("Echo OBD driver; http://www.lustre.org/\n");
3163
3164         LASSERT(CFS_PAGE_SIZE % OBD_ECHO_BLOCK_SIZE == 0);
3165
3166         lprocfs_echo_init_vars(&lvars);
3167
3168 # ifdef HAVE_SERVER_SUPPORT
3169         rc = echo_persistent_pages_init();
3170         if (rc != 0)
3171                 goto failed_0;
3172
3173         rc = class_register_type(&echo_obd_ops, NULL, lvars.module_vars,
3174                                  LUSTRE_ECHO_NAME, NULL);
3175         if (rc != 0)
3176                 goto failed_1;
3177 # endif
3178
3179         rc = echo_client_init();
3180
3181 # ifdef HAVE_SERVER_SUPPORT
3182         if (rc == 0)
3183                 RETURN(0);
3184
3185         class_unregister_type(LUSTRE_ECHO_NAME);
3186 failed_1:
3187         echo_persistent_pages_fini();
3188 failed_0:
3189 # endif
3190         RETURN(rc);
3191 }
3192
/**
 * Module cleanup: shut the echo client down and, with HAVE_SERVER_SUPPORT,
 * unregister the LUSTRE_ECHO_NAME OBD type and release the persistent
 * echo pages.
 */
static void /*__exit*/ obdecho_exit(void)
{
        /* the client part goes first */
        echo_client_exit();

# ifdef HAVE_SERVER_SUPPORT
        class_unregister_type(LUSTRE_ECHO_NAME);

        echo_persistent_pages_fini();
# endif
}
3202
/* Module metadata: author, description and license. */
MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
MODULE_DESCRIPTION("Lustre Testing Echo OBD driver");
MODULE_LICENSE("GPL");

/* Declare the obdecho module with obdecho_init/obdecho_exit as its hooks. */
cfs_module(obdecho, LUSTRE_VERSION_STRING, obdecho_init, obdecho_exit);
3208 #endif /* __KERNEL__ */
3209
3210 /** @} echo_client */