Whamcloud - gitweb
29e4a91a5fceb1efa673a73f8d822a4be273d14d
[fs/lustre-release.git] / lustre / obdecho / echo_client.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
19  *
20  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21  * CA 95054 USA or visit www.sun.com if you need additional information or
22  * have any questions.
23  *
24  * GPL HEADER END
25  */
26 /*
27  * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
28  * Use is subject to license terms.
29  *
30  * Copyright (c) 2011, 2013, Intel Corporation.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  */
36
37 #define DEBUG_SUBSYSTEM S_ECHO
38 #ifdef __KERNEL__
39 #include <libcfs/libcfs.h>
40 #else
41 #include <liblustre.h>
42 #endif
43
44 #include <obd.h>
45 #include <obd_support.h>
46 #include <obd_class.h>
47 #include <lustre_debug.h>
48 #include <lprocfs_status.h>
49 #include <cl_object.h>
50 #include <md_object.h>
51 #include <lustre_fid.h>
52 #include <lustre_acl.h>
53 #include <lustre_ioctl.h>
54 #include <lustre_net.h>
55
56 #include "echo_internal.h"
57
58 /** \defgroup echo_client Echo Client
59  * @{
60  */
61
62 struct echo_device {
63         struct cl_device        ed_cl;
64         struct echo_client_obd *ed_ec;
65
66         struct cl_site          ed_site_myself;
67         struct cl_site         *ed_site;
68         struct lu_device       *ed_next;
69         int                     ed_next_islov;
70         int                     ed_next_ismd;
71         struct lu_client_seq   *ed_cl_seq;
72 };
73
74 struct echo_object {
75         struct cl_object        eo_cl;
76         struct cl_object_header eo_hdr;
77
78         struct echo_device     *eo_dev;
79         struct list_head        eo_obj_chain;
80         struct lov_stripe_md   *eo_lsm;
81         atomic_t                eo_npages;
82         int                     eo_deleted;
83 };
84
85 struct echo_object_conf {
86         struct cl_object_conf  eoc_cl;
87         struct lov_stripe_md **eoc_md;
88 };
89
90 struct echo_page {
91         struct cl_page_slice    ep_cl;
92         struct mutex            ep_lock;
93 };
94
95 struct echo_lock {
96         struct cl_lock_slice    el_cl;
97         struct list_head        el_chain;
98         struct echo_object     *el_object;
99         __u64                   el_cookie;
100         atomic_t                el_refcount;
101 };
102
/* Defined later in this file; used by echo_device_alloc(). */
static int
echo_client_setup(const struct lu_env *env, struct obd_device *obddev,
                  struct lustre_cfg *lcfg);
static int
echo_client_cleanup(struct obd_device *obddev);
107
108
109 /** \defgroup echo_helpers Helper functions
110  * @{
111  */
112 static inline struct echo_device *cl2echo_dev(const struct cl_device *dev)
113 {
114         return container_of0(dev, struct echo_device, ed_cl);
115 }
116
117 static inline struct cl_device *echo_dev2cl(struct echo_device *d)
118 {
119         return &d->ed_cl;
120 }
121
122 static inline struct echo_device *obd2echo_dev(const struct obd_device *obd)
123 {
124         return cl2echo_dev(lu2cl_dev(obd->obd_lu_dev));
125 }
126
127 static inline struct cl_object *echo_obj2cl(struct echo_object *eco)
128 {
129         return &eco->eo_cl;
130 }
131
132 static inline struct echo_object *cl2echo_obj(const struct cl_object *o)
133 {
134         return container_of(o, struct echo_object, eo_cl);
135 }
136
137 static inline struct echo_page *cl2echo_page(const struct cl_page_slice *s)
138 {
139         return container_of(s, struct echo_page, ep_cl);
140 }
141
142 static inline struct echo_lock *cl2echo_lock(const struct cl_lock_slice *s)
143 {
144         return container_of(s, struct echo_lock, el_cl);
145 }
146
147 static inline struct cl_lock *echo_lock2cl(const struct echo_lock *ecl)
148 {
149         return ecl->el_cl.cls_lock;
150 }
151
152 static struct lu_context_key echo_thread_key;
153 static inline struct echo_thread_info *echo_env_info(const struct lu_env *env)
154 {
155         struct echo_thread_info *info;
156         info = lu_context_key_get(&env->le_ctx, &echo_thread_key);
157         LASSERT(info != NULL);
158         return info;
159 }
160
161 static inline
162 struct echo_object_conf *cl2echo_conf(const struct cl_object_conf *c)
163 {
164         return container_of(c, struct echo_object_conf, eoc_cl);
165 }
166
167 /** @} echo_helpers */
168
169 static struct echo_object *cl_echo_object_find(struct echo_device *d,
170                                                struct lov_stripe_md **lsm);
171 static int cl_echo_object_put(struct echo_object *eco);
172 static int cl_echo_object_brw(struct echo_object *eco, int rw, obd_off offset,
173                               struct page **pages, int npages, int async);
174
175 struct echo_thread_info {
176         struct echo_object_conf eti_conf;
177         struct lustre_md        eti_md;
178
179         struct cl_2queue        eti_queue;
180         struct cl_io            eti_io;
181         struct cl_lock_descr    eti_descr;
182         struct lu_fid           eti_fid;
183         struct lu_fid           eti_fid2;
184 #ifdef HAVE_SERVER_SUPPORT
185         struct md_op_spec       eti_spec;
186         struct lov_mds_md_v3    eti_lmm;
187         struct lov_user_md_v3   eti_lum;
188         struct md_attr          eti_ma;
189         struct lu_name          eti_lname;
190         /* per-thread values, can be re-used */
191         void                    *eti_big_lmm;
192         int                     eti_big_lmmsize;
193         char                    eti_name[20];
194         struct lu_buf           eti_buf;
195         char                    eti_xattr_buf[LUSTRE_POSIX_ACL_MAX_SIZE];
196 #endif
197 };
198
/*
 * Session-scope data. No session state is used right now, so this only
 * carries a placeholder member; it is what echo_session_kmem is sized for.
 */
struct echo_session_info {
        unsigned long dummy;
};
203
/* Slab caches of the echo client; each one is described in echo_caches[]. */
static struct kmem_cache *echo_lock_kmem;    /* struct echo_lock */
static struct kmem_cache *echo_object_kmem;  /* struct echo_object */
static struct kmem_cache *echo_thread_kmem;  /* struct echo_thread_info */
static struct kmem_cache *echo_session_kmem; /* struct echo_session_info */
/* static struct kmem_cache *echo_req_kmem; */
209
210 static struct lu_kmem_descr echo_caches[] = {
211         {
212                 .ckd_cache = &echo_lock_kmem,
213                 .ckd_name  = "echo_lock_kmem",
214                 .ckd_size  = sizeof (struct echo_lock)
215         },
216         {
217                 .ckd_cache = &echo_object_kmem,
218                 .ckd_name  = "echo_object_kmem",
219                 .ckd_size  = sizeof (struct echo_object)
220         },
221         {
222                 .ckd_cache = &echo_thread_kmem,
223                 .ckd_name  = "echo_thread_kmem",
224                 .ckd_size  = sizeof (struct echo_thread_info)
225         },
226         {
227                 .ckd_cache = &echo_session_kmem,
228                 .ckd_name  = "echo_session_kmem",
229                 .ckd_size  = sizeof (struct echo_session_info)
230         },
231         {
232                 .ckd_cache = NULL
233         }
234 };
235
236 /** \defgroup echo_page Page operations
237  *
238  * Echo page operations.
239  *
240  * @{
241  */
242 static int echo_page_own(const struct lu_env *env,
243                          const struct cl_page_slice *slice,
244                          struct cl_io *io, int nonblock)
245 {
246         struct echo_page *ep = cl2echo_page(slice);
247
248         if (!nonblock)
249                 mutex_lock(&ep->ep_lock);
250         else if (!mutex_trylock(&ep->ep_lock))
251                 return -EAGAIN;
252         return 0;
253 }
254
255 static void echo_page_disown(const struct lu_env *env,
256                              const struct cl_page_slice *slice,
257                              struct cl_io *io)
258 {
259         struct echo_page *ep = cl2echo_page(slice);
260
261         LASSERT(mutex_is_locked(&ep->ep_lock));
262         mutex_unlock(&ep->ep_lock);
263 }
264
265 static void echo_page_discard(const struct lu_env *env,
266                               const struct cl_page_slice *slice,
267                               struct cl_io *unused)
268 {
269         cl_page_delete(env, slice->cpl_page);
270 }
271
272 static int echo_page_is_vmlocked(const struct lu_env *env,
273                                  const struct cl_page_slice *slice)
274 {
275         if (mutex_is_locked(&cl2echo_page(slice)->ep_lock))
276                 return -EBUSY;
277         return -ENODATA;
278 }
279
280 static void echo_page_completion(const struct lu_env *env,
281                                  const struct cl_page_slice *slice,
282                                  int ioret)
283 {
284         LASSERT(slice->cpl_page->cp_sync_io != NULL);
285 }
286
287 static void echo_page_fini(const struct lu_env *env,
288                            struct cl_page_slice *slice)
289 {
290         struct echo_object *eco = cl2echo_obj(slice->cpl_obj);
291         ENTRY;
292
293         atomic_dec(&eco->eo_npages);
294         page_cache_release(slice->cpl_page->cp_vmpage);
295         EXIT;
296 }
297
/**
 * cpo_prep method for reads and writes: nothing to prepare.
 *
 * \retval 0 always
 */
static int echo_page_prep(const struct lu_env *env,
                          const struct cl_page_slice *slice,
                          struct cl_io *unused)
{
        (void)env;
        (void)slice;
        (void)unused;

        return 0;
}
304
305 static int echo_page_print(const struct lu_env *env,
306                            const struct cl_page_slice *slice,
307                            void *cookie, lu_printer_t printer)
308 {
309         struct echo_page *ep = cl2echo_page(slice);
310
311         (*printer)(env, cookie, LUSTRE_ECHO_CLIENT_NAME"-page@%p %d vm@%p\n",
312                    ep, mutex_is_locked(&ep->ep_lock),
313                    slice->cpl_page->cp_vmpage);
314         return 0;
315 }
316
317 static const struct cl_page_operations echo_page_ops = {
318         .cpo_own           = echo_page_own,
319         .cpo_disown        = echo_page_disown,
320         .cpo_discard       = echo_page_discard,
321         .cpo_fini          = echo_page_fini,
322         .cpo_print         = echo_page_print,
323         .cpo_is_vmlocked   = echo_page_is_vmlocked,
324         .io = {
325                 [CRT_READ] = {
326                         .cpo_prep        = echo_page_prep,
327                         .cpo_completion  = echo_page_completion,
328                 },
329                 [CRT_WRITE] = {
330                         .cpo_prep        = echo_page_prep,
331                         .cpo_completion  = echo_page_completion,
332                 }
333         }
334 };
335 /** @} echo_page */
336
337 /** \defgroup echo_lock Locking
338  *
339  * echo lock operations
340  *
341  * @{
342  */
343 static void echo_lock_fini(const struct lu_env *env,
344                            struct cl_lock_slice *slice)
345 {
346         struct echo_lock *ecl = cl2echo_lock(slice);
347
348         LASSERT(list_empty(&ecl->el_chain));
349         OBD_SLAB_FREE_PTR(ecl, echo_lock_kmem);
350 }
351
352 static void echo_lock_delete(const struct lu_env *env,
353                              const struct cl_lock_slice *slice)
354 {
355         struct echo_lock *ecl      = cl2echo_lock(slice);
356
357         LASSERT(list_empty(&ecl->el_chain));
358 }
359
/**
 * clo_fits_into method: the echo layer accepts any lock description.
 *
 * \retval 1 always
 */
static int echo_lock_fits_into(const struct lu_env *env,
                               const struct cl_lock_slice *slice,
                               const struct cl_lock_descr *need,
                               const struct cl_io *unused)
{
        (void)env;
        (void)slice;
        (void)need;
        (void)unused;

        return 1;
}
367
368 static struct cl_lock_operations echo_lock_ops = {
369         .clo_fini      = echo_lock_fini,
370         .clo_delete    = echo_lock_delete,
371         .clo_fits_into = echo_lock_fits_into
372 };
373
374 /** @} echo_lock */
375
376 /** \defgroup echo_cl_ops cl_object operations
377  *
378  * operations for cl_object
379  *
380  * @{
381  */
382 static int echo_page_init(const struct lu_env *env, struct cl_object *obj,
383                           struct cl_page *page, pgoff_t index)
384 {
385         struct echo_page *ep = cl_object_page_slice(obj, page);
386         struct echo_object *eco = cl2echo_obj(obj);
387         ENTRY;
388
389         page_cache_get(page->cp_vmpage);
390         mutex_init(&ep->ep_lock);
391         cl_page_slice_add(page, &ep->ep_cl, obj, index, &echo_page_ops);
392         atomic_inc(&eco->eo_npages);
393         RETURN(0);
394 }
395
/**
 * coo_io_init method: the echo layer keeps no per-io state.
 *
 * \retval 0 always
 */
static int echo_io_init(const struct lu_env *env, struct cl_object *obj,
                        struct cl_io *io)
{
        (void)env;
        (void)obj;
        (void)io;

        return 0;
}
401
402 static int echo_lock_init(const struct lu_env *env,
403                           struct cl_object *obj, struct cl_lock *lock,
404                           const struct cl_io *unused)
405 {
406         struct echo_lock *el;
407         ENTRY;
408
409         OBD_SLAB_ALLOC_PTR_GFP(el, echo_lock_kmem, GFP_NOFS);
410         if (el != NULL) {
411                 cl_lock_slice_add(lock, &el->el_cl, obj, &echo_lock_ops);
412                 el->el_object = cl2echo_obj(obj);
413                 INIT_LIST_HEAD(&el->el_chain);
414                 atomic_set(&el->el_refcount, 0);
415         }
416         RETURN(el == NULL ? -ENOMEM : 0);
417 }
418
/**
 * coo_conf_set method: configuration changes are accepted and ignored.
 *
 * \retval 0 always
 */
static int echo_conf_set(const struct lu_env *env, struct cl_object *obj,
                         const struct cl_object_conf *conf)
{
        (void)env;
        (void)obj;
        (void)conf;

        return 0;
}
424
425 static const struct cl_object_operations echo_cl_obj_ops = {
426         .coo_page_init = echo_page_init,
427         .coo_lock_init = echo_lock_init,
428         .coo_io_init   = echo_io_init,
429         .coo_conf_set  = echo_conf_set
430 };
431 /** @} echo_cl_ops */
432
433 /** \defgroup echo_lu_ops lu_object operations
434  *
435  * operations for echo lu object.
436  *
437  * @{
438  */
439 static int echo_object_init(const struct lu_env *env, struct lu_object *obj,
440                             const struct lu_object_conf *conf)
441 {
442         struct echo_device *ed         = cl2echo_dev(lu2cl_dev(obj->lo_dev));
443         struct echo_client_obd *ec     = ed->ed_ec;
444         struct echo_object *eco        = cl2echo_obj(lu2cl(obj));
445         ENTRY;
446
447         if (ed->ed_next) {
448                 struct lu_object  *below;
449                 struct lu_device  *under;
450
451                 under = ed->ed_next;
452                 below = under->ld_ops->ldo_object_alloc(env, obj->lo_header,
453                                                         under);
454                 if (below == NULL)
455                         RETURN(-ENOMEM);
456                 lu_object_add(obj, below);
457         }
458
459         if (!ed->ed_next_ismd) {
460                 const struct cl_object_conf *cconf = lu2cl_conf(conf);
461                 struct echo_object_conf *econf = cl2echo_conf(cconf);
462
463                 LASSERT(econf->eoc_md);
464                 eco->eo_lsm = *econf->eoc_md;
465                 /* clear the lsm pointer so that it won't get freed. */
466                 *econf->eoc_md = NULL;
467         } else {
468                 eco->eo_lsm = NULL;
469         }
470
471         eco->eo_dev = ed;
472         atomic_set(&eco->eo_npages, 0);
473         cl_object_page_init(lu2cl(obj), sizeof(struct echo_page));
474
475         spin_lock(&ec->ec_lock);
476         list_add_tail(&eco->eo_obj_chain, &ec->ec_objects);
477         spin_unlock(&ec->ec_lock);
478
479         RETURN(0);
480 }
481
482 /* taken from osc_unpackmd() */
483 static int echo_alloc_memmd(struct echo_device *ed,
484                             struct lov_stripe_md **lsmp)
485 {
486         int lsm_size;
487
488         ENTRY;
489
490         /* If export is lov/osc then use their obd method */
491         if (ed->ed_next != NULL)
492                 return obd_alloc_memmd(ed->ed_ec->ec_exp, lsmp);
493         /* OFD has no unpackmd method, do everything here */
494         lsm_size = lov_stripe_md_size(1);
495
496         LASSERT(*lsmp == NULL);
497         OBD_ALLOC(*lsmp, lsm_size);
498         if (*lsmp == NULL)
499                 RETURN(-ENOMEM);
500
501         OBD_ALLOC((*lsmp)->lsm_oinfo[0], sizeof(struct lov_oinfo));
502         if ((*lsmp)->lsm_oinfo[0] == NULL) {
503                 OBD_FREE(*lsmp, lsm_size);
504                 RETURN(-ENOMEM);
505         }
506
507         loi_init((*lsmp)->lsm_oinfo[0]);
508         (*lsmp)->lsm_maxbytes = LUSTRE_STRIPE_MAXBYTES;
509         ostid_set_seq_echo(&(*lsmp)->lsm_oi);
510
511         RETURN(lsm_size);
512 }
513
514 static int echo_free_memmd(struct echo_device *ed, struct lov_stripe_md **lsmp)
515 {
516         int lsm_size;
517
518         ENTRY;
519
520         /* If export is lov/osc then use their obd method */
521         if (ed->ed_next != NULL)
522                 return obd_free_memmd(ed->ed_ec->ec_exp, lsmp);
523         /* OFD has no unpackmd method, do everything here */
524         lsm_size = lov_stripe_md_size(1);
525
526         LASSERT(*lsmp != NULL);
527         OBD_FREE((*lsmp)->lsm_oinfo[0], sizeof(struct lov_oinfo));
528         OBD_FREE(*lsmp, lsm_size);
529         *lsmp = NULL;
530         RETURN(0);
531 }
532
533 static void echo_object_free(const struct lu_env *env, struct lu_object *obj)
534 {
535         struct echo_object *eco    = cl2echo_obj(lu2cl(obj));
536         struct echo_client_obd *ec = eco->eo_dev->ed_ec;
537         ENTRY;
538
539         LASSERT(atomic_read(&eco->eo_npages) == 0);
540
541         spin_lock(&ec->ec_lock);
542         list_del_init(&eco->eo_obj_chain);
543         spin_unlock(&ec->ec_lock);
544
545         lu_object_fini(obj);
546         lu_object_header_fini(obj->lo_header);
547
548         if (eco->eo_lsm)
549                 echo_free_memmd(eco->eo_dev, &eco->eo_lsm);
550         OBD_SLAB_FREE_PTR(eco, echo_object_kmem);
551         EXIT;
552 }
553
554 static int echo_object_print(const struct lu_env *env, void *cookie,
555                             lu_printer_t p, const struct lu_object *o)
556 {
557         struct echo_object *obj = cl2echo_obj(lu2cl(o));
558
559         return (*p)(env, cookie, "echoclient-object@%p", obj);
560 }
561
562 static const struct lu_object_operations echo_lu_obj_ops = {
563         .loo_object_init      = echo_object_init,
564         .loo_object_delete    = NULL,
565         .loo_object_release   = NULL,
566         .loo_object_free      = echo_object_free,
567         .loo_object_print     = echo_object_print,
568         .loo_object_invariant = NULL
569 };
570 /** @} echo_lu_ops */
571
572 /** \defgroup echo_lu_dev_ops  lu_device operations
573  *
574  * Operations for echo lu device.
575  *
576  * @{
577  */
578 static struct lu_object *echo_object_alloc(const struct lu_env *env,
579                                            const struct lu_object_header *hdr,
580                                            struct lu_device *dev)
581 {
582         struct echo_object *eco;
583         struct lu_object *obj = NULL;
584         ENTRY;
585
586         /* we're the top dev. */
587         LASSERT(hdr == NULL);
588         OBD_SLAB_ALLOC_PTR_GFP(eco, echo_object_kmem, GFP_NOFS);
589         if (eco != NULL) {
590                 struct cl_object_header *hdr = &eco->eo_hdr;
591
592                 obj = &echo_obj2cl(eco)->co_lu;
593                 cl_object_header_init(hdr);
594                 hdr->coh_page_bufsize = cfs_size_round(sizeof(struct cl_page));
595
596                 lu_object_init(obj, &hdr->coh_lu, dev);
597                 lu_object_add_top(&hdr->coh_lu, obj);
598
599                 eco->eo_cl.co_ops = &echo_cl_obj_ops;
600                 obj->lo_ops       = &echo_lu_obj_ops;
601         }
602         RETURN(obj);
603 }
604
605 static struct lu_device_operations echo_device_lu_ops = {
606         .ldo_object_alloc   = echo_object_alloc,
607 };
608
609 /** @} echo_lu_dev_ops */
610
611 static struct cl_device_operations echo_device_cl_ops = {
612 };
613
614 /** \defgroup echo_init Setup and teardown
615  *
616  * Init and fini functions for echo client.
617  *
618  * @{
619  */
620 static int echo_site_init(const struct lu_env *env, struct echo_device *ed)
621 {
622         struct cl_site *site = &ed->ed_site_myself;
623         int rc;
624
625         /* initialize site */
626         rc = cl_site_init(site, &ed->ed_cl);
627         if (rc) {
628                 CERROR("Cannot initilize site for echo client(%d)\n", rc);
629                 return rc;
630         }
631
632         rc = lu_site_init_finish(&site->cs_lu);
633         if (rc)
634                 return rc;
635
636         ed->ed_site = site;
637         return 0;
638 }
639
640 static void echo_site_fini(const struct lu_env *env, struct echo_device *ed)
641 {
642         if (ed->ed_site) {
643                 if (!ed->ed_next_ismd)
644                         cl_site_fini(ed->ed_site);
645                 ed->ed_site = NULL;
646         }
647 }
648
649 static void *echo_thread_key_init(const struct lu_context *ctx,
650                                   struct lu_context_key *key)
651 {
652         struct echo_thread_info *info;
653
654         OBD_SLAB_ALLOC_PTR_GFP(info, echo_thread_kmem, GFP_NOFS);
655         if (info == NULL)
656                 info = ERR_PTR(-ENOMEM);
657         return info;
658 }
659
660 static void echo_thread_key_fini(const struct lu_context *ctx,
661                          struct lu_context_key *key, void *data)
662 {
663         struct echo_thread_info *info = data;
664         OBD_SLAB_FREE_PTR(info, echo_thread_kmem);
665 }
666
/**
 * lct_exit method of echo_thread_key: nothing to do.
 */
static void echo_thread_key_exit(const struct lu_context *ctx,
                         struct lu_context_key *key, void *data)
{
        (void)ctx;
        (void)key;
        (void)data;
}
671
672 static struct lu_context_key echo_thread_key = {
673         .lct_tags = LCT_CL_THREAD,
674         .lct_init = echo_thread_key_init,
675         .lct_fini = echo_thread_key_fini,
676         .lct_exit = echo_thread_key_exit
677 };
678
679 static void *echo_session_key_init(const struct lu_context *ctx,
680                                   struct lu_context_key *key)
681 {
682         struct echo_session_info *session;
683
684         OBD_SLAB_ALLOC_PTR_GFP(session, echo_session_kmem, GFP_NOFS);
685         if (session == NULL)
686                 session = ERR_PTR(-ENOMEM);
687         return session;
688 }
689
690 static void echo_session_key_fini(const struct lu_context *ctx,
691                                  struct lu_context_key *key, void *data)
692 {
693         struct echo_session_info *session = data;
694         OBD_SLAB_FREE_PTR(session, echo_session_kmem);
695 }
696
/**
 * lct_exit method of echo_session_key: nothing to do.
 */
static void echo_session_key_exit(const struct lu_context *ctx,
                                 struct lu_context_key *key, void *data)
{
        (void)ctx;
        (void)key;
        (void)data;
}
701
702 static struct lu_context_key echo_session_key = {
703         .lct_tags = LCT_SESSION,
704         .lct_init = echo_session_key_init,
705         .lct_fini = echo_session_key_fini,
706         .lct_exit = echo_session_key_exit
707 };
708
/*
 * Tie the thread and session context keys to the "echo" device type.
 * NOTE(review): the macro presumably generates the type-level init/fini/
 * start/stop helpers that register these keys - confirm against its
 * definition.
 */
LU_TYPE_INIT_FINI(echo, &echo_thread_key, &echo_session_key);
710
711 #ifdef HAVE_SERVER_SUPPORT
712 # define ECHO_SEQ_WIDTH 0xffffffff
713 static int echo_fid_init(struct echo_device *ed, char *obd_name,
714                          struct seq_server_site *ss)
715 {
716         char *prefix;
717         int rc;
718         ENTRY;
719
720         OBD_ALLOC_PTR(ed->ed_cl_seq);
721         if (ed->ed_cl_seq == NULL)
722                 RETURN(-ENOMEM);
723
724         OBD_ALLOC(prefix, MAX_OBD_NAME + 5);
725         if (prefix == NULL)
726                 GOTO(out_free_seq, rc = -ENOMEM);
727
728         snprintf(prefix, MAX_OBD_NAME + 5, "srv-%s", obd_name);
729
730         /* Init client side sequence-manager */
731         rc = seq_client_init(ed->ed_cl_seq, NULL,
732                              LUSTRE_SEQ_METADATA,
733                              prefix, ss->ss_server_seq);
734         ed->ed_cl_seq->lcs_width = ECHO_SEQ_WIDTH;
735         OBD_FREE(prefix, MAX_OBD_NAME + 5);
736         if (rc)
737                 GOTO(out_free_seq, rc);
738
739         RETURN(0);
740
741 out_free_seq:
742         OBD_FREE_PTR(ed->ed_cl_seq);
743         ed->ed_cl_seq = NULL;
744         RETURN(rc);
745 }
746
747 static int echo_fid_fini(struct obd_device *obddev)
748 {
749         struct echo_device *ed = obd2echo_dev(obddev);
750         ENTRY;
751
752         if (ed->ed_cl_seq != NULL) {
753                 seq_client_fini(ed->ed_cl_seq);
754                 OBD_FREE_PTR(ed->ed_cl_seq);
755                 ed->ed_cl_seq = NULL;
756         }
757
758         RETURN(0);
759 }
760 #endif /* HAVE_SERVER_SUPPORT */
761
762 static struct lu_device *echo_device_alloc(const struct lu_env *env,
763                                            struct lu_device_type *t,
764                                            struct lustre_cfg *cfg)
765 {
766         struct lu_device   *next;
767         struct echo_device *ed;
768         struct cl_device   *cd;
769         struct obd_device  *obd = NULL; /* to keep compiler happy */
770         struct obd_device  *tgt;
771         const char *tgt_type_name;
772         int rc;
773         int cleanup = 0;
774         ENTRY;
775
776         OBD_ALLOC_PTR(ed);
777         if (ed == NULL)
778                 GOTO(out, rc = -ENOMEM);
779
780         cleanup = 1;
781         cd = &ed->ed_cl;
782         rc = cl_device_init(cd, t);
783         if (rc)
784                 GOTO(out, rc);
785
786         cd->cd_lu_dev.ld_ops = &echo_device_lu_ops;
787         cd->cd_ops = &echo_device_cl_ops;
788
789         cleanup = 2;
790         obd = class_name2obd(lustre_cfg_string(cfg, 0));
791         LASSERT(obd != NULL);
792         LASSERT(env != NULL);
793
794         tgt = class_name2obd(lustre_cfg_string(cfg, 1));
795         if (tgt == NULL) {
796                 CERROR("Can not find tgt device %s\n",
797                         lustre_cfg_string(cfg, 1));
798                 GOTO(out, rc = -ENODEV);
799         }
800
801         next = tgt->obd_lu_dev;
802         if (!strcmp(tgt->obd_type->typ_name, LUSTRE_MDT_NAME)) {
803                 ed->ed_next_ismd = 1;
804         } else {
805                 ed->ed_next_ismd = 0;
806                 rc = echo_site_init(env, ed);
807                 if (rc)
808                         GOTO(out, rc);
809         }
810         cleanup = 3;
811
812         rc = echo_client_setup(env, obd, cfg);
813         if (rc)
814                 GOTO(out, rc);
815
816         ed->ed_ec = &obd->u.echo_client;
817         cleanup = 4;
818
819         if (ed->ed_next_ismd) {
820 #ifdef HAVE_SERVER_SUPPORT
821                 /* Suppose to connect to some Metadata layer */
822                 struct lu_site *ls;
823                 struct lu_device *ld;
824                 int    found = 0;
825
826                 if (next == NULL) {
827                         CERROR("%s is not lu device type!\n",
828                                lustre_cfg_string(cfg, 1));
829                         GOTO(out, rc = -EINVAL);
830                 }
831
832                 tgt_type_name = lustre_cfg_string(cfg, 2);
833                 if (!tgt_type_name) {
834                         CERROR("%s no type name for echo %s setup\n",
835                                 lustre_cfg_string(cfg, 1),
836                                 tgt->obd_type->typ_name);
837                         GOTO(out, rc = -EINVAL);
838                 }
839
840                 ls = next->ld_site;
841
842                 spin_lock(&ls->ls_ld_lock);
843                 list_for_each_entry(ld, &ls->ls_ld_linkage, ld_linkage) {
844                         if (strcmp(ld->ld_type->ldt_name, tgt_type_name) == 0) {
845                                 found = 1;
846                                 break;
847                         }
848                 }
849                 spin_unlock(&ls->ls_ld_lock);
850
851                 if (found == 0) {
852                         CERROR("%s is not lu device type!\n",
853                                lustre_cfg_string(cfg, 1));
854                         GOTO(out, rc = -EINVAL);
855                 }
856
857                 next = ld;
858                 /* For MD echo client, it will use the site in MDS stack */
859                 ed->ed_site_myself.cs_lu = *ls;
860                 ed->ed_site = &ed->ed_site_myself;
861                 ed->ed_cl.cd_lu_dev.ld_site = &ed->ed_site_myself.cs_lu;
862                 rc = echo_fid_init(ed, obd->obd_name, lu_site2seq(ls));
863                 if (rc) {
864                         CERROR("echo fid init error %d\n", rc);
865                         GOTO(out, rc);
866                 }
867 #else /* !HAVE_SERVER_SUPPORT */
868                 CERROR("Local operations are NOT supported on client side. "
869                        "Only remote operations are supported. Metadata client "
870                        "must be run on server side.\n");
871                 GOTO(out, rc = -EOPNOTSUPP);
872 #endif
873         } else {
874                  /* if echo client is to be stacked upon ost device, the next is
875                   * NULL since ost is not a clio device so far */
876                 if (next != NULL && !lu_device_is_cl(next))
877                         next = NULL;
878
879                 tgt_type_name = tgt->obd_type->typ_name;
880                 if (next != NULL) {
881                         LASSERT(next != NULL);
882                         if (next->ld_site != NULL)
883                                 GOTO(out, rc = -EBUSY);
884
885                         next->ld_site = &ed->ed_site->cs_lu;
886                         rc = next->ld_type->ldt_ops->ldto_device_init(env, next,
887                                                      next->ld_type->ldt_name,
888                                                      NULL);
889                         if (rc)
890                                 GOTO(out, rc);
891
892                         /* Tricky case, I have to determine the obd type since
893                          * CLIO uses the different parameters to initialize
894                          * objects for lov & osc. */
895                         if (strcmp(tgt_type_name, LUSTRE_LOV_NAME) == 0)
896                                 ed->ed_next_islov = 1;
897                         else
898                                 LASSERT(strcmp(tgt_type_name,
899                                                LUSTRE_OSC_NAME) == 0);
900                 } else
901                         LASSERT(strcmp(tgt_type_name, LUSTRE_OST_NAME) == 0);
902         }
903
904         ed->ed_next = next;
905         RETURN(&cd->cd_lu_dev);
906 out:
907         switch(cleanup) {
908         case 4: {
909                 int rc2;
910                 rc2 = echo_client_cleanup(obd);
911                 if (rc2)
912                         CERROR("Cleanup obd device %s error(%d)\n",
913                                obd->obd_name, rc2);
914         }
915
916         case 3:
917                 echo_site_fini(env, ed);
918         case 2:
919                 cl_device_fini(&ed->ed_cl);
920         case 1:
921                 OBD_FREE_PTR(ed);
922         case 0:
923         default:
924                 break;
925         }
926         return(ERR_PTR(rc));
927 }
928
/**
 * lu_device_type_operations::ldto_device_init() slot of the echo client.
 *
 * The body is an unconditional LBUG(), i.e. reaching this function is
 * treated as a fatal error. The trailing return only satisfies the
 * function's return type.
 */
static int echo_device_init(const struct lu_env *env, struct lu_device *d,
                            const char *name, struct lu_device *next)
{
        LBUG();

        return 0;
}
935
936 static struct lu_device *echo_device_fini(const struct lu_env *env,
937                                           struct lu_device *d)
938 {
939         struct echo_device *ed = cl2echo_dev(lu2cl_dev(d));
940         struct lu_device *next = ed->ed_next;
941
942         while (next && !ed->ed_next_ismd)
943                 next = next->ld_type->ldt_ops->ldto_device_fini(env, next);
944         return NULL;
945 }
946
947 static void echo_lock_release(const struct lu_env *env,
948                               struct echo_lock *ecl,
949                               int still_used)
950 {
951         struct cl_lock *clk = echo_lock2cl(ecl);
952
953         cl_lock_get(clk);
954         cl_unuse(env, clk);
955         cl_lock_release(env, clk, "ec enqueue", ecl->el_object);
956         if (!still_used) {
957                 cl_lock_mutex_get(env, clk);
958                 cl_lock_cancel(env, clk);
959                 cl_lock_delete(env, clk);
960                 cl_lock_mutex_put(env, clk);
961         }
962         cl_lock_put(env, clk);
963 }
964
965 static struct lu_device *echo_device_free(const struct lu_env *env,
966                                           struct lu_device *d)
967 {
968         struct echo_device     *ed   = cl2echo_dev(lu2cl_dev(d));
969         struct echo_client_obd *ec   = ed->ed_ec;
970         struct echo_object     *eco;
971         struct lu_device       *next = ed->ed_next;
972
973         CDEBUG(D_INFO, "echo device:%p is going to be freed, next = %p\n",
974                ed, next);
975
976         lu_site_purge(env, &ed->ed_site->cs_lu, -1);
977
978         /* check if there are objects still alive.
979          * It shouldn't have any object because lu_site_purge would cleanup
980          * all of cached objects. Anyway, probably the echo device is being
981          * parallelly accessed.
982          */
983         spin_lock(&ec->ec_lock);
984         list_for_each_entry(eco, &ec->ec_objects, eo_obj_chain)
985                 eco->eo_deleted = 1;
986         spin_unlock(&ec->ec_lock);
987
988         /* purge again */
989         lu_site_purge(env, &ed->ed_site->cs_lu, -1);
990
991         CDEBUG(D_INFO,
992                "Waiting for the reference of echo object to be dropped\n");
993
994         /* Wait for the last reference to be dropped. */
995         spin_lock(&ec->ec_lock);
996         while (!list_empty(&ec->ec_objects)) {
997                 spin_unlock(&ec->ec_lock);
998                 CERROR("echo_client still has objects at cleanup time, "
999                        "wait for 1 second\n");
1000                 schedule_timeout_and_set_state(TASK_UNINTERRUPTIBLE,
1001                                                    cfs_time_seconds(1));
1002                 lu_site_purge(env, &ed->ed_site->cs_lu, -1);
1003                 spin_lock(&ec->ec_lock);
1004         }
1005         spin_unlock(&ec->ec_lock);
1006
1007         LASSERT(list_empty(&ec->ec_locks));
1008
1009         CDEBUG(D_INFO, "No object exists, exiting...\n");
1010
1011         echo_client_cleanup(d->ld_obd);
1012 #ifdef HAVE_SERVER_SUPPORT
1013         echo_fid_fini(d->ld_obd);
1014 #endif
1015         while (next && !ed->ed_next_ismd)
1016                 next = next->ld_type->ldt_ops->ldto_device_free(env, next);
1017
1018         LASSERT(ed->ed_site == lu2cl_site(d->ld_site));
1019         echo_site_fini(env, ed);
1020         cl_device_fini(&ed->ed_cl);
1021         OBD_FREE_PTR(ed);
1022
1023         return NULL;
1024 }
1025
1026 static const struct lu_device_type_operations echo_device_type_ops = {
1027         .ldto_init = echo_type_init,
1028         .ldto_fini = echo_type_fini,
1029
1030         .ldto_start = echo_type_start,
1031         .ldto_stop  = echo_type_stop,
1032
1033         .ldto_device_alloc = echo_device_alloc,
1034         .ldto_device_free  = echo_device_free,
1035         .ldto_device_init  = echo_device_init,
1036         .ldto_device_fini  = echo_device_fini
1037 };
1038
1039 static struct lu_device_type echo_device_type = {
1040         .ldt_tags     = LU_DEVICE_CL,
1041         .ldt_name     = LUSTRE_ECHO_CLIENT_NAME,
1042         .ldt_ops      = &echo_device_type_ops,
1043         .ldt_ctx_tags = LCT_CL_THREAD | LCT_MD_THREAD | LCT_DT_THREAD,
1044 };
1045 /** @} echo_init */
1046
1047 /** \defgroup echo_exports Exported operations
1048  *
1049  * exporting functions to echo client
1050  *
1051  * @{
1052  */
1053
1054 /* Interfaces to echo client obd device */
1055 static struct echo_object *cl_echo_object_find(struct echo_device *d,
1056                                                struct lov_stripe_md **lsmp)
1057 {
1058         struct lu_env *env;
1059         struct echo_thread_info *info;
1060         struct echo_object_conf *conf;
1061         struct lov_stripe_md    *lsm;
1062         struct echo_object *eco;
1063         struct cl_object   *obj;
1064         struct lu_fid *fid;
1065         int refcheck;
1066         int rc;
1067         ENTRY;
1068
1069         LASSERT(lsmp);
1070         lsm = *lsmp;
1071         LASSERT(lsm);
1072         LASSERTF(ostid_id(&lsm->lsm_oi) != 0, DOSTID"\n", POSTID(&lsm->lsm_oi));
1073         LASSERTF(ostid_seq(&lsm->lsm_oi) == FID_SEQ_ECHO, DOSTID"\n",
1074                  POSTID(&lsm->lsm_oi));
1075
1076         /* Never return an object if the obd is to be freed. */
1077         if (echo_dev2cl(d)->cd_lu_dev.ld_obd->obd_stopping)
1078                 RETURN(ERR_PTR(-ENODEV));
1079
1080         env = cl_env_get(&refcheck);
1081         if (IS_ERR(env))
1082                 RETURN((void *)env);
1083
1084         info = echo_env_info(env);
1085         conf = &info->eti_conf;
1086         if (d->ed_next) {
1087                 if (!d->ed_next_islov) {
1088                         struct lov_oinfo *oinfo = lsm->lsm_oinfo[0];
1089                         LASSERT(oinfo != NULL);
1090                         oinfo->loi_oi = lsm->lsm_oi;
1091                         conf->eoc_cl.u.coc_oinfo = oinfo;
1092                 } else {
1093                         struct lustre_md *md;
1094                         md = &info->eti_md;
1095                         memset(md, 0, sizeof *md);
1096                         md->lsm = lsm;
1097                         conf->eoc_cl.u.coc_md = md;
1098                 }
1099         }
1100         conf->eoc_md = lsmp;
1101
1102         fid  = &info->eti_fid;
1103         rc = ostid_to_fid(fid, &lsm->lsm_oi, 0);
1104         if (rc != 0)
1105                 GOTO(out, eco = ERR_PTR(rc));
1106
1107         /* In the function below, .hs_keycmp resolves to
1108          * lu_obj_hop_keycmp() */
1109         /* coverity[overrun-buffer-val] */
1110         obj = cl_object_find(env, echo_dev2cl(d), fid, &conf->eoc_cl);
1111         if (IS_ERR(obj))
1112                 GOTO(out, eco = (void*)obj);
1113
1114         eco = cl2echo_obj(obj);
1115         if (eco->eo_deleted) {
1116                 cl_object_put(env, obj);
1117                 eco = ERR_PTR(-EAGAIN);
1118         }
1119
1120 out:
1121         cl_env_put(env, &refcheck);
1122         RETURN(eco);
1123 }
1124
1125 static int cl_echo_object_put(struct echo_object *eco)
1126 {
1127         struct lu_env *env;
1128         struct cl_object *obj = echo_obj2cl(eco);
1129         int refcheck;
1130         ENTRY;
1131
1132         env = cl_env_get(&refcheck);
1133         if (IS_ERR(env))
1134                 RETURN(PTR_ERR(env));
1135
1136         /* an external function to kill an object? */
1137         if (eco->eo_deleted) {
1138                 struct lu_object_header *loh = obj->co_lu.lo_header;
1139                 LASSERT(&eco->eo_hdr == luh2coh(loh));
1140                 set_bit(LU_OBJECT_HEARD_BANSHEE, &loh->loh_flags);
1141         }
1142
1143         cl_object_put(env, obj);
1144         cl_env_put(env, &refcheck);
1145         RETURN(0);
1146 }
1147
1148 static int cl_echo_enqueue0(struct lu_env *env, struct echo_object *eco,
1149                             obd_off start, obd_off end, int mode,
1150                             __u64 *cookie , __u32 enqflags)
1151 {
1152         struct cl_io *io;
1153         struct cl_lock *lck;
1154         struct cl_object *obj;
1155         struct cl_lock_descr *descr;
1156         struct echo_thread_info *info;
1157         int rc = -ENOMEM;
1158         ENTRY;
1159
1160         info = echo_env_info(env);
1161         io = &info->eti_io;
1162         descr = &info->eti_descr;
1163         obj = echo_obj2cl(eco);
1164
1165         descr->cld_obj   = obj;
1166         descr->cld_start = cl_index(obj, start);
1167         descr->cld_end   = cl_index(obj, end);
1168         descr->cld_mode  = mode == LCK_PW ? CLM_WRITE : CLM_READ;
1169         descr->cld_enq_flags = enqflags;
1170         io->ci_obj = obj;
1171
1172         lck = cl_lock_request(env, io, descr, "ec enqueue", eco);
1173         if (lck) {
1174                 struct echo_client_obd *ec = eco->eo_dev->ed_ec;
1175                 struct echo_lock *el;
1176
1177                 rc = cl_wait(env, lck);
1178                 if (rc == 0) {
1179                         el = cl2echo_lock(cl_lock_at(lck, &echo_device_type));
1180                         spin_lock(&ec->ec_lock);
1181                         if (list_empty(&el->el_chain)) {
1182                                 list_add(&el->el_chain, &ec->ec_locks);
1183                                 el->el_cookie = ++ec->ec_unique;
1184                         }
1185                         atomic_inc(&el->el_refcount);
1186                         *cookie = el->el_cookie;
1187                         spin_unlock(&ec->ec_lock);
1188                 } else {
1189                         cl_lock_release(env, lck, "ec enqueue", current);
1190                 }
1191         }
1192         RETURN(rc);
1193 }
1194
1195 static int cl_echo_cancel0(struct lu_env *env, struct echo_device *ed,
1196                            __u64 cookie)
1197 {
1198         struct echo_client_obd *ec = ed->ed_ec;
1199         struct echo_lock       *ecl = NULL;
1200         struct list_head        *el;
1201         int found = 0, still_used = 0;
1202         ENTRY;
1203
1204         LASSERT(ec != NULL);
1205         spin_lock(&ec->ec_lock);
1206         list_for_each(el, &ec->ec_locks) {
1207                 ecl = list_entry(el, struct echo_lock, el_chain);
1208                 CDEBUG(D_INFO, "ecl: %p, cookie: "LPX64"\n", ecl, ecl->el_cookie);
1209                 found = (ecl->el_cookie == cookie);
1210                 if (found) {
1211                         if (atomic_dec_and_test(&ecl->el_refcount))
1212                                 list_del_init(&ecl->el_chain);
1213                         else
1214                                 still_used = 1;
1215                         break;
1216                 }
1217         }
1218         spin_unlock(&ec->ec_lock);
1219
1220         if (!found)
1221                 RETURN(-ENOENT);
1222
1223         echo_lock_release(env, ecl, still_used);
1224         RETURN(0);
1225 }
1226
1227 static void echo_commit_callback(const struct lu_env *env, struct cl_io *io,
1228                                 struct cl_page *page)
1229 {
1230         struct echo_thread_info *info;
1231         struct cl_2queue        *queue;
1232
1233         info = echo_env_info(env);
1234         LASSERT(io == &info->eti_io);
1235
1236         queue = &info->eti_queue;
1237         cl_page_list_add(&queue->c2_qout, page);
1238 }
1239
1240 static int cl_echo_object_brw(struct echo_object *eco, int rw, obd_off offset,
1241                               struct page **pages, int npages, int async)
1242 {
1243         struct lu_env           *env;
1244         struct echo_thread_info *info;
1245         struct cl_object        *obj = echo_obj2cl(eco);
1246         struct echo_device      *ed  = eco->eo_dev;
1247         struct cl_2queue        *queue;
1248         struct cl_io            *io;
1249         struct cl_page          *clp;
1250         struct lustre_handle    lh = { 0 };
1251         int page_size = cl_page_size(obj);
1252         int refcheck;
1253         int rc;
1254         int i;
1255         ENTRY;
1256
1257         LASSERT((offset & ~CFS_PAGE_MASK) == 0);
1258         LASSERT(ed->ed_next != NULL);
1259         env = cl_env_get(&refcheck);
1260         if (IS_ERR(env))
1261                 RETURN(PTR_ERR(env));
1262
1263         info    = echo_env_info(env);
1264         io      = &info->eti_io;
1265         queue   = &info->eti_queue;
1266
1267         cl_2queue_init(queue);
1268
1269         io->ci_ignore_layout = 1;
1270         rc = cl_io_init(env, io, CIT_MISC, obj);
1271         if (rc < 0)
1272                 GOTO(out, rc);
1273         LASSERT(rc == 0);
1274
1275
1276         rc = cl_echo_enqueue0(env, eco, offset,
1277                               offset + npages * PAGE_CACHE_SIZE - 1,
1278                               rw == READ ? LCK_PR : LCK_PW, &lh.cookie,
1279                               CEF_NEVER);
1280         if (rc < 0)
1281                 GOTO(error_lock, rc);
1282
1283         for (i = 0; i < npages; i++) {
1284                 LASSERT(pages[i]);
1285                 clp = cl_page_find(env, obj, cl_index(obj, offset),
1286                                    pages[i], CPT_TRANSIENT);
1287                 if (IS_ERR(clp)) {
1288                         rc = PTR_ERR(clp);
1289                         break;
1290                 }
1291                 LASSERT(clp->cp_type == CPT_TRANSIENT);
1292
1293                 rc = cl_page_own(env, io, clp);
1294                 if (rc) {
1295                         LASSERT(clp->cp_state == CPS_FREEING);
1296                         cl_page_put(env, clp);
1297                         break;
1298                 }
1299
1300                 cl_2queue_add(queue, clp);
1301
1302                 /* drop the reference count for cl_page_find, so that the page
1303                  * will be freed in cl_2queue_fini. */
1304                 cl_page_put(env, clp);
1305                 cl_page_clip(env, clp, 0, page_size);
1306
1307                 offset += page_size;
1308         }
1309
1310         if (rc == 0) {
1311                 enum cl_req_type typ = rw == READ ? CRT_READ : CRT_WRITE;
1312
1313                 async = async && (typ == CRT_WRITE);
1314                 if (async)
1315                         rc = cl_io_commit_async(env, io, &queue->c2_qin,
1316                                                 0, PAGE_SIZE,
1317                                                 echo_commit_callback);
1318                 else
1319                         rc = cl_io_submit_sync(env, io, typ, queue, 0);
1320                 CDEBUG(D_INFO, "echo_client %s write returns %d\n",
1321                        async ? "async" : "sync", rc);
1322         }
1323
1324         cl_echo_cancel0(env, ed, lh.cookie);
1325         EXIT;
1326 error_lock:
1327         cl_2queue_discard(env, io, queue);
1328         cl_2queue_disown(env, io, queue);
1329         cl_2queue_fini(env, queue);
1330         cl_io_fini(env, io);
1331 out:
1332         cl_env_put(env, &refcheck);
1333         return rc;
1334 }
1335 /** @} echo_exports */
1336
1337
1338 static obd_id last_object_id;
1339
1340 static int
1341 echo_copyin_lsm (struct echo_device *ed, struct lov_stripe_md *lsm,
1342                  void *ulsm, int ulsm_nob)
1343 {
1344         struct echo_client_obd *ec = ed->ed_ec;
1345         int                     i;
1346
1347         if (ulsm_nob < sizeof (*lsm))
1348                 return (-EINVAL);
1349
1350         if (copy_from_user (lsm, ulsm, sizeof (*lsm)))
1351                 return (-EFAULT);
1352
1353         if (lsm->lsm_stripe_count > ec->ec_nstripes ||
1354             lsm->lsm_magic != LOV_MAGIC ||
1355             (lsm->lsm_stripe_size & (~CFS_PAGE_MASK)) != 0 ||
1356             ((__u64)lsm->lsm_stripe_size * lsm->lsm_stripe_count > ~0UL))
1357                 return (-EINVAL);
1358
1359
1360         for (i = 0; i < lsm->lsm_stripe_count; i++) {
1361                 if (copy_from_user(lsm->lsm_oinfo[i],
1362                                        ((struct lov_stripe_md *)ulsm)-> \
1363                                        lsm_oinfo[i],
1364                                        sizeof(lsm->lsm_oinfo[0])))
1365                         return (-EFAULT);
1366         }
1367         return (0);
1368 }
1369
1370 #ifdef HAVE_SERVER_SUPPORT
1371 static inline void echo_md_build_name(struct lu_name *lname, char *name,
1372                                       __u64 id)
1373 {
1374         sprintf(name, LPU64, id);
1375         lname->ln_name = name;
1376         lname->ln_namelen = strlen(name);
1377 }
1378
1379 /* similar to mdt_attr_get_complex */
1380 static int echo_big_lmm_get(const struct lu_env *env, struct md_object *o,
1381                             struct md_attr *ma)
1382 {
1383         struct echo_thread_info *info = echo_env_info(env);
1384         int                      rc;
1385
1386         ENTRY;
1387
1388         LASSERT(ma->ma_lmm_size > 0);
1389
1390         rc = mo_xattr_get(env, o, &LU_BUF_NULL, XATTR_NAME_LOV);
1391         if (rc < 0)
1392                 RETURN(rc);
1393
1394         /* big_lmm may need to be grown */
1395         if (info->eti_big_lmmsize < rc) {
1396                 int size = size_roundup_power2(rc);
1397
1398                 if (info->eti_big_lmmsize > 0) {
1399                         /* free old buffer */
1400                         LASSERT(info->eti_big_lmm);
1401                         OBD_FREE_LARGE(info->eti_big_lmm,
1402                                        info->eti_big_lmmsize);
1403                         info->eti_big_lmm = NULL;
1404                         info->eti_big_lmmsize = 0;
1405                 }
1406
1407                 OBD_ALLOC_LARGE(info->eti_big_lmm, size);
1408                 if (info->eti_big_lmm == NULL)
1409                         RETURN(-ENOMEM);
1410                 info->eti_big_lmmsize = size;
1411         }
1412         LASSERT(info->eti_big_lmmsize >= rc);
1413
1414         info->eti_buf.lb_buf = info->eti_big_lmm;
1415         info->eti_buf.lb_len = info->eti_big_lmmsize;
1416         rc = mo_xattr_get(env, o, &info->eti_buf, XATTR_NAME_LOV);
1417         if (rc < 0)
1418                 RETURN(rc);
1419
1420         ma->ma_valid |= MA_LOV;
1421         ma->ma_lmm = info->eti_big_lmm;
1422         ma->ma_lmm_size = rc;
1423
1424         RETURN(0);
1425 }
1426
1427 static int echo_attr_get_complex(const struct lu_env *env,
1428                                  struct md_object *next,
1429                                  struct md_attr *ma)
1430 {
1431         struct echo_thread_info *info = echo_env_info(env);
1432         struct lu_buf           *buf = &info->eti_buf;
1433         umode_t          mode = lu_object_attr(&next->mo_lu);
1434         int                      need = ma->ma_need;
1435         int                      rc = 0, rc2;
1436
1437         ENTRY;
1438
1439         ma->ma_valid = 0;
1440
1441         if (need & MA_INODE) {
1442                 ma->ma_need = MA_INODE;
1443                 rc = mo_attr_get(env, next, ma);
1444                 if (rc)
1445                         GOTO(out, rc);
1446                 ma->ma_valid |= MA_INODE;
1447         }
1448
1449         if (need & MA_LOV) {
1450                 if (S_ISREG(mode) || S_ISDIR(mode)) {
1451                         LASSERT(ma->ma_lmm_size > 0);
1452                         buf->lb_buf = ma->ma_lmm;
1453                         buf->lb_len = ma->ma_lmm_size;
1454                         rc2 = mo_xattr_get(env, next, buf, XATTR_NAME_LOV);
1455                         if (rc2 > 0) {
1456                                 ma->ma_lmm_size = rc2;
1457                                 ma->ma_valid |= MA_LOV;
1458                         } else if (rc2 == -ENODATA) {
1459                                 /* no LOV EA */
1460                                 ma->ma_lmm_size = 0;
1461                         } else if (rc2 == -ERANGE) {
1462                                 rc2 = echo_big_lmm_get(env, next, ma);
1463                                 if (rc2 < 0)
1464                                         GOTO(out, rc = rc2);
1465                         } else {
1466                                 GOTO(out, rc = rc2);
1467                         }
1468                 }
1469         }
1470
1471 #ifdef CONFIG_FS_POSIX_ACL
1472         if (need & MA_ACL_DEF && S_ISDIR(mode)) {
1473                 buf->lb_buf = ma->ma_acl;
1474                 buf->lb_len = ma->ma_acl_size;
1475                 rc2 = mo_xattr_get(env, next, buf, XATTR_NAME_ACL_DEFAULT);
1476                 if (rc2 > 0) {
1477                         ma->ma_acl_size = rc2;
1478                         ma->ma_valid |= MA_ACL_DEF;
1479                 } else if (rc2 == -ENODATA) {
1480                         /* no ACLs */
1481                         ma->ma_acl_size = 0;
1482                 } else {
1483                         GOTO(out, rc = rc2);
1484                 }
1485         }
1486 #endif
1487 out:
1488         ma->ma_need = need;
1489         CDEBUG(D_INODE, "after getattr rc = %d, ma_valid = "LPX64" ma_lmm=%p\n",
1490                rc, ma->ma_valid, ma->ma_lmm);
1491         RETURN(rc);
1492 }
1493
1494 static int
1495 echo_md_create_internal(const struct lu_env *env, struct echo_device *ed,
1496                         struct md_object *parent, struct lu_fid *fid,
1497                         struct lu_name *lname, struct md_op_spec *spec,
1498                         struct md_attr *ma)
1499 {
1500         struct lu_object        *ec_child, *child;
1501         struct lu_device        *ld = ed->ed_next;
1502         struct echo_thread_info *info = echo_env_info(env);
1503         struct lu_fid           *fid2 = &info->eti_fid2;
1504         struct lu_object_conf    conf = { .loc_flags = LOC_F_NEW };
1505         int                      rc;
1506
1507         ENTRY;
1508
1509         rc = mdo_lookup(env, parent, lname, fid2, spec);
1510         if (rc == 0)
1511                 return -EEXIST;
1512         else if (rc != -ENOENT)
1513                 return rc;
1514
1515         ec_child = lu_object_find_at(env, &ed->ed_cl.cd_lu_dev,
1516                                      fid, &conf);
1517         if (IS_ERR(ec_child)) {
1518                 CERROR("Can not find the child "DFID": rc = %ld\n", PFID(fid),
1519                         PTR_ERR(ec_child));
1520                 RETURN(PTR_ERR(ec_child));
1521         }
1522
1523         child = lu_object_locate(ec_child->lo_header, ld->ld_type);
1524         if (child == NULL) {
1525                 CERROR("Can not locate the child "DFID"\n", PFID(fid));
1526                 GOTO(out_put, rc = -EINVAL);
1527         }
1528
1529         CDEBUG(D_RPCTRACE, "Start creating object "DFID" %s %p\n",
1530                PFID(lu_object_fid(&parent->mo_lu)), lname->ln_name, parent);
1531
1532         /*
1533          * Do not perform lookup sanity check. We know that name does not exist.
1534          */
1535         spec->sp_cr_lookup = 0;
1536         rc = mdo_create(env, parent, lname, lu2md(child), spec, ma);
1537         if (rc) {
1538                 CERROR("Can not create child "DFID": rc = %d\n", PFID(fid), rc);
1539                 GOTO(out_put, rc);
1540         }
1541         CDEBUG(D_RPCTRACE, "End creating object "DFID" %s %p rc  = %d\n",
1542                PFID(lu_object_fid(&parent->mo_lu)), lname->ln_name, parent, rc);
1543         EXIT;
1544 out_put:
1545         lu_object_put(env, ec_child);
1546         return rc;
1547 }
1548
1549 static int echo_set_lmm_size(const struct lu_env *env, struct lu_device *ld,
1550                              struct md_attr *ma)
1551 {
1552         struct echo_thread_info *info = echo_env_info(env);
1553
1554         if (strcmp(ld->ld_type->ldt_name, LUSTRE_MDD_NAME)) {
1555                 ma->ma_lmm = (void *)&info->eti_lmm;
1556                 ma->ma_lmm_size = sizeof(info->eti_lmm);
1557         } else {
1558                 LASSERT(info->eti_big_lmmsize);
1559                 ma->ma_lmm = info->eti_big_lmm;
1560                 ma->ma_lmm_size = info->eti_big_lmmsize;
1561         }
1562
1563         return 0;
1564 }
1565
1566 static int echo_create_md_object(const struct lu_env *env,
1567                                  struct echo_device *ed,
1568                                  struct lu_object *ec_parent,
1569                                  struct lu_fid *fid,
1570                                  char *name, int namelen,
1571                                  __u64 id, __u32 mode, int count,
1572                                  int stripe_count, int stripe_offset)
1573 {
1574         struct lu_object        *parent;
1575         struct echo_thread_info *info = echo_env_info(env);
1576         struct lu_name          *lname = &info->eti_lname;
1577         struct md_op_spec       *spec = &info->eti_spec;
1578         struct md_attr          *ma = &info->eti_ma;
1579         struct lu_device        *ld = ed->ed_next;
1580         int                      rc = 0;
1581         int                      i;
1582
1583         ENTRY;
1584
1585         if (ec_parent == NULL)
1586                 return -1;
1587         parent = lu_object_locate(ec_parent->lo_header, ld->ld_type);
1588         if (parent == NULL)
1589                 RETURN(-ENXIO);
1590
1591         memset(ma, 0, sizeof(*ma));
1592         memset(spec, 0, sizeof(*spec));
1593         if (stripe_count != 0) {
1594                 spec->sp_cr_flags |= FMODE_WRITE;
1595                 echo_set_lmm_size(env, ld, ma);
1596                 if (stripe_count != -1) {
1597                         struct lov_user_md_v3 *lum = &info->eti_lum;
1598
1599                         lum->lmm_magic = LOV_USER_MAGIC_V3;
1600                         lum->lmm_stripe_count = stripe_count;
1601                         lum->lmm_stripe_offset = stripe_offset;
1602                         lum->lmm_pattern = 0;
1603                         spec->u.sp_ea.eadata = lum;
1604                         spec->u.sp_ea.eadatalen = sizeof(*lum);
1605                         spec->sp_cr_flags |= MDS_OPEN_HAS_EA;
1606                 }
1607         }
1608
1609         ma->ma_attr.la_mode = mode;
1610         ma->ma_attr.la_valid = LA_CTIME | LA_MODE;
1611         ma->ma_attr.la_ctime = cfs_time_current_64();
1612
1613         if (name != NULL) {
1614                 lname->ln_name = name;
1615                 lname->ln_namelen = namelen;
1616                 /* If name is specified, only create one object by name */
1617                 rc = echo_md_create_internal(env, ed, lu2md(parent), fid, lname,
1618                                              spec, ma);
1619                 RETURN(rc);
1620         }
1621
1622         /* Create multiple object sequenced by id */
1623         for (i = 0; i < count; i++) {
1624                 char *tmp_name = info->eti_name;
1625
1626                 echo_md_build_name(lname, tmp_name, id);
1627
1628                 rc = echo_md_create_internal(env, ed, lu2md(parent), fid, lname,
1629                                              spec, ma);
1630                 if (rc) {
1631                         CERROR("Can not create child %s: rc = %d\n", tmp_name,
1632                                 rc);
1633                         break;
1634                 }
1635                 id++;
1636                 fid->f_oid++;
1637         }
1638
1639         RETURN(rc);
1640 }
1641
1642 static struct lu_object *echo_md_lookup(const struct lu_env *env,
1643                                         struct echo_device *ed,
1644                                         struct md_object *parent,
1645                                         struct lu_name *lname)
1646 {
1647         struct echo_thread_info *info = echo_env_info(env);
1648         struct lu_fid           *fid = &info->eti_fid;
1649         struct lu_object        *child;
1650         int    rc;
1651         ENTRY;
1652
1653         CDEBUG(D_INFO, "lookup %s in parent "DFID" %p\n", lname->ln_name,
1654                PFID(fid), parent);
1655         rc = mdo_lookup(env, parent, lname, fid, NULL);
1656         if (rc) {
1657                 CERROR("lookup %s: rc = %d\n", lname->ln_name, rc);
1658                 RETURN(ERR_PTR(rc));
1659         }
1660
1661         /* In the function below, .hs_keycmp resolves to
1662          * lu_obj_hop_keycmp() */
1663         /* coverity[overrun-buffer-val] */
1664         child = lu_object_find_at(env, &ed->ed_cl.cd_lu_dev, fid, NULL);
1665
1666         RETURN(child);
1667 }
1668
1669 static int echo_setattr_object(const struct lu_env *env,
1670                                struct echo_device *ed,
1671                                struct lu_object *ec_parent,
1672                                __u64 id, int count)
1673 {
1674         struct lu_object        *parent;
1675         struct echo_thread_info *info = echo_env_info(env);
1676         struct lu_name          *lname = &info->eti_lname;
1677         char                    *name = info->eti_name;
1678         struct lu_device        *ld = ed->ed_next;
1679         struct lu_buf           *buf = &info->eti_buf;
1680         int                      rc = 0;
1681         int                      i;
1682
1683         ENTRY;
1684
1685         if (ec_parent == NULL)
1686                 return -1;
1687         parent = lu_object_locate(ec_parent->lo_header, ld->ld_type);
1688         if (parent == NULL)
1689                 RETURN(-ENXIO);
1690
1691         for (i = 0; i < count; i++) {
1692                 struct lu_object *ec_child, *child;
1693
1694                 echo_md_build_name(lname, name, id);
1695
1696                 ec_child = echo_md_lookup(env, ed, lu2md(parent), lname);
1697                 if (IS_ERR(ec_child)) {
1698                         CERROR("Can't find child %s: rc = %ld\n",
1699                                 lname->ln_name, PTR_ERR(ec_child));
1700                         RETURN(PTR_ERR(ec_child));
1701                 }
1702
1703                 child = lu_object_locate(ec_child->lo_header, ld->ld_type);
1704                 if (child == NULL) {
1705                         CERROR("Can not locate the child %s\n", lname->ln_name);
1706                         lu_object_put(env, ec_child);
1707                         rc = -EINVAL;
1708                         break;
1709                 }
1710
1711                 CDEBUG(D_RPCTRACE, "Start setattr object "DFID"\n",
1712                        PFID(lu_object_fid(child)));
1713
1714                 buf->lb_buf = info->eti_xattr_buf;
1715                 buf->lb_len = sizeof(info->eti_xattr_buf);
1716
1717                 sprintf(name, "%s.test1", XATTR_USER_PREFIX);
1718                 rc = mo_xattr_set(env, lu2md(child), buf, name,
1719                                   LU_XATTR_CREATE);
1720                 if (rc < 0) {
1721                         CERROR("Can not setattr child "DFID": rc = %d\n",
1722                                 PFID(lu_object_fid(child)), rc);
1723                         lu_object_put(env, ec_child);
1724                         break;
1725                 }
1726                 CDEBUG(D_RPCTRACE, "End setattr object "DFID"\n",
1727                        PFID(lu_object_fid(child)));
1728                 id++;
1729                 lu_object_put(env, ec_child);
1730         }
1731         RETURN(rc);
1732 }
1733
1734 static int echo_getattr_object(const struct lu_env *env,
1735                                struct echo_device *ed,
1736                                struct lu_object *ec_parent,
1737                                __u64 id, int count)
1738 {
1739         struct lu_object        *parent;
1740         struct echo_thread_info *info = echo_env_info(env);
1741         struct lu_name          *lname = &info->eti_lname;
1742         char                    *name = info->eti_name;
1743         struct md_attr          *ma = &info->eti_ma;
1744         struct lu_device        *ld = ed->ed_next;
1745         int                      rc = 0;
1746         int                      i;
1747
1748         ENTRY;
1749
1750         if (ec_parent == NULL)
1751                 return -1;
1752         parent = lu_object_locate(ec_parent->lo_header, ld->ld_type);
1753         if (parent == NULL)
1754                 RETURN(-ENXIO);
1755
1756         memset(ma, 0, sizeof(*ma));
1757         ma->ma_need |= MA_INODE | MA_LOV | MA_PFID | MA_HSM | MA_ACL_DEF;
1758         ma->ma_acl = info->eti_xattr_buf;
1759         ma->ma_acl_size = sizeof(info->eti_xattr_buf);
1760
1761         for (i = 0; i < count; i++) {
1762                 struct lu_object *ec_child, *child;
1763
1764                 ma->ma_valid = 0;
1765                 echo_md_build_name(lname, name, id);
1766                 echo_set_lmm_size(env, ld, ma);
1767
1768                 ec_child = echo_md_lookup(env, ed, lu2md(parent), lname);
1769                 if (IS_ERR(ec_child)) {
1770                         CERROR("Can't find child %s: rc = %ld\n",
1771                                lname->ln_name, PTR_ERR(ec_child));
1772                         RETURN(PTR_ERR(ec_child));
1773                 }
1774
1775                 child = lu_object_locate(ec_child->lo_header, ld->ld_type);
1776                 if (child == NULL) {
1777                         CERROR("Can not locate the child %s\n", lname->ln_name);
1778                         lu_object_put(env, ec_child);
1779                         RETURN(-EINVAL);
1780                 }
1781
1782                 CDEBUG(D_RPCTRACE, "Start getattr object "DFID"\n",
1783                        PFID(lu_object_fid(child)));
1784                 rc = echo_attr_get_complex(env, lu2md(child), ma);
1785                 if (rc) {
1786                         CERROR("Can not getattr child "DFID": rc = %d\n",
1787                                 PFID(lu_object_fid(child)), rc);
1788                         lu_object_put(env, ec_child);
1789                         break;
1790                 }
1791                 CDEBUG(D_RPCTRACE, "End getattr object "DFID"\n",
1792                        PFID(lu_object_fid(child)));
1793                 id++;
1794                 lu_object_put(env, ec_child);
1795         }
1796
1797         RETURN(rc);
1798 }
1799
1800 static int echo_lookup_object(const struct lu_env *env,
1801                               struct echo_device *ed,
1802                               struct lu_object *ec_parent,
1803                               __u64 id, int count)
1804 {
1805         struct lu_object        *parent;
1806         struct echo_thread_info *info = echo_env_info(env);
1807         struct lu_name          *lname = &info->eti_lname;
1808         char                    *name = info->eti_name;
1809         struct lu_fid           *fid = &info->eti_fid;
1810         struct lu_device        *ld = ed->ed_next;
1811         int                      rc = 0;
1812         int                      i;
1813
1814         if (ec_parent == NULL)
1815                 return -1;
1816         parent = lu_object_locate(ec_parent->lo_header, ld->ld_type);
1817         if (parent == NULL)
1818                 return -ENXIO;
1819
1820         /*prepare the requests*/
1821         for (i = 0; i < count; i++) {
1822                 echo_md_build_name(lname, name, id);
1823
1824                 CDEBUG(D_RPCTRACE, "Start lookup object "DFID" %s %p\n",
1825                        PFID(lu_object_fid(parent)), lname->ln_name, parent);
1826
1827                 rc = mdo_lookup(env, lu2md(parent), lname, fid, NULL);
1828                 if (rc) {
1829                         CERROR("Can not lookup child %s: rc = %d\n", name, rc);
1830                         break;
1831                 }
1832                 CDEBUG(D_RPCTRACE, "End lookup object "DFID" %s %p\n",
1833                        PFID(lu_object_fid(parent)), lname->ln_name, parent);
1834
1835                 id++;
1836         }
1837         return rc;
1838 }
1839
1840 static int echo_md_destroy_internal(const struct lu_env *env,
1841                                     struct echo_device *ed,
1842                                     struct md_object *parent,
1843                                     struct lu_name *lname,
1844                                     struct md_attr *ma)
1845 {
1846         struct lu_device   *ld = ed->ed_next;
1847         struct lu_object   *ec_child;
1848         struct lu_object   *child;
1849         int                 rc;
1850
1851         ENTRY;
1852
1853         ec_child = echo_md_lookup(env, ed, parent, lname);
1854         if (IS_ERR(ec_child)) {
1855                 CERROR("Can't find child %s: rc = %ld\n", lname->ln_name,
1856                         PTR_ERR(ec_child));
1857                 RETURN(PTR_ERR(ec_child));
1858         }
1859
1860         child = lu_object_locate(ec_child->lo_header, ld->ld_type);
1861         if (child == NULL) {
1862                 CERROR("Can not locate the child %s\n", lname->ln_name);
1863                 GOTO(out_put, rc = -EINVAL);
1864         }
1865
1866         if (lu_object_remote(child)) {
1867                 CERROR("Can not destroy remote object %s: rc = %d\n",
1868                        lname->ln_name, -EPERM);
1869                 GOTO(out_put, rc = -EPERM);
1870         }
1871         CDEBUG(D_RPCTRACE, "Start destroy object "DFID" %s %p\n",
1872                PFID(lu_object_fid(&parent->mo_lu)), lname->ln_name, parent);
1873
1874         rc = mdo_unlink(env, parent, lu2md(child), lname, ma, 0);
1875         if (rc) {
1876                 CERROR("Can not unlink child %s: rc = %d\n",
1877                         lname->ln_name, rc);
1878                 GOTO(out_put, rc);
1879         }
1880         CDEBUG(D_RPCTRACE, "End destroy object "DFID" %s %p\n",
1881                PFID(lu_object_fid(&parent->mo_lu)), lname->ln_name, parent);
1882 out_put:
1883         lu_object_put(env, ec_child);
1884         return rc;
1885 }
1886
1887 static int echo_destroy_object(const struct lu_env *env,
1888                                struct echo_device *ed,
1889                                struct lu_object *ec_parent,
1890                                char *name, int namelen,
1891                                __u64 id, __u32 mode,
1892                                int count)
1893 {
1894         struct echo_thread_info *info = echo_env_info(env);
1895         struct lu_name          *lname = &info->eti_lname;
1896         struct md_attr          *ma = &info->eti_ma;
1897         struct lu_device        *ld = ed->ed_next;
1898         struct lu_object        *parent;
1899         int                      rc = 0;
1900         int                      i;
1901         ENTRY;
1902
1903         parent = lu_object_locate(ec_parent->lo_header, ld->ld_type);
1904         if (parent == NULL)
1905                 RETURN(-EINVAL);
1906
1907         memset(ma, 0, sizeof(*ma));
1908         ma->ma_attr.la_mode = mode;
1909         ma->ma_attr.la_valid = LA_CTIME;
1910         ma->ma_attr.la_ctime = cfs_time_current_64();
1911         ma->ma_need = MA_INODE;
1912         ma->ma_valid = 0;
1913
1914         if (name != NULL) {
1915                 lname->ln_name = name;
1916                 lname->ln_namelen = namelen;
1917                 rc = echo_md_destroy_internal(env, ed, lu2md(parent), lname,
1918                                               ma);
1919                 RETURN(rc);
1920         }
1921
1922         /*prepare the requests*/
1923         for (i = 0; i < count; i++) {
1924                 char *tmp_name = info->eti_name;
1925
1926                 ma->ma_valid = 0;
1927                 echo_md_build_name(lname, tmp_name, id);
1928
1929                 rc = echo_md_destroy_internal(env, ed, lu2md(parent), lname,
1930                                               ma);
1931                 if (rc) {
1932                         CERROR("Can not unlink child %s: rc = %d\n", name, rc);
1933                         break;
1934                 }
1935                 id++;
1936         }
1937
1938         RETURN(rc);
1939 }
1940
1941 static struct lu_object *echo_resolve_path(const struct lu_env *env,
1942                                            struct echo_device *ed, char *path,
1943                                            int path_len)
1944 {
1945         struct lu_device        *ld = ed->ed_next;
1946         struct md_device        *md = lu2md_dev(ld);
1947         struct echo_thread_info *info = echo_env_info(env);
1948         struct lu_fid           *fid = &info->eti_fid;
1949         struct lu_name          *lname = &info->eti_lname;
1950         struct lu_object        *parent = NULL;
1951         struct lu_object        *child = NULL;
1952         int rc = 0;
1953         ENTRY;
1954
1955         /*Only support MDD layer right now*/
1956         rc = md->md_ops->mdo_root_get(env, md, fid);
1957         if (rc) {
1958                 CERROR("get root error: rc = %d\n", rc);
1959                 RETURN(ERR_PTR(rc));
1960         }
1961
1962         /* In the function below, .hs_keycmp resolves to
1963          * lu_obj_hop_keycmp() */
1964         /* coverity[overrun-buffer-val] */
1965         parent = lu_object_find_at(env, &ed->ed_cl.cd_lu_dev, fid, NULL);
1966         if (IS_ERR(parent)) {
1967                 CERROR("Can not find the parent "DFID": rc = %ld\n",
1968                         PFID(fid), PTR_ERR(parent));
1969                 RETURN(parent);
1970         }
1971
1972         while (1) {
1973                 struct lu_object *ld_parent;
1974                 char *e;
1975
1976                 e = strsep(&path, "/");
1977                 if (e == NULL)
1978                         break;
1979
1980                 if (e[0] == 0) {
1981                         if (!path || path[0] == '\0')
1982                                 break;
1983                         continue;
1984                 }
1985
1986                 lname->ln_name = e;
1987                 lname->ln_namelen = strlen(e);
1988
1989                 ld_parent = lu_object_locate(parent->lo_header, ld->ld_type);
1990                 if (ld_parent == NULL) {
1991                         lu_object_put(env, parent);
1992                         rc = -EINVAL;
1993                         break;
1994                 }
1995
1996                 child = echo_md_lookup(env, ed, lu2md(ld_parent), lname);
1997                 lu_object_put(env, parent);
1998                 if (IS_ERR(child)) {
1999                         rc = (int)PTR_ERR(child);
2000                         CERROR("lookup %s under parent "DFID": rc = %d\n",
2001                                 lname->ln_name, PFID(lu_object_fid(ld_parent)),
2002                                 rc);
2003                         break;
2004                 }
2005                 parent = child;
2006         }
2007         if (rc)
2008                 RETURN(ERR_PTR(rc));
2009
2010         RETURN(parent);
2011 }
2012
2013 static void echo_ucred_init(struct lu_env *env)
2014 {
2015         struct lu_ucred *ucred = lu_ucred(env);
2016
2017         ucred->uc_valid = UCRED_INVALID;
2018
2019         ucred->uc_suppgids[0] = -1;
2020         ucred->uc_suppgids[1] = -1;
2021
2022         ucred->uc_uid = ucred->uc_o_uid  =
2023                                 from_kuid(&init_user_ns, current_uid());
2024         ucred->uc_gid = ucred->uc_o_gid  =
2025                                 from_kgid(&init_user_ns, current_gid());
2026         ucred->uc_fsuid = ucred->uc_o_fsuid =
2027                                 from_kuid(&init_user_ns, current_fsuid());
2028         ucred->uc_fsgid = ucred->uc_o_fsgid =
2029                                 from_kgid(&init_user_ns, current_fsgid());
2030         ucred->uc_cap = cfs_curproc_cap_pack();
2031
2032         /* remove fs privilege for non-root user. */
2033         if (ucred->uc_fsuid)
2034                 ucred->uc_cap &= ~CFS_CAP_FS_MASK;
2035         ucred->uc_valid = UCRED_NEW;
2036 }
2037
2038 static void echo_ucred_fini(struct lu_env *env)
2039 {
2040         struct lu_ucred *ucred = lu_ucred(env);
2041         ucred->uc_valid = UCRED_INIT;
2042 }
2043
2044 #define ECHO_MD_CTX_TAG (LCT_REMEMBER | LCT_MD_THREAD)
2045 #define ECHO_MD_SES_TAG (LCT_REMEMBER | LCT_SESSION | LCT_SERVER_SESSION)
2046 static int echo_md_handler(struct echo_device *ed, int command,
2047                            char *path, int path_len, __u64 id, int count,
2048                            struct obd_ioctl_data *data)
2049 {
2050         struct echo_thread_info *info;
2051         struct lu_device      *ld = ed->ed_next;
2052         struct lu_env         *env;
2053         int                    refcheck;
2054         struct lu_object      *parent;
2055         char                  *name = NULL;
2056         int                    namelen = data->ioc_plen2;
2057         int                    rc = 0;
2058         ENTRY;
2059
2060         if (ld == NULL) {
2061                 CERROR("MD echo client is not being initialized properly\n");
2062                 RETURN(-EINVAL);
2063         }
2064
2065         if (strcmp(ld->ld_type->ldt_name, LUSTRE_MDD_NAME)) {
2066                 CERROR("Only support MDD layer right now!\n");
2067                 RETURN(-EINVAL);
2068         }
2069
2070         env = cl_env_get(&refcheck);
2071         if (IS_ERR(env))
2072                 RETURN(PTR_ERR(env));
2073
2074         rc = lu_env_refill_by_tags(env, ECHO_MD_CTX_TAG, ECHO_MD_SES_TAG);
2075         if (rc != 0)
2076                 GOTO(out_env, rc);
2077
2078         /* init big_lmm buffer */
2079         info = echo_env_info(env);
2080         LASSERT(info->eti_big_lmm == NULL);
2081         OBD_ALLOC_LARGE(info->eti_big_lmm, MIN_MD_SIZE);
2082         if (info->eti_big_lmm == NULL)
2083                 GOTO(out_env, rc = -ENOMEM);
2084         info->eti_big_lmmsize = MIN_MD_SIZE;
2085
2086         parent = echo_resolve_path(env, ed, path, path_len);
2087         if (IS_ERR(parent)) {
2088                 CERROR("Can not resolve the path %s: rc = %ld\n", path,
2089                         PTR_ERR(parent));
2090                 GOTO(out_free, rc = PTR_ERR(parent));
2091         }
2092
2093         if (namelen > 0) {
2094                 OBD_ALLOC(name, namelen + 1);
2095                 if (name == NULL)
2096                         GOTO(out_put, rc = -ENOMEM);
2097                 if (copy_from_user(name, data->ioc_pbuf2, namelen))
2098                         GOTO(out_name, rc = -EFAULT);
2099         }
2100
2101         echo_ucred_init(env);
2102
2103         switch (command) {
2104         case ECHO_MD_CREATE:
2105         case ECHO_MD_MKDIR: {
2106                 struct echo_thread_info *info = echo_env_info(env);
2107                 __u32 mode = data->ioc_obdo2.o_mode;
2108                 struct lu_fid *fid = &info->eti_fid;
2109                 int stripe_count = (int)data->ioc_obdo2.o_misc;
2110                 int stripe_index = (int)data->ioc_obdo2.o_stripe_idx;
2111
2112                 rc = ostid_to_fid(fid, &data->ioc_obdo1.o_oi, 0);
2113                 if (rc != 0)
2114                         break;
2115
2116                 /* In the function below, .hs_keycmp resolves to
2117                  * lu_obj_hop_keycmp() */
2118                 /* coverity[overrun-buffer-val] */
2119                 rc = echo_create_md_object(env, ed, parent, fid, name, namelen,
2120                                            id, mode, count, stripe_count,
2121                                            stripe_index);
2122                 break;
2123         }
2124         case ECHO_MD_DESTROY:
2125         case ECHO_MD_RMDIR: {
2126                 __u32 mode = data->ioc_obdo2.o_mode;
2127
2128                 rc = echo_destroy_object(env, ed, parent, name, namelen,
2129                                          id, mode, count);
2130                 break;
2131         }
2132         case ECHO_MD_LOOKUP:
2133                 rc = echo_lookup_object(env, ed, parent, id, count);
2134                 break;
2135         case ECHO_MD_GETATTR:
2136                 rc = echo_getattr_object(env, ed, parent, id, count);
2137                 break;
2138         case ECHO_MD_SETATTR:
2139                 rc = echo_setattr_object(env, ed, parent, id, count);
2140                 break;
2141         default:
2142                 CERROR("unknown command %d\n", command);
2143                 rc = -EINVAL;
2144                 break;
2145         }
2146         echo_ucred_fini(env);
2147
2148 out_name:
2149         if (name != NULL)
2150                 OBD_FREE(name, namelen + 1);
2151 out_put:
2152         lu_object_put(env, parent);
2153 out_free:
2154         LASSERT(info->eti_big_lmm);
2155         OBD_FREE_LARGE(info->eti_big_lmm, info->eti_big_lmmsize);
2156         info->eti_big_lmm = NULL;
2157         info->eti_big_lmmsize = 0;
2158 out_env:
2159         cl_env_put(env, &refcheck);
2160         return rc;
2161 }
2162 #endif /* HAVE_SERVER_SUPPORT */
2163
2164 static int echo_create_object(const struct lu_env *env, struct echo_device *ed,
2165                               int on_target, struct obdo *oa, void *ulsm,
2166                               int ulsm_nob, struct obd_trans_info *oti)
2167 {
2168         struct echo_object     *eco;
2169         struct echo_client_obd *ec = ed->ed_ec;
2170         struct lov_stripe_md   *lsm = NULL;
2171         int                     rc;
2172         int                     created = 0;
2173         ENTRY;
2174
2175         if ((oa->o_valid & OBD_MD_FLID) == 0 && /* no obj id */
2176             (on_target ||                       /* set_stripe */
2177              ec->ec_nstripes != 0)) {           /* LOV */
2178                 CERROR ("No valid oid\n");
2179                 RETURN(-EINVAL);
2180         }
2181
2182         rc = echo_alloc_memmd(ed, &lsm);
2183         if (rc < 0) {
2184                 CERROR("Cannot allocate md: rc = %d\n", rc);
2185                 GOTO(failed, rc);
2186         }
2187
2188         if (ulsm != NULL) {
2189                 int i, idx;
2190
2191                 rc = echo_copyin_lsm (ed, lsm, ulsm, ulsm_nob);
2192                 if (rc != 0)
2193                         GOTO(failed, rc);
2194
2195                 if (lsm->lsm_stripe_count == 0)
2196                         lsm->lsm_stripe_count = ec->ec_nstripes;
2197
2198                 if (lsm->lsm_stripe_size == 0)
2199                         lsm->lsm_stripe_size = PAGE_CACHE_SIZE;
2200
2201                 idx = cfs_rand();
2202
2203                 /* setup stripes: indices + default ids if required */
2204                 for (i = 0; i < lsm->lsm_stripe_count; i++) {
2205                         if (ostid_id(&lsm->lsm_oinfo[i]->loi_oi) == 0)
2206                                 lsm->lsm_oinfo[i]->loi_oi = lsm->lsm_oi;
2207
2208                         lsm->lsm_oinfo[i]->loi_ost_idx =
2209                                 (idx + i) % ec->ec_nstripes;
2210                 }
2211         }
2212
2213         /* setup object ID here for !on_target and LOV hint */
2214         if (oa->o_valid & OBD_MD_FLID) {
2215                 LASSERT(oa->o_valid & OBD_MD_FLGROUP);
2216                 lsm->lsm_oi = oa->o_oi;
2217         }
2218
2219         if (ostid_id(&lsm->lsm_oi) == 0)
2220                 ostid_set_id(&lsm->lsm_oi, ++last_object_id);
2221
2222         rc = 0;
2223         if (on_target) {
2224                 /* Only echo objects are allowed to be created */
2225                 LASSERT((oa->o_valid & OBD_MD_FLGROUP) &&
2226                         (ostid_seq(&oa->o_oi) == FID_SEQ_ECHO));
2227                 rc = obd_create(env, ec->ec_exp, oa, &lsm, oti);
2228                 if (rc != 0) {
2229                         CERROR("Cannot create objects: rc = %d\n", rc);
2230                         GOTO(failed, rc);
2231                 }
2232                 created = 1;
2233         }
2234
2235         /* See what object ID we were given */
2236         oa->o_oi = lsm->lsm_oi;
2237         oa->o_valid |= OBD_MD_FLID;
2238
2239         eco = cl_echo_object_find(ed, &lsm);
2240         if (IS_ERR(eco))
2241                 GOTO(failed, rc = PTR_ERR(eco));
2242         cl_echo_object_put(eco);
2243
2244         CDEBUG(D_INFO, "oa oid "DOSTID"\n", POSTID(&oa->o_oi));
2245         EXIT;
2246
2247  failed:
2248         if (created && rc)
2249                 obd_destroy(env, ec->ec_exp, oa, lsm, oti, NULL, NULL);
2250         if (lsm)
2251                 echo_free_memmd(ed, &lsm);
2252         if (rc)
2253                 CERROR("create object failed with: rc = %d\n", rc);
2254         return (rc);
2255 }
2256
2257 static int echo_get_object(struct echo_object **ecop, struct echo_device *ed,
2258                            struct obdo *oa)
2259 {
2260         struct lov_stripe_md   *lsm = NULL;
2261         struct echo_object     *eco;
2262         int                     rc;
2263         ENTRY;
2264
2265         if ((oa->o_valid & OBD_MD_FLID) == 0 || ostid_id(&oa->o_oi) == 0) {
2266                 /* disallow use of object id 0 */
2267                 CERROR ("No valid oid\n");
2268                 RETURN(-EINVAL);
2269         }
2270
2271         rc = echo_alloc_memmd(ed, &lsm);
2272         if (rc < 0)
2273                 RETURN(rc);
2274
2275         lsm->lsm_oi = oa->o_oi;
2276         if (!(oa->o_valid & OBD_MD_FLGROUP))
2277                 ostid_set_seq_echo(&lsm->lsm_oi);
2278
2279         rc = 0;
2280         eco = cl_echo_object_find(ed, &lsm);
2281         if (!IS_ERR(eco))
2282                 *ecop = eco;
2283         else
2284                 rc = PTR_ERR(eco);
2285         if (lsm)
2286                 echo_free_memmd(ed, &lsm);
2287         RETURN(rc);
2288 }
2289
2290 static void echo_put_object(struct echo_object *eco)
2291 {
2292         int rc;
2293
2294         rc = cl_echo_object_put(eco);
2295         if (rc)
2296                 CERROR("%s: echo client drop an object failed: rc = %d\n",
2297                        eco->eo_dev->ed_ec->ec_exp->exp_obd->obd_name, rc);
2298 }
2299
2300 static void
2301 echo_get_stripe_off_id (struct lov_stripe_md *lsm, obd_off *offp, obd_id *idp)
2302 {
2303         unsigned long stripe_count;
2304         unsigned long stripe_size;
2305         unsigned long width;
2306         unsigned long woffset;
2307         int           stripe_index;
2308         obd_off       offset;
2309
2310         if (lsm->lsm_stripe_count <= 1)
2311                 return;
2312
2313         offset       = *offp;
2314         stripe_size  = lsm->lsm_stripe_size;
2315         stripe_count = lsm->lsm_stripe_count;
2316
2317         /* width = # bytes in all stripes */
2318         width = stripe_size * stripe_count;
2319
2320         /* woffset = offset within a width; offset = whole number of widths */
2321         woffset = do_div (offset, width);
2322
2323         stripe_index = woffset / stripe_size;
2324
2325         *idp = ostid_id(&lsm->lsm_oinfo[stripe_index]->loi_oi);
2326         *offp = offset * stripe_size + woffset % stripe_size;
2327 }
2328
2329 static void
2330 echo_client_page_debug_setup(struct lov_stripe_md *lsm,
2331                              struct page *page, int rw, obd_id id,
2332                              obd_off offset, obd_off count)
2333 {
2334         char    *addr;
2335         obd_off  stripe_off;
2336         obd_id   stripe_id;
2337         int      delta;
2338
2339         /* no partial pages on the client */
2340         LASSERT(count == PAGE_CACHE_SIZE);
2341
2342         addr = kmap(page);
2343
2344         for (delta = 0; delta < PAGE_CACHE_SIZE; delta += OBD_ECHO_BLOCK_SIZE) {
2345                 if (rw == OBD_BRW_WRITE) {
2346                         stripe_off = offset + delta;
2347                         stripe_id = id;
2348                         echo_get_stripe_off_id(lsm, &stripe_off, &stripe_id);
2349                 } else {
2350                         stripe_off = 0xdeadbeef00c0ffeeULL;
2351                         stripe_id = 0xdeadbeef00c0ffeeULL;
2352                 }
2353                 block_debug_setup(addr + delta, OBD_ECHO_BLOCK_SIZE,
2354                                   stripe_off, stripe_id);
2355         }
2356
2357         kunmap(page);
2358 }
2359
2360 static int echo_client_page_debug_check(struct lov_stripe_md *lsm,
2361                                         struct page *page, obd_id id,
2362                                         obd_off offset, obd_off count)
2363 {
2364         obd_off stripe_off;
2365         obd_id  stripe_id;
2366         char   *addr;
2367         int     delta;
2368         int     rc;
2369         int     rc2;
2370
2371         /* no partial pages on the client */
2372         LASSERT(count == PAGE_CACHE_SIZE);
2373
2374         addr = kmap(page);
2375
2376         for (rc = delta = 0; delta < PAGE_CACHE_SIZE; delta += OBD_ECHO_BLOCK_SIZE) {
2377                 stripe_off = offset + delta;
2378                 stripe_id = id;
2379                 echo_get_stripe_off_id (lsm, &stripe_off, &stripe_id);
2380
2381                 rc2 = block_debug_check("test_brw",
2382                                         addr + delta, OBD_ECHO_BLOCK_SIZE,
2383                                         stripe_off, stripe_id);
2384                 if (rc2 != 0) {
2385                         CERROR ("Error in echo object "LPX64"\n", id);
2386                         rc = rc2;
2387                 }
2388         }
2389
2390         kunmap(page);
2391         return rc;
2392 }
2393
2394 static int echo_client_kbrw(struct echo_device *ed, int rw, struct obdo *oa,
2395                             struct echo_object *eco, obd_off offset,
2396                             obd_size count, int async,
2397                             struct obd_trans_info *oti)
2398 {
2399         struct lov_stripe_md   *lsm = eco->eo_lsm;
2400         obd_count               npages;
2401         struct brw_page        *pga;
2402         struct brw_page        *pgp;
2403         struct page            **pages;
2404         obd_off                 off;
2405         int                     i;
2406         int                     rc;
2407         int                     verify;
2408         int                     gfp_mask;
2409         int                     brw_flags = 0;
2410         ENTRY;
2411
2412         verify = (ostid_id(&oa->o_oi) != ECHO_PERSISTENT_OBJID &&
2413                   (oa->o_valid & OBD_MD_FLFLAGS) != 0 &&
2414                   (oa->o_flags & OBD_FL_DEBUG_CHECK) != 0);
2415
2416         gfp_mask = ((ostid_id(&oa->o_oi) & 2) == 0) ? GFP_IOFS : GFP_HIGHUSER;
2417
2418         LASSERT(rw == OBD_BRW_WRITE || rw == OBD_BRW_READ);
2419         LASSERT(lsm != NULL);
2420         LASSERT(ostid_id(&lsm->lsm_oi) == ostid_id(&oa->o_oi));
2421
2422         if (count <= 0 ||
2423             (count & (~CFS_PAGE_MASK)) != 0)
2424                 RETURN(-EINVAL);
2425
2426         /* XXX think again with misaligned I/O */
2427         npages = count >> PAGE_CACHE_SHIFT;
2428
2429         if (rw == OBD_BRW_WRITE)
2430                 brw_flags = OBD_BRW_ASYNC;
2431
2432         OBD_ALLOC(pga, npages * sizeof(*pga));
2433         if (pga == NULL)
2434                 RETURN(-ENOMEM);
2435
2436         OBD_ALLOC(pages, npages * sizeof(*pages));
2437         if (pages == NULL) {
2438                 OBD_FREE(pga, npages * sizeof(*pga));
2439                 RETURN(-ENOMEM);
2440         }
2441
2442         for (i = 0, pgp = pga, off = offset;
2443              i < npages;
2444              i++, pgp++, off += PAGE_CACHE_SIZE) {
2445
2446                 LASSERT (pgp->pg == NULL);      /* for cleanup */
2447
2448                 rc = -ENOMEM;
2449                 OBD_PAGE_ALLOC(pgp->pg, gfp_mask);
2450                 if (pgp->pg == NULL)
2451                         goto out;
2452
2453                 pages[i] = pgp->pg;
2454                 pgp->count = PAGE_CACHE_SIZE;
2455                 pgp->off = off;
2456                 pgp->flag = brw_flags;
2457
2458                 if (verify)
2459                         echo_client_page_debug_setup(lsm, pgp->pg, rw,
2460                                                      ostid_id(&oa->o_oi), off,
2461                                                      pgp->count);
2462         }
2463
2464         /* brw mode can only be used at client */
2465         LASSERT(ed->ed_next != NULL);
2466         rc = cl_echo_object_brw(eco, rw, offset, pages, npages, async);
2467
2468  out:
2469         if (rc != 0 || rw != OBD_BRW_READ)
2470                 verify = 0;
2471
2472         for (i = 0, pgp = pga; i < npages; i++, pgp++) {
2473                 if (pgp->pg == NULL)
2474                         continue;
2475
2476                 if (verify) {
2477                         int vrc;
2478                         vrc = echo_client_page_debug_check(lsm, pgp->pg,
2479                                                            ostid_id(&oa->o_oi),
2480                                                            pgp->off, pgp->count);
2481                         if (vrc != 0 && rc == 0)
2482                                 rc = vrc;
2483                 }
2484                 OBD_PAGE_FREE(pgp->pg);
2485         }
2486         OBD_FREE(pga, npages * sizeof(*pga));
2487         OBD_FREE(pages, npages * sizeof(*pages));
2488         RETURN(rc);
2489 }
2490
2491 static int echo_client_prep_commit(const struct lu_env *env,
2492                                    struct obd_export *exp, int rw,
2493                                    struct obdo *oa, struct echo_object *eco,
2494                                    obd_off offset, obd_size count,
2495                                    obd_size batch, struct obd_trans_info *oti,
2496                                    int async)
2497 {
2498         struct lov_stripe_md *lsm = eco->eo_lsm;
2499         struct obd_ioobj ioo;
2500         struct niobuf_local *lnb;
2501         struct niobuf_remote *rnb;
2502         obd_off off;
2503         obd_size npages, tot_pages;
2504         int i, ret = 0, brw_flags = 0;
2505
2506         ENTRY;
2507
2508         if (count <= 0 || (count & (~CFS_PAGE_MASK)) != 0 ||
2509             (lsm != NULL && ostid_id(&lsm->lsm_oi) != ostid_id(&oa->o_oi)))
2510                 RETURN(-EINVAL);
2511
2512         npages = batch >> PAGE_CACHE_SHIFT;
2513         tot_pages = count >> PAGE_CACHE_SHIFT;
2514
2515         OBD_ALLOC(lnb, npages * sizeof(struct niobuf_local));
2516         OBD_ALLOC(rnb, npages * sizeof(struct niobuf_remote));
2517
2518         if (lnb == NULL || rnb == NULL)
2519                 GOTO(out, ret = -ENOMEM);
2520
2521         if (rw == OBD_BRW_WRITE && async)
2522                 brw_flags |= OBD_BRW_ASYNC;
2523
2524         obdo_to_ioobj(oa, &ioo);
2525
2526         off = offset;
2527
2528         for(; tot_pages; tot_pages -= npages) {
2529                 int lpages;
2530
2531                 if (tot_pages < npages)
2532                         npages = tot_pages;
2533
2534                 for (i = 0; i < npages; i++, off += PAGE_CACHE_SIZE) {
2535                         rnb[i].offset = off;
2536                         rnb[i].len = PAGE_CACHE_SIZE;
2537                         rnb[i].flags = brw_flags;
2538                 }
2539
2540                 ioo.ioo_bufcnt = npages;
2541
2542                 lpages = npages;
2543                 ret = obd_preprw(env, rw, exp, oa, 1, &ioo, rnb, &lpages,
2544                                  lnb, oti, NULL);
2545                 if (ret != 0)
2546                         GOTO(out, ret);
2547                 LASSERT(lpages == npages);
2548
2549                 for (i = 0; i < lpages; i++) {
2550                         struct page *page = lnb[i].page;
2551
2552                         /* read past eof? */
2553                         if (page == NULL && lnb[i].rc == 0)
2554                                 continue;
2555
2556                         if (async)
2557                                 lnb[i].flags |= OBD_BRW_ASYNC;
2558
2559                         if (ostid_id(&oa->o_oi) == ECHO_PERSISTENT_OBJID ||
2560                             (oa->o_valid & OBD_MD_FLFLAGS) == 0 ||
2561                             (oa->o_flags & OBD_FL_DEBUG_CHECK) == 0)
2562                                 continue;
2563
2564                         if (rw == OBD_BRW_WRITE)
2565                                 echo_client_page_debug_setup(lsm, page, rw,
2566                                                             ostid_id(&oa->o_oi),
2567                                                              rnb[i].offset,
2568                                                              rnb[i].len);
2569                         else
2570                                 echo_client_page_debug_check(lsm, page,
2571                                                             ostid_id(&oa->o_oi),
2572                                                              rnb[i].offset,
2573                                                              rnb[i].len);
2574                 }
2575
2576                 ret = obd_commitrw(env, rw, exp, oa, 1, &ioo,
2577                                    rnb, npages, lnb, oti, ret);
2578                 if (ret != 0)
2579                         GOTO(out, ret);
2580
2581                 /* Reset oti otherwise it would confuse ldiskfs. */
2582                 memset(oti, 0, sizeof(*oti));
2583
2584                 /* Reuse env context. */
2585                 lu_context_exit((struct lu_context *)&env->le_ctx);
2586                 lu_context_enter((struct lu_context *)&env->le_ctx);
2587         }
2588
2589 out:
2590         if (lnb)
2591                 OBD_FREE(lnb, npages * sizeof(struct niobuf_local));
2592         if (rnb)
2593                 OBD_FREE(rnb, npages * sizeof(struct niobuf_remote));
2594         RETURN(ret);
2595 }
2596
2597 static int echo_client_brw_ioctl(const struct lu_env *env, int rw,
2598                                  struct obd_export *exp,
2599                                  struct obd_ioctl_data *data,
2600                                  struct obd_trans_info *dummy_oti)
2601 {
2602         struct obd_device *obd = class_exp2obd(exp);
2603         struct echo_device *ed = obd2echo_dev(obd);
2604         struct echo_client_obd *ec = ed->ed_ec;
2605         struct obdo *oa = &data->ioc_obdo1;
2606         struct echo_object *eco;
2607         int rc;
2608         int async = 0;
2609         long test_mode;
2610         ENTRY;
2611
2612         LASSERT(oa->o_valid & OBD_MD_FLGROUP);
2613
2614         rc = echo_get_object(&eco, ed, oa);
2615         if (rc)
2616                 RETURN(rc);
2617
2618         oa->o_valid &= ~OBD_MD_FLHANDLE;
2619
2620         /* OFD/obdfilter works only via prep/commit */
2621         test_mode = (long)data->ioc_pbuf1;
2622         if (ed->ed_next == NULL && test_mode != 3) {
2623                 test_mode = 3;
2624                 data->ioc_plen1 = data->ioc_count;
2625         }
2626
2627         if (test_mode == 3)
2628                 async = 1;
2629
2630         /* Truncate batch size to maximum */
2631         if (data->ioc_plen1 > PTLRPC_MAX_BRW_SIZE)
2632                 data->ioc_plen1 = PTLRPC_MAX_BRW_SIZE;
2633
2634         switch (test_mode) {
2635         case 1:
2636                 /* fall through */
2637         case 2:
2638                 rc = echo_client_kbrw(ed, rw, oa,
2639                                       eco, data->ioc_offset,
2640                                       data->ioc_count, async, dummy_oti);
2641                 break;
2642         case 3:
2643                 rc = echo_client_prep_commit(env, ec->ec_exp, rw, oa,
2644                                              eco, data->ioc_offset,
2645                                              data->ioc_count, data->ioc_plen1,
2646                                              dummy_oti, async);
2647                 break;
2648         default:
2649                 rc = -EINVAL;
2650         }
2651         echo_put_object(eco);
2652         RETURN(rc);
2653 }
2654
2655 static int
2656 echo_client_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
2657                       void *karg, void *uarg)
2658 {
2659 #ifdef HAVE_SERVER_SUPPORT
2660         struct tgt_session_info *tsi;
2661 #endif
2662         struct obd_device      *obd = exp->exp_obd;
2663         struct echo_device     *ed = obd2echo_dev(obd);
2664         struct echo_client_obd *ec = ed->ed_ec;
2665         struct echo_object     *eco;
2666         struct obd_ioctl_data  *data = karg;
2667         struct obd_trans_info   dummy_oti;
2668         struct lu_env          *env;
2669         struct oti_req_ack_lock *ack_lock;
2670         struct obdo            *oa;
2671         struct lu_fid           fid;
2672         int                     rw = OBD_BRW_READ;
2673         int                     rc = 0;
2674         int                     i;
2675 #ifdef HAVE_SERVER_SUPPORT
2676         struct lu_context        echo_session;
2677 #endif
2678         ENTRY;
2679
2680         memset(&dummy_oti, 0, sizeof(dummy_oti));
2681
2682         oa = &data->ioc_obdo1;
2683         if (!(oa->o_valid & OBD_MD_FLGROUP)) {
2684                 oa->o_valid |= OBD_MD_FLGROUP;
2685                 ostid_set_seq_echo(&oa->o_oi);
2686         }
2687
2688         /* This FID is unpacked just for validation at this point */
2689         rc = ostid_to_fid(&fid, &oa->o_oi, 0);
2690         if (rc < 0)
2691                 RETURN(rc);
2692
2693         OBD_ALLOC_PTR(env);
2694         if (env == NULL)
2695                 RETURN(-ENOMEM);
2696
2697         rc = lu_env_init(env, LCT_DT_THREAD);
2698         if (rc)
2699                 GOTO(out_alloc, rc = -ENOMEM);
2700
2701 #ifdef HAVE_SERVER_SUPPORT
2702         env->le_ses = &echo_session;
2703         rc = lu_context_init(env->le_ses, LCT_SERVER_SESSION | LCT_NOREF);
2704         if (unlikely(rc < 0))
2705                 GOTO(out_env, rc);
2706         lu_context_enter(env->le_ses);
2707
2708         tsi = tgt_ses_info(env);
2709         tsi->tsi_exp = ec->ec_exp;
2710         tsi->tsi_jobid = NULL;
2711 #endif
2712         switch (cmd) {
2713         case OBD_IOC_CREATE:                    /* may create echo object */
2714                 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
2715                         GOTO (out, rc = -EPERM);
2716
2717                 rc = echo_create_object(env, ed, 1, oa, data->ioc_pbuf1,
2718                                         data->ioc_plen1, &dummy_oti);
2719                 GOTO(out, rc);
2720
2721 #ifdef HAVE_SERVER_SUPPORT
2722         case OBD_IOC_ECHO_MD: {
2723                 int count;
2724                 int cmd;
2725                 char *dir = NULL;
2726                 int dirlen;
2727                 __u64 id;
2728
2729                 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
2730                         GOTO(out, rc = -EPERM);
2731
2732                 count = data->ioc_count;
2733                 cmd = data->ioc_command;
2734
2735                 id = data->ioc_obdo2.o_oi.oi.oi_id;
2736                 dirlen = data->ioc_plen1;
2737                 OBD_ALLOC(dir, dirlen + 1);
2738                 if (dir == NULL)
2739                         GOTO(out, rc = -ENOMEM);
2740
2741                 if (copy_from_user(dir, data->ioc_pbuf1, dirlen)) {
2742                         OBD_FREE(dir, data->ioc_plen1 + 1);
2743                         GOTO(out, rc = -EFAULT);
2744                 }
2745
2746                 rc = echo_md_handler(ed, cmd, dir, dirlen, id, count, data);
2747                 OBD_FREE(dir, dirlen + 1);
2748                 GOTO(out, rc);
2749         }
2750         case OBD_IOC_ECHO_ALLOC_SEQ: {
2751                 struct lu_env   *cl_env;
2752                 int              refcheck;
2753                 __u64            seq;
2754                 int              max_count;
2755
2756                 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
2757                         GOTO(out, rc = -EPERM);
2758
2759                 cl_env = cl_env_get(&refcheck);
2760                 if (IS_ERR(cl_env))
2761                         GOTO(out, rc = PTR_ERR(cl_env));
2762
2763                 rc = lu_env_refill_by_tags(cl_env, ECHO_MD_CTX_TAG,
2764                                             ECHO_MD_SES_TAG);
2765                 if (rc != 0) {
2766                         cl_env_put(cl_env, &refcheck);
2767                         GOTO(out, rc);
2768                 }
2769
2770                 rc = seq_client_get_seq(cl_env, ed->ed_cl_seq, &seq);
2771                 cl_env_put(cl_env, &refcheck);
2772                 if (rc < 0) {
2773                         CERROR("%s: Can not alloc seq: rc = %d\n",
2774                                obd->obd_name, rc);
2775                         GOTO(out, rc);
2776                 }
2777
2778                 if (copy_to_user(data->ioc_pbuf1, &seq, data->ioc_plen1))
2779                         return -EFAULT;
2780
2781                 max_count = LUSTRE_METADATA_SEQ_MAX_WIDTH;
2782                 if (copy_to_user(data->ioc_pbuf2, &max_count,
2783                                      data->ioc_plen2))
2784                         return -EFAULT;
2785                 GOTO(out, rc);
2786         }
2787 #endif /* HAVE_SERVER_SUPPORT */
2788         case OBD_IOC_DESTROY:
2789                 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
2790                         GOTO (out, rc = -EPERM);
2791
2792                 rc = echo_get_object(&eco, ed, oa);
2793                 if (rc == 0) {
2794                         rc = obd_destroy(env, ec->ec_exp, oa, eco->eo_lsm,
2795                                          &dummy_oti, NULL, NULL);
2796                         if (rc == 0)
2797                                 eco->eo_deleted = 1;
2798                         echo_put_object(eco);
2799                 }
2800                 GOTO(out, rc);
2801
2802         case OBD_IOC_GETATTR:
2803                 rc = echo_get_object(&eco, ed, oa);
2804                 if (rc == 0) {
2805                         struct obd_info oinfo = { { { 0 } } };
2806                         oinfo.oi_md = eco->eo_lsm;
2807                         oinfo.oi_oa = oa;
2808                         rc = obd_getattr(env, ec->ec_exp, &oinfo);
2809                         echo_put_object(eco);
2810                 }
2811                 GOTO(out, rc);
2812
2813         case OBD_IOC_SETATTR:
2814                 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
2815                         GOTO (out, rc = -EPERM);
2816
2817                 rc = echo_get_object(&eco, ed, oa);
2818                 if (rc == 0) {
2819                         struct obd_info oinfo = { { { 0 } } };
2820                         oinfo.oi_oa = oa;
2821                         oinfo.oi_md = eco->eo_lsm;
2822
2823                         rc = obd_setattr(env, ec->ec_exp, &oinfo, NULL);
2824                         echo_put_object(eco);
2825                 }
2826                 GOTO(out, rc);
2827
2828         case OBD_IOC_BRW_WRITE:
2829                 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
2830                         GOTO (out, rc = -EPERM);
2831
2832                 rw = OBD_BRW_WRITE;
2833                 /* fall through */
2834         case OBD_IOC_BRW_READ:
2835                 rc = echo_client_brw_ioctl(env, rw, exp, data, &dummy_oti);
2836                 GOTO(out, rc);
2837
2838         default:
2839                 CERROR ("echo_ioctl(): unrecognised ioctl %#x\n", cmd);
2840                 GOTO (out, rc = -ENOTTY);
2841         }
2842
2843         EXIT;
2844 out:
2845 #ifdef HAVE_SERVER_SUPPORT
2846         lu_context_exit(env->le_ses);
2847         lu_context_fini(env->le_ses);
2848 out_env:
2849 #endif
2850         lu_env_fini(env);
2851 out_alloc:
2852         OBD_FREE_PTR(env);
2853
2854         /* XXX this should be in a helper also called by target_send_reply */
2855         for (ack_lock = dummy_oti.oti_ack_locks, i = 0; i < 4;
2856              i++, ack_lock++) {
2857                 if (!ack_lock->mode)
2858                         break;
2859                 ldlm_lock_decref(&ack_lock->lock, ack_lock->mode);
2860         }
2861
2862         return rc;
2863 }
2864
2865 static int echo_client_setup(const struct lu_env *env,
2866                              struct obd_device *obddev, struct lustre_cfg *lcfg)
2867 {
2868         struct echo_client_obd *ec = &obddev->u.echo_client;
2869         struct obd_device *tgt;
2870         struct obd_uuid echo_uuid = { "ECHO_UUID" };
2871         struct obd_connect_data *ocd = NULL;
2872         int rc;
2873         ENTRY;
2874
2875         if (lcfg->lcfg_bufcount < 2 || LUSTRE_CFG_BUFLEN(lcfg, 1) < 1) {
2876                 CERROR("requires a TARGET OBD name\n");
2877                 RETURN(-EINVAL);
2878         }
2879
2880         tgt = class_name2obd(lustre_cfg_string(lcfg, 1));
2881         if (!tgt || !tgt->obd_attached || !tgt->obd_set_up) {
2882                 CERROR("device not attached or not set up (%s)\n",
2883                        lustre_cfg_string(lcfg, 1));
2884                 RETURN(-EINVAL);
2885         }
2886
2887         spin_lock_init(&ec->ec_lock);
2888         INIT_LIST_HEAD(&ec->ec_objects);
2889         INIT_LIST_HEAD(&ec->ec_locks);
2890         ec->ec_unique = 0;
2891         ec->ec_nstripes = 0;
2892
2893         if (!strcmp(tgt->obd_type->typ_name, LUSTRE_MDT_NAME)) {
2894 #ifdef HAVE_SERVER_SUPPORT
2895                 lu_context_tags_update(ECHO_MD_CTX_TAG);
2896                 lu_session_tags_update(ECHO_MD_SES_TAG);
2897 #else
2898                 CERROR("Local operations are NOT supported on client side. "
2899                        "Only remote operations are supported. Metadata client "
2900                        "must be run on server side.\n");
2901 #endif
2902                 RETURN(0);
2903         }
2904
2905         OBD_ALLOC(ocd, sizeof(*ocd));
2906         if (ocd == NULL) {
2907                 CERROR("Can't alloc ocd connecting to %s\n",
2908                        lustre_cfg_string(lcfg, 1));
2909                 return -ENOMEM;
2910         }
2911
2912         ocd->ocd_connect_flags = OBD_CONNECT_VERSION | OBD_CONNECT_REQPORTAL |
2913                                  OBD_CONNECT_BRW_SIZE |
2914                                  OBD_CONNECT_GRANT | OBD_CONNECT_FULL20 |
2915                                  OBD_CONNECT_64BITHASH | OBD_CONNECT_LVB_TYPE |
2916                                  OBD_CONNECT_FID;
2917         ocd->ocd_brw_size = DT_MAX_BRW_SIZE;
2918         ocd->ocd_version = LUSTRE_VERSION_CODE;
2919         ocd->ocd_group = FID_SEQ_ECHO;
2920
2921         rc = obd_connect(env, &ec->ec_exp, tgt, &echo_uuid, ocd, NULL);
2922         if (rc == 0) {
2923                 /* Turn off pinger because it connects to tgt obd directly. */
2924                 spin_lock(&tgt->obd_dev_lock);
2925                 list_del_init(&ec->ec_exp->exp_obd_chain_timed);
2926                 spin_unlock(&tgt->obd_dev_lock);
2927         }
2928
2929         OBD_FREE(ocd, sizeof(*ocd));
2930
2931         if (rc != 0) {
2932                 CERROR("fail to connect to device %s\n",
2933                        lustre_cfg_string(lcfg, 1));
2934                 return (rc);
2935         }
2936
2937         RETURN(rc);
2938 }
2939
2940 static int echo_client_cleanup(struct obd_device *obddev)
2941 {
2942         struct echo_device *ed = obd2echo_dev(obddev);
2943         struct echo_client_obd *ec = &obddev->u.echo_client;
2944         int rc;
2945         ENTRY;
2946
2947         /*Do nothing for Metadata echo client*/
2948         if (ed == NULL )
2949                 RETURN(0);
2950
2951         if (ed->ed_next_ismd) {
2952 #ifdef HAVE_SERVER_SUPPORT
2953                 lu_context_tags_clear(ECHO_MD_CTX_TAG);
2954                 lu_session_tags_clear(ECHO_MD_SES_TAG);
2955 #else
2956                 CERROR("This is client-side only module, does not support "
2957                         "metadata echo client.\n");
2958 #endif
2959                 RETURN(0);
2960         }
2961
2962         if (!list_empty(&obddev->obd_exports)) {
2963                 CERROR("still has clients!\n");
2964                 RETURN(-EBUSY);
2965         }
2966
2967         LASSERT(atomic_read(&ec->ec_exp->exp_refcount) > 0);
2968         rc = obd_disconnect(ec->ec_exp);
2969         if (rc != 0)
2970                 CERROR("fail to disconnect device: %d\n", rc);
2971
2972         RETURN(rc);
2973 }
2974
2975 static int echo_client_connect(const struct lu_env *env,
2976                                struct obd_export **exp,
2977                                struct obd_device *src, struct obd_uuid *cluuid,
2978                                struct obd_connect_data *data, void *localdata)
2979 {
2980         int                rc;
2981         struct lustre_handle conn = { 0 };
2982
2983         ENTRY;
2984         rc = class_connect(&conn, src, cluuid);
2985         if (rc == 0) {
2986                 *exp = class_conn2export(&conn);
2987         }
2988
2989         RETURN (rc);
2990 }
2991
2992 static int echo_client_disconnect(struct obd_export *exp)
2993 {
2994         int                     rc;
2995         ENTRY;
2996
2997         if (exp == NULL)
2998                 GOTO(out, rc = -EINVAL);
2999
3000         rc = class_disconnect(exp);
3001         GOTO(out, rc);
3002  out:
3003         return rc;
3004 }
3005
3006 static struct obd_ops echo_client_obd_ops = {
3007         .o_owner       = THIS_MODULE,
3008         .o_iocontrol   = echo_client_iocontrol,
3009         .o_connect     = echo_client_connect,
3010         .o_disconnect  = echo_client_disconnect
3011 };
3012
3013 int echo_client_init(void)
3014 {
3015         int rc;
3016
3017         rc = lu_kmem_init(echo_caches);
3018         if (rc == 0) {
3019                 rc = class_register_type(&echo_client_obd_ops, NULL, true, NULL,
3020 #ifndef HAVE_ONLY_PROCFS_SEQ
3021                                          NULL,
3022 #endif
3023                                          LUSTRE_ECHO_CLIENT_NAME,
3024                                          &echo_device_type);
3025                 if (rc)
3026                         lu_kmem_fini(echo_caches);
3027         }
3028         return rc;
3029 }
3030
3031 void echo_client_exit(void)
3032 {
3033         class_unregister_type(LUSTRE_ECHO_CLIENT_NAME);
3034         lu_kmem_fini(echo_caches);
3035 }
3036
3037 #ifdef __KERNEL__
3038 static int __init obdecho_init(void)
3039 {
3040         int rc;
3041
3042         ENTRY;
3043         LCONSOLE_INFO("Echo OBD driver; http://www.lustre.org/\n");
3044
3045         LASSERT(PAGE_CACHE_SIZE % OBD_ECHO_BLOCK_SIZE == 0);
3046
3047 # ifdef HAVE_SERVER_SUPPORT
3048         rc = echo_persistent_pages_init();
3049         if (rc != 0)
3050                 goto failed_0;
3051
3052         rc = class_register_type(&echo_obd_ops, NULL, true, NULL,
3053 #ifndef HAVE_ONLY_PROCFS_SEQ
3054                                 NULL,
3055 #endif
3056                                 LUSTRE_ECHO_NAME, NULL);
3057         if (rc != 0)
3058                 goto failed_1;
3059 # endif
3060
3061         rc = echo_client_init();
3062
3063 # ifdef HAVE_SERVER_SUPPORT
3064         if (rc == 0)
3065                 RETURN(0);
3066
3067         class_unregister_type(LUSTRE_ECHO_NAME);
3068 failed_1:
3069         echo_persistent_pages_fini();
3070 failed_0:
3071 # endif
3072         RETURN(rc);
3073 }
3074
/**
 * Module teardown: shut down the echo client, then (server builds only)
 * unregister the echo server OBD type and free the persistent pages.
 */
static void /*__exit*/ obdecho_exit(void)
{
        /* Mirror image of obdecho_init(): client first, server last. */
        echo_client_exit();

# ifdef HAVE_SERVER_SUPPORT
        class_unregister_type(LUSTRE_ECHO_NAME);
        echo_persistent_pages_fini();
# endif
}
3084
3085 MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
3086 MODULE_DESCRIPTION("Lustre Testing Echo OBD driver");
3087 MODULE_LICENSE("GPL");
3088
3089 cfs_module(obdecho, LUSTRE_VERSION_STRING, obdecho_init, obdecho_exit);
3090 #endif /* __KERNEL__ */
3091
3092 /** @} echo_client */