Whamcloud - gitweb
ff7334d32e7e89fe1b60751aaf0bc7dc9f1ca645
[fs/lustre-release.git] / lustre / mdc / mdc_request.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
5  *
6  *   This file is part of Lustre, http://www.sf.net/projects/lustre/
7  *
8  *   Lustre is free software; you can redistribute it and/or
9  *   modify it under the terms of version 2 of the GNU General Public
10  *   License as published by the Free Software Foundation.
11  *
12  *   Lustre is distributed in the hope that it will be useful,
13  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  *   GNU General Public License for more details.
16  *
17  *   You should have received a copy of the GNU General Public License
18  *   along with Lustre; if not, write to the Free Software
19  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20  *
21  */
22
23 #define EXPORT_SYMTAB
24 #define DEBUG_SUBSYSTEM S_MDC
25
26 #include <linux/module.h>
27 #include <linux/miscdevice.h>
28 #include <linux/lustre_mds.h>
29 #include <linux/lustre_lite.h>
30 #include <linux/lustre_dlm.h>
31 #include <linux/init.h>
32 #include <linux/obd_lov.h>
33
/* Not referenced anywhere in the visible part of this file.
 * NOTE(review): presumably a leftover misc-device minor -- confirm before
 * removing. */
#define REQUEST_MINOR 244

/* Declared but not called in the visible part of this file.
 * NOTE(review): presumably provided by the MDS code -- confirm it is still
 * needed here. */
extern int mds_queue_req(struct ptlrpc_request *);
37
38 /* should become mdc_getinfo() */
39 int mdc_getstatus(struct lustre_handle *conn, struct ll_fid *rootfid)
40 {
41         struct ptlrpc_request *req;
42         struct mds_body *body;
43         int rc, size = sizeof(*body);
44         ENTRY;
45
46         req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_GETSTATUS, 1, &size,
47                               NULL);
48         if (!req)
49                 GOTO(out, rc = -ENOMEM);
50
51         body = lustre_msg_buf(req->rq_reqmsg, 0);
52         req->rq_level = LUSTRE_CONN_CON;
53         req->rq_replen = lustre_msg_size(1, &size);
54
55         mds_pack_req_body(req);
56         rc = ptlrpc_queue_wait(req);
57         rc = ptlrpc_check_status(req, rc);
58
59         if (!rc) {
60                 body = lustre_msg_buf(req->rq_repmsg, 0);
61                 mds_unpack_body(body);
62                 memcpy(rootfid, &body->fid1, sizeof(*rootfid));
63
64                 CDEBUG(D_NET, "root ino="LPU64", last_committed="LPU64
65                        ", last_xid="LPU64"\n",
66                        rootfid->id, req->rq_repmsg->last_committed,
67                        req->rq_repmsg->last_xid);
68         }
69
70         EXIT;
71  out:
72         ptlrpc_req_finished(req);
73         return rc;
74 }
75
76 int mdc_getlovinfo(struct obd_device *obd, struct lustre_handle *mdc_connh,
77                    struct ptlrpc_request **request)
78 {
79         struct ptlrpc_request *req;
80         struct mds_status_req *streq;
81         int rc, size[2] = {sizeof(*streq)};
82         ENTRY;
83
84         req = ptlrpc_prep_req(class_conn2cliimp(mdc_connh), MDS_GETLOVINFO, 1,
85                               size, NULL);
86         if (!req)
87                 GOTO(out, rc = -ENOMEM);
88
89         *request = req;
90         streq = lustre_msg_buf(req->rq_reqmsg, 0);
91         streq->flags = HTON__u32(MDS_STATUS_LOV);
92         streq->repbuf = HTON__u32(8192);
93
94         /* prepare for reply */
95         req->rq_level = LUSTRE_CONN_CON;
96         size[0] = 512;
97         size[1] = 8192;
98         req->rq_replen = lustre_msg_size(2, size);
99
100         rc = ptlrpc_queue_wait(req);
101         rc = ptlrpc_check_status(req, rc);
102
103  out:
104         RETURN(rc);
105 }
106
107
108 int mdc_getattr(struct lustre_handle *conn,
109                 obd_id ino, int type, unsigned long valid, size_t ea_size,
110                 struct ptlrpc_request **request)
111 {
112         struct ptlrpc_request *req;
113         struct mds_body *body;
114         int rc, size[2] = {sizeof(*body), 0}, bufcount = 1;
115         ENTRY;
116
117         req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_GETATTR, 1, size,
118                               NULL);
119         if (!req)
120                 GOTO(out, rc = -ENOMEM);
121
122         body = lustre_msg_buf(req->rq_reqmsg, 0);
123         ll_ino2fid(&body->fid1, ino, 0, type);
124         body->valid = valid;
125
126         if (S_ISREG(type)) {
127                 struct client_obd *mdc = &class_conn2obd(conn)->u.cli;
128                 bufcount = 2;
129                 size[1] = mdc->cl_max_mds_easize;
130         } else if (valid & OBD_MD_LINKNAME) {
131                 bufcount = 2;
132                 size[1] = ea_size;
133                 body->size = ea_size;
134                 CDEBUG(D_INODE, "allocating %d bytes for symlink in packet\n",
135                        ea_size);
136         }
137         req->rq_replen = lustre_msg_size(bufcount, size);
138         mds_pack_req_body(req);
139
140         rc = ptlrpc_queue_wait(req);
141         rc = ptlrpc_check_status(req, rc);
142
143         if (!rc) {
144                 body = lustre_msg_buf(req->rq_repmsg, 0);
145                 mds_unpack_body(body);
146                 CDEBUG(D_NET, "mode: %o\n", body->mode);
147         }
148
149         EXIT;
150  out:
151         *request = req;
152         return rc;
153 }
154
155 static void d_delete_aliases(struct inode *inode)
156 {
157         struct dentry *dentry = NULL;
158         struct list_head *tmp;
159         int dentry_count = 0;
160         ENTRY;
161
162         spin_lock(&dcache_lock);
163         list_for_each(tmp, &inode->i_dentry) {
164                 dentry = list_entry(tmp, struct dentry, d_alias);
165                 dentry_count++;
166         }
167
168         /* XXX FIXME tell phil/peter that you see this -- unless you're playing
169          * with hard links, in which case, stop. */
170         LASSERT(dentry_count <= 1);
171
172         if (dentry_count == 0) {
173                 spin_unlock(&dcache_lock);
174                 EXIT;
175                 return;
176         }
177
178         CDEBUG(D_INODE, "d_deleting dentry %p\n", dentry);
179         dget_locked(dentry);
180         spin_unlock(&dcache_lock);
181         d_delete(dentry);
182         dput(dentry);
183         EXIT;
184 }
185
186 static int mdc_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
187                             void *data, __u32 data_len, int flag)
188 {
189         int rc;
190         struct lustre_handle lockh;
191         ENTRY;
192
193         switch (flag) {
194         case LDLM_CB_BLOCKING:
195                 ldlm_lock2handle(lock, &lockh);
196                 rc = ldlm_cli_cancel(&lockh);
197                 if (rc < 0) {
198                         CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc);
199                         RETURN(rc);
200                 }
201                 break;
202         case LDLM_CB_CANCELING: {
203                 /* Invalidate all dentries associated with this inode */
204                 struct inode *inode = data;
205
206                 LASSERT(inode != NULL);
207                 LASSERT(data_len == sizeof(*inode));
208
209                 if (S_ISDIR(inode->i_mode)) {
210                         CDEBUG(D_INODE, "invalidating inode %ld\n",
211                                inode->i_ino);
212                         ll_invalidate_inode_pages(inode);
213                 }
214
215                 LASSERT(igrab(inode) == inode);
216                 d_delete_aliases(inode);
217                 iput(inode);
218                 break;
219         }
220         default:
221                 LBUG();
222         }
223
224         RETURN(0);
225 }
226
/*
 * State saved by mdc_store_create_replay_data() in request buffer 5 of a
 * create intent and read back by mdc_replay_create() and
 * create_replay_find_inode() when the create is replayed.
 */
struct create_replay_data {
        struct super_block *sb;         /* superblock used for the inode lookup on replay */
        u32                 generation; /* generation from the original create reply */
};
231
232 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
233 static int create_replay_find_inode(struct inode *inode, unsigned long ino,
234                                     void *opaque)
235 #else
236 static int create_replay_find_inode(struct inode *inode, void *opaque)
237 #endif
238 {
239         struct ptlrpc_request *req = opaque;
240         struct create_replay_data *saved;
241         struct mds_body *body;
242         
243         saved = lustre_msg_buf(req->rq_reqmsg, 5); /* lock with intent */
244         
245         if (saved->generation != inode->i_generation) {
246                 CDEBUG(D_HA,
247                        "generation mismatch for ino %u: saved %u != inode %u\n",
248                        inode->i_ino, saved->generation, inode->i_generation);
249                 return 0;
250         }
251
252         body = lustre_msg_buf(req->rq_repmsg, 1);
253
254         /* XXX do I need more out of ll_update_inode? */
255         CDEBUG(D_HA, "updating inode %u generation %u to %u\n",
256                inode->i_ino, inode->i_generation, body->generation);
257
258         inode->i_generation = body->generation;
259
260         return 1;
261 }
262
263 static void fixup_req_for_recreate(struct ptlrpc_request *fixreq,
264                                    struct ptlrpc_request *req,
265                                    struct inode *inode)
266 {
267         struct ldlm_request *lockreq; 
268         struct mds_rec_link *rec; /* representative, two-fid op structure */
269         int opc;
270
271         if (fixreq->rq_import != req->rq_import) {
272                 DEBUG_REQ(D_HA, fixreq, "import mismatch, skipping");
273                 return;
274         }
275
276         DEBUG_REQ(D_HA, fixreq, "fixing");
277         
278         /* XXX check replay_state to see if we'll actually replay. */
279
280         /* We only care about LDLM_ENQUEUE and MDS_REINT requests. */
281         if (fixreq->rq_reqmsg->opc == LDLM_ENQUEUE) {
282                 lockreq = lustre_msg_buf(fixreq->rq_reqmsg, 0);
283
284                 if (lockreq->lock_desc.l_resource.lr_type != LDLM_PLAIN &&
285                     !(lockreq->lock_flags & LDLM_FL_HAS_INTENT)) {
286                         DEBUG_REQ(D_HA, fixreq, "non-intent lock, skipping");
287                         return;
288                 }
289
290                 if (fixreq->rq_reqmsg->bufcount < 2) {
291                         DEBUG_REQ(D_HA, fixreq,
292                                   "short intent (probably readdir), skipping");
293                         return;
294                 }
295
296                 /* XXX endianness is probably very very wrong here. Very. */
297                 rec = lustre_msg_buf(fixreq->rq_reqmsg, 2);
298         } else if (fixreq->rq_reqmsg->opc == MDS_REINT) {
299                 rec = lustre_msg_buf(fixreq->rq_reqmsg, 0);
300         } else if (fixreq->rq_reqmsg->opc == MDS_OPEN) {
301                 struct mds_body *body = lustre_msg_buf(fixreq->rq_reqmsg, 0);
302                 DEBUG_REQ(D_HA, fixreq, "fixing fid1: %u -> %u",
303                           body->fid1.generation, inode->i_generation);
304                 body->fid1.generation = inode->i_generation;
305                 return;
306         } else {
307                 DEBUG_REQ(D_HA, fixreq, "not a replayable request, skipping");
308                 return;
309         }
310         
311         if (rec->lk_fid1.id == inode->i_ino) {
312                 DEBUG_REQ(D_HA, fixreq, "fixing fid1: %u -> %u",
313                           rec->lk_fid1.generation, inode->i_generation);
314                 rec->lk_fid1.generation = inode->i_generation;
315         }
316         
317         /* Some ops have two FIDs. ZZZ We rely on the identical
318          * placement of that second FID in all such ops' messages.
319          */
320         opc = rec->lk_opcode & REINT_OPCODE_MASK;
321         if ((opc == REINT_LINK || opc == REINT_UNLINK ||
322              opc == REINT_RENAME) &&
323             rec->lk_fid2.id == inode->i_ino) {
324                 DEBUG_REQ(D_HA, fixreq, "fixing fid2: %u -> %u",
325                           rec->lk_fid2.generation, inode->i_generation);
326                 rec->lk_fid2.generation = inode->i_generation;
327         }
328 }
329
330 static void mdc_replay_create(struct ptlrpc_request *req)
331 {
332         struct create_replay_data *saved;
333         struct mds_body *body;
334         struct inode *inode;
335         struct list_head *tmp;
336
337         if (req->rq_reqmsg->opc == MDS_REINT)
338                 LBUG(); /* XXX don't handle the non-intent case yet */
339
340         body = lustre_msg_buf(req->rq_repmsg, 1);
341         saved = lustre_msg_buf(req->rq_reqmsg, 5); /* lock with intent */
342
343         CDEBUG(D_HA, "create of inode %d replayed; gen %u -> %u\n",
344                body->fid1.id, saved->generation, body->generation);
345         /* XXX cargo-culted right out of ll_iget */
346 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
347         inode = iget4(saved->sb, body->fid1.id, create_replay_find_inode, req);
348 #endif
349 #if 0
350         {
351                 extern int ll_read_inode2(struct inode *inode, void *opaque);
352                 inode = iget5_locked(saved->sb, body->fid1.id,
353                                      create_replay_find_inode, 
354                                      ll_read_inode2, req);
355
356                 if (!inode)
357                         LBUG(); /* XXX ick */
358                 
359                 if (inode->i_state & I_NEW)
360                         unlock_new_inode(inode);
361         }
362 #endif
363
364         /* Now that we've updated the generation, we need to go and find all
365          * the other requests that refer to this file and will be replayed,
366          * and teach them about our new generation.
367          */
368         list_for_each(tmp, &req->rq_connection->c_sending_head) {
369                 struct ptlrpc_request *fixreq =
370                         list_entry(tmp, struct ptlrpc_request, rq_list);
371
372                 fixup_req_for_recreate(fixreq, req, inode);
373         }
374
375         list_for_each(tmp, &req->rq_connection->c_delayed_head) {
376                 struct ptlrpc_request *fixreq =
377                         list_entry(tmp, struct ptlrpc_request, rq_list);
378
379                 fixup_req_for_recreate(fixreq, req, inode);
380         }
381 }
382
383 void mdc_store_create_replay_data(struct ptlrpc_request *req,
384                                   struct super_block *sb)
385 {
386         struct create_replay_data *saved = 
387                 lustre_msg_buf(req->rq_reqmsg, 5);
388         struct mds_body *body = lustre_msg_buf(req->rq_repmsg, 1);
389
390
391         if (req->rq_reqmsg->opc == MDS_REINT)
392                 LBUG(); /* XXX don't handle the non-intent case yet */
393
394         saved->generation = body->generation;
395         saved->sb = sb; /* XXX is this safe? */
396
397         req->rq_replay_cb = mdc_replay_create;
398 }
399
400 int mdc_enqueue(struct lustre_handle *conn, int lock_type,
401                 struct lookup_intent *it, int lock_mode, struct inode *dir,
402                 struct dentry *de, struct lustre_handle *lockh,
403                 char *tgt, int tgtlen, void *data, int datalen)
404 {
405         struct ptlrpc_request *req;
406         struct obd_device *obddev = class_conn2obd(conn);
407         __u64 res_id[RES_NAME_SIZE] = {dir->i_ino, (__u64)dir->i_generation};
408         int size[6] = {sizeof(struct ldlm_request), sizeof(struct ldlm_intent)};
409         int rc, flags = LDLM_FL_HAS_INTENT;
410         int repsize[3] = {sizeof(struct ldlm_reply),
411                           sizeof(struct mds_body),
412                           obddev->u.cli.cl_max_mds_easize};
413         struct ldlm_reply *dlm_rep;
414         struct ldlm_intent *lit;
415         struct ldlm_request *lockreq;
416         ENTRY;
417
418         LDLM_DEBUG_NOLOCK("mdsintent %s dir %ld", ldlm_it2str(it->it_op),
419                           dir->i_ino);
420
421         if (it->it_op & (IT_MKDIR | IT_CREAT | IT_SYMLINK | IT_MKNOD)) {
422                 switch (it->it_op) {
423                 case IT_MKDIR:
424                         it->it_mode |= S_IFDIR;
425                         break;
426                 case (IT_CREAT|IT_OPEN):
427                 case IT_CREAT:
428                         it->it_mode |= S_IFREG;
429                         break;
430                 case IT_SYMLINK:
431                         it->it_mode |= S_IFLNK;
432                         break;
433                 }
434                 it->it_mode &= ~current->fs->umask;
435
436                 size[2] = sizeof(struct mds_rec_create);
437                 size[3] = de->d_name.len + 1;
438                 size[4] = tgtlen + 1;
439                 size[5] = sizeof(struct create_replay_data);
440                 req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 6,
441                                       size, NULL);
442                 if (!req)
443                         RETURN(-ENOMEM);
444
445                 /* pack the intent */
446                 lit = lustre_msg_buf(req->rq_reqmsg, 1);
447                 lit->opc = NTOH__u64((__u64)it->it_op);
448
449                 /* pack the intended request */
450                 mds_create_pack(req, 2, dir, it->it_mode, 0, current->fsuid,
451                                 current->fsgid, CURRENT_TIME, de->d_name.name,
452                                 de->d_name.len, tgt, tgtlen);
453                 req->rq_replen = lustre_msg_size(3, repsize);
454         } else if (it->it_op == IT_RENAME2) {
455                 struct dentry *old_de = it->it_data;
456
457                 size[2] = sizeof(struct mds_rec_rename);
458                 size[3] = old_de->d_name.len + 1;
459                 size[4] = de->d_name.len + 1;
460                 req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 5,
461                                       size, NULL);
462                 if (!req)
463                         RETURN(-ENOMEM);
464
465                 /* pack the intent */
466                 lit = lustre_msg_buf(req->rq_reqmsg, 1);
467                 lit->opc = NTOH__u64((__u64)it->it_op);
468
469                 /* pack the intended request */
470                 mds_rename_pack(req, 2, old_de->d_parent->d_inode, dir,
471                                 old_de->d_name.name, old_de->d_name.len,
472                                 de->d_name.name, de->d_name.len);
473                 req->rq_replen = lustre_msg_size(3, repsize);
474         } else if (it->it_op == IT_LINK2) {
475                 struct dentry *old_de = it->it_data;
476
477                 size[2] = sizeof(struct mds_rec_link);
478                 size[3] = de->d_name.len + 1;
479                 req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 4,
480                                       size, NULL);
481                 if (!req)
482                         RETURN(-ENOMEM);
483
484                 /* pack the intent */
485                 lit = lustre_msg_buf(req->rq_reqmsg, 1);
486                 lit->opc = NTOH__u64((__u64)it->it_op);
487
488                 /* pack the intended request */
489                 mds_link_pack(req, 2, old_de->d_inode, dir,
490                               de->d_name.name, de->d_name.len);
491                 req->rq_replen = lustre_msg_size(3, repsize);
492         } else if (it->it_op == IT_UNLINK || it->it_op == IT_RMDIR) {
493                 size[2] = sizeof(struct mds_rec_unlink);
494                 size[3] = de->d_name.len + 1;
495                 req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 4,
496                                       size, NULL);
497                 if (!req)
498                         RETURN(-ENOMEM);
499
500                 /* pack the intent */
501                 lit = lustre_msg_buf(req->rq_reqmsg, 1);
502                 lit->opc = NTOH__u64((__u64)it->it_op);
503
504                 /* pack the intended request */
505                 mds_unlink_pack(req, 2, dir, NULL,
506                                 it->it_op == IT_UNLINK ? S_IFREG : S_IFDIR,
507                                 de->d_name.name, de->d_name.len);
508
509                 req->rq_replen = lustre_msg_size(3, repsize);
510         } else if (it->it_op  & (IT_GETATTR | IT_RENAME | IT_LINK | 
511                    IT_OPEN |  IT_SETATTR | IT_LOOKUP | IT_READLINK)) {
512                 size[2] = sizeof(struct mds_body);
513                 size[3] = de->d_name.len + 1;
514
515                 req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 4,
516                                       size, NULL);
517                 if (!req)
518                         RETURN(-ENOMEM);
519
520                 /* pack the intent */
521                 lit = lustre_msg_buf(req->rq_reqmsg, 1);
522                 lit->opc = NTOH__u64((__u64)it->it_op);
523
524                 /* pack the intended request */
525                 mds_getattr_pack(req, 2, dir, de->d_name.name, de->d_name.len);
526
527                 /* we need to replay opens */
528                 if (it->it_op == IT_OPEN)
529                         req->rq_flags |= PTL_RPC_FL_REPLAY;
530
531                 /* get ready for the reply */
532                 req->rq_replen = lustre_msg_size(3, repsize);
533         } else if (it->it_op == IT_READDIR) {
534                 req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 1,
535                                       size, NULL);
536                 if (!req)
537                         RETURN(-ENOMEM);
538
539                 /* get ready for the reply */
540                 req->rq_replen = lustre_msg_size(1, repsize);
541         } else {
542                 LBUG();
543                 RETURN(-EINVAL);
544         }
545 #warning FIXME: the data here needs to be different if a lock was granted for a different inode
546         rc = ldlm_cli_enqueue(conn, req, obddev->obd_namespace, NULL, res_id,
547                               lock_type, NULL, 0, lock_mode, &flags,
548                               ldlm_completion_ast, mdc_blocking_ast, data,
549                               datalen, lockh);
550         if (rc == -ENOENT) {
551                 /* This can go when we're sure that this can never happen */
552                 LBUG();
553         }
554         if (rc == ELDLM_LOCK_ABORTED) {
555                 lock_mode = 0;
556                 memset(lockh, 0, sizeof(*lockh));
557                 /* rc = 0 */
558         } else if (rc != 0) {
559                 CERROR("ldlm_cli_enqueue: %d\n", rc);
560                 RETURN(rc);
561         }
562
563         /* On replay, we don't want the lock granted. */
564         lockreq = lustre_msg_buf(req->rq_reqmsg, 0);
565         lockreq->lock_flags |= LDLM_FL_INTENT_ONLY;
566
567         dlm_rep = lustre_msg_buf(req->rq_repmsg, 0);
568         it->it_disposition = (int) dlm_rep->lock_policy_res1;
569         it->it_status = (int) dlm_rep->lock_policy_res2;
570         it->it_lock_mode = lock_mode;
571         it->it_data = req;
572
573         RETURN(0);
574 }
575
576 int mdc_cancel_unused(struct lustre_handle *conn, struct inode *inode,
577                       int flags)
578 {
579         __u64 res_id[RES_NAME_SIZE] = {inode->i_ino, inode->i_generation};
580         struct obd_device *obddev = class_conn2obd(conn);
581         ENTRY;
582         RETURN(ldlm_cli_cancel_unused(obddev->obd_namespace, res_id, flags));
583 }
584
/*
 * State stored by mdc_open() in the last buffer of an open request and read
 * back by mdc_replay_open().
 */
struct replay_open_data {
        struct lustre_handle *fh; /* caller's file handle, overwritten when the open is replayed */
};
588
589 static void mdc_replay_open(struct ptlrpc_request *req)
590 {
591         int offset;
592         struct replay_open_data *saved;
593         struct mds_body *body = lustre_msg_buf(req->rq_repmsg, 0);
594
595         if (lustre_msg_get_op_flags(req->rq_reqmsg) & MDS_OPEN_HAS_EA)
596                 offset = 2;
597         else
598                 offset = 1;
599
600         saved = lustre_msg_buf(req->rq_reqmsg, offset);
601         mds_unpack_body(body);
602         CDEBUG(D_HA, "updating from "LPD64"/"LPD64" to "LPD64"/"LPD64"\n",
603                saved->fh->addr, saved->fh->cookie,
604                body->handle.addr, body->handle.cookie);
605         memcpy(saved->fh, &body->handle, sizeof(body->handle));
606 }
607
608 int mdc_open(struct lustre_handle *conn, obd_id ino, int type, int flags,
609              struct lov_stripe_md *lsm, struct lustre_handle *fh,
610              struct ptlrpc_request **request)
611 {
612         struct mds_body *body;
613         struct replay_open_data *replay_data;
614         int rc, size[3] = {sizeof(*body), sizeof(*replay_data)}, bufcount = 2;
615         struct ptlrpc_request *req;
616         ENTRY;
617
618         if (lsm) {
619                 bufcount = 3;
620                 size[2] = size[1]; /* shuffle the spare data along */
621
622                 size[1] = lsm->lsm_mds_easize;
623         }
624
625         req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_OPEN, bufcount, size,
626                               NULL);
627         if (!req)
628                 GOTO(out, rc = -ENOMEM);
629
630         if (lsm)
631                 lustre_msg_set_op_flags(req->rq_reqmsg, MDS_OPEN_HAS_EA);
632
633
634         req->rq_flags |= PTL_RPC_FL_REPLAY;
635         body = lustre_msg_buf(req->rq_reqmsg, 0);
636
637         ll_ino2fid(&body->fid1, ino, 0, type);
638         body->flags = HTON__u32(flags);
639         memcpy(&body->handle, fh, sizeof(body->handle));
640
641         if (lsm)
642                 lov_packmd(lustre_msg_buf(req->rq_reqmsg, 1), lsm);
643
644         req->rq_replen = lustre_msg_size(1, size);
645
646         rc = ptlrpc_queue_wait(req);
647         rc = ptlrpc_check_status(req, rc);
648         if (!rc) {
649                 body = lustre_msg_buf(req->rq_repmsg, 0);
650                 mds_unpack_body(body);
651                 memcpy(fh, &body->handle, sizeof(*fh));
652         }
653
654         /* If open is replayed, we need to fix up the fh. */
655         req->rq_replay_cb = mdc_replay_open;
656         replay_data = lustre_msg_buf(req->rq_reqmsg, lsm ? 2 : 1);
657         replay_data->fh = fh;
658         
659         EXIT;
660  out:
661         *request = req;
662         return rc;
663 }
664
665 int mdc_close(struct lustre_handle *conn, obd_id ino, int type,
666               struct lustre_handle *fh, struct ptlrpc_request **request)
667 {
668         struct mds_body *body;
669         int rc, size = sizeof(*body);
670         struct ptlrpc_request *req;
671
672         req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_CLOSE, 1, &size,
673                               NULL);
674         if (!req)
675                 GOTO(out, rc = -ENOMEM);
676
677         body = lustre_msg_buf(req->rq_reqmsg, 0);
678         ll_ino2fid(&body->fid1, ino, 0, type);
679         memcpy(&body->handle, fh, sizeof(body->handle));
680
681         req->rq_replen = lustre_msg_size(0, NULL);
682
683         rc = ptlrpc_queue_wait(req);
684         rc = ptlrpc_check_status(req, rc);
685
686         EXIT;
687  out:
688         *request = req;
689         return rc;
690 }
691
692 int mdc_readpage(struct lustre_handle *conn, obd_id ino, int type, __u64 offset,
693                  char *addr, struct ptlrpc_request **request)
694 {
695         struct ptlrpc_connection *connection = 
696                 client_conn2cli(conn)->cl_import.imp_connection;
697         struct ptlrpc_request *req = NULL;
698         struct ptlrpc_bulk_desc *desc = NULL;
699         struct ptlrpc_bulk_page *bulk = NULL;
700         struct mds_body *body;
701         int rc, size = sizeof(*body);
702         ENTRY;
703
704         CDEBUG(D_INODE, "inode: %ld\n", (long)ino);
705
706         desc = ptlrpc_prep_bulk(connection);
707         if (desc == NULL)
708                 GOTO(out, rc = -ENOMEM);
709
710         req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_READPAGE, 1, &size,
711                               NULL);
712         if (!req)
713                 GOTO(out2, rc = -ENOMEM);
714
715         bulk = ptlrpc_prep_bulk_page(desc);
716         bulk->bp_buflen = PAGE_SIZE;
717         bulk->bp_buf = addr;
718         bulk->bp_xid = req->rq_xid;
719         desc->bd_portal = MDS_BULK_PORTAL;
720
721         rc = ptlrpc_register_bulk(desc);
722         if (rc) {
723                 CERROR("couldn't setup bulk sink: error %d.\n", rc);
724                 GOTO(out2, rc);
725         }
726
727         body = lustre_msg_buf(req->rq_reqmsg, 0);
728         body->fid1.id = ino;
729         body->fid1.f_type = type;
730         body->size = offset;
731
732         req->rq_replen = lustre_msg_size(1, &size);
733         rc = ptlrpc_queue_wait(req);
734         rc = ptlrpc_check_status(req, rc);
735         if (rc) {
736                 ptlrpc_abort_bulk(desc);
737                 GOTO(out2, rc);
738         } else {
739                 body = lustre_msg_buf(req->rq_repmsg, 0);
740                 mds_unpack_body(body);
741         }
742
743         EXIT;
744  out2:
745         ptlrpc_free_bulk(desc);
746  out:
747         *request = req;
748         return rc;
749 }
750
751 static int mdc_statfs(struct lustre_handle *conn, struct obd_statfs *osfs)
752 {
753         struct ptlrpc_request *req;
754         int rc, size = sizeof(*osfs);
755         ENTRY;
756
757         req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_STATFS, 0, NULL,
758                               NULL);
759         if (!req)
760                 RETURN(-ENOMEM);
761
762         req->rq_replen = lustre_msg_size(1, &size);
763
764         rc = ptlrpc_queue_wait(req);
765         rc = ptlrpc_check_status(req, rc);
766
767         if (rc)
768                 GOTO(out, rc);
769
770         obd_statfs_unpack(osfs, lustre_msg_buf(req->rq_repmsg, 0));
771
772         EXIT;
773 out:
774         ptlrpc_req_finished(req);
775
776         return rc;
777 }
778
779 struct obd_ops mdc_obd_ops = {
780         o_setup:   client_obd_setup,
781         o_cleanup: client_obd_cleanup,
782         o_connect: client_obd_connect,
783         o_disconnect: client_obd_disconnect,
784         o_statfs: mdc_statfs,
785 };
786
787 static int __init ptlrpc_request_init(void)
788 {
789         return class_register_type(&mdc_obd_ops, LUSTRE_MDC_NAME);
790 }
791
/* Module exit hook: unregister the type registered by ptlrpc_request_init(). */
static void __exit ptlrpc_request_exit(void)
{
        class_unregister_type(LUSTRE_MDC_NAME);
}
796
/* Module metadata. */
MODULE_AUTHOR("Cluster File Systems <info@clusterfs.com>");
MODULE_DESCRIPTION("Lustre Metadata Client v1.0");
MODULE_LICENSE("GPL");

/* Symbols exported to other modules.
 * NOTE(review): mdc_create, mdc_unlink, mdc_rename, mdc_link and mdc_setattr
 * are not defined in the part of the file visible here -- presumably they
 * come from another source file of this module; confirm. */
EXPORT_SYMBOL(mdc_getstatus);
EXPORT_SYMBOL(mdc_getlovinfo);
EXPORT_SYMBOL(mdc_enqueue);
EXPORT_SYMBOL(mdc_cancel_unused);
EXPORT_SYMBOL(mdc_getattr);
EXPORT_SYMBOL(mdc_create);
EXPORT_SYMBOL(mdc_unlink);
EXPORT_SYMBOL(mdc_rename);
EXPORT_SYMBOL(mdc_link);
EXPORT_SYMBOL(mdc_readpage);
EXPORT_SYMBOL(mdc_setattr);
EXPORT_SYMBOL(mdc_close);
EXPORT_SYMBOL(mdc_open);

EXPORT_SYMBOL(mdc_store_create_replay_data);

/* Hook the registration/unregistration functions into module load/unload. */
module_init(ptlrpc_request_init);
module_exit(ptlrpc_request_exit);