Whamcloud - gitweb
4362ed158112a00ceb81da39a6969341595861bd
[fs/lustre-release.git] / lustre / mdc / mdc_request.c
1
2
3 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
4  * vim:expandtab:shiftwidth=8:tabstop=8:
5  *
6  * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
7  *
8  *   This file is part of Lustre, http://www.sf.net/projects/lustre/
9  *
10  *   Lustre is free software; you can redistribute it and/or
11  *   modify it under the terms of version 2 of the GNU General Public
12  *   License as published by the Free Software Foundation.
13  *
14  *   Lustre is distributed in the hope that it will be useful,
15  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
16  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  *   GNU General Public License for more details.
18  *
19  *   You should have received a copy of the GNU General Public License
20  *   along with Lustre; if not, write to the Free Software
21  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22  *
23  */
24
25 #define EXPORT_SYMTAB
26 #define DEBUG_SUBSYSTEM S_MDC
27
28 #include <linux/module.h>
29 #include <linux/miscdevice.h>
30 #include <linux/lustre_mds.h>
31 #include <linux/lustre_lite.h>
32 #include <linux/lustre_dlm.h>
33 #include <linux/init.h>
34 #include <linux/obd_lov.h>
35 #include <linux/lprocfs_status.h>
36
37 #define REQUEST_MINOR 244
38
39 extern int mds_queue_req(struct ptlrpc_request *);
40 extern lprocfs_vars_t status_var_nm_1[];
41 extern lprocfs_vars_t status_class_var[];
42
43 /* should become mdc_getinfo() */
44 int mdc_getstatus(struct lustre_handle *conn, struct ll_fid *rootfid)
45 {
46         struct ptlrpc_request *req;
47         struct mds_body *body;
48         int rc, size = sizeof(*body);
49         ENTRY;
50
51         req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_GETSTATUS, 1, &size,
52                               NULL);
53         if (!req)
54                 GOTO(out, rc = -ENOMEM);
55
56         body = lustre_msg_buf(req->rq_reqmsg, 0);
57         req->rq_level = LUSTRE_CONN_CON;
58         req->rq_replen = lustre_msg_size(1, &size);
59
60         mds_pack_req_body(req);
61         rc = ptlrpc_queue_wait(req);
62         rc = ptlrpc_check_status(req, rc);
63
64         if (!rc) {
65                 body = lustre_msg_buf(req->rq_repmsg, 0);
66                 mds_unpack_body(body);
67                 memcpy(rootfid, &body->fid1, sizeof(*rootfid));
68
69                 CDEBUG(D_NET, "root ino="LPU64", last_committed="LPU64
70                        ", last_xid="LPU64"\n",
71                        rootfid->id, req->rq_repmsg->last_committed,
72                        req->rq_repmsg->last_xid);
73         }
74
75         EXIT;
76  out:
77         ptlrpc_req_finished(req);
78         return rc;
79 }
80
81 int mdc_getlovinfo(struct obd_device *obd, struct lustre_handle *mdc_connh,
82                    struct ptlrpc_request **request)
83 {
84         struct ptlrpc_request *req;
85         struct mds_status_req *streq;
86         int rc, size[2] = {sizeof(*streq)};
87         ENTRY;
88
89         req = ptlrpc_prep_req(class_conn2cliimp(mdc_connh), MDS_GETLOVINFO, 1,
90                               size, NULL);
91         if (!req)
92                 GOTO(out, rc = -ENOMEM);
93
94         *request = req;
95         streq = lustre_msg_buf(req->rq_reqmsg, 0);
96         streq->flags = HTON__u32(MDS_STATUS_LOV);
97         streq->repbuf = HTON__u32(8192);
98
99         /* prepare for reply */
100         req->rq_level = LUSTRE_CONN_CON;
101         size[0] = 512;
102         size[1] = 8192;
103         req->rq_replen = lustre_msg_size(2, size);
104
105         rc = ptlrpc_queue_wait(req);
106         rc = ptlrpc_check_status(req, rc);
107
108  out:
109         RETURN(rc);
110 }
111
112
113 int mdc_getattr(struct lustre_handle *conn,
114                 obd_id ino, int type, unsigned long valid, size_t ea_size,
115                 struct ptlrpc_request **request)
116 {
117         struct ptlrpc_request *req;
118         struct mds_body *body;
119         int rc, size[2] = {sizeof(*body), 0}, bufcount = 1;
120         ENTRY;
121
122         req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_GETATTR, 1, size,
123                               NULL);
124         if (!req)
125                 GOTO(out, rc = -ENOMEM);
126
127         body = lustre_msg_buf(req->rq_reqmsg, 0);
128         ll_ino2fid(&body->fid1, ino, 0, type);
129         body->valid = valid;
130
131         if (S_ISREG(type)) {
132                 struct client_obd *mdc = &class_conn2obd(conn)->u.cli;
133                 bufcount = 2;
134                 size[1] = mdc->cl_max_mds_easize;
135         } else if (valid & OBD_MD_LINKNAME) {
136                 bufcount = 2;
137                 size[1] = ea_size;
138                 body->size = ea_size;
139                 CDEBUG(D_INODE, "allocating %d bytes for symlink in packet\n",
140                        ea_size);
141         }
142         req->rq_replen = lustre_msg_size(bufcount, size);
143         mds_pack_req_body(req);
144
145         rc = ptlrpc_queue_wait(req);
146         rc = ptlrpc_check_status(req, rc);
147
148         if (!rc) {
149                 body = lustre_msg_buf(req->rq_repmsg, 0);
150                 mds_unpack_body(body);
151                 CDEBUG(D_NET, "mode: %o\n", body->mode);
152         }
153
154         EXIT;
155  out:
156         *request = req;
157         return rc;
158 }
159
160 void d_delete_aliases(struct inode *inode)
161 {
162         struct dentry *dentry = NULL;
163         struct list_head *tmp;
164         struct ll_sb_info *sbi = ll_i2sbi(inode);
165         ENTRY;
166
167         spin_lock(&dcache_lock);
168         list_for_each(tmp, &inode->i_dentry) {
169                 dentry = list_entry(tmp, struct dentry, d_alias);
170
171                 //                if (atomic_read(&dentry->d_count))
172                 //      continue;
173                 //if (!list_empty(&dentry->d_lru))
174                 //        continue;
175
176                 list_del_init(&dentry->d_hash);
177                 list_add(&dentry->d_hash, &sbi->ll_orphan_dentry_list);
178         }
179
180         spin_unlock(&dcache_lock);
181         EXIT;
182 }
183
184 static int mdc_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
185                             void *data, __u32 data_len, int flag)
186 {
187         int rc;
188         struct lustre_handle lockh;
189         ENTRY;
190
191         switch (flag) {
192         case LDLM_CB_BLOCKING:
193                 ldlm_lock2handle(lock, &lockh);
194                 rc = ldlm_cli_cancel(&lockh);
195                 if (rc < 0) {
196                         CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc);
197                         RETURN(rc);
198                 }
199                 break;
200         case LDLM_CB_CANCELING: {
201                 /* Invalidate all dentries associated with this inode */
202                 struct inode *inode = data;
203
204                 LASSERT(inode != NULL);
205                 LASSERT(data_len == sizeof(*inode));
206
207                 if (S_ISDIR(inode->i_mode)) {
208                         CDEBUG(D_INODE, "invalidating inode %ld\n",
209                                inode->i_ino);
210
211                         ll_invalidate_inode_pages(inode);
212                 }
213
214                 LASSERT(igrab(inode) == inode);
215                 d_delete_aliases(inode);
216                 iput(inode);
217                 break;
218         }
219         default:
220                 LBUG();
221         }
222
223         RETURN(0);
224 }
225
226 struct create_replay_data {
227         struct super_block *sb;
228         u32                 generation;
229 };
230
231 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
232 static int create_replay_find_inode(struct inode *inode, unsigned long ino,
233                                     void *opaque)
234 #else
235 static int create_replay_find_inode(struct inode *inode, void *opaque)
236 #endif
237 {
238         struct ptlrpc_request *req = opaque;
239         struct create_replay_data *saved;
240         struct mds_body *body;
241         
242         saved = lustre_msg_buf(req->rq_reqmsg, 5); /* lock with intent */
243         
244         if (saved->generation != inode->i_generation) {
245                 CDEBUG(D_HA,
246                        "generation mismatch for ino %u: saved %u != inode %u\n",
247                        inode->i_ino, saved->generation, inode->i_generation);
248                 return 0;
249         }
250
251         body = lustre_msg_buf(req->rq_repmsg, 1);
252
253         /* XXX do I need more out of ll_update_inode? */
254         CDEBUG(D_HA, "updating inode %u generation %u to %u\n",
255                inode->i_ino, inode->i_generation, body->generation);
256
257         inode->i_generation = body->generation;
258
259         return 1;
260 }
261
262 static void fixup_req_for_recreate(struct ptlrpc_request *fixreq,
263                                    struct ptlrpc_request *req,
264                                    struct inode *inode)
265 {
266         struct ldlm_request *lockreq; 
267         struct mds_rec_link *rec; /* representative, two-fid op structure */
268         int opc;
269
270         if (fixreq->rq_import != req->rq_import) {
271                 DEBUG_REQ(D_HA, fixreq, "import mismatch, skipping");
272                 return;
273         }
274
275         DEBUG_REQ(D_HA, fixreq, "fixing");
276         
277         /* XXX check replay_state to see if we'll actually replay. */
278
279         /* We only care about LDLM_ENQUEUE and MDS_REINT requests. */
280         if (fixreq->rq_reqmsg->opc == LDLM_ENQUEUE) {
281                 lockreq = lustre_msg_buf(fixreq->rq_reqmsg, 0);
282
283                 if (lockreq->lock_desc.l_resource.lr_type != LDLM_PLAIN &&
284                     !(lockreq->lock_flags & LDLM_FL_HAS_INTENT)) {
285                         DEBUG_REQ(D_HA, fixreq, "non-intent lock, skipping");
286                         return;
287                 }
288
289                 if (fixreq->rq_reqmsg->bufcount < 2) {
290                         DEBUG_REQ(D_HA, fixreq,
291                                   "short intent (probably readdir), skipping");
292                         return;
293                 }
294
295                 /* XXX endianness is probably very very wrong here. Very. */
296                 rec = lustre_msg_buf(fixreq->rq_reqmsg, 2);
297         } else if (fixreq->rq_reqmsg->opc == MDS_REINT) {
298                 rec = lustre_msg_buf(fixreq->rq_reqmsg, 0);
299         } else if (fixreq->rq_reqmsg->opc == MDS_OPEN) {
300                 struct mds_body *body = lustre_msg_buf(fixreq->rq_reqmsg, 0);
301                 DEBUG_REQ(D_HA, fixreq, "fixing fid1: %u -> %u",
302                           body->fid1.generation, inode->i_generation);
303                 body->fid1.generation = inode->i_generation;
304                 return;
305         } else {
306                 DEBUG_REQ(D_HA, fixreq, "not a replayable request, skipping");
307                 return;
308         }
309         
310         if (rec->lk_fid1.id == inode->i_ino) {
311                 DEBUG_REQ(D_HA, fixreq, "fixing fid1: %u -> %u",
312                           rec->lk_fid1.generation, inode->i_generation);
313                 rec->lk_fid1.generation = inode->i_generation;
314         }
315         
316         /* Some ops have two FIDs. ZZZ We rely on the identical
317          * placement of that second FID in all such ops' messages.
318          */
319         opc = rec->lk_opcode & REINT_OPCODE_MASK;
320         if ((opc == REINT_LINK || opc == REINT_UNLINK ||
321              opc == REINT_RENAME) &&
322             rec->lk_fid2.id == inode->i_ino) {
323                 DEBUG_REQ(D_HA, fixreq, "fixing fid2: %u -> %u",
324                           rec->lk_fid2.generation, inode->i_generation);
325                 rec->lk_fid2.generation = inode->i_generation;
326         }
327 }
328
329 static void mdc_replay_create(struct ptlrpc_request *req)
330 {
331         struct create_replay_data *saved;
332         struct mds_body *body;
333         struct inode *inode;
334         struct list_head *tmp;
335
336         if (req->rq_reqmsg->opc == MDS_REINT)
337                 LBUG(); /* XXX don't handle the non-intent case yet */
338
339         body = lustre_msg_buf(req->rq_repmsg, 1);
340         saved = lustre_msg_buf(req->rq_reqmsg, 5); /* lock with intent */
341
342         CDEBUG(D_HA, "create of inode %d replayed; gen %u -> %u\n",
343                body->fid1.id, saved->generation, body->generation);
344         /* XXX cargo-culted right out of ll_iget */
345 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
346         inode = iget4(saved->sb, body->fid1.id, create_replay_find_inode, req);
347 #endif
348 #if 0
349         {
350                 extern int ll_read_inode2(struct inode *inode, void *opaque);
351                 inode = iget5_locked(saved->sb, body->fid1.id,
352                                      create_replay_find_inode, 
353                                      ll_read_inode2, req);
354
355                 if (!inode)
356                         LBUG(); /* XXX ick */
357                 
358                 if (inode->i_state & I_NEW)
359                         unlock_new_inode(inode);
360         }
361 #endif
362
363         /* Now that we've updated the generation, we need to go and find all
364          * the other requests that refer to this file and will be replayed,
365          * and teach them about our new generation.
366          */
367         list_for_each(tmp, &req->rq_connection->c_sending_head) {
368                 struct ptlrpc_request *fixreq =
369                         list_entry(tmp, struct ptlrpc_request, rq_list);
370
371                 fixup_req_for_recreate(fixreq, req, inode);
372         }
373
374         list_for_each(tmp, &req->rq_connection->c_delayed_head) {
375                 struct ptlrpc_request *fixreq =
376                         list_entry(tmp, struct ptlrpc_request, rq_list);
377
378                 fixup_req_for_recreate(fixreq, req, inode);
379         }
380 }
381
382 void mdc_store_create_replay_data(struct ptlrpc_request *req,
383                                   struct super_block *sb)
384 {
385         struct create_replay_data *saved = 
386                 lustre_msg_buf(req->rq_reqmsg, 5);
387         struct mds_body *body = lustre_msg_buf(req->rq_repmsg, 1);
388
389
390         if (req->rq_reqmsg->opc == MDS_REINT)
391                 LBUG(); /* XXX don't handle the non-intent case yet */
392
393         saved->generation = body->generation;
394         saved->sb = sb; /* XXX is this safe? */
395
396         req->rq_replay_cb = mdc_replay_create;
397 }
398
399 int mdc_enqueue(struct lustre_handle *conn, int lock_type,
400                 struct lookup_intent *it, int lock_mode, struct inode *dir,
401                 struct dentry *de, struct lustre_handle *lockh,
402                 char *tgt, int tgtlen, void *data, int datalen)
403 {
404         struct ptlrpc_request *req;
405         struct obd_device *obddev = class_conn2obd(conn);
406         __u64 res_id[RES_NAME_SIZE] = {dir->i_ino, (__u64)dir->i_generation};
407         int size[6] = {sizeof(struct ldlm_request), sizeof(struct ldlm_intent)};
408         int rc, flags = LDLM_FL_HAS_INTENT;
409         int repsize[3] = {sizeof(struct ldlm_reply),
410                           sizeof(struct mds_body),
411                           obddev->u.cli.cl_max_mds_easize};
412         struct ldlm_reply *dlm_rep;
413         struct ldlm_intent *lit;
414         struct ldlm_request *lockreq;
415         ENTRY;
416
417         LDLM_DEBUG_NOLOCK("mdsintent %s dir %ld", ldlm_it2str(it->it_op),
418                           dir->i_ino);
419
420         if (it->it_op & (IT_MKDIR | IT_CREAT | IT_SYMLINK | IT_MKNOD)) {
421                 switch (it->it_op) {
422                 case IT_MKDIR:
423                         it->it_mode |= S_IFDIR;
424                         break;
425                 case (IT_CREAT|IT_OPEN):
426                 case IT_CREAT:
427                         it->it_mode |= S_IFREG;
428                         break;
429                 case IT_SYMLINK:
430                         it->it_mode |= S_IFLNK;
431                         break;
432                 }
433                 it->it_mode &= ~current->fs->umask;
434
435                 size[2] = sizeof(struct mds_rec_create);
436                 size[3] = de->d_name.len + 1;
437                 size[4] = tgtlen + 1;
438                 size[5] = sizeof(struct create_replay_data);
439                 req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 6,
440                                       size, NULL);
441                 if (!req)
442                         RETURN(-ENOMEM);
443
444                 /* pack the intent */
445                 lit = lustre_msg_buf(req->rq_reqmsg, 1);
446                 lit->opc = NTOH__u64((__u64)it->it_op);
447
448                 /* pack the intended request */
449                 mds_create_pack(req, 2, dir, it->it_mode, 0, current->fsuid,
450                                 current->fsgid, CURRENT_TIME, de->d_name.name,
451                                 de->d_name.len, tgt, tgtlen);
452                 req->rq_replen = lustre_msg_size(3, repsize);
453         } else if (it->it_op == IT_RENAME2) {
454                 struct dentry *old_de = it->it_data;
455
456                 size[2] = sizeof(struct mds_rec_rename);
457                 size[3] = old_de->d_name.len + 1;
458                 size[4] = de->d_name.len + 1;
459                 req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 5,
460                                       size, NULL);
461                 if (!req)
462                         RETURN(-ENOMEM);
463
464                 /* pack the intent */
465                 lit = lustre_msg_buf(req->rq_reqmsg, 1);
466                 lit->opc = NTOH__u64((__u64)it->it_op);
467
468                 /* pack the intended request */
469                 mds_rename_pack(req, 2, old_de->d_parent->d_inode, dir,
470                                 old_de->d_name.name, old_de->d_name.len,
471                                 de->d_name.name, de->d_name.len);
472                 req->rq_replen = lustre_msg_size(3, repsize);
473         } else if (it->it_op == IT_LINK2) {
474                 struct dentry *old_de = it->it_data;
475
476                 size[2] = sizeof(struct mds_rec_link);
477                 size[3] = de->d_name.len + 1;
478                 req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 4,
479                                       size, NULL);
480                 if (!req)
481                         RETURN(-ENOMEM);
482
483                 /* pack the intent */
484                 lit = lustre_msg_buf(req->rq_reqmsg, 1);
485                 lit->opc = NTOH__u64((__u64)it->it_op);
486
487                 /* pack the intended request */
488                 mds_link_pack(req, 2, old_de->d_inode, dir,
489                               de->d_name.name, de->d_name.len);
490                 req->rq_replen = lustre_msg_size(3, repsize);
491         } else if (it->it_op == IT_UNLINK || it->it_op == IT_RMDIR) {
492                 size[2] = sizeof(struct mds_rec_unlink);
493                 size[3] = de->d_name.len + 1;
494                 req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 4,
495                                       size, NULL);
496                 if (!req)
497                         RETURN(-ENOMEM);
498
499                 /* pack the intent */
500                 lit = lustre_msg_buf(req->rq_reqmsg, 1);
501                 lit->opc = NTOH__u64((__u64)it->it_op);
502
503                 /* pack the intended request */
504                 mds_unlink_pack(req, 2, dir, NULL,
505                                 it->it_op == IT_UNLINK ? S_IFREG : S_IFDIR,
506                                 de->d_name.name, de->d_name.len);
507
508                 req->rq_replen = lustre_msg_size(3, repsize);
509         } else if (it->it_op  & (IT_GETATTR | IT_RENAME | IT_LINK | 
510                    IT_OPEN |  IT_SETATTR | IT_LOOKUP | IT_READLINK)) {
511                 size[2] = sizeof(struct mds_body);
512                 size[3] = de->d_name.len + 1;
513
514                 req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 4,
515                                       size, NULL);
516                 if (!req)
517                         RETURN(-ENOMEM);
518
519                 /* pack the intent */
520                 lit = lustre_msg_buf(req->rq_reqmsg, 1);
521                 lit->opc = NTOH__u64((__u64)it->it_op);
522
523                 /* pack the intended request */
524                 mds_getattr_pack(req, 2, dir, de->d_name.name, de->d_name.len);
525
526                 /* get ready for the reply */
527                 req->rq_replen = lustre_msg_size(3, repsize);
528         } else if (it->it_op == IT_READDIR) {
529                 req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 1,
530                                       size, NULL);
531                 if (!req)
532                         RETURN(-ENOMEM);
533
534                 /* get ready for the reply */
535                 req->rq_replen = lustre_msg_size(1, repsize);
536         } else {
537                 LBUG();
538                 RETURN(-EINVAL);
539         }
540 #warning FIXME: the data here needs to be different if a lock was granted for a different inode
541         rc = ldlm_cli_enqueue(conn, req, obddev->obd_namespace, NULL, res_id,
542                               lock_type, NULL, 0, lock_mode, &flags,
543                               ldlm_completion_ast, mdc_blocking_ast, data,
544                               datalen, lockh);
545         if (rc == -ENOENT) {
546                 /* This can go when we're sure that this can never happen */
547                 LBUG();
548         }
549         if (rc == ELDLM_LOCK_ABORTED) {
550                 lock_mode = 0;
551                 memset(lockh, 0, sizeof(*lockh));
552                 /* rc = 0 */
553         } else if (rc != 0) {
554                 CERROR("ldlm_cli_enqueue: %d\n", rc);
555                 RETURN(rc);
556         }
557
558         /* On replay, we don't want the lock granted. */
559         lockreq = lustre_msg_buf(req->rq_reqmsg, 0);
560         lockreq->lock_flags |= LDLM_FL_INTENT_ONLY;
561
562         dlm_rep = lustre_msg_buf(req->rq_repmsg, 0);
563         it->it_disposition = (int) dlm_rep->lock_policy_res1;
564         it->it_status = (int) dlm_rep->lock_policy_res2;
565         it->it_lock_mode = lock_mode;
566         it->it_data = req;
567
568         RETURN(0);
569 }
570
571 int mdc_cancel_unused(struct lustre_handle *conn, struct inode *inode,
572                       int flags)
573 {
574         __u64 res_id[RES_NAME_SIZE] = {inode->i_ino, inode->i_generation};
575         struct obd_device *obddev = class_conn2obd(conn);
576         ENTRY;
577         RETURN(ldlm_cli_cancel_unused(obddev->obd_namespace, res_id, flags));
578 }
579
/*
 * State kept in the last buffer of an MDS_OPEN request so that a replayed
 * open can refresh the caller's file handle: set by mdc_open(), used by
 * mdc_replay_open().
 */
struct replay_open_data {
        /* the caller's handle, overwritten with the handle from the
         * replay reply */
        struct lustre_handle *fh;
};
583
584 static void mdc_replay_open(struct ptlrpc_request *req)
585 {
586         int offset;
587         struct replay_open_data *saved;
588         struct mds_body *body = lustre_msg_buf(req->rq_repmsg, 0);
589
590         if (lustre_msg_get_op_flags(req->rq_reqmsg) & MDS_OPEN_HAS_EA)
591                 offset = 2;
592         else
593                 offset = 1;
594
595         saved = lustre_msg_buf(req->rq_reqmsg, offset);
596         mds_unpack_body(body);
597         CDEBUG(D_HA, "updating from "LPD64"/"LPD64" to "LPD64"/"LPD64"\n",
598                saved->fh->addr, saved->fh->cookie,
599                body->handle.addr, body->handle.cookie);
600         memcpy(saved->fh, &body->handle, sizeof(body->handle));
601 }
602
603 int mdc_open(struct lustre_handle *conn, obd_id ino, int type, int flags,
604              struct lov_stripe_md *lsm, struct lustre_handle *fh,
605              struct ptlrpc_request **request)
606 {
607         struct mds_body *body;
608         struct replay_open_data *replay_data;
609         int rc, size[3] = {sizeof(*body), sizeof(*replay_data)}, bufcount = 2;
610         struct ptlrpc_request *req;
611         ENTRY;
612
613         if (lsm) {
614                 bufcount = 3;
615                 size[2] = size[1]; /* shuffle the spare data along */
616
617                 size[1] = lsm->lsm_mds_easize;
618         }
619
620         req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_OPEN, bufcount, size,
621                               NULL);
622         if (!req)
623                 GOTO(out, rc = -ENOMEM);
624
625         if (lsm)
626                 lustre_msg_set_op_flags(req->rq_reqmsg, MDS_OPEN_HAS_EA);
627
628
629         req->rq_flags |= PTL_RPC_FL_REPLAY;
630         body = lustre_msg_buf(req->rq_reqmsg, 0);
631
632         ll_ino2fid(&body->fid1, ino, 0, type);
633         body->flags = HTON__u32(flags);
634         memcpy(&body->handle, fh, sizeof(body->handle));
635
636         if (lsm)
637                 lov_packmd(lustre_msg_buf(req->rq_reqmsg, 1), lsm);
638
639         req->rq_replen = lustre_msg_size(1, size);
640
641         rc = ptlrpc_queue_wait(req);
642         rc = ptlrpc_check_status(req, rc);
643         if (!rc) {
644                 body = lustre_msg_buf(req->rq_repmsg, 0);
645                 mds_unpack_body(body);
646                 memcpy(fh, &body->handle, sizeof(*fh));
647         }
648
649         /* If open is replayed, we need to fix up the fh. */
650         req->rq_replay_cb = mdc_replay_open;
651         replay_data = lustre_msg_buf(req->rq_reqmsg, lsm ? 2 : 1);
652         replay_data->fh = fh;
653         
654         EXIT;
655  out:
656         *request = req;
657         return rc;
658 }
659
660 int mdc_close(struct lustre_handle *conn, obd_id ino, int type,
661               struct lustre_handle *fh, struct ptlrpc_request **request)
662 {
663         struct mds_body *body;
664         int rc, size = sizeof(*body);
665         struct ptlrpc_request *req;
666
667         req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_CLOSE, 1, &size,
668                               NULL);
669         if (!req)
670                 GOTO(out, rc = -ENOMEM);
671
672         body = lustre_msg_buf(req->rq_reqmsg, 0);
673         ll_ino2fid(&body->fid1, ino, 0, type);
674         memcpy(&body->handle, fh, sizeof(body->handle));
675
676         req->rq_replen = lustre_msg_size(0, NULL);
677
678         rc = ptlrpc_queue_wait(req);
679         rc = ptlrpc_check_status(req, rc);
680
681         EXIT;
682  out:
683         *request = req;
684         return rc;
685 }
686
687 int mdc_readpage(struct lustre_handle *conn, obd_id ino, int type, __u64 offset,
688                  char *addr, struct ptlrpc_request **request)
689 {
690         struct ptlrpc_connection *connection = 
691                 client_conn2cli(conn)->cl_import.imp_connection;
692         struct ptlrpc_request *req = NULL;
693         struct ptlrpc_bulk_desc *desc = NULL;
694         struct ptlrpc_bulk_page *bulk = NULL;
695         struct mds_body *body;
696         int rc, size = sizeof(*body);
697         ENTRY;
698
699         CDEBUG(D_INODE, "inode: %ld\n", (long)ino);
700
701         desc = ptlrpc_prep_bulk(connection);
702         if (desc == NULL)
703                 GOTO(out, rc = -ENOMEM);
704
705         req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_READPAGE, 1, &size,
706                               NULL);
707         if (!req)
708                 GOTO(out2, rc = -ENOMEM);
709
710         bulk = ptlrpc_prep_bulk_page(desc);
711         bulk->bp_buflen = PAGE_SIZE;
712         bulk->bp_buf = addr;
713         bulk->bp_xid = req->rq_xid;
714         desc->bd_portal = MDS_BULK_PORTAL;
715
716         rc = ptlrpc_register_bulk(desc);
717         if (rc) {
718                 CERROR("couldn't setup bulk sink: error %d.\n", rc);
719                 GOTO(out2, rc);
720         }
721
722         body = lustre_msg_buf(req->rq_reqmsg, 0);
723         body->fid1.id = ino;
724         body->fid1.f_type = type;
725         body->size = offset;
726
727         req->rq_replen = lustre_msg_size(1, &size);
728         rc = ptlrpc_queue_wait(req);
729         rc = ptlrpc_check_status(req, rc);
730         if (rc) {
731                 ptlrpc_abort_bulk(desc);
732                 GOTO(out2, rc);
733         } else {
734                 body = lustre_msg_buf(req->rq_repmsg, 0);
735                 mds_unpack_body(body);
736         }
737
738         EXIT;
739  out2:
740         ptlrpc_free_bulk(desc);
741  out:
742         *request = req;
743         return rc;
744 }
745
746 static int mdc_statfs(struct lustre_handle *conn, struct obd_statfs *osfs)
747 {
748         struct ptlrpc_request *req;
749         int rc, size = sizeof(*osfs);
750         ENTRY;
751
752         req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_STATFS, 0, NULL,
753                               NULL);
754         if (!req)
755                 RETURN(-ENOMEM);
756
757         req->rq_replen = lustre_msg_size(1, &size);
758
759         rc = ptlrpc_queue_wait(req);
760         rc = ptlrpc_check_status(req, rc);
761
762         if (rc)
763                 GOTO(out, rc);
764
765         obd_statfs_unpack(osfs, lustre_msg_buf(req->rq_repmsg, 0));
766
767         EXIT;
768 out:
769         ptlrpc_req_finished(req);
770
771         return rc;
772 }
773 int mdc_attach(struct obd_device *dev, 
774                    obd_count len, void *data)
775 {
776         int rc;
777         rc = lprocfs_reg_obd(dev, (lprocfs_vars_t*)status_var_nm_1, (void*)dev);
778         return rc; 
779 }
780
/*
 * o_detach method: remove @dev's lprocfs registration.  Returns the result
 * of lprocfs_dereg_obd().
 */
int mdc_detach(struct obd_device *dev)
{
        return lprocfs_dereg_obd(dev);
}
788 struct obd_ops mdc_obd_ops = {
789         o_attach: mdc_attach,
790         o_detach: mdc_detach,
791         o_setup:   client_obd_setup,
792         o_cleanup: client_obd_cleanup,
793         o_connect: client_obd_connect,
794         o_disconnect: client_obd_disconnect,
795         o_statfs: mdc_statfs,
796 };
797
798 static int __init ptlrpc_request_init(void)
799 {
800         int rc;
801         rc = class_register_type(&mdc_obd_ops, 
802                                  (lprocfs_vars_t*)status_class_var, 
803                                  LUSTRE_MDC_NAME);
804         if(rc)
805                 RETURN(rc);
806         return 0;
807         
808 }
809
810 static void __exit ptlrpc_request_exit(void)
811 {
812         
813         class_unregister_type(LUSTRE_MDC_NAME);
814         
815 }
816
817 MODULE_AUTHOR("Cluster File Systems <info@clusterfs.com>");
818 MODULE_DESCRIPTION("Lustre Metadata Client v1.0");
819 MODULE_LICENSE("GPL");
820
821 EXPORT_SYMBOL(d_delete_aliases);
822 EXPORT_SYMBOL(mdc_getstatus);
823 EXPORT_SYMBOL(mdc_getlovinfo);
824 EXPORT_SYMBOL(mdc_enqueue);
825 EXPORT_SYMBOL(mdc_cancel_unused);
826 EXPORT_SYMBOL(mdc_getattr);
827 EXPORT_SYMBOL(mdc_create);
828 EXPORT_SYMBOL(mdc_unlink);
829 EXPORT_SYMBOL(mdc_rename);
830 EXPORT_SYMBOL(mdc_link);
831 EXPORT_SYMBOL(mdc_readpage);
832 EXPORT_SYMBOL(mdc_setattr);
833 EXPORT_SYMBOL(mdc_close);
834 EXPORT_SYMBOL(mdc_open);
835
836 EXPORT_SYMBOL(mdc_store_create_replay_data);
837
838 module_init(ptlrpc_request_init);
839 module_exit(ptlrpc_request_exit);