1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (c) 2003 Cluster File Systems, Inc.
6 * This file is part of the Lustre file system, http://www.lustre.org
7 * Lustre is a trademark of Cluster File Systems, Inc.
9 * You may have signed or agreed to another license before downloading
10 * this software. If so, you are bound by the terms and conditions
11 * of that agreement, and the following does not apply to you. See the
12 * LICENSE file included with this distribution for more information.
14 * If you did not agree to a different license, then this copy of Lustre
15 * is open source software; you can redistribute it and/or modify it
16 * under the terms of version 2 of the GNU General Public License as
17 * published by the Free Software Foundation.
19 * In either case, Lustre is distributed in the hope that it will be
20 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
21 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * license text for more details.
25 #define DEBUG_SUBSYSTEM S_MDC
28 # include <liblustre.h>
30 #include <lustre/lustre_idl.h>
31 #include <lustre_net.h>
32 #include "mdc_internal.h"
35 /* some liblustre hacks here */
/*
 * mdc_readdir_pack - fill in the request body for a readdir request.
 *
 * Fetches the buffer at index @offset of req->rq_reqmsg with
 * lustre_msg_buf() and stamps it with the calling task's fsuid, fsgid and
 * effective capability set, all read from `current`.
 *
 * Two body fields are reused for readdir arguments (the original "!!"
 * markers flag this): @pg_off is stored in the `size` field and @size is
 * stored in the `nlink` field.
 *
 * NOTE(review): the declaration of `b` (presumably struct mds_body *), any
 * use of @fid, and the function braces are not visible in this extract -
 * confirm against the complete file.  The visible code does not check the
 * lustre_msg_buf() result before dereferencing it.
 */
41 void mdc_readdir_pack(struct ptlrpc_request *req, int offset, __u64 pg_off,
42 __u32 size, struct ll_fid *fid)
46 b = lustre_msg_buf(req->rq_reqmsg, offset, sizeof(*b));
47 b->fsuid = current->fsuid;
48 b->fsgid = current->fsgid;
49 b->capability = current->cap_effective;
51 b->size = pg_off; /* !! */
53 b->nlink = size; /* !! */
/*
 * mdc_pack_body - stamp an mds_body with the caller's credentials.
 *
 * Copies fsuid, fsgid and the effective capability set of `current` into
 * the corresponding fields of @b.  No other field of @b is touched by the
 * visible code.
 *
 * NOTE(review): at least one line between the signature and the first
 * assignment is missing from this extract.
 */
56 static void mdc_pack_body(struct mds_body *b)
60 b->fsuid = current->fsuid;
61 b->fsgid = current->fsgid;
62 b->capability = current->cap_effective;
/*
 * mdc_pack_req_body - fill in a generic mds_body request buffer.
 *
 * The body lives in buffer @offset of req->rq_reqmsg.  The only store
 * visible here records @ea_size in the body's `eadatasize` field.
 *
 * NOTE(review): how @valid, @fid and @flags are packed is not visible in
 * this extract (several lines are missing) - confirm against the complete
 * file before relying on this description.
 */
65 void mdc_pack_req_body(struct ptlrpc_request *req, int offset,
66 __u64 valid, struct ll_fid *fid, int ea_size, int flags)
68 struct mds_body *b = lustre_msg_buf(req->rq_reqmsg, offset, sizeof(*b));
73 b->eadatasize = ea_size;
78 /* packing of MDS records */
/*
 * mdc_create_pack - build a REINT_CREATE record in a request message.
 *
 * Buffer layout written by the visible code:
 *   @offset     - struct mds_rec_create (opcode, capability, parent fid from
 *                 op_data->fid1, zeroed replay fid, modification time and
 *                 first supplementary gid)
 *   @offset + 1 - the entry name; namelen + 1 bytes are requested so there
 *                 is room for a terminator (LOGL0 presumably writes it -
 *                 TODO confirm against the macro definition)
 *   @offset + 2 - @datalen bytes copied verbatim from @data
 *
 * Unlike the other packers, the capability comes from the @cap_effective
 * argument rather than from `current`.
 *
 * NOTE(review): the parameter list is cut off after @cap_effective, the
 * declaration of `tmp` is missing, and any guard around the @data copy is
 * not visible in this extract.  The visible code does not check the
 * lustre_msg_buf() results.
 */
79 void mdc_create_pack(struct ptlrpc_request *req, int offset,
80 struct mdc_op_data *op_data, const void *data, int datalen,
81 __u32 mode, __u32 uid, __u32 gid, __u32 cap_effective,
84 struct mds_rec_create *rec;
86 rec = lustre_msg_buf(req->rq_reqmsg, offset, sizeof (*rec));
88 rec->cr_opcode = REINT_CREATE;
91 rec->cr_cap = cap_effective;
92 rec->cr_fid = op_data->fid1;
/* No replay fid is known at pack time, so clear it. */
93 memset(&rec->cr_replayfid, 0, sizeof(rec->cr_replayfid));
96 rec->cr_time = op_data->mod_time;
97 rec->cr_suppgid = op_data->suppgids[0];
/* Entry name goes in the buffer that follows the record. */
99 tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1, op_data->namelen + 1);
100 LOGL0(op_data->name, op_data->namelen, tmp);
/* Optional payload goes in the third buffer. */
103 tmp = lustre_msg_buf(req->rq_reqmsg, offset + 2, datalen);
104 memcpy (tmp, data, datalen);
/*
 * mds_pack_open_flags - translate open flags into their MDS_OPEN_* wire
 * representation.
 *
 * cr_flags starts as @flags masked down to a set of bits that are passed
 * through unchanged (FMODE_READ, FMODE_WRITE, MDS_OPEN_DELAY_CREATE,
 * MDS_OPEN_HAS_EA, MDS_OPEN_HAS_OBJS, MDS_OPEN_OWNEROVERRIDE, ...), then
 * individual MDS_* bits are ORed in.  The mappings visible here are:
 *   O_APPEND    -> MDS_OPEN_APPEND
 *   O_DIRECTORY -> MDS_OPEN_DIRECTORY
 *   O_JOIN_FILE -> MDS_OPEN_JOIN_FILE
 *   FMODE_EXEC  -> MDS_FMODE_EXEC
 *
 * NOTE(review): this extract is missing the end of the pass-through mask,
 * the conditions that guard MDS_OPEN_CREAT, MDS_OPEN_EXCL, MDS_OPEN_TRUNC
 * and MDS_OPEN_SYNC, and the return statement.  Those four ORs are almost
 * certainly conditional in the complete file - confirm before editing.
 */
108 static __u32 mds_pack_open_flags(__u32 flags)
110 __u32 cr_flags = (flags & (FMODE_READ | FMODE_WRITE |
111 MDS_OPEN_DELAY_CREATE | MDS_OPEN_HAS_EA |
112 MDS_OPEN_HAS_OBJS | MDS_OPEN_OWNEROVERRIDE |
115 cr_flags |= MDS_OPEN_CREAT;
117 cr_flags |= MDS_OPEN_EXCL;
119 cr_flags |= MDS_OPEN_TRUNC;
120 if (flags & O_APPEND)
121 cr_flags |= MDS_OPEN_APPEND;
123 cr_flags |= MDS_OPEN_SYNC;
124 if (flags & O_DIRECTORY)
125 cr_flags |= MDS_OPEN_DIRECTORY;
126 if (flags & O_JOIN_FILE)
127 cr_flags |= MDS_OPEN_JOIN_FILE;
129 if (flags & FMODE_EXEC)
130 cr_flags |= MDS_FMODE_EXEC;
135 /* packing of MDS records */
/*
 * mdc_join_pack - fill in the join record of a request message.
 *
 * Fetches the struct mds_rec_join at buffer @offset of req->rq_reqmsg,
 * asserts that the buffer exists, then stores op_data->fid2 in jr_fid and
 * @head_size in jr_headsize.  No other field is written by the visible code.
 */
136 void mdc_join_pack(struct ptlrpc_request *req, int offset,
137 struct mdc_op_data *op_data, __u64 head_size)
139 struct mds_rec_join *rec;
141 rec = lustre_msg_buf(req->rq_reqmsg, offset, sizeof(*rec));
142 LASSERT(rec != NULL);
143 rec->jr_fid = op_data->fid2;
144 rec->jr_headsize = head_size;
/*
 * mdc_open_pack - build a REINT_OPEN record in a request message.
 *
 * Buffer layout written by the visible code:
 *   @offset     - struct mds_rec_create: opcode REINT_OPEN, credentials
 *                 (fsuid, fsgid, effective capabilities) taken from
 *                 `current`, parent fid from op_data->fid1, a zeroed replay
 *                 fid, @flags converted by mds_pack_open_flags(), the
 *                 modification time and the first supplementary gid
 *   @offset + 1 - the entry name (namelen + 1 bytes requested)
 *   @offset + 2 - @lmmlen bytes of striping EA copied from @lmm
 *
 * When the EA is packed, MDS_OPEN_HAS_EA is added to cr_flags and
 * cr_replayfid is overwritten with op_data->fid2 (flagged in the original
 * as a liblustre hack for LL_IOC_LOV_SETSTRIPE).
 *
 * NOTE(review): the declaration of `tmp`, the conditions that guard the
 * name and EA packing, and any use of @mode and @rdev are not visible in
 * this extract.  The visible code does not check the lustre_msg_buf()
 * results.
 */
147 void mdc_open_pack(struct ptlrpc_request *req, int offset,
148 struct mdc_op_data *op_data, __u32 mode, __u64 rdev,
149 __u32 flags, const void *lmm, int lmmlen)
151 struct mds_rec_create *rec;
153 rec = lustre_msg_buf(req->rq_reqmsg, offset, sizeof (*rec));
155 /* XXX do something about time, uid, gid */
156 rec->cr_opcode = REINT_OPEN;
157 rec->cr_fsuid = current->fsuid;
158 rec->cr_fsgid = current->fsgid;
159 rec->cr_cap = current->cap_effective;
160 rec->cr_fid = op_data->fid1;
161 memset(&rec->cr_replayfid, 0, sizeof(rec->cr_replayfid));
/* Convert the open flags to their MDS_OPEN_* form. */
163 rec->cr_flags = mds_pack_open_flags(flags);
165 rec->cr_time = op_data->mod_time;
166 rec->cr_suppgid = op_data->suppgids[0];
/* Entry name goes in the buffer that follows the record. */
169 tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1,
170 op_data->namelen + 1);
171 LOGL0(op_data->name, op_data->namelen, tmp);
/* Tell the server that an EA buffer accompanies this open. */
175 rec->cr_flags |= MDS_OPEN_HAS_EA;
177 /*XXX a hack for liblustre to set EA (LL_IOC_LOV_SETSTRIPE) */
178 rec->cr_replayfid = op_data->fid2;
180 tmp = lustre_msg_buf(req->rq_reqmsg, offset + 2, lmmlen);
181 memcpy (tmp, lmm, lmmlen);
/*
 * attr_pack - translate a mask of ATTR_* bits into the matching MDS_ATTR_*
 * bits.
 *
 * Each ATTR_x bit set in @ia_valid causes the corresponding MDS_ATTR_x bit
 * to be ORed into sa_valid.  The one exception is MDS_OPEN_OWNEROVERRIDE,
 * which is tested in @ia_valid and copied through under the same name (the
 * original comment calls this an NFSD hack, bug 5781).
 *
 * NOTE(review): the declaration/initialisation of `sa_valid` and the return
 * statement are not visible in this extract; the result is presumably
 * sa_valid - confirm against the complete file.
 */
185 static inline __u64 attr_pack(unsigned int ia_valid) {
188 if (ia_valid & ATTR_MODE)
189 sa_valid |= MDS_ATTR_MODE;
190 if (ia_valid & ATTR_UID)
191 sa_valid |= MDS_ATTR_UID;
192 if (ia_valid & ATTR_GID)
193 sa_valid |= MDS_ATTR_GID;
194 if (ia_valid & ATTR_SIZE)
195 sa_valid |= MDS_ATTR_SIZE;
196 if (ia_valid & ATTR_ATIME)
197 sa_valid |= MDS_ATTR_ATIME;
198 if (ia_valid & ATTR_MTIME)
199 sa_valid |= MDS_ATTR_MTIME;
200 if (ia_valid & ATTR_CTIME)
201 sa_valid |= MDS_ATTR_CTIME;
202 if (ia_valid & ATTR_ATIME_SET)
203 sa_valid |= MDS_ATTR_ATIME_SET;
204 if (ia_valid & ATTR_MTIME_SET)
205 sa_valid |= MDS_ATTR_MTIME_SET;
206 if (ia_valid & ATTR_FORCE)
207 sa_valid |= MDS_ATTR_FORCE;
208 if (ia_valid & ATTR_ATTR_FLAG)
209 sa_valid |= MDS_ATTR_ATTR_FLAG;
210 if (ia_valid & ATTR_KILL_SUID)
211 sa_valid |= MDS_ATTR_KILL_SUID;
212 if (ia_valid & ATTR_KILL_SGID)
213 sa_valid |= MDS_ATTR_KILL_SGID;
214 if (ia_valid & ATTR_CTIME_SET)
215 sa_valid |= MDS_ATTR_CTIME_SET;
216 if (ia_valid & ATTR_FROM_OPEN)
217 sa_valid |= MDS_ATTR_FROM_OPEN;
218 if (ia_valid & MDS_OPEN_OWNEROVERRIDE)
219 /* NFSD hack (see bug 5781) */
220 sa_valid |= MDS_OPEN_OWNEROVERRIDE;
/*
 * mdc_setattr_pack - build a REINT_SETATTR record in a request message.
 *
 * Buffer layout written by the visible code:
 *   @offset     - struct mds_rec_setattr: opcode, credentials from
 *                 `current`, target fid from data->fid1, and the attribute
 *                 values taken from @iattr (valid mask converted by
 *                 attr_pack(), mode, uid, gid, size and the three
 *                 timestamps via LTIME_S)
 *   @offset + 1 - @ealen bytes copied from @ea
 *   @offset + 2 - @ea2len bytes copied from @ea2
 *
 * Supplementary gid: sa_suppgid starts at -1.  If ATTR_GID is being set and
 * the caller is a member of the new group (in_group_p), the new gid is
 * sent; the other visible assignment uses data->suppgids[0].
 *
 * NOTE(review): this extract is missing the tail of the `rec` initialiser,
 * the left-hand side of the ia_attr_flags assignment, the `else` that
 * presumably precedes the suppgids[0] assignment, and the guards around the
 * iattr and EA sections.  Confirm against the complete file before editing.
 */
224 void mdc_setattr_pack(struct ptlrpc_request *req, int offset,
225 struct mdc_op_data *data, struct iattr *iattr, void *ea,
226 int ealen, void *ea2, int ea2len)
228 struct mds_rec_setattr *rec = lustre_msg_buf(req->rq_reqmsg, offset,
230 rec->sa_opcode = REINT_SETATTR;
231 rec->sa_fsuid = current->fsuid;
232 rec->sa_fsgid = current->fsgid;
233 rec->sa_cap = current->cap_effective;
234 rec->sa_fid = data->fid1;
/* Default: no supplementary group; may be replaced below. */
235 rec->sa_suppgid = -1;
238 rec->sa_valid = attr_pack(iattr->ia_valid);
239 rec->sa_mode = iattr->ia_mode;
240 rec->sa_uid = iattr->ia_uid;
241 rec->sa_gid = iattr->ia_gid;
242 rec->sa_size = iattr->ia_size;
243 rec->sa_atime = LTIME_S(iattr->ia_atime);
244 rec->sa_mtime = LTIME_S(iattr->ia_mtime);
245 rec->sa_ctime = LTIME_S(iattr->ia_ctime);
/* The iattr is reinterpreted as ll_iattr_struct to reach ia_attr_flags. */
247 ((struct ll_iattr_struct *)iattr)->ia_attr_flags;
248 if ((iattr->ia_valid & ATTR_GID) && in_group_p(iattr->ia_gid))
249 rec->sa_suppgid = iattr->ia_gid;
251 rec->sa_suppgid = data->suppgids[0];
257 memcpy(lustre_msg_buf(req->rq_reqmsg, offset + 1, ealen), ea, ealen);
262 memcpy(lustre_msg_buf(req->rq_reqmsg, offset + 2, ea2len), ea2, ea2len);
/*
 * mdc_unlink_pack - build a REINT_UNLINK record in a request message.
 *
 * Buffer layout written by the visible code:
 *   @offset     - struct mds_rec_unlink: opcode, credentials from `current`,
 *                 mode from data->create_mode, first supplementary gid,
 *                 both fids (fid1, fid2) and the modification time
 *   @offset + 1 - the entry name (namelen + 1 bytes requested)
 *
 * Both buffers are asserted to be non-NULL before use.
 *
 * NOTE(review): the declaration of `tmp` is not visible in this extract.
 */
265 void mdc_unlink_pack(struct ptlrpc_request *req, int offset,
266 struct mdc_op_data *data)
268 struct mds_rec_unlink *rec;
271 rec = lustre_msg_buf(req->rq_reqmsg, offset, sizeof (*rec));
272 LASSERT (rec != NULL);
274 rec->ul_opcode = REINT_UNLINK;
275 rec->ul_fsuid = current->fsuid;
276 rec->ul_fsgid = current->fsgid;
277 rec->ul_cap = current->cap_effective;
278 rec->ul_mode = data->create_mode;
279 rec->ul_suppgid = data->suppgids[0];
280 rec->ul_fid1 = data->fid1;
281 rec->ul_fid2 = data->fid2;
282 rec->ul_time = data->mod_time;
/* Entry name goes in the buffer that follows the record. */
284 tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1, data->namelen + 1);
285 LASSERT (tmp != NULL);
286 LOGL0(data->name, data->namelen, tmp);
/*
 * mdc_link_pack - build a REINT_LINK record in a request message.
 *
 * Buffer layout written by the visible code:
 *   @offset     - struct mds_rec_link: opcode, credentials from `current`,
 *                 two supplementary gids (suppgids[0] and suppgids[1]),
 *                 both fids (fid1, fid2) and the modification time
 *   @offset + 1 - the entry name (namelen + 1 bytes requested)
 *
 * NOTE(review): the declaration of `tmp` is not visible in this extract,
 * and the visible code does not check the lustre_msg_buf() results (the
 * unlink packer asserts them).
 */
289 void mdc_link_pack(struct ptlrpc_request *req, int offset,
290 struct mdc_op_data *data)
292 struct mds_rec_link *rec;
295 rec = lustre_msg_buf(req->rq_reqmsg, offset, sizeof (*rec));
297 rec->lk_opcode = REINT_LINK;
298 rec->lk_fsuid = current->fsuid;
299 rec->lk_fsgid = current->fsgid;
300 rec->lk_cap = current->cap_effective;
301 rec->lk_suppgid1 = data->suppgids[0];
302 rec->lk_suppgid2 = data->suppgids[1];
303 rec->lk_fid1 = data->fid1;
304 rec->lk_fid2 = data->fid2;
305 rec->lk_time = data->mod_time;
/* Entry name goes in the buffer that follows the record. */
307 tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1, data->namelen + 1);
308 LOGL0(data->name, data->namelen, tmp);
/*
 * mdc_rename_pack - build a REINT_RENAME record in a request message.
 *
 * Buffer layout written by the visible code:
 *   @offset     - struct mds_rec_rename: opcode, credentials from `current`,
 *                 two supplementary gids (suppgids[0] and suppgids[1]),
 *                 both fids (fid1, fid2) and the modification time
 *   @offset + 1 - the old name (@oldlen + 1 bytes requested)
 *   @offset + 2 - the new name (@newlen + 1 bytes requested)
 *
 * NOTE(review): the declaration of `tmp` and any guard around the new-name
 * copy are not visible in this extract, and the visible code does not check
 * the lustre_msg_buf() results.
 */
311 void mdc_rename_pack(struct ptlrpc_request *req, int offset,
312 struct mdc_op_data *data,
313 const char *old, int oldlen, const char *new, int newlen)
315 struct mds_rec_rename *rec;
318 rec = lustre_msg_buf(req->rq_reqmsg, offset, sizeof (*rec));
320 /* XXX do something about time, uid, gid */
321 rec->rn_opcode = REINT_RENAME;
322 rec->rn_fsuid = current->fsuid;
323 rec->rn_fsgid = current->fsgid;
324 rec->rn_cap = current->cap_effective;
325 rec->rn_suppgid1 = data->suppgids[0];
326 rec->rn_suppgid2 = data->suppgids[1];
327 rec->rn_fid1 = data->fid1;
328 rec->rn_fid2 = data->fid2;
329 rec->rn_time = data->mod_time;
/* Old name in the second buffer, new name in the third. */
331 tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1, oldlen + 1);
332 LOGL0(old, oldlen, tmp);
335 tmp = lustre_msg_buf(req->rq_reqmsg, offset + 2, newlen + 1);
336 LOGL0(new, newlen, tmp);
/*
 * mdc_getattr_pack - fill in the request body for a getattr request.
 *
 * The body at buffer @offset of req->rq_reqmsg receives the caller's
 * credentials from `current`, @flags with MDS_BFLAG_EXT_FLAGS ORed in, the
 * first supplementary gid and both fids from @data.  When the
 * OBD_FAIL_MDC_OLD_EXT_FLAGS fail-check fires, MDS_BFLAG_EXT_FLAGS is
 * cleared again so the request looks as if it came from an older client
 * (see the bug 12848 note in the body).
 *
 * The name is copied into buffer @offset + 1 with a plain memcpy of
 * data->namelen bytes; the visible copy itself writes no terminator.
 *
 * NOTE(review): the declarations of `b` and `tmp`, any use of @valid, the
 * guard around the name copy and the size argument of the second
 * lustre_msg_buf() call are not visible in this extract.  The comment that
 * begins "skip MDS_BFLAG_EXT_FLAGS" has no visible terminator here - confirm
 * against the complete file.
 */
340 void mdc_getattr_pack(struct ptlrpc_request *req, int offset, __u64 valid,
341 int flags, struct mdc_op_data *data)
344 b = lustre_msg_buf(req->rq_reqmsg, offset, sizeof(*b));
346 b->fsuid = current->fsuid;
347 b->fsgid = current->fsgid;
348 b->capability = current->cap_effective;
350 b->flags = flags | MDS_BFLAG_EXT_FLAGS;
351 /* skip MDS_BFLAG_EXT_FLAGS to verify the "client < 1.4.7" case
352 * refer to bug 12848.
354 if (OBD_FAIL_CHECK(OBD_FAIL_MDC_OLD_EXT_FLAGS))
355 b->flags &= ~MDS_BFLAG_EXT_FLAGS;
356 b->suppgid = data->suppgids[0];
358 b->fid1 = data->fid1;
359 b->fid2 = data->fid2;
362 tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1,
364 memcpy(tmp, data->name, data->namelen);
/*
 * mdc_close_pack - fill in the request body for a close request.
 *
 * The body at buffer @offset of req->rq_reqmsg gets:
 *   - fid1, built by mdc_pack_fid() from oa->o_id, 0 and oa->o_mode
 *   - the open handle, copied from och->och_fh
 *   - each of atime, mtime, ctime, size, blocks and flags, but only when the
 *     matching OBD_MD_FL* bit is set in oa->o_valid; the same bit is then
 *     ORed into body->valid so the receiver knows the field is meaningful
 *
 * NOTE(review): no use of the @valid argument is visible in this extract,
 * and the visible code does not check the lustre_msg_buf() result.
 */
369 void mdc_close_pack(struct ptlrpc_request *req, int offset, struct obdo *oa,
370 __u64 valid, struct obd_client_handle *och)
372 struct mds_body *body;
374 body = lustre_msg_buf(req->rq_reqmsg, offset, sizeof(*body));
376 mdc_pack_fid(&body->fid1, oa->o_id, 0, oa->o_mode);
377 memcpy(&body->handle, &och->och_fh, sizeof(body->handle));
/* Forward only the attributes the obdo marks as valid. */
378 if (oa->o_valid & OBD_MD_FLATIME) {
379 body->atime = oa->o_atime;
380 body->valid |= OBD_MD_FLATIME;
382 if (oa->o_valid & OBD_MD_FLMTIME) {
383 body->mtime = oa->o_mtime;
384 body->valid |= OBD_MD_FLMTIME;
386 if (oa->o_valid & OBD_MD_FLCTIME) {
387 body->ctime = oa->o_ctime;
388 body->valid |= OBD_MD_FLCTIME;
390 if (oa->o_valid & OBD_MD_FLSIZE) {
391 body->size = oa->o_size;
392 body->valid |= OBD_MD_FLSIZE;
394 if (oa->o_valid & OBD_MD_FLBLOCKS) {
395 body->blocks = oa->o_blocks;
396 body->valid |= OBD_MD_FLBLOCKS;
398 if (oa->o_valid & OBD_MD_FLFLAGS) {
399 body->flags = oa->o_flags;
400 body->valid |= OBD_MD_FLFLAGS;
/*
 * One waiter blocked in mdc_enter_request() until a request slot frees up.
 * mcw_entry links the waiter onto cli->cl_cache_waiters; mcw_waitq is the
 * wait queue that mdc_exit_request() wakes once it has taken the waiter off
 * that list.
 */
404 struct mdc_cache_waiter {
405 struct list_head mcw_entry;
406 wait_queue_head_t mcw_waitq;
/*
 * mdc_req_avail - wake-up condition for a waiter queued by
 * mdc_enter_request().
 *
 * Samples list_empty(&mcw->mcw_entry) while holding cl_loi_list_lock.  The
 * entry becomes empty when mdc_exit_request() removes the waiter with
 * list_del_init(), so a non-zero result means a slot has been handed over.
 *
 * NOTE(review): the declaration of `rc` and the return statement are not
 * visible in this extract; the function presumably returns rc.
 */
409 static int mdc_req_avail(struct client_obd *cli, struct mdc_cache_waiter *mcw)
413 spin_lock(&cli->cl_loi_list_lock);
414 rc = list_empty(&mcw->mcw_entry);
415 spin_unlock(&cli->cl_loi_list_lock);
419 /* We record requests in flight in cli->cl_r_in_flight here.
420 * There is only one write rpc possible in mdc anyway. If this is to change
421 * in the future - the code may need to be revisited. */
/*
 * mdc_enter_request - claim a request slot, sleeping if none is free.
 *
 * Under cl_loi_list_lock: when cl_r_in_flight has reached
 * cl_max_rpcs_in_flight, an on-stack waiter is appended to
 * cl_cache_waiters, its wait queue is initialised, the lock is dropped and
 * the caller sleeps in l_wait_event() until mdc_req_avail() reports that
 * the waiter has been taken off the list.  The other visible path bumps
 * cl_r_in_flight and drops the lock.
 *
 * The waiter is queued before its wait queue is initialised; both steps
 * happen with the lock held, and mdc_exit_request() takes the same lock
 * before walking the list.
 *
 * NOTE(review): the `else` that presumably separates the two paths is not
 * visible in this extract.  On the sleeping path the slot is accounted for
 * by mdc_exit_request(), which increments cl_r_in_flight before waking the
 * waiter.
 */
422 void mdc_enter_request(struct client_obd *cli)
424 struct mdc_cache_waiter mcw;
425 struct l_wait_info lwi = { 0 };
427 spin_lock(&cli->cl_loi_list_lock);
428 if (cli->cl_r_in_flight >= cli->cl_max_rpcs_in_flight) {
/* No free slot: queue ourselves and sleep with the lock released. */
429 list_add_tail(&mcw.mcw_entry, &cli->cl_cache_waiters);
430 init_waitqueue_head(&mcw.mcw_waitq);
431 spin_unlock(&cli->cl_loi_list_lock);
432 l_wait_event(mcw.mcw_waitq, mdc_req_avail(cli, &mcw), &lwi);
/* Slot available: account for this request. */
434 cli->cl_r_in_flight++;
435 spin_unlock(&cli->cl_loi_list_lock);
/*
 * mdc_exit_request - release a request slot and hand free slots to waiters.
 *
 * Under cl_loi_list_lock: cl_r_in_flight is decremented, then
 * cl_cache_waiters is walked with list_for_each_safe() (entries are removed
 * during the walk).  For each waiter, while slots remain, the waiter is
 * unlinked with list_del_init() - which is what makes mdc_req_avail() see an
 * empty entry - cl_r_in_flight is incremented on the waiter's behalf, and
 * its wait queue is woken.
 *
 * NOTE(review): the statement that leaves the loop when no slot is free
 * (presumably `break`) and the closing braces are not visible in this
 * extract.
 */
439 void mdc_exit_request(struct client_obd *cli)
441 struct list_head *l, *tmp;
442 struct mdc_cache_waiter *mcw;
444 spin_lock(&cli->cl_loi_list_lock);
445 cli->cl_r_in_flight--;
447 list_for_each_safe(l, tmp, &cli->cl_cache_waiters) {
448 if (cli->cl_r_in_flight >= cli->cl_max_rpcs_in_flight) {
449 /* No free request slots anymore */
453 mcw = list_entry(l, struct mdc_cache_waiter, mcw_entry);
/* Unlink first so the waiter's wake-up condition becomes true. */
454 list_del_init(&mcw->mcw_entry);
/* The slot is claimed here on behalf of the waiter being woken. */
455 cli->cl_r_in_flight++;
456 wake_up(&mcw->mcw_waitq);
458 /* Empty waiting list? Decrease reqs in-flight number */
460 spin_unlock(&cli->cl_loi_list_lock);