Whamcloud - gitweb
1216fe77c37f8d2232e570440f30807e245c02b3
[fs/lustre-release.git] / lustre / ptlrpc / nodemap_member.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (C) 2013, Trustees of Indiana University
24  * Author: Joshua Walgenbach <jjw@iu.edu>
25  */
26 #include <linux/module.h>
27 #include <lustre_net.h>
28 #include <obd_class.h>
29 #include "nodemap_internal.h"
30
/*
 * Sizing parameters handed to cfs_hash_create() by nm_member_init_hash()
 * when a nodemap's member hash is created.
 */
#define HASH_NODEMAP_MEMBER_BKT_BITS 3  /* bucket bits */
#define HASH_NODEMAP_MEMBER_CUR_BITS 3  /* initial (current) bits */
#define HASH_NODEMAP_MEMBER_MAX_BITS 7  /* maximum bits */
34
35 /**
36  * member hash functions
37  *
38  * The purpose of this hash is to maintain the list of
39  * exports that are connected and associated with a
40  * particular nodemap
41  */
/**
 * Reference "get" hook used by the member hash (see nm_member_hs_get()).
 *
 * Currently a no-op. The reference that keeps an export alive while it is
 * hashed is taken with class_export_get() in nm_member_add().
 *
 * \param       exp             member export (unused)
 */
static void nm_member_getref(struct obd_export *exp)
{
}
45
/**
 * Reference "put" hook used by the member hash (see
 * nm_member_hs_put_locked()).
 *
 * Currently a no-op. The reference held for hash membership is dropped
 * with class_export_put() when a member is removed.
 *
 * \param       exp             member export (unused)
 */
void nm_member_putref(struct obd_export *exp)
{
}
49
50 static __u32 nm_member_hashfn(struct cfs_hash *hash_body,
51                            const void *key, unsigned mask)
52 {
53         return hash_long((unsigned long)key, hash_body->hs_bkt_bits) & mask;
54 }
55
56 static void *nm_member_hs_key(struct hlist_node *hnode)
57 {
58         struct obd_export       *exp;
59
60         exp = hlist_entry(hnode, struct obd_export,
61                           exp_target_data.ted_nodemap_member);
62
63         return exp;
64 }
65
66 static int nm_member_hs_keycmp(const void *key, struct hlist_node *hnode)
67 {
68         struct obd_export       *exp;
69
70         exp = hlist_entry(hnode, struct obd_export,
71                           exp_target_data.ted_nodemap_member);
72
73         return key == exp;
74 }
75
76 static void *nm_member_hs_hashobject(struct hlist_node *hnode)
77 {
78         return hlist_entry(hnode, struct obd_export,
79                            exp_target_data.ted_nodemap_member);
80 }
81
82 static void nm_member_hs_get(struct cfs_hash *hs, struct hlist_node *hnode)
83 {
84         struct obd_export       *exp;
85
86         exp = hlist_entry(hnode, struct obd_export,
87                           exp_target_data.ted_nodemap_member);
88         nm_member_getref(exp);
89 }
90
91 static void nm_member_hs_put_locked(struct cfs_hash *hs,
92                                  struct hlist_node *hnode)
93 {
94         struct obd_export       *exp;
95
96         exp = hlist_entry(hnode, struct obd_export,
97                           exp_target_data.ted_nodemap_member);
98         nm_member_putref(exp);
99 }
100
101 /**
102  * Delete a member from a member hash
103  *
104  * \param       nodemap         nodemap containing hash
105  * \paraa       nid             nid of member to delete
106  */
107 void nm_member_del(struct lu_nodemap *nodemap, struct obd_export *exp)
108 {
109         struct obd_export *exp1;
110
111         exp1 = cfs_hash_del_key(nodemap->nm_member_hash, exp);
112         if (exp1 != NULL)
113                 class_export_put(exp1);
114
115         LASSERT(hlist_unhashed(&exp->exp_target_data.ted_nodemap_member));
116         exp->exp_target_data.ted_nodemap = NULL;
117 }
118
/*
 * Callback table for a nodemap's member hash; passed to cfs_hash_create()
 * in nm_member_init_hash(). Members are obd_exports linked through
 * exp_target_data.ted_nodemap_member and keyed by their own address.
 */
static struct cfs_hash_ops nm_member_hash_operations = {
        .hs_hash        = nm_member_hashfn,             /* key -> hash value */
        .hs_key         = nm_member_hs_key,             /* node -> key */
        .hs_keycmp      = nm_member_hs_keycmp,          /* key vs. node */
        .hs_object      = nm_member_hs_hashobject,      /* node -> export */
        .hs_get         = nm_member_hs_get,             /* no-op getref */
        .hs_put_locked  = nm_member_hs_put_locked,      /* no-op putref */
};
127
128 /**
129  * Init a member hash of a nodemap
130  *
131  * \param       nodemap         nodemap containing the member hash
132  */
133 int nm_member_init_hash(struct lu_nodemap *nodemap)
134 {
135         char nodemap_hashname[LUSTRE_NODEMAP_NAME_LENGTH + 3];
136
137
138         snprintf(nodemap_hashname, sizeof(nodemap_hashname),
139                  "nm-%s", nodemap->nm_name);
140         nodemap->nm_member_hash = cfs_hash_create(nodemap_hashname,
141                                           HASH_NODEMAP_MEMBER_CUR_BITS,
142                                           HASH_NODEMAP_MEMBER_MAX_BITS,
143                                           HASH_NODEMAP_MEMBER_BKT_BITS, 0,
144                                           CFS_HASH_MIN_THETA,
145                                           CFS_HASH_MAX_THETA,
146                                           &nm_member_hash_operations,
147                                           CFS_HASH_DEFAULT);
148         if (nodemap->nm_member_hash == NULL)
149                 return -ENOMEM;
150
151         return 0;
152 }
153
154 /**
155  * Callback from deleting a hash member
156  */
157 static int nm_member_delete_hash_cb(struct cfs_hash *hs, struct cfs_hash_bd *bd,
158                                  struct hlist_node *hnode, void *data)
159 {
160         struct obd_export       *exp;
161
162         exp = hlist_entry(hnode, struct obd_export,
163                           exp_target_data.ted_nodemap_member);
164
165         exp->exp_target_data.ted_nodemap = NULL;
166         cfs_hash_bd_del_locked(hs, bd, hnode);
167         class_export_put(exp);
168
169         return 0;
170 }
171
172 /**
173  * Delete a member hash from a nodemap
174  *
175  * \param       nodemap         nodemap to remove the hash from
176  */
177 void nm_member_delete_hash(struct lu_nodemap *nodemap)
178 {
179         cfs_hash_for_each_safe(nodemap->nm_member_hash,
180                                nm_member_delete_hash_cb,
181                                nodemap);
182         cfs_hash_putref(nodemap->nm_member_hash);
183 }
184
185 /**
186  * Add a member export to a nodemap
187  *
188  * \param       nodemap         nodemap to search
189  * \param       exp             obd_export to search
190  * \retval      -EEXIST         export is already hashed to a different nodemap
191  * \retval      -EINVAL         export is NULL
192  */
193 int nm_member_add(struct lu_nodemap *nodemap, struct obd_export *exp)
194 {
195         int     rc = 0;
196
197         if (exp == NULL) {
198                 CWARN("attempted to add null export to nodemap %s\n",
199                       nodemap->nm_name);
200                 return -EINVAL;
201         }
202
203         if (hlist_unhashed(&exp->exp_target_data.ted_nodemap_member) == 0) {
204                 /* export is already member of nodemap */
205                 if (exp->exp_target_data.ted_nodemap == nodemap)
206                         return 0;
207
208                 /* possibly reconnecting while about to be reclassified */
209                 CWARN("export %p %s already hashed, failed to add to "
210                       "nodemap %s already member of %s\n", exp,
211                       exp->exp_client_uuid.uuid,
212                       nodemap->nm_name,
213                       (exp->exp_target_data.ted_nodemap == NULL) ? "unknown" :
214                                 exp->exp_target_data.ted_nodemap->nm_name);
215                 return -EEXIST;
216         }
217
218         exp->exp_target_data.ted_nodemap = nodemap;
219
220         rc = cfs_hash_add_unique(nodemap->nm_member_hash, exp,
221                                  &exp->exp_target_data.ted_nodemap_member);
222
223         if (rc == 0)
224                 class_export_get(exp);
225         /* else -EALREADY - exp already in nodemap hash */
226
227         return rc;
228 }
229
230 /**
231  * Revokes the locks on an export if it is attached to an MDT and not in
232  * recovery. As a performance enhancement, the lock revoking process could
233  * revoke only the locks that cover files affected by the nodemap change.
234  */
235 static void nm_member_exp_revoke(struct obd_export *exp)
236 {
237         struct obd_type *type = exp->exp_obd->obd_type;
238         if (strcmp(type->typ_name, LUSTRE_MDT_NAME) != 0)
239                 return;
240         if (exp->exp_obd->obd_recovering)
241                 return;
242
243         ldlm_revoke_export_locks(exp);
244 }
245
246 static int nm_member_reclassify_cb(struct cfs_hash *hs, struct cfs_hash_bd *bd,
247                                    struct hlist_node *hnode, void *data)
248 {
249         struct obd_export       *exp;
250         struct lu_nodemap       *nodemap;
251
252         exp = hlist_entry(hnode, struct obd_export,
253                           exp_target_data.ted_nodemap_member);
254         if (exp == NULL)
255                 goto out;
256
257         /* Must use bd_del_locked inside a cfs_hash callback, and exp->nodemap
258          * should never be NULL. For those reasons, can't use member_del.
259          */
260         read_lock(&nm_range_tree_lock);
261         nodemap = nodemap_classify_nid(exp->exp_connection->c_peer.nid);
262         if (exp->exp_target_data.ted_nodemap != nodemap) {
263                 cfs_hash_bd_del_locked(hs, bd, hnode);
264                 exp->exp_target_data.ted_nodemap = nodemap;
265                 cfs_hash_add_unique(nodemap->nm_member_hash, exp,
266                                 &exp->exp_target_data.ted_nodemap_member);
267         }
268         read_unlock(&nm_range_tree_lock);
269
270         nm_member_exp_revoke(exp);
271 out:
272         return 0;
273 }
274
/* Mutex used to serialize calls to nm_member_reclassify_nodemap(), so that
 * only one nodemap's member hash is reclassified at a time.
 */
DEFINE_MUTEX(reclassify_nodemap_lock);
277
278 /**
279  * Reclassify the members of a nodemap after range changes or activation.
280  * This function reclassifies the members of a nodemap based on the member
281  * export's NID and the nodemap's new NID ranges. Exports that are no longer
282  * classified as being part of this nodemap are moved to the nodemap whose
283  * NID ranges contain the export's NID, and their locks are revoked.
284  *
285  * Calls to this function are serialized due to a potential deadlock: Say there
286  * is a nodemap A and a nodemap B that both need to reclassify their members.
287  * If there is a member in nodemap A that should be in nodemap B, reclassify
288  * will attempt to add the member to nodemap B. If nodemap B is also
289  * reclassifying its members, then its hash is locked and nodemap A's attempt
290  * to add will block and wait for nodemap B's reclassify to finish. If
291  * nodemap B's reclassify then attempts to reclassify a member that should be
292  * in nodemap A, it will also try add the member to nodemap A's locked hash,
293  * causing a deadlock.
294  *
295  * \param       nodemap         nodemap with members to reclassify
296  */
297 void nm_member_reclassify_nodemap(struct lu_nodemap *nodemap)
298 {
299         /* reclassify only one nodemap at a time to avoid deadlock */
300         mutex_lock(&reclassify_nodemap_lock);
301         cfs_hash_for_each_safe(nodemap->nm_member_hash,
302                                nm_member_reclassify_cb,
303                                NULL);
304         mutex_unlock(&reclassify_nodemap_lock);
305 }
306
307 static int
308 nm_member_revoke_locks_cb(struct cfs_hash *hs, struct cfs_hash_bd *bd,
309                           struct hlist_node *hnode, void *data)
310 {
311         struct obd_export       *exp;
312         exp = hlist_entry(hnode, struct obd_export,
313                           exp_target_data.ted_nodemap_member);
314         if (exp == NULL)
315                 return 0;
316
317         nm_member_exp_revoke(exp);
318         return 0;
319 }
320
321 /**
322  * Revoke the locks for member exports. Changing the idmap is
323  * akin to deleting the security context. If the locks are not
324  * canceled, the client could cache permissions that are no
325  * longer correct with the map.
326  *
327  * \param       nodemap         nodemap that has been altered
328  */
329 void nm_member_revoke_locks(struct lu_nodemap *nodemap)
330 {
331         cfs_hash_for_each(nodemap->nm_member_hash, nm_member_revoke_locks_cb,
332                           NULL);
333 }