Whamcloud - gitweb
LU-8010 mdt: fix orphan layout_lock cases for restore
[fs/lustre-release.git] / lustre / mdt / mdt_hsm_cdt_agent.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License version 2 for more details.  A copy is
14  * included in the COPYING file that accompanied this code.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19  *
20  * GPL HEADER END
21  */
22 /*
23  * (C) Copyright 2012 Commissariat a l'energie atomique et aux energies
24  *     alternatives
25  *
26  */
27 /*
28  * lustre/mdt/mdt_hsm_cdt_agent.c
29  *
30  * Lustre HSM Coordinator
31  *
32  * Author: Jacques-Charles Lafoucriere <jacques-charles.lafoucriere@cea.fr>
33  * Author: Aurelien Degremont <aurelien.degremont@cea.fr>
34  */
35
36 #define DEBUG_SUBSYSTEM S_MDS
37
38 #include <obd.h>
39 #include <obd_support.h>
40 #include <lustre_export.h>
41 #include <lustre/lustre_user.h>
42 #include <lprocfs_status.h>
43 #include <lustre_kernelcomm.h>
44 #include "mdt_internal.h"
45
46 /*
47  * Agent external API
48  */
49
50 /*
51  * find a hsm_agent by uuid
52  * lock cdt_agent_lock needs to be held by caller
53  * \param cdt [IN] coordinator
54  * \param uuid [IN] agent UUID
55  * \retval hsm_agent pointer or NULL if not found
56  */
57 static struct hsm_agent *mdt_hsm_agent_lookup(struct coordinator *cdt,
58                                               const struct obd_uuid *uuid)
59 {
60         struct hsm_agent        *ha;
61
62         list_for_each_entry(ha, &cdt->cdt_agents, ha_list) {
63                 if (obd_uuid_equals(&ha->ha_uuid, uuid))
64                         return ha;
65         }
66         return NULL;
67 }
68
69 /**
70  * register a copy tool
71  * \param mti [IN] MDT context
72  * \param uuid [IN] client UUID to be registered
73  * \param count [IN] number of archives agent serves
74  * \param archive_id [IN] vector of archive number served by the copytool
75  * \retval 0 success
76  * \retval -ve failure
77  */
78 int mdt_hsm_agent_register(struct mdt_thread_info *mti,
79                            const struct obd_uuid *uuid,
80                            int nr_archives, __u32 *archive_id)
81 {
82         struct coordinator      *cdt = &mti->mti_mdt->mdt_coordinator;
83         struct hsm_agent        *ha, *tmp;
84         int                      rc;
85         ENTRY;
86
87         /* no coordinator started, so we cannot serve requests */
88         if (cdt->cdt_state == CDT_STOPPED) {
89                 LCONSOLE_WARN("HSM coordinator thread is not running - "
90                               "denying agent registration.\n");
91                 RETURN(-ENXIO);
92         }
93
94         OBD_ALLOC_PTR(ha);
95         if (ha == NULL)
96                 GOTO(out, rc = -ENOMEM);
97
98         ha->ha_uuid = *uuid;
99         ha->ha_archive_cnt = nr_archives;
100         if (ha->ha_archive_cnt != 0) {
101                 int sz;
102
103                 sz = ha->ha_archive_cnt * sizeof(*ha->ha_archive_id);
104                 OBD_ALLOC(ha->ha_archive_id, sz);
105                 if (ha->ha_archive_id == NULL)
106                         GOTO(out_free, rc = -ENOMEM);
107                 memcpy(ha->ha_archive_id, archive_id, sz);
108         }
109         atomic_set(&ha->ha_requests, 0);
110         atomic_set(&ha->ha_success, 0);
111         atomic_set(&ha->ha_failure, 0);
112
113         down_write(&cdt->cdt_agent_lock);
114         tmp = mdt_hsm_agent_lookup(cdt, uuid);
115         if (tmp != NULL) {
116                 LCONSOLE_WARN("HSM agent %s already registered\n",
117                               obd_uuid2str(uuid));
118                 up_write(&cdt->cdt_agent_lock);
119                 GOTO(out_free, rc = -EEXIST);
120         }
121
122         list_add_tail(&ha->ha_list, &cdt->cdt_agents);
123
124         if (ha->ha_archive_cnt == 0)
125                 CDEBUG(D_HSM, "agent %s registered for all archives\n",
126                        obd_uuid2str(&ha->ha_uuid));
127         else
128                 CDEBUG(D_HSM, "agent %s registered for %d archives\n",
129                        obd_uuid2str(&ha->ha_uuid), ha->ha_archive_cnt);
130
131         up_write(&cdt->cdt_agent_lock);
132         GOTO(out, rc = 0);
133
134 out_free:
135
136         if (ha != NULL && ha->ha_archive_id != NULL)
137                 OBD_FREE(ha->ha_archive_id,
138                          ha->ha_archive_cnt * sizeof(*ha->ha_archive_id));
139         if (ha != NULL)
140                 OBD_FREE_PTR(ha);
141 out:
142         return rc;
143 }
144
145 /**
146  * register a copy tool
147  * \param mti [IN] MDT context
148  * \param uuid [IN] uuid to be registered
149  * \param archive_mask [IN] bitmask of archive number served by the copytool
150  * \retval 0 success
151  * \retval -ve failure
152  */
153 int mdt_hsm_agent_register_mask(struct mdt_thread_info *mti,
154                                 const struct obd_uuid *uuid, __u32 archive_mask)
155 {
156         int              rc, i, nr_archives = 0;
157         __u32           *archive_id = NULL;
158         ENTRY;
159
160         nr_archives = hweight32(archive_mask);
161
162         if (nr_archives != 0) {
163                 OBD_ALLOC(archive_id, nr_archives * sizeof(*archive_id));
164                 if (!archive_id)
165                         RETURN(-ENOMEM);
166
167                 nr_archives = 0;
168                 for (i = 0; i < sizeof(archive_mask) * 8; i++) {
169                         if ((1 << i) & archive_mask) {
170                                 archive_id[nr_archives] = i + 1;
171                                 nr_archives++;
172                         }
173                 }
174         }
175
176         rc = mdt_hsm_agent_register(mti, uuid, nr_archives, archive_id);
177
178         if (archive_id != NULL)
179                 OBD_FREE(archive_id, nr_archives * sizeof(*archive_id));
180
181         RETURN(rc);
182 }
183
184 /**
185  * unregister a copy tool
186  * \param mti [IN] MDT context
187  * \param uuid [IN] uuid to be unregistered
188  * \retval 0 success
189  * \retval -ve failure
190  */
191 int mdt_hsm_agent_unregister(struct mdt_thread_info *mti,
192                              const struct obd_uuid *uuid)
193 {
194         struct coordinator      *cdt = &mti->mti_mdt->mdt_coordinator;
195         struct hsm_agent        *ha;
196         int                      rc;
197         ENTRY;
198
199         /* no coordinator started, so we cannot serve requests */
200         if (cdt->cdt_state == CDT_STOPPED)
201                 RETURN(-ENXIO);
202
203         down_write(&cdt->cdt_agent_lock);
204
205         ha = mdt_hsm_agent_lookup(cdt, uuid);
206         if (ha != NULL)
207                 list_del_init(&ha->ha_list);
208
209         up_write(&cdt->cdt_agent_lock);
210
211         if (ha == NULL)
212                 GOTO(out, rc = -ENOENT);
213
214         if (ha->ha_archive_cnt != 0)
215                 OBD_FREE(ha->ha_archive_id,
216                          ha->ha_archive_cnt * sizeof(*ha->ha_archive_id));
217         OBD_FREE_PTR(ha);
218
219         GOTO(out, rc = 0);
220 out:
221         CDEBUG(D_HSM, "agent %s unregistration: %d\n", obd_uuid2str(uuid), rc);
222
223         return rc;
224 }
225
226 /**
227  * update agent statistics
228  * \param mdt [IN] MDT device
229  * \param succ_rq [IN] number of success
230  * \param fail_rq [IN] number of failure
231  * \param new_rq [IN] number of new requests
232  * \param uuid [IN] agent uuid
233  * if all counters == 0, clear counters
234  * \retval 0 success
235  * \retval -ve failure
236  */
237 int mdt_hsm_agent_update_statistics(struct coordinator *cdt,
238                                     int succ_rq, int fail_rq, int new_rq,
239                                     const struct obd_uuid *uuid)
240 {
241         struct hsm_agent        *ha;
242         int                      rc;
243         ENTRY;
244
245         down_read(&cdt->cdt_agent_lock);
246         list_for_each_entry(ha, &cdt->cdt_agents, ha_list) {
247                 if (obd_uuid_equals(&ha->ha_uuid, uuid)) {
248                         if (succ_rq == 0 && fail_rq == 0 && new_rq == 0) {
249                                 atomic_set(&ha->ha_success, 0);
250                                 atomic_set(&ha->ha_failure, 0);
251                                 atomic_set(&ha->ha_requests, 0);
252                         } else {
253                                 atomic_add(succ_rq, &ha->ha_success);
254                                 atomic_add(fail_rq, &ha->ha_failure);
255                                 atomic_add(new_rq, &ha->ha_requests);
256                                 atomic_sub(succ_rq, &ha->ha_requests);
257                                 atomic_sub(fail_rq, &ha->ha_requests);
258                         }
259                         GOTO(out, rc = 0);
260                 }
261
262         }
263         rc = -ENOENT;
264 out:
265         up_read(&cdt->cdt_agent_lock);
266         RETURN(rc);
267 }
268
269 /**
270  * find the best agent
271  * \param cdt [IN] coordinator
272  * \param archive [IN] archive number
273  * \param uuid [OUT] agent who can serve archive
274  * \retval 0 success
275  * \retval -ve failure
276  */
277 int mdt_hsm_find_best_agent(struct coordinator *cdt, __u32 archive,
278                             struct obd_uuid *uuid)
279 {
280         int                      rc = -EAGAIN, i, load = -1;
281         struct hsm_agent        *ha;
282         ENTRY;
283
284         /* Choose an export to send a copytool req to */
285         down_read(&cdt->cdt_agent_lock);
286         list_for_each_entry(ha, &cdt->cdt_agents, ha_list) {
287                 for (i = 0; (i < ha->ha_archive_cnt) &&
288                               (ha->ha_archive_id[i] != archive); i++) {
289                         /* nothing to do, just skip unmatching records */
290                 }
291
292                 /* archive count == 0 means copy tool serves any backend */
293                 if (ha->ha_archive_cnt != 0 && i == ha->ha_archive_cnt)
294                         continue;
295
296                 if (load == -1 || load > atomic_read(&ha->ha_requests)) {
297                         load = atomic_read(&ha->ha_requests);
298                         *uuid = ha->ha_uuid;
299                         rc = 0;
300                 }
301                 if (atomic_read(&ha->ha_requests) == 0)
302                         break;
303         }
304         up_read(&cdt->cdt_agent_lock);
305
306         RETURN(rc);
307 }
308
309 /**
310  * send a compound request to the agent
311  * \param mti [IN] context
312  * \param hal [IN] request (can be a kuc payload)
313  * \param purge [IN] purge mode (no record)
314  * \retval 0 success
315  * \retval -ve failure
316  * This function supposes:
317  *  - all actions are for the same archive number
318  *  - in case of cancel, all cancel are for the same agent
319  * This implies that request split has to be done
320  *  before when building the hal
321  */
322 int mdt_hsm_agent_send(struct mdt_thread_info *mti,
323                        struct hsm_action_list *hal, bool purge)
324 {
325         struct obd_export       *exp;
326         struct mdt_device       *mdt = mti->mti_mdt;
327         struct coordinator      *cdt = &mti->mti_mdt->mdt_coordinator;
328         struct hsm_action_list  *buf = NULL;
329         struct hsm_action_item  *hai;
330         struct obd_uuid          uuid;
331         int                      len, i, rc = 0;
332         bool                     fail_request;
333         bool                     is_registered = false;
334         ENTRY;
335
336         rc = mdt_hsm_find_best_agent(cdt, hal->hal_archive_id, &uuid);
337         if (rc) {
338                 CERROR("%s: Cannot find agent for archive %d: rc = %d\n",
339                        mdt_obd_name(mdt), hal->hal_archive_id, rc);
340                 RETURN(rc);
341         }
342
343         CDEBUG(D_HSM, "Agent %s selected for archive %d\n", obd_uuid2str(&uuid),
344                hal->hal_archive_id);
345
346         len = hal_size(hal);
347         buf = kuc_alloc(len, KUC_TRANSPORT_HSM, HMT_ACTION_LIST);
348         if (IS_ERR(buf))
349                 RETURN(PTR_ERR(buf));
350         memcpy(buf, hal, len);
351
352         /* Check if request is still valid (cf file hsm flags) */
353         fail_request = false;
354         hai = hai_first(hal);
355         for (i = 0; i < hal->hal_count; i++, hai = hai_next(hai)) {
356                 if (hai->hai_action != HSMA_CANCEL) {
357                         struct mdt_object *obj;
358                         struct md_hsm hsm;
359
360                         obj = mdt_hsm_get_md_hsm(mti, &hai->hai_fid, &hsm);
361                         if (!IS_ERR(obj) && obj != NULL) {
362                                 mdt_object_put(mti->mti_env, obj);
363                         } else {
364                                 if (hai->hai_action == HSMA_REMOVE)
365                                         continue;
366
367                                 if (obj == NULL) {
368                                         fail_request = true;
369                                         rc = mdt_agent_record_update(
370                                                              mti->mti_env, mdt,
371                                                              &hai->hai_cookie,
372                                                              1, ARS_FAILED);
373                                         if (rc) {
374                                                 CERROR(
375                                               "%s: mdt_agent_record_update() "
376                                               "failed, cannot update "
377                                               "status to %s for cookie "
378                                               LPX64": rc = %d\n",
379                                               mdt_obd_name(mdt),
380                                               agent_req_status2name(ARS_FAILED),
381                                               hai->hai_cookie, rc);
382                                                 GOTO(out_buf, rc);
383                                         }
384                                         continue;
385                                 }
386                                 GOTO(out_buf, rc = PTR_ERR(obj));
387                         }
388
389                         if (!mdt_hsm_is_action_compat(hai, hal->hal_archive_id,
390                                                       hal->hal_flags, &hsm)) {
391                                 /* incompatible request, we abort the request */
392                                 /* next time coordinator will wake up, it will
393                                  * make the same compound with valid only
394                                  * records */
395                                 fail_request = true;
396                                 rc = mdt_agent_record_update(mti->mti_env, mdt,
397                                                              &hai->hai_cookie,
398                                                              1, ARS_FAILED);
399                                 if (rc) {
400                                         CERROR("%s: mdt_agent_record_update() "
401                                               "failed, cannot update "
402                                               "status to %s for cookie "
403                                               LPX64": rc = %d\n",
404                                               mdt_obd_name(mdt),
405                                               agent_req_status2name(ARS_FAILED),
406                                               hai->hai_cookie, rc);
407                                         GOTO(out_buf, rc);
408                                 }
409
410                                 /* if restore and record status updated, give
411                                  * back granted layout lock */
412                                 if (hai->hai_action == HSMA_RESTORE) {
413                                         struct cdt_restore_handle *crh = NULL;
414                                         struct mdt_object *obj = NULL;
415
416                                         mutex_lock(&cdt->cdt_restore_lock);
417                                         crh = mdt_hsm_restore_hdl_find(cdt,
418                                                                 &hai->hai_fid);
419                                         if (crh != NULL)
420                                                 list_del(&crh->crh_list);
421                                         mutex_unlock(&cdt->cdt_restore_lock);
422                                         obj = mdt_object_find(mti->mti_env,
423                                                               mti->mti_mdt,
424                                                               &hai->hai_fid);
425                                         if (!IS_ERR(obj) && crh != NULL)
426                                                 mdt_object_unlock(mti, obj,
427                                                                   &crh->crh_lh,
428                                                                   1);
429                                         if (crh != NULL)
430                                                 OBD_SLAB_FREE_PTR(crh,
431                                                         mdt_hsm_cdt_kmem);
432                                         if (!IS_ERR(obj))
433                                                 mdt_object_put(mti->mti_env,
434                                                                obj);
435                                 }
436                         }
437                 }
438         }
439
440         /* we found incompatible requests, so the compound cannot be send
441          * as is. Bad records have been invalidated in llog.
442          * Valid one will be reschedule next time coordinator will wake up
443          * So no need the rebuild a full valid compound request now
444          */
445         if (fail_request)
446                 GOTO(out_buf, rc = 0);
447
448         /* Cancel memory registration is useless for purge
449          * non registration avoid a deadlock :
450          * in case of failure we have to take the write lock
451          * to remove entry which conflict with the read loack needed
452          * by purge
453          */
454         if (!purge) {
455                 /* set is_registered even if failure because we may have
456                  * partial work done */
457                 is_registered = true;
458                 rc = mdt_hsm_add_hal(mti, hal, &uuid);
459                 if (rc)
460                         GOTO(out_buf, rc);
461         }
462
463         /* Uses the ldlm reverse import; this rpc will be seen by
464          *  the ldlm_callback_handler. Note this sends a request RPC
465          * from a server (MDT) to a client (MDC), backwards of normal comms.
466          */
467         exp = cfs_hash_lookup(mdt2obd_dev(mdt)->obd_uuid_hash, &uuid);
468         if (exp == NULL || exp->exp_disconnected) {
469                 /* This should clean up agents on evicted exports */
470                 rc = -ENOENT;
471                 CERROR("%s: agent uuid (%s) not found, unregistering:"
472                        " rc = %d\n",
473                        mdt_obd_name(mdt), obd_uuid2str(&uuid), rc);
474                 mdt_hsm_agent_unregister(mti, &uuid);
475                 GOTO(out, rc);
476         }
477
478         /* send request to agent */
479         rc = do_set_info_async(exp->exp_imp_reverse, LDLM_SET_INFO,
480                                LUSTRE_OBD_VERSION,
481                                sizeof(KEY_HSM_COPYTOOL_SEND),
482                                KEY_HSM_COPYTOOL_SEND,
483                                kuc_len(len), kuc_ptr(buf), NULL);
484
485         if (rc)
486                 CERROR("%s: cannot send request to agent '%s': rc = %d\n",
487                        mdt_obd_name(mdt), obd_uuid2str(&uuid), rc);
488
489         class_export_put(exp);
490
491         if (rc == -EPIPE) {
492                 CDEBUG(D_HSM, "Lost connection to agent '%s', unregistering\n",
493                        obd_uuid2str(&uuid));
494                 mdt_hsm_agent_unregister(mti, &uuid);
495         }
496
497 out:
498         if (rc != 0 && is_registered) {
499                 /* in case of error, we have to unregister requests */
500                 hai = hai_first(hal);
501                 for (i = 0; i < hal->hal_count; i++, hai = hai_next(hai)) {
502                         if (hai->hai_action == HSMA_CANCEL)
503                                 continue;
504                         mdt_cdt_remove_request(cdt, hai->hai_cookie);
505                 }
506         }
507
508 out_buf:
509         kuc_free(buf, len);
510
511         RETURN(rc);
512 }
513
514 /**
515  * update status of a request
516  * \param mti [IN]
517  * \param pgs [IN] progress of the copy tool
518  * \retval 0 success
519  * \retval -ve failure
520  */
521 int mdt_hsm_coordinator_update(struct mdt_thread_info *mti,
522                                struct hsm_progress_kernel *pgs)
523 {
524         int      rc;
525
526         ENTRY;
527         /* ask to coordinator to update request state and
528          * to record on disk the result */
529         rc = mdt_hsm_update_request_state(mti, pgs, 1);
530         RETURN(rc);
531 }
532
533 /**
534  * seq_file method called to start access to /proc file
535  */
536 static void *mdt_hsm_agent_proc_start(struct seq_file *s, loff_t *off)
537 {
538         struct mdt_device       *mdt = s->private;
539         struct coordinator      *cdt = &mdt->mdt_coordinator;
540         struct list_head        *pos;
541         loff_t                   i;
542         ENTRY;
543
544         down_read(&cdt->cdt_agent_lock);
545
546         if (list_empty(&cdt->cdt_agents))
547                 RETURN(NULL);
548
549         if (*off == 0)
550                 RETURN(SEQ_START_TOKEN);
551
552         i = 0;
553         list_for_each(pos, &cdt->cdt_agents) {
554                 i++;
555                 if (i >= *off)
556                         RETURN(pos);
557         }
558
559         RETURN(NULL);
560 }
561
562 /**
563  * seq_file method called to get next item
564  * just returns NULL at eof
565  */
566 static void *mdt_hsm_agent_proc_next(struct seq_file *s, void *v, loff_t *p)
567 {
568         struct mdt_device       *mdt = s->private;
569         struct coordinator      *cdt = &mdt->mdt_coordinator;
570         struct list_head        *pos = v;
571         ENTRY;
572
573         if (pos == SEQ_START_TOKEN)
574                 pos = cdt->cdt_agents.next;
575         else
576                 pos = pos->next;
577
578         (*p)++;
579         if (pos != &cdt->cdt_agents)
580                 RETURN(pos);
581
582         RETURN(NULL);
583 }
584
585 /**
586  */
587 static int mdt_hsm_agent_proc_show(struct seq_file *s, void *v)
588 {
589         struct list_head        *pos = v;
590         struct hsm_agent        *ha;
591         int                      i;
592         ENTRY;
593
594         if (pos == SEQ_START_TOKEN)
595                 RETURN(0);
596
597         ha = list_entry(pos, struct hsm_agent, ha_list);
598         seq_printf(s, "uuid=%s archive_id=", ha->ha_uuid.uuid);
599         if (ha->ha_archive_cnt == 0) {
600                 seq_printf(s, "ANY");
601         } else {
602                 seq_printf(s, "%d", ha->ha_archive_id[0]);
603                 for (i = 1; i < ha->ha_archive_cnt; i++)
604                         seq_printf(s, ",%d", ha->ha_archive_id[i]);
605         }
606
607         seq_printf(s, " requests=[current:%d ok:%d errors:%d]\n",
608                    atomic_read(&ha->ha_requests),
609                    atomic_read(&ha->ha_success),
610                    atomic_read(&ha->ha_failure));
611         RETURN(0);
612 }
613
614 /**
615  * seq_file method called to stop access to /proc file
616  */
617 static void mdt_hsm_agent_proc_stop(struct seq_file *s, void *v)
618 {
619         struct mdt_device       *mdt = s->private;
620         struct coordinator      *cdt = &mdt->mdt_coordinator;
621
622         up_read(&cdt->cdt_agent_lock);
623 }
624
625 /* hsm agent list proc functions */
626 static const struct seq_operations mdt_hsm_agent_proc_ops = {
627         .start  = mdt_hsm_agent_proc_start,
628         .next   = mdt_hsm_agent_proc_next,
629         .show   = mdt_hsm_agent_proc_show,
630         .stop   = mdt_hsm_agent_proc_stop,
631 };
632
633 /**
634  * public function called at open of /proc file to get
635  * list of agents
636  */
637 static int lprocfs_open_hsm_agent(struct inode *inode, struct file *file)
638 {
639         struct seq_file *s;
640         int              rc;
641         ENTRY;
642
643         rc = seq_open(file, &mdt_hsm_agent_proc_ops);
644         if (rc)
645                 RETURN(rc);
646
647         s = file->private_data;
648         s->private = PDE_DATA(inode);
649
650         RETURN(rc);
651 }
652
653 /* methods to access hsm agent list */
654 const struct file_operations mdt_hsm_agent_fops = {
655         .owner          = THIS_MODULE,
656         .open           = lprocfs_open_hsm_agent,
657         .read           = seq_read,
658         .llseek         = seq_lseek,
659         .release        = lprocfs_seq_release,
660 };
661