pacemaker 2.1.5-a3f44794f94
Scalable High-Availability cluster resource manager
membership.c
Go to the documentation of this file.
1/*
2 * Copyright 2004-2022 the Pacemaker project contributors
3 *
4 * The version control history for this file may have further details.
5 *
6 * This source code is licensed under the GNU Lesser General Public License
7 * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
8 */
9
10#include <crm_internal.h>
11
12#ifndef _GNU_SOURCE
13# define _GNU_SOURCE
14#endif
15
16#include <sys/param.h>
17#include <sys/types.h>
18#include <stdio.h>
19#include <unistd.h>
20#include <string.h>
21#include <glib.h>
22#include <crm/common/ipc.h>
25#include <crm/msg_xml.h>
26#include <crm/stonith-ng.h>
27#include "crmcluster_private.h"
28
29/* The peer cache remembers cluster nodes that have been seen.
30 * This is managed mostly automatically by libcluster, based on
31 * cluster membership events.
32 *
33 * Because cluster nodes can have conflicting names or UUIDs,
34 * the hash table key is a uniquely generated ID.
35 */
36GHashTable *crm_peer_cache = NULL;
37
38/*
39 * The remote peer cache tracks pacemaker_remote nodes. While the
40 * value has the same type as the peer cache's, it is tracked separately for
41 * three reasons: pacemaker_remote nodes can't have conflicting names or UUIDs,
42 * so the name (which is also the UUID) is used as the hash table key; there
43 * is no equivalent of membership events, so management is not automatic; and
44 * most users of the peer cache need to exclude pacemaker_remote nodes.
45 *
46 * That said, using a single cache would be more logical and less error-prone,
47 * so it would be a good idea to merge them one day.
48 *
49 * libcluster provides two avenues for populating the cache:
50 * crm_remote_peer_get() and crm_remote_peer_cache_remove() directly manage it,
51 * while crm_remote_peer_cache_refresh() populates it via the CIB.
52 */
53GHashTable *crm_remote_peer_cache = NULL;
54
55/*
56 * The known node cache tracks cluster and remote nodes that have been seen in
57 * the CIB. It is useful mainly when a caller needs to know about a node that
58 * may no longer be in the membership, but doesn't want to add the node to the
59 * main peer cache tables.
60 */
61static GHashTable *known_node_cache = NULL;
62
// Sequence number of the most recent membership event seen
// (presumably set from the cluster layer's ring/membership ID — confirm in callers)
unsigned long long crm_peer_seq = 0;

// Whether the local cluster partition currently has quorum
gboolean crm_have_quorum = FALSE;

// Whether lost cluster nodes are automatically purged from the peer cache
// (see crm_set_autoreap())
static gboolean crm_autoreap = TRUE;

// Flag setting and clearing for crm_node_t:flags

/* Set \p flags_to_set in a peer's flag group, with trace logging of the
 * change attributed to the calling function and line
 */
#define set_peer_flags(peer, flags_to_set) do {                             \
        (peer)->flags = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE,   \
                                           "Peer", (peer)->uname,           \
                                           (peer)->flags, (flags_to_set),   \
                                           #flags_to_set);                  \
    } while (0)

/* Clear \p flags_to_clear from a peer's flag group, with trace logging of
 * the change attributed to the calling function and line
 */
#define clear_peer_flags(peer, flags_to_clear) do {                         \
        (peer)->flags = pcmk__clear_flags_as(__func__, __LINE__,            \
                                             LOG_TRACE,                     \
                                             "Peer", (peer)->uname,         \
                                             (peer)->flags, (flags_to_clear), \
                                             #flags_to_clear);              \
    } while (0)
83
84static void update_peer_uname(crm_node_t *node, const char *uname);
85
86int
88{
89 if (crm_remote_peer_cache == NULL) {
90 return 0;
91 }
92 return g_hash_table_size(crm_remote_peer_cache);
93}
94
107crm_remote_peer_get(const char *node_name)
108{
109 crm_node_t *node;
110
111 if (node_name == NULL) {
112 errno = -EINVAL;
113 return NULL;
114 }
115
116 /* Return existing cache entry if one exists */
117 node = g_hash_table_lookup(crm_remote_peer_cache, node_name);
118 if (node) {
119 return node;
120 }
121
122 /* Allocate a new entry */
123 node = calloc(1, sizeof(crm_node_t));
124 if (node == NULL) {
125 return NULL;
126 }
127
128 /* Populate the essential information */
130 node->uuid = strdup(node_name);
131 if (node->uuid == NULL) {
132 free(node);
133 errno = -ENOMEM;
134 return NULL;
135 }
136
137 /* Add the new entry to the cache */
138 g_hash_table_replace(crm_remote_peer_cache, node->uuid, node);
139 crm_trace("added %s to remote cache", node_name);
140
141 /* Update the entry's uname, ensuring peer status callbacks are called */
142 update_peer_uname(node, node_name);
143 return node;
144}
145
146void
147crm_remote_peer_cache_remove(const char *node_name)
148{
149 if (g_hash_table_remove(crm_remote_peer_cache, node_name)) {
150 crm_trace("removed %s from remote peer cache", node_name);
151 }
152}
153
165static const char *
166remote_state_from_cib(xmlNode *node_state)
167{
168 bool status = false;
169
170 if (pcmk__xe_get_bool_attr(node_state, XML_NODE_IN_CLUSTER, &status) == pcmk_rc_ok && !status) {
171 return CRM_NODE_LOST;
172 } else {
173 return CRM_NODE_MEMBER;
174 }
175}
176
/* user data for looping through remote node xpath searches
 * (passed to remote_cache_refresh_helper() via the xpath iteration)
 */
struct refresh_data {
    const char *field;      /* XML attribute to check for node name */
    gboolean has_state;     /* whether to update node state based on XML */
};
182
190static void
191remote_cache_refresh_helper(xmlNode *result, void *user_data)
192{
193 struct refresh_data *data = user_data;
194 const char *remote = crm_element_value(result, data->field);
195 const char *state = NULL;
196 crm_node_t *node;
197
198 CRM_CHECK(remote != NULL, return);
199
200 /* Determine node's state, if the result has it */
201 if (data->has_state) {
202 state = remote_state_from_cib(result);
203 }
204
205 /* Check whether cache already has entry for node */
206 node = g_hash_table_lookup(crm_remote_peer_cache, remote);
207
208 if (node == NULL) {
209 /* Node is not in cache, so add a new entry for it */
210 node = crm_remote_peer_get(remote);
211 CRM_ASSERT(node);
212 if (state) {
213 pcmk__update_peer_state(__func__, node, state, 0);
214 }
215
216 } else if (pcmk_is_set(node->flags, crm_node_dirty)) {
217 /* Node is in cache and hasn't been updated already, so mark it clean */
219 if (state) {
220 pcmk__update_peer_state(__func__, node, state, 0);
221 }
222 }
223}
224
225static void
226mark_dirty(gpointer key, gpointer value, gpointer user_data)
227{
229}
230
231static gboolean
232is_dirty(gpointer key, gpointer value, gpointer user_data)
233{
234 return pcmk_is_set(((crm_node_t*)value)->flags, crm_node_dirty);
235}
236
242void
244{
245 struct refresh_data data;
246
248
249 /* First, we mark all existing cache entries as dirty,
250 * so that later we can remove any that weren't in the CIB.
251 * We don't empty the cache, because we need to detect changes in state.
252 */
253 g_hash_table_foreach(crm_remote_peer_cache, mark_dirty, NULL);
254
255 /* Look for guest nodes and remote nodes in the status section */
256 data.field = "id";
257 data.has_state = TRUE;
259 remote_cache_refresh_helper, &data);
260
261 /* Look for guest nodes and remote nodes in the configuration section,
262 * because they may have just been added and not have a status entry yet.
263 * In that case, the cached node state will be left NULL, so that the
264 * peer status callback isn't called until we're sure the node started
265 * successfully.
266 */
267 data.field = "value";
268 data.has_state = FALSE;
270 remote_cache_refresh_helper, &data);
271 data.field = "id";
272 data.has_state = FALSE;
274 remote_cache_refresh_helper, &data);
275
276 /* Remove all old cache entries that weren't seen in the CIB */
277 g_hash_table_foreach_remove(crm_remote_peer_cache, is_dirty, NULL);
278}
279
280gboolean
282{
283 if(node == NULL) {
284 return FALSE;
285 }
286
287 if (pcmk_is_set(node->flags, crm_remote_node)) {
288 /* remote nodes are never considered active members. This
289 * guarantees they will never be considered for DC membership.*/
290 return FALSE;
291 }
292#if SUPPORT_COROSYNC
293 if (is_corosync_cluster()) {
294 return crm_is_corosync_peer_active(node);
295 }
296#endif
297 crm_err("Unhandled cluster type: %s", name_for_cluster_type(get_cluster_type()));
298 return FALSE;
299}
300
301static gboolean
302crm_reap_dead_member(gpointer key, gpointer value, gpointer user_data)
303{
304 crm_node_t *node = value;
305 crm_node_t *search = user_data;
306
307 if (search == NULL) {
308 return FALSE;
309
310 } else if (search->id && node->id != search->id) {
311 return FALSE;
312
313 } else if (search->id == 0 && !pcmk__str_eq(node->uname, search->uname, pcmk__str_casei)) {
314 return FALSE;
315
316 } else if (crm_is_peer_active(value) == FALSE) {
317 crm_info("Removing node with name %s and id %u from membership cache",
318 (node->uname? node->uname : "unknown"), node->id);
319 return TRUE;
320 }
321 return FALSE;
322}
323
332guint
333reap_crm_member(uint32_t id, const char *name)
334{
335 int matches = 0;
336 crm_node_t search = { 0, };
337
338 if (crm_peer_cache == NULL) {
339 crm_trace("Membership cache not initialized, ignoring purge request");
340 return 0;
341 }
342
343 search.id = id;
344 pcmk__str_update(&search.uname, name);
345 matches = g_hash_table_foreach_remove(crm_peer_cache, crm_reap_dead_member, &search);
346 if(matches) {
347 crm_notice("Purged %d peer%s with id=%u%s%s from the membership cache",
348 matches, pcmk__plural_s(matches), search.id,
349 (search.uname? " and/or uname=" : ""),
350 (search.uname? search.uname : ""));
351
352 } else {
353 crm_info("No peers with id=%u%s%s to purge from the membership cache",
354 search.id, (search.uname? " and/or uname=" : ""),
355 (search.uname? search.uname : ""));
356 }
357
358 free(search.uname);
359 return matches;
360}
361
362static void
363count_peer(gpointer key, gpointer value, gpointer user_data)
364{
365 guint *count = user_data;
366 crm_node_t *node = value;
367
368 if (crm_is_peer_active(node)) {
369 *count = *count + 1;
370 }
371}
372
373guint
375{
376 guint count = 0;
377
378 if (crm_peer_cache) {
379 g_hash_table_foreach(crm_peer_cache, count_peer, &count);
380 }
381 return count;
382}
383
384static void
385destroy_crm_node(gpointer data)
386{
387 crm_node_t *node = data;
388
389 crm_trace("Destroying entry for node %u: %s", node->id, node->uname);
390
391 free(node->uname);
392 free(node->state);
393 free(node->uuid);
394 free(node->expected);
395 free(node);
396}
397
398void
400{
401 if (crm_peer_cache == NULL) {
402 crm_peer_cache = pcmk__strikey_table(free, destroy_crm_node);
403 }
404
405 if (crm_remote_peer_cache == NULL) {
406 crm_remote_peer_cache = pcmk__strikey_table(NULL, destroy_crm_node);
407 }
408
409 if (known_node_cache == NULL) {
410 known_node_cache = pcmk__strikey_table(free, destroy_crm_node);
411 }
412}
413
414void
416{
417 if (crm_peer_cache != NULL) {
418 crm_trace("Destroying peer cache with %d members", g_hash_table_size(crm_peer_cache));
419 g_hash_table_destroy(crm_peer_cache);
420 crm_peer_cache = NULL;
421 }
422
423 if (crm_remote_peer_cache != NULL) {
424 crm_trace("Destroying remote peer cache with %d members", g_hash_table_size(crm_remote_peer_cache));
425 g_hash_table_destroy(crm_remote_peer_cache);
427 }
428
429 if (known_node_cache != NULL) {
430 crm_trace("Destroying known node cache with %d members",
431 g_hash_table_size(known_node_cache));
432 g_hash_table_destroy(known_node_cache);
433 known_node_cache = NULL;
434 }
435
436}
437
438static void (*peer_status_callback)(enum crm_status_type, crm_node_t *,
439 const void *) = NULL;
440
/*!
 * \brief Set a client function to be called after peer status changes
 *
 * \param[in] dispatch  Function called on each uname/state/process change
 *                      (see the peer_status_callback invocations in this file)
 */
void
crm_set_status_callback(void (*dispatch) (enum crm_status_type, crm_node_t *, const void *))
{
    peer_status_callback = dispatch;
}
456
/*!
 * \brief Tell the library whether to automatically reap lost nodes
 *
 * \param[in] autoreap  TRUE to remove nodes from the peer cache when they
 *                      leave membership, FALSE to leave that to the caller
 *
 * \note Affects crm_update_peer_proc() and update_peer_state_iter(),
 *       which consult the crm_autoreap flag set here.
 */
void
crm_set_autoreap(gboolean autoreap)
{
    crm_autoreap = autoreap;
}
473
474static void
475dump_peer_hash(int level, const char *caller)
476{
477 GHashTableIter iter;
478 const char *id = NULL;
479 crm_node_t *node = NULL;
480
481 g_hash_table_iter_init(&iter, crm_peer_cache);
482 while (g_hash_table_iter_next(&iter, (gpointer *) &id, (gpointer *) &node)) {
483 do_crm_log(level, "%s: Node %u/%s = %p - %s", caller, node->id, node->uname, node, id);
484 }
485}
486
/* GHRFunc: match a cache entry by its value pointer (used with
 * g_hash_table_foreach_remove() to drop a specific crm_node_t)
 */
static gboolean
hash_find_by_data(gpointer key, gpointer value, gpointer user_data)
{
    return value == user_data;
}
492
504pcmk__search_node_caches(unsigned int id, const char *uname, uint32_t flags)
505{
506 crm_node_t *node = NULL;
507
508 CRM_ASSERT(id > 0 || uname != NULL);
509
511
512 if ((uname != NULL) && pcmk_is_set(flags, CRM_GET_PEER_REMOTE)) {
513 node = g_hash_table_lookup(crm_remote_peer_cache, uname);
514 }
515
516 if ((node == NULL) && pcmk_is_set(flags, CRM_GET_PEER_CLUSTER)) {
518 }
519 return node;
520}
521
532crm_get_peer_full(unsigned int id, const char *uname, int flags)
533{
534 crm_node_t *node = NULL;
535
536 CRM_ASSERT(id > 0 || uname != NULL);
537
539
541 node = g_hash_table_lookup(crm_remote_peer_cache, uname);
542 }
543
544 if ((node == NULL) && pcmk_is_set(flags, CRM_GET_PEER_CLUSTER)) {
545 node = crm_get_peer(id, uname);
546 }
547 return node;
548}
549
560pcmk__search_cluster_node_cache(unsigned int id, const char *uname)
561{
562 GHashTableIter iter;
563 crm_node_t *node = NULL;
564 crm_node_t *by_id = NULL;
565 crm_node_t *by_name = NULL;
566
567 CRM_ASSERT(id > 0 || uname != NULL);
568
570
571 if (uname != NULL) {
572 g_hash_table_iter_init(&iter, crm_peer_cache);
573 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
574 if(node->uname && strcasecmp(node->uname, uname) == 0) {
575 crm_trace("Name match: %s = %p", node->uname, node);
576 by_name = node;
577 break;
578 }
579 }
580 }
581
582 if (id > 0) {
583 g_hash_table_iter_init(&iter, crm_peer_cache);
584 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
585 if(node->id == id) {
586 crm_trace("ID match: %u = %p", node->id, node);
587 by_id = node;
588 break;
589 }
590 }
591 }
592
593 node = by_id; /* Good default */
594 if(by_id == by_name) {
595 /* Nothing to do if they match (both NULL counts) */
596 crm_trace("Consistent: %p for %u/%s", by_id, id, uname);
597
598 } else if(by_id == NULL && by_name) {
599 crm_trace("Only one: %p for %u/%s", by_name, id, uname);
600
601 if(id && by_name->id) {
602 dump_peer_hash(LOG_WARNING, __func__);
603 crm_crit("Node %u and %u share the same name '%s'",
604 id, by_name->id, uname);
605 node = NULL; /* Create a new one */
606
607 } else {
608 node = by_name;
609 }
610
611 } else if(by_name == NULL && by_id) {
612 crm_trace("Only one: %p for %u/%s", by_id, id, uname);
613
614 if(uname && by_id->uname) {
615 dump_peer_hash(LOG_WARNING, __func__);
616 crm_crit("Node '%s' and '%s' share the same cluster nodeid %u: assuming '%s' is correct",
617 uname, by_id->uname, id, uname);
618 }
619
620 } else if(uname && by_id->uname) {
621 if(pcmk__str_eq(uname, by_id->uname, pcmk__str_casei)) {
622 crm_notice("Node '%s' has changed its ID from %u to %u", by_id->uname, by_name->id, by_id->id);
623 g_hash_table_foreach_remove(crm_peer_cache, hash_find_by_data, by_name);
624
625 } else {
626 crm_warn("Node '%s' and '%s' share the same cluster nodeid: %u %s", by_id->uname, by_name->uname, id, uname);
627 dump_peer_hash(LOG_INFO, __func__);
628 crm_abort(__FILE__, __func__, __LINE__, "member weirdness", TRUE,
629 TRUE);
630 }
631
632 } else if(id && by_name->id) {
633 crm_warn("Node %u and %u share the same name: '%s'", by_id->id, by_name->id, uname);
634
635 } else {
636 /* Simple merge */
637
638 /* Only corosync-based clusters use node IDs. The functions that call
639 * pcmk__update_peer_state() and crm_update_peer_proc() only know
640 * nodeid, so 'by_id' is authoritative when merging.
641 */
642 dump_peer_hash(LOG_DEBUG, __func__);
643
644 crm_info("Merging %p into %p", by_name, by_id);
645 g_hash_table_foreach_remove(crm_peer_cache, hash_find_by_data, by_name);
646 }
647
648 return node;
649}
650
#if SUPPORT_COROSYNC
/* Purge inactive cached peers whose uname conflicts with \p node's but whose
 * ID differs; returns the number of entries removed. Active peers are never
 * removed.
 */
static guint
remove_conflicting_peer(crm_node_t *node)
{
    int matches = 0;
    GHashTableIter iter;
    crm_node_t *existing_node = NULL;

    if (node->id == 0 || node->uname == NULL) {
        return 0;
    }

    /* Without a corosync nodelist we can't trust IDs enough to purge by
     * conflict — TODO confirm this reconstructed guard against upstream
     */
    if (!pcmk__corosync_has_nodelist()) {
        return 0;
    }

    g_hash_table_iter_init(&iter, crm_peer_cache);
    while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &existing_node)) {
        if (existing_node->id > 0
            && existing_node->id != node->id
            && existing_node->uname != NULL
            && strcasecmp(existing_node->uname, node->uname) == 0) {

            if (crm_is_peer_active(existing_node)) {
                continue;
            }

            crm_warn("Removing cached offline node %u/%s which has conflicting uname with %u",
                     existing_node->id, existing_node->uname, node->id);

            g_hash_table_iter_remove(&iter);
            matches++;
        }
    }

    return matches;
}
#endif
689
698/* coverity[-alloc] Memory is referenced in one or both hashtables */
700crm_get_peer(unsigned int id, const char *uname)
701{
702 crm_node_t *node = NULL;
703 char *uname_lookup = NULL;
704
705 CRM_ASSERT(id > 0 || uname != NULL);
706
708
710
711 /* if uname wasn't provided, and find_peer did not turn up a uname based on id.
712 * we need to do a lookup of the node name using the id in the cluster membership. */
713 if ((node == NULL || node->uname == NULL) && (uname == NULL)) {
714 uname_lookup = get_node_name(id);
715 }
716
717 if (uname_lookup) {
718 uname = uname_lookup;
719 crm_trace("Inferred a name of '%s' for node %u", uname, id);
720
721 /* try to turn up the node one more time now that we know the uname. */
722 if (node == NULL) {
724 }
725 }
726
727
728 if (node == NULL) {
729 char *uniqueid = crm_generate_uuid();
730
731 node = calloc(1, sizeof(crm_node_t));
732 CRM_ASSERT(node);
733
734 crm_info("Created entry %s/%p for node %s/%u (%d total)",
735 uniqueid, node, uname, id, 1 + g_hash_table_size(crm_peer_cache));
736 g_hash_table_replace(crm_peer_cache, uniqueid, node);
737 }
738
739 if(id > 0 && uname && (node->id == 0 || node->uname == NULL)) {
740 crm_info("Node %u is now known as %s", id, uname);
741 }
742
743 if(id > 0 && node->id == 0) {
744 node->id = id;
745 }
746
747 if (uname && (node->uname == NULL)) {
748 update_peer_uname(node, uname);
749 }
750
751 if(node->uuid == NULL) {
752 const char *uuid = crm_peer_uuid(node);
753
754 if (uuid) {
755 crm_info("Node %u has uuid %s", id, uuid);
756
757 } else {
758 crm_info("Cannot obtain a UUID for node %u/%s", id, node->uname);
759 }
760 }
761
762 free(uname_lookup);
763
764 return node;
765}
766
778static void
779update_peer_uname(crm_node_t *node, const char *uname)
780{
781 CRM_CHECK(uname != NULL,
782 crm_err("Bug: can't update node name without name"); return);
783 CRM_CHECK(node != NULL,
784 crm_err("Bug: can't update node name to %s without node", uname);
785 return);
786
787 if (pcmk__str_eq(uname, node->uname, pcmk__str_casei)) {
788 crm_debug("Node uname '%s' did not change", uname);
789 return;
790 }
791
792 for (const char *c = uname; *c; ++c) {
793 if ((*c >= 'A') && (*c <= 'Z')) {
794 crm_warn("Node names with capitals are discouraged, consider changing '%s'",
795 uname);
796 break;
797 }
798 }
799
801
802 if (peer_status_callback != NULL) {
803 peer_status_callback(crm_status_uname, node, NULL);
804 }
805
806#if SUPPORT_COROSYNC
808 remove_conflicting_peer(node);
809 }
810#endif
811}
812
821static inline const char *
822proc2text(enum crm_proc_flag proc)
823{
824 const char *text = "unknown";
825
826 switch (proc) {
827 case crm_proc_none:
828 text = "none";
829 break;
830 case crm_proc_based:
831 text = "pacemaker-based";
832 break;
834 text = "pacemaker-controld";
835 break;
837 text = "pacemaker-schedulerd";
838 break;
839 case crm_proc_execd:
840 text = "pacemaker-execd";
841 break;
842 case crm_proc_attrd:
843 text = "pacemaker-attrd";
844 break;
845 case crm_proc_fenced:
846 text = "pacemaker-fenced";
847 break;
848 case crm_proc_cpg:
849 text = "corosync-cpg";
850 break;
851 }
852 return text;
853}
854
872crm_update_peer_proc(const char *source, crm_node_t * node, uint32_t flag, const char *status)
873{
874 uint32_t last = 0;
875 gboolean changed = FALSE;
876
877 CRM_CHECK(node != NULL, crm_err("%s: Could not set %s to %s for NULL",
878 source, proc2text(flag), status);
879 return NULL);
880
881 /* Pacemaker doesn't spawn processes on remote nodes */
882 if (pcmk_is_set(node->flags, crm_remote_node)) {
883 return node;
884 }
885
886 last = node->processes;
887 if (status == NULL) {
888 node->processes = flag;
889 if (node->processes != last) {
890 changed = TRUE;
891 }
892
893 } else if (pcmk__str_eq(status, ONLINESTATUS, pcmk__str_casei)) {
894 if ((node->processes & flag) != flag) {
895 node->processes = pcmk__set_flags_as(__func__, __LINE__,
896 LOG_TRACE, "Peer process",
897 node->uname, node->processes,
898 flag, "processes");
899 changed = TRUE;
900 }
901
902 } else if (node->processes & flag) {
903 node->processes = pcmk__clear_flags_as(__func__, __LINE__,
904 LOG_TRACE, "Peer process",
905 node->uname, node->processes,
906 flag, "processes");
907 changed = TRUE;
908 }
909
910 if (changed) {
911 if (status == NULL && flag <= crm_proc_none) {
912 crm_info("%s: Node %s[%u] - all processes are now offline", source, node->uname,
913 node->id);
914 } else {
915 crm_info("%s: Node %s[%u] - %s is now %s", source, node->uname, node->id,
916 proc2text(flag), status);
917 }
918
919 /* Call the client callback first, then update the peer state,
920 * in case the node will be reaped
921 */
922 if (peer_status_callback != NULL) {
923 peer_status_callback(crm_status_processes, node, &last);
924 }
925
926 /* The client callback shouldn't touch the peer caches,
927 * but as a safety net, bail if the peer cache was destroyed.
928 */
929 if (crm_peer_cache == NULL) {
930 return NULL;
931 }
932
933 if (crm_autoreap) {
934 const char *peer_state = NULL;
935
936 if (pcmk_is_set(node->processes, crm_get_cluster_proc())) {
937 peer_state = CRM_NODE_MEMBER;
938 } else {
939 peer_state = CRM_NODE_LOST;
940 }
941 node = pcmk__update_peer_state(__func__, node, peer_state, 0);
942 }
943 } else {
944 crm_trace("%s: Node %s[%u] - %s is unchanged (%s)", source, node->uname, node->id,
945 proc2text(flag), status);
946 }
947 return node;
948}
949
958void
959pcmk__update_peer_expected(const char *source, crm_node_t *node,
960 const char *expected)
961{
962 char *last = NULL;
963 gboolean changed = FALSE;
964
965 CRM_CHECK(node != NULL, crm_err("%s: Could not set 'expected' to %s", source, expected);
966 return);
967
968 /* Remote nodes don't participate in joins */
969 if (pcmk_is_set(node->flags, crm_remote_node)) {
970 return;
971 }
972
973 last = node->expected;
974 if (expected != NULL && !pcmk__str_eq(node->expected, expected, pcmk__str_casei)) {
975 node->expected = strdup(expected);
976 changed = TRUE;
977 }
978
979 if (changed) {
980 crm_info("%s: Node %s[%u] - expected state is now %s (was %s)", source, node->uname, node->id,
981 expected, last);
982 free(last);
983 } else {
984 crm_trace("%s: Node %s[%u] - expected state is unchanged (%s)", source, node->uname,
985 node->id, expected);
986 }
987}
988
/*!
 * \internal
 * \brief Update a node's membership state, optionally reaping a lost node
 *
 * \param[in] source      Caller's function name (for logging)
 * \param[in] node        Node object to update
 * \param[in] state       New state (CRM_NODE_MEMBER / CRM_NODE_LOST)
 * \param[in] membership  Membership ID to record if nonzero and node is
 *                        becoming a member
 * \param[in] iter        If not NULL, an active iterator over the peer cache
 *                        positioned at \p node, used to remove it safely
 *
 * \return \p node, or NULL if the node was removed from the cache
 */
static crm_node_t *
update_peer_state_iter(const char *source, crm_node_t *node, const char *state,
                       uint64_t membership, GHashTableIter *iter)
{
    gboolean is_member;

    CRM_CHECK(node != NULL,
              crm_err("Could not set state for unknown host to %s"
                      CRM_XS " source=%s", state, source);
              return NULL);

    is_member = pcmk__str_eq(state, CRM_NODE_MEMBER, pcmk__str_casei);
    if (is_member) {
        // Rejoining clears any pending loss timestamp
        node->when_lost = 0;
        if (membership) {
            node->last_seen = membership;
        }
    }

    if (state && !pcmk__str_eq(node->state, state, pcmk__str_casei)) {
        char *last = node->state;

        node->state = strdup(state);
        crm_notice("Node %s state is now %s " CRM_XS
                   " nodeid=%u previous=%s source=%s", node->uname, state,
                   node->id, (last? last : "unknown"), source);
        // Notify the client before any reaping below frees the node
        if (peer_status_callback != NULL) {
            peer_status_callback(crm_status_nstate, node, last);
        }
        free(last);

        if (crm_autoreap && !is_member
            && !pcmk_is_set(node->flags, crm_remote_node)) {
            /* We only autoreap from the peer cache, not the remote peer cache,
             * because the latter should be managed only by
             * crm_remote_peer_cache_refresh().
             */
            if(iter) {
                // Safe removal mid-iteration via the caller's iterator
                crm_notice("Purged 1 peer with id=%u and/or uname=%s from the membership cache", node->id, node->uname);
                g_hash_table_iter_remove(iter);

            } else {
                reap_crm_member(node->id, node->uname);
            }
            // Entry was freed by the hash table's value destructor
            node = NULL;
        }

    } else {
        crm_trace("Node %s state is unchanged (%s) " CRM_XS
                  " nodeid=%u source=%s", node->uname, state, node->id, source);
    }
    return node;
}
1058
/*!
 * \internal
 * \brief Update a node's state and membership information
 *
 * \param[in] source      Caller's function name (for logging)
 * \param[in] node        Node object to update
 * \param[in] state       New state (CRM_NODE_MEMBER / CRM_NODE_LOST)
 * \param[in] membership  Membership ID to record if nonzero
 *
 * \return \p node, or NULL if the node was reaped from the cache
 *
 * \note Thin wrapper over update_peer_state_iter() for callers that are
 *       not iterating the peer cache.
 */
crm_node_t *
pcmk__update_peer_state(const char *source, crm_node_t *node,
                        const char *state, uint64_t membership)
{
    return update_peer_state_iter(source, node, state, membership, NULL);
}
1080
/*!
 * \internal
 * \brief Mark LOST (and possibly reap) all cached cluster nodes not seen in
 *        the given membership
 *
 * \param[in] membership  Membership ID that active nodes were last seen in
 */
void
pcmk__reap_unseen_nodes(uint64_t membership)
{
    GHashTableIter iter;
    crm_node_t *node = NULL;

    crm_trace("Reaping unseen nodes...");
    g_hash_table_iter_init(&iter, crm_peer_cache);
    while (g_hash_table_iter_next(&iter, NULL, (gpointer *)&node)) {
        if (node->last_seen != membership) {
            if (node->state) {
                /*
                 * Calling update_peer_state_iter() allows us to
                 * remove the node from crm_peer_cache without
                 * invalidating our iterator
                 */
                update_peer_state_iter(__func__, node, CRM_NODE_LOST,
                                       membership, &iter);

            } else {
                // Never had a state; leave it for a later membership event
                crm_info("State of node %s[%u] is still unknown",
                         node->uname, node->id);
            }
        }
    }
}
1113
/*!
 * \internal
 * \brief Search the known node cache by CIB node ID and/or uname
 *
 * \param[in] id     If not NULL, node's XML ID (matched against uuid)
 * \param[in] uname  If not NULL, node name (matched case-insensitively)
 *
 * \return Matching entry, or NULL if none or if ID and name match
 *         different, irreconcilable entries
 */
static crm_node_t *
find_known_node(const char *id, const char *uname)
{
    GHashTableIter iter;
    crm_node_t *node = NULL;
    crm_node_t *by_id = NULL;
    crm_node_t *by_name = NULL;

    // First matching entry by name, if a name was given
    if (uname) {
        g_hash_table_iter_init(&iter, known_node_cache);
        while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
            if (node->uname && strcasecmp(node->uname, uname) == 0) {
                crm_trace("Name match: %s = %p", node->uname, node);
                by_name = node;
                break;
            }
        }
    }

    // First matching entry by XML ID (stored as uuid), if an ID was given
    if (id) {
        g_hash_table_iter_init(&iter, known_node_cache);
        while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
            if(strcasecmp(node->uuid, id) == 0) {
                crm_trace("ID match: %s= %p", id, node);
                by_id = node;
                break;
            }
        }
    }

    node = by_id; /* Good default */
    if (by_id == by_name) {
        /* Nothing to do if they match (both NULL counts) */
        crm_trace("Consistent: %p for %s/%s", by_id, id, uname);

    } else if (by_id == NULL && by_name) {
        crm_trace("Only one: %p for %s/%s", by_name, id, uname);

        if (id) {
            // ID was given but only the name matched: don't trust the match
            node = NULL;

        } else {
            node = by_name;
        }

    } else if (by_name == NULL && by_id) {
        crm_trace("Only one: %p for %s/%s", by_id, id, uname);

        if (uname) {
            // Name was given but only the ID matched: don't trust the match
            node = NULL;
        }

    } else if (uname && by_id->uname
               && pcmk__str_eq(uname, by_id->uname, pcmk__str_casei)) {
        /* Multiple nodes have the same uname in the CIB.
         * Return by_id. */

    } else if (id && by_name->uuid
               && pcmk__str_eq(id, by_name->uuid, pcmk__str_casei)) {
        /* Multiple nodes have the same id in the CIB.
         * Return by_name. */
        node = by_name;

    } else {
        // ID and name matched two unrelated entries
        node = NULL;
    }

    if (node == NULL) {
        crm_debug("Couldn't find node%s%s%s%s",
                  id? " " : "",
                  id? id : "",
                  uname? " with name " : "",
                  uname? uname : "");
    }

    return node;
}
1191
1192static void
1193known_node_cache_refresh_helper(xmlNode *xml_node, void *user_data)
1194{
1195 const char *id = crm_element_value(xml_node, XML_ATTR_ID);
1196 const char *uname = crm_element_value(xml_node, XML_ATTR_UNAME);
1197 crm_node_t * node = NULL;
1198
1199 CRM_CHECK(id != NULL && uname !=NULL, return);
1200 node = find_known_node(id, uname);
1201
1202 if (node == NULL) {
1203 char *uniqueid = crm_generate_uuid();
1204
1205 node = calloc(1, sizeof(crm_node_t));
1206 CRM_ASSERT(node != NULL);
1207
1208 node->uname = strdup(uname);
1209 CRM_ASSERT(node->uname != NULL);
1210
1211 node->uuid = strdup(id);
1212 CRM_ASSERT(node->uuid != NULL);
1213
1214 g_hash_table_replace(known_node_cache, uniqueid, node);
1215
1216 } else if (pcmk_is_set(node->flags, crm_node_dirty)) {
1217 pcmk__str_update(&node->uname, uname);
1218
1219 /* Node is in cache and hasn't been updated already, so mark it clean */
1221 }
1222
1223}
1224
1225static void
1226refresh_known_node_cache(xmlNode *cib)
1227{
1228 crm_peer_init();
1229
1230 g_hash_table_foreach(known_node_cache, mark_dirty, NULL);
1231
1233 known_node_cache_refresh_helper, NULL);
1234
1235 /* Remove all old cache entries that weren't seen in the CIB */
1236 g_hash_table_foreach_remove(known_node_cache, is_dirty, NULL);
1237}
1238
1239void
1241{
1243 refresh_known_node_cache(cib);
1244}
1245
1256crm_node_t *
1257pcmk__search_known_node_cache(unsigned int id, const char *uname,
1258 uint32_t flags)
1259{
1260 crm_node_t *node = NULL;
1261 char *id_str = NULL;
1262
1263 CRM_ASSERT(id > 0 || uname != NULL);
1264
1266
1267 if (node || !(flags & CRM_GET_PEER_CLUSTER)) {
1268 return node;
1269 }
1270
1271 if (id > 0) {
1272 id_str = crm_strdup_printf("%u", id);
1273 }
1274
1275 node = find_known_node(id_str, uname);
1276
1277 free(id_str);
1278 return node;
1279}
1280
1281
1282// Deprecated functions kept only for backward API compatibility
1283// LCOV_EXCL_START
1284
1285#include <crm/cluster/compat.h>
1286
// \deprecated Kept only for backward API compatibility; fences the named
// node via stonith_api_kick() with a 120s timeout (the third argument is
// unused)
int
crm_terminate_member(int nodeid, const char *uname, void *unused)
{
    return stonith_api_kick(nodeid, uname, 120, TRUE);
}
1292
// \deprecated Kept only for backward API compatibility; identical to
// crm_terminate_member() — the connection argument is ignored
int
crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection)
{
    return stonith_api_kick(nodeid, uname, 120, TRUE);
}
1298
1299// LCOV_EXCL_STOP
1300// End deprecated API
const char * name
Definition: cib.c:24
crm_proc_flag
Definition: internal.h:17
@ crm_proc_attrd
Definition: internal.h:27
@ crm_proc_schedulerd
Definition: internal.h:28
@ crm_proc_controld
Definition: internal.h:26
@ crm_proc_cpg
Definition: internal.h:21
@ crm_proc_based
Definition: internal.h:25
@ crm_proc_execd
Definition: internal.h:24
@ crm_proc_none
Definition: internal.h:18
@ crm_proc_fenced
Definition: internal.h:29
gboolean is_corosync_cluster(void)
Check whether the local cluster is a Corosync cluster.
Definition: cluster.c:375
#define CRM_NODE_LOST
Definition: cluster.h:32
const char * crm_peer_uuid(crm_node_t *node)
Get (and set if needed) a node's UUID.
Definition: cluster.c:38
enum cluster_type_e get_cluster_type(void)
Get (and validate) the local cluster type.
Definition: cluster.c:311
#define CRM_NODE_MEMBER
Definition: cluster.h:33
char * get_node_name(uint32_t nodeid)
Get the node name corresponding to a cluster node ID.
Definition: cluster.c:175
const char * name_for_cluster_type(enum cluster_type_e type)
Get a log-friendly string equivalent of a cluster type.
Definition: cluster.c:290
@ CRM_GET_PEER_CLUSTER
Definition: cluster.h:118
@ CRM_GET_PEER_REMOTE
Definition: cluster.h:119
@ crm_remote_node
Definition: cluster.h:46
@ crm_node_dirty
Definition: cluster.h:49
crm_status_type
Definition: cluster.h:166
@ crm_status_processes
Definition: cluster.h:169
@ crm_status_nstate
Definition: cluster.h:168
@ crm_status_uname
Definition: cluster.h:167
int pcmk__xe_get_bool_attr(const xmlNode *node, const char *name, bool *value)
Definition: nvpair.c:948
uint64_t flags
Definition: remote.c:3
#define ONLINESTATUS
Definition: util.h:39
char * crm_generate_uuid(void)
Definition: utils.c:509
void crm_abort(const char *file, const char *function, int line, const char *condition, gboolean do_core, gboolean do_fork)
Definition: utils.c:397
char * crm_strdup_printf(char const *format,...) G_GNUC_PRINTF(1
#define pcmk_is_set(g, f)
Convenience alias for pcmk_all_flags_set(), to check single flag.
Definition: util.h:121
Deprecated Pacemaker cluster API.
gboolean crm_is_corosync_peer_active(const crm_node_t *node)
Check whether a Corosync cluster peer is active.
Definition: corosync.c:531
bool pcmk__corosync_has_nodelist(void)
Definition: corosync.c:730
char uname[MAX_NAME]
Definition: cpg.c:5
char data[0]
Definition: cpg.c:10
uint32_t id
Definition: cpg.c:0
IPC interface to Pacemaker daemons.
#define crm_info(fmt, args...)
Definition: logging.h:362
#define do_crm_log(level, fmt, args...)
Log a message.
Definition: logging.h:168
#define crm_warn(fmt, args...)
Definition: logging.h:360
#define CRM_XS
Definition: logging.h:55
#define crm_crit(fmt, args...)
Definition: logging.h:358
#define crm_notice(fmt, args...)
Definition: logging.h:361
#define CRM_CHECK(expr, failure_action)
Definition: logging.h:227
#define crm_debug(fmt, args...)
Definition: logging.h:364
#define crm_err(fmt, args...)
Definition: logging.h:359
#define crm_trace(fmt, args...)
Definition: logging.h:365
#define LOG_TRACE
Definition: logging.h:37
gboolean crm_have_quorum
Definition: membership.c:64
crm_node_t * crm_update_peer_proc(const char *source, crm_node_t *node, uint32_t flag, const char *status)
Definition: membership.c:872
void pcmk__update_peer_expected(const char *source, crm_node_t *node, const char *expected)
Definition: membership.c:959
crm_node_t * pcmk__search_known_node_cache(unsigned int id, const char *uname, uint32_t flags)
Definition: membership.c:1257
int crm_remote_peer_cache_size(void)
Definition: membership.c:87
GHashTable * crm_peer_cache
Definition: membership.c:36
void crm_remote_peer_cache_refresh(xmlNode *cib)
Repopulate the remote peer cache based on CIB XML.
Definition: membership.c:243
#define clear_peer_flags(peer, flags_to_clear)
Definition: membership.c:76
void pcmk__reap_unseen_nodes(uint64_t membership)
Definition: membership.c:1088
int crm_terminate_member(int nodeid, const char *uname, void *unused)
Definition: membership.c:1288
guint reap_crm_member(uint32_t id, const char *name)
Remove all peer cache entries matching a node ID and/or uname.
Definition: membership.c:333
crm_node_t * pcmk__search_cluster_node_cache(unsigned int id, const char *uname)
Definition: membership.c:560
void crm_set_autoreap(gboolean autoreap)
Tell the library whether to automatically reap lost nodes.
Definition: membership.c:469
void crm_set_status_callback(void(*dispatch)(enum crm_status_type, crm_node_t *, const void *))
Set a client function that will be called after peer status changes.
Definition: membership.c:452
crm_node_t * crm_get_peer(unsigned int id, const char *uname)
Get a cluster node cache entry.
Definition: membership.c:700
crm_node_t * pcmk__search_node_caches(unsigned int id, const char *uname, uint32_t flags)
Definition: membership.c:504
crm_node_t * crm_get_peer_full(unsigned int id, const char *uname, int flags)
Get a node cache entry (cluster or Pacemaker Remote)
Definition: membership.c:532
void pcmk__refresh_node_caches_from_cib(xmlNode *cib)
Definition: membership.c:1240
int crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection)
Definition: membership.c:1294
GHashTable * crm_remote_peer_cache
Definition: membership.c:53
void crm_remote_peer_cache_remove(const char *node_name)
Definition: membership.c:147
#define set_peer_flags(peer, flags_to_set)
Definition: membership.c:69
crm_node_t * pcmk__update_peer_state(const char *source, crm_node_t *node, const char *state, uint64_t membership)
Update a node's state and membership information.
Definition: membership.c:1075
unsigned long long crm_peer_seq
Definition: membership.c:63
gboolean crm_is_peer_active(const crm_node_t *node)
Definition: membership.c:281
void crm_peer_init(void)
Definition: membership.c:399
void crm_peer_destroy(void)
Definition: membership.c:415
guint crm_active_peers(void)
Definition: membership.c:374
crm_node_t * crm_remote_peer_get(const char *node_name)
Get a remote node peer cache entry, creating it if necessary.
Definition: membership.c:107
#define XML_ATTR_UNAME
Definition: msg_xml.h:157
#define XML_ATTR_ID
Definition: msg_xml.h:134
#define XML_NODE_IN_CLUSTER
Definition: msg_xml.h:280
const char * crm_element_value(const xmlNode *data, const char *name)
Retrieve the value of an XML attribute.
Definition: nvpair.c:517
pcmk__action_result_t result
Definition: pcmk_fence.c:35
#define CRM_ASSERT(expr)
Definition: results.h:42
@ pcmk_rc_ok
Definition: results.h:148
Fencing aka. STONITH.
int stonith_api_kick(uint32_t nodeid, const char *uname, int timeout, bool off)
Definition: st_client.c:1959
#define pcmk__plural_s(i)
void pcmk__str_update(char **str, const char *value)
Definition: strings.c:1190
@ pcmk__str_casei
GHashTable * pcmk__strikey_table(GDestroyNotify key_destroy_func, GDestroyNotify value_destroy_func)
Definition: strings.c:649
uint32_t processes
Definition: cluster.h:58
char * uname
Definition: cluster.h:53
char * expected
Definition: cluster.h:71
uint64_t last_seen
Definition: cluster.h:57
uint32_t id
Definition: cluster.h:66
char * state
Definition: cluster.h:55
char * uuid
Definition: cluster.h:54
time_t when_lost
Definition: cluster.h:67
uint64_t flags
Definition: cluster.h:56
void crm_foreach_xpath_result(xmlNode *xml, const char *xpath, void(*helper)(xmlNode *, void *), void *user_data)
Run a supplied function for each result of an xpath search.
Definition: xpath.c:173
#define PCMK__XP_REMOTE_NODE_CONFIG
Definition: xml_internal.h:140
#define PCMK__XP_REMOTE_NODE_STATUS
Definition: xml_internal.h:145
#define PCMK__XP_MEMBER_NODE_CONFIG
Definition: xml_internal.h:129
#define PCMK__XP_GUEST_NODE_CONFIG
Definition: xml_internal.h:134