pacemaker 2.1.5-a3f44794f94
Scalable High-Availability cluster resource manager
pcmk_sched_fencing.c
Go to the documentation of this file.
1/*
2 * Copyright 2004-2022 the Pacemaker project contributors
3 *
4 * The version control history for this file may have further details.
5 *
6 * This source code is licensed under the GNU General Public License version 2
7 * or later (GPLv2+) WITHOUT ANY WARRANTY.
8 */
9
10#include <crm_internal.h>
11
12#include <glib.h>
13
14#include <crm/crm.h>
15#include <crm/pengine/status.h>
16#include <pacemaker-internal.h>
18
28static bool
29rsc_is_known_on(pe_resource_t *rsc, const pe_node_t *node)
30{
31 if (pe_hash_table_lookup(rsc->known_on, node->details->id)) {
32 return TRUE;
33
34 } else if ((rsc->variant == pe_native)
35 && pe_rsc_is_anon_clone(rsc->parent)
36 && pe_hash_table_lookup(rsc->parent->known_on, node->details->id)) {
37 /* We check only the parent, not the uber-parent, because we cannot
38 * assume that the resource is known if it is in an anonymously cloned
39 * group (which may be only partially known).
40 */
41 return TRUE;
42 }
43 return FALSE;
44}
45
/*
 * Order the actions of one resource relative to a fence action.
 *
 * Takes the fence target from stonith_op->node (returning early via CRM_CHECK
 * if stonith_op or its node is NULL), then walks rsc->actions and switches on
 * each action's "needs" value:
 *   - rsc_req_nothing: no ordering is created.
 *   - rsc_req_stonith: the case body is not visible in this listing.
 *   - rsc_req_quorum:  for a RSC_START action, when the target is found in
 *                      rsc->allowed_nodes and rsc_is_known_on() is false for
 *                      it, order_actions() is called with stonith_op as the
 *                      first action and the start as the second.
 *
 * NOTE(review): this text comes from a Doxygen listing; each line begins with
 * its original source line number, and original lines 56, 58, 73 and 93 were
 * dropped. Those presumably hold the rest of the parameter list (the caller
 * passes a third data_set argument), the declaration of "target", the
 * rsc_req_stonith case body, and the flags passed to order_actions() --
 * confirm against the upstream file before relying on this block.
 */
54static void
55order_start_vs_fencing(pe_resource_t *rsc, pe_action_t *stonith_op,
57{
59 GList *gIter = NULL;
60
61 CRM_CHECK(stonith_op && stonith_op->node, return);
62 target = stonith_op->node;
63
64 for (gIter = rsc->actions; gIter != NULL; gIter = gIter->next) {
65 pe_action_t *action = (pe_action_t *) gIter->data;
66
67 switch (action->needs) {
68 case rsc_req_nothing:
69 // Anything other than start or promote requires nothing
70 break;
71
72 case rsc_req_stonith:
74 break;
75
76 case rsc_req_quorum:
77 if (pcmk__str_eq(action->task, RSC_START, pcmk__str_casei)
78 && pe_hash_table_lookup(rsc->allowed_nodes, target->details->id)
79 && !rsc_is_known_on(rsc, target)) {
80
81 /* If we don't know the status of the resource on the node
82 * we're about to shoot, we have to assume it may be active
83 * there. Order the resource start after the fencing. This
84 * is analogous to waiting for all the probes for a resource
85 * to complete before starting it.
86 *
87 * The most likely explanation is that the DC died and took
88 * its status with it.
89 */
90 pe_rsc_debug(rsc, "Ordering %s after %s recovery", action->uuid,
91 pe__node_name(target));
92 order_actions(stonith_op, action,
94 }
95 break;
96 }
97 }
98}
99
/*
 * Handle the stop and demote actions of one resource that are implied by
 * fencing of the node in stonith_op->node.
 *
 * Visible behavior:
 *   1. Returns early (CRM_CHECK) if stonith_op or stonith_op->node is NULL.
 *   2. Collects the RSC_STOP actions of rsc on the target with
 *      pe__resource_actions(). When order_implicit is set and the list is
 *      non-empty, looks up the first RSC_STOP action of the topmost parent
 *      (uber_parent(rsc)) as parent_stop.
 *   3. For each stop: if order_implicit, orders stonith_op before parent_stop
 *      with pe_order_preserve; logs at notice level for a failed resource
 *      (pe_rsc_failed) or info level otherwise; and, when pe_rsc_notify is
 *      set, calls pe__order_notifs_after_fencing().
 *   4. Frees the list, then collects the RSC_DEMOTE actions and, for each one
 *      whose node is offline or unclean, or whose resource is failed, logs
 *      that the demote is implicit after fencing.
 *
 * The "#if 0" region is disabled code kept with its rationale; it refers to a
 * variable named "node" that is not declared anywhere in the visible block.
 *
 * NOTE(review): this text comes from a Doxygen listing; each line begins with
 * its original source line number, and original lines 110, 118, 132-133, 146,
 * 149, 164, 206, 233 and 239 were dropped. The dropped lines presumably
 * include the end of the parameter list, the declaration of "target", the
 * condition that sets order_implicit, the statements converting actions into
 * pseudo-actions, and several order_actions() calls -- confirm against the
 * upstream file; the description above covers only what is visible.
 */
108static void
109order_stop_vs_fencing(pe_resource_t *rsc, pe_action_t *stonith_op,
111{
112 GList *gIter = NULL;
113 GList *action_list = NULL;
114 bool order_implicit = false;
115
116 pe_resource_t *top = uber_parent(rsc);
117 pe_action_t *parent_stop = NULL;
119
120 CRM_CHECK(stonith_op && stonith_op->node, return);
121 target = stonith_op->node;
122
123 /* Get a list of stop actions potentially implied by the fencing */
124 action_list = pe__resource_actions(rsc, target, RSC_STOP, FALSE);
125
126 /* If resource requires fencing, implicit actions must occur after fencing.
127 *
128 * Implied stops and demotes of resources running on guest nodes are always
129 * ordered after fencing, even if the resource does not require fencing,
130 * because guest node "fencing" is actually just a resource stop.
131 */
134
135 order_implicit = true;
136 }
137
138 if (action_list && order_implicit) {
139 parent_stop = find_first_action(top->actions, NULL, RSC_STOP, NULL);
140 }
141
142 for (gIter = action_list; gIter != NULL; gIter = gIter->next) {
143 pe_action_t *action = (pe_action_t *) gIter->data;
144
145 // The stop would never complete, so convert it into a pseudo-action.
147
148 if (order_implicit) {
150
151 /* Order the stonith before the parent stop (if any).
152 *
153 * Also order the stonith before the resource stop, unless the
154 * resource is inside a bundle -- that would cause a graph loop.
155 * We can rely on the parent stop's ordering instead.
156 *
157 * User constraints must not order a resource in a guest node
158 * relative to the guest node container resource. The
159 * pe_order_preserve flag marks constraints as generated by the
160 * cluster and thus immune to that check (and is irrelevant if
161 * target is not a guest).
162 */
163 if (!pe_rsc_is_bundled(rsc)) {
165 }
166 order_actions(stonith_op, parent_stop, pe_order_preserve);
167 }
168
169 if (pcmk_is_set(rsc->flags, pe_rsc_failed)) {
170 crm_notice("Stop of failed resource %s is implicit %s %s is fenced",
171 rsc->id, (order_implicit? "after" : "because"),
172 pe__node_name(target));
173 } else {
174 crm_info("%s is implicit %s %s is fenced",
175 action->uuid, (order_implicit? "after" : "because"),
176 pe__node_name(target));
177 }
178
179 if (pcmk_is_set(rsc->flags, pe_rsc_notify)) {
180 pe__order_notifs_after_fencing(action, rsc, stonith_op);
181 }
182
183#if 0
184 /* It might be a good idea to stop healthy resources on a node about to
185 * be fenced, when possible.
186 *
187 * However, fencing must be done before a failed resource's
188 * (pseudo-)stop action, so that could create a loop. For example, given
189 * a group of A and B running on node N with a failed stop of B:
190 *
191 * fence N -> stop B (pseudo-op) -> stop A -> fence N
192 *
193 * The block below creates the stop A -> fence N ordering and therefore
194 * must (at least for now) be disabled. Instead, run the block above and
195 * treat all resources on N as B would be (i.e., as a pseudo-op after
196 * the fencing).
197 *
198 * @TODO Maybe break the "A requires B" dependency in
199 * pcmk__update_action_for_orderings() and use this block for healthy
200 * resources instead of the above.
201 */
202 crm_info("Moving healthy resource %s off %s before fencing",
203 rsc->id, pe__node_name(node));
204 pcmk__new_ordering(rsc, stop_key(rsc), NULL, NULL,
205 strdup(CRM_OP_FENCE), stonith_op,
207#endif
208 }
209
210 g_list_free(action_list);
211
212 /* Get a list of demote actions potentially implied by the fencing */
213 action_list = pe__resource_actions(rsc, target, RSC_DEMOTE, FALSE);
214
215 for (gIter = action_list; gIter != NULL; gIter = gIter->next) {
216 pe_action_t *action = (pe_action_t *) gIter->data;
217
218 if (!(action->node->details->online) || action->node->details->unclean
219 || pcmk_is_set(rsc->flags, pe_rsc_failed)) {
220
221 if (pcmk_is_set(rsc->flags, pe_rsc_failed)) {
222 pe_rsc_info(rsc,
223 "Demote of failed resource %s is implicit after %s is fenced",
224 rsc->id, pe__node_name(target));
225 } else {
226 pe_rsc_info(rsc, "%s is implicit after %s is fenced",
227 action->uuid, pe__node_name(target));
228 }
229
230 /* The demote would never complete and is now implied by the
231 * fencing, so convert it into a pseudo-action.
232 */
234
235 if (pe_rsc_is_bundled(rsc)) {
236 // Do nothing, let recovery be ordered after parent's implied stop
237
238 } else if (order_implicit) {
240 }
241 }
242 }
243
244 g_list_free(action_list);
245}
246
255static void
256rsc_stonith_ordering(pe_resource_t *rsc, pe_action_t *stonith_op,
258{
259 if (rsc->children) {
260 GList *gIter = NULL;
261
262 for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
263 pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
264
265 rsc_stonith_ordering(child_rsc, stonith_op, data_set);
266 }
267
268 } else if (!pcmk_is_set(rsc->flags, pe_rsc_managed)) {
269 pe_rsc_trace(rsc,
270 "Skipping fencing constraints for unmanaged resource: %s",
271 rsc->id);
272
273 } else {
274 order_start_vs_fencing(rsc, stonith_op, data_set);
275 order_stop_vs_fencing(rsc, stonith_op, data_set);
276 }
277}
278
290void
292{
293 CRM_CHECK(stonith_op && data_set, return);
294 for (GList *r = data_set->resources; r != NULL; r = r->next) {
295 rsc_stonith_ordering((pe_resource_t *) r->data, stonith_op, data_set);
296 }
297}
298
/*
 * Order a resource action after unfencing of a node.
 *
 * Inside a guarded block, creates an "on" fence action for the node with
 * pe_fence_op() (third argument TRUE, i.e. the "optional" parameter) and
 * orders it before "action" using the caller-supplied "order" flags. If
 * pcmk__node_unfenced(node) is false, it then builds a "required by ..."
 * reason string, passes it to trigger_unfencing(), and frees it.
 *
 * NOTE(review): this text comes from a Doxygen listing; each line begins with
 * its original source line number, and original lines 309 and 319-321 were
 * dropped. The cross-reference section of the listing gives the prototype as
 *   void pcmk__order_vs_unfence(pe_resource_t *rsc, pe_node_t *node,
 *                               pe_action_t *action, enum pe_ordering order)
 * Lines 319-321 presumably hold the "if" condition opening the block that is
 * closed at original line 340 -- confirm against the upstream file.
 */
308void
310 enum pe_ordering order)
311{
312 /* When unfencing is in use, we order unfence actions before any probe or
313 * start of resources that require unfencing, and also of fence devices.
314 *
315 * This might seem to violate the principle that fence devices require
316 * only quorum. However, fence agents that unfence often don't have enough
317 * information to even probe or start unless the node is first unfenced.
318 */
322
323 /* Start with an optional ordering. Requiring unfencing would result in
324 * the node being unfenced, and all its resources being stopped,
325 * whenever a new resource is added -- which would be highly suboptimal.
326 */
327 pe_action_t *unfence = pe_fence_op(node, "on", TRUE, NULL, FALSE,
328 rsc->cluster);
329
330 order_actions(unfence, action, order);
331
332 if (!pcmk__node_unfenced(node)) {
333 // But unfencing is required if it has never been done
334 char *reason = crm_strdup_printf("required by %s %s",
335 rsc->id, action->task);
336
337 trigger_unfencing(NULL, node, reason, NULL, rsc->cluster);
338 free(reason);
339 }
340 }
341}
342
/*
 * Create a fence pseudo-event for a guest node and order it relative to the
 * action that actually takes the guest down.
 *
 * The fence action label defaults to "off" and becomes "reboot" when the
 * container resource has a start action. After creating the fence op with
 * pe_fence_op(), one of three orderings is chosen:
 *   - the container stop is a pseudo-action: order the guest fence op after
 *     the fence op of the node in stop->node;
 *   - the container has a real stop: order the guest fence op after it;
 *   - no container stop: order the guest fence op (pe_order_optional) after
 *     any RSC_STOP of the connection resource, or create no ordering if there
 *     is none.
 * Finally pcmk__order_vs_fence() is called so other actions are ordered
 * against the pseudo-fence.
 *
 * Dereferences node->details->remote_rsc without a NULL check, so callers
 * must pass a node that has a remote_rsc; only "node" itself is asserted.
 *
 * NOTE(review): this text comes from a Doxygen listing; each line begins with
 * its original source line number, and original lines 350, 385, 400 and 404
 * were dropped. The cross-reference section gives the prototype as
 *   void pcmk__fence_guest(pe_node_t *node)
 * Line 385 presumably holds a statement following pe_fence_op(), and lines
 * 400 and 404 the flags argument of the two unterminated order_actions()
 * calls -- confirm against the upstream file.
 */
349void
351{
352 pe_resource_t *container = NULL;
353 pe_action_t *stop = NULL;
354 pe_action_t *stonith_op = NULL;
355
356 /* The fence action is just a label; we don't do anything differently for
357 * off vs. reboot. We specify it explicitly, rather than let it default to
358 * cluster's default action, because we are not _initiating_ fencing -- we
359 * are creating a pseudo-event to describe fencing that is already occurring
360 * by other means (container recovery).
361 */
362 const char *fence_action = "off";
363
364 CRM_ASSERT(node != NULL);
365
366 /* Check whether guest's container resource has any explicit stop or
367 * start (the stop may be implied by fencing of the guest's host).
368 */
369 container = node->details->remote_rsc->container;
370 if (container) {
371 stop = find_first_action(container->actions, NULL, CRMD_ACTION_STOP,
372 NULL);
373
374 if (find_first_action(container->actions, NULL, CRMD_ACTION_START,
375 NULL)) {
376 fence_action = "reboot";
377 }
378 }
379
380 /* Create a fence pseudo-event, so we have an event to order actions
381 * against, and the controller can always detect it.
382 */
383 stonith_op = pe_fence_op(node, fence_action, FALSE, "guest is unclean",
384 FALSE, node->details->data_set);
386
387 /* We want to imply stops/demotes after the guest is stopped, not wait until
388 * it is restarted, so we always order pseudo-fencing after stop, not start
389 * (even though start might be closer to what is done for a real reboot).
390 */
391 if ((stop != NULL) && pcmk_is_set(stop->flags, pe_action_pseudo)) {
392 pe_action_t *parent_stonith_op = pe_fence_op(stop->node, NULL, FALSE,
393 NULL, FALSE,
394 node->details->data_set);
395
396 crm_info("Implying guest %s is down (action %d) after %s fencing",
397 pe__node_name(node), stonith_op->id,
398 pe__node_name(stop->node));
399 order_actions(parent_stonith_op, stonith_op,
401
402 } else if (stop) {
403 order_actions(stop, stonith_op,
405 crm_info("Implying guest %s is down (action %d) "
406 "after container %s is stopped (action %d)",
407 pe__node_name(node), stonith_op->id,
408 container->id, stop->id);
409 } else {
410 /* If we're fencing the guest node but there's no stop for the guest
411 * resource, we must think the guest is already stopped. However, we may
412 * think so because its resource history was just cleaned. To avoid
413 * unnecessarily considering the guest node down if it's really up,
414 * order the pseudo-fencing after any stop of the connection resource,
415 * which will be ordered after any container (re-)probe.
416 */
417 stop = find_first_action(node->details->remote_rsc->actions, NULL,
418 RSC_STOP, NULL);
419
420 if (stop) {
421 order_actions(stop, stonith_op, pe_order_optional);
422 crm_info("Implying guest %s is down (action %d) "
423 "after connection is stopped (action %d)",
424 pe__node_name(node), stonith_op->id, stop->id);
425 } else {
426 /* Not sure why we're fencing, but everything must already be
427 * cleanly stopped.
428 */
429 crm_info("Implying guest %s is down (action %d) ",
430 pe__node_name(node), stonith_op->id);
431 }
432 }
433
434 // Order/imply other actions relative to pseudo-fence as with real fence
435 pcmk__order_vs_fence(stonith_op, node->details->data_set);
436}
437
447bool
449{
450 const char *unfenced = pe_node_attribute_raw(node, CRM_ATTR_UNFENCED);
451
452 return !pcmk__str_eq(unfenced, "0", pcmk__str_null_matches);
453}
454
/*
 * Order a resource's stop before, and its start after, unfencing of a node.
 *
 * Both arguments are untyped gpointers: "data" is cast to a pe_node_t * (the
 * node to unfence) and "user_data" to a pe_resource_t * (the resource to
 * order). This signature presumably lets the function be used as a GLib
 * list/hash iteration callback -- confirm against callers.
 *
 * Creates an "on" fence action for the node with pe_fence_op() and then adds
 * two orderings with pcmk__new_ordering(): stop_key(rsc) before the unfence
 * action, and the unfence action before start_key(rsc). The task strings
 * passed for the unfence side are fresh strdup() copies of unfence->uuid.
 *
 * NOTE(review): this text comes from a Doxygen listing; each line begins with
 * its original source line number, and original lines 491 and 496 were
 * dropped. They presumably hold the flags argument of each
 * pcmk__new_ordering() call -- confirm against the upstream file.
 */
462void
463pcmk__order_restart_vs_unfence(gpointer data, gpointer user_data)
464{
465 pe_node_t *node = (pe_node_t *) data;
466 pe_resource_t *rsc = (pe_resource_t *) user_data;
467
468 pe_action_t *unfence = pe_fence_op(node, "on", true, NULL, false,
469 rsc->cluster);
470
471 crm_debug("Ordering any stops of %s before %s, and any starts after",
472 rsc->id, unfence->uuid);
473
474 /*
475 * It would be more efficient to order clone resources once,
476 * rather than order each instance, but ordering the instance
477 * allows us to avoid unnecessary dependencies that might conflict
478 * with user constraints.
479 *
480 * @TODO: This constraint can still produce a transition loop if the
481 * resource has a stop scheduled on the node being unfenced, and
482 * there is a user ordering constraint to start some other resource
483 * (which will be ordered after the unfence) before stopping this
484 * resource. An example is "start some slow-starting cloned service
485 * before stopping an associated virtual IP that may be moving to
486 * it":
487 * stop this -> unfencing -> start that -> stop this
488 */
489 pcmk__new_ordering(rsc, stop_key(rsc), NULL,
490 NULL, strdup(unfence->uuid), unfence,
492 rsc->cluster);
493
494 pcmk__new_ordering(NULL, strdup(unfence->uuid), unfence,
495 rsc, start_key(rsc), NULL,
497 rsc->cluster);
498}
char * crm_strdup_printf(char const *format,...) G_GNUC_PRINTF(1
#define pcmk_is_set(g, f)
Convenience alias for pcmk_all_flags_set(), to check single flag.
Definition: util.h:121
@ rsc_req_quorum
Definition: common.h:87
@ rsc_req_stonith
Definition: common.h:88
@ rsc_req_nothing
Definition: common.h:86
pe_resource_t * uber_parent(pe_resource_t *rsc)
Definition: complex.c:912
char data[0]
Definition: cpg.c:10
A dumping ground.
#define CRMD_ACTION_STOP
Definition: crm.h:177
#define RSC_DEMOTE
Definition: crm.h:207
#define RSC_START
Definition: crm.h:199
#define RSC_STOP
Definition: crm.h:202
#define CRMD_ACTION_START
Definition: crm.h:174
#define CRM_ATTR_UNFENCED
Definition: crm.h:120
#define CRM_OP_FENCE
Definition: crm.h:144
G_GNUC_INTERNAL void pcmk__new_ordering(pe_resource_t *first_rsc, char *first_task, pe_action_t *first_action, pe_resource_t *then_rsc, char *then_task, pe_action_t *then_action, uint32_t flags, pe_working_set_t *data_set)
#define crm_info(fmt, args...)
Definition: logging.h:362
#define crm_notice(fmt, args...)
Definition: logging.h:361
#define CRM_CHECK(expr, failure_action)
Definition: logging.h:227
#define crm_debug(fmt, args...)
Definition: logging.h:364
pe_working_set_t * data_set
const char * action
Definition: pcmk_fence.c:30
const char * target
Definition: pcmk_fence.c:29
void pcmk__order_vs_fence(pe_action_t *stonith_op, pe_working_set_t *data_set)
void pcmk__order_vs_unfence(pe_resource_t *rsc, pe_node_t *node, pe_action_t *action, enum pe_ordering order)
void pcmk__order_restart_vs_unfence(gpointer data, gpointer user_data)
bool pcmk__node_unfenced(pe_node_t *node)
void pcmk__fence_guest(pe_node_t *node)
#define pe_rsc_notify
Definition: pe_types.h:261
#define pe_rsc_fence_device
Definition: pe_types.h:263
#define pe_rsc_needs_unfencing
Definition: pe_types.h:295
#define pe_rsc_managed
Definition: pe_types.h:257
pe_ordering
Definition: pe_types.h:479
@ pe_order_implies_then
Definition: pe_types.h:485
@ pe_order_same_node
Definition: pe_types.h:506
@ pe_order_implies_then_on_node
Definition: pe_types.h:494
@ pe_order_preserve
Definition: pe_types.h:516
@ pe_order_optional
Definition: pe_types.h:481
@ pe_order_runnable_left
Definition: pe_types.h:491
#define pe_flag_enable_unfencing
Definition: pe_types.h:101
@ pe_action_runnable
Definition: pe_types.h:300
@ pe_action_implied_by_stonith
Definition: pe_types.h:305
@ pe_action_pseudo
Definition: pe_types.h:299
@ pe_native
Definition: pe_types.h:38
#define pe_rsc_needs_fencing
Definition: pe_types.h:294
#define pe_rsc_failed
Definition: pe_types.h:276
GList * pe__resource_actions(const pe_resource_t *rsc, const pe_node_t *node, const char *task, bool require_node)
Find all actions of given type for a resource.
Definition: pe_actions.c:1398
const char * pe_node_attribute_raw(const pe_node_t *node, const char *name)
Definition: common.c:562
void trigger_unfencing(pe_resource_t *rsc, pe_node_t *node, const char *reason, pe_action_t *dependency, pe_working_set_t *data_set)
Definition: utils.c:604
pe_action_t * find_first_action(const GList *input, const char *uuid, const char *task, const pe_node_t *on_node)
Definition: pe_actions.c:1296
#define start_key(rsc)
Definition: internal.h:420
#define stop_key(rsc)
Definition: internal.h:414
#define pe_rsc_debug(rsc, fmt, args...)
Definition: internal.h:46
gboolean order_actions(pe_action_t *lh_action, pe_action_t *rh_action, enum pe_ordering order)
Definition: utils.c:474
#define pe_rsc_trace(rsc, fmt, args...)
Definition: internal.h:47
pe_action_t * pe_fence_op(pe_node_t *node, const char *op, bool optional, const char *reason, bool priority_delay, pe_working_set_t *data_set)
Definition: pe_actions.c:1081
void pe__order_notifs_after_fencing(pe_action_t *action, pe_resource_t *rsc, pe_action_t *stonith_op)
Definition: pe_notif.c:977
#define pe_rsc_info(rsc, fmt, args...)
Definition: internal.h:45
#define pe__set_action_flags(action, flags_to_set)
Definition: internal.h:86
bool pe__is_guest_node(const pe_node_t *node)
Definition: remote.c:33
#define CRM_ASSERT(expr)
Definition: results.h:42
Cluster status and scheduling.
@ pcmk__str_null_matches
@ pcmk__str_casei
char * uuid
Definition: pe_types.h:411
pe_node_t * node
Definition: pe_types.h:407
enum pe_action_flags flags
Definition: pe_types.h:415
struct pe_node_shared_s * details
Definition: pe_types.h:252
const char * id
Definition: pe_types.h:215
pe_working_set_t * data_set
Cluster that this node is part of.
Definition: pe_types.h:245
pe_resource_t * remote_rsc
Definition: pe_types.h:237
GList * actions
Definition: pe_types.h:366
enum pe_obj_types variant
Definition: pe_types.h:338
GList * children
Definition: pe_types.h:384
GHashTable * known_on
Definition: pe_types.h:374
pe_working_set_t * cluster
Definition: pe_types.h:335
pe_resource_t * container
Definition: pe_types.h:387
char * id
Definition: pe_types.h:329
GHashTable * allowed_nodes
Definition: pe_types.h:375
unsigned long long flags
Definition: pe_types.h:355
pe_resource_t * parent
Definition: pe_types.h:336
GList * resources
Definition: pe_types.h:165
unsigned long long flags
Definition: pe_types.h:153