Skip to content

Commit fa968ce

Browse files
authored
Allow delaying machine config pool upgrades (node reboots) (#64)
1 parent cae7dbc commit fa968ce

12 files changed

Lines changed: 777 additions & 27 deletions

api/v1beta1/upgradejob_types.go

Lines changed: 44 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,19 @@ const (
1818
UpgradeJobConditionUpgradeCompleted = "UpgradeCompleted"
1919
// UpgradeJobConditionPostHealthCheckDone is the condition type for a post health check done upgrade job
2020
UpgradeJobConditionPostHealthCheckDone = "PostHealthCheckDone"
21+
// UpgradeJobConditionPaused is the condition type for a paused upgrade job.
22+
// An upgrade job can be paused if `.spec.machineConfigPools` matches a pool and `delayUpgrade` is set.
23+
UpgradeJobConditionPaused = "Paused"
24+
// UpgradeJobConditionMachineConfigPoolsPaused is true if the controller paused any machine config pools.
25+
// Does not correlate with any upgrade specific condition.
26+
UpgradeJobConditionMachineConfigPoolsPaused = "MachineConfigPoolsPaused"
2127

2228
// UpgradeJobReasonFailed is the generic reason for a failed upgrade job
2329
UpgradeJobReasonFailed = "Failed"
2430
// UpgradeJobReasonExpired is used when the upgrade job is not started before the startBefore time
2531
UpgradeJobReasonExpired = "Expired"
32+
// UpgradeJobReasonUnpausingPoolsExpired is used when the upgrade job was not able to unpause the machine config pools before the delayMax time
33+
UpgradeJobReasonUnpausingPoolsExpired = "UnpausingPoolsExpired"
2634
// UpgradeJobReasonTimedOut is used when the upgrade job is not completed before the upgradeTimeout time
2735
UpgradeJobReasonTimedOut = "TimedOut"
2836
// UpgradeJobReasonPreHealthCheckFailed is used when the health check failed
@@ -41,6 +49,10 @@ const (
4149
UpgradeJobReasonCompleted = "Completed"
4250
// UpgradeJobReasonInProgress is used when the pre health check was done
4351
UpgradeJobReasonInProgress = "InProgress"
52+
// UpgradeJobReasonNoManagedPools is used when no machine config pools are managed by the upgrade job
53+
UpgradeJobReasonNoManagedPools = "NoManagedPools"
54+
// UpgradeJobReasonDelaySet is used if the upgrade job paused machine config pools due to delayUpgrade
55+
UpgradeJobReasonDelaySet = "DelaySet"
4456
)
4557

4658
// UpgradeJobSpec defines the desired state of UpgradeJob
@@ -62,7 +74,8 @@ type UpgradeJobSpec struct {
6274

6375
// UpgradeJobConfig defines the configuration for the upgrade job
6476
type UpgradeJobConfig struct {
65-
// UpgradeTimeout defines the timeout after which the upgrade is considered failed
77+
// UpgradeTimeout defines the timeout after which the upgrade is considered failed.
78+
// Relative to the `.spec.startAfter` timestamp of the upgrade job.
6679
// +kubebuilder:validation:Type=string
6780
// +kubebuilder:validation:Format=duration
6881
// +kubebuilder:default:="12h"
@@ -72,6 +85,36 @@ type UpgradeJobConfig struct {
7285
PreUpgradeHealthChecks UpgradeJobHealthCheck `json:"preUpgradeHealthChecks"`
7386
// PostUpgradeHealthChecks defines the health checks to be performed after the upgrade
7487
PostUpgradeHealthChecks UpgradeJobHealthCheck `json:"postUpgradeHealthChecks"`
88+
89+
// MachineConfigPools defines the machine config pool specific configuration for the upgrade job
90+
// +optional
91+
MachineConfigPools []UpgradeJobMachineConfigPoolSpec `json:"machineConfigPools,omitempty"`
92+
}
93+
94+
// UpgradeJobMachineConfigPoolSpec allows configuring the upgrade of a machine config pool
95+
type UpgradeJobMachineConfigPoolSpec struct {
96+
// MatchLabels defines the labels to match the machine config pool.
97+
// If empty, all machine config pools are matched.
98+
// If nil, no machine config pools are matched.
99+
// +optional
100+
MatchLabels *metav1.LabelSelector `json:"matchLabels,omitempty"`
101+
102+
// DelayUpgrade defines whether to delay the upgrade of the machine config pool
103+
// +optional
104+
DelayUpgrade UpgradeJobMachineConfigPoolDelayUpgradeSpec `json:"delayUpgrade,omitempty"`
105+
}
106+
107+
// UpgradeJobMachineConfigPoolDelayUpgradeSpec defines the delay for the upgrade of a machine config pool
108+
type UpgradeJobMachineConfigPoolDelayUpgradeSpec struct {
109+
// DelayMin defines the delay after which the upgrade of the machine config pool should start.
110+
// Relative to the `.spec.startAfter` timestamp of the upgrade job.
111+
// +optional
112+
DelayMin metav1.Duration `json:"delayMin,omitempty"`
113+
// DelayMax defines the maximum delay after which the upgrade of the machine config pool should start.
114+
// Relative to the `.spec.startBefore` timestamp of the upgrade job.
115+
// If the upgrade of the machine config pool can't be started before this time, it is considered failed.
116+
// +optional
117+
DelayMax metav1.Duration `json:"delayMax,omitempty"`
75118
}
76119

77120
// UpgradeJobHealthCheck defines the health checks to be performed

api/v1beta1/upgradejobhook_types.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,8 @@ const (
2424
EventCreate UpgradeEvent = "Create"
2525
// EventStart is the event type for when a job is started.
2626
EventStart UpgradeEvent = "Start"
27-
// UpgradeCompleted is the event type for when the upgrade is completed and health checks have passed.
27+
// EventUpgradeComplete is the event type for when the upgrade is completed and health checks have passed,
28+
// but before any paused MachineConfigPools are done upgrading.
2829
EventUpgradeComplete UpgradeEvent = "UpgradeComplete"
2930

3031
// EventFinish is the event type for when a job is finished regardless of outcome.

api/v1beta1/zz_generated.deepcopy.go

Lines changed: 48 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

config/crd/bases/managedupgrade.appuio.io_upgradeconfigs.yaml

Lines changed: 76 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,79 @@ spec:
7070
description: UpgradeJobConfig defines the configuration for
7171
the upgrade job
7272
properties:
73+
machineConfigPools:
74+
description: MachineConfigPools defines the machine config
75+
pool specific configuration for the upgrade job
76+
items:
77+
description: UpgradeJobMachineConfigPoolSpec allows
78+
configuring the upgrade of a machine config pool
79+
properties:
80+
delayUpgrade:
81+
description: DelayUpgrade defines whether to delay
82+
the upgrade of the machine config pool
83+
properties:
84+
delayMax:
85+
description: |-
86+
DelayMax defines the maximum delay after which the upgrade of the machine config pool should start.
87+
Relative to the `.spec.startBefore` timestamp of the upgrade job.
88+
If the upgrade of the machine config pool can't be started before this time, it is considered failed.
89+
type: string
90+
delayMin:
91+
description: |-
92+
DelayMin defines the delay after which the upgrade of the machine config pool should start.
93+
Relative to the `.spec.startAfter` timestamp of the upgrade job.
94+
type: string
95+
type: object
96+
matchLabels:
97+
description: |-
98+
MatchLabels defines the labels to match the machine config pool.
99+
If empty, all machine config pools are matched.
100+
If nil, no machine config pools are matched.
101+
properties:
102+
matchExpressions:
103+
description: matchExpressions is a list of label
104+
selector requirements. The requirements are
105+
ANDed.
106+
items:
107+
description: |-
108+
A label selector requirement is a selector that contains values, a key, and an operator that
109+
relates the key and values.
110+
properties:
111+
key:
112+
description: key is the label key that
113+
the selector applies to.
114+
type: string
115+
operator:
116+
description: |-
117+
operator represents a key's relationship to a set of values.
118+
Valid operators are In, NotIn, Exists and DoesNotExist.
119+
type: string
120+
values:
121+
description: |-
122+
values is an array of string values. If the operator is In or NotIn,
123+
the values array must be non-empty. If the operator is Exists or DoesNotExist,
124+
the values array must be empty. This array is replaced during a strategic
125+
merge patch.
126+
items:
127+
type: string
128+
type: array
129+
required:
130+
- key
131+
- operator
132+
type: object
133+
type: array
134+
matchLabels:
135+
additionalProperties:
136+
type: string
137+
description: |-
138+
matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels
139+
map is equivalent to an element of matchExpressions, whose key field is "key", the
140+
operator is "In", and the values array contains only "value". The requirements are ANDed.
141+
type: object
142+
type: object
143+
x-kubernetes-map-type: atomic
144+
type: object
145+
type: array
73146
postUpgradeHealthChecks:
74147
description: PostUpgradeHealthChecks defines the health
75148
checks to be performed after the upgrade
@@ -108,8 +181,9 @@ spec:
108181
type: object
109182
upgradeTimeout:
110183
default: 12h
111-
description: UpgradeTimeout defines the timeout after
112-
which the upgrade is considered failed
184+
description: |-
185+
UpgradeTimeout defines the timeout after which the upgrade is considered failed.
186+
Relative to the `.spec.startAfter` timestamp of the upgrade job.
113187
format: duration
114188
type: string
115189
required:

config/crd/bases/managedupgrade.appuio.io_upgradejobs.yaml

Lines changed: 75 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,78 @@ spec:
4343
description: UpgradeJobConfig defines the configuration for the upgrade
4444
job
4545
properties:
46+
machineConfigPools:
47+
description: MachineConfigPools defines the machine config pool
48+
specific configuration for the upgrade job
49+
items:
50+
description: UpgradeJobMachineConfigPoolSpec allows configuring
51+
the upgrade of a machine config pool
52+
properties:
53+
delayUpgrade:
54+
description: DelayUpgrade defines whether to delay the upgrade
55+
of the machine config pool
56+
properties:
57+
delayMax:
58+
description: |-
59+
DelayMax defines the maximum delay after which the upgrade of the machine config pool should start.
60+
Relative to the `.spec.startBefore` timestamp of the upgrade job.
61+
If the upgrade of the machine config pool can't be started before this time, it is considered failed.
62+
type: string
63+
delayMin:
64+
description: |-
65+
DelayMin defines the delay after which the upgrade of the machine config pool should start.
66+
Relative to the `.spec.startAfter` timestamp of the upgrade job.
67+
type: string
68+
type: object
69+
matchLabels:
70+
description: |-
71+
MatchLabels defines the labels to match the machine config pool.
72+
If empty, all machine config pools are matched.
73+
If nil, no machine config pools are matched.
74+
properties:
75+
matchExpressions:
76+
description: matchExpressions is a list of label selector
77+
requirements. The requirements are ANDed.
78+
items:
79+
description: |-
80+
A label selector requirement is a selector that contains values, a key, and an operator that
81+
relates the key and values.
82+
properties:
83+
key:
84+
description: key is the label key that the selector
85+
applies to.
86+
type: string
87+
operator:
88+
description: |-
89+
operator represents a key's relationship to a set of values.
90+
Valid operators are In, NotIn, Exists and DoesNotExist.
91+
type: string
92+
values:
93+
description: |-
94+
values is an array of string values. If the operator is In or NotIn,
95+
the values array must be non-empty. If the operator is Exists or DoesNotExist,
96+
the values array must be empty. This array is replaced during a strategic
97+
merge patch.
98+
items:
99+
type: string
100+
type: array
101+
required:
102+
- key
103+
- operator
104+
type: object
105+
type: array
106+
matchLabels:
107+
additionalProperties:
108+
type: string
109+
description: |-
110+
matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels
111+
map is equivalent to an element of matchExpressions, whose key field is "key", the
112+
operator is "In", and the values array contains only "value". The requirements are ANDed.
113+
type: object
114+
type: object
115+
x-kubernetes-map-type: atomic
116+
type: object
117+
type: array
46118
postUpgradeHealthChecks:
47119
description: PostUpgradeHealthChecks defines the health checks
48120
to be performed after the upgrade
@@ -81,8 +153,9 @@ spec:
81153
type: object
82154
upgradeTimeout:
83155
default: 12h
84-
description: UpgradeTimeout defines the timeout after which the
85-
upgrade is considered failed
156+
description: |-
157+
UpgradeTimeout defines the timeout after which the upgrade is considered failed.
158+
Relative to the `.spec.startAfter` timestamp of the upgrade job.
86159
format: duration
87160
type: string
88161
required:

0 commit comments

Comments
 (0)