diff --git a/DOC.md b/DOC.md
index fc56e71..da1cc45 100644
--- a/DOC.md
+++ b/DOC.md
@@ -94,6 +94,13 @@ ApplicationDisruptionBudgetSpec defines the desired state of ApplicationDisrupti
A NodeDisruption is allowed if at most "maxDisruptions" nodes selected by selectors are unavailable after the disruption.
true |
+
+ | freeze |
+ object |
+
+ Define the freeze status of the budget. Frozen budget reject all disruptions ignoring any other constraints
+ |
+ false |
| healthHook |
object |
@@ -121,6 +128,40 @@ Maintenance will proceed only if the endpoint responds 2XX.
+### ApplicationDisruptionBudget.spec.freeze
+[↩ Parent](#applicationdisruptionbudgetspec)
+
+
+
+Define the freeze status of the budget. Frozen budget reject all disruptions ignoring any other constraints
+
+
+
+
+ | Name |
+ Type |
+ Description |
+ Required |
+
+
+
+ | enabled |
+ boolean |
+
+ Freeze the budget to prevent any disruptions
+ |
+ false |
+
+ | reason |
+ string |
+
+ Reason of the freeze
+ |
+ false |
+
+
+
+
### ApplicationDisruptionBudget.spec.healthHook
[↩ Parent](#applicationdisruptionbudgetspec)
@@ -492,6 +533,13 @@ NodeDisruptionBudgetSpec defines the desired state of NodeDisruptionBudget
A NodeDisruption is allowed if at most "minUndisruptedNodes" nodes selected by selectors are unavailable after the disruption.
true |
+
+ | freeze |
+ object |
+
+ Define the freeze status of the budget. Frozen budget reject all disruptions ignoring any other constraints
+ |
+ false |
| nodeSelector |
object |
@@ -503,6 +551,40 @@ NodeDisruptionBudgetSpec defines the desired state of NodeDisruptionBudget
+### NodeDisruptionBudget.spec.freeze
+[↩ Parent](#nodedisruptionbudgetspec)
+
+
+
+Define the freeze status of the budget. Frozen budget reject all disruptions ignoring any other constraints
+
+
+
+
+ | Name |
+ Type |
+ Description |
+ Required |
+
+
+
+ | enabled |
+ boolean |
+
+ Freeze the budget to prevent any disruptions
+ |
+ false |
+
+ | reason |
+ string |
+
+ Reason of the freeze
+ |
+ false |
+
+
+
+
### NodeDisruptionBudget.spec.nodeSelector
[↩ Parent](#nodedisruptionbudgetspec)
diff --git a/Makefile b/Makefile
index 5b6ce47..8181796 100644
--- a/Makefile
+++ b/Makefile
@@ -75,7 +75,7 @@ test: manifests generate fmt vet envtest lint ## Run tests.
##@ Build
.PHONY: build
-build: manifests generate fmt vet ## Build manager binary.
+build: manifests generate fmt vet gen-doc ## Build manager binary.
CGO_ENABLED=0 go build -o bin/manager cmd/main.go
.PHONY: run
diff --git a/README.md b/README.md
index cdd010a..31881d0 100644
--- a/README.md
+++ b/README.md
@@ -162,7 +162,6 @@ In some cases, an application can be unhealthy even if all its pods are running.
You can select Pods and/or PVCs.
-
##### PVC selector
The main reason of using a PVC selector is to ensure that node that contains data don't enter maintenance
@@ -182,6 +181,11 @@ The hook will be called with a POST method containing the JSON encoded NodeDisru
Note: It is not a replacement for readiness probes but a complement.
+##### Freeze
+
+Budgets support freezing disruptions. By setting `spec.Freeze.Enabled`, the budget will reject all disruptions and give the reason specified in `spec.Freeze.Reason`.
+It is equivalent to setting 0 as the max disruptions but it provide better messages.
+
#### Sample object
```yaml
diff --git a/api/v1alpha1/applicationdisruptionbudget_types.go b/api/v1alpha1/applicationdisruptionbudget_types.go
index 6d1a740..34bd40e 100644
--- a/api/v1alpha1/applicationdisruptionbudget_types.go
+++ b/api/v1alpha1/applicationdisruptionbudget_types.go
@@ -43,6 +43,16 @@ type ApplicationDisruptionBudgetSpec struct {
// Maintenance will proceed only if the endpoint responds 2XX.
// +kubebuilder:validation:Optional
HealthHook HealthHookSpec `json:"healthHook,omitempty"`
+ // Define the freeze status of the budget. Frozen budget reject all disruptions ignoring any other constraints
+ Freeze FreezeSpec `json:"freeze,omitempty"`
+}
+
+// FreezeSpec defines the freeze status of the budget
+type FreezeSpec struct {
+ // Freeze the budget to prevent any disruptions
+ Enabled bool `json:"enabled,omitempty"`
+ // Reason of the freeze
+ Reason string `json:"reason,omitempty"`
}
type HealthHookSpec struct {
diff --git a/api/v1alpha1/nodedisruptionbudget_types.go b/api/v1alpha1/nodedisruptionbudget_types.go
index a22da6c..df1df78 100644
--- a/api/v1alpha1/nodedisruptionbudget_types.go
+++ b/api/v1alpha1/nodedisruptionbudget_types.go
@@ -37,6 +37,8 @@ type NodeDisruptionBudgetSpec struct {
MinUndisruptedNodes int `json:"minUndisruptedNodes"`
// NodeSelector query over pods whose nodes are managed by the disruption budget.
NodeSelector metav1.LabelSelector `json:"nodeSelector,omitempty"`
+ // Define the freeze status of the budget. Frozen budget reject all disruptions ignoring any other constraints
+ Freeze FreezeSpec `json:"freeze,omitempty"`
}
//+kubebuilder:object:root=true
diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go
index 8ffcaf9..04327a4 100644
--- a/api/v1alpha1/zz_generated.deepcopy.go
+++ b/api/v1alpha1/zz_generated.deepcopy.go
@@ -89,6 +89,7 @@ func (in *ApplicationDisruptionBudgetSpec) DeepCopyInto(out *ApplicationDisrupti
in.PodSelector.DeepCopyInto(&out.PodSelector)
in.PVCSelector.DeepCopyInto(&out.PVCSelector)
out.HealthHook = in.HealthHook
+ out.Freeze = in.Freeze
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ApplicationDisruptionBudgetSpec.
@@ -157,6 +158,21 @@ func (in *DisruptionBudgetStatus) DeepCopy() *DisruptionBudgetStatus {
return out
}
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *FreezeSpec) DeepCopyInto(out *FreezeSpec) {
+ *out = *in
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new FreezeSpec.
+func (in *FreezeSpec) DeepCopy() *FreezeSpec {
+ if in == nil {
+ return nil
+ }
+ out := new(FreezeSpec)
+ in.DeepCopyInto(out)
+ return out
+}
+
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *HealthHookSpec) DeepCopyInto(out *HealthHookSpec) {
*out = *in
@@ -277,6 +293,7 @@ func (in *NodeDisruptionBudgetList) DeepCopyObject() runtime.Object {
func (in *NodeDisruptionBudgetSpec) DeepCopyInto(out *NodeDisruptionBudgetSpec) {
*out = *in
in.NodeSelector.DeepCopyInto(&out.NodeSelector)
+ out.Freeze = in.Freeze
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeDisruptionBudgetSpec.
diff --git a/chart/templates/applicationdisruptionbudget-crd.yaml b/chart/templates/applicationdisruptionbudget-crd.yaml
index 83d2c9d..724eda5 100644
--- a/chart/templates/applicationdisruptionbudget-crd.yaml
+++ b/chart/templates/applicationdisruptionbudget-crd.yaml
@@ -54,6 +54,17 @@ spec:
description: ApplicationDisruptionBudgetSpec defines the desired state of
ApplicationDisruptionBudget
properties:
+ freeze:
+ description: Define the freeze status of the budget. Frozen budget reject
+ all disruptions ignoring any other constraints
+ properties:
+ enabled:
+ description: Freeze the budget to prevent any disruptions
+ type: boolean
+ reason:
+ description: Reason of the freeze
+ type: string
+ type: object
healthHook:
description: |-
Define a optional hook to call when validating a NodeDisruption.
diff --git a/chart/templates/nodedisruptionbudget-crd.yaml b/chart/templates/nodedisruptionbudget-crd.yaml
index cb9612f..5306384 100644
--- a/chart/templates/nodedisruptionbudget-crd.yaml
+++ b/chart/templates/nodedisruptionbudget-crd.yaml
@@ -56,6 +56,17 @@ spec:
spec:
description: NodeDisruptionBudgetSpec defines the desired state of NodeDisruptionBudget
properties:
+ freeze:
+ description: Define the freeze status of the budget. Frozen budget reject
+ all disruptions ignoring any other constraints
+ properties:
+ enabled:
+ description: Freeze the budget to prevent any disruptions
+ type: boolean
+ reason:
+ description: Reason of the freeze
+ type: string
+ type: object
maxDisruptedNodes:
description: A NodeDisruption is allowed if at most "maxDisruptedNodes"
nodes selected by selectors are unavailable after the disruption.
diff --git a/config/crd/bases/nodedisruption.criteo.com_applicationdisruptionbudgets.yaml b/config/crd/bases/nodedisruption.criteo.com_applicationdisruptionbudgets.yaml
index bd2f461..dbbaf7c 100644
--- a/config/crd/bases/nodedisruption.criteo.com_applicationdisruptionbudgets.yaml
+++ b/config/crd/bases/nodedisruption.criteo.com_applicationdisruptionbudgets.yaml
@@ -53,6 +53,17 @@ spec:
description: ApplicationDisruptionBudgetSpec defines the desired state
of ApplicationDisruptionBudget
properties:
+ freeze:
+ description: Define the freeze status of the budget. Frozen budget
+ reject all disruptions ignoring any other constraints
+ properties:
+ enabled:
+ description: Freeze the budget to prevent any disruptions
+ type: boolean
+ reason:
+ description: Reason of the freeze
+ type: string
+ type: object
healthHook:
description: |-
Define a optional hook to call when validating a NodeDisruption.
diff --git a/config/crd/bases/nodedisruption.criteo.com_nodedisruptionbudgets.yaml b/config/crd/bases/nodedisruption.criteo.com_nodedisruptionbudgets.yaml
index b418220..d804c59 100644
--- a/config/crd/bases/nodedisruption.criteo.com_nodedisruptionbudgets.yaml
+++ b/config/crd/bases/nodedisruption.criteo.com_nodedisruptionbudgets.yaml
@@ -55,6 +55,17 @@ spec:
spec:
description: NodeDisruptionBudgetSpec defines the desired state of NodeDisruptionBudget
properties:
+ freeze:
+ description: Define the freeze status of the budget. Frozen budget
+ reject all disruptions ignoring any other constraints
+ properties:
+ enabled:
+ description: Freeze the budget to prevent any disruptions
+ type: boolean
+ reason:
+ description: Reason of the freeze
+ type: string
+ type: object
maxDisruptedNodes:
description: A NodeDisruption is allowed if at most "maxDisruptedNodes"
nodes selected by selectors are unavailable after the disruption.
diff --git a/internal/controller/applicationdisruptionbudget_controller.go b/internal/controller/applicationdisruptionbudget_controller.go
index 7e70ec9..bf64e61 100644
--- a/internal/controller/applicationdisruptionbudget_controller.go
+++ b/internal/controller/applicationdisruptionbudget_controller.go
@@ -115,6 +115,12 @@ func PruneADBMetrics(ref nodedisruptionv1alpha1.NamespacedName) {
// UpdateADBMetrics update metrics for an ADB
func UpdateADBMetrics(ref nodedisruptionv1alpha1.NamespacedName, adb *nodedisruptionv1alpha1.ApplicationDisruptionBudget) {
DisruptionBudgetMaxDisruptions.WithLabelValues(ref.Namespace, ref.Name, ref.Kind).Set(float64(adb.Spec.MaxDisruptions))
+ if adb.Spec.Freeze.Enabled {
+ DisruptionBudgetFrozen.WithLabelValues(ref.Namespace, ref.Name, ref.Kind).Set(1)
+ } else {
+ DisruptionBudgetFrozen.WithLabelValues(ref.Namespace, ref.Name, ref.Kind).Set(0)
+ }
+
UpdateBudgetStatusMetrics(ref, adb.Status)
}
@@ -199,8 +205,29 @@ func (r *ApplicationDisruptionBudgetResolver) IsImpacted(disruptedNodes resolver
}
// Return the number of disruption allowed considering a list of current node disruptions
-func (r *ApplicationDisruptionBudgetResolver) TolerateDisruption(_ resolver.NodeSet) bool {
- return r.ApplicationDisruptionBudget.Status.DisruptionsAllowed-1 >= 0
+func (r *ApplicationDisruptionBudgetResolver) TryValidateDisruptionFromBudgetConstraints(_ resolver.NodeSet) nodedisruptionv1alpha1.DisruptedBudgetStatus {
+ if r.ApplicationDisruptionBudget.Spec.Freeze.Enabled {
+ return nodedisruptionv1alpha1.DisruptedBudgetStatus{
+ Reference: r.GetNamespacedName(),
+ Reason: fmt.Sprintf("Budget frozen: %s", r.ApplicationDisruptionBudget.Spec.Freeze.Reason),
+ Ok: false,
+ }
+ }
+
+ if r.ApplicationDisruptionBudget.Status.DisruptionsAllowed-1 >= 0 {
+ return nodedisruptionv1alpha1.DisruptedBudgetStatus{
+ Reference: r.GetNamespacedName(),
+ Reason: "",
+ Ok: true,
+ }
+ } else {
+ return nodedisruptionv1alpha1.DisruptedBudgetStatus{
+ Reference: r.GetNamespacedName(),
+ Reason: fmt.Sprintf("Number of allowed disruption exceeded (Remaining allowed disruptions: %d, current disruptions: %d)",
+ r.ApplicationDisruptionBudget.Status.DisruptionsAllowed, r.ApplicationDisruptionBudget.Status.CurrentDisruptions),
+ Ok: false,
+ }
+ }
}
func (r *ApplicationDisruptionBudgetResolver) UpdateStatus(ctx context.Context) error {
@@ -215,8 +242,24 @@ func (r *ApplicationDisruptionBudgetResolver) GetNamespacedName() nodedisruption
}
}
+func (r *ApplicationDisruptionBudgetResolver) TryValidateDisruptionFromHealthHook(ctx context.Context, nd nodedisruptionv1alpha1.NodeDisruption) nodedisruptionv1alpha1.DisruptedBudgetStatus {
+ err := r.callHealthHook(ctx, nd)
+ if err != nil {
+ return nodedisruptionv1alpha1.DisruptedBudgetStatus{
+ Reference: r.GetNamespacedName(),
+ Reason: fmt.Sprintf("Failed to validate with healthHook: %s", err),
+ Ok: false,
+ }
+ }
+ return nodedisruptionv1alpha1.DisruptedBudgetStatus{
+ Reference: r.GetNamespacedName(),
+ Reason: "",
+ Ok: true,
+ }
+}
+
// Call a lifecycle hook in order to synchronously validate a Node Disruption
-func (r *ApplicationDisruptionBudgetResolver) CallHealthHook(ctx context.Context, nd nodedisruptionv1alpha1.NodeDisruption) error {
+func (r *ApplicationDisruptionBudgetResolver) callHealthHook(ctx context.Context, nd nodedisruptionv1alpha1.NodeDisruption) error {
if r.ApplicationDisruptionBudget.Spec.HealthHook.URL == "" {
return nil
}
@@ -226,7 +269,7 @@ func (r *ApplicationDisruptionBudgetResolver) CallHealthHook(ctx context.Context
data, err := json.Marshal(nd)
if err != nil {
- return err
+ return fmt.Errorf("controller error: Failed to serialize node disruption: %w", err)
}
namespacedName := r.GetNamespacedName()
@@ -234,7 +277,7 @@ func (r *ApplicationDisruptionBudgetResolver) CallHealthHook(ctx context.Context
req, err := http.NewRequestWithContext(ctx, http.MethodPost, r.ApplicationDisruptionBudget.Spec.HealthHook.URL, bytes.NewReader(data))
if err != nil {
DisruptionBudgetCheckHealthHookErrorTotal.WithLabelValues(namespacedName.Namespace, namespacedName.Name, namespacedName.Kind).Inc()
- return err
+ return fmt.Errorf("controller error: Error while building request: %w", err)
}
headers["Content-Type"] = []string{"application/json"}
@@ -244,13 +287,13 @@ func (r *ApplicationDisruptionBudgetResolver) CallHealthHook(ctx context.Context
resp, err := client.Do(req)
if err != nil {
DisruptionBudgetCheckHealthHookErrorTotal.WithLabelValues(namespacedName.Namespace, namespacedName.Name, namespacedName.Kind).Inc()
- return err
+ return fmt.Errorf("error while performing request on healthHook: %w", err)
}
body, err := io.ReadAll(resp.Body)
if err != nil {
DisruptionBudgetCheckHealthHookErrorTotal.WithLabelValues(namespacedName.Namespace, namespacedName.Name, namespacedName.Kind).Inc()
- return err
+ return fmt.Errorf("error while reading response fron healthHook: %w", err)
}
DisruptionBudgetCheckHealthHookStatusCodeTotal.WithLabelValues(namespacedName.Namespace, namespacedName.Name, namespacedName.Kind, strconv.Itoa(resp.StatusCode)).Inc()
@@ -258,7 +301,7 @@ func (r *ApplicationDisruptionBudgetResolver) CallHealthHook(ctx context.Context
if resp.StatusCode >= 200 && resp.StatusCode < 300 {
return nil
}
- return fmt.Errorf("http server responded with non 2XX status code: %s", string(body))
+ return fmt.Errorf("HealthHook responded with non 2XX status code: %s", string(body))
}
func (r *ApplicationDisruptionBudgetResolver) GetSelectedNodes(ctx context.Context) (resolver.NodeSet, error) {
diff --git a/internal/controller/budget.go b/internal/controller/budget.go
index df7b2dc..104df01 100644
--- a/internal/controller/budget.go
+++ b/internal/controller/budget.go
@@ -14,10 +14,10 @@ type Budget interface {
Sync(context.Context) error
// Check if the budget would be impacted by an operation on the provided set of nodes
IsImpacted(resolver.NodeSet) bool
- // Return the number of disruption allowed considering a list of current node disruptions
- TolerateDisruption(resolver.NodeSet) bool
+ // Return the status of this budget when trying to compute the status for it
+ TryValidateDisruptionFromBudgetConstraints(resolver.NodeSet) nodedisruptionv1alpha1.DisruptedBudgetStatus
// Call a lifecycle hook in order to synchronously validate a Node Disruption
- CallHealthHook(context.Context, nodedisruptionv1alpha1.NodeDisruption) error
+ TryValidateDisruptionFromHealthHook(context.Context, nodedisruptionv1alpha1.NodeDisruption) nodedisruptionv1alpha1.DisruptedBudgetStatus
// Apply the budget's status to Kubernetes
UpdateStatus(context.Context) error
// Get the name, namespace and kind of bduget
diff --git a/internal/controller/budget_test.go b/internal/controller/budget_test.go
index ef0319a..1af9b1f 100644
--- a/internal/controller/budget_test.go
+++ b/internal/controller/budget_test.go
@@ -31,14 +31,30 @@ func (m *MockBudget) IsImpacted(resolver.NodeSet) bool {
}
// Return the number of disruption allowed considering a list of current node disruptions
-func (m *MockBudget) TolerateDisruption(resolver.NodeSet) bool {
- return m.tolerate
+func (m *MockBudget) TryValidateDisruptionFromBudgetConstraints(resolver.NodeSet) nodedisruptionv1alpha1.DisruptedBudgetStatus {
+ return nodedisruptionv1alpha1.DisruptedBudgetStatus{
+ Reference: nodedisruptionv1alpha1.NamespacedName{},
+ Reason: "foo",
+ Ok: m.tolerate,
+ }
}
// Check health make a synchronous health check on the underlying resource of a budget
-func (m *MockBudget) CallHealthHook(context.Context, nodedisruptionv1alpha1.NodeDisruption) error {
+func (m *MockBudget) TryValidateDisruptionFromHealthHook(context.Context, nodedisruptionv1alpha1.NodeDisruption) nodedisruptionv1alpha1.DisruptedBudgetStatus {
m.healthChecked = true
- return m.health
+ if m.health != nil {
+ return nodedisruptionv1alpha1.DisruptedBudgetStatus{
+ Reference: nodedisruptionv1alpha1.NamespacedName{},
+ Reason: m.health.Error(),
+ Ok: false,
+ }
+ } else {
+ return nodedisruptionv1alpha1.DisruptedBudgetStatus{
+ Reference: nodedisruptionv1alpha1.NamespacedName{},
+ Reason: "",
+ Ok: true,
+ }
+ }
}
// Apply the budget's status to Kubernetes
@@ -108,7 +124,6 @@ func TestValidationImpactedAllOk(t *testing.T) {
}
budgets := []controller.Budget{&budget1, &budget2}
-
anyFailed, statuses := reconciler.ValidateWithBudgetConstraints(context.Background(), budgets)
assert.False(t, anyFailed)
assert.Equal(t, len(statuses), 2)
diff --git a/internal/controller/metrics.go b/internal/controller/metrics.go
index 80597be..cddd879 100644
--- a/internal/controller/metrics.go
+++ b/internal/controller/metrics.go
@@ -139,4 +139,11 @@ var (
},
[]string{"disruption_budget_namespace", "disruption_budget_name", "disruption_budget_kind", "node_disruption_name"},
)
+ DisruptionBudgetFrozen = promauto.With(metrics.Registry).NewGaugeVec(
+ prometheus.GaugeOpts{
+ Name: METIC_PREFIX + "budget_disruption_frozen",
+ Help: "Budget frozen",
+ },
+ []string{"disruption_budget_namespace", "disruption_budget_name", "disruption_budget_kind"},
+ )
)
diff --git a/internal/controller/nodedisruption_controller.go b/internal/controller/nodedisruption_controller.go
index 80e565e..c3ca88c 100644
--- a/internal/controller/nodedisruption_controller.go
+++ b/internal/controller/nodedisruption_controller.go
@@ -346,17 +346,12 @@ func (ndr *SingleNodeDisruptionReconciler) ValidateWithBudgetConstraints(ctx con
continue
}
- if !budget.TolerateDisruption(disruptedNodes) {
+ status := budget.TryValidateDisruptionFromBudgetConstraints(disruptedNodes)
+ if !status.Ok {
anyFailed = true
- ref := budget.GetNamespacedName()
- status := nodedisruptionv1alpha1.DisruptedBudgetStatus{
- Reference: ref,
- Reason: "No more disruption allowed",
- Ok: false,
- }
statuses = append(statuses, status)
logger.Info("Disruption rejected because: ", "status", status)
- DisruptionBudgetRejectedTotal.WithLabelValues(ref.Namespace, ref.Name, ref.Kind).Inc()
+ DisruptionBudgetRejectedTotal.WithLabelValues(status.Reference.Namespace, status.Reference.Name, status.Reference.Kind).Inc()
break
}
impactedBudgets = append(impactedBudgets, budget)
@@ -367,26 +362,16 @@ func (ndr *SingleNodeDisruptionReconciler) ValidateWithBudgetConstraints(ctx con
}
for _, budget := range impactedBudgets {
- err := budget.CallHealthHook(ctx, ndr.NodeDisruption)
+ status := budget.TryValidateDisruptionFromHealthHook(ctx, ndr.NodeDisruption)
ref := budget.GetNamespacedName()
- if err != nil {
+ statuses = append(statuses, status)
+ if !status.Ok {
anyFailed = true
- status := nodedisruptionv1alpha1.DisruptedBudgetStatus{
- Reference: ref,
- Reason: fmt.Sprintf("Unhealthy: %s", err),
- Ok: false,
- }
- statuses = append(statuses, status)
logger.Info("Disruption rejected because: ", "status", status)
DisruptionBudgetRejectedTotal.WithLabelValues(ref.Namespace, ref.Name, ref.Kind).Inc()
break
}
DisruptionBudgetGrantedTotal.WithLabelValues(ref.Namespace, ref.Name, ref.Kind).Inc()
- statuses = append(statuses, nodedisruptionv1alpha1.DisruptedBudgetStatus{
- Reference: budget.GetNamespacedName(),
- Reason: "",
- Ok: true,
- })
}
return anyFailed, statuses
diff --git a/internal/controller/nodedisruption_controller_test.go b/internal/controller/nodedisruption_controller_test.go
index b582b39..5ea79a0 100644
--- a/internal/controller/nodedisruption_controller_test.go
+++ b/internal/controller/nodedisruption_controller_test.go
@@ -372,6 +372,147 @@ var _ = Describe("NodeDisruption controller", func() {
})
})
+ When("there is a node disruption budget that is frozen", func() {
+ It("rejects the node disruption", func() {
+ By("creating a budget that accepts everything")
+ ndb := &nodedisruptionv1alpha1.NodeDisruptionBudget{
+ TypeMeta: metav1.TypeMeta{
+ APIVersion: "nodedisruption.criteo.com/v1alpha1",
+ Kind: "NodeDisruptionBudget",
+ },
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test",
+ },
+ Spec: nodedisruptionv1alpha1.NodeDisruptionBudgetSpec{
+ NodeSelector: metav1.LabelSelector{MatchLabels: nodeLabels1},
+ MaxDisruptedNodes: 99,
+ Freeze: nodedisruptionv1alpha1.FreezeSpec{
+ Enabled: false,
+ },
+ },
+ }
+ Expect(k8sClient.Create(ctx, ndb.DeepCopy())).Should(Succeed())
+
+ By("creating a new NodeDisruption")
+ disruption := &nodedisruptionv1alpha1.NodeDisruption{
+ TypeMeta: metav1.TypeMeta{
+ APIVersion: "nodedisruption.criteo.com/v1alpha1",
+ Kind: "NodeDisruption",
+ },
+ ObjectMeta: metav1.ObjectMeta{
+ Name: NDName,
+ Namespace: NDNamespace,
+ },
+ Spec: nodedisruptionv1alpha1.NodeDisruptionSpec{
+ NodeSelector: metav1.LabelSelector{MatchLabels: nodeLabels1},
+ },
+ }
+ Expect(k8sClient.Create(ctx, disruption.DeepCopy())).Should(Succeed())
+
+ By("checking the NodeDisruption is being granted")
+ createdDisruption := &nodedisruptionv1alpha1.NodeDisruption{}
+
+ Eventually(func() nodedisruptionv1alpha1.NodeDisruptionState {
+ err := k8sClient.Get(ctx, NDLookupKey, createdDisruption)
+ if err != nil {
+ panic("should be able to get")
+ }
+ return createdDisruption.Status.State
+ }, timeout, interval).Should(Equal(nodedisruptionv1alpha1.Granted))
+
+ By("creating a budget that accepts everything")
+ Expect(k8sClient.Delete(ctx, ndb)).Should(Succeed())
+ ndb.Spec.Freeze.Enabled = true
+ ndb.Spec.Freeze.Reason = "foobar"
+ Expect(k8sClient.Create(ctx, ndb)).Should(Succeed())
+
+ By("Re-creating a new NodeDisruption")
+ Expect(k8sClient.Delete(ctx, disruption)).Should(Succeed())
+ Expect(k8sClient.Create(ctx, disruption.DeepCopy())).Should(Succeed())
+
+ By("checking the NodeDisruption is being rejected")
+ Eventually(func() nodedisruptionv1alpha1.NodeDisruptionState {
+ err := k8sClient.Get(ctx, NDLookupKey, createdDisruption)
+ if err != nil {
+ panic("should be able to get")
+ }
+ return createdDisruption.Status.State
+ }, timeout, interval).Should(Equal(nodedisruptionv1alpha1.Rejected))
+
+ Expect(createdDisruption.Status.DisruptedDisruptionBudgets[0].Reason).Should(Equal("Budget frozen: foobar"))
+ })
+ })
+
+ When("there is a application disruption budget that is frozen", func() {
+ It("rejects the node disruption", func() {
+ By("creating a budget that accepts everything")
+ adb := &nodedisruptionv1alpha1.ApplicationDisruptionBudget{
+ TypeMeta: metav1.TypeMeta{
+ APIVersion: "nodedisruption.criteo.com/v1alpha1",
+ Kind: "ApplicationDisruptionBudget",
+ },
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test",
+ Namespace: NDNamespace,
+ },
+ Spec: nodedisruptionv1alpha1.ApplicationDisruptionBudgetSpec{
+ PodSelector: metav1.LabelSelector{MatchLabels: podLabels},
+ PVCSelector: metav1.LabelSelector{MatchLabels: podLabels},
+ MaxDisruptions: 1,
+ },
+ }
+ Expect(k8sClient.Create(ctx, adb.DeepCopy())).Should(Succeed())
+
+ By("creating a new NodeDisruption")
+ disruption := &nodedisruptionv1alpha1.NodeDisruption{
+ TypeMeta: metav1.TypeMeta{
+ APIVersion: "nodedisruption.criteo.com/v1alpha1",
+ Kind: "NodeDisruption",
+ },
+ ObjectMeta: metav1.ObjectMeta{
+ Name: NDName,
+ Namespace: NDNamespace,
+ },
+ Spec: nodedisruptionv1alpha1.NodeDisruptionSpec{
+ NodeSelector: metav1.LabelSelector{MatchLabels: nodeLabels1},
+ },
+ }
+ Expect(k8sClient.Create(ctx, disruption.DeepCopy())).Should(Succeed())
+
+ By("checking the NodeDisruption is being granted")
+ createdDisruption := &nodedisruptionv1alpha1.NodeDisruption{}
+
+ Eventually(func() nodedisruptionv1alpha1.NodeDisruptionState {
+ err := k8sClient.Get(ctx, NDLookupKey, createdDisruption)
+ if err != nil {
+ panic("should be able to get")
+ }
+ return createdDisruption.Status.State
+ }, timeout, interval).Should(Equal(nodedisruptionv1alpha1.Granted))
+
+ By("creating a budget that accepts everything")
+ Expect(k8sClient.Delete(ctx, adb)).Should(Succeed())
+ adb.Spec.Freeze.Enabled = true
+ adb.Spec.Freeze.Reason = "foobar"
+ Expect(k8sClient.Create(ctx, adb)).Should(Succeed())
+
+ By("Re-creating a new NodeDisruption")
+ Expect(k8sClient.Delete(ctx, disruption)).Should(Succeed())
+ Expect(k8sClient.Create(ctx, disruption.DeepCopy())).Should(Succeed())
+
+ By("checking the NodeDisruption is being rejected")
+ Eventually(func() nodedisruptionv1alpha1.NodeDisruptionState {
+ err := k8sClient.Get(ctx, NDLookupKey, createdDisruption)
+ if err != nil {
+ panic("should be able to get")
+ }
+ return createdDisruption.Status.State
+ }, timeout, interval).Should(Equal(nodedisruptionv1alpha1.Rejected))
+
+ Expect(createdDisruption.Status.DisruptedDisruptionBudgets[0].Reason).Should(Equal("Budget frozen: foobar"))
+ })
+ })
+
When("there is a budget that doesn't support any disruption and retry is activated", func() {
It("rejects the node disruption", func() {
By("creating a budget that rejects everything")
@@ -528,7 +669,7 @@ var _ = Describe("NodeDisruption controller", func() {
Name: "test",
Kind: "NodeDisruptionBudget",
},
- Reason: "No more disruption allowed",
+ Reason: "Number of allowed disrupted nodes exceeded (Remaining disruptions allowed: 0, Number of current node disrupted: 0, Requested nodes to disrupt: 2)",
Ok: false,
}}
Expect(disruption.Status.DisruptedDisruptionBudgets).Should(Equal(expectedStatus))
@@ -553,7 +694,7 @@ var _ = Describe("NodeDisruption controller", func() {
Name: "test",
Kind: "NodeDisruptionBudget",
},
- Reason: "No more disruption allowed",
+ Reason: "Number of allowed disrupted nodes exceeded (Remaining disruptions allowed: 0, Number of current node disrupted: 0, Requested nodes to disrupt: 2)",
Ok: false,
}, {
Reference: nodedisruptionv1alpha1.NamespacedName{
diff --git a/internal/controller/nodedisruptionbudget_controller.go b/internal/controller/nodedisruptionbudget_controller.go
index cb677bb..5591fc9 100644
--- a/internal/controller/nodedisruptionbudget_controller.go
+++ b/internal/controller/nodedisruptionbudget_controller.go
@@ -18,6 +18,7 @@ package controller
import (
"context"
+ "fmt"
"math"
"reflect"
@@ -108,6 +109,11 @@ func PruneNDBMetrics(ref nodedisruptionv1alpha1.NamespacedName) {
func UpdateNDBMetrics(ref nodedisruptionv1alpha1.NamespacedName, ndb *nodedisruptionv1alpha1.NodeDisruptionBudget) {
DisruptionBudgetMaxDisruptedNodes.WithLabelValues(ref.Namespace, ref.Name, ref.Kind).Set(float64(ndb.Spec.MaxDisruptedNodes))
DisruptionBudgetMinUndisruptedNodes.WithLabelValues(ref.Namespace, ref.Name, ref.Kind).Set(float64(ndb.Spec.MinUndisruptedNodes))
+ if ndb.Spec.Freeze.Enabled {
+ DisruptionBudgetFrozen.WithLabelValues(ref.Namespace, ref.Name, ref.Kind).Set(1)
+ } else {
+ DisruptionBudgetFrozen.WithLabelValues(ref.Namespace, ref.Name, ref.Kind).Set(0)
+ }
UpdateBudgetStatusMetrics(ref, ndb.Status)
}
@@ -190,15 +196,40 @@ func (r *NodeDisruptionBudgetResolver) IsImpacted(disruptedNodes resolver.NodeSe
}
// Return the number of disruption allowed considering a list of current node disruptions
-func (r *NodeDisruptionBudgetResolver) TolerateDisruption(disruptedNodes resolver.NodeSet) bool {
+func (r *NodeDisruptionBudgetResolver) TryValidateDisruptionFromBudgetConstraints(disruptedNodes resolver.NodeSet) nodedisruptionv1alpha1.DisruptedBudgetStatus {
+ if r.NodeDisruptionBudget.Spec.Freeze.Enabled {
+ return nodedisruptionv1alpha1.DisruptedBudgetStatus{
+ Reference: r.GetNamespacedName(),
+ Reason: fmt.Sprintf("Budget frozen: %s", r.NodeDisruptionBudget.Spec.Freeze.Reason),
+ Ok: false,
+ }
+ }
+
watchedNodes := resolver.NewNodeSetFromStringList(r.NodeDisruptionBudget.Status.WatchedNodes)
disruptedNodesCount := watchedNodes.Intersection(disruptedNodes).Len()
- return r.NodeDisruptionBudget.Status.DisruptionsAllowed-disruptedNodesCount >= 0
+ if r.NodeDisruptionBudget.Status.DisruptionsAllowed-disruptedNodesCount >= 0 {
+ return nodedisruptionv1alpha1.DisruptedBudgetStatus{
+ Reference: r.GetNamespacedName(),
+ Reason: "",
+ Ok: true,
+ }
+ } else {
+ return nodedisruptionv1alpha1.DisruptedBudgetStatus{
+ Reference: r.GetNamespacedName(),
+ Reason: fmt.Sprintf("Number of allowed disrupted nodes exceeded (Remaining disruptions allowed: %d, Number of current node disrupted: %d, Requested nodes to disrupt: %d)",
+ r.NodeDisruptionBudget.Status.DisruptionsAllowed, r.NodeDisruptionBudget.Status.CurrentDisruptions, disruptedNodesCount),
+ Ok: false,
+ }
+ }
}
// Call a lifecycle hook in order to synchronously validate a Node Disruption
-func (r *NodeDisruptionBudgetResolver) CallHealthHook(_ context.Context, _ nodedisruptionv1alpha1.NodeDisruption) error {
- return nil
+func (r *NodeDisruptionBudgetResolver) TryValidateDisruptionFromHealthHook(_ context.Context, _ nodedisruptionv1alpha1.NodeDisruption) nodedisruptionv1alpha1.DisruptedBudgetStatus {
+ return nodedisruptionv1alpha1.DisruptedBudgetStatus{
+ Reference: r.GetNamespacedName(),
+ Reason: "",
+ Ok: true,
+ }
}
func (r *NodeDisruptionBudgetResolver) UpdateStatus(ctx context.Context) error {