Skip to content

Commit 0d71cdf

Browse files
committed
Add Drifted condition
This commit introduces a new Kubernetes condition type "Drifted" to improve observability of Helm release drift detection. Signed-off-by: Yasin Özel <yozel@nebius.com>
1 parent 865b5a6 commit 0d71cdf

4 files changed

Lines changed: 181 additions & 6 deletions

File tree

api/v2/condition_types.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,10 @@ const (
2929
// (uninstall/rollback) due to a failure of the last release attempt against the
3030
// latest desired state.
3131
RemediatedCondition string = "Remediated"
32+
33+
// DriftedCondition represents the status of the Helm release drift detection,
34+
// indicating that the deployed release has drifted from the desired state.
35+
DriftedCondition string = "Drifted"
3236
)
3337

3438
const (
@@ -79,4 +83,12 @@ const (
7983
// DependencyNotReadyReason represents the fact that
8084
// one of the dependencies is not ready.
8185
DependencyNotReadyReason string = "DependencyNotReady"
86+
87+
// DriftDetectedReason represents the fact that drift has been detected in the
88+
// Helm release compared to the expected state.
89+
DriftDetectedReason string = "DriftDetected"
90+
91+
// NoDriftDetectedReason represents the fact that no drift has been detected in
92+
// the Helm release compared to the expected state.
93+
NoDriftDetectedReason string = "NoDriftDetected"
8294
)

docs/spec/v2/helmreleases.md

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1886,8 +1886,9 @@ A HelmRelease enters various states during its lifecycle, reflected as
18861886
[Kubernetes Conditions](https://github.com/kubernetes/community/blob/master/contributors/devel/sig-architecture/api-conventions.md#typical-status-properties).
18871887
It can be [reconciling](#reconciling-helmrelease) when it is being processed by
18881888
the controller, it can be [ready](#ready-helmrelease) when the Helm release is
1889-
installed and up-to-date, or it can [fail](#failed-helmrelease) during
1890-
reconciliation.
1889+
installed and up-to-date, it can [fail](#failed-helmrelease) during
1890+
reconciliation, or it can be [drifted](#drifted-helmrelease) if the
1891+
drift detection mode is set to enabled or warn and drift is detected.
18911892

18921893
The HelmRelease API is compatible with the [kstatus specification](https://github.com/kubernetes-sigs/cli-utils/tree/master/pkg/kstatus),
18931894
and reports `Reconciling` and `Stalled` conditions where applicable to provide
@@ -1972,6 +1973,29 @@ HelmRelease's `.status.conditions`:
19721973
The `TestSuccess` Condition will retain a status value of `"True"` until the
19731974
next Helm install or upgrade occurs, or the Helm tests are disabled.
19741975

1976+
#### Drifted HelmRelease
1977+
1978+
The helm-controller marks the HelmRelease as _drifted_ when it has the following
1979+
characteristics:
1980+
1981+
- The HelmRelease has drift detection mode set to enabled or warn.
1982+
- Drift is detected against the cluster state.
1983+
1984+
When the HelmRelease is "drifted", the controller sets a Condition with the
1985+
following attributes in the HelmRelease's `.status.conditions`:
1986+
1987+
- `type: Drifted`
1988+
- `status: "True"`
1989+
- `reason: DriftDetected`
1990+
1991+
When the HelmRelease has drift detection mode set to enabled or warn
1992+
and there is no drift, the controller sets a Condition with the following
1993+
attributes in the HelmRelease's `.status.conditions`:
1994+
1995+
- `type: Drifted`
1996+
- `status: "False"`
1997+
- `reason: NoDriftDetected`
1998+
19751999
#### Failed HelmRelease
19762000

19772001
The helm-controller may get stuck trying to determine state or produce a Helm

internal/reconcile/atomic_release.go

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -385,6 +385,10 @@ func (r *AtomicRelease) actionForState(ctx context.Context, req *Request, state
385385
conditions.MarkTrue(req.Object, v2.ReleasedCondition, v2.UpgradeSucceededReason, "%s", msg)
386386
}
387387

388+
if req.Object.GetDriftDetection().MustDetectChanges() {
389+
conditions.MarkFalse(req.Object, v2.DriftedCondition, v2.NoDriftDetectedReason, "No drift detected against the cluster state")
390+
}
391+
388392
return nil, nil
389393
case ReleaseStatusLocked:
390394
log.Info(msgWithReason("release locked", state.Reason))
@@ -440,10 +444,10 @@ func (r *AtomicRelease) actionForState(ctx context.Context, req *Request, state
440444
}
441445
}
442446

443-
r.eventRecorder.Eventf(req.Object, corev1.EventTypeWarning, "DriftDetected",
444-
"Cluster state of release %s has drifted from the desired state:\n%s",
445-
req.Object.Status.History.Latest().FullReleaseName(), diff.SummarizeDiffSet(state.Diff),
446-
)
447+
msg := fmt.Sprintf("Cluster state of release %s has drifted from the desired state:\n%s",
448+
req.Object.Status.History.Latest().FullReleaseName(), diff.SummarizeDiffSet(state.Diff))
449+
r.eventRecorder.Eventf(req.Object, corev1.EventTypeWarning, v2.DriftDetectedReason, msg)
450+
conditions.MarkTrue(req.Object, v2.DriftedCondition, v2.DriftDetectedReason, "%s", msg)
447451

448452
if req.Object.GetDriftDetection().GetMode() == v2.DriftDetectionEnabled {
449453
return NewCorrectClusterDrift(r.configFactory, r.eventRecorder, state.Diff, kube.ManagedFieldsManager), nil

internal/reconcile/atomic_release_test.go

Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1552,6 +1552,63 @@ func TestAtomicRelease_actionForState(t *testing.T) {
15521552
*conditions.FalseCondition(meta.ReadyCondition, v2.UpgradeFailedReason, "upgrade failed"),
15531553
},
15541554
},
1555+
{
1556+
name: "in-sync release with drift detection enabled sets Drifted condition to false",
1557+
spec: func(spec *v2.HelmReleaseSpec) {
1558+
spec.DriftDetection = &v2.DriftDetection{
1559+
Mode: v2.DriftDetectionEnabled,
1560+
}
1561+
},
1562+
status: func(releases []*helmrelease.Release) v2.HelmReleaseStatus {
1563+
return v2.HelmReleaseStatus{
1564+
History: v2.Snapshots{
1565+
{Version: 1},
1566+
},
1567+
}
1568+
},
1569+
state: ReleaseState{Status: ReleaseStatusInSync},
1570+
want: nil,
1571+
assertConditions: []metav1.Condition{
1572+
*conditions.FalseCondition(v2.DriftedCondition, v2.NoDriftDetectedReason, "No drift detected against the cluster state"),
1573+
},
1574+
},
1575+
{
1576+
name: "in-sync release with drift detection warn sets Drifted condition to false",
1577+
spec: func(spec *v2.HelmReleaseSpec) {
1578+
spec.DriftDetection = &v2.DriftDetection{
1579+
Mode: v2.DriftDetectionWarn,
1580+
}
1581+
},
1582+
status: func(releases []*helmrelease.Release) v2.HelmReleaseStatus {
1583+
return v2.HelmReleaseStatus{
1584+
History: v2.Snapshots{
1585+
{Version: 1},
1586+
},
1587+
}
1588+
},
1589+
state: ReleaseState{Status: ReleaseStatusInSync},
1590+
want: nil,
1591+
assertConditions: []metav1.Condition{
1592+
*conditions.FalseCondition(v2.DriftedCondition, v2.NoDriftDetectedReason, "No drift detected against the cluster state"),
1593+
},
1594+
},
1595+
{
1596+
name: "in-sync release with drift detection disabled does not set Drifted condition",
1597+
spec: func(spec *v2.HelmReleaseSpec) {
1598+
spec.DriftDetection = &v2.DriftDetection{
1599+
Mode: v2.DriftDetectionDisabled,
1600+
}
1601+
},
1602+
status: func(releases []*helmrelease.Release) v2.HelmReleaseStatus {
1603+
return v2.HelmReleaseStatus{
1604+
History: v2.Snapshots{
1605+
{Version: 1},
1606+
},
1607+
}
1608+
},
1609+
state: ReleaseState{Status: ReleaseStatusInSync},
1610+
want: nil,
1611+
},
15551612
{
15561613
name: "locked release triggers unlock action",
15571614
state: ReleaseState{Status: ReleaseStatusLocked},
@@ -1634,6 +1691,9 @@ func TestAtomicRelease_actionForState(t *testing.T) {
16341691
"Deployment/something/mock removed",
16351692
),
16361693
},
1694+
assertConditions: []metav1.Condition{
1695+
*conditions.TrueCondition(v2.DriftedCondition, v2.DriftDetectedReason, "Cluster state of release mock-ns/mock-release.v1 has drifted from the desired state:\nDeployment/something/mock removed"),
1696+
},
16371697
},
16381698
{
16391699
name: "drifted release only triggers event if mode is warn",
@@ -1703,6 +1763,81 @@ func TestAtomicRelease_actionForState(t *testing.T) {
17031763
"Deployment/something/mock changed (0 additions, 1 changes, 0 removals)",
17041764
),
17051765
},
1766+
assertConditions: []metav1.Condition{
1767+
*conditions.TrueCondition(v2.DriftedCondition, v2.DriftDetectedReason, "Cluster state of release mock-ns/mock-release.v1 has drifted from the desired state:\nDeployment/something/mock changed (0 additions, 1 changes, 0 removals)"),
1768+
},
1769+
},
1770+
{
1771+
name: "drifted release sets Drifted condition if mode is warn",
1772+
spec: func(spec *v2.HelmReleaseSpec) {
1773+
spec.DriftDetection = &v2.DriftDetection{
1774+
Mode: v2.DriftDetectionWarn,
1775+
}
1776+
},
1777+
status: func(releases []*helmrelease.Release) v2.HelmReleaseStatus {
1778+
return v2.HelmReleaseStatus{
1779+
History: v2.Snapshots{
1780+
{
1781+
Name: mockReleaseName,
1782+
Namespace: mockReleaseNamespace,
1783+
Version: 1,
1784+
},
1785+
},
1786+
}
1787+
},
1788+
state: ReleaseState{Status: ReleaseStatusDrifted, Diff: jsondiff.DiffSet{
1789+
{
1790+
Type: jsondiff.DiffTypeUpdate,
1791+
DesiredObject: &unstructured.Unstructured{
1792+
Object: map[string]interface{}{
1793+
"apiVersion": "apps/v1",
1794+
"kind": "Deployment",
1795+
"metadata": map[string]interface{}{
1796+
"name": "mock",
1797+
"namespace": "something",
1798+
},
1799+
"spec": map[string]interface{}{
1800+
"replicas": 2,
1801+
},
1802+
},
1803+
},
1804+
ClusterObject: &unstructured.Unstructured{
1805+
Object: map[string]interface{}{
1806+
"apiVersion": "apps/v1",
1807+
"kind": "Deployment",
1808+
"metadata": map[string]interface{}{
1809+
"name": "mock",
1810+
"namespace": "something",
1811+
},
1812+
"spec": map[string]interface{}{
1813+
"replicas": 1,
1814+
},
1815+
},
1816+
},
1817+
Patch: extjsondiff.Patch{
1818+
{
1819+
Type: extjsondiff.OperationReplace,
1820+
Path: "/spec/replicas",
1821+
OldValue: 1,
1822+
Value: 2,
1823+
},
1824+
},
1825+
},
1826+
}},
1827+
want: nil,
1828+
wantErr: nil,
1829+
wantEvent: &corev1.Event{
1830+
Reason: "DriftDetected",
1831+
Type: corev1.EventTypeWarning,
1832+
Message: fmt.Sprintf(
1833+
"Cluster state of release %s has drifted from the desired state:\n%s",
1834+
mockReleaseNamespace+"/"+mockReleaseName+".v1",
1835+
"Deployment/something/mock changed (0 additions, 1 changes, 0 removals)",
1836+
),
1837+
},
1838+
assertConditions: []metav1.Condition{
1839+
*conditions.TrueCondition(v2.DriftedCondition, v2.DriftDetectedReason, "Cluster state of release mock-ns/mock-release.v1 has drifted from the desired state:\nDeployment/something/mock changed (0 additions, 1 changes, 0 removals)"),
1840+
},
17061841
},
17071842
{
17081843
name: "out-of-sync release triggers upgrade",

0 commit comments

Comments
 (0)