Skip to content

Commit 1a1b85a

Browse files
Add error_message field to bundle deploy telemetry (#4793)
## Changes

Adds an `error_message` field to `bundle deploy` telemetry events, giving visibility into user-facing deploy errors.

### Error capture

`LogDeployTelemetry` is now called from a `defer` in `ProcessBundleRet`, so it runs on all exit paths — both diagnostic errors (via `logdiag.GetFirstErrorSummary`) and regular Go errors. The legacy empty `BundleDeployEvent` in `cmd/root/root.go` is removed; the `HasConfigUsed` auth guard is now in `telemetry.Upload` so it applies to all telemetry.

### Scrubbing

Error messages are treated as PII by the logging infrastructure. Before including them in telemetry, a best-effort regex scrubber (`telemetry_scrub.go`) redacts paths and emails to avoid collecting more information than necessary:

| Pattern | Label | Example |
|---------|-------|---------|
| Absolute paths | `[REDACTED_PATH]` | `/home/user/project/file.yml` → `[REDACTED_PATH](yml)` |
| `~/...` paths | `[REDACTED_PATH]` | `~/.databricks/config.json` → `[REDACTED_PATH](json)` |
| `/Workspace/...` paths | `[REDACTED_WORKSPACE_PATH]` | `/Workspace/Users/dev/.bundle` → `[REDACTED_WORKSPACE_PATH]` |
| Windows paths | `[REDACTED_WIN_PATH]` (backslashes) / `[REDACTED_WIN_FPATH]` (forward slashes) | `C:\Users\...\file.yml` → `[REDACTED_WIN_PATH](yml)` |
| Relative paths | `[REDACTED_REL_PATH]` | `./resources/job.yml` → `[REDACTED_REL_PATH](yml)` |
| Emails | `[REDACTED_EMAIL]` | `user@example.com` → `[REDACTED_EMAIL]` |

Known file extensions (`.yml`, `.json`, `.py`, `.tf`, etc.) are preserved in parentheses to help understand usage patterns without capturing sensitive information. Messages are capped at 500 characters.

### Tests

- Unit tests for the scrubber (`telemetry_scrub_test.go`)
- Acceptance test (`bundle/telemetry/deploy-error-message`)
1 parent 9093081 commit 1a1b85a

13 files changed

Lines changed: 451 additions & 15 deletions

File tree

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
bundle:
2+
name: test-bundle
3+
4+
variables:
5+
myvar:
6+
description: a required variable

acceptance/bundle/telemetry/deploy-error-message/out.test.toml

Lines changed: 5 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
2+
>>> [CLI] bundle deploy
3+
Error: no value assigned to required variable myvar. Variables are usually assigned in databricks.yml, and they can be overridden using "--var", the BUNDLE_VAR_myvar environment variable, or .databricks/bundle/<target>/variable-overrides.json
4+
5+
6+
Exit code: 1
7+
8+
>>> cat out.requests.txt
9+
no value assigned to required variable myvar. Variables are usually assigned in databricks.yml, and they can be overridden using "--var", the BUNDLE_VAR_myvar environment variable, or [REDACTED_REL_PATH](json)
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
errcode trace $CLI bundle deploy
2+
3+
trace cat out.requests.txt | jq -r 'select(has("path") and .path == "/telemetry-ext") | .body.protoLogs[] | fromjson | .entry.databricks_cli_log.bundle_deploy_event.error_message'
4+
5+
rm out.requests.txt

bundle/phases/deploy.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -208,7 +208,6 @@ func Deploy(ctx context.Context, b *bundle.Bundle, outputHandler sync.OutputHand
208208
return
209209
}
210210

211-
logDeployTelemetry(ctx, b)
212211
bundle.ApplyContext(ctx, b, scripts.Execute(config.ScriptPostDeploy))
213212
}
214213

bundle/phases/telemetry.go

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,17 @@ func getExecutionTimes(b *bundle.Bundle) []protos.IntMapEntry {
3333
return executionTimes
3434
}
3535

36-
func logDeployTelemetry(ctx context.Context, b *bundle.Bundle) {
36+
// Maximum length of the error message included in telemetry.
37+
const maxErrorMessageLength = 500
38+
39+
// LogDeployTelemetry logs a telemetry event for a bundle deploy command.
40+
func LogDeployTelemetry(ctx context.Context, b *bundle.Bundle, errMsg string) {
41+
errMsg = scrubForTelemetry(errMsg)
42+
43+
if len(errMsg) > maxErrorMessageLength {
44+
errMsg = errMsg[:maxErrorMessageLength]
45+
}
46+
3747
resourcesCount := int64(0)
3848
_, err := dyn.MapByPattern(b.Config.Value(), dyn.NewPattern(dyn.Key("resources"), dyn.AnyKey(), dyn.AnyKey()), func(p dyn.Path, v dyn.Value) (dyn.Value, error) {
3949
resourcesCount++
@@ -149,6 +159,7 @@ func logDeployTelemetry(ctx context.Context, b *bundle.Bundle) {
149159
BundleDeployEvent: &protos.BundleDeployEvent{
150160
BundleUuid: bundleUuid,
151161
DeploymentId: b.Metrics.DeploymentId.String(),
162+
ErrorMessage: errMsg,
152163

153164
ResourceCount: resourcesCount,
154165
ResourceJobCount: int64(len(b.Config.Resources.Jobs)),

bundle/phases/telemetry_scrub.go

Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,158 @@
1+
package phases
2+
3+
import (
4+
"path"
5+
"regexp"
6+
"strings"
7+
)
8+
9+
// Patterns used to scrub sensitive data out of error messages before they are
// attached to telemetry. The approach is modeled on VS Code's telemetry path
// scrubbing and Sentry's @userpath PII rule:
//
//   - VS Code: https://github.com/microsoft/vscode/blob/main/src/vs/platform/telemetry/common/telemetryUtils.ts
//   - Sentry: https://github.com/getsentry/relay (PII rule: @userpath)
//
// Every path pattern stops at the first character in [\s:,"']. Those
// characters are legal inside file names, but in error text they nearly always
// separate the path from the rest of the message ("error: /path/to/file: not
// found", "failed to read '/some/path', skipping"). Accepting them as part of
// a path would make the patterns swallow surrounding text, so the rare path
// that really contains a colon, comma, or quote is only partially matched.

// windowsBackslashPathRegexp matches a drive-letter path written with
// backslashes that has at least two components (C:\foo\bar, D:\Users\project).
var windowsBackslashPathRegexp = regexp.MustCompile(`[A-Za-z]:\\[^\s:,"'/\\]+\\[^\s:,"']+`)

// windowsFwdslashPathRegexp matches a drive-letter path written with forward
// slashes that has at least two components (C:/foo/bar, D:/Users/project).
var windowsFwdslashPathRegexp = regexp.MustCompile(`[A-Za-z]:/[^\s:,"'/\\]+/[^\s:,"']+`)

// workspacePathRegexp matches Databricks workspace paths (/Workspace/...).
// Group 1 is the leading boundary (or empty at start of input), group 2 the path.
var workspacePathRegexp = regexp.MustCompile(`(^|[\s:,"'])(/Workspace/[^\s:,"']+)`)

// absPathRegexp matches absolute Unix paths, optionally rooted at "~", that
// have at least two components (/home/user/..., /tmp/foo, ~/.config/databricks).
// Group 1 is the leading boundary, group 2 the path.
var absPathRegexp = regexp.MustCompile(`(^|[\s:,"'])(~?/[^\s:,"'/]+/[^\s:,"']+)`)

// explicitRelPathRegexp matches relative paths that announce themselves:
//   - "./foo" and "../foo"
//   - paths under a dot-prefixed directory (.databricks/bundle/..., .cache/foo)
//
// Group 1 is the leading boundary, group 2 the path.
var explicitRelPathRegexp = regexp.MustCompile(`(^|[\s:,"'])((?:\.\.?|\.[a-zA-Z][^\s:,"'/]*)/[^\s:,"']+)`)

// implicitRelPathRegexp matches relative paths with no "./" marker: two or
// more components whose last one carries a file extension
// ("resources/job.yml", "bundle/dev/state.json").
// Group 1 is the leading boundary, group 2 the path.
var implicitRelPathRegexp = regexp.MustCompile(`(^|[\s:,"'])([a-zA-Z0-9_][^\s:,"']*/[^\s:,"']*\.[a-zA-Z][^\s:,"']*)`)

// emailRegexp matches email addresses. Databricks workspace paths frequently
// embed one (e.g., /Workspace/Users/user@example.com/.bundle/dev).
var emailRegexp = regexp.MustCompile(`[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,}`)
52+
53+
// knownExtensions is the set of file extensions (including the leading dot,
// lower case) that may be kept next to a redacted path. An extension reveals
// what kind of file was involved without exposing where it lives.
var knownExtensions = func() map[string]bool {
	groups := [][]string{
		// Configuration and data formats
		{".yml", ".yaml", ".json", ".toml", ".cfg", ".ini", ".env", ".xml", ".properties", ".conf"},
		// Notebook and script languages
		{".py", ".r", ".scala", ".sql", ".ipynb", ".sh"},
		// Web / Apps
		{".js", ".ts", ".jsx", ".tsx", ".html", ".css"},
		// Terraform
		{".tf", ".hcl", ".tfstate", ".tfvars"},
		// Build artifacts and archives
		{".whl", ".jar", ".egg", ".zip", ".tar", ".gz", ".tgz", ".dbc"},
		// Data formats
		{".txt", ".csv", ".md", ".parquet", ".avro"},
		// Logs and locks
		{".log", ".lock"},
		// Certificates and keys
		{".pem", ".crt"},
	}

	set := make(map[string]bool)
	for _, group := range groups {
		for _, ext := range group {
			set[ext] = true
		}
	}
	return set
}()
115+
116+
// scrubForTelemetry is a best-effort scrubber that removes sensitive path and
117+
// PII information from error messages before they are sent to telemetry.
118+
// The error message is treated as PII by the logging infrastructure but we
119+
// scrub to avoid collecting more information than necessary.
120+
func scrubForTelemetry(msg string) string {
121+
// Redact absolute paths.
122+
msg = replacePathRegexp(msg, windowsBackslashPathRegexp, "[REDACTED_WIN_PATH]", false)
123+
msg = replacePathRegexp(msg, windowsFwdslashPathRegexp, "[REDACTED_WIN_FPATH]", false)
124+
msg = replacePathRegexp(msg, workspacePathRegexp, "[REDACTED_WORKSPACE_PATH]", true)
125+
msg = replacePathRegexp(msg, absPathRegexp, "[REDACTED_PATH]", true)
126+
127+
// Redact relative paths.
128+
msg = replacePathRegexp(msg, explicitRelPathRegexp, "[REDACTED_REL_PATH]", true)
129+
msg = replacePathRegexp(msg, implicitRelPathRegexp, "[REDACTED_REL_PATH]", true)
130+
131+
// Redact email addresses.
132+
msg = emailRegexp.ReplaceAllString(msg, "[REDACTED_EMAIL]")
133+
134+
return msg
135+
}
136+
137+
// replacePathRegexp replaces path matches with the given label, retaining
138+
// known file extensions. When hasDelimiterGroup is true, the first character
139+
// of the match is preserved as a delimiter prefix.
140+
func replacePathRegexp(msg string, re *regexp.Regexp, label string, hasDelimiterGroup bool) string {
141+
return re.ReplaceAllStringFunc(msg, func(match string) string {
142+
prefix := ""
143+
p := match
144+
if hasDelimiterGroup && len(match) > 0 {
145+
first := match[0]
146+
if strings.ContainsRune(" \t\n:,\"'", rune(first)) {
147+
prefix = match[:1]
148+
p = match[1:]
149+
}
150+
}
151+
152+
ext := path.Ext(p)
153+
if knownExtensions[ext] {
154+
return prefix + label + "(" + ext[1:] + ")"
155+
}
156+
return prefix + label
157+
})
158+
}

0 commit comments

Comments
 (0)