-
-
Notifications
You must be signed in to change notification settings - Fork 636
fix(proxy): normalize lenient JSON request bodies before forwarding #386
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,188 @@ | ||
| package utils | ||
|
|
||
| import ( | ||
| "bytes" | ||
| "encoding/json" | ||
| "regexp" | ||
| "strings" | ||
|
|
||
| hjson "github.com/hjson/hjson-go/v4" | ||
| "gopkg.in/yaml.v3" | ||
| ) | ||
|
|
||
// jsonKeyPattern matches an unquoted object key. Its groups are: an opening
// brace or comma with trailing whitespace, the key itself (a letter or
// underscore followed by letters, digits, underscores or hyphens), and the
// optional whitespace plus colon that follow the key.
var jsonKeyPattern = regexp.MustCompile(`([{,]\s*)([A-Za-z_][A-Za-z0-9_-]*)(\s*:)`)

// barewordValuePattern matches an unquoted token in object-value position. Its
// groups are: the colon with trailing whitespace, the token, and the optional
// whitespace plus the comma, closing brace or closing bracket that ends it.
var barewordValuePattern = regexp.MustCompile(`(:\s*)([A-Za-z_./:@?&=%+~\-][A-Za-z0-9_./:@?&=%+~\-]*)(\s*[,}\]])`)

// barewordArrayItemPattern matches an unquoted token in array-item position.
// Its groups are: an opening bracket or comma with trailing whitespace, the
// token, and the optional whitespace plus the closing bracket or comma that
// ends it.
var barewordArrayItemPattern = regexp.MustCompile(`([\[,]\s*)([A-Za-z_./:@?&=%+~\-][A-Za-z0-9_./:@?&=%+~\-]*)(\s*[\],])`)

// strictJSONNumberPattern matches a whole string that is a JSON number: an
// optional minus sign, an integer part without leading zeros, an optional
// fraction and an optional exponent.
var strictJSONNumberPattern = regexp.MustCompile(`^-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?$`)
|
|
||
| // NormalizeJSONRequestBody rewrites a lenient, JSON-like request body (for example one with unquoted object keys) as strict JSON bytes. | ||
| // It acts only when shouldAttemptJSONNormalization accepts the trimmed body and contentType and the body is not already valid JSON; it tries the regex token repair first, then hjson, then YAML, and re-encodes a parsed result only when it is a top-level object or array. | ||
| // Returns (normalizedBytes, true) when normalization succeeds; in every other case it returns the original body unchanged together with false. | ||
| func NormalizeJSONRequestBody(body []byte, contentType string) ([]byte, bool) { | ||
| trimmed := bytes.TrimSpace(body) | ||
| if len(trimmed) == 0 { | ||
| return body, false | ||
| } | ||
|
|
||
| if !shouldAttemptJSONNormalization(trimmed, contentType) { | ||
| return body, false | ||
| } | ||
|
|
||
| if json.Valid(trimmed) { | ||
| return body, false | ||
| } | ||
|
|
||
| if normalized, ok := normalizeByLooseTokenRepair(trimmed); ok { | ||
| return normalized, true | ||
| } | ||
|
|
||
| var parsed any | ||
| if err := hjson.Unmarshal(trimmed, &parsed); err != nil { | ||
| // YAML is a secondary fallback, tried only after the hjson parser has rejected the payload. | ||
| // NOTE(review): YAML may accept input that is not JSON-like; confirm that converting such input to JSON is intended. | ||
| if err := yaml.Unmarshal(trimmed, &parsed); err != nil { | ||
| return body, false | ||
| } | ||
| } | ||
|
Comment on lines
+41
to
+48
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. YAML fallback can silently transform non-JSON-like YAML into JSON. If hjson fails but YAML succeeds, inputs like With 🤖 Prompt for AI Agents |
||
|
|
||
| normalized, ok := toJSONCompatible(parsed) | ||
| if !ok { | ||
| return body, false | ||
| } | ||
|
|
||
| switch normalized.(type) { | ||
| case map[string]any, []any: | ||
| // supported: only a top-level object or array is re-encoded below | ||
| default: | ||
| return body, false | ||
| } | ||
|
|
||
| normalizedBytes, err := json.Marshal(normalized) | ||
| if err != nil { | ||
| return body, false | ||
| } | ||
|
|
||
| return normalizedBytes, true | ||
| } | ||
|
|
||
| func normalizeByLooseTokenRepair(trimmed []byte) ([]byte, bool) { | ||
| repaired := jsonKeyPattern.ReplaceAll(trimmed, []byte(`${1}"${2}"${3}`)) | ||
| repaired = quoteBarewordTokens(repaired, barewordValuePattern) | ||
| repaired = quoteBarewordTokens(repaired, barewordArrayItemPattern) | ||
| if !json.Valid(repaired) { | ||
| return nil, false | ||
| } | ||
| return repaired, true | ||
| } | ||
|
|
||
| func quoteBarewordTokens(input []byte, pattern *regexp.Regexp) []byte { | ||
| indices := pattern.FindAllSubmatchIndex(input, -1) | ||
| if len(indices) == 0 { | ||
| return input | ||
| } | ||
|
|
||
| var out bytes.Buffer | ||
| last := 0 | ||
| for _, idx := range indices { | ||
| fullStart, fullEnd := idx[0], idx[1] | ||
| prefixStart, prefixEnd := idx[2], idx[3] | ||
| tokenStart, tokenEnd := idx[4], idx[5] | ||
| suffixStart, suffixEnd := idx[6], idx[7] | ||
|
|
||
| token := string(input[tokenStart:tokenEnd]) | ||
| out.Write(input[last:fullStart]) | ||
| out.Write(input[prefixStart:prefixEnd]) | ||
| if shouldQuoteBarewordToken(token) { | ||
| out.WriteByte('"') | ||
| out.Write(input[tokenStart:tokenEnd]) | ||
| out.WriteByte('"') | ||
| } else { | ||
| out.Write(input[tokenStart:tokenEnd]) | ||
| } | ||
| out.Write(input[suffixStart:suffixEnd]) | ||
| last = fullEnd | ||
| } | ||
| out.Write(input[last:]) | ||
| return out.Bytes() | ||
| } | ||
|
|
||
| func shouldQuoteBarewordToken(token string) bool { | ||
| switch token { | ||
| case "true", "false", "null": | ||
| return false | ||
| } | ||
| return !strictJSONNumberPattern.MatchString(token) | ||
| } | ||
|
|
||
// shouldAttemptJSONNormalization reports whether a request body is a candidate
// for lenient-JSON repair.
//
// trimmed is the body with surrounding whitespace removed; it must start with
// '{' or '['. contentType is the raw Content-Type header value. A blank header
// is accepted. Otherwise the media type, with parameters such as
// "; charset=utf-8" removed, must be "application/json" or end in "+json"
// (case-insensitive).
//
// Only the media type is compared, not the whole header, so a value such as
// "text/plain; note=application/json" or "application/json-seq" is rejected.
func shouldAttemptJSONNormalization(trimmed []byte, contentType string) bool {
	if len(trimmed) == 0 {
		return false
	}
	if first := trimmed[0]; first != '{' && first != '[' {
		return false
	}

	header := strings.TrimSpace(contentType)
	if header == "" {
		return true
	}

	// Everything after the first ';' is a parameter list, not the media type.
	mediaType, _, _ := strings.Cut(header, ";")
	mediaType = strings.ToLower(strings.TrimSpace(mediaType))
	return mediaType == "application/json" || strings.HasSuffix(mediaType, "+json")
}
|
|
||
// toJSONCompatible converts a decoded value tree into one built only from
// map[string]any, []any and scalar values.
//
// Scalars (nil, bool, string, the integer and float kinds, json.Number) are
// returned as they are. Slices and maps are converted element by element, and
// a map[any]any is accepted only when every key is a string. Any other type is
// round-tripped through json.Marshal and json.Unmarshal.
//
// The second result is false when a map key is not a string or when the JSON
// round trip fails; the first result is nil in that case.
func toJSONCompatible(v any) (any, bool) {
	switch typed := v.(type) {
	case nil, bool, string,
		int, int8, int16, int32, int64,
		uint, uint8, uint16, uint32, uint64,
		float32, float64, json.Number:
		return typed, true

	case []any:
		items := make([]any, 0, len(typed))
		for _, element := range typed {
			item, ok := toJSONCompatible(element)
			if !ok {
				return nil, false
			}
			items = append(items, item)
		}
		return items, true

	case map[string]any:
		object := make(map[string]any, len(typed))
		for name, element := range typed {
			member, ok := toJSONCompatible(element)
			if !ok {
				return nil, false
			}
			object[name] = member
		}
		return object, true

	case map[any]any:
		object := make(map[string]any, len(typed))
		for rawName, element := range typed {
			name, isString := rawName.(string)
			if !isString {
				return nil, false
			}
			member, ok := toJSONCompatible(element)
			if !ok {
				return nil, false
			}
			object[name] = member
		}
		return object, true
	}

	// Unknown type: let encoding/json decide how it is represented.
	encoded, err := json.Marshal(v)
	if err != nil {
		return nil, false
	}
	var decoded any
	if err := json.Unmarshal(encoded, &decoded); err != nil {
		return nil, false
	}
	return decoded, true
}
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,72 @@ | ||
| package utils | ||
|
|
||
| import ( | ||
| "encoding/json" | ||
| "reflect" | ||
| "testing" | ||
| ) | ||
|
|
||
| func TestNormalizeJSONRequestBodyValidJSONNoChange(t *testing.T) { | ||
| original := []byte(`{"model":"mistral-large-latest","max_tokens":16}`) | ||
| got, normalized := NormalizeJSONRequestBody(original, "application/json") | ||
|
|
||
| if normalized { | ||
| t.Fatalf("expected normalized=false, got true") | ||
| } | ||
| if string(got) != string(original) { | ||
| t.Fatalf("expected body unchanged, got %s", string(got)) | ||
| } | ||
| } | ||
|
|
||
| func TestNormalizeJSONRequestBodyLenientObject(t *testing.T) { | ||
| original := []byte(`{messages:[{content:ping,role:user}],model:mistral-large-latest,max_tokens:16}`) | ||
| got, normalized := NormalizeJSONRequestBody(original, "application/json") | ||
|
|
||
| if !normalized { | ||
| t.Fatalf("expected normalized=true, got false") | ||
| } | ||
|
|
||
| var parsed map[string]any | ||
| if err := json.Unmarshal(got, &parsed); err != nil { | ||
| t.Fatalf("normalized output is not valid JSON: %v", err) | ||
| } | ||
|
|
||
| expected := map[string]any{ | ||
| "model": "mistral-large-latest", | ||
| "max_tokens": float64(16), | ||
| "messages": []any{ | ||
| map[string]any{ | ||
| "content": "ping", | ||
| "role": "user", | ||
| }, | ||
| }, | ||
| } | ||
|
|
||
| if !reflect.DeepEqual(parsed, expected) { | ||
| t.Fatalf("unexpected parsed JSON.\nexpected: %#v\ngot: %#v", expected, parsed) | ||
| } | ||
| } | ||
|
|
||
| func TestNormalizeJSONRequestBodySkipsNonJSONContentType(t *testing.T) { | ||
| original := []byte(`{messages:[{content:ping,role:user}],model:mistral-large-latest,max_tokens:16}`) | ||
| got, normalized := NormalizeJSONRequestBody(original, "text/plain") | ||
|
|
||
| if normalized { | ||
| t.Fatalf("expected normalized=false for text/plain") | ||
| } | ||
| if string(got) != string(original) { | ||
| t.Fatalf("expected body unchanged, got %s", string(got)) | ||
| } | ||
| } | ||
|
|
||
| func TestNormalizeJSONRequestBodyInvalidGarbage(t *testing.T) { | ||
| original := []byte(`{this is not parseable:::`) | ||
| got, normalized := NormalizeJSONRequestBody(original, "application/json") | ||
|
|
||
| if normalized { | ||
| t.Fatalf("expected normalized=false for invalid garbage") | ||
| } | ||
| if string(got) != string(original) { | ||
| t.Fatalf("expected body unchanged, got %s", string(got)) | ||
| } | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Regex-based array item quoting silently skips middle elements in arrays with 3+ bareword items.
`barewordArrayItemPattern`'s suffix group `(\s*[\],])` consumes the delimiter (`,`), which the next match needs as its prefix `([\[,]\s*)`. Since `FindAllSubmatchIndex` returns non-overlapping matches, for input like `[a,b,c]` the engine matches `[a,` and `,c]` but skips `b` entirely — producing `["a",b,"c"]` (invalid JSON).
The overall `NormalizeJSONRequestBody` function still works because the invalid result fails `json.Valid` and falls through to hjson/yaml. However, if you want the regex fast path to succeed for these inputs, the suffix group should use a lookahead instead of consuming the delimiter.
Proposed fix using a zero-width lookahead
Note: Go's `regexp` (RE2) does not support lookaheads. So either:
If option 2, add a brief comment on line 16 documenting this known limitation.
🤖 Prompt for AI Agents