Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ require (
github.com/glebarez/sqlite v1.11.0
github.com/go-sql-driver/mysql v1.8.1
github.com/google/uuid v1.6.0
github.com/hjson/hjson-go/v4 v4.6.0
github.com/jackc/pgx/v5 v5.6.0
github.com/joho/godotenv v1.5.1
github.com/klauspost/compress v1.18.1
Expand All @@ -21,6 +22,7 @@ require (
go.uber.org/dig v1.19.0
golang.org/x/crypto v0.37.0
golang.org/x/text v0.28.0
gopkg.in/yaml.v3 v3.0.1
gorm.io/datatypes v1.2.1
gorm.io/driver/mysql v1.6.0
gorm.io/driver/postgres v1.6.0
Expand Down Expand Up @@ -63,7 +65,6 @@ require (
golang.org/x/sync v0.16.0 // indirect
golang.org/x/sys v0.32.0 // indirect
google.golang.org/protobuf v1.36.6 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
modernc.org/libc v1.22.5 // indirect
modernc.org/mathutil v1.5.0 // indirect
modernc.org/memory v1.5.0 // indirect
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@ github.com/google/pprof v0.0.0-20221118152302-e6195bd50e26 h1:Xim43kblpZXfIBQsbu
github.com/google/pprof v0.0.0-20221118152302-e6195bd50e26/go.mod h1:dDKJzRmX4S37WGHujM7tX//fmj1uioxKzKxz3lo4HJo=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/hjson/hjson-go/v4 v4.6.0 h1:16e6ViyVfAANKsXo/46h8szUADez7FJs67xl/l+KHS4=
github.com/hjson/hjson-go/v4 v4.6.0/go.mod h1:4zx6c7Y0vWcm8IRyVoQJUHAPJLXLvbG6X8nk1RLigSo=
github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM=
github.com/jackc/pgpassfile v1.0.0/go.mod h1:CEx0iS5ambNFdcRtxPj5JhEz+xB6uRky5eyVu/W2HEg=
github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 h1:iCEnooe7UlwOQYpKFhBabPMi4aNAfoODPEFNiAnClxo=
Expand Down
7 changes: 7 additions & 0 deletions internal/proxy/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,13 @@ func (ps *ProxyServer) HandleProxy(c *gin.Context) {
}
c.Request.Body.Close()

if normalizedBody, normalized := utils.NormalizeJSONRequestBody(bodyBytes, c.GetHeader("Content-Type")); normalized {
logrus.WithFields(logrus.Fields{
"group": group.Name,
}).Debug("request body normalized from lenient JSON")
bodyBytes = normalizedBody
}

finalBodyBytes, err := ps.applyParamOverrides(bodyBytes, group)
if err != nil {
response.Error(c, app_errors.NewAPIError(app_errors.ErrInternalServer, fmt.Sprintf("Failed to apply parameter overrides: %v", err)))
Expand Down
188 changes: 188 additions & 0 deletions internal/utils/json_normalizer.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
package utils

import (
"bytes"
"encoding/json"
"regexp"
"strings"

hjson "github.com/hjson/hjson-go/v4"
"gopkg.in/yaml.v3"
)

var (
// jsonKeyPattern matches an unquoted object key: an identifier that starts
// with a letter or '_' (then letters, digits, '_' or '-'), preceded by '{' or
// ',' and followed by ':'. Groups: (1) leading delimiter plus whitespace,
// (2) the key, (3) whitespace plus the colon.
jsonKeyPattern = regexp.MustCompile(`([{,]\s*)([A-Za-z_][A-Za-z0-9_-]*)(\s*:)`)
// barewordValuePattern matches an unquoted scalar that follows ':' and is
// terminated by ',', '}' or ']'. The token may not start with a digit.
// Groups: (1) colon plus whitespace, (2) the token, (3) whitespace plus the
// trailing delimiter.
barewordValuePattern = regexp.MustCompile(`(:\s*)([A-Za-z_./:@?&=%+~\-][A-Za-z0-9_./:@?&=%+~\-]*)(\s*[,}\]])`)
// barewordArrayItemPattern matches an unquoted scalar that follows '[' or ','
// and is terminated by ']' or ','. Same group layout as barewordValuePattern.
// The trailing delimiter is part of the match, so in a single non-overlapping
// scan the comma that ends one item cannot also begin the next item.
barewordArrayItemPattern = regexp.MustCompile(`([\[,]\s*)([A-Za-z_./:@?&=%+~\-][A-Za-z0-9_./:@?&=%+~\-]*)(\s*[\],])`)
// strictJSONNumberPattern matches a whole string that is a JSON number
// literal: optional '-', integer part without leading zeros, optional
// fraction, optional exponent.
strictJSONNumberPattern = regexp.MustCompile(`^-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?$`)
)
Comment on lines +14 to +18
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Regex-based array item quoting silently skips middle elements in arrays with 3+ bareword items.

barewordArrayItemPattern's suffix group (\s*[\],]) consumes the delimiter (,), which the next match needs as its prefix ([\[,]\s*). Since FindAllSubmatchIndex returns non-overlapping matches, for input like [a,b,c], the engine matches [a, and ,c] but skips b entirely — producing ["a",b,"c"] (invalid JSON).

The overall NormalizeJSONRequestBody function still works because the invalid result fails json.Valid and falls through to hjson/yaml. However, if you want the regex fast-path to succeed for these inputs, the suffix group should use a lookahead instead of consuming the delimiter.

Proposed fix using a zero-width lookahead
-	barewordArrayItemPattern = regexp.MustCompile(`([\[,]\s*)([A-Za-z_./:@?&=%+~\-][A-Za-z0-9_./:@?&=%+~\-]*)(\s*[\],])`)
+	barewordArrayItemPattern = regexp.MustCompile(`([\[,]\s*)([A-Za-z_./:@?&=%+~\-][A-Za-z0-9_./:@?&=%+~\-]*)(\s*(?=[\],]))`)

Note: Go's regexp (RE2) does not support lookaheads. So either:

  1. Apply the replacement iteratively (loop until stable), or
  2. Accept the current behavior since hjson/yaml fallback covers these cases.

If option 2, add a brief comment on line 16 documenting this known limitation.

🤖 Prompt for AI Agents
In `@internal/utils/json_normalizer.go` around lines 14 - 18, The
barewordArrayItemPattern regex consumes the array delimiter causing middle items
to be skipped (e.g., [a,b,c] -> ["a",b,"c"]), so update NormalizeJSONRequestBody
to avoid relying on a lookahead (RE2 doesn't support it): either replace matches
iteratively until the string stabilizes (apply the barewordArrayItemPattern
replacement in a loop until no changes) so adjacent items are quoted correctly,
or keep the current regex but add a concise comment next to
barewordArrayItemPattern and in NormalizeJSONRequestBody documenting this RE2
limitation and that hjson/yaml fallback will handle such cases; reference
barewordArrayItemPattern and NormalizeJSONRequestBody when making the change.


// NormalizeJSONRequestBody attempts to normalize non-standard JSON-like bodies
// (for example unquoted object keys) into strict JSON bytes.
//
// contentType is the request's Content-Type value; together with the first
// non-space byte of body it decides whether normalization is attempted at all
// (see shouldAttemptJSONNormalization).
//
// Strategies are tried in order: regex-based token repair, then the hjson
// parser, then the YAML parser.
//
// Returns (normalizedBytes, true) when normalization succeeds. Returns the
// original body and false when the body is empty, is not eligible, is already
// valid JSON, or cannot be repaired.
func NormalizeJSONRequestBody(body []byte, contentType string) ([]byte, bool) {
trimmed := bytes.TrimSpace(body)
if len(trimmed) == 0 {
return body, false
}

if !shouldAttemptJSONNormalization(trimmed, contentType) {
return body, false
}

// Already strict JSON: report "not normalized" and hand back the caller's
// original slice untouched.
if json.Valid(trimmed) {
return body, false
}

// Fast path: quote bare keys/values with regexes and accept the result only
// if it validates as JSON.
if normalized, ok := normalizeByLooseTokenRepair(trimmed); ok {
return normalized, true
}

var parsed any
if err := hjson.Unmarshal(trimmed, &parsed); err != nil {
// YAML is kept as a secondary fallback for simple JSON-like payloads
// that are not accepted by hjson parser.
// NOTE(review): YAML scalar typing rules are not the same as JSON's, so a
// bareword may decode to a different type than the sender intended —
// confirm the behavior for inputs such as {yes: no} with a test.
if err := yaml.Unmarshal(trimmed, &parsed); err != nil {
return body, false
}
}
Comment on lines +41 to +48
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

YAML fallback can silently transform non-JSON-like YAML into JSON.

If hjson fails but YAML succeeds, arbitrary YAML such as `key: value\nother: thing` could in principle be parsed, although the `shouldAttemptJSONNormalization` gate (which requires a `{` or `[` prefix) has already run by this point and rules that particular input out. That gate mitigates most of the risk here, but YAML can still interpret JSON-like-but-semantically-different constructs (e.g., `{yes: no}` → `{"true": false}` due to YAML 1.1 bool coercion).

With gopkg.in/yaml.v3, the YAML 1.1 bool quirk (yes/no → bool) is mostly resolved, but on/off or y/n may still be affected depending on context. Worth a brief comment or a test case.

🤖 Prompt for AI Agents
In `@internal/utils/json_normalizer.go` around lines 41 - 48, The YAML fallback in
the json normalization path can silently coerce values (e.g., `{yes: no}` →
boolean) when hjson.Unmarshal fails; update the json_normalizer.go code near the
hjson.Unmarshal / yaml.Unmarshal block (references: parsed variable,
hjson.Unmarshal, yaml.Unmarshal, and shouldAttemptJSONNormalization) to include
a brief inline comment warning about YAML 1.1 coercion quirks and why the
fallback is limited, and add a unit test (e.g., in json_normalizer_test) that
exercises a JSON-like input such as `{yes: no}` to assert the normalized output
behavior so future changes don’t accidentally rely on YAML coercions.


// Convert the decoded tree into values encoding/json can marshal (for example
// map[any]any with string keys becomes map[string]any).
normalized, ok := toJSONCompatible(parsed)
if !ok {
return body, false
}

// Only a top-level object or array counts as a normalized body; any other
// decoded value is rejected and the original body is returned.
switch normalized.(type) {
case map[string]any, []any:
// supported
default:
return body, false
}

normalizedBytes, err := json.Marshal(normalized)
if err != nil {
return body, false
}

return normalizedBytes, true
}

// normalizeByLooseTokenRepair tries to turn a lenient JSON-like document into
// strict JSON using regular expressions only.
//
// It first wraps unquoted object keys in double quotes, then runs
// quoteBarewordTokens for bareword values and for bareword array items. The
// repaired bytes are returned with true only if they validate as JSON;
// otherwise the result is (nil, false) and the caller is expected to fall back
// to another strategy.
func normalizeByLooseTokenRepair(trimmed []byte) ([]byte, bool) {
	candidate := jsonKeyPattern.ReplaceAll(trimmed, []byte(`${1}"${2}"${3}`))

	// Values must be handled before array items, matching the original order.
	for _, pattern := range []*regexp.Regexp{barewordValuePattern, barewordArrayItemPattern} {
		candidate = quoteBarewordTokens(candidate, pattern)
	}

	if json.Valid(candidate) {
		return candidate, true
	}
	return nil, false
}

// quoteBarewordTokens wraps the bareword tokens matched by pattern in double
// quotes so that they become JSON strings.
//
// pattern must define three adjacent capture groups that together span the
// whole match: (1) leading delimiter, (2) token, (3) trailing delimiter.
// Tokens for which shouldQuoteBarewordToken reports false are copied through
// unchanged.
//
// The trailing delimiter of one match may also be the leading delimiter of the
// next, as the commas in "[a,b,c]" are. The regexp FindAll* helpers return
// only non-overlapping matches and would skip every second item in such a
// list, so the scan is driven manually and resumes at the start of the suffix
// group instead of at the end of the match.
//
// input is never modified; it is returned as-is when nothing matches.
func quoteBarewordTokens(input []byte, pattern *regexp.Regexp) []byte {
	var out bytes.Buffer
	matched := false
	emitted := 0 // input[:emitted] has already been copied into out

	for scan := 0; scan < len(input); {
		idx := pattern.FindSubmatchIndex(input[scan:])
		if idx == nil {
			break
		}
		matched = true

		// Submatch indices are relative to input[scan:]; rebase them.
		tokenStart, tokenEnd := scan+idx[4], scan+idx[5]
		suffixStart := scan + idx[6]

		// Everything up to the token (including the prefix group) is copied
		// verbatim; the suffix is emitted later, either as part of the next
		// match's prefix or by the final tail copy.
		out.Write(input[emitted:tokenStart])
		token := input[tokenStart:tokenEnd]
		if shouldQuoteBarewordToken(string(token)) {
			out.WriteByte('"')
			out.Write(token)
			out.WriteByte('"')
		} else {
			out.Write(token)
		}
		emitted = tokenEnd

		// Resume where the suffix begins so its delimiter can open the next
		// match. The guards keep the scan moving forward (and never behind
		// what was already emitted) even for an unexpected pattern shape.
		next := suffixStart
		if next < tokenEnd {
			next = tokenEnd
		}
		if next <= scan {
			next = scan + 1
		}
		scan = next
	}

	if !matched {
		return input
	}
	out.Write(input[emitted:])
	return out.Bytes()
}

// shouldQuoteBarewordToken reports whether token has to be wrapped in quotes to
// be valid JSON. The literals true, false and null, and anything that matches
// strictJSONNumberPattern, are already valid and are left bare.
func shouldQuoteBarewordToken(token string) bool {
	if token == "true" || token == "false" || token == "null" {
		return false
	}
	if strictJSONNumberPattern.MatchString(token) {
		return false
	}
	return true
}

// shouldAttemptJSONNormalization decides whether a body is a candidate for
// lenient-JSON repair.
//
// trimmed is the body with surrounding whitespace removed; it must begin with
// '{' or '['. contentType is the Content-Type header value: a blank value is
// accepted, otherwise it must contain "application/json" or "+json"
// (compared case-insensitively).
func shouldAttemptJSONNormalization(trimmed []byte, contentType string) bool {
	if len(trimmed) == 0 {
		return false
	}

	switch trimmed[0] {
	case '{', '[':
		// Looks like the start of an object or array; keep going.
	default:
		return false
	}

	mediaType := strings.ToLower(strings.TrimSpace(contentType))
	switch {
	case mediaType == "":
		return true
	case strings.Contains(mediaType, "application/json"):
		return true
	default:
		return strings.Contains(mediaType, "+json")
	}
}

// toJSONCompatible recursively converts a decoded value into a tree built only
// from types encoding/json marshals directly.
//
//   - Scalars (nil, bool, string, the sized and unsized integer and float
//     types, json.Number) are returned unchanged.
//   - []any and map[string]any are rebuilt with every element converted.
//   - map[any]any is rebuilt as map[string]any; a non-string key makes the
//     conversion fail.
//   - Any other value is round-tripped through json.Marshal/json.Unmarshal.
//
// The second result is false when any part of the value cannot be converted;
// the first result is nil in that case.
func toJSONCompatible(v any) (any, bool) {
	switch typed := v.(type) {
	case nil, bool, string,
		int, int8, int16, int32, int64,
		uint, uint8, uint16, uint32, uint64,
		float32, float64, json.Number:
		return typed, true

	case []any:
		items := make([]any, 0, len(typed))
		for _, elem := range typed {
			conv, ok := toJSONCompatible(elem)
			if !ok {
				return nil, false
			}
			items = append(items, conv)
		}
		return items, true

	case map[string]any:
		obj := make(map[string]any, len(typed))
		for name, elem := range typed {
			conv, ok := toJSONCompatible(elem)
			if !ok {
				return nil, false
			}
			obj[name] = conv
		}
		return obj, true

	case map[any]any:
		obj := make(map[string]any, len(typed))
		for rawKey, elem := range typed {
			name, isString := rawKey.(string)
			if !isString {
				return nil, false
			}
			conv, ok := toJSONCompatible(elem)
			if !ok {
				return nil, false
			}
			obj[name] = conv
		}
		return obj, true

	default:
		// Unknown type: let encoding/json decide how (and whether) it can be
		// represented, then decode it back into generic values.
		var decoded any
		raw, err := json.Marshal(typed)
		if err == nil {
			err = json.Unmarshal(raw, &decoded)
		}
		if err != nil {
			return nil, false
		}
		return decoded, true
	}
}
72 changes: 72 additions & 0 deletions internal/utils/json_normalizer_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
package utils

import (
"encoding/json"
"reflect"
"testing"
)

// TestNormalizeJSONRequestBodyValidJSONNoChange checks that a body which is
// already strict JSON is reported as not normalized and comes back unchanged.
func TestNormalizeJSONRequestBodyValidJSONNoChange(t *testing.T) {
	const payload = `{"model":"mistral-large-latest","max_tokens":16}`

	out, changed := NormalizeJSONRequestBody([]byte(payload), "application/json")
	if changed {
		t.Fatalf("expected normalized=false, got true")
	}
	if body := string(out); body != payload {
		t.Fatalf("expected body unchanged, got %s", body)
	}
}

// TestNormalizeJSONRequestBodyLenientObject checks that an object with
// unquoted keys and bareword values is normalized into strict JSON that
// decodes to the expected structure.
func TestNormalizeJSONRequestBodyLenientObject(t *testing.T) {
	const payload = `{messages:[{content:ping,role:user}],model:mistral-large-latest,max_tokens:16}`

	out, changed := NormalizeJSONRequestBody([]byte(payload), "application/json")
	if !changed {
		t.Fatalf("expected normalized=true, got false")
	}

	var decoded map[string]any
	if err := json.Unmarshal(out, &decoded); err != nil {
		t.Fatalf("normalized output is not valid JSON: %v", err)
	}

	message := map[string]any{
		"content": "ping",
		"role":    "user",
	}
	want := map[string]any{
		"messages":   []any{message},
		"model":      "mistral-large-latest",
		"max_tokens": float64(16), // encoding/json decodes numbers into float64
	}

	if reflect.DeepEqual(decoded, want) {
		return
	}
	t.Fatalf("unexpected parsed JSON.\nexpected: %#v\ngot: %#v", want, decoded)
}

// TestNormalizeJSONRequestBodySkipsNonJSONContentType checks that a lenient
// body is left alone when the Content-Type is not a JSON media type.
func TestNormalizeJSONRequestBodySkipsNonJSONContentType(t *testing.T) {
	const payload = `{messages:[{content:ping,role:user}],model:mistral-large-latest,max_tokens:16}`

	out, changed := NormalizeJSONRequestBody([]byte(payload), "text/plain")
	if changed {
		t.Fatalf("expected normalized=false for text/plain")
	}
	if body := string(out); body != payload {
		t.Fatalf("expected body unchanged, got %s", body)
	}
}

// TestNormalizeJSONRequestBodyInvalidGarbage checks that input no strategy can
// repair is reported as not normalized and comes back unchanged.
func TestNormalizeJSONRequestBodyInvalidGarbage(t *testing.T) {
	const payload = `{this is not parseable:::`

	out, changed := NormalizeJSONRequestBody([]byte(payload), "application/json")
	if changed {
		t.Fatalf("expected normalized=false for invalid garbage")
	}
	if body := string(out); body != payload {
		t.Fatalf("expected body unchanged, got %s", body)
	}
}