-
Notifications
You must be signed in to change notification settings - Fork 207
feat(graph): add MS Graph colon-syntax path lookup middleware #2688
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
dschmidt
wants to merge
9
commits into
opencloud-eu:main
Choose a base branch
from
dschmidt:feat/colon-path-middleware
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
Show all changes
9 commits
Select commit
Hold shift + click to select a range
b72f335
feat(graph): add MS Graph colon-syntax path lookup middleware
dschmidt 618dd2b
fix(graph): address Copilot review feedback on path lookup middleware
dschmidt 79d03f1
fix(graph): preserve RawPath workaround + use NopLogger in tests
dschmidt 884ab06
fix(graph): map UNAUTHENTICATED to 401, validate drive/item id pair
dschmidt 7378975
refactor(graph): drop MS Graph framing from colon-path middleware logs
dschmidt cdba6f9
test(graph): acceptance tests for MS Graph colon-syntax path lookup
dschmidt 74aa7f9
test(graph): pin chi URLParam round-trip for IDs with `!` sub-delim
dschmidt ec7a044
test(graph): add negative cases proving the colon-syntax regex doesn'…
dschmidt 72f1d58
fix(graph): anchor colon-path regex on the configured HTTP root
dschmidt File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,307 @@ | ||
| package middleware | ||
|
|
||
| import ( | ||
| "context" | ||
| "errors" | ||
| "fmt" | ||
| "net/http" | ||
| "regexp" | ||
| "strings" | ||
|
|
||
| gateway "github.com/cs3org/go-cs3apis/cs3/gateway/v1beta1" | ||
| cs3rpc "github.com/cs3org/go-cs3apis/cs3/rpc/v1beta1" | ||
| storageprovider "github.com/cs3org/go-cs3apis/cs3/storage/provider/v1beta1" | ||
|
|
||
| "github.com/rs/zerolog" | ||
|
|
||
| "github.com/opencloud-eu/opencloud/pkg/log" | ||
| "github.com/opencloud-eu/opencloud/services/graph/pkg/errorcode" | ||
| "github.com/opencloud-eu/reva/v2/pkg/rgrpc/todo/pool" | ||
| "github.com/opencloud-eu/reva/v2/pkg/storagespace" | ||
| "github.com/opencloud-eu/reva/v2/pkg/utils" | ||
| ) | ||
|
|
||
| // compileColonRegexes builds the two rewrite patterns with `httpRoot` baked | ||
| // in as a fixed prefix. The HTTP root is configurable (default "/graph"), | ||
| // so we accept it as input rather than embedding "/graph" in the regex. | ||
| // Anchoring on `^<httpRoot>/v1...` instead of `^.*?/v1...` prevents the | ||
| // regex from over-matching URLs where the colon-syntax shape appears as | ||
| // a sub-path of something else (e.g. `/foo/v1.0/drives/abc/root:/x`). | ||
| // | ||
| // Capture groups (root-anchored): | ||
| // 1: prefix up to and including the driveID | ||
| // 2: driveID | ||
| // 3: path (with leading slash) | ||
| // 4: suffix (with leading slash) — empty for direct item lookup | ||
| // | ||
| // Capture groups (item-anchored): | ||
| // 1: prefix up to and including the driveID | ||
| // 2: driveID | ||
| // 3: anchor itemID | ||
| // 4: path (with leading slash) | ||
| // 5: suffix (with leading slash) | ||
| func compileColonRegexes(httpRoot string) (root, item *regexp.Regexp) { | ||
| // Strip the trailing slash so the regex glues `/v1...` on cleanly. The | ||
| // graph service's defaults.Sanitize already trims trailing slashes for | ||
| // non-root values, but we still need to collapse "/" → "" ourselves so | ||
| // the root-mounted case doesn't produce "^//v1..." (which never matches). | ||
| httpRoot = strings.TrimRight(httpRoot, "/") | ||
| prefix := `^(` + regexp.QuoteMeta(httpRoot) + `/v1(?:\.0|beta1)/drives/([^/]+))` | ||
| root = regexp.MustCompile(prefix + `/root:(/.+?)(?::(/[^:]+))?:?$`) | ||
| item = regexp.MustCompile(prefix + `/items/([^/]+):(/.+?)(?::(/[^:]+))?:?$`) | ||
| return root, item | ||
| } | ||
|
|
||
| type contextKey string | ||
|
|
||
| // OriginalPathContextKey holds the pre-rewrite request path for downstream | ||
| // tracing/logging consumers. | ||
| const OriginalPathContextKey contextKey = "graph.original_path" | ||
|
|
||
| // Sentinels distinguishing the resolution outcomes that map to specific HTTP | ||
| // statuses. Anything else surfaces as 500. | ||
| // | ||
| // errPathNotFound — path doesn't exist or the user lacks permission to | ||
| // see it. Both collapse to 404 (no existence disclosure). | ||
| // errInvalidRequest — client sent a malformed input (unparseable drive/item | ||
| // id, drive/item mismatch in item-anchored form). 400. | ||
| // errUnauthenticated — the gateway said the caller isn't authenticated for | ||
| // the lookup (token expired, cross-storage auth, etc.). 401. | ||
| var ( | ||
| errPathNotFound = errors.New("path not found") | ||
| errInvalidRequest = errors.New("invalid request") | ||
| errUnauthenticated = errors.New("unauthenticated") | ||
| ) | ||
|
|
||
| // ResolveGraphPath returns middleware that detects colon-syntax path | ||
| // lookup URLs and rewrites them to the canonical | ||
| // /{version}/drives/{driveID}/items/{resolvedItemID}{suffix} form before | ||
| // chi performs route matching. The requested API version is preserved | ||
| // (for example, "v1.0" or "v1beta1"). | ||
| // | ||
| // Two URL shapes are recognized: | ||
| // | ||
| // /{version}/drives/{driveID}/root:/<path>[:/<suffix>][:] | ||
| // /{version}/drives/{driveID}/items/{itemID}:/<path>[:/<suffix>][:] | ||
| // | ||
| // Path resolution runs as the request user via CS3 Stat. NOT_FOUND and | ||
| // PERMISSION_DENIED collapse to 404 (no existence disclosure); operational | ||
| // failures (gateway selection, RPC transport, unexpected status) surface | ||
| // as 5xx so outages aren't masked. | ||
| // | ||
| // URLs without colon syntax fast-path through with a single substring check. | ||
| func ResolveGraphPath(httpRoot string, gws pool.Selectable[gateway.GatewayAPIClient], logger log.Logger) func(http.Handler) http.Handler { | ||
| l := logger.With().Str("middleware", "graphPathLookup").Logger() | ||
| rootColonRe, itemColonRe := compileColonRegexes(httpRoot) | ||
| return func(next http.Handler) http.Handler { | ||
| return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { | ||
| // Fast-path: skip URLs that can't be colon-syntax. | ||
| if !strings.Contains(r.URL.Path, ":") { | ||
| next.ServeHTTP(w, r) | ||
| return | ||
| } | ||
|
|
||
| original := r.URL.Path | ||
| rewritten, err := rewriteColonPath(r.Context(), rootColonRe, itemColonRe, gws, l, original) | ||
| switch { | ||
| case errors.Is(err, errPathNotFound): | ||
| l.Debug().Str("original", original).Msg("colon-path resolution: not found") | ||
| errorcode.ItemNotFound.Render(w, r, http.StatusNotFound, "item not found") | ||
| return | ||
| case errors.Is(err, errInvalidRequest): | ||
| l.Debug().Str("original", original).Msg("colon-path resolution: invalid request") | ||
| errorcode.InvalidRequest.Render(w, r, http.StatusBadRequest, "invalid request") | ||
| return | ||
| case errors.Is(err, errUnauthenticated): | ||
| l.Debug().Str("original", original).Msg("colon-path resolution: unauthenticated") | ||
| errorcode.Unauthenticated.Render(w, r, http.StatusUnauthorized, "unauthenticated") | ||
| return | ||
| case err != nil: | ||
| l.Error().Err(err).Str("original", original).Msg("colon-path resolution: internal error") | ||
| errorcode.GeneralException.Render( | ||
| w, r, http.StatusInternalServerError, "internal error resolving path", | ||
| ) | ||
| return | ||
| case rewritten == "": | ||
| // No colon-syntax match — pass through untouched. | ||
| next.ServeHTTP(w, r) | ||
| return | ||
| } | ||
|
|
||
| l.Debug(). | ||
| Str("original", original). | ||
| Str("rewritten", rewritten). | ||
| Msg("colon-path resolution: rewrote") | ||
|
|
||
| ctx := context.WithValue(r.Context(), OriginalPathContextKey, original) | ||
| r = r.WithContext(ctx) | ||
| r.URL.Path = rewritten | ||
| // Match the chi-escaping workaround in Graph.ServeHTTP: RawPath | ||
| // must be a valid encoded form of Path so chi's tree match (which | ||
| // reads RawPath when non-empty) routes the rewritten URL the same | ||
| // way it routes the original. The previous RawPath (set by | ||
| // Graph.ServeHTTP) was for the pre-rewrite Path and no longer | ||
| // matches; EscapedPath recomputes a canonical encoding for the | ||
| // new Path. | ||
| // | ||
| // Drive/item IDs in this codebase have the shape | ||
| // {storage}${space}!{opaque}. EscapedPath leaves `$` literal (not | ||
| // escaped in path segments per RFC 3986) but encodes `!` as `%21`. | ||
| // chi.URLParam therefore returns the param with `%21`; downstream | ||
| // handlers (e.g. parseIDParam, GetDriveAndItemIDParam) already | ||
| // call url.PathUnescape before parsing the ID, so the round-trip | ||
| // works. See go-chi/chi#641 for the underlying chi behavior. | ||
| r.URL.RawPath = r.URL.EscapedPath() | ||
| next.ServeHTTP(w, r) | ||
|
dschmidt marked this conversation as resolved.
|
||
| }) | ||
| } | ||
| } | ||
|
|
||
| // colonMatch is the normalized result of matching either the root- or | ||
| // item-anchored colon-syntax regex. The two regexes capture different | ||
| // groups; this struct erases that difference for downstream resolution. | ||
| type colonMatch struct { | ||
| prefix string // canonical prefix up to and including /drives/{driveID} | ||
| driveIDStr string // captured driveID for item-anchored form (empty for root-anchored, where anchorIDStr already IS the driveID) | ||
| anchorIDStr string // resource id to anchor path resolution (driveID for root, itemID for item-anchored) | ||
| relPath string // relative path with leading slash | ||
| suffix string // suffix with leading slash (e.g. "/children"); may be empty | ||
| } | ||
|
|
||
| // rewriteColonPath returns: | ||
| // - "" + nil — no colon-syntax pattern matched (passthrough) | ||
| // - rewritten + nil — matched and resolved to a canonical URL | ||
| // - "" + errPathNotFound — path doesn't exist or user lacks permission (404) | ||
| // - "" + errInvalidRequest — malformed input (400) | ||
| // - "" + errUnauthenticated — gateway said caller isn't authenticated (401) | ||
| // - "" + other error — operational / internal failure (5xx) | ||
| func rewriteColonPath( | ||
| ctx context.Context, | ||
| rootColonRe, itemColonRe *regexp.Regexp, | ||
| gws pool.Selectable[gateway.GatewayAPIClient], | ||
| logger zerolog.Logger, | ||
| urlPath string, | ||
| ) (string, error) { | ||
| var match colonMatch | ||
| switch { | ||
| case matchInto(rootColonRe, urlPath, &match, rootMatchExtract): | ||
| case matchInto(itemColonRe, urlPath, &match, itemMatchExtract): | ||
| default: | ||
| return "", nil | ||
| } | ||
|
|
||
| // r.URL.Path is already URL-decoded by net/http; do NOT call url.PathUnescape | ||
| // here, that would double-decode and let crafted inputs like "%252F" become | ||
| // "/", changing path semantics. | ||
| anchor, err := storagespace.ParseID(match.anchorIDStr) | ||
| if err != nil { | ||
| // Unparseable input is malformed by the client, not "not found". | ||
| logger.Debug().Err(err).Str("anchor", match.anchorIDStr).Msg("invalid anchor id in colon path") | ||
| return "", errInvalidRequest | ||
| } | ||
|
|
||
| // Item-anchored form ("/drives/{driveID}/items/{itemID}:/...") captures | ||
| // driveID separately. Validate the itemID belongs to the given driveID | ||
| // (storage + space prefix) — otherwise the request is malformed and we | ||
| // short-circuit instead of doing an unnecessary CS3 Stat. | ||
| if match.driveIDStr != "" { | ||
| drive, err := storagespace.ParseID(match.driveIDStr) | ||
| if err != nil { | ||
| logger.Debug().Err(err).Str("driveID", match.driveIDStr).Msg("invalid drive id in colon path") | ||
| return "", errInvalidRequest | ||
| } | ||
| if drive.GetStorageId() != anchor.GetStorageId() || drive.GetSpaceId() != anchor.GetSpaceId() { | ||
| logger.Debug(). | ||
| Str("driveID", match.driveIDStr). | ||
| Str("itemID", match.anchorIDStr). | ||
| Msg("drive id does not match item id storage/space") | ||
| return "", errInvalidRequest | ||
| } | ||
| } | ||
|
|
||
| itemID, err := resolvePath(ctx, gws, logger, &anchor, match.relPath) | ||
| if err != nil { | ||
| return "", err | ||
| } | ||
| return buildCanonicalPath(match.prefix, itemID, match.suffix), nil | ||
| } | ||
|
|
||
| // matchInto runs the regex; on a hit, populates *out via extract and returns true. | ||
| // A small indirection that lets the switch in rewriteColonPath stay flat. | ||
| func matchInto(re *regexp.Regexp, s string, out *colonMatch, extract func([]string) colonMatch) bool { | ||
| m := re.FindStringSubmatch(s) | ||
| if m == nil { | ||
| return false | ||
| } | ||
| *out = extract(m) | ||
| return true | ||
| } | ||
|
|
||
| func rootMatchExtract(m []string) colonMatch { | ||
| // Root-anchored: anchorIDStr IS the driveID, so leave driveIDStr empty | ||
| // to skip the drive/item match check (it would compare driveID to itself). | ||
| return colonMatch{prefix: m[1], anchorIDStr: m[2], relPath: m[3], suffix: m[4]} | ||
| } | ||
|
|
||
| func itemMatchExtract(m []string) colonMatch { | ||
| // Item-anchored: capture driveID (m[2]) so the resolver can validate it | ||
| // matches the itemID's storage/space. | ||
| return colonMatch{prefix: m[1], driveIDStr: m[2], anchorIDStr: m[3], relPath: m[4], suffix: m[5]} | ||
| } | ||
|
dschmidt marked this conversation as resolved.
|
||
|
|
||
| // resolvePath translates a relative filesystem path (anchored at the given | ||
| // CS3 resource id) into the resolved item's id, running with the request | ||
| // user's permissions via CS3 Stat. | ||
| func resolvePath( | ||
| ctx context.Context, | ||
| gws pool.Selectable[gateway.GatewayAPIClient], | ||
| logger zerolog.Logger, | ||
| anchor *storageprovider.ResourceId, | ||
| rawPath string, | ||
| ) (string, error) { | ||
| gw, err := gws.Next() | ||
| if err != nil { | ||
| return "", fmt.Errorf("gateway selector: %w", err) | ||
| } | ||
|
|
||
| // rawPath comes from r.URL.Path which is already decoded by net/http — | ||
| // no extra unescape (would double-decode crafted "%25xx" inputs). | ||
| statRes, err := gw.Stat(ctx, &storageprovider.StatRequest{ | ||
| Ref: &storageprovider.Reference{ | ||
| ResourceId: anchor, | ||
| Path: utils.MakeRelativePath(rawPath), | ||
| }, | ||
| }) | ||
| if err != nil { | ||
| return "", fmt.Errorf("CS3 Stat: %w", err) | ||
| } | ||
|
|
||
| switch statRes.GetStatus().GetCode() { | ||
| case cs3rpc.Code_CODE_OK: | ||
| // fall through | ||
| case cs3rpc.Code_CODE_NOT_FOUND, cs3rpc.Code_CODE_PERMISSION_DENIED: | ||
| return "", errPathNotFound | ||
| case cs3rpc.Code_CODE_UNAUTHENTICATED: | ||
| return "", errUnauthenticated | ||
| default: | ||
| return "", fmt.Errorf( | ||
| "CS3 Stat returned %s: %s", | ||
| statRes.GetStatus().GetCode(), | ||
| statRes.GetStatus().GetMessage(), | ||
| ) | ||
|
dschmidt marked this conversation as resolved.
|
||
| } | ||
|
|
||
| id := statRes.GetInfo().GetId() | ||
| if id == nil { | ||
| return "", fmt.Errorf("CS3 Stat returned OK but missing Info.Id") | ||
| } | ||
| return storagespace.FormatResourceID(id), nil | ||
| } | ||
|
|
||
| func buildCanonicalPath(prefix, itemID, suffix string) string { | ||
| // r.URL.Path is the decoded form per Go's net/http convention; chi tree | ||
| // matching reads it directly. Insert the raw itemID without escaping so | ||
| // chi's {itemID} param captures the same string the handler will see | ||
| // after parseIDParam unescapes (no-op for already-decoded chars). | ||
| return prefix + "/items/" + itemID + suffix | ||
| } | ||
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.