From 0f3a8bd053fc20655da6e707c3a1a3e4a7c5fe0b Mon Sep 17 00:00:00 2001 From: Trevin Chow Date: Fri, 8 May 2026 15:14:06 -0700 Subject: [PATCH] fix(ce-resolve-pr-feedback): paginate GraphQL connections MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On long-lived PRs that accumulated more than one page of review threads, the skill silently dropped everything past page 1 of each GraphQL connection and reported "0 of 0 resolved" while real findings sat unanswered. The two GraphQL fetch scripts now paginate. `gh api graphql --paginate` follows only one `pageInfo` per response, so `get-pr-comments` issues three separate paginated queries (reviewThreads, comments, reviews) and assembles the result before running the existing filter; the inline per-thread comment cap is also bumped from 10 to 100 (without follow-up pagination — threads exceeding that depth are rare and out of scope here). `get-thread-for-comment` paginates reviewThreads the same way. A new tests/resolve-pr-feedback-pagination.test.ts enforces the contract: each top-level connection is paginated with `after: $endCursor` and selects `pageInfo { hasNextPage endCursor }`. Fixes #798. --- .../scripts/get-pr-comments | 165 +++++++++++------- .../scripts/get-thread-for-comment | 25 ++- tests/resolve-pr-feedback-pagination.test.ts | 69 ++++++++ 3 files changed, 188 insertions(+), 71 deletions(-) create mode 100644 tests/resolve-pr-feedback-pagination.test.ts diff --git a/plugins/compound-engineering/skills/ce-resolve-pr-feedback/scripts/get-pr-comments b/plugins/compound-engineering/skills/ce-resolve-pr-feedback/scripts/get-pr-comments index 1aa15ac37..f89760086 100755 --- a/plugins/compound-engineering/skills/ce-resolve-pr-feedback/scripts/get-pr-comments +++ b/plugins/compound-engineering/skills/ce-resolve-pr-feedback/scripts/get-pr-comments @@ -24,14 +24,27 @@ if [ -z "$OWNER" ] || [ -z "$REPO" ]; then exit 1 fi -# Fetch review threads, regular PR comments, and review bodies in one query. # Output is a JSON object with four keys: -# review_threads - unresolved, non-outdated inline code review threads -# pr_comments - top-level PR conversation comments (excludes PR author and known review bots) -# review_bodies - review submissions with non-empty body text (excludes PR author and known review bots) +# review_threads - unresolved inline review threads, edge-wrapped as +# [{ node: { id, isResolved, isOutdated, path, line, ..., +# comments: { nodes: [...] } } }] +# pr_comments - top-level PR conversation comments (excludes PR author +# and known CI/status bots) +# review_bodies - review submissions with non-empty body text (same +# filtering as pr_comments) # cross_invocation - cross-invocation awareness envelope: # signal: true when both resolved and unresolved threads exist (multi-round review) -# resolved_threads: last N resolved threads by recency, for cluster analysis input +# resolved_threads: last 10 resolved threads by recency, for cluster analysis input +# +# Pagination (issue #798): each top-level connection -- reviewThreads, +# comments, reviews -- is fetched in its own paginated query because +# `gh api graphql --paginate` only follows the outermost pageInfo per +# response. Combining them into one query (as this script previously did) +# silently dropped everything past page 1 on long-lived PRs and made the +# skill report "0 of 0 resolved" while real findings sat unanswered. +# Per-thread inline `comments` are fetched up to 100 per thread without +# follow-up pagination; threads that exceed 100 comments are rare and out of +# scope for this fix. # # Bot filtering: only CI/status bots (codecov, etc.) are filtered at the source. # Their output is structurally never actionable -- coverage numbers, build @@ -45,84 +58,110 @@ fi # has a content-aware actionability check and Silent Drop rule that handles # wrappers correctly, so we trust that layer instead. Add new logins to the CI # list only if their output is structurally non-actionable like codecov's. -gh api graphql -f owner="$OWNER" -f repo="$REPO" -F pr="$PR_NUMBER" -f query=' -query FetchPRFeedback($owner: String!, $repo: String!, $pr: Int!) { + +threads_pages=$(gh api graphql --paginate --slurp \ + -f owner="$OWNER" -f repo="$REPO" -F pr="$PR_NUMBER" \ + -f query=' +query Threads($owner: String!, $repo: String!, $pr: Int!, $endCursor: String) { repository(owner: $owner, name: $repo) { pullRequest(number: $pr) { author { login } - reviewThreads(first: 50) { - edges { - node { - id - isResolved - isOutdated - path - line - originalLine - startLine - originalStartLine - comments(first: 10) { - nodes { - id - author { login } - body - createdAt - url - } + reviewThreads(first: 100, after: $endCursor) { + nodes { + id + isResolved + isOutdated + path + line + originalLine + startLine + originalStartLine + comments(first: 100) { + nodes { + id + author { login } + body + createdAt + url } } } + pageInfo { hasNextPage endCursor } } - comments(first: 100) { + } + } +}') + +comments_pages=$(gh api graphql --paginate --slurp \ + -f owner="$OWNER" -f repo="$REPO" -F pr="$PR_NUMBER" \ + -f query=' +query Comments($owner: String!, $repo: String!, $pr: Int!, $endCursor: String) { + repository(owner: $owner, name: $repo) { + pullRequest(number: $pr) { + comments(first: 100, after: $endCursor) { nodes { id author { login } body } + pageInfo { hasNextPage endCursor } } - reviews(first: 50) { + } + } +}') + +reviews_pages=$(gh api graphql --paginate --slurp \ + -f owner="$OWNER" -f repo="$REPO" -F pr="$PR_NUMBER" \ + -f query=' +query Reviews($owner: String!, $repo: String!, $pr: Int!, $endCursor: String) { + repository(owner: $owner, name: $repo) { + pullRequest(number: $pr) { + reviews(first: 100, after: $endCursor) { nodes { id author { login } body state } + pageInfo { hasNextPage endCursor } } } } -}' | jq '.data.repository.pullRequest as $pr | - # Structurally non-actionable bot output; always dropped. +}') + +# Resolution semantics: `isOutdated` means the diff hunk around the comment +# has shifted since the thread was opened -- not that the reviewer concern +# was addressed. Resolution state is the only authoritative signal; outdated +# threads are still surfaced (with their isOutdated flag intact) so the +# resolver can factor in that the referenced line may have moved. +jq -n \ + --argjson threads "$threads_pages" \ + --argjson comments "$comments_pages" \ + --argjson reviews "$reviews_pages" ' + ($threads[0].data.repository.pullRequest.author) as $author | + [$threads[].data.repository.pullRequest.reviewThreads.nodes[]] as $all_threads | + [$comments[].data.repository.pullRequest.comments.nodes[]] as $all_comments | + [$reviews[].data.repository.pullRequest.reviews.nodes[]] as $all_reviews | ["codecov"] as $ci_bot_logins | - # Unresolved threads. `isOutdated` means the diff hunk around the comment - # has shifted since the thread was opened -- not that the reviewer concern - # was addressed. Resolution state is the only authoritative signal; outdated - # threads are still surfaced (with their isOutdated flag intact) so the - # resolver can factor in that the referenced line may have moved. - [$pr.reviewThreads.edges[] - | select(.node.isResolved == false)] as $unresolved | - # Resolved threads for cross-invocation awareness (last 10 by most recent comment) - [$pr.reviewThreads.edges[] - | select(.node.isResolved == true) - | { thread_id: .node.id, path: .node.path, line: .node.line, - first_comment_body: .node.comments.nodes[0].body, - last_comment_at: ([.node.comments.nodes[].createdAt] | sort | last) }] - | sort_by(.last_comment_at) | .[-10:] | reverse as $resolved | -{ - review_threads: $unresolved, - pr_comments: [$pr.comments.nodes[] - | select(.author.login != $pr.author.login) - | select( - .author.login as $l | $ci_bot_logins | index($l) | not - ) - | select(.body | test("^\\s*$") | not)], - review_bodies: [$pr.reviews.nodes[] - | select(.body != null and .body != "") - | select(.author.login != $pr.author.login) - | select( - .author.login as $l | $ci_bot_logins | index($l) | not - )], - cross_invocation: { - signal: (($resolved | length) > 0 and ($unresolved | length) > 0), - resolved_threads: $resolved - } -}' + [$all_threads[] | select(.isResolved == false)] as $unresolved | + ([$all_threads[] + | select(.isResolved == true) + | { thread_id: .id, path: .path, line: .line, + first_comment_body: .comments.nodes[0].body, + last_comment_at: ([.comments.nodes[].createdAt] | sort | last) }] + | sort_by(.last_comment_at) | .[-10:] | reverse) as $resolved | + { + review_threads: [$unresolved[] | { node: . }], + pr_comments: [$all_comments[] + | select(.author.login != $author.login) + | select(.author.login as $l | $ci_bot_logins | index($l) | not) + | select(.body | test("^\\s*$") | not)], + review_bodies: [$all_reviews[] + | select(.body != null and .body != "") + | select(.author.login != $author.login) + | select(.author.login as $l | $ci_bot_logins | index($l) | not)], + cross_invocation: { + signal: (($resolved | length) > 0 and ($unresolved | length) > 0), + resolved_threads: $resolved + } + }' diff --git a/plugins/compound-engineering/skills/ce-resolve-pr-feedback/scripts/get-thread-for-comment b/plugins/compound-engineering/skills/ce-resolve-pr-feedback/scripts/get-thread-for-comment index 7f45f3e4d..7dadb8e1c 100755 --- a/plugins/compound-engineering/skills/ce-resolve-pr-feedback/scripts/get-thread-for-comment +++ b/plugins/compound-engineering/skills/ce-resolve-pr-feedback/scripts/get-thread-for-comment @@ -1,8 +1,8 @@ #!/usr/bin/env bash # Maps a PR review comment node ID to its parent thread. -# Fetches thread IDs and first comment IDs to find the match, -# then returns the matching thread with full comment details. +# Fetches all review threads (paginated) and their comments, then returns the +# thread whose comments contain the target ID. set -e @@ -28,11 +28,17 @@ if [ -z "$OWNER" ] || [ -z "$REPO" ]; then exit 1 fi -gh api graphql -f owner="$OWNER" -f repo="$REPO" -F pr="$PR_NUMBER" -f query=' -query($owner: String!, $repo: String!, $pr: Int!) { +# Pagination (issue #798): paginate the reviewThreads connection so PRs with +# more than one page of threads can still resolve a comment to its parent +# thread. Per-thread comments are still capped at 100 -- threads exceeding +# that depth are not paginated here. +threads_pages=$(gh api graphql --paginate --slurp \ + -f owner="$OWNER" -f repo="$REPO" -F pr="$PR_NUMBER" \ + -f query=' +query Threads($owner: String!, $repo: String!, $pr: Int!, $endCursor: String) { repository(owner: $owner, name: $repo) { pullRequest(number: $pr) { - reviewThreads(first: 100) { + reviewThreads(first: 100, after: $endCursor) { nodes { id isResolved @@ -52,11 +58,14 @@ query($owner: String!, $repo: String!, $pr: Int!) { } } } + pageInfo { hasNextPage endCursor } } } } -}' | jq -e --arg cid "$COMMENT_NODE_ID" ' - [.data.repository.pullRequest.reviewThreads.nodes[] - | select(.comments.nodes | map(.id) | index($cid))] +}') + +echo "$threads_pages" | jq -e --arg cid "$COMMENT_NODE_ID" ' + [.[].data.repository.pullRequest.reviewThreads.nodes[] + | select(.comments.nodes | map(.id) | index($cid))] | if length == 0 then error("No thread found for comment \($cid)") else .[0] end ' diff --git a/tests/resolve-pr-feedback-pagination.test.ts b/tests/resolve-pr-feedback-pagination.test.ts new file mode 100644 index 000000000..b8de9cf1f --- /dev/null +++ b/tests/resolve-pr-feedback-pagination.test.ts @@ -0,0 +1,69 @@ +import { readFileSync } from "fs" +import path from "path" +import { describe, expect, test } from "bun:test" + +/** + * The two GraphQL fetch scripts in ce-resolve-pr-feedback must paginate. + * + * Issue #798: previous versions used fixed `first: N` page sizes with no + * cursor loop, so PRs with more than one page of review threads / comments / + * reviews silently dropped everything past page 1. The skill then reported + * "0 of 0 resolved" while real findings sat unanswered. + * + * `gh api graphql --paginate` follows only one `pageInfo` per response, so + * `get-pr-comments` must issue a separate paginated query for each top-level + * connection. `get-thread-for-comment` paginates the single `reviewThreads` + * connection it queries. + */ + +const SCRIPTS_DIR = path.join( + process.cwd(), + "plugins/compound-engineering/skills/ce-resolve-pr-feedback/scripts", +) + +const PAGE_INFO_SELECTION = /pageInfo\s*\{\s*hasNextPage\s+endCursor\s*\}/ + +function read(name: string): string { + return readFileSync(path.join(SCRIPTS_DIR, name), "utf8") +} + +describe("ce-resolve-pr-feedback scripts paginate GraphQL connections (issue #798)", () => { + test("get-pr-comments uses --paginate for every top-level connection", () => { + const body = read("get-pr-comments") + const paginateCount = (body.match(/gh api graphql --paginate\b/g) ?? []).length + expect( + paginateCount, + "get-pr-comments must issue three paginated queries (reviewThreads, comments, reviews); `gh api graphql --paginate` only follows the outermost pageInfo per response, so combining them in one query silently drops everything past page 1.", + ).toBeGreaterThanOrEqual(3) + }) + + test("get-pr-comments paginates every connection it queries", () => { + const body = read("get-pr-comments") + for (const conn of ["reviewThreads", "comments", "reviews"]) { + const re = new RegExp(`${conn}\\(first:\\s*\\d+,\\s*after:\\s*\\$endCursor\\)`) + expect( + re.test(body), + `get-pr-comments must call ${conn}(first: N, after: $endCursor); fixed page sizes truncate on long-lived PRs.`, + ).toBe(true) + } + }) + + test("get-pr-comments selects pageInfo { hasNextPage endCursor } in each query", () => { + const body = read("get-pr-comments") + const matches = body.match(new RegExp(PAGE_INFO_SELECTION.source, "g")) ?? [] + expect( + matches.length, + "Each paginated GraphQL query must select pageInfo { hasNextPage endCursor } so `gh api graphql --paginate` can drive the cursor loop.", + ).toBeGreaterThanOrEqual(3) + }) + + test("get-thread-for-comment paginates the reviewThreads connection", () => { + const body = read("get-thread-for-comment") + expect( + body, + "get-thread-for-comment must paginate reviewThreads, otherwise comment lookups fail on PRs with >100 threads.", + ).toMatch(/gh api graphql --paginate\b/) + expect(body).toMatch(/reviewThreads\(first:\s*\d+,\s*after:\s*\$endCursor\)/) + expect(body).toMatch(PAGE_INFO_SELECTION) + }) +})