Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,27 @@ if [ -z "$OWNER" ] || [ -z "$REPO" ]; then
exit 1
fi

# Fetch review threads, regular PR comments, and review bodies in one query.
# Output is a JSON object with four keys:
# review_threads - unresolved, non-outdated inline code review threads
# pr_comments - top-level PR conversation comments (excludes PR author and known review bots)
# review_bodies - review submissions with non-empty body text (excludes PR author and known review bots)
# review_threads - unresolved inline review threads, edge-wrapped as
# [{ node: { id, isResolved, isOutdated, path, line, ...,
# comments: { nodes: [...] } } }]
# pr_comments - top-level PR conversation comments (excludes PR author
# and known CI/status bots)
# review_bodies - review submissions with non-empty body text (same
# filtering as pr_comments)
# cross_invocation - cross-invocation awareness envelope:
# signal: true when both resolved and unresolved threads exist (multi-round review)
# resolved_threads: last N resolved threads by recency, for cluster analysis input
# resolved_threads: last 10 resolved threads by recency, for cluster analysis input
#
# Pagination (issue #798): each top-level connection -- reviewThreads,
# comments, reviews -- is fetched in its own paginated query because
# `gh api graphql --paginate` only follows the outermost pageInfo per
# response. Combining them into one query (as this script previously did)
# silently dropped everything past page 1 on long-lived PRs and made the
# skill report "0 of 0 resolved" while real findings sat unanswered.
# Per-thread inline `comments` are fetched up to 100 per thread without
# follow-up pagination; threads that exceed 100 comments are rare and out of
# scope for this fix.
#
# Bot filtering: only CI/status bots (codecov, etc.) are filtered at the source.
# Their output is structurally never actionable -- coverage numbers, build
Expand All @@ -45,84 +58,110 @@ fi
# has a content-aware actionability check and Silent Drop rule that handles
# wrappers correctly, so we trust that layer instead. Add new logins to the CI
# list only if their output is structurally non-actionable like codecov's.
gh api graphql -f owner="$OWNER" -f repo="$REPO" -F pr="$PR_NUMBER" -f query='
query FetchPRFeedback($owner: String!, $repo: String!, $pr: Int!) {

threads_pages=$(gh api graphql --paginate --slurp \
-f owner="$OWNER" -f repo="$REPO" -F pr="$PR_NUMBER" \
-f query='
query Threads($owner: String!, $repo: String!, $pr: Int!, $endCursor: String) {
repository(owner: $owner, name: $repo) {
pullRequest(number: $pr) {
author { login }
reviewThreads(first: 50) {
edges {
node {
id
isResolved
isOutdated
path
line
originalLine
startLine
originalStartLine
comments(first: 10) {
nodes {
id
author { login }
body
createdAt
url
}
reviewThreads(first: 100, after: $endCursor) {
nodes {
id
isResolved
isOutdated
path
line
originalLine
startLine
originalStartLine
comments(first: 100) {
nodes {
id
author { login }
body
createdAt
url
}
}
}
pageInfo { hasNextPage endCursor }
}
comments(first: 100) {
}
}
}')

comments_pages=$(gh api graphql --paginate --slurp \
-f owner="$OWNER" -f repo="$REPO" -F pr="$PR_NUMBER" \
-f query='
query Comments($owner: String!, $repo: String!, $pr: Int!, $endCursor: String) {
repository(owner: $owner, name: $repo) {
pullRequest(number: $pr) {
comments(first: 100, after: $endCursor) {
nodes {
id
author { login }
body
}
pageInfo { hasNextPage endCursor }
}
reviews(first: 50) {
}
}
}')

reviews_pages=$(gh api graphql --paginate --slurp \
-f owner="$OWNER" -f repo="$REPO" -F pr="$PR_NUMBER" \
-f query='
query Reviews($owner: String!, $repo: String!, $pr: Int!, $endCursor: String) {
repository(owner: $owner, name: $repo) {
pullRequest(number: $pr) {
reviews(first: 100, after: $endCursor) {
nodes {
id
author { login }
body
state
}
pageInfo { hasNextPage endCursor }
}
}
}
}' | jq '.data.repository.pullRequest as $pr |
# Structurally non-actionable bot output; always dropped.
}')

# Resolution semantics: `isOutdated` means the diff hunk around the comment
# has shifted since the thread was opened -- not that the reviewer concern
# was addressed. Resolution state is the only authoritative signal; outdated
# threads are still surfaced (with their isOutdated flag intact) so the
# resolver can factor in that the referenced line may have moved.
jq -n \
--argjson threads "$threads_pages" \
--argjson comments "$comments_pages" \
--argjson reviews "$reviews_pages" '
($threads[0].data.repository.pullRequest.author) as $author |
[$threads[].data.repository.pullRequest.reviewThreads.nodes[]] as $all_threads |
[$comments[].data.repository.pullRequest.comments.nodes[]] as $all_comments |
[$reviews[].data.repository.pullRequest.reviews.nodes[]] as $all_reviews |
["codecov"] as $ci_bot_logins |
# Unresolved threads. `isOutdated` means the diff hunk around the comment
# has shifted since the thread was opened -- not that the reviewer concern
# was addressed. Resolution state is the only authoritative signal; outdated
# threads are still surfaced (with their isOutdated flag intact) so the
# resolver can factor in that the referenced line may have moved.
[$pr.reviewThreads.edges[]
| select(.node.isResolved == false)] as $unresolved |
# Resolved threads for cross-invocation awareness (last 10 by most recent comment)
[$pr.reviewThreads.edges[]
| select(.node.isResolved == true)
| { thread_id: .node.id, path: .node.path, line: .node.line,
first_comment_body: .node.comments.nodes[0].body,
last_comment_at: ([.node.comments.nodes[].createdAt] | sort | last) }]
| sort_by(.last_comment_at) | .[-10:] | reverse as $resolved |
{
review_threads: $unresolved,
pr_comments: [$pr.comments.nodes[]
| select(.author.login != $pr.author.login)
| select(
.author.login as $l | $ci_bot_logins | index($l) | not
)
| select(.body | test("^\\s*$") | not)],
review_bodies: [$pr.reviews.nodes[]
| select(.body != null and .body != "")
| select(.author.login != $pr.author.login)
| select(
.author.login as $l | $ci_bot_logins | index($l) | not
)],
cross_invocation: {
signal: (($resolved | length) > 0 and ($unresolved | length) > 0),
resolved_threads: $resolved
}
}'
[$all_threads[] | select(.isResolved == false)] as $unresolved |
([$all_threads[]
| select(.isResolved == true)
| { thread_id: .id, path: .path, line: .line,
first_comment_body: .comments.nodes[0].body,
last_comment_at: ([.comments.nodes[].createdAt] | sort | last) }]
| sort_by(.last_comment_at) | .[-10:] | reverse) as $resolved |
{
review_threads: [$unresolved[] | { node: . }],
pr_comments: [$all_comments[]
| select(.author.login != $author.login)
| select(.author.login as $l | $ci_bot_logins | index($l) | not)
| select(.body | test("^\\s*$") | not)],
review_bodies: [$all_reviews[]
| select(.body != null and .body != "")
| select(.author.login != $author.login)
| select(.author.login as $l | $ci_bot_logins | index($l) | not)],
cross_invocation: {
signal: (($resolved | length) > 0 and ($unresolved | length) > 0),
resolved_threads: $resolved
}
}'
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
#!/usr/bin/env bash

# Maps a PR review comment node ID to its parent thread.
# Fetches thread IDs and first comment IDs to find the match,
# then returns the matching thread with full comment details.
# Fetches all review threads (paginated) and their comments, then returns the
# thread whose comments contain the target ID.

set -e

Expand All @@ -28,11 +28,17 @@ if [ -z "$OWNER" ] || [ -z "$REPO" ]; then
exit 1
fi

gh api graphql -f owner="$OWNER" -f repo="$REPO" -F pr="$PR_NUMBER" -f query='
query($owner: String!, $repo: String!, $pr: Int!) {
# Pagination (issue #798): paginate the reviewThreads connection so PRs with
# more than one page of threads can still resolve a comment to its parent
# thread. Per-thread comments are still capped at 100 -- threads exceeding
# that depth are not paginated here.
threads_pages=$(gh api graphql --paginate --slurp \
-f owner="$OWNER" -f repo="$REPO" -F pr="$PR_NUMBER" \
-f query='
query Threads($owner: String!, $repo: String!, $pr: Int!, $endCursor: String) {
repository(owner: $owner, name: $repo) {
pullRequest(number: $pr) {
reviewThreads(first: 100) {
reviewThreads(first: 100, after: $endCursor) {
nodes {
id
isResolved
Expand All @@ -52,11 +58,14 @@ query($owner: String!, $repo: String!, $pr: Int!) {
}
}
}
pageInfo { hasNextPage endCursor }
}
}
}
}' | jq -e --arg cid "$COMMENT_NODE_ID" '
[.data.repository.pullRequest.reviewThreads.nodes[]
| select(.comments.nodes | map(.id) | index($cid))]
}')

echo "$threads_pages" | jq -e --arg cid "$COMMENT_NODE_ID" '
[.[].data.repository.pullRequest.reviewThreads.nodes[]
| select(.comments.nodes | map(.id) | index($cid))]
| if length == 0 then error("No thread found for comment \($cid)") else .[0] end
'
69 changes: 69 additions & 0 deletions tests/resolve-pr-feedback-pagination.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
import { readFileSync } from "fs"
import path from "path"
import { describe, expect, test } from "bun:test"

/**
* The two GraphQL fetch scripts in ce-resolve-pr-feedback must paginate.
*
* Issue #798: previous versions used fixed `first: N` page sizes with no
* cursor loop, so PRs with more than one page of review threads / comments /
* reviews silently dropped everything past page 1. The skill then reported
* "0 of 0 resolved" while real findings sat unanswered.
*
* `gh api graphql --paginate` follows only one `pageInfo` per response, so
* `get-pr-comments` must issue a separate paginated query for each top-level
* connection. `get-thread-for-comment` paginates the single `reviewThreads`
* connection it queries.
*/

// Location on disk of the ce-resolve-pr-feedback helper scripts under test;
// resolved from the repository root, where the test runner is invoked.
const SCRIPTS_DIR = path.join(
  process.cwd(),
  "plugins/compound-engineering/skills/ce-resolve-pr-feedback/scripts",
)

// The pageInfo selection each paginated GraphQL query must include so that
// `gh api graphql --paginate` can follow the cursor loop across pages.
// NOTE: `.source` of this regex is reused below to rebuild it with the `g`
// flag, so it must stay flag-free here.
const PAGE_INFO_SELECTION = /pageInfo\s*\{\s*hasNextPage\s+endCursor\s*\}/

// Loads one helper script from SCRIPTS_DIR and returns its full text.
function read(scriptName: string): string {
  const scriptPath = path.join(SCRIPTS_DIR, scriptName)
  return readFileSync(scriptPath, "utf8")
}

describe("ce-resolve-pr-feedback scripts paginate GraphQL connections (issue #798)", () => {
  test("get-pr-comments uses --paginate for every top-level connection", () => {
    const script = read("get-pr-comments")
    // Each of the three top-level connections needs its own paginated query;
    // count the invocations rather than just checking for one.
    const paginatedCalls = script.match(/gh api graphql --paginate\b/g) ?? []
    expect(
      paginatedCalls.length,
      "get-pr-comments must issue three paginated queries (reviewThreads, comments, reviews); `gh api graphql --paginate` only follows the outermost pageInfo per response, so combining them in one query silently drops everything past page 1.",
    ).toBeGreaterThanOrEqual(3)
  })

  test("get-pr-comments paginates every connection it queries", () => {
    const script = read("get-pr-comments")
    // Every connection the script selects must thread the cursor through.
    ;["reviewThreads", "comments", "reviews"].forEach((connection) => {
      const cursorCall = new RegExp(`${connection}\\(first:\\s*\\d+,\\s*after:\\s*\\$endCursor\\)`)
      expect(
        cursorCall.test(script),
        `get-pr-comments must call ${connection}(first: N, after: $endCursor); fixed page sizes truncate on long-lived PRs.`,
      ).toBe(true)
    })
  })

  test("get-pr-comments selects pageInfo { hasNextPage endCursor } in each query", () => {
    const script = read("get-pr-comments")
    // Rebuild the shared selection regex with the global flag to count
    // occurrences — one per paginated query.
    const selections = script.match(new RegExp(PAGE_INFO_SELECTION.source, "g")) ?? []
    expect(
      selections.length,
      "Each paginated GraphQL query must select pageInfo { hasNextPage endCursor } so `gh api graphql --paginate` can drive the cursor loop.",
    ).toBeGreaterThanOrEqual(3)
  })

  test("get-thread-for-comment paginates the reviewThreads connection", () => {
    const script = read("get-thread-for-comment")
    expect(
      script,
      "get-thread-for-comment must paginate reviewThreads, otherwise comment lookups fail on PRs with >100 threads.",
    ).toMatch(/gh api graphql --paginate\b/)
    expect(script).toMatch(/reviewThreads\(first:\s*\d+,\s*after:\s*\$endCursor\)/)
    expect(script).toMatch(PAGE_INFO_SELECTION)
  })
})