Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,27 @@ if [ -z "$OWNER" ] || [ -z "$REPO" ]; then
exit 1
fi

# Fetch review threads, regular PR comments, and review bodies in one query.
# Output is a JSON object with four keys:
# review_threads - unresolved, non-outdated inline code review threads
# pr_comments - top-level PR conversation comments (excludes PR author and known review bots)
# review_bodies - review submissions with non-empty body text (excludes PR author and known review bots)
# review_threads - unresolved inline review threads, edge-wrapped as
# [{ node: { id, isResolved, isOutdated, path, line, ...,
# comments: { nodes: [...] } } }]
# pr_comments - top-level PR conversation comments (excludes PR author
# and known CI/status bots)
# review_bodies - review submissions with non-empty body text (same
# filtering as pr_comments)
# cross_invocation - cross-invocation awareness envelope:
# signal: true when both resolved and unresolved threads exist (multi-round review)
# resolved_threads: last N resolved threads by recency, for cluster analysis input
# resolved_threads: last 10 resolved threads by recency, for cluster analysis input
#
# Pagination (issue #798): each top-level connection -- reviewThreads,
# comments, reviews -- is fetched in its own paginated query because
# `gh api graphql --paginate` only follows the outermost pageInfo per
# response. Combining them into one query (as this script previously did)
# silently dropped everything past page 1 on long-lived PRs and made the
# skill report "0 of 0 resolved" while real findings sat unanswered.
# Per-thread inline `comments` are fetched up to 100 per thread without
# follow-up pagination; threads that exceed 100 comments are rare and out of
# scope for this fix.
#
# Bot filtering: only CI/status bots (codecov, etc.) are filtered at the source.
# Their output is structurally never actionable -- coverage numbers, build
Expand All @@ -45,84 +58,110 @@ fi
# has a content-aware actionability check and Silent Drop rule that handles
# wrappers correctly, so we trust that layer instead. Add new logins to the CI
# list only if their output is structurally non-actionable like codecov's.
gh api graphql -f owner="$OWNER" -f repo="$REPO" -F pr="$PR_NUMBER" -f query='
query FetchPRFeedback($owner: String!, $repo: String!, $pr: Int!) {

threads_pages=$(gh api graphql --paginate --slurp \
-f owner="$OWNER" -f repo="$REPO" -F pr="$PR_NUMBER" \
-f query='
query Threads($owner: String!, $repo: String!, $pr: Int!, $endCursor: String) {
repository(owner: $owner, name: $repo) {
pullRequest(number: $pr) {
author { login }
reviewThreads(first: 50) {
edges {
node {
id
isResolved
isOutdated
path
line
originalLine
startLine
originalStartLine
comments(first: 10) {
nodes {
id
author { login }
body
createdAt
url
}
reviewThreads(first: 100, after: $endCursor) {
nodes {
id
isResolved
isOutdated
path
line
originalLine
startLine
originalStartLine
comments(first: 100) {
nodes {
id
author { login }
body
createdAt
url
}
}
}
pageInfo { hasNextPage endCursor }
}
comments(first: 100) {
}
}
}')

comments_pages=$(gh api graphql --paginate --slurp \
-f owner="$OWNER" -f repo="$REPO" -F pr="$PR_NUMBER" \
-f query='
query Comments($owner: String!, $repo: String!, $pr: Int!, $endCursor: String) {
repository(owner: $owner, name: $repo) {
pullRequest(number: $pr) {
comments(first: 100, after: $endCursor) {
nodes {
id
author { login }
body
}
pageInfo { hasNextPage endCursor }
}
reviews(first: 50) {
}
}
}')

reviews_pages=$(gh api graphql --paginate --slurp \
-f owner="$OWNER" -f repo="$REPO" -F pr="$PR_NUMBER" \
-f query='
query Reviews($owner: String!, $repo: String!, $pr: Int!, $endCursor: String) {
repository(owner: $owner, name: $repo) {
pullRequest(number: $pr) {
reviews(first: 100, after: $endCursor) {
nodes {
id
author { login }
body
state
}
pageInfo { hasNextPage endCursor }
}
}
}
}' | jq '.data.repository.pullRequest as $pr |
# Structurally non-actionable bot output; always dropped.
}')

# Resolution semantics: `isOutdated` means the diff hunk around the comment
# has shifted since the thread was opened -- not that the reviewer concern
# was addressed. Resolution state is the only authoritative signal; outdated
# threads are still surfaced (with their isOutdated flag intact) so the
# resolver can factor in that the referenced line may have moved.
jq -n \
--argjson threads "$threads_pages" \
--argjson comments "$comments_pages" \
--argjson reviews "$reviews_pages" '
($threads[0].data.repository.pullRequest.author) as $author |
[$threads[].data.repository.pullRequest.reviewThreads.nodes[]] as $all_threads |
[$comments[].data.repository.pullRequest.comments.nodes[]] as $all_comments |
[$reviews[].data.repository.pullRequest.reviews.nodes[]] as $all_reviews |
["codecov"] as $ci_bot_logins |
# Unresolved threads. `isOutdated` means the diff hunk around the comment
# has shifted since the thread was opened -- not that the reviewer concern
# was addressed. Resolution state is the only authoritative signal; outdated
# threads are still surfaced (with their isOutdated flag intact) so the
# resolver can factor in that the referenced line may have moved.
[$pr.reviewThreads.edges[]
| select(.node.isResolved == false)] as $unresolved |
# Resolved threads for cross-invocation awareness (last 10 by most recent comment)
[$pr.reviewThreads.edges[]
| select(.node.isResolved == true)
| { thread_id: .node.id, path: .node.path, line: .node.line,
first_comment_body: .node.comments.nodes[0].body,
last_comment_at: ([.node.comments.nodes[].createdAt] | sort | last) }]
| sort_by(.last_comment_at) | .[-10:] | reverse as $resolved |
{
review_threads: $unresolved,
pr_comments: [$pr.comments.nodes[]
| select(.author.login != $pr.author.login)
| select(
.author.login as $l | $ci_bot_logins | index($l) | not
)
| select(.body | test("^\\s*$") | not)],
review_bodies: [$pr.reviews.nodes[]
| select(.body != null and .body != "")
| select(.author.login != $pr.author.login)
| select(
.author.login as $l | $ci_bot_logins | index($l) | not
)],
cross_invocation: {
signal: (($resolved | length) > 0 and ($unresolved | length) > 0),
resolved_threads: $resolved
}
}'
[$all_threads[] | select(.isResolved == false)] as $unresolved |
([$all_threads[]
| select(.isResolved == true)
| { thread_id: .id, path: .path, line: .line,
first_comment_body: .comments.nodes[0].body,
last_comment_at: ([.comments.nodes[].createdAt] | sort | last) }]
| sort_by(.last_comment_at) | .[-10:] | reverse) as $resolved |
{
review_threads: [$unresolved[] | { node: . }],
pr_comments: [$all_comments[]
| select(.author.login != $author.login)
| select(.author.login as $l | $ci_bot_logins | index($l) | not)
| select(.body | test("^\\s*$") | not)],
review_bodies: [$all_reviews[]
| select(.body != null and .body != "")
| select(.author.login != $author.login)
| select(.author.login as $l | $ci_bot_logins | index($l) | not)],
cross_invocation: {
signal: (($resolved | length) > 0 and ($unresolved | length) > 0),
resolved_threads: $resolved
}
}'
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
#!/usr/bin/env bash

# Maps a PR review comment node ID to its parent thread.
# Fetches thread IDs and first comment IDs to find the match,
# then returns the matching thread with full comment details.
# Fetches all review threads (paginated) and their comments, then returns the
# thread whose comments contain the target ID.

set -e

Expand All @@ -28,11 +28,17 @@ if [ -z "$OWNER" ] || [ -z "$REPO" ]; then
exit 1
fi

gh api graphql -f owner="$OWNER" -f repo="$REPO" -F pr="$PR_NUMBER" -f query='
query($owner: String!, $repo: String!, $pr: Int!) {
# Pagination (issue #798): paginate the reviewThreads connection so PRs with
# more than one page of threads can still resolve a comment to its parent
# thread. Per-thread comments are still capped at 100 -- threads exceeding
# that depth are not paginated here.
threads_pages=$(gh api graphql --paginate --slurp \
-f owner="$OWNER" -f repo="$REPO" -F pr="$PR_NUMBER" \
-f query='
query Threads($owner: String!, $repo: String!, $pr: Int!, $endCursor: String) {
repository(owner: $owner, name: $repo) {
pullRequest(number: $pr) {
reviewThreads(first: 100) {
reviewThreads(first: 100, after: $endCursor) {
nodes {
id
isResolved
Expand All @@ -52,11 +58,14 @@ query($owner: String!, $repo: String!, $pr: Int!) {
}
}
}
pageInfo { hasNextPage endCursor }
}
}
}
}' | jq -e --arg cid "$COMMENT_NODE_ID" '
[.data.repository.pullRequest.reviewThreads.nodes[]
| select(.comments.nodes | map(.id) | index($cid))]
}')

echo "$threads_pages" | jq -e --arg cid "$COMMENT_NODE_ID" '
[.[].data.repository.pullRequest.reviewThreads.nodes[]
| select(.comments.nodes | map(.id) | index($cid))]
| if length == 0 then error("No thread found for comment \($cid)") else .[0] end
'
69 changes: 69 additions & 0 deletions tests/resolve-pr-feedback-pagination.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
import { readFileSync } from "fs"
import path from "path"
import { describe, expect, test } from "bun:test"

/**
* The two GraphQL fetch scripts in ce-resolve-pr-feedback must paginate.
*
* Issue #798: previous versions used fixed `first: N` page sizes with no
* cursor loop, so PRs with more than one page of review threads / comments /
* reviews silently dropped everything past page 1. The skill then reported
* "0 of 0 resolved" while real findings sat unanswered.
*
* `gh api graphql --paginate` follows only one `pageInfo` per response, so
* `get-pr-comments` must issue a separate paginated query for each top-level
* connection. `get-thread-for-comment` paginates the single `reviewThreads`
* connection it queries.
*/

// Location on disk of the ce-resolve-pr-feedback helper scripts under test;
// resolved from the repository root, where the test runner is invoked.
const SCRIPTS_DIR = path.join(
  process.cwd(),
  "plugins/compound-engineering/skills/ce-resolve-pr-feedback/scripts",
)

// The pageInfo selection each paginated GraphQL query must include so that
// `gh api graphql --paginate` can follow the cursor loop across pages.
// NOTE: `.source` of this regex is reused below to rebuild it with the `g`
// flag, so it must stay flag-free here.
const PAGE_INFO_SELECTION = /pageInfo\s*\{\s*hasNextPage\s+endCursor\s*\}/

// Loads one helper script from SCRIPTS_DIR and returns its full text.
function read(scriptName: string): string {
  const scriptPath = path.join(SCRIPTS_DIR, scriptName)
  return readFileSync(scriptPath, "utf8")
}

describe("ce-resolve-pr-feedback scripts paginate GraphQL connections (issue #798)", () => {
  test("get-pr-comments uses --paginate for every top-level connection", () => {
    const script = read("get-pr-comments")
    // Each of the three top-level connections needs its own paginated query;
    // count the invocations rather than just checking for one.
    const paginatedCalls = script.match(/gh api graphql --paginate\b/g) ?? []
    expect(
      paginatedCalls.length,
      "get-pr-comments must issue three paginated queries (reviewThreads, comments, reviews); `gh api graphql --paginate` only follows the outermost pageInfo per response, so combining them in one query silently drops everything past page 1.",
    ).toBeGreaterThanOrEqual(3)
  })

  test("get-pr-comments paginates every connection it queries", () => {
    const script = read("get-pr-comments")
    // Every connection the script selects must thread the cursor through.
    ;["reviewThreads", "comments", "reviews"].forEach((connection) => {
      const cursorCall = new RegExp(`${connection}\\(first:\\s*\\d+,\\s*after:\\s*\\$endCursor\\)`)
      expect(
        cursorCall.test(script),
        `get-pr-comments must call ${connection}(first: N, after: $endCursor); fixed page sizes truncate on long-lived PRs.`,
      ).toBe(true)
    })
  })

  test("get-pr-comments selects pageInfo { hasNextPage endCursor } in each query", () => {
    const script = read("get-pr-comments")
    // Rebuild the shared selection regex with the global flag to count
    // occurrences — one per paginated query.
    const selections = script.match(new RegExp(PAGE_INFO_SELECTION.source, "g")) ?? []
    expect(
      selections.length,
      "Each paginated GraphQL query must select pageInfo { hasNextPage endCursor } so `gh api graphql --paginate` can drive the cursor loop.",
    ).toBeGreaterThanOrEqual(3)
  })

  test("get-thread-for-comment paginates the reviewThreads connection", () => {
    const script = read("get-thread-for-comment")
    expect(
      script,
      "get-thread-for-comment must paginate reviewThreads, otherwise comment lookups fail on PRs with >100 threads.",
    ).toMatch(/gh api graphql --paginate\b/)
    expect(script).toMatch(/reviewThreads\(first:\s*\d+,\s*after:\s*\$endCursor\)/)
    expect(script).toMatch(PAGE_INFO_SELECTION)
  })
})