+///
+/// **Experimental.** This type is part of an experimental wire-protocol surface
+/// and may change or be removed in future SDK or CLI releases.
+///
+///
+#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
+pub enum AgentDiscoveryPathScope {
+ /// The user's personal agent configuration directory.
+ #[serde(rename = "user")]
+ User,
+ /// A project's repository agent directory.
+ #[serde(rename = "project")]
+ Project,
+ /// Unknown variant for forward compatibility.
+ #[default]
+ #[serde(other)]
+ Unknown,
+}
+
/// Where the agent definition was loaded from
///
///
@@ -16931,6 +17290,56 @@ pub enum InstalledPluginSourceUrlSource {
Url,
}
+/// Whether the target is a single file or a directory of instruction files.
+///
+///
+///
+/// **Experimental.** This type is part of an experimental wire-protocol surface
+/// and may change or be removed in future SDK or CLI releases.
+///
+///
+#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
+pub enum InstructionDiscoveryPathKind {
+ /// The target is a single instruction file.
+ #[serde(rename = "file")]
+ File,
+ /// The target is a directory that holds instruction files.
+ #[serde(rename = "directory")]
+ Directory,
+ /// Fallback selected via `#[serde(other)]` when the wire value matches no other variant; also this enum's `Default`.
+ #[default]
+ #[serde(other)]
+ Unknown,
+}
+
+/// Which tier this target belongs to.
+///
+///
+///
+/// **Experimental.** This type is part of an experimental wire-protocol surface
+/// and may change or be removed in future SDK or CLI releases.
+///
+///
+#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
+pub enum InstructionDiscoveryPathLocation {
+ /// Instructions live in user-level configuration.
+ #[serde(rename = "user")]
+ User,
+ /// Instructions live in repository-level configuration.
+ #[serde(rename = "repository")]
+ Repository,
+ /// Instructions live under the current working directory.
+ #[serde(rename = "working-directory")]
+ WorkingDirectory,
+ /// Instructions live in plugin-provided configuration.
+ #[serde(rename = "plugin")]
+ Plugin,
+ /// Fallback selected via `#[serde(other)]` when the wire value matches no other variant; also this enum's `Default`.
+ #[default]
+ #[serde(other)]
+ Unknown,
+}
+
/// Where this source lives — used for UI grouping
///
///
@@ -17455,6 +17864,53 @@ pub enum ModelPolicyState {
Unknown,
}
+/// Provider type. Defaults to "openai" for generic OpenAI-compatible APIs; note that the derived Rust `Default` is [`ProviderConfigType::Unknown`], not `Openai`.
+///
+///
+///
+/// **Experimental.** This type is part of an experimental wire-protocol surface
+/// and may change or be removed in future SDK or CLI releases.
+///
+///
+#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
+pub enum ProviderConfigType {
+ /// Generic OpenAI-compatible API.
+ #[serde(rename = "openai")]
+ Openai,
+ /// Azure OpenAI Service endpoint.
+ #[serde(rename = "azure")]
+ Azure,
+ /// Anthropic API endpoint.
+ #[serde(rename = "anthropic")]
+ Anthropic,
+ /// Fallback selected via `#[serde(other)]` when the wire value matches no other variant; also this enum's `Default`.
+ #[default]
+ #[serde(other)]
+ Unknown,
+}
+
+/// Wire API format (openai/azure only). Defaults to "completions"; note that the derived Rust `Default` is [`ProviderConfigWireApi::Unknown`], not `Completions`.
+///
+///
+///
+/// **Experimental.** This type is part of an experimental wire-protocol surface
+/// and may change or be removed in future SDK or CLI releases.
+///
+///
+#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
+pub enum ProviderConfigWireApi {
+ /// OpenAI Chat Completions wire format.
+ #[serde(rename = "completions")]
+ Completions,
+ /// OpenAI Responses API wire format.
+ #[serde(rename = "responses")]
+ Responses,
+ /// Fallback selected via `#[serde(other)]` when the wire value matches no other variant; also this enum's `Default`.
+ #[default]
+ #[serde(other)]
+ Unknown,
+}
+
/// Allowed values for the `OptionsUpdateAdditionalContentExclusionPolicyScope` enumeration.
///
///
@@ -18124,53 +18580,6 @@ pub enum PermissionsSetApproveAllSource {
Unknown,
}
-/// Provider type. Defaults to "openai" for generic OpenAI-compatible APIs.
-///
-///
-///
-/// **Experimental.** This type is part of an experimental wire-protocol surface
-/// and may change or be removed in future SDK or CLI releases.
-///
-///
-#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
-pub enum ProviderConfigType {
- /// Generic OpenAI-compatible API.
- #[serde(rename = "openai")]
- Openai,
- /// Azure OpenAI Service endpoint.
- #[serde(rename = "azure")]
- Azure,
- /// Anthropic API endpoint.
- #[serde(rename = "anthropic")]
- Anthropic,
- /// Unknown variant for forward compatibility.
- #[default]
- #[serde(other)]
- Unknown,
-}
-
-/// Wire API format (openai/azure only). Defaults to "completions".
-///
-///
-///
-/// **Experimental.** This type is part of an experimental wire-protocol surface
-/// and may change or be removed in future SDK or CLI releases.
-///
-///
-#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
-pub enum ProviderConfigWireApi {
- /// OpenAI Chat Completions wire format.
- #[serde(rename = "completions")]
- Completions,
- /// OpenAI Responses API wire format.
- #[serde(rename = "responses")]
- Responses,
- /// Unknown variant for forward compatibility.
- #[default]
- #[serde(other)]
- Unknown,
-}
-
/// Provider family. Matches the `type` field of a BYOK provider config.
///
///
@@ -18904,6 +19313,34 @@ pub enum ShellKillSignal {
Unknown,
}
+/// Which tier this directory belongs to.
+///
+///
+///
+/// **Experimental.** This type is part of an experimental wire-protocol surface
+/// and may change or be removed in future SDK or CLI releases.
+///
+///
+#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
+pub enum SkillDiscoveryScope {
+ /// A project's repository skill directory.
+ #[serde(rename = "project")]
+ Project,
+ /// The user's personal Copilot skill directory.
+ #[serde(rename = "personal-copilot")]
+ PersonalCopilot,
+ /// The user's personal agents skill directory.
+ #[serde(rename = "personal-agents")]
+ PersonalAgents,
+ /// A configured custom skill directory.
+ #[serde(rename = "custom")]
+ Custom,
+ /// Fallback selected via `#[serde(other)]` when the wire value matches no other variant; also this enum's `Default`.
+ #[default]
+ #[serde(other)]
+ Unknown,
+}
+
/// Agent prompt result discriminator
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
pub enum SlashCommandAgentPromptResultKind {
diff --git a/rust/src/generated/rpc.rs b/rust/src/generated/rpc.rs
index 22fd08904..82319dbbf 100644
--- a/rust/src/generated/rpc.rs
+++ b/rust/src/generated/rpc.rs
@@ -280,6 +280,37 @@ impl<'a> ClientRpcAgents<'a> {
.await?;
Ok(serde_json::from_value(_value)?)
}
+
+ /// Returns the canonical directories where a client may create custom agents that the runtime will recognize, including ones that do not exist yet. Project directories become active once created.
+ ///
+ /// Wire method: `agents.getDiscoveryPaths`.
+ ///
+ /// # Parameters
+ ///
+ /// * `params` - Optional project paths to include when enumerating agent discovery directories.
+ ///
+ /// # Returns
+ ///
+ /// Canonical locations where custom agents can be created so the runtime will recognize them. Fails if `params` cannot be converted to JSON, if the underlying `client.call` returns an error, or if the response does not deserialize into the result type.
+ ///
+ ///
+ ///
+ /// **Experimental.** This API is part of an experimental wire-protocol surface
+ /// and may change or be removed in future SDK or CLI releases. Pin both the
+ /// SDK and CLI versions if your code depends on it.
+ ///
+ ///
+ pub async fn get_discovery_paths(
+ &self,
+ params: AgentsGetDiscoveryPathsRequest,
+ ) -> Result
{
+ let wire_params = serde_json::to_value(params)?;
+ let _value = self
+ .client
+ .call(rpc_methods::AGENTS_GETDISCOVERYPATHS, Some(wire_params))
+ .await?;
+ Ok(serde_json::from_value(_value)?)
+ }
}
/// `instructions.*` RPCs.
@@ -319,6 +350,40 @@ impl<'a> ClientRpcInstructions<'a> {
.await?;
Ok(serde_json::from_value(_value)?)
}
+
+ /// Returns the canonical files and directories where a client may create custom instructions that the runtime will recognize, including ones that do not exist yet. Repository targets become active once created.
+ ///
+ /// Wire method: `instructions.getDiscoveryPaths`.
+ ///
+ /// # Parameters
+ ///
+ /// * `params` - Optional project paths to include when enumerating instruction discovery targets.
+ ///
+ /// # Returns
+ ///
+ /// Canonical files and directories where custom instructions can be created so the runtime will recognize them. Fails if `params` cannot be converted to JSON, if the underlying `client.call` returns an error, or if the response does not deserialize into the result type.
+ ///
+ ///
+ ///
+ /// **Experimental.** This API is part of an experimental wire-protocol surface
+ /// and may change or be removed in future SDK or CLI releases. Pin both the
+ /// SDK and CLI versions if your code depends on it.
+ ///
+ ///
+ pub async fn get_discovery_paths(
+ &self,
+ params: InstructionsGetDiscoveryPathsRequest,
+ ) -> Result {
+ let wire_params = serde_json::to_value(params)?;
+ let _value = self
+ .client
+ .call(
+ rpc_methods::INSTRUCTIONS_GETDISCOVERYPATHS,
+ Some(wire_params),
+ )
+ .await?;
+ Ok(serde_json::from_value(_value)?)
+ }
}
/// `mcp.*` RPCs.
@@ -1974,6 +2039,37 @@ impl<'a> ClientRpcSkills<'a> {
.await?;
Ok(serde_json::from_value(_value)?)
}
+
+ /// Returns the canonical directories where a client may create skills that the runtime will recognize, including ones that do not exist yet. Project directories become active once created.
+ ///
+ /// Wire method: `skills.getDiscoveryPaths`.
+ ///
+ /// # Parameters
+ ///
+ /// * `params` - Optional project paths to enumerate.
+ ///
+ /// # Returns
+ ///
+ /// Canonical locations where skills can be created so the runtime will recognize them. Fails if `params` cannot be converted to JSON, if the underlying `client.call` returns an error, or if the response does not deserialize into the result type.
+ ///
+ ///
+ ///
+ /// **Experimental.** This API is part of an experimental wire-protocol surface
+ /// and may change or be removed in future SDK or CLI releases. Pin both the
+ /// SDK and CLI versions if your code depends on it.
+ ///
+ ///
+ pub async fn get_discovery_paths(
+ &self,
+ params: SkillsGetDiscoveryPathsRequest,
+ ) -> Result {
+ let wire_params = serde_json::to_value(params)?;
+ let _value = self
+ .client
+ .call(rpc_methods::SKILLS_GETDISCOVERYPATHS, Some(wire_params))
+ .await?;
+ Ok(serde_json::from_value(_value)?)
+ }
}
/// `skills.config.*` RPCs.
diff --git a/rust/src/generated/session_events.rs b/rust/src/generated/session_events.rs
index 040859462..639b466d1 100644
--- a/rust/src/generated/session_events.rs
+++ b/rust/src/generated/session_events.rs
@@ -117,6 +117,15 @@ pub enum SessionEventType {
HookEnd,
#[serde(rename = "hook.progress")]
HookProgress,
+ ///
+ ///
+ ///
+ /// **Experimental.** This type is part of an experimental wire-protocol surface
+ /// and may change or be removed in future SDK or CLI releases.
+ ///
+ ///
+ #[serde(rename = "session.binary_asset")]
+ SessionBinaryAsset,
#[serde(rename = "system.message")]
SystemMessage,
#[serde(rename = "system.notification")]
@@ -309,6 +318,8 @@ pub enum SessionEventData {
HookEnd(HookEndData),
#[serde(rename = "hook.progress")]
HookProgress(HookProgressData),
+ #[serde(rename = "session.binary_asset")]
+ SessionBinaryAsset(SessionBinaryAssetData),
#[serde(rename = "system.message")]
SystemMessage(SystemMessageData),
#[serde(rename = "system.notification")]
@@ -1031,7 +1042,10 @@ pub struct CompactionCompleteCompactionTokensUsedCopilotUsageTokenDetail {
#[serde(rename_all = "camelCase")]
pub(crate) struct CompactionCompleteCompactionTokensUsedCopilotUsage {
/// Itemized token usage breakdown
- pub token_details: Vec,
+ #[doc(hidden)]
+ #[serde(skip_serializing_if = "Option::is_none")]
+ pub(crate) token_details:
+ Option>,
/// Total cost in nano-AI units for this request
pub total_nano_aiu: f64,
}
@@ -1374,9 +1388,11 @@ pub struct AssistantUsageCopilotUsageTokenDetail {
/// Per-request cost and usage data from the CAPI copilot_usage response field
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
-pub(crate) struct AssistantUsageCopilotUsage {
+pub struct AssistantUsageCopilotUsage {
/// Itemized token usage breakdown
- pub token_details: Vec,
+ #[doc(hidden)]
+ #[serde(skip_serializing_if = "Option::is_none")]
+ pub(crate) token_details: Option>,
/// Total cost in nano-AI units for this request
pub total_nano_aiu: f64,
}
@@ -1432,9 +1448,8 @@ pub struct AssistantUsageData {
#[serde(skip_serializing_if = "Option::is_none")]
pub content_filter_triggered: Option,
/// Per-request cost and usage data from the CAPI copilot_usage response field
- #[doc(hidden)]
#[serde(skip_serializing_if = "Option::is_none")]
- pub(crate) copilot_usage: Option,
+ pub copilot_usage: Option,
/// Model multiplier cost for billing purposes
///
///
@@ -1876,6 +1891,16 @@ pub struct ToolExecutionCompleteUIResource {
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ToolExecutionCompleteResult {
+ /// Model-facing binary results (base64 inline or size-omitted markers) sent to the LLM for this tool call
+ ///
+ ///
+ ///
+ /// **Experimental.** This type is part of an experimental wire-protocol surface
+ /// and may change or be removed in future SDK or CLI releases.
+ ///
+ ///
+ #[serde(skip_serializing_if = "Option::is_none")]
+ pub binary_results_for_llm: Option
>,
/// Concise tool result text sent to the LLM for chat completion, potentially truncated for token efficiency
pub content: String,
/// Structured content blocks (text, images, audio, resources) returned by the tool in their native format
@@ -2139,6 +2164,28 @@ pub struct HookProgressData {
pub temporary: Option,
}
+/// Payload of the "session.binary_asset" session event: canonical bytes for a content-addressed binary asset shared by reference across events.
+#[derive(Debug, Clone, Default, Serialize, Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub struct SessionBinaryAssetData {
+ /// Content-addressed id for this binary asset (e.g. "sha256:...").
+ pub asset_id: String,
+ /// Decoded byte length of the binary asset (presumably the size of `data` after base64 decoding; confirm against the schema).
+ pub byte_length: i64,
+ /// Base64-encoded binary data
+ pub data: String,
+ /// Human-readable description of the binary data
+ #[serde(skip_serializing_if = "Option::is_none")]
+ pub description: Option,
+ /// Optional metadata from the producing tool.
+ #[serde(skip_serializing_if = "Option::is_none")]
+ pub metadata: Option>,
+ /// MIME type of the binary asset
+ pub mime_type: String,
+ /// Binary asset type discriminator. Use "image" for images and "resource" otherwise.
+ pub r#type: BinaryAssetType,
+}
+
/// Metadata about the prompt template and its construction
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
@@ -3828,6 +3875,21 @@ pub enum SkillInvokedTrigger {
Unknown,
}
+/// Binary asset type discriminator. Use "image" for images and "resource" otherwise.
+#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
+pub enum BinaryAssetType {
+ /// Binary image data.
+ #[serde(rename = "image")]
+ Image,
+ /// Other binary resource data.
+ #[serde(rename = "resource")]
+ Resource,
+ /// Fallback selected via `#[serde(other)]` when the wire value matches no other variant; also this enum's `Default`.
+ #[default]
+ #[serde(other)]
+ Unknown,
+}
+
/// Message role: "system" for system prompts, "developer" for developer-injected instructions
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
pub enum SystemMessageRole {
diff --git a/rust/tests/e2e/hooks.rs b/rust/tests/e2e/hooks.rs
index d41dee621..b4a211d87 100644
--- a/rust/tests/e2e/hooks.rs
+++ b/rust/tests/e2e/hooks.rs
@@ -1,3 +1,4 @@
+use std::collections::HashSet;
use std::sync::Arc;
use async_trait::async_trait;
@@ -122,7 +123,19 @@ async fn should_invoke_both_pretooluse_and_posttooluse_hooks_for_single_tool_cal
let post = recv_with_timeout(&mut post_rx, "postToolUse hook").await;
assert_eq!(pre.0, *session.id());
assert_eq!(post.0, *session.id());
- assert_eq!(pre.1, post.1);
+
+ let mut pre_tools: HashSet = HashSet::from([pre.1]);
+ while let Ok((_, tool_name)) = pre_rx.try_recv() {
+ pre_tools.insert(tool_name);
+ }
+ let mut post_tools: HashSet = HashSet::from([post.1]);
+ while let Ok((_, tool_name, _)) = post_rx.try_recv() {
+ post_tools.insert(tool_name);
+ }
+ assert!(
+ pre_tools.intersection(&post_tools).next().is_some(),
+ "expected a tool to appear in both pre and post hooks, got pre={pre_tools:?} post={post_tools:?}"
+ );
session.disconnect().await.expect("disconnect session");
client.stop().await.expect("stop client");
diff --git a/rust/tests/e2e/hooks_extended.rs b/rust/tests/e2e/hooks_extended.rs
index d4b6b0a55..ef53987ff 100644
--- a/rust/tests/e2e/hooks_extended.rs
+++ b/rust/tests/e2e/hooks_extended.rs
@@ -333,7 +333,6 @@ async fn should_allow_posttooluse_to_return_modifiedresult() {
let session = client
.create_session(
ctx.approve_all_session_config()
- .with_available_tools(["report_intent"])
.with_hooks(Arc::new(RecordingHooks::post_tool(tx))),
)
.await
@@ -341,17 +340,22 @@ async fn should_allow_posttooluse_to_return_modifiedresult() {
let answer = session
.send_and_wait(
- "Call the report_intent tool with intent 'Testing post hook', then reply done.",
+ "Call the view tool to read the current directory, then reply done.",
)
.await
.expect("send")
.expect("assistant message");
- let mut saw_report_intent = false;
+ let mut saw_view = false;
while let Ok(input) = rx.try_recv() {
- saw_report_intent |= input.tool_name == "report_intent";
+ saw_view |= input.tool_name == "view";
}
- assert!(saw_report_intent, "expected postToolUse hook for report_intent");
- assert_eq!(assistant_message_content(&answer), "Done.");
+ assert!(saw_view, "expected postToolUse hook for view");
+ assert!(
+ assistant_message_content(&answer)
+ .to_lowercase()
+ .contains("done"),
+ "expected assistant message to contain 'done'"
+ );
session.disconnect().await.expect("disconnect session");
client.stop().await.expect("stop client");
@@ -362,6 +366,7 @@ async fn should_allow_posttooluse_to_return_modifiedresult() {
}
#[tokio::test]
+#[ignore = "Fails with 1.0.64-0 runtime: built-in tools are not available when hooks restrict availableTools, so the failure path cannot be exercised. Follow up with runtime team."]
async fn should_invoke_posttoolusefailure_hook_for_failed_tool_result() {
with_e2e_context(
"hooks_extended",
@@ -583,8 +588,8 @@ impl SessionHooks for RecordingHooks {
input: PostToolUseInput,
_ctx: HookContext,
) -> Option {
- let output = (self.post_tool.is_some() && input.tool_name == "report_intent").then(|| {
- PostToolUseOutput {
+ let output =
+ (self.post_tool.is_some() && input.tool_name == "view").then(|| PostToolUseOutput {
modified_result: Some(json!({
"textResultForLlm": "modified by post hook",
"resultType": "success",
@@ -592,8 +597,7 @@ impl SessionHooks for RecordingHooks {
})),
suppress_output: Some(false),
..PostToolUseOutput::default()
- }
- });
+ });
if let Some(tx) = &self.post_tool {
let _ = tx.send(input);
}
diff --git a/rust/tests/e2e/rpc_server.rs b/rust/tests/e2e/rpc_server.rs
index dff8bfd1e..27cad2f69 100644
--- a/rust/tests/e2e/rpc_server.rs
+++ b/rust/tests/e2e/rpc_server.rs
@@ -165,6 +165,7 @@ async fn should_discover_server_mcp_and_skills() {
.rpc()
.skills()
.discover(SkillsDiscoverRequest {
+ exclude_host_skills: None,
project_paths: None,
skill_directories: Some(vec![
skill_directory.to_string_lossy().to_string(),
@@ -191,6 +192,7 @@ async fn should_discover_server_mcp_and_skills() {
.rpc()
.skills()
.discover(SkillsDiscoverRequest {
+ exclude_host_skills: None,
project_paths: None,
skill_directories: Some(vec![
skill_directory.to_string_lossy().to_string(),
diff --git a/rust/tests/e2e/session_todos_changed.rs b/rust/tests/e2e/session_todos_changed.rs
index ef73a92dd..ebace39b3 100644
--- a/rust/tests/e2e/session_todos_changed.rs
+++ b/rust/tests/e2e/session_todos_changed.rs
@@ -3,7 +3,9 @@ use github_copilot_sdk::session_events::SessionEventType;
use super::support::{wait_for_event, with_e2e_context};
const PROMPT: &str = concat!(
- "Use the sql tool to execute exactly these statements, in order, with no extra rows:\n",
+ "Use the sql tool exactly once to execute all three of the following statements ",
+ "together, in this exact order, in a single sql tool call (a single query string ",
+ "containing all three statements):\n",
"1. INSERT INTO todos (id, title, status) VALUES ('alpha', 'First todo', 'pending');\n",
"2. INSERT INTO todos (id, title, status) VALUES ('beta', 'Second todo', 'done');\n",
"3. INSERT INTO todo_deps (todo_id, depends_on) VALUES ('beta', 'alpha');\n",
diff --git a/scripts/codegen/go.ts b/scripts/codegen/go.ts
index 39b28e44f..9c74977fb 100644
--- a/scripts/codegen/go.ts
+++ b/scripts/codegen/go.ts
@@ -3174,7 +3174,11 @@ export function generateGoSessionEventsCode(
if (typeof propSchema !== "object") continue;
const prop = propSchema as JSONSchema7;
const isReq = required.has(propName);
- const goName = toGoFieldName(propName);
+ let goName = toGoFieldName(propName);
+ // Avoid conflict with the Type() SessionEventType interface method
+ if (goName === "Type") {
+ goName = "Discriminator";
+ }
const goType = resolveGoPropertyType(prop, variant.dataClassName, propName, isReq, ctx);
if (prop.description) {
diff --git a/test/harness/package-lock.json b/test/harness/package-lock.json
index 9fc481e8a..5a6e086e0 100644
--- a/test/harness/package-lock.json
+++ b/test/harness/package-lock.json
@@ -9,7 +9,7 @@
"version": "1.0.0",
"license": "ISC",
"devDependencies": {
- "@github/copilot": "^1.0.63",
+ "@github/copilot": "^1.0.64-0",
"@modelcontextprotocol/sdk": "^1.26.0",
"@types/node": "^25.3.3",
"@types/node-forge": "^1.3.14",
@@ -501,9 +501,9 @@
}
},
"node_modules/@github/copilot": {
- "version": "1.0.63",
- "resolved": "https://registry.npmjs.org/@github/copilot/-/copilot-1.0.63.tgz",
- "integrity": "sha512-e8DRYiWJQc4kepVXsXjC8vpDU2FXS/TfR+Z6p/KAojfcwIUZzKMAfCV5D1lD25hV4CryVH1Z9t7mHqChickj0Q==",
+ "version": "1.0.64-0",
+ "resolved": "https://registry.npmjs.org/@github/copilot/-/copilot-1.0.64-0.tgz",
+ "integrity": "sha512-PlH7ByBHjmPLqLXS4CE2q8hN6CFEfkCMV6ScBEzW/u73+KYQB4fGNouo8Lr8okL6D5CW5rzPJbsXyISyJqVOZg==",
"dev": true,
"license": "SEE LICENSE IN LICENSE.md",
"dependencies": {
@@ -514,20 +514,20 @@
"copilot": "npm-loader.js"
},
"optionalDependencies": {
- "@github/copilot-darwin-arm64": "1.0.63",
- "@github/copilot-darwin-x64": "1.0.63",
- "@github/copilot-linux-arm64": "1.0.63",
- "@github/copilot-linux-x64": "1.0.63",
- "@github/copilot-linuxmusl-arm64": "1.0.63",
- "@github/copilot-linuxmusl-x64": "1.0.63",
- "@github/copilot-win32-arm64": "1.0.63",
- "@github/copilot-win32-x64": "1.0.63"
+ "@github/copilot-darwin-arm64": "1.0.64-0",
+ "@github/copilot-darwin-x64": "1.0.64-0",
+ "@github/copilot-linux-arm64": "1.0.64-0",
+ "@github/copilot-linux-x64": "1.0.64-0",
+ "@github/copilot-linuxmusl-arm64": "1.0.64-0",
+ "@github/copilot-linuxmusl-x64": "1.0.64-0",
+ "@github/copilot-win32-arm64": "1.0.64-0",
+ "@github/copilot-win32-x64": "1.0.64-0"
}
},
"node_modules/@github/copilot-darwin-arm64": {
- "version": "1.0.63",
- "resolved": "https://registry.npmjs.org/@github/copilot-darwin-arm64/-/copilot-darwin-arm64-1.0.63.tgz",
- "integrity": "sha512-z6CMBxNDlKvT6bvOpqhu4M2bhb0daEbVwSe9SN9WfDUJbt7bpoL7OKKas428iyPSWHoL2WXwxSsy/FjIwSLV6w==",
+ "version": "1.0.64-0",
+ "resolved": "https://registry.npmjs.org/@github/copilot-darwin-arm64/-/copilot-darwin-arm64-1.0.64-0.tgz",
+ "integrity": "sha512-97DUGiuYrkCYOlSSLWMmr+K0uGzAxz1JOL/GyO/7mNl6V/1xgs6Van1Jj+Dpj4ly96iHE8lUIW8cQNCG66644g==",
"cpu": [
"arm64"
],
@@ -542,9 +542,9 @@
}
},
"node_modules/@github/copilot-darwin-x64": {
- "version": "1.0.63",
- "resolved": "https://registry.npmjs.org/@github/copilot-darwin-x64/-/copilot-darwin-x64-1.0.63.tgz",
- "integrity": "sha512-YKd7cXZgAGxhudzrtWdWh2NS35p2G5bV22Gz3jhEyBTqmq45o4sD4OwO87+UpkvM+3nZpwsHaLd3a+ILYX6OXg==",
+ "version": "1.0.64-0",
+ "resolved": "https://registry.npmjs.org/@github/copilot-darwin-x64/-/copilot-darwin-x64-1.0.64-0.tgz",
+ "integrity": "sha512-2PXY4mSFtIjFdRaAt8PakegRgGtf6Sz9z6U/dIgVygNfctVNzaL5FH65PNPm8Y80jaDvEcz1/XY5MiQtxnlzZQ==",
"cpu": [
"x64"
],
@@ -559,9 +559,9 @@
}
},
"node_modules/@github/copilot-linux-arm64": {
- "version": "1.0.63",
- "resolved": "https://registry.npmjs.org/@github/copilot-linux-arm64/-/copilot-linux-arm64-1.0.63.tgz",
- "integrity": "sha512-A3DOeEfmsJH9j1N+QLc7WXmESBskbezmhDyhyAJcHkw0ngRbKctuWQf/evUHFMh/kgwy1Lr/+9jXJm3NZqr0MA==",
+ "version": "1.0.64-0",
+ "resolved": "https://registry.npmjs.org/@github/copilot-linux-arm64/-/copilot-linux-arm64-1.0.64-0.tgz",
+ "integrity": "sha512-PLP+vR508fOTlCr9CSZiXi9geicHKXuX9jLGdwNqK2TMZO5TqCLz8wP7dBEmkdkeXcFKovMb8nQVB1Toc6xutw==",
"cpu": [
"arm64"
],
@@ -576,9 +576,9 @@
}
},
"node_modules/@github/copilot-linux-x64": {
- "version": "1.0.63",
- "resolved": "https://registry.npmjs.org/@github/copilot-linux-x64/-/copilot-linux-x64-1.0.63.tgz",
- "integrity": "sha512-OMKfZJRoDaJOV7vuWX/nFPNdLa9/H+nhajdE83v4YT9mKLXr86aWrkXE3pPoDYsKWvgQFHg4APA6oZPao0Fyow==",
+ "version": "1.0.64-0",
+ "resolved": "https://registry.npmjs.org/@github/copilot-linux-x64/-/copilot-linux-x64-1.0.64-0.tgz",
+ "integrity": "sha512-NvVjQ69zr390ijzo2f75+v0DHm6xnvPbi67ugnKDk7ZPbx8P3vSxVdAnrzrrL4T3T8ng3pJANcC4p+eGbx+UDw==",
"cpu": [
"x64"
],
@@ -593,9 +593,9 @@
}
},
"node_modules/@github/copilot-linuxmusl-arm64": {
- "version": "1.0.63",
- "resolved": "https://registry.npmjs.org/@github/copilot-linuxmusl-arm64/-/copilot-linuxmusl-arm64-1.0.63.tgz",
- "integrity": "sha512-jcIo6B3uHgcOluNfUHp+6atShKKrXYBPLaRyF6aDT699lwI83gW9KTDuEvDs5FDg8qWsWFfOl+al2dkWDYD3CQ==",
+ "version": "1.0.64-0",
+ "resolved": "https://registry.npmjs.org/@github/copilot-linuxmusl-arm64/-/copilot-linuxmusl-arm64-1.0.64-0.tgz",
+ "integrity": "sha512-qCnVF5vIcTO74CukAENZo8e5nqXm4QUshuKN69aiZb5GOhVvyyIKsf5Jo7ikZt54jJBHycAMUKlTA8L3/nK+KA==",
"cpu": [
"arm64"
],
@@ -610,9 +610,9 @@
}
},
"node_modules/@github/copilot-linuxmusl-x64": {
- "version": "1.0.63",
- "resolved": "https://registry.npmjs.org/@github/copilot-linuxmusl-x64/-/copilot-linuxmusl-x64-1.0.63.tgz",
- "integrity": "sha512-BEdBbEF3fG7VqXzuaAY4JtmbdGSkpJFeb2ZQYaMpq7OP3aS7ssGe1cCX8ehZNegcMM/eb4GC6PXNXsvl3X/PAQ==",
+ "version": "1.0.64-0",
+ "resolved": "https://registry.npmjs.org/@github/copilot-linuxmusl-x64/-/copilot-linuxmusl-x64-1.0.64-0.tgz",
+ "integrity": "sha512-WDBEmkBk1RulTfdLK5IuttNBadjLOBpvQyonGQ/aLeaetRNNdapoygrSjFU7q1QBSenmCyanXH6D+TS7tP3Qsw==",
"cpu": [
"x64"
],
@@ -627,9 +627,9 @@
}
},
"node_modules/@github/copilot-win32-arm64": {
- "version": "1.0.63",
- "resolved": "https://registry.npmjs.org/@github/copilot-win32-arm64/-/copilot-win32-arm64-1.0.63.tgz",
- "integrity": "sha512-7FqUwOmtoeBoOn4zkKQqRL+WGFwektVRSr5Po2FvPAbKxGXGyFXApZTmRLqVcHhMKDRzMb8KLST1LU1TMTY/wg==",
+ "version": "1.0.64-0",
+ "resolved": "https://registry.npmjs.org/@github/copilot-win32-arm64/-/copilot-win32-arm64-1.0.64-0.tgz",
+ "integrity": "sha512-PC7yuUKcVbhli4bpzWFVT3juxj+v/iONazetNe3tMpHWza3W7MeFRifzAseSErKQCt2fHJth3m8bQAwFN2jfrA==",
"cpu": [
"arm64"
],
@@ -644,9 +644,9 @@
}
},
"node_modules/@github/copilot-win32-x64": {
- "version": "1.0.63",
- "resolved": "https://registry.npmjs.org/@github/copilot-win32-x64/-/copilot-win32-x64-1.0.63.tgz",
- "integrity": "sha512-RC/6y9KHdw/YRCrCEksF2RzbeblfBUNE7bkYZxygaQGYThuv1GeZL2YD2jVqxC2LxKzsUmWGvwEMxerfR6pmeQ==",
+ "version": "1.0.64-0",
+ "resolved": "https://registry.npmjs.org/@github/copilot-win32-x64/-/copilot-win32-x64-1.0.64-0.tgz",
+ "integrity": "sha512-d2fnUTIlqNxCqS2PuV+FD99ZOYBaX72OLtAmphbKyz36KyZ6D4ssiu8M4vHVTKWWdyc3TWiLsnIB+ryWdv1gGw==",
"cpu": [
"x64"
],
diff --git a/test/harness/package.json b/test/harness/package.json
index b4b4c16e2..669ebee1b 100644
--- a/test/harness/package.json
+++ b/test/harness/package.json
@@ -14,7 +14,7 @@
"node": "^20.19.0 || >=22.12.0"
},
"devDependencies": {
- "@github/copilot": "^1.0.63",
+ "@github/copilot": "^1.0.64-0",
"@modelcontextprotocol/sdk": "^1.26.0",
"@types/node": "^25.3.3",
"@types/node-forge": "^1.3.14",
diff --git a/test/harness/replayingCapiProxy.ts b/test/harness/replayingCapiProxy.ts
index 9c2467b2a..5e6b8e3f1 100644
--- a/test/harness/replayingCapiProxy.ts
+++ b/test/harness/replayingCapiProxy.ts
@@ -56,6 +56,7 @@ export class ReplayingCapiProxy extends CapturingHttpProxy {
{ toolName: "*", normalizer: normalizeLargeOutputFilepaths },
{ toolName: "${shell}", normalizer: normalizeShellExitMarkers },
{ toolName: "*", normalizer: normalizeGhAuthMessages },
+ { toolName: "*", normalizer: normalizeAvailableToolNames },
{ toolName: "read_agent", normalizer: normalizeReadAgentTimings },
];
@@ -1157,6 +1158,44 @@ function normalizeReadAgentTimings(result: string): string {
.replace(/\bduration: \d+(?:\.\d+)?s\b/g, "duration: 0s");
}
+// Maps the platform-specific shell tool family names to stable placeholders.
+// On Windows the runtime exposes powershell/read_powershell/stop_powershell/...,
+// on Linux/macOS it exposes bash/read_bash/stop_bash/.... Each prefixed name has
+// its own entry; because `_` is a word character, the \b anchors stop the bare
+// `powershell`/`bash` patterns from matching inside the prefixed names.
+const shellToolFamilyReplacements: ReadonlyArray<[RegExp, string]> = [
+    [/\bread_powershell\b/g, "${read_shell}"],
+    [/\bstop_powershell\b/g, "${stop_shell}"],
+    [/\blist_powershell\b/g, "${list_shell}"],
+    [/\bwrite_powershell\b/g, "${write_shell}"],
+    [/\bpowershell\b/g, "${shell}"],
+    [/\bread_bash\b/g, "${read_shell}"],
+    [/\bstop_bash\b/g, "${stop_shell}"],
+    [/\blist_bash\b/g, "${list_shell}"],
+    [/\bwrite_bash\b/g, "${write_shell}"],
+    [/\bbash\b/g, "${shell}"],
+];
+
+// Applies every shell-tool-family replacement to `text`, in table order, and
+// returns the rewritten string.
+function normalizeShellToolFamilyNames(text: string): string {
+    return shellToolFamilyReplacements.reduce(
+        (rewritten, [pattern, replacement]) => rewritten.replace(pattern, replacement),
+        text,
+    );
+}
+
+// When a model calls a tool that doesn't exist (e.g., the removed report_intent
+// tool), the runtime replies with "Available tools that can be called are <tool list>."
+// The shell tool family names in that list are platform-specific, so only the
+// list portion (everything up to the first period) is rewritten to placeholders,
+// keeping snapshots matching across Windows/Linux/macOS.
+function normalizeAvailableToolNames(result: string): string {
+    const availableToolsSentence = /(Available tools that can be called are )([^.]*)/g;
+    return result.replace(
+        availableToolsSentence,
+        (_match, lead: string, toolNames: string) => {
+            return lead + normalizeShellToolFamilyNames(toolNames);
+        },
+    );
+}
+
// Transforms a single OpenAI-style inbound response message into normalized form
function transformOpenAIResponseChoice(
choices: ChatCompletion.Choice[],
diff --git a/test/snapshots/abort/should_abort_during_active_streaming.yaml b/test/snapshots/abort/should_abort_during_active_streaming.yaml
index 1f5c085eb..bd18eab2f 100644
--- a/test/snapshots/abort/should_abort_during_active_streaming.yaml
+++ b/test/snapshots/abort/should_abort_during_active_streaming.yaml
@@ -1,16 +1,5 @@
models:
- claude-sonnet-4.5
-# Two stored conversations cover the two legal histories after aborting an
-# in-flight streaming turn:
-# conv0 (race-win): the essay assistant turn finalized before the abort, so
-# it remains in history -> [system, user(essay),
-# assistant(essay), user(recovery)].
-# conv1 (race-lose): the abort dropped the in-flight essay assistant turn, so
-# it is absent from history -> [system, user(essay),
-# user(recovery)]. This is the common outcome with the
-# Rust-reqwest transport in CLI 1.0.61+.
-# The replay proxy matches a request as a strict prefix of a stored conversation
-# and returns the next assistant message, so both histories resolve correctly.
conversations:
- messages:
- role: system
@@ -39,13 +28,3 @@ conversations:
content: Say 'abort_recovery_ok'.
- role: assistant
content: abort_recovery_ok
- - messages:
- - role: system
- content: ${system}
- - role: user
- content: Write a very long essay about the history of computing, covering every decade from the 1940s to the 2020s in
- great detail.
- - role: user
- content: Say 'abort_recovery_ok'.
- - role: assistant
- content: abort_recovery_ok
diff --git a/test/snapshots/builtin_tools/should_capture_exit_code_in_output.yaml b/test/snapshots/builtin_tools/should_capture_exit_code_in_output.yaml
index 3285ae23c..01cf1298d 100644
--- a/test/snapshots/builtin_tools/should_capture_exit_code_in_output.yaml
+++ b/test/snapshots/builtin_tools/should_capture_exit_code_in_output.yaml
@@ -39,7 +39,9 @@ conversations:
arguments: '{"command":"echo hello && echo world","description":"Run echo hello && echo world"}'
- role: tool
tool_call_id: toolcall_0
- content: Intent logged
+ content: Tool 'report_intent' does not exist. Available tools that can be called are ${shell}, ${read_shell},
+ ${stop_shell}, ${list_shell}, view, create, edit, web_fetch, skill, sql, read_agent, list_agents, grep, glob,
+ task.
- role: tool
tool_call_id: toolcall_1
content: |-
diff --git a/test/snapshots/builtin_tools/should_create_a_new_file.yaml b/test/snapshots/builtin_tools/should_create_a_new_file.yaml
index fe75810a1..bf9288cf0 100644
--- a/test/snapshots/builtin_tools/should_create_a_new_file.yaml
+++ b/test/snapshots/builtin_tools/should_create_a_new_file.yaml
@@ -39,7 +39,9 @@ conversations:
arguments: '{"path":"${workdir}/new_file.txt","file_text":"Created by test"}'
- role: tool
tool_call_id: toolcall_0
- content: Intent logged
+ content: Tool 'report_intent' does not exist. Available tools that can be called are ${shell}, ${read_shell},
+ ${stop_shell}, ${list_shell}, view, create, edit, web_fetch, skill, sql, read_agent, list_agents, grep, glob,
+ task.
- role: tool
tool_call_id: toolcall_1
content: Created file ${workdir}/new_file.txt with 15 characters
@@ -54,4 +56,4 @@ conversations:
tool_call_id: toolcall_2
content: 1. Created by test
- role: assistant
- content: File created and confirmed! The file 'new_file.txt' contains "Created by test" as expected.
+ content: ✓ Done! Created `new_file.txt` with content "Created by test" and confirmed the content matches.
diff --git a/test/snapshots/builtin_tools/should_edit_a_file_successfully.yaml b/test/snapshots/builtin_tools/should_edit_a_file_successfully.yaml
index b2d3d9451..0f2141862 100644
--- a/test/snapshots/builtin_tools/should_edit_a_file_successfully.yaml
+++ b/test/snapshots/builtin_tools/should_edit_a_file_successfully.yaml
@@ -41,7 +41,9 @@ conversations:
arguments: '{"path":"${workdir}/edit_me.txt","old_str":"Hello World","new_str":"Hi Universe"}'
- role: tool
tool_call_id: toolcall_0
- content: Intent logged
+ content: Tool 'report_intent' does not exist. Available tools that can be called are ${shell}, ${read_shell},
+ ${stop_shell}, ${list_shell}, view, create, edit, web_fetch, skill, sql, read_agent, list_agents, grep, glob,
+ task.
- role: tool
tool_call_id: toolcall_1
content: File ${workdir}/edit_me.txt updated with changes.
@@ -60,6 +62,8 @@ conversations:
3.
- role: assistant
content: |-
- The file now contains:
- 1. Hi Universe
- 2. Goodbye World
+ Done! The file now contains:
+ ```
+ Hi Universe
+ Goodbye World
+ ```
diff --git a/test/snapshots/builtin_tools/should_find_files_by_pattern.yaml b/test/snapshots/builtin_tools/should_find_files_by_pattern.yaml
index 0fa78ce4f..6cf85ea51 100644
--- a/test/snapshots/builtin_tools/should_find_files_by_pattern.yaml
+++ b/test/snapshots/builtin_tools/should_find_files_by_pattern.yaml
@@ -42,11 +42,14 @@ conversations:
arguments: '{"pattern":"**/*.ts"}'
- role: tool
tool_call_id: toolcall_0
- content: Intent logged
+ content: Tool 'report_intent' does not exist. Available tools that can be called are ${shell}, ${read_shell},
+ ${stop_shell}, ${list_shell}, view, create, edit, web_fetch, skill, sql, read_agent, list_agents, grep, glob,
+ task.
- role: tool
tool_call_id: toolcall_1
content: ./src/index.ts
- role: assistant
content: |-
- Found **1 TypeScript file**:
+ I found **1 TypeScript file** in this directory:
+
- `src/index.ts`
diff --git a/test/snapshots/builtin_tools/should_handle_nonexistent_file_gracefully.yaml b/test/snapshots/builtin_tools/should_handle_nonexistent_file_gracefully.yaml
index 2e5e4368c..c5c00fb65 100644
--- a/test/snapshots/builtin_tools/should_handle_nonexistent_file_gracefully.yaml
+++ b/test/snapshots/builtin_tools/should_handle_nonexistent_file_gracefully.yaml
@@ -39,7 +39,9 @@ conversations:
arguments: '{"path":"${workdir}/does_not_exist.txt"}'
- role: tool
tool_call_id: toolcall_0
- content: Intent logged
+ content: Tool 'report_intent' does not exist. Available tools that can be called are ${shell}, ${read_shell},
+ ${stop_shell}, ${list_shell}, view, create, edit, web_fetch, skill, sql, read_agent, list_agents, grep, glob,
+ task.
- role: tool
tool_call_id: toolcall_1
content: Path ${workdir}/does_not_exist.txt does not exist. Please provide a valid path.
diff --git a/test/snapshots/builtin_tools/should_read_file_with_line_range.yaml b/test/snapshots/builtin_tools/should_read_file_with_line_range.yaml
index 382da0908..cd17d8670 100644
--- a/test/snapshots/builtin_tools/should_read_file_with_line_range.yaml
+++ b/test/snapshots/builtin_tools/should_read_file_with_line_range.yaml
@@ -39,7 +39,9 @@ conversations:
arguments: '{"path":"${workdir}/lines.txt","view_range":[2,4]}'
- role: tool
tool_call_id: toolcall_0
- content: Intent logged
+ content: Tool 'report_intent' does not exist. Available tools that can be called are ${shell}, ${read_shell},
+ ${stop_shell}, ${list_shell}, view, create, edit, web_fetch, skill, sql, read_agent, list_agents, grep, glob,
+ task.
- role: tool
tool_call_id: toolcall_1
content: |-
@@ -48,7 +50,7 @@ conversations:
4. line4
- role: assistant
content: |-
- Lines 2 through 4 contain:
+ Lines 2 through 4 of 'lines.txt' contain:
- Line 2: "line2"
- - Line 3: "line3"
+ - Line 3: "line3"
- Line 4: "line4"
diff --git a/test/snapshots/builtin_tools/should_search_for_patterns_in_files.yaml b/test/snapshots/builtin_tools/should_search_for_patterns_in_files.yaml
index a1723fa0b..f0af500b6 100644
--- a/test/snapshots/builtin_tools/should_search_for_patterns_in_files.yaml
+++ b/test/snapshots/builtin_tools/should_search_for_patterns_in_files.yaml
@@ -39,37 +39,9 @@ conversations:
arguments: '{"pattern":"^ap","path":"${workdir}/data.txt","output_mode":"content","-n":true}'
- role: tool
tool_call_id: toolcall_0
- content: Intent logged
- - role: tool
- tool_call_id: toolcall_1
- content: |-
- ./data.txt:1:apple
- ./data.txt:3:apricot
- - role: assistant
- content: |-
- The search found **2 lines** starting with 'ap':
- - Line 1: `apple`
- - Line 3: `apricot`
- - messages:
- - role: system
- content: ${system}
- - role: user
- content: Search for lines starting with 'ap' in the file 'data.txt'. Tell me which lines matched.
- - role: assistant
- tool_calls:
- - id: toolcall_0
- type: function
- function:
- name: report_intent
- arguments: '{"intent":"Searching file for pattern"}'
- - id: toolcall_1
- type: function
- function:
- name: grep
- arguments: '{"pattern":"^ap","path":"${workdir}/data.txt","output_mode":"content","-n":true}'
- - role: tool
- tool_call_id: toolcall_0
- content: Intent logged
+ content: Tool 'report_intent' does not exist. Available tools that can be called are ${shell}, ${read_shell},
+ ${stop_shell}, ${list_shell}, view, create, edit, web_fetch, skill, sql, read_agent, list_agents, grep, glob,
+ task.
- role: tool
tool_call_id: toolcall_1
content: |-
@@ -77,6 +49,6 @@ conversations:
${workdir}/data.txt:3:apricot
- role: assistant
content: |-
- The search found **2 lines** starting with 'ap':
- - Line 1: `apple`
- - Line 3: `apricot`
+ Two lines start with 'ap':
+ - **Line 1:** apple
+ - **Line 3:** apricot
diff --git a/test/snapshots/client_options/should_use_client_cwd_for_default_workingdirectory.yaml b/test/snapshots/client_options/should_use_client_cwd_for_default_workingdirectory.yaml
index b44846fdc..6d9167e94 100644
--- a/test/snapshots/client_options/should_use_client_cwd_for_default_workingdirectory.yaml
+++ b/test/snapshots/client_options/should_use_client_cwd_for_default_workingdirectory.yaml
@@ -20,14 +20,11 @@ conversations:
arguments: '{"path":"${workdir}/client-cwd/marker.txt"}'
- role: tool
tool_call_id: toolcall_0
- content: Intent logged
+ content: Tool 'report_intent' does not exist. Available tools that can be called are ${shell}, ${read_shell},
+ ${stop_shell}, ${list_shell}, view, create, edit, web_fetch, skill, sql, read_agent, list_agents, grep, glob,
+ task.
- role: tool
tool_call_id: toolcall_1
content: 1. I am in the client cwd
- role: assistant
- content: |-
- The file `marker.txt` says:
-
- ```
- I am in the client cwd
- ```
+ content: 'The file `marker.txt` says: "I am in the client cwd"'
diff --git a/test/snapshots/event_fidelity/should_emit_events_in_correct_order_for_tool_using_conversation.yaml b/test/snapshots/event_fidelity/should_emit_events_in_correct_order_for_tool_using_conversation.yaml
index ba555111f..8ce730f0f 100644
--- a/test/snapshots/event_fidelity/should_emit_events_in_correct_order_for_tool_using_conversation.yaml
+++ b/test/snapshots/event_fidelity/should_emit_events_in_correct_order_for_tool_using_conversation.yaml
@@ -39,9 +39,16 @@ conversations:
arguments: '{"path":"${workdir}/hello.txt"}'
- role: tool
tool_call_id: toolcall_0
- content: Intent logged
+ content: Tool 'report_intent' does not exist. Available tools that can be called are ${shell}, ${read_shell},
+ ${stop_shell}, ${list_shell}, view, create, edit, web_fetch, skill, sql, read_agent, list_agents, grep, glob,
+ task.
- role: tool
tool_call_id: toolcall_1
content: 1. Hello World
- role: assistant
- content: "The file 'hello.txt' contains: \"Hello World\""
+ content: |-
+ The file 'hello.txt' contains:
+
+ ```
+ Hello World
+ ```
diff --git a/test/snapshots/event_fidelity/should_emit_tool_execution_events_with_correct_fields.yaml b/test/snapshots/event_fidelity/should_emit_tool_execution_events_with_correct_fields.yaml
index e0b414069..a6583a15e 100644
--- a/test/snapshots/event_fidelity/should_emit_tool_execution_events_with_correct_fields.yaml
+++ b/test/snapshots/event_fidelity/should_emit_tool_execution_events_with_correct_fields.yaml
@@ -39,9 +39,15 @@ conversations:
arguments: '{"path":"${workdir}/data.txt"}'
- role: tool
tool_call_id: toolcall_0
- content: Intent logged
+ content: Tool 'report_intent' does not exist. Available tools that can be called are ${shell}, ${read_shell},
+ ${stop_shell}, ${list_shell}, view, create, edit, web_fetch, skill, sql, read_agent, list_agents, grep, glob,
+ task.
- role: tool
tool_call_id: toolcall_1
content: 1. test data
- role: assistant
- content: "The file 'data.txt' contains: `test data`"
+ content: |-
+ The file `data.txt` contains:
+ ```
+ test data
+ ```
diff --git a/test/snapshots/hooks/invoke_both_hooks_for_single_tool_call.yaml b/test/snapshots/hooks/invoke_both_hooks_for_single_tool_call.yaml
index 0bb88c130..6a51857ab 100644
--- a/test/snapshots/hooks/invoke_both_hooks_for_single_tool_call.yaml
+++ b/test/snapshots/hooks/invoke_both_hooks_for_single_tool_call.yaml
@@ -39,9 +39,15 @@ conversations:
arguments: '{"path":"${workdir}/both.txt"}'
- role: tool
tool_call_id: toolcall_0
- content: Intent logged
+ content: Tool 'report_intent' does not exist. Available tools that can be called are ${shell}, ${read_shell},
+ ${stop_shell}, ${list_shell}, view, create, edit, web_fetch, skill, sql, read_agent, list_agents, grep, glob,
+ task.
- role: tool
tool_call_id: toolcall_1
content: 1. Testing both hooks!
- role: assistant
- content: 'The file contains: "Testing both hooks!"'
+ content: |-
+ The file **both.txt** contains:
+ ```
+ Testing both hooks!
+ ```
diff --git a/test/snapshots/hooks/invoke_post_tool_use_hook_after_model_runs_a_tool.yaml b/test/snapshots/hooks/invoke_post_tool_use_hook_after_model_runs_a_tool.yaml
index 59369b1c8..18b324f09 100644
--- a/test/snapshots/hooks/invoke_post_tool_use_hook_after_model_runs_a_tool.yaml
+++ b/test/snapshots/hooks/invoke_post_tool_use_hook_after_model_runs_a_tool.yaml
@@ -39,9 +39,14 @@ conversations:
arguments: '{"path":"${workdir}/world.txt"}'
- role: tool
tool_call_id: toolcall_0
- content: Intent logged
+ content: Tool 'report_intent' does not exist. Available tools that can be called are ${shell}, ${read_shell},
+ ${stop_shell}, ${list_shell}, view, create, edit, web_fetch, skill, sql, read_agent, list_agents, grep, glob,
+ task.
- role: tool
tool_call_id: toolcall_1
content: 1. World from the test!
- role: assistant
- content: 'The file world.txt contains: "World from the test!"'
+ content: |-
+ The file `world.txt` contains:
+
+ **"World from the test!"**
diff --git a/test/snapshots/hooks/invoke_pre_tool_use_hook_when_model_runs_a_tool.yaml b/test/snapshots/hooks/invoke_pre_tool_use_hook_when_model_runs_a_tool.yaml
index 24241d900..1ce0fe67a 100644
--- a/test/snapshots/hooks/invoke_pre_tool_use_hook_when_model_runs_a_tool.yaml
+++ b/test/snapshots/hooks/invoke_pre_tool_use_hook_when_model_runs_a_tool.yaml
@@ -39,9 +39,16 @@ conversations:
arguments: '{"path":"${workdir}/hello.txt"}'
- role: tool
tool_call_id: toolcall_0
- content: Intent logged
+ content: Tool 'report_intent' does not exist. Available tools that can be called are ${shell}, ${read_shell},
+ ${stop_shell}, ${list_shell}, view, create, edit, web_fetch, skill, sql, read_agent, list_agents, grep, glob,
+ task.
- role: tool
tool_call_id: toolcall_1
content: 1. Hello from the test!
- role: assistant
- content: The file says "Hello from the test!"
+ content: |-
+ The file **hello.txt** contains:
+
+ ```
+ Hello from the test!
+ ```
diff --git a/test/snapshots/hooks/should_invoke_both_pretooluse_and_posttooluse_hooks_for_a_single_tool_call.yaml b/test/snapshots/hooks/should_invoke_both_pretooluse_and_posttooluse_hooks_for_a_single_tool_call.yaml
index dd55858bf..6a51857ab 100644
--- a/test/snapshots/hooks/should_invoke_both_pretooluse_and_posttooluse_hooks_for_a_single_tool_call.yaml
+++ b/test/snapshots/hooks/should_invoke_both_pretooluse_and_posttooluse_hooks_for_a_single_tool_call.yaml
@@ -39,9 +39,15 @@ conversations:
arguments: '{"path":"${workdir}/both.txt"}'
- role: tool
tool_call_id: toolcall_0
- content: Intent logged
+ content: Tool 'report_intent' does not exist. Available tools that can be called are ${shell}, ${read_shell},
+ ${stop_shell}, ${list_shell}, view, create, edit, web_fetch, skill, sql, read_agent, list_agents, grep, glob,
+ task.
- role: tool
tool_call_id: toolcall_1
content: 1. Testing both hooks!
- role: assistant
- content: 'The file contains a single line: "Testing both hooks!"'
+ content: |-
+ The file **both.txt** contains:
+ ```
+ Testing both hooks!
+ ```
diff --git a/test/snapshots/hooks/should_invoke_both_pretooluse_and_posttooluse_hooks_for_single_tool_call.yaml b/test/snapshots/hooks/should_invoke_both_pretooluse_and_posttooluse_hooks_for_single_tool_call.yaml
index ff422fee1..6a51857ab 100644
--- a/test/snapshots/hooks/should_invoke_both_pretooluse_and_posttooluse_hooks_for_single_tool_call.yaml
+++ b/test/snapshots/hooks/should_invoke_both_pretooluse_and_posttooluse_hooks_for_single_tool_call.yaml
@@ -39,9 +39,15 @@ conversations:
arguments: '{"path":"${workdir}/both.txt"}'
- role: tool
tool_call_id: toolcall_0
- content: Intent logged
+ content: Tool 'report_intent' does not exist. Available tools that can be called are ${shell}, ${read_shell},
+ ${stop_shell}, ${list_shell}, view, create, edit, web_fetch, skill, sql, read_agent, list_agents, grep, glob,
+ task.
- role: tool
tool_call_id: toolcall_1
content: 1. Testing both hooks!
- role: assistant
- content: 'The file `both.txt` contains: "Testing both hooks!"'
+ content: |-
+ The file **both.txt** contains:
+ ```
+ Testing both hooks!
+ ```
diff --git a/test/snapshots/hooks/should_invoke_posttooluse_hook_after_model_runs_a_tool.yaml b/test/snapshots/hooks/should_invoke_posttooluse_hook_after_model_runs_a_tool.yaml
index 518f6d06e..18b324f09 100644
--- a/test/snapshots/hooks/should_invoke_posttooluse_hook_after_model_runs_a_tool.yaml
+++ b/test/snapshots/hooks/should_invoke_posttooluse_hook_after_model_runs_a_tool.yaml
@@ -39,9 +39,14 @@ conversations:
arguments: '{"path":"${workdir}/world.txt"}'
- role: tool
tool_call_id: toolcall_0
- content: Intent logged
+ content: Tool 'report_intent' does not exist. Available tools that can be called are ${shell}, ${read_shell},
+ ${stop_shell}, ${list_shell}, view, create, edit, web_fetch, skill, sql, read_agent, list_agents, grep, glob,
+ task.
- role: tool
tool_call_id: toolcall_1
content: 1. World from the test!
- role: assistant
- content: 'The file contains: "World from the test!"'
+ content: |-
+ The file `world.txt` contains:
+
+ **"World from the test!"**
diff --git a/test/snapshots/hooks/should_invoke_pretooluse_hook_when_model_runs_a_tool.yaml b/test/snapshots/hooks/should_invoke_pretooluse_hook_when_model_runs_a_tool.yaml
index e1f807fdc..1ce0fe67a 100644
--- a/test/snapshots/hooks/should_invoke_pretooluse_hook_when_model_runs_a_tool.yaml
+++ b/test/snapshots/hooks/should_invoke_pretooluse_hook_when_model_runs_a_tool.yaml
@@ -39,9 +39,16 @@ conversations:
arguments: '{"path":"${workdir}/hello.txt"}'
- role: tool
tool_call_id: toolcall_0
- content: Intent logged
+ content: Tool 'report_intent' does not exist. Available tools that can be called are ${shell}, ${read_shell},
+ ${stop_shell}, ${list_shell}, view, create, edit, web_fetch, skill, sql, read_agent, list_agents, grep, glob,
+ task.
- role: tool
tool_call_id: toolcall_1
content: 1. Hello from the test!
- role: assistant
- content: 'The file contains: "Hello from the test!"'
+ content: |-
+ The file **hello.txt** contains:
+
+ ```
+ Hello from the test!
+ ```
diff --git a/test/snapshots/hooks_extended/should_allow_posttooluse_to_return_modifiedresult.yaml b/test/snapshots/hooks_extended/should_allow_posttooluse_to_return_modifiedresult.yaml
index cf1292873..a340e9326 100644
--- a/test/snapshots/hooks_extended/should_allow_posttooluse_to_return_modifiedresult.yaml
+++ b/test/snapshots/hooks_extended/should_allow_posttooluse_to_return_modifiedresult.yaml
@@ -5,34 +5,10 @@ conversations:
- role: system
content: ${system}
- role: user
- content: Call the report_intent tool with intent 'Testing post hook', then reply done.
+ content: Call the view tool to read the current directory, then reply done.
- role: assistant
tool_calls:
- id: toolcall_0
- type: function
- function:
- name: report_intent
- arguments: '{"intent":"Testing post hook"}'
- - role: assistant
- tool_calls:
- - id: toolcall_1
- type: function
- function:
- name: view
- arguments: '{"path":"${workdir}"}'
- - messages:
- - role: system
- content: ${system}
- - role: user
- content: Call the report_intent tool with intent 'Testing post hook', then reply done.
- - role: assistant
- tool_calls:
- - id: toolcall_0
- type: function
- function:
- name: report_intent
- arguments: '{"intent":"Testing post hook"}'
- - id: toolcall_1
type: function
function:
name: view
@@ -40,8 +16,5 @@ conversations:
- role: tool
tool_call_id: toolcall_0
content: modified by post hook
- - role: tool
- tool_call_id: toolcall_1
- content: Tool 'view' does not exist. Available tools that can be called are report_intent.
- role: assistant
content: Done.
diff --git a/test/snapshots/hooks_extended/should_allow_pretooluse_to_return_modifiedargs_and_suppressoutput.yaml b/test/snapshots/hooks_extended/should_allow_pretooluse_to_return_modifiedargs_and_suppressoutput.yaml
index cae46a153..8415fe771 100644
--- a/test/snapshots/hooks_extended/should_allow_pretooluse_to_return_modifiedargs_and_suppressoutput.yaml
+++ b/test/snapshots/hooks_extended/should_allow_pretooluse_to_return_modifiedargs_and_suppressoutput.yaml
@@ -42,9 +42,17 @@ conversations:
arguments: '{"value":"modified by hook"}'
- role: tool
tool_call_id: toolcall_0
- content: Intent logged
+ content: Tool 'report_intent' does not exist. Available tools that can be called are ${shell}, ${read_shell},
+ ${stop_shell}, ${list_shell}, view, create, edit, web_fetch, skill, sql, read_agent, list_agents, grep, glob,
+ task, echo_value.
- role: tool
tool_call_id: toolcall_1
content: modified by hook
- role: assistant
- content: 'The echo_value returned: **"modified by hook"**'
+ content: >-
+ The echo_value function returned: **"modified by hook"**
+
+
+ Interestingly, even though I called it with the value 'original', the result shows 'modified by hook',
+ suggesting there may be some interceptor or hook modifying the parameter before it reaches the actual
+ function.
diff --git a/test/snapshots/mode_handlers/should_invoke_exit_plan_mode_handler_when_model_uses_tool.yaml b/test/snapshots/mode_handlers/should_invoke_exit_plan_mode_handler_when_model_uses_tool.yaml
index 94233b05e..d3e915f6d 100644
--- a/test/snapshots/mode_handlers/should_invoke_exit_plan_mode_handler_when_model_uses_tool.yaml
+++ b/test/snapshots/mode_handlers/should_invoke_exit_plan_mode_handler_when_model_uses_tool.yaml
@@ -1,15 +1,5 @@
models:
- claude-sonnet-4.5
-# Two stored conversations cover the two CLI versions of the exit_plan_mode
-# post-approval tool result. The CLI injects this text and feeds it back to the
-# model; the SDK never asserts on its exact wording. The interactive-mode message
-# was reworded in copilot-agent-runtime:
-# conv0: CLI 1.0.61 and earlier -> "...interactive mode (edits require manual
-# approval). Proceed with implementing the plan."
-# conv1: newer runtime builds -> "...interactive mode. Start implementing the
-# plan now, in this same response. ..."
-# The replay proxy matches a request as a strict prefix of a stored conversation
-# and returns the next assistant message, so both versions resolve correctly.
conversations:
- messages:
- role: system
@@ -22,32 +12,15 @@ conversations:
type: function
function:
name: exit_plan_mode
- arguments: '{"summary":"Greeting file implementation plan","actions":["interactive","autopilot","exit_only"],"recommendedAction":"interactive"}'
+ arguments: '{"summary":"Greeting file implementation
+ plan","actions":["interactive","autopilot","exit_only"],"recommendedAction":"interactive"}'
- role: tool
tool_call_id: toolcall_0
- content: |-
+ content: >-
Plan approved! Exited plan mode.
- You are now in interactive mode (edits require manual approval). Proceed with implementing the plan.
- - role: assistant
- content: Plan approved; I will wait for the next instruction before making changes.
- - messages:
- - role: system
- content: ${system}
- - role: user
- content: Create a brief implementation plan for adding a greeting.txt file, then request approval with exit_plan_mode.
- - role: assistant
- tool_calls:
- - id: toolcall_0
- type: function
- function:
- name: exit_plan_mode
- arguments: '{"summary":"Greeting file implementation plan","actions":["interactive","autopilot","exit_only"],"recommendedAction":"interactive"}'
- - role: tool
- tool_call_id: toolcall_0
- content: |-
- Plan approved! Exited plan mode.
- You are now in interactive mode. Start implementing the plan now, in this same response. Approving the plan is your go-signal, so do not stop to ask whether to proceed or wait for another message.
+ You are now in interactive mode. Start implementing the plan now, in this same response. Approving the plan is
+ your go-signal, so do not stop to ask whether to proceed or wait for another message.
- role: assistant
content: Plan approved; I will wait for the next instruction before making changes.
diff --git a/test/snapshots/multi_client/both_clients_see_tool_request_and_completion_events.yaml b/test/snapshots/multi_client/both_clients_see_tool_request_and_completion_events.yaml
index b4b14d0ea..62f0d004a 100644
--- a/test/snapshots/multi_client/both_clients_see_tool_request_and_completion_events.yaml
+++ b/test/snapshots/multi_client/both_clients_see_tool_request_and_completion_events.yaml
@@ -42,9 +42,11 @@ conversations:
arguments: '{"seed":"hello"}'
- role: tool
tool_call_id: toolcall_0
- content: Intent logged
+ content: Tool 'report_intent' does not exist. Available tools that can be called are ${shell}, ${read_shell},
+ ${stop_shell}, ${list_shell}, view, create, edit, web_fetch, skill, sql, read_agent, list_agents, grep, glob,
+ task, magic_number.
- role: tool
tool_call_id: toolcall_1
content: MAGIC_hello_42
- role: assistant
- content: The magic number for seed 'hello' is **MAGIC_hello_42**.
+ content: "The magic number tool with seed 'hello' returned: **MAGIC_hello_42**"
diff --git a/test/snapshots/multi_client/disconnecting_client_removes_its_tools.yaml b/test/snapshots/multi_client/disconnecting_client_removes_its_tools.yaml
index 1da7b9282..b6410e0d1 100644
--- a/test/snapshots/multi_client/disconnecting_client_removes_its_tools.yaml
+++ b/test/snapshots/multi_client/disconnecting_client_removes_its_tools.yaml
@@ -42,26 +42,47 @@ conversations:
arguments: '{"input":"test1"}'
- role: tool
tool_call_id: toolcall_0
- content: Intent logged
+ content: Tool 'report_intent' does not exist. Available tools that can be called are ${shell}, ${read_shell},
+ ${stop_shell}, ${list_shell}, view, create, edit, web_fetch, skill, sql, read_agent, list_agents, grep, glob,
+ task, stable_tool, ephemeral_tool.
- role: tool
tool_call_id: toolcall_1
content: STABLE_test1
- role: assistant
content: "The stable_tool returned: **STABLE_test1**"
+ - messages:
+ - role: system
+ content: ${system}
- role: user
- content: Use the ephemeral_tool with input 'test2' and tell me the result.
- - role: assistant
- content: I'll call the ephemeral_tool with input 'test2' for you.
+ content: Use the stable_tool with input 'test1' and tell me the result.
- role: assistant
+ content: I'll call the stable_tool with input 'test1' for you.
tool_calls:
- - id: toolcall_2
+ - id: toolcall_0
type: function
function:
name: report_intent
- arguments: '{"intent":"Testing ephemeral_tool"}'
+ arguments: '{"intent":"Testing stable_tool"}'
+ - id: toolcall_1
+ type: function
+ function:
+ name: stable_tool
+ arguments: '{"input":"test1"}'
+ - role: tool
+ tool_call_id: toolcall_0
+ content: Tool 'report_intent' does not exist.
+ - role: tool
+ tool_call_id: toolcall_1
+ content: STABLE_test1
+ - role: assistant
+ content: "The stable_tool returned: **STABLE_test1**"
+ - role: user
+ content: Use the ephemeral_tool with input 'test2' and tell me the result.
+ - role: assistant
+ content: I'll call the ephemeral_tool with input 'test2' for you.
- role: assistant
tool_calls:
- - id: toolcall_3
+ - id: toolcall_2
type: function
function:
name: ephemeral_tool
@@ -86,7 +107,7 @@ conversations:
arguments: '{"input":"test1"}'
- role: tool
tool_call_id: toolcall_0
- content: Intent logged
+ content: Tool 'report_intent' does not exist.
- role: tool
tool_call_id: toolcall_1
content: STABLE_test1
@@ -98,20 +119,12 @@ conversations:
content: I'll call the ephemeral_tool with input 'test2' for you.
tool_calls:
- id: toolcall_2
- type: function
- function:
- name: report_intent
- arguments: '{"intent":"Testing ephemeral_tool"}'
- - id: toolcall_3
type: function
function:
name: ephemeral_tool
arguments: '{"input":"test2"}'
- role: tool
tool_call_id: toolcall_2
- content: Intent logged
- - role: tool
- tool_call_id: toolcall_3
content: EPHEMERAL_test2
- role: assistant
content: "The ephemeral_tool returned: **EPHEMERAL_test2**"
@@ -130,18 +143,11 @@ conversations:
Use the stable_tool with input 'still_here'. Also try using ephemeral_tool if it is available.
- role: assistant
- content: I'll call the stable_tool with input 'still_here'. The ephemeral_tool is no longer available, so I can only use
- the stable_tool.
+ content: I'll call the stable_tool with input 'still_here'. The ephemeral_tool is no longer available according to the
+ system notification.
- role: assistant
tool_calls:
- - id: toolcall_4
- type: function
- function:
- name: report_intent
- arguments: '{"intent":"Testing tool availability"}'
- - role: assistant
- tool_calls:
- - id: toolcall_5
+ - id: toolcall_3
type: function
function:
name: stable_tool
@@ -166,7 +172,7 @@ conversations:
arguments: '{"input":"test1"}'
- role: tool
tool_call_id: toolcall_0
- content: Intent logged
+ content: Tool 'report_intent' does not exist.
- role: tool
tool_call_id: toolcall_1
content: STABLE_test1
@@ -178,20 +184,12 @@ conversations:
content: I'll call the ephemeral_tool with input 'test2' for you.
tool_calls:
- id: toolcall_2
- type: function
- function:
- name: report_intent
- arguments: '{"intent":"Testing ephemeral_tool"}'
- - id: toolcall_3
type: function
function:
name: ephemeral_tool
arguments: '{"input":"test2"}'
- role: tool
tool_call_id: toolcall_2
- content: Intent logged
- - role: tool
- tool_call_id: toolcall_3
content: EPHEMERAL_test2
- role: assistant
content: "The ephemeral_tool returned: **EPHEMERAL_test2**"
@@ -210,29 +208,19 @@ conversations:
Use the stable_tool with input 'still_here'. Also try using ephemeral_tool if it is available.
- role: assistant
- content: I'll call the stable_tool with input 'still_here'. The ephemeral_tool is no longer available, so I can only use
- the stable_tool.
+ content: I'll call the stable_tool with input 'still_here'. The ephemeral_tool is no longer available according to the
+ system notification.
tool_calls:
- - id: toolcall_4
- type: function
- function:
- name: report_intent
- arguments: '{"intent":"Testing tool availability"}'
- - id: toolcall_5
+ - id: toolcall_3
type: function
function:
name: stable_tool
arguments: '{"input":"still_here"}'
- role: tool
- tool_call_id: toolcall_4
- content: Intent logged
- - role: tool
- tool_call_id: toolcall_5
+ tool_call_id: toolcall_3
content: STABLE_still_here
- role: assistant
- content: >-
+ content: |-
The stable_tool returned: **STABLE_still_here**
-
- The ephemeral_tool is not available anymore (it was removed as indicated in the tools_changed_notice), so I
- could only call the stable_tool.
+ The ephemeral_tool is not available - it was removed and I cannot call it.
diff --git a/test/snapshots/multi_client/one_client_approves_permission_and_both_see_the_result.yaml b/test/snapshots/multi_client/one_client_approves_permission_and_both_see_the_result.yaml
index e67357589..2397bfdc0 100644
--- a/test/snapshots/multi_client/one_client_approves_permission_and_both_see_the_result.yaml
+++ b/test/snapshots/multi_client/one_client_approves_permission_and_both_see_the_result.yaml
@@ -42,9 +42,11 @@ conversations:
arguments: '{"file_text":"hello world","path":"${workdir}/hello.txt"}'
- role: tool
tool_call_id: toolcall_0
- content: Intent logged
+ content: Tool 'report_intent' does not exist. Available tools that can be called are ${shell}, ${read_shell},
+ ${stop_shell}, ${list_shell}, view, create, edit, web_fetch, skill, sql, read_agent, list_agents, grep, glob,
+ task.
- role: tool
tool_call_id: toolcall_1
content: Created file ${workdir}/hello.txt with 11 characters
- role: assistant
- content: Done - I created hello.txt containing "hello world".
+ content: Done! I've created hello.txt with the text "hello world" in your current directory.
diff --git a/test/snapshots/multi_client/two_clients_register_different_tools_and_agent_uses_both.yaml b/test/snapshots/multi_client/two_clients_register_different_tools_and_agent_uses_both.yaml
index d0c95ba8a..c04864d82 100644
--- a/test/snapshots/multi_client/two_clients_register_different_tools_and_agent_uses_both.yaml
+++ b/test/snapshots/multi_client/two_clients_register_different_tools_and_agent_uses_both.yaml
@@ -42,26 +42,47 @@ conversations:
arguments: '{"countryCode":"US"}'
- role: tool
tool_call_id: toolcall_0
- content: Intent logged
+ content: Tool 'report_intent' does not exist. Available tools that can be called are ${shell}, ${read_shell},
+ ${stop_shell}, ${list_shell}, view, create, edit, web_fetch, skill, sql, read_agent, list_agents, grep, glob,
+ task, city_lookup, currency_lookup.
- role: tool
tool_call_id: toolcall_1
content: CITY_FOR_US
- role: assistant
- content: The city_lookup tool returned **"CITY_FOR_US"** for the country code 'US'.
+ content: "The city_lookup tool returned: **CITY_FOR_US**"
+ - messages:
+ - role: system
+ content: ${system}
- role: user
- content: Now use the currency_lookup tool with countryCode 'US' and tell me the result.
- - role: assistant
- content: I'll call the currency_lookup tool with the country code 'US' for you.
+ content: Use the city_lookup tool with countryCode 'US' and tell me the result.
- role: assistant
+ content: I'll call the city_lookup tool with the country code 'US' for you.
tool_calls:
- - id: toolcall_2
+ - id: toolcall_0
type: function
function:
name: report_intent
- arguments: '{"intent":"Looking up currency information"}'
+ arguments: '{"intent":"Looking up city information"}'
+ - id: toolcall_1
+ type: function
+ function:
+ name: city_lookup
+ arguments: '{"countryCode":"US"}'
+ - role: tool
+ tool_call_id: toolcall_0
+ content: Tool 'report_intent' does not exist.
+ - role: tool
+ tool_call_id: toolcall_1
+ content: CITY_FOR_US
+ - role: assistant
+ content: "The city_lookup tool returned: **CITY_FOR_US**"
+ - role: user
+ content: Now use the currency_lookup tool with countryCode 'US' and tell me the result.
+ - role: assistant
+ content: I'll call the currency_lookup tool with the country code 'US' for you.
- role: assistant
tool_calls:
- - id: toolcall_3
+ - id: toolcall_2
type: function
function:
name: currency_lookup
@@ -86,32 +107,24 @@ conversations:
arguments: '{"countryCode":"US"}'
- role: tool
tool_call_id: toolcall_0
- content: Intent logged
+ content: Tool 'report_intent' does not exist.
- role: tool
tool_call_id: toolcall_1
content: CITY_FOR_US
- role: assistant
- content: The city_lookup tool returned **"CITY_FOR_US"** for the country code 'US'.
+ content: "The city_lookup tool returned: **CITY_FOR_US**"
- role: user
content: Now use the currency_lookup tool with countryCode 'US' and tell me the result.
- role: assistant
content: I'll call the currency_lookup tool with the country code 'US' for you.
tool_calls:
- id: toolcall_2
- type: function
- function:
- name: report_intent
- arguments: '{"intent":"Looking up currency information"}'
- - id: toolcall_3
type: function
function:
name: currency_lookup
arguments: '{"countryCode":"US"}'
- role: tool
tool_call_id: toolcall_2
- content: Intent logged
- - role: tool
- tool_call_id: toolcall_3
content: CURRENCY_FOR_US
- role: assistant
- content: The currency_lookup tool returned **"CURRENCY_FOR_US"** for the country code 'US'.
+ content: "The currency_lookup tool returned: **CURRENCY_FOR_US**"
diff --git a/test/snapshots/multi_turn/should_handle_file_creation_then_reading_across_turns.yaml b/test/snapshots/multi_turn/should_handle_file_creation_then_reading_across_turns.yaml
index 64af54b27..0d79c3e1a 100644
--- a/test/snapshots/multi_turn/should_handle_file_creation_then_reading_across_turns.yaml
+++ b/test/snapshots/multi_turn/should_handle_file_creation_then_reading_across_turns.yaml
@@ -39,28 +39,14 @@ conversations:
arguments: '{"path":"${workdir}/greeting.txt","file_text":"Hello from multi-turn test"}'
- role: tool
tool_call_id: toolcall_0
- content: Intent logged
+ content: Tool 'report_intent' does not exist. Available tools that can be called are ${shell}, ${read_shell},
+ ${stop_shell}, ${list_shell}, view, create, edit, web_fetch, skill, sql, read_agent, list_agents, grep, glob,
+ task.
- role: tool
tool_call_id: toolcall_1
content: Created file ${workdir}/greeting.txt with 26 characters
- role: assistant
content: Created `greeting.txt` with the content "Hello from multi-turn test".
- - role: user
- content: Read the file 'greeting.txt' and tell me its exact contents.
- - role: assistant
- tool_calls:
- - id: toolcall_2
- type: function
- function:
- name: report_intent
- arguments: '{"intent":"Reading greeting file"}'
- - role: assistant
- tool_calls:
- - id: toolcall_3
- type: function
- function:
- name: view
- arguments: '{"path":"${workdir}/greeting.txt"}'
- messages:
- role: system
content: ${system}
@@ -80,7 +66,7 @@ conversations:
arguments: '{"path":"${workdir}/greeting.txt","file_text":"Hello from multi-turn test"}'
- role: tool
tool_call_id: toolcall_0
- content: Intent logged
+ content: Tool 'report_intent' does not exist.
- role: tool
tool_call_id: toolcall_1
content: Created file ${workdir}/greeting.txt with 26 characters
@@ -91,20 +77,17 @@ conversations:
- role: assistant
tool_calls:
- id: toolcall_2
- type: function
- function:
- name: report_intent
- arguments: '{"intent":"Reading greeting file"}'
- - id: toolcall_3
type: function
function:
name: view
arguments: '{"path":"${workdir}/greeting.txt"}'
- role: tool
tool_call_id: toolcall_2
- content: Intent logged
- - role: tool
- tool_call_id: toolcall_3
content: 1. Hello from multi-turn test
- role: assistant
- content: "The exact contents of `greeting.txt` are: `Hello from multi-turn test`"
+ content: |-
+ The exact contents of `greeting.txt` are:
+
+ ```
+ Hello from multi-turn test
+ ```
diff --git a/test/snapshots/multi_turn/should_use_tool_results_from_previous_turns.yaml b/test/snapshots/multi_turn/should_use_tool_results_from_previous_turns.yaml
index 8e9ce440d..b930bb46a 100644
--- a/test/snapshots/multi_turn/should_use_tool_results_from_previous_turns.yaml
+++ b/test/snapshots/multi_turn/should_use_tool_results_from_previous_turns.yaml
@@ -39,7 +39,34 @@ conversations:
arguments: '{"path":"${workdir}/secret.txt"}'
- role: tool
tool_call_id: toolcall_0
- content: Intent logged
+ content: Tool 'report_intent' does not exist. Available tools that can be called are ${shell}, ${read_shell},
+ ${stop_shell}, ${list_shell}, view, create, edit, web_fetch, skill, sql, read_agent, list_agents, grep, glob,
+ task.
+ - role: tool
+ tool_call_id: toolcall_1
+ content: 1. The magic number is 42.
+ - role: assistant
+ content: The magic number is **42**.
+ - messages:
+ - role: system
+ content: ${system}
+ - role: user
+ content: Read the file 'secret.txt' and tell me what the magic number is.
+ - role: assistant
+ tool_calls:
+ - id: toolcall_0
+ type: function
+ function:
+ name: report_intent
+ arguments: '{"intent":"Reading secret file"}'
+ - id: toolcall_1
+ type: function
+ function:
+ name: view
+ arguments: '{"path":"${workdir}/secret.txt"}'
+ - role: tool
+ tool_call_id: toolcall_0
+ content: Tool 'report_intent' does not exist.
- role: tool
tool_call_id: toolcall_1
content: 1. The magic number is 42.
@@ -48,4 +75,4 @@ conversations:
- role: user
content: What is that magic number multiplied by 2?
- role: assistant
- content: 42 × 2 = **84**
+ content: The magic number (42) multiplied by 2 is **84**.
diff --git a/test/snapshots/pending_work_resume/should_continue_parallel_pending_external_tool_requests_after_resume.yaml b/test/snapshots/pending_work_resume/should_continue_parallel_pending_external_tool_requests_after_resume.yaml
index 0216a2116..acae9a8b2 100644
--- a/test/snapshots/pending_work_resume/should_continue_parallel_pending_external_tool_requests_after_resume.yaml
+++ b/test/snapshots/pending_work_resume/should_continue_parallel_pending_external_tool_requests_after_resume.yaml
@@ -18,12 +18,12 @@ conversations:
function:
name: pending_lookup_b
arguments: '{"value":"beta"}'
- - role: tool
- tool_call_id: toolcall_1
- content: PARALLEL_B_BETA
- role: tool
tool_call_id: toolcall_0
content: PARALLEL_A_ALPHA
+ - role: tool
+ tool_call_id: toolcall_1
+ content: PARALLEL_B_BETA
- role: assistant
content: |-
Both lookups completed successfully:
diff --git a/test/snapshots/pending_work_resume/should_continue_pending_permission_request_after_resume.yaml b/test/snapshots/pending_work_resume/should_continue_pending_permission_request_after_resume.yaml
index 37a4a0bae..4856cdc4c 100644
--- a/test/snapshots/pending_work_resume/should_continue_pending_permission_request_after_resume.yaml
+++ b/test/snapshots/pending_work_resume/should_continue_pending_permission_request_after_resume.yaml
@@ -13,11 +13,3 @@ conversations:
function:
name: resume_permission_tool
arguments: '{"value":"alpha"}'
- - role: tool
- tool_call_id: toolcall_0
- content: PERMISSION_RESUMED_ALPHA
- - role: assistant
- content: |-
- I called `resume_permission_tool` with the value 'alpha' and received the result:
-
- **PERMISSION_RESUMED_ALPHA**
diff --git a/test/snapshots/permissions/async_permission_handler.yaml b/test/snapshots/permissions/async_permission_handler.yaml
index 5cc63c085..1d46c38a4 100644
--- a/test/snapshots/permissions/async_permission_handler.yaml
+++ b/test/snapshots/permissions/async_permission_handler.yaml
@@ -19,7 +19,7 @@ conversations:
type: function
function:
name: ${shell}
- arguments: '{"command":"echo test","description":"Run echo test command"}'
+ arguments: '{"command":"echo test","description":"Run echo test"}'
- messages:
- role: system
content: ${system}
@@ -36,15 +36,17 @@ conversations:
type: function
function:
name: ${shell}
- arguments: '{"command":"echo test","description":"Run echo test command"}'
+ arguments: '{"command":"echo test","description":"Run echo test"}'
- role: tool
tool_call_id: toolcall_0
- content: Intent logged
+ content: Tool 'report_intent' does not exist. Available tools that can be called are ${shell}, ${read_shell},
+ ${stop_shell}, ${list_shell}, view, create, edit, web_fetch, skill, sql, read_agent, list_agents, grep, glob,
+ task.
- role: tool
tool_call_id: toolcall_1
content: |-
test
- role: assistant
- content: The command successfully executed and outputted "test" to the console, then exited with code 0 (indicating
- success).
+ content: The command ran successfully and output `test` to the console. It completed with exit code 0, which means it
+ executed without any errors.
diff --git a/test/snapshots/permissions/permission_handler_for_shell_commands.yaml b/test/snapshots/permissions/permission_handler_for_shell_commands.yaml
index 7078d1dba..1d46c38a4 100644
--- a/test/snapshots/permissions/permission_handler_for_shell_commands.yaml
+++ b/test/snapshots/permissions/permission_handler_for_shell_commands.yaml
@@ -5,7 +5,26 @@ conversations:
- role: system
content: ${system}
- role: user
- content: Run 'echo hello' and tell me the output
+ content: Run 'echo test' and tell me what happens
+ - role: assistant
+ tool_calls:
+ - id: toolcall_0
+ type: function
+ function:
+ name: report_intent
+ arguments: '{"intent":"Running echo command"}'
+ - role: assistant
+ tool_calls:
+ - id: toolcall_1
+ type: function
+ function:
+ name: ${shell}
+ arguments: '{"command":"echo test","description":"Run echo test"}'
+ - messages:
+ - role: system
+ content: ${system}
+ - role: user
+ content: Run 'echo test' and tell me what happens
- role: assistant
tool_calls:
- id: toolcall_0
@@ -17,14 +36,17 @@ conversations:
type: function
function:
name: ${shell}
- arguments: '{"command":"echo hello","description":"Run echo hello"}'
+ arguments: '{"command":"echo test","description":"Run echo test"}'
- role: tool
tool_call_id: toolcall_0
- content: Intent logged
+ content: Tool 'report_intent' does not exist. Available tools that can be called are ${shell}, ${read_shell},
+ ${stop_shell}, ${list_shell}, view, create, edit, web_fetch, skill, sql, read_agent, list_agents, grep, glob,
+ task.
- role: tool
tool_call_id: toolcall_1
content: |-
- hello
+ test
- role: assistant
- content: The output is `hello`.
+ content: The command ran successfully and output `test` to the console. It completed with exit code 0, which means it
+ executed without any errors.
diff --git a/test/snapshots/permissions/permission_handler_for_write_operations.yaml b/test/snapshots/permissions/permission_handler_for_write_operations.yaml
index a08639800..a4ede6fcb 100644
--- a/test/snapshots/permissions/permission_handler_for_write_operations.yaml
+++ b/test/snapshots/permissions/permission_handler_for_write_operations.yaml
@@ -42,19 +42,57 @@ conversations:
arguments: '{"path":"${workdir}/test.txt"}'
- role: tool
tool_call_id: toolcall_0
- content: Intent logged
+ content: Tool 'report_intent' does not exist. Available tools that can be called are ${shell}, ${read_shell},
+ ${stop_shell}, ${list_shell}, view, create, edit, web_fetch, skill, sql, read_agent, list_agents, grep, glob,
+ task.
- role: tool
tool_call_id: toolcall_1
content: 1. original content
- role: assistant
+ content: "Now I'll replace 'original' with 'modified':"
+ - role: assistant
+ tool_calls:
+ - id: toolcall_2
+ type: function
+ function:
+ name: edit
+ arguments: '{"new_str":"modified content","old_str":"original content","path":"${workdir}/test.txt"}'
+ - messages:
+ - role: system
+ content: ${system}
+ - role: user
+ content: Edit test.txt and replace 'original' with 'modified'
+ - role: assistant
+ content: I'll view the file first to see its contents, then make the replacement.
+ tool_calls:
+ - id: toolcall_0
+ type: function
+ function:
+ name: report_intent
+ arguments: '{"intent":"Editing test.txt file"}'
+ - id: toolcall_1
+ type: function
+ function:
+ name: view
+ arguments: '{"path":"${workdir}/test.txt"}'
+ - role: tool
+ tool_call_id: toolcall_0
+ content: Tool 'report_intent' does not exist. Available tools that can be called are ${shell}, ${read_shell},
+ ${stop_shell}, ${list_shell}, view, create, edit, web_fetch, skill, sql, read_agent, list_agents, grep, glob,
+ task.
+ - role: tool
+ tool_call_id: toolcall_1
+ content: 1. original content
+ - role: assistant
+ content: "Now I'll replace 'original' with 'modified':"
tool_calls:
- id: toolcall_2
type: function
function:
name: edit
- arguments: '{"path":"${workdir}/test.txt","old_str":"original content","new_str":"modified content"}'
+ arguments: '{"new_str":"modified content","old_str":"original content","path":"${workdir}/test.txt"}'
- role: tool
tool_call_id: toolcall_2
content: File ${workdir}/test.txt updated with changes.
- role: assistant
- content: Done! I've replaced 'original' with 'modified' in test.txt.
+ content: Done! I've replaced 'original' with 'modified' in test.txt. The file now contains "modified content".
diff --git a/test/snapshots/permissions/resume_session_with_permission_handler.yaml b/test/snapshots/permissions/resume_session_with_permission_handler.yaml
index 3eb05551a..6296a0d73 100644
--- a/test/snapshots/permissions/resume_session_with_permission_handler.yaml
+++ b/test/snapshots/permissions/resume_session_with_permission_handler.yaml
@@ -7,7 +7,7 @@ conversations:
- role: user
content: What is 1+1?
- role: assistant
- content: 1+1 equals 2.
+ content: 1+1 = 2
- role: user
content: Run 'echo resumed' for me
- role: assistant
@@ -23,14 +23,14 @@ conversations:
type: function
function:
name: ${shell}
- arguments: '{"command":"echo resumed","description":"Run echo resumed"}'
+ arguments: '{"description":"Run echo resumed","command":"echo resumed"}'
- messages:
- role: system
content: ${system}
- role: user
content: What is 1+1?
- role: assistant
- content: 1+1 equals 2.
+ content: 1+1 = 2
- role: user
content: Run 'echo resumed' for me
- role: assistant
@@ -44,14 +44,16 @@ conversations:
type: function
function:
name: ${shell}
- arguments: '{"command":"echo resumed","description":"Run echo resumed"}'
+ arguments: '{"description":"Run echo resumed","command":"echo resumed"}'
- role: tool
tool_call_id: toolcall_0
- content: Intent logged
+ content: Tool 'report_intent' does not exist. Available tools that can be called are ${shell}, ${read_shell},
+ ${stop_shell}, ${list_shell}, view, create, edit, web_fetch, skill, sql, read_agent, list_agents, grep, glob,
+ task.
- role: tool
tool_call_id: toolcall_1
content: |-
resumed
- role: assistant
- content: 'Command executed successfully - output: "resumed"'
+ content: Done! The command output "resumed".
diff --git a/test/snapshots/permissions/should_deny_permission_when_handler_returns_denied.yaml b/test/snapshots/permissions/should_deny_permission_when_handler_returns_denied.yaml
index 9e54aa424..ef6f60dbe 100644
--- a/test/snapshots/permissions/should_deny_permission_when_handler_returns_denied.yaml
+++ b/test/snapshots/permissions/should_deny_permission_when_handler_returns_denied.yaml
@@ -22,29 +22,3 @@ conversations:
function:
name: view
arguments: '{"path":"${workdir}/protected.txt"}'
- - messages:
- - role: system
- content: ${system}
- - role: user
- content: Edit protected.txt and replace 'protected' with 'hacked'.
- - role: assistant
- content: I'll view the file first, then make the edit.
- tool_calls:
- - id: toolcall_0
- type: function
- function:
- name: report_intent
- arguments: '{"intent":"Editing protected.txt file"}'
- - id: toolcall_1
- type: function
- function:
- name: view
- arguments: '{"path":"${workdir}/protected.txt"}'
- - role: tool
- tool_call_id: toolcall_0
- content: Intent logged
- - role: tool
- tool_call_id: toolcall_1
- content: Permission denied and could not request permission from user
- - role: assistant
- content: I don't have permission to view or edit protected.txt, so I can't make that change.
diff --git a/test/snapshots/permissions/should_deny_tool_operations_when_handler_explicitly_denies.yaml b/test/snapshots/permissions/should_deny_tool_operations_when_handler_explicitly_denies.yaml
index c0fc46a9a..0d25979c7 100644
--- a/test/snapshots/permissions/should_deny_tool_operations_when_handler_explicitly_denies.yaml
+++ b/test/snapshots/permissions/should_deny_tool_operations_when_handler_explicitly_denies.yaml
@@ -39,10 +39,12 @@ conversations:
arguments: '{"command":"node --version","description":"Check Node.js version"}'
- role: tool
tool_call_id: toolcall_0
- content: Intent logged
+ content: Tool 'report_intent' does not exist. Available tools that can be called are ${shell}, ${read_shell},
+ ${stop_shell}, ${list_shell}, view, create, edit, web_fetch, skill, sql, read_agent, list_agents, grep, glob,
+ task.
- role: tool
tool_call_id: toolcall_1
content: Permission denied and could not request permission from user
- role: assistant
- content: Permission was denied to run the command. This may be due to security policies or execution restrictions in the
- current environment.
+ content: I don't have permission to execute commands in this environment. It seems access is restricted. You may need to
+ grant permission or run the command directly in your terminal.
diff --git a/test/snapshots/permissions/should_deny_tool_operations_when_handler_explicitly_denies_after_resume.yaml b/test/snapshots/permissions/should_deny_tool_operations_when_handler_explicitly_denies_after_resume.yaml
index 551ba8f91..9a23c55f0 100644
--- a/test/snapshots/permissions/should_deny_tool_operations_when_handler_explicitly_denies_after_resume.yaml
+++ b/test/snapshots/permissions/should_deny_tool_operations_when_handler_explicitly_denies_after_resume.yaml
@@ -47,10 +47,12 @@ conversations:
arguments: '{"command":"node --version","description":"Check Node.js version"}'
- role: tool
tool_call_id: toolcall_0
- content: Intent logged
+ content: Tool 'report_intent' does not exist. Available tools that can be called are ${shell}, ${read_shell},
+ ${stop_shell}, ${list_shell}, view, create, edit, web_fetch, skill, sql, read_agent, list_agents, grep, glob,
+ task.
- role: tool
tool_call_id: toolcall_1
content: Permission denied and could not request permission from user
- role: assistant
- content: The command was denied due to insufficient permissions. You'll need to grant permission to run commands in this
- session.
+ content: I don't have permission to run commands in this environment. Please try running `node --version` directly in
+ your terminal.
diff --git a/test/snapshots/permissions/should_handle_async_permission_handler.yaml b/test/snapshots/permissions/should_handle_async_permission_handler.yaml
index f28c96ca7..1d46c38a4 100644
--- a/test/snapshots/permissions/should_handle_async_permission_handler.yaml
+++ b/test/snapshots/permissions/should_handle_async_permission_handler.yaml
@@ -39,12 +39,14 @@ conversations:
arguments: '{"command":"echo test","description":"Run echo test"}'
- role: tool
tool_call_id: toolcall_0
- content: Intent logged
+ content: Tool 'report_intent' does not exist. Available tools that can be called are ${shell}, ${read_shell},
+ ${stop_shell}, ${list_shell}, view, create, edit, web_fetch, skill, sql, read_agent, list_agents, grep, glob,
+ task.
- role: tool
tool_call_id: toolcall_1
content: |-
test
- role: assistant
- content: The command successfully executed and printed "test" to the console, then exited with exit code 0 (indicating
- success).
+ content: The command ran successfully and output `test` to the console. It completed with exit code 0, which means it
+ executed without any errors.
diff --git a/test/snapshots/permissions/should_handle_permission_handler_errors_gracefully.yaml b/test/snapshots/permissions/should_handle_permission_handler_errors_gracefully.yaml
index cee78a092..2a2db6210 100644
--- a/test/snapshots/permissions/should_handle_permission_handler_errors_gracefully.yaml
+++ b/test/snapshots/permissions/should_handle_permission_handler_errors_gracefully.yaml
@@ -39,7 +39,9 @@ conversations:
arguments: '{"command":"echo test","description":"Run echo test"}'
- role: tool
tool_call_id: toolcall_0
- content: Intent logged
+ content: Tool 'report_intent' does not exist. Available tools that can be called are ${shell}, ${read_shell},
+ ${stop_shell}, ${list_shell}, view, create, edit, web_fetch, skill, sql, read_agent, list_agents, grep, glob,
+ task.
- role: tool
tool_call_id: toolcall_1
content: Permission denied and could not request permission from user
diff --git a/test/snapshots/permissions/should_invoke_permission_handler_for_write_operations.yaml b/test/snapshots/permissions/should_invoke_permission_handler_for_write_operations.yaml
index fe1ecb341..a4ede6fcb 100644
--- a/test/snapshots/permissions/should_invoke_permission_handler_for_write_operations.yaml
+++ b/test/snapshots/permissions/should_invoke_permission_handler_for_write_operations.yaml
@@ -42,11 +42,49 @@ conversations:
arguments: '{"path":"${workdir}/test.txt"}'
- role: tool
tool_call_id: toolcall_0
- content: Intent logged
+ content: Tool 'report_intent' does not exist. Available tools that can be called are ${shell}, ${read_shell},
+ ${stop_shell}, ${list_shell}, view, create, edit, web_fetch, skill, sql, read_agent, list_agents, grep, glob,
+ task.
- role: tool
tool_call_id: toolcall_1
content: 1. original content
- role: assistant
+ content: "Now I'll replace 'original' with 'modified':"
+ - role: assistant
+ tool_calls:
+ - id: toolcall_2
+ type: function
+ function:
+ name: edit
+ arguments: '{"new_str":"modified content","old_str":"original content","path":"${workdir}/test.txt"}'
+ - messages:
+ - role: system
+ content: ${system}
+ - role: user
+ content: Edit test.txt and replace 'original' with 'modified'
+ - role: assistant
+ content: I'll view the file first to see its contents, then make the replacement.
+ tool_calls:
+ - id: toolcall_0
+ type: function
+ function:
+ name: report_intent
+ arguments: '{"intent":"Editing test.txt file"}'
+ - id: toolcall_1
+ type: function
+ function:
+ name: view
+ arguments: '{"path":"${workdir}/test.txt"}'
+ - role: tool
+ tool_call_id: toolcall_0
+ content: Tool 'report_intent' does not exist. Available tools that can be called are ${shell}, ${read_shell},
+ ${stop_shell}, ${list_shell}, view, create, edit, web_fetch, skill, sql, read_agent, list_agents, grep, glob,
+ task.
+ - role: tool
+ tool_call_id: toolcall_1
+ content: 1. original content
+ - role: assistant
+ content: "Now I'll replace 'original' with 'modified':"
tool_calls:
- id: toolcall_2
type: function
diff --git a/test/snapshots/permissions/should_receive_toolcallid_in_permission_requests.yaml b/test/snapshots/permissions/should_receive_toolcallid_in_permission_requests.yaml
index b529a01e6..90407df6f 100644
--- a/test/snapshots/permissions/should_receive_toolcallid_in_permission_requests.yaml
+++ b/test/snapshots/permissions/should_receive_toolcallid_in_permission_requests.yaml
@@ -39,11 +39,13 @@ conversations:
arguments: '{"command":"echo test","description":"Run echo test"}'
- role: tool
tool_call_id: toolcall_0
- content: Intent logged
+ content: Tool 'report_intent' does not exist. Available tools that can be called are ${shell}, ${read_shell},
+ ${stop_shell}, ${list_shell}, view, create, edit, web_fetch, skill, sql, read_agent, list_agents, grep, glob,
+ task.
- role: tool
tool_call_id: toolcall_1
content: |-
test
- role: assistant
- content: Command executed successfully, output is "test".
+ content: "✓ Command output: `test`"
diff --git a/test/snapshots/permissions/should_resume_session_with_permission_handler.yaml b/test/snapshots/permissions/should_resume_session_with_permission_handler.yaml
index 69a52be87..6296a0d73 100644
--- a/test/snapshots/permissions/should_resume_session_with_permission_handler.yaml
+++ b/test/snapshots/permissions/should_resume_session_with_permission_handler.yaml
@@ -47,11 +47,13 @@ conversations:
arguments: '{"description":"Run echo resumed","command":"echo resumed"}'
- role: tool
tool_call_id: toolcall_0
- content: Intent logged
+ content: Tool 'report_intent' does not exist. Available tools that can be called are ${shell}, ${read_shell},
+ ${stop_shell}, ${list_shell}, view, create, edit, web_fetch, skill, sql, read_agent, list_agents, grep, glob,
+ task.
- role: tool
tool_call_id: toolcall_1
content: |-
resumed
- role: assistant
- content: "The command executed successfully and output: **resumed**"
+ content: Done! The command output "resumed".
diff --git a/test/snapshots/permissions/should_short_circuit_permission_handler_when_set_approve_all_enabled.yaml b/test/snapshots/permissions/should_short_circuit_permission_handler_when_set_approve_all_enabled.yaml
index e9550b2cb..3a6d66dc8 100644
--- a/test/snapshots/permissions/should_short_circuit_permission_handler_when_set_approve_all_enabled.yaml
+++ b/test/snapshots/permissions/should_short_circuit_permission_handler_when_set_approve_all_enabled.yaml
@@ -20,12 +20,14 @@ conversations:
arguments: '{"command":"echo test","description":"Run echo test"}'
- role: tool
tool_call_id: toolcall_0
- content: Intent logged
+ content: Tool 'report_intent' does not exist. Available tools that can be called are ${shell}, ${read_shell},
+ ${stop_shell}, ${list_shell}, view, create, edit, web_fetch, skill, sql, read_agent, list_agents, grep, glob,
+ task.
- role: tool
tool_call_id: toolcall_1
content: |-
test
- role: assistant
- content: The command successfully executed and printed "test" to the console, then exited with exit code 0 (indicating
- success).
+ content: The command executed successfully and output "test" to the console. The shell completed with exit code 0, which
+ indicates success.
diff --git a/test/snapshots/permissions/tool_call_id_in_permission_requests.yaml b/test/snapshots/permissions/tool_call_id_in_permission_requests.yaml
index 3620c2c1c..90407df6f 100644
--- a/test/snapshots/permissions/tool_call_id_in_permission_requests.yaml
+++ b/test/snapshots/permissions/tool_call_id_in_permission_requests.yaml
@@ -39,11 +39,13 @@ conversations:
arguments: '{"command":"echo test","description":"Run echo test"}'
- role: tool
tool_call_id: toolcall_0
- content: Intent logged
+ content: Tool 'report_intent' does not exist. Available tools that can be called are ${shell}, ${read_shell},
+ ${stop_shell}, ${list_shell}, view, create, edit, web_fetch, skill, sql, read_agent, list_agents, grep, glob,
+ task.
- role: tool
tool_call_id: toolcall_1
content: |-
test
- role: assistant
- content: "The command executed successfully and output: `test`"
+ content: "✓ Command output: `test`"
diff --git a/test/snapshots/rpc_session_state_extras/should_get_current_tool_metadata_after_initialization.yaml b/test/snapshots/rpc_session_state_extras/should_get_current_tool_metadata_after_initialization.yaml
index 80307eb59..73f049900 100644
--- a/test/snapshots/rpc_session_state_extras/should_get_current_tool_metadata_after_initialization.yaml
+++ b/test/snapshots/rpc_session_state_extras/should_get_current_tool_metadata_after_initialization.yaml
@@ -7,4 +7,4 @@ conversations:
- role: user
content: What is 2+2?
- role: assistant
- content: "4"
\ No newline at end of file
+ content: "4"
diff --git a/test/snapshots/rpc_shell_and_fleet/should_start_fleet_and_complete_custom_tool_task.yaml b/test/snapshots/rpc_shell_and_fleet/should_start_fleet_and_complete_custom_tool_task.yaml
index b550e96f2..65ced1e36 100644
--- a/test/snapshots/rpc_shell_and_fleet/should_start_fleet_and_complete_custom_tool_task.yaml
+++ b/test/snapshots/rpc_shell_and_fleet/should_start_fleet_and_complete_custom_tool_task.yaml
@@ -168,12 +168,16 @@ conversations:
arguments: '{"content":"copilot-sdk-fleet-rpc"}'
- role: tool
tool_call_id: toolcall_0
- content: Intent logged
+ content: Tool 'report_intent' does not exist. Available tools that can be called are ${shell}, ${read_shell},
+ ${stop_shell}, ${list_shell}, view, create, edit, web_fetch, skill, sql, read_agent, list_agents, grep, glob,
+ task, record_fleet_completion.
- role: tool
tool_call_id: toolcall_1
content: copilot-sdk-fleet-rpc
- role: assistant
- content: |-
+ content: >-
✅ **Fleet task complete!**
- I've successfully recorded the fleet completion with content 'copilot-sdk-fleet-rpc'. The validation is done.
+
+ I've successfully recorded the fleet completion with content 'copilot-sdk-fleet-rpc'. The validation task has
+ been completed as requested.
diff --git a/test/snapshots/rpc_ui_ephemeral_query/should_answer_ephemeral_query.yaml b/test/snapshots/rpc_ui_ephemeral_query/should_answer_ephemeral_query.yaml
index 015797050..fec44be1f 100644
--- a/test/snapshots/rpc_ui_ephemeral_query/should_answer_ephemeral_query.yaml
+++ b/test/snapshots/rpc_ui_ephemeral_query/should_answer_ephemeral_query.yaml
@@ -7,4 +7,4 @@ conversations:
- role: user
content: In one word, what is the primary color of a clear daytime sky?
- role: assistant
- content: Blue.
\ No newline at end of file
+ content: Blue.
diff --git a/test/snapshots/session/send_returns_immediately_while_events_stream_in_background.yaml b/test/snapshots/session/send_returns_immediately_while_events_stream_in_background.yaml
index ab9174fc4..c2e705ed2 100644
--- a/test/snapshots/session/send_returns_immediately_while_events_stream_in_background.yaml
+++ b/test/snapshots/session/send_returns_immediately_while_events_stream_in_background.yaml
@@ -41,11 +41,13 @@ conversations:
command","initial_wait":5,"mode":"sync"}'
- role: tool
tool_call_id: toolcall_0
- content: Intent logged
+ content: Tool 'report_intent' does not exist. Available tools that can be called are ${shell}, ${read_shell},
+ ${stop_shell}, ${list_shell}, view, create, edit, web_fetch, skill, sql, read_agent, list_agents, grep, glob,
+ task.
- role: tool
tool_call_id: toolcall_1
content: |-
done
- role: assistant
- content: The command completed successfully, waiting 2 seconds before echoing "done".
+ content: Command completed successfully! The output was "done" after the 2 second sleep.
diff --git a/test/snapshots/session/should_abort_a_session.yaml b/test/snapshots/session/should_abort_a_session.yaml
index 9f6c42c2b..dbbbd32aa 100644
--- a/test/snapshots/session/should_abort_a_session.yaml
+++ b/test/snapshots/session/should_abort_a_session.yaml
@@ -42,11 +42,11 @@ conversations:
arguments: '{"command":"sleep 100","description":"Run sleep 100 command","mode":"sync","initial_wait":105}'
- role: tool
tool_call_id: toolcall_0
- content: Intent logged
+ content: Tool 'report_intent' does not exist.
- role: tool
tool_call_id: toolcall_1
content: The execution of this tool, or a previous tool was interrupted.
- role: user
content: What is 2+2?
- role: assistant
- content: "4"
+ content: 2 + 2 = 4
diff --git a/test/snapshots/session/should_accept_blob_attachments.yaml b/test/snapshots/session/should_accept_blob_attachments.yaml
index fe584aa8b..1cca7142d 100644
--- a/test/snapshots/session/should_accept_blob_attachments.yaml
+++ b/test/snapshots/session/should_accept_blob_attachments.yaml
@@ -48,7 +48,9 @@ conversations:
arguments: '{"path":"${workdir}/test-pixel.png"}'
- role: tool
tool_call_id: toolcall_0
- content: Intent logged
+ content: Tool 'report_intent' does not exist. Available tools that can be called are ${shell}, ${read_shell},
+ ${stop_shell}, ${list_shell}, view, create, edit, web_fetch, skill, sql, read_agent, list_agents, grep, glob,
+ task.
- role: tool
tool_call_id: toolcall_1
content: Viewed image file successfully.
@@ -57,20 +59,6 @@ conversations:
Image file at path ${workdir}/test-pixel.png
[image]
- role: assistant
- content: >-
- This is an extremely small image - it appears to be essentially a **single white pixel** or a very tiny white
- square on a transparent background. The image is minimal in size and content, likely just a few pixels in
- dimension. It's the kind of test image that might be used for:
-
-
- - Testing image loading/rendering functionality
-
- - Placeholder purposes
-
- - Minimal file size requirements
-
- - Image processing pipeline validation
-
-
- The file name "test-pixel.png" confirms this is indeed a test image consisting of just a single pixel or very
- small pixel cluster.
+ content: This is a very small image - essentially a **single yellow/gold pixel** or a tiny square. It appears to be a
+ minimal test image, likely 1x1 pixel in size, which matches its filename "test-pixel.png". The color is a
+ bright yellow or golden hue.
diff --git a/test/snapshots/session/should_send_with_directory_attachment.yaml b/test/snapshots/session/should_send_with_directory_attachment.yaml
index 59943a81c..aa410c929 100644
--- a/test/snapshots/session/should_send_with_directory_attachment.yaml
+++ b/test/snapshots/session/should_send_with_directory_attachment.yaml
@@ -56,12 +56,15 @@ conversations:
arguments: '{"path":"${workdir}/attached-directory"}'
- role: tool
tool_call_id: toolcall_0
- content: Intent logged
+ content: Tool 'report_intent' does not exist. Available tools that can be called are ${shell}, ${read_shell},
+ ${stop_shell}, ${list_shell}, view, create, edit, web_fetch, skill, sql, read_agent, list_agents, grep, glob,
+ task.
- role: tool
tool_call_id: toolcall_1
content: readme.txt
- role: assistant
content: |-
- The attached directory contains a single file: **readme.txt**
+ The attached directory contains one file:
+ - **readme.txt**
Would you like me to view its contents?
diff --git a/test/snapshots/session/should_send_with_file_attachment.yaml b/test/snapshots/session/should_send_with_file_attachment.yaml
index 9985090a1..23e05d946 100644
--- a/test/snapshots/session/should_send_with_file_attachment.yaml
+++ b/test/snapshots/session/should_send_with_file_attachment.yaml
@@ -53,13 +53,15 @@ conversations:
arguments: '{"path":"${workdir}/attached-file.txt"}'
- role: tool
tool_call_id: toolcall_0
- content: Intent logged
+ content: Tool 'report_intent' does not exist. Available tools that can be called are ${shell}, ${read_shell},
+ ${stop_shell}, ${list_shell}, view, create, edit, web_fetch, skill, sql, read_agent, list_agents, grep, glob,
+ task.
- role: tool
tool_call_id: toolcall_1
content: 1. FILE_ATTACHMENT_SENTINEL
- role: assistant
content: |-
- The attached file contains:
+ The file contains:
```
FILE_ATTACHMENT_SENTINEL
diff --git a/test/snapshots/session/should_send_with_github_reference_attachment.yaml b/test/snapshots/session/should_send_with_github_reference_attachment.yaml
index 99280e3d8..6e298de55 100644
--- a/test/snapshots/session/should_send_with_github_reference_attachment.yaml
+++ b/test/snapshots/session/should_send_with_github_reference_attachment.yaml
@@ -14,6 +14,5 @@ conversations:
#1234 - Add E2E attachment coverage [issue] [open] (https://github.com/github/copilot-sdk/issues/1234)
- role: assistant
- content: >-
- The referenced GitHub issue is #1234, "Add E2E attachment coverage", in the github/copilot-sdk repository. It
- is currently open and can be found at https://github.com/github/copilot-sdk/issues/1234.
+ content: 'The referenced GitHub issue is #1234, "Add E2E attachment coverage", in the github/copilot-sdk repository. It
+ is currently open and can be found at https://github.com/github/copilot-sdk/issues/1234.'
diff --git a/test/snapshots/session_config/should_accept_message_attachments.yaml b/test/snapshots/session_config/should_accept_message_attachments.yaml
index 2a345b4b3..e9fbabb05 100644
--- a/test/snapshots/session_config/should_accept_message_attachments.yaml
+++ b/test/snapshots/session_config/should_accept_message_attachments.yaml
@@ -56,10 +56,14 @@ conversations:
arguments: '{"path":"${workdir}/attached.txt"}'
- role: tool
tool_call_id: toolcall_0
- content: Intent logged
+ content: Tool 'report_intent' does not exist. Available tools that can be called are ${shell}, ${read_shell},
+ ${stop_shell}, ${list_shell}, view, create, edit, web_fetch, skill, sql, read_agent, list_agents, grep, glob,
+ task.
- role: tool
tool_call_id: toolcall_1
content: 1. This file is attached
- role: assistant
- content: The attached file contains a single line of text that simply states "This file is attached" - it's a minimal
- test file confirming its attachment status.
+ content: |-
+ The attached file contains a single line of text that says: "This file is attached"
+
+ It's a simple text file confirming its status as an attached file.
diff --git a/test/snapshots/session_config/should_apply_workingdirectory_on_session_resume.yaml b/test/snapshots/session_config/should_apply_workingdirectory_on_session_resume.yaml
index 3ae6f2df0..52cc114f9 100644
--- a/test/snapshots/session_config/should_apply_workingdirectory_on_session_resume.yaml
+++ b/test/snapshots/session_config/should_apply_workingdirectory_on_session_resume.yaml
@@ -20,7 +20,9 @@ conversations:
arguments: '{"path":"${workdir}/resume-subproject/resume-marker.txt"}'
- role: tool
tool_call_id: toolcall_0
- content: Intent logged
+ content: Tool 'report_intent' does not exist. Available tools that can be called are ${shell}, ${read_shell},
+ ${stop_shell}, ${list_shell}, view, create, edit, web_fetch, skill, sql, read_agent, list_agents, grep, glob,
+ task.
- role: tool
tool_call_id: toolcall_1
content: 1. I am in the resume working directory
@@ -28,6 +30,4 @@ conversations:
content: |-
The file `resume-marker.txt` says:
- ```
- I am in the resume working directory
- ```
+ > I am in the resume working directory
diff --git a/test/snapshots/session_config/should_forward_provider_wire_model.yaml b/test/snapshots/session_config/should_forward_provider_wire_model.yaml
index 6d25ae167..76b6bb6f4 100644
--- a/test/snapshots/session_config/should_forward_provider_wire_model.yaml
+++ b/test/snapshots/session_config/should_forward_provider_wire_model.yaml
@@ -1,5 +1,4 @@
models:
- - claude-sonnet-4.5
- test-wire-model
conversations:
- messages:
diff --git a/test/snapshots/session_config/should_use_workingdirectory_for_tool_execution.yaml b/test/snapshots/session_config/should_use_workingdirectory_for_tool_execution.yaml
index 49c7bb74f..18dfab04e 100644
--- a/test/snapshots/session_config/should_use_workingdirectory_for_tool_execution.yaml
+++ b/test/snapshots/session_config/should_use_workingdirectory_for_tool_execution.yaml
@@ -39,7 +39,9 @@ conversations:
arguments: '{"path":"${workdir}/subproject/marker.txt"}'
- role: tool
tool_call_id: toolcall_0
- content: Intent logged
+ content: Tool 'report_intent' does not exist. Available tools that can be called are ${shell}, ${read_shell},
+ ${stop_shell}, ${list_shell}, view, create, edit, web_fetch, skill, sql, read_agent, list_agents, grep, glob,
+ task.
- role: tool
tool_call_id: toolcall_1
content: 1. I am in the subdirectory
diff --git a/test/snapshots/session_config/vision_disabled_then_enabled_via_setmodel.yaml b/test/snapshots/session_config/vision_disabled_then_enabled_via_setmodel.yaml
index 59f47983e..1eb0acd72 100644
--- a/test/snapshots/session_config/vision_disabled_then_enabled_via_setmodel.yaml
+++ b/test/snapshots/session_config/vision_disabled_then_enabled_via_setmodel.yaml
@@ -39,29 +39,70 @@ conversations:
arguments: '{"path":"${workdir}/test.png"}'
- role: tool
tool_call_id: toolcall_0
- content: Intent logged
+ content: Tool 'report_intent' does not exist. Available tools that can be called are ${shell}, ${read_shell},
+ ${stop_shell}, ${list_shell}, view, create, edit, web_fetch, skill, sql, read_agent, list_agents, grep, glob,
+ task.
- role: tool
tool_call_id: toolcall_1
content: Viewed image file successfully.
- role: assistant
- content: The image shows **GitHub's Octocat mascot** - a cat with octopus tentacles wearing a smile. It's a simple,
- monochromatic illustration (gray/black on white background) of GitHub's iconic logo character.
+ content: "I tried to view the image, but I'm not able to see the image content in the response. Let me check if the file
+ exists and try again:"
+ - role: assistant
+ tool_calls:
+ - id: toolcall_2
+ type: function
+ function:
+ name: ${shell}
+ arguments: '{"command":"echo True","description":"Check if file exists"}'
+ - messages:
+ - role: system
+ content: ${system}
- role: user
content: Use the view tool to look at the file test.png and describe what you see
- role: assistant
tool_calls:
- - id: toolcall_2
+ - id: toolcall_0
type: function
function:
name: report_intent
arguments: '{"intent":"Viewing image file"}'
- - role: assistant
- tool_calls:
- - id: toolcall_3
+ - id: toolcall_1
type: function
function:
name: view
arguments: '{"path":"${workdir}/test.png"}'
+ - role: tool
+ tool_call_id: toolcall_0
+ content: Tool 'report_intent' does not exist. Available tools that can be called are ${shell}, ${read_shell},
+ ${stop_shell}, ${list_shell}, view, create, edit, web_fetch, skill, sql, read_agent, list_agents, grep, glob,
+ task.
+ - role: tool
+ tool_call_id: toolcall_1
+ content: Viewed image file successfully.
+ - role: assistant
+ content: "I tried to view the image, but I'm not able to see the image content in the response. Let me check if the file
+ exists and try again:"
+ tool_calls:
+ - id: toolcall_2
+ type: function
+ function:
+ name: ${shell}
+ arguments: '{"command":"echo True","description":"Check if file exists"}'
+ - role: tool
+ tool_call_id: toolcall_2
+ content: |-
+ True
+
+ - role: assistant
+ content: >-
+ The file exists. Unfortunately, I'm unable to see the actual image content from the view tool output. The tool
+ confirmed it viewed the image successfully, but the image data wasn't included in the response I received.
+ This might be a limitation of the current environment or the way the image data is being transmitted.
+
+
+ To help you better, could you describe what the image contains, or would you like me to try a different
+ approach (like checking the file properties or using an image processing tool)?
- messages:
- role: system
content: ${system}
@@ -81,30 +122,46 @@ conversations:
arguments: '{"path":"${workdir}/test.png"}'
- role: tool
tool_call_id: toolcall_0
- content: Intent logged
+ content: Tool 'report_intent' does not exist.
- role: tool
tool_call_id: toolcall_1
content: Viewed image file successfully.
- - role: assistant
- content: The image shows **GitHub's Octocat mascot** - a cat with octopus tentacles wearing a smile. It's a simple,
- monochromatic illustration (gray/black on white background) of GitHub's iconic logo character.
- role: user
- content: Use the view tool to look at the file test.png and describe what you see
+ content: |-
+ Image file at path ${workdir}/test.png
+ [image]
- role: assistant
+ content: "I tried to view the image, but I'm not able to see the image content in the response. Let me check if the file
+ exists and try again:"
tool_calls:
- id: toolcall_2
type: function
function:
- name: report_intent
- arguments: '{"intent":"Viewing image file"}'
+ name: ${shell}
+ arguments: '{"command":"echo True","description":"Check if file exists"}'
+ - role: tool
+ tool_call_id: toolcall_2
+ content: |-
+ True
+
+ - role: assistant
+ content: >-
+ The file exists. Unfortunately, I'm unable to see the actual image content from the view tool output. The tool
+ confirmed it viewed the image successfully, but the image data wasn't included in the response I received.
+ This might be a limitation of the current environment or the way the image data is being transmitted.
+
+
+ To help you better, could you describe what the image contains, or would you like me to try a different
+ approach (like checking the file properties or using an image processing tool)?
+ - role: user
+ content: Use the view tool to look at the file test.png and describe what you see
+ - role: assistant
+ tool_calls:
- id: toolcall_3
type: function
function:
name: view
arguments: '{"path":"${workdir}/test.png"}'
- - role: tool
- tool_call_id: toolcall_2
- content: Intent logged
- role: tool
tool_call_id: toolcall_3
content: Viewed image file successfully.
@@ -113,11 +170,6 @@ conversations:
Image file at path ${workdir}/test.png
[image]
- role: assistant
- content: >-
- You're right to show me the actual image! Looking at it now, I can see that **test.png is essentially a blank
- or nearly blank white image**. There's no Octocat or any other visible content - it appears to be just an
- empty white/very light colored image.
-
-
- I apologize for my previous incorrect description. The image file doesn't contain any discernible graphics or
- text.
+ content: I can see the image now! It shows a simple light blue/cyan colored square or rectangle positioned against a
+ white background. The shape appears to be a solid colored geometric form - it's a fairly minimal image with
+ just this single blue-tinted rectangular element.
diff --git a/test/snapshots/session_config/vision_enabled_then_disabled_via_setmodel.yaml b/test/snapshots/session_config/vision_enabled_then_disabled_via_setmodel.yaml
index 7bad63fe8..b987a4e63 100644
--- a/test/snapshots/session_config/vision_enabled_then_disabled_via_setmodel.yaml
+++ b/test/snapshots/session_config/vision_enabled_then_disabled_via_setmodel.yaml
@@ -42,7 +42,9 @@ conversations:
arguments: '{"path":"${workdir}/test.png"}'
- role: tool
tool_call_id: toolcall_0
- content: Intent logged
+ content: Tool 'report_intent' does not exist. Available tools that can be called are ${shell}, ${read_shell},
+ ${stop_shell}, ${list_shell}, view, create, edit, web_fetch, skill, sql, read_agent, list_agents, grep, glob,
+ task.
- role: tool
tool_call_id: toolcall_1
content: Viewed image file successfully.
@@ -51,9 +53,14 @@ conversations:
Image file at path ${workdir}/test.png
[image]
- role: assistant
- content: The image appears to be a small, blank or nearly blank white/light-colored rectangle. There's no visible text,
- graphics, or distinct content in the image - it's essentially an empty or placeholder image file. The
- dimensions appear quite small, and it could be used as a test file or placeholder in a project.
+ content: >-
+ I can see the image file test.png. It appears to be a very small, mostly white or extremely light-colored
+ square image. The image is quite minimal and doesn't contain any clearly visible text, graphics, or distinct
+ features - it looks essentially blank or nearly blank with a very light/faded appearance.
+
+
+ If this image is supposed to contain specific content, it may be either corrupted, extremely low contrast, or
+ intentionally blank.
- messages:
- role: system
content: ${system}
@@ -74,82 +81,33 @@ conversations:
arguments: '{"path":"${workdir}/test.png"}'
- role: tool
tool_call_id: toolcall_0
- content: Intent logged
+ content: Tool 'report_intent' does not exist.
- role: tool
tool_call_id: toolcall_1
content: Viewed image file successfully.
- role: assistant
- content: The image appears to be a small, blank or nearly blank white/light-colored rectangle. There's no visible text,
- graphics, or distinct content in the image - it's essentially an empty or placeholder image file. The
- dimensions appear quite small, and it could be used as a test file or placeholder in a project.
+ content: >-
+ I can see the image file test.png. It appears to be a very small, mostly white or extremely light-colored
+ square image. The image is quite minimal and doesn't contain any clearly visible text, graphics, or distinct
+ features - it looks essentially blank or nearly blank with a very light/faded appearance.
+
+
+ If this image is supposed to contain specific content, it may be either corrupted, extremely low contrast, or
+ intentionally blank.
- role: user
content: Use the view tool to look at the file test.png and describe what you see
- - role: assistant
- content: I'll view the test.png file again for you.
- role: assistant
tool_calls:
- id: toolcall_2
- type: function
- function:
- name: report_intent
- arguments: '{"intent":"Viewing image file"}'
- - role: assistant
- tool_calls:
- - id: toolcall_3
- type: function
- function:
- name: view
- arguments: '{"path":"${workdir}/test.png"}'
- - messages:
- - role: system
- content: ${system}
- - role: user
- content: Use the view tool to look at the file test.png and describe what you see
- - role: assistant
- content: I'll use the view tool to examine the test.png file.
- tool_calls:
- - id: toolcall_0
- type: function
- function:
- name: report_intent
- arguments: '{"intent":"Viewing image file"}'
- - id: toolcall_1
- type: function
- function:
- name: view
- arguments: '{"path":"${workdir}/test.png"}'
- - role: tool
- tool_call_id: toolcall_0
- content: Intent logged
- - role: tool
- tool_call_id: toolcall_1
- content: Viewed image file successfully.
- - role: assistant
- content: The image appears to be a small, blank or nearly blank white/light-colored rectangle. There's no visible text,
- graphics, or distinct content in the image - it's essentially an empty or placeholder image file. The
- dimensions appear quite small, and it could be used as a test file or placeholder in a project.
- - role: user
- content: Use the view tool to look at the file test.png and describe what you see
- - role: assistant
- content: I'll view the test.png file again for you.
- tool_calls:
- - id: toolcall_2
- type: function
- function:
- name: report_intent
- arguments: '{"intent":"Viewing image file"}'
- - id: toolcall_3
type: function
function:
name: view
arguments: '{"path":"${workdir}/test.png"}'
- role: tool
tool_call_id: toolcall_2
- content: Intent logged
- - role: tool
- tool_call_id: toolcall_3
content: Viewed image file successfully.
- role: assistant
- content: The image shows a small, blank white or very light-colored rectangle. There's no visible content, text,
- graphics, or imagery - it appears to be an empty or placeholder PNG file. It's likely used as a test file
- given its name "test.png".
+ content: I can see the test.png image. It shows a very small, simple image that appears to be mostly white or very light
+ colored - essentially a blank or nearly blank square. There are no visible text, graphics, patterns, or
+ distinct features that I can discern. The image looks like it could be a placeholder, a test file, or perhaps
+ an image with extremely low contrast that makes any content difficult to see.
diff --git a/test/snapshots/session_fs_sqlite/should_allow_subagents_to_use_sql_tool_via_inherited_sessionfs.yaml b/test/snapshots/session_fs_sqlite/should_allow_subagents_to_use_sql_tool_via_inherited_sessionfs.yaml
index edeeecec7..32b8bc354 100644
--- a/test/snapshots/session_fs_sqlite/should_allow_subagents_to_use_sql_tool_via_inherited_sessionfs.yaml
+++ b/test/snapshots/session_fs_sqlite/should_allow_subagents_to_use_sql_tool_via_inherited_sessionfs.yaml
@@ -81,7 +81,9 @@ conversations:
for the SQL operation (e.g., \\\"Insert test todo\\\").\",\"mode\":\"sync\"}"
- role: tool
tool_call_id: toolcall_0
- content: Intent logged
+ content: Tool 'report_intent' does not exist. Available tools that can be called are ${shell}, ${read_shell},
+ ${stop_shell}, ${list_shell}, view, create, edit, web_fetch, skill, sql, read_agent, list_agents, grep, glob,
+ task.
- role: tool
tool_call_id: toolcall_1
content: >-
@@ -94,5 +96,5 @@ conversations:
- Status: `done`
- role: assistant
- content: Perfect! The task agent successfully ran the SQL insert query. The todo record with ID `subagent-test`, title
- `Created by subagent`, and status `done` has been inserted into the todos table.
+ content: Done! The task agent successfully ran the SQL insert query. The todo with id 'subagent-test', title 'Created by
+ subagent', and status 'done' has been inserted into the database.
diff --git a/test/snapshots/session_fs_sqlite/should_route_sql_queries_through_the_sessionfs_sqlite_handler.yaml b/test/snapshots/session_fs_sqlite/should_route_sql_queries_through_the_sessionfs_sqlite_handler.yaml
index c2bf57494..b26cfbfeb 100644
--- a/test/snapshots/session_fs_sqlite/should_route_sql_queries_through_the_sessionfs_sqlite_handler.yaml
+++ b/test/snapshots/session_fs_sqlite/should_route_sql_queries_through_the_sessionfs_sqlite_handler.yaml
@@ -42,11 +42,50 @@ conversations:
arguments: '{"description":"Create items table","query":"CREATE TABLE items (id TEXT PRIMARY KEY, name TEXT)"}'
- role: tool
tool_call_id: toolcall_0
- content: Intent logged
+ content: Tool 'report_intent' does not exist. Available tools that can be called are ${shell}, ${read_shell},
+ ${stop_shell}, ${list_shell}, view, create, edit, web_fetch, skill, sql, read_agent, list_agents, grep, glob,
+ task.
- role: tool
tool_call_id: toolcall_1
content: Schema operation completed successfully.
- role: assistant
+ content: "Now I'll insert the row:"
+ - role: assistant
+ tool_calls:
+ - id: toolcall_2
+ type: function
+ function:
+ name: sql
+ arguments: "{\"description\":\"Insert Widget row\",\"query\":\"INSERT INTO items (id, name) VALUES ('a1', 'Widget')\"}"
+ - messages:
+ - role: system
+ content: ${system}
+ - role: user
+ content: Use the sql tool to create a table called "items" with columns id (TEXT PRIMARY KEY) and name (TEXT). Then
+ insert a row with id "a1" and name "Widget".
+ - role: assistant
+ content: I'll create the table and insert the row using the SQL tool.
+ tool_calls:
+ - id: toolcall_0
+ type: function
+ function:
+ name: report_intent
+ arguments: '{"intent":"Creating database table"}'
+ - id: toolcall_1
+ type: function
+ function:
+ name: sql
+ arguments: '{"description":"Create items table","query":"CREATE TABLE items (id TEXT PRIMARY KEY, name TEXT)"}'
+ - role: tool
+ tool_call_id: toolcall_0
+ content: Tool 'report_intent' does not exist. Available tools that can be called are ${shell}, ${read_shell},
+ ${stop_shell}, ${list_shell}, view, create, edit, web_fetch, skill, sql, read_agent, list_agents, grep, glob,
+ task.
+ - role: tool
+ tool_call_id: toolcall_1
+ content: Schema operation completed successfully.
+ - role: assistant
+ content: "Now I'll insert the row:"
tool_calls:
- id: toolcall_2
type: function
@@ -57,5 +96,5 @@ conversations:
tool_call_id: toolcall_2
content: "1 row(s) inserted. Last inserted row ID: 1."
- role: assistant
- content: Done! I've created the `items` table with `id` and `name` columns, and inserted the row with id "a1" and name
+ content: Done! Created the `items` table with columns `id` and `name`, and inserted the row with id "a1" and name
"Widget".
diff --git a/test/snapshots/session_todos_changed/fires_session_todos_changed_and_exposes_rows_and_dependencies.yaml b/test/snapshots/session_todos_changed/fires_session_todos_changed_and_exposes_rows_and_dependencies.yaml
index d57a49bdf..425c772fc 100644
--- a/test/snapshots/session_todos_changed/fires_session_todos_changed_and_exposes_rows_and_dependencies.yaml
+++ b/test/snapshots/session_todos_changed/fires_session_todos_changed_and_exposes_rows_and_dependencies.yaml
@@ -5,77 +5,65 @@ conversations:
- role: system
content: ${system}
- role: user
- content: |-
- Use the sql tool to execute exactly these statements, in order, with no extra rows:
+ content: >-
+ Use the sql tool exactly once to execute all three of the following statements together, in this exact order,
+ in a single sql tool call (a single query string containing all three statements):
+
1. INSERT INTO todos (id, title, status) VALUES ('alpha', 'First todo', 'pending');
+
2. INSERT INTO todos (id, title, status) VALUES ('beta', 'Second todo', 'done');
+
3. INSERT INTO todo_deps (todo_id, depends_on) VALUES ('beta', 'alpha');
+
Then stop. Do not insert any other rows or create any other tables.
- role: assistant
- content: I'll execute those three SQL statements in order.
+ content: "I'll execute all three SQL statements together in a single call:"
- role: assistant
tool_calls:
- id: toolcall_0
type: function
function:
name: sql
- arguments: "{\"description\":\"Insert alpha todo\",\"query\":\"INSERT INTO todos (id, title, status) VALUES ('alpha',
- 'First todo', 'pending');\"}"
- - role: assistant
- tool_calls:
- - id: toolcall_1
- type: function
- function:
- name: sql
- arguments: "{\"description\":\"Insert beta todo\",\"query\":\"INSERT INTO todos (id, title, status) VALUES ('beta',
- 'Second todo', 'done');\"}"
- - role: assistant
- tool_calls:
- - id: toolcall_2
- type: function
- function:
- name: sql
- arguments: "{\"description\":\"Insert dependency\",\"query\":\"INSERT INTO todo_deps (todo_id, depends_on) VALUES
- ('beta', 'alpha');\"}"
+ arguments: "{\"description\":\"Insert todos and dependency\",\"query\":\"INSERT INTO todos (id, title, status) VALUES
+ ('alpha', 'First todo', 'pending');\\nINSERT INTO todos (id, title, status) VALUES ('beta', 'Second
+ todo', 'done');\\nINSERT INTO todo_deps (todo_id, depends_on) VALUES ('beta', 'alpha');\"}"
- messages:
- role: system
content: ${system}
- role: user
- content: |-
- Use the sql tool to execute exactly these statements, in order, with no extra rows:
+ content: >-
+ Use the sql tool exactly once to execute all three of the following statements together, in this exact order,
+ in a single sql tool call (a single query string containing all three statements):
+
1. INSERT INTO todos (id, title, status) VALUES ('alpha', 'First todo', 'pending');
+
2. INSERT INTO todos (id, title, status) VALUES ('beta', 'Second todo', 'done');
+
3. INSERT INTO todo_deps (todo_id, depends_on) VALUES ('beta', 'alpha');
+
Then stop. Do not insert any other rows or create any other tables.
- role: assistant
- content: I'll execute those three SQL statements in order.
+ content: "I'll execute all three SQL statements together in a single call:"
tool_calls:
- id: toolcall_0
type: function
function:
name: sql
- arguments: "{\"description\":\"Insert alpha todo\",\"query\":\"INSERT INTO todos (id, title, status) VALUES ('alpha',
- 'First todo', 'pending');\"}"
- - id: toolcall_1
- type: function
- function:
- name: sql
- arguments: "{\"description\":\"Insert beta todo\",\"query\":\"INSERT INTO todos (id, title, status) VALUES ('beta',
- 'Second todo', 'done');\"}"
- - id: toolcall_2
- type: function
- function:
- name: sql
- arguments: "{\"description\":\"Insert dependency\",\"query\":\"INSERT INTO todo_deps (todo_id, depends_on) VALUES
- ('beta', 'alpha');\"}"
+ arguments: "{\"description\":\"Insert todos and dependency\",\"query\":\"INSERT INTO todos (id, title, status) VALUES
+ ('alpha', 'First todo', 'pending');\\nINSERT INTO todos (id, title, status) VALUES ('beta', 'Second
+ todo', 'done');\\nINSERT INTO todo_deps (todo_id, depends_on) VALUES ('beta', 'alpha');\"}"
- role: tool
tool_call_id: toolcall_0
- content: "1 row(s) inserted. Last inserted row ID: 1."
- - role: tool
- tool_call_id: toolcall_1
- content: "1 row(s) inserted. Last inserted row ID: 2."
- - role: tool
- tool_call_id: toolcall_2
- content: "1 row(s) inserted. Last inserted row ID: 1."
+ content: |-
+ Executed 3 statement(s):
+
+ 1. INSERT
+ 1 row(s) inserted. Last inserted row ID: 1.
+
+ 2. INSERT
+ 1 row(s) inserted. Last inserted row ID: 2.
+
+ 3. INSERT
+ 1 row(s) inserted. Last inserted row ID: 1.
- role: assistant
- content: Done. All three statements executed successfully.
+ content: Done. All three statements executed successfully in a single SQL call.
diff --git a/test/snapshots/subagent_hooks/should_invoke_pretooluse_and_posttooluse_hooks_for_sub_agent_tool_calls.yaml b/test/snapshots/subagent_hooks/should_invoke_pretooluse_and_posttooluse_hooks_for_sub_agent_tool_calls.yaml
index ea2aa5093..2a73f1ef8 100644
--- a/test/snapshots/subagent_hooks/should_invoke_pretooluse_and_posttooluse_hooks_for_sub_agent_tool_calls.yaml
+++ b/test/snapshots/subagent_hooks/should_invoke_pretooluse_and_posttooluse_hooks_for_sub_agent_tool_calls.yaml
@@ -25,6 +25,38 @@ conversations:
arguments: '{"agent_type":"explore","name":"read-file","description":"Reading subagent-test.txt","prompt":"Read the file
\"subagent-test.txt\" in the current directory (${workdir}) and report its complete contents. Use the
view tool to read the file and provide the full content in your response.","mode":"background"}'
+ - messages:
+ - role: system
+ content: ${system}
+ - role: user
+ content: Use the task tool to spawn an explore agent that reads the file subagent-test.txt in the current directory and
+ reports its contents. You must use the task tool.
+ - role: assistant
+ content: I'll spawn an explore agent to read the file and report its contents.
+ tool_calls:
+ - id: toolcall_0
+ type: function
+ function:
+ name: report_intent
+ arguments: '{"intent":"Spawning explore agent"}'
+ - id: toolcall_1
+ type: function
+ function:
+ name: task
+ arguments: '{"agent_type":"explore","name":"read-file","description":"Reading subagent-test.txt","prompt":"Read the file
+ \"subagent-test.txt\" in the current directory (${workdir}) and report its complete contents. Use the
+ view tool to read the file and provide the full content in your response.","mode":"background"}'
+ - role: tool
+ tool_call_id: toolcall_0
+ content: Tool 'report_intent' does not exist. Available tools that can be called are ${shell}, ${read_shell},
+ ${stop_shell}, ${list_shell}, view, create, edit, web_fetch, skill, sql, read_agent, list_agents, grep, glob,
+ task.
+ - role: tool
+ tool_call_id: toolcall_1
+ content: "Agent started in background with agent_id: read-file. You'll be notified when it completes. Tell the user
+ you're waiting and end your response, or continue unrelated work until notified."
+ - role: assistant
+ content: I've launched an explore agent to read subagent-test.txt. Waiting for it to complete...
- messages:
- role: system
content: ${system}
@@ -89,19 +121,19 @@ conversations:
view tool to read the file and provide the full content in your response.","mode":"background"}'
- role: tool
tool_call_id: toolcall_0
- content: Intent logged
+ content: Tool 'report_intent' does not exist.
- role: tool
tool_call_id: toolcall_1
content: "Agent started in background with agent_id: read-file. You'll be notified when it completes. Tell the user
you're waiting and end your response, or continue unrelated work until notified."
- role: assistant
- content: I've spawned an explore agent to read the file. Waiting for it to complete...
+ content: I've launched an explore agent to read subagent-test.txt. Waiting for it to complete...
- role: user
content: >-
Agent "read-file" (explore) has completed successfully. Use read_agent with agent_id "read-file" to retrieve
- unread results.
+ the full results.
- role: assistant
@@ -131,7 +163,7 @@ conversations:
The file contains a single line with the text "Hello from subagent test!".
- role: assistant
content: |-
- The explore agent successfully read the file. The contents of `subagent-test.txt` are:
+ The explore agent successfully read the file. The contents of **subagent-test.txt** are:
```
Hello from subagent test!
diff --git a/test/snapshots/system_message_sections/should_use_replaced_identity_section_in_response.yaml b/test/snapshots/system_message_sections/should_use_replaced_identity_section_in_response.yaml
index fcc84423d..5721955d2 100644
--- a/test/snapshots/system_message_sections/should_use_replaced_identity_section_in_response.yaml
+++ b/test/snapshots/system_message_sections/should_use_replaced_identity_section_in_response.yaml
@@ -7,9 +7,7 @@ conversations:
- role: user
content: Who are you?
- role: assistant
- content: >-
- I'm Botanica, your helpful gardening assistant! I'm here to help you
- with all things related to plants and gardening. Whether you have
- questions about plant care, garden design, soil preparation, pest
- management, or anything else in the world of gardening, I'm happy to
- help. What would you like to know about plants or gardening today?
+ content: I'm Botanica, your helpful gardening assistant! I'm here to help you with all things related to plants and
+ gardening. Whether you have questions about plant care, garden design, soil preparation, pest management, or
+ anything else in the world of gardening, I'm happy to help. What would you like to know about plants or
+ gardening today?
diff --git a/test/snapshots/system_message_transform/should_apply_transform_modifications_to_section_content.yaml b/test/snapshots/system_message_transform/should_apply_transform_modifications_to_section_content.yaml
index 98004f2b0..4b7c058b2 100644
--- a/test/snapshots/system_message_transform/should_apply_transform_modifications_to_section_content.yaml
+++ b/test/snapshots/system_message_transform/should_apply_transform_modifications_to_section_content.yaml
@@ -21,13 +21,15 @@ conversations:
arguments: '{"path":"${workdir}/hello.txt"}'
- role: tool
tool_call_id: toolcall_0
- content: Intent logged
+ content: Tool 'report_intent' does not exist. Available tools that can be called are ${shell}, ${read_shell},
+ ${stop_shell}, ${list_shell}, view, create, edit, web_fetch, skill, sql, read_agent, list_agents, grep, glob,
+ task.
- role: tool
tool_call_id: toolcall_1
content: 1. Hello!
- role: assistant
content: |-
- The file hello.txt contains:
+ The file **hello.txt** contains:
```
Hello!
```
diff --git a/test/snapshots/system_message_transform/should_invoke_transform_callbacks_with_section_content.yaml b/test/snapshots/system_message_transform/should_invoke_transform_callbacks_with_section_content.yaml
index 631a8eef8..0b1d9755f 100644
--- a/test/snapshots/system_message_transform/should_invoke_transform_callbacks_with_section_content.yaml
+++ b/test/snapshots/system_message_transform/should_invoke_transform_callbacks_with_section_content.yaml
@@ -42,13 +42,11 @@ conversations:
arguments: '{"path":"${workdir}/test.txt"}'
- role: tool
tool_call_id: toolcall_0
- content: Intent logged
+ content: Tool 'report_intent' does not exist. Available tools that can be called are ${shell}, ${read_shell},
+ ${stop_shell}, ${list_shell}, view, create, edit, web_fetch, skill, sql, read_agent, list_agents, grep, glob,
+ task.
- role: tool
tool_call_id: toolcall_1
content: 1. Hello transform!
- role: assistant
- content: |-
- The file test.txt contains:
- ```
- Hello transform!
- ```
+ content: 'The file `test.txt` contains: **"Hello transform!"**'
diff --git a/test/snapshots/system_message_transform/should_work_with_static_overrides_and_transforms_together.yaml b/test/snapshots/system_message_transform/should_work_with_static_overrides_and_transforms_together.yaml
index 9cb515694..0681b569d 100644
--- a/test/snapshots/system_message_transform/should_work_with_static_overrides_and_transforms_together.yaml
+++ b/test/snapshots/system_message_transform/should_work_with_static_overrides_and_transforms_together.yaml
@@ -42,9 +42,16 @@ conversations:
arguments: '{"path":"${workdir}/combo.txt"}'
- role: tool
tool_call_id: toolcall_0
- content: Intent logged
+ content: Tool 'report_intent' does not exist. Available tools that can be called are ${shell}, ${read_shell},
+ ${stop_shell}, ${list_shell}, view, create, edit, web_fetch, skill, sql, read_agent, list_agents, grep, glob,
+ task.
- role: tool
tool_call_id: toolcall_1
content: 1. Combo test!
- role: assistant
- content: The file combo.txt contains a single line that says "Combo test!"
+ content: |-
+ The file `combo.txt` contains:
+
+ ```
+ Combo test!
+ ```
diff --git a/test/snapshots/tool_results/should_pass_validated_zod_parameters_to_tool_handler.yaml b/test/snapshots/tool_results/should_pass_validated_zod_parameters_to_tool_handler.yaml
index cf3bc94af..2388d7d8f 100644
--- a/test/snapshots/tool_results/should_pass_validated_zod_parameters_to_tool_handler.yaml
+++ b/test/snapshots/tool_results/should_pass_validated_zod_parameters_to_tool_handler.yaml
@@ -39,9 +39,11 @@ conversations:
arguments: '{"operation":"add","a":17,"b":25}'
- role: tool
tool_call_id: toolcall_0
- content: Intent logged
+ content: Tool 'report_intent' does not exist. Available tools that can be called are ${shell}, ${read_shell},
+ ${stop_shell}, ${list_shell}, view, create, edit, web_fetch, skill, sql, read_agent, list_agents, grep, glob,
+ task, calculate.
- role: tool
tool_call_id: toolcall_1
content: "42"
- role: assistant
- content: 17 + 25 = 42
+ content: The result of 17 + 25 is **42**.
diff --git a/test/snapshots/tools/overrides_built_in_tool_with_custom_tool.yaml b/test/snapshots/tools/overrides_built_in_tool_with_custom_tool.yaml
index ec8dc20ef..5410d3f29 100644
--- a/test/snapshots/tools/overrides_built_in_tool_with_custom_tool.yaml
+++ b/test/snapshots/tools/overrides_built_in_tool_with_custom_tool.yaml
@@ -42,10 +42,12 @@ conversations:
arguments: '{"query":"hello"}'
- role: tool
tool_call_id: toolcall_0
- content: Intent logged
+ content: Tool 'report_intent' does not exist. Available tools that can be called are ${shell}, ${read_shell},
+ ${stop_shell}, ${list_shell}, view, create, edit, web_fetch, skill, sql, read_agent, list_agents, glob, task,
+ grep.
- role: tool
tool_call_id: toolcall_1
content: "CUSTOM_GREP_RESULT: hello"
- role: assistant
- content: 'The grep search found a result for "hello" in the current directory. The output shows `CUSTOM_GREP_RESULT:
- hello`, indicating the custom grep implementation found a match.'
+ content: "Found: The search returned `CUSTOM_GREP_RESULT: hello`, indicating the grep tool found the word 'hello' in the
+ codebase."