Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 18 additions & 3 deletions python/packages/anthropic/AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,24 @@ Integration with Anthropic's Claude API.

## Main Classes

- **`AnthropicClient`** - Chat client for Anthropic Claude models
- **`AnthropicClient`** - Full-featured chat client for Anthropic Claude models (includes middleware, telemetry, and function invocation support)
- **`RawAnthropicClient`** - Low-level chat client without middleware, telemetry, or function invocation layers. Use this only when you need to compose custom layers manually.
- **`AnthropicChatOptions`** - Options TypedDict for Anthropic-specific parameters

## Client Architecture

`AnthropicClient` composes the standard public layer stack around `RawAnthropicClient`:

```
AnthropicClient
└─ FunctionInvocationLayer ← owns the tool/function calling loop
└─ ChatMiddlewareLayer ← applies chat middleware per model call
└─ ChatTelemetryLayer ← per-call telemetry (inside middleware)
└─ RawAnthropicClient ← raw Anthropic API calls
```

Most users should use `AnthropicClient`. Use `RawAnthropicClient` only if you need to apply a custom subset of layers.

## Usage

```python
Expand All @@ -19,7 +34,7 @@ response = await client.get_response("Hello")
## Import Path

```python
from agent_framework.anthropic import AnthropicClient
from agent_framework.anthropic import AnthropicClient, RawAnthropicClient
# or directly:
from agent_framework_anthropic import AnthropicClient
from agent_framework_anthropic import AnthropicClient, RawAnthropicClient
```
24 changes: 24 additions & 0 deletions python/packages/core/AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,30 @@ class LoggingMiddleware(AgentMiddleware):
agent = Agent(..., middleware=[LoggingMiddleware()])
```

### Chat Client Layer Architecture

Public chat clients (e.g., `OpenAIChatClient`, `AnthropicClient`) compose a standard stack of mixin layers on top of a raw/base client. The layer ordering from outermost to innermost is:

```
PublicClient (e.g., OpenAIChatClient)
└─ FunctionInvocationLayer ← owns the tool/function calling loop; routes function middleware
└─ ChatMiddlewareLayer ← applies chat middleware per inner model call (outside telemetry)
└─ ChatTelemetryLayer ← per-call OpenTelemetry spans (inside chat middleware)
└─ Raw/BaseChatClient ← raw provider API calls
```


**Key behaviors:**
- **Chat middleware runs per inner model call** — within the function calling loop, so middleware sees each individual LLM call rather than only the outer request.
- **Chat middleware is outside telemetry** — middleware latency does not skew per-call telemetry timings.
- **Per-call middleware** can be passed via `client_kwargs={"middleware": [...]}` on `get_response()`. Mixed chat and function middleware is automatically categorized and routed to the appropriate layer.


**Raw vs Public clients:**
- **Raw clients** (e.g., `RawOpenAIChatClient`, `RawAnthropicClient`) only extend `BaseChatClient` — no middleware, telemetry, or function invocation support.
- **Public clients** compose all standard layers around the raw client and are what most users should use.
- Use raw clients only when you need to compose a custom subset of layers.

### Custom Chat Client

```python
Expand Down
10 changes: 10 additions & 0 deletions python/packages/core/agent_framework/_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -1866,6 +1866,9 @@ def _process_update(response: ChatResponse | AgentResponse, update: ChatResponse
response.finish_reason = update.finish_reason
if update.model_id is not None:
response.model_id = update.model_id
if isinstance(response, AgentResponse) and isinstance(update, AgentResponseUpdate):
if update.finish_reason is not None:
response.finish_reason = update.finish_reason
response.continuation_token = update.continuation_token


Expand Down Expand Up @@ -2369,6 +2372,7 @@ def __init__(
response_id: str | None = None,
agent_id: str | None = None,
created_at: CreatedAtT | None = None,
finish_reason: FinishReasonLiteral | FinishReason | None = None,
usage_details: UsageDetails | None = None,
value: ResponseModelT | None = None,
response_format: type[BaseModel] | None = None,
Expand All @@ -2384,6 +2388,7 @@ def __init__(
agent_id: The identifier of the agent that produced this response. Useful in multi-agent
scenarios to track which agent generated the response.
created_at: A timestamp for the chat response.
finish_reason: Optional reason the agent finished (e.g., "stop", "length", "tool_calls").
usage_details: The usage details for the chat response.
value: The structured output of the agent run response, if applicable.
response_format: Optional response format for the agent response.
Expand All @@ -2410,6 +2415,7 @@ def __init__(
self.response_id = response_id
self.agent_id = agent_id
self.created_at = created_at
self.finish_reason = finish_reason
self.usage_details = usage_details
self._value: ResponseModelT | None = value
self._response_format: type[BaseModel] | None = response_format
Expand Down Expand Up @@ -2604,6 +2610,7 @@ def __init__(
response_id: str | None = None,
message_id: str | None = None,
created_at: CreatedAtT | None = None,
finish_reason: FinishReasonLiteral | FinishReason | None = None,
continuation_token: ContinuationToken | None = None,
additional_properties: dict[str, Any] | None = None,
raw_representation: Any | None = None,
Expand All @@ -2619,6 +2626,7 @@ def __init__(
response_id: Optional ID of the response of which this update is a part.
message_id: Optional ID of the message of which this update is a part.
created_at: Optional timestamp for the chat response update.
finish_reason: Optional finish reason for the operation (e.g., "stop", "length", "tool_calls").
continuation_token: Optional token for resuming a long-running background operation.
When present, indicates the operation is still in progress.
additional_properties: Optional additional properties associated with the chat response update.
Expand All @@ -2645,6 +2653,7 @@ def __init__(
self.response_id = response_id
self.message_id = message_id
self.created_at = created_at
self.finish_reason = finish_reason
self.continuation_token = continuation_token
self.additional_properties = _restore_compaction_annotation_in_additional_properties(
additional_properties,
Expand Down Expand Up @@ -2677,6 +2686,7 @@ def map_chat_to_agent_update(update: ChatResponseUpdate, agent_name: str | None)
response_id=update.response_id,
message_id=update.message_id,
created_at=update.created_at,
finish_reason=update.finish_reason,
continuation_token=update.continuation_token,
additional_properties=update.additional_properties,
raw_representation=update,
Expand Down
100 changes: 100 additions & 0 deletions python/packages/core/tests/core/test_finish_reason.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
from agent_framework import (
AgentResponse,
AgentResponseUpdate,
ChatResponseUpdate,
Content,
Message,
)
from agent_framework._types import _process_update, map_chat_to_agent_update


def test_agent_response_init_with_finish_reason() -> None:
    """AgentResponse should accept finish_reason at construction and expose it as an attribute."""
    message = Message("assistant", [Content.from_text("test")])
    result = AgentResponse(messages=[message], finish_reason="stop")
    assert result.finish_reason == "stop"


def test_agent_response_update_init_with_finish_reason() -> None:
    """AgentResponseUpdate should accept finish_reason at construction and expose it as an attribute."""
    result = AgentResponseUpdate(
        role="assistant",
        contents=[Content.from_text("test")],
        finish_reason="stop",
    )
    assert result.finish_reason == "stop"


def test_map_chat_to_agent_update_forwards_finish_reason() -> None:
    """map_chat_to_agent_update should carry finish_reason from the chat update to the agent update."""
    source = ChatResponseUpdate(
        finish_reason="length",
        contents=[Content.from_text("test")],
    )

    mapped = map_chat_to_agent_update(source, agent_name="test_agent")

    assert mapped.author_name == "test_agent"
    assert mapped.finish_reason == "length"


def test_process_update_propagates_finish_reason_to_agent_response() -> None:
    """_process_update should copy a non-None finish_reason from the update onto the response."""
    target = AgentResponse(messages=[Message("assistant", [Content.from_text("test")])])
    incoming = AgentResponseUpdate(
        role="assistant",
        contents=[Content.from_text("more text")],
        finish_reason="stop",
    )

    _process_update(target, incoming)

    assert target.finish_reason == "stop"


def test_process_update_does_not_overwrite_with_none() -> None:
    """_process_update should leave an existing finish_reason intact when the update carries None."""
    target = AgentResponse(
        finish_reason="length",
        messages=[Message("assistant", [Content.from_text("test")])],
    )
    incoming = AgentResponseUpdate(
        role="assistant",
        contents=[Content.from_text("more text")],
        finish_reason=None,
    )

    _process_update(target, incoming)

    # The pre-existing value must survive a None update.
    assert target.finish_reason == "length"


def test_agent_response_serialization_includes_finish_reason() -> None:
    """AgentResponse.to_dict() output should contain the finish_reason field with its value."""
    response = AgentResponse(
        finish_reason="stop",
        response_id="test_123",
        messages=[Message("assistant", [Content.from_text("test")])],
    )

    serialized = response.to_dict()

    assert "finish_reason" in serialized
    assert serialized["finish_reason"] == "stop"


def test_agent_response_update_serialization_includes_finish_reason() -> None:
    """AgentResponseUpdate.to_dict() output should contain the finish_reason field with its value."""
    update = AgentResponseUpdate(
        finish_reason="tool_calls",
        response_id="test_456",
        role="assistant",
        contents=[Content.from_text("test")],
    )

    serialized = update.to_dict()

    assert "finish_reason" in serialized
    assert serialized["finish_reason"] == "tool_calls"