Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 20 additions & 36 deletions client/anthropic.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,11 @@ type anthropicResponse struct {
Input json.RawMessage `json:"input,omitempty"`
} `json:"content"`
StopReason string `json:"stop_reason"`
Usage struct {
InputTokens int `json:"input_tokens"`
OutputTokens int `json:"output_tokens"`
Usage struct {
InputTokens int `json:"input_tokens"`
OutputTokens int `json:"output_tokens"`
CacheCreationInputTokens int `json:"cache_creation_input_tokens"`
CacheReadInputTokens int `json:"cache_read_input_tokens"`
} `json:"usage"`
}

Expand Down Expand Up @@ -182,6 +184,7 @@ func parseImageString(img string) (mediaType, data string, isBase64 bool) {
// This is not implemented here; opts.ResponseFormat is ignored for Anthropic.
// Future work: implement tool-use-based structured output for Anthropic.
func (c *AnthropicClient) Chat(ctx context.Context, messages []EyrieMessage, opts ChatOptions) (*EyrieResponse, error) {
messages = SanitizeMessages(messages)
if opts.Model == "" {
return nil, fmt.Errorf("eyrie: model is required for anthropic")
}
Expand All @@ -192,25 +195,12 @@ func (c *AnthropicClient) Chat(ctx context.Context, messages []EyrieMessage, opt

var body []byte
if opts.EnableCaching {
// Use cached request builder for Anthropic prompt caching support
cachedReq := buildAnthropicCachedRequest(messages, opts.Model, maxTokens, opts.Temperature, false)
allMessages := messages
if opts.System != "" {
if existing, ok := cachedReq["system"]; ok && existing != nil {
// System already set as cached array; prepend the opts.System
_ = existing // already handled by buildAnthropicCachedRequest
} else {
cachedReq["system"] = []map[string]interface{}{
{
"type": "text",
"text": opts.System,
"cache_control": map[string]string{"type": "ephemeral"},
},
}
}
}
if tools := convertToAnthropicTools(opts.Tools); len(tools) > 0 {
cachedReq["tools"] = tools
allMessages = append([]EyrieMessage{{Role: "system", Content: opts.System}}, allMessages...)
}
tools := convertToAnthropicTools(opts.Tools)
cachedReq := buildAnthropicCachedRequest(allMessages, opts.Model, maxTokens, opts.Temperature, false, tools)
body, _ = json.Marshal(cachedReq)
} else {
msgs, system := buildAnthropicMessages(messages)
Expand Down Expand Up @@ -272,14 +262,18 @@ func (c *AnthropicClient) Chat(ctx context.Context, messages []EyrieMessage, opt
Content: content, FinishReason: ar.StopReason, ToolCalls: toolCalls,
RequestID: requestID,
Usage: &EyrieUsage{
PromptTokens: ar.Usage.InputTokens, CompletionTokens: ar.Usage.OutputTokens,
TotalTokens: ar.Usage.InputTokens + ar.Usage.OutputTokens,
PromptTokens: ar.Usage.InputTokens,
CompletionTokens: ar.Usage.OutputTokens,
TotalTokens: ar.Usage.InputTokens + ar.Usage.OutputTokens,
CacheCreationTokens: ar.Usage.CacheCreationInputTokens,
CacheReadTokens: ar.Usage.CacheReadInputTokens,
},
}, nil
}

// StreamChat sends a streaming message to Anthropic.
func (c *AnthropicClient) StreamChat(ctx context.Context, messages []EyrieMessage, opts ChatOptions) (*StreamResult, error) {
messages = SanitizeMessages(messages)
if opts.Model == "" {
return nil, fmt.Errorf("eyrie: model is required for anthropic")
}
Expand All @@ -290,22 +284,12 @@ func (c *AnthropicClient) StreamChat(ctx context.Context, messages []EyrieMessag

var body []byte
if opts.EnableCaching {
// Use cached request builder for Anthropic prompt caching support
cachedReq := buildAnthropicCachedRequest(messages, opts.Model, maxTokens, opts.Temperature, true)
allMessages := messages
if opts.System != "" {
if _, ok := cachedReq["system"]; !ok {
cachedReq["system"] = []map[string]interface{}{
{
"type": "text",
"text": opts.System,
"cache_control": map[string]string{"type": "ephemeral"},
},
}
}
}
if tools := convertToAnthropicTools(opts.Tools); len(tools) > 0 {
cachedReq["tools"] = tools
allMessages = append([]EyrieMessage{{Role: "system", Content: opts.System}}, allMessages...)
}
tools := convertToAnthropicTools(opts.Tools)
cachedReq := buildAnthropicCachedRequest(allMessages, opts.Model, maxTokens, opts.Temperature, true, tools)
body, _ = json.Marshal(cachedReq)
} else {
msgs, system := buildAnthropicMessages(messages)
Expand Down
69 changes: 44 additions & 25 deletions client/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,32 +65,18 @@ type anthropicCachedMessage struct {
}

// buildAnthropicCachedRequest builds an Anthropic request body with cache_control.
// Use this instead of the standard request builder when prompt caching is desired.
func buildAnthropicCachedRequest(messages []EyrieMessage, model string, maxTokens int, temperature *float64, stream bool) map[string]interface{} {
var system string
var msgs []interface{}
// It reuses buildAnthropicMessages for proper tool_use/tool_result handling,
// then applies cache_control breakpoints following Anthropic's best practices:
// - System prompt gets cache_control (cached for all turns)
// - Second-to-last message gets cache_control (caches conversation prefix)
// - Last tool definition gets cache_control (caches tool schema)
func buildAnthropicCachedRequest(messages []EyrieMessage, model string, maxTokens int, temperature *float64, stream bool, tools []anthropicTool) map[string]interface{} {
msgs, system := buildAnthropicMessages(messages)

for _, m := range messages {
if m.Role == "system" {
system = m.Content
continue
}
msgs = append(msgs, map[string]interface{}{"role": m.Role, "content": m.Content})
}

// Apply cache breakpoint to second-to-last message
// Apply cache breakpoint to second-to-last non-system message
if len(msgs) >= 2 {
idx := len(msgs) - 2
if msg, ok := msgs[idx].(map[string]interface{}); ok {
content := msg["content"].(string)
msg["content"] = []map[string]interface{}{
{
"type": "text",
"text": content,
"cache_control": map[string]string{"type": "ephemeral"},
},
}
}
applyCacheBreakpointToMessage(msgs[idx])
}

req := map[string]interface{}{
Expand All @@ -102,14 +88,47 @@ func buildAnthropicCachedRequest(messages []EyrieMessage, model string, maxToken
if system != "" {
req["system"] = []map[string]interface{}{
{
"type": "text",
"text": system,
"type": "text",
"text": system,
"cache_control": map[string]string{"type": "ephemeral"},
},
}
}
if len(tools) > 0 {
toolMaps := make([]map[string]interface{}, len(tools))
for i, t := range tools {
toolMaps[i] = map[string]interface{}{
"name": t.Name,
"description": t.Description,
"input_schema": t.InputSchema,
}
}
// Annotate last tool with cache_control
toolMaps[len(toolMaps)-1]["cache_control"] = map[string]string{"type": "ephemeral"}
req["tools"] = toolMaps
}
if temperature != nil {
req["temperature"] = *temperature
}
return req
}

// applyCacheBreakpointToMessage marks a message's final content block with an
// ephemeral cache_control annotation so Anthropic caches the prompt prefix up
// to that point. Plain string content is first promoted to the canonical
// single-element text-block array; block-array content (e.g. tool_use /
// tool_result blocks) has its last block annotated in place. Content of any
// other shape is left untouched.
func applyCacheBreakpointToMessage(msg map[string]interface{}) {
	ephemeral := map[string]string{"type": "ephemeral"}

	if text, isString := msg["content"].(string); isString {
		msg["content"] = []map[string]interface{}{{
			"type":          "text",
			"text":          text,
			"cache_control": ephemeral,
		}}
		return
	}

	if blocks, isBlockArray := msg["content"].([]map[string]interface{}); isBlockArray && len(blocks) > 0 {
		blocks[len(blocks)-1]["cache_control"] = ephemeral
	}
}
176 changes: 176 additions & 0 deletions client/cache_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
package client

import (
"encoding/json"
"testing"
)

// TestBuildAnthropicCachedRequest_BasicMessages checks the cache layout for a
// plain text conversation: the system prompt is emitted as a cached block
// array, and the second-to-last message carries the cache breakpoint.
func TestBuildAnthropicCachedRequest_BasicMessages(t *testing.T) {
	messages := []EyrieMessage{
		{Role: "system", Content: "You are helpful."},
		{Role: "user", Content: "Hello"},
		{Role: "assistant", Content: "Hi there!"},
		{Role: "user", Content: "How are you?"},
	}
	req := buildAnthropicCachedRequest(messages, "claude-sonnet-4-20250514", 4096, nil, false, nil)

	// System should be array with cache_control
	system, ok := req["system"].([]map[string]interface{})
	if !ok || len(system) != 1 {
		t.Fatal("expected system as array with one element")
	}
	if system[0]["cache_control"] == nil {
		t.Fatal("expected cache_control on system")
	}
	if system[0]["text"] != "You are helpful." {
		t.Fatal("system text mismatch")
	}

	// Messages: second-to-last (index 1, assistant) should have cache_control.
	// Use a checked type assertion so an unexpected shape fails the test
	// instead of panicking it.
	msgs, ok := req["messages"].([]map[string]interface{})
	if !ok {
		t.Fatalf("expected messages as []map[string]interface{}, got %T", req["messages"])
	}
	if len(msgs) != 3 { // 3 non-system messages
		t.Fatalf("expected 3 messages, got %d", len(msgs))
	}

	// Second to last message (index 1 = assistant "Hi there!") should be array with cache_control
	assistantContent, ok := msgs[1]["content"].([]map[string]interface{})
	if !ok {
		t.Fatal("expected assistant content to be array after cache breakpoint")
	}
	// Guard the index: an empty block array should fail, not panic.
	if len(assistantContent) == 0 || assistantContent[0]["cache_control"] == nil {
		t.Fatal("expected cache_control on second-to-last message")
	}
}

// TestBuildAnthropicCachedRequest_ToolUsePropagated checks that tool_use and
// tool_result blocks survive the cached-request builder, and that the cache
// breakpoint lands on the second-to-last message (here the tool_result).
func TestBuildAnthropicCachedRequest_ToolUsePropagated(t *testing.T) {
	messages := []EyrieMessage{
		{Role: "user", Content: "read file.go"},
		{Role: "assistant", Content: "", ToolUse: []ToolCall{
			{ID: "tc1", Name: "read", Arguments: map[string]interface{}{"path": "file.go"}},
		}},
		{Role: "user", Content: "", ToolResult: &ToolResult{ToolUseID: "tc1", Content: "package main"}},
		{Role: "user", Content: "now edit it"},
	}
	req := buildAnthropicCachedRequest(messages, "claude-sonnet-4-20250514", 4096, nil, false, nil)

	// Checked type assertion: an unexpected shape should fail the test, not
	// panic it.
	msgs, ok := req["messages"].([]map[string]interface{})
	if !ok {
		t.Fatalf("expected messages as []map[string]interface{}, got %T", req["messages"])
	}
	if len(msgs) != 4 {
		t.Fatalf("expected 4 messages, got %d", len(msgs))
	}

	// Verify tool_use message (index 1) preserved
	assistantMsg := msgs[1]
	content, ok := assistantMsg["content"].([]map[string]interface{})
	if !ok {
		t.Fatal("expected assistant tool_use as array content")
	}
	found := false
	for _, block := range content {
		if block["type"] == "tool_use" {
			found = true
			break
		}
	}
	if !found {
		t.Fatal("expected tool_use block in assistant message")
	}

	// Verify tool_result message (index 2) is the cached one (second-to-last)
	toolResultMsg := msgs[2]
	trContent, ok := toolResultMsg["content"].([]map[string]interface{})
	if !ok {
		t.Fatal("expected tool_result as array content")
	}
	// Guard the last-element index: an empty block array should fail cleanly
	// instead of panicking with an out-of-range error.
	if len(trContent) == 0 {
		t.Fatal("expected non-empty tool_result content")
	}
	if trContent[len(trContent)-1]["cache_control"] == nil {
		t.Fatal("expected cache_control on second-to-last message (tool_result)")
	}
}

// TestBuildAnthropicCachedRequest_ToolsAnnotated checks that tool schemas are
// serialized into the request and that exactly one — the final tool — carries
// the cache_control marker, per Anthropic's caching convention.
func TestBuildAnthropicCachedRequest_ToolsAnnotated(t *testing.T) {
	defs := []anthropicTool{
		{Name: "read", Description: "Read a file", InputSchema: map[string]interface{}{"type": "object"}},
		{Name: "write", Description: "Write a file", InputSchema: map[string]interface{}{"type": "object"}},
		{Name: "bash", Description: "Run command", InputSchema: map[string]interface{}{"type": "object"}},
	}
	conv := []EyrieMessage{
		{Role: "user", Content: "hello"},
	}

	req := buildAnthropicCachedRequest(conv, "claude-sonnet-4-20250514", 4096, nil, false, defs)

	got, ok := req["tools"].([]map[string]interface{})
	if !ok || len(got) != 3 {
		t.Fatalf("expected 3 tools, got %v", req["tools"])
	}

	// Only the LAST tool should have cache_control
	if got[0]["cache_control"] != nil {
		t.Fatal("first tool should not have cache_control")
	}
	if got[1]["cache_control"] != nil {
		t.Fatal("second tool should not have cache_control")
	}
	if got[2]["cache_control"] == nil {
		t.Fatal("last tool must have cache_control")
	}
}

// TestCacheUsageParsing verifies that Anthropic's cache accounting fields
// (cache_creation_input_tokens / cache_read_input_tokens) unmarshal into
// anthropicResponse.Usage and carry through to EyrieUsage unchanged.
func TestCacheUsageParsing(t *testing.T) {
	// Fixture mirrors a real Anthropic messages-API response body.
	responseJSON := `{
		"id": "msg_123",
		"content": [{"type": "text", "text": "Hello!"}],
		"stop_reason": "end_turn",
		"usage": {
			"input_tokens": 100,
			"output_tokens": 50,
			"cache_creation_input_tokens": 1000,
			"cache_read_input_tokens": 800
		}
	}`

	var ar anthropicResponse
	if err := json.Unmarshal([]byte(responseJSON), &ar); err != nil {
		t.Fatalf("failed to unmarshal: %v", err)
	}

	if ar.Usage.CacheCreationInputTokens != 1000 {
		t.Fatalf("expected cache_creation=1000, got %d", ar.Usage.CacheCreationInputTokens)
	}
	if ar.Usage.CacheReadInputTokens != 800 {
		t.Fatalf("expected cache_read=800, got %d", ar.Usage.CacheReadInputTokens)
	}

	// Verify it propagates to EyrieUsage — same field mapping Chat() performs
	// when building the response usage struct.
	usage := &EyrieUsage{
		PromptTokens:        ar.Usage.InputTokens,
		CompletionTokens:    ar.Usage.OutputTokens,
		TotalTokens:         ar.Usage.InputTokens + ar.Usage.OutputTokens,
		CacheCreationTokens: ar.Usage.CacheCreationInputTokens,
		CacheReadTokens:     ar.Usage.CacheReadInputTokens,
	}
	if usage.CacheCreationTokens != 1000 || usage.CacheReadTokens != 800 {
		t.Fatal("cache tokens not propagated correctly")
	}
}

// TestBuildAnthropicCachedRequest_NoSystem checks that a conversation without
// a system message produces a request body with no "system" key at all.
func TestBuildAnthropicCachedRequest_NoSystem(t *testing.T) {
	conv := []EyrieMessage{
		{Role: "user", Content: "Hello"},
		{Role: "assistant", Content: "Hi"},
		{Role: "user", Content: "Bye"},
	}

	req := buildAnthropicCachedRequest(conv, "claude-sonnet-4-20250514", 4096, nil, false, nil)

	if _, present := req["system"]; present {
		t.Fatal("should not have system key when no system message")
	}
}

// TestBuildAnthropicCachedRequest_StreamFlag checks that the stream argument
// is copied through to the "stream" field of the request body.
func TestBuildAnthropicCachedRequest_StreamFlag(t *testing.T) {
	conv := []EyrieMessage{
		{Role: "user", Content: "Hello"},
	}
	req := buildAnthropicCachedRequest(conv, "claude-sonnet-4-20250514", 4096, nil, true, nil)

	if streamed, ok := req["stream"].(bool); !ok || !streamed {
		t.Fatal("expected stream=true")
	}
}
Loading
Loading