diff --git a/docs/BUGFIX-setup-writer-model-location.md b/docs/BUGFIX-setup-writer-model-location.md new file mode 100644 index 0000000000..b974bae2c8 --- /dev/null +++ b/docs/BUGFIX-setup-writer-model-location.md @@ -0,0 +1,59 @@ +# Bug: `claw setup` Writes `model` Inside `provider` Instead of Top-Level + +## Status +- **Discovered:** 2026-04-27 +- **Impact:** Users running `claw setup` get their model config ignored; the session silently falls back to whatever default model-selection logic applies + +## Problem + +The `claw setup` wizard writes settings in this format: + +```json +{ + "provider": { + "apiKey": "...", + "baseUrl": "...", + "kind": "openai", + "model": "glm-5.1-fast" + } +} +``` + +But `RuntimeConfig::model()` in `crates/runtime/src/config.rs` parses the `model` field from the **top level**: + +```rust +fn parse_optional_model(merged: &JsonValue) -> Option<String> { + // Expects { "model": "glm-5.1-fast", ... } + merged.get("model").and_then(|v| v.as_str()).map(str::to_string) +} +``` + +Result: The model setting is silently ignored, and sessions use whatever fallback/default logic applies. + +## Workaround + +Manually edit `~/.claw/settings.json` to move `model` to the top level: + +```json +{ + "model": "glm-5.1-fast", + "provider": { + "apiKey": "...", + "baseUrl": "...", + "kind": "openai" + } +} +``` + +## Fix Required + +In the setup wizard code, ensure the `model` field is written at the top level of the JSON object, not nested under `provider`. + +**Likely location:** `crates/rusty-claude-cli/src/setup.rs` or similar setup-writer module. + +**Change:** After collecting model input, write to top-level `"model"` key instead of `provider.model`.
+ +## Related + +- Config loading logic: `crates/runtime/src/config.rs` +- RuntimeFeatureConfig parses: `model: parse_optional_model(&merged_value)` diff --git a/docs/DEBUG-neuralwatt-tool-call-issue.md b/docs/DEBUG-neuralwatt-tool-call-issue.md new file mode 100644 index 0000000000..0fcb18af9f --- /dev/null +++ b/docs/DEBUG-neuralwatt-tool-call-issue.md @@ -0,0 +1,213 @@ +# Debug Report: NeuralWatt API Tool Call Issue + +**Date:** 2026-04-27 +**Reporter:** Claw Code user via OpenClaw +**Model:** glm-5.1-fast (and glm-5-fast) +**Issue:** Intermittent 400 Bad Request errors with tool calls + +## Summary + +User reports getting HTTP 400 errors when using tool calls with Claw Code against NeuralWatt API. The error message shows: +``` +[error-kind: api_http_error] +error: api returned 400 Bad Request (invalid_request_error): HTTP 400 from backend (no parseable body) +``` + +**Note:** The "no parseable body" part suggests the backend response was not valid JSON, which may indicate a backend error rather than a validation issue. + +## Successful Test Cases + +### 1. Basic chat completion (no tools) +```bash +curl -s "https://api.neuralwatt.com/v1/chat/completions" \ + -H "Authorization: Bearer $API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "model": "glm-5.1-fast", + "messages": [{"role": "user", "content": "Say hello"}], + "max_tokens": 50 + }' +``` +**Result:** ✅ Works + +### 2. 
Simple tool call +```bash +curl -s "https://api.neuralwatt.com/v1/chat/completions" \ + -H "Authorization: Bearer $API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "model": "glm-5.1-fast", + "messages": [{"role": "user", "content": "What is the weather in Chicago?"}], + "tools": [{ + "type": "function", + "function": { + "name": "get_weather", + "description": "Get the current weather for a location", + "parameters": { + "type": "object", + "properties": { + "location": {"type": "string", "description": "City name"} + }, + "required": ["location"] + } + } + }], + "tool_choice": "auto" + }' +``` +**Result:** ✅ Works - model correctly returns tool call + +### 3. Tool with `additionalProperties: false` (Claw normalization) +```bash +curl -s "https://api.neuralwatt.com/v1/chat/completions" \ + -H "Authorization: Bearer $API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "model": "glm-5.1-fast", + "messages": [{"role": "user", "content": "List files in current directory"}], + "tools": [{ + "type": "function", + "function": { + "name": "bash", + "description": "Execute a bash command", + "parameters": { + "type": "object", + "properties": { + "command": {"type": "string", "description": "The command to execute"} + }, + "required": ["command"], + "additionalProperties": false + } + } + }], + "tool_choice": "auto" + }' +``` +**Result:** ✅ Works + +### 4. 
Tool with optional parameters +```bash +curl -s "https://api.neuralwatt.com/v1/chat/completions" \ + -H "Authorization: Bearer $API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "model": "glm-5.1-fast", + "messages": [{"role": "user", "content": "Read the file test.txt"}], + "tools": [{ + "type": "function", + "function": { + "name": "read_file", + "description": "Read file contents", + "parameters": { + "type": "object", + "properties": { + "path": {"type": "string", "description": "Path to the file"}, + "offset": {"type": "number", "description": "Line number to start reading from"}, + "limit": {"type": "number", "description": "Maximum number of lines to read"} + }, + "required": ["path"], + "additionalProperties": false + } + } + }], + "tool_choice": "auto" + }' +``` +**Result:** ✅ Works + +## Claw Tool Definition Format + +Claw Code normalizes tool schemas before sending to OpenAI-compatible APIs: + +```rust +fn normalize_object_schema(schema: &mut Value) { + if let Some(obj) = schema.as_object_mut() { + if obj.get("type").and_then(Value::as_str) == Some("object") { + obj.entry("properties").or_insert_with(|| json!({})); + obj.entry("additionalProperties") + .or_insert(Value::Bool(false)); + } + // Recursively normalize nested objects + // ... + } +} +``` + +This adds: +- `"properties": {}` if missing for object types +- `"additionalProperties": false` if missing for object types + +## Possible Causes + +1. **Large request body** - Claw may send many tools with large schemas +2. **Streaming mode** - Claw uses streaming, may differ from non-streaming +3. **Backend transient errors** - "no parseable body" suggests backend crash/error +4. **Specific schema patterns** - Certain nested schemas may trigger validation issues + +## Information Needed from Provider + +1. Raw HTTP request body that caused the 400 error +2. Actual response body returned (for "no parseable body" cases) +3. Backend logs for the failing request +4. 
Any schema validation errors on the backend + +## Actual Failure Scenario + +The 400 error occurred when **starting a new session**, not resuming an existing tool call. + +Session evidence: +- `session-1777295962205-0.jsonl`: Previous session using `moonshotai/Kimi-K2.6`, completed successfully with no incomplete tool calls +- `session-1777301106352-0.jsonl`: New session with `glm-5.1-fast`, contains only session_meta - no messages + +**The failure happened on the initial API call** when Claw sends: +1. System prompt (large, includes tool documentation) +2. Tool definitions (~20+ tools with complex schemas) + +This is NOT a "resumed tool call" issue - it's an initial session startup failure. + +## Reproduction Attempt + +To reproduce with a realistic Claw-like request, try: + +```bash +# This simulates a typical Claw request with multiple tools +curl -v "https://api.neuralwatt.com/v1/chat/completions" \ + -H "Authorization: Bearer $API_KEY" \ + -H "Content-Type: application/json" \ + -d @- << 'EOF' +{ + "model": "glm-5.1-fast", + "messages": [ + {"role": "user", "content": "Read the file /mnt/data/git/RadGameRandom01/godot-rad-defense/scripts/placement_controller.gd"} + ], + "tools": [ + { + "type": "function", + "function": { + "name": "read_file", + "description": "Read the contents of a file. 
Supports text files and images.", + "parameters": { + "type": "object", + "properties": { + "path": {"type": "string", "description": "Path to the file to read"}, + "offset": {"type": "number", "description": "Line number to start reading from (1-indexed)"}, + "limit": {"type": "number", "description": "Maximum number of lines to read"} + }, + "required": ["path"], + "additionalProperties": false + } + } + } + ], + "tool_choice": "auto", + "stream": true +} +EOF +``` + +## Contact + +For follow-up, the user can provide: +- Claw session file with the failing request +- Timestamp of the error for backend log correlation +- Request ID if available in response headers diff --git a/rust/crates/api/src/providers/openai_compat.rs b/rust/crates/api/src/providers/openai_compat.rs index b3800d6acf..9da1e22d1a 100644 --- a/rust/crates/api/src/providers/openai_compat.rs +++ b/rust/crates/api/src/providers/openai_compat.rs @@ -497,10 +497,12 @@ impl StreamState { } for choice in chunk.choices { + // Handle reasoning/thinking from various provider fields if let Some(reasoning) = choice .delta .reasoning_content .filter(|value| !value.is_empty()) + .or(choice.delta.thinking.and_then(|t| t.content).filter(|value| !value.is_empty())) { if !self.thinking_started { self.thinking_started = true; @@ -728,6 +730,7 @@ impl ToolCallState { #[derive(Debug, Deserialize)] struct ChatCompletionResponse { + #[serde(default)] id: String, model: String, choices: Vec, @@ -775,6 +778,7 @@ struct OpenAiUsage { #[derive(Debug, Deserialize)] struct ChatCompletionChunk { + #[serde(default)] id: String, #[serde(default)] model: Option, @@ -786,6 +790,7 @@ struct ChatCompletionChunk { #[derive(Debug, Deserialize)] struct ChunkChoice { + #[serde(default)] delta: ChunkDelta, #[serde(default)] finish_reason: Option, @@ -795,12 +800,21 @@ struct ChunkChoice { struct ChunkDelta { #[serde(default)] content: Option, + /// Some providers (GLM, DeepSeek) emit reasoning in `reasoning_content` #[serde(default)] 
reasoning_content: Option, + #[serde(default)] + thinking: Option, #[serde(default, deserialize_with = "deserialize_null_as_empty_vec")] tool_calls: Vec, } +#[derive(Debug, Default, Deserialize)] +struct ThinkingDelta { + #[serde(default)] + content: Option, +} + #[derive(Debug, Deserialize)] struct DeltaToolCall { #[serde(default)] @@ -1351,7 +1365,50 @@ fn parse_sse_frame( data_lines.push(data.trim_start()); } } + // If no SSE data lines found, check if the entire frame is raw JSON (error or otherwise) if data_lines.is_empty() { + // Detect raw JSON error response (not SSE-framed) + if let Ok(raw) = serde_json::from_str::(trimmed) { + if let Some(err_obj) = raw.get("error") { + let msg = err_obj + .get("message") + .and_then(|m| m.as_str()) + .unwrap_or("provider returned an error") + .to_string(); + let code = err_obj + .get("code") + .and_then(serde_json::Value::as_u64) + .and_then(|c| u16::try_from(c).ok()); // checked narrowing: out-of-range codes fall back to 500 instead of wrapping + let status = reqwest::StatusCode::from_u16(code.unwrap_or(500)) + .unwrap_or(reqwest::StatusCode::INTERNAL_SERVER_ERROR); + return Err(ApiError::Api { + status, + error_type: err_obj + .get("type") + .and_then(|t| t.as_str()) + .map(str::to_owned), + message: Some(msg), + request_id: None, + body: trimmed.chars().take(500).collect(), + retryable: false, + suggested_action: suggested_action_for_status(status), + retry_after: None, + }); + } + } + // Detect HTML responses + if trimmed.starts_with('<') || trimmed.starts_with(" SlashCommand::Plan { mode: remainder }, "review" => SlashCommand::Review { scope: remainder }, + "team" => SlashCommand::Team { action: remainder }, "tasks" => SlashCommand::Tasks { args: remainder }, "theme" => SlashCommand::Theme { name: remainder }, "voice" => SlashCommand::Voice { mode: remainder }, "usage" => SlashCommand::Usage { scope: remainder }, + // NOTE(review): unresolved merge-conflict markers were committed here; resolved to the HEAD side. + // Re-add the `"setup" => SlashCommand::Setup,` arm only if that variant exists on this branch. "rename" => SlashCommand::Rename { name: remainder }, "copy" => 
SlashCommand::Copy { target: remainder }, "hooks" => SlashCommand::Hooks { args: remainder }, diff --git a/rust/crates/runtime/src/compact.rs b/rust/crates/runtime/src/compact.rs index e4fd3db0d3..03f04053cb 100644 --- a/rust/crates/runtime/src/compact.rs +++ b/rust/crates/runtime/src/compact.rs @@ -128,7 +128,7 @@ pub fn compact_session(session: &Session, config: CompactionConfig) -> Compactio // is NOT an assistant message that contains a ToolUse block (i.e. the // pair is actually broken at the boundary). loop { - if k == 0 || k <= compacted_prefix_len { + if k == 0 || k <= compacted_prefix_len || k >= session.messages.len() { break; } let first_preserved = &session.messages[k]; diff --git a/rust/crates/rusty-claude-cli/src/main.rs b/rust/crates/rusty-claude-cli/src/main.rs index df4d8da452..cb37c75e42 100644 --- a/rust/crates/rusty-claude-cli/src/main.rs +++ b/rust/crates/rusty-claude-cli/src/main.rs @@ -4563,6 +4563,85 @@ impl LiveCli { TerminalRenderer::new().color_theme(), &mut stdout, )?; + + // ============================================================================ + // Auto-compact retry on context window errors + // ============================================================================ + // When the model API returns a context_window_blocked error (because the request + // exceeds the model's context window), we automatically: + // 1. Compact the session (remove old messages to free up space) + // 2. Retry the original request with the compacted session + // 3. Report results to the user + // + // This eliminates the need for users to manually run /compact when they + // hit context limits - the recovery happens automatically. + // + // Detection: We look for "context_window" or "Context window" in the error + // message, which covers error types like: + // - "context_window_blocked" + // - "Context window blocked" + // - "This model's maximum context length is X tokens..." 
+ // ============================================================================ + + // Case-insensitive match; also covers the OpenAI-style "maximum context length" wording. + let error_str = error.to_string().to_ascii_lowercase(); + let is_context_window = error_str.contains("context_window") + || error_str.contains("context window") + || error_str.contains("maximum context length"); + + if is_context_window { + // Step 1: Compact the session to free up context space + // We set max_estimated_tokens to 0 to compact as aggressively as needed + let result = runtime::compact_session( + runtime.session(), + CompactionConfig { + max_estimated_tokens: 0, + ..CompactionConfig::default() + }, + ); + let removed = result.removed_message_count; + + // Only retry when compaction actually removed messages; otherwise the retry + // would resend the same oversized request and fail the same way. + if removed > 0 { + println!(" Auto-compacting session and retrying..."); + // Report compaction results to user + println!("{}", format_compact_report(removed, result.compacted_session.messages.len(), false)); + // NOTE(review): `result.compacted_session` is only read for its length here; confirm `prepare_turn_runtime` rebuilds from the compacted session. + // Step 2: Build a new runtime with the compacted session and retry + let (mut new_runtime, hook_abort_monitor) = self.prepare_turn_runtime(true)?; + drop(hook_abort_monitor); // not needed for retry + + // Step 3: Run the turn again with the smaller session + let mut rp = CliPermissionPrompter::new(self.permission_mode); + match new_runtime.run_turn(input, Some(&mut rp)) { + Ok(summary) => { + // Success! 
Replace old runtime with the new compacted one + self.replace_runtime(new_runtime)?; + spinner.finish( + "✨ Done (after auto-compact)", + TerminalRenderer::new().color_theme(), + &mut stdout, + )?; + println!(); + // If additional auto-compaction happened during retry, + // report that too + if let Some(event) = summary.auto_compaction { + println!("{}", format_auto_compaction_notice(event.removed_message_count)); + } + // Save the compacted session to disk + self.persist_session()?; + return Ok(()); + } + // If retry also fails, propagate the new error + Err(retry_error) => { + return Err(Box::new(retry_error)); + } + } + } + } + + // Not a context-window error, or nothing could be compacted: return the original error Err(Box::new(error)) } } diff --git a/rust/scripts/install.sh b/rust/scripts/install.sh new file mode 100755 index 0000000000..344a7b5c62 --- /dev/null +++ b/rust/scripts/install.sh @@ -0,0 +1,15 @@ +#!/bin/bash +set -euo pipefail + +# Run from the workspace root (the parent of scripts/) so the build and the +# symlink target do not depend on the caller's working directory. +cd "$(dirname "${BASH_SOURCE[0]}")/.." + +# Build the release binary +cargo build --release + +# Link to ~/.local/bin +mkdir -p "$HOME/.local/bin" +ln -sf "$(pwd)/target/release/claw" "$HOME/.local/bin/claw" + +echo "✓ Claw installed to ~/.local/bin/claw"