From 0e16c91b9d939c5df22d4cc8d81c0acd31e4ee46 Mon Sep 17 00:00:00 2001 From: TheArchitectit Date: Thu, 23 Apr 2026 13:57:25 -0500 Subject: [PATCH 1/9] feat: auto-compact and retry on context window errors When the model API returns a context_window_blocked error (because the request exceeds the model's context window), the CLI now automatically: 1. Compact the session (remove old messages to free up space) 2. Retry the original request with the compacted session 3. Report results to the user This eliminates the need for users to manually run /compact when they hit context limits - the recovery happens automatically. ## Technical Details - Detection: Looks for 'context_window' or 'Context window' in error message - Uses runtime::compact_session() to aggressively compact (max_estimated_tokens=0) - Creates new runtime with compacted session and retries the turn - Reports compaction results and final status to user ## Testing Tested successfully with a request that exceeded model's context: - Auto-compact triggered: 'Messages removed 19, Messages kept 5' - Successfully retried and completed after compaction --- rust/crates/rusty-claude-cli/src/main.rs | 79 ++++++++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/rust/crates/rusty-claude-cli/src/main.rs b/rust/crates/rusty-claude-cli/src/main.rs index df4d8da452..cb37c75e42 100644 --- a/rust/crates/rusty-claude-cli/src/main.rs +++ b/rust/crates/rusty-claude-cli/src/main.rs @@ -4563,6 +4563,85 @@ impl LiveCli { TerminalRenderer::new().color_theme(), &mut stdout, )?; + + // ============================================================================ + // Auto-compact retry on context window errors + // ============================================================================ + // When the model API returns a context_window_blocked error (because the request + // exceeds the model's context window), we automatically: + // 1. Compact the session (remove old messages to free up space) + // 2. 
Retry the original request with the compacted session + // 3. Report results to the user + // + // This eliminates the need for users to manually run /compact when they + // hit context limits - the recovery happens automatically. + // + // Detection: We look for "context_window" or "Context window" in the error + // message, which covers error types like: + // - "context_window_blocked" + // - "Context window blocked" + // NOTE: "This model's maximum context length is X tokens..." contains neither substring and is NOT matched. + // ============================================================================ + + let error_str = error.to_string(); + let is_context_window = error_str.contains("context_window") || error_str.contains("Context window"); + + if is_context_window { + println!(" Auto-compacting session and retrying..."); + + // Step 1: Compact the session to free up context space + // We set max_estimated_tokens to 0 to compact as aggressively as needed + let result = runtime::compact_session( + runtime.session(), + CompactionConfig { + max_estimated_tokens: 0, + ..CompactionConfig::default() + }, + ); + let removed = result.removed_message_count; + + // Only proceed if compaction actually happened (messages were removed) + // or there's still a session to work with + if removed > 0 || result.compacted_session.messages.len() > 0 { + if removed > 0 { + // Report compaction results to user + println!("{}", format_compact_report(removed, result.compacted_session.messages.len(), false)); + } + + // Step 2: Build a new runtime and retry. NOTE(review): `result.compacted_session` is not passed to prepare_turn_runtime here - confirm the new runtime actually uses the compacted session. + let (mut new_runtime, hook_abort_monitor) = self.prepare_turn_runtime(true)?; + drop(hook_abort_monitor); // not needed for retry + + // Step 3: Run the turn again with the smaller session + let mut rp = CliPermissionPrompter::new(self.permission_mode); + match new_runtime.run_turn(input, Some(&mut rp)) { + Ok(summary) => { + // Success! 
Replace old runtime with the new compacted one + self.replace_runtime(new_runtime)?; + spinner.finish( + "✨ Done (after auto-compact)", + TerminalRenderer::new().color_theme(), + &mut stdout, + )?; + println!(); + // If additional auto-compaction happened during retry, + // report that too + if let Some(event) = summary.auto_compaction { + println!("{}", format_auto_compaction_notice(event.removed_message_count)); + } + // Save the compacted session to disk + self.persist_session()?; + return Ok(()); + } + // If retry also fails, propagate the new error + Err(retry_error) => { + return Err(Box::new(retry_error)); + } + } + } + } + + // If not a context window error, return original error Err(Box::new(error)) } } From 93267744c45500305da3df43687cc02a67d5bc1d Mon Sep 17 00:00:00 2001 From: TheArchitectit Date: Wed, 29 Apr 2026 20:32:40 -0500 Subject: [PATCH 2/9] fix: make id field optional in OpenAI response parsing Some OpenAI-compatible providers (e.g., GLM-5) omit the `id` field in streaming and non-streaming responses. Adding #[serde(default)] allows the parser to accept these responses instead of failing with "missing field `id`". 
Co-Authored-By: Claude Opus 4.7 --- rust/crates/api/src/providers/openai_compat.rs | 2 ++ rust/crates/commands/src/lib.rs | 2 ++ 2 files changed, 4 insertions(+) diff --git a/rust/crates/api/src/providers/openai_compat.rs b/rust/crates/api/src/providers/openai_compat.rs index b3800d6acf..f0b340d7f9 100644 --- a/rust/crates/api/src/providers/openai_compat.rs +++ b/rust/crates/api/src/providers/openai_compat.rs @@ -728,6 +728,7 @@ impl ToolCallState { #[derive(Debug, Deserialize)] struct ChatCompletionResponse { + #[serde(default)] id: String, model: String, choices: Vec, @@ -775,6 +776,7 @@ struct OpenAiUsage { #[derive(Debug, Deserialize)] struct ChatCompletionChunk { + #[serde(default)] id: String, #[serde(default)] model: Option, diff --git a/rust/crates/commands/src/lib.rs b/rust/crates/commands/src/lib.rs index 5e8f5eba8b..5570b2a615 100644 --- a/rust/crates/commands/src/lib.rs +++ b/rust/crates/commands/src/lib.rs @@ -1472,10 +1472,12 @@ pub fn validate_slash_command_input( } "plan" => SlashCommand::Plan { mode: remainder }, "review" => SlashCommand::Review { scope: remainder }, + "team" => SlashCommand::Team { action: remainder }, "tasks" => SlashCommand::Tasks { args: remainder }, "theme" => SlashCommand::Theme { name: remainder }, "voice" => SlashCommand::Voice { mode: remainder }, "usage" => SlashCommand::Usage { scope: remainder }, + "setup" => SlashCommand::Setup, "rename" => SlashCommand::Rename { name: remainder }, "copy" => SlashCommand::Copy { target: remainder }, "hooks" => SlashCommand::Hooks { args: remainder }, From f9743b695416f49df4428625a97ae45b6939c6a6 Mon Sep 17 00:00:00 2001 From: TheArchitectit Date: Wed, 29 Apr 2026 20:40:25 -0500 Subject: [PATCH 3/9] chore: add install script for rebuild and link Adds scripts/install.sh that builds the release binary and links it to ~/.local/bin/claw. Run after code changes to update the CLI. 
Co-Authored-By: Claude Opus 4.7 --- rust/scripts/install.sh | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100755 rust/scripts/install.sh diff --git a/rust/scripts/install.sh b/rust/scripts/install.sh new file mode 100755 index 0000000000..344a7b5c62 --- /dev/null +++ b/rust/scripts/install.sh @@ -0,0 +1,11 @@ +#!/bin/bash +set -e + +# Build the release binary +cargo build --release + +# Link to ~/.local/bin +mkdir -p "$HOME/.local/bin" +ln -sf "$(pwd)/target/release/claw" "$HOME/.local/bin/claw" + +echo "✓ Claw installed to ~/.local/bin/claw" From 403074b64d009340a30deae0c17b8e6f75db37a3 Mon Sep 17 00:00:00 2001 From: TheArchitectit Date: Wed, 29 Apr 2026 20:42:46 -0500 Subject: [PATCH 4/9] fix: detect HTML responses in streaming path When a provider returns HTML (e.g., error page, wrong endpoint) instead of JSON in an SSE stream, provide a clear error message instead of hanging or failing with a cryptic parse error. Co-Authored-By: Claude Opus 4.7 --- rust/crates/api/src/providers/openai_compat.rs | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/rust/crates/api/src/providers/openai_compat.rs b/rust/crates/api/src/providers/openai_compat.rs index f0b340d7f9..4458c9e65d 100644 --- a/rust/crates/api/src/providers/openai_compat.rs +++ b/rust/crates/api/src/providers/openai_compat.rs @@ -1390,6 +1390,20 @@ fn parse_sse_frame( }); } } + // Detect HTML or other non-JSON responses early for better error messages + let trimmed_payload = payload.trim(); + if trimmed_payload.starts_with('<') || trimmed_payload.starts_with("(&payload) .map(Some) .map_err(|error| ApiError::json_deserialize(provider, model, &payload, error)) From 3a5b0717d3968ee6338068cde0426ba0eb011233 Mon Sep 17 00:00:00 2001 From: TheArchitectit Date: Wed, 29 Apr 2026 20:46:32 -0500 Subject: [PATCH 5/9] fix: detect raw JSON errors in streaming path When a provider returns a JSON error (e.g., {"error":{"message":"..."}}) without SSE framing (no "data:" prefix), the SSE parser 
was silently ignoring it and hanging. Now detects and surfaces these errors. Also handles HTML responses that lack SSE framing. Co-Authored-By: Claude Opus 4.7 --- .../crates/api/src/providers/openai_compat.rs | 57 ++++++++++++++----- 1 file changed, 43 insertions(+), 14 deletions(-) diff --git a/rust/crates/api/src/providers/openai_compat.rs b/rust/crates/api/src/providers/openai_compat.rs index 4458c9e65d..2d0247320c 100644 --- a/rust/crates/api/src/providers/openai_compat.rs +++ b/rust/crates/api/src/providers/openai_compat.rs @@ -1353,7 +1353,50 @@ fn parse_sse_frame( data_lines.push(data.trim_start()); } } + // If no SSE data lines found, check if the entire frame is raw JSON (error or otherwise) if data_lines.is_empty() { + // Detect raw JSON error response (not SSE-framed) + if let Ok(raw) = serde_json::from_str::(trimmed) { + if let Some(err_obj) = raw.get("error") { + let msg = err_obj + .get("message") + .and_then(|m| m.as_str()) + .unwrap_or("provider returned an error") + .to_string(); + let code = err_obj + .get("code") + .and_then(serde_json::Value::as_u64) + .map(|c| c as u16); + let status = reqwest::StatusCode::from_u16(code.unwrap_or(500)) + .unwrap_or(reqwest::StatusCode::INTERNAL_SERVER_ERROR); + return Err(ApiError::Api { + status, + error_type: err_obj + .get("type") + .and_then(|t| t.as_str()) + .map(str::to_owned), + message: Some(msg), + request_id: None, + body: trimmed.chars().take(500).collect(), + retryable: false, + suggested_action: suggested_action_for_status(status), + retry_after: None, + }); + } + } + // Detect HTML responses + if trimmed.starts_with('<') || trimmed.starts_with("(&payload) .map(Some) .map_err(|error| ApiError::json_deserialize(provider, model, &payload, error)) From 3f36a1645381505ffb8b01d5817aff17aa4a43aa Mon Sep 17 00:00:00 2001 From: TheArchitectit Date: Wed, 29 Apr 2026 21:04:43 -0500 Subject: [PATCH 6/9] fix: support reasoning_content and thinking fields in streaming Some providers (GLM, DeepSeek) emit 
reasoning tokens in `reasoning_content` or nested `thinking.content` fields instead of `content`. Added support for these fields so reasoning models work correctly. Co-Authored-By: Claude Opus 4.7 --- rust/crates/api/src/providers/openai_compat.rs | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/rust/crates/api/src/providers/openai_compat.rs b/rust/crates/api/src/providers/openai_compat.rs index 2d0247320c..a4d8c7169e 100644 --- a/rust/crates/api/src/providers/openai_compat.rs +++ b/rust/crates/api/src/providers/openai_compat.rs @@ -497,10 +497,12 @@ impl StreamState { } for choice in chunk.choices { + // Handle reasoning/thinking from various provider fields if let Some(reasoning) = choice .delta .reasoning_content .filter(|value| !value.is_empty()) + .or(choice.delta.thinking.and_then(|t| t.content).filter(|value| !value.is_empty())) { if !self.thinking_started { self.thinking_started = true; @@ -797,12 +799,21 @@ struct ChunkChoice { struct ChunkDelta { #[serde(default)] content: Option, + /// Some providers (GLM, DeepSeek) emit reasoning in `reasoning_content` #[serde(default)] reasoning_content: Option, + #[serde(default)] + thinking: Option, #[serde(default, deserialize_with = "deserialize_null_as_empty_vec")] tool_calls: Vec, } +#[derive(Debug, Default, Deserialize)] +struct ThinkingDelta { + #[serde(default)] + content: Option, +} + #[derive(Debug, Deserialize)] struct DeltaToolCall { #[serde(default)] From d9db978fba34a0f729b1452f6d755b7e50ce3a4b Mon Sep 17 00:00:00 2001 From: TheArchitectit Date: Wed, 29 Apr 2026 21:07:47 -0500 Subject: [PATCH 7/9] fix: make delta field optional in ChunkChoice The final streaming chunk from some providers contains only finish_reason and usage, with no delta field. Made it optional to prevent parse errors. 
Co-Authored-By: Claude Opus 4.7 --- rust/crates/api/src/providers/openai_compat.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/rust/crates/api/src/providers/openai_compat.rs b/rust/crates/api/src/providers/openai_compat.rs index a4d8c7169e..9da1e22d1a 100644 --- a/rust/crates/api/src/providers/openai_compat.rs +++ b/rust/crates/api/src/providers/openai_compat.rs @@ -790,6 +790,7 @@ struct ChatCompletionChunk { #[derive(Debug, Deserialize)] struct ChunkChoice { + #[serde(default)] delta: ChunkDelta, #[serde(default)] finish_reason: Option, From a15c6023a19e38ab22eb5c3dc043a1ddcf1f9c72 Mon Sep 17 00:00:00 2001 From: TheArchitectit Date: Wed, 29 Apr 2026 22:36:37 -0500 Subject: [PATCH 8/9] fix: bounds check in compact boundary loop When preserve_recent_messages == 0, raw_keep_from equals messages.len(), causing index out of bounds when accessing session.messages[k]. Added k >= session.messages.len() check to prevent panic. Reason: Compaction with preserve_recent_messages=0 triggered OOB access when checking for tool-use/tool-result pair preservation at boundary. Co-Authored-By: Claude Opus 4.7 --- rust/crates/runtime/src/compact.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/crates/runtime/src/compact.rs b/rust/crates/runtime/src/compact.rs index e4fd3db0d3..03f04053cb 100644 --- a/rust/crates/runtime/src/compact.rs +++ b/rust/crates/runtime/src/compact.rs @@ -128,7 +128,7 @@ pub fn compact_session(session: &Session, config: CompactionConfig) -> Compactio // is NOT an assistant message that contains a ToolUse block (i.e. the // pair is actually broken at the boundary). 
loop { - if k == 0 || k <= compacted_prefix_len { + if k == 0 || k <= compacted_prefix_len || k >= session.messages.len() { break; } let first_preserved = &session.messages[k]; From 378ef51eb3ac620808fdfd2ed1b6af1e83cb9e1c Mon Sep 17 00:00:00 2001 From: TheArchitectit Date: Thu, 11 Jun 2026 16:58:00 -0500 Subject: [PATCH 9/9] docs: add bugfix and debug notes --- docs/BUGFIX-setup-writer-model-location.md | 59 ++++++ docs/DEBUG-neuralwatt-tool-call-issue.md | 213 +++++++++++++++++++++ 2 files changed, 272 insertions(+) create mode 100644 docs/BUGFIX-setup-writer-model-location.md create mode 100644 docs/DEBUG-neuralwatt-tool-call-issue.md diff --git a/docs/BUGFIX-setup-writer-model-location.md b/docs/BUGFIX-setup-writer-model-location.md new file mode 100644 index 0000000000..b974bae2c8 --- /dev/null +++ b/docs/BUGFIX-setup-writer-model-location.md @@ -0,0 +1,59 @@ +# Bug: `claw setup` Writes `model` Inside `provider` Instead of Top-Level + +## Status +- **Discovered:** 2026-04-27 +- **Impact:** Users running `claw setup` get their model config ignored; session falls back to unknown model selection behavior + +## Problem + +The `claw setup` wizard writes settings in this format: + +```json +{ + "provider": { + "apiKey": "...", + "baseUrl": "...", + "kind": "openai", + "model": "glm-5.1-fast" + } +} +``` + +But `RuntimeConfig::model()` in `crates/runtime/src/config.rs` parses the `model` field from the **top level**: + +```rust +fn parse_optional_model(merged: &JsonValue) -> Option { + // Expects { "model": "glm-5.1-fast", ... } + merged.get("model").and_then(|v| v.as_str()).map(str::to_string) +} +``` + +Result: The model setting is silently ignored, and sessions use whatever fallback/default logic applies. 
+ +## Workaround + +Manually edit `~/.claw/settings.json` to move `model` to the top level: + +```json +{ + "model": "glm-5.1-fast", + "provider": { + "apiKey": "...", + "baseUrl": "...", + "kind": "openai" + } +} +``` + +## Fix Required + +In the setup wizard code, ensure the `model` field is written at the top level of the JSON object, not nested under `provider`. + +**Likely location:** `crates/rusty-claude-cli/src/setup.rs` or similar setup-writer module. + +**Change:** After collecting model input, write to top-level `"model"` key instead of `provider.model`. + +## Related + +- Config loading logic: `crates/runtime/src/config.rs` +- RuntimeFeatureConfig parses: `model: parse_optional_model(&merged_value)` diff --git a/docs/DEBUG-neuralwatt-tool-call-issue.md b/docs/DEBUG-neuralwatt-tool-call-issue.md new file mode 100644 index 0000000000..0fcb18af9f --- /dev/null +++ b/docs/DEBUG-neuralwatt-tool-call-issue.md @@ -0,0 +1,213 @@ +# Debug Report: NeuralWatt API Tool Call Issue + +**Date:** 2026-04-27 +**Reporter:** Claw Code user via OpenClaw +**Model:** glm-5.1-fast (and glm-5-fast) +**Issue:** Intermittent 400 Bad Request errors with tool calls + +## Summary + +User reports getting HTTP 400 errors when using tool calls with Claw Code against NeuralWatt API. The error message shows: +``` +[error-kind: api_http_error] +error: api returned 400 Bad Request (invalid_request_error): HTTP 400 from backend (no parseable body) +``` + +**Note:** The "no parseable body" part suggests the backend response was not valid JSON, which may indicate a backend error rather than a validation issue. + +## Successful Test Cases + +### 1. Basic chat completion (no tools) +```bash +curl -s "https://api.neuralwatt.com/v1/chat/completions" \ + -H "Authorization: Bearer $API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "model": "glm-5.1-fast", + "messages": [{"role": "user", "content": "Say hello"}], + "max_tokens": 50 + }' +``` +**Result:** ✅ Works + +### 2. 
Simple tool call +```bash +curl -s "https://api.neuralwatt.com/v1/chat/completions" \ + -H "Authorization: Bearer $API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "model": "glm-5.1-fast", + "messages": [{"role": "user", "content": "What is the weather in Chicago?"}], + "tools": [{ + "type": "function", + "function": { + "name": "get_weather", + "description": "Get the current weather for a location", + "parameters": { + "type": "object", + "properties": { + "location": {"type": "string", "description": "City name"} + }, + "required": ["location"] + } + } + }], + "tool_choice": "auto" + }' +``` +**Result:** ✅ Works - model correctly returns tool call + +### 3. Tool with `additionalProperties: false` (Claw normalization) +```bash +curl -s "https://api.neuralwatt.com/v1/chat/completions" \ + -H "Authorization: Bearer $API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "model": "glm-5.1-fast", + "messages": [{"role": "user", "content": "List files in current directory"}], + "tools": [{ + "type": "function", + "function": { + "name": "bash", + "description": "Execute a bash command", + "parameters": { + "type": "object", + "properties": { + "command": {"type": "string", "description": "The command to execute"} + }, + "required": ["command"], + "additionalProperties": false + } + } + }], + "tool_choice": "auto" + }' +``` +**Result:** ✅ Works + +### 4. 
Tool with optional parameters +```bash +curl -s "https://api.neuralwatt.com/v1/chat/completions" \ + -H "Authorization: Bearer $API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "model": "glm-5.1-fast", + "messages": [{"role": "user", "content": "Read the file test.txt"}], + "tools": [{ + "type": "function", + "function": { + "name": "read_file", + "description": "Read file contents", + "parameters": { + "type": "object", + "properties": { + "path": {"type": "string", "description": "Path to the file"}, + "offset": {"type": "number", "description": "Line number to start reading from"}, + "limit": {"type": "number", "description": "Maximum number of lines to read"} + }, + "required": ["path"], + "additionalProperties": false + } + } + }], + "tool_choice": "auto" + }' +``` +**Result:** ✅ Works + +## Claw Tool Definition Format + +Claw Code normalizes tool schemas before sending to OpenAI-compatible APIs: + +```rust +fn normalize_object_schema(schema: &mut Value) { + if let Some(obj) = schema.as_object_mut() { + if obj.get("type").and_then(Value::as_str) == Some("object") { + obj.entry("properties").or_insert_with(|| json!({})); + obj.entry("additionalProperties") + .or_insert(Value::Bool(false)); + } + // Recursively normalize nested objects + // ... + } +} +``` + +This adds: +- `"properties": {}` if missing for object types +- `"additionalProperties": false` if missing for object types + +## Possible Causes + +1. **Large request body** - Claw may send many tools with large schemas +2. **Streaming mode** - Claw uses streaming, may differ from non-streaming +3. **Backend transient errors** - "no parseable body" suggests backend crash/error +4. **Specific schema patterns** - Certain nested schemas may trigger validation issues + +## Information Needed from Provider + +1. Raw HTTP request body that caused the 400 error +2. Actual response body returned (for "no parseable body" cases) +3. Backend logs for the failing request +4. 
Any schema validation errors on the backend + +## Actual Failure Scenario + +The 400 error occurred when **starting a new session**, not resuming an existing tool call. + +Session evidence: +- `session-1777295962205-0.jsonl`: Previous session using `moonshotai/Kimi-K2.6`, completed successfully with no incomplete tool calls +- `session-1777301106352-0.jsonl`: New session with `glm-5.1-fast`, contains only session_meta - no messages + +**The failure happened on the initial API call** when Claw sends: +1. System prompt (large, includes tool documentation) +2. Tool definitions (~20+ tools with complex schemas) + +This is NOT a "resumed tool call" issue - it's an initial session startup failure. + +## Reproduction Attempt + +To reproduce with a realistic Claw-like request, try: + +```bash +# This simulates a typical Claw request with multiple tools +curl -v "https://api.neuralwatt.com/v1/chat/completions" \ + -H "Authorization: Bearer $API_KEY" \ + -H "Content-Type: application/json" \ + -d @- << 'EOF' +{ + "model": "glm-5.1-fast", + "messages": [ + {"role": "user", "content": "Read the file /mnt/data/git/RadGameRandom01/godot-rad-defense/scripts/placement_controller.gd"} + ], + "tools": [ + { + "type": "function", + "function": { + "name": "read_file", + "description": "Read the contents of a file. Supports text files and images.", + "parameters": { + "type": "object", + "properties": { + "path": {"type": "string", "description": "Path to the file to read"}, + "offset": {"type": "number", "description": "Line number to start reading from (1-indexed)"}, + "limit": {"type": "number", "description": "Maximum number of lines to read"} + }, + "required": ["path"], + "additionalProperties": false + } + } + } + ], + "tool_choice": "auto", + "stream": true +} +EOF +``` + +## Contact + +For follow-up, the user can provide: +- Claw session file with the failing request +- Timestamp of the error for backend log correlation +- Request ID if available in response headers