diff --git a/flexus_client_kit/ckit_cloudtool.py b/flexus_client_kit/ckit_cloudtool.py index 2d751961..9d9753aa 100644 --- a/flexus_client_kit/ckit_cloudtool.py +++ b/flexus_client_kit/ckit_cloudtool.py @@ -31,7 +31,8 @@ CLOUDTOOLS_VECDB = {"flexus_vector_search", "flexus_read_original"} CLOUDTOOLS_PYTHON = {"python_execute"} CLOUDTOOLS_WEB = {"web"} -CLOUDTOOLS_NOT_KANBAN = CLOUDTOOLS_VECDB | CLOUDTOOLS_PYTHON | CLOUDTOOLS_WEB +CLOUDTOOLS_AGENTS = {"flexus_hand_over_task"} +CLOUDTOOLS_NOT_KANBAN = CLOUDTOOLS_VECDB | CLOUDTOOLS_PYTHON | CLOUDTOOLS_WEB | CLOUDTOOLS_AGENTS CLOUDTOOLS_MCP = {"mcp_*"} diff --git a/flexus_simple_bots/integration_tester/__init__.py b/flexus_simple_bots/integration_tester/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/flexus_simple_bots/integration_tester/integration_tester-1024x1536.webp b/flexus_simple_bots/integration_tester/integration_tester-1024x1536.webp new file mode 100644 index 00000000..d7f73452 Binary files /dev/null and b/flexus_simple_bots/integration_tester/integration_tester-1024x1536.webp differ diff --git a/flexus_simple_bots/integration_tester/integration_tester-256x256.webp b/flexus_simple_bots/integration_tester/integration_tester-256x256.webp new file mode 100644 index 00000000..3295df1d Binary files /dev/null and b/flexus_simple_bots/integration_tester/integration_tester-256x256.webp differ diff --git a/flexus_simple_bots/integration_tester/integration_tester_bot.py b/flexus_simple_bots/integration_tester/integration_tester_bot.py new file mode 100644 index 00000000..26cdd87b --- /dev/null +++ b/flexus_simple_bots/integration_tester/integration_tester_bot.py @@ -0,0 +1,243 @@ +import asyncio +import json +import logging +from pathlib import Path +from typing import Any, Dict, List, Callable, Awaitable + +from flexus_client_kit import ckit_bot_exec, ckit_client, ckit_shutdown +from flexus_client_kit import ckit_bot_version +from flexus_client_kit import ckit_cloudtool, ckit_integrations_db, 
def _requested_names(raw: str) -> List[str]:
    """Normalize a user-supplied integration request into a list of names.

    The input is lower-cased and split on both ``,`` and ``;``. Blank items
    are dropped and repeated names are collapsed (first occurrence wins), so
    a request like ``"resend, resend"`` is reported once downstream.

    Args:
        raw: Free-form request text, e.g. ``"all"`` or ``"newsapi; resend"``.
            ``None`` and empty strings are accepted.

    Returns:
        ``["all"]`` when nothing specific was requested, otherwise the
        de-duplicated names in the order they were first mentioned.
    """
    text = (raw or "").strip().lower()
    if not text or text == "all":
        return ["all"]
    tokens = (tok.strip() for tok in text.replace(";", ",").split(","))
    # dict.fromkeys keeps insertion order while removing duplicates.
    names = list(dict.fromkeys(tok for tok in tokens if tok))
    return names or ["all"]


def _auth_provider_names(rec: "ckit_integrations_db.IntegrationRecord") -> List[str]:
    """List the auth-provider keys under which credentials for *rec* may be stored.

    Candidates, in lookup order: ``rec.integr_provider``, ``rec.integr_name``
    and ``"<integr_name>_manual"``. Empty values and duplicates are skipped.
    The ``_manual`` variant is only produced when ``integr_name`` is
    non-empty, so an unnamed record never yields a bare ``"_manual"`` key.

    Args:
        rec: Integration record; only ``integr_provider`` and ``integr_name``
            are read.

    Returns:
        Ordered, de-duplicated list of non-empty provider names.
    """
    candidates = [rec.integr_provider, rec.integr_name]
    if rec.integr_name:
        candidates.append(f"{rec.integr_name}_manual")
    return [name for name in dict.fromkeys(candidates) if name]
def classify_error(e: Exception) -> tuple[str, str]:
    """Map an exception raised by an integration call to a coarse category.

    The exception class name and message are matched case-insensitively.
    Explicit HTTP status codes are checked first so that a ``429`` whose URL
    happens to contain a network-looking word is still a rate limit. Keyword
    checks for ``rate`` and ``limit`` require that the word is not embedded
    in a longer word, which avoids false hits on e.g. "generate" or
    "delimiter".

    Args:
        e: The exception to classify.

    Returns:
        ``(category, detail)`` where category is one of ``AUTH_ERROR``,
        ``NETWORK_ERROR``, ``RATE_LIMIT`` or ``UNKNOWN_ERROR``. For unknown
        errors the detail is the first 200 characters of the message, or the
        exception class name when the message is empty.
    """
    import re  # function-scope import: the module does not import re at top level

    auth = ("AUTH_ERROR", "API key invalid or unauthorized")
    network = ("NETWORK_ERROR", "Network/connectivity issue")
    rate = ("RATE_LIMIT", "API rate limit or quota exceeded")

    # Including the class name lets message-less errors such as TimeoutError()
    # or ConnectionResetError() still be recognised.
    text = f"{type(e).__name__}: {e}".lower()

    # Status codes must stand alone, not be part of a longer number.
    status = re.search(r"(?<!\d)(401|403|429)(?!\d)", text)
    if status:
        return rate if status.group(1) == "429" else auth

    keyword_rules = (
        (auth, r"unauthorized|forbidden|invalid[ _]api"),
        (network, r"timeout|timed out|connect|dns|network|refused"),
        (rate, r"(?<![a-z])rate(?![a-z])|ratelimit|quota|(?<![a-z])limit|too many requests"),
    )
    for verdict, pattern in keyword_rules:
        if re.search(pattern, text):
            return verdict

    return "UNKNOWN_ERROR", str(e)[:200] or type(e).__name__


def _format_result(raw: str) -> str:
    """Condense a JSON-object tool result into a ``key=value, ...`` summary.

    Bookkeeping keys (``ok``, ``provider``, ``description``, ``help_text``)
    are omitted. Scalar values are rendered as ``key=value``; non-empty lists
    consisting only of strings are rendered as ``key=[a, b]``. Nested dicts
    and any other lists are left out of the summary.

    Args:
        raw: Tool output, expected to be a string that may contain JSON.

    Returns:
        The summary string, or *raw* unchanged when it is not valid JSON, is
        not a JSON object, or contains nothing that can be summarised.
    """
    try:
        data = json.loads(raw)
    except (TypeError, ValueError):
        # ValueError covers json.JSONDecodeError; TypeError covers non-string input.
        return raw
    if not isinstance(data, dict):
        return raw

    hidden = {"ok", "provider", "description", "help_text"}
    parts = []
    for key, value in data.items():
        if key in hidden:
            continue
        if isinstance(value, list):
            if value and all(isinstance(item, str) for item in value):
                # Joined separately: reusing the same quote type inside an
                # f-string expression is a syntax error before Python 3.12.
                joined = ", ".join(value)
                parts.append(f"{key}=[{joined}]")
        elif not isinstance(value, dict):
            parts.append(f"{key}={value}")
    return ", ".join(parts) if parts else raw
def _select_integrations(
    requested: List[str],
    supported: List[str],
    configured: set,
    configured_only: bool,
) -> tuple[List[str], List[str]]:
    """Split requested names into (selected, unsupported), both de-duplicated and ordered."""
    supported_set = set(supported)

    def eligible(name: str) -> bool:
        return name in configured or not configured_only

    # Unknown names are reported even when "all" is also present, instead of being dropped silently.
    unsupported = list(dict.fromkeys(x for x in requested if x != "all" and x not in supported_set))
    if "all" in requested:
        selected = [x for x in supported if eligible(x)]
    else:
        selected = list(dict.fromkeys(x for x in requested if x in supported_set and eligible(x)))
    return selected, unsupported


def _build_task_specs(selected: List[str], tool_name_by_integr: Dict[str, str]) -> List[Dict[str, Any]]:
    """Build one kanban task spec per selected integration, numbered i/total."""
    total = len(selected)
    return [
        {
            "title": f"Test {name} ({i}/{total})",
            "description": f"Integration: {name}\nTool: {tool_name_by_integr.get(name, name)}",
            "integrations": [name],
        }
        for i, name in enumerate(selected, start=1)
    ]


async def integration_tester_main_loop(
    fclient: ckit_client.FlexusClient,
    rcx: ckit_bot_exec.RobotContext,
) -> None:
    """Run the Integration Tester bot until shutdown is requested.

    Steps performed:
      1. Merge the persona setup with the setup schema and initialise the
         integrations listed in INTEGRATION_TESTER_INTEGRATIONS.
      2. Re-register every integration tool handler found on ``rcx`` wrapped
         in ``make_testing_wrapper`` so results and errors are reported in
         the tester's format.
      3. Register the ``integration_plan_batches`` tool, which returns a JSON
         plan (selected/unsupported integrations and task specs).
      4. Process collected events until ``ckit_shutdown.shutdown_event`` is set.

    Args:
        fclient: Flexus client (not used directly in this function).
        rcx: Robot context providing persona, external auth and tool registration.
    """
    setup = ckit_bot_exec.official_setup_mixing_procedure(INTEGRATION_TESTER_SETUP_SCHEMA, rcx.persona.persona_setup)

    # NOTE(review): the return value is not read here; the binding is kept so the
    # objects stay referenced for the lifetime of the loop — confirm whether needed.
    integr_objects = await ckit_integrations_db.main_loop_integrations_init(INTEGRATION_TESTER_INTEGRATIONS, rcx, setup)
    supported_integrations = sorted({r.integr_name for r in INTEGRATION_TESTER_INTEGRATIONS})
    # First tool of each integration is the one named in the task description; invariant, so built once.
    tool_name_by_integr = {
        r.integr_name: r.integr_tools[0].name
        for r in INTEGRATION_TESTER_INTEGRATIONS
        if r.integr_tools
    }

    for rec in INTEGRATION_TESTER_INTEGRATIONS:
        for tool in rec.integr_tools:
            # Reads the handler table on rcx to wrap handlers that were already registered.
            original_handler = rcx._handler_per_tool.get(tool.name)
            if original_handler:
                rcx.on_tool_call(tool.name)(
                    make_testing_wrapper(
                        original_handler,
                        rec.integr_name,
                        tool.name,
                    )
                )

    @rcx.on_tool_call(INTEGRATION_TESTER_PLAN_BATCHES_TOOL.name)
    async def toolcall_plan_batches(toolcall, model_produced_args):
        args = model_produced_args or {}
        # `or "all"` also covers an explicit null, which str() would turn into "none".
        req = _requested_names(str(args.get("requested") or "all"))
        configured_only = bool(args.get("configured_only", True))

        configured = {x["name"] for x in get_configured_integrations(rcx.external_auth, supported_integrations)}
        selected, unsupported = _select_integrations(req, supported_integrations, configured, configured_only)
        task_specs = _build_task_specs(selected, tool_name_by_integr)

        return json.dumps({
            "ok": True,
            "requested": req,
            "supported": supported_integrations,
            "configured": sorted(configured),
            "configured_only": configured_only,
            "selected": selected,
            "unsupported": unsupported,
            "task_specs": task_specs,
        }, indent=2)

    logger.info(f"Integration Tester started. Supported integrations: {supported_integrations}")

    while not ckit_shutdown.shutdown_event.is_set():
        await rcx.unpark_collected_events(sleep_if_no_work=10.0)

    logger.info(f"{rcx.persona.persona_id} exit")
# Tools both experts may call. The names are sorted because iterating a set of
# strings has no stable order between interpreter runs (hash randomization),
# which would otherwise make the comma-joined value differ from install to install.
_EXPERT_ALLOW_TOOLS = ",".join(sorted(TOOL_NAMESET | ckit_cloudtool.KANBAN_ADVANCED | {"flexus_hand_over_task"}))

# Marketplace experts: "default" is the interactive planner chat, "autonomous"
# is the worker expert (NATURE_AUTONOMOUS) with a 600 inactivity timeout.
EXPERTS = [
    ("default", ckit_bot_install.FMarketplaceExpertInput(
        fexp_system_prompt=integration_tester_prompts.DEFAULT_PROMPT,
        fexp_python_kernel="",
        fexp_allow_tools=_EXPERT_ALLOW_TOOLS,
        fexp_nature="NATURE_INTERACTIVE",
        fexp_builtin_skills=ckit_skills.read_name_description(integration_tester_bot.INTEGRATION_TESTER_ROOTDIR, integration_tester_bot.INTEGRATION_TESTER_SKILLS),
        fexp_description="Test API key integrations",
    )),
    ("autonomous", ckit_bot_install.FMarketplaceExpertInput(
        fexp_system_prompt=integration_tester_prompts.AUTONOMOUS_PROMPT,
        fexp_python_kernel="",
        fexp_allow_tools=_EXPERT_ALLOW_TOOLS,
        fexp_nature="NATURE_AUTONOMOUS",
        fexp_inactivity_timeout=600,
        fexp_builtin_skills=ckit_skills.read_name_description(integration_tester_bot.INTEGRATION_TESTER_ROOTDIR, integration_tester_bot.INTEGRATION_TESTER_SKILLS),
        fexp_description="Autonomous integration testing",
    )),
]
def _ensure_marketplace_images() -> None:
    """Make sure both marketplace pictures exist in the bot directory.

    For each picture that is missing, the same-sized picture from the sibling
    ``bob`` directory is copied in its place, provided that fallback file
    exists. Pictures that are already present are never overwritten.
    """
    bot_dir = integration_tester_bot.INTEGRATION_TESTER_ROOTDIR
    fallback_dir = bot_dir.parent / "bob"
    # (picture expected in the bot directory, fallback picture to copy from)
    picture_pairs = (
        ("integration_tester-1024x1536.webp", "bob-1024x1536.webp"),
        ("integration_tester-256x256.webp", "bob-256x256.webp"),
    )
    for wanted_name, fallback_name in picture_pairs:
        wanted = bot_dir / wanted_name
        if wanted.exists():
            continue
        fallback = fallback_dir / fallback_name
        if not fallback.exists():
            continue
        wanted.write_bytes(fallback.read_bytes())
Inbox, move exactly one task from Inbox to Todo using op=inbox_to_todo with a single task id. Never join multiple tasks together. Then respond with \"1 task sorted\" or \"0 tasks sorted\". Do nothing else.", + }, + prompts_common.SCHED_TODO_5M | {"sched_when": "EVERY:1m", "sched_fexp_name": "autonomous"}, + ], + marketable_setup_default=integration_tester_bot.INTEGRATION_TESTER_SETUP_SCHEMA, + marketable_featured_actions=[ + {"feat_question": "Test all integrations", "feat_expert": "default"}, + {"feat_question": "Test gmail", "feat_expert": "default"}, + {"feat_question": "Test google_sheets", "feat_expert": "default"}, + {"feat_question": "Test telegram", "feat_expert": "default"}, + {"feat_question": "Test slack", "feat_expert": "default"}, + {"feat_question": "Test notion", "feat_expert": "default"}, + {"feat_question": "Test airtable", "feat_expert": "default"}, + {"feat_question": "Test hubspot", "feat_expert": "default"}, + {"feat_question": "Test twilio", "feat_expert": "default"}, + {"feat_question": "Test resend", "feat_expert": "default"}, + {"feat_question": "Test newsapi", "feat_expert": "default"}, + ], + marketable_intro_message="Hi! I'm Integration Tester. 
I create deterministic kanban batch tasks and resolve them autonomously.", + marketable_preferred_model_expensive="gpt-5.4-mini", + marketable_preferred_model_cheap="gpt-5.4-mini", + marketable_experts=[(name, exp.filter_tools(integration_tester_bot.TOOLS)) for name, exp in EXPERTS], + add_integrations_into_expert_system_prompt=integration_tester_bot.INTEGRATION_TESTER_INTEGRATIONS, + marketable_tags=["testing", "integrations", "qa"], + marketable_forms=ckit_bot_install.load_form_bundles(__file__), + marketable_auth_supported=[ + "gmail", + "google_business", + "google_ads", + "google", + "notion", + "notion_manual", + "airtable", + "hubspot", + "slack", + "telegram", + "twilio_manual", + "resend", + ], + marketable_auth_scopes={ + "gmail": ckit_integrations_db.GOOGLE_OAUTH_BASE_SCOPES + [ + "https://www.googleapis.com/auth/gmail.readonly", + "https://www.googleapis.com/auth/gmail.compose", + "https://www.googleapis.com/auth/gmail.modify", + "https://www.googleapis.com/auth/gmail.send", + "https://www.googleapis.com/auth/gmail.labels", + ], + "google_business": ckit_integrations_db.GOOGLE_OAUTH_BASE_SCOPES + [ + "https://www.googleapis.com/auth/business.manage", + ], + "google_ads": ckit_integrations_db.GOOGLE_OAUTH_BASE_SCOPES + [ + "https://www.googleapis.com/auth/adwords", + ], + "google": ckit_integrations_db.GOOGLE_OAUTH_BASE_SCOPES + [ + "https://www.googleapis.com/auth/spreadsheets", + ], + "slack": [ + "channels:read", + "chat:write", + "chat:write.customize", + "files:read", + "users:read", + "im:read", + ], + }, + ) + + +if __name__ == "__main__": + client = ckit_client.FlexusClient("integration_tester_install") + asyncio.run(install(client)) diff --git a/flexus_simple_bots/integration_tester/integration_tester_prompts.py b/flexus_simple_bots/integration_tester/integration_tester_prompts.py new file mode 100644 index 00000000..8af7d814 --- /dev/null +++ b/flexus_simple_bots/integration_tester/integration_tester_prompts.py @@ -0,0 +1,43 @@ +DEFAULT_PROMPT 
# System prompt for the "autonomous" expert that executes one kanban task.
# The resolution_summary instruction names the value to submit (the results
# table); a bare "resolution_summary=" gave the worker nothing to fill in.
AUTONOMOUS_PROMPT = """You are Integration Tester smoke test orchestrator. You own one kanban task.

Parse the integration from task description lines like "Integration: name" or "Integrations: name1,name2,...".

For each integration:
1. Call op=help to discover available operations
2. Call op=list_methods to see the method catalog
3. Pick 3 different read-only operations that return real provider data (not help, not local status like has_api_key, ready, configured, method_count)
4. Execute all 3 calls and collect results

Classification:
- PASSED: at least 1 of the 3 calls succeeded with real provider data
- FAILED: all 3 calls failed or errored
- Build a markdown table: Integration | Status | Details

Resolve with flexus_kanban_advanced:
- resolution_code=PASSED only if ALL integrations PASSED
- resolution_code=FAILED if ANY integration failed
- resolution_summary=the markdown table built above

Do not hand over, delegate, or wait for user input.
"""