| """Fusion-style delegation harness built with the OpenHands SDK. | |
| Install: | |
| uv pip install openhands-sdk openhands-tools | |
| Run: | |
| export LLM_API_KEY="..." # or export OPENHANDS_API_KEY="..." | |
| export MAIN_MODEL="openhands/gpt-5.5" | |
| export SIDEKICK_MODEL="openhands/minimax-m2.7" | |
| uv run python fusion_harness_example.py "Find and fix the failing tests in this repo." | |
| What this demonstrates: | |
| - the main agent keeps one high-capability LLM profile for the whole task | |
| - the cheap sidekick is registered as a sub-agent with its own LLM profile | |
| - the main agent can issue several task-tool calls in one response | |
| - tool_concurrency_limit lets those sidekick calls run concurrently | |
| - sidekick returns ESCALATE_TO_MAIN when work exceeds its budget | |
| """ | |
from __future__ import annotations

import atexit
import os
import shutil
import sys
import tempfile
from pathlib import Path

from openhands.sdk import Agent, AgentContext, Conversation, LLM, LLMProfileStore, Tool
from openhands.sdk.context import Skill
from openhands.sdk.subagent import register_agent
from openhands.sdk.subagent.schema import AgentDefinition
from openhands.tools.delegate import DelegationVisualizer
from openhands.tools.file_editor import FileEditorTool
from openhands.tools.task import TaskToolSet
from openhands.tools.terminal import TerminalTool
from pydantic import SecretStr
# Model identifiers used when MAIN_MODEL / SIDEKICK_MODEL are not set in the
# environment (see build_profiles).
DEFAULT_MAIN_MODEL = "openhands/gpt-5.5"
DEFAULT_SIDEKICK_MODEL = "openhands/minimax-m2.7"
# Protocol text attached to every sidekick agent as the content of its
# "fusion_sidekick_protocol" skill (see register_sidekick). It defines the two
# reply shapes the sidekick may produce: an ESCALATE_TO_MAIN line, or a
# FINDINGS / PROPOSED_NEXT_STEP / RISK report.
SIDEKICK_SKILL = """
You are a fast, low-cost sidekick agent. Your job is to help the main agent,
not to complete the whole user request on your own.
Rules:
1. Prefer read-only investigation: inspect files, locate relevant code, propose
small plans, and draft patch sketches. Do not edit files unless the prompt
explicitly asks you to.
2. Keep output compact and structured. Use at most three tool calls unless the
prompt explicitly allows more. Prefer broad signals over exhaustive reading.
3. If the task requires broad architecture decisions, many-file edits, unknown
product judgement, security-sensitive changes, or you are not confident,
stop and return exactly:
ESCALATE_TO_MAIN: <short reason>
4. Otherwise return:
FINDINGS:
- ...
PROPOSED_NEXT_STEP:
- ...
RISK:
- low|medium|high, with one sentence why
""".strip()
# System-message suffix given to the main agent (see build_main_agent). It
# instructs the main model to fan out one sidekick task call per independent
# area in a single response, and to treat sidekick reports as advisory.
ORCHESTRATOR_SUFFIX = """
You are the main high-capability agent in a fusion-style harness.
Critical parallel-delegation protocol:
- If the user lists multiple independent areas, your initial delegation step MUST
be a single assistant response containing one task tool call per area, all
with subagent_type='sidekick'.
- Do not delegate only the first area and wait. Do not say you will launch
several sidekicks and then call only one. Actually emit all sidekick task calls
in the same tool-call batch so tool_concurrency_limit can run them in
parallel.
- Before the initial delegation batch, avoid direct terminal/file-editor work
unless the user did not provide enough information to form sidekick prompts.
- After all sidekick observations return, review them yourself, deduplicate, and
make final prioritization with the main model.
Good sidekick tasks:
- locate relevant files
- inspect test failures or logs
- summarize a narrow subsystem
- draft a small patch plan
- check docs or dependency files
Do not delegate broad design, final decisions, risky edits, or cross-cutting
implementation. If any sidekick returns ESCALATE_TO_MAIN, stop delegating that
thread and handle it yourself with the main model.
Always review sidekick output before acting. Treat sidekick output as advisory,
not authoritative. The final answer and any code changes are your responsibility.
""".strip()
def require_api_key() -> str:
    """Return the API key used for every LLM profile in this harness.

    ``LLM_API_KEY`` takes precedence and ``OPENHANDS_API_KEY`` is the
    fallback. Surrounding whitespace is stripped, and a value that is empty
    after stripping counts as unset, so a blank ``LLM_API_KEY`` cannot mask a
    valid ``OPENHANDS_API_KEY`` or be returned as if it were a real key.

    Returns:
        The first non-blank key found, without surrounding whitespace.

    Raises:
        RuntimeError: If neither variable holds a non-blank value.
    """
    for env_name in ("LLM_API_KEY", "OPENHANDS_API_KEY"):
        api_key = os.getenv(env_name, "").strip()
        if api_key:
            return api_key
    raise RuntimeError(
        "Set LLM_API_KEY, or OPENHANDS_API_KEY when using OpenHands-hosted models."
    )


def save_profile(
    store: LLMProfileStore,
    name: str,
    usage_id: str,
    model: str,
    base_url: str | None,
) -> None:
    """Save an LLM profile called ``name`` into ``store``.

    The profile is built from ``usage_id``, ``model`` and ``base_url`` only;
    no API key is set on it and it is saved with ``include_secrets=False``.
    Callers attach the key after loading (see ``load_profile``).
    """
    profile = LLM(
        usage_id=usage_id,
        model=model,
        base_url=base_url,
    )
    store.save(name, profile, include_secrets=False)
def load_profile(store: LLMProfileStore, name: str, api_key: str) -> LLM:
    """Load the profile saved as ``name`` and return a copy carrying ``api_key``.

    The key is wrapped in ``SecretStr`` and applied through ``model_copy``, so
    the object returned by ``store.load`` is not modified in place.
    """
    stored_llm = store.load(name)
    key_override = {"api_key": SecretStr(api_key)}
    return stored_llm.model_copy(update=key_override)
def build_profiles(api_key: str) -> tuple[LLMProfileStore, LLM]:
    """Create a temporary profile store holding the main and sidekick profiles.

    Models come from ``MAIN_MODEL`` / ``SIDEKICK_MODEL`` and the endpoint from
    ``LLM_BASE_URL``. A variable that is unset *or set to an empty string*
    falls back to the default model (or to no base URL), so an accidental
    ``export MAIN_MODEL=`` does not produce a profile with an empty model name.

    Args:
        api_key: Key attached to the returned main LLM; it is not written to
            the stored profiles.

    Returns:
        ``(store, main_llm)``: the profile store containing ``fusion-main`` and
        ``fusion-sidekick``, and the loaded main LLM with ``api_key`` applied.
    """
    # `or` instead of a getenv default: getenv only applies its default when
    # the variable is missing, not when it is present but empty.
    base_url = os.getenv("LLM_BASE_URL") or None
    main_model = os.getenv("MAIN_MODEL") or DEFAULT_MAIN_MODEL
    sidekick_model = os.getenv("SIDEKICK_MODEL") or DEFAULT_SIDEKICK_MODEL

    profile_dir = Path(tempfile.mkdtemp(prefix="openhands-fusion-profiles-"))
    # mkdtemp never cleans up after itself. The sidekick factory reloads its
    # profile from this directory while the run is in progress, so removal is
    # deferred to interpreter exit rather than done when this function returns.
    atexit.register(shutil.rmtree, profile_dir, ignore_errors=True)

    store = LLMProfileStore(base_dir=str(profile_dir))
    save_profile(
        store,
        name="fusion-main",
        usage_id="main",
        model=main_model,
        base_url=base_url,
    )
    save_profile(
        store,
        name="fusion-sidekick",
        usage_id="sidekick",
        model=sidekick_model,
        base_url=base_url,
    )
    return store, load_profile(store, "fusion-main", api_key)
def register_sidekick(store: LLMProfileStore, api_key: str) -> None:
    """Register the ``sidekick`` sub-agent type.

    The registered factory builds an agent whose LLM is the ``fusion-sidekick``
    profile loaded from ``store`` with ``api_key`` applied. The run limits on
    the agent definition are read from ``SIDEKICK_MAX_ITERATIONS`` (default
    ``"6"``) and ``SIDEKICK_MAX_BUDGET`` (default ``"0.10"``) at registration
    time.
    """

    def create_sidekick(_unused_llm: LLM) -> Agent:
        # The LLM handed to the factory is ignored: the sidekick always runs on
        # its own stored profile, reloaded on every factory call.
        sidekick_llm = load_profile(store, "fusion-sidekick", api_key)
        sidekick_tools = [
            Tool(name=TerminalTool.name),
            Tool(name=FileEditorTool.name),
        ]
        protocol_skill = Skill(
            name="fusion_sidekick_protocol",
            content=SIDEKICK_SKILL,
            trigger=None,
        )
        sidekick_context = AgentContext(
            skills=[protocol_skill],
            system_message_suffix="Stay within the sidekick protocol.",
        )
        return Agent(
            llm=sidekick_llm,
            tools=sidekick_tools,
            tool_concurrency_limit=3,
            agent_context=sidekick_context,
        )

    iteration_cap = int(os.getenv("SIDEKICK_MAX_ITERATIONS", "6"))
    budget_cap = float(os.getenv("SIDEKICK_MAX_BUDGET", "0.10"))
    sidekick_definition = AgentDefinition(
        name="sidekick",
        description=(
            "Fast low-cost sub-agent for bounded investigation, patch "
            "sketches, and escalation signals."
        ),
        max_iteration_per_run=iteration_cap,
        max_budget_per_run=budget_cap,
    )
    register_agent(
        name="sidekick",
        factory_func=create_sidekick,
        description=sidekick_definition,
    )


def build_main_agent(main_llm: LLM) -> Agent:
    """Build the orchestrating agent that runs on ``main_llm``.

    It gets the task tool set plus terminal and file-editor tools, a tool
    concurrency limit read from ``MAIN_TOOL_CONCURRENCY`` (default ``"8"``),
    and ``ORCHESTRATOR_SUFFIX`` as its system-message suffix.
    """
    orchestrator_tools = [
        Tool(name=tool_name)
        for tool_name in (TaskToolSet.name, TerminalTool.name, FileEditorTool.name)
    ]
    concurrency_limit = int(os.getenv("MAIN_TOOL_CONCURRENCY", "8"))
    orchestrator_context = AgentContext(system_message_suffix=ORCHESTRATOR_SUFFIX)
    return Agent(
        llm=main_llm,
        tools=orchestrator_tools,
        tool_concurrency_limit=concurrency_limit,
        agent_context=orchestrator_context,
    )
def fusion_prompt(user_task: str) -> str:
    """Wrap ``user_task`` in the delegation instructions sent to the main agent.

    The task text is placed under a ``User task:`` heading, followed by the
    mandatory initial-delegation rule and the post-delegation steps. Leading
    and trailing whitespace of the assembled prompt is stripped.
    """
    prompt = f"""
User task:
{user_task}
Run this in a fusion-style workflow.
MANDATORY INITIAL DELEGATION RULE:
If the user task contains multiple independent areas, directories, files,
questions, or investigation threads, your next tool-using assistant response MUST
contain one task tool call for every independent item, all in the same response,
all with subagent_type='sidekick'. This is the core behavior being tested. Do
not call only one task. Do not perform direct terminal/file_editor investigation
first. Do not wait between sidekick launches.
After that parallel task batch:
1. Wait for all sidekick observations.
2. Review the sidekick reports yourself.
3. If any sidekick escalates or the work becomes complex, continue with the main
model rather than switching models or restarting the conversation.
4. Complete the task and summarize what was done.
"""
    return prompt.strip()
def run(user_task: str, workspace: Path) -> None:
    """Run one fusion-style conversation for ``user_task`` in ``workspace``.

    Resolves the API key, builds the LLM profiles, registers the sidekick
    sub-agent, then sends the wrapped prompt to a conversation driven by the
    main agent. The conversation persists into a fresh temporary directory and
    its iteration limit is read from ``MAIN_MAX_ITERATIONS`` (default ``"40"``).
    After the run, the combined accumulated cost is printed.
    """
    api_key = require_api_key()
    store, main_llm = build_profiles(api_key)
    register_sidekick(store, api_key)

    main_agent = build_main_agent(main_llm)
    visualizer = DelegationVisualizer(name="Fusion main")
    persistence_dir = Path(tempfile.mkdtemp(prefix="openhands-fusion-run-"))
    iteration_limit = int(os.getenv("MAIN_MAX_ITERATIONS", "40"))

    conversation = Conversation(
        agent=main_agent,
        workspace=workspace,
        visualizer=visualizer,
        persistence_dir=persistence_dir,
        max_iteration_per_run=iteration_limit,
    )
    conversation.send_message(fusion_prompt(user_task))
    conversation.run()

    combined = conversation.conversation_stats.get_combined_metrics()
    print(f"\nTotal estimated cost: ${combined.accumulated_cost:.6f}")
def main() -> None:
    """Command-line entry point.

    All command-line arguments are joined with spaces to form the task. When
    that is empty, a default repository-analysis task is used. The task runs
    against the current working directory.
    """
    fallback_task = "Analyze this repository and suggest the smallest useful improvement."
    requested_task = " ".join(sys.argv[1:]).strip()
    run(user_task=requested_task or fallback_task, workspace=Path.cwd())
# Run the harness only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()