Fusion Harness: How to combine a more expensive main model and a sidekick model

wpnews.pro

| """Fusion-style delegation harness built with the OpenHands SDK. | | | Install: | | | uv pip install openhands-sdk openhands-tools | | | Run: | | | export LLM_API_KEY="..." # or export OPENHANDS_API_KEY="..." | | | export MAIN_MODEL="openhands/gpt-5.5" | | | export SIDEKICK_MODEL="openhands/minimax-m2.7" | | | uv run python fusion_harness_example.py "Find and fix the failing tests in this repo." | | | What this demonstrates: | | | - the main agent keeps one high-capability LLM profile for the whole task | | | - the cheap sidekick is registered as a sub-agent with its own LLM profile | | | - the main agent can issue several task-tool calls in one response | | | - tool_concurrency_limit lets those sidekick calls run concurrently | | | - sidekick returns ESCALATE_TO_MAIN when work exceeds its budget | | | """ | | | from __future__ import annotations | | | import os | | | import sys | | | import tempfile | | | from pathlib import Path | | | from pydantic import SecretStr | | | from openhands.sdk import Agent, AgentContext, Conversation, LLM, LLMProfileStore, Tool | | | from openhands.sdk.context import Skill | | | from openhands.sdk.subagent import register_agent | | | from openhands.sdk.subagent.schema import AgentDefinition | | | from openhands.tools.delegate import DelegationVisualizer | | | from openhands.tools.file_editor import FileEditorTool | | | from openhands.tools.task import TaskToolSet | | | from openhands.tools.terminal import TerminalTool | | | DEFAULT_MAIN_MODEL = "openhands/gpt-5.5" | | | DEFAULT_SIDEKICK_MODEL = "openhands/minimax-m2.7" | | | SIDEKICK_SKILL = """ | | | You are a fast, low-cost sidekick agent. Your job is to help the main agent, | | | not to complete the whole user request on your own. | | | Rules: | | | 1. Prefer read-only investigation: inspect files, locate relevant code, propose | | | small plans, and draft patch sketches. Do not edit files unless the prompt | | | explicitly asks you to. | | | 2. Keep output compact and structured. 
Use at most three tool calls unless the | | | prompt explicitly allows more. Prefer broad signals over exhaustive reading. | | | 3. If the task requires broad architecture decisions, many-file edits, unknown | | | product judgement, security-sensitive changes, or you are not confident, | | | stop and return exactly: | | | ESCALATE_TO_MAIN: <short reason> | | | 4. Otherwise return: | | | FINDINGS: | | | - ... | | | PROPOSED_NEXT_STEP: | | | - ... | | | RISK: | | | - low|medium|high, with one sentence why | | | """.strip() | | | ORCHESTRATOR_SUFFIX = """ | | | You are the main high-capability agent in a fusion-style harness. | | | Critical parallel-delegation protocol: | | | - If the user lists multiple independent areas, your initial delegation step MUST | | | be a single assistant response containing one task tool call per area, all | | | with subagent_type='sidekick'. | | | - Do not delegate only the first area and wait. Do not say you will launch | | | several sidekicks and then call only one. Actually emit all sidekick task calls | | | in the same tool-call batch so tool_concurrency_limit can run them in | | | parallel. | | | - Before the initial delegation batch, avoid direct terminal/file-editor work | | | unless the user did not provide enough information to form sidekick prompts. | | | - After all sidekick observations return, review them yourself, deduplicate, and | | | make final prioritization with the main model. | | | Good sidekick tasks: | | | - locate relevant files | | | - inspect test failures or logs | | | - summarize a narrow subsystem | | | - draft a small patch plan | | | - check docs or dependency files | | | Do not delegate broad design, final decisions, risky edits, or cross-cutting | | | implementation. If any sidekick returns ESCALATE_TO_MAIN, stop delegating that | | | thread and handle it yourself with the main model. | | | Always review sidekick output before acting. Treat sidekick output as advisory, | | | not authoritative. 
The final answer and any code changes are your responsibility. | |

| """.strip() | |
| def require_api_key() -> str: | |
| api_key = os.getenv("LLM_API_KEY") or os.getenv("OPENHANDS_API_KEY") | |

| def load_profile(store: LLMProfileStore, name: str, api_key: str) -> LLM: | |
| return store.load(name).model_copy(update={"api_key": SecretStr(api_key)}) | |
| def build_profiles(api_key: str) -> tuple[LLMProfileStore, LLM]: | |
| base_url = os.getenv("LLM_BASE_URL") | |

| main_model = os.getenv("MAIN_MODEL", DEFAULT_MAIN_MODEL) | | | sidekick_model = os.getenv("SIDEKICK_MODEL", DEFAULT_SIDEKICK_MODEL) | |

| profile_dir = Path(tempfile.mkdtemp(prefix="openhands-fusion-profiles-")) | |
| store = LLMProfileStore(base_dir=str(profile_dir)) | |

| def register_sidekick(store: LLMProfileStore, api_key: str) -> None: | |
| def create_sidekick(_: LLM) -> Agent: | |

| return Agent( | |

| llm=load_profile(store, "fusion-sidekick", api_key), | |
| tools=[ | |
| Tool(name=TerminalTool.name), | |
| Tool(name=FileEditorTool.name), | |

| ], | | | tool_concurrency_limit=3, | |

| agent_context=AgentContext( | |
| skills=[ | |

| max_iteration_per_run=int(os.getenv("SIDEKICK_MAX_ITERATIONS", "6")), | |
| max_budget_per_run=float(os.getenv("SIDEKICK_MAX_BUDGET", "0.10")), | |

| tools=[ | |
| Tool(name=TaskToolSet.name), | |
| Tool(name=TerminalTool.name), | |
| Tool(name=FileEditorTool.name), | |

| ], | |

| tool_concurrency_limit=int(os.getenv("MAIN_TOOL_CONCURRENCY", "8")), | |
| agent_context=AgentContext(system_message_suffix=ORCHESTRATOR_SUFFIX), | |

| ) | | | def fusion_prompt(user_task: str) -> str: | | | return f""" | | | User task: | | | {user_task} | | | Run this in a fusion-style workflow. | | | MANDATORY INITIAL DELEGATION RULE: | | | If the user task contains multiple independent areas, directories, files, | | | questions, or investigation threads, your next tool-using assistant response MUST | | | contain one task tool call for every independent item, all in the same response, | | | all with subagent_type='sidekick'. This is the core behavior being tested. Do | | | not call only one task. Do not perform direct terminal/file_editor investigation | | | first. Do not wait between sidekick launches. | | | After that parallel task batch: | | | 1. Wait for all sidekick observations. | | | 2. Review the sidekick reports yourself. | | | 3. If any sidekick escalates or the work becomes complex, continue with the main | | | model rather than switching models or restarting the conversation. | | | 4. Complete the task and summarize what was done. | |

| """.strip() | |
| def run(user_task: str, workspace: Path) -> None: | |
| api_key = require_api_key() | |
| store, main_llm = build_profiles(api_key) | |
| register_sidekick(store, api_key) | |
| conversation = Conversation( | |
| agent=build_main_agent(main_llm), | |

| workspace=workspace, | |

| visualizer=DelegationVisualizer(name="Fusion main"), | |
| persistence_dir=Path(tempfile.mkdtemp(prefix="openhands-fusion-run-")), | |
| max_iteration_per_run=int(os.getenv("MAIN_MAX_ITERATIONS", "40")), | |

| ) | |

| conversation.send_message(fusion_prompt(user_task)) | |
| conversation.run() | |

| metrics = conversation.conversation_stats.get_combined_metrics() | |

| print(f"\nTotal estimated cost: ${metrics.accumulated_cost:.6f}") | |
| def main() -> None: | |
| user_task = " ".join(sys.argv[1:]).strip() | |

| if not user_task: | | | user_task = "Analyze this repository and suggest the smallest useful improvement." | |

| run(user_task=user_task, workspace=Path.cwd()) | |
| if __name__ == "__main__": | |
| main() |

source & further reading

gist.github.com — original article

- Show your Neon branch in your terminal prompt (Starship) — copy-paste / agent-ready setup
- immunity-agent: the control plane for AI agents — intercept, policy, audit
- Fix: This model is not supported when using X-OpenAI-Internal-Codex-Responses-Lite

Fusion Harness: How to combine a more expensive main model and a sidekick model

Run your AI side-project on zahid.host