Everything you have built so far is reactive.
User sends a message. System processes it. System sends a response. Done.
An agent is different. An agent receives a goal, not a message. It decides what steps to take to achieve that goal. It uses tools. It observes the results. It adjusts its plan. It continues until the goal is achieved or it determines the goal cannot be achieved.
"Summarize this document" is a task. One call. One response.
"Research recent papers on transformer efficiency, write a comparison table, and save it as a CSV" is a goal. An agent needs to search the web multiple times, decide which papers are relevant, extract data from multiple sources, format it consistently, handle failures, and write to disk. Five to twenty tool calls. Dynamic decisions at each step.
This is the frontier of AI engineering. Agents are brittle. They fail in surprising ways. They are also what makes AI systems feel genuinely useful rather than just responsive.
# Side-by-side description of fixed pipelines versus agents.
# An empty string prints a blank line, exactly like a bare print().
for line in (
    "Agent vs Non-Agent:",
    "",
    "NON-AGENT (chain/pipeline):",
    " - Fixed sequence of steps",
    " - Steps determined at design time",
    " - No ability to react to intermediate results",
    " - Predictable, debuggable, less capable",
    "",
    "AGENT:",
    " - LLM decides what to do at each step",
    " - Steps determined at runtime based on observations",
    " - Can loop, backtrack, try alternative approaches",
    " - Powerful, unpredictable, capable of novel solutions",
    "",
):
    print(line)

# Property name -> one-line explanation, in presentation order.
agent_properties = {}
agent_properties["Perception"] = "Receives inputs: user goal, tool results, memory"
agent_properties["Reasoning"] = "LLM decides what to do next given current state"
agent_properties["Action"] = "Executes tools, writes files, calls APIs, searches"
agent_properties["Memory"] = "Maintains context across multiple steps"
agent_properties["Goal"] = "Works toward a specified objective, not just responding"

print("The five properties of an agent:")
for prop in agent_properties:
    description = agent_properties[prop]
    # Left-align the property name in a 15-character column.
    print(f" {prop:<15}: {description}")
print()

# One worked example of the Thought / Action / Observation cycle.
print("The ReAct pattern (Reason + Act):")
for line in (
    " Thought: 'I need to find the population of Tokyo'",
    " Action: search_web('Tokyo population 2024')",
    " Observation: '13.96 million in city proper, 37.4M metro'",
    " Thought: 'I have the answer, now I can respond'",
    " Answer: 'Tokyo's population is approximately 13.96 million...'",
):
    print(line)
python
import json
import os
from typing import List, Dict, Callable, Any, Optional
from dataclasses import dataclass, field
import anthropic
@dataclass
class Tool:
    """A callable capability the agent can offer to the model.

    Bundles the Python function with the metadata sent to the API.
    """

    # Identifier the model uses to request this tool.
    name: str
    # Natural-language explanation of what the tool does.
    description: str
    # Python callable invoked with the model-supplied arguments.
    fn: Callable
    # JSON-schema style description of the tool's input.
    schema: Dict

    def to_api_format(self) -> Dict:
        """Return the tool definition as a dict with the API's key names."""
        payload: Dict = {}
        payload["name"] = self.name
        payload["description"] = self.description
        # The schema is exposed under the key "input_schema".
        payload["input_schema"] = self.schema
        return payload
class AgentMemory:
    """Ordered log of the messages exchanged during one agent run.

    ``max_steps`` is stored on the instance; this class does not itself
    limit how many steps are recorded.
    """

    def __init__(self, max_steps: int = 20):
        self.max_steps = max_steps
        self.steps: List[Dict] = []

    def add_step(self, role: str, content: Any):
        """Append one message with the given role and content."""
        entry = {"role": role, "content": content}
        self.steps.append(entry)

    def get_messages(self) -> List[Dict]:
        """Return a shallow copy of the recorded messages."""
        return list(self.steps)

    def __len__(self):
        """Return the number of recorded messages."""
        return len(self.steps)
class Agent:
    """
    A simple but complete agent using Claude with tool use.
    Implements the ReAct (Reason + Act) loop.

    Each iteration sends the conversation so far to the model. If the model
    requests tools, they are executed and their results appended to the
    conversation; if the model finishes its turn, its text is returned.
    """

    def __init__(self, tools: List["Tool"], system_prompt: str = "",
                 model: str = "claude-3-5-haiku-20241022",
                 max_steps: int = 15, verbose: bool = True):
        """Create an agent.

        Args:
            tools: Tools the model may call; indexed by their ``name``.
            system_prompt: System prompt; an empty value selects the default.
            model: Model identifier passed to the Messages API.
            max_steps: Maximum number of model calls per ``run``.
            verbose: When true, progress is printed to stdout.
        """
        self.client = anthropic.Anthropic(api_key=os.environ.get("ANTHROPIC_API_KEY"))
        self.tools = {t.name: t for t in tools}
        self.system = system_prompt or self._default_system()
        self.model = model
        self.max_steps = max_steps
        self.verbose = verbose

    def _default_system(self) -> str:
        """Return the system prompt used when none is supplied."""
        return (
            "You are a helpful AI agent. You have access to tools to help complete tasks.\n"
            "Use tools when needed. Think step by step. When you have enough information to answer, respond directly.\n"
            "If a task fails, explain what went wrong and what you tried."
        )

    def _execute_tool(self, tool_name: str, tool_input: Dict) -> str:
        """Run one tool and return its result as a string.

        Never raises: an unknown tool name or an exception inside the tool is
        reported as a JSON object with an ``"error"`` key, so the model can
        see the failure and react to it.
        """
        if tool_name not in self.tools:
            return json.dumps({"error": f"Tool '{tool_name}' not found"})
        try:
            result = self.tools[tool_name].fn(**tool_input)
            # String results pass through untouched; everything else is JSON-encoded.
            return json.dumps(result) if not isinstance(result, str) else result
        except Exception as e:
            # Deliberately broad: any tool failure becomes an observation.
            return json.dumps({"error": str(e)})

    @staticmethod
    def _first_text(content) -> str:
        """Return the text of the first ``text`` block in *content*, or ``""``."""
        return next((b.text for b in content if b.type == "text"), "")

    def run(self, goal: str) -> str:
        """Work toward *goal* and return the agent's final answer.

        Returns:
            The model's answer when it ends its turn; a message naming the
            stop reason (plus any partial text) when the model stops for any
            other reason than a tool request, e.g. ``"max_tokens"``; or a
            fixed message when ``max_steps`` model calls were not enough.
        """
        memory = AgentMemory(self.max_steps)
        api_tools = [t.to_api_format() for t in self.tools.values()]
        memory.add_step("user", goal)
        if self.verbose:
            print(f"\n{'='*60}")
            print(f"Agent Goal: {goal}")
            print(f"{'='*60}")

        for step in range(self.max_steps):
            response = self.client.messages.create(
                model=self.model,
                max_tokens=1024,
                system=self.system,
                tools=api_tools,
                messages=memory.get_messages(),
            )

            if response.stop_reason == "end_turn":
                answer = self._first_text(response.content)
                if self.verbose:
                    print(f"\n✓ Final Answer: {answer[:200]}")
                return answer

            if response.stop_reason != "tool_use":
                # The model stopped without finishing or asking for a tool.
                # Report the real reason instead of blaming the step limit.
                partial = self._first_text(response.content)
                message = f"Agent stopped early (stop_reason={response.stop_reason!r})."
                if partial:
                    message += f" Partial response: {partial}"
                if self.verbose:
                    print(f"\n✗ {message[:200]}")
                return message

            # Record the assistant turn, run every requested tool, and send
            # all results back together in a single user turn.
            memory.add_step("assistant", response.content)
            tool_results = []
            for block in response.content:
                if block.type != "tool_use":
                    continue
                if self.verbose:
                    print(f"\n[Step {step+1}] 🔧 {block.name}({json.dumps(block.input)[:80]})")
                result = self._execute_tool(block.name, block.input)
                if self.verbose:
                    print(f" ↳ {result[:120]}")
                tool_results.append({
                    "type": "tool_result",
                    "tool_use_id": block.id,
                    "content": result,
                })
            memory.add_step("user", tool_results)

        return "Agent reached maximum steps without completing the task."
# Progress message: the Agent class is defined at this point; the tool functions follow.
print("Agent class built. Now we need tools.")
python
import math
import datetime
import random
def calculator(expression: str) -> Dict:
    """Evaluate a mathematical expression safely.

    Supports numeric literals, ``+ - * / // **``, unary ``+``/``-``,
    parentheses, the constants ``pi`` and ``e`` and the function ``sqrt``.
    The expression is parsed into a syntax tree and only those node types
    are evaluated; nothing is passed to ``eval``.

    Args:
        expression: The expression text, e.g. ``"847 * 0.15 + sqrt(144)"``.

    Returns:
        ``{"result": <float rounded to 6 places>, "expression": expression}``
        on success, otherwise ``{"error": <message>}``. Never raises for bad
        input.
    """
    import ast
    import operator

    if not isinstance(expression, str):
        return {"error": "Expression must be a string"}

    constants = {"pi": math.pi, "e": math.e}
    functions = {"sqrt": math.sqrt}
    unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}
    binary_ops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.FloorDiv: operator.floordiv,
        # Float power overflows quickly instead of building a gigantic
        # integer for inputs such as 9**9**9.
        ast.Pow: lambda base, exp: float(base) ** float(exp),
    }

    def evaluate(node):
        """Recursively evaluate one whitelisted syntax-tree node."""
        if isinstance(node, ast.Expression):
            return evaluate(node.body)
        if isinstance(node, ast.Constant):
            value = node.value
            # bool is a subclass of int, so exclude it explicitly.
            if isinstance(value, bool) or not isinstance(value, (int, float)):
                raise ValueError("Only numeric literals are allowed")
            return value
        if isinstance(node, ast.Name):
            if node.id not in constants:
                raise ValueError(f"Unknown name '{node.id}'")
            return constants[node.id]
        if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
            return unary_ops[type(node.op)](evaluate(node.operand))
        if isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
            return binary_ops[type(node.op)](evaluate(node.left), evaluate(node.right))
        if isinstance(node, ast.Call):
            callee = node.func
            if not isinstance(callee, ast.Name) or callee.id not in functions or node.keywords:
                raise ValueError("Unsupported function call")
            return functions[callee.id](*[evaluate(arg) for arg in node.args])
        raise ValueError("Unsupported expression")

    try:
        # Parsing in "eval" mode rejects leading whitespace, so strip it first.
        tree = ast.parse(expression.strip(), mode="eval")
        result = evaluate(tree)
        return {"result": round(float(result), 6), "expression": expression}
    except (SyntaxError, ValueError, TypeError, ArithmeticError, RecursionError) as e:
        return {"error": str(e)}
def web_search(query: str, max_results: int = 3) -> Dict:
    """Simulated web search (replace with real API in production).

    A canned topic matches when any word of its key occurs as a substring of
    the lower-cased query; the first matching topic wins. Queries that match
    no topic get a single generic placeholder result.
    """
    canned = {
        "transformer architecture": [
            {"title": "Attention Is All You Need", "snippet": "Introduces the transformer architecture using self-attention mechanisms.", "url": "arxiv.org/abs/1706.03762"},
            {"title": "BERT paper", "snippet": "Bidirectional encoder representations from transformers for NLP.", "url": "arxiv.org/abs/1810.04805"},
        ],
        "python list comprehension": [
            {"title": "Python Docs", "snippet": "List comprehensions provide a concise way to create lists: [expr for item in iterable if condition]", "url": "docs.python.org"},
        ],
        "climate change": [
            {"title": "IPCC Report 2023", "snippet": "Global surface temperature increased by 1.1°C above pre-industrial levels.", "url": "ipcc.ch/report/ar6"},
            {"title": "NASA Climate", "snippet": "CO2 levels reached 421 ppm in 2023, highest in 3 million years.", "url": "climate.nasa.gov"},
        ],
    }

    needle = query.lower()
    hits = next(
        (
            entries
            for topic, entries in canned.items()
            if any(token in needle for token in topic.split())
        ),
        None,
    )
    if hits is not None:
        return {"query": query, "results": hits[:max_results]}

    # No canned topic matched: fabricate one placeholder entry.
    placeholder = {
        "title": f"Result for '{query}'",
        "snippet": f"Information about {query}. This is a simulated search result.",
        "url": f"example.com/search?q={query.replace(' ', '+')}",
    }
    return {"query": query, "results": [placeholder]}
def get_current_time(timezone: str = "UTC") -> Dict:
    """Return the current date and time in the requested timezone.

    Args:
        timezone: ``"UTC"`` or an IANA zone name such as ``"Europe/Paris"``.

    Returns:
        A dict with ``datetime`` (``YYYY-MM-DD HH:MM:SS``), ``timezone`` (the
        name as requested), ``date`` and ``day``; or ``{"error": <message>}``
        when the zone cannot be resolved. The time is never labelled with a
        zone it was not computed for.
    """
    if timezone == "UTC":
        # The default needs no timezone database.
        tzinfo = datetime.timezone.utc
    else:
        try:
            # Imported lazily: zoneinfo needs Python 3.9+ and system tz data.
            from zoneinfo import ZoneInfo
            tzinfo = ZoneInfo(timezone)
        except ImportError:
            return {"error": "Named timezones require the zoneinfo module (Python 3.9+)"}
        except (KeyError, ValueError, TypeError, OSError):
            # ZoneInfoNotFoundError is a KeyError; malformed keys raise ValueError.
            return {"error": f"Unknown timezone '{timezone}'"}

    now = datetime.datetime.now(tzinfo)
    return {
        "datetime": now.strftime("%Y-%m-%d %H:%M:%S"),
        "timezone": timezone,
        "date": now.strftime("%B %d, %Y"),
        "day": now.strftime("%A"),
    }
def write_file(filename: str, content: str) -> Dict:
    """Write text to a file as UTF-8, replacing any existing file.

    NOTE(security): ``filename`` is used as given, so a caller (including an
    LLM) can overwrite any path the process may write to. Restrict it to a
    sandbox directory before exposing this to untrusted input.

    Args:
        filename: Path of the file to create or overwrite.
        content: Text to write.

    Returns:
        ``{"status": "success", "filename": ..., "bytes_written": ...}`` where
        ``bytes_written`` is the UTF-8 size of ``content`` (before any
        platform newline translation), or ``{"error": <message>}``.
    """
    if not isinstance(content, str):
        return {"error": f"content must be a string, got {type(content).__name__}"}
    try:
        # Encode first so un-encodable text fails before the file is truncated.
        size = len(content.encode("utf-8"))
        with open(filename, "w", encoding="utf-8") as f:
            f.write(content)
    except (OSError, TypeError, ValueError) as e:
        return {"error": str(e)}
    return {"status": "success", "filename": filename, "bytes_written": size}
def read_file(filename: str) -> Dict:
    """Read a UTF-8 text file.

    Args:
        filename: Path of the file to read.

    Returns:
        ``{"filename": ..., "content": ..., "lines": ...}`` on success, where
        ``lines`` is 0 for an empty file and a trailing newline does not add
        an extra line; otherwise ``{"error": <message>}``. Never raises for
        missing, unreadable or undecodable files.
    """
    try:
        with open(filename, "r", encoding="utf-8") as f:
            content = f.read()
    except FileNotFoundError:
        return {"error": f"File '{filename}' not found"}
    except (OSError, ValueError, TypeError) as e:
        # Permission problems, directories, undecodable bytes (UnicodeDecodeError
        # is a ValueError) and invalid path arguments.
        return {"error": str(e)}

    line_count = content.count("\n")
    if content and not content.endswith("\n"):
        # The last line has no terminator but still counts.
        line_count += 1
    return {"filename": filename, "content": content, "lines": line_count}
def python_repl(code: str) -> Dict:
    """Execute Python code and return output.

    NOTE(security): the code runs via ``exec`` in this process with the full
    set of builtins. It is NOT sandboxed: it can import modules, touch the
    filesystem and the network. Do not expose this to untrusted input without
    real isolation (separate process or container, resource limits).

    Args:
        code: Python source to execute in a fresh global namespace.

    Returns:
        ``{"output": <captured stdout>, "error": None}`` on success, or the
        output captured so far plus an error message when the code raises,
        including when it calls ``exit()`` / ``sys.exit()``.
    """
    import contextlib
    import io

    output = io.StringIO()
    try:
        with contextlib.redirect_stdout(output):
            exec(code, {"__builtins__": __builtins__})
    except SystemExit as e:
        # SystemExit is not an Exception subclass; without this branch the
        # executed code could terminate the host process.
        return {"output": output.getvalue(), "error": f"SystemExit: {e.code}"}
    except Exception as e:
        # Broad on purpose: any failure in user code is reported, not raised.
        return {"output": output.getvalue(), "error": str(e)}
    return {"output": output.getvalue(), "error": None}
def _tool(fn: Callable, name: str, description: str, properties: Dict,
          required: Optional[List[str]] = None) -> Tool:
    """Build a Tool whose input schema is a JSON-schema object.

    The "required" key is emitted only when a list is supplied.
    """
    schema: Dict = {"type": "object", "properties": properties}
    if required is not None:
        schema["required"] = required
    return Tool(name=name, description=description, fn=fn, schema=schema)


# Every tool the agent may call, in the order it is advertised to the model.
TOOLS = [
    _tool(
        calculator,
        "calculator",
        "Evaluate mathematical expressions. Supports +,-,*,/,(,),sqrt,pi,e",
        {"expression": {"type": "string", "description": "Math expression to evaluate"}},
        ["expression"],
    ),
    _tool(
        web_search,
        "web_search",
        "Search the web for current information on any topic",
        {
            "query": {"type": "string", "description": "Search query"},
            "max_results": {"type": "integer", "description": "Number of results", "default": 3},
        },
        ["query"],
    ),
    # get_current_time has no mandatory arguments, so no "required" list.
    _tool(
        get_current_time,
        "get_current_time",
        "Get the current date and time",
        {"timezone": {"type": "string", "description": "Timezone name", "default": "UTC"}},
    ),
    _tool(
        write_file,
        "write_file",
        "Write text content to a file",
        {
            "filename": {"type": "string", "description": "File name to write"},
            "content": {"type": "string", "description": "Content to write"},
        },
        ["filename", "content"],
    ),
    _tool(
        read_file,
        "read_file",
        "Read content from a file",
        {"filename": {"type": "string", "description": "File name to read"}},
        ["filename"],
    ),
    _tool(
        python_repl,
        "python_repl",
        "Execute Python code and return the output",
        {"code": {"type": "string", "description": "Python code to execute"}},
        ["code"],
    ),
]
# List the registered tools; descriptions are cut to 50 characters.
print(f"Tool library ready: {len(TOOLS)} tools")
for registered in TOOLS:
    print(f" • {registered.name}: {registered.description[:50]}")

agent = Agent(tools=TOOLS, max_steps=10, verbose=True)

# Candidate goals, from a single calculation up to multi-tool tasks.
tasks = [
    "What is 15% of 847 plus the square root of 144?",
    "Search for information about the transformer architecture, then write a 3-sentence summary to a file called 'transformer_summary.txt'",
    "What day of the week is it today? Then calculate how many days until the next New Year's Day.",
]

# Only the first task is run here.
divider = "#" * 60
for task in tasks[:1]:
    print(f"\n{divider}")
    result = agent.run(task)
    print(f"\nResult: {result}")
# Output:
# [Step 1] 🔧 calculator({"expression": "847 * 0.15 + sqrt(144)"}) ↳ {"result": 139.05, "expression": "847 * 0.15 + sqrt(144)"}
# ✓ Final Answer: 15% of 847 is 127.05, and the square root of 144 is 12. The sum is 127.05 + 12 = 139.05

# A goal that needs several searches followed by a file write.
research_task = (
    " Search for information about BERT and GPT transformer models."
    " Compare them by searching for both separately."
    " Then write a markdown comparison table to a file called 'llm_comparison.md'"
    " with columns: Model, Type, Pretraining Objective, Best Use Case. "
)

print("Running multi-step research agent:")
result = agent.run(research_task)
print(f"\nFinal result: {result}")

# Check whether the agent really produced the file. Failures are reported
# instead of being silently swallowed, but they never abort the script.
try:
    result = read_file("llm_comparison.md")
except (OSError, UnicodeDecodeError) as exc:
    print(f"\nCould not read llm_comparison.md: {exc}")
else:
    if "error" not in result:
        print("\nFile created successfully:")
        print(result["content"])
    else:
        print(f"\nFile was not created: {result['error']}")

print("\nAgent Failure Modes You Will Encounter:")
print()
# (failure mode, what happens, concrete example, mitigation)
_FAILURE_ROWS = (
    (
        "Infinite loops",
        "Agent keeps calling the same tool expecting different results",
        "Search fails → search again → search again → max steps",
        "Add step counter, detect repeated tool calls, add termination conditions",
    ),
    (
        "Tool hallucination",
        "Agent invents tool parameters that do not match the schema",
        "Calls calculator({'math': '2+2'}) instead of {'expression': '2+2'}",
        "Validate inputs against schema before execution, strict schema definitions",
    ),
    (
        "Goal drift",
        "Agent pursues a sub-goal and forgets the original goal",
        "Asked to 'find a restaurant', agent spends all steps on dietary research",
        "Include original goal in every message, add goal-check in system prompt",
    ),
    (
        "Over-tool-use",
        "Agent calls tools for things it already knows",
        "Uses calculator to compute 2+2, searches web for 'what is Python'",
        "Better system prompt guidance, cost-awareness in tool descriptions",
    ),
    (
        "Cascading errors",
        "Early tool failure propagates through all subsequent steps",
        "File read fails → all downstream processing fails silently",
        "Error handling in tool functions, check for error keys in results",
    ),
    (
        "Context window overflow",
        "Many tool calls accumulate and exceed context limit",
        "20+ tool calls with large results → API error",
        "Summarize tool results, limit result size, truncate old history",
    ),
)

# Failure mode name -> {"description", "example", "fix"}.
failure_modes = {
    title: {"description": what, "example": sample, "fix": remedy}
    for title, what, sample, remedy in _FAILURE_ROWS
}

# Print each failure mode as a short labelled card followed by a blank line.
for mode, info in failure_modes.items():
    print(f" {mode}:")
    for label, key in (("What", "description"), ("Example", "example"), ("Fix", "fix")):
        print(f" {label}: {info[key]}")
    print()

# System prompt that asks the model to plan first, then execute and verify.
PLANNING_SYSTEM = """You are a planning agent. For complex tasks:
- First create a plan as numbered steps
- Execute each step using available tools
- Verify each step succeeded before proceeding
- If a step fails, adjust the plan
Always show your reasoning before calling tools. Format thoughts as: 'Thought: [your reasoning]'"""
# Same tools as before, but driven by the plan-first system prompt.
planning_agent = Agent(
    tools=TOOLS,
    system_prompt=PLANNING_SYSTEM,
    max_steps=15,
    verbose=True,
)

print("Planning agent for complex multi-step task:")
complex_task = (
    " Calculate the compound interest on $10,000 at 7% annual rate for 10 years."
    " Then generate a Python script that prints a table showing the balance at the end of each year."
    " Save the script as 'compound_interest.py'. "
)
result = planning_agent.run(complex_task)


def evaluate_agent(agent, test_cases):
    """Evaluate agent on a set of test cases.

    Args:
        agent: Any object with a ``run(goal) -> str`` method.
        test_cases: Iterable of dicts with a ``"goal"`` string and a
            ``"check"`` callable that receives the agent's answer and returns
            a truthy value when the answer is acceptable.

    Returns:
        One dict per test case with ``goal`` (first 40 characters),
        ``success``, ``time`` (seconds) and ``steps`` (always ``"N/A"``).
        A summary table is also printed to stdout.
    """
    import time

    results = []
    for case in test_cases:
        start = time.perf_counter()
        try:
            answer = agent.run(case["goal"])
            success = bool(case["check"](answer))
        except Exception:
            # A crashing agent or check counts as a failed case; the
            # evaluation itself keeps going.
            success = False
        elapsed = time.perf_counter() - start

        results.append({
            "goal": case["goal"][:40],
            "success": success,
            "time": elapsed,
            "steps": "N/A",
        })

    print("\nAgent Evaluation:")
    print(f"{'Goal':<42} {'Success':>8} {'Time':>8}")
    print("=" * 62)
    for r in results:
        print(f"{r['goal']:<42} {'✓' if r['success'] else '✗':>8} {r['time']:>7.1f}s")

    if not results:
        # Avoid dividing by zero when there is nothing to average.
        print("\nNo test cases were run.")
        return results

    accuracy = sum(r["success"] for r in results) / len(results)
    avg_time = sum(r["time"] for r in results) / len(results)
    print(f"\nAccuracy: {accuracy:.0%} | Avg time: {avg_time:.1f}s")
    return results
def _contains_535(answer):
    """Pass when the expected product-plus-sum value appears in the answer."""
    return "535" in answer


def _mentions_comprehension_syntax(answer):
    """Pass when the answer mentions any token typical of a list comprehension."""
    lowered = answer.lower()
    return any(token in lowered for token in ["for", "if", "[", "list"])


def _mentions_hello_or_world(answer):
    """Pass when the answer echoes either word of the written text."""
    lowered = answer.lower()
    return "hello" in lowered or "world" in lowered


# Each case pairs a goal with a loose textual check on the agent's answer.
test_suite = [
    {"goal": "Calculate 17 * 23 + 144", "check": _contains_535},
    {"goal": "Search for Python list comprehension syntax", "check": _mentions_comprehension_syntax},
    {"goal": "Write 'Hello World' to hello.txt then read it back", "check": _mentions_hello_or_world},
]

evaluate_agent(agent, test_suite)

print("\nEssential Agent Reference Links:")
print()
# Reading list: category -> list of (title, url) pairs.
refs = {}
refs["Papers"] = [
    ("ReAct: Reason + Act in LLMs", "arxiv.org/abs/2210.03629"),
    ("Toolformer: Teaching LLMs to use tools", "arxiv.org/abs/2302.04761"),
    ("AutoGPT: Autonomous agents", "github.com/Significant-Gravitas/AutoGPT"),
    ("AgentBench: Evaluating agents", "arxiv.org/abs/2308.03688"),
    ("Chain-of-Thought Prompting", "arxiv.org/abs/2201.11903"),
]
refs["Frameworks"] = [
    ("LangChain Agents", "python.langchain.com/docs/modules/agents"),
    ("LlamaIndex Agents", "docs.llamaindex.ai/en/stable/use_cases/agents"),
    ("Anthropic Tool Use", "docs.anthropic.com/en/docs/build-with-claude/tool-use"),
    ("OpenAI Assistants API", "platform.openai.com/docs/assistants/overview"),
    ("CrewAI (multi-agent)", "crewai.com"),
    ("AutoGen (Microsoft)", "github.com/microsoft/autogen"),
]
refs["Tutorials"] = [
    ("Build an AI Agent from Scratch", "towardsdatascience.com/ai-agents-from-scratch"),
    ("Anthropic Cookbook: Agents", "github.com/anthropics/anthropic-cookbook/tree/main/tool_use"),
    ("DeepLearning.AI Agent Courses", "learn.deeplearning.ai"),
    ("LangGraph (stateful agents)", "langchain-ai.github.io/langgraph"),
]
refs["Cheat Sheets"] = [
    ("Agent design patterns", "lilianweng.github.io/posts/2023-06-23-agent"),
    ("Tool use best practices", "docs.anthropic.com/en/docs/build-with-claude/tool-use"),
    ("Prompt engineering for agents", "learnprompting.org/docs/advanced/agents"),
]

# One heading per category, then its links with titles padded to 42 columns.
for category in refs:
    print(f" {category}:")
    for name, url in refs[category]:
        print(f" • {name:<42} {url}")
    print()
Create `agent_practice.py`.
Part 1: tool library. Implement at least five tools: calculator, web search (mock), time/date, file read/write, and one domain-specific tool of your choice (weather lookup, stock prices, unit converter). Test each tool function directly before plugging into the agent.
Part 2: single-step tasks. Run the agent on five tasks that require exactly one tool call. Verify it calls the right tool with the right arguments each time.
Part 3: multi-step tasks. Run on three tasks requiring 3-5 tool calls each. Examples: "Search for X, compute a calculation on the result, save to file." Track how many steps each task takes. Does the agent complete them correctly?
Part 4: failure injection. Modify one tool to randomly fail 30% of the time. Run a task that depends on that tool 10 times. Does the agent handle failures gracefully? Does it retry? Adjust the system prompt to make it more resilient.
Single agents work alone. Multi-agent systems divide complex tasks between specialized agents: a researcher agent, a writer agent, a code reviewer agent, each doing what it does best, coordinated by an orchestrator. That is the next post.