Skip to main content

Building Agents

Create AI agents from scratch using the Claude API

3-4 hours
2 min read
Updated January 15, 2026

Building Agents

Time to get your hands dirty. This guide takes you from "hello world" to a working agent that can use tools, manage state, and handle errors gracefully.

What We're Building
A complete agent system

Setting Up

Prerequisites

Bash
# Check Python version (3.9+ required)
python --version
# Create the project directory and an isolated virtual environment inside it
mkdir my-agent && cd my-agent
python -m venv venv
source venv/bin/activate # Windows: venv\Scripts\activate
# Install dependencies: the Anthropic SDK and the .env loader used by config.py
pip install anthropic python-dotenv

Project Structure

    • agent.py
    • tools.py
    • memory.py
    • config.py
    • .env
    • requirements.txt

Configuration

Create .env:

Bash
ANTHROPIC_API_KEY=your-api-key-here

Create config.py:

Python
import os
from dotenv import load_dotenv
# Load variables from the project's .env file before reading them below.
load_dotenv()
# API key for the Anthropic client; os.getenv yields None when the variable is unset.
ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY")
# Model identifier passed as `model=` to client.messages.create.
MODEL = "claude-sonnet-4-20250514"
# Value passed as `max_tokens=` to client.messages.create.
MAX_TOKENS = 4096

Your First Agent

Let's build an agent that can do math. Simple, but it demonstrates all the core concepts.

  1. Define a Tool

    Create tools.py:

    Python
    import math
    from typing import Any
    # Tool definitions passed to the model as `tools=`; each entry pairs a tool
    # name and description with a JSON-schema description of its inputs.
    TOOLS = [
        {
            "name": "calculator",
            "description": "Perform mathematical calculations. Supports arithmetic, powers, roots, trig.",
            "input_schema": {
                "type": "object",
                "properties": {
                    "expression": {
                        "type": "string",
                        "description": "Math expression to evaluate, e.g., '2 + 2' or 'sqrt(16)'",
                    },
                },
                "required": ["expression"],
            },
        },
    ]
    def execute_tool(name: str, inputs: dict) -> Any:
        """Route a tool call to its implementation.

        Args:
            name: Tool name as requested by the model.
            inputs: Tool arguments as supplied by the model.

        Returns:
            The tool's result, or an error string when the tool is unknown or
            its required input is missing or of the wrong type.
        """
        if name == "calculator":
            # The inputs come from the model, so a missing or malformed field
            # is reported back as text instead of raising KeyError.
            expression = inputs.get("expression")
            if not isinstance(expression, str):
                return "Error: calculator requires a string 'expression' input"
            return calculate(expression)
        return f"Unknown tool: {name}"
    def calculate(expression: str) -> str:
        """Evaluate a math expression without using ``eval``.

        The expression is parsed with ``ast`` and only a whitelist of node
        types is evaluated: numeric literals, ``+ - * / // % **``, unary
        ``+``/``-``, list/tuple literals (for ``min``/``max``), the constants
        ``pi`` and ``e``, and calls to ``abs``, ``round``, ``min``, ``max``,
        ``sqrt``, ``log``, ``pow``, ``sin``, ``cos`` and ``tan``. Attribute
        access and every other construct is rejected, which closes the
        sandbox escapes that ``eval`` with an emptied ``__builtins__`` allows.

        ``^`` is rewritten to ``**`` before parsing so that "5^3" means five
        cubed, as in ordinary math notation, rather than Python's bitwise XOR.

        Args:
            expression: The expression text, e.g. ``"2 + 2"`` or ``"sqrt(16)"``.

        Returns:
            The result converted with ``str``, or ``"Error: <reason>"`` when
            the expression is invalid, unsupported, or fails to evaluate.
        """
        import ast
        import operator

        # Cap on the size of integer powers so inputs such as 9**9**9 are
        # rejected instead of consuming unbounded CPU and memory.
        max_result_bits = 10_000

        def bounded_pow(base, exponent, mod=None):
            if mod is not None:
                return pow(base, exponent, mod)
            if isinstance(base, int) and isinstance(exponent, int) and exponent > 0:
                if base.bit_length() * exponent > max_result_bits:
                    raise ValueError("Result of exponentiation is too large")
            return pow(base, exponent)

        binary_ops = {
            ast.Add: operator.add,
            ast.Sub: operator.sub,
            ast.Mult: operator.mul,
            ast.Div: operator.truediv,
            ast.FloorDiv: operator.floordiv,
            ast.Mod: operator.mod,
            ast.Pow: bounded_pow,
        }
        unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}
        functions = {
            "abs": abs, "round": round, "min": min, "max": max,
            "sqrt": math.sqrt, "log": math.log, "pow": bounded_pow,
            "sin": math.sin, "cos": math.cos, "tan": math.tan,
        }
        constants = {"pi": math.pi, "e": math.e}

        def evaluate(node):
            if isinstance(node, ast.Expression):
                return evaluate(node.body)
            if isinstance(node, ast.Constant):
                value = node.value
                # bool is an int subclass; exclude it along with str, bytes, etc.
                if isinstance(value, bool) or not isinstance(value, (int, float)):
                    raise ValueError(f"Unsupported literal: {value!r}")
                return value
            if isinstance(node, ast.Name):
                if node.id in constants:
                    return constants[node.id]
                raise ValueError(f"Unknown name: {node.id}")
            if isinstance(node, ast.BinOp):
                op = binary_ops.get(type(node.op))
                if op is None:
                    raise ValueError("Unsupported operator")
                return op(evaluate(node.left), evaluate(node.right))
            if isinstance(node, ast.UnaryOp):
                op = unary_ops.get(type(node.op))
                if op is None:
                    raise ValueError("Unsupported operator")
                return op(evaluate(node.operand))
            if isinstance(node, (ast.List, ast.Tuple)):
                return [evaluate(item) for item in node.elts]
            if isinstance(node, ast.Call):
                # Only direct calls to whitelisted names, positional args only.
                if not isinstance(node.func, ast.Name) or node.keywords:
                    raise ValueError("Unsupported function call")
                func = functions.get(node.func.id)
                if func is None:
                    raise ValueError(f"Unknown function: {node.func.id}")
                return func(*[evaluate(arg) for arg in node.args])
            raise ValueError("Unsupported expression")

        try:
            tree = ast.parse(expression.replace("^", "**").strip(), mode="eval")
            return str(evaluate(tree))
        except Exception as e:
            # Tool boundary: every failure is reported to the model as text.
            return f"Error: {str(e)}"
  2. Build the Agent Loop

    Create agent.py:

    Python
    from anthropic import Anthropic
    from tools import TOOLS, execute_tool
    from config import ANTHROPIC_API_KEY, MODEL, MAX_TOKENS
    client = Anthropic(api_key=ANTHROPIC_API_KEY)
    def run_agent(user_message: str, max_iterations: int = 25) -> str:
        """Run the agent loop for a single user message.

        Calls the model, executes any tools it requests, feeds the results
        back, and repeats until the model stops asking for tools.

        Args:
            user_message: The user's request.
            max_iterations: Upper bound on model calls, so a model that keeps
                requesting tools cannot loop forever.

        Returns:
            The concatenated text blocks of the final response, or
            "Agent reached max iterations" if the bound is hit first.
        """
        messages = [{"role": "user", "content": user_message}]
        for _ in range(max_iterations):
            response = client.messages.create(
                model=MODEL,
                max_tokens=MAX_TOKENS,
                tools=TOOLS,
                messages=messages,
            )
            if response.stop_reason != "tool_use":
                # Final answer: keep only the blocks that carry text.
                return "".join(
                    block.text for block in response.content
                    if hasattr(block, "text")
                )
            # One tool_result per tool_use block, matched by id. execute_tool
            # is typed as returning Any, so the result is coerced to text.
            tool_results = [
                {
                    "type": "tool_result",
                    "tool_use_id": block.id,
                    "content": str(execute_tool(block.name, block.input)),
                }
                for block in response.content
                if block.type == "tool_use"
            ]
            # Record the assistant turn and the tool results for the next call.
            messages.append({"role": "assistant", "content": response.content})
            messages.append({"role": "user", "content": tool_results})
        return "Agent reached max iterations"
    if __name__ == "__main__":
        # Script entry point: run one sample query and print the agent's reply.
        answer = run_agent("What is sqrt(144) + 5^3?")
        print(answer)
  3. Test It

    Bash
    python agent.py

    Expected output (the exact wording will vary between runs; the final answer should be 137):

    Text
    Let me calculate that for you.
    First, sqrt(144) = 12
    Then, 5^3 = 125
    Finally, 12 + 125 = 137
    The answer is 137.

Adding More Tools

One tool is a start. Real agents need more capabilities.

File Operations

Python
import os
# Tool definitions for the file helpers below (read, write, list directory).
FILE_TOOLS = [
    {
        "name": "read_file",
        "description": "Read contents of a file",
        "input_schema": {
            "type": "object",
            "properties": {
                "path": {"type": "string", "description": "Path to file"},
            },
            "required": ["path"],
        },
    },
    {
        "name": "write_file",
        "description": "Write content to a file",
        "input_schema": {
            "type": "object",
            "properties": {
                "path": {"type": "string", "description": "Path to file"},
                "content": {"type": "string", "description": "Content to write"},
            },
            "required": ["path", "content"],
        },
    },
    {
        "name": "list_directory",
        "description": "List files in a directory",
        "input_schema": {
            "type": "object",
            "properties": {
                "path": {"type": "string", "description": "Directory path"},
            },
            "required": ["path"],
        },
    },
]
def read_file(path: str) -> str:
    """Return the text content of the file at ``path``.

    The file is decoded as UTF-8 explicitly so the result does not depend on
    the platform's default encoding.

    NOTE(security): ``path`` comes from the model and is not restricted to any
    directory; confine it to a workspace before exposing this tool to
    untrusted input.

    Returns:
        The file content, or ``"Error: <reason>"`` if it cannot be read.
    """
    try:
        with open(path, 'r', encoding='utf-8') as f:
            return f.read()
    except Exception as e:
        # Tool boundary: failures are reported to the model as text.
        return f"Error: {str(e)}"
def write_file(path: str, content: str) -> str:
    """Write ``content`` to ``path``, replacing any existing file.

    The file is encoded as UTF-8 explicitly so non-ASCII text is written the
    same way on every platform.

    NOTE(security): ``path`` comes from the model and is not restricted to any
    directory; confine it to a workspace before exposing this tool to
    untrusted input.

    Returns:
        A confirmation with the number of characters written, or
        ``"Error: <reason>"`` if the write fails.
    """
    try:
        with open(path, 'w', encoding='utf-8') as f:
            f.write(content)
        return f"Wrote {len(content)} characters to {path}"
    except Exception as e:
        # Tool boundary: failures are reported to the model as text.
        return f"Error: {str(e)}"
def list_directory(path: str) -> str:
    """List the entries of the directory at ``path``, one name per line.

    Names are sorted because ``os.listdir`` returns them in arbitrary order,
    which would make the tool's output vary between runs.

    Returns:
        The newline-joined entry names (empty string for an empty directory),
        or ``"Error: <reason>"`` if the directory cannot be listed.
    """
    try:
        return "\n".join(sorted(os.listdir(path)))
    except Exception as e:
        # Tool boundary: failures are reported to the model as text.
        return f"Error: {str(e)}"

Web Fetching

Python
import requests
# Tool definition for fetch_url.
WEB_TOOLS = [
    {
        "name": "fetch_url",
        "description": "Fetch content from a URL",
        "input_schema": {
            "type": "object",
            "properties": {
                "url": {"type": "string", "description": "URL to fetch"},
            },
            "required": ["url"],
        },
    },
]
def fetch_url(url: str) -> str:
    """Fetch ``url`` and return the start of the response body.

    Returns:
        At most the first 5000 characters of the response text, or
        ``"Error: <reason>"`` if the request fails or the status is an error.
    """
    size_limit = 5000  # Limit size
    try:
        reply = requests.get(url, timeout=10)
        reply.raise_for_status()
        body = reply.text
        return body[:size_limit]
    except Exception as exc:
        return f"Error: {str(exc)}"

Code Execution

Python
import subprocess
import tempfile
# Tool definition for run_python.
CODE_TOOLS = [
    {
        "name": "run_python",
        "description": "Execute Python code and return output",
        "input_schema": {
            "type": "object",
            "properties": {
                "code": {"type": "string", "description": "Python code"},
            },
            "required": ["code"],
        },
    },
]
def run_python(code: str) -> str:
    """Run ``code`` in a separate Python process and return its output.

    The code is written to a temporary ``.py`` file, executed with the same
    interpreter that is running this program, and the temporary file is
    removed afterwards.

    NOTE(security): the code comes from the model and runs with this process's
    privileges. The 30-second timeout is the only limit applied here; run it
    inside a real sandbox (container, restricted user) before exposing this
    tool to untrusted input.

    Returns:
        Captured stdout, followed by an "Errors:" section when stderr is not
        empty; "Code executed (no output)" when nothing was printed; or an
        "Error: ..." string on timeout or failure to launch.
    """
    import os
    import sys

    temp_path = None
    try:
        # delete=False so the file can be closed before the child opens it
        # (required on Windows); it is removed in the finally block instead.
        with tempfile.NamedTemporaryFile(
            mode='w', suffix='.py', delete=False, encoding='utf-8'
        ) as f:
            f.write(code)
            temp_path = f.name
        result = subprocess.run(
            # sys.executable keeps the child on the active interpreter/venv;
            # a bare 'python' may be missing or point somewhere else.
            [sys.executable or 'python', temp_path],
            capture_output=True,
            text=True,
            timeout=30
        )
        output = result.stdout
        if result.stderr:
            output += f"\nErrors:\n{result.stderr}"
        return output or "Code executed (no output)"
    except subprocess.TimeoutExpired:
        return "Error: Execution timed out (30s limit)"
    except Exception as e:
        return f"Error: {str(e)}"
    finally:
        if temp_path is not None:
            try:
                os.unlink(temp_path)
            except OSError:
                # Best-effort cleanup; a leftover temp file must not mask the result.
                pass

Architecture Patterns

Three patterns dominate agent design. Choose based on your use case.

Agent Patterns

Pattern 1: ReAct (Reason + Act)

The agent explicitly thinks before each action:

Python
# System prompt for the ReAct pattern: tells the model to cycle through
# thought, action (tool use) and observation until the task is done.
SYSTEM_PROMPT = """You solve problems step by step.
For each step:
1. THOUGHT: What do I need to do?
2. ACTION: Use a tool
3. OBSERVATION: What did I learn?
4. REPEAT until done
Always think before you act."""
def react_agent(task: str, max_iterations: int = 20) -> str:
    """Run a ReAct-style agent on ``task`` using ``SYSTEM_PROMPT``.

    Calls the model with the ReAct system prompt, executes requested tools,
    feeds the results back, and repeats until the model gives a final answer.

    Args:
        task: The user's task description.
        max_iterations: Upper bound on model calls, to guarantee termination.

    Returns:
        The text of the final response, or "Agent reached max iterations" if
        the bound is hit first.
    """
    messages = [{"role": "user", "content": task}]
    for _ in range(max_iterations):
        response = client.messages.create(
            model=MODEL,
            max_tokens=MAX_TOKENS,  # required by the Messages API
            system=SYSTEM_PROMPT,
            tools=TOOLS,
            messages=messages,
        )
        if response.stop_reason != "tool_use":
            return "".join(
                block.text for block in response.content
                if hasattr(block, "text")
            )
        # One tool_result per tool_use block, matched by id.
        tool_results = [
            {
                "type": "tool_result",
                "tool_use_id": block.id,
                "content": str(execute_tool(block.name, block.input)),
            }
            for block in response.content
            if block.type == "tool_use"
        ]
        messages.append({"role": "assistant", "content": response.content})
        messages.append({"role": "user", "content": tool_results})
    return "Agent reached max iterations"

Pattern 2: Plan and Execute

Create a plan first, then execute it:

Python
def plan_and_execute(task: str) -> str:
    """Solve ``task`` by planning first and then executing each step.

    Asks the model for a numbered plan, runs every parsed step through
    ``execute_step``, and combines the step results with
    ``synthesize_results``.

    Args:
        task: The user's task description.

    Returns:
        The value returned by ``synthesize_results`` for the step results.
    """
    # Step 1: Create plan
    plan_response = client.messages.create(
        model=MODEL,
        max_tokens=MAX_TOKENS,  # required by the Messages API
        system="Create a step-by-step plan. Return numbered steps.",
        messages=[{"role": "user", "content": task}],
    )
    plan = extract_text(plan_response)
    # Step 2: Execute each step, in plan order
    results = [execute_step(step) for step in parse_plan(plan)]
    # Step 3: Synthesize
    return synthesize_results(results)

Pattern 3: Tree of Thoughts

Explore multiple approaches, pick the best:

Python
def tree_of_thoughts(problem: str, branches: int = 3) -> str:
    """Generate several candidate approaches and execute the best-scoring one.

    Args:
        problem: The problem statement handed to ``generate_approach``.
        branches: How many candidate approaches to generate.

    Returns:
        Whatever ``execute_approach`` returns for the highest-scoring candidate.
    """
    # Generate every candidate before scoring any of them.
    candidates = []
    for _ in range(branches):
        candidates.append(generate_approach(problem))
    # Pair each candidate with its score.
    ranked = []
    for candidate in candidates:
        ranked.append((candidate, evaluate(candidate)))
    # Pick the highest score and run that candidate.
    winner, _score = max(ranked, key=lambda pair: pair[1])
    return execute_approach(winner)

Memory Management

Agents need memory to work across steps and sessions.

Short-term Memory

Keep recent context in the message history:

Python
class ShortTermMemory:
    """Rolling window over the most recent conversation messages.

    Keeps at most ``max_messages`` messages. After trimming, leading messages
    are dropped until the window starts with a ``"user"`` message, because
    the Messages API rejects a history whose first message is from the
    assistant.
    """

    def __init__(self, max_messages: int = 20):
        self.messages = []
        self.max_messages = max_messages

    def add(self, role: str, content: str):
        """Append a message and trim the window if it has grown too large."""
        self.messages.append({"role": role, "content": content})
        # Explicit overflow count: a [-max_messages:] slice keeps everything
        # when max_messages is 0.
        overflow = len(self.messages) - max(self.max_messages, 0)
        if overflow > 0:
            trimmed = self.messages[overflow:]
            start = 0
            while start < len(trimmed) and trimmed[start]["role"] != "user":
                start += 1
            self.messages = trimmed[start:]

    def get_messages(self):
        """Return a shallow copy of the current window, oldest first."""
        return self.messages.copy()

Long-term Memory with Vectors

Store and retrieve by semantic similarity:

Python
# pip install chromadb sentence-transformers
import chromadb
from sentence_transformers import SentenceTransformer
class LongTermMemory:
    """Vector-backed memory: store text and retrieve it by semantic similarity.

    NOTE(review): ``chromadb.Client()`` is, per the Chroma docs, an in-memory
    client, so stored memories would not survive a restart; confirm, and
    switch to a persistent client if cross-session memory is required.
    """

    def __init__(self):
        self.client = chromadb.Client()
        # get_or_create so a second instance in the same process does not
        # fail because "agent_memory" already exists.
        self.collection = self.client.get_or_create_collection("agent_memory")
        self.encoder = SentenceTransformer('all-MiniLM-L6-v2')

    def store(self, text: str, metadata: dict = None):
        """Embed ``text`` and store it, optionally with ``metadata``.

        The id is a SHA-256 digest of the text. The built-in ``hash()`` is
        salted per process for strings, so it cannot provide stable ids.
        Storing the same text again overwrites the existing entry.
        """
        import hashlib

        embedding = self.encoder.encode(text).tolist()
        doc_id = hashlib.sha256(text.encode("utf-8")).hexdigest()
        self.collection.upsert(
            ids=[doc_id],
            embeddings=[embedding],
            documents=[text],
            # Omit metadata entirely when none is given rather than sending {}.
            metadatas=[metadata] if metadata else None,
        )

    def search(self, query: str, n: int = 5) -> list:
        """Return up to ``n`` stored documents most similar to ``query``."""
        embedding = self.encoder.encode(query).tolist()
        results = self.collection.query(
            query_embeddings=[embedding],
            n_results=n
        )
        # One result list per query embedding; only one query was sent.
        return results["documents"][0]

Memory-Augmented Agent

Python
class MemoryAgent:
    """Agent that combines a short-term message window with long-term recall."""

    def __init__(self):
        self.short_term = ShortTermMemory()
        self.long_term = LongTermMemory()

    def run(self, message: str) -> str:
        """Answer ``message`` using recalled memories as extra system context.

        Looks up related long-term memories, records the user message, runs
        the agent on the short-term history, then stores the reply in both
        memories.
        """
        # Look up related memories and join them into one context string.
        recalled = self.long_term.search(message)
        context = ""
        if recalled:
            context = "\n".join(recalled)
        # Record the user's turn before calling the model.
        self.short_term.add("user", message)
        # The system prompt is empty when nothing relevant was recalled.
        system = ""
        if context:
            system = f"Relevant context:\n{context}"
        history = self.short_term.get_messages()
        reply = run_agent_with_system(system, history)
        # Remember the reply in both memories.
        self.long_term.store(reply)
        self.short_term.add("assistant", reply)
        return reply

Error Handling

Agents fail. Plan for it.

Retry Logic

Python
import time
from anthropic import APIError, RateLimitError
def run_with_retry(message: str, max_retries: int = 3) -> str:
    """Run the agent, retrying on API failures.

    Rate-limit errors are retried after an exponential backoff of 1s, 2s,
    4s, and so on. Other API errors are retried immediately. No sleep is
    performed after the final attempt, since nothing follows it.

    Args:
        message: The user message passed to ``run_agent``.
        max_retries: Maximum number of attempts.

    Returns:
        The agent's answer from the first successful attempt.

    Raises:
        APIError: Re-raised if the final attempt fails with a non-rate-limit
            API error.
        Exception: "Max retries exceeded" when every attempt was rate limited;
            the last rate-limit error is attached as ``__cause__``.
    """
    last_error = None
    for attempt in range(max_retries):
        is_last_attempt = attempt == max_retries - 1
        try:
            return run_agent(message)
        except RateLimitError as e:
            # Handled before APIError because it is the more specific case.
            last_error = e
            if is_last_attempt:
                break
            wait = 2 ** attempt
            print(f"Rate limited. Waiting {wait}s...")
            time.sleep(wait)
        except APIError as e:
            print(f"API error: {e}")
            if is_last_attempt:
                raise
    raise Exception("Max retries exceeded") from last_error

Loop Detection

Python
def run_with_loop_detection(message: str, max_iterations: int = 20) -> str:
    """Run the agent loop, nudging the model when it repeats tool calls.

    Each response's tool calls are compared with the last three recorded
    sets. A repeated set is not executed again: its calls are answered with
    error results and the model is told to change approach.

    Args:
        message: The user's request.
        max_iterations: Upper bound on model calls.

    Returns:
        The final text answer, or "Agent reached max iterations" if the bound
        is hit first.
    """
    messages = [{"role": "user", "content": message}]
    seen_calls = []
    for _ in range(max_iterations):
        response = client.messages.create(
            model=MODEL,
            max_tokens=MAX_TOKENS,  # required by the Messages API
            tools=TOOLS,
            messages=messages,
        )
        if response.stop_reason != "tool_use":
            return extract_text(response)

        tool_blocks = [b for b in response.content if b.type == "tool_use"]
        current = [(b.name, str(b.input)) for b in tool_blocks]
        # Check for loop
        looping = current in seen_calls[-3:]
        if not looping:
            seen_calls.append(current)

        # The assistant turn must be recorded, and every tool_use answered
        # with a tool_result, or the next request is rejected.
        messages.append({"role": "assistant", "content": response.content})
        if looping:
            reply = [
                {
                    "type": "tool_result",
                    "tool_use_id": b.id,
                    "content": "Skipped: repeated tool call.",
                    "is_error": True,
                }
                for b in tool_blocks
            ]
            # Text blocks go after the tool_result blocks in the same turn.
            reply.append({
                "type": "text",
                "text": "You're repeating yourself. Try a different approach or give your final answer."
            })
        else:
            reply = [
                {
                    "type": "tool_result",
                    "tool_use_id": b.id,
                    "content": str(execute_tool(b.name, b.input)),
                }
                for b in tool_blocks
            ]
        messages.append({"role": "user", "content": reply})
    return "Agent reached max iterations"

Safe Tool Execution

Python
def execute_tool_safely(name: str, inputs: dict) -> str:
    """Run a tool, turning any exception into a message for the model.

    Returns:
        The tool's result, or a "Tool '<name>' failed: ..." string if the
        tool raised.
    """
    try:
        return execute_tool(name, inputs)
    except Exception as exc:
        return f"Tool '{name}' failed: {str(exc)}. Try a different approach."

Complete Research Agent

Let's put it all together:

Python
from dataclasses import dataclass
from typing import List
import logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
@dataclass
class Note:
    """A set of extracted facts together with the source they came from."""

    # Identifier of the source; ResearchAgent passes the page URL.
    source: str
    # Extracted text for that source.
    content: str
class ResearchAgent:
    """Agent that researches a question: query, fetch, take notes, synthesize.

    ``_search`` and ``_fetch_and_extract`` are integration points and must be
    implemented before ``research`` can run.
    """

    def __init__(self):
        self.client = Anthropic()
        self.notes: List[Note] = []
        self.visited: set = set()

    def research(self, question: str) -> str:
        """Research ``question`` and return a synthesized, cited answer."""
        logger.info("Researching: %s", question)
        # Phase 1: Generate search queries
        queries = self._generate_queries(question)
        # Phase 2: Search and gather
        for query in queries:
            for result in self._search(query):
                # _search is typed as returning dicts, so index by key.
                url = result["url"]
                if url in self.visited:
                    continue
                # Mark first so a page that fails to yield content is not retried.
                self.visited.add(url)
                content = self._fetch_and_extract(url)
                if content:
                    # Empty content would be rejected by the API; skip it.
                    self._take_notes(url, content)
        # Phase 3: Synthesize
        report = self._synthesize(question)
        logger.info("Done. %d notes from %d sources", len(self.notes), len(self.visited))
        return report

    def _ask(self, system: str, content: str) -> str:
        """Send one single-turn request and return the response text."""
        response = self.client.messages.create(
            model=MODEL,
            max_tokens=MAX_TOKENS,  # required by the Messages API
            system=system,
            messages=[{"role": "user", "content": content}],
        )
        return extract_text(response)

    def _generate_queries(self, question: str) -> List[str]:
        """Ask the model for search queries covering ``question``."""
        return parse_queries(
            self._ask("Generate 3-5 search queries to research this question.", question)
        )

    def _search(self, query: str) -> List[dict]:
        """Return search results for ``query``; each dict needs a "url" key."""
        # Implement with your search API
        raise NotImplementedError("Implement _search with your search API")

    def _fetch_and_extract(self, url: str) -> str:
        """Fetch ``url`` and return its key text, or "" if nothing is usable."""
        # Fetch URL and extract key information
        raise NotImplementedError("Implement _fetch_and_extract for your fetcher")

    def _take_notes(self, source: str, content: str):
        """Extract key facts from ``content`` and record them as a Note."""
        # Use Claude to extract key points
        facts = self._ask("Extract 3-5 key facts from this content.", content)
        self.notes.append(Note(source=source, content=facts))

    def _synthesize(self, question: str) -> str:
        """Combine all notes into one answer that cites its sources."""
        notes_text = "\n\n".join(f"[{n.source}]\n{n.content}" for n in self.notes)
        return self._ask(
            "Synthesize these notes into a comprehensive answer. Cite sources.",
            f"Question: {question}\n\nNotes:\n{notes_text}",
        )

Testing

Unit Tests

Python
import pytest
from tools import calculate, execute_tool
def test_calculator():
    """calculate returns exact strings for valid input and an error otherwise."""
    exact_cases = {"2 + 2": "4", "sqrt(16)": "4.0"}
    for expression, expected in exact_cases.items():
        assert calculate(expression) == expected
    # An unknown name must be reported as an error string, not raised.
    assert "Error" in calculate("invalid")
def test_execute_tool():
    """execute_tool routes a calculator call and returns its result."""
    assert execute_tool("calculator", {"expression": "10 * 5"}) == "50"
def test_unknown_tool():
    """execute_tool reports an unknown tool name instead of raising."""
    assert "Unknown" in execute_tool("unknown", {})

Integration Tests

Python
def test_agent_math():
    """The agent's answer to a single addition mentions the correct sum."""
    answer = run_agent("What is 15 + 27?")
    assert "42" in answer
def test_agent_multi_step():
    """The agent's answer to a two-step calculation mentions the result."""
    answer = run_agent("Calculate 10^2, then take the square root")
    assert "10" in answer

Next Steps

Now that you can build agents:

  1. Using Agents — Master prompting and debugging
  2. Agent Products — Ship production systems

Practice Projects

| Project | Skills Practiced |
| --- | --- |
| File Organizer | File tools, categorization |
| Code Reviewer | Code analysis, multi-file |
| Meeting Summarizer | Text processing, extraction |
| Data Analyst | SQL, visualization |

Share this article