Building Agents

Learn to build powerful AI agents from the ground up. This guide takes you from simple single-tool agents to sophisticated multi-step autonomous systems.

Setting Up Your Environment

Prerequisites

Bash

1# Python 3.9+
2python --version
3 
4# Create project
5mkdir my-agent && cd my-agent
6python -m venv venv
7source venv/bin/activate  # Windows: venv\Scripts\activate
8 
9# Install dependencies
10pip install anthropic python-dotenv

Project Structure

Bash

1my-agent/
2├── agent.py           # Main agent logic
3├── tools.py           # Tool definitions
4├── memory.py          # Memory management
5├── config.py          # Configuration
6├── .env               # API keys
7└── requirements.txt   # Dependencies

Configuration

Create .env:

Bash

ANTHROPIC_API_KEY=your-api-key-here

Create config.py:

Python

1import os
2from dotenv import load_dotenv
3 
4load_dotenv()
5 
6ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY")
7MODEL = "claude-sonnet-4-20250514"
8MAX_TOKENS = 4096

Your First Agent

Let's build a simple agent that can do calculations.

Step 1: Define Tools

Create tools.py:

Python

1import math
2from typing import Any
3 
4# Tool definitions for Claude
5TOOLS = [
6    {
7        "name": "calculator",
8        "description": "Perform mathematical calculations. Supports basic arithmetic, powers, roots, and trigonometry.",
9        "input_schema": {
10            "type": "object",
11            "properties": {
12                "expression": {
13                    "type": "string",
14                    "description": "Mathematical expression to evaluate, e.g., '2 + 2' or 'sqrt(16)'"
15                }
16            },
17            "required": ["expression"]
18        }
19    }
20]
21 
def execute_tool(name: str, inputs: dict) -> Any:
    """Dispatch a tool call by name and return the tool's result.

    Args:
        name: Tool name as sent by the model.
        inputs: Tool arguments; the calculator reads ``inputs["expression"]``.

    Returns:
        The tool's return value, or an ``"Unknown tool: ..."`` message when
        the name is not recognized (no exception is raised for that case).
    """
    # Guard clause first: anything that is not a known tool gets a message back.
    if name != "calculator":
        return f"Unknown tool: {name}"
    return calculate(inputs["expression"])
28 
def calculate(expression: str) -> str:
    """Evaluate a mathematical expression and return the result as a string.

    The expression is parsed and checked against a whitelist of syntax nodes
    before it is evaluated. Emptying ``__builtins__`` alone is not a sandbox:
    attribute access such as ``().__class__.__bases__`` can still reach
    arbitrary objects, so attributes, subscripts, lambdas, comprehensions and
    non-numeric literals are all rejected up front.

    Args:
        expression: Expression text, e.g. ``"2 + 2"`` or ``"sqrt(16)"``.

    Returns:
        ``str(result)`` on success, or ``"Error: <reason>"`` for invalid,
        unsupported, or failing expressions. Never raises.
    """
    import ast  # local import so this block needs no change to the file's imports

    # Names the expression may reference; everything else is rejected.
    safe_dict = {
        "abs": abs, "round": round,
        "min": min, "max": max,
        "sum": sum, "pow": pow,
        "sqrt": math.sqrt, "log": math.log,
        "sin": math.sin, "cos": math.cos,
        "tan": math.tan, "pi": math.pi,
        "e": math.e,
    }

    # Syntax allowed in an expression: arithmetic, comparisons, calls to the
    # whitelisted names, and list/tuple literals (for sum/min/max).
    allowed_nodes = (
        ast.Expression, ast.BinOp, ast.UnaryOp, ast.BoolOp, ast.Compare,
        ast.Call, ast.Name, ast.Load, ast.Constant, ast.List, ast.Tuple,
        ast.operator, ast.unaryop, ast.boolop, ast.cmpop,
    )

    try:
        tree = ast.parse(expression, mode="eval")

        for node in ast.walk(tree):
            if not isinstance(node, allowed_nodes):
                raise ValueError(f"unsupported syntax: {type(node).__name__}")
            if isinstance(node, ast.Constant) and not isinstance(
                node.value, (int, float, complex)
            ):
                raise ValueError("only numeric literals are allowed")
            if isinstance(node, ast.Name) and node.id not in safe_dict:
                raise ValueError(f"name '{node.id}' is not defined")
            if isinstance(node, ast.Call) and not isinstance(node.func, ast.Name):
                raise ValueError("only direct calls to supported functions are allowed")

        # The tree has been validated, so evaluating it can only touch safe_dict.
        result = eval(
            compile(tree, "<expression>", "eval"),
            {"__builtins__": {}},
            safe_dict,
        )
        return str(result)
    except Exception as e:
        # Tool boundary: report every failure to the model as text.
        return f"Error: {str(e)}"

Step 2: Build the Agent Loop

Create agent.py:

Python

1from anthropic import Anthropic
2from tools import TOOLS, execute_tool
3from config import ANTHROPIC_API_KEY, MODEL, MAX_TOKENS
4 
5client = Anthropic(api_key=ANTHROPIC_API_KEY)
6 
def run_agent(user_message: str) -> str:
    """Drive the model/tool loop for one user message and return the final text.

    Each turn sends the conversation so far. While the model stops with
    ``"tool_use"``, every requested tool is executed and the results are fed
    back as a user turn. Any other stop reason ends the loop.

    Args:
        user_message: The initial user prompt.

    Returns:
        The concatenated ``text`` of all content blocks in the final response.
    """
    messages = [{"role": "user", "content": user_message}]

    while True:
        response = client.messages.create(
            model=MODEL,
            max_tokens=MAX_TOKENS,
            tools=TOOLS,
            messages=messages,
        )

        # Anything other than a tool request means the model has finished.
        if response.stop_reason != "tool_use":
            return "".join(
                block.text for block in response.content if hasattr(block, "text")
            )

        # Run every requested tool; results are matched to requests by id.
        tool_results = [
            {
                "type": "tool_result",
                "tool_use_id": block.id,
                "content": execute_tool(block.name, block.input),
            }
            for block in response.content
            if block.type == "tool_use"
        ]

        # Record the assistant turn, then answer it with the tool results.
        messages.append({"role": "assistant", "content": response.content})
        messages.append({"role": "user", "content": tool_results})
48 
49if __name__ == "__main__":
50    result = run_agent("What is the square root of 144 plus 5 to the power of 3?")
51    print(result)

Step 3: Test Your Agent

Bash

python agent.py

Expected output (the exact wording varies between runs, but the result should be 137):

Bash

1Let me calculate that for you.
2 
3First, I'll find the square root of 144:
4sqrt(144) = 12
5 
6Then I'll calculate 5 to the power of 3:
75^3 = 125
8 
9Finally, I'll add them together:
1012 + 125 = 137
11 
12The answer is 137.

Adding More Tools

Let's expand our agent with more capabilities.

File Operations

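Add to tools.py. Afterwards, append FILE_TOOLS to TOOLS and add a branch for each new tool name in execute_tool; otherwise the agent cannot call them: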

Python

1import os
2 
3# Add to TOOLS list
4FILE_TOOLS = [
5    {
6        "name": "read_file",
7        "description": "Read the contents of a file",
8        "input_schema": {
9            "type": "object",
10            "properties": {
11                "path": {
12                    "type": "string",
13                    "description": "Path to the file to read"
14                }
15            },
16            "required": ["path"]
17        }
18    },
19    {
20        "name": "write_file",
21        "description": "Write content to a file",
22        "input_schema": {
23            "type": "object",
24            "properties": {
25                "path": {
26                    "type": "string",
27                    "description": "Path to the file to write"
28                },
29                "content": {
30                    "type": "string",
31                    "description": "Content to write to the file"
32                }
33            },
34            "required": ["path", "content"]
35        }
36    },
37    {
38        "name": "list_directory",
39        "description": "List files and directories in a path",
40        "input_schema": {
41            "type": "object",
42            "properties": {
43                "path": {
44                    "type": "string",
45                    "description": "Directory path to list"
46                }
47            },
48            "required": ["path"]
49        }
50    }
51]
52 
def read_file(path: str) -> str:
    """Return the text content of the file at *path*, decoded as UTF-8.

    The encoding is explicit so the result does not depend on the platform's
    default locale encoding.

    Args:
        path: Path of the file to read.

    Returns:
        The file's text, or ``"Error reading file: <reason>"`` on any failure
        (missing file, permission error, undecodable bytes). Never raises.
    """
    try:
        with open(path, "r", encoding="utf-8") as f:
            return f.read()
    except Exception as e:
        # Tool boundary: surface the failure to the model as text.
        return f"Error reading file: {str(e)}"
60 
def write_file(path: str, content: str) -> str:
    """Write *content* to the file at *path* as UTF-8, replacing any existing file.

    The encoding is explicit so that non-ASCII text written by the model is
    stored the same way on every platform.

    Args:
        path: Destination path. Parent directories are not created.
        content: Text to write.

    Returns:
        ``"Successfully wrote to <path>"`` on success, or
        ``"Error writing file: <reason>"`` on any failure. Never raises.
    """
    try:
        with open(path, "w", encoding="utf-8") as f:
            f.write(content)
        return f"Successfully wrote to {path}"
    except Exception as e:
        # Tool boundary: surface the failure to the model as text.
        return f"Error writing file: {str(e)}"
69 
def list_directory(path: str) -> str:
    """Return the entry names in directory *path*, one per line, sorted.

    ``os.listdir`` returns entries in arbitrary order; sorting makes the
    output deterministic for the model and for tests.

    Args:
        path: Directory to list.

    Returns:
        Newline-joined entry names (an empty string for an empty directory),
        or ``"Error listing directory: <reason>"`` on failure. Never raises.
    """
    try:
        return "\n".join(sorted(os.listdir(path)))
    except Exception as e:
        # Tool boundary: surface the failure to the model as text.
        return f"Error listing directory: {str(e)}"

Web Search

Add web search capability (fetch_url needs the requests package: pip install requests):

Python

1import requests
2 
3WEB_TOOLS = [
4    {
5        "name": "web_search",
6        "description": "Search the web for information",
7        "input_schema": {
8            "type": "object",
9            "properties": {
10                "query": {
11                    "type": "string",
12                    "description": "Search query"
13                }
14            },
15            "required": ["query"]
16        }
17    },
18    {
19        "name": "fetch_url",
20        "description": "Fetch the content of a web page",
21        "input_schema": {
22            "type": "object",
23            "properties": {
24                "url": {
25                    "type": "string",
26                    "description": "URL to fetch"
27                }
28            },
29            "required": ["url"]
30        }
31    }
32]
33 
def web_search(query: str) -> str:
    """Search the web using a search API (placeholder).

    This is a stub: wire it up to your preferred provider (SerpAPI, Brave
    Search, Tavily, etc.). Until then it returns an explanatory error string
    rather than ``None``, so the return type holds and a tool result built
    from it always contains text.

    Args:
        query: Search query text.

    Returns:
        An error message stating that search is not configured.
    """
    return (
        "Error: web_search is not implemented. "
        f"Configure a search API to run the query: {query}"
    )
39 
def fetch_url(url: str) -> str:
    """Download *url* and return the start of the response body.

    Args:
        url: Address to request (10 second timeout).

    Returns:
        At most the first 5000 characters of the body, or
        ``"Error fetching URL: <reason>"`` when the request fails or the
        server answers with an error status. Never raises.
    """
    max_chars = 5000  # Limit response size
    try:
        page = requests.get(url, timeout=10)
        page.raise_for_status()
        body = page.text
        return body[:max_chars]
    except Exception as exc:
        return f"Error fetching URL: {str(exc)}"

Code Execution

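Add code execution. Note that this runs model-generated code with your own user privileges and no sandbox, so only use it inside a container or other isolated environment: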

Python

1import subprocess
2import tempfile
3 
4CODE_TOOLS = [
5    {
6        "name": "run_python",
7        "description": "Execute Python code and return the output",
8        "input_schema": {
9            "type": "object",
10            "properties": {
11                "code": {
12                    "type": "string",
13                    "description": "Python code to execute"
14                }
15            },
16            "required": ["code"]
17        }
18    }
19]
20 
def run_python(code: str) -> str:
    """Run *code* in a separate Python process and return its output.

    The code is written to a temporary ``.py`` file, executed with the same
    interpreter that is running this program, and the temporary file is
    removed afterwards. The child process is NOT sandboxed: it runs with the
    caller's privileges. The only limit applied here is a 30 second timeout.

    Args:
        code: Python source to execute.

    Returns:
        Captured stdout, followed by an ``Errors:`` section when stderr is
        non-empty. A placeholder message when there is no output, or an
        ``"Error ..."`` string on timeout or failure. Never raises.
    """
    import os
    import sys

    temp_path = None
    try:
        # delete=False: the file must outlive the `with` so the child can open it.
        with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False) as f:
            temp_path = f.name
            f.write(code)

        # sys.executable keeps the child on the current interpreter/venv;
        # a bare "python" may be missing or point at a different installation.
        result = subprocess.run(
            [sys.executable or 'python', temp_path],
            capture_output=True,
            text=True,
            timeout=30
        )

        output = result.stdout
        if result.stderr:
            output += f"\nErrors:\n{result.stderr}"

        return output or "Code executed successfully (no output)"

    except subprocess.TimeoutExpired:
        return "Error: Code execution timed out (30s limit)"
    except Exception as e:
        return f"Error executing code: {str(e)}"
    finally:
        # Always remove the script, whichever path we took above.
        if temp_path is not None:
            try:
                os.unlink(temp_path)
            except OSError:
                pass  # best-effort cleanup; the result has already been decided

Agent Architecture Patterns

Pattern 1: ReAct (Reason + Act)

The agent explicitly reasons before each action:

Python

1SYSTEM_PROMPT = """You are a helpful assistant that solves problems step by step.
2 
3For each step:
41. THOUGHT: Explain your reasoning
52. ACTION: Choose a tool to use
63. OBSERVATION: Analyze the result
74. REPEAT until you have the answer
8 
9Always think before you act."""
10 
def react_agent(user_message: str) -> str:
    """Send one ReAct-style request: SYSTEM_PROMPT plus the tool list.

    Only the first model call is shown here. The tool-handling loop is
    elided below, so as written this function falls off the end and
    returns None.
    """
    messages = [
        {"role": "user", "content": user_message}
    ]

    response = client.messages.create(
        model=MODEL,
        max_tokens=MAX_TOKENS,
        system=SYSTEM_PROMPT,
        tools=TOOLS,
        messages=messages
    )
    # ... rest of agent loop

Pattern 2: Plan and Execute

The agent creates a plan, then executes it:

Python

1PLANNER_PROMPT = """Create a step-by-step plan to accomplish this task.
2Return a numbered list of specific actions."""
3 
4EXECUTOR_PROMPT = """Execute this step of the plan using the available tools."""
5 
def plan_and_execute(task: str) -> str:
    """Plan a task with one model call, run each planned step, and combine the results.

    Relies on helpers that are not defined in this snippet: ``extract_text``,
    ``parse_plan``, ``execute_step`` and ``synthesize``.

    Args:
        task: Description of the task to accomplish.

    Returns:
        Whatever ``synthesize`` produces from the list of per-step results.
    """
    # Phase 1: ask the planner for a numbered list of actions.
    planning_reply = client.messages.create(
        model=MODEL,
        max_tokens=1000,
        system=PLANNER_PROMPT,
        messages=[{"role": "user", "content": task}],
    )
    steps = parse_plan(extract_text(planning_reply))

    # Phase 2: run the steps in plan order, collecting each outcome.
    step_outputs = [execute_step(step) for step in steps]

    # Phase 3: merge the outcomes into one answer.
    return synthesize(step_outputs)

Pattern 3: Tree of Thoughts

Generate several candidate approaches, score each one, and execute the best:

Python

def tree_of_thoughts(problem: str, num_branches: int = 3) -> str:
    """Generate several candidate approaches, score them, and execute the top one.

    Relies on helpers that are not defined in this snippet:
    ``generate_approach``, ``evaluate_approach`` and ``execute_approach``.

    Args:
        problem: Problem statement handed to each helper.
        num_branches: How many candidate approaches to generate.

    Returns:
        The result of executing the highest-scoring approach (the first one
        wins on ties).
    """
    # Branch: produce all candidates one after another.
    candidates = [generate_approach(problem) for _ in range(num_branches)]

    # Score: pair each candidate with its evaluation.
    scored = [(candidate, evaluate_approach(candidate)) for candidate in candidates]

    # Select and run the winner.
    winner, _ = max(scored, key=lambda pair: pair[1])
    return execute_approach(winner)

Memory Management

Short-term Memory

Keep recent context in the message history:

Python

class ShortTermMemory:
    """Bounded message history: keeps only the most recent ``max_messages`` entries."""

    def __init__(self, max_messages: int = 20):
        """Create an empty history that retains at most *max_messages* entries."""
        self.messages = []
        self.max_messages = max_messages

    def add(self, role: str, content: str):
        """Append a message and drop the oldest entries beyond the limit.

        NOTE(review): trimming can leave an assistant message at the front of
        the history; confirm whether the consuming API requires a leading
        user message.
        """
        self.messages.append({"role": role, "content": content})
        # Slice from the front by the overflow count. A negative-index slice
        # (messages[-max:]) keeps everything when max is 0, because -0 == 0.
        excess = len(self.messages) - self.max_messages
        if excess > 0:
            self.messages = self.messages[excess:]

    def get_messages(self):
        """Return a shallow copy so callers cannot grow or shrink the stored list."""
        return self.messages.copy()

Long-term Memory with Vector Search

Store and retrieve relevant information:

Python

1# pip install chromadb sentence-transformers
2import chromadb
3from sentence_transformers import SentenceTransformer
4 
class LongTermMemory:
    """Vector-search memory backed by a Chroma collection and a sentence encoder."""

    def __init__(self, collection_name: str = "agent_memory"):
        """Open (or create) the named collection and load the embedding model."""
        self.client = chromadb.Client()
        # get_or_create: creating a collection whose name already exists fails,
        # which would break a second LongTermMemory using the same name.
        self.collection = self.client.get_or_create_collection(collection_name)
        self.encoder = SentenceTransformer('all-MiniLM-L6-v2')

    def store(self, text: str, metadata: dict = None):
        """Store *text* with its embedding; storing the same text again overwrites it.

        Args:
            text: The text to remember.
            metadata: Optional metadata dict stored alongside the text.
        """
        import hashlib

        embedding = self.encoder.encode(text).tolist()
        # Content-derived ID: the built-in hash() of a str is randomized per
        # process, so it cannot serve as a stable identifier.
        doc_id = hashlib.sha256(text.encode("utf-8")).hexdigest()
        self.collection.upsert(
            embeddings=[embedding],
            documents=[text],
            # Omit metadata entirely when none is given; an empty dict may be
            # rejected by Chroma's validation.
            metadatas=[metadata] if metadata else None,
            ids=[doc_id]
        )

    def search(self, query: str, n_results: int = 5) -> list:
        """Return the stored documents most similar to *query*.

        Args:
            query: Text to search for.
            n_results: Maximum number of documents to return.

        Returns:
            The list of matching document texts for this single query.
        """
        embedding = self.encoder.encode(query).tolist()
        results = self.collection.query(
            query_embeddings=[embedding],
            n_results=n_results
        )
        # One query was sent, so take the first (only) result list.
        return results["documents"][0]

Memory-Augmented Agent

Python

class MemoryAgent:
    """Agent wrapper combining a rolling message history with searchable long-term memory."""

    def __init__(self):
        """Create fresh short-term and long-term memory stores."""
        self.short_term = ShortTermMemory()
        self.long_term = LongTermMemory()

    def run(self, user_message: str) -> str:
        """Answer *user_message* using recalled context, then remember the reply.

        Relies on ``run_agent_with_system``, which is not defined in this snippet.

        Args:
            user_message: The user's input for this turn.

        Returns:
            The agent's response text.
        """
        # Recall anything relevant before the new message enters the history.
        recalled = self.long_term.search(user_message)
        context = "\n".join(recalled)

        self.short_term.add("user", user_message)

        # Only build a system prompt when something was actually recalled.
        if context:
            system = f"Relevant context:\n{context}"
        else:
            system = ""

        reply = run_agent_with_system(system, self.short_term.get_messages())

        # Persist the reply in both memories.
        self.long_term.store(reply, {"type": "assistant_response"})
        self.short_term.add("assistant", reply)
        return reply

Error Handling and Recovery

Retry Logic

Python

1import time
2from anthropic import APIError, RateLimitError
3 
def run_agent_with_retry(user_message: str, max_retries: int = 3) -> str:
    """Run the agent, retrying on rate limits and API errors.

    Rate limits are retried after an exponential backoff (1s, 2s, 4s, ...).
    Other API errors are retried immediately and re-raised on the final
    attempt. Any other exception is re-raised at once.

    Args:
        user_message: Prompt passed to ``run_agent``.
        max_retries: Maximum number of attempts.

    Returns:
        The agent's answer from the first successful attempt.

    Raises:
        APIError: If the final attempt fails with a non-rate-limit API error.
        Exception: ``"Max retries exceeded"`` when every attempt was rate
            limited (chained to the last rate-limit error) or when
            ``max_retries`` is 0.
    """
    last_error = None

    for attempt in range(max_retries):
        final_attempt = attempt == max_retries - 1
        try:
            return run_agent(user_message)

        except RateLimitError as e:
            last_error = e
            if final_attempt:
                # No attempt follows, so waiting would only delay the failure.
                break
            wait_time = 2 ** attempt  # Exponential backoff
            print(f"Rate limited. Waiting {wait_time}s...")
            time.sleep(wait_time)

        except APIError as e:
            print(f"API error: {e}")
            if final_attempt:
                raise

        except Exception as e:
            print(f"Unexpected error: {e}")
            raise

    # Chain the underlying error so the cause stays visible in the traceback.
    raise Exception("Max retries exceeded") from last_error

Loop Detection

Python

def run_agent_with_loop_detection(user_message: str, max_iterations: int = 20) -> str:
    """Run the agent loop with an iteration cap and a nudge when tool calls repeat.

    Relies on ``extract_tool_calls`` and ``extract_text``, which are not
    defined in this snippet. Normal tool processing is elided here.

    Args:
        user_message: The initial user prompt.
        max_iterations: Upper bound on model calls.

    Returns:
        The model's final text, or ``"Agent reached maximum iterations"`` when
        the cap is hit first.
    """
    messages = [{"role": "user", "content": user_message}]
    tool_calls = []

    for _ in range(max_iterations):
        response = client.messages.create(
            model=MODEL,
            max_tokens=MAX_TOKENS,
            tools=TOOLS,
            messages=messages,
        )

        # Any stop reason other than a tool request ends the run.
        if response.stop_reason != "tool_use":
            return extract_text(response)

        current_calls = extract_tool_calls(response)

        if not detect_loop(tool_calls, current_calls):
            tool_calls.extend(current_calls)
            # Process tools normally
            # ...
        else:
            # Repetition detected: ask the model to change course.
            messages.append({
                "role": "user",
                "content": "You seem to be repeating the same action. Please try a different approach or provide your final answer."
            })

    return "Agent reached maximum iterations"
37 
def detect_loop(history: list, current: list, window: int = 3) -> bool:
    """Report whether *current* repeats the most recent calls in *history*.

    Each call is a dict with ``"name"`` and ``"input"`` keys. The current
    batch counts as a loop when it matches, call for call, the same number of
    trailing entries in the history.

    Args:
        history: Previously recorded tool calls, oldest first.
        current: Tool calls requested in the latest response.
        window: Minimum history length before any loop is reported.

    Returns:
        True when the current batch duplicates the tail of the history.
        False when there is nothing to compare or too little history.
    """
    # An empty batch cannot repeat anything; all() over no pairs would
    # otherwise report a loop.
    if not current:
        return False

    # Need at least `window` entries, and enough to cover the whole batch so
    # that zip() never silently truncates the comparison.
    if len(history) < max(window, len(current)):
        return False

    previous = history[-len(current):]
    return all(
        c["name"] == p["name"] and c["input"] == p["input"]
        for c, p in zip(current, previous)
    )

Graceful Degradation

Python

def execute_tool_safely(name: str, inputs: dict) -> str:
    """Run a tool and convert any exception into a message for the model.

    Args:
        name: Tool name to execute.
        inputs: Arguments for the tool.

    Returns:
        The tool's result, or a message naming the tool and the error and
        suggesting an alternative approach. Never raises.
    """
    try:
        return execute_tool(name, inputs)
    except Exception as exc:
        # Record the failure locally, then tell the model what went wrong.
        print(f"Tool {name} failed: {exc}")
        return f"Tool '{name}' encountered an error: {str(exc)}. Please try an alternative approach."

Building a Complete Agent

Let's build a full-featured research agent:

Ask Claude Code:

Bash

1Create a complete research agent that:
2 
31. Takes a research question
42. Searches the web for information
53. Reads and analyzes sources
64. Takes notes and organizes findings
75. Synthesizes a comprehensive answer with citations
8 
9Include:
10- Multiple tools (search, fetch, note-taking)
11- Memory management
12- Error handling
13- Progress updates
14- Final report generation
15 
16Make it production-ready with logging and configuration.

Expected Structure

Python

1# research_agent.py
2 
3import logging
4from dataclasses import dataclass
5from typing import List
6from anthropic import Anthropic
7 
8# Configure logging
9logging.basicConfig(level=logging.INFO)
10logger = logging.getLogger(__name__)
11 
@dataclass
class ResearchNote:
    """A single note record; ResearchAgent keeps these in its ``notes`` list."""
    source: str  # presumably the URL the note came from; confirm against _take_notes
    content: str
    relevance: float  # scale is not defined in this file; TODO confirm
17 
class ResearchAgent:
    """Skeleton of a research agent: generate queries, gather sources, synthesize a report.

    The underscore-prefixed phase methods are placeholders and raise
    ``NotImplementedError`` until they are implemented.
    """

    def __init__(self):
        """Create the API client and empty note and visited-source stores."""
        self.client = Anthropic()
        self.notes: List[ResearchNote] = []
        self.sources_visited: set = set()

    def research(self, question: str) -> str:
        """Conduct research on a question and return the synthesized report.

        Args:
            question: The research question.

        Returns:
            The report produced by ``_synthesize``.
        """
        logger.info(f"Starting research: {question}")

        # Phase 1: Generate search queries
        queries = self._generate_queries(question)

        # Phase 2: Search and gather information
        for query in queries:
            for result in self._search(query):
                # _search is declared to return dicts, so read the URL by key.
                url = result["url"]
                if url in self.sources_visited:
                    continue  # never fetch the same source twice
                content = self._fetch_and_analyze(url)
                self._take_notes(url, content)
                self.sources_visited.add(url)

        # Phase 3: Synthesize findings
        report = self._synthesize(question)

        logger.info(f"Research complete. {len(self.notes)} notes from {len(self.sources_visited)} sources")

        return report

    def _generate_queries(self, question: str) -> List[str]:
        """Generate search queries for the research question."""
        raise NotImplementedError("_generate_queries is not implemented")

    def _search(self, query: str) -> List[dict]:
        """Search the web; each result dict must provide a ``"url"`` key."""
        raise NotImplementedError("_search is not implemented")

    def _fetch_and_analyze(self, url: str) -> str:
        """Fetch and extract key information from a URL."""
        raise NotImplementedError("_fetch_and_analyze is not implemented")

    def _take_notes(self, source: str, content: str):
        """Extract and store relevant notes."""
        raise NotImplementedError("_take_notes is not implemented")

    def _synthesize(self, question: str) -> str:
        """Synthesize notes into a comprehensive answer."""
        raise NotImplementedError("_synthesize is not implemented")

Testing Your Agent

Unit Tests

Python

1import pytest
2from tools import calculate, read_file, execute_tool
3 
4def test_calculator():
5    assert calculate("2 + 2") == "4"
6    assert calculate("sqrt(16)") == "4.0"
7    assert "Error" in calculate("invalid")
8 
9def test_execute_tool():
10    result = execute_tool("calculator", {"expression": "10 * 5"})
11    assert result == "50"
12 
13def test_unknown_tool():
14    result = execute_tool("unknown", {})
15    assert "Unknown tool" in result

Integration Tests

Python

1def test_agent_simple_query():
2    result = run_agent("What is 15 + 27?")
3    assert "42" in result
4 
5def test_agent_multi_step():
6    result = run_agent("Calculate 10^2 and then take the square root")
7    assert "10" in result
8 
9def test_agent_handles_errors():
10    result = run_agent("Divide 100 by 0")
11    assert "error" in result.lower() or "cannot" in result.lower()

Behavioral Tests

Python

def test_agent_uses_tools():
    """Verify agent actually uses tools, not just making up answers."""
    # patch is not imported anywhere in this snippet; import it here so the
    # test runs when pasted as-is.
    from unittest.mock import patch

    with patch('tools.calculate') as mock_calc:
        mock_calc.return_value = "42"
        run_agent("What is sqrt(1764)?")
        mock_calc.assert_called()
7 
def test_agent_stops_appropriately():
    """Verify agent doesn't loop infinitely."""
    # time is not imported anywhere in this snippet; import it here so the
    # test runs when pasted as-is.
    import time

    # A monotonic clock is unaffected by system clock adjustments.
    start = time.monotonic()
    run_agent("What is 1+1?")
    duration = time.monotonic() - start
    assert duration < 30  # Should complete quickly

Next Steps

Now that you can build agents, continue with:

Using Agents: Master prompting and usage patterns
Agent Products: Build production systems

Practice Projects

File Organizer Agent: Organize files based on content
Code Review Agent: Analyze code for issues
Meeting Assistant: Summarize transcripts and extract action items
Data Analyst: Query databases and generate reports

Advanced Topics

Multi-agent collaboration
Human-in-the-loop workflows
Streaming responses
Async execution
Cost optimization

Start building! Take one of the examples above and extend it with your own tools and logic. The best way to learn agent development is through experimentation.