Building Agents
Create AI agents from scratch using the Claude API
Building Agents
Time to get your hands dirty. This guide takes you from "hello world" to a working agent that can use tools, manage state, and handle errors gracefully.
Setting Up
Prerequisites
# Check Python version (3.9+ required)python --version # Create projectmkdir my-agent && cd my-agentpython -m venv venvsource venv/bin/activate # Windows: venv\Scripts\activate # Install dependenciespip install anthropic python-dotenvProject Structure
- agent.py
- tools.py
- memory.py
- config.py
- .env
- requirements.txt
Configuration
Create .env:
ANTHROPIC_API_KEY=your-api-key-here

Create config.py:
import os

from dotenv import load_dotenv

# Load the variables defined in the .env file before reading any setting.
load_dotenv()

# API credential read from the environment; None when the variable is unset.
ANTHROPIC_API_KEY = os.environ.get("ANTHROPIC_API_KEY")

# Model identifier passed to every request.
MODEL = "claude-sonnet-4-20250514"

# Value passed as max_tokens on every request.
MAX_TOKENS = 4096

# Next section: Your First Agent
Let's build an agent that can do math. Simple, but it demonstrates all the core concepts.
- 1
Define a Tool
Create
# tools.py
import ast
import math
import operator
from typing import Any

TOOLS = [
    {
        "name": "calculator",
        "description": "Perform mathematical calculations. Supports arithmetic, powers, roots, trig.",
        "input_schema": {
            "type": "object",
            "properties": {
                "expression": {
                    "type": "string",
                    "description": "Math expression to evaluate, e.g., '2 + 2' or 'sqrt(16)'",
                }
            },
            "required": ["expression"],
        },
    }
]

# The only names an expression may reference.
_ALLOWED_NAMES = {
    "abs": abs, "round": round, "min": min, "max": max,
    "sqrt": math.sqrt, "log": math.log, "pow": pow,
    "sin": math.sin, "cos": math.cos, "tan": math.tan,
    "pi": math.pi, "e": math.e,
}

# Arithmetic operators the evaluator understands.
_BINARY_OPS = {
    ast.Add: operator.add,
    ast.Sub: operator.sub,
    ast.Mult: operator.mul,
    ast.Div: operator.truediv,
    ast.FloorDiv: operator.floordiv,
    ast.Mod: operator.mod,
    ast.Pow: operator.pow,
}

_UNARY_OPS = {
    ast.UAdd: operator.pos,
    ast.USub: operator.neg,
}


def execute_tool(name: str, inputs: dict) -> Any:
    """Route a tool call to its implementation.

    Args:
        name: Tool name as it appears in TOOLS.
        inputs: Tool arguments; the calculator reads inputs["expression"].

    Returns:
        The tool's result, or the string "Unknown tool: <name>" when no
        tool with that name exists.
    """
    if name == "calculator":
        return calculate(inputs["expression"])
    return f"Unknown tool: {name}"


def _evaluate(node: ast.AST) -> Any:
    """Recursively evaluate a parsed expression, rejecting anything not whitelisted."""
    if isinstance(node, ast.Expression):
        return _evaluate(node.body)

    if isinstance(node, ast.Constant):
        if isinstance(node.value, (int, float, complex)):
            return node.value
        raise ValueError(f"Unsupported constant: {node.value!r}")

    if isinstance(node, ast.Name):
        if node.id in _ALLOWED_NAMES:
            return _ALLOWED_NAMES[node.id]
        raise NameError(f"name '{node.id}' is not defined")

    if isinstance(node, ast.BinOp):
        # '^' is XOR in Python, which is almost never what a math
        # expression means; say so instead of returning a wrong number.
        if isinstance(node.op, ast.BitXor):
            raise ValueError("'^' is not supported; use '**' for powers")
        binary = _BINARY_OPS.get(type(node.op))
        if binary is None:
            raise ValueError(f"Unsupported operator: {type(node.op).__name__}")
        return binary(_evaluate(node.left), _evaluate(node.right))

    if isinstance(node, ast.UnaryOp):
        unary = _UNARY_OPS.get(type(node.op))
        if unary is None:
            raise ValueError(f"Unsupported operator: {type(node.op).__name__}")
        return unary(_evaluate(node.operand))

    if isinstance(node, ast.Call):
        # Only plain calls such as sqrt(16); attribute access like
        # x.__class__ never reaches a callable.
        if not isinstance(node.func, ast.Name):
            raise ValueError("Only direct calls to supported functions are allowed")
        func = _evaluate(node.func)
        if not callable(func):
            raise TypeError(f"'{node.func.id}' is not callable")
        args = [_evaluate(arg) for arg in node.args]
        kwargs = {}
        for keyword in node.keywords:
            if keyword.arg is None:
                raise ValueError("Keyword unpacking is not supported")
            kwargs[keyword.arg] = _evaluate(keyword.value)
        return func(*args, **kwargs)

    if isinstance(node, (ast.Tuple, ast.List)):
        # Lets min([1, 2, 3]) / max((1, 2, 3)) work.
        return [_evaluate(item) for item in node.elts]

    raise ValueError(f"Unsupported expression: {type(node).__name__}")


def calculate(expression: str) -> str:
    """Evaluate a math expression without using eval().

    The expression is parsed with ast and walked by a small evaluator that
    accepts only numeric literals, the arithmetic operators
    (+ - * / // % **), unary +/-, list/tuple literals, and direct calls to
    the names in _ALLOWED_NAMES. Attribute access, subscripts, lambdas and
    comprehensions are rejected, which closes the escape routes that
    eval() with an emptied __builtins__ leaves open.

    NOTE: this restricts *what* can run, not how long it runs; an
    expression such as 9**9**9 can still be expensive to compute.

    Args:
        expression: The expression text, e.g. "2 + 2" or "sqrt(16)".

    Returns:
        str(result) on success, otherwise "Error: <message>". Errors are
        returned rather than raised so the agent can read them and retry.
    """
    try:
        # eval() tolerated surrounding whitespace; ast.parse does not.
        tree = ast.parse(expression.strip(), mode="eval")
        return str(_evaluate(tree))
    except Exception as e:
        return f"Error: {str(e)}"

# Step 2
Build the Agent Loop
Create
# agent.py
from anthropic import Anthropic

from tools import TOOLS, execute_tool
from config import ANTHROPIC_API_KEY, MODEL, MAX_TOKENS

client = Anthropic(api_key=ANTHROPIC_API_KEY)


def run_agent(user_message: str, max_iterations: int = 20) -> str:
    """Run the agent loop for a single user message.

    Each iteration sends the conversation to Claude. While the response
    stops with "tool_use", every requested tool is executed and its result
    is appended to the conversation; the first response that stops for any
    other reason ends the loop.

    Args:
        user_message: The user's request.
        max_iterations: Upper bound on model calls, so a model that keeps
            requesting tools cannot loop (and bill) forever.

    Returns:
        The concatenated text blocks of the final response, or
        "Agent reached max iterations" if the bound is hit first.
    """
    messages = [{"role": "user", "content": user_message}]

    for _ in range(max_iterations):
        # Call Claude
        response = client.messages.create(
            model=MODEL,
            max_tokens=MAX_TOKENS,
            tools=TOOLS,
            messages=messages,
        )

        if response.stop_reason != "tool_use":
            # Extract final text and return
            return "".join(
                block.text for block in response.content if hasattr(block, "text")
            )

        # Run every tool the model asked for, pairing each result with the
        # id of the request that produced it.
        tool_results = []
        for block in response.content:
            if block.type == "tool_use":
                result = execute_tool(block.name, block.input)
                tool_results.append({
                    "type": "tool_result",
                    "tool_use_id": block.id,
                    "content": result,
                })

        # Add assistant response and tool results
        messages.append({"role": "assistant", "content": response.content})
        messages.append({"role": "user", "content": tool_results})

    return "Agent reached max iterations"


if __name__ == "__main__":
    result = run_agent("What is sqrt(144) + 5^3?")
    print(result)

# Step 3
Test It
```bash
python agent.py
```

Expected output:
```
Let me calculate that for you.
First, sqrt(144) = 12
Then, 5^3 = 125
Finally, 12 + 125 = 137
The answer is 137.
```
Success
Congratulations! You just built an agent. It observed (read your question), thought (decided to use calculator), acted (called the tool), and responded. That's the entire agent loop.
Adding More Tools
One tool is a start. Real agents need more capabilities.
File Operations
import os

FILE_TOOLS = [
    {
        "name": "read_file",
        "description": "Read contents of a file",
        "input_schema": {
            "type": "object",
            "properties": {
                "path": {"type": "string", "description": "Path to file"}
            },
            "required": ["path"],
        },
    },
    {
        "name": "write_file",
        "description": "Write content to a file",
        "input_schema": {
            "type": "object",
            "properties": {
                "path": {"type": "string", "description": "Path to file"},
                "content": {"type": "string", "description": "Content to write"},
            },
            "required": ["path", "content"],
        },
    },
    {
        "name": "list_directory",
        "description": "List files in a directory",
        "input_schema": {
            "type": "object",
            "properties": {
                "path": {"type": "string", "description": "Directory path"}
            },
            "required": ["path"],
        },
    },
]


def read_file(path: str) -> str:
    """Return the text content of the file at *path*.

    The file is decoded as UTF-8 explicitly so the result does not depend
    on the platform's default encoding.

    Returns:
        The file content, or "Error: <message>" if reading fails.
    """
    # Broad catch is deliberate: this is a tool boundary, and the agent
    # needs the failure as text it can react to.
    try:
        with open(path, "r", encoding="utf-8") as f:
            return f.read()
    except Exception as e:
        return f"Error: {str(e)}"


def write_file(path: str, content: str) -> str:
    """Write *content* to the file at *path* as UTF-8, replacing any existing file.

    Returns:
        A confirmation with the number of characters written, or
        "Error: <message>" if writing fails.
    """
    try:
        with open(path, "w", encoding="utf-8") as f:
            f.write(content)
        return f"Wrote {len(content)} characters to {path}"
    except Exception as e:
        return f"Error: {str(e)}"


def list_directory(path: str) -> str:
    """List the entries of the directory at *path*, one name per line.

    Names are sorted because os.listdir returns them in arbitrary order.

    Returns:
        The newline-joined names, or "Error: <message>" if listing fails.
    """
    try:
        return "\n".join(sorted(os.listdir(path)))
    except Exception as e:
        return f"Error: {str(e)}"

# Next section: Web Fetching
import requests

WEB_TOOLS = [
    {
        "name": "fetch_url",
        "description": "Fetch content from a URL",
        "input_schema": {
            "type": "object",
            "properties": {
                "url": {"type": "string", "description": "URL to fetch"}
            },
            "required": ["url"],
        },
    }
]


def fetch_url(url: str) -> str:
    """Download *url* and return the start of its body.

    The request uses a 10-second timeout, and HTTP error statuses are
    turned into failures via raise_for_status().

    Returns:
        The first 5000 characters of the response text, or
        "Error: <message>" if anything goes wrong.
    """
    size_limit = 5000  # Limit size
    try:
        reply = requests.get(url, timeout=10)
        reply.raise_for_status()
        body = reply.text
        return body[:size_limit]
    except Exception as exc:
        return f"Error: {str(exc)}"

# Next section: Code Execution
import contextlib
import os
import subprocess
import sys
import tempfile

CODE_TOOLS = [
    {
        "name": "run_python",
        "description": "Execute Python code and return output",
        "input_schema": {
            "type": "object",
            "properties": {
                "code": {"type": "string", "description": "Python code"}
            },
            "required": ["code"],
        },
    }
]


def run_python(code: str) -> str:
    """Run *code* in a separate Python process and return what it printed.

    The code is written to a temporary .py file, executed with the same
    interpreter that is running this program, and the file is removed
    afterwards whether or not the run succeeded.

    SECURITY: this executes arbitrary model-supplied code with the
    privileges of the current user. The 30-second timeout is the only
    limit applied here; it is not a sandbox.

    Returns:
        Captured stdout, followed by an "Errors:" section when stderr is
        non-empty; "Code executed (no output)" when nothing was printed;
        or an "Error: ..." string on timeout or any other failure.
    """
    temp_path = None
    try:
        # UTF-8 is Python's default source encoding, so write the file
        # that way instead of relying on the locale.
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".py", delete=False, encoding="utf-8"
        ) as f:
            f.write(code)
            temp_path = f.name

        # sys.executable rather than "python": the latter may be missing
        # from PATH or point at a different installation.
        result = subprocess.run(
            [sys.executable, temp_path],
            capture_output=True,
            text=True,
            timeout=30,
        )

        output = result.stdout
        if result.stderr:
            output += f"\nErrors:\n{result.stderr}"
        return output or "Code executed (no output)"
    except subprocess.TimeoutExpired:
        return "Error: Execution timed out (30s limit)"
    except Exception as e:
        return f"Error: {str(e)}"
    finally:
        # delete=False means nothing else removes the file; cleanup is
        # best-effort so a failed unlink never masks the real result.
        if temp_path is not None:
            with contextlib.suppress(OSError):
                os.remove(temp_path)

# Next section: Architecture Patterns
Three patterns dominate agent design. Choose based on your use case.
Pattern 1: ReAct (Reason + Act)
The agent explicitly thinks before each action:
SYSTEM_PROMPT = """You solve problems step by step. For each step:
1. THOUGHT: What do I need to do?
2. ACTION: Use a tool
3. OBSERVATION: What did I learn?
4. REPEAT until done

Always think before you act."""


def react_agent(task: str) -> str:
    """Start a ReAct-style run for *task*.

    Sends the task to Claude with SYSTEM_PROMPT, which asks the model to
    state a thought before each tool call. This snippet shows only the
    first request; the tool-handling loop is left to the reader and is
    the same as in run_agent.

    Relies on client, MODEL, MAX_TOKENS and TOOLS being in scope, as they
    are in agent.py.
    """
    messages = [{"role": "user", "content": task}]

    response = client.messages.create(
        model=MODEL,
        max_tokens=MAX_TOKENS,  # required by the Messages API
        system=SYSTEM_PROMPT,
        tools=TOOLS,
        messages=messages,
    )
    # ... continue agent loop

# Next section: Pattern 2: Plan and Execute
Create a plan first, then execute it:
def plan_and_execute(task: str) -> str:
    """Ask Claude for a plan, run each step, then combine the results.

    extract_text, parse_plan, execute_step and synthesize_results are
    helpers this snippet expects to exist elsewhere; client, MODEL and
    MAX_TOKENS are expected to be in scope as in agent.py.

    Args:
        task: The task to plan and carry out.

    Returns:
        Whatever synthesize_results returns for the per-step results.
    """
    # Step 1: Create plan
    plan_response = client.messages.create(
        model=MODEL,
        max_tokens=MAX_TOKENS,  # required by the Messages API
        system="Create a step-by-step plan. Return numbered steps.",
        messages=[{"role": "user", "content": task}],
    )
    plan = extract_text(plan_response)

    # Step 2: Execute each step
    results = [execute_step(step) for step in parse_plan(plan)]

    # Step 3: Synthesize
    return synthesize_results(results)

# Next section: Pattern 3: Tree of Thoughts
Explore multiple approaches, pick the best:
def tree_of_thoughts(problem: str, branches: int = 3) -> str:
    """Generate several approaches to *problem*, score them, and run the best.

    generate_approach, evaluate and execute_approach are helpers this
    snippet expects to exist elsewhere.

    Args:
        problem: The problem statement handed to generate_approach.
        branches: How many candidate approaches to generate.

    Returns:
        The result of execute_approach for the highest-scoring candidate.
    """
    # Generate approaches
    candidates = []
    for _ in range(branches):
        candidates.append(generate_approach(problem))

    # Evaluate each
    ratings = [evaluate(candidate) for candidate in candidates]
    scored = list(zip(candidates, ratings))

    # Execute best
    winner, _score = max(scored, key=lambda pair: pair[1])
    return execute_approach(winner)

# Next section: Memory Management
Agents need memory to work across steps and sessions.
Short-term Memory
Keep recent context in the message history:
class ShortTermMemory:
    """Rolling window over the most recent conversation messages."""

    def __init__(self, max_messages: int = 20):
        """Create an empty memory that keeps at most *max_messages* entries."""
        self.messages = []
        self.max_messages = max_messages

    def add(self, role: str, content: str):
        """Append a message and drop the oldest ones beyond the limit.

        Args:
            role: Message role, e.g. "user" or "assistant".
            content: Message text.
        """
        self.messages.append({"role": role, "content": content})
        # Slice from the front by the overflow count. The more obvious
        # messages[-max_messages:] keeps *everything* when max_messages
        # is 0, because -0 == 0.
        overflow = len(self.messages) - self.max_messages
        if overflow > 0:
            self.messages = self.messages[overflow:]

    def get_messages(self):
        """Return a shallow copy of the stored messages, oldest first."""
        return self.messages.copy()

# Next section: Long-term Memory with Vectors
Store and retrieve by semantic similarity:
# pip install chromadb sentence-transformers
import hashlib

import chromadb
from sentence_transformers import SentenceTransformer


class LongTermMemory:
    """Stores text with embeddings and retrieves it by semantic similarity."""

    def __init__(self):
        """Open the "agent_memory" collection and load the embedding model."""
        self.client = chromadb.Client()
        # get_or_create: create_collection fails when the collection
        # already exists, e.g. when a second instance is constructed.
        self.collection = self.client.get_or_create_collection("agent_memory")
        self.encoder = SentenceTransformer('all-MiniLM-L6-v2')

    def store(self, text: str, metadata: dict = None):
        """Embed *text* and save it, together with optional *metadata*.

        Storing the same text again overwrites the earlier entry instead
        of creating a duplicate.
        """
        embedding = self.encoder.encode(text).tolist()
        # A content digest gives the same id for the same text in every
        # process; the built-in hash() of a str is salted per process.
        doc_id = hashlib.sha256(text.encode("utf-8")).hexdigest()
        self.collection.upsert(
            ids=[doc_id],
            embeddings=[embedding],
            documents=[text],
            # Omit metadata entirely rather than sending an empty dict.
            metadatas=[metadata] if metadata else None,
        )

    def search(self, query: str, n: int = 5) -> list:
        """Return up to *n* stored texts most similar to *query*.

        Returns an empty list when nothing has been stored yet.
        """
        stored = self.collection.count()
        if stored == 0:
            return []
        embedding = self.encoder.encode(query).tolist()
        results = self.collection.query(
            query_embeddings=[embedding],
            # Never ask for more neighbours than there are entries.
            n_results=min(n, stored),
        )
        # One query was sent, so take the first (only) result list.
        return results["documents"][0]

# Next section: Memory-Augmented Agent
class MemoryAgent:
    """Agent that combines a short-term message window with long-term recall."""

    def __init__(self):
        """Create fresh short-term and long-term memories."""
        self.short_term = ShortTermMemory()
        self.long_term = LongTermMemory()

    def run(self, message: str) -> str:
        """Answer *message*, using stored memories as extra context.

        Looks up related long-term memories, records the user message in
        short-term memory, calls run_agent_with_system (expected to be
        defined elsewhere), and records the response in both memories.

        Returns:
            The response returned by run_agent_with_system.
        """
        # Retrieve relevant memories
        recalled = self.long_term.search(message)
        context = "\n".join(recalled) if recalled else ""

        # Add to short-term
        self.short_term.add("user", message)

        # Run with context
        system = ""
        if context:
            system = f"Relevant context:\n{context}"
        history = self.short_term.get_messages()
        response = run_agent_with_system(system, history)

        # Store response
        self.long_term.store(response)
        self.short_term.add("assistant", response)
        return response

# Next section: Error Handling
Agents fail. Plan for it.
Retry Logic
import time

from anthropic import APIError, RateLimitError


def run_with_retry(message: str, max_retries: int = 3) -> str:
    """Call run_agent, retrying on API failures.

    Rate-limit errors are retried after an exponentially growing wait
    (1s, 2s, 4s, ...). Other API errors are retried immediately and
    re-raised on the final attempt.

    Args:
        message: The user message passed to run_agent.
        max_retries: Total number of attempts.

    Returns:
        The result of the first successful run_agent call.

    Raises:
        APIError: If the final attempt fails with a non-rate-limit API error.
        Exception: "Max retries exceeded" once every attempt has been
            used; the last API error is attached as its cause.
    """
    last_error = None

    for attempt in range(max_retries):
        is_last_attempt = attempt == max_retries - 1
        try:
            return run_agent(message)
        except RateLimitError as e:
            last_error = e
            if is_last_attempt:
                # No retry follows, so waiting would only delay the failure.
                break
            wait = 2 ** attempt
            print(f"Rate limited. Waiting {wait}s...")
            time.sleep(wait)
        except APIError as e:
            last_error = e
            print(f"API error: {e}")
            if is_last_attempt:
                raise

    # Chain the underlying error so the traceback shows why retries ran out.
    raise Exception("Max retries exceeded") from last_error

# Next section: Loop Detection
def run_with_loop_detection(message: str, max_iterations: int = 20) -> str:
    """Run the agent loop, nudging the model when it repeats tool calls.

    Every tool-use response is reduced to a list of (tool name, input)
    pairs. If the same list appears among the last three recorded ones, a
    user message asking for a different approach is added instead of
    recording it again. Executing the tools themselves is elided in this
    snippet and is the same as in run_agent.

    Relies on client, MODEL, MAX_TOKENS, TOOLS and extract_text being in
    scope.

    Args:
        message: The user's request.
        max_iterations: Upper bound on model calls.

    Returns:
        The text of the first non-tool-use response, or
        "Agent reached max iterations" if the bound is hit first.
    """
    messages = [{"role": "user", "content": message}]
    seen_calls = []

    for _ in range(max_iterations):
        response = client.messages.create(
            model=MODEL,
            max_tokens=MAX_TOKENS,  # required by the Messages API
            tools=TOOLS,
            messages=messages,
        )

        if response.stop_reason != "tool_use":
            return extract_text(response)

        current = [
            (b.name, str(b.input))
            for b in response.content
            if b.type == "tool_use"
        ]

        # Check for loop
        if current in seen_calls[-3:]:
            messages.append({
                "role": "user",
                "content": "You're repeating yourself. Try a different approach or give your final answer.",
            })
        else:
            seen_calls.append(current)
            # Process tools normally...

    return "Agent reached max iterations"

# Next section: Safe Tool Execution
def execute_tool_safely(name: str, inputs: dict) -> str:
    """Run a tool and turn any exception into a message for the agent.

    Args:
        name: Tool name passed to execute_tool.
        inputs: Tool arguments passed to execute_tool.

    Returns:
        The tool's result, or a "Tool '<name>' failed: ..." message
        suggesting a different approach if the tool raised.
    """
    try:
        return execute_tool(name, inputs)
    except Exception as exc:
        return f"Tool '{name}' failed: {str(exc)}. Try a different approach."

# Next section: Complete Research Agent
Let's put it all together:
from dataclasses import dataclass
from typing import List
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


@dataclass
class Note:
    """Key facts extracted from one source."""

    source: str   # URL the content came from
    content: str  # extracted facts, as returned by the model


class ResearchAgent:
    """Researches a question by searching, taking notes, and synthesizing.

    _search and _fetch_and_extract are extension points you must
    implement. Anthropic, MODEL, MAX_TOKENS, extract_text and
    parse_queries are expected to be in scope.
    """

    def __init__(self):
        """Create the API client and empty note/visited collections."""
        self.client = Anthropic()
        self.notes: List[Note] = []
        self.visited: set = set()

    def research(self, question: str) -> str:
        """Research *question* and return a synthesized report.

        Each URL is fetched and summarized at most once, even if several
        queries return it.
        """
        logger.info("Researching: %s", question)

        # Phase 1: Generate search queries
        queries = self._generate_queries(question)

        # Phase 2: Search and gather
        for query in queries:
            for result in self._search(query):
                # Results are dicts (see _search), so index by key.
                url = result["url"]
                if url in self.visited:
                    continue
                content = self._fetch_and_extract(url)
                self._take_notes(url, content)
                self.visited.add(url)

        # Phase 3: Synthesize
        report = self._synthesize(question)

        logger.info(
            "Done. %d notes from %d sources", len(self.notes), len(self.visited)
        )
        return report

    def _generate_queries(self, question: str) -> List[str]:
        """Ask Claude for search queries and parse them into a list."""
        response = self.client.messages.create(
            model=MODEL,
            max_tokens=MAX_TOKENS,  # required by the Messages API
            system="Generate 3-5 search queries to research this question.",
            messages=[{"role": "user", "content": question}],
        )
        return parse_queries(extract_text(response))

    def _search(self, query: str) -> List[dict]:
        """Return search results for *query*.

        Implement with your search API. Each result must be a dict with
        at least a "url" key.
        """
        # Fail loudly: returning None here would surface later as an
        # unrelated "NoneType is not iterable" error in research().
        raise NotImplementedError("Implement _search with your search API")

    def _fetch_and_extract(self, url: str) -> str:
        """Fetch *url* and return the text worth taking notes on.

        Implement with your own fetching and extraction logic.
        """
        raise NotImplementedError("Implement _fetch_and_extract for your sources")

    def _take_notes(self, source: str, content: str):
        """Use Claude to extract key points from *content* and record them."""
        response = self.client.messages.create(
            model=MODEL,
            max_tokens=MAX_TOKENS,  # required by the Messages API
            system="Extract 3-5 key facts from this content.",
            messages=[{"role": "user", "content": content}],
        )
        self.notes.append(Note(source=source, content=extract_text(response)))

    def _synthesize(self, question: str) -> str:
        """Combine all notes into one answer to *question*."""
        notes_text = "\n\n".join(
            f"[{n.source}]\n{n.content}" for n in self.notes
        )
        response = self.client.messages.create(
            model=MODEL,
            max_tokens=MAX_TOKENS,  # required by the Messages API
            system="Synthesize these notes into a comprehensive answer. Cite sources.",
            messages=[{
                "role": "user",
                "content": f"Question: {question}\n\nNotes:\n{notes_text}",
            }],
        )
        return extract_text(response)

# Next section: Testing
Unit Tests
import pytest
from tools import calculate, execute_tool


def test_calculator():
    """calculate returns results as strings and reports bad input as an error."""
    expected_by_expression = {
        "2 + 2": "4",
        "sqrt(16)": "4.0",
    }
    for expression, expected in expected_by_expression.items():
        assert calculate(expression) == expected

    outcome = calculate("invalid")
    assert "Error" in outcome


def test_execute_tool():
    """execute_tool routes the calculator tool to calculate."""
    assert execute_tool("calculator", {"expression": "10 * 5"}) == "50"


def test_unknown_tool():
    """An unrecognized tool name yields an 'Unknown tool' message."""
    message = execute_tool("unknown", {})
    assert "Unknown" in message

# Next section: Integration Tests
def test_agent_math():
    """The agent's answer to a simple sum contains the correct number."""
    answer = run_agent("What is 15 + 27?")
    assert "42" in answer


def test_agent_multi_step():
    """The agent's answer to a two-step calculation contains the final value."""
    answer = run_agent("Calculate 10^2, then take the square root")
    assert "10" in answer

# Next section: Next Steps
Now that you can build agents:
- Using Agents — Master prompting and debugging
- Agent Products — Ship production systems
Practice Projects
| Project | Skills Practiced |
|---|---|
| File Organizer | File tools, categorization |
| Code Reviewer | Code analysis, multi-file |
| Meeting Summarizer | Text processing, extraction |
| Data Analyst | SQL, visualization |
Success
Start building! Take the calculator agent and add one more tool. Then another. Before you know it, you'll have a sophisticated multi-tool agent.